88 lines
2.7 KiB
C

/*
* Copyright (c) 2017, Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
/* Copyright 2015-2018 Espressif Systems (Shanghai) PTE LTD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* File adapted to use on IDF FreeRTOS component, extracted
* originally from zephyr RTOS code base:
* https://github.com/zephyrproject-rtos/zephyr/blob/dafd348/arch/xtensa/include/xtensa-asm2-s.h
*/
#ifndef __XT_ASM_UTILS_H
#define __XT_ASM_UTILS_H
/*
* SPILL_ALL_WINDOWS
*
* Spills all windowed registers (i.e. registers not visible as
* A0-A15) to their ABI-defined spill regions on the stack.
*
* Unlike the Xtensa HAL implementation, this code requires that the
* EXCM and WOE bit be enabled in PS, and relies on repeated hardware
* exception handling to do the register spills. The trick is to do a
* noop write to the high registers, which the hardware will trap
* (into an overflow exception) in the case where those registers are
* already used by an existing call frame. Then it rotates the window
* and repeats until all but the A0-A3 registers of the original frame
* are guaranteed to be spilled, eventually rotating back around into
* the original frame. Advantages:
*
* - Vastly smaller code size
*
* - More easily maintained if changes are needed to window over/underflow
* exception handling.
*
* - Requires no scratch registers to do its work, so can be used safely in any
* context.
*
* - If the WOE bit is not enabled (for example, in code written for
* the CALL0 ABI), this becomes a silent noop and operates compatbily.
*
* - Hilariously it's ACTUALLY FASTER than the HAL routine. And not
* just a little bit, it's MUCH faster. With a mostly full register
* file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
* registers with this vs. 279 (!) to do it with
* xthal_spill_windows().
*/
.macro SPILL_ALL_WINDOWS
#if XCHAL_NUM_AREGS == 64
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 4
#elif XCHAL_NUM_AREGS == 32
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a4, a4, a4
rotw 2
#else
#error Unrecognized XCHAL_NUM_AREGS
#endif
.endm
#endif