Mirror of https://github.com/espressif/esp-idf.git
9300bef9b8
This commit refactors the FPU handling code on the Xtensa port of Amazon SMP FreeRTOS in the following ways:

Auto-pinning via XT_RTOS_CP_EXC_HOOK
------------------------------------

The "_xt_coproc_exc" exception would previously automatically pin a task that uses the FPU to the current core (to ensure that the task's FPU context can be lazy-saved). However, this meant that "xtensa_vectors.S" had to be OS-aware (to read the task's TCB structure). This is now refactored so that "_xt_coproc_exc" calls a CP exception hook function ("XT_RTOS_CP_EXC_HOOK") implemented in "portasm.S", thus allowing "xtensa_vectors.S" to remain OS-agnostic.

Using macros to acquire owner spinlock
--------------------------------------

The taking and releasing of the "_xt_coproc_owner_sa_lock" is now mostly abstracted via the "spinlock_take" and "spinlock_release" macros. As a result, "_xt_coproc_release" and "_xt_coproc_exc" are refactored so that:

- They are closer to their upstream (original) versions
- The spinlock is only taken when building for multicore
- The spinlock-held region is shortened (it now only protects the instructions that access the "_xt_coproc_owner_sa" array)

Other Changes
-------------

- Updated the placement and comments of various "offset_..." constants used by portasm.S
- Updated the description of the "get_cpsa_from_tcb" assembly macro
- Tidied up some typos in the ".S" files
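For illustration only (this sketch is not taken from the commit itself): per the description above, the shortened critical section around the "_xt_coproc_owner_sa" access would look roughly as follows, with the lock compiled out entirely on single-core builds. The scratch registers and the surrounding instructions are hypothetical; only the macro names and the lock/array symbols come from the commit message.

    #if portNUM_PROCESSORS > 1
        spinlock_take       a8, a9, _xt_coproc_owner_sa_lock    /* taken only on multicore builds */
    #endif
        /* ...the few instructions that read/update the _xt_coproc_owner_sa entry... */
    #if portNUM_PROCESSORS > 1
        spinlock_release    a8, a9, _xt_coproc_owner_sa_lock
    #endif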
138 lines
4.2 KiB
C
/*
 * SPDX-FileCopyrightText: 2017, Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * SPDX-FileContributor: 2016-2022 Espressif Systems (Shanghai) CO LTD
 */

/* File adapted to use on IDF FreeRTOS component, extracted
 * originally from zephyr RTOS code base:
 * https://github.com/zephyrproject-rtos/zephyr/blob/dafd348/arch/xtensa/include/xtensa-asm2-s.h
 */

#ifndef __XT_ASM_UTILS_H
#define __XT_ASM_UTILS_H

/*
 * SPILL_ALL_WINDOWS
 *
 * Spills all windowed registers (i.e. registers not visible as
 * A0-A15) to their ABI-defined spill regions on the stack.
 *
 * Unlike the Xtensa HAL implementation, this code requires that the
 * EXCM and WOE bit be enabled in PS, and relies on repeated hardware
 * exception handling to do the register spills. The trick is to do a
 * noop write to the high registers, which the hardware will trap
 * (into an overflow exception) in the case where those registers are
 * already used by an existing call frame. Then it rotates the window
 * and repeats until all but the A0-A3 registers of the original frame
 * are guaranteed to be spilled, eventually rotating back around into
 * the original frame. Advantages:
 *
 * - Vastly smaller code size
 *
 * - More easily maintained if changes are needed to window over/underflow
 *   exception handling.
 *
 * - Requires no scratch registers to do its work, so can be used safely in any
 *   context.
 *
 * - If the WOE bit is not enabled (for example, in code written for
 *   the CALL0 ABI), this becomes a silent noop and operates compatibly.
 *
 * - Hilariously it's ACTUALLY FASTER than the HAL routine. And not
 *   just a little bit, it's MUCH faster. With a mostly full register
 *   file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
 *   registers with this vs. 279 (!) to do it with
 *   xthal_spill_windows().
 */

.macro SPILL_ALL_WINDOWS
#if XCHAL_NUM_AREGS == 64
    and a12, a12, a12
    rotw 3
    and a12, a12, a12
    rotw 3
    and a12, a12, a12
    rotw 3
    and a12, a12, a12
    rotw 3
    and a12, a12, a12
    rotw 4
#elif XCHAL_NUM_AREGS == 32
    and a12, a12, a12
    rotw 3
    and a12, a12, a12
    rotw 3
    and a4, a4, a4
    rotw 2
#else
#error Unrecognized XCHAL_NUM_AREGS
#endif
.endm

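/* Note added for clarity (not part of the original comment above): each
 * "rotw N" rotates the register window by 4*N physical registers, and each
 * "and aN, aN, aN" is the no-op write that forces a window-overflow spill of
 * whichever frame currently occupies those registers. The rotation amounts
 * sum to the whole register file, i.e. 4*(3+3+3+3+4) = 64 and 4*(3+3+2) = 32,
 * so after the final "rotw" the window is back on the caller's original
 * A0-A3 frame.
 */
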
/*
--------------------------------------------------------------------------------
    Macro spinlock_take

    This macro will repeatedly attempt to atomically set a spinlock variable
    using the s32c1i instruction. A spinlock is considered free if its value is 0.

    Entry:
    - "reg_A/B" as scratch registers
    - "lock_var" spinlock variable's symbol
    - Interrupts must already be disabled by caller
    Exit:
    - Spinlock set to current core's ID (PRID)
    - "reg_A/B" clobbered
--------------------------------------------------------------------------------
*/

#if portNUM_PROCESSORS > 1

.macro spinlock_take reg_A reg_B lock_var

    movi    \reg_A, \lock_var               /* reg_A = &lock_var */
.L_spinlock_loop:
    movi    \reg_B, 0                       /* Load spinlock free value (0) into SCOMPARE1 */
    wsr     \reg_B, SCOMPARE1
    rsync                                   /* Ensure that SCOMPARE1 is set before s32c1i executes */
    rsr     \reg_B, PRID                    /* Load the current core's ID into reg_B */
    s32c1i  \reg_B, \reg_A, 0               /* Attempt *lock_var = reg_B */
    bnez    \reg_B, .L_spinlock_loop        /* If the write was successful (i.e., lock was free), 0 will have been written back to reg_B */

.endm

#endif /* portNUM_PROCESSORS > 1 */
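/* Note added for clarity (relying on the documented S32C1I semantics rather
 * than anything stated in this file): s32c1i compares the word at the target
 * address with SCOMPARE1 and performs the store only if they match, and in
 * either case it writes the memory word's previous value back into the source
 * register. Hence reg_B == 0 after the s32c1i above means the lock was free
 * and now holds this core's PRID, while a non-zero reg_B is the owning core's
 * ID and the macro retries.
 */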

/*
--------------------------------------------------------------------------------
    Macro spinlock_release

    This macro will release a spinlock variable previously taken by the
    spinlock_take macro.

    Entry:
    - "reg_A/B" as scratch registers
    - "lock_var" spinlock variable's symbol
    - Interrupts must already be disabled by caller
    Exit:
    - "reg_A/B" clobbered
--------------------------------------------------------------------------------
*/

#if portNUM_PROCESSORS > 1

.macro spinlock_release reg_A reg_B lock_var

    movi    \reg_A, \lock_var               /* reg_A = &lock_var */
    movi    \reg_B, 0
    s32i    \reg_B, \reg_A, 0               /* Release the spinlock (*reg_A = 0) */

.endm

#endif /* portNUM_PROCESSORS > 1 */
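
/* Illustrative usage sketch (not part of the original header). "example_lock"
 * is an invented symbol: the lock is just a zero-initialised 32-bit word, the
 * scratch registers are arbitrary, and interrupts are assumed to already be
 * disabled as both macros require.
 *
 *      .data
 *      .align  4
 *  example_lock:
 *      .word   0                           // 0 == lock is free
 *
 *      .text
 *  #if portNUM_PROCESSORS > 1
 *      spinlock_take       a6, a7, example_lock
 *  #endif
 *      // ... access the data protected by example_lock ...
 *  #if portNUM_PROCESSORS > 1
 *      spinlock_release    a6, a7, example_lock
 *  #endif
 */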

#endif /* __XT_ASM_UTILS_H */