freertos(SMP): Refactor FPU handling on the Xtensa port of Amazon SMP FreeRTOS

This commit refactors the FPU handling code on the Xtensa port of Amazon SMP
FreeRTOS in the following ways:

Auto-pinning via XT_RTOS_CP_EXC_HOOK
------------------------------------

The "_xt_coproc_exc" exception would previously automatically pin a task that
uses the FPU to the current core (to ensure that we can lazy save the task's FPU
context). However, this would mean that "xtensa_vectors.S" would need to be
OS-aware (to read the task's TCB structure).

This is now refactored so that "_xt_coproc_exc" calls a CP exception hook
function ("XT_RTOS_CP_EXC_HOOK") implemented in "portasm.S", thus allowing
"xtensa_vectors.S" to remain OS agnostic.

Using macros to acquire owner spinlock
--------------------------------------

The taking and releasing of the "_xt_coproc_owner_sa_lock" is now mostly
abstracted via the "spinlock_take" and "spinlock_release" macros. As a result,
"_xt_coproc_release" and "_xt_coproc_exc" are refactored so that:

- They are closer to their upstream (original) versions
- The spinlock is only taken when building for multicore
- The spinlock-held region is shortened (it now only protects the instructions
  that access the "_xt_coproc_owner_sa" array); see the C sketch below
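
For reference, the take/release pair is semantically equivalent to the
following C sketch (assuming a single uint32_t lock word where 0 = free and
PRID = taken, and that the caller has already disabled interrupts):

    #include <stdint.h>

    /* Sketch of spinlock_take: s32c1i stores the new value only if the
       current value equals SCOMPARE1, and returns what it observed. */
    static void spinlock_take_sketch(volatile uint32_t *lock, uint32_t prid)
    {
        uint32_t seen;
        do {
            /* Compare-and-swap: expect 0 (free), try to write our PRID */
            seen = __sync_val_compare_and_swap(lock, 0u, prid);
        } while (seen != 0u);   /* 0 means the lock was free and is now ours */
    }

    /* Sketch of spinlock_release: a plain store of the free value */
    static void spinlock_release_sketch(volatile uint32_t *lock)
    {
        *lock = 0u;
    }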

Other Changes
-------------

- Updated placement and comments of various "offset_..." constants used by
  portasm.S
- Updated description of the "get_cpsa_from_tcb" assembly macro
- Tidied up some typos in the ".S" files
Darian Leung 2022-12-14 20:17:51 +08:00
parent fd48daf278
commit 9300bef9b8
6 changed files with 251 additions and 168 deletions

xtensa_rtos.h

@@ -170,6 +170,15 @@ The implementation may use only a2-4, a15 (all other regs must be preserved).
// void* XT_RTOS_CP_STATE(void)
#define XT_RTOS_CP_STATE _frxt_task_coproc_state
/*
RTOS provided hook function that is called on every coprocessor exception. May
only be called from assembly code by the 'call0' instruction.
The implementation may use only a2-4, a15 (all other regs must be preserved).
*/
// void XT_RTOS_CP_EXC_HOOK(void)
#if XCHAL_CP_NUM > 0
#define XT_RTOS_CP_EXC_HOOK _frxt_coproc_exc_hook
#endif
/*******************************************************************************

port.c

@@ -41,9 +41,25 @@
_Static_assert(portBYTE_ALIGNMENT == 16, "portBYTE_ALIGNMENT must be set to 16");
/*
OS state variables
*/
/* ---------------------------------------------------- Variables ------------------------------------------------------
* - Various variables used to maintain the FreeRTOS port's state. Used from both port.c and various .S files
* - Constant offsets are used by assembly to jump to particular TCB members or a stack area (such as the CPSA). We use
* C constants instead of preprocessor macros due to assembly lacking "offsetof()".
* ------------------------------------------------------------------------------------------------------------------ */
#if XCHAL_CP_NUM > 0
/* Offsets used to navigate to a task's CPSA on the stack */
const DRAM_ATTR uint32_t offset_pxEndOfStack = offsetof(StaticTask_t, pxDummy8);
const DRAM_ATTR uint32_t offset_cpsa = XT_CP_SIZE; /* Offset to start of the CPSA area on the stack. See uxInitialiseStackCPSA(). */
#if configNUM_CORES > 1
/* Offset to TCB_t.uxCoreAffinityMask member. Used to pin unpinned tasks that use the FPU. */
const DRAM_ATTR uint32_t offset_uxCoreAffinityMask = offsetof(StaticTask_t, uxDummy25);
#if configUSE_CORE_AFFINITY != 1
#error "configUSE_CORE_AFFINITY must be 1 on multicore targets with coprocessor support"
#endif
#endif /* configNUM_CORES > 1 */
#endif /* XCHAL_CP_NUM > 0 */
volatile unsigned port_xSchedulerRunning[portNUM_PROCESSORS] = {0}; // Indicates whether scheduler is running on a per-core basis
unsigned int port_interruptNesting[portNUM_PROCESSORS] = {0}; // Interrupt nesting level. Increased/decreased in portasm.c, _frxt_int_enter/_frxt_int_exit
//FreeRTOS SMP Locks
@@ -423,12 +439,6 @@ static void vPortTaskWrapper(TaskFunction_t pxCode, void *pvParameters)
}
#endif
const DRAM_ATTR uint32_t offset_pxEndOfStack = offsetof(StaticTask_t, pxDummy8);
#if ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
const DRAM_ATTR uint32_t offset_uxCoreAffinityMask = offsetof(StaticTask_t, uxDummy25);
#endif // ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
const DRAM_ATTR uint32_t offset_cpsa = XT_CP_SIZE;
/**
* @brief Align stack pointer in a downward growing stack
*
@@ -677,7 +687,7 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
/*
HIGH ADDRESS
|---------------------------| <- pxTopOfStack on entry
| Coproc Save Area |
| Coproc Save Area | (CPSA MUST BE FIRST)
| ------------------------- |
| TLS Variables |
| ------------------------- | <- Start of useable stack
@@ -697,7 +707,7 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0);
#if XCHAL_CP_NUM > 0
// Initialize the coprocessor save area
// Initialize the coprocessor save area. THIS MUST BE THE FIRST AREA due to access from _frxt_task_coproc_state()
uxStackPointer = uxInitialiseStackCPSA(uxStackPointer);
configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0);
#endif /* XCHAL_CP_NUM > 0 */
@@ -717,25 +727,25 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
// -------------------- Co-Processor -----------------------
#if ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xCoreID);
void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xTargetCoreID);
void vPortCleanUpCoprocArea( void *pxTCB )
{
UBaseType_t uxCoprocArea;
BaseType_t xCoreID;
BaseType_t xTargetCoreID;
/* Get pointer to the task's coprocessor save area from TCB->pxEndOfStack. See uxInitialiseStackCPSA() */
uxCoprocArea = ( UBaseType_t ) ( ( ( StaticTask_t * ) pxTCB )->pxDummy8 ); /* Get TCB_t.pxEndOfStack */
uxCoprocArea = STACKPTR_ALIGN_DOWN(16, uxCoprocArea - XT_CP_SIZE);
/* Extract core ID from the affinity mask */
xCoreID = ( ( StaticTask_t * ) pxTCB )->uxDummy25 ;
xCoreID = ( BaseType_t ) __builtin_ffs( ( int ) xCoreID );
assert( xCoreID >= 1 ); // __builtin_ffs always returns first set index + 1
xCoreID -= 1;
xTargetCoreID = ( ( StaticTask_t * ) pxTCB )->uxDummy25 ;
xTargetCoreID = ( BaseType_t ) __builtin_ffs( ( int ) xTargetCoreID );
assert( xTargetCoreID >= 1 ); // __builtin_ffs always returns first set index + 1
xTargetCoreID -= 1;
/* If task has live floating point registers somewhere, release them */
_xt_coproc_release( (void *)uxCoprocArea, xCoreID );
_xt_coproc_release( (void *)uxCoprocArea, xTargetCoreID );
}
#endif // ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
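
As a worked example of the affinity-mask decoding above (illustrative values
only):

    UBaseType_t uxMask = 1u << 1;   /* task pinned to core 1 -> mask 0x2 */
    BaseType_t xId = (BaseType_t) __builtin_ffs((int) uxMask);  /* = 2 */
    xId -= 1;                       /* = 1, the target core ID */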

portasm.S

@@ -34,32 +34,45 @@
#define TOPOFSTACK_OFFS 0x00 /* StackType_t *pxTopOfStack */
.extern pxCurrentTCBs
#if XCHAL_CP_NUM > 0
/* Offsets used to get a task's coprocessor save area (CPSA) from its TCB */
.extern offset_pxEndOfStack
.extern offset_cpsa
#if configNUM_CORES > 1
/* Offset to TCB_t.uxCoreAffinityMask member. Used to pin unpinned tasks that use the FPU. */
.extern offset_uxCoreAffinityMask
#endif /* configNUM_CORES > 1 */
#endif /* XCHAL_CP_NUM > 0 */
/*
Macro to get a task's coprocessor save area (CPSA) from its TCB
--------------------------------------------------------------------------------
Macro get_cpsa_from_tcb - get the pointer to a task's CPSA from its TCB
Entry:
- reg_A contains a pointer to the TCB
Exit:
- reg_A contains a pointer to the CPSA
- reg_B destroyed
Entry:
- "reg_A" contains a pointer to the task's TCB
Exit:
- "reg_A" contains pointer the the task's CPSA
- "reg_B" clobbered
The two arguments must be different AR registers.
--------------------------------------------------------------------------------
*/
#if XCHAL_CP_NUM > 0
.macro get_cpsa_from_tcb reg_A reg_B
// Get TCB.pxEndOfStack from reg_A
/* Get TCB.pxEndOfStack from reg_A */
movi \reg_B, offset_pxEndOfStack /* Move &offset_pxEndOfStack into reg_B */
l32i \reg_B, \reg_B, 0 /* Load offset_pxEndOfStack into reg_B */
add \reg_A, \reg_A, \reg_B /* Calculate &pxEndOfStack to reg_A (&TCB + offset_pxEndOfStack) */
l32i \reg_A, \reg_A, 0 /* Load TCB.pxEndOfStack into reg_A */
//Offset to start of coproc save area
/* Offset to start of CP save area */
movi \reg_B, offset_cpsa /* Move &offset_cpsa into reg_B */
l32i \reg_B, \reg_B, 0 /* Load offset_cpsa into reg_B */
sub \reg_A, \reg_A, \reg_B /* Subtract offset_cpsa from pxEndOfStack to get to start of CP save area (unaligned) */
//Align down start of CP save area to 16 byte boundary
/* Align down start of CP save area to 16 byte boundary */
movi \reg_B, ~(0xF)
and \reg_A, \reg_A, \reg_B /* Align CPSA pointer to 16 bytes */
and \reg_A, \reg_A, \reg_B /* Align CP save area pointer to 16 bytes */
.endm
#endif /* XCHAL_CP_NUM > 0 */
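
The address computation the macro performs is equivalent to this one-line C
sketch (same 16-byte align-down used by uxInitialiseStackCPSA()):

    /* Sketch: the CPSA starts XT_CP_SIZE bytes below pxEndOfStack, aligned
       down to a 16-byte boundary. */
    uint32_t cpsa = ((uint32_t) pxEndOfStack - XT_CP_SIZE) & ~(uint32_t) 0xF;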
.global port_IntStack
.global port_switch_flag //Required by sysview_tracing build
@@ -692,3 +705,54 @@ _frxt_task_coproc_state:
2: ret
#endif /* XCHAL_CP_NUM > 0 */
/*
**********************************************************************************************************
* _frxt_coproc_exc_hook
* void _frxt_coproc_exc_hook(void)
*
* Implements the Xtensa RTOS porting layer's XT_RTOS_CP_EXC_HOOK function for FreeRTOS.
*
* May only be called from assembly code by the 'call0' instruction. Does NOT obey ABI conventions.
* May only use a2-4, a15 (all other regs must be preserved).
* See the detailed description of the XT_RTOS_ENTER macro in xtensa_rtos.h.
*
**********************************************************************************************************
*/
#if XCHAL_CP_NUM > 0
.globl _frxt_coproc_exc_hook
.type _frxt_coproc_exc_hook,@function
.align 4
_frxt_coproc_exc_hook:
#if configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1
getcoreid a2 /* a2 = xCurCoreID */
/* if (port_xSchedulerRunning[xCurCoreID] == 0) */
movi a3, port_xSchedulerRunning
addx4 a3, a2, a3
l32i a3, a3, 0
beqz a3, 1f /* Scheduler hasn't started yet. Return. */
/* if (port_interruptNesting[xCurCoreID] != 0) */
movi a3, port_interruptNesting
addx4 a3, a2, a3
l32i a3, a3, 0
bnez a3, 1f /* We are in an interrupt. Return */
/* CP operations are incompatible with unpinned tasks. Thus we pin the task
to the current running core by updating its TCB.uxCoreAffinityMask field. */
movi a3, pxCurrentTCBs
addx4 a3, a2, a3
l32i a3, a3, 0 /* a3 = pxCurrentTCBs[xCurCoreID] */
movi a4, offset_uxCoreAffinityMask
l32i a4, a4, 0 /* a4 = offset_uxCoreAffinityMask */
add a3, a3, a4 /* a3 = &TCB.uxCoreAffinityMask */
ssl a2 /* Use xCurCoreID as left shift amount */
movi a4, 1
sll a4, a4 /* a4 = (1 << xCurCoreID) */
s32i a4, a3, 0 /* TCB.uxCoreAffinityMask = a4 = (1 << xCurCoreID) */
1:
#endif /* configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 */
ret
#endif /* XCHAL_CP_NUM > 0 */

xt_asm_utils.h

@@ -50,26 +50,88 @@
.macro SPILL_ALL_WINDOWS
#if XCHAL_NUM_AREGS == 64
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 4
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 4
#elif XCHAL_NUM_AREGS == 32
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a4, a4, a4
rotw 2
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a4, a4, a4
rotw 2
#else
#error Unrecognized XCHAL_NUM_AREGS
#endif
.endm
#endif
/*
--------------------------------------------------------------------------------
Macro spinlock_take
This macro will repeatedly attempt to atomically set a spinlock variable
using the s32c1i instruction. A spinlock is considered free if its value is 0.
Entry:
- "reg_A/B" as scratch registers
- "lock_var" spinlock variable's symbol
- Interrupts must already be disabled by caller
Exit:
- Spinlock set to current core's ID (PRID)
- "reg_A/B" clobbered
--------------------------------------------------------------------------------
*/
#if portNUM_PROCESSORS > 1
.macro spinlock_take reg_A reg_B lock_var
movi \reg_A, \lock_var /* reg_A = &lock_var */
.L_spinlock_loop:
movi \reg_B, 0 /* Load spinlock free value (0) into SCOMPARE1 */
wsr \reg_B, SCOMPARE1
rsync /* Ensure that SCOMPARE1 is set before s32c1i executes */
rsr \reg_B, PRID /* Load the current core's ID into reg_B */
s32c1i \reg_B, \reg_A, 0 /* Attempt *lock_var = reg_B */
bnez \reg_B, .L_spinlock_loop /* If the write was successful (i.e., lock was free), 0 will have been written back to reg_B */
.endm
#endif /* portNUM_PROCESSORS > 1 */
/*
--------------------------------------------------------------------------------
Macro spinlock_release
This macro will release a spinlock variable previously taken by the
spinlock_take macro.
Entry:
- "reg_A/B" as scratch registers
- "lock_var" spinlock variable's symbol
- Interrupts must already be disabled by caller
Exit:
- "reg_A/B" clobbered
--------------------------------------------------------------------------------
*/
#if portNUM_PROCESSORS > 1
.macro spinlock_release reg_A reg_B lock_var
movi \reg_A, \lock_var /* reg_A = &lock_var */
movi \reg_B, 0
s32i \reg_B, \reg_A, 0 /* Release the spinlock (*reg_A = 0) */
.endm
#endif /* portNUM_PROCESSORS > 1 */
#endif /* __XT_ASM_UTILS_H */

xtensa_context.S

@@ -397,18 +397,16 @@ May be called when a thread terminates or completes but does not delete
the co-proc save area, to avoid the exception handler having to save the
thread's co-proc state before another thread can use it (optimization).
Needs to be called on the processor the thread was running on. Unpinned threads
won't have an entry here because they get pinned as soon as they use a coprocessor.
Entry Conditions:
A2 = Pointer to base of co-processor state save area.
A3 = Core ID of the pinned task
A3 = Core ID of the task (must be pinned) whose coproc ownership we are
releasing.
Exit conditions:
None.
Obeys ABI conventions per prototype:
void _xt_coproc_release(void * coproc_sa_base, BaseType_t xCoreID)
void _xt_coproc_release(void * coproc_sa_base, BaseType_t xTargetCoreID)
*******************************************************************************/
@@ -421,43 +419,33 @@ Obeys ABI conventions per prototype:
.align 4
_xt_coproc_release:
ENTRY0 /* a2 = base of save area */
/* a3 = core ID */
/* a3 = xTargetCoreID */
rsil a7, XCHAL_EXCM_LEVEL /* lock interrupts */
movi a4, XCHAL_CP_MAX << 2 /* a4 = size of an owner array */
mull a4, a3, a4 /* a4 = offset to the owner array of the target core */
movi a3, _xt_coproc_owner_sa /* a3 = base of all owner arrays */
add a3, a3, a4 /* a3 = base of owner array of the target core */
addi a4, a3, XCHAL_CP_MAX << 2 /* a4 = top+1 of owner array of the target core */
movi a5, 0 /* a5 = 0 (unowned) */
/* Aquire spinlock before proceeding with the routine.
* Refer _xt_coproc_exc for details on the puspose of
* the _xt_coproc_owner_sa_lock lock and its intended use.
*/
.L_spinlock_loop:
mov a8, a3 /* Save a copy of the core ID in a8 */
movi a10, _xt_coproc_owner_sa_lock /* a10 = base address of lock variable */
addx4 a10, a8, a10 /* Use core ID in a8 to calculate the offset to the lock variable for the core */
movi a11, 0 /* a11 = 0 */
wsr a11, scompare1 /* scompare1 = a11 :- Expect the spinlock to be free (value = 0) */
movi a11, 1 /* a11 = 1 :- Write 1 to take the spinlock */
s32c1i a11, a10, 0 /* if (lock == scompare1) {tmp = lock; lock = a11; a11 = tmp} else {a11 = lock} */
bnez a11, .L_spinlock_loop /* if (a11 != 0) {loop} :- Keep spinning until the spinlock is available */
rsil a6, XCHAL_EXCM_LEVEL /* lock interrupts */
#if portNUM_PROCESSORS > 1
/* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock
* to ensure thread safe access of _xt_coproc_owner_sa between cores. */
spinlock_take a7 a8 _xt_coproc_owner_sa_lock
#endif /* portNUM_PROCESSORS > 1 */
movi a4, XCHAL_CP_MAX << 2
mull a3, a3, a4
movi a4, _xt_coproc_owner_sa /* a4 = base of owner array */
add a4, a4, a3
1: l32i a7, a3, 0 /* a7 = owner at a3 */
bne a2, a7, 2f /* if (coproc_sa_base == owner) */
s32i a5, a3, 0 /* owner = unowned */
2: addi a3, a3, 1<<2 /* a3 = next entry in owner array */
bltu a3, a4, 1b /* repeat until end of array */
addi a5, a4, XCHAL_CP_MAX << 2 /* a5 = top+1 of owner array */
movi a6, 0 /* a6 = 0 (unowned) */
1: l32i a8, a4, 0 /* a8 = owner at a4 */
bne a2, a8, 2f /* if (coproc_sa_base == owner) */
s32i a6, a4, 0 /* owner = unowned */
2: addi a4, a4, 1<<2 /* a4 = next entry in owner array */
bltu a4, a5, 1b /* repeat until end of array */
3: wsr a7, PS /* restore interrupts */
/* Release spinlock */
movi a11, 0 /* a11 = 0 */
s32ri a11, a10, 0 /* a10 = base address of lock variable. Write 0 to release the lock */
#if portNUM_PROCESSORS > 1
/* Release previously taken spinlock */
spinlock_release a7 a8 _xt_coproc_owner_sa_lock
#endif /* portNUM_PROCESSORS > 1 */
wsr a6, PS /* restore interrupts */
RET0
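
In C terms, the refactored routine now amounts to the following sketch. It
runs with interrupts masked and, on multicore, with the spinlock held around
the loop; the flat C view of "_xt_coproc_owner_sa" is an assumption for
illustration:

    /* Sketch of _xt_coproc_release's loop, mirroring the assembly above */
    extern void *_xt_coproc_owner_sa[];  /* portNUM_PROCESSORS * XCHAL_CP_MAX entries */

    void xt_coproc_release_sketch(void *coproc_sa_base, BaseType_t xTargetCoreID)
    {
        void **owner_array = &_xt_coproc_owner_sa[xTargetCoreID * XCHAL_CP_MAX];
        for (unsigned n = 0; n < XCHAL_CP_MAX; n++) {
            if (owner_array[n] == coproc_sa_base) {
                owner_array[n] = NULL;  /* coprocessor n is now unowned */
            }
        }
    }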

xtensa_vectors.S

@@ -102,15 +102,8 @@
#include "esp_private/panic_reason.h"
#include "sdkconfig.h"
#include "soc/soc.h"
#include "xt_asm_utils.h"
/*
Define for workaround: pin no-cpu-affinity tasks to a cpu when fpu is used.
Please change this when the tcb structure is changed
*/
.extern pxCurrentTCBs
#if ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
.extern offset_uxCoreAffinityMask
#endif // ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
/*
--------------------------------------------------------------------------------
@@ -859,22 +852,22 @@ _xt_coproc_owner_sa:
/* Spinlock per core for accessing _xt_coproc_owner_sa array
*
* 0 = Spinlock available
* 1 = Spinlock taken
* PRID = Spinlock taken
*
* The lock provides mutual exclusion for accessing the _xt_coproc_owner_sa array.
* This array can be modified by both _xt_coproc_exc and _xt_coproc_release routines
* simultaneously owing to the fact that the FreeRTOS SMP Kernel allows cross-core
* task deletion. Therefore, the same memory location in the owner save-area array
* could be modified at the same time.
* The array can be modified by multiple cores simultaneously (via _xt_coproc_exc
* and _xt_coproc_release). Therefore, this spinlock is defined to ensure thread
* safe access of the _xt_coproc_owner_sa array.
*/
#if portNUM_PROCESSORS > 1
.global _xt_coproc_owner_sa_lock
.type _xt_coproc_owner_sa_lock,@object
.align 16 /* minimize crossing cache boundaries */
_xt_coproc_owner_sa_lock:
.space (portNUM_PROCESSORS) << 2
.space 4
#endif /* portNUM_PROCESSORS > 1 */
.section .iram1,"ax"
.align 4
.L_goto_invalid:
j .L_xt_coproc_invalid /* not in a thread (invalid) */
@@ -924,51 +917,15 @@ _xt_coproc_exc:
s32i a4, sp, XT_STK_A4
s32i a15, sp, XT_STK_A15
/* Aquire spinlock before proceeding with the exception handler.
* (Refer _xt_coproc_release for competing routine for the lock.)
*
* [refactor-todo]: The spinlock aquire/release routine can be
* refactored in to a macro later if the need arises to use it
* at more than one place in the port assembler files.
*/
.L_spinlock_loop:
movi a2, _xt_coproc_owner_sa_lock /* a2 = base address of lock variable */
getcoreid a0 /* get the core ID in a0 to calculate the offset of the lock variable */
addx4 a2, a0, a2 /* a2 = address of desired lock variable */
movi a0, 0 /* a0 = 0 */
wsr a0, scompare1 /* scompare1 = a0 :- Expect the spinlock to be free (value = 0) */
movi a0, 1 /* a0 = 1 :- Write 1 to take the spinlock */
s32c1i a0, a2, 0 /* if (lock == scompare1) {tmp = lock; lock = a0; a0 = tmp} else {a0 = lock} */
bnez a0, .L_spinlock_loop /* if (a0 != 0) {loop} :- Keep spinning until the spinlock is available */
/* Call the RTOS coprocessor exception hook */
call0 XT_RTOS_CP_EXC_HOOK
/* Get co-processor state save area of new owner thread. */
call0 XT_RTOS_CP_STATE /* a15 = new owner's save area */
#if CONFIG_FREERTOS_FPU_IN_ISR
beqz a15, .L_skip_core_pin /* CP used in ISR, skip task pinning */
#else
#if !CONFIG_FREERTOS_FPU_IN_ISR
beqz a15, .L_goto_invalid /* not in a thread (invalid) */
#endif
#if ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
/* CP operations are incompatible with unpinned tasks. Thus we pin the task
to the current running core. */
movi a2, pxCurrentTCBs
getcoreid a3 /* a3 = current core ID */
addx4 a2, a3, a2
l32i a2, a2, 0 /* a2 = start of pxCurrentTCBs[cpuid] */
movi a4, offset_uxCoreAffinityMask
l32i a4, a4, 0 /* a4 = offset_uxCoreAffinityMask */
add a2, a2, a4 /* a2 = &TCB.uxCoreAffinityMask */
ssl a3 /* Use core ID as shift amount */
movi a4, 1
sll a4, a4 /* a4 = uxCoreAffinityMask = (1 << core ID) */
s32i a4, a2, 0 /* Store affinity mask to TCB.uxCoreAffinityMask */
#endif // ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
#if CONFIG_FREERTOS_FPU_IN_ISR
.L_skip_core_pin:
#endif
/* Enable the co-processor's bit in CPENABLE. */
movi a0, _xt_coproc_mask
rsr a4, CPENABLE /* a4 = CPENABLE */
@@ -978,17 +935,18 @@ _xt_coproc_exc:
or a4, a4, a2 /* a4 = CPENABLE | (1 << n) */
wsr a4, CPENABLE
/*
Keep loading _xt_coproc_owner_sa[n] atomic (=load once, then use that value
everywhere): _xt_coproc_release assumes it works like this in order not to need
locking.
*/
/* Grab correct xt_coproc_owner_sa for this core */
/* Grab the xt_coproc_owner_sa owner array for current core */
getcoreid a3 /* a3 = current core ID */
movi a2, XCHAL_CP_MAX << 2
mull a2, a2, a3 /* multiply by current processor id */
movi a3, _xt_coproc_owner_sa /* a3 = base of owner array */
add a3, a3, a2 /* a3 = owner area needed for this processor */
movi a2, XCHAL_CP_MAX << 2 /* a2 = size of an owner array */
mull a2, a2, a3 /* a2 = offset to the owner array of the current core */
movi a3, _xt_coproc_owner_sa /* a3 = base of all owner arrays */
add a3, a3, a2 /* a3 = base of owner array of the current core */
#if portNUM_PROCESSORS > 1
/* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock
* to ensure thread safe access of _xt_coproc_owner_sa between cores. */
spinlock_take a0 a2 _xt_coproc_owner_sa_lock
#endif /* portNUM_PROCESSORS > 1 */
/* Get old coprocessor owner thread (save area ptr) and assign new one. */
addx4 a3, a5, a3 /* a3 = &_xt_coproc_owner_sa[n] */
@@ -996,13 +954,21 @@ locking.
s32i a15, a3, 0 /* _xt_coproc_owner_sa[n] = new */
rsync /* ensure wsr.CPENABLE is complete */
#if portNUM_PROCESSORS > 1
/* Release previously taken spinlock */
spinlock_release a0 a2 _xt_coproc_owner_sa_lock
#endif /* portNUM_PROCESSORS > 1 */
/* Only need to context switch if new owner != old owner. */
/* If the FPU is used in ISRs, the check below must be removed, because on
 * return from an ISR we may have new == old, which is used to force the CP
 * restore for the next thread.
 * Todo: IDF-6418
 */
#ifndef CONFIG_FREERTOS_FPU_IN_ISR
beq a15, a2, .L_goto_done /* new owner == old, we're done */
#if !CONFIG_FREERTOS_FPU_IN_ISR
bne a15, a2, .L_switch_context
j .L_goto_done /* new owner == old, we're done */
.L_switch_context:
#endif
/* If no old owner then nothing to save. */
@@ -1072,14 +1038,6 @@ locking.
/* Restore interruptee's saved registers. */
/* Can omit rsync for wsr.CPENABLE here because _xt_user_exit does it. */
.L_xt_coproc_done:
/* Release spinlock */
movi a2, _xt_coproc_owner_sa_lock /* a2 = base address of the lock variable */
getcoreid a0 /* a0 = core ID to calculate the offset of the lock variable */
addx4 a2, a0, a2 /* a2 = address of the lock variable */
movi a0, 0 /* a0 = 0 */
s32ri a0, a2, 0 /* a2 = a0 :- Write 0 to release the lock */
l32i a15, sp, XT_STK_A15
l32i a5, sp, XT_STK_A5
l32i a4, sp, XT_STK_A4
@@ -1107,14 +1065,6 @@ locking.
/* Co-processor exception occurred outside a thread (not supported). */
.L_xt_coproc_invalid:
/* Release spinlock */
movi a2, _xt_coproc_owner_sa_lock /* a2 = base address of the lock variable */
getcoreid a0 /* a0 = core ID to calculate the offset of the lock variable */
addx4 a2, a0, a2 /* a2 = address of the lock variable */
movi a0, 0 /* a0 = 0 */
s32ri a0, a2, 0 /* a2 = a0 :- Write 0 to release the lock */
movi a0,PANIC_RSN_COPROCEXCEPTION
wsr a0,EXCCAUSE
call0 _xt_panic /* not in a thread (invalid) */
@@ -1735,7 +1685,7 @@ _Level6Vector:
.global xt_nmi
.align 4
_NMIExceptionVector:
wsr a0, EXCSAVE + XCHAL_NMILEVEL _ /* preserve a0 */
wsr a0, EXCSAVE + XCHAL_NMILEVEL /* preserve a0 */
call0 xt_nmi /* load interrupt handler */
/* never returns here - call0 is used as a jump (see note at top) */
@@ -1856,9 +1806,9 @@ _xt_alloca_exc:
wsr a2, PS /* update PS.OWB to new window base */
rsync
_bbci.l a4, 31, _WindowUnderflow4
bbci.l a4, 31, _WindowUnderflow4
rotw -1 /* original a0 goes to a8 */
_bbci.l a8, 30, _WindowUnderflow8
bbci.l a8, 30, _WindowUnderflow8
rotw -1
j _WindowUnderflow12