freertos(SMP): Refactor FPU handling on the Xtensa port of Amazon SMP FreeRTOS
This commit refactors the FPU handling code on the Xtensa port of Amazon SMP FreeRTOS in the following ways:

Auto-pinning via XT_RTOS_CP_EXC_HOOK
------------------------------------
The "_xt_coproc_exc" exception handler would previously pin a task that uses the FPU to the current core automatically (to ensure that the task's FPU context can be saved lazily). However, this meant that "xtensa_vectors.S" had to be OS-aware (to read the task's TCB structure). This is now refactored so that "_xt_coproc_exc" calls a CP exception hook function ("XT_RTOS_CP_EXC_HOOK") implemented in "portasm.S", thus allowing "xtensa_vectors.S" to remain OS-agnostic.

Using macros to acquire owner spinlock
--------------------------------------
The taking and releasing of "_xt_coproc_owner_sa_lock" is now mostly abstracted by the "spinlock_take" and "spinlock_release" macros. As a result, "_xt_coproc_release" and "_xt_coproc_exc" are refactored so that:

- They are closer to their upstream (original) versions
- The spinlock is only taken when building for multicore
- The spinlock-held region is shortened (it now only protects the instructions that access the "_xt_coproc_owner_sa" array)

Other Changes
-------------
- Updated the placement of, and comments for, the various "offset_..." constants used by portasm.S
- Updated the description of the "get_cpsa_from_tcb" assembly macro
- Tidied up some typos in the ".S" files
This commit is contained in:
parent fd48daf278
commit 9300bef9b8
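
For reference, the auto-pinning behavior described above corresponds roughly to the following C logic (an illustrative sketch only; the actual hook is the _frxt_coproc_exc_hook assembly routine added to portasm.S in this diff, and xPortGetCoreID() stands in for the getcoreid assembly macro):

    #include "freertos/FreeRTOS.h"   /* for the FreeRTOS types used below */

    /* Sketch of what _frxt_coproc_exc_hook does, expressed in C */
    void frxt_coproc_exc_hook_sketch(void)
    {
    #if configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1
        const BaseType_t xCurCoreID = xPortGetCoreID();

        if (port_xSchedulerRunning[xCurCoreID] == 0) {
            return; /* Scheduler hasn't started yet */
        }
        if (port_interruptNesting[xCurCoreID] != 0) {
            return; /* CP exception occurred inside an ISR; nothing to pin */
        }
        /* CP operations are incompatible with unpinned tasks, so pin the
         * currently running task to this core by writing a single-core
         * affinity mask into its TCB (uxDummy25 aliases
         * TCB_t.uxCoreAffinityMask from the port layer's perspective). */
        StaticTask_t *pxTCB = (StaticTask_t *)pxCurrentTCBs[xCurCoreID];
        pxTCB->uxDummy25 = (UBaseType_t)(1 << xCurCoreID);
    #endif
    }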
@@ -170,6 +170,15 @@ The implementation may use only a2-4, a15 (all other regs must be preserved).
// void* XT_RTOS_CP_STATE(void)
#define XT_RTOS_CP_STATE _frxt_task_coproc_state

/*
RTOS provided hook function that is called on every coprocessor exception. May
only be called from assembly code and by the 'call0' instruction.
The implementation may use only a2-4, a15 (all other regs must be preserved).
*/
// void XT_RTOS_CP_EXC_HOOK(void)
#if XCHAL_CP_NUM > 0
#define XT_RTOS_CP_EXC_HOOK _frxt_coproc_exc_hook
#endif

/*******************************************************************************
@@ -41,9 +41,25 @@

_Static_assert(portBYTE_ALIGNMENT == 16, "portBYTE_ALIGNMENT must be set to 16");

/*
OS state variables
*/
/* ---------------------------------------------------- Variables ------------------------------------------------------
 * - Various variables used to maintain the FreeRTOS port's state. Used from both port.c and various .S files
 * - Constant offsets are used by assembly to jump to particular TCB members or a stack area (such as the CPSA). We use
 *   C constants instead of preprocessor macros due to assembly lacking "offsetof()".
 * ------------------------------------------------------------------------------------------------------------------ */

#if XCHAL_CP_NUM > 0
/* Offsets used to navigate to a task's CPSA on the stack */
const DRAM_ATTR uint32_t offset_pxEndOfStack = offsetof(StaticTask_t, pxDummy8);
const DRAM_ATTR uint32_t offset_cpsa = XT_CP_SIZE; /* Offset to start of the CPSA area on the stack. See uxInitialiseStackCPSA(). */
#if configNUM_CORES > 1
/* Offset to TCB_t.uxCoreAffinityMask member. Used to pin unpinned tasks that use the FPU. */
const DRAM_ATTR uint32_t offset_uxCoreAffinityMask = offsetof(StaticTask_t, uxDummy25);
#if configUSE_CORE_AFFINITY != 1
#error "configUSE_CORE_AFFINITY must be 1 on multicore targets with coprocessor support"
#endif
#endif /* configNUM_CORES > 1 */
#endif /* XCHAL_CP_NUM > 0 */

volatile unsigned port_xSchedulerRunning[portNUM_PROCESSORS] = {0}; // Indicates whether scheduler is running on a per-core basis
unsigned int port_interruptNesting[portNUM_PROCESSORS] = {0}; // Interrupt nesting level. Increased/decreased in portasm.c, _frxt_int_enter/_frxt_int_exit
//FreeRTOS SMP Locks
@@ -423,12 +439,6 @@ static void vPortTaskWrapper(TaskFunction_t pxCode, void *pvParameters)
}
#endif

const DRAM_ATTR uint32_t offset_pxEndOfStack = offsetof(StaticTask_t, pxDummy8);
#if ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
const DRAM_ATTR uint32_t offset_uxCoreAffinityMask = offsetof(StaticTask_t, uxDummy25);
#endif // ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
const DRAM_ATTR uint32_t offset_cpsa = XT_CP_SIZE;

/**
 * @brief Align stack pointer in a downward growing stack
 *
@@ -677,7 +687,7 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
    /*
    HIGH ADDRESS
    |---------------------------| <- pxTopOfStack on entry
    | Coproc Save Area          |
    | Coproc Save Area          | (CPSA MUST BE FIRST)
    | ------------------------- |
    | TLS Variables             |
    | ------------------------- | <- Start of usable stack

@@ -697,7 +707,7 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
    configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0);

#if XCHAL_CP_NUM > 0
    // Initialize the coprocessor save area
    // Initialize the coprocessor save area. THIS MUST BE THE FIRST AREA due to access from _frxt_task_coproc_state()
    uxStackPointer = uxInitialiseStackCPSA(uxStackPointer);
    configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0);
#endif /* XCHAL_CP_NUM > 0 */
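
The 16-byte alignment asserted above is maintained with a mask-based align-down helper. Below is a minimal sketch of the idea behind the STACKPTR_ALIGN_DOWN() macro used elsewhere in this diff (the exact definition in the port sources may differ):

    /* Align a value (e.g., a downward-growing stack pointer) down to the
     * nearest n-byte boundary. n must be a power of two. */
    #define STACKPTR_ALIGN_DOWN(n, ptr)    ((ptr) & ~((n) - 1))

    /* Example: carving the 16-byte aligned CPSA from the top of the stack */
    uxStackPointer = STACKPTR_ALIGN_DOWN(16, uxStackPointer - XT_CP_SIZE);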
@@ -717,25 +727,25 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack,
// -------------------- Co-Processor -----------------------
#if ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )

void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xCoreID);
void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xTargetCoreID);

void vPortCleanUpCoprocArea( void *pxTCB )
{
    UBaseType_t uxCoprocArea;
    BaseType_t xCoreID;
    BaseType_t xTargetCoreID;

    /* Get pointer to the task's coprocessor save area from TCB->pxEndOfStack. See uxInitialiseStackCPSA() */
    uxCoprocArea = ( UBaseType_t ) ( ( ( StaticTask_t * ) pxTCB )->pxDummy8 ); /* Get TCB_t.pxEndOfStack */
    uxCoprocArea = STACKPTR_ALIGN_DOWN(16, uxCoprocArea - XT_CP_SIZE);

    /* Extract core ID from the affinity mask */
    xCoreID = ( ( StaticTask_t * ) pxTCB )->uxDummy25 ;
    xCoreID = ( BaseType_t ) __builtin_ffs( ( int ) xCoreID );
    assert( xCoreID >= 1 ); // __builtin_ffs always returns first set index + 1
    xCoreID -= 1;
    xTargetCoreID = ( ( StaticTask_t * ) pxTCB )->uxDummy25 ;
    xTargetCoreID = ( BaseType_t ) __builtin_ffs( ( int ) xTargetCoreID );
    assert( xTargetCoreID >= 1 ); // __builtin_ffs always returns first set index + 1
    xTargetCoreID -= 1;

    /* If task has live floating point registers somewhere, release them */
    _xt_coproc_release( (void *)uxCoprocArea, xCoreID );
    _xt_coproc_release( (void *)uxCoprocArea, xTargetCoreID );
}
#endif // ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
@@ -34,32 +34,45 @@
#define TOPOFSTACK_OFFS 0x00 /* StackType_t *pxTopOfStack */

.extern pxCurrentTCBs
#if XCHAL_CP_NUM > 0
/* Offsets used to get a task's coprocessor save area (CPSA) from its TCB */
.extern offset_pxEndOfStack
.extern offset_cpsa
#if configNUM_CORES > 1
/* Offset to TCB_t.uxCoreAffinityMask member. Used to pin unpinned tasks that use the FPU. */
.extern offset_uxCoreAffinityMask
#endif /* configNUM_CORES > 1 */
#endif /* XCHAL_CP_NUM > 0 */

/*
Macro to get a task's coprocessor save area (CPSA) from its TCB
--------------------------------------------------------------------------------
Macro get_cpsa_from_tcb - get the pointer to a task's CPSA from its TCB

Entry:
- reg_A contains a pointer to the TCB
Exit:
- reg_A contains a pointer to the CPSA
- reg_B destroyed
Entry:
- "reg_A" contains a pointer to the task's TCB
Exit:
- "reg_A" contains a pointer to the task's CPSA
- "reg_B" clobbered

The two arguments must be different AR registers.
--------------------------------------------------------------------------------
*/
#if XCHAL_CP_NUM > 0
.macro get_cpsa_from_tcb reg_A reg_B
    // Get TCB.pxEndOfStack from reg_A
    /* Get TCB.pxEndOfStack from reg_A */
    movi \reg_B, offset_pxEndOfStack    /* Move &offset_pxEndOfStack into reg_B */
    l32i \reg_B, \reg_B, 0              /* Load offset_pxEndOfStack into reg_B */
    add \reg_A, \reg_A, \reg_B          /* Calculate &pxEndOfStack to reg_A (&TCB + offset_pxEndOfStack) */
    l32i \reg_A, \reg_A, 0              /* Load TCB.pxEndOfStack into reg_A */
    //Offset to start of coproc save area
    /* Offset to start of CP save area */
    movi \reg_B, offset_cpsa            /* Move &offset_cpsa into reg_B */
    l32i \reg_B, \reg_B, 0              /* Load offset_cpsa into reg_B */
    sub \reg_A, \reg_A, \reg_B          /* Subtract offset_cpsa from pxEndOfStack to get to start of CP save area (unaligned) */
    //Align down start of CP save area to 16 byte boundary
    /* Align down start of CP save area to 16 byte boundary */
    movi \reg_B, ~(0xF)
    and \reg_A, \reg_A, \reg_B          /* Align CPSA pointer to 16 bytes */
    and \reg_A, \reg_A, \reg_B          /* Align CP save area pointer to 16 bytes */
.endm
#endif /* XCHAL_CP_NUM > 0 */

.global port_IntStack
.global port_switch_flag //Required by sysview_tracing build
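
For readers more comfortable with C, the pointer arithmetic performed by get_cpsa_from_tcb is sketched below (illustrative only; prvGetCPSAFromTCB is a hypothetical name, and the computation mirrors the one done in C by vPortCleanUpCoprocArea() in port.c above):

    /* C sketch of get_cpsa_from_tcb: TCB pointer in, CPSA pointer out */
    static inline void *prvGetCPSAFromTCB(const StaticTask_t *pxTCB)
    {
        uintptr_t uxCPSA = (uintptr_t)pxTCB->pxDummy8; /* TCB_t.pxEndOfStack */
        uxCPSA -= XT_CP_SIZE;      /* Step down to the (unaligned) start of the CP save area */
        uxCPSA &= ~(uintptr_t)0xF; /* Align down to a 16-byte boundary */
        return (void *)uxCPSA;
    }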
@@ -692,3 +705,54 @@ _frxt_task_coproc_state:
2:  ret

#endif /* XCHAL_CP_NUM > 0 */

/*
**********************************************************************************************************
* _frxt_coproc_exc_hook
* void _frxt_coproc_exc_hook(void)
*
* Implements the Xtensa RTOS porting layer's XT_RTOS_CP_EXC_HOOK function for FreeRTOS.
*
* May only be called from assembly code by the 'call0' instruction. Does NOT obey ABI conventions.
* May only use a2-4, a15 (all other regs must be preserved).
* See the detailed description of the XT_RTOS_ENTER macro in xtensa_rtos.h.
*
**********************************************************************************************************
*/
#if XCHAL_CP_NUM > 0

    .globl _frxt_coproc_exc_hook
    .type _frxt_coproc_exc_hook,@function
    .align 4
_frxt_coproc_exc_hook:

#if configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1
    getcoreid a2                        /* a2 = xCurCoreID */
    /* if (port_xSchedulerRunning[xCurCoreID] == 0) */
    movi a3, port_xSchedulerRunning
    addx4 a3, a2, a3
    l32i a3, a3, 0
    beqz a3, 1f                         /* Scheduler hasn't started yet. Return. */
    /* if (port_interruptNesting[xCurCoreID] != 0) */
    movi a3, port_interruptNesting
    addx4 a3, a2, a3
    l32i a3, a3, 0
    bnez a3, 1f                         /* We are in an interrupt. Return. */
    /* CP operations are incompatible with unpinned tasks. Thus we pin the task
       to the current running core by updating its TCB.uxCoreAffinityMask field. */
    movi a3, pxCurrentTCBs
    addx4 a3, a2, a3
    l32i a3, a3, 0                      /* a3 = pxCurrentTCBs[xCurCoreID] */
    movi a4, offset_uxCoreAffinityMask
    l32i a4, a4, 0                      /* a4 = offset_uxCoreAffinityMask */
    add a3, a3, a4                      /* a3 = &TCB.uxCoreAffinityMask */
    ssl a2                              /* Use xCurCoreID as left shift amount */
    movi a4, 1
    sll a4, a4                          /* a4 = (1 << xCurCoreID) */
    s32i a4, a3, 0                      /* TCB.uxCoreAffinityMask = a4 = (1 << xCurCoreID) */
1:
#endif /* configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 */

    ret

#endif /* XCHAL_CP_NUM > 0 */
@@ -50,26 +50,88 @@

.macro SPILL_ALL_WINDOWS
#if XCHAL_NUM_AREGS == 64
    and a12, a12, a12
    rotw 3
    and a12, a12, a12
    rotw 3
    and a12, a12, a12
    rotw 3
    and a12, a12, a12
    rotw 3
    and a12, a12, a12
    rotw 4
#elif XCHAL_NUM_AREGS == 32
    and a12, a12, a12
    rotw 3
    and a12, a12, a12
    rotw 3
    and a4, a4, a4
    rotw 2
#else
#error Unrecognized XCHAL_NUM_AREGS
#endif
.endm

#endif

/*
--------------------------------------------------------------------------------
Macro spinlock_take

This macro will repeatedly attempt to atomically set a spinlock variable
using the s32c1i instruction. A spinlock is considered free if its value is 0.

Entry:
- "reg_A/B" as scratch registers
- "lock_var" spinlock variable's symbol
- Interrupts must already be disabled by caller
Exit:
- Spinlock set to current core's ID (PRID)
- "reg_A/B" clobbered
--------------------------------------------------------------------------------
*/

#if portNUM_PROCESSORS > 1

.macro spinlock_take reg_A reg_B lock_var

    movi \reg_A, \lock_var              /* reg_A = &lock_var */
.L_spinlock_loop:
    movi \reg_B, 0                      /* Load spinlock free value (0) into SCOMPARE1 */
    wsr \reg_B, SCOMPARE1
    rsync                               /* Ensure that SCOMPARE1 is set before s32c1i executes */
    rsr \reg_B, PRID                    /* Load the current core's ID into reg_B */
    s32c1i \reg_B, \reg_A, 0            /* Attempt *lock_var = reg_B */
    bnez \reg_B, .L_spinlock_loop       /* If the write was successful (i.e., the lock was free), 0 will have been written back to reg_B */

.endm

#endif /* portNUM_PROCESSORS > 1 */

/*
--------------------------------------------------------------------------------
Macro spinlock_release

This macro will release a spinlock variable previously taken by the
spinlock_take macro.

Entry:
- "reg_A/B" as scratch registers
- "lock_var" spinlock variable's symbol
- Interrupts must already be disabled by caller
Exit:
- "reg_A/B" clobbered
--------------------------------------------------------------------------------
*/

#if portNUM_PROCESSORS > 1

.macro spinlock_release reg_A reg_B lock_var

    movi \reg_A, \lock_var              /* reg_A = &lock_var */
    movi \reg_B, 0
    s32i \reg_B, \reg_A, 0              /* Release the spinlock (*reg_A = 0) */

.endm

#endif /* portNUM_PROCESSORS > 1 */

#endif /* __XT_ASM_UTILS_H */
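
In C terms, the two macros behave roughly like the compare-and-swap loop below (a sketch using a GCC atomic builtin; the real macros use the Xtensa SCOMPARE1 special register with the s32c1i conditional-store instruction, and both assume the caller has already disabled interrupts):

    #include <stdint.h>

    /* Sketch of spinlock_take: atomically transition the lock from 0 (free)
     * to the current core's PRID, spinning until the transition succeeds. */
    static inline void spinlock_take_sketch(volatile uint32_t *lock_var, uint32_t prid)
    {
        while (__sync_val_compare_and_swap(lock_var, 0, prid) != 0) {
            /* Another core holds the lock; keep spinning */
        }
    }

    /* Sketch of spinlock_release: a plain store marks the lock free again */
    static inline void spinlock_release_sketch(volatile uint32_t *lock_var)
    {
        *lock_var = 0;
    }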
@@ -397,18 +397,16 @@ May be called when a thread terminates or completes but does not delete
the co-proc save area, to avoid the exception handler having to save the
thread's co-proc state before another thread can use it (optimization).

Needs to be called on the processor the thread was running on. Unpinned threads
won't have an entry here because they get pinned as soon as they use a coprocessor.

Entry Conditions:
    A2 = Pointer to base of co-processor state save area.
    A3 = Core ID of the pinned task
    A3 = Core ID of the task (must be pinned) whose coproc ownership we are
         releasing.

Exit conditions:
    None.

Obeys ABI conventions per prototype:
    void _xt_coproc_release(void * coproc_sa_base, BaseType_t xCoreID)
    void _xt_coproc_release(void * coproc_sa_base, BaseType_t xTargetCoreID)

*******************************************************************************/
@@ -421,43 +419,33 @@ Obeys ABI conventions per prototype:
    .align 4
_xt_coproc_release:
    ENTRY0                              /* a2 = base of save area */
                                        /* a3 = core ID */
                                        /* a3 = xTargetCoreID */

    rsil a7, XCHAL_EXCM_LEVEL           /* lock interrupts */
    movi a4, XCHAL_CP_MAX << 2          /* a4 = size of an owner array */
    mull a4, a3, a4                     /* a4 = offset to the owner array of the target core */
    movi a3, _xt_coproc_owner_sa        /* a3 = base of all owner arrays */
    add a3, a3, a4                      /* a3 = base of owner array of the target core */
    addi a4, a3, XCHAL_CP_MAX << 2      /* a4 = top+1 of owner array of the target core */
    movi a5, 0                          /* a5 = 0 (unowned) */

    /* Acquire spinlock before proceeding with the routine.
     * (Refer to _xt_coproc_exc for details on the purpose of
     * the _xt_coproc_owner_sa_lock lock and its intended use.)
     */
.L_spinlock_loop:
    mov a8, a3                          /* Save a copy of the core ID in a8 */
    movi a10, _xt_coproc_owner_sa_lock  /* a10 = base address of lock variable */
    addx4 a10, a8, a10                  /* Use core ID in a8 to calculate the offset to the lock variable for the core */
    movi a11, 0                         /* a11 = 0 */
    wsr a11, scompare1                  /* scompare1 = a11 :- Expect the spinlock to be free (value = 0) */
    movi a11, 1                         /* a11 = 1 :- Write 1 to take the spinlock */
    s32c1i a11, a10, 0                  /* if (lock == scompare1) {tmp = lock; lock = a11; a11 = tmp} else {a11 = lock} */
    bnez a11, .L_spinlock_loop          /* if (a11 != 0) {loop} :- Keep spinning until the spinlock is available */
    rsil a6, XCHAL_EXCM_LEVEL           /* lock interrupts */
#if portNUM_PROCESSORS > 1
    /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock
     * to ensure thread-safe access of _xt_coproc_owner_sa between cores. */
    spinlock_take a7 a8 _xt_coproc_owner_sa_lock
#endif /* portNUM_PROCESSORS > 1 */

    movi a4, XCHAL_CP_MAX << 2
    mull a3, a3, a4
    movi a4, _xt_coproc_owner_sa        /* a4 = base of owner array */
    add a4, a4, a3
1:  l32i a7, a3, 0                      /* a7 = owner at a3 */
    bne a2, a7, 2f                      /* if (coproc_sa_base == owner) */
    s32i a5, a3, 0                      /*   owner = unowned */
2:  addi a3, a3, 1<<2                   /* a3 = next entry in owner array */
    bltu a3, a4, 1b                     /* repeat until end of array */

    addi a5, a4, XCHAL_CP_MAX << 2      /* a5 = top+1 of owner array */
    movi a6, 0                          /* a6 = 0 (unowned) */

1:  l32i a8, a4, 0                      /* a8 = owner at a4 */
    bne a2, a8, 2f                      /* if (coproc_sa_base == owner) */
    s32i a6, a4, 0                      /*   owner = unowned */
2:  addi a4, a4, 1<<2                   /* a4 = next entry in owner array */
    bltu a4, a5, 1b                     /* repeat until end of array */

3:  wsr a7, PS                          /* restore interrupts */

    /* Release spinlock */
    movi a11, 0                         /* a11 = 0 */
    s32ri a11, a10, 0                   /* a10 = base address of lock variable. Write 0 to release the lock */
#if portNUM_PROCESSORS > 1
    /* Release previously taken spinlock */
    spinlock_release a7 a8 _xt_coproc_owner_sa_lock
#endif /* portNUM_PROCESSORS > 1 */
    wsr a6, PS                          /* restore interrupts */

    RET0
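
The refactored routine boils down to the following C sketch (illustrative only; the sketch names are hypothetical, the spinlock_*_sketch helpers are the ones sketched earlier, the owner-array shapes are assumptions read off the assembly above, and FreeRTOS-style interrupt-mask macros stand in for the raw rsil/wsr instructions):

    /* Owner arrays and lock as laid out in xtensa_vectors.S (assumed shapes) */
    extern volatile void *_xt_coproc_owner_sa[portNUM_PROCESSORS * XCHAL_CP_MAX];
    extern volatile uint32_t _xt_coproc_owner_sa_lock;

    /* Sketch: clear every owner-array entry on the target core that still
     * points at the given CP save area (i.e., at the task being deleted). */
    void xt_coproc_release_sketch(volatile void *coproc_sa_base, BaseType_t xTargetCoreID)
    {
        UBaseType_t uxSavedState = portSET_INTERRUPT_MASK_FROM_ISR(); /* rsil */
    #if portNUM_PROCESSORS > 1
        spinlock_take_sketch(&_xt_coproc_owner_sa_lock, (uint32_t)xPortGetCoreID());
    #endif
        volatile void **ppxOwner = &_xt_coproc_owner_sa[xTargetCoreID * XCHAL_CP_MAX];
        for (int n = 0; n < XCHAL_CP_MAX; n++) {
            if (ppxOwner[n] == coproc_sa_base) {
                ppxOwner[n] = NULL; /* Mark coprocessor n unowned */
            }
        }
    #if portNUM_PROCESSORS > 1
        spinlock_release_sketch(&_xt_coproc_owner_sa_lock);
    #endif
        portCLEAR_INTERRUPT_MASK_FROM_ISR(uxSavedState); /* wsr PS */
    }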
@@ -102,15 +102,8 @@
#include "esp_private/panic_reason.h"
#include "sdkconfig.h"
#include "soc/soc.h"
#include "xt_asm_utils.h"

/*
Define for workaround: pin no-cpu-affinity tasks to a cpu when fpu is used.
Please change this when the tcb structure is changed
*/
.extern pxCurrentTCBs
#if ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
.extern offset_uxCoreAffinityMask
#endif // ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )

/*
--------------------------------------------------------------------------------
@@ -859,22 +852,22 @@ _xt_coproc_owner_sa:
/* Spinlock per core for accessing _xt_coproc_owner_sa array
 *
 * 0 = Spinlock available
 * 1 = Spinlock taken
 * PRID = Spinlock taken
 *
 * The lock provides mutual exclusion for accessing the _xt_coproc_owner_sa array.
 * This array can be modified by both _xt_coproc_exc and _xt_coproc_release routines
 * simultaneously owing to the fact that the FreeRTOS SMP Kernel allows cross-core
 * task deletion. Therefore, the same memory location in the owner save-area array
 * could be modified at the same time.
 * The array can be modified by multiple cores simultaneously (via _xt_coproc_exc
 * and _xt_coproc_release). Therefore, this spinlock is defined to ensure thread-safe
 * access to the _xt_coproc_owner_sa array.
 */
#if portNUM_PROCESSORS > 1
    .global _xt_coproc_owner_sa_lock
    .type _xt_coproc_owner_sa_lock,@object
    .align 16                           /* minimize crossing cache boundaries */
_xt_coproc_owner_sa_lock:
    .space (portNUM_PROCESSORS) << 2
    .space 4
#endif /* portNUM_PROCESSORS > 1 */

    .section .iram1,"ax"

    .align 4
.L_goto_invalid:
    j .L_xt_coproc_invalid              /* not in a thread (invalid) */
@@ -924,51 +917,15 @@ _xt_coproc_exc:
    s32i a4, sp, XT_STK_A4
    s32i a15, sp, XT_STK_A15

    /* Acquire spinlock before proceeding with the exception handler.
     * (Refer to _xt_coproc_release for the competing routine for the lock.)
     *
     * [refactor-todo]: The spinlock acquire/release routine can be
     * refactored into a macro later if the need arises to use it
     * in more than one place in the port assembly files.
     */
.L_spinlock_loop:
    movi a2, _xt_coproc_owner_sa_lock   /* a2 = base address of lock variable */
    getcoreid a0                        /* get the core ID in a0 to calculate the offset of the lock variable */
    addx4 a2, a0, a2                    /* a2 = address of desired lock variable */
    movi a0, 0                          /* a0 = 0 */
    wsr a0, scompare1                   /* scompare1 = a0 :- Expect the spinlock to be free (value = 0) */
    movi a0, 1                          /* a0 = 1 :- Write 1 to take the spinlock */
    s32c1i a0, a2, 0                    /* if (lock == scompare1) {tmp = lock; lock = a0; a0 = tmp} else {a0 = lock} */
    bnez a0, .L_spinlock_loop           /* if (a0 != 0) {loop} :- Keep spinning until the spinlock is available */
    /* Call the RTOS coprocessor exception hook */
    call0 XT_RTOS_CP_EXC_HOOK

    /* Get co-processor state save area of new owner thread. */
    call0 XT_RTOS_CP_STATE              /* a15 = new owner's save area */
#if CONFIG_FREERTOS_FPU_IN_ISR
    beqz a15, .L_skip_core_pin          /* CP used in ISR, skip task pinning */
#else
#if !CONFIG_FREERTOS_FPU_IN_ISR
    beqz a15, .L_goto_invalid           /* not in a thread (invalid) */
#endif

#if ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )
    /* CP operations are incompatible with unpinned tasks. Thus we pin the task
       to the current running core. */
    movi a2, pxCurrentTCBs
    getcoreid a3                        /* a3 = current core ID */
    addx4 a2, a3, a2
    l32i a2, a2, 0                      /* a2 = start of pxCurrentTCBs[cpuid] */
    movi a4, offset_uxCoreAffinityMask
    l32i a4, a4, 0                      /* a4 = offset_uxCoreAffinityMask */
    add a2, a2, a4                      /* a2 = &TCB.uxCoreAffinityMask */
    ssl a3                              /* Use core ID as shift amount */
    movi a4, 1
    sll a4, a4                          /* a4 = uxCoreAffinityMask = (1 << core ID) */
    s32i a4, a2, 0                      /* Store affinity mask to TCB.uxCoreAffinityMask */
#endif // ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 )

#if CONFIG_FREERTOS_FPU_IN_ISR
.L_skip_core_pin:
#endif

    /* Enable the co-processor's bit in CPENABLE. */
    movi a0, _xt_coproc_mask
    rsr a4, CPENABLE                    /* a4 = CPENABLE */
@@ -978,17 +935,18 @@ _xt_coproc_exc:
    or a4, a4, a2                       /* a4 = CPENABLE | (1 << n) */
    wsr a4, CPENABLE

/*
Keep loading _xt_coproc_owner_sa[n] atomic (=load once, then use that value
everywhere): _xt_coproc_release assumes it works like this in order not to need
locking.
*/
    /* Grab correct xt_coproc_owner_sa for this core */
    /* Grab the xt_coproc_owner_sa owner array for the current core */
    getcoreid a3                        /* a3 = current core ID */
    movi a2, XCHAL_CP_MAX << 2
    mull a2, a2, a3                     /* multiply by current processor id */
    movi a3, _xt_coproc_owner_sa        /* a3 = base of owner array */
    add a3, a3, a2                      /* a3 = owner area needed for this processor */
    movi a2, XCHAL_CP_MAX << 2          /* a2 = size of an owner array */
    mull a2, a2, a3                     /* a2 = offset to the owner array of the current core */
    movi a3, _xt_coproc_owner_sa        /* a3 = base of all owner arrays */
    add a3, a3, a2                      /* a3 = base of owner array of the current core */

#if portNUM_PROCESSORS > 1
    /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock
     * to ensure thread-safe access of _xt_coproc_owner_sa between cores. */
    spinlock_take a0 a2 _xt_coproc_owner_sa_lock
#endif /* portNUM_PROCESSORS > 1 */

    /* Get old coprocessor owner thread (save area ptr) and assign new one. */
    addx4 a3, a5, a3                    /* a3 = &_xt_coproc_owner_sa[n] */
@@ -996,13 +954,21 @@ locking.
    s32i a15, a3, 0                     /* _xt_coproc_owner_sa[n] = new */
    rsync                               /* ensure wsr.CPENABLE is complete */

#if portNUM_PROCESSORS > 1
    /* Release previously taken spinlock */
    spinlock_release a0 a2 _xt_coproc_owner_sa_lock
#endif /* portNUM_PROCESSORS > 1 */

    /* Only need to context switch if new owner != old owner. */
    /* If float is necessary in an ISR, we need to remove the check below,
     * because on restoring from an ISR we may have a new == old condition
     * used to force a CP restore for the next thread.
     * Todo: IDF-6418
     */
#ifndef CONFIG_FREERTOS_FPU_IN_ISR
    beq a15, a2, .L_goto_done           /* new owner == old, we're done */
#if !CONFIG_FREERTOS_FPU_IN_ISR
    bne a15, a2, .L_switch_context
    j .L_goto_done                      /* new owner == old, we're done */
.L_switch_context:
#endif

    /* If no old owner then nothing to save. */
@@ -1072,14 +1038,6 @@ locking.
    /* Restore interruptee's saved registers. */
    /* Can omit rsync for wsr.CPENABLE here because _xt_user_exit does it. */
.L_xt_coproc_done:

    /* Release spinlock */
    movi a2, _xt_coproc_owner_sa_lock   /* a2 = base address of the lock variable */
    getcoreid a0                        /* a0 = core ID, used to calculate the offset of the lock variable */
    addx4 a2, a0, a2                    /* a2 = address of the lock variable */
    movi a0, 0                          /* a0 = 0 */
    s32ri a0, a2, 0                     /* *a2 = a0 :- Write 0 to release the lock */

    l32i a15, sp, XT_STK_A15
    l32i a5, sp, XT_STK_A5
    l32i a4, sp, XT_STK_A4
@@ -1107,14 +1065,6 @@ locking.

    /* Co-processor exception occurred outside a thread (not supported). */
.L_xt_coproc_invalid:

    /* Release spinlock */
    movi a2, _xt_coproc_owner_sa_lock   /* a2 = base address of the lock variable */
    getcoreid a0                        /* a0 = core ID, used to calculate the offset of the lock variable */
    addx4 a2, a0, a2                    /* a2 = address of the lock variable */
    movi a0, 0                          /* a0 = 0 */
    s32ri a0, a2, 0                     /* *a2 = a0 :- Write 0 to release the lock */

    movi a0, PANIC_RSN_COPROCEXCEPTION
    wsr a0, EXCCAUSE
    call0 _xt_panic                     /* not in a thread (invalid) */
@@ -1735,7 +1685,7 @@ _Level6Vector:
    .global xt_nmi
    .align 4
_NMIExceptionVector:
    wsr a0, EXCSAVE + XCHAL_NMILEVEL _  /* preserve a0 */
    wsr a0, EXCSAVE + XCHAL_NMILEVEL    /* preserve a0 */
    call0 xt_nmi                        /* load interrupt handler */
    /* never returns here - call0 is used as a jump (see note at top) */
@@ -1856,9 +1806,9 @@ _xt_alloca_exc:
    wsr a2, PS                          /* update PS.OWB to new window base */
    rsync

    _bbci.l a4, 31, _WindowUnderflow4
    bbci.l a4, 31, _WindowUnderflow4
    rotw -1                             /* original a0 goes to a8 */
    _bbci.l a8, 30, _WindowUnderflow8
    bbci.l a8, 30, _WindowUnderflow8
    rotw -1
    j _WindowUnderflow12