mirror of
https://github.com/espressif/esp-idf.git
synced 2024-10-05 20:47:46 -04:00
Merge branch 'feature/faster_task_switching' into 'master'
Optimize task switching. Convert xPortGetCoreID to inline assembly: task switch time drops from 32 to 25 us. Spinlock optimizations save another 1.5 us. See merge request !139
This commit is contained in:
commit
ede25dbc17
@ -192,8 +192,14 @@ void vPortEndScheduler( void ) PRIVILEGED_FUNCTION;
|
||||
#endif
|
||||
|
||||
/*
 * Multi-core: get current core ID.
 *
 * Reads the PRID special register and extracts bit 13 of it; that single bit
 * (0 or 1) is returned as the ID of the core executing the call.
 *
 * The function is 'static inline' on purpose: a plain 'inline' definition in a
 * header provides no out-of-line copy under C99 rules, so any call the compiler
 * decides not to inline (for example when optimisation is off) would fail to
 * link. No separate non-inline prototype is declared, because a prototype with
 * a different return type or linkage would conflict with this definition.
 *
 * Returns: 0 or 1, the current core ID.
 */
static inline uint32_t xPortGetCoreID(void) {
    uint32_t id;    /* same type as the return value, no implicit sign conversion */
    __asm__ __volatile__(
        "rsr.prid %0\n"             /* id = PRID */
        " extui %0,%0,13,1"         /* id = (id >> 13) & 1 */
        :"=r"(id));
    return id;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -225,6 +225,26 @@ static inline unsigned portENTER_CRITICAL_NESTED() { unsigned state = XTOS_SET_I
|
||||
#define portCLEAR_INTERRUPT_MASK_FROM_ISR(state) portEXIT_CRITICAL_NESTED(state)
|
||||
|
||||
|
||||
/*
 * Wrapper for the Xtensa compare-and-set instruction (S32C1I). This subroutine will
 * atomically compare *addr to compare, and if it's the same, will set *addr to *set.
 * In either case the old value of *addr is returned in *set.
 *
 * Parameters:
 *   addr    - word to operate on.
 *   compare - value *addr is expected to hold; it is loaded into SCOMPARE1.
 *   set     - in: value to store into *addr on a match; out: old value of *addr.
 *
 * Warning: From the ISA docs: in some (unspecified) cases, the s32c1i instruction may return the
 * *bitwise inverse* of the old mem if the mem wasn't written. This doesn't seem to happen on the
 * ESP32, though. (Would show up directly if it did because the magic wouldn't match.)
 *
 * Implementation notes:
 *  - 'static inline' is used because a plain 'inline' definition in a header provides no
 *    out-of-line copy under C99 rules, so a call that is not inlined would fail to link.
 *  - The "memory" clobber is required because the instruction may write *addr, which is not
 *    listed as an output operand; it also stops the compiler from moving accesses to the
 *    data protected by the lock across this operation.
 */
static inline void uxPortCompareSet(volatile uint32_t *addr, uint32_t compare, uint32_t *set) {
    __asm__ __volatile__(
        "WSR %2,SCOMPARE1 \n"       /* SCOMPARE1 = compare */
        "ISYNC \n"                  /* synchronize after the special-register write */
        "S32C1I %0, %1, 0 \n"       /* conditional store of %0 to *addr; %0 receives the old *addr */
        :"=r"(*set)
        :"r"(addr), "r"(compare), "0"(*set)
        :"memory"
        );
}
|
||||
|
||||
|
||||
/*-----------------------------------------------------------*/
|
||||
|
||||
/* Architecture specifics. */
|
||||
|
@ -322,12 +322,7 @@ STRUCT_END(XtSolFrame)
|
||||
#ifdef __ASSEMBLER__
|
||||
/*
 * getcoreid reg: load the ID of the current core (0 or 1) into \reg.
 *
 * Reads PRID into \reg and then extracts bit 13 of it in place.
 * No branch sequence may sit between the two instructions: anything that
 * rewrites \reg with 0 or 1 before the extui would make the bit-13 extract
 * always produce 0, i.e. every core would be reported as core 0.
 * Only \reg is modified.
 */
.macro getcoreid reg
rsr.prid \reg
extui \reg,\reg,13,1
.endm
|
||||
#endif
|
||||
|
||||
|
@ -76,7 +76,6 @@ inline static void panicPutHex(int a) { }
|
||||
inline static void panicPutDec(int a) { }
|
||||
#endif
|
||||
|
||||
int xPortGetCoreID();
|
||||
|
||||
void __attribute__((weak)) vApplicationStackOverflowHook( TaskHandle_t xTask, signed char *pcTaskName ) {
|
||||
panicPutStr("***ERROR*** A stack overflow in task ");
|
||||
|
@ -253,28 +253,6 @@ void vPortAssertIfInISR()
|
||||
configASSERT(port_interruptNesting[xPortGetCoreID()]==0)
|
||||
}
|
||||
|
||||
|
||||
/*
 * Wrapper for the Xtensa compare-and-set instruction. This subroutine will atomically compare
 * *mux to compare, and if it's the same, will set *mux to set. It will return the old value
 * of *mux.
 *
 * Parameters:
 *   mux     - word to operate on.
 *   compare - value *mux is expected to hold; it is loaded into SCOMPARE1.
 *   set     - value to store into *mux on a match.
 *
 * Returns: the value S32C1I hands back in its data register (the old value of *mux).
 *
 * Warning: From the ISA docs: in some (unspecified) cases, the s32c1i instruction may return the
 * *bitwise inverse* of the old mem if the mem wasn't written. This doesn't seem to happen on the
 * ESP32, though. (Would show up directly if it did because the magic wouldn't match.)
 *
 * The "memory" clobber is required because the instruction may write *mux, which is not listed
 * as an output operand; it also stops the compiler from moving accesses to the data protected
 * by the lock across this operation.
 */
uint32_t uxPortCompareSet(volatile uint32_t *mux, uint32_t compare, uint32_t set)
{
    __asm__ __volatile__ (
        "WSR %2,SCOMPARE1 \n"       /* initialize SCOMPARE1 with the expected value */
        "ISYNC \n"                  /* wait sync */
        "S32C1I %0, %1, 0 \n"       /* store %0 into the lock if the lock equals SCOMPARE1; otherwise no write access */
        :"=r"(set)
        :"r"(mux), "r"(compare), "0"(set)
        :"memory"
        );
    return set;
}
|
||||
|
||||
/*
|
||||
* For kernel use: Initialize a per-CPU mux. Mux will be initialized unlocked.
|
||||
*/
|
||||
@ -310,7 +288,8 @@ void vPortCPUAcquireMutex(portMUX_TYPE *mux) {
|
||||
irqStatus=portENTER_CRITICAL_NESTED();
|
||||
do {
|
||||
//Lock mux if it's currently unlocked
|
||||
res=uxPortCompareSet(&mux->mux, portMUX_FREE_VAL, (xPortGetCoreID()<<portMUX_VAL_SHIFT)|portMUX_MAGIC_VAL);
|
||||
res=(xPortGetCoreID()<<portMUX_VAL_SHIFT)|portMUX_MAGIC_VAL;
|
||||
uxPortCompareSet(&mux->mux, portMUX_FREE_VAL, &res);
|
||||
//If it wasn't free and we're the owner of the lock, we are locking recursively.
|
||||
if ( (res != portMUX_FREE_VAL) && (((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT) == xPortGetCoreID()) ) {
|
||||
//Mux was already locked by us. Just bump the recurse count by one.
|
||||
@ -362,29 +341,33 @@ portBASE_TYPE vPortCPUReleaseMutex(portMUX_TYPE *mux) {
|
||||
if ( (mux->mux & portMUX_MAGIC_MASK) != portMUX_MAGIC_VAL ) ets_printf("ERROR: vPortCPUReleaseMutex: mux %p is uninitialized (0x%X)!\n", mux, mux->mux);
|
||||
#endif
|
||||
//Unlock mux if it's currently locked with a recurse count of 0
|
||||
res=uxPortCompareSet(&mux->mux, (xPortGetCoreID()<<portMUX_VAL_SHIFT)|portMUX_MAGIC_VAL, portMUX_FREE_VAL);
|
||||
res=portMUX_FREE_VAL;
|
||||
uxPortCompareSet(&mux->mux, (xPortGetCoreID()<<portMUX_VAL_SHIFT)|portMUX_MAGIC_VAL, &res);
|
||||
|
||||
if ( res == portMUX_FREE_VAL ) {
|
||||
if ( ((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT) == xPortGetCoreID() ) {
|
||||
//Lock is valid, we can return safely. Just need to check if it's a recursive lock; if so we need to decrease the refcount.
|
||||
if ( ((res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT)!=0) {
|
||||
//We locked this, but the reccount isn't zero. Decrease refcount and continue.
|
||||
recCnt=(res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT;
|
||||
recCnt--;
|
||||
#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG_RECURSIVE
|
||||
ets_printf("Recursive unlock: recCnt=%d last locked %s line %d, curr %s line %d\n", recCnt, lastLockedFn, lastLockedLine, fnName, line);
|
||||
#endif
|
||||
mux->mux=portMUX_MAGIC_VAL|(recCnt<<portMUX_CNT_SHIFT)|(xPortGetCoreID()<<portMUX_VAL_SHIFT);
|
||||
}
|
||||
} else if ( res == portMUX_FREE_VAL ) {
|
||||
#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG
|
||||
ets_printf("ERROR: vPortCPUReleaseMutex: mux %p was already unlocked!\n", mux);
|
||||
ets_printf("Last non-recursive unlock %s line %d, curr unlock %s line %d\n", lastLockedFn, lastLockedLine, fnName, line);
|
||||
#endif
|
||||
ret=pdFALSE;
|
||||
} else if ( ((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT) != xPortGetCoreID() ) {
|
||||
} else {
|
||||
#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG
|
||||
ets_printf("ERROR: vPortCPUReleaseMutex: mux %p wasn't locked by this core (%d) but by core %d (ret=%x, mux=%x).\n", mux, xPortGetCoreID(), ((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT), res, mux->mux);
|
||||
ets_printf("Last non-recursive lock %s line %d\n", lastLockedFn, lastLockedLine);
|
||||
ets_printf("Called by %s line %d\n", fnName, line);
|
||||
#endif
|
||||
ret=pdFALSE;
|
||||
} else if ( ((res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT)!=0) {
|
||||
//We locked this, but the reccount isn't zero. Decrease refcount and continue.
|
||||
recCnt=(res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT;
|
||||
recCnt--;
|
||||
#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG_RECURSIVE
|
||||
ets_printf("Recursive unlock: recCnt=%d last locked %s line %d, curr %s line %d\n", recCnt, lastLockedFn, lastLockedLine, fnName, line);
|
||||
#endif
|
||||
mux->mux=portMUX_MAGIC_VAL|(recCnt<<portMUX_CNT_SHIFT)|(xPortGetCoreID()<<portMUX_VAL_SHIFT);
|
||||
}
|
||||
portEXIT_CRITICAL_NESTED(irqStatus);
|
||||
return ret;
|
||||
|
@ -51,18 +51,6 @@ port_switch_flag:
|
||||
|
||||
.text
|
||||
|
||||
|
||||
|
||||
/*
 * C function to get proc ID.
 *
 * Exports the global function symbol xPortGetCoreID; its body invokes the
 * getcoreid macro with a2 as the destination register.
 * NOTE(review): ENTRY(16)/RET(16) are macros defined outside this view;
 * presumably they emit the ABI-specific prologue/epilogue for a 16-byte
 * frame and a2 carries the return value - confirm against their definition.
 */
|
||||
.global xPortGetCoreID
|
||||
.type xPortGetCoreID,@function
|
||||
.align 4
|
||||
xPortGetCoreID:
|
||||
ENTRY(16)
|
||||
/* a2 = ID of the core executing this code */
getcoreid a2
|
||||
RET(16)
|
||||
|
||||
|
||||
/*
|
||||
*******************************************************************************
|
||||
* _frxt_setup_switch
|
||||
@ -81,9 +69,8 @@ _frxt_setup_switch:
|
||||
ENTRY(16)
|
||||
|
||||
getcoreid a3
|
||||
slli a3, a3, 2
|
||||
movi a2, port_switch_flag
|
||||
add a2, a2, a3
|
||||
addx4 a2, a3, a2
|
||||
|
||||
movi a3, 1
|
||||
s32i a3, a2, 0
|
||||
@ -128,12 +115,11 @@ _frxt_int_enter:
|
||||
Manage nesting directly rather than call the generic IntEnter()
|
||||
(in windowed ABI we can't call a C function here anyway because PS.EXCM is still set).
|
||||
*/
|
||||
getcoreid a3
|
||||
slli a4, a3, 2 /* a4 = cpuid * 4 */
|
||||
getcoreid a4
|
||||
movi a2, port_xSchedulerRunning
|
||||
add a2, a2, a4
|
||||
addx4 a2, a4, a2
|
||||
movi a3, port_interruptNesting
|
||||
add a3, a3, a4
|
||||
addx4 a3, a4, a3
|
||||
l32i a2, a2, 0 /* a2 = port_xSchedulerRunning */
|
||||
beqz a2, 1f /* scheduler not running, no tasks */
|
||||
l32i a2, a3, 0 /* a2 = port_interruptNesting */
|
||||
@ -142,14 +128,13 @@ _frxt_int_enter:
|
||||
bnei a2, 1, .Lnested /* !=0 before incr, so nested */
|
||||
|
||||
movi a2, pxCurrentTCB
|
||||
add a2, a2, a4
|
||||
addx4 a2, a4, a2
|
||||
l32i a2, a2, 0 /* a2 = current TCB */
|
||||
beqz a2, 1f
|
||||
s32i a1, a2, TOPOFSTACK_OFFS /* pxCurrentTCB->pxTopOfStack = SP */
|
||||
movi a1, port_IntStackTop /* a1 = top of intr stack */
|
||||
movi a2, configISR_STACK_SIZE
|
||||
getcoreid a3
|
||||
mull a2, a3, a2
|
||||
mull a2, a4, a2
|
||||
add a1, a1, a2 /* for current proc */
|
||||
|
||||
.Lnested:
|
||||
@ -177,12 +162,11 @@ _frxt_int_enter:
|
||||
.align 4
|
||||
_frxt_int_exit:
|
||||
|
||||
getcoreid a3
|
||||
slli a4, a3, 2 /* a4 is core * 4 */
|
||||
getcoreid a4
|
||||
movi a2, port_xSchedulerRunning
|
||||
add a2, a2, a4
|
||||
addx4 a2, a4, a2
|
||||
movi a3, port_interruptNesting
|
||||
add a3, a3, a4
|
||||
addx4 a3, a4, a3
|
||||
rsil a0, XCHAL_EXCM_LEVEL /* lock out interrupts */
|
||||
l32i a2, a2, 0 /* a2 = port_xSchedulerRunning */
|
||||
beqz a2, .Lnoswitch /* scheduler not running, no tasks */
|
||||
@ -192,13 +176,13 @@ _frxt_int_exit:
|
||||
bnez a2, .Lnesting /* !=0 after decr so still nested */
|
||||
|
||||
movi a2, pxCurrentTCB
|
||||
add a2, a2, a4
|
||||
addx4 a2, a4, a2
|
||||
l32i a2, a2, 0 /* a2 = current TCB */
|
||||
beqz a2, 1f /* no task ? go to dispatcher */
|
||||
l32i a1, a2, TOPOFSTACK_OFFS /* SP = pxCurrentTCB->pxTopOfStack */
|
||||
|
||||
movi a2, port_switch_flag /* address of switch flag */
|
||||
add a2, a2, a4 /* point to flag for this cpu */
|
||||
addx4 a2, a4, a2 /* point to flag for this cpu */
|
||||
l32i a3, a2, 0 /* a3 = port_switch_flag */
|
||||
beqz a3, .Lnoswitch /* flag = 0 means no switch reqd */
|
||||
movi a3, 0
|
||||
@ -404,14 +388,12 @@ _frxt_dispatch:
|
||||
call0 vTaskSwitchContext // Get next TCB to resume
|
||||
movi a2, pxCurrentTCB
|
||||
getcoreid a3
|
||||
slli a3, a3, 2
|
||||
add a2, a2, a3
|
||||
addx4 a2, a3, a2
|
||||
#else
|
||||
call4 vTaskSwitchContext // Get next TCB to resume
|
||||
movi a2, pxCurrentTCB
|
||||
getcoreid a3
|
||||
slli a3, a3, 2
|
||||
add a2, a2, a3
|
||||
addx4 a2, a3, a2
|
||||
#endif
|
||||
l32i a3, a2, 0
|
||||
l32i sp, a3, TOPOFSTACK_OFFS /* SP = next_TCB->pxTopOfStack; */
|
||||
@ -451,8 +433,7 @@ _frxt_dispatch:
|
||||
/* Restore CPENABLE from task's co-processor save area. */
|
||||
movi a3, pxCurrentTCB /* cp_state = */
|
||||
getcoreid a2
|
||||
slli a2, a2, 2
|
||||
add a3, a2, a3
|
||||
addx4 a3, a2, a3
|
||||
l32i a3, a3, 0
|
||||
l32i a2, a3, CP_TOPOFSTACK_OFFS /* StackType_t *pxStack; */
|
||||
l16ui a3, a2, XT_CPENABLE /* CPENABLE = cp_state->cpenable; */
|
||||
@ -541,8 +522,7 @@ vPortYield:
|
||||
|
||||
movi a2, pxCurrentTCB
|
||||
getcoreid a3
|
||||
slli a3, a3, 2
|
||||
add a2, a2, a3
|
||||
addx4 a2, a3, a2
|
||||
l32i a2, a2, 0 /* a2 = pxCurrentTCB */
|
||||
movi a3, 0
|
||||
s32i a3, sp, XT_SOL_EXIT /* 0 to flag as solicited frame */
|
||||
@ -593,8 +573,7 @@ vPortYieldFromInt:
|
||||
/* Save CPENABLE in task's co-processor save area, and clear CPENABLE. */
|
||||
movi a3, pxCurrentTCB /* cp_state = */
|
||||
getcoreid a2
|
||||
slli a2, a2, 2
|
||||
add a3, a2, a3
|
||||
addx4 a3, a2, a3
|
||||
l32i a3, a3, 0
|
||||
|
||||
l32i a2, a3, CP_TOPOFSTACK_OFFS
|
||||
@ -637,18 +616,17 @@ _frxt_task_coproc_state:
|
||||
|
||||
/* We can use a3 as a scratchpad, the instances of code calling XT_RTOS_CP_STATE don't seem to need it saved. */
|
||||
getcoreid a3
|
||||
slli a3, a3, 2 /* a3=coreid*4 */
|
||||
movi a15, port_xSchedulerRunning /* if (port_xSchedulerRunning */
|
||||
add a15, a15, a3
|
||||
addx4 a15, a3,a15
|
||||
l32i a15, a15, 0
|
||||
beqz a15, 1f
|
||||
movi a15, port_interruptNesting /* && port_interruptNesting == 0 */
|
||||
add a15, a15, a3
|
||||
addx4 a15, a3, a15
|
||||
l32i a15, a15, 0
|
||||
bnez a15, 1f
|
||||
|
||||
movi a15, pxCurrentTCB
|
||||
add a15, a3, a15
|
||||
addx4 a15, a3, a15
|
||||
l32i a15, a15, 0 /* && pxCurrentTCB != 0) { */
|
||||
|
||||
|
||||
|
@ -904,16 +904,13 @@ _xt_coproc_exc:
|
||||
core we're running on now. */
|
||||
movi a2, pxCurrentTCB
|
||||
getcoreid a3
|
||||
slli a3, a3, 2
|
||||
add a2, a2, a3
|
||||
addx4 a2, a3, a2
|
||||
l32i a2, a2, 0 /* a2 = start of pxCurrentTCB[cpuid] */
|
||||
addi a2, a2, TASKTCB_XCOREID_OFFSET /* offset to xCoreID in tcb struct */
|
||||
getcoreid a3
|
||||
s32i a3, a2, 0 /* store current cpuid */
|
||||
|
||||
/* Grab correct xt_coproc_owner_sa for this core */
|
||||
getcoreid a2
|
||||
movi a3, XCHAL_CP_MAX << 2
|
||||
movi a2, XCHAL_CP_MAX << 2
|
||||
mull a2, a2, a3
|
||||
movi a3, _xt_coproc_owner_sa /* a3 = base of owner array */
|
||||
add a3, a3, a2
|
||||
|
Loading…
x
Reference in New Issue
Block a user