feat(riscv): add support for PIE coprocessor and HWLP feature

FreeRTOS tasks may now freely use the PIE coprocessor and HWLP feature.
Just like the FPU, using these coprocessors results in the task being pinned
to the core it is currently running on.
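
A rough sketch of the resulting behavior (hypothetical task body; `xTaskGetCoreID()` and `xPortGetCoreID()` are the usual ESP-IDF accessors):

#include "freertos/FreeRTOS.h"
#include "freertos/task.h"

/* Hypothetical demo: an unpinned task becomes pinned to its current core
 * after its first use of a coprocessor (FPU shown; PIE/HWLP behave alike). */
static void demo_task(void *arg)
{
    (void) arg;
    volatile float f = 1.0f;  /* no coprocessor used yet: the task may migrate */
    f *= 2.0f;                /* first FPU instruction traps; the port pins the task */
    configASSERT(xTaskGetCoreID(NULL) == xPortGetCoreID());
    vTaskDelete(NULL);
}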
Omar Chebib 2024-04-30 10:37:42 +08:00
parent 8ac349524f
commit 55acc5e5e7
12 changed files with 1026 additions and 151 deletions

View File

@ -230,7 +230,7 @@ FORCE_INLINE_ATTR UBaseType_t uxInitialiseStackTLS(UBaseType_t uxStackPointer, u
#if CONFIG_FREERTOS_TASK_FUNCTION_WRAPPER
static void vPortTaskWrapper(TaskFunction_t pxCode, void *pvParameters)
{
__asm__ volatile(".cfi_undefined ra"); // tell to debugger that it's outermost (inital) frame
__asm__ volatile(".cfi_undefined ra"); // tell to debugger that it's outermost (initial) frame
extern void __attribute__((noreturn)) panic_abort(const char *details);
static char DRAM_ATTR msg[80] = "FreeRTOS: FreeRTOS Task \"\0";
pxCode(pvParameters);
@ -356,7 +356,7 @@ StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxC
HIGH ADDRESS
|---------------------------| <- pxTopOfStack on entry
| TLS Variables |
| ------------------------- | <- Start of useable stack
| ------------------------- | <- Start of usable stack
| Starting stack frame |
| ------------------------- | <- pxTopOfStack on return (which is the task's current SP)
| | |
@ -374,7 +374,7 @@ StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxC
| Coproc. Save Area | <- RvCoprocSaveArea
| ------------------------- |
| TLS Variables |
| ------------------------- | <- Start of useable stack
| ------------------------- | <- Start of usable stack
| Starting stack frame |
| ------------------------- | <- pxTopOfStack on return (which is the task's current SP)
| | |
@ -430,7 +430,7 @@ BaseType_t xPortInIsrContext(void)
/* Disable interrupts to fetch the coreID atomically */
irqStatus = portSET_INTERRUPT_MASK_FROM_ISR();
/* Return the interrupt nexting counter for this core */
/* Return the interrupt nesting counter for this core */
ret = port_uxInterruptNesting[xPortGetCoreID()];
/* Restore interrupts */
@ -445,7 +445,7 @@ BaseType_t xPortInIsrContext(void)
BaseType_t IRAM_ATTR xPortInterruptedFromISRContext(void)
{
/* Return the interrupt nexting counter for this core */
/* Return the interrupt nesting counter for this core */
return port_uxInterruptNesting[xPortGetCoreID()];
}
@ -536,7 +536,7 @@ BaseType_t __attribute__((optimize("-O3"))) xPortEnterCriticalTimeout(portMUX_TY
void __attribute__((optimize("-O3"))) vPortExitCriticalMultiCore(portMUX_TYPE *mux)
{
/* This function may be called in a nested manner. Therefore, we only need
* to reenable interrupts if this is the last call to exit the critical. We
* to re-enable interrupts if this is the last call to exit the critical. We
* can use the nesting count to determine whether this is the last exit call.
*/
spinlock_release(mux);
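
As an illustration of that nesting contract, a sketch of the usual ESP-IDF critical-section pattern (hypothetical functions, shared `portMUX_TYPE`):

static portMUX_TYPE s_lock = portMUX_INITIALIZER_UNLOCKED;

static void inner(void)
{
    portENTER_CRITICAL(&s_lock);  /* nesting count 1 -> 2 */
    /* ... */
    portEXIT_CRITICAL(&s_lock);   /* not the last exit: interrupts stay masked */
}

static void outer(void)
{
    portENTER_CRITICAL(&s_lock);  /* nesting count 0 -> 1, interrupts masked */
    inner();
    portEXIT_CRITICAL(&s_lock);   /* last exit: interrupts re-enabled */
}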
@ -787,9 +787,14 @@ RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, bool allocate, int cop
/* Check if coprocessor area is allocated */
if (allocate && sa->sa_coprocs[coproc] == NULL) {
const uint32_t coproc_sa_sizes[] = {
RV_COPROC0_SIZE, RV_COPROC1_SIZE
RV_COPROC0_SIZE, RV_COPROC1_SIZE, RV_COPROC2_SIZE
};
/* The allocator points to a usable part of the stack, use it for the coprocessor */
const uint32_t coproc_sa_align[] = {
RV_COPROC0_ALIGN, RV_COPROC1_ALIGN, RV_COPROC2_ALIGN
};
/* The allocator points to a usable part of the stack, use it for the coprocessor.
* Align it up to the coprocessor save area's alignment requirement */
sa->sa_allocator = (sa->sa_allocator + coproc_sa_align[coproc] - 1) & ~(coproc_sa_align[coproc] - 1);
sa->sa_coprocs[coproc] = (void*) (sa->sa_allocator);
sa->sa_allocator += coproc_sa_sizes[coproc];
/* Update the lowest address of the stack to prevent FreeRTOS from performing overflow/watermark checks on the coprocessor contexts */
@ -800,9 +805,9 @@ RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, bool allocate, int cop
if (task_sp <= task->pxDummy6) {
/* In theory we need to call vApplicationStackOverflowHook to trigger the stack overflow callback,
* but in practice, since we are already in an exception handler, this won't work, so let's manually
* trigger an exception with the previous FPU owner's TCB */
* trigger an exception with the previous coprocessor owner's TCB */
g_panic_abort = true;
g_panic_abort_details = (char *) "ERROR: Stack overflow while saving FPU context!\n";
g_panic_abort_details = (char *) "ERROR: Stack overflow while saving coprocessor context!\n";
xt_unhandled_exception(task_sp);
}
}
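
The align-up expression used above is the standard power-of-two rounding idiom; a self-contained illustration (hypothetical helper):

#include <stdint.h>
#include <assert.h>

/* Round x up to the next multiple of align (align must be a power of two) */
static inline uint32_t align_up(uint32_t x, uint32_t align)
{
    return (x + align - 1) & ~(align - 1);
}

int main(void)
{
    assert(align_up(0x3FC81234, 16) == 0x3FC81240); /* PIE areas need 16-byte alignment */
    assert(align_up(0x3FC81240, 16) == 0x3FC81240); /* already-aligned values are unchanged */
    return 0;
}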
@ -821,7 +826,8 @@ RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, bool allocate, int cop
* @param coreid Current core
* @param coproc Coprocessor to save context of
*
* @returns Coprocessor former owner's save area
* @returns Former coprocessor owner's save area, can be NULL if there was no owner yet, or -1 if
* the former owner is the same as the new owner.
*/
RvCoprocSaveArea* pxPortUpdateCoprocOwner(int coreid, int coproc, StaticTask_t* owner)
{
@ -830,8 +836,11 @@ RvCoprocSaveArea* pxPortUpdateCoprocOwner(int coreid, int coproc, StaticTask_t*
StaticTask_t** owner_addr = &port_uxCoprocOwner[ coreid ][ coproc ];
/* Atomically exchange former owner with the new one */
StaticTask_t* former = Atomic_SwapPointers_p32((void**) owner_addr, owner);
/* Get the save area of former owner */
if (former != NULL) {
/* Get the save area of the former owner. Small optimization here: if the former owner is the new
* owner, return -1. This simplifies the assembly code while making it faster. */
if (former == owner) {
sa = (void*) -1;
} else if (former != NULL) {
/* Allocate coprocessor memory if not available yet */
sa = pxPortGetCoprocArea(former, true, coproc);
}

View File

@ -8,6 +8,8 @@
#include "freertos/FreeRTOSConfig.h"
#include "soc/soc_caps.h"
#include "riscv/rvruntime-frames.h"
#include "riscv/csr_hwlp.h"
#include "riscv/csr_pie.h"
.extern pxCurrentTCBs
@ -33,6 +35,257 @@
#if SOC_CPU_COPROC_NUM > 0
/**
* @brief Macro to generate a routine that saves a coprocessor's registers in the previous owner's TCB dedicated save area.
* This routine aborts if the coprocessor is used from an ISR, since this is not allowed in ESP-IDF.
* However, it is allowed to use these coprocessors in the init process, so no error will be triggered if the
* current TCB is NULL.
*
* @param name The name of the coprocessor, this will be used to generate the label, so it must not contain special characters
* @param coproc_idx Index of the coprocessor in the coprocessor save area, this value can be found in rvruntime definition
* @param enable_coproc Macro that takes a scratch register as a parameter and enables the coprocessor.
* @param save_coproc_regs Macro that takes a frame as a parameter and saves all the coprocessor's registers in that frame.
* @param restore_coproc_regs Macro that takes a frame as a parameter and restores all the coprocessor's registers from that frame.
*
* Note: macros given as parameters can freely use temporary registers
*/
.macro generate_coprocessor_routine name, coproc_idx, enable_coproc, save_coproc_regs, restore_coproc_regs
.global rtos_save_\name\()_coproc
.type rtos_save_\name\()_coproc, @function
rtos_save_\name\()_coproc:
/* If we are in an interrupt context, we have to abort. We don't allow using the coprocessors from ISR */
#if ( configNUM_CORES > 1 )
csrr a2, mhartid /* a2 = coreID */
slli a2, a2, 2 /* a2 = coreID * 4 */
la a1, port_uxInterruptNesting /* a1 = &port_uxInterruptNesting */
add a1, a1, a2 /* a1 = &port_uxInterruptNesting[coreID] */
lw a1, 0(a1) /* a1 = port_uxInterruptNesting[coreID] */
#else /* ( configNUM_CORES <= 1 ) */
lw a1, (port_uxInterruptNesting) /* a1 = port_uxInterruptNesting */
#endif /* ( configNUM_CORES > 1 ) */
/* SP still contains the RvExcFrame address */
mv a0, sp
bnez a1, vPortCoprocUsedInISR
/* Enable the coprocessor needed by the current task */
\enable_coproc a1
mv s0, ra
call rtos_current_tcb
/* If the current TCB is NULL, the coprocessor is used during initialization, even before
* the scheduler started. Consider this a valid usage, it will be disabled as soon as the
* scheduler is started anyway */
beqz a0, rtos_save_\name\()_coproc_norestore
mv s1, a0 /* s1 = pxCurrentTCBs */
/* Prepare parameters of pxPortUpdateCoprocOwner */
mv a2, a0
li a1, \coproc_idx
csrr a0, mhartid
call pxPortUpdateCoprocOwner
/* If the save area is NULL, no need to save context */
beqz a0, rtos_save_\name\()_coproc_nosave
/* If the former owner is the current task (new owner), the return value is -1, we can skip restoring the
* coprocessor context and return directly */
li a1, -1
beq a0, a1, rtos_save_\name\()_coproc_norestore
/* Save the coprocessor context in the structure */
lw a0, RV_COPROC_SA+\coproc_idx*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[coproc_idx] */
\save_coproc_regs a0
rtos_save_\name\()_coproc_nosave:
#if ( configNUM_CORES > 1 )
/* Pin current task to current core */
mv a0, s1
csrr a1, mhartid
call vPortTaskPinToCore
#endif /* configNUM_CORES > 1 */
/* Check if we have to restore a previous context from the current TCB */
mv a0, s1
/* Do not allocate memory for the coprocessor yet, delay this until another task wants to use it.
* This guarantees that if a stack overflow occurs when allocating the coprocessor context on the stack,
* the current task context is flushed and updated in the TCB, generating a correct backtrace
* from the panic handler. */
li a1, 0
li a2, \coproc_idx
call pxPortGetCoprocArea
/* Get the enable flags from the coprocessor save area */
lw a1, RV_COPROC_ENABLE(a0)
/* To avoid having branches below, set the coprocessor enable flag now */
ori a2, a1, 1 << \coproc_idx
sw a2, RV_COPROC_ENABLE(a0)
/* Check if the former coprocessor enable bit was set */
andi a2, a1, 1 << \coproc_idx
beqz a2, rtos_save_\name\()_coproc_norestore
/* Enable bit was set, restore the coprocessor context */
lw a0, RV_COPROC_SA+\coproc_idx*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[\coproc_idx] */
\restore_coproc_regs a0
rtos_save_\name\()_coproc_norestore:
/* Return from routine via s0, instead of ra */
jr s0
.size rtos_save_\name\()_coproc, .-rtos_save_\name\()_coproc
.endm
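/* For reference, the control flow generated by this macro corresponds roughly to the
 * following C sketch (names match the C port layer; frame handling and the
 * single-core variant are simplified):
 *
 *     void rtos_save_coproc(int coproc_idx) {
 *         if (port_uxInterruptNesting[core] != 0)
 *             vPortCoprocUsedInISR(frame);                      // abort: no coproc use in ISR
 *         enable_coproc();
 *         StaticTask_t *cur = rtos_current_tcb();
 *         if (cur == NULL) return;                              // pre-scheduler use: allowed
 *         RvCoprocSaveArea *sa = pxPortUpdateCoprocOwner(core, coproc_idx, cur);
 *         if (sa == (void *) -1) return;                        // same owner: nothing to do
 *         if (sa != NULL)
 *             save_coproc_regs(sa->sa_coprocs[coproc_idx]);     // flush former owner's state
 *         vPortTaskPinToCore(cur, core);                        // multi-core builds only
 *         sa = pxPortGetCoprocArea(cur, false, coproc_idx);     // no allocation yet
 *         uint32_t en = sa->sa_enable;
 *         sa->sa_enable = en | (1 << coproc_idx);
 *         if (en & (1 << coproc_idx))
 *             restore_coproc_regs(sa->sa_coprocs[coproc_idx]);  // resume own context
 *     }
 */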
#if SOC_CPU_HAS_HWLOOP
/**
* @brief Macros to enable and disable the hardware loop feature on the current core
*/
.macro hwlp_enable scratch_reg=a0
li \scratch_reg, 1
csrw CSR_HWLP_STATE_REG, \scratch_reg
.endm
/**
* @brief Disable HW Loop CPU feature while returning the former status in the given register
*/
.macro hwlp_disable reg
csrrw \reg, CSR_HWLP_STATE_REG, zero
/* Only keep the lowest two bits */
andi \reg, \reg, 0b11
/* If register is 0, HWLP was off */
beqz \reg, 1f
/* It was ON, return the enable bit in \reg */
li \reg, 1 << HWLP_COPROC_IDX
1:
.endm
/**
* @brief Macros to save and restore the hardware loop registers to and from the given frame
*/
.macro hwlp_save_regs frame=sp
csrr a1, CSR_LOOP0_START_ADDR
sw a1, RV_HWLOOP_START0(\frame)
csrr a1, CSR_LOOP0_END_ADDR
sw a1, RV_HWLOOP_END0(\frame)
csrr a1, CSR_LOOP0_COUNT
sw a1, RV_HWLOOP_COUNT0(\frame)
csrr a1, CSR_LOOP1_START_ADDR
sw a1, RV_HWLOOP_START1(\frame)
csrr a1, CSR_LOOP1_END_ADDR
sw a1, RV_HWLOOP_END1(\frame)
csrr a1, CSR_LOOP1_COUNT
sw a1, RV_HWLOOP_COUNT1(\frame)
.endm
.macro hwlp_restore_regs frame=sp
lw a1, RV_HWLOOP_START0(\frame)
csrw CSR_LOOP0_START_ADDR, a1
lw a1, RV_HWLOOP_END0(\frame)
csrw CSR_LOOP0_END_ADDR, a1
lw a1, RV_HWLOOP_COUNT0(\frame)
csrw CSR_LOOP0_COUNT, a1
lw a1, RV_HWLOOP_START1(\frame)
csrw CSR_LOOP1_START_ADDR, a1
lw a1, RV_HWLOOP_END1(\frame)
csrw CSR_LOOP1_END_ADDR, a1
lw a1, RV_HWLOOP_COUNT1(\frame)
csrw CSR_LOOP1_COUNT, a1
.endm
generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs, hwlp_restore_regs
#endif /* SOC_CPU_HAS_HWLOOP */
#if SOC_CPU_HAS_PIE
/**
* @brief Macros to enable and disable the PIE coprocessor on the current core
*/
.macro pie_enable scratch_reg=a0
li \scratch_reg, 1
csrw CSR_PIE_STATE_REG, \scratch_reg
.endm
/**
* @brief Disable the PIE coprocessor while returning the former status in the given register
*/
.macro pie_disable reg
csrrw \reg, CSR_PIE_STATE_REG, zero
/* Only keep the lowest two bits, if register is 0, PIE was off */
andi \reg, \reg, 0b11
beqz \reg, 1f
/* It was ON, return the enable bit in \reg */
li \reg, 1 << PIE_COPROC_IDX
1:
.endm
/**
* @brief Macros to save and restore the PIE registers to and from the given frame
*/
.macro pie_save_regs frame=a0
/* Save the 128-bit Q registers to the frame memory and then frame += 16 */
esp.vst.128.ip q0, \frame, 16
esp.vst.128.ip q1, \frame, 16
esp.vst.128.ip q2, \frame, 16
esp.vst.128.ip q3, \frame, 16
esp.vst.128.ip q4, \frame, 16
esp.vst.128.ip q5, \frame, 16
esp.vst.128.ip q6, \frame, 16
esp.vst.128.ip q7, \frame, 16
/* Save the QACC_H and QACC_L registers, each being 256 bits wide */
esp.st.qacc.l.l.128.ip \frame, 16
esp.st.qacc.l.h.128.ip \frame, 16
esp.st.qacc.h.l.128.ip \frame, 16
esp.st.qacc.h.h.128.ip \frame, 16
/* UA_STATE register (128 bits) */
esp.st.ua.state.ip \frame, 16
/* XACC register (40 bits) */
esp.st.u.xacc.ip \frame, 8
/* The following registers will be stored in the same word */
/* SAR register (6 bits) */
esp.movx.r.sar a1
slli a2, a1, 8
/* SAR_BYTES register (4 bits) */
esp.movx.r.sar.bytes a1
slli a1, a1, 4
or a2, a2, a1
/* FFT_BIT_WIDTH register (4 bits) */
esp.movx.r.fft.bit.width a1
or a2, a2, a1
sw a2, (\frame)
.endm
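/* The packed `misc` word produced above therefore has the layout:
 *   bits [13:8] SAR (6 bits) | bits [7:4] SAR_BYTES (4 bits) | bits [3:0] FFT_BIT_WIDTH (4 bits)
 * i.e. misc = (SAR << 8) | (SAR_BYTES << 4) | FFT_BIT_WIDTH */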
.macro pie_restore_regs frame=a0
/* Restore the 128-bit Q registers from the frame memory and then frame += 16 */
esp.vld.128.ip q0, \frame, 16
esp.vld.128.ip q1, \frame, 16
esp.vld.128.ip q2, \frame, 16
esp.vld.128.ip q3, \frame, 16
esp.vld.128.ip q4, \frame, 16
esp.vld.128.ip q5, \frame, 16
esp.vld.128.ip q6, \frame, 16
esp.vld.128.ip q7, \frame, 16
/* Restore the QACC_H and QACC_L registers, each being 256 bits wide */
esp.ld.qacc.l.l.128.ip \frame, 16
esp.ld.qacc.l.h.128.ip \frame, 16
esp.ld.qacc.h.l.128.ip \frame, 16
esp.ld.qacc.h.h.128.ip \frame, 16
/* UA_STATE register (128 bits) */
esp.ld.ua.state.ip \frame, 16
/* XACC register (40 bits) */
esp.ld.xacc.ip \frame, 8
/* The following registers are stored in the same word */
lw a2, (\frame)
/* FFT_BIT_WIDTH register (4 bits) */
andi a1, a2, 0xf
esp.movx.w.fft.bit.width a1
/* SAR_BYTES register (4 bits) */
srli a2, a2, 4
andi a1, a2, 0xf
esp.movx.w.sar.bytes a1
/* SAR register (6 bits) */
srli a2, a2, 4
andi a1, a2, 0x3f
esp.movx.w.sar a1
.endm
generate_coprocessor_routine pie, PIE_COPROC_IDX, pie_enable, pie_save_regs, pie_restore_regs
#endif /* SOC_CPU_HAS_PIE */
#if SOC_CPU_HAS_FPU
/* Bit to set in mstatus to enable the FPU */
@ -40,7 +293,7 @@
/* Bit to clear in mstatus to disable the FPU */
#define CSR_MSTATUS_FPU_DISABLE (3 << 13)
.macro save_fpu_regs frame=sp
.macro fpu_save_regs frame=sp
fsw ft0, RV_FPU_FT0(\frame)
fsw ft1, RV_FPU_FT1(\frame)
fsw ft2, RV_FPU_FT2(\frame)
@ -73,9 +326,11 @@
fsw ft9, RV_FPU_FT9 (\frame)
fsw ft10, RV_FPU_FT10(\frame)
fsw ft11, RV_FPU_FT11(\frame)
csrr a1, fcsr
sw a1, RV_FPU_FCSR(\frame)
.endm
.macro restore_fpu_regs frame=sp
.macro fpu_restore_regs frame=sp
flw ft0, RV_FPU_FT0(\frame)
flw ft1, RV_FPU_FT1(\frame)
flw ft2, RV_FPU_FT2(\frame)
@ -108,6 +363,8 @@
flw ft9, RV_FPU_FT9(\frame)
flw ft10, RV_FPU_FT10(\frame)
flw ft11, RV_FPU_FT11(\frame)
lw a1, RV_FPU_FCSR(\frame)
csrw fcsr, a1
.endm
@ -125,98 +382,17 @@
.macro fpu_enable reg
li \reg, CSR_MSTATUS_FPU_ENABLE
li \reg, CSR_MSTATUS_FPU_ENABLE
csrs mstatus, \reg
.endm
.macro fpu_disable reg
li \reg, CSR_MSTATUS_FPU_DISABLE
li \reg, CSR_MSTATUS_FPU_DISABLE
csrc mstatus, \reg
.endm
.global vPortTaskPinToCore
.global vPortCoprocUsedInISR
.global pxPortUpdateCoprocOwner
/**
* @brief Save the current FPU context in the FPU owner's save area
*
* @param sp Interruptee's RvExcFrame address
*
* Note: Since this routine is ONLY meant to be called from _panic_handler routine,
* it is possible to alter `s0-s11` registers
*/
.global rtos_save_fpu_coproc
.type rtos_save_fpu_coproc, @function
rtos_save_fpu_coproc:
/* If we are in an interrupt context, we have to abort. We don't allow using the FPU from ISR */
#if ( configNUM_CORES > 1 )
csrr a2, mhartid /* a2 = coreID */
slli a2, a2, 2 /* a2 = coreID * 4 */
la a1, port_uxInterruptNesting /* a1 = &port_uxInterruptNesting */
add a1, a1, a2 /* a1 = &port_uxInterruptNesting[coreID] */
lw a1, 0(a1) /* a1 = port_uxInterruptNesting[coreID] */
#else /* ( configNUM_CORES <= 1 ) */
lw a1, (port_uxInterruptNesting) /* a1 = port_uxInterruptNesting */
#endif /* ( configNUM_CORES > 1 ) */
/* SP still contains the RvExcFrame address */
mv a0, sp
bnez a1, vPortCoprocUsedInISR
/* Enable the FPU needed by the current task */
fpu_enable a1
mv s0, ra
call rtos_current_tcb
/* If the current TCB is NULL, the FPU is used during initialization, even before
* the scheduler started. Consider this a valid usage, the FPU will be disabled
* as soon as the scheduler is started anyway*/
beqz a0, rtos_save_fpu_coproc_norestore
mv s1, a0 /* s1 = pxCurrentTCBs */
/* Prepare parameters of pxPortUpdateCoprocOwner */
mv a2, a0
li a1, FPU_COPROC_IDX
csrr a0, mhartid
call pxPortUpdateCoprocOwner
/* If the save area is NULL, no need to save context */
beqz a0, rtos_save_fpu_coproc_nosave
/* Save the FPU context in the structure */
lw a0, RV_COPROC_SA+FPU_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[FPU_COPROC_IDX] */
save_fpu_regs a0
csrr a1, fcsr
sw a1, RV_FPU_FCSR(a0)
rtos_save_fpu_coproc_nosave:
#if ( configNUM_CORES > 1 )
/* Pin current task to current core */
mv a0, s1
csrr a1, mhartid
call vPortTaskPinToCore
#endif /* configNUM_CORES > 1 */
/* Check if we have to restore a previous FPU context from the current TCB */
mv a0, s1
/* Do not allocate memory for the FPU yet, delay this until another task wants to use it.
* This guarantees that if a stack overflow occurs when allocating FPU context on the stack,
* the current task context is flushed and updated in the TCB, generating a correct backtrace
* from the panic handler. */
li a1, 0
li a2, FPU_COPROC_IDX
call pxPortGetCoprocArea
/* Get the enable flags from the coprocessor save area */
lw a1, RV_COPROC_ENABLE(a0)
/* To avoid having branches below, set the FPU enable flag now */
ori a2, a1, 1 << FPU_COPROC_IDX
sw a2, RV_COPROC_ENABLE(a0)
/* Check if the former FPU enable bit was set */
andi a2, a1, 1 << FPU_COPROC_IDX
beqz a2, rtos_save_fpu_coproc_norestore
/* FPU enable bit was set, restore the FPU context */
lw a0, RV_COPROC_SA+FPU_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[FPU_COPROC_IDX] */
restore_fpu_regs a0
lw a1, RV_FPU_FCSR(a0)
csrw fcsr, a1
rtos_save_fpu_coproc_norestore:
/* Return from routine via s0, instead of ra */
jr s0
.size rtos_save_fpu_coproc, .-rtos_save_fpu_coproc
generate_coprocessor_routine fpu, FPU_COPROC_IDX, fpu_enable, fpu_save_regs, fpu_restore_regs
#endif /* SOC_CPU_HAS_FPU */
@ -249,6 +425,8 @@ rtos_current_tcb:
* TODO: ISR nesting code improvements ?
* In the routines below, let's use a0-a5 registers to let the compiler generate
* 16-bit instructions.
* @returns Context that should be given to `rtos_int_exit`. On targets that have coprocessors,
* this value is a bitmap where bit i is 1 if coprocessor i is enabled, 0 if it is disabled.
*/
.global rtos_int_enter
.type rtos_int_enter, @function
@ -262,6 +440,7 @@ rtos_int_enter:
#else
lw a0, port_xSchedulerRunning /* a0 = port_xSchedulerRunning */
#endif /* ( configNUM_CORES > 1 ) */
/* In case we jump, return value (a0) is correct */
beqz a0, rtos_int_enter_end /* if (port_xSchedulerRunning[coreID] == 0) jump to rtos_int_enter_end */
/* Increment the ISR nesting count */
@ -274,12 +453,27 @@ rtos_int_enter:
sw a2, 0(a0) /* port_uxInterruptNesting[coreID] = a2 */
/* If we reached here from another low-priority ISR, i.e, port_uxInterruptNesting[coreID] > 0, then skip stack pushing to TCB */
li a0, 0 /* return 0 in case we are going to branch */
bnez a1, rtos_int_enter_end /* if (port_uxInterruptNesting[coreID] > 0) jump to rtos_int_enter_end */
li a7, 0
#if SOC_CPU_COPROC_NUM > 0
/* Disable the FPU to forbid the ISR from using it. We don't need to re-enable it manually since the caller
* will restore `mstatus` before returning from interrupt. */
/* Disable the coprocessors to forbid the ISR from using them */
#if SOC_CPU_HAS_HWLOOP
/* The current HWLP status will be returned in a0 */
hwlp_disable a0
or a7, a7, a0
#endif /* SOC_CPU_HAS_HWLOOP */
#if SOC_CPU_HAS_PIE
/* The current PIE status will be returned in a0 */
pie_disable a0
or a7, a7, a0
#endif /* SOC_CPU_HAS_PIE */
#if SOC_CPU_HAS_FPU
fpu_disable a0
#endif /* SOC_CPU_HAS_FPU */
#endif /* SOC_CPU_COPROC_NUM > 0 */
@ -320,6 +514,8 @@ rtos_int_enter:
ESP_HW_STACK_GUARD_MONITOR_START_CUR_CORE a0 a1
#endif /* CONFIG_ESP_SYSTEM_HW_STACK_GUARD */
/* Return the coprocessor context from a7 */
mv a0, a7
rtos_int_enter_end:
ret
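/* Example: with HWLP_COPROC_IDX = 1 and PIE_COPROC_IDX = 2, a return value of
 * 0b110 means both HWLP and PIE were enabled in the interrupted context and must
 * be restored by `rtos_int_exit`; the FPU state travels via `mstatus` instead. */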
@ -327,6 +523,8 @@ rtos_int_enter_end:
* @brief Restore the stack pointer of the next task to run.
*
* @param a0 Former mstatus
* @param a1 Context returned by `rtos_int_enter`. On targets that have coprocessors, this value is a bitmap
* where bit i is 1 if coprocessor i was enabled, 0 if it was disabled.
*
* @returns New mstatus (potentially with coprocessors disabled)
*/
@ -334,9 +532,14 @@ rtos_int_enter_end:
.type rtos_int_exit, @function
rtos_int_exit:
/* To speed up this routine and because this current routine is only meant to be called from the interrupt
* handler, let's use callee-saved registers instead of stack space. Registers `s3-s11` are not used by
* handler, let's use callee-saved registers instead of stack space. Registers `s5-s11` are not used by
* the caller */
mv s11, a0
#if SOC_CPU_COPROC_NUM > 0
/* Save a1 as it contains the bitmap with the enabled coprocessors */
mv s8, a1
#endif
#if ( configNUM_CORES > 1 )
csrr a1, mhartid /* a1 = coreID */
slli a1, a1, 2 /* a1 = a1 * 4 */
@ -366,12 +569,12 @@ isr_skip_decrement:
/* If the CPU reached this label, a2 (uxInterruptNesting) is 0 for sure */
/* Schedule the next task if a yield is pending */
la a0, xPortSwitchFlag /* a0 = &xPortSwitchFlag */
la s7, xPortSwitchFlag /* s7 = &xPortSwitchFlag */
#if ( configNUM_CORES > 1 )
add a0, a0, a1 /* a0 = &xPortSwitchFlag[coreID] // a1 already contains coreID * 4 */
add s7, s7, a1 /* s7 = &xPortSwitchFlag[coreID] // a1 already contains coreID * 4 */
#endif /* ( configNUM_CORES > 1 ) */
lw a2, 0(a0) /* a2 = xPortSwitchFlag[coreID] */
beqz a2, no_switch /* if (xPortSwitchFlag[coreID] == 0) jump to no_switch */
lw a0, 0(s7) /* a0 = xPortSwitchFlag[coreID] */
beqz a0, no_switch_restore_coproc /* if (xPortSwitchFlag[coreID] == 0) jump to no_switch_restore_coproc */
/* Preserve return address and schedule next task. To speed up the process, and because this current routine
* is only meant to be called from the interrupt handler, let's save some space and gain speed by using callee-saved
@ -379,33 +582,52 @@ isr_skip_decrement:
mv s10, ra
#if ( SOC_CPU_COPROC_NUM > 0 )
/* In the cases where the newly scheduled task is different from the previously running one,
* we have to disable the coprocessor(s) to let them trigger an exception on first use.
* Else, if the same task is scheduled, do not change the coprocessor(s) state. */
* we have to disable the coprocessors to let them trigger an exception on first use.
* Else, if the same task is scheduled, restore the former coprocessors state (before the interrupt) */
call rtos_current_tcb
/* Keep former TCB in s9 */
mv s9, a0
#endif
call vTaskSwitchContext
#if ( SOC_CPU_COPROC_NUM == 0 )
mv ra, s10 /* Restore original return address */
#endif
/* Clears the switch pending flag (stored in s7) */
sw zero, 0(s7) /* xPortSwitchFlag[coreID] = 0; */
#if ( SOC_CPU_COPROC_NUM > 0 )
/* If the Task to schedule is NOT the same as the former one (s9), keep the coprocessors disabled */
call rtos_current_tcb
beq a0, s9, rtos_int_exit_no_change
/* Disable the coprocessors in s11 register (former mstatus) */
mv ra, s10 /* Restore original return address */
beq a0, s9, no_switch_restore_coproc
#if SOC_CPU_HAS_FPU
/* Disable the FPU in the `mstatus` value to return */
li a0, ~CSR_MSTATUS_FPU_DISABLE
and s11, s11, a0
rtos_int_exit_no_change:
#else /* ( SOC_CPU_COPROC_NUM == 0 ) */
call vTaskSwitchContext
#endif /* SOC_CPU_HAS_FPU */
j no_switch_restored
#endif /* ( SOC_CPU_COPROC_NUM > 0 ) */
mv ra, s10
/* Clears the switch pending flag */
la a0, xPortSwitchFlag /* a0 = &xPortSwitchFlag */
#if ( configNUM_CORES > 1 )
/* C routine vTaskSwitchContext may change the temp registers, so we read again */
csrr a1, mhartid /* a1 = coreID */
slli a1, a1, 2 /* a1 = a1 * 4 */
add a0, a0, a1 /* a0 = &xPortSwitchFlag[coreID]; */
#endif /* ( configNUM_CORES > 1 ) */
sw zero, 0(a0) /* xPortSwitchFlag[coreID] = 0; */
no_switch_restore_coproc:
/* We reach here either because there is no switch scheduled or because the TCB that is going to be scheduled
* is the same as the one that has been interrupted. In both cases, we need to restore the coprocessors' status */
#if SOC_CPU_HAS_HWLOOP
andi a0, s8, 1 << HWLP_COPROC_IDX
beqz a0, 1f
hwlp_enable a0
1:
#endif /* SOC_CPU_HAS_HWLOOP */
no_switch:
#if SOC_CPU_HAS_PIE
andi a0, s8, 1 << PIE_COPROC_IDX
beqz a0, 1f
pie_enable a0
1:
#endif /* SOC_CPU_HAS_PIE */
no_switch_restored:
#if CONFIG_ESP_SYSTEM_HW_STACK_GUARD
/* esp_hw_stack_guard_monitor_stop(); pass the scratch registers */

View File

@ -165,7 +165,7 @@ static void unpinned_task(void *arg)
TEST_ASSERT_EQUAL(cur_core_num, xTaskGetCoreID(NULL));
#endif
#endif // !CONFIG_FREERTOS_UNICORE
// Reenable scheduling/preemption
// Re-enable scheduling/preemption
#if ( ( CONFIG_FREERTOS_SMP ) && ( !CONFIG_FREERTOS_UNICORE ) )
vTaskPreemptionEnable(NULL);
#else
@ -242,7 +242,7 @@ TEST_CASE("FPU: Unsolicited context switch between tasks using FPU", "[freertos]
};
xTaskCreatePinnedToCore(fpu_calculation, "Task1", 2048, params + 0, UNITY_FREERTOS_PRIORITY + 1, &tasks[0], 1);
xTaskCreatePinnedToCore(fpu_calculation, "Task2", 2048, params + 1, UNITY_FREERTOS_PRIORITY + 2, &tasks[2], 1);
xTaskCreatePinnedToCore(fpu_calculation, "Task2", 2048, params + 1, UNITY_FREERTOS_PRIORITY + 1, &tasks[1], 1);
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);

View File

@ -0,0 +1,151 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "sdkconfig.h"
#include <string.h>
#include "esp_rom_sys.h"
#include "soc/soc_caps.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "unity.h"
/**
* On RISC-V targets that have coprocessors, the contexts are saved at the lowest address of the stack,
* which can lead to wrong stack watermark calculation in FreeRTOS in theory.
* As such, the port layer of FreeRTOS will adjust the lowest address of the stack when a coprocessor
* context is saved.
*/
#if SOC_CPU_HAS_HWLOOP
static uint32_t use_hwlp(uint32_t count)
{
uint32_t ret;
asm volatile(
/* The toolchain doesn't support HWLP instructions yet, manually set it up */
"la a2, start\n"
"csrw 0x7c6, a2\n"
"la a2, end\n"
"csrw 0x7c7, a2\n"
"csrw 0x7c8, a0\n"
"li a1, 0\n"
/* Hardware loops must have at least 8 32-bit instructions or 16 16-bit instructions */
"start:\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"end:\n"
"addi a1, a1, 1\n"
"mv %0, a1\n"
"ret\n"
: "=r"(ret) :);
return ret;
}
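/* Each hardware-loop iteration executes the 15 `addi` instructions between `start:`
 * and `end:` plus the final `addi` at `end:`, i.e. 16 increments per iteration, so
 * use_hwlp(count) returns 16 * count (e.g. use_hwlp(10) == 160). */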
static void other_task(void* arg)
{
const TaskHandle_t main_task = (TaskHandle_t) arg;
use_hwlp(10);
xTaskNotifyGive(main_task);
vTaskDelete(NULL);
}
TEST_CASE("HWLP: Context save does not affect stack watermark", "[freertos]")
{
TaskHandle_t pvCreatedTask;
/* Force the FreeRTOS port layer to store a HWLP context in the current task.
* So let's use it and make sure another task, on the SAME CORE, also uses it */
const int core_id = xPortGetCoreID();
const TaskHandle_t current_handle = xTaskGetCurrentTaskHandle();
/* Get the current stack watermark */
const UBaseType_t before_watermark = uxTaskGetStackHighWaterMark(current_handle);
/* Use the HWLP unit, the context will NOT be flushed until another task starts using it */
use_hwlp(20);
xTaskCreatePinnedToCore(other_task,
"OtherTask",
2048,
(void*) current_handle,
CONFIG_UNITY_FREERTOS_PRIORITY - 1,
&pvCreatedTask,
core_id);
vTaskDelay(10);
/* Wait for other task to complete */
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
const UBaseType_t after_watermark = uxTaskGetStackHighWaterMark(current_handle);
TEST_ASSERT_TRUE(after_watermark > before_watermark / 2);
}
typedef struct {
uint32_t count;
TaskHandle_t main;
} ParamsHWLP;
void calculation(void* arg)
{
ParamsHWLP* p = (ParamsHWLP*) arg;
const uint32_t count = p->count;
uint32_t result = 0;
int i = 0;
for (i = 0; i < 10; i++) {
uint32_t current = use_hwlp(count);
result += current;
/* Give some time to the other task to interrupt us before checking the result */
esp_rom_delay_us(1000);
/* Using TEST_ASSERT_TRUE triggers a stack overflow, make sure the count is still correct.
* The function `use_hwlp` should return (count * 16) */
assert(count * 16 == current);
/* Give the hand back to FreeRTOS to avoid any watchdog error */
vTaskDelay(2);
}
/* Make sure the result is correct */
assert(count * 16 * i == result);
xTaskNotifyGive(p->main);
vTaskDelete(NULL);
}
TEST_CASE("HWLP: Unsolicited context switch between tasks using the HWLP", "[freertos]")
{
/* Create two tasks that are on the same core and use the HWLP */
TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
TaskHandle_t tasks[2];
ParamsHWLP params[2] = {
{ .count = 10, .main = unity_task_handle },
{ .count = 200, .main = unity_task_handle },
};
xTaskCreatePinnedToCore(calculation, "Task1", 2048, params + 0, CONFIG_UNITY_FREERTOS_PRIORITY + 1, &tasks[0], 1);
xTaskCreatePinnedToCore(calculation, "Task2", 2048, params + 1, CONFIG_UNITY_FREERTOS_PRIORITY + 1, &tasks[1], 1);
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
}
#endif // SOC_CPU_HAS_HWLOOP

View File

@ -0,0 +1,253 @@
/*
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "sdkconfig.h"
#include <math.h>
#include "esp_rom_sys.h"
#include "soc/soc_caps.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/semphr.h"
#include "unity.h"
#include "test_utils.h"
#if SOC_CPU_HAS_PIE
/**
* @brief Performs the sum of two 4-word vectors using the PIE.
*
* @param a First vector
* @param b Second vector
* @param dst Destination to store the sum
*
* @returns dst will store a + b
*/
static void pie_vector_add(const int32_t a[4], const int32_t b[4], int32_t dst[4])
{
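    /* Relies on the RISC-V calling convention placing a, b and dst in a0-a2:
     * the inline assembly references those registers directly rather than
     * through operand constraints. */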
asm volatile("esp.vld.128.ip q0, a0, 0\n"
"esp.vld.128.ip q1, a1, 0\n"
"esp.vadd.s32 q2, q0, q1\n"
"esp.vst.128.ip q2, a2, 0\n"
::);
}
/* ------------------------------------------------------------------------------------------------------------------ */
/*
Test PIE usage from a task context
Purpose:
- Test that the PIE can be used from a task context
- Test that PIE context is properly saved and restored
- Test that PIE context is cleaned up on task deletion by running multiple iterations
Procedure:
- Create TEST_PINNED_NUM_TASKS tasks pinned to each core
- Start each task
- Each task performs a PIE vector addition and then blocks (to allow other tasks to run, thus forcing a PIE context
save and restore).
- Delete each task
- Repeat test for TEST_PINNED_NUM_ITERS iterations
Expected:
- Correct vector sum calculated by each task
- Each task cleans up its PIE context on deletion
*/
#define TEST_PINNED_NUM_TASKS 3
#define TEST_PINNED_NUM_ITERS 5
static void pinned_task(void *arg)
{
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
int32_t a[4] = { 42, 42, 42, 42};
int32_t b[4] = { 10, 20, 30, 40 };
int32_t dst[4] = { 0 };
pie_vector_add(a, b, dst);
// Indicate done and wait to be deleted
xSemaphoreGive((SemaphoreHandle_t)arg);
vTaskSuspend(NULL);
}
TEST_CASE("PIE: Usage in task", "[freertos]")
{
SemaphoreHandle_t done_sem = xSemaphoreCreateCounting(CONFIG_FREERTOS_NUMBER_OF_CORES * TEST_PINNED_NUM_TASKS, 0);
TEST_ASSERT_NOT_EQUAL(NULL, done_sem);
for (int iter = 0; iter < TEST_PINNED_NUM_ITERS; iter++) {
TaskHandle_t task_handles[CONFIG_FREERTOS_NUMBER_OF_CORES][TEST_PINNED_NUM_TASKS];
// Create test tasks for each core
for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) {
for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *)done_sem, UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i));
}
}
// Start the created tasks simultaneously
for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) {
for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
xTaskNotifyGive(task_handles[i][j]);
}
}
// Wait for the tasks to complete
for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES * TEST_PINNED_NUM_TASKS; i++) {
xSemaphoreTake(done_sem, portMAX_DELAY);
}
// Delete the tasks
for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) {
for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
vTaskDelete(task_handles[i][j]);
}
}
vTaskDelay(10); // Short delay to allow the idle task to free task memory and PIE contexts
}
vSemaphoreDelete(done_sem);
}
/* ------------------------------------------------------------------------------------------------------------------ */
/*
Test PIE usage will pin an unpinned task
Purpose:
- Test that unpinned tasks are automatically pinned to the current core on the task's first use of the PIE
- Test that PIE context is cleaned up on task deletion by running multiple iterations
Procedure:
- Create an unpinned task
- Task disables scheduling/preemption to ensure that it does not switch cores
- Task uses the PIE
- Task checks its core affinity after PIE usage
- Task deletes itself
- Repeat test for TEST_UNPINNED_NUM_ITERS iterations
Expected:
- Task remains unpinned until its first usage of the PIE
- The task becomes pinned to the current core after first use of the PIE
- Each task cleans up its PIE context on deletion
*/
#if CONFIG_FREERTOS_NUMBER_OF_CORES > 1
#define TEST_UNPINNED_NUM_ITERS 5
static void unpinned_task(void *arg)
{
// Disable scheduling/preemption to make sure current core ID doesn't change
#if ( ( CONFIG_FREERTOS_SMP ) && ( !CONFIG_FREERTOS_UNICORE ) )
vTaskPreemptionDisable(NULL);
#else
vTaskSuspendAll();
#endif
BaseType_t cur_core_num = xPortGetCoreID();
// Check that the task is unpinned
#if !CONFIG_FREERTOS_UNICORE
#if CONFIG_FREERTOS_SMP
TEST_ASSERT_EQUAL(tskNO_AFFINITY, vTaskCoreAffinityGet(NULL));
#else
TEST_ASSERT_EQUAL(tskNO_AFFINITY, xTaskGetCoreID(NULL));
#endif
#endif // !CONFIG_FREERTOS_UNICORE
int32_t a[4] = { 0, 1, 2, 3};
int32_t b[4] = { 111, 222, 333, 444 };
int32_t dst[4] = { 0 };
pie_vector_add(a, b, dst);
for (int i = 0; i < sizeof(a) / sizeof(uint32_t); i++) {
TEST_ASSERT_EQUAL(dst[i], a[i] + b[i]);
}
#if !CONFIG_FREERTOS_UNICORE
#if CONFIG_FREERTOS_SMP
TEST_ASSERT_EQUAL(1 << cur_core_num, vTaskCoreAffinityGet(NULL));
#else
TEST_ASSERT_EQUAL(cur_core_num, xTaskGetCoreID(NULL));
#endif
#endif // !CONFIG_FREERTOS_UNICORE
// Re-enable scheduling/preemption
#if ( ( CONFIG_FREERTOS_SMP ) && ( !CONFIG_FREERTOS_UNICORE ) )
vTaskPreemptionEnable(NULL);
#else
xTaskResumeAll();
#endif
// Indicate done and self delete
xTaskNotifyGive((TaskHandle_t)arg);
vTaskDelete(NULL);
}
TEST_CASE("PIE: Usage in unpinned task", "[freertos]")
{
TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
for (int iter = 0; iter < TEST_UNPINNED_NUM_ITERS; iter++) {
// Create unpinned task
xTaskCreate(unpinned_task, "unpin", 4096, (void *)unity_task_handle, UNITY_FREERTOS_PRIORITY + 1, NULL);
// Wait for task to complete
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
vTaskDelay(10); // Short delay to allow task memory to be freed
}
}
typedef struct {
int32_t cst;
TaskHandle_t main;
} ParamsPIE;
/**
* @brief Function performing some simple calculation using the PIE coprocessor.
* The goal is to be preempted by a task that also uses the PIE on the same core.
*/
void pie_calculation(void* arg)
{
ParamsPIE* p = (ParamsPIE*) arg;
const int32_t cst = p->cst;
int32_t a[4] = { cst, cst, cst, cst };
int32_t dst[4] = { 0 };
for (int i = 0; i < 10; i++) {
pie_vector_add(a, dst, dst);
/* Give some time to the other task to interrupt us before checking the result */
esp_rom_delay_us(1000);
/* Using TEST_ASSERT_TRUE triggers a stack overflow, make sure the sign is still correct */
assert((dst[0] < 0 && cst < 0) || (dst[0] > 0 && cst > 0));
/* Give the hand back to FreeRTOS to avoid any watchdog error */
vTaskDelay(2);
}
/* Make sure the result is correct */
assert((dst[0] * cst == 10));
xTaskNotifyGive(p->main);
vTaskDelete(NULL);
}
TEST_CASE("PIE: Unsolicited context switch between tasks using the PIE", "[freertos]")
{
/* Create two tasks that are on the same core and use the PIE */
TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
TaskHandle_t tasks[2];
ParamsPIE params[2] = {
{ .cst = 1, .main = unity_task_handle },
{ .cst = -1, .main = unity_task_handle },
};
xTaskCreatePinnedToCore(pie_calculation, "Task1", 2048, params + 0, UNITY_FREERTOS_PRIORITY + 1, &tasks[0], 1);
xTaskCreatePinnedToCore(pie_calculation, "Task2", 2048, params + 1, UNITY_FREERTOS_PRIORITY + 1, &tasks[1], 1);
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
}
#endif // CONFIG_FREERTOS_NUMBER_OF_CORES > 1
#endif // SOC_CPU_HAS_PIE

View File

@ -0,0 +1,86 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "sdkconfig.h"
#include <string.h>
#include "soc/soc_caps.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "unity.h"
#define TASKS_STATUS_ARRAY_LEN 16
/**
* On RISC-V targets that have coprocessors, the contexts are saved at the lowest address of the stack,
* which can lead to wrong stack watermark calculation in FreeRTOS in theory.
* As such, the port layer of FreeRTOS will adjust the lowest address of the stack when a coprocessor
* context is saved.
*/
#if SOC_CPU_HAS_PIE
static void use_pie(uint32_t a[4], uint32_t b[4])
{
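    /* Note: the template loads through %0 (the `a` output operand) before writing
     * it, which relies on the compiler materializing `a` in that register (it is
     * also passed as the %1 input); a matching/`+r` constraint would make this
     * explicit. */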
asm volatile("esp.vld.128.ip q0, %0, 0\n"
"esp.vld.128.ip q1, %2, 0\n"
"esp.vadd.u32 q2, q0, q1\n"
"esp.vst.128.ip q2, %0, 0\n"
: "=r"(a) : "r"(a), "r"(b));
}
static void other_task(void* arg)
{
uint32_t a[4] = { 1, 2, 3, 4};
uint32_t b[4] = { 42, 43, 44, 45};
const TaskHandle_t main_task = (TaskHandle_t) arg;
/* This task must also use the PIE coprocessor to force a PIE context flush on the main task */
use_pie(a, b);
xTaskNotifyGive(main_task);
vTaskDelete(NULL);
}
TEST_CASE("PIE: Context save does not affect stack watermark", "[freertos]")
{
/* Setup some random values */
uint32_t a[4] = { 0x3f00ffff, 0xffe10045, 0xffe10096, 0x42434546};
uint32_t b[4] = { 0x42, 0xbb43, 0x6644, 0x845};
TaskHandle_t pvCreatedTask;
/* Force the FreeRTOS port layer to store a PIE context in the current task.
* So let's use the PIE and make sure another task, on the SAME CORE, also uses it */
const int core_id = xPortGetCoreID();
const TaskHandle_t current_handle = xTaskGetCurrentTaskHandle();
/* Get the current stack watermark */
const UBaseType_t before_watermark = uxTaskGetStackHighWaterMark(current_handle);
/* Use the PIE unit, the context will NOT be flushed until another task starts using it */
use_pie(a, b);
xTaskCreatePinnedToCore(other_task,
"OtherTask",
2048,
(void*) current_handle,
CONFIG_UNITY_FREERTOS_PRIORITY - 1,
&pvCreatedTask,
core_id);
vTaskDelay(10);
/* Wait for other task to complete */
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
const UBaseType_t after_watermark = uxTaskGetStackHighWaterMark(current_handle);
/* The current task has seen a PIE registers context save, so we have at least 8 16-byte registers saved on the
* stack, which represents 128 bytes. In practice, it may be very different, for example a call to printf would
* result in more than 1KB of additional stack space used. So let's just make sure that the watermark is bigger
* than 50% of the former watermark. */
TEST_ASSERT_TRUE(after_watermark > before_watermark / 2);
}
#endif // SOC_CPU_HAS_PIE

View File

@ -0,0 +1,28 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include "soc/soc_caps.h"
#if SOC_CPU_HAS_HWLOOP
/* CSR 0x7F1 lowest 2 bits describe the following states:
* 00: OFF
* 01: Initial
* 10: Clean
* 11: Dirty
*/
#define CSR_HWLP_STATE_REG 0x7F1
#define CSR_LOOP0_START_ADDR 0x7C6
#define CSR_LOOP0_END_ADDR 0x7C7
#define CSR_LOOP0_COUNT 0x7C8
#define CSR_LOOP1_START_ADDR 0x7C9
#define CSR_LOOP1_END_ADDR 0x7CA
#define CSR_LOOP1_COUNT 0x7CB
#endif /* SOC_CPU_HAS_HWLOOP */
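
A minimal sketch of how these definitions could be consumed from C, assuming ESP-IDF's `RV_READ_CSR` helper from `riscv/csr.h` (the state encoding follows the comment above):

#include <stdbool.h>
#include "riscv/csr.h"
#include "riscv/csr_hwlp.h"

/* Returns true if the current HWLP context holds live (dirty) state */
static inline bool hwlp_context_is_dirty(void)
{
    /* Lowest two bits: 00 OFF, 01 Initial, 10 Clean, 11 Dirty */
    return (RV_READ_CSR(CSR_HWLP_STATE_REG) & 0x3) == 0x3;
}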

View File

@ -0,0 +1,21 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include "soc/soc_caps.h"
#if SOC_CPU_HAS_PIE
/* CSR lowest 2 bits describe the following states:
* 00: OFF
* 01: Initial
* 10: Clean
* 11: Dirty
*/
#define CSR_PIE_STATE_REG 0x7F2
#endif /* SOC_CPU_HAS_PIE */

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2015-2023 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2015-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -86,7 +86,21 @@ STRUCT_END(RvExcFrame)
#if SOC_CPU_COPROC_NUM > 0
/* Define the default size of each coprocessor save area */
#define RV_COPROC0_SIZE 0
#define RV_COPROC1_SIZE 0
#define RV_COPROC2_SIZE 0
/* And the alignment for each of them */
#define RV_COPROC0_ALIGN 4
#define RV_COPROC1_ALIGN 4
#define RV_COPROC2_ALIGN 4
#if SOC_CPU_HAS_FPU
/* Floating-Point Unit coprocessor is now considered coprocessor 0 */
#define FPU_COPROC_IDX 0
/**
* @brief Floating-Point Unit save area
*/
@ -126,29 +140,97 @@ STRUCT_FIELD (long, 4, RV_FPU_FT11, ft11)
STRUCT_FIELD (long, 4, RV_FPU_FCSR, fcsr) /* fcsr special register */
STRUCT_END(RvFPUSaveArea)
/* Floating-Point Unit coprocessor is now considered coprocessor 0 */
#define FPU_COPROC_IDX 0
/* PIE/AIA coprocessor is coprocessor 1 */
#define PIE_COPROC_IDX 1
/* Define the size of each coprocessor save area */
/* Redefine the coprocessor area size previously defined to 0 */
#undef RV_COPROC0_SIZE
#if defined(_ASMLANGUAGE) || defined(__ASSEMBLER__)
#define RV_COPROC0_SIZE RvFPUSaveAreaSize
#define RV_COPROC1_SIZE 0 // PIE/AIA coprocessor area
#define RV_COPROC0_SIZE RvFPUSaveAreaSize
#else
#define RV_COPROC0_SIZE sizeof(RvFPUSaveArea)
#define RV_COPROC1_SIZE 0 // PIE/AIA coprocessor area
#define RV_COPROC0_SIZE sizeof(RvFPUSaveArea)
#endif /* defined(_ASMLANGUAGE) || defined(__ASSEMBLER__) */
#endif /* SOC_CPU_HAS_FPU */
#if SOC_CPU_HAS_HWLOOP
/* Hardware Loop extension is "coprocessor" 1 */
#define HWLP_COPROC_IDX 1
/**
* @brief Hardware loop save area
*/
STRUCT_BEGIN
STRUCT_FIELD (long, 4, RV_HWLOOP_START0, start0)
STRUCT_FIELD (long, 4, RV_HWLOOP_END0, end0)
STRUCT_FIELD (long, 4, RV_HWLOOP_COUNT0, count0)
STRUCT_FIELD (long, 4, RV_HWLOOP_START1, start1)
STRUCT_FIELD (long, 4, RV_HWLOOP_END1, end1)
STRUCT_FIELD (long, 4, RV_HWLOOP_COUNT1, count1)
STRUCT_END(RvHWLPSaveArea)
/* Redefine the coprocessor area size previously defined to 0 */
#undef RV_COPROC1_SIZE
#if defined(_ASMLANGUAGE) || defined(__ASSEMBLER__)
#define RV_COPROC1_SIZE RvHWLPSaveAreaSize
#else
#define RV_COPROC1_SIZE sizeof(RvHWLPSaveArea)
#endif /* defined(_ASMLANGUAGE) || defined(__ASSEMBLER__) */
#endif /* SOC_CPU_HAS_HWLOOP */
#if SOC_CPU_HAS_PIE
/* PIE/AIA coprocessor is now considered coprocessor 2 */
#define PIE_COPROC_IDX 2
/**
* @brief PIE save area
*/
STRUCT_BEGIN
STRUCT_AFIELD (long, 4, RV_PIE_Q0, q0, 4)
STRUCT_AFIELD (long, 4, RV_PIE_Q1, q1, 4)
STRUCT_AFIELD (long, 4, RV_PIE_Q2, q2, 4)
STRUCT_AFIELD (long, 4, RV_PIE_Q3, q3, 4)
STRUCT_AFIELD (long, 4, RV_PIE_Q4, q4, 4)
STRUCT_AFIELD (long, 4, RV_PIE_Q5, q5, 4)
STRUCT_AFIELD (long, 4, RV_PIE_Q6, q6, 4)
STRUCT_AFIELD (long, 4, RV_PIE_Q7, q7, 4)
STRUCT_AFIELD (long, 4, RV_PIE_QACC_L_L, qacc_l_l, 4)
STRUCT_AFIELD (long, 4, RV_PIE_QACC_L_H, qacc_l_h, 4)
STRUCT_AFIELD (long, 4, RV_PIE_QACC_H_L, qacc_h_l, 4)
STRUCT_AFIELD (long, 4, RV_PIE_QACC_H_H, qacc_h_h, 4)
STRUCT_AFIELD (long, 4, RV_PIE_UA_STATE, ua_state, 4)
STRUCT_FIELD (long, 4, RV_PIE_XACC, xacc)
/* This register contains SAR, SAR_BYTES and FFT_BIT_WIDTH in this order (from highest to lowest bits) */
STRUCT_FIELD (long, 4, RV_PIE_MISC, misc)
STRUCT_END(RvPIESaveArea)
/* Redefine the coprocessor area size previously defined to 0 */
#undef RV_COPROC2_SIZE
#if defined(_ASMLANGUAGE) || defined(__ASSEMBLER__)
#define RV_COPROC2_SIZE RvPIESaveAreaSize
#else
#define RV_COPROC2_SIZE sizeof(RvPIESaveArea)
#endif /* defined(_ASMLANGUAGE) || defined(__ASSEMBLER__) */
/* The PIE save area structure must be aligned on 16 bytes */
#undef RV_COPROC2_ALIGN
#define RV_COPROC2_ALIGN 16
#endif /* SOC_CPU_HAS_PIE */
/**
* @brief Coprocessors save area, containing each coprocessor save area
*/
STRUCT_BEGIN
/* Enable bitmap: BIT(i) represents coprocessor i, 1 is used, 0 else */
STRUCT_FIELD (long, 4, RV_COPROC_ENABLE, sa_enable)
/* Address of the original lowest stack address, convenient when the stack needs to re-initialized */
/* Address of the original lowest stack address, convenient when the stack needs to be re-initialized */
STRUCT_FIELD (void*, 4, RV_COPROC_TCB_STACK, sa_tcbstack)
/* Address of the pool of memory used to allocate coprocessors save areas */
STRUCT_FIELD (long, 4, RV_COPROC_ALLOCATOR, sa_allocator)

View File

@ -25,11 +25,11 @@
/* EXT_ILL CSR reasons are stored as follows:
* - Bit 0: FPU core instruction (Load/Store instructions NOT concerned)
* - Bit 1: Low-power core
* - Bit 1: Hardware Loop instructions
* - Bit 2: PIE core */
.equ EXT_ILL_RSN_FPU, 1
.equ EXT_ILL_RSN_LP, 2
.equ EXT_ILL_RSN_PIE, 4
.equ EXT_ILL_RSN_FPU, 1
.equ EXT_ILL_RSN_HWLP, 2
.equ EXT_ILL_RSN_PIE, 4
#endif /* SOC_CPU_COPROC_NUM > 0 */
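/* Example: reading 0b100 from the EXT_ILL CSR means the faulting instruction was
 * a PIE instruction, 0b010 a Hardware Loop instruction, and 0b001 an FPU core
 * instruction (FPU loads/stores excluded, as noted above). */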
/* Macro which first allocates space on the stack to save general
@ -166,12 +166,24 @@ _panic_handler:
/* In case this is due to a coprocessor, set ra right now to simplify the logic below */
la ra, _return_from_exception
/* EXT_ILL CSR should contain the reason for the Illegal Instruction */
csrr a0, EXT_ILL_CSR
mv a2, a0
csrrw a0, EXT_ILL_CSR, zero
#if SOC_CPU_HAS_HWLOOP
/* Check if the HWLOOP bit is set. */
andi a1, a0, EXT_ILL_RSN_HWLP
bnez a1, rtos_save_hwlp_coproc
#endif // SOC_CPU_HAS_HWLOOP
#if SOC_CPU_HAS_PIE
/* Check if the PIE bit is set. */
andi a1, a0, EXT_ILL_RSN_PIE
bnez a1, rtos_save_pie_coproc
#endif // SOC_CPU_HAS_PIE
#if SOC_CPU_HAS_FPU
/* Check if the FPU bit is set. When targets have the FPU reason bug (SOC_CPU_HAS_FPU_EXT_ILL_BUG),
* it is possible that another bit is set even if the reason is an FPU instruction.
* For example, bit 1 can be set and bit 0 won't, even if the reason is an FPU instruction. */
#if SOC_CPU_HAS_FPU
andi a1, a0, EXT_ILL_RSN_FPU
bnez a1, rtos_save_fpu_coproc
#if SOC_CPU_HAS_FPU_EXT_ILL_BUG
@ -202,8 +214,6 @@ _panic_handler_not_fpu:
#endif /* SOC_CPU_HAS_FPU_EXT_ILL_BUG */
#endif /* SOC_CPU_HAS_FPU */
/* Need to check the other coprocessors reason now, instruction is in register a2 */
/* Ignore LP and PIE for now, continue the exception */
_panic_handler_not_coproc:
#endif /* ( SOC_CPU_COPROC_NUM > 0 ) */
@ -298,9 +308,11 @@ _interrupt_handler:
/* Save SP former value */
sw a0, RV_STK_SP(sp)
/* Notify the RTOS that an interrupt ocurred, it will save the current stack pointer
* in the running TCB, no need to pass it as a parameter */
/* Notify the RTOS that an interrupt occurred, it will save the current stack pointer
* in the running TCB, no need to pass it as a parameter
* Returns an abstract context in a0, needs to be passed to `rtos_int_exit` */
call rtos_int_enter
mv s4, a0
/* If this is a non-nested interrupt, SP now points to the interrupt stack */
/* Before dispatching the C handler, restore interrupts to enable nested interrupts */
@ -366,6 +378,7 @@ _interrupt_handler:
/* The RTOS will restore the current TCB stack pointer. This routine will preserve s1 and s2.
* Returns the new `mstatus` value. */
mv a0, s2 /* a0 = mstatus */
mv a1, s4 /* a1 = abstract context returned by `rtos_int_enter` */
call rtos_int_exit
/* Restore the rest of the registers.

View File

@ -419,9 +419,17 @@ config SOC_CPU_HAS_FPU_EXT_ILL_BUG
bool
default y
config SOC_CPU_HAS_HWLOOP
bool
default y
config SOC_CPU_HAS_PIE
bool
default y
config SOC_CPU_COPROC_NUM
int
default 2
default 3
config SOC_HP_CPU_HAS_MULTIPLE_CORES
bool

View File

@ -160,7 +160,9 @@
#define SOC_BRANCH_PREDICTOR_SUPPORTED 1
#define SOC_CPU_HAS_FPU 1
#define SOC_CPU_HAS_FPU_EXT_ILL_BUG 1 // EXT_ILL CSR doesn't support FLW/FSW
#define SOC_CPU_COPROC_NUM 2
#define SOC_CPU_HAS_HWLOOP 1
#define SOC_CPU_HAS_PIE 1
#define SOC_CPU_COPROC_NUM 3
#define SOC_HP_CPU_HAS_MULTIPLE_CORES 1 // Convenience boolean macro used to determine if a target has multiple cores.
#define SOC_CPU_BREAKPOINTS_NUM 3