mirror of
https://github.com/espressif/esp-idf.git
synced 2024-10-05 20:47:46 -04:00
fix(riscv): make HWLP feature use direct saving instead of lazy saving
This commit is contained in:
parent
6eba7a536a
commit
0928ff027b
@ -60,6 +60,11 @@
|
||||
#include "soc/hp_system_reg.h"
|
||||
#endif
|
||||
|
||||
#if SOC_CPU_HAS_HWLOOP
|
||||
#include "riscv/csr.h"
|
||||
#include "riscv/csr_hwlp.h"
|
||||
#endif
|
||||
|
||||
#if ( SOC_CPU_COPROC_NUM > 0 )
|
||||
|
||||
#include "esp_private/panic_internal.h"
|
||||
@ -125,9 +130,23 @@ StackType_t *xIsrStackBottom[portNUM_PROCESSORS] = {0};
|
||||
BaseType_t xPortStartScheduler(void)
|
||||
{
|
||||
#if ( SOC_CPU_COPROC_NUM > 0 )
|
||||
|
||||
#if SOC_CPU_HAS_FPU
|
||||
/* Disable FPU so that the first task to use it will trigger an exception */
|
||||
rv_utils_disable_fpu();
|
||||
#endif
|
||||
#endif /* SOC_CPU_HAS_FPU */
|
||||
|
||||
#if SOC_CPU_HAS_PIE
|
||||
/* Similarly, disable PIE */
|
||||
rv_utils_disable_pie();
|
||||
#endif /* SOC_CPU_HAS_PIE */
|
||||
|
||||
#if SOC_CPU_HAS_HWLOOP
|
||||
/* Initialize the Hardware loop feature */
|
||||
RV_WRITE_CSR(CSR_HWLP_STATE_REG, HWLP_INITIAL_STATE);
|
||||
#endif /* SOC_CPU_HAS_HWLOOP */
|
||||
#endif /* ( SOC_CPU_COPROC_NUM > 0 ) */
|
||||
|
||||
/* Initialize all kernel state tracking variables */
|
||||
BaseType_t coreID = xPortGetCoreID();
|
||||
port_uxInterruptNesting[coreID] = 0;
|
||||
@ -826,7 +845,7 @@ RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, bool allocate, int cop
|
||||
* @param coreid Current core
|
||||
* @param coproc Coprocessor to save context of
|
||||
*
|
||||
* @returns Coprocessor former owner's save are, can be NULL is there was no owner yet, can be -1 if
|
||||
* @returns Coprocessor former owner's save area, can be NULL if there was no owner yet, can be -1 if
|
||||
* the former owner is the same as the new owner.
|
||||
*/
|
||||
RvCoprocSaveArea* pxPortUpdateCoprocOwner(int coreid, int coproc, StaticTask_t* owner)
|
||||
|
@ -92,7 +92,7 @@ rtos_save_\name\()_coproc:
|
||||
\save_coproc_regs a0
|
||||
rtos_save_\name\()_coproc_nosave:
|
||||
#if ( configNUM_CORES > 1 )
|
||||
/* Pin current task to current core */
|
||||
/* Pin current task to current core, s1 has pxCurrentTCBs */
|
||||
mv a0, s1
|
||||
csrr a1, mhartid
|
||||
call vPortTaskPinToCore
|
||||
@ -184,7 +184,33 @@ rtos_save_\name\()_coproc_norestore:
|
||||
.endm
|
||||
|
||||
|
||||
generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs, hwlp_restore_regs
|
||||
/**
 * @brief Restore the HWLP registers contained in the dedicated save area if the given task ever used it.
 *        This routine sets the HWLP context to clean in any case.
 *
 * @param a0 StaticTask address for the newly scheduled task
 *
 * @note `ra` is preserved through a 16-byte stack frame. This routine writes a0, a1 and a2 and performs
 *       a call to `pxPortGetCoprocArea`, so callers should treat the caller-saved registers as clobbered
 *       (TODO confirm against the callee and the `hwlp_restore_regs` macro).
 */
hwlp_restore_if_used:
    addi    sp, sp, -16
    sw      ra, 0(sp)
    /* Get the coprocessor save area of the task: a1 = 0 (second argument, `allocate`), a2 = coprocessor index */
    li      a1, 0
    li      a2, HWLP_COPROC_IDX
    call    pxPortGetCoprocArea
    /* Get the enable flags from the coprocessor save area */
    lw      a1, RV_COPROC_ENABLE(a0)
    /* Isolate the HWLP enable bit: if it is clear, the task never used the HWLP, there is nothing to restore */
    andi    a2, a1, 1 << HWLP_COPROC_IDX
    beqz    a2, .Lhwlp_restore_never_used
    /* Enable bit was set, restore the coprocessor context */
    lw      a0, RV_COPROC_SA+HWLP_COPROC_IDX*4(a0)  /* a0 = RvCoprocSaveArea->sa_coprocs[HWLP_COPROC_IDX] */
    hwlp_restore_regs a0
.Lhwlp_restore_never_used:
    /* In both cases, mark the HWLP context as clean */
    csrwi   CSR_HWLP_STATE_REG, HWLP_CLEAN_STATE
    lw      ra, 0(sp)
    addi    sp, sp, 16
    ret
|
||||
|
||||
#endif /* SOC_CPU_HAS_HWLOOP */
|
||||
|
||||
@ -192,7 +218,7 @@ generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs,
|
||||
#if SOC_CPU_HAS_PIE
|
||||
|
||||
/**
|
||||
* @brief Macros to enable and disable the hardware loop feature on the current core
|
||||
* @brief Macros to enable and disable the PIE coprocessor on the current core
|
||||
*/
|
||||
.macro pie_enable scratch_reg=a0
|
||||
li \scratch_reg, 1
|
||||
@ -200,7 +226,7 @@ generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs,
|
||||
.endm
|
||||
|
||||
/**
|
||||
* @brief Disable HW Loop CPU feature while returning the former status in the given register
|
||||
* @brief Disable the PIE coprocessor while returning the former status in the given register
|
||||
*/
|
||||
.macro pie_disable reg
|
||||
csrrw \reg, CSR_PIE_STATE_REG, zero
|
||||
@ -213,7 +239,7 @@ generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs,
|
||||
.endm
|
||||
|
||||
/**
|
||||
* @brief Macros to save and restore the hardware loop registers to and from the given frame
|
||||
* @brief Macros to save and restore the PIE coprocessor registers to and from the given frame
|
||||
*/
|
||||
.macro pie_save_regs frame=a0
|
||||
/* Save the 128-bit Q registers from the frame memory and then frame += 16 */
|
||||
@ -427,15 +453,16 @@ rtos_current_tcb:
|
||||
* 16-bit instructions.
|
||||
* @returns Context that should be given to `rtos_int_exit`. On targets that have coprocessors,
|
||||
* this value is a bitmap where bit i is 1 if coprocessor i is enabled, 0 if it is disabled.
|
||||
* This routine can use the s registers too since they are not used by the caller (yet)
|
||||
*/
|
||||
.global rtos_int_enter
|
||||
.type rtos_int_enter, @function
|
||||
rtos_int_enter:
|
||||
#if ( configNUM_CORES > 1 )
|
||||
csrr a5, mhartid /* a5 = coreID */
|
||||
slli a5, a5, 2 /* a5 = coreID * 4 */
|
||||
csrr s0, mhartid /* s0 = coreID */
|
||||
slli s0, s0, 2 /* s0 = coreID * 4 */
|
||||
la a0, port_xSchedulerRunning /* a0 = &port_xSchedulerRunning */
|
||||
add a0, a0, a5 /* a0 = &port_xSchedulerRunning[coreID] */
|
||||
add a0, a0, s0 /* a0 = &port_xSchedulerRunning[coreID] */
|
||||
lw a0, (a0) /* a0 = port_xSchedulerRunning[coreID] */
|
||||
#else
|
||||
lw a0, port_xSchedulerRunning /* a0 = port_xSchedulerRunning */
|
||||
@ -446,7 +473,7 @@ rtos_int_enter:
|
||||
/* Increment the ISR nesting count */
|
||||
la a0, port_uxInterruptNesting /* a0 = &port_uxInterruptNesting */
|
||||
#if ( configNUM_CORES > 1 )
|
||||
add a0, a0, a5 /* a0 = &port_uxInterruptNesting[coreID] // a5 already contains coreID * 4 */
|
||||
add a0, a0, s0 /* a0 = &port_uxInterruptNesting[coreID] // s0 contains coreID * 4 */
|
||||
#endif /* ( configNUM_CORES > 1 ) */
|
||||
lw a1, 0(a0) /* a1 = port_uxInterruptNesting[coreID] */
|
||||
addi a2, a1, 1 /* a2 = a1 + 1 */
|
||||
@ -456,19 +483,13 @@ rtos_int_enter:
|
||||
li a0, 0 /* return 0 in case we are going to branch */
|
||||
bnez a1, rtos_int_enter_end /* if (port_uxInterruptNesting[coreID] > 0) jump to rtos_int_enter_end */
|
||||
|
||||
li a7, 0
|
||||
li s2, 0
|
||||
#if SOC_CPU_COPROC_NUM > 0
|
||||
/* Disable the coprocessors to forbid the ISR from using it */
|
||||
#if SOC_CPU_HAS_HWLOOP
|
||||
/* The current HWLP status will be returned in a0 */
|
||||
hwlp_disable a0
|
||||
or a7, a7, a0
|
||||
#endif /* SOC_CPU_HAS_HWLOOP */
|
||||
|
||||
#if SOC_CPU_HAS_PIE
|
||||
/* The current HWLP status will be returned in a0 */
|
||||
/* The current PIE coprocessor status will be returned in a0 */
|
||||
pie_disable a0
|
||||
or a7, a7, a0
|
||||
or s2, s2, a0
|
||||
#endif /* SOC_CPU_HAS_PIE */
|
||||
|
||||
#if SOC_CPU_HAS_FPU
|
||||
@ -485,24 +506,48 @@ rtos_int_enter:
|
||||
/* Save the current sp in pxCurrentTCBs[coreID] and load the ISR stack on to sp */
|
||||
#if ( configNUM_CORES > 1 )
|
||||
la a0, pxCurrentTCBs /* a0 = &pxCurrentTCBs */
|
||||
add a0, a0, a5 /* a0 = &pxCurrentTCBs[coreID] // a5 already contains coreID * 4 */
|
||||
add a0, a0, s0 /* a0 = &pxCurrentTCBs[coreID] // s0 already contains coreID * 4 */
|
||||
lw a0, (a0) /* a0 = pxCurrentTCBs[coreID] */
|
||||
sw sp, 0(a0) /* pxCurrentTCBs[coreID] = sp */
|
||||
la a0, xIsrStackTop /* a0 = &xIsrStackTop */
|
||||
add a0, a0, a5 /* a0 = &xIsrStackTop[coreID] // a5 already contains coreID * 4 */
|
||||
lw sp, (a0) /* sp = xIsrStackTop[coreID] */
|
||||
/* We may need a0 below to call pxPortGetCoprocArea */
|
||||
la a1, xIsrStackTop /* a1 = &xIsrStackTop */
|
||||
add a1, a1, s0 /* a1 = &xIsrStackTop[coreID] // s0 already contains coreID * 4 */
|
||||
lw sp, (a1) /* sp = xIsrStackTop[coreID] */
|
||||
#else
|
||||
lw a0, pxCurrentTCBs /* a0 = pxCurrentTCBs */
|
||||
sw sp, 0(a0) /* pxCurrentTCBs[0] = sp */
|
||||
lw sp, xIsrStackTop /* sp = xIsrStackTop */
|
||||
#endif /* ( configNUM_CORES > 1 ) */
|
||||
|
||||
#if SOC_CPU_HAS_HWLOOP
|
||||
/* Check if the current task used the Hardware loop feature, by reading the state */
|
||||
csrr a1, CSR_HWLP_STATE_REG
|
||||
addi a1, a1, -HWLP_DIRTY_STATE
|
||||
bnez a1, 1f
|
||||
/* State is dirty! The hardware loop feature was used, save the registers */
|
||||
li a1, 1 /* Allocate the save area if not already allocated */
|
||||
li a2, HWLP_COPROC_IDX
|
||||
mv s1, ra
|
||||
call pxPortGetCoprocArea
|
||||
mv ra, s1
|
||||
/* Set the enable flags from the coprocessor save area */
|
||||
lw a1, RV_COPROC_ENABLE(a0)
|
||||
ori a1, a1, 1 << HWLP_COPROC_IDX
|
||||
sw a1, RV_COPROC_ENABLE(a0)
|
||||
/* Get the area where we need to save the HWLP registers */
|
||||
lw a0, RV_COPROC_SA+HWLP_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[HWLP_COPROC_IDX] */
|
||||
hwlp_save_regs a0
|
||||
/* Mark the HWLP state as clean so that any use of the HWLP by the ISR can be detected afterwards */
|
||||
csrwi CSR_HWLP_STATE_REG, HWLP_CLEAN_STATE
|
||||
1:
|
||||
#endif
|
||||
|
||||
#if CONFIG_ESP_SYSTEM_HW_STACK_GUARD
|
||||
/* Prepare the parameters for esp_hw_stack_guard_set_bounds(xIsrStackBottom, xIsrStackTop); */
|
||||
#if ( configNUM_CORES > 1 )
|
||||
/* Load the xIsrStack for the current core and set the new bounds */
|
||||
la a0, xIsrStackBottom
|
||||
add a0, a0, a5 /* a0 = &xIsrStackBottom[coreID] */
|
||||
add a0, a0, s0 /* a0 = &xIsrStackBottom[coreID] */
|
||||
lw a0, (a0) /* a0 = xIsrStackBottom[coreID] */
|
||||
#else
|
||||
lw a0, xIsrStackBottom
|
||||
@ -514,8 +559,8 @@ rtos_int_enter:
|
||||
ESP_HW_STACK_GUARD_MONITOR_START_CUR_CORE a0 a1
|
||||
#endif /* CONFIG_ESP_SYSTEM_HW_STACK_GUARD */
|
||||
|
||||
/* Return the coprocessor context from a7 */
|
||||
mv a0, a7
|
||||
/* Return the coprocessor context from s2 */
|
||||
mv a0, s2
|
||||
rtos_int_enter_end:
|
||||
ret
|
||||
|
||||
@ -569,11 +614,11 @@ isr_skip_decrement:
|
||||
/* If the CPU reached this label, a2 (uxInterruptNesting) is 0 for sure */
|
||||
|
||||
/* Schedule the next task if a yield is pending */
|
||||
la s7, xPortSwitchFlag /* a0 = &xPortSwitchFlag */
|
||||
la s7, xPortSwitchFlag /* s7 = &xPortSwitchFlag */
|
||||
#if ( configNUM_CORES > 1 )
|
||||
add s7, s7, a1 /* a0 = &xPortSwitchFlag[coreID] // a1 already contains coreID * 4 */
|
||||
add s7, s7, a1 /* s7 = &xPortSwitchFlag[coreID] // a1 already contains coreID * 4 */
|
||||
#endif /* ( configNUM_CORES > 1 ) */
|
||||
lw a0, 0(s7) /* a2 = xPortSwitchFlag[coreID] */
|
||||
lw a0, 0(s7) /* a0 = xPortSwitchFlag[coreID] */
|
||||
beqz a0, no_switch_restore_coproc /* if (xPortSwitchFlag[coreID] == 0) jump to no_switch_restore_coproc */
|
||||
|
||||
/* Preserve return address and schedule next task. To speed up the process, and because this current routine
|
||||
@ -601,10 +646,19 @@ isr_skip_decrement:
|
||||
mv ra, s10 /* Restore original return address */
|
||||
beq a0, s9, no_switch_restore_coproc
|
||||
|
||||
#if SOC_CPU_HAS_HWLOOP
|
||||
/* We have to restore the context of the HWLP if the newly scheduled task used it before. In all cases, this
|
||||
* routine will also clean the state and set it to clean */
|
||||
mv s7, ra
|
||||
/* a0 contains the current TCB address */
|
||||
call hwlp_restore_if_used
|
||||
mv ra, s7
|
||||
#endif /* SOC_CPU_HAS_HWLOOP */
|
||||
|
||||
#if SOC_CPU_HAS_FPU
|
||||
/* Disable the FPU in the `mstatus` value to return */
|
||||
li a0, ~CSR_MSTATUS_FPU_DISABLE
|
||||
and s11, s11, a0
|
||||
li a1, ~CSR_MSTATUS_FPU_DISABLE
|
||||
and s11, s11, a1
|
||||
#endif /* SOC_CPU_HAS_FPU */
|
||||
j no_switch_restored
|
||||
|
||||
@ -614,17 +668,24 @@ no_switch_restore_coproc:
|
||||
/* We reach here either because there is no switch scheduled or because the TCB that is going to be scheduled
|
||||
* is the same as the one that has been interrupted. In both cases, we need to restore the coprocessors status */
|
||||
#if SOC_CPU_HAS_HWLOOP
|
||||
andi a0, s8, 1 << HWLP_COPROC_IDX
|
||||
beqz a0, 1f
|
||||
hwlp_enable a0
|
||||
/* Check if the ISR altered the state of the HWLP */
|
||||
csrr a1, CSR_HWLP_STATE_REG
|
||||
addi a1, a1, -HWLP_DIRTY_STATE
|
||||
bnez a1, 1f
|
||||
/* ISR used the HWLP, restore the HWLP context! */
|
||||
mv s7, ra
|
||||
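/* NOTE(review): hwlp_restore_if_used expects the current TCB address in a0. When this point is reached through `beqz a0, no_switch_restore_coproc`, a0 held xPortSwitchFlag[coreID] (0) at the branch: confirm a0 is reloaded with the TCB before this call */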
|
||||
call hwlp_restore_if_used
|
||||
mv ra, s7
|
||||
1:
|
||||
/* Else, the ISR hasn't touched HWLP registers, we don't need to restore the HWLP registers */
|
||||
#endif /* SOC_CPU_HAS_HWLOOP */
|
||||
|
||||
#if SOC_CPU_HAS_PIE
|
||||
andi a0, s8, 1 << PIE_COPROC_IDX
|
||||
beqz a0, 1f
|
||||
beqz a0, 2f
|
||||
pie_enable a0
|
||||
1:
|
||||
2:
|
||||
#endif /* SOC_CPU_HAS_PIE */
|
||||
|
||||
no_switch_restored:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
@ -192,7 +192,7 @@ TEST_CASE("FPU: Usage in unpinned task", "[freertos]")
|
||||
typedef struct {
|
||||
bool negative;
|
||||
TaskHandle_t main;
|
||||
} ParamsFPU;
|
||||
} fpu_params_t;
|
||||
|
||||
/**
|
||||
* @brief Function performing some simple calculation using several FPU registers.
|
||||
@ -200,7 +200,7 @@ typedef struct {
|
||||
*/
|
||||
void fpu_calculation(void* arg)
|
||||
{
|
||||
ParamsFPU* p = (ParamsFPU*) arg;
|
||||
fpu_params_t* p = (fpu_params_t*) arg;
|
||||
const bool negative = p->negative;
|
||||
const float init = negative ? -1.f : 1.f;
|
||||
float f = init;
|
||||
@ -236,7 +236,7 @@ TEST_CASE("FPU: Unsolicited context switch between tasks using FPU", "[freertos]
|
||||
/* Create two tasks that are on the same core and use the same FPU */
|
||||
TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
|
||||
TaskHandle_t tasks[2];
|
||||
ParamsFPU params[2] = {
|
||||
fpu_params_t params[2] = {
|
||||
{ .negative = false, .main = unity_task_handle },
|
||||
{ .negative = true, .main = unity_task_handle },
|
||||
};
|
||||
|
@ -19,109 +19,50 @@
|
||||
*/
|
||||
#if SOC_CPU_HAS_HWLOOP
|
||||
|
||||
static uint32_t use_hwlp(uint32_t count)
|
||||
{
|
||||
uint32_t ret;
|
||||
asm volatile(
|
||||
/* The toolchain doesn't support HWLP instructions yet, manually set it up */
|
||||
"la a2, start\n"
|
||||
"csrw 0x7c6, a2\n"
|
||||
"la a2, end\n"
|
||||
"csrw 0x7c7, a2\n"
|
||||
"csrw 0x7c8, a0\n"
|
||||
"li a1, 0\n"
|
||||
/* Hardware loops must have at least 8 32-bit instructions or 16 16-bit instructions */
|
||||
"start:\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"end:\n"
|
||||
"addi a1, a1, 1\n"
|
||||
"mv %0, a1\n"
|
||||
"ret\n"
|
||||
: "=r"(ret) :);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void other_task(void* arg)
|
||||
{
|
||||
const TaskHandle_t main_task = (TaskHandle_t) arg;
|
||||
|
||||
use_hwlp(10);
|
||||
|
||||
xTaskNotifyGive(main_task);
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
uint32_t use_hwlp(uint32_t count);
|
||||
|
||||
TEST_CASE("HWLP: Context save does not affect stack watermark", "[freertos]")
|
||||
{
|
||||
TaskHandle_t pvCreatedTask;
|
||||
/* Force the FreeRTOS port layer to store a HWLP context in the current task.
|
||||
* So let's use it and make sure another task, on the SAME CORE, also uses it */
|
||||
const int core_id = xPortGetCoreID();
|
||||
const TaskHandle_t current_handle = xTaskGetCurrentTaskHandle();
|
||||
|
||||
/* Get the current stack watermark */
|
||||
const UBaseType_t before_watermark = uxTaskGetStackHighWaterMark(current_handle);
|
||||
|
||||
/* Use the HWLP unit, the context will NOT be flushed until another task starts using it */
|
||||
/* Use the HWLP unit, the context will NOT be flushed until a context switch is done */
|
||||
use_hwlp(20);
|
||||
|
||||
xTaskCreatePinnedToCore(other_task,
|
||||
"OtherTask",
|
||||
2048,
|
||||
(void*) current_handle,
|
||||
CONFIG_UNITY_FREERTOS_PRIORITY - 1,
|
||||
&pvCreatedTask,
|
||||
core_id);
|
||||
|
||||
/* Make sure FreeRTOS switches to another task, even Idle task, so that the current Task saves
|
||||
* the HWLP current context */
|
||||
vTaskDelay(10);
|
||||
|
||||
/* Wait for other task to complete */
|
||||
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
|
||||
|
||||
const UBaseType_t after_watermark = uxTaskGetStackHighWaterMark(current_handle);
|
||||
|
||||
TEST_ASSERT_TRUE(after_watermark > before_watermark / 2);
|
||||
}
|
||||
|
||||
#if CONFIG_FREERTOS_NUMBER_OF_CORES > 1
|
||||
|
||||
typedef struct {
|
||||
uint32_t count;
|
||||
TaskHandle_t main;
|
||||
} ParamsHWLP;
|
||||
} hwlp_params_t;
|
||||
|
||||
void calculation(void* arg)
|
||||
static void calculation(void* arg)
|
||||
{
|
||||
ParamsHWLP* p = (ParamsHWLP*) arg;
|
||||
hwlp_params_t* p = (hwlp_params_t*) arg;
|
||||
const uint32_t count = p->count;
|
||||
uint32_t result = 0;
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < 10; i++) {
|
||||
for (i = 0; i < 50000; i++) {
|
||||
uint32_t current = use_hwlp(count);
|
||||
result += current;
|
||||
|
||||
/* Give the other task some time to interrupt us before checking the result */
|
||||
esp_rom_delay_us(1000);
|
||||
|
||||
/* Using TEST_ASSERT_TRUE triggers a stack overflow, make sure the count is still correct.
|
||||
* The function `use_hwlp` should return (count * 16) */
|
||||
assert(count * 16 == current);
|
||||
|
||||
/* Give the hand back to FreeRTOS to avoid any watchdog error */
|
||||
vTaskDelay(2);
|
||||
}
|
||||
|
||||
/* Make sure the result is correct */
|
||||
@ -131,14 +72,14 @@ void calculation(void* arg)
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
TEST_CASE("HWLP: Unsolicited context switch between tasks using the PIE", "[freertos]")
|
||||
TEST_CASE("HWLP: Unsolicited context switch between tasks using HWLP", "[freertos]")
|
||||
{
|
||||
/* Create two tasks that are on the same core and both use the HWLP feature */
|
||||
TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
|
||||
TaskHandle_t tasks[2];
|
||||
ParamsHWLP params[2] = {
|
||||
{ .count = 10, .main = unity_task_handle },
|
||||
{ .count = 200, .main = unity_task_handle },
|
||||
hwlp_params_t params[2] = {
|
||||
{ .count = 1024, .main = unity_task_handle },
|
||||
{ .count = 2048, .main = unity_task_handle },
|
||||
};
|
||||
|
||||
xTaskCreatePinnedToCore(calculation, "Task1", 2048, params + 0, CONFIG_UNITY_FREERTOS_PRIORITY + 1, &tasks[0], 1);
|
||||
@ -148,4 +89,6 @@ TEST_CASE("HWLP: Unsolicited context switch between tasks using the PIE", "[free
|
||||
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_FREERTOS_NUMBER_OF_CORES > 1 */
|
||||
|
||||
#endif // SOC_CPU_HAS_HWLOOP
|
||||
|
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
#include "sdkconfig.h"
|
||||
#include "soc/soc_caps.h"
|
||||
|
||||
#if SOC_CPU_HAS_HWLOOP
|
||||
|
||||
.text
|
||||
.align 4
|
||||
|
||||
/**
 * @brief Perform a hardware loop with a given number of iterations
 *
 * The loop body, from the start label to the end label included, is made of 16 `addi a1, a1, 1`
 * instructions. The test suite expects the returned value to be (iterations * 16).
 *
 * @param a0 Number of iterations
 *
 * @returns a0 Value of the counter (a1) after the loop
 *
 * Writes a1, a2 and the loop 0 CSRs (0x7c6, 0x7c7, 0x7c8).
 */
    .global use_hwlp
    .type use_hwlp, @function
use_hwlp:
    /* The toolchain doesn't support HWLP instructions yet, manually set it up */
    la      a2, .Luse_hwlp_loop_start
    csrw    0x7c6, a2                   /* CSR_LOOP0_START_ADDR */
    la      a2, .Luse_hwlp_loop_end
    csrw    0x7c7, a2                   /* CSR_LOOP0_END_ADDR */
    csrw    0x7c8, a0                   /* CSR_LOOP0_COUNT */
    li      a1, 0
    /* Hardware loops must have at least 8 32-bit instructions or 16 16-bit instructions */
.Luse_hwlp_loop_start:
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
    addi    a1, a1, 1
.Luse_hwlp_loop_end:
    addi    a1, a1, 1
    /* Return the counter */
    mv      a0, a1
    ret
    .size use_hwlp, .-use_hwlp
|
||||
|
||||
#endif /* SOC_CPU_HAS_HWLOOP */
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
@ -13,28 +13,26 @@
|
||||
#include "unity.h"
|
||||
#include "test_utils.h"
|
||||
|
||||
/* PIE instructions set is currently only supported in GCC compiler */
|
||||
#if SOC_CPU_HAS_PIE
|
||||
|
||||
/**
|
||||
* @brief Performs the sum of two 4-word vectors using the PIE.
|
||||
* @brief Performs the signed sum of two 4-word vectors using the PIE.
|
||||
*
|
||||
* @param a First vector
|
||||
* @param b Second vector
|
||||
* @param dst Destination to store the sum
|
||||
*
|
||||
* @returns a will store a + b
|
||||
*/
|
||||
static void pie_vector_add(const int32_t a[4], const int32_t b[4], int32_t dst[4])
|
||||
{
|
||||
asm volatile("esp.vld.128.ip q0, a0, 0\n"
|
||||
"esp.vld.128.ip q1, a1, 0\n"
|
||||
"esp.vadd.s32 q2, q0, q1\n"
|
||||
"esp.vst.128.ip q2, a2, 0\n"
|
||||
::);
|
||||
}
|
||||
void pie_vector_signed_add(const int32_t a[4], const int32_t b[4], int32_t dst[4]);
|
||||
|
||||
/* ------------------------------------------------------------------------------------------------------------------ */
|
||||
|
||||
typedef struct {
|
||||
int32_t cst;
|
||||
TaskHandle_t main;
|
||||
SemaphoreHandle_t sem;
|
||||
} pie_params_t;
|
||||
|
||||
/*
|
||||
Test PIE usage from a task context
|
||||
|
||||
@ -59,16 +57,22 @@ Expected:
|
||||
|
||||
static void pinned_task(void *arg)
|
||||
{
|
||||
pie_params_t *param = (pie_params_t*) arg;
|
||||
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
|
||||
|
||||
int32_t a[4] = { 42, 42, 42, 42};
|
||||
int32_t constant = 42 * param->cst;
|
||||
int32_t a[4] = { constant, constant, constant, constant };
|
||||
int32_t b[4] = { 10, 20, 30, 40 };
|
||||
int32_t dst[4] = { 0 };
|
||||
|
||||
pie_vector_add(a, b, dst);
|
||||
pie_vector_signed_add(a, b, dst);
|
||||
|
||||
// Indicate done wand wait to be deleted
|
||||
xSemaphoreGive((SemaphoreHandle_t)arg);
|
||||
for (int i = 0; i < sizeof(a) / sizeof(uint32_t); i++) {
|
||||
TEST_ASSERT_EQUAL(dst[i], a[i] + b[i]);
|
||||
}
|
||||
|
||||
// Indicate done and wait to be deleted
|
||||
xSemaphoreGive((SemaphoreHandle_t)param->sem);
|
||||
vTaskSuspend(NULL);
|
||||
}
|
||||
|
||||
@ -79,15 +83,20 @@ TEST_CASE("PIE: Usage in task", "[freertos]")
|
||||
|
||||
for (int iter = 0; iter < TEST_PINNED_NUM_ITERS; iter++) {
|
||||
TaskHandle_t task_handles[CONFIG_FREERTOS_NUMBER_OF_CORES][TEST_PINNED_NUM_TASKS];
|
||||
pie_params_t params[CONFIG_FREERTOS_NUMBER_OF_CORES][TEST_PINNED_NUM_TASKS];
|
||||
|
||||
// Create test tasks for each core
|
||||
for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) {
|
||||
for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
|
||||
TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *)done_sem, UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i));
|
||||
params[i][j] = (pie_params_t) {
|
||||
.cst = i + j + 1,
|
||||
.sem = done_sem,
|
||||
};
|
||||
TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *) ¶ms[i][j], UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i));
|
||||
}
|
||||
}
|
||||
|
||||
// Start the created tasks simultaneously
|
||||
// Start the created tasks
|
||||
for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) {
|
||||
for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
|
||||
xTaskNotifyGive(task_handles[i][j]);
|
||||
@ -159,7 +168,7 @@ static void unpinned_task(void *arg)
|
||||
int32_t b[4] = { 111, 222, 333, 444 };
|
||||
int32_t dst[4] = { 0 };
|
||||
|
||||
pie_vector_add(a, b, dst);
|
||||
pie_vector_signed_add(a, b, dst);
|
||||
|
||||
for (int i = 0; i < sizeof(a) / sizeof(uint32_t); i++) {
|
||||
TEST_ASSERT_EQUAL(dst[i], a[i] + b[i]);
|
||||
@ -196,24 +205,19 @@ TEST_CASE("PIE: Usage in unpinned task", "[freertos]")
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
int32_t cst;
|
||||
TaskHandle_t main;
|
||||
} ParamsPIE;
|
||||
|
||||
/**
|
||||
* @brief Function performing some simple calculation using the PIE coprocessor.
|
||||
* The goal is to be preempted by a task that also uses the PIE on the same core.
|
||||
*/
|
||||
void pie_calculation(void* arg)
|
||||
static void pie_calculation(void* arg)
|
||||
{
|
||||
ParamsPIE* p = (ParamsPIE*) arg;
|
||||
pie_params_t* p = (pie_params_t*) arg;
|
||||
const int32_t cst = p->cst;
|
||||
int32_t a[4] = { cst, cst, cst, cst };
|
||||
int32_t dst[4] = { 0 };
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
pie_vector_add(a, dst, dst);
|
||||
pie_vector_signed_add(a, dst, dst);
|
||||
|
||||
/* Give the other task some time to interrupt us before checking the result */
|
||||
esp_rom_delay_us(1000);
|
||||
@ -237,7 +241,7 @@ TEST_CASE("PIE: Unsolicited context switch between tasks using the PIE", "[freer
|
||||
/* Create two tasks that are on the same core and both use the PIE coprocessor */
|
||||
TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
|
||||
TaskHandle_t tasks[2];
|
||||
ParamsPIE params[2] = {
|
||||
pie_params_t params[2] = {
|
||||
{ .cst = 1, .main = unity_task_handle },
|
||||
{ .cst = -1, .main = unity_task_handle },
|
||||
};
|
||||
@ -249,5 +253,6 @@ TEST_CASE("PIE: Unsolicited context switch between tasks using the PIE", "[freer
|
||||
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
|
||||
}
|
||||
|
||||
#endif // CONFIG_FREERTOS_NUMBER_OF_CORES > 1
|
||||
#endif // SOC_CPU_HAS_PIE
|
||||
#endif /* CONFIG_FREERTOS_NUMBER_OF_CORES > 1 */
|
||||
|
||||
#endif /* SOC_CPU_HAS_PIE */
|
||||
|
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
#include "sdkconfig.h"
|
||||
#include "soc/soc_caps.h"
|
||||
|
||||
/* PIE instructions set is currently only supported in GCC compiler */
|
||||
#if SOC_CPU_HAS_PIE
|
||||
|
||||
.text
|
||||
.align 4
|
||||
|
||||
/**
 * @brief Add two vectors of four unsigned 32-bit words with the PIE coprocessor.
 *
 * @param a0 Address of the first vector
 * @param a1 Address of the second vector
 * @param a2 Address where the sum is stored
 *
 * Uses the PIE registers q0, q1 and q2.
 */
    .global pie_vector_unsigned_add
    .type pie_vector_unsigned_add, @function
pie_vector_unsigned_add:
    esp.vld.128.ip  q0, a0, 0       /* q0 = vector at a0 */
    esp.vld.128.ip  q1, a1, 0       /* q1 = vector at a1 */
    esp.vadd.u32    q2, q0, q1      /* q2 = q0 + q1, unsigned 32-bit elements */
    esp.vst.128.ip  q2, a2, 0       /* store q2 at a2 */
    ret
    .size pie_vector_unsigned_add, .-pie_vector_unsigned_add
|
||||
|
||||
|
||||
/**
 * @brief Add two vectors of four signed 32-bit words with the PIE coprocessor.
 *
 * @param a0 Address of the first vector
 * @param a1 Address of the second vector
 * @param a2 Address where the sum is stored
 *
 * Uses the PIE registers q0, q1 and q2.
 */
    .global pie_vector_signed_add
    .type pie_vector_signed_add, @function
pie_vector_signed_add:
    esp.vld.128.ip  q0, a0, 0       /* q0 = vector at a0 */
    esp.vld.128.ip  q1, a1, 0       /* q1 = vector at a1 */
    esp.vadd.s32    q2, q0, q1      /* q2 = q0 + q1, signed 32-bit elements */
    esp.vst.128.ip  q2, a2, 0       /* store q2 at a2 */
    ret
    .size pie_vector_signed_add, .-pie_vector_signed_add
|
||||
|
||||
#endif /* SOC_CPU_HAS_PIE */
|
@ -21,23 +21,24 @@
|
||||
*/
|
||||
#if SOC_CPU_HAS_PIE
|
||||
|
||||
static void use_pie(uint32_t a[4], uint32_t b[4])
|
||||
{
|
||||
asm volatile("esp.vld.128.ip q0, %0, 0\n"
|
||||
"esp.vld.128.ip q1, %2, 0\n"
|
||||
"esp.vadd.u32 q2, q0, q1\n"
|
||||
"esp.vst.128.ip q2, %0, 0\n"
|
||||
: "=r"(a) : "r"(a), "r"(b));
|
||||
}
|
||||
/**
|
||||
* @brief Performs the unsigned sum of two 4-word vectors using the PIE.
|
||||
*
|
||||
* @param a First vector
|
||||
* @param b Second vector
|
||||
* @param dst Destination to store the sum
|
||||
*/
|
||||
void pie_vector_unsigned_add(const uint32_t a[4], const uint32_t b[4], uint32_t dst[4]);
|
||||
|
||||
static void other_task(void* arg)
|
||||
{
|
||||
uint32_t a[4] = { 1, 2, 3, 4};
|
||||
uint32_t b[4] = { 42, 43, 44, 45};
|
||||
uint32_t dst[4] = { 0 };
|
||||
const TaskHandle_t main_task = (TaskHandle_t) arg;
|
||||
|
||||
/* This task must also use the PIE coprocessor to force a PIE context flush on the main task */
|
||||
use_pie(a, b);
|
||||
pie_vector_unsigned_add(a, b, dst);
|
||||
|
||||
xTaskNotifyGive(main_task);
|
||||
vTaskDelete(NULL);
|
||||
@ -48,6 +49,7 @@ TEST_CASE("PIE: Context save does not affect stack watermark", "[freertos]")
|
||||
/* Setup some random values */
|
||||
uint32_t a[4] = { 0x3f00ffff, 0xffe10045, 0xffe10096, 0x42434546};
|
||||
uint32_t b[4] = { 0x42, 0xbb43, 0x6644, 0x845};
|
||||
uint32_t dst[4] = { 0 };
|
||||
|
||||
TaskHandle_t pvCreatedTask;
|
||||
/* Force the FreeRTOS port layer to store a PIE context in the current task.
|
||||
@ -59,7 +61,7 @@ TEST_CASE("PIE: Context save does not affect stack watermark", "[freertos]")
|
||||
const UBaseType_t before_watermark = uxTaskGetStackHighWaterMark(current_handle);
|
||||
|
||||
/* Use the PIE unit, the context will NOT be flushed until another task starts using it */
|
||||
use_pie(a, b);
|
||||
pie_vector_unsigned_add(a, b, dst);
|
||||
|
||||
xTaskCreatePinnedToCore(other_task,
|
||||
"OtherTask",
|
||||
|
@ -18,6 +18,11 @@
|
||||
*/
|
||||
#define CSR_HWLP_STATE_REG 0x7F1
|
||||
|
||||
#define HWLP_OFF_STATE 0
|
||||
#define HWLP_INITIAL_STATE 1
|
||||
#define HWLP_CLEAN_STATE 2
|
||||
#define HWLP_DIRTY_STATE 3
|
||||
|
||||
#define CSR_LOOP0_START_ADDR 0x7C6
|
||||
#define CSR_LOOP0_END_ADDR 0x7C7
|
||||
#define CSR_LOOP0_COUNT 0x7C8
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "esp_attr.h"
|
||||
#include "riscv/csr.h"
|
||||
#include "riscv/interrupt.h"
|
||||
#include "riscv/csr_pie.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -168,6 +169,27 @@ FORCE_INLINE_ATTR void rv_utils_disable_fpu(void)
|
||||
#endif /* SOC_CPU_HAS_FPU */
|
||||
|
||||
|
||||
/* ------------------------------------------------- PIE Related ----------------------------------------------------
|
||||
*
|
||||
* ------------------------------------------------------------------------------------------------------------------ */
|
||||
|
||||
#if SOC_CPU_HAS_PIE

/**
 * @brief Enable the PIE coprocessor by writing 1 to its state CSR (CSR_PIE_STATE_REG)
 */
FORCE_INLINE_ATTR void rv_utils_enable_pie(void)
{
    RV_WRITE_CSR(CSR_PIE_STATE_REG, 1);
}

/**
 * @brief Disable the PIE coprocessor by writing 0 to its state CSR (CSR_PIE_STATE_REG)
 */
FORCE_INLINE_ATTR void rv_utils_disable_pie(void)
{
    RV_WRITE_CSR(CSR_PIE_STATE_REG, 0);
}

#endif /* SOC_CPU_HAS_PIE */
|
||||
|
||||
|
||||
|
||||
/* -------------------------------------------------- Memory Ports -----------------------------------------------------
|
||||
*
|
||||
* ------------------------------------------------------------------------------------------------------------------ */
|
||||
|
@ -168,17 +168,12 @@ _panic_handler:
|
||||
/* EXT_ILL CSR should contain the reason for the Illegal Instruction */
|
||||
csrrw a0, EXT_ILL_CSR, zero
|
||||
|
||||
#if SOC_CPU_HAS_HWLOOP
|
||||
/* Check if the HWLOOP bit is set. */
|
||||
andi a1, a0, EXT_ILL_RSN_HWLP
|
||||
bnez a1, rtos_save_hwlp_coproc
|
||||
#endif // SOC_CPU_HAS_HWLOOP
|
||||
|
||||
/* Hardware loop cannot be treated lazily, so we should never end here if a HWLP instruction is used */
|
||||
#if SOC_CPU_HAS_PIE
|
||||
/* Check if the HWLOOP bit is set. */
|
||||
/* Check if the PIE bit is set. */
|
||||
andi a1, a0, EXT_ILL_RSN_PIE
|
||||
bnez a1, rtos_save_pie_coproc
|
||||
#endif // SOC_CPU_HAS_HWLOOP
|
||||
#endif /* SOC_CPU_HAS_PIE */
|
||||
|
||||
#if SOC_CPU_HAS_FPU
|
||||
/* Check if the FPU bit is set. When targets have the FPU reason bug (SOC_CPU_HAS_FPU_EXT_ILL_BUG),
|
||||
|
@ -403,6 +403,10 @@ config SOC_BRANCH_PREDICTOR_SUPPORTED
|
||||
bool
|
||||
default y
|
||||
|
||||
config SOC_CPU_COPROC_NUM
|
||||
int
|
||||
default 3
|
||||
|
||||
config SOC_CPU_HAS_FPU
|
||||
bool
|
||||
default y
|
||||
@ -419,10 +423,6 @@ config SOC_CPU_HAS_PIE
|
||||
bool
|
||||
default y
|
||||
|
||||
config SOC_CPU_COPROC_NUM
|
||||
int
|
||||
default 3
|
||||
|
||||
config SOC_HP_CPU_HAS_MULTIPLE_CORES
|
||||
bool
|
||||
default y
|
||||
|
@ -157,11 +157,15 @@
|
||||
#define SOC_INT_CLIC_SUPPORTED 1
|
||||
#define SOC_INT_HW_NESTED_SUPPORTED 1 // Support for hardware interrupts nesting
|
||||
#define SOC_BRANCH_PREDICTOR_SUPPORTED 1
|
||||
#define SOC_CPU_COPROC_NUM 3
|
||||
#define SOC_CPU_HAS_FPU 1
|
||||
#define SOC_CPU_HAS_FPU_EXT_ILL_BUG 1 // EXT_ILL CSR doesn't support FLW/FSW
|
||||
#define SOC_CPU_HAS_HWLOOP 1
|
||||
/* PIE coprocessor assembly is only supported with GCC compiler */
|
||||
#ifndef __clang__
|
||||
#define SOC_CPU_HAS_PIE 1
|
||||
#define SOC_CPU_COPROC_NUM 3
|
||||
#endif
|
||||
|
||||
#define SOC_HP_CPU_HAS_MULTIPLE_CORES 1 // Convenience boolean macro used to determine if a target has multiple cores.
|
||||
|
||||
#define SOC_CPU_BREAKPOINTS_NUM 3
|
||||
|
Loading…
x
Reference in New Issue
Block a user