fix(riscv): make HWLP feature use direct saving of lazy saving

This commit is contained in:
Omar Chebib 2024-04-30 16:37:40 +08:00
parent 55acc5e5e7
commit 82668dd3fe
14 changed files with 325 additions and 167 deletions

View File

@ -60,6 +60,11 @@
#include "soc/hp_system_reg.h"
#endif
#if SOC_CPU_HAS_HWLOOP
#include "riscv/csr.h"
#include "riscv/csr_hwlp.h"
#endif
#if ( SOC_CPU_COPROC_NUM > 0 )
#include "esp_private/panic_internal.h"
@ -125,9 +130,23 @@ StackType_t *xIsrStackBottom[portNUM_PROCESSORS] = {0};
BaseType_t xPortStartScheduler(void)
{
#if ( SOC_CPU_COPROC_NUM > 0 )
#if SOC_CPU_HAS_FPU
/* Disable FPU so that the first task to use it will trigger an exception */
rv_utils_disable_fpu();
#endif
#endif /* SOC_CPU_HAS_FPU */
#if SOC_CPU_HAS_PIE
/* Similarly, disable PIE */
rv_utils_disable_pie();
#endif /* SOC_CPU_HAS_FPU */
#if SOC_CPU_HAS_HWLOOP
/* Initialize the Hardware loop feature */
RV_WRITE_CSR(CSR_HWLP_STATE_REG, HWLP_INITIAL_STATE);
#endif /* SOC_CPU_HAS_HWLOOP */
#endif /* ( SOC_CPU_COPROC_NUM > 0 ) */
/* Initialize all kernel state tracking variables */
BaseType_t coreID = xPortGetCoreID();
port_uxInterruptNesting[coreID] = 0;
@ -826,7 +845,7 @@ RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, bool allocate, int cop
* @param coreid Current core
* @param coproc Coprocessor to save context of
*
* @returns Coprocessor former owner's save are, can be NULL is there was no owner yet, can be -1 if
* @returns Coprocessor former owner's save area, can be NULL if there was no owner yet, can be -1 if
* the former owner is the same as the new owner.
*/
RvCoprocSaveArea* pxPortUpdateCoprocOwner(int coreid, int coproc, StaticTask_t* owner)

View File

@ -92,7 +92,7 @@ rtos_save_\name\()_coproc:
\save_coproc_regs a0
rtos_save_\name\()_coproc_nosave:
#if ( configNUM_CORES > 1 )
/* Pin current task to current core */
/* Pin current task to current core, s1 has pxCurrentTCBs */
mv a0, s1
csrr a1, mhartid
call vPortTaskPinToCore
@ -184,7 +184,33 @@ rtos_save_\name\()_coproc_norestore:
.endm
generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs, hwlp_restore_regs
/**
* @brief Restore the HWLP registers contained in the dedicated save area if the given task ever used it.
* This routine sets the HWLP context to clean in any case.
*
* @param a0 StaticTask address for the newly scheduled task
*/
hwlp_restore_if_used:
addi sp, sp, -16
sw ra, (sp)
/* Check if the HWLP was in use beforehand */
li a1, 0
li a2, HWLP_COPROC_IDX
call pxPortGetCoprocArea
/* Get the enable flags from the coprocessor save area */
lw a1, RV_COPROC_ENABLE(a0)
/* To avoid having branches below, set the coprocessor enable flag now */
andi a2, a1, 1 << HWLP_COPROC_IDX
beqz a2, _hwlp_restore_never_used
/* Enable bit was set, restore the coprocessor context */
lw a0, RV_COPROC_SA+HWLP_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[HWLP_COPROC_IDX] */
hwlp_restore_regs a0
_hwlp_restore_never_used:
/* Clear the context */
csrwi CSR_HWLP_STATE_REG, HWLP_CLEAN_STATE
lw ra, (sp)
addi sp, sp, 16
ret
#endif /* SOC_CPU_HAS_HWLOOP */
@ -192,7 +218,7 @@ generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs,
#if SOC_CPU_HAS_PIE
/**
* @brief Macros to enable and disable the hardware loop feature on the current core
* @brief Macros to enable and disable the PIE coprocessor on the current core
*/
.macro pie_enable scratch_reg=a0
li \scratch_reg, 1
@ -200,7 +226,7 @@ generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs,
.endm
/**
* @brief Disable HW Loop CPU feature while returning the former status in the given register
* @brief Disable the PIE coprocessor while returning the former status in the given register
*/
.macro pie_disable reg
csrrw \reg, CSR_PIE_STATE_REG, zero
@ -213,7 +239,7 @@ generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs,
.endm
/**
* @brief Macros to save and restore the hardware loop registers to and from the given frame
* @brief Macros to save and restore the PIE coprocessor registers to and from the given frame
*/
.macro pie_save_regs frame=a0
/* Save the 128-bit Q registers from the frame memory and then frame += 16 */
@ -427,15 +453,16 @@ rtos_current_tcb:
* 16-bit instructions.
* @returns Context that should be given to `rtos_int_exit`. On targets that have coprocessors,
* this value is a bitmap where bit i is 1 if coprocessor i is enable, 0 if it is disabled.
* This routine can use the s registers too since they are not used by the caller (yet)
*/
.global rtos_int_enter
.type rtos_int_enter, @function
rtos_int_enter:
#if ( configNUM_CORES > 1 )
csrr a5, mhartid /* a5 = coreID */
slli a5, a5, 2 /* a5 = coreID * 4 */
csrr s0, mhartid /* s0 = coreID */
slli s0, s0, 2 /* s0 = coreID * 4 */
la a0, port_xSchedulerRunning /* a0 = &port_xSchedulerRunning */
add a0, a0, a5 /* a0 = &port_xSchedulerRunning[coreID] */
add a0, a0, s0 /* a0 = &port_xSchedulerRunning[coreID] */
lw a0, (a0) /* a0 = port_xSchedulerRunning[coreID] */
#else
lw a0, port_xSchedulerRunning /* a0 = port_xSchedulerRunning */
@ -446,7 +473,7 @@ rtos_int_enter:
/* Increment the ISR nesting count */
la a0, port_uxInterruptNesting /* a0 = &port_uxInterruptNesting */
#if ( configNUM_CORES > 1 )
add a0, a0, a5 /* a0 = &port_uxInterruptNesting[coreID] // a5 already contains coreID * 4 */
add a0, a0, s0 /* a0 = &port_uxInterruptNesting[coreID] // s0 contains coreID * 4 */
#endif /* ( configNUM_CORES > 1 ) */
lw a1, 0(a0) /* a1 = port_uxInterruptNesting[coreID] */
addi a2, a1, 1 /* a2 = a1 + 1 */
@ -456,19 +483,13 @@ rtos_int_enter:
li a0, 0 /* return 0 in case we are going to branch */
bnez a1, rtos_int_enter_end /* if (port_uxInterruptNesting[coreID] > 0) jump to rtos_int_enter_end */
li a7, 0
li s2, 0
#if SOC_CPU_COPROC_NUM > 0
/* Disable the coprocessors to forbid the ISR from using it */
#if SOC_CPU_HAS_HWLOOP
/* The current HWLP status will be returned in a0 */
hwlp_disable a0
or a7, a7, a0
#endif /* SOC_CPU_HAS_HWLOOP */
#if SOC_CPU_HAS_PIE
/* The current HWLP status will be returned in a0 */
/* The current PIE coprocessor status will be returned in a0 */
pie_disable a0
or a7, a7, a0
or s2, s2, a0
#endif /* SOC_CPU_HAS_PIE */
#if SOC_CPU_HAS_FPU
@ -485,24 +506,48 @@ rtos_int_enter:
/* Save the current sp in pxCurrentTCBs[coreID] and load the ISR stack on to sp */
#if ( configNUM_CORES > 1 )
la a0, pxCurrentTCBs /* a0 = &pxCurrentTCBs */
add a0, a0, a5 /* a0 = &pxCurrentTCBs[coreID] // a5 already contains coreID * 4 */
add a0, a0, s0 /* a0 = &pxCurrentTCBs[coreID] // s0 already contains coreID * 4 */
lw a0, (a0) /* a0 = pxCurrentTCBs[coreID] */
sw sp, 0(a0) /* pxCurrentTCBs[coreID] = sp */
la a0, xIsrStackTop /* a0 = &xIsrStackTop */
add a0, a0, a5 /* a0 = &xIsrStackTop[coreID] // a5 already contains coreID * 4 */
lw sp, (a0) /* sp = xIsrStackTop[coreID] */
/* We may need a0 below to call pxPortGetCoprocArea */
la a1, xIsrStackTop /* a1 = &xIsrStackTop */
add a1, a1, s0 /* a1 = &xIsrStackTop[coreID] // s0 already contains coreID * 4 */
lw sp, (a1) /* sp = xIsrStackTop[coreID] */
#else
lw a0, pxCurrentTCBs /* a0 = pxCurrentTCBs */
sw sp, 0(a0) /* pxCurrentTCBs[0] = sp */
lw sp, xIsrStackTop /* sp = xIsrStackTop */
#endif /* ( configNUM_CORES > 1 ) */
#if SOC_CPU_HAS_HWLOOP
/* Check if the current task used the Hardware loop feature, by reading the state */
csrr a1, CSR_HWLP_STATE_REG
addi a1, a1, -HWLP_DIRTY_STATE
bnez a1, 1f
/* State is dirty! The hardware loop feature was used, save the registers */
li a1, 1 /* Allocate the save area if not already allocated */
li a2, HWLP_COPROC_IDX
mv s1, ra
call pxPortGetCoprocArea
mv ra, s1
/* Set the enable flags from the coprocessor save area */
lw a1, RV_COPROC_ENABLE(a0)
ori a1, a1, 1 << HWLP_COPROC_IDX
sw a1, RV_COPROC_ENABLE(a0)
/* Get the area where we need to save the HWLP registers */
lw a0, RV_COPROC_SA+HWLP_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[\coproc_idx] */
hwlp_save_regs a0
/* Disable the HWLP feature so that ISR cannot use them */
csrwi CSR_HWLP_STATE_REG, HWLP_CLEAN_STATE
1:
#endif
#if CONFIG_ESP_SYSTEM_HW_STACK_GUARD
/* Prepare the parameters for esp_hw_stack_guard_set_bounds(xIsrStackBottom, xIsrStackTop); */
#if ( configNUM_CORES > 1 )
/* Load the xIsrStack for the current core and set the new bounds */
la a0, xIsrStackBottom
add a0, a0, a5 /* a0 = &xIsrStackBottom[coreID] */
add a0, a0, s0 /* a0 = &xIsrStackBottom[coreID] */
lw a0, (a0) /* a0 = xIsrStackBottom[coreID] */
#else
lw a0, xIsrStackBottom
@ -514,8 +559,8 @@ rtos_int_enter:
ESP_HW_STACK_GUARD_MONITOR_START_CUR_CORE a0 a1
#endif /* CONFIG_ESP_SYSTEM_HW_STACK_GUARD */
/* Return the coprocessor context from a7 */
mv a0, a7
/* Return the coprocessor context from s2 */
mv a0, s2
rtos_int_enter_end:
ret
@ -569,11 +614,11 @@ isr_skip_decrement:
/* If the CPU reached this label, a2 (uxInterruptNesting) is 0 for sure */
/* Schedule the next task if a yield is pending */
la s7, xPortSwitchFlag /* a0 = &xPortSwitchFlag */
la s7, xPortSwitchFlag /* s7 = &xPortSwitchFlag */
#if ( configNUM_CORES > 1 )
add s7, s7, a1 /* a0 = &xPortSwitchFlag[coreID] // a1 already contains coreID * 4 */
add s7, s7, a1 /* s7 = &xPortSwitchFlag[coreID] // a1 already contains coreID * 4 */
#endif /* ( configNUM_CORES > 1 ) */
lw a0, 0(s7) /* a2 = xPortSwitchFlag[coreID] */
lw a0, 0(s7) /* a0 = xPortSwitchFlag[coreID] */
beqz a0, no_switch_restore_coproc /* if (xPortSwitchFlag[coreID] == 0) jump to no_switch_restore_coproc */
/* Preserve return address and schedule next task. To speed up the process, and because this current routine
@ -601,10 +646,19 @@ isr_skip_decrement:
mv ra, s10 /* Restore original return address */
beq a0, s9, no_switch_restore_coproc
#if SOC_CPU_HAS_HWLOOP
/* We have to restore the context of the HWLP if the newly scheduled task used it before. In all cases, this
* routine will also clean the state and set it to clean */
mv s7, ra
/* a0 contains the current TCB address */
call hwlp_restore_if_used
mv ra, s7
#endif /* SOC_CPU_HAS_HWLOOP */
#if SOC_CPU_HAS_FPU
/* Disable the FPU in the `mstatus` value to return */
li a0, ~CSR_MSTATUS_FPU_DISABLE
and s11, s11, a0
li a1, ~CSR_MSTATUS_FPU_DISABLE
and s11, s11, a1
#endif /* SOC_CPU_HAS_FPU */
j no_switch_restored
@ -614,17 +668,24 @@ no_switch_restore_coproc:
/* We reach here either because there is no switch scheduled or because the TCB that is going to be scheduled
* is the same as the one that has been interrupted. In both cases, we need to restore the coprocessors status */
#if SOC_CPU_HAS_HWLOOP
andi a0, s8, 1 << HWLP_COPROC_IDX
beqz a0, 1f
hwlp_enable a0
/* Check if the ISR altered the state of the HWLP */
csrr a1, CSR_HWLP_STATE_REG
addi a1, a1, -HWLP_DIRTY_STATE
bnez a1, 1f
/* ISR used the HWLP, restore the HWLP context! */
mv s7, ra
/* a0 contains the current TCB address */
call hwlp_restore_if_used
mv ra, s7
1:
/* Else, the ISR hasn't touched HWLP registers, we don't need to restore the HWLP registers */
#endif /* SOC_CPU_HAS_HWLOOP */
#if SOC_CPU_HAS_PIE
andi a0, s8, 1 << PIE_COPROC_IDX
beqz a0, 1f
beqz a0, 2f
pie_enable a0
1:
2:
#endif /* SOC_CPU_HAS_PIE */
no_switch_restored:

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/

View File

@ -192,7 +192,7 @@ TEST_CASE("FPU: Usage in unpinned task", "[freertos]")
typedef struct {
bool negative;
TaskHandle_t main;
} ParamsFPU;
} fpu_params_t;
/**
* @brief Function performing some simple calculation using several FPU registers.
@ -200,7 +200,7 @@ typedef struct {
*/
void fpu_calculation(void* arg)
{
ParamsFPU* p = (ParamsFPU*) arg;
fpu_params_t* p = (fpu_params_t*) arg;
const bool negative = p->negative;
const float init = negative ? -1.f : 1.f;
float f = init;
@ -236,7 +236,7 @@ TEST_CASE("FPU: Unsolicited context switch between tasks using FPU", "[freertos]
/* Create two tasks that are on the same core and use the same FPU */
TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
TaskHandle_t tasks[2];
ParamsFPU params[2] = {
fpu_params_t params[2] = {
{ .negative = false, .main = unity_task_handle },
{ .negative = true, .main = unity_task_handle },
};

View File

@ -19,109 +19,50 @@
*/
#if SOC_CPU_HAS_HWLOOP
static uint32_t use_hwlp(uint32_t count)
{
uint32_t ret;
asm volatile(
/* The toolchain doesn't support HWLP instructions yet, manually set it up */
"la a2, start\n"
"csrw 0x7c6, a2\n"
"la a2, end\n"
"csrw 0x7c7, a2\n"
"csrw 0x7c8, a0\n"
"li a1, 0\n"
/* Hardware loops must have at least 8 32-bit instructions or 16 16-bit instructions */
"start:\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"addi a1, a1, 1\n"
"end:\n"
"addi a1, a1, 1\n"
"mv %0, a1\n"
"ret\n"
: "=r"(ret) :);
return ret;
}
static void other_task(void* arg)
{
const TaskHandle_t main_task = (TaskHandle_t) arg;
use_hwlp(10);
xTaskNotifyGive(main_task);
vTaskDelete(NULL);
}
uint32_t use_hwlp(uint32_t count);
TEST_CASE("HWLP: Context save does not affect stack watermark", "[freertos]")
{
TaskHandle_t pvCreatedTask;
/* Force the FreeRTOS port layer to store a HWLP context in the current task.
* So let's use the it and make sure another task, on the SAME CORE, also uses it */
const int core_id = xPortGetCoreID();
const TaskHandle_t current_handle = xTaskGetCurrentTaskHandle();
/* Get the current stack watermark */
const UBaseType_t before_watermark = uxTaskGetStackHighWaterMark(current_handle);
/* Use the HWLP unit, the context will NOT be flushed until another task starts using it */
/* Use the HWLP unit, the context will NOT be flushed until a context switch is done */
use_hwlp(20);
xTaskCreatePinnedToCore(other_task,
"OtherTask",
2048,
(void*) current_handle,
CONFIG_UNITY_FREERTOS_PRIORITY - 1,
&pvCreatedTask,
core_id);
/* Make sure FreeRTOS switches to another task, even Idle task, so that the current Task saves
* the HWLP current context */
vTaskDelay(10);
/* Wait for other task to complete */
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
const UBaseType_t after_watermark = uxTaskGetStackHighWaterMark(current_handle);
TEST_ASSERT_TRUE(after_watermark > before_watermark / 2);
}
#if CONFIG_FREERTOS_NUMBER_OF_CORES > 1
typedef struct {
uint32_t count;
TaskHandle_t main;
} ParamsHWLP;
} hwlp_params_t;
void calculation(void* arg)
static void calculation(void* arg)
{
ParamsHWLP* p = (ParamsHWLP*) arg;
hwlp_params_t* p = (hwlp_params_t*) arg;
const uint32_t count = p->count;
uint32_t result = 0;
int i = 0;
for (i = 0; i < 10; i++) {
for (i = 0; i < 50000; i++) {
uint32_t current = use_hwlp(count);
result += current;
/* Give some time to the other to interrupt us before checking `f` value */
esp_rom_delay_us(1000);
/* Using TEST_ASSERT_TRUE triggers a stack overflow, make sure the count is still correct.
* The function `use_hwlp` should return (count * 16) */
assert(count * 16 == current);
/* Give the hand back to FreeRTOS to avoid any watchdog error */
vTaskDelay(2);
}
/* Make sure the result is correct */
@ -131,14 +72,14 @@ void calculation(void* arg)
vTaskDelete(NULL);
}
TEST_CASE("HWLP: Unsolicited context switch between tasks using the PIE", "[freertos]")
TEST_CASE("HWLP: Unsolicited context switch between tasks using HWLP", "[freertos]")
{
/* Create two tasks that are on the same core and use the same FPU */
TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
TaskHandle_t tasks[2];
ParamsHWLP params[2] = {
{ .count = 10, .main = unity_task_handle },
{ .count = 200, .main = unity_task_handle },
hwlp_params_t params[2] = {
{ .count = 1024, .main = unity_task_handle },
{ .count = 2048, .main = unity_task_handle },
};
xTaskCreatePinnedToCore(calculation, "Task1", 2048, params + 0, CONFIG_UNITY_FREERTOS_PRIORITY + 1, &tasks[0], 1);
@ -148,4 +89,6 @@ TEST_CASE("HWLP: Unsolicited context switch between tasks using the PIE", "[free
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
}
#endif /* CONFIG_FREERTOS_NUMBER_OF_CORES > 1 */
#endif // SOC_CPU_HAS_HWLOOP

View File

@ -0,0 +1,52 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "sdkconfig.h"
#include "soc/soc_caps.h"
#if SOC_CPU_HAS_HWLOOP
.text
.align 4
/**
* @brief Perform a hardware loop with a given number of iterations
*
* @param a0 Number of iterations
*/
.global use_hwlp
.type use_hwlp, @function
use_hwlp:
/* The toolchain doesn't support HWLP instructions yet, manually set it up */
la a2, start
csrw 0x7c6, a2
la a2, end
csrw 0x7c7, a2
csrw 0x7c8, a0
li a1, 0
/* Hardware loops must have at least 8 32-bit instructions or 16 16-bit instructions */
start:
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
addi a1, a1, 1
end:
addi a1, a1, 1
mv a0, a1
ret
.size use_hwlp, .-use_hwlp
#endif /* SOC_CPU_HAS_HWLOOP */

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -13,28 +13,26 @@
#include "unity.h"
#include "test_utils.h"
/* PIE instructions set is currently only supported in GCC compiler */
#if SOC_CPU_HAS_PIE
/**
* @brief Performs the sum of two 4-word vectors using the PIE.
* @brief Performs the signed sum of two 4-word vectors using the PIE.
*
* @param a First vector
* @param b Second vector
* @param dst Destination to store the sum
*
* @returns a will store a + b
*/
static void pie_vector_add(const int32_t a[4], const int32_t b[4], int32_t dst[4])
{
asm volatile("esp.vld.128.ip q0, a0, 0\n"
"esp.vld.128.ip q1, a1, 0\n"
"esp.vadd.s32 q2, q0, q1\n"
"esp.vst.128.ip q2, a2, 0\n"
::);
}
void pie_vector_signed_add(const int32_t a[4], const int32_t b[4], int32_t dst[4]);
/* ------------------------------------------------------------------------------------------------------------------ */
typedef struct {
int32_t cst;
TaskHandle_t main;
SemaphoreHandle_t sem;
} pie_params_t;
/*
Test PIE usage from a task context
@ -59,16 +57,22 @@ Expected:
static void pinned_task(void *arg)
{
pie_params_t *param = (pie_params_t*) arg;
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
int32_t a[4] = { 42, 42, 42, 42};
int32_t constant = 42 * param->cst;
int32_t a[4] = { constant, constant, constant, constant };
int32_t b[4] = { 10, 20, 30, 40 };
int32_t dst[4] = { 0 };
pie_vector_add(a, b, dst);
pie_vector_signed_add(a, b, dst);
// Indicate done wand wait to be deleted
xSemaphoreGive((SemaphoreHandle_t)arg);
for (int i = 0; i < sizeof(a) / sizeof(uint32_t); i++) {
TEST_ASSERT_EQUAL(dst[i], a[i] + b[i]);
}
// Indicate done and wait to be deleted
xSemaphoreGive((SemaphoreHandle_t)param->sem);
vTaskSuspend(NULL);
}
@ -79,15 +83,20 @@ TEST_CASE("PIE: Usage in task", "[freertos]")
for (int iter = 0; iter < TEST_PINNED_NUM_ITERS; iter++) {
TaskHandle_t task_handles[CONFIG_FREERTOS_NUMBER_OF_CORES][TEST_PINNED_NUM_TASKS];
pie_params_t params[CONFIG_FREERTOS_NUMBER_OF_CORES][TEST_PINNED_NUM_TASKS];
// Create test tasks for each core
for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) {
for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *)done_sem, UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i));
params[i][j] = (pie_params_t) {
.cst = i + j + 1,
.sem = done_sem,
};
TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *) &params[i][j], UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i));
}
}
// Start the created tasks simultaneously
// Start the created tasks
for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) {
for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) {
xTaskNotifyGive(task_handles[i][j]);
@ -159,7 +168,7 @@ static void unpinned_task(void *arg)
int32_t b[4] = { 111, 222, 333, 444 };
int32_t dst[4] = { 0 };
pie_vector_add(a, b, dst);
pie_vector_signed_add(a, b, dst);
for (int i = 0; i < sizeof(a) / sizeof(uint32_t); i++) {
TEST_ASSERT_EQUAL(dst[i], a[i] + b[i]);
@ -196,24 +205,19 @@ TEST_CASE("PIE: Usage in unpinned task", "[freertos]")
}
}
typedef struct {
int32_t cst;
TaskHandle_t main;
} ParamsPIE;
/**
* @brief Function performing some simple calculation using the PIE coprocessor.
* The goal is to be preempted by a task that also uses the PIE on the same core.
*/
void pie_calculation(void* arg)
static void pie_calculation(void* arg)
{
ParamsPIE* p = (ParamsPIE*) arg;
pie_params_t* p = (pie_params_t*) arg;
const int32_t cst = p->cst;
int32_t a[4] = { cst, cst, cst, cst };
int32_t dst[4] = { 0 };
for (int i = 0; i < 10; i++) {
pie_vector_add(a, dst, dst);
pie_vector_signed_add(a, dst, dst);
/* Give some time to the other to interrupt us before checking `f` value */
esp_rom_delay_us(1000);
@ -237,7 +241,7 @@ TEST_CASE("PIE: Unsolicited context switch between tasks using the PIE", "[freer
/* Create two tasks that are on the same core and use the same FPU */
TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
TaskHandle_t tasks[2];
ParamsPIE params[2] = {
pie_params_t params[2] = {
{ .cst = 1, .main = unity_task_handle },
{ .cst = -1, .main = unity_task_handle },
};
@ -249,5 +253,6 @@ TEST_CASE("PIE: Unsolicited context switch between tasks using the PIE", "[freer
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
}
#endif // CONFIG_FREERTOS_NUMBER_OF_CORES > 1
#endif // SOC_CPU_HAS_PIE
#endif /* CONFIG_FREERTOS_NUMBER_OF_CORES > 1 */
#endif /* SOC_CPU_HAS_PIE */

View File

@ -0,0 +1,50 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "sdkconfig.h"
#include "soc/soc_caps.h"
/* PIE instructions set is currently only supported in GCC compiler */
#if SOC_CPU_HAS_PIE
.text
.align 4
/**
* @brief Performs the unsigned sum of two 4-word vectors using the PIE.
*
* @param a0 First vector
* @param a1 Second vector
* @param a2 Destination to store the sum
*/
.type pie_vector_unsigned_add, @function
.global pie_vector_unsigned_add
pie_vector_unsigned_add:
esp.vld.128.ip q0, a0, 0
esp.vld.128.ip q1, a1, 0
esp.vadd.u32 q2, q0, q1
esp.vst.128.ip q2, a2, 0
ret
.size pie_vector_unsigned_add, .-pie_vector_unsigned_add
/**
* @brief Performs the signed sum of two 4-word vectors using the PIE.
*
* @param a0 First vector
* @param a1 Second vector
* @param a2 Destination to store the sum
*/
.type pie_vector_signed_add, @function
.global pie_vector_signed_add
pie_vector_signed_add:
esp.vld.128.ip q0, a0, 0
esp.vld.128.ip q1, a1, 0
esp.vadd.s32 q2, q0, q1
esp.vst.128.ip q2, a2, 0
ret
.size pie_vector_signed_add, .-pie_vector_signed_add
#endif /* SOC_CPU_HAS_PIE */

View File

@ -21,23 +21,24 @@
*/
#if SOC_CPU_HAS_PIE
static void use_pie(uint32_t a[4], uint32_t b[4])
{
asm volatile("esp.vld.128.ip q0, %0, 0\n"
"esp.vld.128.ip q1, %2, 0\n"
"esp.vadd.u32 q2, q0, q1\n"
"esp.vst.128.ip q2, %0, 0\n"
: "=r"(a) : "r"(a), "r"(b));
}
/**
* @brief Performs the signed sum of two 4-word vectors using the PIE.
*
* @param a First vector
* @param b Second vector
* @param dst Destination to store the sum
*/
void pie_vector_unsigned_add(const uint32_t a[4], const uint32_t b[4], uint32_t dst[4]);
static void other_task(void* arg)
{
uint32_t a[4] = { 1, 2, 3, 4};
uint32_t b[4] = { 42, 43, 44, 45};
uint32_t dst[4] = { 0 };
const TaskHandle_t main_task = (TaskHandle_t) arg;
/* This task must also use the PIE coprocessor to force a PIE context flush on the main task */
use_pie(a, b);
pie_vector_unsigned_add(a, b, dst);
xTaskNotifyGive(main_task);
vTaskDelete(NULL);
@ -48,6 +49,7 @@ TEST_CASE("PIE: Context save does not affect stack watermark", "[freertos]")
/* Setup some random values */
uint32_t a[4] = { 0x3f00ffff, 0xffe10045, 0xffe10096, 0x42434546};
uint32_t b[4] = { 0x42, 0xbb43, 0x6644, 0x845};
uint32_t dst[4] = { 0 };
TaskHandle_t pvCreatedTask;
/* Force the FreeRTOS port layer to store a PIE context in the current task.
@ -59,7 +61,7 @@ TEST_CASE("PIE: Context save does not affect stack watermark", "[freertos]")
const UBaseType_t before_watermark = uxTaskGetStackHighWaterMark(current_handle);
/* Use the PIE unit, the context will NOT be flushed until another task starts using it */
use_pie(a, b);
pie_vector_unsigned_add(a, b, dst);
xTaskCreatePinnedToCore(other_task,
"OtherTask",

View File

@ -18,6 +18,11 @@
*/
#define CSR_HWLP_STATE_REG 0x7F1
#define HWLP_OFF_STATE 0
#define HWLP_INITIAL_STATE 1
#define HWLP_CLEAN_STATE 2
#define HWLP_DIRTY_STATE 3
#define CSR_LOOP0_START_ADDR 0x7C6
#define CSR_LOOP0_END_ADDR 0x7C7
#define CSR_LOOP0_COUNT 0x7C8

View File

@ -14,6 +14,7 @@
#include "esp_attr.h"
#include "riscv/csr.h"
#include "riscv/interrupt.h"
#include "riscv/csr_pie.h"
#ifdef __cplusplus
extern "C" {
@ -168,6 +169,27 @@ FORCE_INLINE_ATTR void rv_utils_disable_fpu(void)
#endif /* SOC_CPU_HAS_FPU */
/* ------------------------------------------------- PIE Related ----------------------------------------------------
*
* ------------------------------------------------------------------------------------------------------------------ */
#if SOC_CPU_HAS_PIE
FORCE_INLINE_ATTR void rv_utils_enable_pie(void)
{
RV_WRITE_CSR(CSR_PIE_STATE_REG, 1);
}
FORCE_INLINE_ATTR void rv_utils_disable_pie(void)
{
RV_WRITE_CSR(CSR_PIE_STATE_REG, 0);
}
#endif /* SOC_CPU_HAS_FPU */
/* -------------------------------------------------- Memory Ports -----------------------------------------------------
*
* ------------------------------------------------------------------------------------------------------------------ */

View File

@ -168,17 +168,12 @@ _panic_handler:
/* EXT_ILL CSR should contain the reason for the Illegal Instruction */
csrrw a0, EXT_ILL_CSR, zero
#if SOC_CPU_HAS_HWLOOP
/* Check if the HWLOOP bit is set. */
andi a1, a0, EXT_ILL_RSN_HWLP
bnez a1, rtos_save_hwlp_coproc
#endif // SOC_CPU_HAS_HWLOOP
/* Hardware loop cannot be treated lazily, so we should never end here if a HWLP instruction is used */
#if SOC_CPU_HAS_PIE
/* Check if the HWLOOP bit is set. */
/* Check if the PIE bit is set. */
andi a1, a0, EXT_ILL_RSN_PIE
bnez a1, rtos_save_pie_coproc
#endif // SOC_CPU_HAS_HWLOOP
#endif /* SOC_CPU_HAS_PIE */
#if SOC_CPU_HAS_FPU
/* Check if the FPU bit is set. When targets have the FPU reason bug (SOC_CPU_HAS_FPU_EXT_ILL_BUG),

View File

@ -411,6 +411,10 @@ config SOC_BRANCH_PREDICTOR_SUPPORTED
bool
default y
config SOC_CPU_COPROC_NUM
int
default 3
config SOC_CPU_HAS_FPU
bool
default y
@ -427,10 +431,6 @@ config SOC_CPU_HAS_PIE
bool
default y
config SOC_CPU_COPROC_NUM
int
default 3
config SOC_HP_CPU_HAS_MULTIPLE_CORES
bool
default y

View File

@ -158,11 +158,15 @@
#define SOC_INT_CLIC_SUPPORTED 1
#define SOC_INT_HW_NESTED_SUPPORTED 1 // Support for hardware interrupts nesting
#define SOC_BRANCH_PREDICTOR_SUPPORTED 1
#define SOC_CPU_COPROC_NUM 3
#define SOC_CPU_HAS_FPU 1
#define SOC_CPU_HAS_FPU_EXT_ILL_BUG 1 // EXT_ILL CSR doesn't support FLW/FSW
#define SOC_CPU_HAS_HWLOOP 1
/* PIE coprocessor assembly is only supported with GCC compiler */
#ifndef __clang__
#define SOC_CPU_HAS_PIE 1
#define SOC_CPU_COPROC_NUM 3
#endif
#define SOC_HP_CPU_HAS_MULTIPLE_CORES 1 // Convenience boolean macro used to determine if a target has multiple cores.
#define SOC_CPU_BREAKPOINTS_NUM 3