feat(wdt): add multicore support for WDTs on RISCV

This commit is contained in:
Marius Vikhammer 2023-08-31 12:19:49 +08:00
parent 51434a8367
commit 22091c8744
8 changed files with 126 additions and 98 deletions

View File

@ -30,12 +30,10 @@
#define REASON_YIELD BIT(0)
#define REASON_FREQ_SWITCH BIT(1)
#define REASON_GDB_CALL BIT(3)
#if CONFIG_IDF_TARGET_ARCH_XTENSA
#define REASON_PRINT_BACKTRACE BIT(2)
#define REASON_GDB_CALL BIT(3)
#define REASON_TWDT_ABORT BIT(4)
#endif
static portMUX_TYPE reason_spinlock = portMUX_INITIALIZER_UNLOCKED;
static volatile uint32_t reason[portNUM_PROCESSORS];
@ -100,20 +98,22 @@ static void IRAM_ATTR esp_crosscore_isr(void *arg) {
update_breakpoints();
}
#endif // !CONFIG_ESP_SYSTEM_GDBSTUB_RUNTIME
#if CONFIG_IDF_TARGET_ARCH_XTENSA // IDF-2986
if (my_reason_val & REASON_PRINT_BACKTRACE) {
esp_backtrace_print(100);
}
#if CONFIG_ESP_TASK_WDT_EN
if (my_reason_val & REASON_TWDT_ABORT) {
extern void task_wdt_timeout_abort_xtensa(bool);
extern void task_wdt_timeout_abort(bool);
/* Called from a crosscore interrupt, thus, we are not the core that received
* the TWDT interrupt, call the function with `false` as a parameter. */
task_wdt_timeout_abort_xtensa(false);
task_wdt_timeout_abort(false);
}
#endif // CONFIG_ESP_TASK_WDT_EN
#endif // CONFIG_IDF_TARGET_ARCH_XTENSA
}
//Initialize the crosscore interrupt on this core. Call this once
@ -182,7 +182,6 @@ void IRAM_ATTR esp_crosscore_int_send_gdb_call(int core_id)
esp_crosscore_int_send(core_id, REASON_GDB_CALL);
}
#if CONFIG_IDF_TARGET_ARCH_XTENSA
void IRAM_ATTR esp_crosscore_int_send_print_backtrace(int core_id)
{
esp_crosscore_int_send(core_id, REASON_PRINT_BACKTRACE);
@ -193,4 +192,3 @@ void IRAM_ATTR esp_crosscore_int_send_twdt_abort(int core_id) {
esp_crosscore_int_send(core_id, REASON_TWDT_ABORT);
}
#endif // CONFIG_ESP_TASK_WDT_EN
#endif

View File

@ -103,6 +103,11 @@ esp_err_t esp_backtrace_print_from_frame(int depth, const esp_backtrace_frame_t*
*
* @param depth The maximum number of stack frames to print (should be > 0)
*
* @note On RISC-V targets printing backtrace at run-time is only available if
* CONFIG_ESP_SYSTEM_USE_EH_FRAME is selected. Otherwise we simply print
* a register dump. Function assumes it is called in a context where the
* calling task will not migrate to another core, e.g. interrupts disabled/panic handler.
*
* @return
* - ESP_OK Backtrace successfully printed to completion or to depth limit
* - ESP_FAIL Backtrace is corrupted

View File

@ -41,6 +41,7 @@ if(CONFIG_IDF_TARGET_ARCH_XTENSA)
elseif(CONFIG_IDF_TARGET_ARCH_RISCV)
list(APPEND srcs "arch/riscv/expression_with_stack.c"
"arch/riscv/panic_arch.c"
"arch/riscv/debug_helpers.c"
"arch/riscv/debug_stubs.c")
endif()

View File

@ -0,0 +1,60 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "esp_debug_helpers.h"
#include "sdkconfig.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_private/freertos_debug.h"
#include "esp_err.h"
#include "esp_attr.h"
#include "riscv/rvruntime-frames.h"
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
#include "esp_private/eh_frame_parser.h"
#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
#if !CONFIG_ESP_SYSTEM_USE_EH_FRAME
/* Function used to print all the registers pointed by the given frame .*/
extern void panic_print_registers(const void *frame, int core);
#endif // !CONFIG_ESP_SYSTEM_USE_EH_FRAME
/* Targets based on a RISC-V CPU cannot perform backtracing that easily.
* We have two options here:
* - Perform backtracing at runtime.
* - Let IDF monitor do the backtracing for us. Used during panic already.
* This could be configurable, choosing one or the other depending on
* CONFIG_ESP_SYSTEM_USE_EH_FRAME configuration option.
*
* In both cases, this takes time, and we might be in an ISR, we must
* exit this handler as fast as possible, then we will simply print
* the interruptee's registers.
*/
esp_err_t IRAM_ATTR esp_backtrace_print(int depth)
{
(void)depth;
const int current_core = xPortGetCoreID();
TaskSnapshot_t snapshot = { 0 };
BaseType_t ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandleForCPU(current_core), &snapshot);
if (ret != pdTRUE) {
return ESP_ERR_NOT_FOUND;
}
void *frame = snapshot.pxTopOfStack;
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
esp_rom_printf("Print CPU %d (current core) backtrace\n", current_core);
esp_eh_frame_print_backtrace(frame);
#else // CONFIG_ESP_SYSTEM_USE_EH_FRAME
esp_rom_printf("Print CPU %d (current core) registers\n", current_core);
panic_print_registers(frame, current_core);
esp_rom_printf("\r\n");
#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
return ESP_OK;
}

View File

@ -13,6 +13,7 @@
#else
#include "soc/extmem_reg.h"
#endif
#include "soc/soc_caps.h"
#include "esp_private/panic_internal.h"
#include "esp_private/panic_reason.h"
#include "riscv/rvruntime-frames.h"
@ -276,8 +277,6 @@ static void panic_print_register_array(const char* names[], const uint32_t* regs
void panic_print_registers(const void *f, int core)
{
const RvExcFrame *frame = (RvExcFrame *)f;
/**
* General Purpose context, only print ABI name
*/
@ -290,7 +289,7 @@ void panic_print_registers(const void *f, int core)
};
panic_print_str("Core ");
panic_print_dec(frame->mhartid);
panic_print_dec(core);
panic_print_str(" register dump:");
panic_print_register_array(desc, f, DIM(desc));
}
@ -319,23 +318,21 @@ void panic_soc_fill_info(void *f, panic_info_t *info)
info->reason = "Cache error";
info->details = print_cache_err_details;
} else if (frame->mcause == ETS_INT_WDT_INUM) {
/* Watchdog interrupt occured, get the core on which it happened
* and update the reason/message accordingly. */
const int core = esp_cache_err_get_cpuid();
} else if (frame->mcause == PANIC_RSN_INTWDT_CPU0) {
const int core = 0;
info->core = core;
info->exception = PANIC_EXCEPTION_IWDT;
#if SOC_CPU_NUM > 1
#error "TODO: define PANIC_RSN_INTWDT_CPU1 in panic_reason.h"
_Static_assert(PANIC_RSN_INTWDT_CPU0 + 1 == PANIC_RSN_INTWDT_CPU1,
"PANIC_RSN_INTWDT_CPU1 must be equal to PANIC_RSN_INTWDT_CPU0 + 1");
info->reason = core == 0 ? "Interrupt wdt timeout on CPU0" : "Interrupt wdt timeout on CPU1";
#else
info->reason = "Interrupt wdt timeout on CPU0";
#endif
}
#if SOC_CPU_CORES_NUM > 1
else if (frame->mcause == PANIC_RSN_INTWDT_CPU1) {
const int core = 1;
info->core = core;
info->exception = PANIC_EXCEPTION_IWDT;
info->reason = "Interrupt wdt timeout on CPU1";
}
#endif
#if CONFIG_ESP_SYSTEM_HW_STACK_GUARD
else if (frame->mcause == ETS_ASSIST_DEBUG_INUM) {
info->core = esp_cache_err_get_cpuid();

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2015-2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -24,6 +24,12 @@
#include "esp_private/esp_task_wdt.h"
#include "esp_private/esp_task_wdt_impl.h"
#if CONFIG_IDF_TARGET_ARCH_RISCV
#include "riscv/rvruntime-frames.h"
#endif //CONFIG_IDF_TARGET_ARCH_RISCV
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
#include "esp_private/eh_frame_parser.h"
#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
@ -331,65 +337,6 @@ static void subscribe_idle(uint32_t core_mask)
*
*/
#if CONFIG_IDF_TARGET_ARCH_RISCV
static void task_wdt_timeout_handling(int cores_fail, bool panic)
{
/* For RISC-V, make sure the cores that fail is only composed of core 0. */
assert(cores_fail == BIT(0));
const int current_core = 0;
TaskSnapshot_t snapshot = { 0 };
BaseType_t ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot);
if (p_twdt_obj->panic) {
assert(ret == pdTRUE);
ESP_EARLY_LOGE(TAG, "Aborting.");
esp_reset_reason_set_hint(ESP_RST_TASK_WDT);
/**
* We cannot simply use `abort` here because the `panic` handler would
* interpret it as if the task watchdog ISR aborted and so, print this
* current ISR backtrace/context. We want to trick the `panic` handler
* to think the task itself is aborting.
* To do so, we need to get the interruptee's top of the stack. It contains
* its own context, saved when the interrupt occurred.
* We must also set the global flag that states that an abort occurred
* (and not a panic)
**/
g_panic_abort = true;
g_twdt_isr = true;
void *frame = (void *) snapshot.pxTopOfStack;
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core);
#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
xt_unhandled_exception(frame);
} else {
/* Targets based on a RISC-V CPU cannot perform backtracing that easily.
* We have two options here:
* - Perform backtracing at runtime.
* - Let IDF monitor do the backtracing for us. Used during panic already.
* This could be configurable, choosing one or the other depending on
* CONFIG_ESP_SYSTEM_USE_EH_FRAME configuration option.
*
* In both cases, this takes time, and we are in an ISR, we must
* exit this handler as fast as possible, then we will simply print
* the interruptee's registers.
*/
if (ret == pdTRUE) {
void *frame = (void *) snapshot.pxTopOfStack;
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core);
esp_eh_frame_print_backtrace(frame);
#else // CONFIG_ESP_SYSTEM_USE_EH_FRAME
ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) registers", current_core);
panic_print_registers(frame, current_core);
esp_rom_printf("\r\n");
#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME
}
}
}
#else // CONFIG_IDF_TARGET_ARCH_RISCV
/**
* Function simulating an abort coming from the interrupted task of the current
@ -398,7 +345,7 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic)
* in the case where the other core (than the main one) has to abort because one
* of his tasks didn't reset the TWDT on time.
*/
void task_wdt_timeout_abort_xtensa(bool current_core)
void task_wdt_timeout_abort(bool current_core)
{
TaskSnapshot_t snapshot = { 0 };
BaseType_t ret = pdTRUE;
@ -408,26 +355,32 @@ void task_wdt_timeout_abort_xtensa(bool current_core)
ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot);
assert(ret == pdTRUE);
g_panic_abort = true;
/* For Xtensa, we should set this flag as late as possible, as this function may
/* We should set this flag as late as possible, as this function may
* be called after a crosscore interrupt. Indeed, a higher interrupt may occur
* after calling the crosscore interrupt, if its handler fails, this flag
* shall not be set.
* This flag will tell the coredump component (if activated) that yes, we are in
* an ISR context, but it is intended, it is not because an ISR encountered an
* exception. If we don't set such flag, later tested by coredump, the later would
* exception. If we don't set such flag, later tested by coredump, the latter would
* switch the execution frame/context we are giving it to the interrupt stack.
* For details about this behavior in the TODO task: IDF-5694
*/
g_twdt_isr = true;
void *frame = (void *) snapshot.pxTopOfStack;
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME | CONFIG_IDF_TARGET_ARCH_XTENSA
if (current_core) {
ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", xPortGetCoreID());
} else {
ESP_EARLY_LOGE(TAG, "Print CPU %d backtrace", xPortGetCoreID());
}
#endif
xt_unhandled_exception(frame);
}
static void task_wdt_timeout_handling(int cores_fail, bool panic)
{
const int current_core = xPortGetCoreID();
@ -453,7 +406,7 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic)
}
#endif // !CONFIG_FREERTOS_UNICORE
/* Current core is failing, abort right now */
task_wdt_timeout_abort_xtensa(true);
task_wdt_timeout_abort(true);
} else {
/* Print backtrace of the core that failed to reset the watchdog */
if (cores_fail & BIT(current_core)) {
@ -470,8 +423,6 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic)
}
}
#endif // CONFIG_IDF_TARGET_ARCH_RISCV
// ---------------------- Callbacks ------------------------

View File

@ -5,14 +5,16 @@
*/
#pragma once
/* Since riscv does not replace mcause with "pseudo_reason" as it xtensa does
* PANIC_RSN_* defined with original interrupt numbers to make it work in
* common code
#include "soc/soc_caps.h"
/* Need a way to signal which core caused the INT WDT like we do with EXCAUSE on xtensa.
Choosing a large number that is unlikely to conflict with any actual riscv mcauses
bit 12 and above are always zero on the CPU used by P4
*/
#define PANIC_RSN_INTWDT_CPU0 ETS_INT_WDT_INUM
//TODO: IDF-7511
#if SOC_CPU_CORES_NUM > 1
#define PANIC_RSN_INTWDT_CPU1 ETS_INT_WDT_INUM
#define PANIC_RSN_INTWDT_CPU1_FLAG (1 << 12)
#define PANIC_RSN_INTWDT_CPU1 (PANIC_RSN_INTWDT_CPU1_FLAG | ETS_INT_WDT_INUM)
#endif
#define PANIC_RSN_CACHEERR 3

View File

@ -10,6 +10,7 @@
#include "soc/soc_caps.h"
#include "sdkconfig.h"
#include "esp_private/vectors_const.h"
#include "esp_private/panic_reason.h"
.equ SAVE_REGS, 32
@ -224,6 +225,19 @@ _call_panic_handler:
/* When CLIC is supported, external interrupts are shifted by 16, deduct this difference from mcause */
add a1, a1, -16
#endif // CONFIG_SOC_INT_CLIC_SUPPORTED
#if CONFIG_ESP_INT_WDT_CHECK_CPU1
/* Check if this was a INT WDT */
li t0, PANIC_RSN_INTWDT_CPU0
bne a1, t0, _store_mcause
/* Check if the cause is the app cpu failing to tick, if so then update mcause to reflect this*/
lw t0, int_wdt_cpu1_ticked
bnez t0, _store_mcause
li t0, PANIC_RSN_INTWDT_CPU1_FLAG
add a1, a1, t0
#endif
_store_mcause:
sw a1, RV_STK_MCAUSE(sp)
call panic_from_isr
/* We arrive here if the exception handler has returned. This means that