From 22091c87442594aeac1bf6e035dec8ca5c08597e Mon Sep 17 00:00:00 2001 From: Marius Vikhammer Date: Thu, 31 Aug 2023 12:19:49 +0800 Subject: [PATCH] feat(wdt): add multicore support for WDTs on RISCV --- components/esp_system/crosscore_int.c | 18 ++-- .../esp_system/include/esp_debug_helpers.h | 5 ++ components/esp_system/port/CMakeLists.txt | 1 + .../port/arch/riscv/debug_helpers.c | 60 ++++++++++++++ .../esp_system/port/arch/riscv/panic_arch.c | 29 +++---- components/esp_system/task_wdt/task_wdt.c | 83 ++++--------------- .../riscv/include/esp_private/panic_reason.h | 14 ++-- components/riscv/vectors.S | 14 ++++ 8 files changed, 126 insertions(+), 98 deletions(-) create mode 100644 components/esp_system/port/arch/riscv/debug_helpers.c diff --git a/components/esp_system/crosscore_int.c b/components/esp_system/crosscore_int.c index 2602c1354e..47f64df21e 100644 --- a/components/esp_system/crosscore_int.c +++ b/components/esp_system/crosscore_int.c @@ -30,12 +30,10 @@ #define REASON_YIELD BIT(0) #define REASON_FREQ_SWITCH BIT(1) -#define REASON_GDB_CALL BIT(3) - -#if CONFIG_IDF_TARGET_ARCH_XTENSA #define REASON_PRINT_BACKTRACE BIT(2) +#define REASON_GDB_CALL BIT(3) #define REASON_TWDT_ABORT BIT(4) -#endif + static portMUX_TYPE reason_spinlock = portMUX_INITIALIZER_UNLOCKED; static volatile uint32_t reason[portNUM_PROCESSORS]; @@ -100,20 +98,22 @@ static void IRAM_ATTR esp_crosscore_isr(void *arg) { update_breakpoints(); } #endif // !CONFIG_ESP_SYSTEM_GDBSTUB_RUNTIME -#if CONFIG_IDF_TARGET_ARCH_XTENSA // IDF-2986 + if (my_reason_val & REASON_PRINT_BACKTRACE) { esp_backtrace_print(100); } + + #if CONFIG_ESP_TASK_WDT_EN if (my_reason_val & REASON_TWDT_ABORT) { - extern void task_wdt_timeout_abort_xtensa(bool); + extern void task_wdt_timeout_abort(bool); /* Called from a crosscore interrupt, thus, we are not the core that received * the TWDT interrupt, call the function with `false` as a parameter. */ - task_wdt_timeout_abort_xtensa(false); + task_wdt_timeout_abort(false); } #endif // CONFIG_ESP_TASK_WDT_EN -#endif // CONFIG_IDF_TARGET_ARCH_XTENSA + } //Initialize the crosscore interrupt on this core. Call this once @@ -182,7 +182,6 @@ void IRAM_ATTR esp_crosscore_int_send_gdb_call(int core_id) esp_crosscore_int_send(core_id, REASON_GDB_CALL); } -#if CONFIG_IDF_TARGET_ARCH_XTENSA void IRAM_ATTR esp_crosscore_int_send_print_backtrace(int core_id) { esp_crosscore_int_send(core_id, REASON_PRINT_BACKTRACE); @@ -193,4 +192,3 @@ void IRAM_ATTR esp_crosscore_int_send_twdt_abort(int core_id) { esp_crosscore_int_send(core_id, REASON_TWDT_ABORT); } #endif // CONFIG_ESP_TASK_WDT_EN -#endif diff --git a/components/esp_system/include/esp_debug_helpers.h b/components/esp_system/include/esp_debug_helpers.h index c5d92a5070..02b7bc6a54 100644 --- a/components/esp_system/include/esp_debug_helpers.h +++ b/components/esp_system/include/esp_debug_helpers.h @@ -103,6 +103,11 @@ esp_err_t esp_backtrace_print_from_frame(int depth, const esp_backtrace_frame_t* * * @param depth The maximum number of stack frames to print (should be > 0) * + * @note On RISC-V targets printing backtrace at run-time is only available if + * CONFIG_ESP_SYSTEM_USE_EH_FRAME is selected. Otherwise we simply print + * a register dump. Function assumes it is called in a context where the + * calling task will not migrate to another core, e.g. interrupts disabled/panic handler. + * * @return * - ESP_OK Backtrace successfully printed to completion or to depth limit * - ESP_FAIL Backtrace is corrupted diff --git a/components/esp_system/port/CMakeLists.txt b/components/esp_system/port/CMakeLists.txt index 0ad5418428..7f6861b512 100644 --- a/components/esp_system/port/CMakeLists.txt +++ b/components/esp_system/port/CMakeLists.txt @@ -41,6 +41,7 @@ if(CONFIG_IDF_TARGET_ARCH_XTENSA) elseif(CONFIG_IDF_TARGET_ARCH_RISCV) list(APPEND srcs "arch/riscv/expression_with_stack.c" "arch/riscv/panic_arch.c" + "arch/riscv/debug_helpers.c" "arch/riscv/debug_stubs.c") endif() diff --git a/components/esp_system/port/arch/riscv/debug_helpers.c b/components/esp_system/port/arch/riscv/debug_helpers.c new file mode 100644 index 0000000000..9fef778c79 --- /dev/null +++ b/components/esp_system/port/arch/riscv/debug_helpers.c @@ -0,0 +1,60 @@ +/* + * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "esp_debug_helpers.h" +#include "sdkconfig.h" +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "esp_private/freertos_debug.h" +#include "esp_err.h" +#include "esp_attr.h" +#include "riscv/rvruntime-frames.h" + +#if CONFIG_ESP_SYSTEM_USE_EH_FRAME +#include "esp_private/eh_frame_parser.h" +#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME + +#if !CONFIG_ESP_SYSTEM_USE_EH_FRAME +/* Function used to print all the registers pointed by the given frame .*/ +extern void panic_print_registers(const void *frame, int core); +#endif // !CONFIG_ESP_SYSTEM_USE_EH_FRAME + +/* Targets based on a RISC-V CPU cannot perform backtracing that easily. + * We have two options here: + * - Perform backtracing at runtime. + * - Let IDF monitor do the backtracing for us. Used during panic already. + * This could be configurable, choosing one or the other depending on + * CONFIG_ESP_SYSTEM_USE_EH_FRAME configuration option. + * + * In both cases, this takes time, and we might be in an ISR, we must + * exit this handler as fast as possible, then we will simply print + * the interruptee's registers. + */ +esp_err_t IRAM_ATTR esp_backtrace_print(int depth) +{ + (void)depth; + + const int current_core = xPortGetCoreID(); + + TaskSnapshot_t snapshot = { 0 }; + BaseType_t ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandleForCPU(current_core), &snapshot); + + if (ret != pdTRUE) { + return ESP_ERR_NOT_FOUND; + } + + void *frame = snapshot.pxTopOfStack; + +#if CONFIG_ESP_SYSTEM_USE_EH_FRAME + esp_rom_printf("Print CPU %d (current core) backtrace\n", current_core); + esp_eh_frame_print_backtrace(frame); +#else // CONFIG_ESP_SYSTEM_USE_EH_FRAME + esp_rom_printf("Print CPU %d (current core) registers\n", current_core); + panic_print_registers(frame, current_core); + esp_rom_printf("\r\n"); +#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME + + return ESP_OK; +} diff --git a/components/esp_system/port/arch/riscv/panic_arch.c b/components/esp_system/port/arch/riscv/panic_arch.c index b56e379096..c24c2f712e 100644 --- a/components/esp_system/port/arch/riscv/panic_arch.c +++ b/components/esp_system/port/arch/riscv/panic_arch.c @@ -13,6 +13,7 @@ #else #include "soc/extmem_reg.h" #endif +#include "soc/soc_caps.h" #include "esp_private/panic_internal.h" #include "esp_private/panic_reason.h" #include "riscv/rvruntime-frames.h" @@ -276,8 +277,6 @@ static void panic_print_register_array(const char* names[], const uint32_t* regs void panic_print_registers(const void *f, int core) { - const RvExcFrame *frame = (RvExcFrame *)f; - /** * General Purpose context, only print ABI name */ @@ -290,7 +289,7 @@ void panic_print_registers(const void *f, int core) }; panic_print_str("Core "); - panic_print_dec(frame->mhartid); + panic_print_dec(core); panic_print_str(" register dump:"); panic_print_register_array(desc, f, DIM(desc)); } @@ -319,23 +318,21 @@ void panic_soc_fill_info(void *f, panic_info_t *info) info->reason = "Cache error"; info->details = print_cache_err_details; - } else if (frame->mcause == ETS_INT_WDT_INUM) { - /* Watchdog interrupt occured, get the core on which it happened - * and update the reason/message accordingly. */ - - const int core = esp_cache_err_get_cpuid(); + } else if (frame->mcause == PANIC_RSN_INTWDT_CPU0) { + const int core = 0; info->core = core; info->exception = PANIC_EXCEPTION_IWDT; - -#if SOC_CPU_NUM > 1 -#error "TODO: define PANIC_RSN_INTWDT_CPU1 in panic_reason.h" - _Static_assert(PANIC_RSN_INTWDT_CPU0 + 1 == PANIC_RSN_INTWDT_CPU1, - "PANIC_RSN_INTWDT_CPU1 must be equal to PANIC_RSN_INTWDT_CPU0 + 1"); - info->reason = core == 0 ? "Interrupt wdt timeout on CPU0" : "Interrupt wdt timeout on CPU1"; -#else info->reason = "Interrupt wdt timeout on CPU0"; -#endif } +#if SOC_CPU_CORES_NUM > 1 + else if (frame->mcause == PANIC_RSN_INTWDT_CPU1) { + const int core = 1; + info->core = core; + info->exception = PANIC_EXCEPTION_IWDT; + info->reason = "Interrupt wdt timeout on CPU1"; + } +#endif + #if CONFIG_ESP_SYSTEM_HW_STACK_GUARD else if (frame->mcause == ETS_ASSIST_DEBUG_INUM) { info->core = esp_cache_err_get_cpuid(); diff --git a/components/esp_system/task_wdt/task_wdt.c b/components/esp_system/task_wdt/task_wdt.c index eba1677783..1eafe09836 100644 --- a/components/esp_system/task_wdt/task_wdt.c +++ b/components/esp_system/task_wdt/task_wdt.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2015-2023 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -24,6 +24,12 @@ #include "esp_private/esp_task_wdt.h" #include "esp_private/esp_task_wdt_impl.h" + +#if CONFIG_IDF_TARGET_ARCH_RISCV +#include "riscv/rvruntime-frames.h" +#endif //CONFIG_IDF_TARGET_ARCH_RISCV + + #if CONFIG_ESP_SYSTEM_USE_EH_FRAME #include "esp_private/eh_frame_parser.h" #endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME @@ -331,65 +337,6 @@ static void subscribe_idle(uint32_t core_mask) * */ -#if CONFIG_IDF_TARGET_ARCH_RISCV - -static void task_wdt_timeout_handling(int cores_fail, bool panic) -{ - /* For RISC-V, make sure the cores that fail is only composed of core 0. */ - assert(cores_fail == BIT(0)); - - const int current_core = 0; - TaskSnapshot_t snapshot = { 0 }; - BaseType_t ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot); - - if (p_twdt_obj->panic) { - assert(ret == pdTRUE); - ESP_EARLY_LOGE(TAG, "Aborting."); - esp_reset_reason_set_hint(ESP_RST_TASK_WDT); - /** - * We cannot simply use `abort` here because the `panic` handler would - * interpret it as if the task watchdog ISR aborted and so, print this - * current ISR backtrace/context. We want to trick the `panic` handler - * to think the task itself is aborting. - * To do so, we need to get the interruptee's top of the stack. It contains - * its own context, saved when the interrupt occurred. - * We must also set the global flag that states that an abort occurred - * (and not a panic) - **/ - g_panic_abort = true; - g_twdt_isr = true; - void *frame = (void *) snapshot.pxTopOfStack; -#if CONFIG_ESP_SYSTEM_USE_EH_FRAME - ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core); -#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME - xt_unhandled_exception(frame); - } else { - /* Targets based on a RISC-V CPU cannot perform backtracing that easily. - * We have two options here: - * - Perform backtracing at runtime. - * - Let IDF monitor do the backtracing for us. Used during panic already. - * This could be configurable, choosing one or the other depending on - * CONFIG_ESP_SYSTEM_USE_EH_FRAME configuration option. - * - * In both cases, this takes time, and we are in an ISR, we must - * exit this handler as fast as possible, then we will simply print - * the interruptee's registers. - */ - if (ret == pdTRUE) { - void *frame = (void *) snapshot.pxTopOfStack; -#if CONFIG_ESP_SYSTEM_USE_EH_FRAME - ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", current_core); - esp_eh_frame_print_backtrace(frame); -#else // CONFIG_ESP_SYSTEM_USE_EH_FRAME - ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) registers", current_core); - panic_print_registers(frame, current_core); - esp_rom_printf("\r\n"); -#endif // CONFIG_ESP_SYSTEM_USE_EH_FRAME - } - } -} - -#else // CONFIG_IDF_TARGET_ARCH_RISCV /** * Function simulating an abort coming from the interrupted task of the current @@ -398,7 +345,7 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic) * in the case where the other core (than the main one) has to abort because one * of his tasks didn't reset the TWDT on time. */ -void task_wdt_timeout_abort_xtensa(bool current_core) +void task_wdt_timeout_abort(bool current_core) { TaskSnapshot_t snapshot = { 0 }; BaseType_t ret = pdTRUE; @@ -408,26 +355,32 @@ void task_wdt_timeout_abort_xtensa(bool current_core) ret = vTaskGetSnapshot(xTaskGetCurrentTaskHandle(), &snapshot); assert(ret == pdTRUE); g_panic_abort = true; - /* For Xtensa, we should set this flag as late as possible, as this function may + /* We should set this flag as late as possible, as this function may * be called after a crosscore interrupt. Indeed, a higher interrupt may occur * after calling the crosscore interrupt, if its handler fails, this flag * shall not be set. * This flag will tell the coredump component (if activated) that yes, we are in * an ISR context, but it is intended, it is not because an ISR encountered an - * exception. If we don't set such flag, later tested by coredump, the later would + * exception. If we don't set such flag, later tested by coredump, the latter would * switch the execution frame/context we are giving it to the interrupt stack. * For details about this behavior in the TODO task: IDF-5694 */ g_twdt_isr = true; void *frame = (void *) snapshot.pxTopOfStack; + +#if CONFIG_ESP_SYSTEM_USE_EH_FRAME | CONFIG_IDF_TARGET_ARCH_XTENSA if (current_core) { ESP_EARLY_LOGE(TAG, "Print CPU %d (current core) backtrace", xPortGetCoreID()); } else { ESP_EARLY_LOGE(TAG, "Print CPU %d backtrace", xPortGetCoreID()); } +#endif + xt_unhandled_exception(frame); } + + static void task_wdt_timeout_handling(int cores_fail, bool panic) { const int current_core = xPortGetCoreID(); @@ -453,7 +406,7 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic) } #endif // !CONFIG_FREERTOS_UNICORE /* Current core is failing, abort right now */ - task_wdt_timeout_abort_xtensa(true); + task_wdt_timeout_abort(true); } else { /* Print backtrace of the core that failed to reset the watchdog */ if (cores_fail & BIT(current_core)) { @@ -470,8 +423,6 @@ static void task_wdt_timeout_handling(int cores_fail, bool panic) } } -#endif // CONFIG_IDF_TARGET_ARCH_RISCV - // ---------------------- Callbacks ------------------------ diff --git a/components/riscv/include/esp_private/panic_reason.h b/components/riscv/include/esp_private/panic_reason.h index 1a9e1101cb..a89f5708f7 100644 --- a/components/riscv/include/esp_private/panic_reason.h +++ b/components/riscv/include/esp_private/panic_reason.h @@ -5,14 +5,16 @@ */ #pragma once -/* Since riscv does not replace mcause with "pseudo_reason" as it xtensa does - * PANIC_RSN_* defined with original interrupt numbers to make it work in - * common code +#include "soc/soc_caps.h" + +/* Need a way to signal which core caused the INT WDT like we do with EXCAUSE on xtensa. + Choosing a large number that is unlikely to conflict with any actual riscv mcauses + bit 12 and above are always zero on the CPU used by P4 */ #define PANIC_RSN_INTWDT_CPU0 ETS_INT_WDT_INUM - -//TODO: IDF-7511 #if SOC_CPU_CORES_NUM > 1 -#define PANIC_RSN_INTWDT_CPU1 ETS_INT_WDT_INUM +#define PANIC_RSN_INTWDT_CPU1_FLAG (1 << 12) +#define PANIC_RSN_INTWDT_CPU1 (PANIC_RSN_INTWDT_CPU1_FLAG | ETS_INT_WDT_INUM) #endif + #define PANIC_RSN_CACHEERR 3 diff --git a/components/riscv/vectors.S b/components/riscv/vectors.S index 5fad1a26b3..58a248309f 100644 --- a/components/riscv/vectors.S +++ b/components/riscv/vectors.S @@ -10,6 +10,7 @@ #include "soc/soc_caps.h" #include "sdkconfig.h" #include "esp_private/vectors_const.h" +#include "esp_private/panic_reason.h" .equ SAVE_REGS, 32 @@ -224,6 +225,19 @@ _call_panic_handler: /* When CLIC is supported, external interrupts are shifted by 16, deduct this difference from mcause */ add a1, a1, -16 #endif // CONFIG_SOC_INT_CLIC_SUPPORTED + +#if CONFIG_ESP_INT_WDT_CHECK_CPU1 + /* Check if this was a INT WDT */ + li t0, PANIC_RSN_INTWDT_CPU0 + bne a1, t0, _store_mcause + /* Check if the cause is the app cpu failing to tick, if so then update mcause to reflect this*/ + lw t0, int_wdt_cpu1_ticked + bnez t0, _store_mcause + li t0, PANIC_RSN_INTWDT_CPU1_FLAG + add a1, a1, t0 +#endif + +_store_mcause: sw a1, RV_STK_MCAUSE(sp) call panic_from_isr /* We arrive here if the exception handler has returned. This means that