From 0b79d07d34340a0811c457144b3e80f1be44b8e6 Mon Sep 17 00:00:00 2001 From: Jeroen Domburg Date: Thu, 9 Mar 2017 20:50:39 +0800 Subject: [PATCH] add detection of invalid cache access - fix level 4 interrupt vectors to produce correct backtrace - initialize invalid cache access interrupt on startup - handle invalid cache access in panic handler --- components/esp32/cache_err_int.c | 99 ++++++++++++++++++++ components/esp32/cpu_start.c | 3 + components/esp32/include/esp_cache_err_int.h | 33 +++++++ components/esp32/include/esp_panic.h | 3 +- components/esp32/panic.c | 64 ++++++++++--- components/freertos/xtensa_vectors.S | 92 +++++++++++++----- components/soc/esp32/include/soc/soc.h | 8 +- 7 files changed, 262 insertions(+), 40 deletions(-) create mode 100644 components/esp32/cache_err_int.c create mode 100644 components/esp32/include/esp_cache_err_int.h diff --git a/components/esp32/cache_err_int.c b/components/esp32/cache_err_int.c new file mode 100644 index 0000000000..ece6c2d604 --- /dev/null +++ b/components/esp32/cache_err_int.c @@ -0,0 +1,99 @@ +// Copyright 2015-2017 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + The cache has an interrupt that can be raised as soon as an access to a cached + region (flash, psram) is done without the cache being enabled. We use that here + to panic the CPU, which from a debugging perspective is better than grabbing bad + data from the bus. +*/ + +#include +#include +#include +#include +#include "freertos/FreeRTOS.h" +#include "esp_err.h" +#include "esp_intr.h" +#include "esp_attr.h" +#include "soc/dport_reg.h" +#include "sdkconfig.h" + +void esp_cache_err_int_init() +{ + uint32_t core_id = xPortGetCoreID(); + ESP_INTR_DISABLE(ETS_CACHEERR_INUM); + + // We do not register a handler for the interrupt because it is interrupt + // level 4 which is not serviceable from C. Instead, xtensa_vectors.S has + // a call to the panic handler for + // this interrupt. + intr_matrix_set(core_id, ETS_CACHE_IA_INTR_SOURCE, ETS_CACHEERR_INUM); + + // Enable invalid cache access interrupt when the cache is disabled. + // When the interrupt happens, we can not determine the CPU where the + // invalid cache access has occurred. We enable the interrupt to catch + // invalid access on both CPUs, but the interrupt is connected to the + // CPU which happens to call this function. + // For this reason, panic handler backtrace will not be correct if the + // interrupt is connected to PRO CPU and invalid access happens on the APP + // CPU. + + if (core_id == PRO_CPU_NUM) { + SET_PERI_REG_MASK(DPORT_CACHE_IA_INT_EN_REG, + DPORT_CACHE_IA_INT_PRO_OPPOSITE | + DPORT_CACHE_IA_INT_PRO_DRAM1 | + DPORT_CACHE_IA_INT_PRO_DROM0 | + DPORT_CACHE_IA_INT_PRO_IROM0 | + DPORT_CACHE_IA_INT_PRO_IRAM0 | + DPORT_CACHE_IA_INT_PRO_IRAM1); + } else { + SET_PERI_REG_MASK(DPORT_CACHE_IA_INT_EN_REG, + DPORT_CACHE_IA_INT_APP_OPPOSITE | + DPORT_CACHE_IA_INT_APP_DRAM1 | + DPORT_CACHE_IA_INT_APP_DROM0 | + DPORT_CACHE_IA_INT_APP_IROM0 | + DPORT_CACHE_IA_INT_APP_IRAM0 | + DPORT_CACHE_IA_INT_APP_IRAM1); + } + ESP_INTR_ENABLE(ETS_CACHEERR_INUM); +} + +int IRAM_ATTR esp_cache_err_get_cpuid() +{ + const uint32_t pro_mask = + DPORT_PRO_CPU_DISABLED_CACHE_IA_DRAM1 | + DPORT_PRO_CPU_DISABLED_CACHE_IA_DROM0 | + DPORT_PRO_CPU_DISABLED_CACHE_IA_IROM0 | + DPORT_PRO_CPU_DISABLED_CACHE_IA_IRAM0 | + DPORT_PRO_CPU_DISABLED_CACHE_IA_IRAM1 | + DPORT_APP_CPU_DISABLED_CACHE_IA_OPPOSITE; + + if (GET_PERI_REG_MASK(DPORT_PRO_DCACHE_DBUG3_REG, pro_mask)) { + return PRO_CPU_NUM; + } + + const uint32_t app_mask = + DPORT_APP_CPU_DISABLED_CACHE_IA_DRAM1 | + DPORT_APP_CPU_DISABLED_CACHE_IA_DROM0 | + DPORT_APP_CPU_DISABLED_CACHE_IA_IROM0 | + DPORT_APP_CPU_DISABLED_CACHE_IA_IRAM0 | + DPORT_APP_CPU_DISABLED_CACHE_IA_IRAM1 | + DPORT_PRO_CPU_DISABLED_CACHE_IA_OPPOSITE; + + if (GET_PERI_REG_MASK(DPORT_APP_DCACHE_DBUG3_REG, app_mask)) { + return APP_CPU_NUM; + } + return -1; +} diff --git a/components/esp32/cpu_start.c b/components/esp32/cpu_start.c index 61fd7f8b44..c3ef142c6b 100644 --- a/components/esp32/cpu_start.c +++ b/components/esp32/cpu_start.c @@ -55,6 +55,7 @@ #include "esp_int_wdt.h" #include "esp_task_wdt.h" #include "esp_phy_init.h" +#include "esp_cache_err_int.h" #include "esp_coexist.h" #include "esp_panic.h" #include "esp_core_dump.h" @@ -228,6 +229,7 @@ void start_cpu0_default(void) #if CONFIG_TASK_WDT esp_task_wdt_init(); #endif + esp_cache_err_int_init(); esp_crosscore_int_init(); esp_ipc_init(); spi_flash_init(); @@ -257,6 +259,7 @@ void start_cpu1_default(void) } //Take care putting stuff here: if asked, FreeRTOS will happily tell you the scheduler //has started, but it isn't active *on this CPU* yet. + esp_cache_err_int_init(); esp_crosscore_int_init(); ESP_EARLY_LOGI(TAG, "Starting scheduler on APP CPU."); diff --git a/components/esp32/include/esp_cache_err_int.h b/components/esp32/include/esp_cache_err_int.h new file mode 100644 index 0000000000..bcbd63e799 --- /dev/null +++ b/components/esp32/include/esp_cache_err_int.h @@ -0,0 +1,33 @@ +// Copyright 2015-2017 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +/** + * @brief initialize cache invalid access interrupt + * + * This function enables cache invalid access interrupt source and connects it + * to interrupt input number ETS_CACHEERR_INUM (see soc/soc.h). It is called + * from the startup code. + */ +void esp_cache_err_int_init(); + + +/** + * @brief get the CPU which caused cache invalid access interrupt + * @return + * - PRO_CPU_NUM, if PRO_CPU has caused cache IA interrupt + * - APP_CPU_NUM, if APP_CPU has caused cache IA interrupt + * - (-1) otherwise + */ +int esp_cache_err_get_cpuid(); diff --git a/components/esp32/include/esp_panic.h b/components/esp32/include/esp_panic.h index c9ee14099d..c4cc385720 100644 --- a/components/esp32/include/esp_panic.h +++ b/components/esp32/include/esp_panic.h @@ -13,7 +13,8 @@ extern "C" #define PANIC_RSN_COPROCEXCEPTION 4 #define PANIC_RSN_INTWDT_CPU0 5 #define PANIC_RSN_INTWDT_CPU1 6 -#define PANIC_RSN_MAX 6 +#define PANIC_RSN_CACHEERR 7 +#define PANIC_RSN_MAX 7 #ifndef __ASSEMBLER__ diff --git a/components/esp32/panic.c b/components/esp32/panic.c index 753851c11c..67f7b404e6 100644 --- a/components/esp32/panic.c +++ b/components/esp32/panic.c @@ -16,6 +16,7 @@ #include #include "rom/rtc.h" +#include "rom/uart.h" #include "freertos/FreeRTOS.h" #include "freertos/task.h" @@ -28,6 +29,7 @@ #include "soc/timer_group_struct.h" #include "soc/timer_group_reg.h" #include "soc/cpu.h" +#include "soc/rtc.h" #include "esp_gdbstub.h" #include "esp_panic.h" @@ -35,6 +37,7 @@ #include "esp_err.h" #include "esp_core_dump.h" #include "esp_spi_flash.h" +#include "esp_cache_err_int.h" /* Panic handlers; these get called when an unhandled exception occurs or the assembly-level @@ -120,7 +123,7 @@ static __attribute__((noreturn)) inline void invoke_abort() void abort() { #if !CONFIG_ESP32_PANIC_SILENT_REBOOT - ets_printf("abort() was called at PC 0x%08x\n", (intptr_t)__builtin_return_address(0) - 3); + ets_printf("abort() was called at PC 0x%08x on core %d\n", (intptr_t)__builtin_return_address(0) - 3, xPortGetCoreID()); #endif invoke_abort(); } @@ -149,9 +152,23 @@ static void haltOtherCore() esp_cpu_stall( xPortGetCoreID() == 0 ? 1 : 0 ); } + +static void setFirstBreakpoint(uint32_t pc) +{ + asm( + "wsr.ibreaka0 %0\n" \ + "rsr.ibreakenable a3\n" \ + "movi a4,1\n" \ + "or a4, a4, a3\n" \ + "wsr.ibreakenable a4\n" \ + ::"r"(pc):"a3", "a4"); +} + + void panicHandler(XtExcFrame *frame) { int *regs = (int *)frame; + int core_id = xPortGetCoreID(); //Please keep in sync with PANIC_RSN_* defines const char *reasons[] = { "Unknown reason", @@ -161,15 +178,21 @@ void panicHandler(XtExcFrame *frame) "Coprocessor exception", "Interrupt wdt timeout on CPU0", "Interrupt wdt timeout on CPU1", + "Cache disabled but cached memory region accessed", }; const char *reason = reasons[0]; //The panic reason is stored in the EXCCAUSE register. if (regs[20] <= PANIC_RSN_MAX) { reason = reasons[regs[20]]; } + if (frame->exccause == PANIC_RSN_CACHEERR && esp_cache_err_get_cpuid() != core_id) { + // Cache error interrupt will be handled by the panic handler + // on the other CPU. + return; + } haltOtherCore(); panicPutStr("Guru Meditation Error: Core "); - panicPutDec(xPortGetCoreID()); + panicPutDec(core_id); panicPutStr(" panic'ed ("); if (!abort_called) { panicPutStr(reason); @@ -204,22 +227,12 @@ void panicHandler(XtExcFrame *frame) } if (esp_cpu_in_ocd_debug_mode()) { - asm("break.n 1"); + setFirstBreakpoint(regs[1]); + return; } commonErrorHandler(frame); } -static void setFirstBreakpoint(uint32_t pc) -{ - asm( - "wsr.ibreaka0 %0\n" \ - "rsr.ibreakenable a3\n" \ - "movi a4,1\n" \ - "or a4, a4, a3\n" \ - "wsr.ibreakenable a4\n" \ - ::"r"(pc):"a3", "a4"); -} - void xt_unhandled_exception(XtExcFrame *frame) { int *regs = (int *)frame; @@ -315,6 +328,22 @@ void esp_panic_wdt_stop() WRITE_PERI_REG(RTC_CNTL_WDTWPROTECT_REG, 0); } +static void esp_panic_dig_reset() __attribute__((noreturn)); + +static void esp_panic_dig_reset() +{ + // make sure all the panic handler output is sent from UART FIFO + uart_tx_wait_idle(CONFIG_CONSOLE_UART_NUM); + // switch to XTAL (otherwise we will keep running from the PLL) + rtc_clk_cpu_freq_set(RTC_CPU_FREQ_XTAL); + // reset the digital part + esp_cpu_unstall(PRO_CPU_NUM); + SET_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, RTC_CNTL_SW_SYS_RST); + while (true) { + ; + } +} + static inline bool stackPointerIsSane(uint32_t sp) { return !(sp < 0x3ffae010 || sp > 0x3ffffff0 || ((sp & 0xf) != 0)); @@ -416,7 +445,12 @@ static void commonErrorHandler(XtExcFrame *frame) esp_panic_wdt_stop(); #if CONFIG_ESP32_PANIC_PRINT_REBOOT || CONFIG_ESP32_PANIC_SILENT_REBOOT panicPutStr("Rebooting...\r\n"); - esp_restart_noos(); + if (frame->exccause != PANIC_RSN_CACHEERR) { + esp_restart_noos(); + } else { + // The only way to clear invalid cache access interrupt is to reset the digital part + esp_panic_dig_reset(); + } #else disableAllWdts(); panicPutStr("CPU halted.\r\n"); diff --git a/components/freertos/xtensa_vectors.S b/components/freertos/xtensa_vectors.S index 7baae07ce0..ffbdb51407 100644 --- a/components/freertos/xtensa_vectors.S +++ b/components/freertos/xtensa_vectors.S @@ -93,6 +93,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "xtensa_rtos.h" #include "esp_panic.h" #include "sdkconfig.h" +#include "soc/soc.h" /* Define for workaround: pin no-cpu-affinity tasks to a cpu when fpu is used. Please change this when the tcb structure is changed @@ -360,20 +361,17 @@ _xt_panic: /* _xt_context_save seems to save the current a0, but we need the interuptees a0. Fix this. */ rsr a0, EXCSAVE_1 /* save interruptee's a0 */ + s32i a0, sp, XT_STK_A0 - /* Set up PS for C, disable all interrupts, and clear EXCM. */ - movi a0, PS_INTLEVEL(7) | PS_UM | PS_WOE + /* Set up PS for C, disable all interrupts except NMI and debug, and clear EXCM. */ + movi a0, PS_INTLEVEL(5) | PS_UM | PS_WOE wsr a0, PS //Call panic handler mov a6,sp call4 panicHandler -1: j 1b /* loop infinitely */ - - retw - .align 4 //Call using call0. Prints the hex char in a2. Kills a3, a4, a5 @@ -1638,34 +1636,84 @@ _xt_highint4: 1: #endif - /* USER_EDIT: - ADD HIGH PRIORITY LEVEL 4 INTERRUPT HANDLER CODE HERE. - */ + /* On the ESP32, this level is used for panic events that are detected by hardware and should + also panic when interrupts are disabled. At the moment, these are the interrupt watchdog + as well as the cache invalid access interrupt. (24 and 25) */ + /* Allocate exception frame and save minimal context. */ + mov a0, sp + addi sp, sp, -XT_STK_FRMSZ + s32i a0, sp, XT_STK_A1 + #if XCHAL_HAVE_WINDOWED + s32e a0, sp, -12 /* for debug backtrace */ + #endif + rsr a0, PS /* save interruptee's PS */ + s32i a0, sp, XT_STK_PS + rsr a0, EPC_4 /* save interruptee's PC */ + s32i a0, sp, XT_STK_PC + #if XCHAL_HAVE_WINDOWED + s32e a0, sp, -16 /* for debug backtrace */ + #endif + s32i a12, sp, XT_STK_A12 /* _xt_context_save requires A12- */ + s32i a13, sp, XT_STK_A13 /* A13 to have already been saved */ + call0 _xt_context_save - /* On the ESP32, this level is used for the INT_WDT handler. If that triggers, the program is stuck with interrupts - off and the CPU should panic. */ - rsr a0, EXCSAVE_4 - wsr a0, EXCSAVE_1 /* panic handler reads this register */ - /* Set EXCCAUSE to reflect cause of the wdt int trigger */ - movi a0,PANIC_RSN_INTWDT_CPU0 - wsr a0,EXCCAUSE + /* Save vaddr into exception frame */ + rsr a0, EXCVADDR + s32i a0, sp, XT_STK_EXCVADDR + + /* Figure out reason, save into EXCCAUSE reg */ + + rsr a0, INTERRUPT + extui a0, a0, ETS_CACHEERR_INUM, 1 /* get cacheerr int bit */ + beqz a0, 1f + /* Kill this interrupt; we cannot reset it. */ + rsr a0, INTENABLE + movi a4, ~(1<>(shift))&(mask)) //}} +#endif /* !__ASSEMBLER__ */ + //Periheral Clock {{ #define APB_CLK_FREQ_ROM ( 26*1000000 ) #define CPU_CLK_FREQ_ROM APB_CLK_FREQ_ROM @@ -297,7 +300,7 @@ * 22 3 extern edge FRC1 timer * 23 3 extern level * 24 4 extern level TG1_WDT - * 25 4 extern level Reserved Reserved + * 25 4 extern level CACHEERR * 26 5 extern level Reserved Reserved * 27 3 extern level Reserved Reserved * 28 4 extern edge @@ -314,6 +317,7 @@ #define ETS_TG0_T1_INUM 10 /**< use edge interrupt*/ #define ETS_FRC1_INUM 22 #define ETS_T1_WDT_INUM 24 +#define ETS_CACHEERR_INUM 25 //CPU0 Interrupt number used in ROM, should be cancelled in SDK #define ETS_SLC_INUM 1