diff --git a/components/esp32/cache_err_int.c b/components/esp32/cache_err_int.c
new file mode 100644
index 0000000000..ece6c2d604
--- /dev/null
+++ b/components/esp32/cache_err_int.c
@@ -0,0 +1,125 @@
+// Copyright 2015-2017 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+  The cache has an interrupt that can be raised as soon as an access to a cached
+  region (flash, psram) is done without the cache being enabled. We use that here
+  to panic the CPU, which from a debugging perspective is better than grabbing bad
+  data from the bus.
+*/
+
+// NOTE(review): the names of the four system headers below were missing from the
+// text under review (bare "#include" directives); restored as a best guess - confirm.
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include "freertos/FreeRTOS.h"
+#include "esp_err.h"
+#include "esp_intr.h"
+#include "esp_attr.h"
+#include "esp_cache_err_int.h"
+#include "soc/dport_reg.h"
+#include "sdkconfig.h"
+
+/**
+ * @brief Initialize the cache invalid access interrupt on the calling CPU.
+ *
+ * Routes ETS_CACHE_IA_INTR_SOURCE to interrupt input ETS_CACHEERR_INUM of the
+ * CPU this function runs on and sets that CPU's DPORT_CACHE_IA_INT_* enable
+ * bits. The interrupt input is disabled while the routing and enable bits are
+ * being changed and is enabled again as the last step.
+ */
+void esp_cache_err_int_init(void)
+{
+    uint32_t core_id = xPortGetCoreID();
+    ESP_INTR_DISABLE(ETS_CACHEERR_INUM);
+
+    // We do not register a handler for the interrupt because it is interrupt
+    // level 4 which is not serviceable from C. Instead, xtensa_vectors.S has
+    // a call to the panic handler for
+    // this interrupt.
+    intr_matrix_set(core_id, ETS_CACHE_IA_INTR_SOURCE, ETS_CACHEERR_INUM);
+
+    // Enable invalid cache access interrupt when the cache is disabled.
+    // When the interrupt happens, we can not determine the CPU where the
+    // invalid cache access has occurred. We enable the interrupt to catch
+    // invalid access on both CPUs, but the interrupt is connected to the
+    // CPU which happens to call this function.
+    // For this reason, panic handler backtrace will not be correct if the
+    // interrupt is connected to PRO CPU and invalid access happens on the APP
+    // CPU.
+    // NOTE(review): the code below sets only the calling CPU's
+    // DPORT_CACHE_IA_INT_PRO_* or DPORT_CACHE_IA_INT_APP_* bits, and
+    // esp_cache_err_get_cpuid() reports a CPU - this paragraph may be stale.
+
+    if (core_id == PRO_CPU_NUM) {
+        SET_PERI_REG_MASK(DPORT_CACHE_IA_INT_EN_REG,
+            DPORT_CACHE_IA_INT_PRO_OPPOSITE |
+            DPORT_CACHE_IA_INT_PRO_DRAM1 |
+            DPORT_CACHE_IA_INT_PRO_DROM0 |
+            DPORT_CACHE_IA_INT_PRO_IROM0 |
+            DPORT_CACHE_IA_INT_PRO_IRAM0 |
+            DPORT_CACHE_IA_INT_PRO_IRAM1);
+    } else {
+        SET_PERI_REG_MASK(DPORT_CACHE_IA_INT_EN_REG,
+            DPORT_CACHE_IA_INT_APP_OPPOSITE |
+            DPORT_CACHE_IA_INT_APP_DRAM1 |
+            DPORT_CACHE_IA_INT_APP_DROM0 |
+            DPORT_CACHE_IA_INT_APP_IROM0 |
+            DPORT_CACHE_IA_INT_APP_IRAM0 |
+            DPORT_CACHE_IA_INT_APP_IRAM1);
+    }
+    ESP_INTR_ENABLE(ETS_CACHEERR_INUM);
+}
+
+/**
+ * @brief Get the CPU whose cache invalid access status bits are set.
+ *
+ * Tests DPORT_PRO_DCACHE_DBUG3_REG against the PRO mask first, then
+ * DPORT_APP_DCACHE_DBUG3_REG against the APP mask.
+ *
+ * @return PRO_CPU_NUM or APP_CPU_NUM for the first register that has a masked
+ *         bit set, -1 if neither register has one.
+ */
+int IRAM_ATTR esp_cache_err_get_cpuid(void)
+{
+    // NOTE(review): each mask combines this CPU's own *_CACHE_IA_* bits with
+    // the other CPU's *_OPPOSITE bit - looks deliberate, confirm against the
+    // register description.
+    const uint32_t pro_mask =
+        DPORT_PRO_CPU_DISABLED_CACHE_IA_DRAM1 |
+        DPORT_PRO_CPU_DISABLED_CACHE_IA_DROM0 |
+        DPORT_PRO_CPU_DISABLED_CACHE_IA_IROM0 |
+        DPORT_PRO_CPU_DISABLED_CACHE_IA_IRAM0 |
+        DPORT_PRO_CPU_DISABLED_CACHE_IA_IRAM1 |
+        DPORT_APP_CPU_DISABLED_CACHE_IA_OPPOSITE;
+
+    if (GET_PERI_REG_MASK(DPORT_PRO_DCACHE_DBUG3_REG, pro_mask)) {
+        return PRO_CPU_NUM;
+    }
+
+    const uint32_t app_mask =
+        DPORT_APP_CPU_DISABLED_CACHE_IA_DRAM1 |
+        DPORT_APP_CPU_DISABLED_CACHE_IA_DROM0 |
+        DPORT_APP_CPU_DISABLED_CACHE_IA_IROM0 |
+        DPORT_APP_CPU_DISABLED_CACHE_IA_IRAM0 |
+        DPORT_APP_CPU_DISABLED_CACHE_IA_IRAM1 |
+        DPORT_PRO_CPU_DISABLED_CACHE_IA_OPPOSITE;
+
+    if (GET_PERI_REG_MASK(DPORT_APP_DCACHE_DBUG3_REG, app_mask)) {
+        return APP_CPU_NUM;
+    }
+    return -1;
+}
diff --git a/components/esp32/cpu_start.c b/components/esp32/cpu_start.c index 61fd7f8b44..c3ef142c6b 100644 --- a/components/esp32/cpu_start.c +++ b/components/esp32/cpu_start.c @@ -55,6 +55,7 @@ #include "esp_int_wdt.h" #include "esp_task_wdt.h" #include "esp_phy_init.h" +#include "esp_cache_err_int.h" #include "esp_coexist.h" #include "esp_panic.h" #include "esp_core_dump.h" @@ -228,6 +229,7 @@ void start_cpu0_default(void) #if CONFIG_TASK_WDT esp_task_wdt_init(); #endif + esp_cache_err_int_init(); esp_crosscore_int_init(); esp_ipc_init(); spi_flash_init(); @@ -257,6 +259,7 @@ void start_cpu1_default(void) } //Take care putting stuff here: if asked, FreeRTOS will happily tell you the scheduler //has started, but it isn't active *on this CPU* yet. + esp_cache_err_int_init(); esp_crosscore_int_init(); ESP_EARLY_LOGI(TAG, "Starting scheduler on APP CPU.");
diff --git a/components/esp32/include/esp_cache_err_int.h b/components/esp32/include/esp_cache_err_int.h
new file mode 100644
index 0000000000..bcbd63e799
--- /dev/null
+++ b/components/esp32/include/esp_cache_err_int.h
@@ -0,0 +1,45 @@
+// Copyright 2015-2017 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ESP_CACHE_ERR_INT_H
+#define ESP_CACHE_ERR_INT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief initialize cache invalid access interrupt
+ *
+ * This function enables cache invalid access interrupt source and connects it
+ * to interrupt input number ETS_CACHEERR_INUM (see soc/soc.h). It is called
+ * from the startup code of each CPU and acts on the CPU it is called from.
+ */
+void esp_cache_err_int_init(void);
+
+
+/**
+ * @brief get the CPU which caused cache invalid access interrupt
+ * @return
+ *  - PRO_CPU_NUM, if PRO_CPU has caused cache IA interrupt
+ *  - APP_CPU_NUM, if APP_CPU has caused cache IA interrupt
+ *  - (-1) otherwise
+ */
+int esp_cache_err_get_cpuid(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ESP_CACHE_ERR_INT_H */
diff --git a/components/esp32/include/esp_panic.h b/components/esp32/include/esp_panic.h index c9ee14099d..c4cc385720 100644 --- a/components/esp32/include/esp_panic.h +++ b/components/esp32/include/esp_panic.h @@ -13,7 +13,8 @@ extern "C" #define PANIC_RSN_COPROCEXCEPTION 4 #define PANIC_RSN_INTWDT_CPU0 5 #define PANIC_RSN_INTWDT_CPU1 6 -#define PANIC_RSN_MAX 6 +#define PANIC_RSN_CACHEERR 7 +#define PANIC_RSN_MAX 7 #ifndef __ASSEMBLER__ diff --git a/components/esp32/panic.c b/components/esp32/panic.c index 753851c11c..08c13f7658 100644 --- a/components/esp32/panic.c +++ b/components/esp32/panic.c @@ -16,6 +16,7 @@ #include #include "rom/rtc.h" +#include "rom/uart.h" #include "freertos/FreeRTOS.h" #include "freertos/task.h" @@ -28,6 +29,7 @@ #include "soc/timer_group_struct.h" #include "soc/timer_group_reg.h" #include "soc/cpu.h" +#include "soc/rtc.h" #include "esp_gdbstub.h" #include "esp_panic.h" @@ -35,6 +37,7 @@ #include "esp_err.h" #include "esp_core_dump.h" #include "esp_spi_flash.h" +#include "esp_cache_err_int.h" /* Panic handlers; these get called when an unhandled exception occurs or the assembly-level @@ -50,8 +53,8 @@ //printf may be broken, so we fix our own printing fns...
static void panicPutChar(char c) { - while (((READ_PERI_REG(UART_STATUS_REG(0)) >> UART_TXFIFO_CNT_S)&UART_TXFIFO_CNT) >= 126) ; - WRITE_PERI_REG(UART_FIFO_REG(0), c); + while (((READ_PERI_REG(UART_STATUS_REG(CONFIG_CONSOLE_UART_NUM)) >> UART_TXFIFO_CNT_S)&UART_TXFIFO_CNT) >= 126) ; + WRITE_PERI_REG(UART_FIFO_REG(CONFIG_CONSOLE_UART_NUM), c); } static void panicPutStr(const char *c) @@ -120,7 +123,7 @@ static __attribute__((noreturn)) inline void invoke_abort() void abort() { #if !CONFIG_ESP32_PANIC_SILENT_REBOOT - ets_printf("abort() was called at PC 0x%08x\n", (intptr_t)__builtin_return_address(0) - 3); + ets_printf("abort() was called at PC 0x%08x on core %d\n", (intptr_t)__builtin_return_address(0) - 3, xPortGetCoreID()); #endif invoke_abort(); } @@ -149,9 +152,22 @@ static void haltOtherCore() esp_cpu_stall( xPortGetCoreID() == 0 ? 1 : 0 ); }
+
+static void setFirstBreakpoint(uint32_t pc) /* arm hardware instruction breakpoint 0 at address pc */
+{
+    asm(
+        "wsr.ibreaka0 %0\n" /* IBREAKA0 = pc */ \
+        "rsr.ibreakenable a3\n" /* a3 = current IBREAKENABLE */ \
+        "movi a4,1\n" \
+        "or a4, a4, a3\n" /* a4 = a3 | 1: set bit 0, keep the other enable bits */ \
+        "wsr.ibreakenable a4\n" \
+        ::"r"(pc):"a3", "a4"); /* no outputs; pc is the only input; a3 and a4 are clobbered */
+}
+
+
 void panicHandler(XtExcFrame *frame) { - int *regs = (int *)frame; + int core_id = xPortGetCoreID(); //Please keep in sync with PANIC_RSN_* defines const char *reasons[] = { "Unknown reason", @@ -161,20 +177,26 @@ void panicHandler(XtExcFrame *frame) "Coprocessor exception", "Interrupt wdt timeout on CPU0", "Interrupt wdt timeout on CPU1", + "Cache disabled but cached memory region accessed", }; const char *reason = reasons[0]; //The panic reason is stored in the EXCCAUSE register. - if (regs[20] <= PANIC_RSN_MAX) { - reason = reasons[regs[20]]; + if (frame->exccause <= PANIC_RSN_MAX) { + reason = reasons[frame->exccause]; + } + if (frame->exccause == PANIC_RSN_CACHEERR && esp_cache_err_get_cpuid() != core_id) { + // Cache error interrupt will be handled by the panic handler + // on the other CPU.
+ return; } haltOtherCore(); panicPutStr("Guru Meditation Error: Core "); - panicPutDec(xPortGetCoreID()); + panicPutDec(core_id); panicPutStr(" panic'ed ("); if (!abort_called) { panicPutStr(reason); panicPutStr(")\r\n"); - if (regs[20]==PANIC_RSN_DEBUGEXCEPTION) { + if (frame->exccause == PANIC_RSN_DEBUGEXCEPTION) { int debugRsn; asm("rsr.debugcause %0":"=r"(debugRsn)); panicPutStr("Debug exception reason: "); @@ -204,32 +226,19 @@ void panicHandler(XtExcFrame *frame) } if (esp_cpu_in_ocd_debug_mode()) { - asm("break.n 1"); + setFirstBreakpoint(frame->pc); + return; } commonErrorHandler(frame); } -static void setFirstBreakpoint(uint32_t pc) -{ - asm( - "wsr.ibreaka0 %0\n" \ - "rsr.ibreakenable a3\n" \ - "movi a4,1\n" \ - "or a4, a4, a3\n" \ - "wsr.ibreakenable a4\n" \ - ::"r"(pc):"a3", "a4"); -} - void xt_unhandled_exception(XtExcFrame *frame) { - int *regs = (int *)frame; - int x; - haltOtherCore(); panicPutStr("Guru Meditation Error of type "); - x = regs[20]; - if (x < 40) { - panicPutStr(edesc[x]); + int exccause = frame->exccause; + if (exccause < 40) { + panicPutStr(edesc[exccause]); } else { panicPutStr("Unknown"); } @@ -237,11 +246,11 @@ void xt_unhandled_exception(XtExcFrame *frame) panicPutDec(xPortGetCoreID()); if (esp_cpu_in_ocd_debug_mode()) { panicPutStr(" at pc="); - panicPutHex(regs[1]); + panicPutHex(frame->pc); panicPutStr(". Setting bp and returning..\r\n"); //Stick a hardware breakpoint on the address the handler returns to. This way, the OCD debugger //will kick in exactly at the context the error happened. - setFirstBreakpoint(regs[1]); + setFirstBreakpoint(frame->pc); return; } panicPutStr(". 
Exception was unhandled.\r\n"); @@ -315,6 +324,22 @@ void esp_panic_wdt_stop() WRITE_PERI_REG(RTC_CNTL_WDTWPROTECT_REG, 0); }
+static void esp_panic_dig_reset() __attribute__((noreturn));
+
+static void esp_panic_dig_reset() /* last step of a cache-error panic: drain console UART, switch to XTAL, reset; does not return */
+{
+    // make sure all the panic handler output is sent from UART FIFO
+    uart_tx_wait_idle(CONFIG_CONSOLE_UART_NUM);
+    // switch to XTAL (otherwise we will keep running from the PLL)
+    rtc_clk_cpu_freq_set(RTC_CPU_FREQ_XTAL);
+    // reset the digital part
+    esp_cpu_unstall(PRO_CPU_NUM); // NOTE(review): presumably because haltOtherCore() may have stalled the PRO CPU - confirm
+    SET_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, RTC_CNTL_SW_SYS_RST);
+    while (true) { // spin here; the function is declared noreturn
+        ;
+    }
+}
+
 static inline bool stackPointerIsSane(uint32_t sp) { return !(sp < 0x3ffae010 || sp > 0x3ffffff0 || ((sp & 0xf) != 0)); @@ -416,7 +441,12 @@ static void commonErrorHandler(XtExcFrame *frame) esp_panic_wdt_stop(); #if CONFIG_ESP32_PANIC_PRINT_REBOOT || CONFIG_ESP32_PANIC_SILENT_REBOOT panicPutStr("Rebooting...\r\n"); - esp_restart_noos(); + if (frame->exccause != PANIC_RSN_CACHEERR) { + esp_restart_noos(); + } else { + // The only way to clear invalid cache access interrupt is to reset the digital part + esp_panic_dig_reset(); + } #else disableAllWdts(); panicPutStr("CPU halted.\r\n"); diff --git a/components/freertos/xtensa_vectors.S b/components/freertos/xtensa_vectors.S index 7baae07ce0..ffbdb51407 100644 --- a/components/freertos/xtensa_vectors.S +++ b/components/freertos/xtensa_vectors.S @@ -93,6 +93,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "xtensa_rtos.h" #include "esp_panic.h" #include "sdkconfig.h" +#include "soc/soc.h" /* Define for workaround: pin no-cpu-affinity tasks to a cpu when fpu is used. Please change this when the tcb structure is changed @@ -360,20 +361,17 @@ _xt_panic: /* _xt_context_save seems to save the current a0, but we need the interuptees a0. Fix this. */ rsr a0, EXCSAVE_1 /* save interruptee's a0 */ + s32i a0, sp, XT_STK_A0 - /* Set up PS for C, disable all interrupts, and clear EXCM.
*/ - movi a0, PS_INTLEVEL(7) | PS_UM | PS_WOE + /* Set up PS for C, disable all interrupts except NMI and debug, and clear EXCM. */ + movi a0, PS_INTLEVEL(5) | PS_UM | PS_WOE wsr a0, PS //Call panic handler mov a6,sp call4 panicHandler -1: j 1b /* loop infinitely */ - - retw - .align 4 //Call using call0. Prints the hex char in a2. Kills a3, a4, a5 @@ -1638,34 +1636,84 @@ _xt_highint4: 1: #endif - /* USER_EDIT: - ADD HIGH PRIORITY LEVEL 4 INTERRUPT HANDLER CODE HERE. - */ + /* On the ESP32, this level is used for panic events that are detected by hardware and should + also panic when interrupts are disabled. At the moment, these are the interrupt watchdog + as well as the cache invalid access interrupt. (24 and 25) */ + /* Allocate exception frame and save minimal context. */ + mov a0, sp + addi sp, sp, -XT_STK_FRMSZ + s32i a0, sp, XT_STK_A1 + #if XCHAL_HAVE_WINDOWED + s32e a0, sp, -12 /* for debug backtrace */ + #endif + rsr a0, PS /* save interruptee's PS */ + s32i a0, sp, XT_STK_PS + rsr a0, EPC_4 /* save interruptee's PC */ + s32i a0, sp, XT_STK_PC + #if XCHAL_HAVE_WINDOWED + s32e a0, sp, -16 /* for debug backtrace */ + #endif + s32i a12, sp, XT_STK_A12 /* _xt_context_save requires A12- */ + s32i a13, sp, XT_STK_A13 /* A13 to have already been saved */ + call0 _xt_context_save - /* On the ESP32, this level is used for the INT_WDT handler. If that triggers, the program is stuck with interrupts - off and the CPU should panic. */ - rsr a0, EXCSAVE_4 - wsr a0, EXCSAVE_1 /* panic handler reads this register */ - /* Set EXCCAUSE to reflect cause of the wdt int trigger */ - movi a0,PANIC_RSN_INTWDT_CPU0 - wsr a0,EXCCAUSE + /* Save vaddr into exception frame */ + rsr a0, EXCVADDR + s32i a0, sp, XT_STK_EXCVADDR + + /* Figure out reason, save into EXCCAUSE reg */ + + rsr a0, INTERRUPT + extui a0, a0, ETS_CACHEERR_INUM, 1 /* get cacheerr int bit */ + beqz a0, 1f + /* Kill this interrupt; we cannot reset it. 
*/ + rsr a0, INTENABLE + movi a4, ~(1<>(shift))&(mask)) //}} +#endif /* !__ASSEMBLER__ */ + //Periheral Clock {{ #define APB_CLK_FREQ_ROM ( 26*1000000 ) #define CPU_CLK_FREQ_ROM APB_CLK_FREQ_ROM @@ -297,7 +300,7 @@ * 22 3 extern edge FRC1 timer * 23 3 extern level * 24 4 extern level TG1_WDT - * 25 4 extern level Reserved Reserved + * 25 4 extern level CACHEERR * 26 5 extern level Reserved Reserved * 27 3 extern level Reserved Reserved * 28 4 extern edge @@ -314,6 +317,7 @@ #define ETS_TG0_T1_INUM 10 /**< use edge interrupt*/ #define ETS_FRC1_INUM 22 #define ETS_T1_WDT_INUM 24 +#define ETS_CACHEERR_INUM 25 //CPU0 Interrupt number used in ROM, should be cancelled in SDK #define ETS_SLC_INUM 1 diff --git a/components/spi_flash/test/test_cache_disabled.c b/components/spi_flash/test/test_cache_disabled.c index 8caa7e83f9..ba4e8534e6 100644 --- a/components/spi_flash/test/test_cache_disabled.c +++ b/components/spi_flash/test/test_cache_disabled.c @@ -51,3 +51,30 @@ TEST_CASE("spi_flash_cache_enabled() works on both CPUs", "[spi_flash]") vQueueDelete(result_queue); }
+static const uint32_t s_in_rodata[] = { 0x12345678, 0xfedcba98 }; // NOTE(review): name suggests flash-mapped .rodata, reachable only through the cache - confirm
+
+static void IRAM_ATTR cache_access_test_func(void* arg) // task body used by the two "invalid access to cache" test cases that follow
+{
+    spi_flash_disable_interrupts_caches_and_other_cpu();
+    volatile uint32_t* src = (volatile uint32_t*) s_in_rodata; // volatile so the two reads below are actually performed
+    uint32_t v1 = src[0]; // read issued between the disable/enable calls: the access under test
+    uint32_t v2 = src[1];
+    bool cache_enabled = spi_flash_cache_enabled();
+    spi_flash_enable_interrupts_caches_and_other_cpu();
+    printf("%d %x %x\n", cache_enabled, v1, v2); // only reached if the reads above did not reset the chip
+    vTaskDelete(NULL);
+}
+
+// These tests work properly if they reset the chip with the
+// "Cache disabled but cached memory region accessed" reason and the correct CPU is logged.
+TEST_CASE("invalid access to cache raises panic (PRO CPU)", "[spi_flash][ignore]")
+{
+    xTaskCreatePinnedToCore(&cache_access_test_func, "ia", 2048, NULL, 5, NULL, 0); // final argument 0: task is pinned to core 0 (the "PRO CPU" case)
+    vTaskDelay(1000/portTICK_PERIOD_MS); // 1000 ms expressed in ticks; presumably gives the pinned task time to run and fault - confirm
+}
+
+TEST_CASE("invalid access to cache raises panic (APP CPU)", "[spi_flash][ignore]")
+{
+    xTaskCreatePinnedToCore(&cache_access_test_func, "ia", 2048, NULL, 5, NULL, 1); // final argument 1: task is pinned to core 1 (the "APP CPU" case)
+    vTaskDelay(1000/portTICK_PERIOD_MS); // same 1000 ms wait as the PRO CPU case
+}