add detection of invalid cache access

- fix level 4 interrupt vectors to produce correct backtrace
- initialize invalid cache access interrupt on startup
- handle invalid cache access in panic handler
This commit is contained in:
Jeroen Domburg 2017-03-09 20:50:39 +08:00 committed by Ivan Grokhotkov
parent 3c6c1e36ec
commit 0b79d07d34
7 changed files with 262 additions and 40 deletions

View File

@ -0,0 +1,99 @@
// Copyright 2015-2017 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
The cache has an interrupt that can be raised as soon as an access to a cached
region (flash, psram) is done without the cache being enabled. We use that here
to panic the CPU, which from a debugging perspective is better than grabbing bad
data from the bus.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include "freertos/FreeRTOS.h"
#include "esp_err.h"
#include "esp_intr.h"
#include "esp_attr.h"
#include "soc/dport_reg.h"
#include "sdkconfig.h"
void esp_cache_err_int_init()
{
uint32_t core_id = xPortGetCoreID();
ESP_INTR_DISABLE(ETS_CACHEERR_INUM);
// We do not register a handler for the interrupt because it is interrupt
// level 4 which is not serviceable from C. Instead, xtensa_vectors.S has
// a call to the panic handler for
// this interrupt.
intr_matrix_set(core_id, ETS_CACHE_IA_INTR_SOURCE, ETS_CACHEERR_INUM);
// Enable invalid cache access interrupt when the cache is disabled.
// When the interrupt happens, we can not determine the CPU where the
// invalid cache access has occurred. We enable the interrupt to catch
// invalid access on both CPUs, but the interrupt is connected to the
// CPU which happens to call this function.
// For this reason, panic handler backtrace will not be correct if the
// interrupt is connected to PRO CPU and invalid access happens on the APP
// CPU.
if (core_id == PRO_CPU_NUM) {
SET_PERI_REG_MASK(DPORT_CACHE_IA_INT_EN_REG,
DPORT_CACHE_IA_INT_PRO_OPPOSITE |
DPORT_CACHE_IA_INT_PRO_DRAM1 |
DPORT_CACHE_IA_INT_PRO_DROM0 |
DPORT_CACHE_IA_INT_PRO_IROM0 |
DPORT_CACHE_IA_INT_PRO_IRAM0 |
DPORT_CACHE_IA_INT_PRO_IRAM1);
} else {
SET_PERI_REG_MASK(DPORT_CACHE_IA_INT_EN_REG,
DPORT_CACHE_IA_INT_APP_OPPOSITE |
DPORT_CACHE_IA_INT_APP_DRAM1 |
DPORT_CACHE_IA_INT_APP_DROM0 |
DPORT_CACHE_IA_INT_APP_IROM0 |
DPORT_CACHE_IA_INT_APP_IRAM0 |
DPORT_CACHE_IA_INT_APP_IRAM1);
}
ESP_INTR_ENABLE(ETS_CACHEERR_INUM);
}
int IRAM_ATTR esp_cache_err_get_cpuid()
{
const uint32_t pro_mask =
DPORT_PRO_CPU_DISABLED_CACHE_IA_DRAM1 |
DPORT_PRO_CPU_DISABLED_CACHE_IA_DROM0 |
DPORT_PRO_CPU_DISABLED_CACHE_IA_IROM0 |
DPORT_PRO_CPU_DISABLED_CACHE_IA_IRAM0 |
DPORT_PRO_CPU_DISABLED_CACHE_IA_IRAM1 |
DPORT_APP_CPU_DISABLED_CACHE_IA_OPPOSITE;
if (GET_PERI_REG_MASK(DPORT_PRO_DCACHE_DBUG3_REG, pro_mask)) {
return PRO_CPU_NUM;
}
const uint32_t app_mask =
DPORT_APP_CPU_DISABLED_CACHE_IA_DRAM1 |
DPORT_APP_CPU_DISABLED_CACHE_IA_DROM0 |
DPORT_APP_CPU_DISABLED_CACHE_IA_IROM0 |
DPORT_APP_CPU_DISABLED_CACHE_IA_IRAM0 |
DPORT_APP_CPU_DISABLED_CACHE_IA_IRAM1 |
DPORT_PRO_CPU_DISABLED_CACHE_IA_OPPOSITE;
if (GET_PERI_REG_MASK(DPORT_APP_DCACHE_DBUG3_REG, app_mask)) {
return APP_CPU_NUM;
}
return -1;
}

View File

@ -55,6 +55,7 @@
#include "esp_int_wdt.h"
#include "esp_task_wdt.h"
#include "esp_phy_init.h"
#include "esp_cache_err_int.h"
#include "esp_coexist.h"
#include "esp_panic.h"
#include "esp_core_dump.h"
@ -228,6 +229,7 @@ void start_cpu0_default(void)
#if CONFIG_TASK_WDT
esp_task_wdt_init();
#endif
esp_cache_err_int_init();
esp_crosscore_int_init();
esp_ipc_init();
spi_flash_init();
@ -257,6 +259,7 @@ void start_cpu1_default(void)
}
//Take care putting stuff here: if asked, FreeRTOS will happily tell you the scheduler
//has started, but it isn't active *on this CPU* yet.
esp_cache_err_int_init();
esp_crosscore_int_init();
ESP_EARLY_LOGI(TAG, "Starting scheduler on APP CPU.");

View File

@ -0,0 +1,33 @@
// Copyright 2015-2017 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* @brief initialize cache invalid access interrupt
*
* This function enables cache invalid access interrupt source and connects it
* to interrupt input number ETS_CACHEERR_INUM (see soc/soc.h). It is called
* from the startup code.
*/
void esp_cache_err_int_init();
/**
* @brief get the CPU which caused cache invalid access interrupt
* @return
* - PRO_CPU_NUM, if PRO_CPU has caused cache IA interrupt
* - APP_CPU_NUM, if APP_CPU has caused cache IA interrupt
* - (-1) otherwise
*/
int esp_cache_err_get_cpuid();

View File

@ -13,7 +13,8 @@ extern "C"
#define PANIC_RSN_COPROCEXCEPTION 4
#define PANIC_RSN_INTWDT_CPU0 5
#define PANIC_RSN_INTWDT_CPU1 6
#define PANIC_RSN_MAX 6
#define PANIC_RSN_CACHEERR 7
#define PANIC_RSN_MAX 7
#ifndef __ASSEMBLER__

View File

@ -16,6 +16,7 @@
#include <xtensa/config/core.h>
#include "rom/rtc.h"
#include "rom/uart.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
@ -28,6 +29,7 @@
#include "soc/timer_group_struct.h"
#include "soc/timer_group_reg.h"
#include "soc/cpu.h"
#include "soc/rtc.h"
#include "esp_gdbstub.h"
#include "esp_panic.h"
@ -35,6 +37,7 @@
#include "esp_err.h"
#include "esp_core_dump.h"
#include "esp_spi_flash.h"
#include "esp_cache_err_int.h"
/*
Panic handlers; these get called when an unhandled exception occurs or the assembly-level
@ -120,7 +123,7 @@ static __attribute__((noreturn)) inline void invoke_abort()
void abort()
{
#if !CONFIG_ESP32_PANIC_SILENT_REBOOT
ets_printf("abort() was called at PC 0x%08x\n", (intptr_t)__builtin_return_address(0) - 3);
ets_printf("abort() was called at PC 0x%08x on core %d\n", (intptr_t)__builtin_return_address(0) - 3, xPortGetCoreID());
#endif
invoke_abort();
}
@ -149,9 +152,23 @@ static void haltOtherCore()
esp_cpu_stall( xPortGetCoreID() == 0 ? 1 : 0 );
}
static void setFirstBreakpoint(uint32_t pc)
{
asm(
"wsr.ibreaka0 %0\n" \
"rsr.ibreakenable a3\n" \
"movi a4,1\n" \
"or a4, a4, a3\n" \
"wsr.ibreakenable a4\n" \
::"r"(pc):"a3", "a4");
}
void panicHandler(XtExcFrame *frame)
{
int *regs = (int *)frame;
int core_id = xPortGetCoreID();
//Please keep in sync with PANIC_RSN_* defines
const char *reasons[] = {
"Unknown reason",
@ -161,15 +178,21 @@ void panicHandler(XtExcFrame *frame)
"Coprocessor exception",
"Interrupt wdt timeout on CPU0",
"Interrupt wdt timeout on CPU1",
"Cache disabled but cached memory region accessed",
};
const char *reason = reasons[0];
//The panic reason is stored in the EXCCAUSE register.
if (regs[20] <= PANIC_RSN_MAX) {
reason = reasons[regs[20]];
}
if (frame->exccause == PANIC_RSN_CACHEERR && esp_cache_err_get_cpuid() != core_id) {
// Cache error interrupt will be handled by the panic handler
// on the other CPU.
return;
}
haltOtherCore();
panicPutStr("Guru Meditation Error: Core ");
panicPutDec(xPortGetCoreID());
panicPutDec(core_id);
panicPutStr(" panic'ed (");
if (!abort_called) {
panicPutStr(reason);
@ -204,22 +227,12 @@ void panicHandler(XtExcFrame *frame)
}
if (esp_cpu_in_ocd_debug_mode()) {
asm("break.n 1");
setFirstBreakpoint(regs[1]);
return;
}
commonErrorHandler(frame);
}
static void setFirstBreakpoint(uint32_t pc)
{
asm(
"wsr.ibreaka0 %0\n" \
"rsr.ibreakenable a3\n" \
"movi a4,1\n" \
"or a4, a4, a3\n" \
"wsr.ibreakenable a4\n" \
::"r"(pc):"a3", "a4");
}
void xt_unhandled_exception(XtExcFrame *frame)
{
int *regs = (int *)frame;
@ -315,6 +328,22 @@ void esp_panic_wdt_stop()
WRITE_PERI_REG(RTC_CNTL_WDTWPROTECT_REG, 0);
}
static void esp_panic_dig_reset() __attribute__((noreturn));
static void esp_panic_dig_reset()
{
// make sure all the panic handler output is sent from UART FIFO
uart_tx_wait_idle(CONFIG_CONSOLE_UART_NUM);
// switch to XTAL (otherwise we will keep running from the PLL)
rtc_clk_cpu_freq_set(RTC_CPU_FREQ_XTAL);
// reset the digital part
esp_cpu_unstall(PRO_CPU_NUM);
SET_PERI_REG_MASK(RTC_CNTL_OPTIONS0_REG, RTC_CNTL_SW_SYS_RST);
while (true) {
;
}
}
static inline bool stackPointerIsSane(uint32_t sp)
{
return !(sp < 0x3ffae010 || sp > 0x3ffffff0 || ((sp & 0xf) != 0));
@ -416,7 +445,12 @@ static void commonErrorHandler(XtExcFrame *frame)
esp_panic_wdt_stop();
#if CONFIG_ESP32_PANIC_PRINT_REBOOT || CONFIG_ESP32_PANIC_SILENT_REBOOT
panicPutStr("Rebooting...\r\n");
if (frame->exccause != PANIC_RSN_CACHEERR) {
esp_restart_noos();
} else {
// The only way to clear invalid cache access interrupt is to reset the digital part
esp_panic_dig_reset();
}
#else
disableAllWdts();
panicPutStr("CPU halted.\r\n");

View File

@ -93,6 +93,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "xtensa_rtos.h"
#include "esp_panic.h"
#include "sdkconfig.h"
#include "soc/soc.h"
/*
Define for workaround: pin no-cpu-affinity tasks to a cpu when fpu is used.
Please change this when the tcb structure is changed
@ -360,20 +361,17 @@ _xt_panic:
/* _xt_context_save seems to save the current a0, but we need the interuptees a0. Fix this. */
rsr a0, EXCSAVE_1 /* save interruptee's a0 */
s32i a0, sp, XT_STK_A0
/* Set up PS for C, disable all interrupts, and clear EXCM. */
movi a0, PS_INTLEVEL(7) | PS_UM | PS_WOE
/* Set up PS for C, disable all interrupts except NMI and debug, and clear EXCM. */
movi a0, PS_INTLEVEL(5) | PS_UM | PS_WOE
wsr a0, PS
//Call panic handler
mov a6,sp
call4 panicHandler
1: j 1b /* loop infinitely */
retw
.align 4
//Call using call0. Prints the hex char in a2. Kills a3, a4, a5
@ -1638,34 +1636,84 @@ _xt_highint4:
1:
#endif
/* USER_EDIT:
ADD HIGH PRIORITY LEVEL 4 INTERRUPT HANDLER CODE HERE.
*/
/* On the ESP32, this level is used for panic events that are detected by hardware and should
also panic when interrupts are disabled. At the moment, these are the interrupt watchdog
as well as the cache invalid access interrupt. (24 and 25) */
/* Allocate exception frame and save minimal context. */
mov a0, sp
addi sp, sp, -XT_STK_FRMSZ
s32i a0, sp, XT_STK_A1
#if XCHAL_HAVE_WINDOWED
s32e a0, sp, -12 /* for debug backtrace */
#endif
rsr a0, PS /* save interruptee's PS */
s32i a0, sp, XT_STK_PS
rsr a0, EPC_4 /* save interruptee's PC */
s32i a0, sp, XT_STK_PC
#if XCHAL_HAVE_WINDOWED
s32e a0, sp, -16 /* for debug backtrace */
#endif
s32i a12, sp, XT_STK_A12 /* _xt_context_save requires A12- */
s32i a13, sp, XT_STK_A13 /* A13 to have already been saved */
call0 _xt_context_save
/* On the ESP32, this level is used for the INT_WDT handler. If that triggers, the program is stuck with interrupts
off and the CPU should panic. */
rsr a0, EXCSAVE_4
wsr a0, EXCSAVE_1 /* panic handler reads this register */
/* Set EXCCAUSE to reflect cause of the wdt int trigger */
movi a0,PANIC_RSN_INTWDT_CPU0
wsr a0,EXCCAUSE
/* Save vaddr into exception frame */
rsr a0, EXCVADDR
s32i a0, sp, XT_STK_EXCVADDR
/* Figure out reason, save into EXCCAUSE reg */
rsr a0, INTERRUPT
extui a0, a0, ETS_CACHEERR_INUM, 1 /* get cacheerr int bit */
beqz a0, 1f
/* Kill this interrupt; we cannot reset it. */
rsr a0, INTENABLE
movi a4, ~(1<<ETS_CACHEERR_INUM)
and a0, a4, a0
wsr a0, INTENABLE
movi a0, PANIC_RSN_CACHEERR
j 9f
1:
#if CONFIG_INT_WDT_CHECK_CPU1
/* Check if the cause is the app cpu failing to tick.*/
movi a0, int_wdt_app_cpu_ticked
l32i a0, a0, 0
bnez a0, 1f
bnez a0, 2f
/* It is. Modify cause. */
movi a0,PANIC_RSN_INTWDT_CPU1
wsr a0,EXCCAUSE
1:
j 9f
2:
#endif
call0 _xt_panic
/* Set EXCCAUSE to reflect cause of the wdt int trigger */
movi a0,PANIC_RSN_INTWDT_CPU0
9:
/* Found the reason, now save it. */
s32i a0, sp, XT_STK_EXCCAUSE
/* _xt_context_save seems to save the current a0, but we need the interuptees a0. Fix this. */
rsr a0, EXCSAVE_4 /* save interruptee's a0 */
s32i a0, sp, XT_STK_A0
/* Set up PS for C, disable all interrupts except NMI and debug, and clear EXCM. */
movi a0, PS_INTLEVEL(5) | PS_UM | PS_WOE
wsr a0, PS
//Call panic handler
mov a6,sp
call4 panicHandler
call0 _xt_context_restore
l32i a0, sp, XT_STK_PS /* retrieve interruptee's PS */
wsr a0, PS
l32i a0, sp, XT_STK_PC /* retrieve interruptee's PC */
wsr a0, EPC_1
l32i a0, sp, XT_STK_A0 /* retrieve interruptee's A0 */
l32i sp, sp, XT_STK_A1 /* remove exception frame */
rsync /* ensure PS and EPC written */
.align 4
.L_xt_highint4_exit:
rsr a0, EXCSAVE_4 /* restore a0 */
rfi 4

View File

@ -65,8 +65,9 @@
#define BIT(nr) (1UL << (nr))
#else
#define BIT(nr) (1 << (nr))
#endif //__ASSEMBLER__
#endif
#ifndef __ASSEMBLER__
//write value to register
#define REG_WRITE(_r, _v) (*(volatile uint32_t *)(_r)) = (_v)
@ -134,6 +135,8 @@
#define GET_PERI_REG_BITS2(reg, mask,shift) ((READ_PERI_REG(reg)>>(shift))&(mask))
//}}
#endif /* !__ASSEMBLER__ */
//Periheral Clock {{
#define APB_CLK_FREQ_ROM ( 26*1000000 )
#define CPU_CLK_FREQ_ROM APB_CLK_FREQ_ROM
@ -297,7 +300,7 @@
* 22 3 extern edge FRC1 timer
* 23 3 extern level
* 24 4 extern level TG1_WDT
* 25 4 extern level Reserved Reserved
* 25 4 extern level CACHEERR
* 26 5 extern level Reserved Reserved
* 27 3 extern level Reserved Reserved
* 28 4 extern edge
@ -314,6 +317,7 @@
#define ETS_TG0_T1_INUM 10 /**< use edge interrupt*/
#define ETS_FRC1_INUM 22
#define ETS_T1_WDT_INUM 24
#define ETS_CACHEERR_INUM 25
//CPU0 Interrupt number used in ROM, should be cancelled in SDK
#define ETS_SLC_INUM 1