Fix interrupt watchdog caused by livelock

This commit is contained in:
Li Shuai 2020-03-13 14:11:00 +08:00 committed by maojianxin
parent ca51072461
commit a80cf2dc69
6 changed files with 198 additions and 4 deletions

View File

@ -355,6 +355,11 @@ esp_err_t esp_efuse_update_secure_version(uint32_t secure_version);
*/
void esp_efuse_init(uint32_t offset, uint32_t size);
/**
 * @brief Report whether the running chip is one affected by the cache lock bug.
 *
 * The decision is made purely from the value returned by esp_efuse_get_chip_ver().
 *
 * @return true when esp_efuse_get_chip_ver() returns 3, false for any other value.
 */
inline static bool soc_has_cache_lock_bug(void)
{
    const bool is_affected_version = (3 == esp_efuse_get_chip_ver());
    return is_affected_version;
}
#ifdef __cplusplus
}
#endif

View File

@ -21,6 +21,7 @@ menu "ESP32-specific"
bool "Rev 2"
config ESP32_REV_MIN_3
bool "Rev 3"
select INT_WDT if !FREERTOS_UNICORE && SPIRAM_SUPPORT
endchoice
config ESP32_REV_MIN

View File

@ -388,6 +388,10 @@ void start_cpu0_default(void)
esp_int_wdt_init();
//Initialize the interrupt watch dog for CPU0.
esp_int_wdt_cpu_init();
#else
#if !defined(CONFIG_FREERTOS_UNICORE) && defined(CONFIG_SPIRAM_SUPPORT)
assert(!soc_has_cache_lock_bug() && "Minimum Supported ESP32 Revision requires Rev 3");
#endif
#endif
esp_cache_err_int_init();
esp_crosscore_int_init();

View File

@ -17,10 +17,12 @@
#include <xtensa/corebits.h>
#include <xtensa/config/system.h>
#include "freertos/xtensa_context.h"
#include "freertos/xtensa_rtos.h"
#include "esp_panic.h"
#include "sdkconfig.h"
#include "soc/soc.h"
#include "soc/dport_reg.h"
#include "soc/timer_group_reg.h"
/*
@ -39,6 +41,16 @@ Interrupt , a high-priority interrupt, is used for several things:
_l5_intr_stack:
.space L5_INTR_STACK_SIZE
#if !defined(CONFIG_FREERTOS_UNICORE) && defined(CONFIG_SPIRAM_SUPPORT)
/*
 * Live-lock bookkeeping words. Both symbols are global so that C code can
 * declare them as extern uint32_t and access them directly.
 *
 * _l4_intr_livelock_counter: incremented by .handle_livelock_int every time
 *                            that handler runs.
 * _l4_intr_livelock_max:     threshold compared against the counter in the
 *                            high-level interrupt vector; the live-lock handler
 *                            is only taken while counter < max.
 */
.global _l4_intr_livelock_counter
.global _l4_intr_livelock_max
.align 16
_l4_intr_livelock_counter:
.word 0 /* initial value: 0 */
_l4_intr_livelock_max:
.word 0 /* initial value: 0 */
#endif
.section .iram1,"ax"
.global xt_highint5
.type xt_highint5,@function
@ -52,8 +64,28 @@ xt_highint5:
bnez a0, .handle_dport_access_int
#endif // CONFIG_FREERTOS_UNICORE
#if !defined(CONFIG_FREERTOS_UNICORE) && defined(CONFIG_SPIRAM_SUPPORT)
/* See if we're here for the tg1 watchdog interrupt */
rsr a0, INTERRUPT
extui a0, a0, ETS_T1_WDT_INUM, 1
beqz a0, 1f
getcoreid a0
bnez a0, 1f /* App cpu (Core 1) jump bypass */
/* Pro cpu (Core 0) can execute to here. */
wsr a5, depc /* use DEPC as temp storage */
movi a0, _l4_intr_livelock_counter
l32i a0, a0, 0
movi a5, _l4_intr_livelock_max
l32i a5, a5, 0
bltu a0, a5, .handle_livelock_int /* _l4_intr_livelock_counter < _l4_intr_livelock_max */
rsr a5, depc /* restore a5 */
#endif
/* Allocate exception frame and save minimal context. */
mov a0, sp
1: mov a0, sp
addi sp, sp, -XT_STK_FRMSZ
s32i a0, sp, XT_STK_A1
#if XCHAL_HAVE_WINDOWED
@ -129,6 +161,114 @@ xt_highint5:
rfi 5
#if !defined(CONFIG_FREERTOS_UNICORE) && defined(CONFIG_SPIRAM_SUPPORT)
/*
 * .handle_livelock_int
 *
 * Taken from the high-level interrupt vector when the tg1 watchdog interrupt is
 * pending on the Pro cpu (Core 0) and
 * _l4_intr_livelock_counter < _l4_intr_livelock_max.
 *
 * What it does, in order:
 *   1. saves a2-a4 into _l4_intr_stack and raises the interrupt level,
 *   2. increments _l4_intr_livelock_counter,
 *   3. busy-waits for a configuration dependent number of microseconds,
 *   4. reprograms and feeds the tg1 watchdog and clears its 1st stage timeout interrupt,
 *   5. restores PS, a2-a4, a5 (from DEPC) and a0 (from EXCSAVE_4) and returns with rfi 4.
 *
 * Register state expected on entry: the caller has stashed a5 in DEPC; a0 is
 * reloaded from EXCSAVE_4 on exit.
 *
 * NOTE(review): this path uses _l4_intr_stack, EXCSAVE_4 and `rfi 4`, while the
 * visible vector label is xt_highint5 and the code just before this block returns
 * with `rfi 5` -- confirm these agree with the level this vector is installed at.
 */
.align 4
.handle_livelock_int:
/* Save A2, A3, A4 so we can use those registers */
movi a0, _l4_intr_stack
s32i a2, a0, L4_INTR_A2_OFFSET
s32i a3, a0, L4_INTR_A3_OFFSET
s32i a4, a0, L4_INTR_A4_OFFSET
/* a0 receives the previous PS here; it is written back to PS further down. */
rsil a0, CONFIG_ESP32_DPORT_DIS_INTERRUPT_LVL /* disable nested interrupt */
movi a2, _l4_intr_livelock_counter /* _l4_intr_livelock_counter++ */
l32i a3, a2, 0
addi a3, a3, 1
s32i a3, a2, 0
/*
The delay time can be calculated by the following formula:
T = ceil(0.25 + max(t1, t2)) us
t1 = 80 / f1, t2 = (1 + 14/N) * 20 / f2
f1: PSRAM access frequency, unit: MHz.
f2: Flash access frequency, unit: MHz.
When flash is slow/fast read, N = 1.
When flash is DOUT/DIO read, N = 2.
When flash is QOUT/QIO read, N = 4.
*/
/* a2 = cycle count at the start of the delay, a4 = value of g_ticks_per_us_pro */
rsr.ccount a2
movi a4, g_ticks_per_us_pro
l32i a4, a4, 0
/*
 * a3 = delay in microseconds, selected at build time from the flash mode and
 * the flash / PSRAM speed options.
 * NOTE(review): if none of the four CONFIG_FLASHMODE_* macros below is defined,
 * no movi is emitted and a3 still holds the incremented counter value -- confirm
 * that one of them is always defined.
 */
#if defined(CONFIG_FLASHMODE_QIO) || defined(CONFIG_FLASHMODE_QOUT)
# if defined(CONFIG_ESPTOOLPY_FLASHFREQ_80M) && defined(CONFIG_SPIRAM_SPEED_80M)
movi a3, 2
# elif defined(CONFIG_ESPTOOLPY_FLASHFREQ_80M) && defined(CONFIG_SPIRAM_SPEED_40M)
movi a3, 3
# elif defined(CONFIG_ESPTOOLPY_FLASHFREQ_40M) && defined(CONFIG_SPIRAM_SPEED_40M)
movi a3, 3
# elif defined(CONFIG_ESPTOOLPY_FLASHFREQ_26M) && defined(CONFIG_SPIRAM_SPEED_40M)
movi a3, 4
# else
movi a3, 5
# endif
#elif defined(CONFIG_FLASHMODE_DIO) || defined(CONFIG_FLASHMODE_DOUT)
# if defined(CONFIG_ESPTOOLPY_FLASHFREQ_80M) && defined(CONFIG_SPIRAM_SPEED_80M)
movi a3, 3
# elif defined(CONFIG_ESPTOOLPY_FLASHFREQ_80M) && defined(CONFIG_SPIRAM_SPEED_40M)
movi a3, 3
# elif defined(CONFIG_ESPTOOLPY_FLASHFREQ_40M) && defined(CONFIG_SPIRAM_SPEED_40M)
movi a3, 5
# elif defined(CONFIG_ESPTOOLPY_FLASHFREQ_26M) && defined(CONFIG_SPIRAM_SPEED_40M)
movi a3, 7
# else
movi a3, 9
# endif
#endif
/* a3 = delay expressed in CPU cycles (microseconds * ticks per microsecond) */
mull a3, a3, a4
/* Spin until (current ccount - start ccount) >= a3 */
1: rsr.ccount a4 /* delay_us(N) */
sub a4, a4, a2
bltu a4, a3, 1b
/* Feed watchdog and clear tg1 1st stage timeout interrupt */
movi a2, TIMERG1
movi a3, TIMG_WDT_WKEY_VALUE
memw
s32i a3, a2, 100 /* disable tg1 write protect */
/*
 * NOTE(review): 40 presumably corresponds to TG1_WDT_LIVELOCK_TIMEOUT_MS (20) * 2,
 * i.e. the same "ms * 2" scaling the C tick hook applies to this register -- confirm
 * and keep both in sync. 4000 is a fixed literal here, unlike the C side.
 */
movi a3, 40
memw
s32i a3, a2, 80 /* set timeout before interrupt */
movi a3, 4000
memw
s32i a3, a2, 84 /* set timeout before system reset */
movi a3, 1
s32i a3, a2, 96 /* feed wdt */
memw
/*
The vector number of the interrupt watchdog is ETS_T1_WDT_INUM (24), which is a
Level-Triggered interrupt and needs to be cleared at the peripheral.
*/
/* Read-modify-write of the word at offset 164: set bit 2 (value 4), keep the rest */
l32i a4, a2, 164
movi a3, 4
or a3, a4, a3
memw
s32i a3, a2, 164 /* clear tg1 1st stage timeout interrupt */
movi a3, 0
s32i a3, a2, 100 /* enable tg1 write protect */
memw
wsr a0, PS /* restore interrupt level */
/* Done. Restore registers and return. */
movi a0, _l4_intr_stack
l32i a2, a0, L4_INTR_A2_OFFSET
l32i a3, a0, L4_INTR_A3_OFFSET
l32i a4, a0, L4_INTR_A4_OFFSET
rsync /* ensure register restored */
rsr a5, depc /* restore a5, which the caller stashed in DEPC */
rsr a0, EXCSAVE_4 /* restore a0 */
rfi 4
#endif
#ifndef CONFIG_FREERTOS_UNICORE

View File

@ -31,6 +31,7 @@
#include "driver/timer.h"
#include "driver/periph_ctrl.h"
#include "esp_int_wdt.h"
#include "esp_efuse.h"
#if CONFIG_INT_WDT
@ -38,6 +39,16 @@
// #define WDT_INT_NUM 24
#define WDT_INT_NUM ETS_T1_WDT_INUM
#if !defined(CONFIG_FREERTOS_UNICORE) && defined(CONFIG_SPIRAM_SUPPORT)
/*
 * This parameter indicates the response time of the tg1 watchdog for identifying
 * a live lock. Too large a value may affect the BT and Wi-Fi modules.
 */
#define TG1_WDT_LIVELOCK_TIMEOUT_MS (20)
extern uint32_t _l4_intr_livelock_counter, _l4_intr_livelock_max;
#endif
//Take care: the tick hook can also be called before esp_int_wdt_init() is called.
#if CONFIG_INT_WDT_CHECK_CPU1
//Not static; the ISR assembly checks this.
@ -50,7 +61,12 @@ static void IRAM_ATTR tick_hook(void) {
//Only feed wdt if app cpu also ticked.
if (int_wdt_app_cpu_ticked) {
TIMERG1.wdt_wprotect=TIMG_WDT_WKEY_VALUE;
#if !defined(CONFIG_FREERTOS_UNICORE) && defined(CONFIG_SPIRAM_SUPPORT)
_l4_intr_livelock_counter = 0;
TIMERG1.wdt_config2=CONFIG_INT_WDT_TIMEOUT_MS*2/(_l4_intr_livelock_max+1); //Set timeout before interrupt
#else
TIMERG1.wdt_config2=CONFIG_INT_WDT_TIMEOUT_MS*2; //Set timeout before interrupt
#endif
TIMERG1.wdt_config3=CONFIG_INT_WDT_TIMEOUT_MS*4; //Set timeout before reset
TIMERG1.wdt_feed=1;
TIMERG1.wdt_wprotect=0;
@ -92,9 +108,28 @@ void esp_int_wdt_init() {
void esp_int_wdt_cpu_init()
{
assert(CONFIG_INT_WDT_TIMEOUT_MS >= ((1000/CONFIG_FREERTOS_HZ)<<1) && "Interrupt watchdog timeout needs to meet double SysTick period!");
esp_register_freertos_tick_hook_for_cpu(tick_hook, xPortGetCoreID());
ESP_INTR_DISABLE(WDT_INT_NUM);
intr_matrix_set(xPortGetCoreID(), ETS_TG1_WDT_LEVEL_INTR_SOURCE, WDT_INT_NUM);
/*
 * We found a live lock issue on ESP32 ECO3. This problem causes the cache to stay busy, and
 * the CPU then stops executing instructions. To solve this problem, we use the
 * tg1 1st stage timeout interrupt to break the cache-busy state of the live lock.
 * Here we bind this interrupt only to the Pro cpu (Core 0): when the tg1 1st stage timeout
 * interrupt caused by the live lock occurs, only the Pro cpu (Core 0) switches its execution
 * path to the level 4 ISR to clear the cache-busy status and resume the system.
 */
if (xPortGetCoreID() == PRO_CPU_NUM) {
intr_matrix_set(xPortGetCoreID(), ETS_TG1_WDT_LEVEL_INTR_SOURCE, WDT_INT_NUM);
#if !defined(CONFIG_FREERTOS_UNICORE) && defined(CONFIG_SPIRAM_SUPPORT)
_l4_intr_livelock_max = 0;
if (soc_has_cache_lock_bug()) {
assert(((1000/CONFIG_FREERTOS_HZ)<<1) <= TG1_WDT_LIVELOCK_TIMEOUT_MS);
assert(CONFIG_INT_WDT_TIMEOUT_MS >= (TG1_WDT_LIVELOCK_TIMEOUT_MS*3));
_l4_intr_livelock_max = CONFIG_INT_WDT_TIMEOUT_MS/TG1_WDT_LIVELOCK_TIMEOUT_MS - 1;
}
#endif
}
//We do not register a handler for the interrupt because it is interrupt level 4 which
//is not servicable from C. Instead, xtensa_vectors.S has a call to the panic handler for
//this interrupt.

View File

@ -238,15 +238,24 @@ void panicHandler(XtExcFrame *frame)
}
#if !CONFIG_FREERTOS_UNICORE
/*
 * When a real interrupt watchdog timeout occurs (_l4_intr_livelock_counter >= _l4_intr_livelock_max),
 * do not clear the wdt interrupt. Instead, map the tg1 1st stage timeout interrupt to the
 * App cpu (Core 1) on its behalf, so that the App cpu (Core 1) also responds to the wdt interrupt.
 */
if (core_id == PRO_CPU_NUM) {
intr_matrix_set(APP_CPU_NUM, ETS_TG1_WDT_LEVEL_INTR_SOURCE, ETS_T1_WDT_INUM);
}
//Save frame for other core.
if ((frame->exccause == PANIC_RSN_INTWDT_CPU0 && core_id == 1) || (frame->exccause == PANIC_RSN_INTWDT_CPU1 && core_id == 0)) {
other_core_frame = frame;
while (1);
}
//The core which triggers the interrupt watchdog will delay 1 us, so the other core can save its frame.
//The core which triggers the interrupt watchdog will delay 500 us, so the other core can save its frame.
if (frame->exccause == PANIC_RSN_INTWDT_CPU0 || frame->exccause == PANIC_RSN_INTWDT_CPU1) {
ets_delay_us(1);
ets_delay_us(500);
}
if (frame->exccause == PANIC_RSN_CACHEERR && esp_cache_err_get_cpuid() != core_id) {