esp32: Core dump sanity checks were added

- CRC for core dump flash partition config
 - Tasks with corrupted TCBs are skipped
 - Assertions to check that nothing is written beyond core dump flash partition
This commit is contained in:
Alexey Gerenkov 2017-04-20 19:38:38 +03:00
parent 90c8bd93e0
commit 98895af68b
3 changed files with 159 additions and 68 deletions

View File

@ -19,6 +19,7 @@
#include "soc/timer_group_struct.h"
#include "soc/timer_group_reg.h"
#include "driver/gpio.h"
#include "rom/crc.h"
#include "esp_panic.h"
#include "esp_partition.h"
@ -41,74 +42,128 @@ const static DRAM_ATTR char TAG[] = "esp_core_dump";
#define ESP_COREDUMP_LOG_PROCESS( format, ... ) do{/*(__VA_ARGS__);*/}while(0)
#endif
// TODO: allow user to set this in menuconfig or get tasks iteratively
#define COREDUMP_MAX_TASKS_NUM 32
#define COREDUMP_MAX_TASKS_NUM 32
#define COREDUMP_MAX_TASK_STACK_SIZE (64*1024)
typedef esp_err_t (*esp_core_dump_write_prepare_t)(void *priv, uint32_t *data_len);
typedef esp_err_t (*esp_core_dump_write_start_t)(void *priv);
typedef esp_err_t (*esp_core_dump_write_end_t)(void *priv);
typedef esp_err_t (*esp_core_dump_flash_write_data_t)(void *priv, void * data, uint32_t data_len);
/** core dump emitter control structure */
typedef struct _core_dump_write_config_t
{
// this function is called before core dump data writing
// used for sanity checks
esp_core_dump_write_prepare_t prepare;
// this function is called at the beginning of data writing
esp_core_dump_write_start_t start;
// this function is called when all dump data are written
esp_core_dump_write_end_t end;
// this function is called to write data chunk
esp_core_dump_flash_write_data_t write;
// number of tasks with corrupted TCBs
uint32_t bad_tasks_num;
// pointer to data which are specific for particular core dump emitter
void * priv;
} core_dump_write_config_t;
/** core dump data header */
typedef struct _core_dump_header_t
{
uint32_t data_len; // data length
uint32_t tasks_num; // number of tasks
uint32_t tcb_sz; // size of TCB
} core_dump_header_t;
/** core dump task data header */
typedef struct _core_dump_task_header_t
{
void * tcb_addr; // TCB address
uint32_t stack_start; // stack start address
uint32_t stack_end; // stack end address
} core_dump_task_header_t;
static inline bool esp_task_stack_start_is_sane(uint32_t sp)
{
return !(sp < 0x3ffae010UL || sp > 0x3fffffffUL);
}
static inline bool esp_tcb_addr_is_sane(uint32_t addr, uint32_t sz)
{
//TODO: currently core dump supports TCBs in DRAM only, external SRAM not supported yet
return !(addr < 0x3ffae000UL || (addr + sz) > 0x40000000UL);
}
static void esp_core_dump_write(XtExcFrame *frame, core_dump_write_config_t *write_cfg)
{
union
{
uint8_t data8[12];
uint32_t data32[3];
} rom_data;
int cur_task_bad = 0;
esp_err_t err;
TaskSnapshot_t tasks[COREDUMP_MAX_TASKS_NUM];
UBaseType_t tcb_sz, task_num;
UBaseType_t tcb_sz, tcb_sz_padded, task_num;
uint32_t data_len = 0, i, len;
union
{
core_dump_header_t hdr;
core_dump_task_header_t task_hdr;
} dump_data;
task_num = uxTaskGetSnapshotAll(tasks, COREDUMP_MAX_TASKS_NUM, &tcb_sz);
// take TCB padding into account, actual TCB size will be stored in header
if (tcb_sz % sizeof(uint32_t))
len = (tcb_sz / sizeof(uint32_t) + 1) * sizeof(uint32_t);
tcb_sz_padded = (tcb_sz / sizeof(uint32_t) + 1) * sizeof(uint32_t);
else
len = tcb_sz;
tcb_sz_padded = tcb_sz;
// header + tasknum*(tcb + stack start/end + tcb addr)
data_len = 3*sizeof(uint32_t) + task_num*(len + 2*sizeof(uint32_t) + sizeof(uint32_t *));
data_len = sizeof(core_dump_header_t) + task_num*(tcb_sz_padded + sizeof(core_dump_task_header_t));
for (i = 0; i < task_num; i++) {
if (!esp_tcb_addr_is_sane((uint32_t)tasks[i].pxTCB, tcb_sz)) {
ESP_COREDUMP_LOG_PROCESS("Bad TCB addr %x!", tasks[i].pxTCB);
write_cfg->bad_tasks_num++;
continue;
}
if (tasks[i].pxTCB == xTaskGetCurrentTaskHandleForCPU(xPortGetCoreID())) {
// set correct stack top for current task
tasks[i].pxTopOfStack = (StackType_t *)frame;
ESP_COREDUMP_LOG_PROCESS("Current task EXIT/PC/PS/A0/SP %x %x %x %x %x", frame->exit, frame->pc, frame->ps, frame->a0, frame->a1);
ESP_COREDUMP_LOG_PROCESS("Current task EXIT/PC/PS/A0/SP %x %x %x %x %x",
frame->exit, frame->pc, frame->ps, frame->a0, frame->a1);
}
else {
XtSolFrame *task_frame = (XtSolFrame *)tasks[i].pxTopOfStack;
if (task_frame->exit == 0) {
ESP_COREDUMP_LOG_PROCESS("Task EXIT/PC/PS/A0/SP %x %x %x %x %x", task_frame->exit, task_frame->pc, task_frame->ps, task_frame->a0, task_frame->a1);
ESP_COREDUMP_LOG_PROCESS("Task EXIT/PC/PS/A0/SP %x %x %x %x %x",
task_frame->exit, task_frame->pc, task_frame->ps, task_frame->a0, task_frame->a1);
}
else {
#if CONFIG_ESP32_ENABLE_COREDUMP_TO_FLASH
XtExcFrame *task_frame2 = (XtExcFrame *)tasks[i].pxTopOfStack;
#endif
ESP_COREDUMP_LOG_PROCESS("Task EXIT/PC/PS/A0/SP %x %x %x %x %x", task_frame2->exit, task_frame2->pc, task_frame2->ps, task_frame2->a0, task_frame2->a1);
ESP_COREDUMP_LOG_PROCESS("Task EXIT/PC/PS/A0/SP %x %x %x %x %x",
task_frame2->exit, task_frame2->pc, task_frame2->ps, task_frame2->a0, task_frame2->a1);
}
}
#if( portSTACK_GROWTH < 0 )
len = (uint32_t)tasks[i].pxEndOfStack - (uint32_t)tasks[i].pxTopOfStack;
#else
len = (uint32_t)tasks[i].pxTopOfStack - (uint32_t)tasks[i].pxEndOfStack;
#endif
ESP_COREDUMP_LOG_PROCESS("Stack len = %lu (%x %x)", len, tasks[i].pxTopOfStack, tasks[i].pxEndOfStack);
// take stack padding into account
if (len % sizeof(uint32_t))
len = (len / sizeof(uint32_t) + 1) * sizeof(uint32_t);
data_len += len;
// check task's stack
if (!esp_stack_ptr_is_sane((uint32_t)tasks[i].pxTopOfStack) || !esp_task_stack_start_is_sane((uint32_t)tasks[i].pxEndOfStack)
|| len > COREDUMP_MAX_TASK_STACK_SIZE) {
if (tasks[i].pxTCB == xTaskGetCurrentTaskHandleForCPU(xPortGetCoreID())) {
cur_task_bad = 1;
}
ESP_COREDUMP_LOG_PROCESS("Corrupted TCB %x: stack len %lu, top %x, end %x!",
tasks[i].pxTCB, len, tasks[i].pxTopOfStack, tasks[i].pxEndOfStack);
tasks[i].pxTCB = 0; // make TCB addr invalid to skip it in dump
write_cfg->bad_tasks_num++;
} else {
ESP_COREDUMP_LOG_PROCESS("Stack len = %lu (%x %x)", len, tasks[i].pxTopOfStack, tasks[i].pxEndOfStack);
// take stack padding into account
len = (len + sizeof(uint32_t) - 1) & ~(sizeof(uint32_t) - 1);
data_len += len;
}
}
data_len -= write_cfg->bad_tasks_num*(tcb_sz_padded + sizeof(core_dump_task_header_t));
ESP_COREDUMP_LOG_PROCESS("Core dump len = %lu (%d %d)", data_len, task_num, write_cfg->bad_tasks_num);
// prepare write
if (write_cfg->prepare) {
@ -118,9 +173,6 @@ static void esp_core_dump_write(XtExcFrame *frame, core_dump_write_config_t *wri
return;
}
}
ESP_COREDUMP_LOG_PROCESS("Core dump len = %lu", data_len);
// write start
if (write_cfg->start) {
err = write_cfg->start(write_cfg->priv);
@ -129,25 +181,27 @@ static void esp_core_dump_write(XtExcFrame *frame, core_dump_write_config_t *wri
return;
}
}
// write header
rom_data.data32[0] = data_len;
rom_data.data32[1] = task_num;
rom_data.data32[2] = tcb_sz;
err = write_cfg->write(write_cfg->priv, &rom_data, 3*sizeof(uint32_t));
dump_data.hdr.data_len = data_len;
dump_data.hdr.tasks_num = task_num - write_cfg->bad_tasks_num;
dump_data.hdr.tcb_sz = tcb_sz;
err = write_cfg->write(write_cfg->priv, &dump_data, sizeof(core_dump_header_t));
if (err != ESP_OK) {
ESP_COREDUMP_LOGE("Failed to write core dump header (%d)!", err);
return;
}
// write tasks
for (i = 0; i < task_num; i++) {
if (!esp_tcb_addr_is_sane((uint32_t)tasks[i].pxTCB, tcb_sz)) {
ESP_COREDUMP_LOG_PROCESS("Skip TCB with bad addr %x!", tasks[i].pxTCB);
continue;
}
ESP_COREDUMP_LOG_PROCESS("Dump task %x", tasks[i].pxTCB);
// save TCB address, stack base and stack top addr
rom_data.data32[0] = (uint32_t)tasks[i].pxTCB;
rom_data.data32[1] = (uint32_t)tasks[i].pxTopOfStack;
rom_data.data32[2] = (uint32_t)tasks[i].pxEndOfStack;
err = write_cfg->write(write_cfg->priv, &rom_data, 3*sizeof(uint32_t));
dump_data.task_hdr.tcb_addr = tasks[i].pxTCB;
dump_data.task_hdr.stack_start = (uint32_t)tasks[i].pxTopOfStack;
dump_data.task_hdr.stack_end = (uint32_t)tasks[i].pxEndOfStack;
err = write_cfg->write(write_cfg->priv, &dump_data, sizeof(core_dump_task_header_t));
if (err != ESP_OK) {
ESP_COREDUMP_LOGE("Failed to write task header (%d)!", err);
return;
@ -159,18 +213,15 @@ static void esp_core_dump_write(XtExcFrame *frame, core_dump_write_config_t *wri
return;
}
// save task stack
err = write_cfg->write(write_cfg->priv,
#if( portSTACK_GROWTH < 0 )
tasks[i].pxTopOfStack,
(uint32_t)tasks[i].pxEndOfStack - (uint32_t)tasks[i].pxTopOfStack
#else
tasks[i].pxEndOfStack,
(uint32_t)tasks[i].pxTopOfStack - (uint32_t)tasks[i].pxEndOfStack
#endif
);
if (err != ESP_OK) {
ESP_COREDUMP_LOGE("Failed to write task stack (%d)!", err);
return;
if (tasks[i].pxTopOfStack != 0 && tasks[i].pxEndOfStack != 0) {
err = write_cfg->write(write_cfg->priv, tasks[i].pxTopOfStack,
(uint32_t)tasks[i].pxEndOfStack - (uint32_t)tasks[i].pxTopOfStack);
if (err != ESP_OK) {
ESP_COREDUMP_LOGE("Failed to write task stack (%d)!", err);
return;
}
} else {
ESP_COREDUMP_LOG_PROCESS("Skip corrupted task %x stack!", tasks[i].pxTCB);
}
}
@ -182,6 +233,12 @@ static void esp_core_dump_write(XtExcFrame *frame, core_dump_write_config_t *wri
return;
}
}
if (write_cfg->bad_tasks_num) {
ESP_COREDUMP_LOGE("Skipped %d tasks with bad TCB!", write_cfg->bad_tasks_num);
if (cur_task_bad) {
ESP_COREDUMP_LOGE("Crashed task has been skipped!");
}
}
}
#if CONFIG_ESP32_ENABLE_COREDUMP_TO_FLASH
@ -195,10 +252,24 @@ typedef struct _core_dump_write_flash_data_t
uint32_t off;
} core_dump_write_flash_data_t;
// core dump partition start
static uint32_t s_core_part_start;
// core dump partition size
static uint32_t s_core_part_size;
typedef struct _core_dump_partition_t
{
// core dump partition start
uint32_t start;
// core dump partition size
uint32_t size;
} core_dump_partition_t;
typedef struct _core_dump_flash_config_t
{
// core dump partition start
core_dump_partition_t partition;
// core dump partition size
uint32_t crc;
} core_dump_flash_config_t;
// core dump flash data
static core_dump_flash_config_t s_core_flash_config;
static uint32_t esp_core_dump_write_flash_padded(size_t off, uint8_t *data, uint32_t data_size)
{
@ -211,6 +282,11 @@ static uint32_t esp_core_dump_write_flash_padded(size_t off, uint8_t *data, uint
} rom_data;
data_len = (data_size / sizeof(uint32_t)) * sizeof(uint32_t);
assert(off >= s_core_flash_config.partition.start);
assert((off + data_len + (data_size % sizeof(uint32_t) ? sizeof(uint32_t) : 0)) <=
s_core_flash_config.partition.start + s_core_flash_config.partition.size);
err = spi_flash_write(off, data, data_len);
if (err != ESP_OK) {
ESP_COREDUMP_LOGE("Failed to write data to flash (%d)!", err);
@ -240,8 +316,9 @@ static esp_err_t esp_core_dump_flash_write_prepare(void *priv, uint32_t *data_le
uint32_t sec_num;
core_dump_write_flash_data_t *wr_data = (core_dump_write_flash_data_t *)priv;
// check for available space in partition
// add space for 2 magics. TODO: change to CRC
if ((*data_len + 2*sizeof(uint32_t)) > s_core_part_size) {
if ((*data_len + 2*sizeof(uint32_t)) > s_core_flash_config.partition.size) {
ESP_COREDUMP_LOGE("Not enough space to save core dump!");
return ESP_ERR_NO_MEM;
}
@ -252,7 +329,8 @@ static esp_err_t esp_core_dump_flash_write_prepare(void *priv, uint32_t *data_le
sec_num = *data_len / SPI_FLASH_SEC_SIZE;
if (*data_len % SPI_FLASH_SEC_SIZE)
sec_num++;
err = spi_flash_erase_range(s_core_part_start + 0, sec_num * SPI_FLASH_SEC_SIZE);
assert(sec_num * SPI_FLASH_SEC_SIZE <= s_core_flash_config.partition.size);
err = spi_flash_erase_range(s_core_flash_config.partition.start + 0, sec_num * SPI_FLASH_SEC_SIZE);
if (err != ESP_OK) {
ESP_COREDUMP_LOGE("Failed to erase flash (%d)!", err);
return err;
@ -266,7 +344,8 @@ static esp_err_t esp_core_dump_flash_write_word(core_dump_write_flash_data_t *wr
esp_err_t err = ESP_OK;
uint32_t data32 = word;
err = spi_flash_write(s_core_part_start + wr_data->off, &data32, sizeof(uint32_t));
assert(wr_data->off + sizeof(uint32_t) <= s_core_flash_config.partition.size);
err = spi_flash_write(s_core_flash_config.partition.start + wr_data->off, &data32, sizeof(uint32_t));
if (err != ESP_OK) {
ESP_COREDUMP_LOGE("Failed to write to flash (%d)!", err);
return err;
@ -287,21 +366,20 @@ static esp_err_t esp_core_dump_flash_write_end(void *priv)
{
core_dump_write_flash_data_t *wr_data = (core_dump_write_flash_data_t *)priv;
#if LOG_LOCAL_LEVEL >= ESP_LOG_DEBUG
uint32_t i;
union
{
uint8_t data8[16];
uint32_t data32[4];
} rom_data;
esp_err_t err = spi_flash_read(s_core_part_start + 0, &rom_data, sizeof(rom_data));
esp_err_t err = spi_flash_read(s_core_flash_config.partition.start + 0, &rom_data, sizeof(rom_data));
if (err != ESP_OK) {
ESP_COREDUMP_LOGE("Failed to read flash (%d)!", err);
return err;
}
else {
ESP_COREDUMP_LOG_PROCESS("Data from flash:");
for (i = 0; i < sizeof(rom_data)/sizeof(rom_data.data32[0]); i++) {
for (uint32_t i = 0; i < sizeof(rom_data)/sizeof(rom_data.data32[0]); i++) {
ESP_COREDUMP_LOG_PROCESS("%x", rom_data.data32[i]);
}
}
@ -316,7 +394,7 @@ static esp_err_t esp_core_dump_flash_write_data(void *priv, void * data, uint32_
esp_err_t err = ESP_OK;
core_dump_write_flash_data_t *wr_data = (core_dump_write_flash_data_t *)priv;
uint32_t len = esp_core_dump_write_flash_padded(s_core_part_start + wr_data->off, data, data_len);
uint32_t len = esp_core_dump_write_flash_padded(s_core_flash_config.partition.start + wr_data->off, data, data_len);
if (len != data_len)
return ESP_FAIL;
@ -330,9 +408,17 @@ void esp_core_dump_to_flash(XtExcFrame *frame)
core_dump_write_config_t wr_cfg;
core_dump_write_flash_data_t wr_data;
uint32_t crc = crc32_le(UINT32_MAX, (uint8_t const *)&s_core_flash_config.partition,
sizeof(s_core_flash_config.partition));
if (s_core_flash_config.crc != crc) {
ESP_COREDUMP_LOGE("Core dump flash config is corrupted! CRC=0x%x instead of 0x%x", crc, s_core_flash_config.crc);
return;
}
/* init non-OS flash access critical section */
spi_flash_guard_set(&g_flash_guard_no_os_ops);
memset(&wr_cfg, 0, sizeof(wr_cfg));
wr_cfg.prepare = esp_core_dump_flash_write_prepare;
wr_cfg.start = esp_core_dump_flash_write_start;
wr_cfg.end = esp_core_dump_flash_write_end;
@ -422,6 +508,7 @@ void esp_core_dump_to_uart(XtExcFrame *frame)
uint32_t tm_end, tm_cur;
int ch;
memset(&wr_cfg, 0, sizeof(wr_cfg));
wr_cfg.prepare = NULL;
wr_cfg.start = esp_core_dump_uart_write_start;
wr_cfg.end = esp_core_dump_uart_write_end;
@ -461,8 +548,10 @@ void esp_core_dump_init()
return;
}
ESP_COREDUMP_LOGI("Found partition '%s' @ %x %d bytes", core_part->label, core_part->address, core_part->size);
s_core_part_start = core_part->address;
s_core_part_size = core_part->size;
s_core_flash_config.partition.start = core_part->address;
s_core_flash_config.partition.size = core_part->size;
s_core_flash_config.crc = crc32_le(UINT32_MAX, (uint8_t const *)&s_core_flash_config.partition,
sizeof(s_core_flash_config.partition));
#endif
#if CONFIG_ESP32_ENABLE_COREDUMP_TO_UART
ESP_COREDUMP_LOGI("Init core dump to UART");

View File

@ -67,6 +67,13 @@ void esp_clear_watchpoint(int no);
*/
void esp_panic_wdt_stop(void);
/**
* @brief Checks stack pointer
*/
static inline bool esp_stack_ptr_is_sane(uint32_t sp)
{
return !(sp < 0x3ffae010UL || sp > 0x3ffffff0UL || ((sp & 0xf) != 0));
}
#endif
#ifdef __cplusplus

View File

@ -347,11 +347,6 @@ static void esp_panic_dig_reset()
}
}
static inline bool stackPointerIsSane(uint32_t sp)
{
return !(sp < 0x3ffae010 || sp > 0x3ffffff0 || ((sp & 0xf) != 0));
}
static void putEntry(uint32_t pc, uint32_t sp)
{
if (pc & 0x80000000) {
@ -372,7 +367,7 @@ static void doBacktrace(XtExcFrame *frame)
pc = frame->a0;
while (i++ < 100) {
uint32_t psp = sp;
if (!stackPointerIsSane(sp) || i++ > 100) {
if (!esp_stack_ptr_is_sane(sp) || i++ > 100) {
break;
}
sp = *((uint32_t *) (sp - 0x10 + 4));