refactor(linux): Improved error reporting

* Without using GDB, segfaults and aborts are reported
  clearly now
* Without using GDB, segfaults produce a rudimentary
  backtrace now
This commit is contained in:
Jakob Hasse 2024-02-06 18:40:37 +08:00
parent 6a919dcc16
commit 4c5cded945
3 changed files with 135 additions and 4 deletions

View File

@ -2,7 +2,7 @@
#
# The FreeRTOS component mainly contains
# - Different FreeRTOS kernel implementations (namely IDF FreeRTOS and Amazon SMP FreeRTOS).
# - Different ports of each architecture for each kernel implementaiton.
# - Different ports of each architecture for each kernel implementation.
# - IDF additions to FreeRTOS (e.g., features and API) to augment FreeRTOS
#
# The FreeRTOS component organizes its files as follows
@ -12,7 +12,7 @@
# - `./esp_additions`
# - Additional features added by ESP-IDF to augment FreeRTOS, and not part of the original kernel
# - `./FreeRTOS-Kernel-...`
# - Different FreeRTOS kernel implementations. Each implementaiton is kept in its own directory.
# - Different FreeRTOS kernel implementations. Each implementation is kept in its own directory.
# - Ports for the implementation are kept in `FreeRTOS-Kernel-.../portable/xxx/`\
# - `./test_apps`
# - Contains all unit tests/test apps for the FreeRTOS component.

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2015-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -17,6 +17,8 @@
#include <assert.h>
#include <time.h>
#include <unistd.h>
#include <execinfo.h>
#include <signal.h>
/* Scheduler includes. */
#include "FreeRTOS.h"
@ -24,6 +26,14 @@
#include "utils/wait_for_event.h"
#include "esp_log.h"
/* Maximum number of return addresses that segfault_handler() asks backtrace() to capture. */
#define BACKTRACE_PC_ARRAY_SIZE 20
/* Text that segfault_handler() writes to stderr before the backtrace itself is dumped. */
#define ON_SEGFAULT_MESSAGE "ERROR: Segmentation Fault, here's your backtrace:\n"
/* Text that abort_handler() writes to stderr. */
#define ON_ABORT_MESSAGE "ERROR: Aborted\n"
/* main() stores the result of signal() in a sighandler_t; on Apple/Mach builds that name is mapped to sig_t. */
/* NOTE(review): presumably sighandler_t is not declared by the macOS headers - confirm. */
#if (defined(__APPLE__) && defined(__MACH__))
typedef sig_t sighandler_t;
#endif
static const char *TAG = "port";
static volatile UBaseType_t uxInterruptNesting = 0;
@ -40,6 +50,52 @@ BaseType_t xPortCheckIfInISR(void)
return uxInterruptNesting;
}
#if CONFIG_COMPILER_OPTIMIZATION_DEBUG
#define BACKTRACE_PC_ARRAY_SIZE_DUMMY 1
/**
 * Warm-up call: run backtrace() a single time so that libgcc has already
 * been loaded before backtrace() is needed again later on.
 */
static void load_libgcc(void)
{
    void *frames[BACKTRACE_PC_ARRAY_SIZE_DUMMY];
    const size_t captured = backtrace(frames, BACKTRACE_PC_ARRAY_SIZE_DUMMY);

    // We are executing inside a function, so at least our own stack frame must have been recorded
    assert(captured == 1);
}
/*
 * Signal handler that prints a rudimentary backtrace to help users a bit
 * with segfaults (main() installs it for SIGSEGV).
 *
 * Writes ON_SEGFAULT_MESSAGE followed by up to BACKTRACE_PC_ARRAY_SIZE
 * stack frames to stderr, then terminates the process with exit status 1.
 * This function never returns.
 *
 * sig: number of the delivered signal (unused).
 */
static void segfault_handler(int sig)
{
    (void) sig;

    void *frames[BACKTRACE_PC_ARRAY_SIZE];

    // Collect the return addresses of all entries on the stack.
    // backtrace() returns an int and backtrace_symbols_fd() expects one, so keep that type.
    const int depth = backtrace(frames, BACKTRACE_PC_ARRAY_SIZE);

    // We need a raw file write here because other functions are not async-signal-safe.
    // sizeof() counts the string literal's terminating NUL, which must not end up in the output.
    ssize_t written = write(STDERR_FILENO, ON_SEGFAULT_MESSAGE, sizeof(ON_SEGFAULT_MESSAGE) - 1);
    (void) written; // The return value is ignored for now, as we don't have a lot of options in case of failure
                    // and EINTR can't happen in a signal handler anyways

    backtrace_symbols_fd(frames, depth, STDERR_FILENO);
    _exit(1);
}
/*
 * Signal handler that prints a message to signal abort, even in idf.py monitor
 * (main() installs it for SIGABRT).
 *
 * Writes ON_ABORT_MESSAGE to stderr, then terminates the process with
 * exit status 1. This function never returns.
 *
 * sig: number of the delivered signal (unused).
 */
static void abort_handler(int sig)
{
    (void) sig;

    // We need a raw file write here because other functions are not async-signal-safe.
    // sizeof() counts the string literal's terminating NUL, which must not end up in the output.
    ssize_t written = write(STDERR_FILENO, ON_ABORT_MESSAGE, sizeof(ON_ABORT_MESSAGE) - 1);
    (void) written; // The return value is ignored for now, as we don't have a lot of options in case of failure
                    // and EINTR can't happen in a signal handler anyways

    _exit(1);
}
#endif // CONFIG_COMPILER_OPTIMIZATION_DEBUG
void app_main(void);
static void main_task(void* args)
@ -50,10 +106,31 @@ static void main_task(void* args)
int main(int argc, const char **argv)
{
// This makes sure that stdio is always syncronized so that idf.py monitor
// This makes sure that stdio is always synchronized so that idf.py monitor
// and other tools read text output on time.
setvbuf(stdout, NULL, _IONBF, 0);
#if CONFIG_COMPILER_OPTIMIZATION_DEBUG
// Ensures that libgcc is loaded to avoid problems when loading it later in
// the signal handler (see NOTES section in glibc backtrace man page)
load_libgcc();
sighandler_t sig_res;
// Enable backtraces
sig_res = signal(SIGSEGV, segfault_handler);
if (sig_res == SIG_ERR) {
perror("Failed setting the segfault handler");
abort();
}
// Enable error message on abort
sig_res = signal(SIGABRT, abort_handler);
if (sig_res == SIG_ERR) {
perror("Failed setting the abort handler");
abort();
}
#endif // CONFIG_COMPILER_OPTIMIZATION_DEBUG
usleep(1000);
BaseType_t res = xTaskCreatePinnedToCore(&main_task, "main",
ESP_TASK_MAIN_STACK, NULL,

View File

@ -68,6 +68,60 @@ To build the application on Linux, the target has to be set to ``linux`` and the
idf.py build
idf.py monitor
Troubleshooting
---------------
Since the applications are compiled for the host, they can be debugged with all the tools available on the host. For example, these could be `GDB <https://man7.org/linux/man-pages/man1/gdb.1.html>`_ and `Valgrind <https://linux.die.net/man/1/valgrind>`_ on Linux. For cases where no debugger is attached, the segmentation fault and abort signal handlers are customized to print additional information to the user and to increase compatibility with the ESP-IDF tools.
.. note::
The following features are by no means a replacement for running the application in a debugger. They are only meant to give some additional information, e.g., if a battery of tests runs on Linux in a CI/CD system where only the application logs are collected. To trace down the actual issue in most cases, you will need to reproduce it with a debugger attached. A debugger is much more convenient too, because, for example, you do not need to convert addresses to line numbers.
Segmentation Faults
^^^^^^^^^^^^^^^^^^^
On Linux, an application prints an error message and a rudimentary backtrace once it encounters a segmentation fault. This information can be used to find the line numbers in the source code where the issue occurred. The following is an example of a segmentation fault in the Hello-World application:
.. code-block::
...
Hello world!
ERROR: Segmentation Fault, here's your backtrace:
path/to/esp-idf/examples/get-started/hello_world/build/hello_world.elf(+0x2d1b)[0x55d3f636ad1b]
/lib/x86_64-linux-gnu/libc.so.6(+0x3c050)[0x7f49f0e00050]
path/to/esp-idf/examples/get-started/hello_world/build/hello_world.elf(+0x6198)[0x55d3f636e198]
path/to/esp-idf/examples/get-started/hello_world/build/hello_world.elf(+0x5909)[0x55d3f636d909]
path/to/esp-idf/examples/get-started/hello_world/build/hello_world.elf(+0x2c93)[0x55d3f636ac93]
path/to/esp-idf/examples/get-started/hello_world/build/hello_world.elf(+0x484e)[0x55d3f636c84e]
/lib/x86_64-linux-gnu/libc.so.6(+0x89134)[0x7f49f0e4d134]
/lib/x86_64-linux-gnu/libc.so.6(+0x1097dc)[0x7f49f0ecd7dc]
Note that the addresses (``+0x...``) are relative binary addresses, which still need to be converted to the source code line numbers (see below).
Note furthermore that the backtrace is created from the signal handler, which means that the two uppermost stack frames are not of interest. Instead, the third line is the uppermost stack frame where the issue occurred:
.. code-block::
path/to/esp-idf/examples/get-started/hello_world/build/hello_world.elf(+0x6198)[0x55d3f636e198]
To retrieve the actual line in the source code, we need to call the tool ``addr2line`` with the file name and the relative address (in this case ``+0x6198``):
.. code-block::
$ addr2line -e path/to/esp-idf/examples/get-started/hello_world/build/hello_world.elf +0x6198
path/to/esp-idf/components/esp_hw_support/port/linux/chip_info.c:13
From here on, you should use elaborate debugging tools available on the host to further trace the issue down.
For more information on ``addr2line`` and how to call it, see the `addr2line man page <https://linux.die.net/man/1/addr2line>`_.
Aborts
^^^^^^
Once ``abort()`` has been called, the following line is printed:
.. code-block::
ERROR: Aborted
.. _component-linux-mock-support:
Component Linux/Mock Support Overview