Merge branch 'change/improve_xip_docs' into 'master'

psram: improve xip docs

See merge request espressif/esp-idf!33482
This commit is contained in:
Armando (Dou Yiwen) 2024-09-14 15:59:03 +08:00
commit f4dd74ed96
13 changed files with 88 additions and 27 deletions

View File

@ -136,10 +136,18 @@ esp_err_t mmu_config_psram_rodata_segment(uint32_t start_page, uint32_t psram_si
/*----------------------------------------------------------------------------
Part 2 APIs (See @Backgrounds on top of this file)
-------------------------------------------------------------------------------*/
extern int _instruction_reserved_start;
extern int _instruction_reserved_end;
extern int _rodata_reserved_start;
extern int _rodata_reserved_end;
/**
* If using `int`, then for CLANG, with enabled optimization when inlined function is provided with the address of external symbol, the two least bits of the constant used inside that function get cleared.
* Optimizer assumes that address of external symbol should be aligned to 4-bytes and therefore aligns constant value used for bitwise AND operation with that address.
*
* This means `extern int _instruction_reserved_start;` can be unaligned to 4 bytes, whereas using `char` can solve this issue.
*
* As we only use these symbol address, we declare them as `char` here
*/
extern char _instruction_reserved_start;
extern char _instruction_reserved_end;
extern char _rodata_reserved_start;
extern char _rodata_reserved_end;
//------------------------------------Copy Flash .text to PSRAM-------------------------------------//
#if CONFIG_SPIRAM_FETCH_INSTRUCTIONS

View File

@ -28,10 +28,18 @@
#define ALIGN_UP_BY(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
#define ALIGN_DOWN_BY(num, align) ((num) & (~((align) - 1)))
extern int _instruction_reserved_start;
extern int _instruction_reserved_end;
extern int _rodata_reserved_start;
extern int _rodata_reserved_end;
/**
* If using `int`, then for CLANG, with enabled optimization when inlined function is provided with the address of external symbol, the two least bits of the constant used inside that function get cleared.
* Optimizer assumes that address of external symbol should be aligned to 4-bytes and therefore aligns constant value used for bitwise AND operation with that address.
*
* This means `extern int _instruction_reserved_start;` can be unaligned to 4 bytes, whereas using `char` can solve this issue.
*
* As we only use these symbol address, we declare them as `char` here
*/
extern char _instruction_reserved_start;
extern char _instruction_reserved_end;
extern char _rodata_reserved_start;
extern char _rodata_reserved_end;
const static char *TAG = "mmu_psram";
static uint32_t s_irom_vaddr_start;

View File

@ -136,10 +136,19 @@ extern int _rtc_bss_end;
extern int _bss_bt_start;
extern int _bss_bt_end;
#endif // CONFIG_BT_LE_RELEASE_IRAM_SUPPORTED
extern int _instruction_reserved_start;
extern int _instruction_reserved_end;
extern int _rodata_reserved_start;
extern int _rodata_reserved_end;
/**
* If using `int`, then for CLANG, with enabled optimization when inlined function is provided with the address of external symbol, the two least bits of the constant used inside that function get cleared.
* Optimizer assumes that address of external symbol should be aligned to 4-bytes and therefore aligns constant value used for bitwise AND operation with that address.
*
* This means `extern int _instruction_reserved_start;` can be unaligned to 4 bytes, whereas using `char` can solve this issue.
*
* As we only use these symbol address, we declare them as `char` here
*/
extern char _instruction_reserved_start;
extern char _instruction_reserved_end;
extern char _rodata_reserved_start;
extern char _rodata_reserved_end;
extern int _vector_table;
#if SOC_INT_CLIC_SUPPORTED

View File

@ -1007,6 +1007,10 @@ config SOC_SPI_MAX_PRE_DIVIDER
int
default 16
config SOC_SPIRAM_XIP_SUPPORTED
bool
default y
config SOC_SPI_MEM_SUPPORT_AUTO_WAIT_IDLE
bool
default y

View File

@ -426,6 +426,9 @@
#define SOC_MEMSPI_IS_INDEPENDENT 1
#define SOC_SPI_MAX_PRE_DIVIDER 16
/*-------------------------- SPIRAM CAPS ----------------------------------------*/
#define SOC_SPIRAM_XIP_SUPPORTED 1
/*-------------------------- SPI MEM CAPS ---------------------------------------*/
#define SOC_SPI_MEM_SUPPORT_AUTO_WAIT_IDLE (1)
#define SOC_SPI_MEM_SUPPORT_AUTO_SUSPEND (1)

View File

@ -507,6 +507,10 @@ config SOC_MEMSPI_IS_INDEPENDENT
bool
default y
config SOC_SPIRAM_XIP_SUPPORTED
bool
default y
config SOC_SPI_MEM_SUPPORT_AUTO_WAIT_IDLE
bool
default y

View File

@ -309,6 +309,9 @@
#define SOC_SPI_PERIPH_SUPPORT_MULTILINE_MODE(host_id) ({(void)host_id; 1;})
#define SOC_MEMSPI_IS_INDEPENDENT 1
/*-------------------------- SPIRAM CAPS ----------------------------------------*/
#define SOC_SPIRAM_XIP_SUPPORTED 1
/*-------------------------- SPI MEM CAPS ---------------------------------------*/
#define SOC_SPI_MEM_SUPPORT_AUTO_WAIT_IDLE (1)
#define SOC_SPI_MEM_SUPPORT_AUTO_SUSPEND (1)

View File

@ -36,13 +36,13 @@
#include "spi_flash_mmap.h"
#if CONFIG_SPIRAM_FETCH_INSTRUCTIONS
extern int _instruction_reserved_start;
extern int _instruction_reserved_end;
extern char _instruction_reserved_start;
extern char _instruction_reserved_end;
#endif
#if CONFIG_SPIRAM_RODATA
extern int _rodata_reserved_start;
extern int _rodata_reserved_end;
extern char _rodata_reserved_start;
extern char _rodata_reserved_end;
#endif
#if !CONFIG_SPI_FLASH_ROM_IMPL

View File

@ -167,13 +167,13 @@ By applying the macro ``EXT_RAM_NOINIT_ATTR``, data could be moved from the inte
The benefits of XiP from PSRAM is:
- PSRAM access speed is faster than Flash access. So the performance is better.
- PSRAM access speed may be faster than Flash access, so the overall application performance may be better. For example, if the PSRAM is an Octal mode (8-line-PSRAM) and is configured to 80 MHz, then it is faster than a Quad flash (4-line-flash) which is configured to 80 MHz.
- The cache will not be disabled during an SPI1 flash operation, thus optimizing the code execution performance during SPI1 flash operations. For ISRs, ISR callbacks and data which might be accessed during this period, you do not need to place them in internal RAM, thus internal RAM usage can be optimized. This feature is useful for high throughput peripheral involved applications to improve the performance during SPI1 flash operations.
:example:`system/xip_from_psram` demonstrates the usage of XiP from PSRAM, optimizing internal RAM usage and avoiding cache disabling during flash operations from user call (e.g., flash erase/read/write operations).
.. only:: esp32p4
.. only:: not (esp32s2 or esp32s3)
.. _external_ram_config_xip:
@ -182,7 +182,15 @@ By applying the macro ``EXT_RAM_NOINIT_ATTR``, data could be moved from the inte
The :ref:`CONFIG_SPIRAM_XIP_FROM_PSRAM` option enables the executable in place (XiP) from PSRAM feature. With this option sections that are normally placed in flash, ``.text`` (for instructions) and ``.rodata`` (for read only data), will be loaded in PSRAM.
With this option enabled, the cache will not be disabled during an SPI1 flash operation, so code that requires executing during an SPI1 flash operation does not have to be placed in internal RAM. Because P4 flash and PSRAM are using two separate SPI buses, moving flash content to PSRAM will actually increase the load of the PSRAM MSPI bus, so the exact impact on performance will be dependent on your app usage of PSRAM. For example, as the PSRAM bus speed could be much faster than flash bus speed, if the instructions and data that are used to be in flash are not accessed very frequently, you might get better performance with this option enabled. We suggest doing performance profiling to determine if enabling this option.
With this option enabled, the cache will not be disabled during an SPI1 flash operation, so code that requires executing during an SPI1 flash operation does not have to be placed in internal RAM.
.. only:: SOC_MMU_PER_EXT_MEM_TARGET
Because {IDF_TARGET_NAME} flash and PSRAM are using two separate SPI buses, moving flash content to PSRAM will actually increase the load of the PSRAM MSPI bus, so the exact impact on performance will be dependent on your app usage of PSRAM.
For example, as the PSRAM bus speed could be faster than flash bus speed (e.g., if the PSRAM is a HEX (16-line-PSRAM on ESP32P4) and is configured to 200 Mhz, then it is much faster than a Quad flash (4-line-flash) which is configured to 80 MHz.).
If the instructions and data that are used to be in flash are not accessed very frequently, you should get better performance with this option enabled. We suggest doing performance profiling to determine how enabling this option will impact your system.
Restrictions
============

View File

@ -167,13 +167,13 @@ ESP-IDF 启动过程中,片外 RAM 被映射到数据虚拟地址空间,该
在 PSRAM 中直接执行代码的好处包括:
- PSRAM 访问速度快于 flash因此性能更好。
- PSRAM 访问速度可能快于 flash因此性能更好。例如,如果使用的 PSRAM 是八线的,且被配置为 80 MHz而 flash 是4线的且被配置为 80 Mhz那么 PSRAM 的访问速度是快于 flash 的。
- 在进行 SPI1 flash 操作期间cache 仍然保持启用状态,这样可以优化代码执行性能。由于无需把中断服务程序 (ISR)、ISR 回调和在此期间可能被访问的数据放置在片上 RAM 中,片上 RAM 可用于其他用途,从而提高了使用效率。这个特性适用于需要处理大量数据的高吞吐量外设应用,能显著提高 SPI1 flash 操作期间的性能。
:example:`system/xip_from_psram` 演示了如何从 PSRAM 直接执行代码,从而优化内部 RAM 的使用,并避免用户调用 flash 操作(例如闪存擦除/读取/写入操作)时关闭 cache。
.. only:: esp32p4
.. only:: not (esp32s2 or esp32s3)
.. _external_ram_config_xip:
@ -182,7 +182,13 @@ ESP-IDF 启动过程中,片外 RAM 被映射到数据虚拟地址空间,该
启用 :ref:`CONFIG_SPIRAM_XIP_FROM_PSRAM` 选项后能在 PSRAM 中直接执行代码。通常放置在 flash 中的段,如 ``.text`` 部分的数据(用于指令)和 ``.rodata`` 部分的数据(用于只读数据),将被加载到 PSRAM 中。
启用此选项后SPI1 flash 操作期间 cache 保持启用状态,因此需要执行的代码在此期间不必放置在内部 RAM 中。由于 ESP32-P4 flash 和 PSRAM 使用两个独立的 SPI 总线,将 flash 内容移动到 PSRAM 实际上增加了 PSRAM MSPI 总线的负载,因此访问速度相对较慢。应用程序在运行过程中对 PSRAM 的使用会直接影响整体性能。因此,建议先进行性能分析以确定启用此选项是否会显著影响应用程序性能。
启用此选项后SPI1 flash 操作期间 cache 保持启用状态,因此需要执行的代码在此期间不必放置在内部 RAM 中。
.. only:: SOC_MMU_PER_EXT_MEM_TARGET
由于 {IDF_TARGET_NAME} flash 和 PSRAM 使用两个独立的 SPI 总线,将 flash 内容移动到 PSRAM 实际上增加了 PSRAM MSPI 总线的负载,
例如PSRAM 的访问速度可能快于 flash (比如在 ESP32-P4 上,选择的 PSRAM 是十六线的并将其配置为 200 MHz 此时 PSRAM 的访问速度是远快于一颗被配置为 80 MHz 的四线 flash 芯片),如果这些之前在 flash 中被就地执行的指令和数据不是十分频繁地被访问,则使能这个选项会增加系统的性能。建议先进行性能分析以确定启用此选项是否会显著影响应用程序性能。
片外 RAM 使用限制
===================

View File

@ -1,5 +1,5 @@
| Supported Targets | ESP32-P4 | ESP32-S2 | ESP32-S3 |
| ----------------- | -------- | -------- | -------- |
| Supported Targets | ESP32-C5 | ESP32-C61 | ESP32-P4 | ESP32-S2 | ESP32-S3 |
| ----------------- | -------- | --------- | -------- | -------- | -------- |
# XIP (Execute-In-Place) From PSRAM Example

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2022 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Unlicense OR CC0-1.0
*/
@ -14,6 +14,12 @@
#include "esp_flash.h"
#include "esp_timer.h"
#if CONFIG_IDF_TARGET_ESP32C5 || CONFIG_IDF_TARGET_ESP32C61
#define EXAMPLE_TIMER_ALERT_TIME (1 * 3 * 1000)
#else
#define EXAMPLE_TIMER_ALERT_TIME (1 * 10 * 1000)
#endif
static void oneshot_timer_callback(void* arg);
static void cb_in_psram(void);
static void cb_in_iram(void);
@ -41,14 +47,14 @@ void app_main(void)
ESP_LOGI(TAG, "found partition '%s' at offset 0x%"PRIx32" with size 0x%"PRIx32, part->label, part->address, part->size);
ESP_ERROR_CHECK(esp_flash_erase_region(part->flash_chip, part->address, part->size));
ESP_ERROR_CHECK(esp_timer_start_once(oneshot_timer, 1 * 10 * 1000));
ESP_ERROR_CHECK(esp_timer_start_once(oneshot_timer, EXAMPLE_TIMER_ALERT_TIME));
ESP_ERROR_CHECK(esp_flash_erase_region(part->flash_chip, part->address, part->size));
ESP_LOGI(TAG, "callback(in PSRAM) response time: %d us", time_cb_end - time_cb_start);
instructions_in_psram = false;
ESP_ERROR_CHECK(esp_timer_start_once(oneshot_timer, 1 * 10 * 1000));
ESP_ERROR_CHECK(esp_timer_start_once(oneshot_timer, EXAMPLE_TIMER_ALERT_TIME));
ESP_ERROR_CHECK(esp_flash_erase_region(part->flash_chip, part->address, part->size));
ESP_LOGI(TAG, "callback(in IRAM) response time: %d us", time_cb_end - time_cb_start);

View File

@ -7,6 +7,8 @@ from pytest_embedded.dut import Dut
@pytest.mark.esp32s2
@pytest.mark.esp32s3
@pytest.mark.esp32p4
@pytest.mark.esp32c5
@pytest.mark.esp32c61
@pytest.mark.generic
# in order to build the default sdkconfig(the CI won't build the sdkconfig.defaults if there is a sdkconfig.ci.xx)
@pytest.mark.parametrize(