fix(rmt): DMA descriptor alignment respect the cache line size

This commit is contained in:
morris 2024-01-05 11:18:03 +08:00
parent 86707a3af6
commit d97a46160d
9 changed files with 61 additions and 53 deletions

View File

@ -9,6 +9,6 @@ endif()
idf_component_register(SRCS ${srcs}
INCLUDE_DIRS ${public_include}
PRIV_REQUIRES "esp_pm" "esp_driver_gpio"
PRIV_REQUIRES "esp_pm" "esp_driver_gpio" "esp_mm"
LDFRAGMENTS "linker.lf"
)

View File

@ -66,7 +66,7 @@ static size_t IRAM_ATTR rmt_encode_bytes(rmt_encoder_t *encoder, rmt_channel_han
// where to put the encoded symbols? DMA buffer or RMT HW memory
rmt_symbol_word_t *mem_to_nc = NULL;
if (channel->dma_chan) {
mem_to_nc = (rmt_symbol_word_t *)RMT_GET_NON_CACHE_ADDR(channel->dma_mem_base);
mem_to_nc = tx_chan->dma_mem_base_nc;
} else {
mem_to_nc = channel->hw_mem_base;
}
@ -176,7 +176,7 @@ static size_t IRAM_ATTR rmt_encode_copy(rmt_encoder_t *encoder, rmt_channel_hand
// where to put the encoded symbols? DMA buffer or RMT HW memory
rmt_symbol_word_t *mem_to_nc = NULL;
if (channel->dma_chan) {
mem_to_nc = (rmt_symbol_word_t *)RMT_GET_NON_CACHE_ADDR(channel->dma_mem_base);
mem_to_nc = tx_chan->dma_mem_base_nc;
} else {
mem_to_nc = channel->hw_mem_base;
}

View File

@ -127,7 +127,6 @@ struct rmt_channel_t {
_Atomic rmt_fsm_t fsm; // channel life cycle specific FSM
rmt_channel_direction_t direction; // channel direction
rmt_symbol_word_t *hw_mem_base; // base address of RMT channel hardware memory
rmt_symbol_word_t *dma_mem_base; // base address of RMT channel DMA buffer
gdma_channel_handle_t dma_chan; // DMA channel
esp_pm_lock_handle_t pm_lock; // power management lock
#if CONFIG_PM_ENABLE
@ -157,6 +156,8 @@ typedef struct {
struct rmt_tx_channel_t {
rmt_channel_t base; // channel base class
rmt_symbol_word_t *dma_mem_base; // base address of RMT channel DMA buffer
rmt_symbol_word_t *dma_mem_base_nc; // base address of RMT channel DMA buffer, accessed in non-cached way
size_t mem_off; // runtime argument, indicating the next writing position in the RMT hardware memory
size_t mem_end; // runtime argument, indicating the end of current writing region
size_t ping_pong_symbols; // ping-pong size (half of the RMT channel memory)

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -17,6 +17,7 @@
#include "esp_log.h"
#include "esp_check.h"
#include "esp_memory_utils.h"
#include "esp_cache.h"
#include "esp_rom_gpio.h"
#include "soc/rmt_periph.h"
#include "soc/rtc.h"
@ -26,7 +27,6 @@
#include "driver/gpio.h"
#include "driver/rmt_rx.h"
#include "rmt_private.h"
#include "rom/cache.h"
#define ALIGN_UP(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
#define ALIGN_DOWN(num, align) ((num) & ~((align) - 1))
@ -199,7 +199,10 @@ esp_err_t rmt_new_rx_channel(const rmt_rx_channel_config_t *config, rmt_channel_
num_dma_nodes = config->mem_block_symbols * sizeof(rmt_symbol_word_t) / RMT_DMA_DESC_BUF_MAX_SIZE + 1;
num_dma_nodes = MAX(2, num_dma_nodes); // at least 2 DMA nodes for ping-pong
// DMA descriptors must be placed in internal SRAM
rx_channel->dma_nodes = heap_caps_aligned_calloc(RMT_DMA_DESC_ALIGN, num_dma_nodes, sizeof(rmt_dma_descriptor_t), mem_caps);
uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
// the alignment should meet both the DMA and cache requirement
size_t alignment = MAX(data_cache_line_size, RMT_DMA_DESC_ALIGN);
rx_channel->dma_nodes = heap_caps_aligned_calloc(alignment, num_dma_nodes, sizeof(rmt_dma_descriptor_t), mem_caps);
ESP_GOTO_ON_FALSE(rx_channel->dma_nodes, ESP_ERR_NO_MEM, err, TAG, "no mem for rx channel DMA nodes");
// we will use the non-cached address to manipulate the DMA descriptor, for simplicity
rx_channel->dma_nodes_nc = (rmt_dma_descriptor_t *)RMT_GET_NON_CACHE_ADDR(rx_channel->dma_nodes);
@ -345,13 +348,14 @@ esp_err_t rmt_receive(rmt_channel_handle_t channel, void *buffer, size_t buffer_
size_t last_dma_block_size = 0;
if (channel->dma_chan) {
ESP_RETURN_ON_FALSE_ISR(esp_ptr_internal(buffer), ESP_ERR_INVALID_ARG, TAG, "buffer must locate in internal RAM for DMA use");
#if CONFIG_IDF_TARGET_ESP32P4
uint32_t data_cache_line_mask = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA) - 1;
ESP_RETURN_ON_FALSE_ISR(((uintptr_t)buffer & data_cache_line_mask) == 0, ESP_ERR_INVALID_ARG, TAG, "buffer must be aligned to cache line size");
ESP_RETURN_ON_FALSE_ISR((buffer_size & data_cache_line_mask) == 0, ESP_ERR_INVALID_ARG, TAG, "buffer size must be aligned to cache line size");
#endif
// Currently we assume the user buffer is allocated from internal RAM, PSRAM is not supported yet.
ESP_RETURN_ON_FALSE_ISR(esp_ptr_internal(buffer), ESP_ERR_INVALID_ARG, TAG, "user buffer not allocated from internal RAM");
uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
// DMA doesn't have alignment requirement for SRAM buffer if the burst mode is not enabled,
// but we need to make sure the buffer is aligned to cache line size
uint32_t align_mask = data_cache_line_size ? (data_cache_line_size - 1) : 0;
ESP_RETURN_ON_FALSE_ISR(((uintptr_t)buffer & align_mask) == 0, ESP_ERR_INVALID_ARG, TAG, "buffer address not aligned");
ESP_RETURN_ON_FALSE_ISR((buffer_size & align_mask) == 0, ESP_ERR_INVALID_ARG, TAG, "buffer size not aligned");
ESP_RETURN_ON_FALSE_ISR(buffer_size <= rx_chan->num_dma_nodes * RMT_DMA_DESC_BUF_MAX_SIZE,
ESP_ERR_INVALID_ARG, TAG, "buffer size exceeds DMA capacity");
per_dma_block_size = buffer_size / rx_chan->num_dma_nodes;
@ -752,13 +756,11 @@ static bool IRAM_ATTR rmt_dma_rx_one_block_cb(gdma_channel_handle_t dma_chan, gd
rmt_rx_trans_desc_t *trans_desc = &rx_chan->trans_desc;
uint32_t channel_id = channel->channel_id;
#if CONFIG_IDF_TARGET_ESP32P4
int invalidate_map = CACHE_MAP_L1_DCACHE;
if (esp_ptr_external_ram((const void *)trans_desc->buffer)) {
invalidate_map |= CACHE_MAP_L2_CACHE;
uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
if (data_cache_line_size) {
// invalidate the user buffer, so that the DMA modified data can be seen by CPU
esp_cache_msync(trans_desc->buffer, trans_desc->buffer_size, ESP_CACHE_MSYNC_FLAG_DIR_M2C);
}
Cache_Invalidate_Addr(invalidate_map, (uint32_t)trans_desc->buffer, trans_desc->buffer_size);
#endif
if (event_data->flags.normal_eof) {
// if the DMA received an EOF, it means the RMT peripheral has received an "end marker"

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -21,6 +21,8 @@
#include "soc/rtc.h"
#include "hal/rmt_ll.h"
#include "hal/gpio_hal.h"
#include "hal/cache_hal.h"
#include "hal/cache_ll.h"
#include "driver/gpio.h"
#include "driver/rmt_tx.h"
#include "rmt_private.h"
@ -47,10 +49,16 @@ static bool rmt_dma_tx_eof_cb(gdma_channel_handle_t dma_chan, gdma_event_data_t
static esp_err_t rmt_tx_init_dma_link(rmt_tx_channel_t *tx_channel, const rmt_tx_channel_config_t *config)
{
rmt_symbol_word_t *dma_mem_base = heap_caps_calloc(1, sizeof(rmt_symbol_word_t) * config->mem_block_symbols,
RMT_MEM_ALLOC_CAPS | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
// For simplicity, the encoder will access the dma_mem_base in a non-cached way
// and we allocate the dma_mem_base from the internal SRAM for performance
uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
// the alignment should meet both the DMA and cache requirement
size_t alignment = MAX(data_cache_line_size, sizeof(rmt_symbol_word_t));
rmt_symbol_word_t *dma_mem_base = heap_caps_aligned_calloc(alignment, config->mem_block_symbols, sizeof(rmt_symbol_word_t),
RMT_MEM_ALLOC_CAPS | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
ESP_RETURN_ON_FALSE(dma_mem_base, ESP_ERR_NO_MEM, TAG, "no mem for tx DMA buffer");
tx_channel->base.dma_mem_base = dma_mem_base;
tx_channel->dma_mem_base = dma_mem_base;
tx_channel->dma_mem_base_nc = (rmt_symbol_word_t *)RMT_GET_NON_CACHE_ADDR(dma_mem_base);
for (int i = 0; i < RMT_DMA_NODES_PING_PONG; i++) {
// each descriptor shares half of the DMA buffer
tx_channel->dma_nodes_nc[i].buffer = dma_mem_base + tx_channel->ping_pong_symbols * i;
@ -193,8 +201,8 @@ static esp_err_t rmt_tx_destroy(rmt_tx_channel_t *tx_channel)
vQueueDeleteWithCaps(tx_channel->trans_queues[i]);
}
}
if (tx_channel->base.dma_mem_base) {
free(tx_channel->base.dma_mem_base);
if (tx_channel->dma_mem_base) {
free(tx_channel->dma_mem_base);
}
if (tx_channel->base.group) {
// de-register channel from RMT group
@ -239,9 +247,12 @@ esp_err_t rmt_new_tx_channel(const rmt_tx_channel_config_t *config, rmt_channel_
ESP_GOTO_ON_FALSE(tx_channel, ESP_ERR_NO_MEM, err, TAG, "no mem for tx channel");
// create DMA descriptors
if (config->flags.with_dma) {
// DMA descriptors must be placed in internal SRAM
mem_caps |= MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA;
tx_channel->dma_nodes = heap_caps_aligned_calloc(RMT_DMA_DESC_ALIGN, RMT_DMA_NODES_PING_PONG, sizeof(rmt_dma_descriptor_t), mem_caps);
// DMA descriptors must be placed in internal SRAM
uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
// the alignment should meet both the DMA and cache requirement
size_t alignment = MAX(data_cache_line_size, RMT_DMA_DESC_ALIGN);
tx_channel->dma_nodes = heap_caps_aligned_calloc(alignment, RMT_DMA_NODES_PING_PONG, sizeof(rmt_dma_descriptor_t), mem_caps);
ESP_GOTO_ON_FALSE(tx_channel->dma_nodes, ESP_ERR_NO_MEM, err, TAG, "no mem for tx DMA nodes");
// we will use the non-cached address to manipulate the DMA descriptor, for simplicity
tx_channel->dma_nodes_nc = (rmt_dma_descriptor_t *)RMT_GET_NON_CACHE_ADDR(tx_channel->dma_nodes);
@ -327,9 +338,9 @@ esp_err_t rmt_new_tx_channel(const rmt_tx_channel_config_t *config, rmt_channel_
tx_channel->base.disable = rmt_tx_disable;
// return general channel handle
*ret_chan = &tx_channel->base;
ESP_LOGD(TAG, "new tx channel(%d,%d) at %p, gpio=%d, res=%"PRIu32"Hz, hw_mem_base=%p, dma_mem_base=%p, dma_nodes_nc=%p,ping_pong_size=%zu, queue_depth=%zu",
ESP_LOGD(TAG, "new tx channel(%d,%d) at %p, gpio=%d, res=%"PRIu32"Hz, hw_mem_base=%p, dma_mem_base=%p, dma_nodes=%p, ping_pong_size=%zu, queue_depth=%zu",
group_id, channel_id, tx_channel, config->gpio_num, tx_channel->base.resolution_hz,
tx_channel->base.hw_mem_base, tx_channel->base.dma_mem_base, tx_channel->dma_nodes_nc, tx_channel->ping_pong_symbols, tx_channel->queue_size);
tx_channel->base.hw_mem_base, tx_channel->dma_mem_base, tx_channel->dma_nodes, tx_channel->ping_pong_symbols, tx_channel->queue_size);
return ESP_OK;
err:
@ -569,7 +580,7 @@ static void IRAM_ATTR rmt_tx_mark_eof(rmt_tx_channel_t *tx_chan)
rmt_tx_trans_desc_t *cur_trans = tx_chan->cur_trans;
rmt_dma_descriptor_t *desc_nc = NULL;
if (channel->dma_chan) {
mem_to_nc = (rmt_symbol_word_t *)RMT_GET_NON_CACHE_ADDR(channel->dma_mem_base);
mem_to_nc = tx_chan->dma_mem_base_nc;
} else {
mem_to_nc = channel->hw_mem_base;
}
@ -646,7 +657,7 @@ static void IRAM_ATTR rmt_tx_do_transaction(rmt_tx_channel_t *tx_chan, rmt_tx_tr
tx_chan->dma_nodes_nc[i].next = &tx_chan->dma_nodes[i + 1]; // note, we must use the cache address for the next pointer
tx_chan->dma_nodes_nc[i].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_CPU;
}
tx_chan->dma_nodes_nc[1].next = &tx_chan->dma_nodes[0];
tx_chan->dma_nodes_nc[RMT_DMA_NODES_PING_PONG - 1].next = &tx_chan->dma_nodes[0];
}
#endif // SOC_RMT_SUPPORT_DMA

View File

@ -3,9 +3,5 @@
components/esp_driver_rmt/test_apps/rmt:
disable:
- if: SOC_RMT_SUPPORTED != 1
disable_test:
- if: IDF_TARGET == "esp32p4"
temporary: true
reason: test not passing, should be re-enabled # TODO: IDF-8961
depends_components:
- esp_driver_rmt

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -107,7 +107,8 @@ typedef struct {
TaskHandle_t task_to_notify;
size_t received_symbol_num;
rmt_receive_config_t rx_config;
rmt_symbol_word_t remote_codes[128];
rmt_symbol_word_t* remote_codes;
size_t remote_codes_mem_size;
} test_rx_user_data_t;
IRAM_ATTR
@ -121,7 +122,7 @@ static bool test_rmt_rx_done_callback(rmt_channel_handle_t channel, const rmt_rx
if (test_user_data->received_symbol_num == TEST_RMT_SYMBOLS) {
vTaskNotifyGiveFromISR(test_user_data->task_to_notify, &high_task_wakeup);
} else {
rmt_receive(channel, test_user_data->remote_codes, sizeof(test_user_data->remote_codes), &test_user_data->rx_config);
rmt_receive(channel, test_user_data->remote_codes, test_user_data->remote_codes_mem_size, &test_user_data->rx_config);
}
}
return high_task_wakeup == pdTRUE;
@ -129,6 +130,11 @@ static bool test_rmt_rx_done_callback(rmt_channel_handle_t channel, const rmt_rx
static void test_rmt_rx_iram_safe(size_t mem_block_symbols, bool with_dma, rmt_clock_source_t clk_src)
{
uint32_t const test_rx_buffer_symbols = 128;
rmt_symbol_word_t *remote_codes = heap_caps_aligned_calloc(64, test_rx_buffer_symbols, sizeof(rmt_symbol_word_t),
MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA);
TEST_ASSERT_NOT_NULL(remote_codes);
rmt_rx_channel_config_t rx_channel_cfg = {
.clk_src = clk_src,
.resolution_hz = 1000000, // 1MHz, 1 tick = 1us
@ -155,6 +161,8 @@ static void test_rmt_rx_iram_safe(size_t mem_block_symbols, bool with_dma, rmt_c
.signal_range_min_ns = 1250,
.signal_range_max_ns = 12000000,
},
.remote_codes = remote_codes,
.remote_codes_mem_size = test_rx_buffer_symbols * sizeof(rmt_symbol_word_t),
};
TEST_ESP_OK(rmt_rx_register_event_callbacks(rx_channel, &cbs, &test_user_data));
@ -162,7 +170,7 @@ static void test_rmt_rx_iram_safe(size_t mem_block_symbols, bool with_dma, rmt_c
TEST_ESP_OK(rmt_enable(rx_channel));
// ready to receive
TEST_ESP_OK(rmt_receive(rx_channel, test_user_data.remote_codes, sizeof(test_user_data.remote_codes), &test_user_data.rx_config));
TEST_ESP_OK(rmt_receive(rx_channel, remote_codes, test_user_data.remote_codes_mem_size, &test_user_data.rx_config));
// disable the flash cache, and simulate input signal by GPIO
unity_utils_run_cache_disable_stub(test_simulate_input_post_cache_disable, TEST_RMT_GPIO_NUM_A);
@ -174,6 +182,7 @@ static void test_rmt_rx_iram_safe(size_t mem_block_symbols, bool with_dma, rmt_c
TEST_ESP_OK(rmt_disable(rx_channel));
printf("delete channels and encoder\r\n");
TEST_ESP_OK(rmt_del_channel(rx_channel));
free(remote_codes);
}
TEST_CASE("rmt rx iram safe", "[rmt]")

View File

@ -10,6 +10,7 @@ from pytest_embedded import Dut
@pytest.mark.esp32c3
@pytest.mark.esp32c6
@pytest.mark.esp32h2
@pytest.mark.esp32p4
@pytest.mark.generic
@pytest.mark.parametrize(
'config',

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -214,11 +214,7 @@ typedef enum {
/**
* @brief Array initializer for all supported clock sources of RMT
*/
#if SOC_CLK_TREE_SUPPORTED
#define SOC_RMT_CLKS {SOC_MOD_CLK_PLL_F80M, SOC_MOD_CLK_RC_FAST, SOC_MOD_CLK_XTAL}
#else
#define SOC_RMT_CLKS {SOC_MOD_CLK_XTAL}
#endif
/**
* @brief Type of RMT clock source
@ -227,11 +223,7 @@ typedef enum {
RMT_CLK_SRC_PLL_F80M = SOC_MOD_CLK_PLL_F80M, /*!< Select PLL_F80M as the source clock */
RMT_CLK_SRC_RC_FAST = SOC_MOD_CLK_RC_FAST, /*!< Select RC_FAST as the source clock */
RMT_CLK_SRC_XTAL = SOC_MOD_CLK_XTAL, /*!< Select XTAL as the source clock */
#if SOC_CLK_TREE_SUPPORTED
RMT_CLK_SRC_DEFAULT = SOC_MOD_CLK_PLL_F80M, /*!< Select PLL_F80M as the default choice */
#else
RMT_CLK_SRC_DEFAULT = SOC_MOD_CLK_XTAL, /*!< Select XTAL as the default choice */
#endif
} soc_periph_rmt_clk_src_t;
/**
@ -240,11 +232,7 @@ typedef enum {
typedef enum {
RMT_BASECLK_PLL_F80M = SOC_MOD_CLK_PLL_F80M, /*!< RMT source clock is PLL_F80M */
RMT_BASECLK_XTAL = SOC_MOD_CLK_XTAL, /*!< RMT source clock is XTAL */
#if SOC_CLK_TREE_SUPPORTED
RMT_BASECLK_DEFAULT = SOC_MOD_CLK_PLL_F80M, /*!< RMT source clock default choice is PLL_F80M */
#else
RMT_BASECLK_DEFAULT = SOC_MOD_CLK_XTAL, /*!< RMT source clock default choice is XTAL */
#endif
} soc_periph_rmt_clk_src_legacy_t;
//////////////////////////////////////////////////Temp Sensor///////////////////////////////////////////////////////////