From f91933644845589142618e37d3f095e885d132de Mon Sep 17 00:00:00 2001
From: Chen Jichang
Date: Wed, 31 Jul 2024 13:37:58 +0800
Subject: [PATCH] feat(dma): Add helper functions to split aligned buffer

In some cases the DMA buffer must satisfy specific alignment
requirements. This MR adds two helper functions: one to split an
unaligned buffer into aligned parts, and one to merge the parts back
into the original buffer. The cost is that each unaligned buffer
requires a stash buffer of twice the split alignment size, and this
memory must be managed by the caller.
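A minimal usage sketch (illustrative only: `rx_buf` and `rx_buf_len` are
placeholder names, and the 64-byte alignment is an assumption, not part
of this patch):

    size_t split_alignment = 64;
    // stash buffer: DMA-capable memory, aligned, two slots of split_alignment bytes each
    void *stash_buffer = heap_caps_aligned_calloc(split_alignment, 2, split_alignment,
                                                  MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
    dma_buffer_split_array_t align_array = {0};
    ESP_ERROR_CHECK(esp_dma_split_buffer_to_aligned(rx_buf, rx_buf_len, stash_buffer,
                                                    2 * split_alignment, split_alignment, &align_array));
    // ... mount align_array.aligned_buffer[0..2] on the RX link list and run the transfer ...
    // when the transfer completes, copy the stashed head/tail back into rx_buf
    ESP_ERROR_CHECK(esp_dma_merge_aligned_buffers(&align_array));
    free(stash_buffer);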
---
 components/esp_hw_support/dma/esp_dma_utils.c |  66 ++++++
 .../dma/include/esp_private/esp_dma_utils.h   |  85 ++++++++
 .../test_apps/dma/main/test_gdma.c            | 204 +++++++++++++++---
 3 files changed, 331 insertions(+), 24 deletions(-)
 create mode 100644 components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h

diff --git a/components/esp_hw_support/dma/esp_dma_utils.c b/components/esp_hw_support/dma/esp_dma_utils.c
index 0c1f0df1af..518cb0c5f9 100644
--- a/components/esp_hw_support/dma/esp_dma_utils.c
+++ b/components/esp_hw_support/dma/esp_dma_utils.c
@@ -13,15 +13,81 @@
 #include "esp_heap_caps.h"
 #include "esp_memory_utils.h"
 #include "esp_dma_utils.h"
+#include <string.h> // for memcpy() used in esp_dma_merge_aligned_buffers()
+#include "esp_private/esp_dma_utils.h"
 #include "esp_private/esp_cache_private.h"
 #include "soc/soc_caps.h"
 #include "hal/hal_utils.h"
+#include "hal/cache_hal.h"
+#include "hal/cache_ll.h"
+#include "esp_cache.h"
 
 static const char *TAG = "dma_utils";
 
 #define ALIGN_UP_BY(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
 #define ALIGN_DOWN_BY(num, align) ((num) & (~((align) - 1)))
 
+esp_err_t esp_dma_split_buffer_to_aligned(void *input_buffer, size_t input_buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array)
+{
+    esp_err_t ret = ESP_OK;
+    // split_alignment must be a power of two, and the stash buffer must provide one slot each for the head and the tail
+    ESP_RETURN_ON_FALSE(align_array && input_buffer && input_buffer_len && stash_buffer && split_alignment && !(split_alignment & (split_alignment - 1))
+                        && (stash_buffer_len >= 2 * split_alignment), ESP_ERR_INVALID_ARG, TAG, "invalid argument");
+    ESP_RETURN_ON_FALSE(!((uintptr_t)stash_buffer % split_alignment), ESP_ERR_INVALID_ARG, TAG, "stash buffer is not aligned");
+
+    uint8_t *input_bytes = (uint8_t *)input_buffer;
+    uint8_t *stash_bytes = (uint8_t *)stash_buffer;
+
+    // calculate the length of the unaligned head
+    size_t head_overflow_len = (uintptr_t)input_buffer % split_alignment;
+    head_overflow_len = head_overflow_len ? split_alignment - head_overflow_len : 0;
+    ESP_LOGD(TAG, "head_addr:%p split_alignment:%zu head_overflow_len:%zu", input_buffer, split_alignment, head_overflow_len);
+    // calculate the length of the unaligned tail
+    size_t tail_overflow_len = ((uintptr_t)input_buffer + input_buffer_len) % split_alignment;
+    ESP_LOGD(TAG, "tail_addr:%p split_alignment:%zu tail_overflow_len:%zu", input_bytes + input_buffer_len - tail_overflow_len, split_alignment, tail_overflow_len);
+
+    uint32_t extra_buf_count = 0;
+    // head and tail are staged in the stash buffer, the body is transferred in place
+    align_array->buf.head.recovery_address = input_bytes;
+    align_array->buf.head.aligned_buffer = stash_bytes + split_alignment * extra_buf_count++;
+    align_array->buf.head.length = head_overflow_len;
+    align_array->buf.body.recovery_address = input_bytes + head_overflow_len;
+    align_array->buf.body.aligned_buffer = input_bytes + head_overflow_len;
+    align_array->buf.body.length = input_buffer_len - head_overflow_len - tail_overflow_len;
+    align_array->buf.tail.recovery_address = input_bytes + input_buffer_len - tail_overflow_len;
+    align_array->buf.tail.aligned_buffer = stash_bytes + split_alignment * extra_buf_count++;
+    align_array->buf.tail.length = tail_overflow_len;
+
+    // special handling when the input buffer is no longer than the alignment: the whole buffer is treated as the head part
+    if (head_overflow_len >= input_buffer_len || tail_overflow_len >= input_buffer_len) {
+        align_array->buf.head.length = input_buffer_len;
+        align_array->buf.body.length = 0;
+        align_array->buf.tail.length = 0;
+    }
+
+    // parts with zero length don't reference any buffer
+    for (int i = 0; i < 3; i++) {
+        if (!align_array->aligned_buffer[i].length) {
+            align_array->aligned_buffer[i].aligned_buffer = NULL;
+            align_array->aligned_buffer[i].recovery_address = NULL;
+        }
+    }
+
+    return ret;
+}
+
+esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array)
+{
+    esp_err_t ret = ESP_OK;
+    ESP_RETURN_ON_FALSE(align_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
+
+    // the body is transferred in place, so only the head and the tail need to be copied back
+    if (align_array->buf.head.length) {
+        memcpy(align_array->buf.head.recovery_address, align_array->buf.head.aligned_buffer, align_array->buf.head.length);
+    }
+    if (align_array->buf.tail.length) {
+        memcpy(align_array->buf.tail.recovery_address, align_array->buf.tail.aligned_buffer, align_array->buf.tail.length);
+    }
+
+    return ret;
+}
+
 esp_err_t esp_dma_capable_malloc(size_t size, const esp_dma_mem_info_t *dma_mem_info, void **out_ptr, size_t *actual_size)
 {
     ESP_RETURN_ON_FALSE_ISR(dma_mem_info && out_ptr, ESP_ERR_INVALID_ARG, TAG, "null pointer");
diff --git a/components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h b/components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h
new file mode 100644
index 0000000000..b9ed67e93e
--- /dev/null
+++ b/components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h
@@ -0,0 +1,85 @@
+/*
+ * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include "esp_err.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief DMA buffer information
+ */
+typedef struct {
+    void *aligned_buffer;   //!< Buffer address
+    void *recovery_address; //!< Original buffer address to which the aligned buffer should be recovered
+    size_t length;          //!< Buffer length
+} dma_buffer_split_info_t;
+
+/**
+ * @brief DMA buffer aligned array
+ */
+typedef struct {
+    union {
+        struct {
+            dma_buffer_split_info_t head; //!< Aligned head part. Corresponds to the part of the original buffer where the head is not aligned
+            dma_buffer_split_info_t body; //!< Aligned body part. Corresponds to the already aligned part of the original buffer
+            dma_buffer_split_info_t tail; //!< Aligned tail part. Corresponds to the part of the original buffer where the tail is not aligned
+        } buf;
+        dma_buffer_split_info_t aligned_buffer[3]; //!< DMA aligned buffer array, consists of `head`, `body` and `tail`
+    };
+} dma_buffer_split_array_t;
+
+/**
+ * @brief Split an unaligned DMA buffer into an array of aligned DMA buffers
+ *
+ * @note The returned align array contains three parts: head, body and tail.
+ *       The length of each part is >= 0; a length of 0 means that the part does not exist
+ *
+ * @param[in] buffer Original DMA buffer address
+ * @param[in] buffer_len Original DMA buffer length
+ * @param[in] stash_buffer Extra buffer used to stash the unaligned head and tail parts; must be DMA-capable memory aligned to split_alignment
+ * @param[in] stash_buffer_len stash_buffer length, must be at least 2 * split_alignment
+ * @param[in] split_alignment Alignment of each buffer required by the DMA
+ * @param[out] align_array Aligned DMA buffer array
+ * @return
+ *      - ESP_OK: Split to aligned buffers successfully
+ *      - ESP_ERR_INVALID_ARG: Split to aligned buffers failed because of invalid argument
+ *
+ * brief sketch:
+ *                buffer alignment delimiter     buffer alignment delimiter
+ *                           │                              │
+ *        Origin Buffer      │                              │
+ *              │            ▼                              ▼
+ *              └──► ...---xxxxx|xxxxxxxxxx......xxxxxxxxxx|xxxxx---...
+ *                          │   │                          │   │
+ *                          ▼   └─────────────┬────────────┘   ▼
+ *  Aligned buffers:       Head       Body (used in place)    Tail
+ *                          │                                  │
+ *                          ▼                                  ▼
+ *                    |xxxxx......|                      |xxxxx......|
+ *                    (stash slot 0)                     (stash slot 1)
+ */
+esp_err_t esp_dma_split_buffer_to_aligned(void *buffer, size_t buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array);
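+
+/*
+ * Worked example (illustrative, not part of the API contract): with
+ * split_alignment = 64 and a 100-byte buffer at address 0x3fc9a02c
+ * (0x2c % 64 = 44), the split yields:
+ *   - head: 64 - 44 = 20 bytes, staged in stash_buffer slot 0
+ *   - body: 64 bytes, transferred in place starting at 0x3fc9a040
+ *   - tail: (44 + 100) % 64 = 16 bytes, staged in stash_buffer slot 1
+ * After the transfer, esp_dma_merge_aligned_buffers() copies the 20-byte
+ * head and the 16-byte tail from the stash slots back into the buffer.
+ */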
+
+/**
+ * @brief Merge the aligned buffer array back into the original buffer
+ *
+ * @param[in] align_array Aligned DMA buffer array
+ * @return
+ *      - ESP_OK: Merge aligned buffers to the original buffer successfully
+ *      - ESP_ERR_INVALID_ARG: Merge aligned buffers to the original buffer failed because of invalid argument
+ */
+esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/components/esp_hw_support/test_apps/dma/main/test_gdma.c b/components/esp_hw_support/test_apps/dma/main/test_gdma.c
index b5638f6f5e..abaf59a7ed 100644
--- a/components/esp_hw_support/test_apps/dma/main/test_gdma.c
+++ b/components/esp_hw_support/test_apps/dma/main/test_gdma.c
@@ -14,6 +14,7 @@
 #include "esp_heap_caps.h"
 #include "esp_private/gdma.h"
 #include "esp_private/gdma_link.h"
+#include "esp_private/esp_dma_utils.h"
 #include "hal/dma_types.h"
 #include "soc/soc_caps.h"
 #include "hal/gdma_ll.h"
@@ -22,6 +23,9 @@
 #include "esp_cache.h"
 #include "esp_memory_utils.h"
 
+#define ALIGN_UP(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
+#define ALIGN_DOWN(num, align) ((num) & ~((align) - 1))
+
 TEST_CASE("GDMA channel allocation", "[GDMA]")
 {
     gdma_channel_alloc_config_t channel_config = {};
@@ -147,22 +151,9 @@ TEST_CASE("GDMA channel allocation", "[GDMA]")
 #endif // GDMA_LL_AXI_PAIRS_PER_GROUP >= 2
 }
 
-static bool test_gdma_m2m_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
+static void test_gdma_config_link_list(gdma_channel_handle_t tx_chan, gdma_channel_handle_t rx_chan,
+                                       gdma_link_list_handle_t *tx_link_list, gdma_link_list_handle_t *rx_link_list, size_t sram_alignment, bool dma_link_in_ext_mem)
 {
-    BaseType_t task_woken = pdFALSE;
-    SemaphoreHandle_t done_sem = (SemaphoreHandle_t)user_data;
-    xSemaphoreGiveFromISR(done_sem, &task_woken);
-    return task_woken == pdTRUE;
-}
-
-static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handle_t rx_chan, bool dma_link_in_ext_mem)
-{
-    size_t sram_alignment = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
-    gdma_rx_event_callbacks_t rx_cbs = {
-        .on_recv_eof = test_gdma_m2m_rx_eof_callback,
-    };
-    SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
-    TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, done_sem));
 
     gdma_strategy_config_t strategy = {
         .auto_update_desc = true,
@@ -189,24 +180,46 @@ static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handl
             .check_owner = true,
         }
     };
-    gdma_link_list_handle_t tx_link_list = NULL;
-    TEST_ESP_OK(gdma_new_link_list(&tx_link_list_config, &tx_link_list));
-    // allocate the source buffer from SRAM
-    uint8_t *src_data = heap_caps_calloc(1, 128, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
-    TEST_ASSERT_NOT_NULL(src_data);
-
+    TEST_ESP_OK(gdma_new_link_list(&tx_link_list_config, tx_link_list));
     // create DMA link list for RX channel
     gdma_link_list_config_t rx_link_list_config = {
         .buffer_alignment = sram_alignment, // RX buffer should be aligned to the cache line size, because we will do cache invalidate later
         .item_alignment = 8, // 8-byte alignment required by the AXI-GDMA
-        .num_items = 1,
+        .num_items = 5,
         .flags = {
             .items_in_ext_mem = dma_link_in_ext_mem,
             .check_owner = true,
        },
     };
+    TEST_ESP_OK(gdma_new_link_list(&rx_link_list_config, rx_link_list));
+}
+
+static bool test_gdma_m2m_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
+{
+    BaseType_t task_woken = pdFALSE;
+    SemaphoreHandle_t done_sem = (SemaphoreHandle_t)user_data;
+    xSemaphoreGiveFromISR(done_sem, &task_woken);
+    return task_woken == pdTRUE;
+}
+
+static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handle_t rx_chan, bool dma_link_in_ext_mem)
+{
+    size_t sram_alignment = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
+    gdma_rx_event_callbacks_t rx_cbs = {
+        .on_recv_eof = test_gdma_m2m_rx_eof_callback,
+    };
+    SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
+    TEST_ASSERT_NOT_NULL(done_sem);
+    TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, done_sem));
+
+    gdma_link_list_handle_t tx_link_list = NULL;
     gdma_link_list_handle_t rx_link_list = NULL;
-    TEST_ESP_OK(gdma_new_link_list(&rx_link_list_config, &rx_link_list));
+    test_gdma_config_link_list(tx_chan, rx_chan, &tx_link_list, &rx_link_list, sram_alignment, dma_link_in_ext_mem);
+
+    // allocate the source buffer from SRAM
+    uint8_t *src_data = heap_caps_calloc(1, 128, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    TEST_ASSERT_NOT_NULL(src_data);
+
     // allocate the destination buffer from SRAM
     uint8_t *dst_data = heap_caps_calloc(1, 256, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
     TEST_ASSERT_NOT_NULL(dst_data);
@@ -270,7 +283,7 @@ static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handl
     TEST_ESP_OK(gdma_start(rx_chan, gdma_link_get_head_addr(rx_link_list)));
     TEST_ESP_OK(gdma_start(tx_chan, gdma_link_get_head_addr(tx_link_list)));
 
-    xSemaphoreTake(done_sem, portMAX_DELAY);
+    TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(done_sem, 1000 / portTICK_PERIOD_MS));
 
     if (sram_alignment) {
         // the destination data is not reflected to the cache yet, so do an invalidate to ask the cache to load the new data
@@ -344,3 +357,146 @@ TEST_CASE("GDMA M2M Mode", "[GDMA][M2M]")
     TEST_ESP_OK(gdma_del_channel(rx_chan));
 #endif // SOC_AXI_GDMA_SUPPORTED
 }
+
+typedef struct {
+    SemaphoreHandle_t done_sem;
+    dma_buffer_split_array_t *align_array;
+    size_t split_alignment;
+    bool need_invalidate;
+} test_gdma_context_t;
+
+static bool test_gdma_m2m_unaligned_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
+{
+    BaseType_t task_woken = pdFALSE;
+    test_gdma_context_t *user_ctx = (test_gdma_context_t *)user_data;
+    // invalidate the cache for each aligned part before the CPU reads it, then merge head/tail back into the original buffer
+    for (int i = 0; i < 3; i++) {
+        if (user_ctx->align_array->aligned_buffer[i].aligned_buffer && user_ctx->need_invalidate) {
+            TEST_ESP_OK(esp_cache_msync(user_ctx->align_array->aligned_buffer[i].aligned_buffer, ALIGN_UP(user_ctx->align_array->aligned_buffer[i].length, user_ctx->split_alignment), ESP_CACHE_MSYNC_FLAG_DIR_M2C));
+        }
+    }
+    TEST_ESP_OK(esp_dma_merge_aligned_buffers(user_ctx->align_array));
+    xSemaphoreGiveFromISR(user_ctx->done_sem, &task_woken);
+    return task_woken == pdTRUE;
+}
+
+static void test_gdma_m2m_unaligned_buffer_test(uint8_t *dst_data, uint8_t *src_data, size_t data_length, size_t offset_len, size_t split_alignment)
+{
+    TEST_ASSERT_NOT_NULL(src_data);
+    TEST_ASSERT_NOT_NULL(dst_data);
+    gdma_channel_handle_t tx_chan = NULL;
+    gdma_channel_handle_t rx_chan = NULL;
+    gdma_channel_alloc_config_t tx_chan_alloc_config = {
+        .direction = GDMA_CHANNEL_DIRECTION_TX,
+        .flags.reserve_sibling = true,
+    };
+    TEST_ESP_OK(gdma_new_ahb_channel(&tx_chan_alloc_config, &tx_chan));
+    gdma_channel_alloc_config_t rx_chan_alloc_config = {
+        .direction = GDMA_CHANNEL_DIRECTION_RX,
+        .sibling_chan = tx_chan,
+    };
+    TEST_ESP_OK(gdma_new_ahb_channel(&rx_chan_alloc_config, &rx_chan));
+    size_t sram_alignment = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
+
+    gdma_link_list_handle_t tx_link_list = NULL;
+    gdma_link_list_handle_t rx_link_list = NULL;
+    test_gdma_config_link_list(tx_chan, rx_chan, &tx_link_list, &rx_link_list, sram_alignment, false);
+
+    // prepare the source data
+    for (int i = 0; i < data_length; i++) {
+        src_data[i] = i;
+    }
+    if (sram_alignment) {
+        // do write-back for the source data because it's in the cache
+        TEST_ESP_OK(esp_cache_msync(src_data, ALIGN_UP(data_length, sram_alignment), ESP_CACHE_MSYNC_FLAG_DIR_C2M));
+    }
+
+    gdma_buffer_mount_config_t tx_buf_mount_config[] = {
+        [0] = {
+            .buffer = src_data,
+            .length = data_length,
+            .flags = {
+                .mark_eof = true,
+                .mark_final = true, // using a singly linked list, so terminate the link here
+            }
+        }
+    };
+    TEST_ESP_OK(gdma_link_mount_buffers(tx_link_list, 0, tx_buf_mount_config, sizeof(tx_buf_mount_config) / sizeof(gdma_buffer_mount_config_t), NULL));
+
+    // allocate the stash buffer; it must stay valid during the transfer and be freed by the caller
+    void *stash_buffer = heap_caps_aligned_calloc(split_alignment, 2, split_alignment, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    TEST_ASSERT_NOT_NULL(stash_buffer);
+    size_t stash_buffer_len = 2 * split_alignment;
+    dma_buffer_split_array_t align_array = {0};
+    gdma_buffer_mount_config_t rx_aligned_buf_mount_config[3] = {0};
+
+    TEST_ESP_OK(esp_dma_split_buffer_to_aligned(dst_data + offset_len, data_length, stash_buffer, stash_buffer_len, split_alignment, &align_array));
+    for (int i = 0; i < 3; i++) {
+        rx_aligned_buf_mount_config[i].buffer = align_array.aligned_buffer[i].aligned_buffer;
+        rx_aligned_buf_mount_config[i].length = align_array.aligned_buffer[i].length;
+    }
+    TEST_ESP_OK(gdma_link_mount_buffers(rx_link_list, 0, rx_aligned_buf_mount_config, 3, NULL));
+
+    gdma_rx_event_callbacks_t rx_cbs = {
+        .on_recv_eof = test_gdma_m2m_unaligned_rx_eof_callback,
+    };
+    SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
+    TEST_ASSERT_NOT_NULL(done_sem);
+    test_gdma_context_t user_ctx = {
+        .done_sem = done_sem,
+        .align_array = &align_array,
+        .split_alignment = split_alignment,
+        .need_invalidate = sram_alignment ? true : false,
+    };
+    TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, &user_ctx));
+
+    TEST_ESP_OK(gdma_start(rx_chan, gdma_link_get_head_addr(rx_link_list)));
+    TEST_ESP_OK(gdma_start(tx_chan, gdma_link_get_head_addr(tx_link_list)));
+
+    TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(done_sem, 1000 / portTICK_PERIOD_MS));
+
+    // validate the destination data
+    for (int i = 0; i < data_length; i++) {
+        TEST_ASSERT_EQUAL(i % 256, dst_data[i + offset_len]);
+    }
+
+    free(stash_buffer);
+    TEST_ESP_OK(gdma_del_link_list(tx_link_list));
+    TEST_ESP_OK(gdma_del_link_list(rx_link_list));
+    TEST_ESP_OK(gdma_del_channel(tx_chan));
+    TEST_ESP_OK(gdma_del_channel(rx_chan));
+    vSemaphoreDelete(done_sem);
+}
+
+TEST_CASE("GDMA M2M Unaligned RX Buffer Test", "[GDMA][M2M]")
+{
+    uint8_t *sbuf = heap_caps_aligned_calloc(64, 1, 10240, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    uint8_t *dbuf = heap_caps_aligned_calloc(64, 1, 10240, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    TEST_ASSERT_NOT_NULL(sbuf);
+    TEST_ASSERT_NOT_NULL(dbuf);
+
+    size_t split_alignment = 64;
+    // case: buffer length less than the buffer alignment
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 0, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 4, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 2, split_alignment);
+
+    // case: buffer head aligned
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 246, 0, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8182, 0, split_alignment);
+
+    // case: buffer tail aligned
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 246, 10, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8182, 10, split_alignment);
+
+    // case: buffer unaligned at both ends
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 100, 10, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 10, 60, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 256, 10, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8192, 10, split_alignment);
+
+    // case: buffer fully aligned
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 256, 0, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8192, 0, split_alignment);
+
+    free(sbuf);
+    free(dbuf);
+}