mirror of
https://github.com/espressif/esp-idf.git
synced 2024-10-05 20:47:46 -04:00
change(async_memcpy): set DMA transfer burst size
This commit is contained in:
parent
2f0c9b3584
commit
e8852d5c38
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
@ -48,7 +48,6 @@ typedef struct async_memcpy_transaction_t {
|
||||
/// @note - The number of transaction objects is determined by the backlog parameter
|
||||
typedef struct {
|
||||
async_memcpy_context_t parent; // Parent IO interface
|
||||
size_t sram_trans_align; // DMA transfer alignment (both in size and address) for SRAM memory
|
||||
size_t max_single_dma_buffer; // max DMA buffer size by a single descriptor
|
||||
cp_dma_hal_context_t hal; // CPDMA hal
|
||||
intr_handle_t intr; // CPDMA interrupt handle
|
||||
@ -90,7 +89,7 @@ esp_err_t esp_async_memcpy_install_cpdma(const async_memcpy_config_t *config, as
|
||||
uint32_t trans_queue_len = config->backlog ? config->backlog : DEFAULT_TRANSACTION_QUEUE_LENGTH;
|
||||
// allocate memory for transaction pool, aligned to 4 because the trans->eof_node requires that alignment
|
||||
mcp_dma->transaction_pool = heap_caps_aligned_calloc(4, trans_queue_len, sizeof(async_memcpy_transaction_t),
|
||||
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
|
||||
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
|
||||
ESP_GOTO_ON_FALSE(mcp_dma->transaction_pool, ESP_ERR_NO_MEM, err, TAG, "no mem for transaction pool");
|
||||
|
||||
// Init hal context
|
||||
@ -111,8 +110,7 @@ esp_err_t esp_async_memcpy_install_cpdma(const async_memcpy_config_t *config, as
|
||||
// initialize other members
|
||||
portMUX_INITIALIZE(&mcp_dma->spin_lock);
|
||||
atomic_init(&mcp_dma->fsm, MCP_FSM_IDLE);
|
||||
mcp_dma->sram_trans_align = config->sram_trans_align;
|
||||
size_t trans_align = config->sram_trans_align;
|
||||
size_t trans_align = config->dma_burst_size;
|
||||
mcp_dma->max_single_dma_buffer = trans_align ? ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, trans_align) : DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
|
||||
mcp_dma->parent.del = mcp_cpdma_del;
|
||||
mcp_dma->parent.memcpy = mcp_cpdma_memcpy;
|
||||
@ -240,12 +238,6 @@ static esp_err_t mcp_cpdma_memcpy(async_memcpy_context_t *ctx, void *dst, void *
|
||||
esp_err_t ret = ESP_OK;
|
||||
async_memcpy_cpdma_context_t *mcp_dma = __containerof(ctx, async_memcpy_cpdma_context_t, parent);
|
||||
ESP_RETURN_ON_FALSE(esp_ptr_internal(src) && esp_ptr_internal(dst), ESP_ERR_INVALID_ARG, TAG, "CP_DMA can only access SRAM");
|
||||
// alignment check
|
||||
if (mcp_dma->sram_trans_align) {
|
||||
ESP_RETURN_ON_FALSE((((intptr_t)dst & (mcp_dma->sram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, TAG, "buffer address not aligned: %p -> %p", src, dst);
|
||||
ESP_RETURN_ON_FALSE(((n & (mcp_dma->sram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, TAG,
|
||||
"copy size should align to %d bytes", mcp_dma->sram_trans_align);
|
||||
}
|
||||
async_memcpy_transaction_t *trans = NULL;
|
||||
// pick one transaction node from idle queue
|
||||
trans = try_pop_trans_from_idle_queue(mcp_dma);
|
||||
@ -257,12 +249,12 @@ static esp_err_t mcp_cpdma_memcpy(async_memcpy_context_t *ctx, void *dst, void *
|
||||
uint32_t num_desc_per_path = (n + max_single_dma_buffer - 1) / max_single_dma_buffer;
|
||||
// allocate DMA descriptors, descriptors need a strict alignment
|
||||
trans->tx_desc_link = heap_caps_aligned_calloc(4, num_desc_per_path, sizeof(dma_descriptor_align4_t),
|
||||
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
|
||||
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
|
||||
ESP_GOTO_ON_FALSE(trans->tx_desc_link, ESP_ERR_NO_MEM, err, TAG, "no mem for DMA descriptors");
|
||||
// don't have to allocate the EOF descriptor, we will use trans->eof_node as the RX EOF descriptor
|
||||
if (num_desc_per_path > 1) {
|
||||
trans->rx_desc_link = heap_caps_aligned_calloc(4, num_desc_per_path - 1, sizeof(dma_descriptor_align4_t),
|
||||
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
|
||||
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
|
||||
ESP_GOTO_ON_FALSE(trans->rx_desc_link, ESP_ERR_NO_MEM, err, TAG, "no mem for DMA descriptors");
|
||||
} else {
|
||||
// small copy buffer: using trans->eof_node alone is sufficient
|
||||
|
@ -69,8 +69,10 @@ typedef struct async_memcpy_transaction_t {
|
||||
typedef struct {
|
||||
async_memcpy_context_t parent; // Parent IO interface
|
||||
size_t descriptor_align; // DMA descriptor alignment
|
||||
size_t sram_trans_align; // DMA buffer alignment (both in size and address) for SRAM memory
|
||||
size_t psram_trans_align; // DMA buffer alignment (both in size and address) for PSRAM memory
|
||||
size_t rx_int_mem_alignment; // DMA buffer alignment (both in size and address) for internal RX memory
|
||||
size_t rx_ext_mem_alignment; // DMA buffer alignment (both in size and address) for external RX memory
|
||||
size_t tx_int_mem_alignment; // DMA buffer alignment (both in size and address) for internal TX memory
|
||||
size_t tx_ext_mem_alignment; // DMA buffer alignment (both in size and address) for external TX memory
|
||||
size_t max_single_dma_buffer; // max DMA buffer size by a single descriptor
|
||||
int gdma_bus_id; // GDMA bus id (AHB, AXI, etc.)
|
||||
gdma_channel_handle_t tx_channel; // GDMA TX channel handle
|
||||
@ -146,12 +148,12 @@ static esp_err_t esp_async_memcpy_install_gdma_template(const async_memcpy_confi
|
||||
ESP_GOTO_ON_ERROR(gdma_connect(mcp_gdma->rx_channel, m2m_trigger), err, TAG, "GDMA rx connect failed");
|
||||
ESP_GOTO_ON_ERROR(gdma_connect(mcp_gdma->tx_channel, m2m_trigger), err, TAG, "GDMA tx connect failed");
|
||||
|
||||
gdma_transfer_ability_t transfer_ability = {
|
||||
.sram_trans_align = config->sram_trans_align,
|
||||
.psram_trans_align = config->psram_trans_align,
|
||||
gdma_transfer_config_t transfer_cfg = {
|
||||
.max_data_burst_size = config->dma_burst_size ? config->dma_burst_size : 16,
|
||||
.access_ext_mem = true, // allow to do memory copy from/to external memory
|
||||
};
|
||||
ESP_GOTO_ON_ERROR(gdma_set_transfer_ability(mcp_gdma->tx_channel, &transfer_ability), err, TAG, "set tx trans ability failed");
|
||||
ESP_GOTO_ON_ERROR(gdma_set_transfer_ability(mcp_gdma->rx_channel, &transfer_ability), err, TAG, "set rx trans ability failed");
|
||||
ESP_GOTO_ON_ERROR(gdma_config_transfer(mcp_gdma->tx_channel, &transfer_cfg), err, TAG, "config transfer for tx channel failed");
|
||||
ESP_GOTO_ON_ERROR(gdma_config_transfer(mcp_gdma->rx_channel, &transfer_cfg), err, TAG, "config transfer for rx channel failed");
|
||||
|
||||
// register rx eof callback
|
||||
gdma_rx_event_callbacks_t cbs = {
|
||||
@ -172,15 +174,13 @@ static esp_err_t esp_async_memcpy_install_gdma_template(const async_memcpy_confi
|
||||
atomic_init(&mcp_gdma->fsm, MCP_FSM_IDLE);
|
||||
mcp_gdma->gdma_bus_id = gdma_bus_id;
|
||||
|
||||
uint32_t psram_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_EXT_MEM, CACHE_TYPE_DATA);
|
||||
uint32_t sram_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
|
||||
// if the psram_trans_align is configured to zero, we should fall back to use the data cache line size
|
||||
size_t psram_trans_align = MAX(psram_cache_line_size, config->psram_trans_align);
|
||||
size_t sram_trans_align = MAX(sram_cache_line_size, config->sram_trans_align);
|
||||
size_t trans_align = MAX(sram_trans_align, psram_trans_align);
|
||||
mcp_gdma->max_single_dma_buffer = ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, trans_align);
|
||||
mcp_gdma->psram_trans_align = psram_trans_align;
|
||||
mcp_gdma->sram_trans_align = sram_trans_align;
|
||||
// get the buffer alignment required by the GDMA channel
|
||||
gdma_get_alignment_constraints(mcp_gdma->rx_channel, &mcp_gdma->rx_int_mem_alignment, &mcp_gdma->rx_ext_mem_alignment);
|
||||
gdma_get_alignment_constraints(mcp_gdma->tx_channel, &mcp_gdma->tx_int_mem_alignment, &mcp_gdma->tx_ext_mem_alignment);
|
||||
|
||||
size_t buf_align = MAX(MAX(mcp_gdma->rx_int_mem_alignment, mcp_gdma->rx_ext_mem_alignment),
|
||||
MAX(mcp_gdma->tx_int_mem_alignment, mcp_gdma->tx_ext_mem_alignment));
|
||||
mcp_gdma->max_single_dma_buffer = ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, buf_align);
|
||||
mcp_gdma->parent.del = mcp_gdma_del;
|
||||
mcp_gdma->parent.memcpy = mcp_gdma_memcpy;
|
||||
#if SOC_GDMA_SUPPORT_ETM
|
||||
@ -335,29 +335,21 @@ static async_memcpy_transaction_t *try_pop_trans_from_idle_queue(async_memcpy_gd
|
||||
static bool check_buffer_alignment(async_memcpy_gdma_context_t *mcp_gdma, void *src, void *dst, size_t n)
|
||||
{
|
||||
bool valid = true;
|
||||
uint32_t psram_align_mask = 0;
|
||||
uint32_t sram_align_mask = 0;
|
||||
if (mcp_gdma->psram_trans_align) {
|
||||
psram_align_mask = mcp_gdma->psram_trans_align - 1;
|
||||
}
|
||||
if (mcp_gdma->sram_trans_align) {
|
||||
sram_align_mask = mcp_gdma->sram_trans_align - 1;
|
||||
}
|
||||
|
||||
if (esp_ptr_external_ram(dst)) {
|
||||
valid = valid && (((uint32_t)dst & psram_align_mask) == 0);
|
||||
valid = valid && ((n & psram_align_mask) == 0);
|
||||
valid = valid && (((uint32_t)dst & (mcp_gdma->rx_ext_mem_alignment - 1)) == 0);
|
||||
valid = valid && ((n & (mcp_gdma->rx_ext_mem_alignment - 1)) == 0);
|
||||
} else {
|
||||
valid = valid && (((uint32_t)dst & sram_align_mask) == 0);
|
||||
valid = valid && ((n & sram_align_mask) == 0);
|
||||
valid = valid && (((uint32_t)dst & (mcp_gdma->rx_int_mem_alignment - 1)) == 0);
|
||||
valid = valid && ((n & (mcp_gdma->rx_int_mem_alignment - 1)) == 0);
|
||||
}
|
||||
|
||||
if (esp_ptr_external_ram(src)) {
|
||||
valid = valid && (((uint32_t)src & psram_align_mask) == 0);
|
||||
valid = valid && ((n & psram_align_mask) == 0);
|
||||
valid = valid && (((uint32_t)src & (mcp_gdma->tx_ext_mem_alignment - 1)) == 0);
|
||||
valid = valid && ((n & (mcp_gdma->tx_ext_mem_alignment - 1)) == 0);
|
||||
} else {
|
||||
valid = valid && (((uint32_t)src & sram_align_mask) == 0);
|
||||
valid = valid && ((n & sram_align_mask) == 0);
|
||||
valid = valid && (((uint32_t)src & (mcp_gdma->tx_int_mem_alignment - 1)) == 0);
|
||||
valid = valid && ((n & (mcp_gdma->tx_int_mem_alignment - 1)) == 0);
|
||||
}
|
||||
|
||||
return valid;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
@ -51,8 +51,11 @@ typedef bool (*async_memcpy_isr_cb_t)(async_memcpy_handle_t mcp_hdl, async_memcp
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t backlog; /*!< Maximum number of transactions that can be prepared in the background */
|
||||
size_t sram_trans_align; /*!< DMA transfer alignment (both in size and address) for SRAM memory */
|
||||
size_t psram_trans_align; /*!< DMA transfer alignment (both in size and address) for PSRAM memory */
|
||||
size_t sram_trans_align __attribute__((deprecated)); /*!< DMA transfer alignment (both in size and address) for SRAM memory */
|
||||
union {
|
||||
size_t psram_trans_align; /*!< DMA transfer alignment (both in size and address) for PSRAM memory */
|
||||
size_t dma_burst_size; /*!< DMA transfer burst size, in bytes */
|
||||
};
|
||||
uint32_t flags; /*!< Extra flags to control async memcpy feature */
|
||||
} async_memcpy_config_t;
|
||||
|
||||
@ -62,8 +65,7 @@ typedef struct {
|
||||
#define ASYNC_MEMCPY_DEFAULT_CONFIG() \
|
||||
{ \
|
||||
.backlog = 8, \
|
||||
.sram_trans_align = 0, \
|
||||
.psram_trans_align = 0, \
|
||||
.dma_burst_size = 16, \
|
||||
.flags = 0, \
|
||||
}
|
||||
|
||||
|
@ -302,8 +302,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
|
||||
|
||||
async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
|
||||
config.backlog = (buffer_size / DMA_DESCRIPTOR_BUFFER_MAX_SIZE + 1) * TEST_ASYNC_MEMCPY_BENCH_COUNTS;
|
||||
config.sram_trans_align = 4; // at least 4 bytes aligned for SRAM transfer
|
||||
config.psram_trans_align = 64; // at least 64 bytes aligned for PSRAM transfer
|
||||
config.dma_burst_size = 64; // set a big burst size for performance
|
||||
async_memcpy_handle_t driver = NULL;
|
||||
int64_t elapse_us = 0;
|
||||
float throughput = 0.0;
|
||||
@ -311,7 +310,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
|
||||
|
||||
// 1. SRAM->SRAM
|
||||
memcpy_testbench_context_t test_context = {
|
||||
.align = config.psram_trans_align,
|
||||
.align = config.dma_burst_size,
|
||||
.buffer_size = buffer_size,
|
||||
.src_in_psram = false,
|
||||
.dst_in_psram = false,
|
||||
|
@ -36,8 +36,7 @@ There are several ways to install the async memcpy driver, depending on the unde
|
||||
Driver configuration is described in :cpp:type:`async_memcpy_config_t`:
|
||||
|
||||
* :cpp:member:`backlog`: This is used to configure the maximum number of memory copy transactions that can be queued up before the first one is completed. If this field is set to zero, then the default value 4 will be applied.
|
||||
* :cpp:member:`sram_trans_align`: Declare SRAM alignment for both data address and copy size. Set it to zero if the data has no alignment restriction. If set to a multiple of four (i.e., 4X), the driver will enable the burst mode internally, which is helpful for some performance-related applications.
|
||||
* :cpp:member:`psram_trans_align`: Declare PSRAM alignment for both data address and copy size. User has to give it a valid value (only 16, 32, 64 are supported) if the destination of memcpy is located in PSRAM. The default alignment (i.e., 16) will be applied if it is set to zero. Internally, the driver configures the size of block used by DMA to access PSRAM, according to the alignment.
|
||||
* :cpp:member:`dma_burst_size`: Set the burst size, in bytes, of a DMA burst transfer.
|
||||
* :cpp:member:`flags`: This is used to enable some special driver features.
|
||||
|
||||
.. code-block:: c
|
||||
|
@ -36,8 +36,7 @@ DMA 允许多个内存复制请求在首个请求完成之前排队,即允许
|
||||
在 :cpp:type:`async_memcpy_config_t` 中设置驱动配置:
|
||||
|
||||
* :cpp:member:`backlog`:此项用于配置首个请求完成前可以排队的最大内存复制事务数量。如果将此字段设置为零,会应用默认值 4。
|
||||
* :cpp:member:`sram_trans_align`:声明 SRAM 中数据地址和复制大小的对齐方式,如果数据没有对齐限制,则设置为零。如果设置为四的倍数值(即 4X),驱动程序将内部启用突发模式,这有利于某些和性能相关的应用程序。
|
||||
* :cpp:member:`psram_trans_align`:声明 PSRAM 中数据地址和复制大小的对齐方式。如果 memcpy 的目标地址位于 PSRAM 中,用户必须给出一个有效值(只支持 16、32、64)。如果设置为零,会默认采用 16 字节对齐。在内部,驱动程序会根据对齐方式来配置 DMA 访问 PSRAM 时所用的块大小。
|
||||
* :cpp:member:`dma_burst_size`:设置单次 DMA 传输中突发数据量的大小(以字节为单位)。
|
||||
* :cpp:member:`flags`:此项可以启用一些特殊的驱动功能。
|
||||
|
||||
.. code-block:: c
|
||||
|
Loading…
Reference in New Issue
Block a user