feat(async_memcpy): refactor driver code to support different DMA backends

Support both AHB DMA and AXI DMA backed async memory copy on the same target (ESP32-P4).
morris 2023-07-12 15:21:40 +08:00
parent 0de9a18d4d
commit fd3d1aa101
17 changed files with 1163 additions and 769 deletions
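A minimal usage sketch (not part of this commit) of how an application could pick a DMA backend explicitly after this refactor. It only relies on the APIs introduced or kept by this change (esp_async_memcpy_install_gdma_axi, esp_async_memcpy_install, ASYNC_MEMCPY_DEFAULT_CONFIG, SOC_AXI_GDMA_SUPPORTED); the function name, the static handle and the backlog value are illustrative.

#include "esp_err.h"
#include "esp_async_memcpy.h"

static async_memcpy_handle_t s_mcp;

static void install_async_memcpy(void) // illustrative helper
{
    async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
    config.backlog = 8; // number of transactions that can be queued in the background (illustrative value)
#if SOC_AXI_GDMA_SUPPORTED
    // explicitly pick the AXI GDMA backend (e.g. on ESP32-P4)
    ESP_ERROR_CHECK(esp_async_memcpy_install_gdma_axi(&config, &s_mcp));
#else
    // the generic install falls back to the default backend (AHB GDMA or CPDMA)
    ESP_ERROR_CHECK(esp_async_memcpy_install(&config, &s_mcp));
#endif
}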


@@ -60,16 +60,18 @@ if(NOT BOOTLOADER_BUILD)
endif()
if(CONFIG_SOC_GDMA_SUPPORTED)
list(APPEND srcs "dma/gdma.c" "dma/async_memcpy_impl_gdma.c")
endif()
if(CONFIG_SOC_CP_DMA_SUPPORTED)
list(APPEND srcs "dma/async_memcpy_impl_cp_dma.c")
list(APPEND srcs "dma/gdma.c")
endif()
if(CONFIG_SOC_ASYNC_MEMCPY_SUPPORTED)
list(APPEND srcs "dma/esp_async_memcpy.c")
endif()
if(CONFIG_SOC_GDMA_SUPPORTED)
list(APPEND srcs "dma/async_memcpy_gdma.c")
endif() # CONFIG_SOC_GDMA_SUPPORTED
if(CONFIG_SOC_CP_DMA_SUPPORTED)
list(APPEND srcs "dma/async_memcpy_cp_dma.c")
endif() # CONFIG_SOC_CP_DMA_SUPPORTED
endif() # CONFIG_SOC_ASYNC_MEMCPY_SUPPORTED
if(CONFIG_SOC_GDMA_SUPPORT_ETM)
list(APPEND srcs "dma/gdma_etm.c")


@@ -0,0 +1,358 @@
/*
* SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <string.h>
#include <stdatomic.h>
#include <sys/queue.h>
#include <sys/param.h>
#include "sdkconfig.h"
#include "freertos/FreeRTOS.h"
#include "soc/soc_caps.h"
#include "soc/interrupts.h"
#include "esp_log.h"
#include "esp_check.h"
#include "esp_attr.h"
#include "esp_err.h"
#include "esp_intr_alloc.h"
#include "esp_memory_utils.h"
#include "esp_async_memcpy.h"
#include "esp_async_memcpy_priv.h"
#include "hal/cp_dma_hal.h"
#include "hal/cp_dma_ll.h"
#include "hal/dma_types.h"
static const char *TAG = "async_mcp.cpdma";
/// @brief Transaction object for async memcpy
/// @note - The DMA descriptors need to be 4-byte aligned
/// @note - The DMA descriptor link list is allocated dynamically from DMA-able memory
/// @note - Because of the eof_node, the transaction object should also be allocated from DMA-able memory
typedef struct async_memcpy_transaction_t {
dma_descriptor_align4_t eof_node; // this is the DMA node which acts as the EOF descriptor (RX path only)
dma_descriptor_align4_t *tx_desc_link; // descriptor link list, the length of the link is determined by the copy buffer size
dma_descriptor_align4_t *rx_desc_link; // descriptor link list, the length of the link is determined by the copy buffer size
intptr_t tx_start_desc_addr; // TX start descriptor address
intptr_t rx_start_desc_addr; // RX start descriptor address
async_memcpy_isr_cb_t cb; // user callback
void *cb_args; // user callback args
STAILQ_ENTRY(async_memcpy_transaction_t) idle_queue_entry; // Entry for the idle queue
STAILQ_ENTRY(async_memcpy_transaction_t) ready_queue_entry; // Entry for the ready queue
} async_memcpy_transaction_t;
/// @brief Context of async memcpy driver
/// @note - It maintains two queues: one for idle transaction objects, one for ready transaction objects
/// @note - Transaction objects are allocated from DMA-able memory
/// @note - The number of transaction objects is determined by the backlog parameter
typedef struct {
async_memcpy_context_t parent; // Parent IO interface
size_t sram_trans_align; // DMA transfer alignment (both in size and address) for SRAM memory
size_t max_single_dma_buffer; // max DMA buffer size by a single descriptor
cp_dma_hal_context_t hal; // CPDMA hal
intr_handle_t intr; // CPDMA interrupt handle
portMUX_TYPE spin_lock; // spin lock to prevent threads and the ISR from accessing the same resource simultaneously
_Atomic async_memcpy_fsm_t fsm; // driver state machine, changing state should be atomic
async_memcpy_transaction_t *transaction_pool; // transaction object pool
STAILQ_HEAD(, async_memcpy_transaction_t) idle_queue_head; // Head of the idle queue
STAILQ_HEAD(, async_memcpy_transaction_t) ready_queue_head; // Head of the ready queue
} async_memcpy_cpdma_context_t;
static void mcp_default_isr_handler(void *args);
static esp_err_t mcp_cpdma_del(async_memcpy_context_t *ctx);
static esp_err_t mcp_cpdma_memcpy(async_memcpy_context_t *ctx, void *dst, void *src, size_t n, async_memcpy_isr_cb_t cb_isr, void *cb_args);
static esp_err_t mcp_cpdma_destroy(async_memcpy_cpdma_context_t *mcp_dma)
{
if (mcp_dma->transaction_pool) {
free(mcp_dma->transaction_pool);
}
if (mcp_dma->intr) {
esp_intr_free(mcp_dma->intr);
}
if (mcp_dma->hal.dev) { // check whether the HAL has been initialized
cp_dma_hal_stop(&mcp_dma->hal);
cp_dma_hal_deinit(&mcp_dma->hal);
}
free(mcp_dma);
return ESP_OK;
}
esp_err_t esp_async_memcpy_install_cpdma(const async_memcpy_config_t *config, async_memcpy_handle_t *mcp)
{
esp_err_t ret = ESP_OK;
async_memcpy_cpdma_context_t *mcp_dma = NULL;
ESP_RETURN_ON_FALSE(config && mcp, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
// allocate memory of driver context from internal memory
mcp_dma = heap_caps_calloc(1, sizeof(async_memcpy_cpdma_context_t), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
ESP_GOTO_ON_FALSE(mcp_dma, ESP_ERR_NO_MEM, err, TAG, "no mem for driver context");
uint32_t trans_queue_len = config->backlog ? config->backlog : DEFAULT_TRANSACTION_QUEUE_LENGTH;
// allocate memory for transaction pool, aligned to 4 because the trans->eof_node requires that alignment
mcp_dma->transaction_pool = heap_caps_aligned_calloc(4, trans_queue_len, sizeof(async_memcpy_transaction_t),
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
ESP_GOTO_ON_FALSE(mcp_dma->transaction_pool, ESP_ERR_NO_MEM, err, TAG, "no mem for transaction pool");
// Init hal context
cp_dma_hal_config_t hal_config = {};
cp_dma_hal_init(&mcp_dma->hal, &hal_config);
ESP_GOTO_ON_ERROR(esp_intr_alloc(ETS_DMA_COPY_INTR_SOURCE, 0, mcp_default_isr_handler, mcp_dma, &mcp_dma->intr),
err, TAG, "install isr failed");
// initialize transaction queue
STAILQ_INIT(&mcp_dma->idle_queue_head);
STAILQ_INIT(&mcp_dma->ready_queue_head);
// pick transactions from the pool and insert to the idle queue
for (int i = 0; i < trans_queue_len; i++) {
STAILQ_INSERT_TAIL(&mcp_dma->idle_queue_head, &mcp_dma->transaction_pool[i], idle_queue_entry);
}
// initialize other members
portMUX_INITIALIZE(&mcp_dma->spin_lock);
atomic_init(&mcp_dma->fsm, MCP_FSM_IDLE);
mcp_dma->sram_trans_align = config->sram_trans_align;
size_t trans_align = config->sram_trans_align;
mcp_dma->max_single_dma_buffer = trans_align ? ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, trans_align) : DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
mcp_dma->parent.del = mcp_cpdma_del;
mcp_dma->parent.memcpy = mcp_cpdma_memcpy;
// return driver object
*mcp = &mcp_dma->parent;
return ESP_OK;
err:
if (mcp_dma) {
mcp_cpdma_destroy(mcp_dma);
}
return ret;
}
esp_err_t esp_async_memcpy_install(const async_memcpy_config_t *config, async_memcpy_handle_t *asmcp)
__attribute__((alias("esp_async_memcpy_install_cpdma")));
static esp_err_t mcp_cpdma_del(async_memcpy_context_t *ctx)
{
async_memcpy_cpdma_context_t *mcp_dma = __containerof(ctx, async_memcpy_cpdma_context_t, parent);
// check if there are pending transactions
ESP_RETURN_ON_FALSE(STAILQ_EMPTY(&mcp_dma->ready_queue_head), ESP_ERR_INVALID_STATE, TAG, "there are pending transactions");
// check if the driver is in IDLE state
ESP_RETURN_ON_FALSE(atomic_load(&mcp_dma->fsm) == MCP_FSM_IDLE, ESP_ERR_INVALID_STATE, TAG, "driver is not in IDLE state");
return mcp_cpdma_destroy(mcp_dma);
}
static void mount_tx_buffer_to_dma(dma_descriptor_align4_t *desc_array, int num_desc,
uint8_t *buf, size_t buf_sz, size_t max_single_dma_buffer)
{
uint32_t prepared_length = 0;
size_t len = buf_sz;
for (int i = 0; i < num_desc - 1; i++) {
desc_array[i].buffer = &buf[prepared_length];
desc_array[i].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
desc_array[i].dw0.suc_eof = 0;
desc_array[i].dw0.size = max_single_dma_buffer;
desc_array[i].dw0.length = max_single_dma_buffer;
desc_array[i].next = &desc_array[i + 1];
prepared_length += max_single_dma_buffer;
len -= max_single_dma_buffer;
}
// take special care of the EOF descriptor
desc_array[num_desc - 1].buffer = &buf[prepared_length];
desc_array[num_desc - 1].next = NULL;
desc_array[num_desc - 1].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
desc_array[num_desc - 1].dw0.suc_eof = 1;
desc_array[num_desc - 1].dw0.size = len;
desc_array[num_desc - 1].dw0.length = len;
}
static void mount_rx_buffer_to_dma(dma_descriptor_align4_t *desc_array, int num_desc, dma_descriptor_align4_t *eof_desc,
uint8_t *buf, size_t buf_sz, size_t max_single_dma_buffer)
{
uint32_t prepared_length = 0;
size_t len = buf_sz;
if (desc_array) {
assert(num_desc > 0);
for (int i = 0; i < num_desc; i++) {
desc_array[i].buffer = &buf[prepared_length];
desc_array[i].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
desc_array[i].dw0.size = max_single_dma_buffer;
desc_array[i].dw0.length = max_single_dma_buffer;
desc_array[i].next = &desc_array[i + 1];
prepared_length += max_single_dma_buffer;
len -= max_single_dma_buffer;
}
desc_array[num_desc - 1].next = eof_desc;
}
eof_desc->buffer = &buf[prepared_length];
eof_desc->next = NULL;
eof_desc->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
eof_desc->dw0.size = len;
eof_desc->dw0.length = len;
}
/// @brief helper function to get one transaction from the ready queue
/// @note this function is allowed to be called in ISR
static async_memcpy_transaction_t *try_pop_trans_from_ready_queue(async_memcpy_cpdma_context_t *mcp_dma)
{
async_memcpy_transaction_t *trans = NULL;
portENTER_CRITICAL_SAFE(&mcp_dma->spin_lock);
trans = STAILQ_FIRST(&mcp_dma->ready_queue_head);
if (trans) {
STAILQ_REMOVE_HEAD(&mcp_dma->ready_queue_head, ready_queue_entry);
}
portEXIT_CRITICAL_SAFE(&mcp_dma->spin_lock);
return trans;
}
/// @brief helper function to start a pending transaction
/// @note this function is allowed to be called in ISR
static void try_start_pending_transaction(async_memcpy_cpdma_context_t *mcp_dma)
{
async_memcpy_fsm_t expected_fsm = MCP_FSM_IDLE;
async_memcpy_transaction_t *trans = NULL;
if (atomic_compare_exchange_strong(&mcp_dma->fsm, &expected_fsm, MCP_FSM_RUN_WAIT)) {
trans = try_pop_trans_from_ready_queue(mcp_dma);
if (trans) {
atomic_store(&mcp_dma->fsm, MCP_FSM_RUN);
cp_dma_hal_set_desc_base_addr(&mcp_dma->hal, trans->tx_start_desc_addr, trans->rx_start_desc_addr);
cp_dma_hal_start(&mcp_dma->hal); // enable DMA and interrupt
} else {
atomic_store(&mcp_dma->fsm, MCP_FSM_IDLE);
}
}
}
/// @brief helper function to get one transaction from the idle queue
/// @note this function is allowed to be called in ISR
static async_memcpy_transaction_t *try_pop_trans_from_idle_queue(async_memcpy_cpdma_context_t *mcp_dma)
{
async_memcpy_transaction_t *trans = NULL;
portENTER_CRITICAL_SAFE(&mcp_dma->spin_lock);
trans = STAILQ_FIRST(&mcp_dma->idle_queue_head);
if (trans) {
STAILQ_REMOVE_HEAD(&mcp_dma->idle_queue_head, idle_queue_entry);
}
portEXIT_CRITICAL_SAFE(&mcp_dma->spin_lock);
return trans;
}
static esp_err_t mcp_cpdma_memcpy(async_memcpy_context_t *ctx, void *dst, void *src, size_t n, async_memcpy_isr_cb_t cb_isr, void *cb_args)
{
esp_err_t ret = ESP_OK;
async_memcpy_cpdma_context_t *mcp_dma = __containerof(ctx, async_memcpy_cpdma_context_t, parent);
ESP_RETURN_ON_FALSE(esp_ptr_internal(src) && esp_ptr_internal(dst), ESP_ERR_INVALID_ARG, TAG, "CP_DMA can only access SRAM");
// alignment check
if (mcp_dma->sram_trans_align) {
ESP_RETURN_ON_FALSE((((intptr_t)dst & (mcp_dma->sram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, TAG, "buffer address not aligned: %p -> %p", src, dst);
ESP_RETURN_ON_FALSE(((n & (mcp_dma->sram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, TAG,
"copy size should align to %d bytes", mcp_dma->sram_trans_align);
}
async_memcpy_transaction_t *trans = NULL;
// pick one transaction node from idle queue
trans = try_pop_trans_from_idle_queue(mcp_dma);
// check if we get the transaction object successfully
ESP_RETURN_ON_FALSE(trans, ESP_ERR_INVALID_STATE, TAG, "no free node in the idle queue");
// calculate how many descriptors we want
size_t max_single_dma_buffer = mcp_dma->max_single_dma_buffer;
uint32_t num_desc_per_path = (n + max_single_dma_buffer - 1) / max_single_dma_buffer;
// allocate DMA descriptors, descriptors need a strict alignment
trans->tx_desc_link = heap_caps_aligned_calloc(4, num_desc_per_path, sizeof(dma_descriptor_align4_t),
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
ESP_GOTO_ON_FALSE(trans->tx_desc_link, ESP_ERR_NO_MEM, err, TAG, "no mem for DMA descriptors");
// no need to allocate a separate EOF descriptor; we will use trans->eof_node as the RX EOF descriptor
if (num_desc_per_path > 1) {
trans->rx_desc_link = heap_caps_aligned_calloc(4, num_desc_per_path - 1, sizeof(dma_descriptor_align4_t),
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
ESP_GOTO_ON_FALSE(trans->rx_desc_link, ESP_ERR_NO_MEM, err, TAG, "no mem for DMA descriptors");
} else {
// small copy buffer, using trans->eof_node alone is sufficient
trans->rx_desc_link = NULL;
}
// (preload) mount src data to the TX descriptor
mount_tx_buffer_to_dma(trans->tx_desc_link, num_desc_per_path, src, n, max_single_dma_buffer);
// (preload) mount dst data to the RX descriptor
mount_rx_buffer_to_dma(trans->rx_desc_link, num_desc_per_path - 1, &trans->eof_node, dst, n, max_single_dma_buffer);
// save other transaction context
trans->cb = cb_isr;
trans->cb_args = cb_args;
trans->tx_start_desc_addr = (intptr_t)trans->tx_desc_link;
trans->rx_start_desc_addr = trans->rx_desc_link ? (intptr_t)trans->rx_desc_link : (intptr_t)&trans->eof_node;
portENTER_CRITICAL(&mcp_dma->spin_lock);
// insert the trans to ready queue
STAILQ_INSERT_TAIL(&mcp_dma->ready_queue_head, trans, ready_queue_entry);
portEXIT_CRITICAL(&mcp_dma->spin_lock);
// check driver state, if there's no running transaction, start a new one
try_start_pending_transaction(mcp_dma);
return ESP_OK;
err:
if (trans) {
if (trans->tx_desc_link) {
free(trans->tx_desc_link);
trans->tx_desc_link = NULL;
}
if (trans->rx_desc_link) {
free(trans->rx_desc_link);
trans->rx_desc_link = NULL;
}
// return the trans to the idle queue
portENTER_CRITICAL(&mcp_dma->spin_lock);
STAILQ_INSERT_TAIL(&mcp_dma->idle_queue_head, trans, idle_queue_entry);
portEXIT_CRITICAL(&mcp_dma->spin_lock);
}
return ret;
}
static void mcp_default_isr_handler(void *args)
{
bool need_yield = false;
async_memcpy_cpdma_context_t *mcp_dma = (async_memcpy_cpdma_context_t *)args;
// get the interrupt status and clear it
uint32_t status = cp_dma_hal_get_intr_status(&mcp_dma->hal);
cp_dma_hal_clear_intr_status(&mcp_dma->hal, status);
// End-Of-Frame on RX side
if (status & CP_DMA_LL_EVENT_RX_EOF) {
dma_descriptor_align4_t *eof_desc = (dma_descriptor_align4_t *)cp_dma_ll_get_rx_eof_descriptor_address(mcp_dma->hal.dev);
// get the transaction object address by the EOF descriptor address
async_memcpy_transaction_t *trans = __containerof(eof_desc, async_memcpy_transaction_t, eof_node);
// switch driver state from RUN to IDLE
async_memcpy_fsm_t expected_fsm = MCP_FSM_RUN;
if (atomic_compare_exchange_strong(&mcp_dma->fsm, &expected_fsm, MCP_FSM_IDLE_WAIT)) {
// invoke the callback registered by the user
async_memcpy_isr_cb_t cb = trans->cb;
if (cb) {
async_memcpy_event_t e = {
// No event data for now
};
need_yield = cb(&mcp_dma->parent, &e, trans->cb_args);
}
// recycle descriptor memory
free(trans->tx_desc_link);
free(trans->rx_desc_link);
trans->tx_desc_link = NULL;
trans->rx_desc_link = NULL;
trans->cb = NULL;
portENTER_CRITICAL_ISR(&mcp_dma->spin_lock);
// insert the trans object to the idle queue
STAILQ_INSERT_TAIL(&mcp_dma->idle_queue_head, trans, idle_queue_entry);
portEXIT_CRITICAL_ISR(&mcp_dma->spin_lock);
atomic_store(&mcp_dma->fsm, MCP_FSM_IDLE);
}
// try start the next pending transaction
try_start_pending_transaction(mcp_dma);
}
if (need_yield) {
portYIELD_FROM_ISR();
}
}
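A minimal sketch (not part of this commit) of a user callback that fits the ISR flow shown above. It follows the async_memcpy_isr_cb_t signature from the public header and uses only ISR-safe FreeRTOS primitives; the callback name and the semaphore handle passed via cb_args are assumptions made for illustration.

#include "freertos/FreeRTOS.h"
#include "freertos/semphr.h"
#include "esp_async_memcpy.h"

static bool on_copy_done(async_memcpy_handle_t mcp, async_memcpy_event_t *event, void *cb_args)
{
    (void)mcp;
    (void)event;
    SemaphoreHandle_t done_sem = (SemaphoreHandle_t)cb_args; // handed in via esp_async_memcpy() (illustrative)
    BaseType_t high_task_wakeup = pdFALSE;
    xSemaphoreGiveFromISR(done_sem, &high_task_wakeup);
    // returning true asks the driver to yield from the ISR (the need_yield path above)
    return high_task_wakeup == pdTRUE;
}

// hypothetical usage: esp_async_memcpy(mcp, dst, src, size, on_copy_done, done_sem);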


@@ -0,0 +1,496 @@
/*
* SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <string.h>
#include <stdatomic.h>
#include <sys/queue.h>
#include <sys/param.h>
#include "sdkconfig.h"
#include "freertos/FreeRTOS.h"
#include "soc/soc_caps.h"
#include "esp_log.h"
#include "esp_check.h"
#include "esp_attr.h"
#include "esp_err.h"
#include "esp_private/gdma.h"
#include "esp_memory_utils.h"
#include "esp_async_memcpy.h"
#include "esp_async_memcpy_priv.h"
#include "hal/dma_types.h"
#include "hal/cache_hal.h"
#include "rom/cache.h"
static const char *TAG = "async_mcp.gdma";
#define MCP_NEEDS_INVALIDATE_DST_CACHE CONFIG_IDF_TARGET_ESP32P4
#define MCP_NEEDS_WRITE_BACK_SRC_CACHE CONFIG_IDF_TARGET_ESP32P4
#define MCP_NEEDS_WRITE_BACK_DESC_CACHE CONFIG_IDF_TARGET_ESP32P4
#if SOC_AXI_GDMA_SUPPORTED
#define MCP_DMA_DESC_ALIGN 64
typedef dma_descriptor_align8_t mcp_dma_descriptor_t;
#elif SOC_AHB_GDMA_SUPPORTED
#define MCP_DMA_DESC_ALIGN 32
typedef dma_descriptor_align4_t mcp_dma_descriptor_t;
#else
#error "Unsupported GDMA type"
#endif
/// @brief Transaction object for async memcpy
/// @note - GDMA requires the DMA descriptors to be 4 or 8 bytes aligned
/// @note - The DMA descriptor link list is allocated dynamically from DMA-able memory
/// @note - Because of the eof_node, the transaction object should also be allocated from DMA-able memory
typedef struct async_memcpy_transaction_t {
mcp_dma_descriptor_t eof_node; // this is the DMA node which acts as the EOF descriptor (RX path only)
mcp_dma_descriptor_t *tx_desc_link; // descriptor link list, the length of the link is determined by the copy buffer size
mcp_dma_descriptor_t *rx_desc_link; // descriptor link list, the length of the link is determined by the copy buffer size
intptr_t tx_start_desc_addr; // TX start descriptor address
intptr_t rx_start_desc_addr; // RX start descriptor address
intptr_t memcpy_dst_addr; // memcpy destination address
size_t memcpy_size; // memcpy size
async_memcpy_isr_cb_t cb; // user callback
void *cb_args; // user callback args
STAILQ_ENTRY(async_memcpy_transaction_t) idle_queue_entry; // Entry for the idle queue
STAILQ_ENTRY(async_memcpy_transaction_t) ready_queue_entry; // Entry for the ready queue
} async_memcpy_transaction_t;
/// @brief Context of async memcpy driver
/// @note - It maintains two queues: one for idle transaction objects, one for ready transaction objects
/// @note - Transaction objects are allocated from DMA-able memory
/// @note - The number of transaction objects is determined by the backlog parameter
typedef struct {
async_memcpy_context_t parent; // Parent IO interface
size_t sram_trans_align; // DMA transfer alignment (both in size and address) for SRAM memory
size_t psram_trans_align; // DMA transfer alignment (both in size and address) for PSRAM memory
size_t max_single_dma_buffer; // max DMA buffer size by a single descriptor
int gdma_bus_id; // GDMA bus id (AHB, AXI, etc.)
gdma_channel_handle_t tx_channel; // GDMA TX channel handle
gdma_channel_handle_t rx_channel; // GDMA RX channel handle
portMUX_TYPE spin_lock; // spin lock to prevent threads and the ISR from accessing the same resource simultaneously
_Atomic async_memcpy_fsm_t fsm; // driver state machine, changing state should be atomic
async_memcpy_transaction_t *transaction_pool; // transaction object pool
STAILQ_HEAD(, async_memcpy_transaction_t) idle_queue_head; // Head of the idle queue
STAILQ_HEAD(, async_memcpy_transaction_t) ready_queue_head; // Head of the ready queue
} async_memcpy_gdma_context_t;
static bool mcp_gdma_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data);
static esp_err_t mcp_gdma_del(async_memcpy_context_t *ctx);
static esp_err_t mcp_gdma_memcpy(async_memcpy_context_t *ctx, void *dst, void *src, size_t n, async_memcpy_isr_cb_t cb_isr, void *cb_args);
#if SOC_GDMA_SUPPORT_ETM
static esp_err_t mcp_new_etm_event(async_memcpy_context_t *ctx, async_memcpy_etm_event_t event_type, esp_etm_event_handle_t *out_event);
#endif // SOC_GDMA_SUPPORT_ETM
static esp_err_t mcp_gdma_destroy(async_memcpy_gdma_context_t *mcp_gdma)
{
if (mcp_gdma->transaction_pool) {
free(mcp_gdma->transaction_pool);
}
if (mcp_gdma->tx_channel) {
gdma_disconnect(mcp_gdma->tx_channel);
gdma_del_channel(mcp_gdma->tx_channel);
}
if (mcp_gdma->rx_channel) {
gdma_disconnect(mcp_gdma->rx_channel);
gdma_del_channel(mcp_gdma->rx_channel);
}
free(mcp_gdma);
return ESP_OK;
}
static esp_err_t esp_async_memcpy_install_gdma_template(const async_memcpy_config_t *config, async_memcpy_handle_t *mcp,
esp_err_t (*new_channel)(const gdma_channel_alloc_config_t *, gdma_channel_handle_t *), int gdma_bus_id)
{
esp_err_t ret = ESP_OK;
async_memcpy_gdma_context_t *mcp_gdma = NULL;
ESP_RETURN_ON_FALSE(config && mcp, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
// allocate memory of driver context from internal memory
mcp_gdma = heap_caps_calloc(1, sizeof(async_memcpy_gdma_context_t), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
ESP_GOTO_ON_FALSE(mcp_gdma, ESP_ERR_NO_MEM, err, TAG, "no mem for driver context");
uint32_t trans_queue_len = config->backlog ? config->backlog : DEFAULT_TRANSACTION_QUEUE_LENGTH;
// allocate memory for transaction pool
mcp_gdma->transaction_pool = heap_caps_aligned_calloc(MCP_DMA_DESC_ALIGN, trans_queue_len, sizeof(async_memcpy_transaction_t),
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
ESP_GOTO_ON_FALSE(mcp_gdma->transaction_pool, ESP_ERR_NO_MEM, err, TAG, "no mem for transaction pool");
// create TX channel and RX channel, they should reside in the same DMA pair
gdma_channel_alloc_config_t tx_alloc_config = {
.flags.reserve_sibling = 1,
.direction = GDMA_CHANNEL_DIRECTION_TX,
};
ESP_GOTO_ON_ERROR(new_channel(&tx_alloc_config, &mcp_gdma->tx_channel), err, TAG, "failed to create GDMA TX channel");
gdma_channel_alloc_config_t rx_alloc_config = {
.direction = GDMA_CHANNEL_DIRECTION_RX,
.sibling_chan = mcp_gdma->tx_channel,
};
ESP_GOTO_ON_ERROR(new_channel(&rx_alloc_config, &mcp_gdma->rx_channel), err, TAG, "failed to create GDMA RX channel");
// initialize GDMA channels
gdma_trigger_t m2m_trigger = GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_M2M, 0);
// get a free DMA trigger ID for memory copy
uint32_t free_m2m_id_mask = 0;
gdma_get_free_m2m_trig_id_mask(mcp_gdma->tx_channel, &free_m2m_id_mask);
m2m_trigger.instance_id = __builtin_ctz(free_m2m_id_mask);
gdma_connect(mcp_gdma->rx_channel, m2m_trigger);
gdma_connect(mcp_gdma->tx_channel, m2m_trigger);
gdma_transfer_ability_t transfer_ability = {
.sram_trans_align = config->sram_trans_align,
.psram_trans_align = config->psram_trans_align,
};
ESP_GOTO_ON_ERROR(gdma_set_transfer_ability(mcp_gdma->tx_channel, &transfer_ability), err, TAG, "set tx trans ability failed");
ESP_GOTO_ON_ERROR(gdma_set_transfer_ability(mcp_gdma->rx_channel, &transfer_ability), err, TAG, "set rx trans ability failed");
// register rx eof callback
gdma_rx_event_callbacks_t cbs = {
.on_recv_eof = mcp_gdma_rx_eof_callback,
};
ESP_GOTO_ON_ERROR(gdma_register_rx_event_callbacks(mcp_gdma->rx_channel, &cbs, mcp_gdma), err, TAG, "failed to register RX EOF callback");
// initialize transaction queue
STAILQ_INIT(&mcp_gdma->idle_queue_head);
STAILQ_INIT(&mcp_gdma->ready_queue_head);
// pick transactions from the pool and insert to the idle queue
for (int i = 0; i < trans_queue_len; i++) {
STAILQ_INSERT_TAIL(&mcp_gdma->idle_queue_head, &mcp_gdma->transaction_pool[i], idle_queue_entry);
}
// initialize other members
portMUX_INITIALIZE(&mcp_gdma->spin_lock);
atomic_init(&mcp_gdma->fsm, MCP_FSM_IDLE);
mcp_gdma->gdma_bus_id = gdma_bus_id;
// if psram_trans_align is configured to zero, fall back to the data cache line size
uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_TYPE_DATA);
size_t psram_trans_align = MAX(data_cache_line_size, config->psram_trans_align);
size_t trans_align = MAX(config->sram_trans_align, psram_trans_align);
mcp_gdma->max_single_dma_buffer = ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, trans_align);
mcp_gdma->psram_trans_align = psram_trans_align;
mcp_gdma->sram_trans_align = config->sram_trans_align;
mcp_gdma->parent.del = mcp_gdma_del;
mcp_gdma->parent.memcpy = mcp_gdma_memcpy;
#if SOC_GDMA_SUPPORT_ETM
mcp_gdma->parent.new_etm_event = mcp_new_etm_event;
#endif
// return driver object
*mcp = &mcp_gdma->parent;
return ESP_OK;
err:
if (mcp_gdma) {
mcp_gdma_destroy(mcp_gdma);
}
return ret;
}
#if SOC_AHB_GDMA_SUPPORTED
esp_err_t esp_async_memcpy_install_gdma_ahb(const async_memcpy_config_t *config, async_memcpy_handle_t *mcp)
{
return esp_async_memcpy_install_gdma_template(config, mcp, gdma_new_ahb_channel, SOC_GDMA_BUS_AHB);
}
/// the default installation falls back to the AHB GDMA backend
esp_err_t esp_async_memcpy_install(const async_memcpy_config_t *config, async_memcpy_handle_t *asmcp)
__attribute__((alias("esp_async_memcpy_install_gdma_ahb")));
#endif // SOC_AHB_GDMA_SUPPORTED
#if SOC_AXI_GDMA_SUPPORTED
esp_err_t esp_async_memcpy_install_gdma_axi(const async_memcpy_config_t *config, async_memcpy_handle_t *mcp)
{
return esp_async_memcpy_install_gdma_template(config, mcp, gdma_new_axi_channel, SOC_GDMA_BUS_AXI);
}
#endif // SOC_AXI_GDMA_SUPPORTED
static esp_err_t mcp_gdma_del(async_memcpy_context_t *ctx)
{
async_memcpy_gdma_context_t *mcp_gdma = __containerof(ctx, async_memcpy_gdma_context_t, parent);
// check if there are pending transactions
ESP_RETURN_ON_FALSE(STAILQ_EMPTY(&mcp_gdma->ready_queue_head), ESP_ERR_INVALID_STATE, TAG, "there are pending transactions");
// check if the driver is in IDLE state
ESP_RETURN_ON_FALSE(atomic_load(&mcp_gdma->fsm) == MCP_FSM_IDLE, ESP_ERR_INVALID_STATE, TAG, "driver is not in IDLE state");
return mcp_gdma_destroy(mcp_gdma);
}
static void mount_tx_buffer_to_dma(mcp_dma_descriptor_t *desc_array, int num_desc,
uint8_t *buf, size_t buf_sz, size_t max_single_dma_buffer)
{
uint32_t prepared_length = 0;
size_t len = buf_sz;
for (int i = 0; i < num_desc - 1; i++) {
desc_array[i].buffer = &buf[prepared_length];
desc_array[i].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
desc_array[i].dw0.suc_eof = 0;
desc_array[i].dw0.size = max_single_dma_buffer;
desc_array[i].dw0.length = max_single_dma_buffer;
desc_array[i].next = &desc_array[i + 1];
prepared_length += max_single_dma_buffer;
len -= max_single_dma_buffer;
}
// take special care of the EOF descriptor
desc_array[num_desc - 1].buffer = &buf[prepared_length];
desc_array[num_desc - 1].next = NULL;
desc_array[num_desc - 1].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
desc_array[num_desc - 1].dw0.suc_eof = 1;
desc_array[num_desc - 1].dw0.size = len;
desc_array[num_desc - 1].dw0.length = len;
#if MCP_NEEDS_WRITE_BACK_DESC_CACHE
Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)desc_array, sizeof(mcp_dma_descriptor_t) * num_desc);
#endif
}
static void mount_rx_buffer_to_dma(mcp_dma_descriptor_t *desc_array, int num_desc, mcp_dma_descriptor_t *eof_desc,
uint8_t *buf, size_t buf_sz, size_t max_single_dma_buffer)
{
uint32_t prepared_length = 0;
size_t len = buf_sz;
if (desc_array) {
assert(num_desc > 0);
for (int i = 0; i < num_desc; i++) {
desc_array[i].buffer = &buf[prepared_length];
desc_array[i].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
desc_array[i].dw0.size = max_single_dma_buffer;
desc_array[i].dw0.length = max_single_dma_buffer;
desc_array[i].next = &desc_array[i + 1];
prepared_length += max_single_dma_buffer;
len -= max_single_dma_buffer;
}
desc_array[num_desc - 1].next = eof_desc;
}
eof_desc->buffer = &buf[prepared_length];
eof_desc->next = NULL;
eof_desc->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
eof_desc->dw0.size = len;
eof_desc->dw0.length = len;
#if MCP_NEEDS_WRITE_BACK_DESC_CACHE
if (desc_array) {
Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)desc_array, sizeof(mcp_dma_descriptor_t) * num_desc);
}
Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)eof_desc, sizeof(mcp_dma_descriptor_t));
#endif
}
/// @brief helper function to get one transaction from the ready queue
/// @note this function is allowed to be called in ISR
static async_memcpy_transaction_t *try_pop_trans_from_ready_queue(async_memcpy_gdma_context_t *mcp_gdma)
{
async_memcpy_transaction_t *trans = NULL;
portENTER_CRITICAL_SAFE(&mcp_gdma->spin_lock);
trans = STAILQ_FIRST(&mcp_gdma->ready_queue_head);
if (trans) {
STAILQ_REMOVE_HEAD(&mcp_gdma->ready_queue_head, ready_queue_entry);
}
portEXIT_CRITICAL_SAFE(&mcp_gdma->spin_lock);
return trans;
}
/// @brief helper function to start a pending transaction
/// @note this function is allowed to be called in ISR
static void try_start_pending_transaction(async_memcpy_gdma_context_t *mcp_gdma)
{
async_memcpy_fsm_t expected_fsm = MCP_FSM_IDLE;
async_memcpy_transaction_t *trans = NULL;
if (atomic_compare_exchange_strong(&mcp_gdma->fsm, &expected_fsm, MCP_FSM_RUN_WAIT)) {
trans = try_pop_trans_from_ready_queue(mcp_gdma);
if (trans) {
atomic_store(&mcp_gdma->fsm, MCP_FSM_RUN);
gdma_start(mcp_gdma->rx_channel, trans->rx_start_desc_addr);
gdma_start(mcp_gdma->tx_channel, trans->tx_start_desc_addr);
} else {
atomic_store(&mcp_gdma->fsm, MCP_FSM_IDLE);
}
}
}
/// @brief helper function to get one transaction from the idle queue
/// @note this function is allowed to be called in ISR
static async_memcpy_transaction_t *try_pop_trans_from_idle_queue(async_memcpy_gdma_context_t *mcp_gdma)
{
async_memcpy_transaction_t *trans = NULL;
portENTER_CRITICAL_SAFE(&mcp_gdma->spin_lock);
trans = STAILQ_FIRST(&mcp_gdma->idle_queue_head);
if (trans) {
STAILQ_REMOVE_HEAD(&mcp_gdma->idle_queue_head, idle_queue_entry);
}
portEXIT_CRITICAL_SAFE(&mcp_gdma->spin_lock);
return trans;
}
static bool check_buffer_aligned(async_memcpy_gdma_context_t *mcp_gdma, void *src, void *dst, size_t n)
{
bool valid = true;
if (esp_ptr_external_ram(dst)) {
if (mcp_gdma->psram_trans_align) {
valid = valid && (((intptr_t)dst & (mcp_gdma->psram_trans_align - 1)) == 0);
valid = valid && ((n & (mcp_gdma->psram_trans_align - 1)) == 0);
}
} else {
if (mcp_gdma->sram_trans_align) {
valid = valid && (((intptr_t)dst & (mcp_gdma->sram_trans_align - 1)) == 0);
valid = valid && ((n & (mcp_gdma->sram_trans_align - 1)) == 0);
}
}
return valid;
}
static esp_err_t mcp_gdma_memcpy(async_memcpy_context_t *ctx, void *dst, void *src, size_t n, async_memcpy_isr_cb_t cb_isr, void *cb_args)
{
esp_err_t ret = ESP_OK;
async_memcpy_gdma_context_t *mcp_gdma = __containerof(ctx, async_memcpy_gdma_context_t, parent);
// buffer location check
#if SOC_AHB_GDMA_SUPPORTED && !SOC_AHB_GDMA_SUPPORT_PSRAM
if (mcp_gdma->gdma_bus_id == SOC_GDMA_BUS_AHB) {
ESP_RETURN_ON_FALSE(esp_ptr_internal(src) && esp_ptr_internal(dst), ESP_ERR_INVALID_ARG, TAG, "AHB GDMA can only access SRAM");
}
#endif // SOC_AHB_GDMA_SUPPORTED && !SOC_AHB_GDMA_SUPPORT_PSRAM
#if SOC_AXI_GDMA_SUPPORTED && !SOC_AXI_GDMA_SUPPORT_PSRAM
if (mcp_gdma->gdma_bus_id == SOC_GDMA_BUS_AXI) {
ESP_RETURN_ON_FALSE(esp_ptr_internal(src) && esp_ptr_internal(dst), ESP_ERR_INVALID_ARG, TAG, "AXI_DMA can only access SRAM");
}
#endif // SOC_AXI_GDMA_SUPPORTED && !SOC_AXI_GDMA_SUPPORT_PSRAM
// alignment check
ESP_RETURN_ON_FALSE(check_buffer_aligned(mcp_gdma, src, dst, n), ESP_ERR_INVALID_ARG, TAG, "buffer not aligned: %p -> %p, sz=%zu", src, dst, n);
async_memcpy_transaction_t *trans = NULL;
// pick one transaction node from idle queue
trans = try_pop_trans_from_idle_queue(mcp_gdma);
// check if we get the transaction object successfully
ESP_RETURN_ON_FALSE(trans, ESP_ERR_INVALID_STATE, TAG, "no free node in the idle queue");
// calculate how many descriptors we want
size_t max_single_dma_buffer = mcp_gdma->max_single_dma_buffer;
uint32_t num_desc_per_path = (n + max_single_dma_buffer - 1) / max_single_dma_buffer;
// allocate DMA descriptors, descriptors need a strict alignment
trans->tx_desc_link = heap_caps_aligned_calloc(MCP_DMA_DESC_ALIGN, num_desc_per_path, sizeof(mcp_dma_descriptor_t),
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
ESP_GOTO_ON_FALSE(trans->tx_desc_link, ESP_ERR_NO_MEM, err, TAG, "no mem for DMA descriptors");
// no need to allocate a separate EOF descriptor; we will use trans->eof_node as the RX EOF descriptor
if (num_desc_per_path > 1) {
trans->rx_desc_link = heap_caps_aligned_calloc(MCP_DMA_DESC_ALIGN, num_desc_per_path - 1, sizeof(mcp_dma_descriptor_t),
MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA);
ESP_GOTO_ON_FALSE(trans->rx_desc_link, ESP_ERR_NO_MEM, err, TAG, "no mem for DMA descriptors");
} else {
// small copy buffer, using trans->eof_node alone is sufficient
trans->rx_desc_link = NULL;
}
// (preload) mount src data to the TX descriptor
mount_tx_buffer_to_dma(trans->tx_desc_link, num_desc_per_path, src, n, max_single_dma_buffer);
// (preload) mount dst data to the RX descriptor
mount_rx_buffer_to_dma(trans->rx_desc_link, num_desc_per_path - 1, &trans->eof_node, dst, n, max_single_dma_buffer);
// if the source data is in the cache, write it back so that the DMA can see the latest data
#if MCP_NEEDS_WRITE_BACK_SRC_CACHE
int write_back_map = CACHE_MAP_L1_DCACHE;
if (esp_ptr_external_ram(src)) {
write_back_map |= CACHE_MAP_L2_CACHE;
}
Cache_WriteBack_Addr(write_back_map, (uint32_t)src, n);
#endif
// save other transaction context
trans->cb = cb_isr;
trans->cb_args = cb_args;
trans->memcpy_size = n;
trans->memcpy_dst_addr = (intptr_t)dst;
trans->tx_start_desc_addr = (intptr_t)trans->tx_desc_link;
trans->rx_start_desc_addr = trans->rx_desc_link ? (intptr_t)trans->rx_desc_link : (intptr_t)&trans->eof_node;
portENTER_CRITICAL(&mcp_gdma->spin_lock);
// insert the trans to ready queue
STAILQ_INSERT_TAIL(&mcp_gdma->ready_queue_head, trans, ready_queue_entry);
portEXIT_CRITICAL(&mcp_gdma->spin_lock);
// check driver state, if there's no running transaction, start a new one
try_start_pending_transaction(mcp_gdma);
return ESP_OK;
err:
if (trans) {
if (trans->tx_desc_link) {
free(trans->tx_desc_link);
trans->tx_desc_link = NULL;
}
if (trans->rx_desc_link) {
free(trans->rx_desc_link);
trans->rx_desc_link = NULL;
}
// return the trans to the idle queue
portENTER_CRITICAL(&mcp_gdma->spin_lock);
STAILQ_INSERT_TAIL(&mcp_gdma->idle_queue_head, trans, idle_queue_entry);
portEXIT_CRITICAL(&mcp_gdma->spin_lock);
}
return ret;
}
static bool mcp_gdma_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
{
bool need_yield = false;
async_memcpy_gdma_context_t *mcp_gdma = (async_memcpy_gdma_context_t *)user_data;
mcp_dma_descriptor_t *eof_desc = (mcp_dma_descriptor_t *)event_data->rx_eof_desc_addr;
// get the transaction object address by the EOF descriptor address
async_memcpy_transaction_t *trans = __containerof(eof_desc, async_memcpy_transaction_t, eof_node);
// switch driver state from RUN to IDLE
async_memcpy_fsm_t expected_fsm = MCP_FSM_RUN;
if (atomic_compare_exchange_strong(&mcp_gdma->fsm, &expected_fsm, MCP_FSM_IDLE_WAIT)) {
// if the destination data is in the cache, invalidate it so that the CPU can see the latest data
#if MCP_NEEDS_INVALIDATE_DST_CACHE
int write_back_map = CACHE_MAP_L1_DCACHE;
if (esp_ptr_external_ram((const void *)trans->memcpy_dst_addr)) {
write_back_map |= CACHE_MAP_L2_CACHE;
}
Cache_Invalidate_Addr(write_back_map, (uint32_t)trans->memcpy_dst_addr, trans->memcpy_size);
#endif
// invoke the callback registered by the user
async_memcpy_isr_cb_t cb = trans->cb;
if (cb) {
async_memcpy_event_t e = {
// No event data for now
};
need_yield = cb(&mcp_gdma->parent, &e, trans->cb_args);
}
// recycle descriptor memory
if (trans->tx_desc_link) {
free(trans->tx_desc_link);
trans->tx_desc_link = NULL;
}
if (trans->rx_desc_link) {
free(trans->rx_desc_link);
trans->rx_desc_link = NULL;
}
trans->cb = NULL;
portENTER_CRITICAL_ISR(&mcp_gdma->spin_lock);
// insert the trans object to the idle queue
STAILQ_INSERT_TAIL(&mcp_gdma->idle_queue_head, trans, idle_queue_entry);
portEXIT_CRITICAL_ISR(&mcp_gdma->spin_lock);
atomic_store(&mcp_gdma->fsm, MCP_FSM_IDLE);
}
// try start the next pending transaction
try_start_pending_transaction(mcp_gdma);
return need_yield;
}
#if SOC_GDMA_SUPPORT_ETM
static esp_err_t mcp_new_etm_event(async_memcpy_context_t *ctx, async_memcpy_etm_event_t event_type, esp_etm_event_handle_t *out_event)
{
async_memcpy_gdma_context_t *mcp_gdma = __containerof(ctx, async_memcpy_gdma_context_t, parent);
if (event_type == ASYNC_MEMCPY_ETM_EVENT_COPY_DONE) {
// use the RX EOF to indicate the async memcpy done event
gdma_etm_event_config_t etm_event_conf = {
.event_type = GDMA_ETM_EVENT_EOF,
};
return gdma_new_etm_event(mcp_gdma->rx_channel, &etm_event_conf, out_event);
} else {
return ESP_ERR_NOT_SUPPORTED;
}
}
#endif // SOC_GDMA_SUPPORT_ETM


@@ -1,92 +0,0 @@
/*
* SPDX-FileCopyrightText: 2020-2021 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "freertos/FreeRTOS.h"
#include "soc/periph_defs.h"
#include "soc/soc_memory_layout.h"
#include "hal/cp_dma_hal.h"
#include "hal/cp_dma_ll.h"
#include "esp_log.h"
#include "esp_attr.h"
#include "esp_err.h"
#include "esp_etm.h"
#include "esp_async_memcpy_impl.h"
IRAM_ATTR static void async_memcpy_impl_default_isr_handler(void *args)
{
async_memcpy_impl_t *mcp_impl = (async_memcpy_impl_t *)args;
portENTER_CRITICAL_ISR(&mcp_impl->hal_lock);
uint32_t status = cp_dma_hal_get_intr_status(&mcp_impl->hal);
cp_dma_hal_clear_intr_status(&mcp_impl->hal, status);
portEXIT_CRITICAL_ISR(&mcp_impl->hal_lock);
// End-Of-Frame on RX side
if (status & CP_DMA_LL_EVENT_RX_EOF) {
mcp_impl->rx_eof_addr = cp_dma_ll_get_rx_eof_descriptor_address(mcp_impl->hal.dev);
async_memcpy_isr_on_rx_done_event(mcp_impl);
}
if (mcp_impl->isr_need_yield) {
mcp_impl->isr_need_yield = false;
portYIELD_FROM_ISR();
}
}
esp_err_t async_memcpy_impl_init(async_memcpy_impl_t *impl)
{
esp_err_t ret = ESP_OK;
impl->hal_lock = (portMUX_TYPE)portMUX_INITIALIZER_UNLOCKED;
cp_dma_hal_config_t config = {};
cp_dma_hal_init(&impl->hal, &config);
ret = esp_intr_alloc(ETS_DMA_COPY_INTR_SOURCE, ESP_INTR_FLAG_IRAM, async_memcpy_impl_default_isr_handler, impl, &impl->intr);
return ret;
}
esp_err_t async_memcpy_impl_deinit(async_memcpy_impl_t *impl)
{
esp_err_t ret = ESP_OK;
cp_dma_hal_deinit(&impl->hal);
ret = esp_intr_free(impl->intr);
return ret;
}
esp_err_t async_memcpy_impl_start(async_memcpy_impl_t *impl, intptr_t outlink_base, intptr_t inlink_base)
{
cp_dma_hal_set_desc_base_addr(&impl->hal, outlink_base, inlink_base);
cp_dma_hal_start(&impl->hal); // enable DMA and interrupt
return ESP_OK;
}
esp_err_t async_memcpy_impl_stop(async_memcpy_impl_t *impl)
{
cp_dma_hal_stop(&impl->hal); // disable DMA and interrupt
return ESP_OK;
}
esp_err_t async_memcpy_impl_restart(async_memcpy_impl_t *impl)
{
cp_dma_hal_restart_rx(&impl->hal);
cp_dma_hal_restart_tx(&impl->hal);
return ESP_OK;
}
esp_err_t async_memcpy_impl_new_etm_event(async_memcpy_impl_t *impl, async_memcpy_etm_event_t event_type, esp_etm_event_handle_t *out_event)
{
(void)impl;
(void)event_type;
(void)out_event;
return ESP_ERR_NOT_SUPPORTED;
}
bool async_memcpy_impl_is_buffer_address_valid(async_memcpy_impl_t *impl, void *src, void *dst)
{
// CP_DMA can only access SRAM
return esp_ptr_internal(src) && esp_ptr_internal(dst);
}


@@ -1,151 +0,0 @@
/*
* SPDX-FileCopyrightText: 2020-2021 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "freertos/FreeRTOS.h"
#include "soc/periph_defs.h"
#include "soc/soc_memory_layout.h"
#include "soc/soc_caps.h"
#include "esp_private/periph_ctrl.h"
#include "esp_log.h"
#include "esp_attr.h"
#include "esp_err.h"
#include "esp_async_memcpy_impl.h"
#if SOC_APM_SUPPORTED
#include "hal/apm_ll.h"
#endif
IRAM_ATTR static bool async_memcpy_impl_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
{
async_memcpy_impl_t *mcp_impl = (async_memcpy_impl_t *)user_data;
mcp_impl->rx_eof_addr = event_data->rx_eof_desc_addr;
async_memcpy_isr_on_rx_done_event(mcp_impl);
return mcp_impl->isr_need_yield;
}
esp_err_t async_memcpy_impl_init(async_memcpy_impl_t *impl)
{
esp_err_t ret = ESP_OK;
// create TX channel and reserve sibling channel for future use
gdma_channel_alloc_config_t tx_alloc_config = {
.flags.reserve_sibling = 1,
.direction = GDMA_CHANNEL_DIRECTION_TX,
};
ret = gdma_new_channel(&tx_alloc_config, &impl->tx_channel);
if (ret != ESP_OK) {
goto err;
}
// create RX channel and specify that it should reside in the same pair as TX
gdma_channel_alloc_config_t rx_alloc_config = {
.direction = GDMA_CHANNEL_DIRECTION_RX,
.sibling_chan = impl->tx_channel,
};
ret = gdma_new_channel(&rx_alloc_config, &impl->rx_channel);
if (ret != ESP_OK) {
goto err;
}
gdma_trigger_t m2m_trigger = GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_M2M, 0);
// get a free DMA trigger ID for memory copy
uint32_t free_m2m_id_mask = 0;
gdma_get_free_m2m_trig_id_mask(impl->tx_channel, &free_m2m_id_mask);
m2m_trigger.instance_id = __builtin_ctz(free_m2m_id_mask);
gdma_connect(impl->rx_channel, m2m_trigger);
gdma_connect(impl->tx_channel, m2m_trigger);
gdma_strategy_config_t strategy_config = {
.auto_update_desc = true,
.owner_check = true,
};
gdma_transfer_ability_t transfer_ability = {
.sram_trans_align = impl->sram_trans_align,
.psram_trans_align = impl->psram_trans_align,
};
ret = gdma_set_transfer_ability(impl->tx_channel, &transfer_ability);
if (ret != ESP_OK) {
goto err;
}
ret = gdma_set_transfer_ability(impl->rx_channel, &transfer_ability);
if (ret != ESP_OK) {
goto err;
}
gdma_apply_strategy(impl->tx_channel, &strategy_config);
gdma_apply_strategy(impl->rx_channel, &strategy_config);
#if SOC_APM_SUPPORTED
// APM strategy: trusted mode
// TODO: IDF-5354 GDMA for M2M usage only need read and write permissions, we should disable the execute permission by the APM controller
apm_tee_ll_set_master_secure_mode(APM_LL_MASTER_GDMA + m2m_trigger.instance_id, APM_LL_SECURE_MODE_TEE);
#endif // SOC_APM_SUPPORTED
gdma_rx_event_callbacks_t cbs = {
.on_recv_eof = async_memcpy_impl_rx_eof_callback
};
ret = gdma_register_rx_event_callbacks(impl->rx_channel, &cbs, impl);
err:
return ret;
}
esp_err_t async_memcpy_impl_deinit(async_memcpy_impl_t *impl)
{
gdma_disconnect(impl->rx_channel);
gdma_disconnect(impl->tx_channel);
gdma_del_channel(impl->rx_channel);
gdma_del_channel(impl->tx_channel);
return ESP_OK;
}
esp_err_t async_memcpy_impl_start(async_memcpy_impl_t *impl, intptr_t outlink_base, intptr_t inlink_base)
{
gdma_start(impl->rx_channel, inlink_base);
gdma_start(impl->tx_channel, outlink_base);
return ESP_OK;
}
esp_err_t async_memcpy_impl_stop(async_memcpy_impl_t *impl)
{
gdma_stop(impl->rx_channel);
gdma_stop(impl->tx_channel);
return ESP_OK;
}
esp_err_t async_memcpy_impl_restart(async_memcpy_impl_t *impl)
{
gdma_append(impl->rx_channel);
gdma_append(impl->tx_channel);
return ESP_OK;
}
esp_err_t async_memcpy_impl_new_etm_event(async_memcpy_impl_t *impl, async_memcpy_etm_event_t event_type, esp_etm_event_handle_t *out_event)
{
if (event_type == ASYNC_MEMCPY_ETM_EVENT_COPY_DONE) {
// use the RX EOF to indicate the async memcpy done event
gdma_etm_event_config_t etm_event_conf = {
.event_type = GDMA_ETM_EVENT_EOF,
};
return gdma_new_etm_event(impl->rx_channel, &etm_event_conf, out_event);
} else {
return ESP_ERR_NOT_SUPPORTED;
}
}
bool async_memcpy_impl_is_buffer_address_valid(async_memcpy_impl_t *impl, void *src, void *dst)
{
bool valid = true;
if (esp_ptr_external_ram(dst)) {
if (impl->psram_trans_align) {
valid = valid && (((intptr_t)dst & (impl->psram_trans_align - 1)) == 0);
}
} else {
if (impl->sram_trans_align) {
valid = valid && (((intptr_t)dst & (impl->sram_trans_align - 1)) == 0);
}
}
return valid;
}


@@ -1,311 +1,31 @@
/*
* SPDX-FileCopyrightText: 2020-2022 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <sys/param.h>
#include "freertos/FreeRTOS.h"
#include "freertos/semphr.h"
#include "hal/dma_types.h"
#include "esp_check.h"
#include "esp_heap_caps.h"
#include "esp_log.h"
#include "esp_async_memcpy.h"
#include "esp_async_memcpy_impl.h"
#include "esp_async_memcpy_priv.h"
static const char *TAG = "async_memcpy";
static const char *TAG = "async_mcp";
#define ALIGN_DOWN(val, align) ((val) & ~((align) - 1))
/**
* @brief Type of async mcp stream
* mcp stream inherits DMA descriptor, besides that, it has a callback function member
*/
typedef struct {
dma_descriptor_t desc;
async_memcpy_isr_cb_t cb;
void *cb_args;
} async_memcpy_stream_t;
/**
* @brief Type of async mcp driver context
*/
typedef struct async_memcpy_context_t {
async_memcpy_impl_t mcp_impl; // implementation layer
portMUX_TYPE spinlock; // spinlock, prevent operating descriptors concurrently
intr_handle_t intr_hdl; // interrupt handle
uint32_t flags; // extra driver flags
dma_descriptor_t *tx_desc; // pointer to the next free TX descriptor
dma_descriptor_t *rx_desc; // pointer to the next free RX descriptor
dma_descriptor_t *next_rx_desc_to_check; // pointer to the next RX descriptor to recycle
uint32_t max_stream_num; // maximum number of streams
size_t max_dma_buffer_size; // maximum DMA buffer size
async_memcpy_stream_t *out_streams; // pointer to the first TX stream
async_memcpy_stream_t *in_streams; // pointer to the first RX stream
async_memcpy_stream_t streams_pool[0]; // stream pool (TX + RX), the size is configured during driver installation
} async_memcpy_context_t;
esp_err_t esp_async_memcpy_install(const async_memcpy_config_t *config, async_memcpy_t *asmcp)
esp_err_t esp_async_memcpy_uninstall(async_memcpy_handle_t asmcp)
{
esp_err_t ret = ESP_OK;
async_memcpy_context_t *mcp_hdl = NULL;
ESP_GOTO_ON_FALSE(config, ESP_ERR_INVALID_ARG, err, TAG, "configuration can't be null");
ESP_GOTO_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, err, TAG, "can't assign mcp handle to null");
// context memory size + stream pool size
size_t total_malloc_size = sizeof(async_memcpy_context_t) + sizeof(async_memcpy_stream_t) * config->backlog * 2;
// to work when the cache is disabled, the driver handle should be located in SRAM
mcp_hdl = heap_caps_calloc(1, total_malloc_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
ESP_GOTO_ON_FALSE(mcp_hdl, ESP_ERR_NO_MEM, err, TAG, "allocate context memory failed");
mcp_hdl->flags = config->flags;
mcp_hdl->out_streams = mcp_hdl->streams_pool;
mcp_hdl->in_streams = mcp_hdl->streams_pool + config->backlog;
mcp_hdl->max_stream_num = config->backlog;
// circle TX/RX descriptors
for (size_t i = 0; i < mcp_hdl->max_stream_num; i++) {
mcp_hdl->out_streams[i].desc.dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_CPU;
mcp_hdl->out_streams[i].desc.next = &mcp_hdl->out_streams[i + 1].desc;
mcp_hdl->in_streams[i].desc.dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_CPU;
mcp_hdl->in_streams[i].desc.next = &mcp_hdl->in_streams[i + 1].desc;
}
mcp_hdl->out_streams[mcp_hdl->max_stream_num - 1].desc.next = &mcp_hdl->out_streams[0].desc;
mcp_hdl->in_streams[mcp_hdl->max_stream_num - 1].desc.next = &mcp_hdl->in_streams[0].desc;
mcp_hdl->tx_desc = &mcp_hdl->out_streams[0].desc;
mcp_hdl->rx_desc = &mcp_hdl->in_streams[0].desc;
mcp_hdl->next_rx_desc_to_check = &mcp_hdl->in_streams[0].desc;
mcp_hdl->spinlock = (portMUX_TYPE)portMUX_INITIALIZER_UNLOCKED;
mcp_hdl->mcp_impl.sram_trans_align = config->sram_trans_align;
mcp_hdl->mcp_impl.psram_trans_align = config->psram_trans_align;
size_t trans_align = MAX(config->sram_trans_align, config->psram_trans_align);
mcp_hdl->max_dma_buffer_size = trans_align ? ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, trans_align) : DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
// initialize implementation layer
ret = async_memcpy_impl_init(&mcp_hdl->mcp_impl);
ESP_GOTO_ON_ERROR(ret, err, TAG, "DMA M2M init failed");
ESP_LOGD(TAG, "installed memory to memory copy channel at %p", mcp_hdl);
*asmcp = mcp_hdl;
async_memcpy_impl_start(&mcp_hdl->mcp_impl, (intptr_t)&mcp_hdl->out_streams[0].desc, (intptr_t)&mcp_hdl->in_streams[0].desc);
return ESP_OK;
err:
if (mcp_hdl) {
free(mcp_hdl);
}
if (asmcp) {
*asmcp = NULL;
}
return ret;
ESP_RETURN_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
return asmcp->del(asmcp);
}
esp_err_t esp_async_memcpy_uninstall(async_memcpy_t asmcp)
esp_err_t esp_async_memcpy(async_memcpy_handle_t asmcp, void *dst, void *src, size_t n, async_memcpy_isr_cb_t cb_isr, void *cb_args)
{
esp_err_t ret = ESP_OK;
ESP_GOTO_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, err, TAG, "mcp handle can't be null");
async_memcpy_impl_stop(&asmcp->mcp_impl);
async_memcpy_impl_deinit(&asmcp->mcp_impl);
free(asmcp);
err:
return ret;
ESP_RETURN_ON_FALSE(asmcp && dst && src && n, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
return asmcp->memcpy(asmcp, dst, src, n, cb_isr, cb_args);
}
esp_err_t esp_async_memcpy_new_etm_event(async_memcpy_t asmcp, async_memcpy_etm_event_t event_type, esp_etm_event_handle_t *out_event)
#if SOC_GDMA_SUPPORT_ETM
esp_err_t esp_async_memcpy_new_etm_event(async_memcpy_handle_t asmcp, async_memcpy_etm_event_t event_type, esp_etm_event_handle_t *out_event)
{
ESP_RETURN_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, TAG, "mcp handle can't be null");
return async_memcpy_impl_new_etm_event(&asmcp->mcp_impl, event_type, out_event);
}
static int async_memcpy_prepare_receive(async_memcpy_t asmcp, void *buffer, size_t size, dma_descriptor_t **start_desc, dma_descriptor_t **end_desc)
{
uint32_t prepared_length = 0;
uint8_t *buf = (uint8_t *)buffer;
dma_descriptor_t *desc = asmcp->rx_desc; // descriptor iterator
dma_descriptor_t *start = desc;
dma_descriptor_t *end = desc;
while (size > asmcp->max_dma_buffer_size) {
if (desc->dw0.owner != DMA_DESCRIPTOR_BUFFER_OWNER_DMA) {
desc->dw0.suc_eof = 0;
desc->dw0.size = asmcp->max_dma_buffer_size;
desc->buffer = &buf[prepared_length];
desc = desc->next; // move to next descriptor
prepared_length += asmcp->max_dma_buffer_size;
size -= asmcp->max_dma_buffer_size;
} else {
// out of RX descriptors
goto _exit;
}
}
if (size) {
if (desc->dw0.owner != DMA_DESCRIPTOR_BUFFER_OWNER_DMA) {
end = desc; // the last descriptor used
desc->dw0.suc_eof = 0;
desc->dw0.size = size;
desc->buffer = &buf[prepared_length];
desc = desc->next; // move to next descriptor
prepared_length += size;
} else {
// out of RX descriptors
goto _exit;
}
}
_exit:
*start_desc = start;
*end_desc = end;
return prepared_length;
}
static int async_memcpy_prepare_transmit(async_memcpy_t asmcp, void *buffer, size_t len, dma_descriptor_t **start_desc, dma_descriptor_t **end_desc)
{
uint32_t prepared_length = 0;
uint8_t *buf = (uint8_t *)buffer;
dma_descriptor_t *desc = asmcp->tx_desc; // descriptor iterator
dma_descriptor_t *start = desc;
dma_descriptor_t *end = desc;
while (len > asmcp->max_dma_buffer_size) {
if (desc->dw0.owner != DMA_DESCRIPTOR_BUFFER_OWNER_DMA) {
desc->dw0.suc_eof = 0; // not the end of the transaction
desc->dw0.size = asmcp->max_dma_buffer_size;
desc->dw0.length = asmcp->max_dma_buffer_size;
desc->buffer = &buf[prepared_length];
desc = desc->next; // move to next descriptor
prepared_length += asmcp->max_dma_buffer_size;
len -= asmcp->max_dma_buffer_size;
} else {
// out of TX descriptors
goto _exit;
}
}
if (len) {
if (desc->dw0.owner != DMA_DESCRIPTOR_BUFFER_OWNER_DMA) {
end = desc; // the last descriptor used
desc->dw0.suc_eof = 1; // end of the transaction
desc->dw0.size = len;
desc->dw0.length = len;
desc->buffer = &buf[prepared_length];
desc = desc->next; // move to next descriptor
prepared_length += len;
} else {
// out of TX descriptors
goto _exit;
}
}
*start_desc = start;
*end_desc = end;
_exit:
return prepared_length;
}
static bool async_memcpy_get_next_rx_descriptor(async_memcpy_t asmcp, dma_descriptor_t *eof_desc, dma_descriptor_t **next_desc)
{
dma_descriptor_t *next = asmcp->next_rx_desc_to_check;
// additional check, to avoid a potential interrupt being triggered by mistake
if (next->dw0.owner == DMA_DESCRIPTOR_BUFFER_OWNER_CPU) {
asmcp->next_rx_desc_to_check = asmcp->next_rx_desc_to_check->next;
*next_desc = next;
// return if we need to continue
return eof_desc == next ? false : true;
}
*next_desc = NULL;
return false;
}
esp_err_t esp_async_memcpy(async_memcpy_t asmcp, void *dst, void *src, size_t n, async_memcpy_isr_cb_t cb_isr, void *cb_args)
{
esp_err_t ret = ESP_OK;
dma_descriptor_t *rx_start_desc = NULL;
dma_descriptor_t *rx_end_desc = NULL;
dma_descriptor_t *tx_start_desc = NULL;
dma_descriptor_t *tx_end_desc = NULL;
size_t rx_prepared_size = 0;
size_t tx_prepared_size = 0;
ESP_GOTO_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, err, TAG, "mcp handle can't be null");
ESP_GOTO_ON_FALSE(async_memcpy_impl_is_buffer_address_valid(&asmcp->mcp_impl, src, dst), ESP_ERR_INVALID_ARG, err, TAG, "buffer address not valid: %p -> %p", src, dst);
ESP_GOTO_ON_FALSE(n <= asmcp->max_dma_buffer_size * asmcp->max_stream_num, ESP_ERR_INVALID_ARG, err, TAG, "buffer size too large");
if (asmcp->mcp_impl.sram_trans_align) {
ESP_GOTO_ON_FALSE(((n & (asmcp->mcp_impl.sram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, err, TAG, "copy size should align to %d bytes", asmcp->mcp_impl.sram_trans_align);
}
if (asmcp->mcp_impl.psram_trans_align) {
ESP_GOTO_ON_FALSE(((n & (asmcp->mcp_impl.psram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, err, TAG, "copy size should align to %d bytes", asmcp->mcp_impl.psram_trans_align);
}
// Prepare TX and RX descriptor
portENTER_CRITICAL_SAFE(&asmcp->spinlock);
rx_prepared_size = async_memcpy_prepare_receive(asmcp, dst, n, &rx_start_desc, &rx_end_desc);
tx_prepared_size = async_memcpy_prepare_transmit(asmcp, src, n, &tx_start_desc, &tx_end_desc);
if (rx_start_desc && tx_start_desc && (rx_prepared_size == n) && (tx_prepared_size == n)) {
// register user callback to the last descriptor
async_memcpy_stream_t *mcp_stream = __containerof(rx_end_desc, async_memcpy_stream_t, desc);
mcp_stream->cb = cb_isr;
mcp_stream->cb_args = cb_args;
// restart RX firstly
dma_descriptor_t *desc = rx_start_desc;
while (desc != rx_end_desc) {
desc->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
desc = desc->next;
}
desc->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
asmcp->rx_desc = desc->next;
// restart TX secondly
desc = tx_start_desc;
while (desc != tx_end_desc) {
desc->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
desc = desc->next;
}
desc->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
asmcp->tx_desc = desc->next;
async_memcpy_impl_restart(&asmcp->mcp_impl);
}
portEXIT_CRITICAL_SAFE(&asmcp->spinlock);
// It's unlikely that we have space for an rx descriptor but no space for a tx descriptor
// Both tx and rx descriptors should move at the same pace
ESP_GOTO_ON_FALSE(rx_prepared_size == n, ESP_FAIL, err, TAG, "out of rx descriptor");
ESP_GOTO_ON_FALSE(tx_prepared_size == n, ESP_FAIL, err, TAG, "out of tx descriptor");
err:
return ret;
}
IRAM_ATTR void async_memcpy_isr_on_rx_done_event(async_memcpy_impl_t *impl)
{
bool to_continue = false;
async_memcpy_stream_t *in_stream = NULL;
dma_descriptor_t *next_desc = NULL;
async_memcpy_context_t *asmcp = __containerof(impl, async_memcpy_context_t, mcp_impl);
// get the RX eof descriptor address
dma_descriptor_t *eof = (dma_descriptor_t *)impl->rx_eof_addr;
// traverse all unchecked descriptors
do {
portENTER_CRITICAL_ISR(&asmcp->spinlock);
// There is an assumption that rx descriptors are consumed at the same pace as tx descriptors (this is determined by the M2M DMA working mechanism)
// And once the rx descriptor is recycled, the corresponding tx desc is guaranteed to be returned by DMA
to_continue = async_memcpy_get_next_rx_descriptor(asmcp, eof, &next_desc);
portEXIT_CRITICAL_ISR(&asmcp->spinlock);
if (next_desc) {
in_stream = __containerof(next_desc, async_memcpy_stream_t, desc);
// invoke user registered callback if available
if (in_stream->cb) {
async_memcpy_event_t e = {0};
if (in_stream->cb(asmcp, &e, in_stream->cb_args)) {
impl->isr_need_yield = true;
}
in_stream->cb = NULL;
in_stream->cb_args = NULL;
}
}
} while (to_continue);
ESP_RETURN_ON_FALSE(asmcp && out_event, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
return asmcp->new_etm_event(asmcp, event_type, out_event);
}
#endif


@@ -0,0 +1,46 @@
/*
* SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <stdint.h>
#include <stdbool.h>
#include "esp_err.h"
#include "esp_etm.h"
#include "esp_async_memcpy.h"
#include "soc/soc_caps.h"
#define ALIGN_DOWN(val, align) ((val) & ~((align) - 1))
#define DEFAULT_TRANSACTION_QUEUE_LENGTH 4
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
MCP_FSM_IDLE_WAIT, /// intermediate state, for state changes from others to IDLE
MCP_FSM_IDLE,
MCP_FSM_RUN_WAIT, /// intermediate state, for state changes from others to RUN
MCP_FSM_RUN,
} async_memcpy_fsm_t;
typedef struct async_memcpy_context_t async_memcpy_context_t;
struct async_memcpy_context_t {
/// @brief Start a new async memcpy transaction
esp_err_t (*memcpy)(async_memcpy_context_t *ctx, void *dst, void *src, size_t n, async_memcpy_isr_cb_t cb_isr, void *cb_args);
#if SOC_GDMA_SUPPORT_ETM
/// @brief Create ETM event handle of specific event type
esp_err_t (*new_etm_event)(async_memcpy_context_t *ctx, async_memcpy_etm_event_t event_type, esp_etm_event_handle_t *out_event);
#endif // SOC_GDMA_SUPPORT_ETM
/// @brief Delete async memcpy driver context
esp_err_t (*del)(async_memcpy_context_t *ctx);
};
#ifdef __cplusplus
}
#endif
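To illustrate the interface above, here is a hypothetical skeleton (not part of this commit) of how a new DMA backend plugs into async_memcpy_context_t: embed the interface struct, fill in its function pointers, and hand the embedded member back as the public handle, which is the same pattern the CPDMA and GDMA backends in this commit follow. All "mybackend" names are placeholders, and the bodies only sketch the responsibilities.

#include <stdlib.h>
#include "esp_async_memcpy_priv.h"

typedef struct {
    async_memcpy_context_t parent; // interface implemented by this backend
    // ... backend-specific members (DMA channels, transaction queues, spin lock, ...)
} async_memcpy_mybackend_context_t;

static esp_err_t mcp_mybackend_memcpy(async_memcpy_context_t *ctx, void *dst, void *src, size_t n,
                                      async_memcpy_isr_cb_t cb_isr, void *cb_args)
{
    async_memcpy_mybackend_context_t *mcp = __containerof(ctx, async_memcpy_mybackend_context_t, parent);
    // build the descriptor links, queue the transaction, start the hardware ...
    (void)mcp; (void)dst; (void)src; (void)n; (void)cb_isr; (void)cb_args;
    return ESP_OK;
}

static esp_err_t mcp_mybackend_del(async_memcpy_context_t *ctx)
{
    async_memcpy_mybackend_context_t *mcp = __containerof(ctx, async_memcpy_mybackend_context_t, parent);
    // release backend resources, then the context itself
    free(mcp);
    return ESP_OK;
}

esp_err_t esp_async_memcpy_install_mybackend(const async_memcpy_config_t *config, async_memcpy_handle_t *mcp_out)
{
    (void)config; // a real backend would honor backlog, alignment, flags ...
    async_memcpy_mybackend_context_t *mcp = calloc(1, sizeof(async_memcpy_mybackend_context_t));
    if (!mcp) {
        return ESP_ERR_NO_MEM;
    }
    mcp->parent.memcpy = mcp_mybackend_memcpy;
    mcp->parent.del = mcp_mybackend_del;
    *mcp_out = &mcp->parent; // the public handle is a pointer to the embedded interface
    return ESP_OK;
}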


@@ -1,29 +1,33 @@
/*
* SPDX-FileCopyrightText: 2020-2021 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <stdint.h>
#include <stdbool.h>
#include "soc/soc_caps.h"
#include "esp_err.h"
#include "esp_etm.h"
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <stdbool.h>
#include "esp_err.h"
#include "esp_etm.h"
/**
* @brief Type of async memcpy handle
*
* @brief Async memory copy driver handle
*/
typedef struct async_memcpy_context_t *async_memcpy_t;
typedef struct async_memcpy_context_t *async_memcpy_handle_t;
/** @cond */
/// @brief legacy driver handle type
typedef async_memcpy_handle_t async_memcpy_t;
/** @endcond */
/**
* @brief Type of async memcpy event object
*
* @brief Async memory copy event data
*/
typedef struct {
void *data; /*!< Event data */
@ -40,14 +44,13 @@ typedef struct {
* @note User can call OS primitives (semaphore, mutex, etc) in the callback function.
* Keep in mind, if any OS primitive wakes high priority task up, the callback should return true.
*/
typedef bool (*async_memcpy_isr_cb_t)(async_memcpy_t mcp_hdl, async_memcpy_event_t *event, void *cb_args);
typedef bool (*async_memcpy_isr_cb_t)(async_memcpy_handle_t mcp_hdl, async_memcpy_event_t *event, void *cb_args);
/**
* @brief Type of async memcpy configuration
*
*/
typedef struct {
uint32_t backlog; /*!< Maximum number of streams that can be handled simultaneously */
uint32_t backlog; /*!< Maximum number of transactions that can be prepared in the background */
size_t sram_trans_align; /*!< DMA transfer alignment (both in size and address) for SRAM memory */
size_t psram_trans_align; /*!< DMA transfer alignment (both in size and address) for PSRAM memory */
uint32_t flags; /*!< Extra flags to control async memcpy feature */
@ -55,7 +58,6 @@ typedef struct {
/**
* @brief Default configuration for async memcpy
*
*/
#define ASYNC_MEMCPY_DEFAULT_CONFIG() \
{ \
@ -65,36 +67,86 @@ typedef struct {
.flags = 0, \
}
#if SOC_AHB_GDMA_SUPPORTED
/**
* @brief Install async memcpy driver
* @brief Install async memcpy driver, with AHB-GDMA as the backend
*
* @param[in] config Configuration of async memcpy
* @param[out] asmcp Handle of async memcpy that returned from this API. If driver installation is failed, asmcp would be assigned to NULL.
* @param[out] mcp Returned driver handle
* @return
* - ESP_OK: Install async memcpy driver successfully
* - ESP_ERR_INVALID_ARG: Install async memcpy driver failed because of invalid argument
* - ESP_ERR_NO_MEM: Install async memcpy driver failed because out of memory
* - ESP_FAIL: Install async memcpy driver failed because of other error
*/
esp_err_t esp_async_memcpy_install(const async_memcpy_config_t *config, async_memcpy_t *asmcp);
esp_err_t esp_async_memcpy_install_gdma_ahb(const async_memcpy_config_t *config, async_memcpy_handle_t *mcp);
#endif // SOC_AHB_GDMA_SUPPORTED
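A brief usage sketch for the backend-specific install APIs (the AXI-GDMA and CPDMA variants declared below follow the same pattern); error handling is simplified with ESP_ERROR_CHECK:
async_memcpy_config_t cfg = ASYNC_MEMCPY_DEFAULT_CONFIG();
async_memcpy_handle_t mcp = NULL;
ESP_ERROR_CHECK(esp_async_memcpy_install_gdma_ahb(&cfg, &mcp)); // explicitly select the AHB-GDMA backend
// ... issue copy requests with esp_async_memcpy(mcp, ...) ...
ESP_ERROR_CHECK(esp_async_memcpy_uninstall(mcp));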
#if SOC_AXI_GDMA_SUPPORTED
/**
* @brief Install async memcpy driver, with AXI-GDMA as the backend
*
* @param[in] config Configuration of async memcpy
* @param[out] mcp Returned driver handle
* @return
* - ESP_OK: Install async memcpy driver successfully
* - ESP_ERR_INVALID_ARG: Install async memcpy driver failed because of invalid argument
* - ESP_ERR_NO_MEM: Install async memcpy driver failed because out of memory
* - ESP_FAIL: Install async memcpy driver failed because of other error
*/
esp_err_t esp_async_memcpy_install_gdma_axi(const async_memcpy_config_t *config, async_memcpy_handle_t *mcp);
#endif // SOC_AXI_GDMA_SUPPORTED
#if SOC_CP_DMA_SUPPORTED
/**
* @brief Install async memcpy driver, with CPDMA as the backend
*
* @note CPDMA is a CPU peripheral dedicated to memory copy operations.
*
* @param[in] config Configuration of async memcpy
* @param[out] mcp Returned driver handle
* @return
* - ESP_OK: Install async memcpy driver successfully
* - ESP_ERR_INVALID_ARG: Install async memcpy driver failed because of invalid argument
* - ESP_ERR_NO_MEM: Install async memcpy driver failed because out of memory
* - ESP_FAIL: Install async memcpy driver failed because of other error
*/
esp_err_t esp_async_memcpy_install_cpdma(const async_memcpy_config_t *config, async_memcpy_handle_t *mcp);
#endif // SOC_CP_DMA_SUPPORTED
/**
* @brief Install async memcpy driver with the default DMA backend
*
* @note On chips with CPDMA support, CPDMA is the default choice.
* Otherwise, on chips with AHB-GDMA support, AHB-GDMA is the default choice.
*
* @param[in] config Configuration of async memcpy
* @param[out] mcp Returned driver handle
* @return
* - ESP_OK: Install async memcpy driver successfully
* - ESP_ERR_INVALID_ARG: Install async memcpy driver failed because of invalid argument
* - ESP_ERR_NO_MEM: Install async memcpy driver failed because out of memory
* - ESP_FAIL: Install async memcpy driver failed because of other error
*/
esp_err_t esp_async_memcpy_install(const async_memcpy_config_t *config, async_memcpy_handle_t *mcp);
/**
* @brief Uninstall async memcpy driver
*
* @param[in] asmcp Handle of async memcpy driver that returned from esp_async_memcpy_install
* @param[in] mcp Handle of async memcpy driver that returned from `esp_async_memcpy_install`
* @return
* - ESP_OK: Uninstall async memcpy driver successfully
* - ESP_ERR_INVALID_ARG: Uninstall async memcpy driver failed because of invalid argument
* - ESP_FAIL: Uninstall async memcpy driver failed because of other error
*/
esp_err_t esp_async_memcpy_uninstall(async_memcpy_t asmcp);
esp_err_t esp_async_memcpy_uninstall(async_memcpy_handle_t mcp);
/**
* @brief Send an asynchronous memory copy request
*
* @note The callback function is invoked in interrupt context, never do blocking jobs in the callback.
*
* @param[in] asmcp Handle of async memcpy driver that returned from esp_async_memcpy_install
* @param[in] mcp Handle of async memcpy driver that returned from `esp_async_memcpy_install`
* @param[in] dst Destination address (copy to)
* @param[in] src Source address (copy from)
* @param[in] n Number of bytes to copy
@ -105,8 +157,9 @@ esp_err_t esp_async_memcpy_uninstall(async_memcpy_t asmcp);
* - ESP_ERR_INVALID_ARG: Send memory copy request failed because of invalid argument
* - ESP_FAIL: Send memory copy request failed because of other error
*/
esp_err_t esp_async_memcpy(async_memcpy_t asmcp, void *dst, void *src, size_t n, async_memcpy_isr_cb_t cb_isr, void *cb_args);
esp_err_t esp_async_memcpy(async_memcpy_handle_t mcp, void *dst, void *src, size_t n, async_memcpy_isr_cb_t cb_isr, void *cb_args);
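A hedged example of queueing one copy and blocking on a FreeRTOS semaphore until the completion callback fires; the helper names are invented and the buffers are assumed to be DMA-capable (requires freertos/FreeRTOS.h, freertos/semphr.h and esp_attr.h):
static IRAM_ATTR bool example_on_copy_done(async_memcpy_handle_t mcp, async_memcpy_event_t *event, void *cb_args)
{
    (void)mcp; (void)event;
    BaseType_t task_woken = pdFALSE;
    xSemaphoreGiveFromISR((SemaphoreHandle_t)cb_args, &task_woken); // signal the waiting task
    return task_woken == pdTRUE; // ask the driver to yield if a higher priority task woke up
}
static void example_copy_blocking(async_memcpy_handle_t mcp, void *dst, void *src, size_t n)
{
    SemaphoreHandle_t done = xSemaphoreCreateBinary();
    ESP_ERROR_CHECK(esp_async_memcpy(mcp, dst, src, n, example_on_copy_done, done));
    xSemaphoreTake(done, portMAX_DELAY); // wait until the DMA reports completion
    vSemaphoreDelete(done);
}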
#if SOC_GDMA_SUPPORT_ETM
/**
* @brief Async memory copy specific events that supported by the ETM module
*/
@ -119,17 +172,17 @@ typedef enum {
*
* @note The created ETM event object can be deleted later by calling `esp_etm_del_event`
*
* @param[in] asmcp Handle of async memcpy driver that returned from `esp_async_memcpy_install`
* @param[in] mcp Handle of async memcpy driver that returned from `esp_async_memcpy_install`
* @param[in] event_type ETM event type
* @param[out] out_event Returned ETM event handle
* @return
* - ESP_OK: Get ETM event successfully
* - ESP_ERR_INVALID_ARG: Get ETM event failed because of invalid argument
* - ESP_ERR_NOT_SUPPORTED: Get ETM event failed because the DMA hardware doesn't support ETM submodule
* - ESP_FAIL: Get ETM event failed because of other error
*/
esp_err_t esp_async_memcpy_new_etm_event(async_memcpy_t asmcp, async_memcpy_etm_event_t event_type, esp_etm_event_handle_t *out_event);
esp_err_t esp_async_memcpy_new_etm_event(async_memcpy_handle_t mcp, async_memcpy_etm_event_t event_type, esp_etm_event_handle_t *out_event);
#endif // SOC_GDMA_SUPPORT_ETM
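A rough sketch (not taken from the driver docs) of connecting the copy-done event to an ETM channel; the event type name ASYNC_MEMCPY_ETM_EVENT_COPY_DONE and the source of the task handle are assumptions here:
esp_etm_event_handle_t mcp_done_evt = NULL;
ESP_ERROR_CHECK(esp_async_memcpy_new_etm_event(mcp, ASYNC_MEMCPY_ETM_EVENT_COPY_DONE, &mcp_done_evt));
esp_etm_channel_config_t etm_cfg = {}; // default channel configuration
esp_etm_channel_handle_t etm_chan = NULL;
ESP_ERROR_CHECK(esp_etm_new_channel(&etm_cfg, &etm_chan));
// `peripheral_task` is assumed to be an ETM task handle created by another driver (e.g. a GPIO ETM task)
ESP_ERROR_CHECK(esp_etm_channel_connect(etm_chan, mcp_done_evt, peripheral_task));
ESP_ERROR_CHECK(esp_etm_channel_enable(etm_chan));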
#ifdef __cplusplus
}

View File

@ -1,120 +0,0 @@
/*
* SPDX-FileCopyrightText: 2020-2021 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <stdbool.h>
#include "esp_err.h"
#include "esp_intr_alloc.h"
#include "esp_etm.h"
#include "soc/soc_caps.h"
#include "hal/dma_types.h"
#include "freertos/FreeRTOS.h"
#include "esp_async_memcpy.h"
#if SOC_CP_DMA_SUPPORTED
#include "hal/cp_dma_ll.h"
#include "hal/cp_dma_hal.h"
#elif SOC_GDMA_SUPPORTED
#include "esp_private/gdma.h"
#endif
/**
* @brief Type of async mcp implementation layer context
*
*/
typedef struct {
#if SOC_CP_DMA_SUPPORTED
cp_dma_hal_context_t hal; // CP DMA hal
intr_handle_t intr; // CP DMA interrupt handle
portMUX_TYPE hal_lock; // CP DMA HAL level spin lock
#elif SOC_GDMA_SUPPORTED
gdma_channel_handle_t tx_channel;
gdma_channel_handle_t rx_channel;
#endif
intptr_t rx_eof_addr;
size_t sram_trans_align;
size_t psram_trans_align;
bool isr_need_yield; // if current isr needs a yield for higher priority task
} async_memcpy_impl_t;
/**
* @brief ISR callback function, invoked when RX done event triggered
*
* @param impl async mcp implementation layer context pointer
*/
void async_memcpy_isr_on_rx_done_event(async_memcpy_impl_t *impl);
/**
* @brief Initialize async mcp implementation layer
*
* @param impl async mcp implementation layer context pointer
* @return Always return ESP_OK
*/
esp_err_t async_memcpy_impl_init(async_memcpy_impl_t *impl);
/**
* @brief Deinitialize async mcp implementation layer
*
* @param impl async mcp implementation layer context pointer
* @return Always return ESP_OK
*/
esp_err_t async_memcpy_impl_deinit(async_memcpy_impl_t *impl);
/**
* @brief Start async mcp (on implementation layer)
*
* @param impl async mcp implementation layer context pointer
* @param outlink_base base descriptor address for TX DMA channel
* @param inlink_base base descriptor address for RX DMA channel
* @return Always return ESP_OK
*/
esp_err_t async_memcpy_impl_start(async_memcpy_impl_t *impl, intptr_t outlink_base, intptr_t inlink_base);
/**
* @brief Stop async mcp (on implementation layer)
*
* @param impl async mcp implementation layer context pointer
* @return Always return ESP_OK
*/
esp_err_t async_memcpy_impl_stop(async_memcpy_impl_t *impl);
/**
* @brief Restart async mcp DMA engine
*
* @param impl async mcp implementation layer context pointer
* @return Always return ESP_OK
*/
esp_err_t async_memcpy_impl_restart(async_memcpy_impl_t *impl);
/**
* @brief Get ETM Event handle
*
* @param impl async mcp implementation layer context pointer
* @param event_type ETM event type
* @param out_event Returned ETM event handle
* @return ESP_OK on success, ESP_ERR_NOT_SUPPORTED if not supported in hardware, otherwise failed
*/
esp_err_t async_memcpy_impl_new_etm_event(async_memcpy_impl_t *impl, async_memcpy_etm_event_t event_type, esp_etm_event_handle_t *out_event);
/**
* @brief check if buffer address is valid
* @note This is related to underlying target (e.g. on esp32-s2, only buffers located in SRAM are supported)
*
* @param impl async mcp implementation layer context pointer
* @param src Source buffer address
* @param dst Destination buffer address
* @return True if both addresses are valid
*/
bool async_memcpy_impl_is_buffer_address_valid(async_memcpy_impl_t *impl, void *src, void *dst);
#ifdef __cplusplus
}
#endif

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2021-2022 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2021-2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -28,6 +28,7 @@
typedef struct {
uint32_t seed;
size_t buffer_size;
size_t copy_size;
uint8_t *src_buf;
uint8_t *dst_buf;
uint8_t *from_addr;
@ -43,62 +44,56 @@ static void async_memcpy_setup_testbench(memcpy_testbench_context_t *test_contex
srand(test_context->seed);
printf("allocating memory buffer...\r\n");
size_t buffer_size = test_context->buffer_size;
size_t copy_size = buffer_size;
uint8_t *src_buf = NULL;
uint8_t *dst_buf = NULL;
uint8_t *from_addr = NULL;
uint8_t *to_addr = NULL;
#if CONFIG_SPIRAM && SOC_AHB_GDMA_SUPPORT_PSRAM
if (test_context->src_in_psram) {
src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_SPIRAM);
src_buf = heap_caps_aligned_alloc(test_context->align, buffer_size, MALLOC_CAP_SPIRAM);
} else {
src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
src_buf = heap_caps_aligned_alloc(test_context->align, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
}
if (test_context->dst_in_psram) {
dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_SPIRAM);
dst_buf = heap_caps_aligned_alloc(test_context->align, buffer_size, MALLOC_CAP_SPIRAM);
} else {
dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
dst_buf = heap_caps_aligned_alloc(test_context->align, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
}
#else
src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
src_buf = heap_caps_aligned_alloc(test_context->align, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
dst_buf = heap_caps_aligned_alloc(test_context->align, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
#endif
TEST_ASSERT_NOT_NULL_MESSAGE(src_buf, "allocate source buffer failed");
TEST_ASSERT_NOT_NULL_MESSAGE(dst_buf, "allocate destination buffer failed");
// address alignment
from_addr = (uint8_t *)ALIGN_UP((uint32_t)(src_buf), test_context->align);
to_addr = (uint8_t *)ALIGN_UP((uint32_t)(dst_buf), test_context->align);
uint8_t gap = MAX(from_addr - src_buf, to_addr - dst_buf);
buffer_size -= gap;
// size alignment
buffer_size = ALIGN_DOWN(buffer_size, test_context->align);
// adding extra offset
from_addr += test_context->offset;
to_addr += test_context->offset;
buffer_size -= test_context->offset;
from_addr = src_buf + test_context->offset;
to_addr = dst_buf + test_context->offset;
copy_size -= test_context->offset;
printf("...size %zu Bytes, src@%p, dst@%p\r\n", buffer_size, from_addr, to_addr);
printf("...to copy size %zu Bytes, from @%p, to @%p\r\n", copy_size, from_addr, to_addr);
printf("fill src buffer with random data\r\n");
for (int i = 0; i < buffer_size; i++) {
for (int i = 0; i < copy_size; i++) {
from_addr[i] = rand() % 256;
}
// return value
test_context->buffer_size = buffer_size;
// save context
test_context->copy_size = copy_size;
test_context->src_buf = src_buf;
test_context->dst_buf = dst_buf;
test_context->from_addr = from_addr;
test_context->to_addr = to_addr;
}
static void async_memcpy_verify_and_clear_testbench(uint32_t seed, uint32_t buffer_size, uint8_t *src_buf, uint8_t *dst_buf, uint8_t *from_addr, uint8_t *to_addr)
static void async_memcpy_verify_and_clear_testbench(uint32_t seed, uint32_t copy_size, uint8_t *src_buf, uint8_t *dst_buf, uint8_t *from_addr, uint8_t *to_addr)
{
srand(seed);
for (int i = 0; i < buffer_size; i++) {
// check if source data has been copied to destination and source data not broken
TEST_ASSERT_EQUAL_MESSAGE(rand() % 256, to_addr[i], "destination data doesn't match generator data");
// check that the source data has not been broken
for (int i = 0; i < copy_size; i++) {
TEST_ASSERT_EQUAL_MESSAGE(rand() % 256, from_addr[i], "source data doesn't match generator data");
}
srand(seed);
for (int i = 0; i < buffer_size; i++) {
// check if source data has been copied to destination
for (int i = 0; i < copy_size; i++) {
TEST_ASSERT_EQUAL_MESSAGE(rand() % 256, to_addr[i], "destination data doesn't match source data");
}
free(src_buf);
@ -108,13 +103,13 @@ static void async_memcpy_verify_and_clear_testbench(uint32_t seed, uint32_t buff
TEST_CASE("memory copy the same buffer with different content", "[async mcp]")
{
async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
config.backlog = 1;
async_memcpy_t driver = NULL;
async_memcpy_handle_t driver = NULL;
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
uint8_t sbuf[256] = {0};
uint8_t dbuf[256] = {0};
uint8_t *sbuf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
uint8_t *dbuf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
for (int j = 0; j < 20; j++) {
TEST_ESP_OK(esp_async_memcpy(driver, dbuf, sbuf, 256, NULL, NULL));
vTaskDelay(pdMS_TO_TICKS(10));
for (int i = 0; i < 256; i++) {
if (sbuf[i] != dbuf[i]) {
printf("location[%d]:s=%d,d=%d\r\n", i, sbuf[i], dbuf[i]);
@ -125,15 +120,12 @@ TEST_CASE("memory copy the same buffer with different content", "[async mcp]")
}
}
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
free(sbuf);
free(dbuf);
}
TEST_CASE("memory copy by DMA one by one", "[async mcp]")
static void test_memory_copy_one_by_one(async_memcpy_handle_t driver)
{
async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
config.backlog = 4;
async_memcpy_t driver = NULL;
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
uint32_t test_buffer_len[] = {256, 512, 1024, 2048, 4096, 5011};
memcpy_testbench_context_t test_context = {
.align = 4,
@ -144,20 +136,79 @@ TEST_CASE("memory copy by DMA one by one", "[async mcp]")
for (int off = 0; off < 4; off++) {
test_context.buffer_size = test_buffer_len[i];
test_context.seed = i;
test_context.offset = off;
async_memcpy_setup_testbench(&test_context);
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, NULL, NULL));
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
vTaskDelay(pdMS_TO_TICKS(100));
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.copy_size, NULL, NULL));
vTaskDelay(pdMS_TO_TICKS(10));
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.copy_size, test_context.src_buf,
test_context.dst_buf, test_context.from_addr, test_context.to_addr);
}
}
}
TEST_CASE("memory copy by DMA one by one", "[async mcp]")
{
async_memcpy_config_t config = {
.backlog = 4,
};
async_memcpy_handle_t driver = NULL;
#if SOC_AHB_GDMA_SUPPORTED
printf("Testing memory by AHB GDMA\r\n");
TEST_ESP_OK(esp_async_memcpy_install_gdma_ahb(&config, &driver));
test_memory_copy_one_by_one(driver);
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
#endif // SOC_AHB_GDMA_SUPPORTED
#if SOC_AXI_GDMA_SUPPORTED
printf("Testing memory by AXI GDMA\r\n");
TEST_ESP_OK(esp_async_memcpy_install_gdma_axi(&config, &driver));
test_memory_copy_one_by_one(driver);
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
#endif // SOC_AXI_GDMA_SUPPORTED
#if SOC_CP_DMA_SUPPORTED
printf("Testing memory by CP DMA\r\n");
TEST_ESP_OK(esp_async_memcpy_install_cpdma(&config, &driver));
test_memory_copy_one_by_one(driver);
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
#endif // SOC_CP_DMA_SUPPORTED
}
static bool test_async_memcpy_cb_v1(async_memcpy_handle_t mcp_hdl, async_memcpy_event_t *event, void *cb_args)
{
SemaphoreHandle_t sem = (SemaphoreHandle_t)cb_args;
BaseType_t high_task_wakeup = pdFALSE;
xSemaphoreGiveFromISR(sem, &high_task_wakeup);
return high_task_wakeup == pdTRUE;
}
TEST_CASE("memory copy done callback", "[async mcp]")
{
async_memcpy_config_t config = {
// all default
};
async_memcpy_handle_t driver = NULL;
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
uint8_t *src_buf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
uint8_t *dst_buf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
SemaphoreHandle_t sem = xSemaphoreCreateBinary();
TEST_ESP_OK(esp_async_memcpy(driver, dst_buf, src_buf, 256, test_async_memcpy_cb_v1, sem));
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
free(src_buf);
free(dst_buf);
vSemaphoreDelete(sem);
}
TEST_CASE("memory copy by DMA on the fly", "[async mcp]")
{
async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
async_memcpy_t driver = NULL;
async_memcpy_handle_t driver = NULL;
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
uint32_t test_buffer_len[] = {512, 1024, 2048, 4096, 5011};
@ -172,10 +223,10 @@ TEST_CASE("memory copy by DMA on the fly", "[async mcp]")
async_memcpy_setup_testbench(&test_context[i]);
}
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].buffer_size, NULL, NULL));
TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].copy_size, NULL, NULL));
}
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
async_memcpy_verify_and_clear_testbench(i, test_context[i].buffer_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr);
async_memcpy_verify_and_clear_testbench(i, test_context[i].copy_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr);
}
// Non-aligned case
@ -186,10 +237,10 @@ TEST_CASE("memory copy by DMA on the fly", "[async mcp]")
async_memcpy_setup_testbench(&test_context[i]);
}
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].buffer_size, NULL, NULL));
TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].copy_size, NULL, NULL));
}
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
async_memcpy_verify_and_clear_testbench(i, test_context[i].buffer_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr);
async_memcpy_verify_and_clear_testbench(i, test_context[i].copy_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr);
}
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
@ -198,7 +249,7 @@ TEST_CASE("memory copy by DMA on the fly", "[async mcp]")
#define TEST_ASYNC_MEMCPY_BENCH_COUNTS (16)
static int s_count = 0;
static IRAM_ATTR bool test_async_memcpy_isr_cb(async_memcpy_t mcp_hdl, async_memcpy_event_t *event, void *cb_args)
static IRAM_ATTR bool test_async_memcpy_isr_cb(async_memcpy_handle_t mcp_hdl, async_memcpy_event_t *event, void *cb_args)
{
SemaphoreHandle_t sem = (SemaphoreHandle_t)cb_args;
BaseType_t high_task_wakeup = pdFALSE;
@ -217,7 +268,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
config.backlog = (buffer_size / DMA_DESCRIPTOR_BUFFER_MAX_SIZE + 1) * TEST_ASYNC_MEMCPY_BENCH_COUNTS;
config.sram_trans_align = 4; // at least 4 bytes aligned for SRAM transfer
config.psram_trans_align = 64; // at least 64 bytes aligned for PSRAM transfer
async_memcpy_t driver = NULL;
async_memcpy_handle_t driver = NULL;
int64_t elapse_us = 0;
float throughput = 0.0;
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
@ -233,7 +284,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
s_count = 0;
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.copy_size, test_async_memcpy_isr_cb, sem));
}
// wait for done semaphore
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
@ -247,7 +298,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: SRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.copy_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
#if CONFIG_SPIRAM && SOC_AHB_GDMA_SUPPORT_PSRAM
// 2. PSRAM->PSRAM
@ -257,7 +308,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
s_count = 0;
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.copy_size, test_async_memcpy_isr_cb, sem));
}
// wait for done semaphore
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
@ -271,7 +322,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: PSRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.copy_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
// 3. PSRAM->SRAM
test_context.src_in_psram = true;
@ -280,7 +331,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
s_count = 0;
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.copy_size, test_async_memcpy_isr_cb, sem));
}
// wait for done semaphore
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
@ -294,7 +345,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: PSRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.copy_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
// 4. SRAM->PSRAM
test_context.src_in_psram = false;
@ -303,7 +354,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
s_count = 0;
ccomp_timer_start();
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.copy_size, test_async_memcpy_isr_cb, sem));
}
// wait for done semaphore
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
@ -317,7 +368,7 @@ static void memcpy_performance_test(uint32_t buffer_size)
elapse_us = ccomp_timer_stop();
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: SRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.copy_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
#endif
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));

View File

@ -6,7 +6,6 @@ from pytest_embedded import Dut
@pytest.mark.esp32s2
@pytest.mark.esp32s3
@pytest.mark.esp32c2
@pytest.mark.esp32c3
@pytest.mark.esp32c6
@ -21,3 +20,16 @@ from pytest_embedded import Dut
)
def test_dma(dut: Dut) -> None:
dut.run_all_single_board_cases()
@pytest.mark.esp32s3
@pytest.mark.octal_psram
@pytest.mark.parametrize(
'config',
[
'release',
],
indirect=True,
)
def test_dma_psram(dut: Dut) -> None:
dut.run_all_single_board_cases()

View File

@ -0,0 +1,3 @@
CONFIG_SPIRAM=y
CONFIG_SPIRAM_MODE_OCT=y
CONFIG_SPIRAM_SPEED_80M=y

View File

@ -45,7 +45,7 @@ TEST_CASE("async_memcpy_eof_event", "[etm]")
TEST_ESP_OK(gpio_set_level(output_gpio, 1));
printf("install async memcpy context\r\n");
async_memcpy_t mcp_ctx = NULL;
async_memcpy_handle_t mcp_ctx = NULL;
async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
TEST_ESP_OK(esp_async_memcpy_install(&config, &mcp_ctx));

View File

@ -28,7 +28,7 @@ entries:
systimer_hal (noflash)
if TWAI_ISR_IN_IRAM = y:
twai_hal_iram (noflash)
if IDF_TARGET_ESP32 = n:
if SOC_GPSPI_SUPPORTED = y && IDF_TARGET_ESP32 = n:
spi_flash_hal_gpspi (noflash)
if SOC_PMU_SUPPORTED = y:
pmu_hal (noflash)

View File

@ -23,6 +23,10 @@ config SOC_GPTIMER_SUPPORTED
bool
default y
config SOC_ASYNC_MEMCPY_SUPPORTED
bool
default y
config SOC_SUPPORTS_SECURE_DL_MODE
bool
default y
@ -227,6 +231,10 @@ config SOC_GDMA_PAIRS_PER_GROUP_MAX
int
default 3
config SOC_AXI_GDMA_SUPPORT_PSRAM
bool
default y
config SOC_ETM_GROUPS
int
default 1

View File

@ -38,7 +38,7 @@
// #define SOC_TWAI_SUPPORTED 1 //TODO: IDF-7470
// #define SOC_ETM_SUPPORTED 1 //TODO: IDF-7478
// #define SOC_PARLIO_SUPPORTED 1 //TODO: IDF-7471, TODO: IDF-7472
// #define SOC_ASYNC_MEMCPY_SUPPORTED 1
#define SOC_ASYNC_MEMCPY_SUPPORTED 1
// disable usb serial jtag for esp32p4, current image does not support
// #define SOC_USB_SERIAL_JTAG_SUPPORTED 1 //TODO: IDF-7496
// #define SOC_TEMP_SENSOR_SUPPORTED 1 //TODO: IDF-7482
@ -161,6 +161,7 @@
#define SOC_AHB_GDMA_VERSION 2
#define SOC_GDMA_NUM_GROUPS_MAX 2
#define SOC_GDMA_PAIRS_PER_GROUP_MAX 3
#define SOC_AXI_GDMA_SUPPORT_PSRAM 1
// #define SOC_GDMA_SUPPORT_ETM 1 // Both AHB-DMA and AXI-DMA supports ETM //TODO: IDF-7478
/*-------------------------- ETM CAPS --------------------------------------*/

View File

@ -1,60 +1,67 @@
The Async Memcpy API
====================
Asynchronous Memory Copy
========================
Overview
--------
{IDF_TARGET_NAME} has a DMA engine which can help to offload internal memory copy operations from the CPU in a asynchronous way.
{IDF_TARGET_NAME} has a DMA engine which can help to offload internal memory copy operations from the CPU in an asynchronous way.
The async memcpy API wraps all DMA configurations and operations, the signature of :cpp:func:`esp_async_memcpy` is almost the same to the standard libc one.
The async memcpy API wraps all DMA configurations and operations, the signature of :cpp:func:`esp_async_memcpy` is almost the same to the standard libc ``memcpy`` function.
Thanks to the benefit of the DMA, we do not have to wait for each memory copy to be done before we issue another memcpy request. By the way, it is still possible to know when memcpy is finished by listening in the memcpy callback function.
The DMA allows multiple memory copy requests to be queued up before the first one is completed, which allows overlap of computation and memory copy. By the way, it is still possible to know the exact time when a memory copy request is completed by registering an event callback.
.. only:: esp32s2
.. only:: SOC_AHB_GDMA_SUPPORT_PSRAM
.. note::
Memory copy from/to external PSRAM is not supported on ESP32-S2, :cpp:func:`esp_async_memcpy` will abort returning an error if buffer address is not in SRAM.
If the async memcpy is constructed upon the AHB GDMA, it is also possible to copy data from/to PSRAM with a proper alignment.
.. only:: SOC_AXI_GDMA_SUPPORT_PSRAM
If the async memcpy is constructed upon the AXI GDMA, it is also possible to copy data from/to PSRAM with a proper alignment.
Configure and Install Driver
----------------------------
:cpp:func:`esp_async_memcpy_install` is used to install the driver with user's configuration. Please note that async memcpy has to be called with the handle returned from :cpp:func:`esp_async_memcpy_install`.
There are several ways to install the async memcpy driver, depending on the underlying DMA engine.
.. list::
:SOC_CP_DMA_SUPPORTED: - :cpp:func:`esp_async_memcpy_install_cpdma` is used to install the async memcpy driver based on the CP DMA engine.
:SOC_AHB_GDMA_SUPPORTED: - :cpp:func:`esp_async_memcpy_install_gdma_ahb` is used to install the async memcpy driver based on the AHB GDMA engine.
:SOC_AXI_GDMA_SUPPORTED: - :cpp:func:`esp_async_memcpy_install_gdma_axi` is used to install the async memcpy driver based on the AXI GDMA engine.
- :cpp:func:`esp_async_memcpy_install` is a generic API to install the async memcpy driver with a default DMA engine. If the SOC has the CP_DMA engine, the default DMA engine is CP_DMA. Otherwise, the default DMA engine is AHB_GDMA.
Driver configuration is described in :cpp:type:`async_memcpy_config_t`:
* :cpp:member:`backlog`: This is used to configure the maximum number of DMA operations being processed at the same time.
* :cpp:member:`backlog`: This is used to configure the maximum number of memory copy transactions that can be queued up before the first one is completed. If this field is set to zero, then the default value (i.e., 4) will be applied.
* :cpp:member:`sram_trans_align`: Declare SRAM alignment for both data address and copy size, set to zero if the data has no restriction in alignment. If set to a quadruple value (i.e., 4X), the driver will enable the burst mode internally, which is helpful for some performance-related applications.
* :cpp:member:`psram_trans_align`: Declare PSRAM alignment for both data address and copy size. User has to give it a valid value (only 16, 32, 64 are supported) if the destination of memcpy is located in PSRAM. The default alignment (i.e., 16) will be applied if it is set to zero. Internally, the driver configures the size of block used by DMA to access PSRAM, according to the alignment.
* :cpp:member:`flags`: This is used to enable some special driver features.
:c:macro:`ASYNC_MEMCPY_DEFAULT_CONFIG` provides a default configuration, which sets the backlog to 8.
.. code-block:: c
::
async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
// update the maximum data stream supported by underlying DMA engine
config.backlog = 16;
async_memcpy_t driver = NULL;
ESP_ERROR_CHECK(esp_async_memcpy_install(&config, &driver)); // install driver, return driver handle
config.backlog = 8;
async_memcpy_handle_t driver = NULL;
ESP_ERROR_CHECK(esp_async_memcpy_install(&config, &driver)); // install driver with default DMA engine
Send Memory Copy Request
------------------------
:cpp:func:`esp_async_memcpy` is the API to send a memory copy request to the DMA engine. It must be called after the driver is installed successfully. This API is thread safe, so it can be called from different tasks.
Different from the libc version of ``memcpy``, user should also pass a callback to :cpp:func:`esp_async_memcpy`, if it is necessary to be notified when the memory copy is done. The callback is executed in the ISR context, make sure you does not violate the restriction applied to ISR handler.
Different from the libc version of ``memcpy``, you can optionally pass a callback to :cpp:func:`esp_async_memcpy`, so that you can be notified when the memory copy is finished. Note that the callback is executed in the ISR context; make sure you do not call any blocking functions in it.
Besides that, the callback function should reside in IRAM space by applying ``IRAM_ATTR`` attribute. The prototype of the callback function is :cpp:type:`async_memcpy_isr_cb_t`, please note that, the callback function should return true if it wakes up a high priority task by some API like :cpp:func:`xSemaphoreGiveFromISR`.
The prototype of the callback function is :cpp:type:`async_memcpy_isr_cb_t`. The callback function should only return true if it wakes up a high priority task by RTOS APIs like :cpp:func:`xSemaphoreGiveFromISR`.
.. code-block:: c
::
// Callback implementation, running in ISR context
static IRAM_ATTR bool my_async_memcpy_cb(async_memcpy_t mcp_hdl, async_memcpy_event_t *event, void *cb_args)
static bool my_async_memcpy_cb(async_memcpy_handle_t mcp_hdl, async_memcpy_event_t *event, void *cb_args)
{
SemaphoreHandle_t sem = (SemaphoreHandle_t)cb_args;
BaseType_t high_task_wakeup = pdFALSE;
@ -70,10 +77,10 @@ Besides that, the callback function should reside in IRAM space by applying ``IR
// Do something else here
xSemaphoreTake(my_semaphore, portMAX_DELAY); // Wait until the buffer copy is done
Uninstall Driver (Optional)
---------------------------
Uninstall Driver
----------------
:cpp:func:`esp_async_memcpy_uninstall` is used to uninstall asynchronous memcpy driver. It is not necessary to uninstall the driver after each memcpy operation. If you know your application will not use this driver anymore, then this API can recycle the memory for you.
:cpp:func:`esp_async_memcpy_uninstall` is used to uninstall asynchronous memcpy driver. It is not necessary to uninstall the driver after each memcpy operation. If you know your application will not use this driver anymore, then this API can recycle the memory and other hardware resources for you.
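A minimal call, assuming ``driver`` is the handle obtained from one of the install functions above:
::
ESP_ERROR_CHECK(esp_async_memcpy_uninstall(driver));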
.. only:: SOC_ETM_SUPPORTED and SOC_GDMA_SUPPORT_ETM