mirror of
https://github.com/espressif/esp-idf.git
synced 2024-10-05 20:47:46 -04:00
Merge branch 'feature/interrupt_overhead_improvement' into 'master'
feature/interrupt overhead improvement Closes IDF-248 See merge request espressif/esp-idf!6328
This commit is contained in:
commit
a7aea56977
@ -5,7 +5,7 @@ if(IDF_TARGET STREQUAL "esp32")
|
||||
endif()
|
||||
|
||||
|
||||
idf_component_register(SRC_DIRS ${src_dirs}
|
||||
idf_component_register(SRC_DIRS ${srcdirs}
|
||||
INCLUDE_DIRS .
|
||||
REQUIRES unity test_utils
|
||||
)
|
73
components/freertos/test/test_isr_latency.c
Normal file
73
components/freertos/test/test_isr_latency.c
Normal file
@ -0,0 +1,73 @@
|
||||
#include <esp_types.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "freertos/semphr.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "freertos/xtensa_api.h"
|
||||
#include "esp_intr_alloc.h"
|
||||
#include "xtensa/hal.h"
|
||||
#include "unity.h"
|
||||
#include "soc/cpu.h"
|
||||
#include "test_utils.h"
|
||||
|
||||
#define SW_ISR_LEVEL_1 7
|
||||
|
||||
static SemaphoreHandle_t sync;
|
||||
static SemaphoreHandle_t end_sema;
|
||||
static uint32_t cycle_before_trigger;
|
||||
static uint32_t cycle_before_exit;
|
||||
static uint32_t delta_enter_cycles = 0;
|
||||
static uint32_t delta_exit_cycles = 0;
|
||||
|
||||
static void software_isr(void *arg) {
|
||||
(void)arg;
|
||||
BaseType_t yield;
|
||||
delta_enter_cycles += portGET_RUN_TIME_COUNTER_VALUE() - cycle_before_trigger;
|
||||
|
||||
xt_set_intclear(1 << SW_ISR_LEVEL_1);
|
||||
|
||||
xSemaphoreGiveFromISR(sync, &yield);
|
||||
if(yield) {
|
||||
portYIELD_FROM_ISR();
|
||||
}
|
||||
|
||||
cycle_before_exit = portGET_RUN_TIME_COUNTER_VALUE();
|
||||
}
|
||||
|
||||
static void test_task(void *arg) {
|
||||
(void)arg;
|
||||
|
||||
intr_handle_t handle;
|
||||
|
||||
esp_err_t err = esp_intr_alloc(ETS_INTERNAL_SW0_INTR_SOURCE, ESP_INTR_FLAG_LEVEL1, &software_isr, NULL, &handle);
|
||||
TEST_ASSERT_EQUAL_HEX32(ESP_OK, err);
|
||||
|
||||
for(int i = 0;i < 10000; i++) {
|
||||
cycle_before_trigger = portGET_RUN_TIME_COUNTER_VALUE();
|
||||
xt_set_intset(1 << SW_ISR_LEVEL_1);
|
||||
xSemaphoreTake(sync, portMAX_DELAY);
|
||||
delta_exit_cycles += portGET_RUN_TIME_COUNTER_VALUE() - cycle_before_exit;
|
||||
}
|
||||
|
||||
delta_enter_cycles /= 10000;
|
||||
delta_exit_cycles /= 10000;
|
||||
|
||||
esp_intr_free(handle);
|
||||
xSemaphoreGive(end_sema);
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
/*
 * ISR latency test case.
 *
 * Creates the two binary semaphores shared with test_task()/software_isr(),
 * starts test_task() on core 0 at the highest priority, waits for it to
 * signal completion through end_sema, and then checks the averaged
 * enter/exit cycle counts against the ISR_ENTER_CYCLES / ISR_EXIT_CYCLES
 * performance thresholds.
 */
TEST_CASE("isr latency test", "[freertos] [ignore]")
{
    sync = xSemaphoreCreateBinary();
    TEST_ASSERT(sync != NULL);
    end_sema = xSemaphoreCreateBinary();
    TEST_ASSERT(end_sema != NULL);

    /* Without this check a failed task creation would leave the test
     * blocked forever on end_sema below. */
    BaseType_t created = xTaskCreatePinnedToCore(test_task, "tst", 4096, NULL, configMAX_PRIORITIES - 1, NULL, 0);
    TEST_ASSERT_EQUAL_HEX32(pdPASS, created);

    BaseType_t result = xSemaphoreTake(end_sema, portMAX_DELAY);
    TEST_ASSERT_EQUAL_HEX32(pdTRUE, result);

    /* The measurement is finished: release the semaphores before the
     * performance assertions so they are not leaked when a threshold
     * check fails. */
    vSemaphoreDelete(end_sema);
    end_sema = NULL;
    vSemaphoreDelete(sync);
    sync = NULL;

    TEST_PERFORMANCE_LESS_THAN(ISR_ENTER_CYCLES, "%d cycles", delta_enter_cycles);
    TEST_PERFORMANCE_LESS_THAN(ISR_EXIT_CYCLES, "%d cycles", delta_exit_cycles);
}
|
88
components/freertos/xt_asm_utils.h
Normal file
88
components/freertos/xt_asm_utils.h
Normal file
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/* Copyright 2015-2018 Espressif Systems (Shanghai) PTE LTD
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* File adapted to use on IDF FreeRTOS component, extracted
|
||||
* originally from zephyr RTOS code base:
|
||||
* https://github.com/zephyrproject-rtos/zephyr/blob/dafd348/arch/xtensa/include/xtensa-asm2-s.h
|
||||
*/
|
||||
|
||||
#ifndef __XT_ASM_UTILS_H
|
||||
#define __XT_ASM_UTILS_H
|
||||
|
||||
/*
|
||||
* SPILL_ALL_WINDOWS
|
||||
*
|
||||
* Spills all windowed registers (i.e. registers not visible as
|
||||
* A0-A15) to their ABI-defined spill regions on the stack.
|
||||
*
|
||||
* Unlike the Xtensa HAL implementation, this code requires that the
|
||||
* EXCM and WOE bit be enabled in PS, and relies on repeated hardware
|
||||
* exception handling to do the register spills. The trick is to do a
|
||||
* noop write to the high registers, which the hardware will trap
|
||||
* (into an overflow exception) in the case where those registers are
|
||||
* already used by an existing call frame. Then it rotates the window
|
||||
* and repeats until all but the A0-A3 registers of the original frame
|
||||
* are guaranteed to be spilled, eventually rotating back around into
|
||||
* the original frame. Advantages:
|
||||
*
|
||||
* - Vastly smaller code size
|
||||
*
|
||||
* - More easily maintained if changes are needed to window over/underflow
|
||||
* exception handling.
|
||||
*
|
||||
* - Requires no scratch registers to do its work, so can be used safely in any
|
||||
* context.
|
||||
*
|
||||
* - If the WOE bit is not enabled (for example, in code written for
|
||||
* the CALL0 ABI), this becomes a silent noop and operates compatibly.
|
||||
*
|
||||
* - Hilariously it's ACTUALLY FASTER than the HAL routine. And not
|
||||
* just a little bit, it's MUCH faster. With a mostly full register
|
||||
* file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
|
||||
* registers with this vs. 279 (!) to do it with
|
||||
* xthal_spill_windows().
|
||||
*/
|
||||
|
||||
.macro SPILL_ALL_WINDOWS
/* Each "and aN, aN, aN" below leaves the register value unchanged. Per the
 * description preceding this macro, it is a noop write whose only purpose is
 * to let the hardware trap into a window overflow exception when that
 * register is still in use by an existing call frame. */
|
||||
#if XCHAL_NUM_AREGS == 64
/* 64 address registers: five touch/rotate steps, rotating by 3+3+3+3+4 = 16
 * in total. NOTE(review): assuming rotw counts in units of 4 registers, 16
 * is one full trip around the 64-register file back to the starting window
 * -- confirm against the Xtensa ISA reference. */
|
||||
and a12, a12, a12
|
||||
rotw 3
|
||||
and a12, a12, a12
|
||||
rotw 3
|
||||
and a12, a12, a12
|
||||
rotw 3
|
||||
and a12, a12, a12
|
||||
rotw 3
|
||||
and a12, a12, a12
|
||||
/* Final rotation is 4 rather than 3 so the rotations sum to 16. */
rotw 4
|
||||
#elif XCHAL_NUM_AREGS == 32
/* 32 address registers: three touch/rotate steps, rotating by 3+3+2 = 8 in
 * total (same assumption as above: 8 * 4 = 32 registers, one full trip). */
|
||||
and a12, a12, a12
|
||||
rotw 3
|
||||
and a12, a12, a12
|
||||
rotw 3
|
||||
/* Only a4 is touched in the last step, which is followed by a rotation of 2.
 * NOTE(review): presumably because only the a4-a7 group remains before the
 * window wraps back onto the original frame -- confirm. */
|
||||
and a4, a4, a4
|
||||
rotw 2
|
||||
#else
/* Any other register file size is unsupported: fail the build. */
|
||||
#error Unrecognized XCHAL_NUM_AREGS
|
||||
#endif
|
||||
.endm
|
||||
|
||||
#endif
|
@ -51,6 +51,7 @@ NOERROR: .error "C preprocessor needed for this file: make sure its filename\
|
||||
|
||||
#include "xtensa_rtos.h"
|
||||
#include "xtensa_context.h"
|
||||
#include "xt_asm_utils.h"
|
||||
|
||||
#ifdef XT_USE_OVLY
|
||||
#include <xtensa/overlay_os_asm.h>
|
||||
@ -58,8 +59,6 @@ NOERROR: .error "C preprocessor needed for this file: make sure its filename\
|
||||
|
||||
.text
|
||||
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
|
||||
_xt_context_save
|
||||
@ -97,8 +96,8 @@ Exit conditions:
|
||||
.align 4
|
||||
.literal_position
|
||||
.align 4
|
||||
_xt_context_save:
|
||||
|
||||
_xt_context_save:
|
||||
s32i a2, sp, XT_STK_A2
|
||||
s32i a3, sp, XT_STK_A3
|
||||
s32i a4, sp, XT_STK_A4
|
||||
@ -143,49 +142,15 @@ _xt_context_save:
|
||||
mov a9, a0 /* preserve ret addr */
|
||||
#endif
|
||||
|
||||
#ifndef __XTENSA_CALL0_ABI__
|
||||
/*
|
||||
To spill the reg windows, temp. need pre-interrupt stack ptr and a4-15.
|
||||
Need to save a9,12,13 temporarily (in frame temps) and recover originals.
|
||||
Interrupts need to be disabled below XCHAL_EXCM_LEVEL and window overflow
|
||||
and underflow exceptions disabled (assured by PS.EXCM == 1).
|
||||
*/
|
||||
s32i a12, sp, XT_STK_TMP0 /* temp. save stuff in stack frame */
|
||||
s32i a13, sp, XT_STK_TMP1
|
||||
s32i a9, sp, XT_STK_TMP2
|
||||
|
||||
/*
|
||||
Save the overlay state if we are supporting overlays. Since we just saved
|
||||
three registers, we can conveniently use them here. Note that as of now,
|
||||
overlays only work for windowed calling ABI.
|
||||
*/
|
||||
#ifdef XT_USE_OVLY
|
||||
l32i a9, sp, XT_STK_PC /* recover saved PC */
|
||||
_xt_overlay_get_state a9, a12, a13
|
||||
s32i a9, sp, XT_STK_OVLY /* save overlay state */
|
||||
#endif
|
||||
|
||||
l32i a12, sp, XT_STK_A12 /* recover original a9,12,13 */
|
||||
l32i a13, sp, XT_STK_A13
|
||||
l32i a9, sp, XT_STK_A9
|
||||
addi sp, sp, XT_STK_FRMSZ /* restore the interruptee's SP */
|
||||
call0 xthal_window_spill_nw /* preserves only a4,5,8,9,12,13 */
|
||||
addi sp, sp, -XT_STK_FRMSZ
|
||||
l32i a12, sp, XT_STK_TMP0 /* recover stuff from stack frame */
|
||||
l32i a13, sp, XT_STK_TMP1
|
||||
l32i a9, sp, XT_STK_TMP2
|
||||
#endif
|
||||
|
||||
#if XCHAL_EXTRA_SA_SIZE > 0
|
||||
/*
|
||||
NOTE: Normally the xthal_save_extra_nw macro only affects address
|
||||
registers a2-a5. It is theoretically possible for Xtensa processor
|
||||
designers to write TIE that causes more address registers to be
|
||||
affected, but it is generally unlikely. If that ever happens,
|
||||
more registers need to be saved/restored around this macro invocation.
|
||||
Here we assume a9,12,13 are preserved.
|
||||
Future Xtensa tools releases might limit the regs that can be affected.
|
||||
*/
|
||||
addi a2, sp, XT_STK_EXTRA /* where to save it */
|
||||
# if XCHAL_EXTRA_SA_ALIGN > 16
|
||||
movi a3, -XCHAL_EXTRA_SA_ALIGN
|
||||
@ -194,6 +159,38 @@ _xt_context_save:
|
||||
call0 xthal_save_extra_nw /* destroys a0,2,3,4,5 */
|
||||
#endif
|
||||
|
||||
#ifndef __XTENSA_CALL0_ABI__
|
||||
#ifdef XT_USE_OVLY
|
||||
l32i a9, sp, XT_STK_PC /* recover saved PC */
|
||||
_xt_overlay_get_state a9, a12, a13
|
||||
s32i a9, sp, XT_STK_OVLY /* save overlay state */
|
||||
#endif
|
||||
|
||||
rsr a2, PS /* We need to enable window exceptions to */
|
||||
movi a3, PS_INTLEVEL_MASK /* perform spill registers*/
|
||||
and a2, a2, a3
|
||||
bnez a2, _not_l1
|
||||
rsr a2, PS
|
||||
movi a3, PS_INTLEVEL(1) /* For some curious reason the level 1 interrupts */
|
||||
or a2, a2, a3 /* dont set the intlevel correctly on PS, we need to */
|
||||
wsr a2, PS /* do this manually */
|
||||
_not_l1:
|
||||
rsr a2, PS /* finally umask the window exceptions */
|
||||
movi a3, ~(PS_EXCM_MASK)
|
||||
and a2, a2, a3
|
||||
wsr a2, PS
|
||||
rsync
|
||||
|
||||
addi sp, sp, XT_STK_FRMSZ /* go back to spill register region */
|
||||
SPILL_ALL_WINDOWS /* place the live register windows there */
|
||||
addi sp, sp, -XT_STK_FRMSZ /* return the current stack pointer and proceed with context save*/
|
||||
|
||||
#endif
|
||||
|
||||
l32i a12, sp, XT_STK_TMP0 /* restore the temp saved registers */
|
||||
l32i a13, sp, XT_STK_TMP1 /* our return address is there */
|
||||
l32i a9, sp, XT_STK_TMP2
|
||||
|
||||
#if XCHAL_EXTRA_SA_SIZE > 0 || !defined(__XTENSA_CALL0_ABI__)
|
||||
mov a0, a9 /* retrieve ret addr */
|
||||
#endif
|
||||
|
@ -32,6 +32,10 @@
|
||||
#define IDF_PERFORMANCE_MAX_ESP32_CYCLES_PER_SQRT 140
|
||||
// SHA256 hardware throughput at 240MHz, threshold set lower than worst case
|
||||
#define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC 9.0
|
||||
#define IDF_PERFORMANCE_MAX_SPILL_REG_CYCLES 150
|
||||
#define IDF_PERFORMANCE_MAX_ISR_ENTER_CYCLES 290
|
||||
#define IDF_PERFORMANCE_MAX_ISR_EXIT_CYCLES 565
|
||||
|
||||
|
||||
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 19000
|
||||
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 180000
|
||||
|
@ -27,6 +27,14 @@
|
||||
|
||||
#define UNITY_EXCLUDE_TIME_H
|
||||
|
||||
/**
|
||||
* @note For some reason setjmp does not work with
|
||||
* unity, since it is only used on test entry and
|
||||
* exit it should not impact the rest of test
|
||||
* framework. So we disable it here.
|
||||
*/
|
||||
#define UNITY_EXCLUDE_SETJMP_H
|
||||
|
||||
void unity_flush(void);
|
||||
void unity_putc(int c);
|
||||
void unity_gets(char* dst, size_t len);
|
||||
|
@ -307,7 +307,7 @@ example_test_012:
|
||||
|
||||
UT_001:
|
||||
extends: .unit_test_template
|
||||
parallel: 28
|
||||
parallel: 30
|
||||
tags:
|
||||
- ESP32_IDF
|
||||
- UT_T1_1
|
||||
@ -316,7 +316,7 @@ UT_001:
|
||||
|
||||
UT_002:
|
||||
extends: .unit_test_template
|
||||
parallel: 9
|
||||
parallel: 11
|
||||
tags:
|
||||
- ESP32_IDF
|
||||
- UT_T1_1
|
||||
@ -466,7 +466,7 @@ UT_034:
|
||||
|
||||
UT_035:
|
||||
extends: .unit_test_template
|
||||
parallel: 17
|
||||
parallel: 20
|
||||
tags:
|
||||
- ESP32S2BETA_IDF
|
||||
- UT_T1_1
|
||||
|
Loading…
Reference in New Issue
Block a user