Merge branch 'feature/mpi_accel_s2' into 'master'

MPI/RSA accelerator bringup for S2 and bignum refactor

Closes IDF-803 and IDF-1174

See merge request espressif/esp-idf!7915
This commit is contained in:
Angus Gratton 2020-03-17 13:16:05 +08:00
commit 9c430a17aa
13 changed files with 828 additions and 1007 deletions

View File

@ -11,9 +11,9 @@
#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 4500
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 19000
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 180000
#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PUBLIC_OP 65000
#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PRIVATE_OP 850000
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 190000
#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PUBLIC_OP 90000
#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PRIVATE_OP 870000
#define IDF_PERFORMANCE_MAX_SPI_PER_TRANS_NO_POLLING 30
#define IDF_PERFORMANCE_MAX_SPI_PER_TRANS_NO_POLLING_NO_DMA 27

View File

@ -9,10 +9,10 @@
#define IDF_PERFORMANCE_MAX_TIME_SHA1_32KB 900
#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB 800
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 14000
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 100000
#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PUBLIC_OP 60000
#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PRIVATE_OP 600000
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP 19000
#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP 160000
#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PUBLIC_OP 90000
#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PRIVATE_OP 850000
#define IDF_PERFORMANCE_MAX_SPI_PER_TRANS_NO_POLLING 32
#define IDF_PERFORMANCE_MAX_SPI_PER_TRANS_NO_POLLING_NO_DMA 30

View File

@ -76,8 +76,9 @@ target_sources(mbedcrypto PRIVATE "${COMPONENT_DIR}/port/esp_hardware.c"
"${COMPONENT_DIR}/port/esp_mem.c"
"${COMPONENT_DIR}/port/esp_timing.c"
"${COMPONENT_DIR}/port/esp_sha.c"
"${COMPONENT_DIR}/port/esp_bignum.c"
"${COMPONENT_DIR}/port/esp_aes_xts.c"
"${COMPONENT_DIR}/port/${idf_target}/esp_bignum.c"
"${COMPONENT_DIR}/port/${idf_target}/bignum.c"
"${COMPONENT_DIR}/port/${idf_target}/aes.c"
"${COMPONENT_DIR}/port/${idf_target}/sha.c"
"${COMPONENT_DIR}/port/${idf_target}/esp_sha1.c"

View File

@ -220,7 +220,6 @@ menu "mbedTLS"
config MBEDTLS_HARDWARE_MPI
bool "Enable hardware MPI (bignum) acceleration"
default y
depends on !IDF_TARGET_ESP32S2
help
Enable hardware accelerated multiple precision integer operations.

View File

@ -0,0 +1,270 @@
/**
* \brief Multi-precision integer library, ESP-IDF hardware accelerated parts
*
* based on mbedTLS implementation
*
* Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
* Additions Copyright (C) 2016-2020, Espressif Systems (Shanghai) PTE Ltd
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "soc/hwcrypto_periph.h"
#include "driver/periph_ctrl.h"
#include <mbedtls/bignum.h>
#include "bignum_impl.h"
#include <sys/param.h>
/* Round up number of words to nearest
512 bit (16 word) block count.
*/
size_t esp_mpi_hardware_words(size_t words)
{
return (words + 0xF) & ~0xF;
}
void esp_mpi_enable_hardware_hw_op( void )
{
/* Enable RSA hardware */
periph_module_enable(PERIPH_RSA_MODULE);
DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
while (DPORT_REG_READ(RSA_CLEAN_REG) != 1)
{ }
// Note: from enabling RSA clock to here takes about 1.3us
}
void esp_mpi_disable_hardware_hw_op( void )
{
DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
/* Disable RSA hardware */
periph_module_disable(PERIPH_RSA_MODULE);
}
/* Copy mbedTLS MPI bignum 'mpi' to hardware memory block at 'mem_base'.
If hw_words is higher than the number of words in the bignum then
these additional words will be zeroed in the memory buffer.
*/
static inline void mpi_to_mem_block(uint32_t mem_base, const mbedtls_mpi *mpi, size_t hw_words)
{
uint32_t *pbase = (uint32_t *)mem_base;
uint32_t copy_words = MIN(hw_words, mpi->n);
/* Copy MPI data to memory block registers */
for (int i = 0; i < copy_words; i++) {
pbase[i] = mpi->p[i];
}
/* Zero any remaining memory block data */
for (int i = copy_words; i < hw_words; i++) {
pbase[i] = 0;
}
}
/* Read mbedTLS MPI bignum back from hardware memory block.
Reads num_words words from block.
Bignum 'x' should already be grown to at least num_words by caller (can be done while
calculation is in progress, to save some cycles)
*/
static inline void mem_block_to_mpi(mbedtls_mpi *x, uint32_t mem_base, int num_words)
{
assert(x->n >= num_words);
/* Copy data from memory block registers */
esp_dport_access_read_buffer(x->p, mem_base, num_words);
/* Zero any remaining limbs in the bignum, if the buffer is bigger
than num_words */
for (size_t i = num_words; i < x->n; i++) {
x->p[i] = 0;
}
}
/* Begin an RSA operation. op_reg specifies which 'START' register
to write to.
*/
static inline void start_op(uint32_t op_reg)
{
/* Clear interrupt status */
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
/* Note: above REG_WRITE includes a memw, so we know any writes
to the memory blocks are also complete. */
DPORT_REG_WRITE(op_reg, 1);
}
/* Wait for an RSA operation to complete.
*/
static inline void wait_op_complete(void)
{
while (DPORT_REG_READ(RSA_INTERRUPT_REG) != 1)
{ }
/* clear the interrupt */
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
}
/* Read result from last MPI operation */
void esp_mpi_read_result_hw_op(mbedtls_mpi *Z, size_t z_words)
{
wait_op_complete();
mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, z_words);
}
/* Z = (X * Y) mod M */
void esp_mpi_mul_mpi_mod_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, const mbedtls_mpi *Rinv, mbedtls_mpi_uint Mprime, size_t hw_words)
{
/* Load M, X, Rinv, Mprime (Mprime is mod 2^32) */
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, hw_words);
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Rinv, hw_words);
DPORT_REG_WRITE(RSA_M_DASH_REG, (uint32_t)Mprime);
/* "mode" register loaded with number of 512-bit blocks, minus 1 */
DPORT_REG_WRITE(RSA_MULT_MODE_REG, (hw_words / 16) - 1);
/* Execute first stage montgomery multiplication */
start_op(RSA_MULT_START_REG);
wait_op_complete();
/* execute second stage */
/* Load Y to X input memory block, rerun */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, Y, hw_words);
start_op(RSA_MULT_START_REG);
}
/* Z = X * Y */
void esp_mpi_mul_mpi_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t hw_words)
{
/* Copy X (right-extended) & Y (left-extended) to memory block */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
mpi_to_mem_block(RSA_MEM_Z_BLOCK_BASE + hw_words * 4, Y, hw_words);
/* NB: as Y is left-extended, we don't zero the bottom words_mult words of Y block.
This is OK for now because zeroing is done by hardware when we do esp_mpi_acquire_hardware().
*/
DPORT_REG_WRITE(RSA_M_DASH_REG, 0);
/* "mode" register loaded with number of 512-bit blocks in result,
plus 7 (for range 9-12). (this is ((N~ / 32) - 1) + 8))
*/
DPORT_REG_WRITE(RSA_MULT_MODE_REG, ((hw_words * 2) / 16) + 7);
start_op(RSA_MULT_START_REG);
}
int esp_mont_hw_op(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M,
mbedtls_mpi_uint Mprime,
size_t hw_words,
bool again)
{
// Note Z may be the same pointer as X or Y
int ret = 0;
// montgomery mult prepare
if (again == false) {
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, hw_words);
DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
DPORT_REG_WRITE(RSA_MULT_MODE_REG, hw_words / 16 - 1);
}
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Y, hw_words);
start_op(RSA_MULT_START_REG);
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, hw_words) );
wait_op_complete();
/* Read back the result */
mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, hw_words);
/* from HAC 14.36 - 3. If Z >= M then Z = Z - M */
if (mbedtls_mpi_cmp_mpi(Z, M) >= 0) {
MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(Z, Z, M));
}
cleanup:
return ret;
}
/* Special-case of mbedtls_mpi_mult_mpi(), where we use hardware montgomery mod
multiplication to calculate an mbedtls_mpi_mult_mpi result where either
A or B are >2048 bits so can't use the standard multiplication method.
Result (z_words, based on A bits + B bits) must still be less than 4096 bits.
This case is simpler than the general case modulo multiply of
esp_mpi_mul_mpi_mod() because we can control the other arguments:
* Modulus is chosen with M=(2^num_bits - 1) (ie M=R-1), so output
isn't actually modulo anything.
* Mprime and Rinv are therefore predictable as follows:
Mprime = 1
Rinv = 1
(See RSA Accelerator section in Technical Reference for more about Mprime, Rinv)
*/
void esp_mpi_mult_mpi_failover_mod_mult_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words)
{
size_t hw_words = num_words;
/* M = 2^num_words - 1, so block is entirely FF */
for (int i = 0; i < hw_words; i++) {
DPORT_REG_WRITE(RSA_MEM_M_BLOCK_BASE + i * 4, UINT32_MAX);
}
/* Mprime = 1 */
DPORT_REG_WRITE(RSA_M_DASH_REG, 1);
/* "mode" register loaded with number of 512-bit blocks, minus 1 */
DPORT_REG_WRITE(RSA_MULT_MODE_REG, (hw_words / 16) - 1);
/* Load X */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
/* Rinv = 1, write first word */
DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE, 1);
/* Zero out rest of the Rinv words */
for (int i = 1; i < hw_words; i++) {
DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE + i * 4, 0);
}
start_op(RSA_MULT_START_REG);
wait_op_complete();
/* finish the modular multiplication */
/* Load Y to X input memory block, rerun */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, Y, hw_words);
start_op(RSA_MULT_START_REG);
}

View File

@ -1,739 +0,0 @@
/**
* \brief Multi-precision integer library, ESP32 hardware accelerated parts
*
* based on mbedTLS implementation
*
* Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
* Additions Copyright (C) 2016, Espressif Systems (Shanghai) PTE Ltd
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <limits.h>
#include <assert.h>
#include <stdlib.h>
#include <sys/param.h>
#include "esp32/rom/bigint.h"
#include "soc/hwcrypto_periph.h"
#include "esp_system.h"
#include "esp_log.h"
#include "esp_intr_alloc.h"
#include "esp_attr.h"
#include <mbedtls/bignum.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/semphr.h"
#include "driver/periph_ctrl.h"
/* Some implementation notes:
*
* - Naming convention x_words, y_words, z_words for number of words (limbs) used in a particular
* bignum. This number may be less than the size of the bignum
*
* - Naming convention hw_words for the hardware length of the operation. This number is always
* rounded up to a 512 bit multiple, and may be larger than any of the numbers involved in the
* calculation.
*
* - Timing behaviour of these functions will depend on the length of the inputs. This is fundamentally
* the same constraint as the software mbedTLS implementations, and relies on the same
* countermeasures (exponent blinding, etc) which are used in mbedTLS.
*/
static const __attribute__((unused)) char *TAG = "bignum";
#define ciL (sizeof(mbedtls_mpi_uint)) /* chars in limb */
#define biL (ciL << 3) /* bits in limb */
static _lock_t mpi_lock;
void esp_mpi_acquire_hardware( void )
{
/* newlib locks lazy initialize on ESP-IDF */
_lock_acquire(&mpi_lock);
/* Enable RSA hardware */
periph_module_enable(PERIPH_RSA_MODULE);
DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
while(DPORT_REG_READ(RSA_CLEAN_REG) != 1);
// Note: from enabling RSA clock to here takes about 1.3us
}
void esp_mpi_release_hardware( void )
{
DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
/* Disable RSA hardware */
periph_module_disable(PERIPH_RSA_MODULE);
_lock_release(&mpi_lock);
}
/* Convert bit count to word count
*/
static inline size_t bits_to_words(size_t bits)
{
return (bits + 31) / 32;
}
/* Round up number of words to nearest
512 bit (16 word) block count.
*/
static inline size_t hardware_words(size_t words)
{
return (words + 0xF) & ~0xF;
}
/* Number of words used to hold 'mpi'.
Equivalent of bits_to_words(mbedtls_mpi_bitlen(mpi)), but uses less cycles if the
exact bit count is not needed.
Note that mpi->n (size of memory buffer) may be higher than this
number, if the high bits are mostly zeroes.
*/
static inline size_t word_length(const mbedtls_mpi *mpi)
{
for(size_t i = mpi->n; i > 0; i--) {
if( mpi->p[i - 1] != 0 ) {
return i;
}
}
return 0;
}
/* Copy mbedTLS MPI bignum 'mpi' to hardware memory block at 'mem_base'.
If hw_words is higher than the number of words in the bignum then
these additional words will be zeroed in the memory buffer.
*/
static inline void mpi_to_mem_block(uint32_t mem_base, const mbedtls_mpi *mpi, size_t hw_words)
{
uint32_t *pbase = (uint32_t *)mem_base;
uint32_t copy_words = hw_words < mpi->n ? hw_words : mpi->n;
/* Copy MPI data to memory block registers */
for (int i = 0; i < copy_words; i++) {
pbase[i] = mpi->p[i];
}
/* Zero any remaining memory block data */
for (int i = copy_words; i < hw_words; i++) {
pbase[i] = 0;
}
/* Note: not executing memw here, can do it before we start a bignum operation */
}
/* Read mbedTLS MPI bignum back from hardware memory block.
Reads num_words words from block.
Bignum 'x' should already be grown to at least num_words by caller (can be done while
calculation is in progress, to save some cycles)
*/
static inline void mem_block_to_mpi(mbedtls_mpi *x, uint32_t mem_base, int num_words)
{
assert(x->n >= num_words);
/* Copy data from memory block registers */
esp_dport_access_read_buffer(x->p, mem_base, num_words);
/* Zero any remaining limbs in the bignum, if the buffer is bigger
than num_words */
for(size_t i = num_words; i < x->n; i++) {
x->p[i] = 0;
}
}
/**
*
* There is a need for the value of integer N' such that B^-1(B-1)-N^-1N'=1,
* where B^-1(B-1) mod N=1. Actually, only the least significant part of
* N' is needed, hence the definition N0'=N' mod b. We reproduce below the
* simple algorithm from an article by Dusse and Kaliski to efficiently
* find N0' from N0 and b
*/
static mbedtls_mpi_uint modular_inverse(const mbedtls_mpi *M)
{
int i;
uint64_t t = 1;
uint64_t two_2_i_minus_1 = 2; /* 2^(i-1) */
uint64_t two_2_i = 4; /* 2^i */
uint64_t N = M->p[0];
for (i = 2; i <= 32; i++) {
if ((mbedtls_mpi_uint) N * t % two_2_i >= two_2_i_minus_1) {
t += two_2_i_minus_1;
}
two_2_i_minus_1 <<= 1;
two_2_i <<= 1;
}
return (mbedtls_mpi_uint)(UINT32_MAX - t + 1);
}
/* Calculate Rinv = RR^2 mod M, where:
*
* R = b^n where b = 2^32, n=num_words,
* R = 2^N (where N=num_bits)
* RR = R^2 = 2^(2*N) (where N=num_bits=num_words*32)
*
* This calculation is computationally expensive (mbedtls_mpi_mod_mpi)
* so caller should cache the result where possible.
*
* DO NOT call this function while holding esp_mpi_acquire_hardware().
*
*/
static int calculate_rinv(mbedtls_mpi *Rinv, const mbedtls_mpi *M, int num_words)
{
int ret;
size_t num_bits = num_words * 32;
mbedtls_mpi RR;
mbedtls_mpi_init(&RR);
MBEDTLS_MPI_CHK(mbedtls_mpi_set_bit(&RR, num_bits * 2, 1));
MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(Rinv, &RR, M));
cleanup:
mbedtls_mpi_free(&RR);
return ret;
}
/* Begin an RSA operation. op_reg specifies which 'START' register
to write to.
*/
static inline void start_op(uint32_t op_reg)
{
/* Clear interrupt status */
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
/* Note: above REG_WRITE includes a memw, so we know any writes
to the memory blocks are also complete. */
DPORT_REG_WRITE(op_reg, 1);
}
/* Wait for an RSA operation to complete.
*/
static inline void wait_op_complete(uint32_t op_reg)
{
while(DPORT_REG_READ(RSA_INTERRUPT_REG) != 1)
{ }
/* clear the interrupt */
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
}
/* Sub-stages of modulo multiplication/exponentiation operations */
inline static int modular_multiply_finish(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t hw_words, size_t z_words);
/* Z = (X * Y) mod M
Not an mbedTLS function
*/
int esp_mpi_mul_mpi_mod(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M)
{
int ret;
size_t x_bits = mbedtls_mpi_bitlen(X);
size_t y_bits = mbedtls_mpi_bitlen(Y);
size_t m_bits = mbedtls_mpi_bitlen(M);
size_t z_bits = MIN(m_bits, x_bits + y_bits);
size_t x_words = bits_to_words(x_bits);
size_t y_words = bits_to_words(y_bits);
size_t m_words = bits_to_words(m_bits);
size_t z_words = bits_to_words(z_bits);
size_t hw_words = hardware_words(MAX(x_words, MAX(y_words, m_words))); /* longest operand */
mbedtls_mpi Rinv;
mbedtls_mpi_uint Mprime;
/* Calculate and load the first stage montgomery multiplication */
mbedtls_mpi_init(&Rinv);
MBEDTLS_MPI_CHK(calculate_rinv(&Rinv, M, hw_words));
Mprime = modular_inverse(M);
esp_mpi_acquire_hardware();
/* Load M, X, Rinv, Mprime (Mprime is mod 2^32) */
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, hw_words);
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, &Rinv, hw_words);
DPORT_REG_WRITE(RSA_M_DASH_REG, (uint32_t)Mprime);
/* "mode" register loaded with number of 512-bit blocks, minus 1 */
DPORT_REG_WRITE(RSA_MULT_MODE_REG, (hw_words / 16) - 1);
/* Execute first stage montgomery multiplication */
start_op(RSA_MULT_START_REG);
wait_op_complete(RSA_MULT_START_REG);
/* execute second stage */
ret = modular_multiply_finish(Z, X, Y, hw_words, z_words);
esp_mpi_release_hardware();
cleanup:
mbedtls_mpi_free(&Rinv);
return ret;
}
#if defined(MBEDTLS_MPI_EXP_MOD_ALT)
static int mont(mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi* Y, const mbedtls_mpi* M,
mbedtls_mpi_uint Mprime,
size_t hw_words,
bool again)
{
// Note Z may be the same pointer as X or Y
int ret = 0;
// montgomery mult prepare
if (again == false) {
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, hw_words);
DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
DPORT_REG_WRITE(RSA_MULT_MODE_REG, hw_words / 16 - 1);
}
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Y, hw_words);
start_op(RSA_MULT_START_REG);
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, hw_words) );
wait_op_complete(RSA_MULT_START_REG);
/* Read back the result */
mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, hw_words);
/* from HAC 14.36 - 3. If Z >= M then Z = Z - M */
if (mbedtls_mpi_cmp_mpi(Z, M) >= 0) {
MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(Z, Z, M));
}
cleanup:
return ret;
}
/*
* Return the most significant one-bit.
*/
static size_t mbedtls_mpi_msb( const mbedtls_mpi* X )
{
int i, j;
if (X != NULL && X->n != 0) {
for (i = X->n - 1; i >= 0; i--) {
if (X->p[i] != 0) {
for (j = biL - 1; j >= 0; j--) {
if ((X->p[i] & (1 << j)) != 0) {
return (i * biL) + j;
}
}
}
}
}
return 0;
}
/*
* Montgomery exponentiation: Z = X ^ Y mod M (HAC 14.94)
*/
static int mpi_montgomery_exp_calc( mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi* Y, const mbedtls_mpi* M,
mbedtls_mpi* Rinv,
size_t hw_words,
mbedtls_mpi_uint Mprime )
{
int ret = 0;
mbedtls_mpi X_, one;
mbedtls_mpi_init(&X_);
mbedtls_mpi_init(&one);
if( ( ( ret = mbedtls_mpi_grow(&one, hw_words) ) != 0 ) ||
( ( ret = mbedtls_mpi_set_bit(&one, 0, 1) ) != 0 ) ) {
goto cleanup2;
}
// Algorithm from HAC 14.94
{
// 0 determine t (highest bit set in y)
int t = mbedtls_mpi_msb(Y);
esp_mpi_acquire_hardware();
// 1.1 x_ = mont(x, R^2 mod m)
// = mont(x, rb)
MBEDTLS_MPI_CHK( mont(&X_, X, Rinv, M, Mprime, hw_words, false) );
// 1.2 z = R mod m
// now z = R mod m = Mont (R^2 mod m, 1) mod M (as Mont(x) = X&R^-1 mod M)
MBEDTLS_MPI_CHK( mont(Z, Rinv, &one, M, Mprime, hw_words, true) );
// 2 for i from t down to 0
for (int i = t; i >= 0; i--) {
// 2.1 z = mont(z,z)
if (i != t) { // skip on the first iteration as is still unity
MBEDTLS_MPI_CHK( mont(Z, Z, Z, M, Mprime, hw_words, true) );
}
// 2.2 if y[i] = 1 then z = mont(A, x_)
if (mbedtls_mpi_get_bit(Y, i)) {
MBEDTLS_MPI_CHK( mont(Z, Z, &X_, M, Mprime, hw_words, true) );
}
}
// 3 z = Mont(z, 1)
MBEDTLS_MPI_CHK( mont(Z, Z, &one, M, Mprime, hw_words, true) );
}
cleanup:
mbedtls_mpi_free(&X_);
mbedtls_mpi_free(&one);
esp_mpi_release_hardware();
return ret;
cleanup2:
mbedtls_mpi_free(&one);
return ret;
}
/*
* Z = X ^ Y mod M
*
* _Rinv is optional pre-calculated version of Rinv (via calculate_rinv()).
*
* (See RSA Accelerator section in Technical Reference for more about Mprime, Rinv)
*
*/
int mbedtls_mpi_exp_mod( mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi* Y, const mbedtls_mpi* M, mbedtls_mpi* _Rinv )
{
int ret = 0;
size_t x_words = word_length(X);
size_t y_words = word_length(Y);
size_t m_words = word_length(M);
/* "all numbers must be the same length", so choose longest number
as cardinal length of operation...
*/
size_t hw_words = hardware_words(MAX(m_words, MAX(x_words, y_words)));
mbedtls_mpi Rinv_new; /* used if _Rinv == NULL */
mbedtls_mpi *Rinv; /* points to _Rinv (if not NULL) othwerwise &RR_new */
mbedtls_mpi_uint Mprime;
if (mbedtls_mpi_cmp_int(M, 0) <= 0 || (M->p[0] & 1) == 0) {
return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
}
if (mbedtls_mpi_cmp_int(Y, 0) < 0) {
return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
}
if (mbedtls_mpi_cmp_int(Y, 0) == 0) {
return mbedtls_mpi_lset(Z, 1);
}
if (hw_words * 32 > 4096) {
return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
}
/* Determine RR pointer, either _RR for cached value
or local RR_new */
if (_Rinv == NULL) {
mbedtls_mpi_init(&Rinv_new);
Rinv = &Rinv_new;
} else {
Rinv = _Rinv;
}
if (Rinv->p == NULL) {
MBEDTLS_MPI_CHK(calculate_rinv(Rinv, M, hw_words));
}
Mprime = modular_inverse(M);
// Montgomery exponentiation: Z = X ^ Y mod M (HAC 14.94)
MBEDTLS_MPI_CHK( mpi_montgomery_exp_calc(Z, X, Y, M, Rinv, hw_words, Mprime) );
// Compensate for negative X
if (X->s == -1 && (Y->p[0] & 1) != 0) {
Z->s = -1;
MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(Z, M, Z));
} else {
Z->s = 1;
}
cleanup:
if (_Rinv == NULL) {
mbedtls_mpi_free(&Rinv_new);
}
return ret;
}
#endif /* MBEDTLS_MPI_EXP_MOD_ALT */
/* Second & final step of a modular multiply - load second multiplication
* factor Y, run the operation (modular inverse), read back the result
* into Z.
*
* Called from both mbedtls_mpi_exp_mod and mbedtls_mpi_mod_mpi.
*
* @param Z result value
* @param X first multiplication factor (used to set sign of result).
* @param Y second multiplication factor.
* @param hw_words Size of the hardware operation, in words
* @param z_words Size of the expected result, in words (may be less than hw_words).
* Z will be grown to at least this length.
*
* Caller must have already called esp_mpi_acquire_hardware().
*/
static int modular_multiply_finish(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t hw_words, size_t z_words)
{
int ret = 0;
/* Load Y to X input memory block, rerun */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, Y, hw_words);
start_op(RSA_MULT_START_REG);
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, z_words) );
wait_op_complete(RSA_MULT_START_REG);
mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, z_words);
Z->s = X->s * Y->s;
cleanup:
return ret;
}
#if defined(MBEDTLS_MPI_MUL_MPI_ALT) /* MBEDTLS_MPI_MUL_MPI_ALT */
static int mpi_mult_mpi_failover_mod_mult(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t z_words);
static int mpi_mult_mpi_overlong(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t Y_bits, size_t z_words);
/* Z = X * Y */
int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y )
{
int ret = 0;
size_t x_bits = mbedtls_mpi_bitlen(X);
size_t y_bits = mbedtls_mpi_bitlen(Y);
size_t x_words = bits_to_words(x_bits);
size_t y_words = bits_to_words(y_bits);
size_t z_words = bits_to_words(x_bits + y_bits);
size_t hw_words = hardware_words(MAX(x_words, y_words)); // length of one operand in hardware
/* Short-circuit eval if either argument is 0 or 1.
This is needed as the mpi modular division
argument will sometimes call in here when one
argument is too large for the hardware unit, but the other
argument is zero or one.
*/
if (x_bits == 0 || y_bits == 0) {
mbedtls_mpi_lset(Z, 0);
return 0;
}
if (x_bits == 1) {
ret = mbedtls_mpi_copy(Z, Y);
Z->s *= X->s;
return ret;
}
if (y_bits == 1) {
ret = mbedtls_mpi_copy(Z, X);
Z->s *= Y->s;
return ret;
}
/* Grow Z to result size early, avoid interim allocations */
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, z_words) );
/* If either factor is over 2048 bits, we can't use the standard hardware multiplier
(it assumes result is double longest factor, and result is max 4096 bits.)
However, we can fail over to mod_mult for up to 4096 bits of result (modulo
multiplication doesn't have the same restriction, so result is simply the
number of bits in X plus number of bits in in Y.)
*/
if (hw_words * 32 > 2048) {
if (z_words * 32 <= 4096) {
/* Note: it's possible to use mpi_mult_mpi_overlong
for this case as well, but it's very slightly
slower and requires a memory allocation.
*/
return mpi_mult_mpi_failover_mod_mult(Z, X, Y, z_words);
} else {
/* Still too long for the hardware unit... */
if(y_words > x_words) {
return mpi_mult_mpi_overlong(Z, X, Y, y_words, z_words);
} else {
return mpi_mult_mpi_overlong(Z, Y, X, x_words, z_words);
}
}
}
/* Otherwise, we can use the (faster) multiply hardware unit */
esp_mpi_acquire_hardware();
/* Copy X (right-extended) & Y (left-extended) to memory block */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
mpi_to_mem_block(RSA_MEM_Z_BLOCK_BASE + hw_words * 4, Y, hw_words);
/* NB: as Y is left-extended, we don't zero the bottom words_mult words of Y block.
This is OK for now because zeroing is done by hardware when we do esp_mpi_acquire_hardware().
*/
DPORT_REG_WRITE(RSA_M_DASH_REG, 0);
/* "mode" register loaded with number of 512-bit blocks in result,
plus 7 (for range 9-12). (this is ((N~ / 32) - 1) + 8))
*/
DPORT_REG_WRITE(RSA_MULT_MODE_REG, ((hw_words * 2) / 16) + 7);
start_op(RSA_MULT_START_REG);
wait_op_complete(RSA_MULT_START_REG);
/* Read back the result */
mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, z_words);
Z->s = X->s * Y->s;
cleanup:
esp_mpi_release_hardware();
return ret;
}
/* Special-case of mbedtls_mpi_mult_mpi(), where we use hardware montgomery mod
multiplication to calculate an mbedtls_mpi_mult_mpi result where either
A or B are >2048 bits so can't use the standard multiplication method.
Result (z_words, based on A bits + B bits) must still be less than 4096 bits.
This case is simpler than the general case modulo multiply of
esp_mpi_mul_mpi_mod() because we can control the other arguments:
* Modulus is chosen with M=(2^num_bits - 1) (ie M=R-1), so output
isn't actually modulo anything.
* Mprime and Rinv are therefore predictable as follows:
Mprime = 1
Rinv = 1
(See RSA Accelerator section in Technical Reference for more about Mprime, Rinv)
*/
static int mpi_mult_mpi_failover_mod_mult(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t z_words)
{
int ret = 0;
size_t hw_words = hardware_words(z_words);
/* Load coefficients to hardware */
esp_mpi_acquire_hardware();
/* M = 2^num_words - 1, so block is entirely FF */
for(int i = 0; i < hw_words; i++) {
DPORT_REG_WRITE(RSA_MEM_M_BLOCK_BASE + i * 4, UINT32_MAX);
}
/* Mprime = 1 */
DPORT_REG_WRITE(RSA_M_DASH_REG, 1);
/* "mode" register loaded with number of 512-bit blocks, minus 1 */
DPORT_REG_WRITE(RSA_MULT_MODE_REG, (hw_words / 16) - 1);
/* Load X */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
/* Rinv = 1 */
DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE, 1);
for(int i = 1; i < hw_words; i++) {
DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE + i * 4, 0);
}
start_op(RSA_MULT_START_REG);
wait_op_complete(RSA_MULT_START_REG);
/* finish the modular multiplication */
ret = modular_multiply_finish(Z, X, Y, hw_words, z_words);
esp_mpi_release_hardware();
return ret;
}
/* Deal with the case when X & Y are too long for the hardware unit, by splitting one operand
into two halves.
Y must be the longer operand
Slice Y into Yp, Ypp such that:
Yp = lower 'b' bits of Y
Ypp = upper 'b' bits of Y (right shifted)
Such that
Z = X * Y
Z = X * (Yp + Ypp<<b)
Z = (X * Yp) + (X * Ypp<<b)
Note that this function may recurse multiple times, if both X & Y
are too long for the hardware multiplication unit.
*/
static int mpi_mult_mpi_overlong(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t y_words, size_t z_words)
{
int ret = 0;
mbedtls_mpi Ztemp;
/* Rather than slicing in two on bits we slice on limbs (32 bit words) */
const size_t words_slice = y_words / 2;
/* Yp holds lower bits of Y (declared to reuse Y's array contents to save on copying) */
const mbedtls_mpi Yp = {
.p = Y->p,
.n = words_slice,
.s = Y->s
};
/* Ypp holds upper bits of Y, right shifted (also reuses Y's array contents) */
const mbedtls_mpi Ypp = {
.p = Y->p + words_slice,
.n = y_words - words_slice,
.s = Y->s
};
mbedtls_mpi_init(&Ztemp);
/* Get result Ztemp = Yp * X (need temporary variable Ztemp) */
MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi(&Ztemp, X, &Yp) );
/* Z = Ypp * Y */
MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi(Z, X, &Ypp) );
/* Z = Z << b */
MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l(Z, words_slice * 32) );
/* Z += Ztemp */
MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi(Z, Z, &Ztemp) );
cleanup:
mbedtls_mpi_free(&Ztemp);
return ret;
}
#endif /* MBEDTLS_MPI_MUL_MPI_ALT */

View File

@ -0,0 +1,220 @@
/**
* \brief Multi-precision integer library, ESP32 S2 hardware accelerated parts
*
* based on mbedTLS implementation
*
* Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
* Additions Copyright (C) 2016-2020, Espressif Systems (Shanghai) PTE Ltd
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "soc/hwcrypto_periph.h"
#include "driver/periph_ctrl.h"
#include <mbedtls/bignum.h>
#include "bignum_impl.h"
#include "soc/dport_reg.h"
#include "soc/periph_defs.h"
#include <sys/param.h>
size_t esp_mpi_hardware_words(size_t words)
{
return words;
}
void esp_mpi_enable_hardware_hw_op( void )
{
/* Enable RSA hardware */
periph_module_enable(PERIPH_RSA_MODULE);
DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_MEM_PD);
while (DPORT_REG_READ(RSA_QUERY_CLEAN_REG) != 1) {
}
// Note: from enabling RSA clock to here takes about 1.3us
}
void esp_mpi_disable_hardware_hw_op( void )
{
DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
/* Disable RSA hardware */
periph_module_disable(PERIPH_RSA_MODULE);
}
/* Copy mbedTLS MPI bignum 'mpi' to hardware memory block at 'mem_base'.
If num_words is higher than the number of words in the bignum then
these additional words will be zeroed in the memory buffer.
*/
static inline void mpi_to_mem_block(uint32_t mem_base, const mbedtls_mpi *mpi, size_t num_words)
{
uint32_t *pbase = (uint32_t *)mem_base;
uint32_t copy_words = MIN(num_words, mpi->n);
/* Copy MPI data to memory block registers */
for (int i = 0; i < copy_words; i++) {
pbase[i] = mpi->p[i];
}
/* Zero any remaining memory block data */
for (int i = copy_words; i < num_words; i++) {
pbase[i] = 0;
}
}
/* Read mbedTLS MPI bignum back from hardware memory block.
Reads num_words words from block.
*/
static inline void mem_block_to_mpi(mbedtls_mpi *x, uint32_t mem_base, int num_words)
{
/* Copy data from memory block registers */
esp_dport_access_read_buffer(x->p, mem_base, num_words);
/* Zero any remaining limbs in the bignum, if the buffer is bigger
than num_words */
for (size_t i = num_words; i < x->n; i++) {
x->p[i] = 0;
}
}
/* Begin an RSA operation. op_reg specifies which 'START' register
to write to.
*/
static inline void start_op(uint32_t op_reg)
{
/* Clear interrupt status */
DPORT_REG_WRITE(RSA_CLEAR_INTERRUPT_REG, 1);
/* Note: above REG_WRITE includes a memw, so we know any writes
to the memory blocks are also complete. */
DPORT_REG_WRITE(op_reg, 1);
}
/* Wait for an RSA operation to complete.
*/
static inline void wait_op_complete(void)
{
while (DPORT_REG_READ(RSA_QUERY_INTERRUPT_REG) != 1)
{ }
/* clear the interrupt */
DPORT_REG_WRITE(RSA_CLEAR_INTERRUPT_REG, 1);
}
/* Read result from last MPI operation */
void esp_mpi_read_result_hw_op(mbedtls_mpi *Z, size_t z_words)
{
wait_op_complete();
mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, z_words);
}
/* Z = (X * Y) mod M
Not an mbedTLS function
*/
void esp_mpi_mul_mpi_mod_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, const mbedtls_mpi *Rinv, mbedtls_mpi_uint Mprime, size_t num_words)
{
DPORT_REG_WRITE(RSA_LENGTH_REG, (num_words - 1));
/* Load M, X, Rinv, Mprime (Mprime is mod 2^32) */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
mpi_to_mem_block(RSA_MEM_Y_BLOCK_BASE, Y, num_words);
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, num_words);
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Rinv, num_words);
DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
start_op(RSA_MOD_MULT_START_REG);
}
/* Z = (X ^ Y) mod M
*/
void esp_mpi_exp_mpi_mod_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, const mbedtls_mpi *Rinv, mbedtls_mpi_uint Mprime, size_t num_words)
{
size_t y_bits = mbedtls_mpi_bitlen(Y);
DPORT_REG_WRITE(RSA_LENGTH_REG, (num_words - 1));
/* Load M, X, Rinv, Mprime (Mprime is mod 2^32) */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
mpi_to_mem_block(RSA_MEM_Y_BLOCK_BASE, Y, num_words);
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, num_words);
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Rinv, num_words);
DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
/* Enable acceleration options */
DPORT_REG_WRITE(RSA_CONSTANT_TIME_REG, 0);
DPORT_REG_WRITE(RSA_SEARCH_OPEN_REG, 1);
DPORT_REG_WRITE(RSA_SEARCH_POS_REG, y_bits - 1);
/* Execute first stage montgomery multiplication */
start_op(RSA_MODEXP_START_REG);
DPORT_REG_WRITE(RSA_SEARCH_OPEN_REG, 0);
}
/* Z = X * Y */
void esp_mpi_mul_mpi_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words)
{
/* Copy X (right-extended) & Y (left-extended) to memory block */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
mpi_to_mem_block(RSA_MEM_Z_BLOCK_BASE + num_words * 4, Y, num_words);
/* NB: as Y is left-extended, we don't zero the bottom words_mult words of Y block.
This is OK for now because zeroing is done by hardware when we do esp_mpi_acquire_hardware().
*/
DPORT_REG_WRITE(RSA_LENGTH_REG, (num_words * 2 - 1));
start_op(RSA_MULT_START_REG);
}
/**
* @brief Special-case of (X * Y), where we use hardware montgomery mod
multiplication to calculate result where either A or B are >2048 bits so
can't use the standard multiplication method.
*
*/
void esp_mpi_mult_mpi_failover_mod_mult_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words)
{
/* M = 2^num_words - 1, so block is entirely FF */
for (int i = 0; i < num_words; i++) {
DPORT_REG_WRITE(RSA_MEM_M_BLOCK_BASE + i * 4, UINT32_MAX);
}
/* Mprime = 1 */
DPORT_REG_WRITE(RSA_M_DASH_REG, 1);
DPORT_REG_WRITE(RSA_LENGTH_REG, num_words - 1);
/* Load X & Y */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
mpi_to_mem_block(RSA_MEM_Y_BLOCK_BASE, Y, num_words);
/* Rinv = 1, write first word */
DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE, 1);
/* Zero out rest of the Rinv words */
for (int i = 1; i < num_words; i++) {
DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE + i * 4, 0);
}
start_op(RSA_MOD_MULT_START_REG);
}

View File

@ -1,5 +1,5 @@
/**
* \brief Multi-precision integer library, ESP32C hardware accelerated parts
* \brief Multi-precision integer library, ESP32 hardware accelerated parts
*
* based on mbedTLS implementation
*
@ -27,20 +27,28 @@
#include <assert.h>
#include <stdlib.h>
#include <sys/param.h>
#include "mbedtls/bignum.h"
#include "esp32s2/rom/bigint.h"
#include "soc/hwcrypto_reg.h"
#include "soc/hwcrypto_periph.h"
#include "esp_system.h"
#include "esp_log.h"
#include "esp_intr_alloc.h"
#include "esp_attr.h"
#include "bignum_impl.h"
#include "soc/dport_reg.h"
#include "soc/periph_defs.h"
#include <mbedtls/bignum.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/semphr.h"
/* Some implementation notes:
*
* - Naming convention x_words, y_words, z_words for number of words (limbs) used in a particular
* bignum. This number may be less than the size of the bignum
*
* - Naming convention hw_words for the hardware length of the operation. This number maybe be rounded up
* for targets that requres this (e.g. ESP32), and may be larger than any of the numbers
* involved in the calculation.
*
* - Timing behaviour of these functions will depend on the length of the inputs. This is fundamentally
* the same constraint as the software mbedTLS implementations, and relies on the same
* countermeasures (exponent blinding, etc) which are used in mbedTLS.
*/
static const __attribute__((unused)) char *TAG = "bignum";
@ -50,34 +58,6 @@ static const __attribute__((unused)) char *TAG = "bignum";
static _lock_t mpi_lock;
void esp_mpi_acquire_hardware( void )
{
/* newlib locks lazy initialize on ESP-IDF */
_lock_acquire(&mpi_lock);
DPORT_REG_SET_BIT(DPORT_PERIP_CLK_EN1_REG, DPORT_CRYPTO_RSA_CLK_EN);
/* also clear reset on digital signature, otherwise RSA is held in reset */
DPORT_REG_CLR_BIT(DPORT_PERIP_RST_EN1_REG, DPORT_CRYPTO_RSA_RST
| DPORT_CRYPTO_DS_RST);
DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_MEM_PD);
while (DPORT_REG_READ(RSA_QUERY_CLEAN_REG) != 1) {
}
// Note: from enabling RSA clock to here takes about 1.3us
}
void esp_mpi_release_hardware( void )
{
DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
/* don't reset digital signature unit, as this resets AES also */
DPORT_REG_SET_BIT(DPORT_PERIP_RST_EN1_REG, DPORT_CRYPTO_RSA_RST);
DPORT_REG_CLR_BIT(DPORT_PERIP_CLK_EN1_REG, DPORT_CRYPTO_RSA_CLK_EN);
_lock_release(&mpi_lock);
}
/* Convert bit count to word count
*/
static inline size_t bits_to_words(size_t bits)
@ -85,10 +65,10 @@ static inline size_t bits_to_words(size_t bits)
return (bits + 31) / 32;
}
/* Return the number of words actually used to represent an mpi
number.
*/
#if defined(MBEDTLS_MPI_EXP_MOD_ALT)
static size_t mpi_words(const mbedtls_mpi *mpi)
{
for (size_t i = mpi->n; i > 0; i--) {
@ -99,56 +79,27 @@ static size_t mpi_words(const mbedtls_mpi *mpi)
return 0;
}
/* Copy mbedTLS MPI bignum 'mpi' to hardware memory block at 'mem_base'.
#endif //MBEDTLS_MPI_EXP_MOD_ALT
If num_words is higher than the number of words in the bignum then
these additional words will be zeroed in the memory buffer.
*/
static inline void mpi_to_mem_block(uint32_t mem_base, const mbedtls_mpi *mpi, size_t num_words)
void esp_mpi_acquire_hardware( void )
{
uint32_t *pbase = (uint32_t *)mem_base;
uint32_t copy_words = num_words < mpi->n ? num_words : mpi->n;
/* newlib locks lazy initialize on ESP-IDF */
_lock_acquire(&mpi_lock);
/* Copy MPI data to memory block registers */
for (int i = 0; i < copy_words; i++) {
pbase[i] = mpi->p[i];
}
/* Zero any remaining memory block data */
for (int i = copy_words; i < num_words; i++) {
pbase[i] = 0;
}
/* Note: not executing memw here, can do it before we start a bignum operation */
/* Enable RSA hardware */
esp_mpi_enable_hardware_hw_op();
}
/* Read mbedTLS MPI bignum back from hardware memory block.
Reads num_words words from block.
Can return a failure result if fails to grow the MPI result.
*/
static inline int mem_block_to_mpi(mbedtls_mpi *x, uint32_t mem_base, int num_words)
void esp_mpi_release_hardware( void )
{
int ret = 0;
esp_mpi_disable_hardware_hw_op();
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(x, num_words) );
/* Copy data from memory block registers */
esp_dport_access_read_buffer(x->p, mem_base, num_words);
/* Zero any remaining limbs in the bignum, if the buffer is bigger
than num_words */
for (size_t i = num_words; i < x->n; i++) {
x->p[i] = 0;
}
asm volatile ("memw");
cleanup:
return ret;
_lock_release(&mpi_lock);
}
/**
*
* There is a need for the value of integer N' such that B^-1(B-1)-N^-1N'=1,
@ -200,35 +151,14 @@ static int calculate_rinv(mbedtls_mpi *Rinv, const mbedtls_mpi *M, int num_words
cleanup:
mbedtls_mpi_free(&RR);
return ret;
}
/* Begin an RSA operation. op_reg specifies which 'START' register
to write to.
*/
static inline void start_op(uint32_t op_reg)
{
/* Clear interrupt status */
DPORT_REG_WRITE(RSA_CLEAR_INTERRUPT_REG, 1);
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
/* Note: above REG_WRITE includes a memw, so we know any writes
to the memory blocks are also complete. */
DPORT_REG_WRITE(op_reg, 1);
}
/* Wait for an RSA operation to complete.
*/
static inline void wait_op_complete(uint32_t op_reg)
{
while (DPORT_REG_READ(RSA_QUERY_INTERRUPT_REG) != 1)
{ }
/* clear the interrupt */
DPORT_REG_WRITE(RSA_CLEAR_INTERRUPT_REG, 1);
}
/* Z = (X * Y) mod M
@ -236,61 +166,127 @@ static inline void wait_op_complete(uint32_t op_reg)
*/
int esp_mpi_mul_mpi_mod(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M)
{
int ret = 0;
int ret;
size_t x_bits = mbedtls_mpi_bitlen(X);
size_t y_bits = mbedtls_mpi_bitlen(Y);
size_t x_words = mpi_words(X);
size_t y_words = mpi_words(Y);
size_t m_words = mpi_words(M);
size_t m_bits = mbedtls_mpi_bitlen(M);
size_t z_bits = MIN(m_bits, x_bits + y_bits);
size_t x_words = bits_to_words(x_bits);
size_t y_words = bits_to_words(y_bits);
size_t m_words = bits_to_words(m_bits);
size_t z_words = bits_to_words(z_bits);
size_t hw_words = esp_mpi_hardware_words(MAX(x_words, MAX(y_words, m_words))); /* longest operand */
mbedtls_mpi Rinv;
mbedtls_mpi_uint Mprime;
size_t num_words = MAX(MAX(m_words, x_words), y_words);
if (num_words * 32 > 4096) {
return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
}
/* Calculate and load the first stage montgomery multiplication */
mbedtls_mpi_init(&Rinv);
MBEDTLS_MPI_CHK(calculate_rinv(&Rinv, M, num_words));
MBEDTLS_MPI_CHK(calculate_rinv(&Rinv, M, hw_words));
Mprime = modular_inverse(M);
esp_mpi_acquire_hardware();
/* Load and start a (X * Y) mod M calculation */
esp_mpi_mul_mpi_mod_hw_op(X, Y, M, &Rinv, Mprime, hw_words);
DPORT_REG_WRITE(RSA_LENGTH_REG, (num_words - 1));
DPORT_REG_WRITE(RSA_M_DASH_REG, (uint32_t)Mprime);
MBEDTLS_MPI_CHK(mbedtls_mpi_grow(Z, z_words));
/* Load M, X, Rinv, Mprime (Mprime is mod 2^32) */
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, num_words);
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, &Rinv, num_words);
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
mpi_to_mem_block(RSA_MEM_Y_BLOCK_BASE, Y, num_words);
/* Enable acceleration options */
DPORT_REG_WRITE(RSA_CONSTANT_TIME_REG, 0);
DPORT_REG_WRITE(RSA_SEARCH_OPEN_REG, 1);
DPORT_REG_WRITE(RSA_SEARCH_POS_REG, y_bits - 1);
/* Execute first stage montgomery multiplication */
start_op(RSA_MOD_MULT_START_REG);
wait_op_complete(RSA_MOD_MULT_START_REG);
DPORT_REG_WRITE(RSA_SEARCH_OPEN_REG, 1);
mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, m_words);
esp_mpi_release_hardware();
esp_mpi_read_result_hw_op(Z, z_words);
Z->s = X->s * Y->s;
cleanup:
mbedtls_mpi_free(&Rinv);
esp_mpi_release_hardware();
return ret;
}
#if defined(MBEDTLS_MPI_EXP_MOD_ALT)
#ifdef ESP_MPI_USE_MONT_EXP
/*
* Sliding-window exponentiation: Z = X^Y mod M (HAC 14.85)
* Return the most significant one-bit.
*/
static size_t mbedtls_mpi_msb( const mbedtls_mpi *X )
{
int i, j;
if (X != NULL && X->n != 0) {
for (i = X->n - 1; i >= 0; i--) {
if (X->p[i] != 0) {
for (j = biL - 1; j >= 0; j--) {
if ((X->p[i] & (1 << j)) != 0) {
return (i * biL) + j;
}
}
}
}
}
return 0;
}
/*
* Montgomery exponentiation: Z = X ^ Y mod M (HAC 14.94)
*/
static int mpi_montgomery_exp_calc( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M,
mbedtls_mpi *Rinv,
size_t hw_words,
mbedtls_mpi_uint Mprime )
{
int ret = 0;
mbedtls_mpi X_, one;
mbedtls_mpi_init(&X_);
mbedtls_mpi_init(&one);
if ( ( ( ret = mbedtls_mpi_grow(&one, hw_words) ) != 0 ) ||
( ( ret = mbedtls_mpi_set_bit(&one, 0, 1) ) != 0 ) ) {
goto cleanup2;
}
// Algorithm from HAC 14.94
{
// 0 determine t (highest bit set in y)
int t = mbedtls_mpi_msb(Y);
esp_mpi_acquire_hardware();
// 1.1 x_ = mont(x, R^2 mod m)
// = mont(x, rb)
MBEDTLS_MPI_CHK( esp_mont_hw_op(&X_, X, Rinv, M, Mprime, hw_words, false) );
// 1.2 z = R mod m
// now z = R mod m = Mont (R^2 mod m, 1) mod M (as Mont(x) = X&R^-1 mod M)
MBEDTLS_MPI_CHK( esp_mont_hw_op(Z, Rinv, &one, M, Mprime, hw_words, true) );
// 2 for i from t down to 0
for (int i = t; i >= 0; i--) {
// 2.1 z = mont(z,z)
if (i != t) { // skip on the first iteration as is still unity
MBEDTLS_MPI_CHK( esp_mont_hw_op(Z, Z, Z, M, Mprime, hw_words, true) );
}
// 2.2 if y[i] = 1 then z = mont(A, x_)
if (mbedtls_mpi_get_bit(Y, i)) {
MBEDTLS_MPI_CHK( esp_mont_hw_op(Z, Z, &X_, M, Mprime, hw_words, true) );
}
}
// 3 z = Mont(z, 1)
MBEDTLS_MPI_CHK( esp_mont_hw_op(Z, Z, &one, M, Mprime, hw_words, true) );
}
cleanup:
esp_mpi_release_hardware();
cleanup2:
mbedtls_mpi_free(&X_);
mbedtls_mpi_free(&one);
return ret;
}
#endif //USE_MONT_EXPONENATIATION
/*
* Z = X ^ Y mod M
*
* _Rinv is optional pre-calculated version of Rinv (via calculate_rinv()).
*
@ -300,20 +296,19 @@ cleanup:
int mbedtls_mpi_exp_mod( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, mbedtls_mpi *_Rinv )
{
int ret = 0;
size_t y_bits = mbedtls_mpi_bitlen(Y);
size_t x_words = mpi_words(X);
size_t y_words = mpi_words(Y);
size_t m_words = mpi_words(M);
size_t num_words;
mbedtls_mpi Rinv_new; /* used if _Rinv == NULL */
mbedtls_mpi *Rinv; /* points to _Rinv (if not NULL) othwerwise &RR_new */
mbedtls_mpi_uint Mprime;
/* "all numbers must be the same length", so choose longest number
as cardinal length of operation...
*/
num_words = MAX(m_words, MAX(x_words, y_words));
size_t num_words = esp_mpi_hardware_words(MAX(m_words, MAX(x_words, y_words)));
mbedtls_mpi Rinv_new; /* used if _Rinv == NULL */
mbedtls_mpi *Rinv; /* points to _Rinv (if not NULL) othwerwise &RR_new */
mbedtls_mpi_uint Mprime;
if (mbedtls_mpi_cmp_int(M, 0) <= 0 || (M->p[0] & 1) == 0) {
return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
@ -345,30 +340,22 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
Mprime = modular_inverse(M);
// Montgomery exponentiation: Z = X ^ Y mod M (HAC 14.94)
#ifdef ESP_MPI_USE_MONT_EXP
ret = mpi_montgomery_exp_calc(Z, X, Y, M, Rinv, num_words, Mprime) ;
MBEDTLS_MPI_CHK(ret);
#else
esp_mpi_acquire_hardware();
DPORT_REG_WRITE(RSA_LENGTH_REG, num_words - 1);
/* Load M, X, Rinv, M-prime (M-prime is mod 2^32) */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
mpi_to_mem_block(RSA_MEM_Y_BLOCK_BASE, Y, num_words);
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, num_words);
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Rinv, num_words);
DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
/* Enable acceleration options */
DPORT_REG_WRITE(RSA_CONSTANT_TIME_REG, 0);
DPORT_REG_WRITE(RSA_SEARCH_OPEN_REG, 1);
DPORT_REG_WRITE(RSA_SEARCH_POS_REG, y_bits - 1);
start_op(RSA_MODEXP_START_REG);
wait_op_complete(RSA_MODEXP_START_REG);
DPORT_REG_WRITE(RSA_SEARCH_OPEN_REG, 0);
ret = mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, m_words);
esp_mpi_exp_mpi_mod_hw_op(X, Y, M, Rinv, Mprime, num_words);
ret = mbedtls_mpi_grow(Z, m_words);
if (ret != 0) {
esp_mpi_release_hardware();
goto cleanup;
}
esp_mpi_read_result_hw_op(Z, m_words);
esp_mpi_release_hardware();
#endif
// Compensate for negative X
if (X->s == -1 && (Y->p[0] & 1) != 0) {
@ -382,15 +369,16 @@ cleanup:
if (_Rinv == NULL) {
mbedtls_mpi_free(&Rinv_new);
}
return ret;
}
#endif /* MBEDTLS_MPI_EXP_MOD_ALT */
#if defined(MBEDTLS_MPI_MUL_MPI_ALT) /* MBEDTLS_MPI_MUL_MPI_ALT */
static int mpi_mult_mpi_failover_mod_mult(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t z_words);
static int mpi_mult_mpi_failover_mod_mult( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t z_words);
static int mpi_mult_mpi_overlong(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t y_words, size_t z_words);
/* Z = X * Y */
@ -400,19 +388,16 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
size_t x_bits = mbedtls_mpi_bitlen(X);
size_t y_bits = mbedtls_mpi_bitlen(Y);
size_t x_words = bits_to_words(x_bits);
size_t y_words = bits_to_words(y_bits);
size_t num_words = MAX(x_words, y_words);
size_t z_words = x_words + y_words;
size_t y_words = bits_to_words(y_bits);
size_t z_words = bits_to_words(x_bits + y_bits);
size_t hw_words = esp_mpi_hardware_words(MAX(x_words, y_words)); // length of one operand in hardware
/* Short-circuit eval if either argument is 0 or 1.
This is needed as the mpi modular division
argument will sometimes call in here when one
argument is too large for the hardware unit, but the other
argument is zero or one.
This leaks some timing information, although overall there is a
lot less timing variation than a software MPI approach.
This is needed as the mpi modular division
argument will sometimes call in here when one
argument is too large for the hardware unit, but the other
argument is zero or one.
*/
if (x_bits == 0 || y_bits == 0) {
mbedtls_mpi_lset(Z, 0);
@ -429,6 +414,9 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
return ret;
}
/* Grow Z to result size early, avoid interim allocations */
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, z_words) );
/* If either factor is over 2048 bits, we can't use the standard hardware multiplier
(it assumes result is double longest factor, and result is max 4096 bits.)
@ -436,10 +424,7 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
multiplication doesn't have the same restriction, so result is simply the
number of bits in X plus number of bits in in Y.)
*/
if (num_words * 32 > 2048) {
if (hw_words * 32 > 2048) {
if (z_words * 32 <= 4096) {
/* Note: it's possible to use mpi_mult_mpi_overlong
for this case as well, but it's very slightly
@ -448,7 +433,6 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
return mpi_mult_mpi_failover_mod_mult(Z, X, Y, z_words);
} else {
/* Still too long for the hardware unit... */
mbedtls_mpi_grow(Z, z_words);
if (y_words > x_words) {
return mpi_mult_mpi_overlong(Z, X, Y, y_words, z_words);
} else {
@ -460,81 +444,18 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
/* Otherwise, we can use the (faster) multiply hardware unit */
esp_mpi_acquire_hardware();
/* Copy X (right-extended) & Y (left-extended) to memory block */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
mpi_to_mem_block(RSA_MEM_Z_BLOCK_BASE + num_words * 4, Y, num_words);
/* NB: as Y is left-extended, we don't zero the bottom words_mult words of Y block.
This is OK for now because zeroing is done by hardware when we do esp_mpi_acquire_hardware().
*/
esp_mpi_mul_mpi_hw_op(X, Y, hw_words);
esp_mpi_read_result_hw_op(Z, z_words);
DPORT_REG_WRITE(RSA_M_DASH_REG, 0);
DPORT_REG_WRITE(RSA_LENGTH_REG, (num_words * 2 - 1));
start_op(RSA_MULT_START_REG);
wait_op_complete(RSA_MULT_START_REG);
/* Read back the result */
ret = mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, z_words);
esp_mpi_release_hardware();
Z->s = X->s * Y->s;
esp_mpi_release_hardware();
cleanup:
return ret;
}
/* Special-case of mbedtls_mpi_mult_mpi(), where we use hardware montgomery mod
multiplication to calculate an mbedtls_mpi_mult_mpi result where either
A or B are >2048 bits so can't use the standard multiplication method.
Result (number of words, based on A bits + B bits) must still be less than 4096 bits.
This case is simpler than the general case modulo multiply of
esp_mpi_mul_mpi_mod() because we can control the other arguments:
* Modulus is chosen with M=(2^num_bits - 1) (ie M=R-1), so output
isn't actually modulo anything.
* Mprime and Rinv are therefore predictable as follows:
Mprime = 1
Rinv = 1
(See RSA Accelerator section in Technical Reference for more about Mprime, Rinv)
*/
static int mpi_mult_mpi_failover_mod_mult(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words)
{
int ret = 0;
/* Load coefficients to hardware */
esp_mpi_acquire_hardware();
/* M = 2^num_words - 1, so block is entirely FF */
for (int i = 0; i < num_words; i++) {
DPORT_REG_WRITE(RSA_MEM_M_BLOCK_BASE + i * 4, UINT32_MAX);
}
/* Mprime = 1 */
DPORT_REG_WRITE(RSA_M_DASH_REG, 1);
DPORT_REG_WRITE(RSA_LENGTH_REG, num_words - 1);
/* Load X & Y */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
mpi_to_mem_block(RSA_MEM_Y_BLOCK_BASE, Y, num_words);
/* Rinv = 1 */
DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE, 1);
for (int i = 1; i < num_words; i++) {
DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE + i * 4, 0);
}
start_op(RSA_MOD_MULT_START_REG);
wait_op_complete(RSA_MOD_MULT_START_REG);
mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, num_words);
esp_mpi_release_hardware();
return ret;
}
/* Deal with the case when X & Y are too long for the hardware unit, by splitting one operand
into two halves.
@ -591,5 +512,40 @@ cleanup:
return ret;
}
/* Special-case of mbedtls_mpi_mult_mpi(), where we use hardware montgomery mod
multiplication to calculate an mbedtls_mpi_mult_mpi result where either
A or B are >2048 bits so can't use the standard multiplication method.
Result (number of words, based on A bits + B bits) must still be less than 4096 bits.
This case is simpler than the general case modulo multiply of
esp_mpi_mul_mpi_mod() because we can control the other arguments:
* Modulus is chosen with M=(2^num_bits - 1) (ie M=R-1), so output
* Mprime and Rinv are therefore predictable as follows:
isn't actually modulo anything.
Mprime 1
Rinv 1
(See RSA Accelerator section in Technical Reference for more about Mprime, Rinv)
*/
static int mpi_mult_mpi_failover_mod_mult( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t z_words)
{
int ret;
size_t hw_words = esp_mpi_hardware_words(z_words);
esp_mpi_acquire_hardware();
esp_mpi_mult_mpi_failover_mod_mult_hw_op(X, Y, hw_words );
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, hw_words) );
esp_mpi_read_result_hw_op(Z, hw_words);
Z->s = X->s * Y->s;
cleanup:
esp_mpi_release_hardware();
return ret;
}
#endif /* MBEDTLS_MPI_MUL_MPI_ALT */

View File

@ -0,0 +1,83 @@
#ifndef _ESP_BIGNUM_H_
#define _ESP_BIGNUM_H_
#include <mbedtls/bignum.h>
#include <stdbool.h>
/* Use montgomery exponentiation (HAC 14.94) for calculating X ^ Y mod M,
this may be faster for some targets. The hardware acceleration support for modular
exponentiation on the ESP32 is slow for public key operations, so use montgomery
exponentiation instead.
*/
#if CONFIG_IDF_TARGET_ESP32
#define ESP_MPI_USE_MONT_EXP
#endif
/**
* @brief Enable the MPI hardware
*
*/
void esp_mpi_enable_hardware_hw_op( void );
/**
* @brief Disable the MPI hardware
*
*/
void esp_mpi_disable_hardware_hw_op( void );
/**
* @brief Calculate the number of words needed to represent the input word in hardware
*
* @param words The number of words to be represented
*
* @return size_t Number of words required
*/
size_t esp_mpi_hardware_words(size_t words);
/**
* @brief Starts a (X * Y) Mod M calculation in hardware. Rinv and M_prime needs to be precalculated in software.
*
*/
void esp_mpi_mul_mpi_mod_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, const mbedtls_mpi *Rinv, mbedtls_mpi_uint Mprime, size_t hw_words);
/**
* @brief Starts a (X * Y) calculation in hardware.
*
*/
void esp_mpi_mul_mpi_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words);
/**
* @brief Special-case of (X * Y), where we use hardware montgomery mod
multiplication to calculate result where either A or B are >2048 bits so
can't use the standard multiplication method.
*
*/
void esp_mpi_mult_mpi_failover_mod_mult_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words);
/**
* @brief Read out the result from the previous calculation.
*
*/
void esp_mpi_read_result_hw_op(mbedtls_mpi *Z, size_t z_words);
#ifdef ESP_MPI_USE_MONT_EXP
/**
* @brief Starts a montgomery multiplication calculation in hardware
*
*/
int esp_mont_hw_op(mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi* Y, const mbedtls_mpi* M,
mbedtls_mpi_uint Mprime,
size_t hw_words,
bool again);
#else
/**
* @brief Starts a (X ^ Y) Mod M calculation in hardware. Rinv and M_prime needs to be precalculated in software.
*
*/
void esp_mpi_exp_mpi_mod_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, const mbedtls_mpi *Rinv, mbedtls_mpi_uint Mprime, size_t hw_words);
#endif //ESP_MPI_USE_MONT_EXP
#endif

View File

@ -186,9 +186,38 @@ static const uint8_t pki_rsa2048_output[] = {
#ifdef CONFIG_MBEDTLS_HARDWARE_MPI
/* Pregenerated RSA 4096 size keys using openssl */
static const char privkey_buf[] = "-----BEGIN RSA PRIVATE KEY-----\n"
static const char privkey_4096_buf[] = "-----BEGIN RSA PRIVATE KEY-----\n"
"MIIJKAIBAAKCAgEA1blr9wfIzTylroJHxcoq+YFA765gF5vj9b6tfaPG0XQExSkjndHv5sra4ar7T+k2sBB4OcKKeGHkNk6wk8tGmOS79r2L74XZs1eB0UruG+huV7Sd+YiWzwN8y9jGImA9hIkf1qxIvkco5WTmT7cVwUnCQ7qiiVadD/LgyeGD04yKZpzv9UJzfjXz5ITTn/ejcn7423M9qz41nhRWwK4zw1jv7IB57d1dWOCbN3RO4dvfVndCz8DOmLzJrZAkLsz39vppbIwbMqTXKFxWqzZY2xrYmnMx9p3v4hLeju7ls3fsekESonoP0C76u50wJfZWO2XcIUo4pue/jHi2o9KouhLXW/vyasplXvE6FFrBjSpsm1Nar4KQMUolEkUbO9baGcQvH9G5WOH0kwPt7AOSqM2EUPvBd7Jv0tbMI/BRZVVltC/t6WhannCM/I6nZrlNe5tie/qYlFu474jp5/tpa8RykkDxwl9whejIqd4iQbvDiP/GXgBYtDJ9VU/S2KqHJhQFLDi+F3+ewOcF391fgt1e1J2vNYLKZOfxTOl/1vJbU/2IjRWTRQ7cXnmpR/GNCRfgH2as6Z/0oknBSVephguDnO5QlveP4Cx2EOVY/A/KgDpu8PumSrlIk+YQgLxdKsXaVI6eDY4rY7q2uCJH3yIAfZJXEeD+ResUuSZltvECAwEAAQKCAgBwR89+oipOGHR6b5tBP+q/1bXFtXhqLs3eBuSiQu5qj2cKJYi+mtJMD3paYDdTThQa/ywKPDf+8n6wQTrnCj32iQRupjnkBg/O9kQPLixVoRCHJy5vL+D6tLxVY3cEDEeFX3zIjQ5SWJQVn6KXcnoNZ7CVYHGPcV9mR5TsuntFImp7aituUBDY14NgJKABRFosBqS6tZpKYo5MlCbXZy1ujUTOnNhxrIAj9yvUQFhIs/hrNpB1ELf46gWSF03LAIesyvWjvx9yxcL7QzeNDyozQbFVwvsWsvaZcIxXzw4B8RjdSV5+2V2BY4z6D6SB7R50ahjxrEqC9PFe3PQmsL9OvFjV9idYwFOhxiWXGjIm3wwFFLOj3e0TShscj2Iw+Ghd3wApvSdBZxzdjap1NHC+Q6yYU+BnivxUHcopVPPM3rsLndyRC6zfrQw/OkOlAP3bNL1hRedPRmRDOz0V1ihEpgC1VfXx6XOu4eg8xWiJgWX+BGvT5GWjfQg2hB1Jm344r3l0eLhr25dO80GIac2QGT2+WmYkXcsQ3AiqAn2VF8UB5mU+Iyh96jmSFVVltGZgfp98yFYN63/7wB++lhVQmJZwbglutng1qjQBFslIULddIHiYvF+AVvkrO3Hc2zg8rT91tbE13k06A1zlNGcQuQKLax8e+2/BNjsZU2E4uQKCAQEA7L4obKWYRHgG6j1VEDRrQU8Vkm4L11B+ZD/rsEh3q7LbzViOPv+1dZ40jX2qYScWyaefI46bukJlk/mlNv4Dh3EnSFvHPCInDM3oImCYImwUx0hkbSRyRNwlwRwx81LJzIR84cCqpNWrXXcplomUSM62ea1E1vtNSZs9Bg2OLoWvFOTPgk/xDi6ezdb6JFiId6cARup/bmZ363mg8jCq0wpTLVdUGrezfMj4GpB1uQET5xqXleumQu/04cHPOfXwpV0ikIOId/ldY/PetiRd86B32aB2Xd4fHUpxHMY+63MFmL6SsqMQJMPubv+eIrOId4HhT+nXNFBZXolT5XG5NwKCAQEA5xvvccHNyCTL0AebxD6EihWnp0/Dd0DwXWxZw0Yhhc9xa/W/QtygB6kPb35oKGvCKdm4dWCIGln03dU5D6CMNkJlbkxpo8gybz34SJ/6OvU836rBLHZXE3Xiqbe5XkdMdarA7kTEhEUqekDXPxhws9dWh0YjtAnBPpm1GQppiykI2edkiIhRgju5ghe+/UjAjxrEgCKZeAODh46hwZHERRKQN2MFUOFcOVDq+2wTJem9r3h1uBQAiZn8PDyx0rlRkwH2dZSSauVW+I713N0JGucOV7FeMO0ebioqqckh0i91ZJNH//Io8Sp8WuBsU/vcP9jT+5BDkCbc71BRO/AFFwKCAQBj4a6oeA0QBhvUw9+ZoKQHv9f4GZnBU+KfZSCJFWn39NQrhMsu5S+n2gGOGJDDwHwqxB+uHsKxCMZWciM0WmMex6ytKJucUURscIsZxespyrPRiEdmjNPxHXiISt8AK9OcB+GwVVsphERygI35Rz5aoWv3VhUPJqNrBKXwYdO06Q3/ILIz5oprU1wIuER9BSU+ZiUFxnXRHEZIAN7Yj5Piyh5hqNCBHTQK17dlbcFdNokxHdUKmYth/l8wyFYnvA21lt+4XOY8x+aQ/xjde+ZvnSozlTGbVNWHxBqI61MsfzDDStQVrhpniIqWJh6PwXM4CIII9z2mgqfR7NqKmTptAoIBAQDTYQOigmZbFvyrayoXVi8XtTLAnv3jByxR5pY7OtvSbagJ3J1w5CYim4iYq39M6TKP4KkMApy5rWl/tFQabPeRcS0gsxc0TBmFEaMTme7fGgrxcFZ6+koubHZCUN5k0sWmIeWQiKlNaY2uf7vf49TBSMXFuGtTclCjlybCnnlmZMPJuhCDqFsUyNelm15+f5pPyWXM5NiFooEc7WIZj996Zb4uSo1EKruVWONzzqe814s9AOp60SCkuoiv97uVRxbLZNItPRSmXNktQmSx/CEl0AuYPYwvJ9HbZQncfTBH9ExlDyidernjyr4uyHGMZyJN614ICy0gncsZv9ZtAd1FAoIBAA4toGPU/VcKFmK92zgO05jsg5vJzw5xeoxRWKrLg7iby6Su6BuNgaVwfYWeZuOhnXakid7FvFXKH6x44o9gyFm5bKqFhaXDzAnxzqcLeM5V+gititOsstpZCbVOoKQOhgTHyxpFNVX3E/nB8EunydWyhQMxKme//NsRroFm1vWljQKyL3zER82AzyseEpEYZoB/6g0n5uF2lR7KllxeBlINsceQ8g3JkmJTdS1hoXcyUSsZ+EgrRbCykNB5aVC5G3/W1OSZsFHbbMrYHCMnaYKwMqLmOkb11o6nOrJJ4pgHj8CVcp2TNjfy3y0Ru6RZ42b0Q+3LktJBGu9r5d04FgI=\n"
"-----END RSA PRIVATE KEY-----";
static const char privkey_2048_buf[] = "-----BEGIN RSA PRIVATE KEY-----\r\n"
"MIIEowIBAAKCAQEA8N8hdkemvj6Tpk975/OWhv9BrTsCBCu+ZYfDb5VI7U2meKBg\r\n"
"3dAkyyhRlY3fNwSRzBUMCzsHjpgnsB40wxOgiwlB9n6PMhq0qUVKAdCpKwFztsKd\r\n"
"JJAsCUC+Zlwxn4RpH6ZnMl3a/njRYjuDyI32kucMP/lBRo7ks1798Gy/j+x1h5xA\r\n"
"vZSlFoEXKjCC6S1DWhALePuZnk4m/jGP6g+YfyJXSTqsenKa/DcWndfn/JoElZ0J\r\n"
"nhud8lBXwVe6mMheE1yqfL+VTU1nwg/TPNZrZsFz2sXig/RQCKt6LuSuzhRpsLp+\r\n"
"BdwqEs9xrwlhZnp7j4kQBomISd6kAxQfYVROHQIDAQABAoIBAHgtO4rB8QWWPyCJ\r\n"
"I670r7OnA2OkvzrJgHMzq2SuvPX4+gfRLMM+qDzcXugZIrdWhk+maJ3p07lnXNXY\r\n"
"HEcAMedstQaA2n0LKfwSX/xL2TtlvBABRVoKvI3ZSaXUdcW60KBD69ULUsoICZ/T\r\n"
"Rcr4WX+t20TH3bOQc7ayvEwKVgE95xIUpTH9asw8uOPvKxW2j5OLQgZuWrWyUDg0\r\n"
"MFh92PhWtw3i5zq6OpTTsFJeceKYV/VstIYjZ+FslmhjQxJbr+2DJRbpHXKceqy6\r\n"
"9yWlSV0EM7neFCHlDa2WPhK8we+6IvMiNVQKj46fHGYNBaW/ZSX7TiG5J0Uqj2e9\r\n"
"0MUGJ8ECgYEA+frJabhfzW5+JfGjTObeznJZE6fAOjFzaBIwFu8Kz2mIjYpQlwVK\r\n"
"EepMkv2KkrJuqS4GnI+Nkq7G0BAUyUj9tTJ3HQzvtJrxsnxVi99Yofx1s1P4YAnu\r\n"
"c8t3ElJoQ4BRoQIs/hIvyYn22IxllBHiGESrnPQ38D82xyXQgd6S8JkCgYEA9qww\r\n"
"j7jx6Xpy/D1Dq8Dvalm7pz3J+yHnti4w2cqZ67grUoyGnNPtciNDdfi4JzLiKkUu\r\n"
"SDS3DacvFpFyND0m8sbpMjnR8Rvhj+bfH8KcOAowD+YR/+6vSb/P/aBt6gYXcaBn\r\n"
"cjepx+sE81mnC7UrHb4TjG4hO5t3ZTc6X28gyCUCgYAMZn9lSisecrO5SCJUp0M4\r\n"
"NH3stq6XdGqIKBbQnG0J2u9WLh1PUIjbGKdRx1f/bPCGXe0gCRL5yse7/IA7d+51\r\n"
"9ZnpDAI8EE+bDgXkWWD5MB/alHjGstdsURSICSR47L2f4g6/T8GlGr3vAg/r53My\r\n"
"xv1IXOkFdu1NtbeBKbxaSQKBgENDmw5mAVmIcXiFAEICn4ahp4EoYT6g9T2BhQKu\r\n"
"s6BKnU2qUj7Lr5ETOp8dzqGpx3B9Yux/q3cGotmFmd3S2x8SzJ5MlAoqbyy9aRSR\r\n"
"DeZeKNL9CuV+YcA7lOz1ZWOOe7AZbHwB38NLPBNb3CheI769iTkfAuLtNvabw8go\r\n"
"VokdAoGBALyvBhW+Squ5tx8NOEgAisakhAVOnT6jcoeKy6FyjcvKaWagmCOCC7Gz\r\n"
"QB9Yf1tJ+3di+aLtWWdmU494iKJHBtPMhfrYltCpxHHQGlUc/GLPY3Z5bBYYYWpb\r\n"
"Wzw4ZvDraKlAs7a9CRwS5cpktk5ptK4rc5noSXkvV+yOT75zXat2\r\n"
"-----END RSA PRIVATE KEY-----\r\n";
#endif
_Static_assert(sizeof(pki_rsa2048_output) == 2048/8, "rsa2048 output is wrong size");
@ -277,10 +306,10 @@ static void print_rsa_details(mbedtls_rsa_context *rsa)
}
#endif
TEST_CASE("test performance RSA key operations", "[bignum][ignore]")
TEST_CASE("test performance RSA key operations", "[bignum]")
{
for (int keysize = 2048; keysize <= 4096; keysize += 2048) {
rsa_key_operations(keysize, true, false, true);
rsa_key_operations(keysize, true, false, false);
}
}
@ -307,12 +336,17 @@ static void rsa_key_operations(int keysize, bool check_performance, bool use_bli
if (generate_new_rsa) {
mbedtls_rsa_init(&rsa, MBEDTLS_RSA_PRIVATE, 0);
TEST_ASSERT_EQUAL(0, mbedtls_rsa_gen_key(&rsa, myrand, NULL, keysize, 65537));
} else if (keysize==4096) { // pre-generated private key only available for keysize=4096
} else if (keysize==4096) {
mbedtls_pk_context clientkey;
mbedtls_pk_init(&clientkey);
TEST_ASSERT_EQUAL(0, mbedtls_pk_parse_key(&clientkey, (const uint8_t *)privkey_buf, sizeof(privkey_buf), NULL, 0));
TEST_ASSERT_EQUAL(0, mbedtls_pk_parse_key(&clientkey, (const uint8_t *)privkey_4096_buf, sizeof(privkey_4096_buf), NULL, 0));
memcpy(&rsa, mbedtls_pk_rsa(clientkey), sizeof(mbedtls_rsa_context));
} else {
} else if (keysize==2048) {
mbedtls_pk_context clientkey;
mbedtls_pk_init(&clientkey);
TEST_ASSERT_EQUAL(0, mbedtls_pk_parse_key(&clientkey, (const uint8_t *)privkey_2048_buf, sizeof(privkey_2048_buf), NULL, 0));
memcpy(&rsa, mbedtls_pk_rsa(clientkey), sizeof(mbedtls_rsa_context));
} else { // pre-generated private key only available for keysize=4096 and 2048
printf("Not supported keysize, please use generate_new_rsa=true\n");
abort();
}

View File

@ -174,10 +174,10 @@ static inline uint32_t periph_ll_get_rst_en_mask(periph_module_t periph, bool en
}
case PERIPH_RSA_MODULE:
if (enable == true) {
// Also clear reset on digital signature, otherwise RSA is held in reset
/* also clear reset on digital signature, otherwise RSA is held in reset */
return (DPORT_CRYPTO_RSA_RST | DPORT_CRYPTO_DS_RST);
} else {
// Don't reset digital signature unit, as this resets AES also
/* don't reset digital signature unit, as this resets AES also */
return DPORT_CRYPTO_RSA_RST;
}
case PERIPH_CRYPTO_DMA_MODULE:
@ -262,4 +262,4 @@ static inline void periph_ll_reset(periph_module_t periph)
#ifdef __cplusplus
}
#endif
#endif

View File

@ -23,7 +23,6 @@
#include "mbedtls/ecp.h"
typedef struct crypto_bignum crypto_bignum;
#if !TEMPORARY_DISABLED_FOR_TARGETS(ESP32S2)
TEST_CASE("Test crypto lib bignum apis", "[wpa_crypto]")
{
{
@ -279,7 +278,6 @@ TEST_CASE("Test crypto lib bignum apis", "[wpa_crypto]")
}
}
#endif //!TEMPORARY_DISABLED_FOR_TARGETS(ESP32S2)
/*

View File

@ -27,7 +27,7 @@
typedef struct crypto_bignum crypto_bignum;
#if !TEMPORARY_DISABLED_FOR_TARGETS(ESP32S2)
static struct wpabuf *wpabuf_alloc2(size_t len)
{
struct wpabuf *buf = (struct wpabuf *)os_zalloc(sizeof(struct wpabuf) + len);
@ -267,6 +267,5 @@ TEST_CASE("Test SAE functionality with ECC group", "[wpa3_sae]")
ESP_LOGI("SAE Test", "=========== Complete ============");
}
#endif //!TEMPORARY_DISABLED_FOR_TARGETS(ESP32S2)
#endif /* CONFIG_WPA3_SAE */