optimize hardware encryption when using ECC certificate

This commit is contained in:
xutao 2020-11-17 17:50:05 +08:00
parent bea8c46ba6
commit c4352192a4

View File

@ -63,29 +63,6 @@ static const __attribute__((unused)) char *TAG = "bignum";
#define ciL (sizeof(mbedtls_mpi_uint)) /* chars in limb */ #define ciL (sizeof(mbedtls_mpi_uint)) /* chars in limb */
#define biL (ciL << 3) /* bits in limb */ #define biL (ciL << 3) /* bits in limb */
#if defined(CONFIG_MBEDTLS_MPI_USE_INTERRUPT)
/* Binary semaphore used in interrupt mode: created in rsa_isr_initialise(),
   given from rsa_complete_isr() and taken (with a timeout) in
   wait_op_complete(). NULL until rsa_isr_initialise() has run. */
static SemaphoreHandle_t op_complete_sem;
static IRAM_ATTR void rsa_complete_isr(void *arg)
{
BaseType_t higher_woken;
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
xSemaphoreGiveFromISR(op_complete_sem, &higher_woken);
if (higher_woken) {
portYIELD_FROM_ISR();
}
}
static void rsa_isr_initialise()
{
if (op_complete_sem == NULL) {
op_complete_sem = xSemaphoreCreateBinary();
esp_intr_alloc(ETS_RSA_INTR_SOURCE, 0, rsa_complete_isr, NULL, NULL);
}
}
#endif /* CONFIG_MBEDTLS_MPI_USE_INTERRUPT */
static _lock_t mpi_lock; static _lock_t mpi_lock;
void esp_mpi_acquire_hardware( void ) void esp_mpi_acquire_hardware( void )
@ -100,9 +77,6 @@ void esp_mpi_acquire_hardware( void )
while(DPORT_REG_READ(RSA_CLEAN_REG) != 1); while(DPORT_REG_READ(RSA_CLEAN_REG) != 1);
// Note: from enabling RSA clock to here takes about 1.3us // Note: from enabling RSA clock to here takes about 1.3us
#ifdef CONFIG_MBEDTLS_MPI_USE_INTERRUPT
rsa_isr_initialise();
#endif
} }
void esp_mpi_release_hardware( void ) void esp_mpi_release_hardware( void )
@ -267,19 +241,12 @@ static inline void start_op(uint32_t op_reg)
*/ */
static inline void wait_op_complete(uint32_t op_reg) static inline void wait_op_complete(uint32_t op_reg)
{ {
#ifdef CONFIG_MBEDTLS_MPI_USE_INTERRUPT
if (!xSemaphoreTake(op_complete_sem, 2000 / portTICK_PERIOD_MS)) {
ESP_LOGE(TAG, "Timed out waiting for RSA operation (op_reg 0x%x int_reg 0x%x)",
op_reg, DPORT_REG_READ(RSA_INTERRUPT_REG));
abort(); /* indicates a fundamental problem with driver */
}
#else
while(DPORT_REG_READ(RSA_INTERRUPT_REG) != 1) while(DPORT_REG_READ(RSA_INTERRUPT_REG) != 1)
{ } { }
/* clear the interrupt */ /* clear the interrupt */
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1); DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
#endif
} }
@ -338,14 +305,129 @@ int esp_mpi_mul_mpi_mod(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
#if defined(MBEDTLS_MPI_EXP_MOD_ALT) #if defined(MBEDTLS_MPI_EXP_MOD_ALT)
/*
 * Run one hardware multiplication started via RSA_MULT_START_REG and store
 * the result, reduced below M, in Z. The caller's comments describe this as a
 * Montgomery multiplication (result = X * Y * R^-1 mod M).
 *
 * Z        - receives the result; grown to hw_words limbs, sign forced to +1.
 * X, Y     - operands, copied to the X and RB memory blocks respectively.
 * M        - modulus; only copied to the hardware when again == false.
 * Mprime   - value written to RSA_M_DASH_REG when again == false.
 * hw_words - operand length in limbs passed to every memory-block copy; the
 *            mode register is set to hw_words / 16 - 1.
 * again    - false on the first call of a sequence (loads M, Mprime and the
 *            mode register); true to reuse what an earlier call loaded.
 *
 * Returns 0 on success, or the error code from mbedtls_mpi_grow() /
 * mbedtls_mpi_sub_mpi().
 *
 * NOTE(review): this function does not acquire the hardware itself; the
 * caller visible in this file calls esp_mpi_acquire_hardware() first.
 */
static int mont(mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi* Y, const mbedtls_mpi* M,
mbedtls_mpi_uint Mprime,
size_t hw_words,
bool again)
{
// Note Z may be the same pointer as X or Y
// (X and Y are copied into hardware memory before Z is touched below)
int ret = 0;
// montgomery mult prepare
// M, M' and the mode register are only written on the first call of a sequence
if (again == false) {
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, hw_words);
DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
DPORT_REG_WRITE(RSA_MULT_MODE_REG, hw_words / 16 - 1);
}
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Y, hw_words);
start_op(RSA_MULT_START_REG);
// Z is prepared between start_op() and wait_op_complete(); presumably so the
// allocation overlaps with the hardware operation
Z->s = 1;
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, hw_words) );
wait_op_complete(RSA_MULT_START_REG);
/* Read back the result */
mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, hw_words);
/* from HAC 14.36 - 3. If Z >= M then Z = Z - M */
if (mbedtls_mpi_cmp_mpi(Z, M) >= 0) {
MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(Z, Z, M));
}
cleanup:
return ret;
}
/* /*
* Sliding-window exponentiation: Z = X^Y mod M (HAC 14.85) * Return the most significant one-bit.
* *
* _Rinv is optional pre-calculated version of Rinv (via calculate_rinv()). * _Rinv is optional pre-calculated version of Rinv (via calculate_rinv()).
* *
* (See RSA Accelerator section in Technical Reference for more about Mprime, Rinv) * (See RSA Accelerator section in Technical Reference for more about Mprime, Rinv)
* *
*/ */
static size_t mbedtls_mpi_msb( const mbedtls_mpi* X )
{
int i, j;
if (X != NULL && X->n != 0) {
for (i = X->n - 1; i >= 0; i--) {
if (X->p[i] != 0) {
for (j = biL - 1; j >= 0; j--) {
if ((X->p[i] & (1 << j)) != 0) {
return (i * biL) + j;
}
}
}
}
}
return 0;
}
/*
 * Montgomery exponentiation: Z = X ^ Y mod M (HAC 14.94)
 *
 * Left-to-right square-and-multiply over the bits of Y, with every product
 * computed by mont().
 *
 * Z        - receives the result.
 * X, Y, M  - base, exponent and modulus.
 * Rinv     - used as the "R^2 mod M" operand that takes values into and out
 *            of the Montgomery domain.
 * hw_words - operand length in limbs, forwarded to mont().
 * Mprime   - forwarded to mont().
 *
 * Acquires the MPI hardware for the duration of the calculation and releases
 * it before returning. Returns 0 on success or the first mbedtls error hit.
 */
static int mpi_montgomery_exp_calc( mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi* Y, const mbedtls_mpi* M,
                                    mbedtls_mpi* Rinv,
                                    size_t hw_words,
                                    mbedtls_mpi_uint Mprime )
{
    int ret = 0;
    mbedtls_mpi x_mont, unity;

    mbedtls_mpi_init(&x_mont);
    mbedtls_mpi_init(&unity);

    /* Build the constant 1 as an MPI grown to hw_words limbs. */
    ret = mbedtls_mpi_grow(&unity, hw_words);
    if (ret == 0) {
        ret = mbedtls_mpi_set_bit(&unity, 0, 1);
    }
    if (ret != 0) {
        /* The hardware has not been acquired yet, so only the constant is
           released here. */
        mbedtls_mpi_free(&unity);
        return ret;
    }

    /* Step 0: index of the highest bit set in Y. */
    const int top_bit = mbedtls_mpi_msb(Y);

    esp_mpi_acquire_hardware();

    /* Step 1.1: x_mont = mont(X, R^2 mod M). This first call also loads M
       and Mprime into the hardware. */
    MBEDTLS_MPI_CHK( mont(&x_mont, X, Rinv, M, Mprime, hw_words, false) );

    /* Step 1.2: Z = mont(R^2 mod M, 1) = R mod M. */
    MBEDTLS_MPI_CHK( mont(Z, Rinv, &unity, M, Mprime, hw_words, true) );

    /* Step 2: scan the exponent from its top bit down to bit 0. */
    for (int bit = top_bit; bit >= 0; bit--) {
        /* Step 2.1: Z = mont(Z, Z). Skipped for the top bit, where Z is
           still unity. */
        if (bit != top_bit) {
            MBEDTLS_MPI_CHK( mont(Z, Z, Z, M, Mprime, hw_words, true) );
        }

        /* Step 2.2: if this exponent bit is set, Z = mont(Z, x_mont). */
        if (mbedtls_mpi_get_bit(Y, bit)) {
            MBEDTLS_MPI_CHK( mont(Z, Z, &x_mont, M, Mprime, hw_words, true) );
        }
    }

    /* Step 3: Z = mont(Z, 1). */
    MBEDTLS_MPI_CHK( mont(Z, Z, &unity, M, Mprime, hw_words, true) );

cleanup:
    mbedtls_mpi_free(&x_mont);
    mbedtls_mpi_free(&unity);
    esp_mpi_release_hardware();
    return ret;
}
int mbedtls_mpi_exp_mod( mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi* Y, const mbedtls_mpi* M, mbedtls_mpi* _Rinv ) int mbedtls_mpi_exp_mod( mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi* Y, const mbedtls_mpi* M, mbedtls_mpi* _Rinv )
{ {
int ret = 0; int ret = 0;
@ -392,31 +474,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi
Mprime = modular_inverse(M); Mprime = modular_inverse(M);
esp_mpi_acquire_hardware(); MBEDTLS_MPI_CHK( mpi_montgomery_exp_calc(Z, X, Y, M, Rinv, hw_words, Mprime) );
/* "mode" register loaded with number of 512-bit blocks, minus 1 */
DPORT_REG_WRITE(RSA_MODEXP_MODE_REG, (hw_words / 16) - 1);
/* Load M, X, Rinv, M-prime (M-prime is mod 2^32) */
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, hw_words);
mpi_to_mem_block(RSA_MEM_Y_BLOCK_BASE, Y, hw_words);
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, hw_words);
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Rinv, hw_words);
DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
start_op(RSA_START_MODEXP_REG);
/* X ^ Y may actually be shorter than M, but unlikely when used for crypto */
if ((ret = mbedtls_mpi_grow(Z, m_words)) != 0) {
esp_mpi_release_hardware();
goto cleanup;
}
wait_op_complete(RSA_START_MODEXP_REG);
mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, m_words);
esp_mpi_release_hardware();
// Compensate for negative X // Compensate for negative X
if (X->s == -1 && (Y->p[0] & 1) != 0) { if (X->s == -1 && (Y->p[0] & 1) != 0) {
Z->s = -1; Z->s = -1;
@ -458,7 +516,7 @@ static int modular_multiply_finish(mbedtls_mpi *Z, const mbedtls_mpi *X, const m
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, Y, hw_words); mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, Y, hw_words);
start_op(RSA_MULT_START_REG); start_op(RSA_MULT_START_REG);
Z->s = 1;
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, z_words) ); MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, z_words) );
wait_op_complete(RSA_MULT_START_REG); wait_op_complete(RSA_MULT_START_REG);
@ -509,6 +567,9 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
return ret; return ret;
} }
/* Grow Z to result size early, avoid interim allocations */
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, z_words) );
/* If either factor is over 2048 bits, we can't use the standard hardware multiplier /* If either factor is over 2048 bits, we can't use the standard hardware multiplier
(it assumes result is double longest factor, and result is max 4096 bits.) (it assumes result is double longest factor, and result is max 4096 bits.)
@ -553,8 +614,6 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
start_op(RSA_MULT_START_REG); start_op(RSA_MULT_START_REG);
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, z_words) );
wait_op_complete(RSA_MULT_START_REG); wait_op_complete(RSA_MULT_START_REG);
/* Read back the result */ /* Read back the result */
@ -661,9 +720,6 @@ static int mpi_mult_mpi_overlong(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbe
}; };
mbedtls_mpi_init(&Ztemp); mbedtls_mpi_init(&Ztemp);
/* Grow Z to result size early, avoid interim allocations */
mbedtls_mpi_grow(Z, z_words);
/* Get result Ztemp = Yp * X (need temporary variable Ztemp) */ /* Get result Ztemp = Yp * X (need temporary variable Ztemp) */
MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi(&Ztemp, X, &Yp) ); MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi(&Ztemp, X, &Yp) );