diff options
-rw-r--r-- | hal.h | 45 | ||||
-rw-r--r-- | hal_io_eim.c | 31 | ||||
-rw-r--r-- | hal_io_fmc.c | 31 | ||||
-rw-r--r-- | hal_io_i2c.c | 27 | ||||
-rw-r--r-- | modexp.c | 243 | ||||
-rw-r--r-- | rsa.c | 102 | ||||
-rw-r--r-- | verilog_constants.h | 32 |
7 files changed, 285 insertions, 226 deletions
@@ -201,11 +201,37 @@ typedef struct hal_core hal_core_t;
 extern void hal_io_set_debug(int onoff);
 extern hal_error_t hal_io_write(const hal_core_t *core, hal_addr_t offset, const uint8_t *buf, size_t len);
 extern hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf, size_t len);
-extern hal_error_t hal_io_init(const hal_core_t *core);
-extern hal_error_t hal_io_next(const hal_core_t *core);
 extern hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count);
-extern hal_error_t hal_io_wait_ready(const hal_core_t *core);
-extern hal_error_t hal_io_wait_valid(const hal_core_t *core);
+
+static inline hal_error_t hal_io_zero(const hal_core_t *core)   /* Write an all-zero word to ADDR_CTRL */
+{
+  const uint8_t buf[4] = { 0, 0, 0, 0 };
+  return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
+}
+
+static inline hal_error_t hal_io_init(const hal_core_t *core)   /* Write CTRL_INIT to ADDR_CTRL; replaces the per-transport copies removed from hal_io_eim.c, hal_io_fmc.c and hal_io_i2c.c */
+{
+  const uint8_t buf[4] = { 0, 0, 0, CTRL_INIT };                /* Control bits go in the last of the four bytes */
+  return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
+}
+
+static inline hal_error_t hal_io_next(const hal_core_t *core)   /* Write CTRL_NEXT to ADDR_CTRL, same layout as hal_io_init() */
+{
+  const uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT };
+  return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
+}
+
+static inline hal_error_t hal_io_wait_ready(const hal_core_t *core)     /* Wait for STATUS_READY via hal_io_wait() */
+{
+  int limit = -1;                                               /* -1: hal_io_wait() in hal_io_eim.c, hal_io_fmc.c and hal_io_i2c.c substitutes its own default limit */
+  return hal_io_wait(core, STATUS_READY, &limit);
+}
+
+static inline hal_error_t hal_io_wait_valid(const hal_core_t *core)     /* Wait for STATUS_VALID via hal_io_wait() */
+{
+  int limit = -1;                                               /* Same -1 sentinel as hal_io_wait_ready() */
+  return hal_io_wait(core, STATUS_VALID, &limit);
+}
 
 /*
  * Core management functions.
@@ -378,10 +404,13 @@ extern hal_error_t hal_pbkdf2(hal_core_t *core, extern void hal_modexp_set_debug(const int onoff); extern hal_error_t hal_modexp(hal_core_t *core, - const uint8_t * const msg, const size_t msg_len, /* Message */ - const uint8_t * const exp, const size_t exp_len, /* Exponent */ - const uint8_t * const mod, const size_t mod_len, /* Modulus */ - uint8_t * result, const size_t result_len); + const int precalc_done, + const uint8_t * const msg, const size_t msg_len, /* Message */ + const uint8_t * const exp, const size_t exp_len, /* Exponent */ + const uint8_t * const mod, const size_t mod_len, /* Modulus */ + uint8_t * result, const size_t result_len, /* Result of exponentiation */ + uint8_t * coeff, const size_t coeff_len, /* Modulus coefficient (r/w) */ + uint8_t * mont, const size_t mont_len); /* Montgomery factor (r/w)*/ /* * Master Key Memory Interface diff --git a/hal_io_eim.c b/hal_io_eim.c index 5824f5b..eabc42e 100644 --- a/hal_io_eim.c +++ b/hal_io_eim.c @@ -47,7 +47,7 @@ static int inited = 0; #define EIM_IO_TIMEOUT 100000000 #endif -static hal_error_t init(void) +static inline hal_error_t init(void) { if (inited) return HAL_OK; @@ -61,7 +61,7 @@ static hal_error_t init(void) /* translate cryptech register number to EIM address */ -static hal_addr_t eim_offset(hal_addr_t offset) +static inline hal_addr_t eim_offset(hal_addr_t offset) { return EIM_BASE_ADDR + (offset << 2); } @@ -134,24 +134,15 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf, return HAL_OK; } -hal_error_t hal_io_init(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_INIT }; - return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); -} - -hal_error_t hal_io_next(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT }; - return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); -} - hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count) { hal_error_t err; uint8_t buf[4]; int i; + if (count && 
*count == -1) + *count = EIM_IO_TIMEOUT; + for (i = 1; ; ++i) { if (count && (*count > 0) && (i >= *count)) @@ -168,18 +159,6 @@ hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count) } } -hal_error_t hal_io_wait_ready(const hal_core_t *core) -{ - int limit = EIM_IO_TIMEOUT; - return hal_io_wait(core, STATUS_READY, &limit); -} - -hal_error_t hal_io_wait_valid(const hal_core_t *core) -{ - int limit = EIM_IO_TIMEOUT; - return hal_io_wait(core, STATUS_VALID, &limit); -} - /* * Local variables: * indent-tabs-mode: nil diff --git a/hal_io_fmc.c b/hal_io_fmc.c index 76d6883..5ac73c4 100644 --- a/hal_io_fmc.c +++ b/hal_io_fmc.c @@ -51,7 +51,7 @@ static int inited = 0; #define FMC_IO_TIMEOUT 100000000 #endif -static hal_error_t init(void) +static inline hal_error_t init(void) { if (!inited) { fmc_init(); @@ -62,7 +62,7 @@ static hal_error_t init(void) /* Translate cryptech register number to FMC address. */ -static hal_addr_t fmc_offset(hal_addr_t offset) +static inline hal_addr_t fmc_offset(hal_addr_t offset) { return offset << 2; } @@ -136,24 +136,15 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf, return HAL_OK; } -hal_error_t hal_io_init(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_INIT }; - return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); -} - -hal_error_t hal_io_next(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT }; - return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); -} - hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count) { hal_error_t err; uint8_t buf[4]; int i; + if (count && *count == -1) + *count = FMC_IO_TIMEOUT; + for (i = 1; ; ++i) { if (count && (*count > 0) && (i >= *count)) @@ -172,18 +163,6 @@ hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count) } } -hal_error_t hal_io_wait_ready(const hal_core_t *core) -{ - int limit = FMC_IO_TIMEOUT; - return hal_io_wait(core, STATUS_READY, &limit); -} - -hal_error_t 
hal_io_wait_valid(const hal_core_t *core) -{ - int limit = FMC_IO_TIMEOUT; - return hal_io_wait(core, STATUS_VALID, &limit); -} - /* * Local variables: * indent-tabs-mode: nil diff --git a/hal_io_i2c.c b/hal_io_i2c.c index e7dbbb6..018e264 100644 --- a/hal_io_i2c.c +++ b/hal_io_i2c.c @@ -301,24 +301,15 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf, return HAL_OK; } -hal_error_t hal_io_init(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_INIT }; - return hal_io_write(core, ADDR_CTRL, buf, 4); -} - -hal_error_t hal_io_next(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT }; - return hal_io_write(core, ADDR_CTRL, buf, 4); -} - hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count) { hal_error_t err; uint8_t buf[4]; int i; + if (count && *count == -1) + *count = 10; + for (i = 1; ; ++i) { if (count && (*count > 0) && (i >= *count)) @@ -336,18 +327,6 @@ hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count) } } -hal_error_t hal_io_wait_ready(const hal_core_t *core) -{ - int limit = 10; - return hal_io_wait(core, STATUS_READY, &limit); -} - -hal_error_t hal_io_wait_valid(const hal_core_t *core) -{ - int limit = 10; - return hal_io_wait(core, STATUS_VALID, &limit); -} - /* * Local variables: * indent-tabs-mode: nil @@ -43,7 +43,6 @@ #include <stdio.h> #include <stdint.h> -#include <assert.h> #include "hal.h" #include "hal_internal.h" @@ -60,173 +59,217 @@ void hal_modexp_set_debug(const int onoff) } /* - * Check a result, report on failure if debugging, pass failures up - * the chain. + * Get value of an ordinary register. 
*/

-#define check(_expr_) \
-  do { \
-    hal_error_t _err = (_expr_); \
-    if (_err != HAL_OK && debug) \
-      printf("%s failed: %s\n", #_expr_, hal_error_string(_err)); \
-    if (_err != HAL_OK) \
-      return _err; \
-  } while (0)
+static inline hal_error_t get_register(const hal_core_t *core,  /* Core whose register is read */
+                                       const hal_addr_t addr,   /* Register address, passed straight to hal_io_read() */
+                                       uint32_t *value)         /* Out: register contents; must not be NULL */
+{
+  hal_error_t err;
+  uint8_t w[4];
+
+  if (value == NULL)
+    return HAL_ERROR_IMPOSSIBLE;
+
+  if ((err = hal_io_read(core, addr, w, sizeof(w))) != HAL_OK)
+    return err;
+
+  *value = ((uint32_t) w[0] << 24) | ((uint32_t) w[1] << 16) | ((uint32_t) w[2] << 8) | ((uint32_t) w[3] << 0);  /* MSB first, mirroring set_register(); casts keep the shifts unsigned */
+
+  return HAL_OK;
+}

 /*
- * Set an ordinary register.
+ * Set value of an ordinary register.
  */

-static hal_error_t set_register(const hal_core_t *core,
-                                const hal_addr_t addr,
-                                uint32_t value)
+static inline hal_error_t set_register(const hal_core_t *core,  /* Core whose register is written */
+                                       const hal_addr_t addr,   /* Register address, passed straight to hal_io_write() */
+                                       const uint32_t value)    /* Value to store, sent most significant byte first */
 {
-  uint8_t w[4];
-  int i;
-
-  for (i = 3; i >= 0; i--) {
-    w[i] = value & 0xFF;
-    value >>= 8;
-  }
+  const uint8_t w[4] = {
+    ((value >> 24) & 0xFF),
+    ((value >> 16) & 0xFF),
+    ((value >>  8) & 0xFF),
+    ((value >>  0) & 0xFF)
+  };

   return hal_io_write(core, addr, w, sizeof(w));
 }

 /*
  * Get value of a data buffer. We reverse the order of 32-bit words
- * in the buffer during the transfer to match what the modexps6 core
+ * in the buffer during the transfer to match what the modexpa7 core
  * expects.
*/

-static hal_error_t get_buffer(const hal_core_t *core,
-                              const hal_addr_t data_addr,
-                              uint8_t *value,
-                              const size_t length)
+static inline hal_error_t get_buffer(const hal_core_t *core,
+                                     const hal_addr_t data_addr,
+                                     uint8_t *value,
+                                     const size_t length)
 {
+  hal_error_t err;
   size_t i;

-  assert(value != NULL && length % 4 == 0);
+  if (value == NULL || length % 4 != 0)
+    return HAL_ERROR_IMPOSSIBLE;       /* Error return replaces the assert() removed above */

   for (i = 0; i < length; i += 4)
-    check(hal_io_read(core, data_addr + i/4, &value[length - 4 - i], 4));
+    if ((err = hal_io_read(core, data_addr + i/4, &value[length - 4 - i], 4)) != HAL_OK)      /* Word i/4 of the core buffer fills value[] from the tail backwards */
+      return err;

   return HAL_OK;
 }

 /*
  * Set value of a data buffer. We reverse the order of 32-bit words
- * in the buffer during the transfer to match what the modexps6 core
+ * in the buffer during the transfer to match what the modexpa7 core
  * expects.
+ *
+ * Do we need to zero the portion of the buffer we're not using
+ * explicitly (that is, the portion between `length` and the value of
+ * the core's MODEXPA7_ADDR_BUFFER_BITS register)? We've gotten away
+ * without doing this so far, but the core doesn't take an explicit
+ * length parameter for the message itself, instead it assumes that
+ * the message is either as long as or twice as long as the exponent,
+ * depending on the setting of the CRT mode bit. Maybe initializing
+ * the core clears the excess bits so there's no issue? Dunno. Have
+ * never seen a problem with this yet, just don't know why not.
*/

-static hal_error_t set_buffer(const hal_core_t *core,
-                              const hal_addr_t data_addr,
-                              const uint8_t * const value,
-                              const size_t length)
+static inline hal_error_t set_buffer(const hal_core_t *core,          /* Core whose buffer is written */
+                                     const hal_addr_t data_addr,      /* Address of the buffer's first 32-bit word */
+                                     const uint8_t * const value,     /* Bytes to send; must not be NULL */
+                                     const size_t length)             /* Byte count; must be a multiple of four */
 {
+  hal_error_t err;                     /* Result of each hal_io_write(); tested and returned in the loop below */
   size_t i;

-  assert(value != NULL && length % 4 == 0);
+  if (value == NULL || length % 4 != 0)
+    return HAL_ERROR_IMPOSSIBLE;       /* Error return replaces the assert() removed above */

   for (i = 0; i < length; i += 4)
-    check(hal_io_write(core, data_addr + i/4, &value[length - 4 - i], 4));
+    if ((err = hal_io_write(core, data_addr + i/4, &value[length - 4 - i], 4)) != HAL_OK)     /* Word i/4 of the core buffer is taken from the tail of value[] backwards */
+      return err;

   return HAL_OK;
 }

 /*
+ * Check a result, report on failure if debugging, pass failures up
+ * the chain.
+ */
+
+#define check(_expr_) \
+  do { \
+    hal_error_t _err = (_expr_); \
+    if (_err != HAL_OK && debug) \
+      hal_log(HAL_LOG_WARN, "%s failed: %s\n", #_expr_, hal_error_string(_err)); \
+    if (_err != HAL_OK) { \
+      hal_core_free(core); \
+      return _err; \
+    } \
+  } while (0)
+
+/*
  * Run one modexp operation.
  */

 hal_error_t hal_modexp(hal_core_t *core,
-                       const uint8_t * const msg, const size_t msg_len, /* Message */
-                       const uint8_t * const exp, const size_t exp_len, /* Exponent */
-                       const uint8_t * const mod, const size_t mod_len, /* Modulus */
-                       uint8_t *result, const size_t result_len)
+                       const int precalc_done,
+                       const uint8_t * const msg, const size_t msg_len, /* Message */
+                       const uint8_t * const exp, const size_t exp_len, /* Exponent */
+                       const uint8_t * const mod, const size_t mod_len, /* Modulus */
+                       uint8_t *result, const size_t result_len, /* Result of exponentiation */
+                       uint8_t *coeff, const size_t coeff_len, /* Modulus coefficient (r/w) */
+                       uint8_t *mont, const size_t mont_len) /* Montgomery factor (r/w)*/
 {
   hal_error_t err;

   /*
-   * All pointers must be set, neither message nor exponent may be
-   * longer than modulus, result buffer must not be shorter than
-   * modulus, and all input lengths must be a multiple of four.
- * - * The multiple-of-four restriction is a pain, but the rest of the - * HAL code currently enforces the same restriction, and allowing - * arbitrary lengths would require some tedious shuffling to deal - * with alignment issues, so it's not worth trying to fix only here. + * All pointers must be set, exponent may not be longer than + * modulus, message may not be longer than twice the modulus (CRT + * mode), result buffer must not be shorter than modulus, and all + * input lengths must be a multiple of four bytes (the core is all + * about 32-bit words). */ - if (msg == NULL || exp == NULL || mod == NULL || result == NULL || - msg_len > mod_len || exp_len > mod_len || result_len < mod_len || - ((msg_len | exp_len | mod_len) & 3) != 0) + if (mod == NULL || + msg == NULL || msg_len > mod_len * 2 || + exp == NULL || exp_len > mod_len || + result == NULL || result_len < mod_len || + coeff == NULL || coeff_len != mod_len || + mont == NULL || mont_len != mod_len || + ((msg_len | exp_len | mod_len | coeff_len | mont_len) & 3) != 0) return HAL_ERROR_BAD_ARGUMENTS; - if (((err = hal_core_alloc(MODEXPS6_NAME, &core)) == HAL_ERROR_CORE_NOT_FOUND) && - ((err = hal_core_alloc(MODEXPA7_NAME, &core)) != HAL_OK)) - return err; + /* + * Gonna need to think about running two modexpa7 cores in parallel + * in CRT mode for full speed signature. 
+ */ -#undef check -#define check(_expr_) \ - do { \ - hal_error_t _err = (_expr_); \ - if (_err != HAL_OK && debug) \ - printf("%s failed: %s\n", #_expr_, hal_error_string(_err)); \ - if (_err != HAL_OK) { \ - hal_core_free(core); \ - return _err; \ - } \ - } while (0) + if (((err = hal_core_alloc(MODEXPA7_NAME, &core)) != HAL_OK)) + return err; /* - * We probably ought to take the mode (fast vs constant-time) as an - * argument, but for the moment we just guess that really short - * exponent means we're using the public key and can use fast mode, - * really short messages are Miller-Rabin tests and can also use - * fast mode, all other cases are something to do with the private - * key and therefore must use constant-time mode. - * - * Unclear whether it's worth trying to figure out exactly how long - * the operands are: assuming a multiple of eight is safe, but makes - * a bit more work for the core; checking to see how many bits are - * really set leaves the core sitting idle while the main CPU does - * these checks. No way to know which is faster without testing; - * take simple approach for the moment. + * Now that we have the core, check operand length against what it + * says it can handle. 
*/ - /* Select mode (1 = fast, 0 = safe) */ - check(set_register(core, MODEXPS6_ADDR_MODE, (exp_len <= 4 || msg_len <= 4))); + uint32_t operand_max; + check(get_register(core, MODEXPA7_ADDR_BUFFER_BITS, &operand_max)); + operand_max /= 8; - /* Set modulus size in bits */ - check(set_register(core, MODEXPS6_ADDR_MODULUS_WIDTH, mod_len * 8)); + if (msg_len > operand_max || exp_len > operand_max || mod_len > operand_max || + (coeff != NULL && coeff_len > operand_max) || + (mont != NULL && mont_len > operand_max)) { + hal_core_free(core); + return HAL_ERROR_BAD_ARGUMENTS; + } - /* Write new modulus */ - check(set_buffer(core, MODEXPS6_ADDR_MODULUS, mod, mod_len)); + /* Set modulus */ - /* Pre-calcuate speed-up coefficient */ - check(hal_io_init(core)); + check(set_register(core, MODEXPA7_ADDR_MODULUS_BITS, mod_len * 8)); + check(set_buffer(core, MODEXPA7_ADDR_MODULUS, mod, mod_len)); - /* Wait for calculation to complete */ - check(hal_io_wait_ready(core)); + /* + * Calculate modulus-dependent speedup factors if needed. Buffer + * space is always caller's problem (because caller almost certainly + * wants to stash these values in the keystore anyway). Calculation + * is edge-triggered by "init" bit going from zero to one. 
+ */ - /* Write new message */ - check(set_buffer(core, MODEXPS6_ADDR_MESSAGE, msg, msg_len)); + if (!precalc_done) { + check(hal_io_zero(core)); + check(hal_io_init(core)); + check(hal_io_wait_ready(core)); + check(get_buffer(core, MODEXPA7_ADDR_MODULUS_COEFF_OUT, coeff, coeff_len)); + check(get_buffer(core, MODEXPA7_ADDR_MONTGOMERY_FACTOR_OUT, mont, mont_len)); + } - /* Set new exponent length in bits */ - check(set_register(core, MODEXPS6_ADDR_EXPONENT_WIDTH, exp_len * 8)); + /* Load modulus-dependent speedup factors (even if we just calculated them) */ + check(set_buffer(core, MODEXPA7_ADDR_MODULUS_COEFF_IN, coeff, coeff_len)); + check(set_buffer(core, MODEXPA7_ADDR_MONTGOMERY_FACTOR_IN, mont, mont_len)); - /* Set new exponent */ - check(set_buffer(core, MODEXPS6_ADDR_EXPONENT, exp, exp_len)); + /* Select CRT mode if and only if message is longer than exponent */ + check(set_register(core, MODEXPA7_ADDR_MODE, + (msg_len > mod_len + ? MODEXPA7_MODE_CRT + : MODEXPA7_MODE_PLAIN))); - /* Start calculation */ - check(hal_io_next(core)); + /* Set message and exponent */ + check(set_buffer(core, MODEXPA7_ADDR_MESSAGE, msg, msg_len)); + check(set_buffer(core, MODEXPA7_ADDR_EXPONENT, exp, exp_len)); + check(set_register(core, MODEXPA7_ADDR_EXPONENT_BITS, exp_len * 8)); - /* Wait for result */ + /* Edge-trigger the "next" bit to start calculation, then wait for the result */ + check(hal_io_zero(core)); + check(hal_io_next(core)); check(hal_io_wait_valid(core)); - /* Extract result */ - check(get_buffer(core, MODEXPS6_ADDR_RESULT, result, mod_len)); - + /* Extract result, clean up, then done */ + check(get_buffer(core, MODEXPA7_ADDR_RESULT, result, mod_len)); hal_core_free(core); return HAL_OK; } @@ -70,7 +70,6 @@ #include <stdlib.h> #include <stddef.h> #include <string.h> -#include <assert.h> #include "hal.h" #include "hal_internal.h" @@ -94,6 +93,15 @@ #endif /* + * How big to make the buffers for the modulus coefficient and + * Montgomery factor. 
This will almost certainly want tuning. + */ + +#ifndef HAL_RSA_MAX_OPERAND_LENGTH +#define HAL_RSA_MAX_OPERAND_LENGTH (4096 / 8) +#endif + +/* * Whether we want debug output. */ @@ -123,7 +131,7 @@ void hal_rsa_set_blinding(const int onoff) */ struct hal_rsa_key { - hal_key_type_t type; /* What kind of key this is */ + hal_key_type_t type; /* What kind of key this is */ fp_int n[1]; /* The modulus */ fp_int e[1]; /* Public exponent */ fp_int d[1]; /* Private exponent */ @@ -132,8 +140,17 @@ struct hal_rsa_key { fp_int u[1]; /* 1/q mod p */ fp_int dP[1]; /* d mod (p - 1) */ fp_int dQ[1]; /* d mod (q - 1) */ + unsigned flags; /* Internal key flags */ + uint8_t /* ModExpA7 speedup factors */ + nC[HAL_RSA_MAX_OPERAND_LENGTH], nF[HAL_RSA_MAX_OPERAND_LENGTH], + pC[HAL_RSA_MAX_OPERAND_LENGTH/2], pF[HAL_RSA_MAX_OPERAND_LENGTH/2], + qC[HAL_RSA_MAX_OPERAND_LENGTH/2], qF[HAL_RSA_MAX_OPERAND_LENGTH/2]; }; +#define RSA_FLAG_PRECALC_N_DONE (1 << 0) +#define RSA_FLAG_PRECALC_P_DONE (1 << 1) +#define RSA_FLAG_PRECALC_Q_DONE (1 << 2) + const size_t hal_rsa_key_t_size = sizeof(hal_rsa_key_t); /* @@ -158,7 +175,7 @@ const size_t hal_rsa_key_t_size = sizeof(hal_rsa_key_t); case FP_OKAY: break; \ case FP_VAL: lose(HAL_ERROR_BAD_ARGUMENTS); \ case FP_MEM: lose(HAL_ERROR_ALLOCATION_FAILURE); \ - default: lose(HAL_ERROR_IMPOSSIBLE); \ + default: lose(HAL_ERROR_IMPOSSIBLE); \ } \ } while (0) @@ -171,7 +188,8 @@ static hal_error_t unpack_fp(const fp_int * const bn, uint8_t *buffer, const siz { hal_error_t err = HAL_OK; - assert(bn != NULL && buffer != NULL); + if (bn == NULL || buffer == NULL) + return HAL_ERROR_IMPOSSIBLE; const size_t bytes = fp_unsigned_bin_size(unconst_fp_int(bn)); @@ -193,22 +211,18 @@ static hal_error_t unpack_fp(const fp_int * const bn, uint8_t *buffer, const siz */ static hal_error_t modexp(hal_core_t *core, - const fp_int * msg, + const int precalc_done, + const fp_int * const msg, const fp_int * const exp, const fp_int * const mod, - fp_int *res) + fp_int *res, + 
uint8_t *coeff, const size_t coeff_len, + uint8_t *mont, const size_t mont_len) { hal_error_t err = HAL_OK; - assert(msg != NULL && exp != NULL && mod != NULL && res != NULL); - - fp_int reduced_msg[1] = INIT_FP_INT; - - if (fp_cmp_mag(unconst_fp_int(msg), unconst_fp_int(mod)) != FP_LT) { - fp_init(reduced_msg); - fp_mod(unconst_fp_int(msg), unconst_fp_int(mod), reduced_msg); - msg = reduced_msg; - } + if (msg == NULL || exp == NULL || mod == NULL || res == NULL || coeff == NULL || mont == NULL) + return HAL_ERROR_IMPOSSIBLE; const size_t msg_len = (fp_unsigned_bin_size(unconst_fp_int(msg)) + 3) & ~3; const size_t exp_len = (fp_unsigned_bin_size(unconst_fp_int(exp)) + 3) & ~3; @@ -222,11 +236,13 @@ static hal_error_t modexp(hal_core_t *core, if ((err = unpack_fp(msg, msgbuf, sizeof(msgbuf))) != HAL_OK || (err = unpack_fp(exp, expbuf, sizeof(expbuf))) != HAL_OK || (err = unpack_fp(mod, modbuf, sizeof(modbuf))) != HAL_OK || - (err = hal_modexp(core, + (err = hal_modexp(core, precalc_done, msgbuf, sizeof(msgbuf), expbuf, sizeof(expbuf), modbuf, sizeof(modbuf), - resbuf, sizeof(resbuf))) != HAL_OK) + resbuf, sizeof(resbuf), + coeff, coeff_len, + mont, mont_len)) != HAL_OK) goto fail; fp_read_unsigned_bin(res, resbuf, sizeof(resbuf)); @@ -249,10 +265,14 @@ static hal_error_t modexp(hal_core_t *core, */ static hal_error_t modexp(const hal_core_t *core, /* ignored */ + const int precalc_done, /* ignored */ const fp_int * const msg, const fp_int * const exp, const fp_int * const mod, - fp_int *res) + fp_int *res, + uint8_t *coeff, const size_t coeff_len, /* ignored */ + uint8_t *mont, const size_t mont_len) /* ignored */ + { hal_error_t err = HAL_OK; FP_CHECK(fp_exptmod(unconst_fp_int(msg), unconst_fp_int(exp), unconst_fp_int(mod), res)); @@ -281,7 +301,12 @@ static hal_error_t modexp(const hal_core_t *core, /* ignored */ int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) { - return modexp(NULL, a, b, c, d) == HAL_OK ? 
FP_OKAY : FP_VAL; + const size_t len = (fp_unsigned_bin_size(unconst_fp_int(b)) + 3) & ~3; + uint8_t C[len], F[len]; + const hal_error_t err = modexp(NULL, 0, a, b, c, d, C, sizeof(C), F, sizeof(F)); + memset(C, 0, sizeof(C)); + memset(F, 0, sizeof(F)); + return err == HAL_OK ? FP_OKAY : FP_VAL; } #endif /* HAL_RSA_SIGN_USE_MODEXP && HAL_RSA_KEYGEN_USE_MODEXP */ @@ -294,7 +319,8 @@ int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t * const key, fp_int *bf, fp_int *ubf) { - assert(key != NULL && bf != NULL && ubf != NULL); + if (key == NULL || bf == NULL || ubf == NULL) + return HAL_ERROR_IMPOSSIBLE; uint8_t rnd[fp_unsigned_bin_size(unconst_fp_int(key->n))]; hal_error_t err = HAL_OK; @@ -306,9 +332,12 @@ static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t fp_read_unsigned_bin(bf, rnd, sizeof(rnd)); fp_copy(bf, ubf); - if ((err = modexp(core, bf, key->e, key->n, bf)) != HAL_OK) + if ((err = modexp(core, (key->flags & RSA_FLAG_PRECALC_N_DONE), bf, key->e, key->n, bf, + key->nC, sizeof(key->nC), key->nF, sizeof(key->nF))) != HAL_OK) goto fail; + key->flags |= RSA_FLAG_PRECALC_N_DONE; + FP_CHECK(fp_invmod(ubf, unconst_fp_int(key->n), ubf)); fail: @@ -322,7 +351,8 @@ static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp_int *msg, fp_int *sig) { - assert(key != NULL && msg != NULL && sig != NULL); + if (key == NULL || msg == NULL || sig == NULL) + return HAL_ERROR_IMPOSSIBLE; hal_error_t err = HAL_OK; fp_int t[1] = INIT_FP_INT; @@ -343,11 +373,18 @@ static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp /* * m1 = msg ** dP mod p * m2 = msg ** dQ mod q + * + * This is just crying out to be done with parallel cores, but get + * the boring version working before jumping off that cliff. 
*/ - if ((err = modexp(core, msg, key->dP, key->p, m1)) != HAL_OK || - (err = modexp(core, msg, key->dQ, key->q, m2)) != HAL_OK) + if ((err = modexp(core, (key->flags & RSA_FLAG_PRECALC_P_DONE), + msg, key->dP, key->p, m1, key->pC, sizeof(key->pC), key->pF, sizeof(key->pF))) != HAL_OK || + (err = modexp(core, (key->flags & RSA_FLAG_PRECALC_Q_DONE), + msg, key->dQ, key->q, m2, key->qC, sizeof(key->qC), key->qF, sizeof(key->qF))) != HAL_OK) goto fail; + key->flags |= RSA_FLAG_PRECALC_P_DONE | RSA_FLAG_PRECALC_Q_DONE; + /* * t = m1 - m2. */ @@ -406,11 +443,12 @@ hal_error_t hal_rsa_encrypt(hal_core_t *core, fp_read_unsigned_bin(i, unconst_uint8_t(input), input_len); - if ((err = modexp(core, i, key->e, key->n, o)) != HAL_OK || - (err = unpack_fp(o, output, output_len)) != HAL_OK) - goto fail; + if ((err = modexp(core, (key->flags & RSA_FLAG_PRECALC_N_DONE), i, key->e, key->n, o, + key->nC, sizeof(key->nC), key->nF, sizeof(key->nF))) == HAL_OK) { + key->flags |= RSA_FLAG_PRECALC_N_DONE; + err = unpack_fp(o, output, output_len); + } - fail: fp_zero(i); fp_zero(o); return err; @@ -436,11 +474,13 @@ hal_error_t hal_rsa_decrypt(hal_core_t *core, * just do brute force ModExp. 
*/ - if (fp_iszero(key->p) || fp_iszero(key->q) || fp_iszero(key->u) || fp_iszero(key->dP) || fp_iszero(key->dQ)) - err = modexp(core, i, key->d, key->n, o); - else + if (!fp_iszero(key->p) && !fp_iszero(key->q) && !fp_iszero(key->u) && !fp_iszero(key->dP) && !fp_iszero(key->dQ)) err = rsa_crt(core, key, i, o); + else if ((err = modexp(core, (key->flags & RSA_FLAG_PRECALC_N_DONE), i, key->d, key->n, o, + key->nC, sizeof(key->nC), key->nF, sizeof(key->nF))) == HAL_OK) + key->flags |= RSA_FLAG_PRECALC_N_DONE; + if (err != HAL_OK || (err = unpack_fp(o, output, output_len)) != HAL_OK) goto fail; diff --git a/verilog_constants.h b/verilog_constants.h index c9bb566..5307f68 100644 --- a/verilog_constants.h +++ b/verilog_constants.h @@ -222,23 +222,33 @@ #define MODEXPS6_ADDR_MESSAGE (MODEXPS6_ADDR_OPERANDS + 1 * MODEXPS6_OPERAND_WORDS) #define MODEXPS6_ADDR_EXPONENT (MODEXPS6_ADDR_OPERANDS + 2 * MODEXPS6_OPERAND_WORDS) #define MODEXPS6_ADDR_RESULT (MODEXPS6_ADDR_OPERANDS + 3 * MODEXPS6_OPERAND_WORDS) +#define MODEXPS6_MODE_CONSTANT_TIME (0) +#define MODEXPS6_MODE_FAST_PUBLIC (1) /* * ModExpA7 core. MODEXPA7_OPERAND_BITS is size in bits of largest * supported modulus. 
*/ -#define MODEXPA7_OPERAND_BITS (4096) -#define MODEXPA7_OPERAND_WORDS (MODEXPA7_OPERAND_BITS / 32) -#define MODEXPA7_ADDR_REGISTERS (0 * MODEXPA7_OPERAND_WORDS) -#define MODEXPA7_ADDR_OPERANDS (4 * MODEXPA7_OPERAND_WORDS) -#define MODEXPA7_ADDR_MODE (MODEXPA7_ADDR_REGISTERS + 0x10) -#define MODEXPA7_ADDR_MODULUS_WIDTH (MODEXPA7_ADDR_REGISTERS + 0x11) -#define MODEXPA7_ADDR_EXPONENT_WIDTH (MODEXPA7_ADDR_REGISTERS + 0x12) -#define MODEXPA7_ADDR_MODULUS (MODEXPA7_ADDR_OPERANDS + 0 * MODEXPA7_OPERAND_WORDS) -#define MODEXPA7_ADDR_MESSAGE (MODEXPA7_ADDR_OPERANDS + 1 * MODEXPA7_OPERAND_WORDS) -#define MODEXPA7_ADDR_EXPONENT (MODEXPA7_ADDR_OPERANDS + 2 * MODEXPA7_OPERAND_WORDS) -#define MODEXPA7_ADDR_RESULT (MODEXPA7_ADDR_OPERANDS + 3 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_OPERAND_BITS (4096) +#define MODEXPA7_OPERAND_WORDS (MODEXPA7_OPERAND_BITS / 32) +#define MODEXPA7_ADDR_REGISTERS (0 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_OPERANDS (4 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MODE (MODEXPA7_ADDR_REGISTERS + 0x10) +#define MODEXPA7_ADDR_MODULUS_BITS (MODEXPA7_ADDR_REGISTERS + 0x11) +#define MODEXPA7_ADDR_EXPONENT_BITS (MODEXPA7_ADDR_REGISTERS + 0x12) +#define MODEXPA7_ADDR_BUFFER_BITS (MODEXPA7_ADDR_REGISTERS + 0x13) +#define MODEXPA7_ADDR_ARRAY_BITS (MODEXPA7_ADDR_REGISTERS + 0x14) +#define MODEXPA7_ADDR_MODULUS (MODEXPA7_ADDR_OPERANDS + 0 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MESSAGE (MODEXPA7_ADDR_OPERANDS + 1 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_EXPONENT (MODEXPA7_ADDR_OPERANDS + 2 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_RESULT (MODEXPA7_ADDR_OPERANDS + 3 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MODULUS_COEFF_IN (MODEXPA7_ADDR_OPERANDS + 4 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MODULUS_COEFF_OUT (MODEXPA7_ADDR_OPERANDS + 5 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MONTGOMERY_FACTOR_IN (MODEXPA7_ADDR_OPERANDS + 6 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MONTGOMERY_FACTOR_OUT 
(MODEXPA7_ADDR_OPERANDS + 7 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_MODE_CRT (1 << 1) +#define MODEXPA7_MODE_PLAIN (0 << 1) /* * ECDSA P-256 point multiplier core. ECDSA256_OPERAND_BITS is size |