aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hal.h45
-rw-r--r--hal_io_eim.c31
-rw-r--r--hal_io_fmc.c31
-rw-r--r--hal_io_i2c.c27
-rw-r--r--modexp.c243
-rw-r--r--rsa.c102
-rw-r--r--verilog_constants.h32
7 files changed, 285 insertions, 226 deletions
diff --git a/hal.h b/hal.h
index 47ebe25..d216984 100644
--- a/hal.h
+++ b/hal.h
@@ -201,11 +201,37 @@ typedef struct hal_core hal_core_t;
extern void hal_io_set_debug(int onoff);
extern hal_error_t hal_io_write(const hal_core_t *core, hal_addr_t offset, const uint8_t *buf, size_t len);
extern hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf, size_t len);
-extern hal_error_t hal_io_init(const hal_core_t *core);
-extern hal_error_t hal_io_next(const hal_core_t *core);
extern hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count);
-extern hal_error_t hal_io_wait_ready(const hal_core_t *core);
-extern hal_error_t hal_io_wait_valid(const hal_core_t *core);
+
+/* Write a four-byte word of zeros to the core's control register. */
+static inline hal_error_t hal_io_zero(const hal_core_t *core)
+{
+  const uint8_t ctrl_word[4] = { 0 };
+  return hal_io_write(core, ADDR_CTRL, ctrl_word, sizeof ctrl_word);
+}
+
+/* Write CTRL_INIT as the last byte of the four-byte control word. */
+static inline hal_error_t hal_io_init(const hal_core_t *core)
+{
+  const uint8_t ctrl_word[4] = { [3] = CTRL_INIT };
+  return hal_io_write(core, ADDR_CTRL, ctrl_word, sizeof ctrl_word);
+}
+
+/* Write CTRL_NEXT as the last byte of the four-byte control word. */
+static inline hal_error_t hal_io_next(const hal_core_t *core)
+{
+  const uint8_t ctrl_word[4] = { [3] = CTRL_NEXT };
+  return hal_io_write(core, ADDR_CTRL, ctrl_word, sizeof ctrl_word);
+}
+
+/*
+ * Wait for STATUS_READY via hal_io_wait().  The -1 poll limit is a
+ * placeholder which each hal_io_wait() implementation swaps for its
+ * own default count.
+ */
+static inline hal_error_t hal_io_wait_ready(const hal_core_t *core)
+{
+  int poll_limit = -1;
+  return hal_io_wait(core, STATUS_READY, &poll_limit);
+}
+
+/*
+ * Wait for STATUS_VALID via hal_io_wait().  The -1 poll limit is a
+ * placeholder which each hal_io_wait() implementation swaps for its
+ * own default count.
+ */
+static inline hal_error_t hal_io_wait_valid(const hal_core_t *core)
+{
+  int poll_limit = -1;
+  return hal_io_wait(core, STATUS_VALID, &poll_limit);
+}
/*
* Core management functions.
@@ -378,10 +404,13 @@ extern hal_error_t hal_pbkdf2(hal_core_t *core,
extern void hal_modexp_set_debug(const int onoff);
extern hal_error_t hal_modexp(hal_core_t *core,
- const uint8_t * const msg, const size_t msg_len, /* Message */
- const uint8_t * const exp, const size_t exp_len, /* Exponent */
- const uint8_t * const mod, const size_t mod_len, /* Modulus */
- uint8_t * result, const size_t result_len);
+ const int precalc_done,
+ const uint8_t * const msg, const size_t msg_len, /* Message */
+ const uint8_t * const exp, const size_t exp_len, /* Exponent */
+ const uint8_t * const mod, const size_t mod_len, /* Modulus */
+ uint8_t * result, const size_t result_len, /* Result of exponentiation */
+ uint8_t * coeff, const size_t coeff_len, /* Modulus coefficient (r/w) */
+ uint8_t * mont, const size_t mont_len); /* Montgomery factor (r/w)*/
/*
* Master Key Memory Interface
diff --git a/hal_io_eim.c b/hal_io_eim.c
index 5824f5b..eabc42e 100644
--- a/hal_io_eim.c
+++ b/hal_io_eim.c
@@ -47,7 +47,7 @@ static int inited = 0;
#define EIM_IO_TIMEOUT 100000000
#endif
-static hal_error_t init(void)
+static inline hal_error_t init(void)
{
if (inited)
return HAL_OK;
@@ -61,7 +61,7 @@ static hal_error_t init(void)
/* translate cryptech register number to EIM address
*/
-static hal_addr_t eim_offset(hal_addr_t offset)
+static inline hal_addr_t eim_offset(hal_addr_t offset)
{
return EIM_BASE_ADDR + (offset << 2);
}
@@ -134,24 +134,15 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf,
return HAL_OK;
}
-hal_error_t hal_io_init(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_INIT };
- return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
-}
-
-hal_error_t hal_io_next(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT };
- return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
-}
-
hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count)
{
hal_error_t err;
uint8_t buf[4];
int i;
+ if (count && *count == -1)
+ *count = EIM_IO_TIMEOUT;
+
for (i = 1; ; ++i) {
if (count && (*count > 0) && (i >= *count))
@@ -168,18 +159,6 @@ hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count)
}
}
-hal_error_t hal_io_wait_ready(const hal_core_t *core)
-{
- int limit = EIM_IO_TIMEOUT;
- return hal_io_wait(core, STATUS_READY, &limit);
-}
-
-hal_error_t hal_io_wait_valid(const hal_core_t *core)
-{
- int limit = EIM_IO_TIMEOUT;
- return hal_io_wait(core, STATUS_VALID, &limit);
-}
-
/*
* Local variables:
* indent-tabs-mode: nil
diff --git a/hal_io_fmc.c b/hal_io_fmc.c
index 76d6883..5ac73c4 100644
--- a/hal_io_fmc.c
+++ b/hal_io_fmc.c
@@ -51,7 +51,7 @@ static int inited = 0;
#define FMC_IO_TIMEOUT 100000000
#endif
-static hal_error_t init(void)
+static inline hal_error_t init(void)
{
if (!inited) {
fmc_init();
@@ -62,7 +62,7 @@ static hal_error_t init(void)
/* Translate cryptech register number to FMC address.
*/
-static hal_addr_t fmc_offset(hal_addr_t offset)
+static inline hal_addr_t fmc_offset(hal_addr_t offset)
{
return offset << 2;
}
@@ -136,24 +136,15 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf,
return HAL_OK;
}
-hal_error_t hal_io_init(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_INIT };
- return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
-}
-
-hal_error_t hal_io_next(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT };
- return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
-}
-
hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count)
{
hal_error_t err;
uint8_t buf[4];
int i;
+ if (count && *count == -1)
+ *count = FMC_IO_TIMEOUT;
+
for (i = 1; ; ++i) {
if (count && (*count > 0) && (i >= *count))
@@ -172,18 +163,6 @@ hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count)
}
}
-hal_error_t hal_io_wait_ready(const hal_core_t *core)
-{
- int limit = FMC_IO_TIMEOUT;
- return hal_io_wait(core, STATUS_READY, &limit);
-}
-
-hal_error_t hal_io_wait_valid(const hal_core_t *core)
-{
- int limit = FMC_IO_TIMEOUT;
- return hal_io_wait(core, STATUS_VALID, &limit);
-}
-
/*
* Local variables:
* indent-tabs-mode: nil
diff --git a/hal_io_i2c.c b/hal_io_i2c.c
index e7dbbb6..018e264 100644
--- a/hal_io_i2c.c
+++ b/hal_io_i2c.c
@@ -301,24 +301,15 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf,
return HAL_OK;
}
-hal_error_t hal_io_init(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_INIT };
- return hal_io_write(core, ADDR_CTRL, buf, 4);
-}
-
-hal_error_t hal_io_next(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT };
- return hal_io_write(core, ADDR_CTRL, buf, 4);
-}
-
hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count)
{
hal_error_t err;
uint8_t buf[4];
int i;
+ if (count && *count == -1)
+ *count = 10;
+
for (i = 1; ; ++i) {
if (count && (*count > 0) && (i >= *count))
@@ -336,18 +327,6 @@ hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count)
}
}
-hal_error_t hal_io_wait_ready(const hal_core_t *core)
-{
- int limit = 10;
- return hal_io_wait(core, STATUS_READY, &limit);
-}
-
-hal_error_t hal_io_wait_valid(const hal_core_t *core)
-{
- int limit = 10;
- return hal_io_wait(core, STATUS_VALID, &limit);
-}
-
/*
* Local variables:
* indent-tabs-mode: nil
diff --git a/modexp.c b/modexp.c
index 3e634aa..3ded27e 100644
--- a/modexp.c
+++ b/modexp.c
@@ -43,7 +43,6 @@
#include <stdio.h>
#include <stdint.h>
-#include <assert.h>
#include "hal.h"
#include "hal_internal.h"
@@ -60,173 +59,217 @@ void hal_modexp_set_debug(const int onoff)
}
/*
- * Check a result, report on failure if debugging, pass failures up
- * the chain.
+ * Get value of an ordinary register.
*/
-#define check(_expr_) \
- do { \
- hal_error_t _err = (_expr_); \
- if (_err != HAL_OK && debug) \
- printf("%s failed: %s\n", #_expr_, hal_error_string(_err)); \
- if (_err != HAL_OK) \
- return _err; \
- } while (0)
+static inline hal_error_t get_register(const hal_core_t *core,
+                                       const hal_addr_t addr,
+                                       uint32_t *value)
+{
+  hal_error_t err;
+  uint8_t w[4];
+
+  /* The result is returned through *value, so the pointer is mandatory. */
+  if (value == NULL)
+    return HAL_ERROR_IMPOSSIBLE;
+
+  if ((err = hal_io_read(core, addr, w, sizeof(w))) != HAL_OK)
+    return err;
+
+  /*
+   * Widen each byte before shifting so that w[3] << 24 cannot overflow
+   * a signed int.
+   *
+   * NOTE(review): w[0] is treated as the least significant byte here,
+   * whereas set_register() sends the most significant byte first.
+   * Confirm which byte order the core uses for its registers.
+   */
+  *value = (((uint32_t) w[0]) <<  0) |
+           (((uint32_t) w[1]) <<  8) |
+           (((uint32_t) w[2]) << 16) |
+           (((uint32_t) w[3]) << 24);
+
+  return HAL_OK;
+}
/*
- * Set an ordinary register.
+ * Set value of an ordinary register.
*/
-static hal_error_t set_register(const hal_core_t *core,
- const hal_addr_t addr,
- uint32_t value)
+static inline hal_error_t set_register(const hal_core_t *core,
+ const hal_addr_t addr,
+ const uint32_t value)
{
- uint8_t w[4];
- int i;
-
- for (i = 3; i >= 0; i--) {
- w[i] = value & 0xFF;
- value >>= 8;
- }
+ /* Serialise the value most significant byte first. */
+ const uint8_t w[4] = {
+ ((value >> 24) & 0xFF),
+ ((value >> 16) & 0xFF),
+ ((value >> 8) & 0xFF),
+ ((value >> 0) & 0xFF)
+ };
return hal_io_write(core, addr, w, sizeof(w));
}
/*
* Get value of a data buffer. We reverse the order of 32-bit words
- * in the buffer during the transfer to match what the modexps6 core
+ * in the buffer during the transfer to match what the modexpa7 core
* expects.
*/
-static hal_error_t get_buffer(const hal_core_t *core,
- const hal_addr_t data_addr,
- uint8_t *value,
- const size_t length)
+static inline hal_error_t get_buffer(const hal_core_t *core,
+ const hal_addr_t data_addr,
+ uint8_t *value,
+ const size_t length)
{
+ hal_error_t err;
size_t i;
- assert(value != NULL && length % 4 == 0);
+ /* A NULL buffer, or a length that is not a multiple of four bytes, is rejected. */
+ if (value == NULL || length % 4 != 0)
+ return HAL_ERROR_IMPOSSIBLE;
+ /* Word i/4 of the core's buffer is stored 4 + i bytes before the end of value[]. */
for (i = 0; i < length; i += 4)
- check(hal_io_read(core, data_addr + i/4, &value[length - 4 - i], 4));
+ if ((err = hal_io_read(core, data_addr + i/4, &value[length - 4 - i], 4)) != HAL_OK)
+ return err;
return HAL_OK;
}
/*
* Set value of a data buffer. We reverse the order of 32-bit words
- * in the buffer during the transfer to match what the modexps6 core
+ * in the buffer during the transfer to match what the modexpa7 core
* expects.
+ *
+ * Do we need to zero the portion of the buffer we're not using
+ * explicitly (that is, the portion between `length` and the value of
+ * the core's MODEXPA7_ADDR_BUFFER_BITS register)? We've gotten away
+ * without doing this so far, but the core doesn't take an explicit
+ * length parameter for the message itself, instead it assumes that
+ * the message is either as long as or twice as long as the exponent,
+ * depending on the setting of the CRT mode bit. Maybe initializing
+ * the core clears the excess bits so there's no issue? Dunno. Have
+ * never seen a problem with this yet, just don't know why not.
*/
-static hal_error_t set_buffer(const hal_core_t *core,
- const hal_addr_t data_addr,
- const uint8_t * const value,
- const size_t length)
+static inline hal_error_t set_buffer(const hal_core_t *core,
+ const hal_addr_t data_addr,
+ const uint8_t * const value,
+ const size_t length)
{
+ hal_error_t err; /* result of each hal_io_write() call in the loop below */
size_t i;
- assert(value != NULL && length % 4 == 0);
+ /* A NULL buffer, or a length that is not a multiple of four bytes, is rejected. */
+ if (value == NULL || length % 4 != 0)
+ return HAL_ERROR_IMPOSSIBLE;
+ /* The word 4 + i bytes before the end of value[] goes to word i/4 of the core's buffer. */
for (i = 0; i < length; i += 4)
- check(hal_io_write(core, data_addr + i/4, &value[length - 4 - i], 4));
+ if ((err = hal_io_write(core, data_addr + i/4, &value[length - 4 - i], 4)) != HAL_OK)
+ return err;
return HAL_OK;
}
/*
+ * Check a result, report on failure if debugging, pass failures up
+ * the chain.
+ */
+
+/* Expands to code that uses `core` and `debug` from the surrounding scope. */
+#define check(_expr_)                                                           \
+  do {                                                                          \
+    const hal_error_t _err = (_expr_);                                          \
+    if (_err != HAL_OK) {                                                       \
+      if (debug)                                                                \
+        hal_log(HAL_LOG_WARN, "%s failed: %s\n", #_expr_, hal_error_string(_err)); \
+      hal_core_free(core);                                                      \
+      return _err;                                                              \
+    }                                                                           \
+  } while (0)
+
+/*
* Run one modexp operation.
*/
hal_error_t hal_modexp(hal_core_t *core,
- const uint8_t * const msg, const size_t msg_len, /* Message */
- const uint8_t * const exp, const size_t exp_len, /* Exponent */
- const uint8_t * const mod, const size_t mod_len, /* Modulus */
- uint8_t *result, const size_t result_len)
+ const int precalc_done,
+ const uint8_t * const msg, const size_t msg_len, /* Message */
+ const uint8_t * const exp, const size_t exp_len, /* Exponent */
+ const uint8_t * const mod, const size_t mod_len, /* Modulus */
+ uint8_t *result, const size_t result_len, /* Result of exponentiation */
+ uint8_t *coeff, const size_t coeff_len, /* Modulus coefficient (r/w) */
+ uint8_t *mont, const size_t mont_len) /* Montgomery factor (r/w)*/
{
hal_error_t err;
/*
- * All pointers must be set, neither message nor exponent may be
- * longer than modulus, result buffer must not be shorter than
- * modulus, and all input lengths must be a multiple of four.
- *
- * The multiple-of-four restriction is a pain, but the rest of the
- * HAL code currently enforces the same restriction, and allowing
- * arbitrary lengths would require some tedious shuffling to deal
- * with alignment issues, so it's not worth trying to fix only here.
+ * All pointers must be set, exponent may not be longer than
+ * modulus, message may not be longer than twice the modulus (CRT
+ * mode), result buffer must not be shorter than modulus, and all
+ * input lengths must be a multiple of four bytes (the core is all
+ * about 32-bit words).
*/
- if (msg == NULL || exp == NULL || mod == NULL || result == NULL ||
- msg_len > mod_len || exp_len > mod_len || result_len < mod_len ||
- ((msg_len | exp_len | mod_len) & 3) != 0)
+ if (mod == NULL ||
+ msg == NULL || msg_len > mod_len * 2 ||
+ exp == NULL || exp_len > mod_len ||
+ result == NULL || result_len < mod_len ||
+ coeff == NULL || coeff_len != mod_len ||
+ mont == NULL || mont_len != mod_len ||
+ ((msg_len | exp_len | mod_len | coeff_len | mont_len) & 3) != 0)
return HAL_ERROR_BAD_ARGUMENTS;
- if (((err = hal_core_alloc(MODEXPS6_NAME, &core)) == HAL_ERROR_CORE_NOT_FOUND) &&
- ((err = hal_core_alloc(MODEXPA7_NAME, &core)) != HAL_OK))
- return err;
+ /*
+ * Gonna need to think about running two modexpa7 cores in parallel
+ * in CRT mode for full speed signature.
+ */
-#undef check
-#define check(_expr_) \
- do { \
- hal_error_t _err = (_expr_); \
- if (_err != HAL_OK && debug) \
- printf("%s failed: %s\n", #_expr_, hal_error_string(_err)); \
- if (_err != HAL_OK) { \
- hal_core_free(core); \
- return _err; \
- } \
- } while (0)
+ if (((err = hal_core_alloc(MODEXPA7_NAME, &core)) != HAL_OK))
+ return err;
/*
- * We probably ought to take the mode (fast vs constant-time) as an
- * argument, but for the moment we just guess that really short
- * exponent means we're using the public key and can use fast mode,
- * really short messages are Miller-Rabin tests and can also use
- * fast mode, all other cases are something to do with the private
- * key and therefore must use constant-time mode.
- *
- * Unclear whether it's worth trying to figure out exactly how long
- * the operands are: assuming a multiple of eight is safe, but makes
- * a bit more work for the core; checking to see how many bits are
- * really set leaves the core sitting idle while the main CPU does
- * these checks. No way to know which is faster without testing;
- * take simple approach for the moment.
+ * Now that we have the core, check operand length against what it
+ * says it can handle.
*/
- /* Select mode (1 = fast, 0 = safe) */
- check(set_register(core, MODEXPS6_ADDR_MODE, (exp_len <= 4 || msg_len <= 4)));
+ uint32_t operand_max;
+ check(get_register(core, MODEXPA7_ADDR_BUFFER_BITS, &operand_max));
+ operand_max /= 8;
- /* Set modulus size in bits */
- check(set_register(core, MODEXPS6_ADDR_MODULUS_WIDTH, mod_len * 8));
+ if (msg_len > operand_max || exp_len > operand_max || mod_len > operand_max ||
+ (coeff != NULL && coeff_len > operand_max) ||
+ (mont != NULL && mont_len > operand_max)) {
+ hal_core_free(core);
+ return HAL_ERROR_BAD_ARGUMENTS;
+ }
- /* Write new modulus */
- check(set_buffer(core, MODEXPS6_ADDR_MODULUS, mod, mod_len));
+ /* Set modulus */
- /* Pre-calcuate speed-up coefficient */
- check(hal_io_init(core));
+ check(set_register(core, MODEXPA7_ADDR_MODULUS_BITS, mod_len * 8));
+ check(set_buffer(core, MODEXPA7_ADDR_MODULUS, mod, mod_len));
- /* Wait for calculation to complete */
- check(hal_io_wait_ready(core));
+ /*
+ * Calculate modulus-dependent speedup factors if needed. Buffer
+ * space is always caller's problem (because caller almost certainly
+ * wants to stash these values in the keystore anyway). Calculation
+ * is edge-triggered by "init" bit going from zero to one.
+ */
- /* Write new message */
- check(set_buffer(core, MODEXPS6_ADDR_MESSAGE, msg, msg_len));
+ if (!precalc_done) {
+ check(hal_io_zero(core));
+ check(hal_io_init(core));
+ check(hal_io_wait_ready(core));
+ check(get_buffer(core, MODEXPA7_ADDR_MODULUS_COEFF_OUT, coeff, coeff_len));
+ check(get_buffer(core, MODEXPA7_ADDR_MONTGOMERY_FACTOR_OUT, mont, mont_len));
+ }
- /* Set new exponent length in bits */
- check(set_register(core, MODEXPS6_ADDR_EXPONENT_WIDTH, exp_len * 8));
+ /* Load modulus-dependent speedup factors (even if we just calculated them) */
+ check(set_buffer(core, MODEXPA7_ADDR_MODULUS_COEFF_IN, coeff, coeff_len));
+ check(set_buffer(core, MODEXPA7_ADDR_MONTGOMERY_FACTOR_IN, mont, mont_len));
- /* Set new exponent */
- check(set_buffer(core, MODEXPS6_ADDR_EXPONENT, exp, exp_len));
+ /* Select CRT mode if and only if message is longer than modulus */
+ check(set_register(core, MODEXPA7_ADDR_MODE,
+ (msg_len > mod_len
+ ? MODEXPA7_MODE_CRT
+ : MODEXPA7_MODE_PLAIN)));
- /* Start calculation */
- check(hal_io_next(core));
+ /* Set message and exponent */
+ check(set_buffer(core, MODEXPA7_ADDR_MESSAGE, msg, msg_len));
+ check(set_buffer(core, MODEXPA7_ADDR_EXPONENT, exp, exp_len));
+ check(set_register(core, MODEXPA7_ADDR_EXPONENT_BITS, exp_len * 8));
- /* Wait for result */
+ /* Edge-trigger the "next" bit to start calculation, then wait for the result */
+ check(hal_io_zero(core));
+ check(hal_io_next(core));
check(hal_io_wait_valid(core));
- /* Extract result */
- check(get_buffer(core, MODEXPS6_ADDR_RESULT, result, mod_len));
-
+ /* Extract result, clean up, then done */
+ check(get_buffer(core, MODEXPA7_ADDR_RESULT, result, mod_len));
hal_core_free(core);
return HAL_OK;
}
diff --git a/rsa.c b/rsa.c
index eeb611c..90a878f 100644
--- a/rsa.c
+++ b/rsa.c
@@ -70,7 +70,6 @@
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
-#include <assert.h>
#include "hal.h"
#include "hal_internal.h"
@@ -94,6 +93,15 @@
#endif
/*
+ * How big to make the buffers for the modulus coefficient and
+ * Montgomery factor. This will almost certainly want tuning.
+ */
+
+#ifndef HAL_RSA_MAX_OPERAND_LENGTH
+#define HAL_RSA_MAX_OPERAND_LENGTH (4096 / 8)
+#endif
+
+/*
* Whether we want debug output.
*/
@@ -123,7 +131,7 @@ void hal_rsa_set_blinding(const int onoff)
*/
struct hal_rsa_key {
- hal_key_type_t type; /* What kind of key this is */
+ hal_key_type_t type; /* What kind of key this is */
fp_int n[1]; /* The modulus */
fp_int e[1]; /* Public exponent */
fp_int d[1]; /* Private exponent */
@@ -132,8 +140,17 @@ struct hal_rsa_key {
fp_int u[1]; /* 1/q mod p */
fp_int dP[1]; /* d mod (p - 1) */
fp_int dQ[1]; /* d mod (q - 1) */
+ unsigned flags; /* Internal key flags */
+ uint8_t /* ModExpA7 speedup factors */
+ nC[HAL_RSA_MAX_OPERAND_LENGTH], nF[HAL_RSA_MAX_OPERAND_LENGTH],
+ pC[HAL_RSA_MAX_OPERAND_LENGTH/2], pF[HAL_RSA_MAX_OPERAND_LENGTH/2],
+ qC[HAL_RSA_MAX_OPERAND_LENGTH/2], qF[HAL_RSA_MAX_OPERAND_LENGTH/2];
};
+#define RSA_FLAG_PRECALC_N_DONE (1 << 0)
+#define RSA_FLAG_PRECALC_P_DONE (1 << 1)
+#define RSA_FLAG_PRECALC_Q_DONE (1 << 2)
+
const size_t hal_rsa_key_t_size = sizeof(hal_rsa_key_t);
/*
@@ -158,7 +175,7 @@ const size_t hal_rsa_key_t_size = sizeof(hal_rsa_key_t);
case FP_OKAY: break; \
case FP_VAL: lose(HAL_ERROR_BAD_ARGUMENTS); \
case FP_MEM: lose(HAL_ERROR_ALLOCATION_FAILURE); \
- default: lose(HAL_ERROR_IMPOSSIBLE); \
+ default: lose(HAL_ERROR_IMPOSSIBLE); \
} \
} while (0)
@@ -171,7 +188,8 @@ static hal_error_t unpack_fp(const fp_int * const bn, uint8_t *buffer, const siz
{
hal_error_t err = HAL_OK;
- assert(bn != NULL && buffer != NULL);
+ if (bn == NULL || buffer == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
const size_t bytes = fp_unsigned_bin_size(unconst_fp_int(bn));
@@ -193,22 +211,18 @@ static hal_error_t unpack_fp(const fp_int * const bn, uint8_t *buffer, const siz
*/
static hal_error_t modexp(hal_core_t *core,
- const fp_int * msg,
+ const int precalc_done,
+ const fp_int * const msg,
const fp_int * const exp,
const fp_int * const mod,
- fp_int *res)
+ fp_int *res,
+ uint8_t *coeff, const size_t coeff_len,
+ uint8_t *mont, const size_t mont_len)
{
hal_error_t err = HAL_OK;
- assert(msg != NULL && exp != NULL && mod != NULL && res != NULL);
-
- fp_int reduced_msg[1] = INIT_FP_INT;
-
- if (fp_cmp_mag(unconst_fp_int(msg), unconst_fp_int(mod)) != FP_LT) {
- fp_init(reduced_msg);
- fp_mod(unconst_fp_int(msg), unconst_fp_int(mod), reduced_msg);
- msg = reduced_msg;
- }
+ if (msg == NULL || exp == NULL || mod == NULL || res == NULL || coeff == NULL || mont == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
const size_t msg_len = (fp_unsigned_bin_size(unconst_fp_int(msg)) + 3) & ~3;
const size_t exp_len = (fp_unsigned_bin_size(unconst_fp_int(exp)) + 3) & ~3;
@@ -222,11 +236,13 @@ static hal_error_t modexp(hal_core_t *core,
if ((err = unpack_fp(msg, msgbuf, sizeof(msgbuf))) != HAL_OK ||
(err = unpack_fp(exp, expbuf, sizeof(expbuf))) != HAL_OK ||
(err = unpack_fp(mod, modbuf, sizeof(modbuf))) != HAL_OK ||
- (err = hal_modexp(core,
+ (err = hal_modexp(core, precalc_done,
msgbuf, sizeof(msgbuf),
expbuf, sizeof(expbuf),
modbuf, sizeof(modbuf),
- resbuf, sizeof(resbuf))) != HAL_OK)
+ resbuf, sizeof(resbuf),
+ coeff, coeff_len,
+ mont, mont_len)) != HAL_OK)
goto fail;
fp_read_unsigned_bin(res, resbuf, sizeof(resbuf));
@@ -249,10 +265,14 @@ static hal_error_t modexp(hal_core_t *core,
*/
static hal_error_t modexp(const hal_core_t *core, /* ignored */
+ const int precalc_done, /* ignored */
const fp_int * const msg,
const fp_int * const exp,
const fp_int * const mod,
- fp_int *res)
+ fp_int *res,
+ uint8_t *coeff, const size_t coeff_len, /* ignored */
+ uint8_t *mont, const size_t mont_len) /* ignored */
+
{
hal_error_t err = HAL_OK;
FP_CHECK(fp_exptmod(unconst_fp_int(msg), unconst_fp_int(exp), unconst_fp_int(mod), res));
@@ -281,7 +301,12 @@ static hal_error_t modexp(const hal_core_t *core, /* ignored */
int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
{
- return modexp(NULL, a, b, c, d) == HAL_OK ? FP_OKAY : FP_VAL;
+ /*
+  * Throwaway buffers for the modulus coefficient and Montgomery factor.
+  * hal_modexp() rejects coeff_len or mont_len that differ from mod_len,
+  * so size them from the modulus c, not from the exponent b.  The
+  * rounding to a multiple of four bytes presumably matches how modexp()
+  * pads the modulus -- verify.
+  */
+ const size_t len = (fp_unsigned_bin_size(unconst_fp_int(c)) + 3) & ~3;
+ uint8_t C[len], F[len];
+ const hal_error_t err = modexp(NULL, 0, a, b, c, d, C, sizeof(C), F, sizeof(F));
+ /* The factors are not kept, so wipe them before returning. */
+ memset(C, 0, sizeof(C));
+ memset(F, 0, sizeof(F));
+ return err == HAL_OK ? FP_OKAY : FP_VAL;
}
#endif /* HAL_RSA_SIGN_USE_MODEXP && HAL_RSA_KEYGEN_USE_MODEXP */
@@ -294,7 +319,8 @@ int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t * const key, fp_int *bf, fp_int *ubf)
{
- assert(key != NULL && bf != NULL && ubf != NULL);
+ if (key == NULL || bf == NULL || ubf == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
uint8_t rnd[fp_unsigned_bin_size(unconst_fp_int(key->n))];
hal_error_t err = HAL_OK;
@@ -306,9 +332,12 @@ static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t
fp_read_unsigned_bin(bf, rnd, sizeof(rnd));
fp_copy(bf, ubf);
- if ((err = modexp(core, bf, key->e, key->n, bf)) != HAL_OK)
+ if ((err = modexp(core, (key->flags & RSA_FLAG_PRECALC_N_DONE), bf, key->e, key->n, bf,
+ key->nC, sizeof(key->nC), key->nF, sizeof(key->nF))) != HAL_OK)
goto fail;
+ key->flags |= RSA_FLAG_PRECALC_N_DONE;
+
FP_CHECK(fp_invmod(ubf, unconst_fp_int(key->n), ubf));
fail:
@@ -322,7 +351,8 @@ static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t
static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp_int *msg, fp_int *sig)
{
- assert(key != NULL && msg != NULL && sig != NULL);
+ if (key == NULL || msg == NULL || sig == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
hal_error_t err = HAL_OK;
fp_int t[1] = INIT_FP_INT;
@@ -343,11 +373,18 @@ static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp
/*
* m1 = msg ** dP mod p
* m2 = msg ** dQ mod q
+ *
+ * This is just crying out to be done with parallel cores, but get
+ * the boring version working before jumping off that cliff.
*/
- if ((err = modexp(core, msg, key->dP, key->p, m1)) != HAL_OK ||
- (err = modexp(core, msg, key->dQ, key->q, m2)) != HAL_OK)
+ if ((err = modexp(core, (key->flags & RSA_FLAG_PRECALC_P_DONE),
+ msg, key->dP, key->p, m1, key->pC, sizeof(key->pC), key->pF, sizeof(key->pF))) != HAL_OK ||
+ (err = modexp(core, (key->flags & RSA_FLAG_PRECALC_Q_DONE),
+ msg, key->dQ, key->q, m2, key->qC, sizeof(key->qC), key->qF, sizeof(key->qF))) != HAL_OK)
goto fail;
+ key->flags |= RSA_FLAG_PRECALC_P_DONE | RSA_FLAG_PRECALC_Q_DONE;
+
/*
* t = m1 - m2.
*/
@@ -406,11 +443,12 @@ hal_error_t hal_rsa_encrypt(hal_core_t *core,
fp_read_unsigned_bin(i, unconst_uint8_t(input), input_len);
- if ((err = modexp(core, i, key->e, key->n, o)) != HAL_OK ||
- (err = unpack_fp(o, output, output_len)) != HAL_OK)
- goto fail;
+ if ((err = modexp(core, (key->flags & RSA_FLAG_PRECALC_N_DONE), i, key->e, key->n, o,
+ key->nC, sizeof(key->nC), key->nF, sizeof(key->nF))) == HAL_OK) {
+ key->flags |= RSA_FLAG_PRECALC_N_DONE;
+ err = unpack_fp(o, output, output_len);
+ }
- fail:
fp_zero(i);
fp_zero(o);
return err;
@@ -436,11 +474,13 @@ hal_error_t hal_rsa_decrypt(hal_core_t *core,
* just do brute force ModExp.
*/
- if (fp_iszero(key->p) || fp_iszero(key->q) || fp_iszero(key->u) || fp_iszero(key->dP) || fp_iszero(key->dQ))
- err = modexp(core, i, key->d, key->n, o);
- else
+ if (!fp_iszero(key->p) && !fp_iszero(key->q) && !fp_iszero(key->u) && !fp_iszero(key->dP) && !fp_iszero(key->dQ))
err = rsa_crt(core, key, i, o);
+ else if ((err = modexp(core, (key->flags & RSA_FLAG_PRECALC_N_DONE), i, key->d, key->n, o,
+ key->nC, sizeof(key->nC), key->nF, sizeof(key->nF))) == HAL_OK)
+ key->flags |= RSA_FLAG_PRECALC_N_DONE;
+
if (err != HAL_OK || (err = unpack_fp(o, output, output_len)) != HAL_OK)
goto fail;
diff --git a/verilog_constants.h b/verilog_constants.h
index c9bb566..5307f68 100644
--- a/verilog_constants.h
+++ b/verilog_constants.h
@@ -222,23 +222,33 @@
#define MODEXPS6_ADDR_MESSAGE (MODEXPS6_ADDR_OPERANDS + 1 * MODEXPS6_OPERAND_WORDS)
#define MODEXPS6_ADDR_EXPONENT (MODEXPS6_ADDR_OPERANDS + 2 * MODEXPS6_OPERAND_WORDS)
#define MODEXPS6_ADDR_RESULT (MODEXPS6_ADDR_OPERANDS + 3 * MODEXPS6_OPERAND_WORDS)
+#define MODEXPS6_MODE_CONSTANT_TIME (0)
+#define MODEXPS6_MODE_FAST_PUBLIC (1)
/*
* ModExpA7 core. MODEXPA7_OPERAND_BITS is size in bits of largest
* supported modulus.
*/
-#define MODEXPA7_OPERAND_BITS (4096)
-#define MODEXPA7_OPERAND_WORDS (MODEXPA7_OPERAND_BITS / 32)
-#define MODEXPA7_ADDR_REGISTERS (0 * MODEXPA7_OPERAND_WORDS)
-#define MODEXPA7_ADDR_OPERANDS (4 * MODEXPA7_OPERAND_WORDS)
-#define MODEXPA7_ADDR_MODE (MODEXPA7_ADDR_REGISTERS + 0x10)
-#define MODEXPA7_ADDR_MODULUS_WIDTH (MODEXPA7_ADDR_REGISTERS + 0x11)
-#define MODEXPA7_ADDR_EXPONENT_WIDTH (MODEXPA7_ADDR_REGISTERS + 0x12)
-#define MODEXPA7_ADDR_MODULUS (MODEXPA7_ADDR_OPERANDS + 0 * MODEXPA7_OPERAND_WORDS)
-#define MODEXPA7_ADDR_MESSAGE (MODEXPA7_ADDR_OPERANDS + 1 * MODEXPA7_OPERAND_WORDS)
-#define MODEXPA7_ADDR_EXPONENT (MODEXPA7_ADDR_OPERANDS + 2 * MODEXPA7_OPERAND_WORDS)
-#define MODEXPA7_ADDR_RESULT (MODEXPA7_ADDR_OPERANDS + 3 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_OPERAND_BITS (4096)
+#define MODEXPA7_OPERAND_WORDS (MODEXPA7_OPERAND_BITS / 32)
+#define MODEXPA7_ADDR_REGISTERS (0 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_OPERANDS (4 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MODE (MODEXPA7_ADDR_REGISTERS + 0x10)
+#define MODEXPA7_ADDR_MODULUS_BITS (MODEXPA7_ADDR_REGISTERS + 0x11)
+#define MODEXPA7_ADDR_EXPONENT_BITS (MODEXPA7_ADDR_REGISTERS + 0x12)
+#define MODEXPA7_ADDR_BUFFER_BITS (MODEXPA7_ADDR_REGISTERS + 0x13)
+#define MODEXPA7_ADDR_ARRAY_BITS (MODEXPA7_ADDR_REGISTERS + 0x14)
+#define MODEXPA7_ADDR_MODULUS (MODEXPA7_ADDR_OPERANDS + 0 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MESSAGE (MODEXPA7_ADDR_OPERANDS + 1 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_EXPONENT (MODEXPA7_ADDR_OPERANDS + 2 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_RESULT (MODEXPA7_ADDR_OPERANDS + 3 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MODULUS_COEFF_IN (MODEXPA7_ADDR_OPERANDS + 4 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MODULUS_COEFF_OUT (MODEXPA7_ADDR_OPERANDS + 5 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MONTGOMERY_FACTOR_IN (MODEXPA7_ADDR_OPERANDS + 6 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MONTGOMERY_FACTOR_OUT (MODEXPA7_ADDR_OPERANDS + 7 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_MODE_CRT (1 << 1)
+#define MODEXPA7_MODE_PLAIN (0 << 1)
/*
* ECDSA P-256 point multiplier core. ECDSA256_OPERAND_BITS is size