aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Austein <sra@hactrn.net>2017-12-13 14:15:45 -0500
committerRob Austein <sra@hactrn.net>2017-12-13 14:17:46 -0500
commitbc167c214e97ed35f39d088a7dee3f1a9511340e (patch)
tree7143f5fef99dfe4f605f42664ffc58b904589a5f
parente5d8d558e954addf0e26ff887e9494d216da2225 (diff)
parent238e33e53195385dac51e18fffd0f4511244c560 (diff)
Merge branch systolic_crt into master.
This branch was sitting for long enough that master had been through a cleanup pass, so beware of accidental reversions.
-rw-r--r--Makefile7
-rw-r--r--asn1_internal.h10
-rw-r--r--core.c39
-rw-r--r--cryptech/libhal.py2
-rw-r--r--hal.h54
-rw-r--r--hal_internal.h63
-rw-r--r--hal_io.c114
-rw-r--r--hal_io_eim.c54
-rw-r--r--hal_io_fmc.c56
-rw-r--r--hal_io_i2c.c47
-rw-r--r--ks.c123
-rw-r--r--ks.h2
-rw-r--r--ks_token.c20
-rw-r--r--modexp.c300
-rw-r--r--rpc_pkey.c27
-rw-r--r--rsa.c371
-rw-r--r--tests/test-rsa.c19
-rw-r--r--tests/test-trng.c1
-rw-r--r--unit-tests.py4
-rw-r--r--verilog_constants.h37
20 files changed, 951 insertions, 399 deletions
diff --git a/Makefile b/Makefile
index ae6888d..59236af 100644
--- a/Makefile
+++ b/Makefile
@@ -109,12 +109,13 @@ CORE_OBJ = core.o csprng.o pbkdf2.o aes_keywrap.o modexp.o mkmif.o ${IO_OBJ}
# i2c: Older I2C bus from Novena
# fmc: FMC bus from dev-bridge and alpha boards
+IO_OBJ = hal_io.o
ifeq "${IO_BUS}" "eim"
- IO_OBJ = hal_io_eim.o novena-eim.o
+ IO_OBJ += hal_io_eim.o novena-eim.o
else ifeq "${IO_BUS}" "i2c"
- IO_OBJ = hal_io_i2c.o
+ IO_OBJ += hal_io_i2c.o
else ifeq "${IO_BUS}" "fmc"
- IO_OBJ = hal_io_fmc.o
+ IO_OBJ += hal_io_fmc.o
endif
# If we're building for STM32, position-independent code leads to some
diff --git a/asn1_internal.h b/asn1_internal.h
index fe2f293..3de8bd6 100644
--- a/asn1_internal.h
+++ b/asn1_internal.h
@@ -151,6 +151,16 @@ extern hal_error_t hal_asn1_decode_pkcs8_encryptedprivatekeyinfo(const uint8_t *
extern hal_error_t hal_asn1_guess_key_type(hal_key_type_t *type, hal_curve_name_t *curve,
const uint8_t *const der, const size_t der_len);
+/*
+ * Peek ahead for an OPTIONAL attribute.
+ */
+
+static inline int hal_asn1_peek(const uint8_t tag,
+ const uint8_t * const der, size_t der_max)
+{
+ return der != NULL && der_max > 0 && der[0] == tag;
+}
+
#endif /* _HAL_ASN1_INTERNAL_H_ */
/*
diff --git a/core.c b/core.c
index f80735c..daa82fa 100644
--- a/core.c
+++ b/core.c
@@ -97,7 +97,7 @@ static int name_matches(const hal_core_t *const core, const char * const name)
static const struct { const char *name; hal_addr_t extra; } gaps[] = {
{ "csprng", 11 * CORE_SIZE }, /* empty slots after csprng */
{ "modexps6", 3 * CORE_SIZE }, /* ModexpS6 uses four slots */
- { "modexpa7", 3 * CORE_SIZE }, /* ModexpA7 uses four slots */
+ { "modexpa7", 7 * CORE_SIZE }, /* ModexpA7 uses eight slots */
};
static hal_core_t *head = NULL;
@@ -203,15 +203,17 @@ hal_core_t *hal_core_find(const char *name, hal_core_t *core)
hal_error_t hal_core_alloc(const char *name, hal_core_t **pcore)
{
- hal_core_t *core;
- hal_error_t err = HAL_ERROR_CORE_NOT_FOUND;
+ /*
+ * This used to allow name == NULL iff *core != NULL, but the
+ * semantics were fragile and in practice we always pass a name
+ * anyway, so simplify by requiring name != NULL, always.
+ */
- if (name == NULL && (pcore == NULL || *pcore == NULL))
+ if (name == NULL || pcore == NULL)
return HAL_ERROR_BAD_ARGUMENTS;
- core = *pcore;
- if (name == NULL)
- name = core->info.name;
+ hal_error_t err = HAL_ERROR_CORE_NOT_FOUND;
+ hal_core_t *core = *pcore;
if (core != NULL) {
/* if we can reallocate the same core, do it now */
@@ -221,24 +223,23 @@ hal_error_t hal_core_alloc(const char *name, hal_core_t **pcore)
hal_critical_section_end();
return HAL_OK;
}
- /* else fall through to search */
+ /* else forget that core and fall through to search */
+ *pcore = NULL;
}
while (1) {
hal_critical_section_start();
for (core = hal_core_iterate(NULL); core != NULL; core = core->next) {
- if (name_matches(core, name)) {
- if (core->busy) {
- err = HAL_ERROR_CORE_BUSY;
- continue;
- }
- else {
- err = HAL_OK;
- *pcore = core;
- core->busy = 1;
- break;
- }
+ if (!name_matches(core, name))
+ continue;
+ if (core->busy) {
+ err = HAL_ERROR_CORE_BUSY;
+ continue;
}
+ err = HAL_OK;
+ *pcore = core;
+ core->busy = 1;
+ break;
}
hal_critical_section_end();
if (err == HAL_ERROR_CORE_BUSY)
diff --git a/cryptech/libhal.py b/cryptech/libhal.py
index 8666d15..acd1abb 100644
--- a/cryptech/libhal.py
+++ b/cryptech/libhal.py
@@ -403,7 +403,7 @@ class PKey(Handle):
return result
def export_pkey(self, pkey):
- return self.hsm.pkey_export(pkey = pkey, kekek = self, pkcs8_max = 2560, kek_max = 512)
+ return self.hsm.pkey_export(pkey = pkey, kekek = self, pkcs8_max = 5480, kek_max = 512)
def import_pkey(self, pkcs8, kek, flags = 0):
return self.hsm.pkey_import(kekek = self, pkcs8 = pkcs8, kek = kek, flags = flags)
diff --git a/hal.h b/hal.h
index f750ebb..0030f3d 100644
--- a/hal.h
+++ b/hal.h
@@ -201,11 +201,8 @@ typedef struct hal_core hal_core_t;
extern void hal_io_set_debug(int onoff);
extern hal_error_t hal_io_write(const hal_core_t *core, hal_addr_t offset, const uint8_t *buf, size_t len);
extern hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf, size_t len);
-extern hal_error_t hal_io_init(const hal_core_t *core);
-extern hal_error_t hal_io_next(const hal_core_t *core);
-extern hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count);
-extern hal_error_t hal_io_wait_ready(const hal_core_t *core);
-extern hal_error_t hal_io_wait_valid(const hal_core_t *core);
+extern hal_error_t hal_io_wait(const hal_core_t *core, const uint8_t status, int *count);
+extern hal_error_t hal_io_wait2(const hal_core_t *core1, const hal_core_t *core2, const uint8_t status, int *count);
/*
* Core management functions.
@@ -372,16 +369,25 @@ extern hal_error_t hal_pbkdf2(hal_core_t *core,
unsigned iterations_desired);
/*
- * Modular exponentiation.
+ * Modular exponentiation. This takes a ridiculous number of
+ * arguments of very similar types, making it easy to confuse them,
+ * particularly when performing two modexp operations in parallel, so
+ * we encapsulate the arguments in a structure.
*/
-extern void hal_modexp_set_debug(const int onoff);
+typedef struct {
+ hal_core_t *core;
+ const uint8_t *msg; size_t msg_len; /* Message */
+ const uint8_t *exp; size_t exp_len; /* Exponent */
+ const uint8_t *mod; size_t mod_len; /* Modulus */
+ uint8_t *result; size_t result_len; /* Result of exponentiation */
+ uint8_t *coeff; size_t coeff_len; /* Modulus coefficient (r/w) */
+ uint8_t *mont; size_t mont_len; /* Montgomery factor (r/w)*/
+} hal_modexp_arg_t;
-extern hal_error_t hal_modexp(hal_core_t *core,
- const uint8_t * const msg, const size_t msg_len, /* Message */
- const uint8_t * const exp, const size_t exp_len, /* Exponent */
- const uint8_t * const mod, const size_t mod_len, /* Modulus */
- uint8_t * result, const size_t result_len);
+extern void hal_modexp_set_debug(const int onoff);
+extern hal_error_t hal_modexp( const int precalc, hal_modexp_arg_t *args);
+extern hal_error_t hal_modexp2(const int precalc, hal_modexp_arg_t *args1, hal_modexp_arg_t *args2);
/*
* Master Key Memory Interface
@@ -459,12 +465,13 @@ extern hal_error_t hal_rsa_key_get_public_exponent(const hal_rsa_key_t * const k
extern void hal_rsa_key_clear(hal_rsa_key_t *key);
extern hal_error_t hal_rsa_encrypt(hal_core_t *core,
- const hal_rsa_key_t * const key,
+ hal_rsa_key_t *key,
const uint8_t * const input, const size_t input_len,
uint8_t * output, const size_t output_len);
-extern hal_error_t hal_rsa_decrypt(hal_core_t *core,
- const hal_rsa_key_t * const key,
+extern hal_error_t hal_rsa_decrypt(hal_core_t *core1,
+ hal_core_t *core2,
+ hal_rsa_key_t *key,
const uint8_t * const input, const size_t input_len,
uint8_t * output, const size_t output_len);
@@ -477,7 +484,8 @@ extern hal_error_t hal_rsa_key_gen(hal_core_t *core,
extern hal_error_t hal_rsa_private_key_to_der(const hal_rsa_key_t * const key,
uint8_t *der, size_t *der_len, const size_t der_max);
-extern size_t hal_rsa_private_key_to_der_len(const hal_rsa_key_t * const key);
+extern hal_error_t hal_rsa_private_key_to_der_extra(const hal_rsa_key_t * const key,
+ uint8_t *der, size_t *der_len, const size_t der_max);
extern hal_error_t hal_rsa_private_key_from_der(hal_rsa_key_t **key,
void *keybuf, const size_t keybuf_len,
@@ -492,6 +500,20 @@ extern hal_error_t hal_rsa_public_key_from_der(hal_rsa_key_t **key,
void *keybuf, const size_t keybuf_len,
const uint8_t * const der, const size_t der_len);
+extern int hal_rsa_key_needs_saving(const hal_rsa_key_t * const key);
+
+static inline size_t hal_rsa_private_key_to_der_len(const hal_rsa_key_t * const key)
+{
+ size_t len = 0;
+ return hal_rsa_private_key_to_der(key, NULL, &len, 0) == HAL_OK ? len : 0;
+}
+
+static inline size_t hal_rsa_private_key_to_der_extra_len(const hal_rsa_key_t * const key)
+{
+ size_t len = 0;
+ return hal_rsa_private_key_to_der_extra(key, NULL, &len, 0) == HAL_OK ? len : 0;
+}
+
/*
* ECDSA.
*/
diff --git a/hal_internal.h b/hal_internal.h
index 2486fd2..ac51cfb 100644
--- a/hal_internal.h
+++ b/hal_internal.h
@@ -69,6 +69,53 @@ inline uint32_t htonl(uint32_t w)
#endif
/*
+ * Low-level I/O convenience functions, moved here from hal.h
+ * because they use symbols defined in verilog_constants.h.
+ */
+
+static inline hal_error_t hal_io_zero(const hal_core_t *core)
+{
+ const uint8_t buf[4] = { 0, 0, 0, 0 };
+ return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
+}
+
+static inline hal_error_t hal_io_init(const hal_core_t *core)
+{
+ const uint8_t buf[4] = { 0, 0, 0, CTRL_INIT };
+ return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
+}
+
+static inline hal_error_t hal_io_next(const hal_core_t *core)
+{
+ const uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT };
+ return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
+}
+
+static inline hal_error_t hal_io_wait_ready(const hal_core_t *core)
+{
+ int limit = -1;
+ return hal_io_wait(core, STATUS_READY, &limit);
+}
+
+static inline hal_error_t hal_io_wait_valid(const hal_core_t *core)
+{
+ int limit = -1;
+ return hal_io_wait(core, STATUS_VALID, &limit);
+}
+
+static inline hal_error_t hal_io_wait_ready2(const hal_core_t *core1, const hal_core_t *core2)
+{
+ int limit = -1;
+ return hal_io_wait2(core1, core2, STATUS_READY, &limit);
+}
+
+static inline hal_error_t hal_io_wait_valid2(const hal_core_t *core1, const hal_core_t *core2)
+{
+ int limit = -1;
+ return hal_io_wait2(core1, core2, STATUS_VALID, &limit);
+}
+
+/*
* Static memory allocation on start-up. Don't use this except where
* really necessary. By design, there's no way to free this, we don't
* want to have to manage a heap. Intent is just to allow allocation
@@ -370,7 +417,19 @@ static inline hal_crc32_t hal_crc32_finalize(hal_crc32_t crc)
* moment we take the easy way out and cap this at 4096-bit RSA.
*/
+#if 0
#define HAL_KS_WRAPPED_KEYSIZE ((2373 + 15) & ~7)
+#else
+#warning Temporary test hack to HAL_KS_WRAPPED_KEYSIZE, clean this up
+//
+// See how much of the problem we're having with pkey support for the
+// new modexpa7 components is just this buffer size being too small.
+//
+#define HAL_KS_WRAPPED_KEYSIZE ((2373 + 6 * 4096 / 8 + 6 * 4 + 15) & ~7)
+#if HAL_KS_WRAPPED_KEYSIZE + 8 > 4096
+#warning HAL_KS_WRAPPED_KEYSIZE is too big for a single 4096-octet block
+#endif
+#endif
/*
* PINs.
@@ -531,6 +590,10 @@ extern hal_error_t hal_ks_get_attributes(hal_ks_t *ks,
extern hal_error_t hal_ks_logout(hal_ks_t *ks,
const hal_client_handle_t client);
+extern hal_error_t hal_ks_rewrite_der(hal_ks_t *ks,
+ hal_pkey_slot_t *slot,
+ const uint8_t * const der, const size_t der_len);
+
/*
* RPC lowest-level send and receive routines. These are blocking, and
* transport-specific (sockets, USB).
diff --git a/hal_io.c b/hal_io.c
new file mode 100644
index 0000000..f885712
--- /dev/null
+++ b/hal_io.c
@@ -0,0 +1,114 @@
+/*
+ * hal_io.c
+ * --------
+ * This module contains common code to talk to the FPGA over the bus du jour.
+ *
+ * Author: Paul Selkirk, Rob Austein
+ * Copyright (c) 2014-2017, NORDUnet A/S All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the NORDUnet nor the names of its contributors may
+ * be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include "hal.h"
+#include "hal_internal.h"
+
+#ifndef HAL_IO_TIMEOUT
+#define HAL_IO_TIMEOUT 100000000
+#endif
+
+static inline hal_error_t test_status(const hal_core_t *core,
+ const uint8_t status,
+ int *done)
+{
+ if (done == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
+
+ if (*done || core == NULL)
+ return HAL_OK;
+
+ uint8_t buf[4];
+
+ const hal_error_t err = hal_io_read(core, ADDR_STATUS, buf, sizeof(buf));
+
+ if (err == HAL_OK)
+ *done = (buf[3] & status) != 0;
+
+ return err;
+}
+
+hal_error_t hal_io_wait2(const hal_core_t *core1,
+ const hal_core_t *core2,
+ const uint8_t status,
+ int *count)
+{
+ int done1 = 0, done2 = 0;
+ hal_error_t err;
+
+ if (core1 == NULL)
+ return HAL_ERROR_BAD_ARGUMENTS;
+
+ if (core2 == NULL)
+ done2 = 1;
+
+ if (count && *count == -1)
+ *count = HAL_IO_TIMEOUT;
+
+ for (int i = 1; ; ++i) {
+
+ if (count && (*count > 0) && (i >= *count))
+ return HAL_ERROR_IO_TIMEOUT;
+
+ hal_task_yield();
+
+ if ((err = test_status(core1, status, &done1)) != HAL_OK ||
+ (err = test_status(core2, status, &done2)) != HAL_OK)
+ return err;
+
+ if (done1 && done2) {
+ if (count)
+ *count = i;
+ return HAL_OK;
+ }
+ }
+}
+
+hal_error_t hal_io_wait(const hal_core_t *core,
+ const uint8_t status,
+ int *count)
+{
+ return hal_io_wait2(core, NULL, status, count);
+}
+
+/*
+ * Local variables:
+ * indent-tabs-mode: nil
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/hal_io_eim.c b/hal_io_eim.c
index 5824f5b..040cb2b 100644
--- a/hal_io_eim.c
+++ b/hal_io_eim.c
@@ -43,11 +43,7 @@
static int debug = 0;
static int inited = 0;
-#ifndef EIM_IO_TIMEOUT
-#define EIM_IO_TIMEOUT 100000000
-#endif
-
-static hal_error_t init(void)
+static inline hal_error_t init(void)
{
if (inited)
return HAL_OK;
@@ -61,7 +57,7 @@ static hal_error_t init(void)
/* translate cryptech register number to EIM address
*/
-static hal_addr_t eim_offset(hal_addr_t offset)
+static inline hal_addr_t eim_offset(hal_addr_t offset)
{
return EIM_BASE_ADDR + (offset << 2);
}
@@ -134,52 +130,6 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf,
return HAL_OK;
}
-hal_error_t hal_io_init(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_INIT };
- return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
-}
-
-hal_error_t hal_io_next(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT };
- return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
-}
-
-hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count)
-{
- hal_error_t err;
- uint8_t buf[4];
- int i;
-
- for (i = 1; ; ++i) {
-
- if (count && (*count > 0) && (i >= *count))
- return HAL_ERROR_IO_TIMEOUT;
-
- if ((err = hal_io_read(core, ADDR_STATUS, buf, sizeof(buf))) != HAL_OK)
- return err;
-
- if ((buf[3] & status) != 0) {
- if (count)
- *count = i;
- return HAL_OK;
- }
- }
-}
-
-hal_error_t hal_io_wait_ready(const hal_core_t *core)
-{
- int limit = EIM_IO_TIMEOUT;
- return hal_io_wait(core, STATUS_READY, &limit);
-}
-
-hal_error_t hal_io_wait_valid(const hal_core_t *core)
-{
- int limit = EIM_IO_TIMEOUT;
- return hal_io_wait(core, STATUS_VALID, &limit);
-}
-
/*
* Local variables:
* indent-tabs-mode: nil
diff --git a/hal_io_fmc.c b/hal_io_fmc.c
index 76d6883..0d49f1e 100644
--- a/hal_io_fmc.c
+++ b/hal_io_fmc.c
@@ -47,11 +47,7 @@
static int debug = 0;
static int inited = 0;
-#ifndef FMC_IO_TIMEOUT
-#define FMC_IO_TIMEOUT 100000000
-#endif
-
-static hal_error_t init(void)
+static inline hal_error_t init(void)
{
if (!inited) {
fmc_init();
@@ -62,7 +58,7 @@ static hal_error_t init(void)
/* Translate cryptech register number to FMC address.
*/
-static hal_addr_t fmc_offset(hal_addr_t offset)
+static inline hal_addr_t fmc_offset(hal_addr_t offset)
{
return offset << 2;
}
@@ -136,54 +132,6 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf,
return HAL_OK;
}
-hal_error_t hal_io_init(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_INIT };
- return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
-}
-
-hal_error_t hal_io_next(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT };
- return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf));
-}
-
-hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count)
-{
- hal_error_t err;
- uint8_t buf[4];
- int i;
-
- for (i = 1; ; ++i) {
-
- if (count && (*count > 0) && (i >= *count))
- return HAL_ERROR_IO_TIMEOUT;
-
- hal_task_yield();
-
- if ((err = hal_io_read(core, ADDR_STATUS, buf, sizeof(buf))) != HAL_OK)
- return err;
-
- if ((buf[3] & status) != 0) {
- if (count)
- *count = i;
- return HAL_OK;
- }
- }
-}
-
-hal_error_t hal_io_wait_ready(const hal_core_t *core)
-{
- int limit = FMC_IO_TIMEOUT;
- return hal_io_wait(core, STATUS_READY, &limit);
-}
-
-hal_error_t hal_io_wait_valid(const hal_core_t *core)
-{
- int limit = FMC_IO_TIMEOUT;
- return hal_io_wait(core, STATUS_VALID, &limit);
-}
-
/*
* Local variables:
* indent-tabs-mode: nil
diff --git a/hal_io_i2c.c b/hal_io_i2c.c
index e7dbbb6..8596174 100644
--- a/hal_io_i2c.c
+++ b/hal_io_i2c.c
@@ -301,53 +301,6 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf,
return HAL_OK;
}
-hal_error_t hal_io_init(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_INIT };
- return hal_io_write(core, ADDR_CTRL, buf, 4);
-}
-
-hal_error_t hal_io_next(const hal_core_t *core)
-{
- uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT };
- return hal_io_write(core, ADDR_CTRL, buf, 4);
-}
-
-hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count)
-{
- hal_error_t err;
- uint8_t buf[4];
- int i;
-
- for (i = 1; ; ++i) {
-
- if (count && (*count > 0) && (i >= *count))
- return HAL_ERROR_IO_TIMEOUT;
-
- if ((err = hal_io_read(core, ADDR_STATUS, buf, 4)) != HAL_OK)
- return err;
-
- if (buf[3] & status) {
- if (count)
- *count = i;
- return HAL_OK;
-
- }
- }
-}
-
-hal_error_t hal_io_wait_ready(const hal_core_t *core)
-{
- int limit = 10;
- return hal_io_wait(core, STATUS_READY, &limit);
-}
-
-hal_error_t hal_io_wait_valid(const hal_core_t *core)
-{
- int limit = 10;
- return hal_io_wait(core, STATUS_VALID, &limit);
-}
-
/*
* Local variables:
* indent-tabs-mode: nil
diff --git a/ks.c b/ks.c
index b39e3f6..1598679 100644
--- a/ks.c
+++ b/ks.c
@@ -520,6 +520,46 @@ static inline int acceptable_key_type(const hal_key_type_t type)
}
}
+/*
+ * Internal bits of constructing a new key block.
+ */
+
+static hal_error_t construct_key_block(hal_ks_block_t *block,
+ hal_pkey_slot_t *slot,
+ const uint8_t * const der, const size_t der_len)
+{
+ if (block == NULL || slot == NULL || der == NULL || der_len == 0)
+ return HAL_ERROR_IMPOSSIBLE;
+
+ hal_ks_key_block_t *k = &block->key;
+ hal_error_t err = HAL_OK;
+ uint8_t kek[KEK_LENGTH];
+ size_t kek_len;
+
+ memset(block, 0xFF, sizeof(*block));
+
+ block->header.block_type = HAL_KS_BLOCK_TYPE_KEY;
+ block->header.block_status = HAL_KS_BLOCK_STATUS_LIVE;
+
+ k->name = slot->name;
+ k->type = slot->type;
+ k->curve = slot->curve;
+ k->flags = slot->flags;
+ k->der_len = SIZEOF_KS_KEY_BLOCK_DER;
+ k->attributes_len = 0;
+
+ if ((err = hal_mkm_get_kek(kek, &kek_len, sizeof(kek))) == HAL_OK)
+ err = hal_aes_keywrap(NULL, kek, kek_len, der, der_len, k->der, &k->der_len);
+
+ memset(kek, 0, sizeof(kek));
+
+ return err;
+}
+
+/*
+ * Store a key block.
+ */
+
hal_error_t hal_ks_store(hal_ks_t *ks,
hal_pkey_slot_t *slot,
const uint8_t * const der, const size_t der_len)
@@ -529,9 +569,6 @@ hal_error_t hal_ks_store(hal_ks_t *ks,
hal_error_t err = HAL_OK;
hal_ks_block_t *block;
- hal_ks_key_block_t *k;
- uint8_t kek[KEK_LENGTH];
- size_t kek_len;
unsigned b;
hal_ks_lock();
@@ -541,35 +578,16 @@ hal_error_t hal_ks_store(hal_ks_t *ks,
goto done;
}
- k = &block->key;
-
if ((err = hal_ks_index_add(ks, &slot->name, &b, &slot->hint)) != HAL_OK)
goto done;
hal_ks_cache_mark_used(ks, block, b);
- memset(block, 0xFF, sizeof(*block));
-
- block->header.block_type = HAL_KS_BLOCK_TYPE_KEY;
- block->header.block_status = HAL_KS_BLOCK_STATUS_LIVE;
-
- k->name = slot->name;
- k->type = slot->type;
- k->curve = slot->curve;
- k->flags = slot->flags;
- k->der_len = SIZEOF_KS_KEY_BLOCK_DER;
- k->attributes_len = 0;
-
if (ks->used < ks->size)
err = hal_ks_block_erase_maybe(ks, ks->index[ks->used]);
if (err == HAL_OK)
- err = hal_mkm_get_kek(kek, &kek_len, sizeof(kek));
-
- if (err == HAL_OK)
- err = hal_aes_keywrap(NULL, kek, kek_len, der, der_len, k->der, &k->der_len);
-
- memset(kek, 0, sizeof(kek));
+ err = construct_key_block(block, slot, der, der_len);
if (err == HAL_OK)
err = hal_ks_block_write(ks, b, block);
@@ -933,6 +951,65 @@ hal_error_t hal_ks_get_attributes(hal_ks_t *ks,
return err;
}
+hal_error_t hal_ks_rewrite_der(hal_ks_t *ks,
+ hal_pkey_slot_t *slot,
+ const uint8_t * const der, const size_t der_len)
+{
+ if (ks == NULL || slot == NULL || der == NULL || der_len == 0 || !acceptable_key_type(slot->type))
+ return HAL_ERROR_BAD_ARGUMENTS;
+
+ hal_ks_block_t *block = NULL;
+ hal_error_t err = HAL_OK;
+ unsigned b;
+
+ hal_ks_lock();
+
+ {
+ if ((err = hal_ks_index_find(ks, &slot->name, &b, &slot->hint)) != HAL_OK ||
+ (err = hal_ks_block_test_owner(ks, b, slot->client, slot->session)) != HAL_OK ||
+ (err = hal_ks_block_read_cached(ks, b, &block)) != HAL_OK)
+ goto done;
+
+ hal_ks_cache_mark_used(ks, block, b);
+
+ size_t bytes_len = 0, attributes_len = 0;
+ unsigned *count = NULL;
+ uint8_t *bytes = NULL;
+
+ if ((err = locate_attributes(block, &bytes, &bytes_len, &count)) != HAL_OK ||
+ (err = hal_ks_attribute_scan(bytes, bytes_len, NULL, *count, &attributes_len)) != HAL_OK)
+ goto done;
+
+ if (der_len + attributes_len > SIZEOF_KS_KEY_BLOCK_DER) {
+ err = HAL_ERROR_RESULT_TOO_LONG;
+ goto done;
+ }
+
+ uint8_t attributes[attributes_len > 0 ? attributes_len : 1];
+ hal_ks_key_block_t *k = &block->key;
+ unsigned attributes_count = *count;
+
+ memcpy(attributes, bytes, attributes_len);
+
+ if ((err = construct_key_block(block, slot, der, der_len)) != HAL_OK)
+ goto done;
+
+ if (k->der_len + attributes_len > SIZEOF_KS_KEY_BLOCK_DER) {
+ err = HAL_ERROR_IMPOSSIBLE;
+ goto done;
+ }
+
+ memcpy(k->der + k->der_len, attributes, attributes_len);
+ k->attributes_len = attributes_count;
+
+ err = hal_ks_block_update(ks, b, block, &slot->name, &slot->hint);
+ }
+
+ done:
+ hal_ks_unlock();
+ return err;
+}
+
/*
* Local variables:
* indent-tabs-mode: nil
diff --git a/ks.h b/ks.h
index 7bfdd61..ae1ba1c 100644
--- a/ks.h
+++ b/ks.h
@@ -46,7 +46,7 @@
*/
#ifndef HAL_KS_BLOCK_SIZE
-#define HAL_KS_BLOCK_SIZE (4096)
+#define HAL_KS_BLOCK_SIZE (4096 * 2)
#endif
/*
diff --git a/ks_token.c b/ks_token.c
index 4950f0b..1676bc0 100644
--- a/ks_token.c
+++ b/ks_token.c
@@ -60,12 +60,13 @@
#define KS_TOKEN_CACHE_SIZE 4
#endif
-#define NUM_FLASH_BLOCKS KEYSTORE_NUM_SUBSECTORS
-
#if HAL_KS_BLOCK_SIZE % KEYSTORE_SUBSECTOR_SIZE != 0
#error Keystore block size is not a multiple of flash subsector size
#endif
+#define NUM_FLASH_BLOCKS ((KEYSTORE_NUM_SUBSECTORS * KEYSTORE_SUBSECTOR_SIZE) / HAL_KS_BLOCK_SIZE)
+#define SUBSECTORS_PER_BLOCK (HAL_KS_BLOCK_SIZE / KEYSTORE_SUBSECTOR_SIZE)
+
/*
* Keystore database.
*/
@@ -90,7 +91,7 @@ typedef struct {
static inline uint32_t ks_token_offset(const unsigned blockno)
{
- return blockno * KEYSTORE_SUBSECTOR_SIZE;
+ return blockno * HAL_KS_BLOCK_SIZE;
}
/*
@@ -102,7 +103,7 @@ static inline uint32_t ks_token_offset(const unsigned blockno)
static hal_error_t ks_token_read(hal_ks_t *ks, const unsigned blockno, hal_ks_block_t *block)
{
- if (ks != hal_ks_token || block == NULL || blockno >= NUM_FLASH_BLOCKS || sizeof(*block) != KEYSTORE_SUBSECTOR_SIZE)
+ if (ks != hal_ks_token || block == NULL || blockno >= NUM_FLASH_BLOCKS || sizeof(*block) != HAL_KS_BLOCK_SIZE)
return HAL_ERROR_IMPOSSIBLE;
if (keystore_read_data(ks_token_offset(blockno),
@@ -192,8 +193,13 @@ static hal_error_t ks_token_erase(hal_ks_t *ks, const unsigned blockno)
if (ks != hal_ks_token || blockno >= NUM_FLASH_BLOCKS)
return HAL_ERROR_IMPOSSIBLE;
- if (keystore_erase_subsector(blockno) != CMSIS_HAL_OK)
- return HAL_ERROR_KEYSTORE_ACCESS;
+ unsigned subsector = blockno * SUBSECTORS_PER_BLOCK;
+ const unsigned end = (blockno + 1) * SUBSECTORS_PER_BLOCK;
+
+ do {
+ if (keystore_erase_subsector(subsector) != CMSIS_HAL_OK)
+ return HAL_ERROR_KEYSTORE_ACCESS;
+ } while (++subsector < end);
return HAL_OK;
}
@@ -232,7 +238,7 @@ static hal_error_t ks_token_erase_maybe(hal_ks_t *ks, const unsigned blockno)
static hal_error_t ks_token_write(hal_ks_t *ks, const unsigned blockno, hal_ks_block_t *block)
{
- if (ks != hal_ks_token || block == NULL || blockno >= NUM_FLASH_BLOCKS || sizeof(*block) != KEYSTORE_SUBSECTOR_SIZE)
+ if (ks != hal_ks_token || block == NULL || blockno >= NUM_FLASH_BLOCKS || sizeof(*block) != HAL_KS_BLOCK_SIZE)
return HAL_ERROR_IMPOSSIBLE;
hal_error_t err = ks_token_erase_maybe(ks, blockno);
diff --git a/modexp.c b/modexp.c
index 3e634aa..7973258 100644
--- a/modexp.c
+++ b/modexp.c
@@ -43,7 +43,6 @@
#include <stdio.h>
#include <stdint.h>
-#include <assert.h>
#include "hal.h"
#include "hal_internal.h"
@@ -60,175 +59,270 @@ void hal_modexp_set_debug(const int onoff)
}
/*
- * Check a result, report on failure if debugging, pass failures up
- * the chain.
+ * Get value of an ordinary register.
*/
-#define check(_expr_) \
- do { \
- hal_error_t _err = (_expr_); \
- if (_err != HAL_OK && debug) \
- printf("%s failed: %s\n", #_expr_, hal_error_string(_err)); \
- if (_err != HAL_OK) \
- return _err; \
- } while (0)
+static hal_error_t inline get_register(const hal_core_t *core,
+ const hal_addr_t addr,
+ uint32_t *value)
+{
+ hal_error_t err;
+ uint8_t w[4];
+
+ if (value == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
+
+ if ((err = hal_io_read(core, addr, w, sizeof(w))) != HAL_OK)
+ return err;
+
+ *value = (w[0] << 0) | (w[1] << 8) | (w[2] << 16) | (w[3] << 24);
+
+ return HAL_OK;
+}
/*
- * Set an ordinary register.
+ * Set value of an ordinary register.
*/
-static hal_error_t set_register(const hal_core_t *core,
- const hal_addr_t addr,
- uint32_t value)
+static hal_error_t inline set_register(const hal_core_t *core,
+ const hal_addr_t addr,
+ const uint32_t value)
{
- uint8_t w[4];
- int i;
-
- for (i = 3; i >= 0; i--) {
- w[i] = value & 0xFF;
- value >>= 8;
- }
+ const uint8_t w[4] = {
+ ((value >> 24) & 0xFF),
+ ((value >> 16) & 0xFF),
+ ((value >> 8) & 0xFF),
+ ((value >> 0) & 0xFF)
+ };
return hal_io_write(core, addr, w, sizeof(w));
}
/*
* Get value of a data buffer. We reverse the order of 32-bit words
- * in the buffer during the transfer to match what the modexps6 core
+ * in the buffer during the transfer to match what the modexpa7 core
* expects.
*/
-static hal_error_t get_buffer(const hal_core_t *core,
- const hal_addr_t data_addr,
- uint8_t *value,
- const size_t length)
+static inline hal_error_t get_buffer(const hal_core_t *core,
+ const hal_addr_t data_addr,
+ uint8_t *value,
+ const size_t length)
{
+ hal_error_t err;
size_t i;
- assert(value != NULL && length % 4 == 0);
+ if (value == NULL || length % 4 != 0)
+ return HAL_ERROR_IMPOSSIBLE;
for (i = 0; i < length; i += 4)
- check(hal_io_read(core, data_addr + i/4, &value[length - 4 - i], 4));
+ if ((err = hal_io_read(core, data_addr + i/4, &value[length - 4 - i], 4)) != HAL_OK)
+ return err;
return HAL_OK;
}
/*
* Set value of a data buffer. We reverse the order of 32-bit words
- * in the buffer during the transfer to match what the modexps6 core
+ * in the buffer during the transfer to match what the modexpa7 core
* expects.
+ *
+ * Do we need to zero the portion of the buffer we're not using
+ * explictly (that is, the portion between `length` and the value of
+ * the core's MODEXPA7_ADDR_BUFFER_BITS register)? We've gotten away
+ * without doing this so far, but the core doesn't take an explicit
+ * length parameter for the message itself, instead it assumes that
+ * the message is either as long as or twice as long as the exponent,
+ * depending on the setting of the CRT mode bit. Maybe initializing
+ * the core clears the excess bits so there's no issue? Dunno. Have
+ * never seen a problem with this yet, just dont' know why not.
*/
-static hal_error_t set_buffer(const hal_core_t *core,
- const hal_addr_t data_addr,
- const uint8_t * const value,
- const size_t length)
+static inline hal_error_t set_buffer(const hal_core_t *core,
+ const hal_addr_t data_addr,
+ const uint8_t * const value,
+ const size_t length)
{
+ hal_error_t err;
size_t i;
- assert(value != NULL && length % 4 == 0);
+ if (value == NULL || length % 4 != 0)
+ return HAL_ERROR_IMPOSSIBLE;
for (i = 0; i < length; i += 4)
- check(hal_io_write(core, data_addr + i/4, &value[length - 4 - i], 4));
+ if ((err = hal_io_write(core, data_addr + i/4, &value[length - 4 - i], 4)) != HAL_OK)
+ return err;
return HAL_OK;
}
/*
- * Run one modexp operation.
+ * Stuff moved out of modexp so we can run two cores in parallel more
+ * easily. We have to return to the jacket routine every time we kick
+ * a core into doing something, since only the jacket routines know
+ * how many cores we're running for any particular calculation.
+ *
+ * In theory we could do something clever where we don't wait for both
+ * cores to finish precalc before starting either of them on the main
+ * computation, but that way probably lies madness.
*/
-hal_error_t hal_modexp(hal_core_t *core,
- const uint8_t * const msg, const size_t msg_len, /* Message */
- const uint8_t * const exp, const size_t exp_len, /* Exponent */
- const uint8_t * const mod, const size_t mod_len, /* Modulus */
- uint8_t *result, const size_t result_len)
+static inline hal_error_t check_args(hal_modexp_arg_t *a)
+{
+ /*
+ * All data pointers must be set, exponent may not be longer than
+ * modulus, message may not be longer than twice the modulus (CRT
+ * mode), result buffer must not be shorter than modulus, and all
+ * input lengths must be a multiple of four bytes (the core is all
+ * about 32-bit words).
+ */
+
+ if (a == NULL ||
+ a->msg == NULL || a->msg_len > MODEXPA7_OPERAND_BYTES || a->msg_len > a->mod_len * 2 ||
+ a->exp == NULL || a->exp_len > MODEXPA7_OPERAND_BYTES || a->exp_len > a->mod_len ||
+ a->mod == NULL || a->mod_len > MODEXPA7_OPERAND_BYTES ||
+ a->result == NULL || a->result_len > MODEXPA7_OPERAND_BYTES || a->result_len < a->mod_len ||
+ a->coeff == NULL || a->coeff_len > MODEXPA7_OPERAND_BYTES ||
+ a->mont == NULL || a->mont_len > MODEXPA7_OPERAND_BYTES ||
+ ((a->msg_len | a->exp_len | a->mod_len) & 3) != 0)
+ return HAL_ERROR_BAD_ARGUMENTS;
+
+ return HAL_OK;
+}
+
+static inline hal_error_t setup_precalc(const int precalc, hal_modexp_arg_t *a)
{
hal_error_t err;
/*
- * All pointers must be set, neither message nor exponent may be
- * longer than modulus, result buffer must not be shorter than
- * modulus, and all input lengths must be a multiple of four.
- *
- * The multiple-of-four restriction is a pain, but the rest of the
- * HAL code currently enforces the same restriction, and allowing
- * arbitrary lengths would require some tedious shuffling to deal
- * with alignment issues, so it's not worth trying to fix only here.
+ * Check that operand size is compatabible with the core.
*/
- if (msg == NULL || exp == NULL || mod == NULL || result == NULL ||
- msg_len > mod_len || exp_len > mod_len || result_len < mod_len ||
- ((msg_len | exp_len | mod_len) & 3) != 0)
+ uint32_t operand_max = 0;
+
+ if ((err = get_register(a->core, MODEXPA7_ADDR_BUFFER_BITS, &operand_max)) != HAL_OK)
+ return err;
+
+ operand_max /= 8;
+
+ if (a->msg_len > operand_max ||
+ a->exp_len > operand_max ||
+ a->mod_len > operand_max ||
+ a->coeff_len > operand_max ||
+ a->mont_len > operand_max)
return HAL_ERROR_BAD_ARGUMENTS;
- if (((err = hal_core_alloc(MODEXPS6_NAME, &core)) == HAL_ERROR_CORE_NOT_FOUND) &&
- ((err = hal_core_alloc(MODEXPA7_NAME, &core)) != HAL_OK))
+ /*
+ * Set the modulus, then initiate calculation of modulus-dependent
+ * speedup factors if necessary, by edge-triggering the "init" bit,
+ * then return to caller so it can wait for precalc.
+ */
+
+ if ((err = set_register(a->core, MODEXPA7_ADDR_MODULUS_BITS, a->mod_len * 8)) != HAL_OK ||
+ (err = set_buffer(a->core, MODEXPA7_ADDR_MODULUS, a->mod, a->mod_len)) != HAL_OK ||
+ (precalc && (err = hal_io_zero(a->core)) != HAL_OK) ||
+ (precalc && (err = hal_io_init(a->core)) != HAL_OK))
return err;
-#undef check
-#define check(_expr_) \
- do { \
- hal_error_t _err = (_expr_); \
- if (_err != HAL_OK && debug) \
- printf("%s failed: %s\n", #_expr_, hal_error_string(_err)); \
- if (_err != HAL_OK) { \
- hal_core_free(core); \
- return _err; \
- } \
- } while (0)
+ return HAL_OK;
+}
+
+static inline hal_error_t setup_calc(const int precalc, hal_modexp_arg_t *a)
+{
+ hal_error_t err;
/*
- * We probably ought to take the mode (fast vs constant-time) as an
- * argument, but for the moment we just guess that really short
- * exponent means we're using the public key and can use fast mode,
- * really short messages are Miller-Rabin tests and can also use
- * fast mode, all other cases are something to do with the private
- * key and therefore must use constant-time mode.
- *
- * Unclear whether it's worth trying to figure out exactly how long
- * the operands are: assuming a multiple of eight is safe, but makes
- * a bit more work for the core; checking to see how many bits are
- * really set leaves the core sitting idle while the main CPU does
- * these checks. No way to know which is faster without testing;
- * take simple approach for the moment.
+ * Select CRT mode if and only if message is longer than exponent.
*/
- /* Select mode (1 = fast, 0 = safe) */
- check(set_register(core, MODEXPS6_ADDR_MODE, (exp_len <= 4 || msg_len <= 4)));
+ const uint32_t mode = a->msg_len > a->mod_len ? MODEXPA7_MODE_CRT : MODEXPA7_MODE_PLAIN;
- /* Set modulus size in bits */
- check(set_register(core, MODEXPS6_ADDR_MODULUS_WIDTH, mod_len * 8));
+ /*
+ * Copy out precalc results if necessary, then load everything and
+ * start the calculation by edge-triggering the "next" bit. If
+ * everything works, return to caller so it can wait for the
+ * calculation to complete.
+ */
- /* Write new modulus */
- check(set_buffer(core, MODEXPS6_ADDR_MODULUS, mod, mod_len));
+ if ((precalc &&
+ (err = get_buffer(a->core, MODEXPA7_ADDR_MODULUS_COEFF_OUT, a->coeff, a->coeff_len)) != HAL_OK) ||
+ (precalc &&
+ (err = get_buffer(a->core, MODEXPA7_ADDR_MONTGOMERY_FACTOR_OUT, a->mont, a->mont_len)) != HAL_OK) ||
+ (err = set_buffer(a->core, MODEXPA7_ADDR_MODULUS_COEFF_IN, a->coeff, a->coeff_len)) != HAL_OK ||
+ (err = set_buffer(a->core, MODEXPA7_ADDR_MONTGOMERY_FACTOR_IN, a->mont, a->mont_len)) != HAL_OK ||
+ (err = set_register(a->core, MODEXPA7_ADDR_MODE, mode)) != HAL_OK ||
+ (err = set_buffer(a->core, MODEXPA7_ADDR_MESSAGE, a->msg, a->msg_len)) != HAL_OK ||
+ (err = set_buffer(a->core, MODEXPA7_ADDR_EXPONENT, a->exp, a->exp_len)) != HAL_OK ||
+ (err = set_register(a->core, MODEXPA7_ADDR_EXPONENT_BITS, a->exp_len * 8)) != HAL_OK ||
+ (err = hal_io_zero(a->core)) != HAL_OK ||
+ (err = hal_io_next(a->core)) != HAL_OK)
+ return err;
- /* Pre-calcuate speed-up coefficient */
- check(hal_io_init(core));
+ return HAL_OK;
+}
- /* Wait for calculation to complete */
- check(hal_io_wait_ready(core));
+static inline hal_error_t extract_result(hal_modexp_arg_t *a)
+{
+ /*
+ * Extract results from the main calculation and we're done.
+ * Hardly seems worth making this a separate function.
+ */
- /* Write new message */
- check(set_buffer(core, MODEXPS6_ADDR_MESSAGE, msg, msg_len));
+ return get_buffer(a->core, MODEXPA7_ADDR_RESULT, a->result, a->mod_len);
+}
- /* Set new exponent length in bits */
- check(set_register(core, MODEXPS6_ADDR_EXPONENT_WIDTH, exp_len * 8));
+/*
+ * Run one modexp operation.
+ */
- /* Set new exponent */
- check(set_buffer(core, MODEXPS6_ADDR_EXPONENT, exp, exp_len));
+hal_error_t hal_modexp(const int precalc, hal_modexp_arg_t *a)
+{
+ hal_error_t err;
- /* Start calculation */
- check(hal_io_next(core));
+ if ((err = check_args(a)) != HAL_OK)
+ return err;
- /* Wait for result */
- check(hal_io_wait_valid(core));
+ if ((err = hal_core_alloc(MODEXPA7_NAME, &a->core)) == HAL_OK &&
+ (err = setup_precalc(precalc, a)) == HAL_OK &&
+ (!precalc ||
+ (err = hal_io_wait_ready(a->core)) == HAL_OK) &&
+ (err = setup_calc(precalc, a)) == HAL_OK &&
+ (err = hal_io_wait_valid(a->core)) == HAL_OK &&
+ (err = extract_result(a)) == HAL_OK)
+ err = HAL_OK;
+
+ hal_core_free(a->core);
+ return err;
+}
- /* Extract result */
- check(get_buffer(core, MODEXPS6_ADDR_RESULT, result, mod_len));
+/*
+ * Run two modexp operations in parallel.
+ */
- hal_core_free(core);
- return HAL_OK;
+hal_error_t hal_modexp2(const int precalc, hal_modexp_arg_t *a1, hal_modexp_arg_t *a2)
+{
+ hal_error_t err;
+
+ if ((err = check_args(a1)) != HAL_OK ||
+ (err = check_args(a2)) != HAL_OK)
+ return err;
+
+ if ((err = hal_core_alloc(MODEXPA7_NAME, &a1->core)) == HAL_OK &&
+ (err = hal_core_alloc(MODEXPA7_NAME, &a2->core)) == HAL_OK &&
+ (err = setup_precalc(precalc, a1)) == HAL_OK &&
+ (err = setup_precalc(precalc, a2)) == HAL_OK &&
+ (!precalc ||
+ (err = hal_io_wait_ready2(a1->core, a2->core)) == HAL_OK) &&
+ (err = setup_calc(precalc, a1)) == HAL_OK &&
+ (err = setup_calc(precalc, a2)) == HAL_OK &&
+ (err = hal_io_wait_valid2(a1->core, a2->core)) == HAL_OK &&
+ (err = extract_result(a1)) == HAL_OK &&
+ (err = extract_result(a2)) == HAL_OK)
+ err = HAL_OK;
+
+ hal_core_free(a1->core);
+ hal_core_free(a2->core);
+ return err;
}
/*
diff --git a/rpc_pkey.c b/rpc_pkey.c
index 55cc054..294a3e5 100644
--- a/rpc_pkey.c
+++ b/rpc_pkey.c
@@ -734,7 +734,8 @@ static hal_error_t pkey_local_get_public_key(const hal_pkey_handle_t pkey,
* algorithm-specific functions.
*/
-static hal_error_t pkey_local_sign_rsa(uint8_t *keybuf, const size_t keybuf_len,
+static hal_error_t pkey_local_sign_rsa(hal_pkey_slot_t *slot,
+ uint8_t *keybuf, const size_t keybuf_len,
const uint8_t * const der, const size_t der_len,
const hal_hash_handle_t hash,
const uint8_t * input, size_t input_len,
@@ -759,14 +760,25 @@ static hal_error_t pkey_local_sign_rsa(uint8_t *keybuf, const size_t keybuf_len,
input = signature;
}
- if ((err = pkcs1_5_pad(input, input_len, signature, *signature_len, 0x01)) != HAL_OK ||
- (err = hal_rsa_decrypt(NULL, key, signature, *signature_len, signature, *signature_len)) != HAL_OK)
+ if ((err = pkcs1_5_pad(input, input_len, signature, *signature_len, 0x01)) != HAL_OK ||
+ (err = hal_rsa_decrypt(NULL, NULL, key, signature, *signature_len, signature, *signature_len)) != HAL_OK)
return err;
+ if (hal_rsa_key_needs_saving(key)) {
+ uint8_t pkcs8[hal_rsa_private_key_to_der_extra_len(key)];
+ size_t pkcs8_len = 0;
+ if ((err = hal_rsa_private_key_to_der_extra(key, pkcs8, &pkcs8_len, sizeof(pkcs8))) == HAL_OK)
+ err = hal_ks_rewrite_der(ks_from_flags(slot->flags), slot, pkcs8, pkcs8_len);
+ memset(pkcs8, 0, sizeof(pkcs8));
+ if (err != HAL_OK)
+ return err;
+ }
+
return HAL_OK;
}
-static hal_error_t pkey_local_sign_ecdsa(uint8_t *keybuf, const size_t keybuf_len,
+static hal_error_t pkey_local_sign_ecdsa(hal_pkey_slot_t *slot,
+ uint8_t *keybuf, const size_t keybuf_len,
const uint8_t * const der, const size_t der_len,
const hal_hash_handle_t hash,
const uint8_t * input, size_t input_len,
@@ -813,7 +825,8 @@ static hal_error_t pkey_local_sign(const hal_pkey_handle_t pkey,
if (slot == NULL)
return HAL_ERROR_KEY_NOT_FOUND;
- hal_error_t (*signer)(uint8_t *keybuf, const size_t keybuf_len,
+ hal_error_t (*signer)(hal_pkey_slot_t *slot,
+ uint8_t *keybuf, const size_t keybuf_len,
const uint8_t * const der, const size_t der_len,
const hal_hash_handle_t hash,
const uint8_t * const input, const size_t input_len,
@@ -840,7 +853,7 @@ static hal_error_t pkey_local_sign(const hal_pkey_handle_t pkey,
hal_error_t err;
if ((err = ks_fetch_from_flags(slot, der, &der_len, sizeof(der))) == HAL_OK)
- err = signer(keybuf, sizeof(keybuf), der, der_len, hash, input, input_len,
+ err = signer(slot, keybuf, sizeof(keybuf), der, der_len, hash, input, input_len,
signature, signature_len, signature_max);
memset(keybuf, 0, sizeof(keybuf));
@@ -1263,7 +1276,7 @@ static hal_error_t pkey_local_import(const hal_client_handle_t client,
goto fail;
}
- if ((err = hal_rsa_decrypt(NULL, rsa, data, data_len, der, data_len)) != HAL_OK)
+ if ((err = hal_rsa_decrypt(NULL, NULL, rsa, data, data_len, der, data_len)) != HAL_OK)
goto fail;
if ((err = hal_get_random(NULL, kek, sizeof(kek))) != HAL_OK)
diff --git a/rsa.c b/rsa.c
index 9af67d3..b5e52c5 100644
--- a/rsa.c
+++ b/rsa.c
@@ -70,7 +70,6 @@
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
-#include <assert.h>
#include "hal.h"
#include "hal_internal.h"
@@ -94,6 +93,15 @@
#endif
/*
+ * How big to make the buffers for the modulus coefficient and
+ * Montgomery factor. This will almost certainly want tuning.
+ */
+
+#ifndef HAL_RSA_MAX_OPERAND_LENGTH
+#define HAL_RSA_MAX_OPERAND_LENGTH MODEXPA7_OPERAND_BYTES
+#endif
+
+/*
* Whether we want debug output.
*/
@@ -123,7 +131,7 @@ void hal_rsa_set_blinding(const int onoff)
*/
struct hal_rsa_key {
- hal_key_type_t type; /* What kind of key this is */
+ hal_key_type_t type; /* What kind of key this is */
fp_int n[1]; /* The modulus */
fp_int e[1]; /* Public exponent */
fp_int d[1]; /* Private exponent */
@@ -132,8 +140,17 @@ struct hal_rsa_key {
fp_int u[1]; /* 1/q mod p */
fp_int dP[1]; /* d mod (p - 1) */
fp_int dQ[1]; /* d mod (q - 1) */
+ unsigned flags; /* Internal key flags */
+ uint8_t /* ModExpA7 speedup factors */
+ nC[HAL_RSA_MAX_OPERAND_LENGTH], nF[HAL_RSA_MAX_OPERAND_LENGTH],
+ pC[HAL_RSA_MAX_OPERAND_LENGTH/2], pF[HAL_RSA_MAX_OPERAND_LENGTH/2],
+ qC[HAL_RSA_MAX_OPERAND_LENGTH/2], qF[HAL_RSA_MAX_OPERAND_LENGTH/2];
};
+#define RSA_FLAG_NEEDS_SAVING (1 << 0)
+#define RSA_FLAG_PRECALC_N_DONE (1 << 1)
+#define RSA_FLAG_PRECALC_PQ_DONE (1 << 2)
+
const size_t hal_rsa_key_t_size = sizeof(hal_rsa_key_t);
/*
@@ -158,7 +175,7 @@ const size_t hal_rsa_key_t_size = sizeof(hal_rsa_key_t);
case FP_OKAY: break; \
case FP_VAL: lose(HAL_ERROR_BAD_ARGUMENTS); \
case FP_MEM: lose(HAL_ERROR_ALLOCATION_FAILURE); \
- default: lose(HAL_ERROR_IMPOSSIBLE); \
+ default: lose(HAL_ERROR_IMPOSSIBLE); \
} \
} while (0)
@@ -171,7 +188,8 @@ static hal_error_t unpack_fp(const fp_int * const bn, uint8_t *buffer, const siz
{
hal_error_t err = HAL_OK;
- assert(bn != NULL && buffer != NULL);
+ if (bn == NULL || buffer == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
const size_t bytes = fp_unsigned_bin_size(unconst_fp_int(bn));
@@ -193,22 +211,18 @@ static hal_error_t unpack_fp(const fp_int * const bn, uint8_t *buffer, const siz
*/
static hal_error_t modexp(hal_core_t *core,
- const fp_int * msg,
+ const int precalc,
+ const fp_int * const msg,
const fp_int * const exp,
const fp_int * const mod,
- fp_int *res)
+ fp_int *res,
+ uint8_t *coeff, const size_t coeff_len,
+ uint8_t *mont, const size_t mont_len)
{
hal_error_t err = HAL_OK;
- assert(msg != NULL && exp != NULL && mod != NULL && res != NULL);
-
- fp_int reduced_msg[1] = INIT_FP_INT;
-
- if (fp_cmp_mag(unconst_fp_int(msg), unconst_fp_int(mod)) != FP_LT) {
- fp_init(reduced_msg);
- fp_mod(unconst_fp_int(msg), unconst_fp_int(mod), reduced_msg);
- msg = reduced_msg;
- }
+ if (msg == NULL || exp == NULL || mod == NULL || res == NULL || coeff == NULL || mont == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
const size_t msg_len = (fp_unsigned_bin_size(unconst_fp_int(msg)) + 3) & ~3;
const size_t exp_len = (fp_unsigned_bin_size(unconst_fp_int(exp)) + 3) & ~3;
@@ -219,14 +233,20 @@ static hal_error_t modexp(hal_core_t *core,
uint8_t modbuf[mod_len];
uint8_t resbuf[mod_len];
+ hal_modexp_arg_t args = {
+ .core = core,
+ .msg = msgbuf, .msg_len = sizeof(msgbuf),
+ .exp = expbuf, .exp_len = sizeof(expbuf),
+ .mod = modbuf, .mod_len = sizeof(modbuf),
+ .result = resbuf, .result_len = sizeof(resbuf),
+ .coeff = coeff, .coeff_len = coeff_len,
+ .mont = mont, .mont_len = mont_len
+ };
+
if ((err = unpack_fp(msg, msgbuf, sizeof(msgbuf))) != HAL_OK ||
(err = unpack_fp(exp, expbuf, sizeof(expbuf))) != HAL_OK ||
(err = unpack_fp(mod, modbuf, sizeof(modbuf))) != HAL_OK ||
- (err = hal_modexp(core,
- msgbuf, sizeof(msgbuf),
- expbuf, sizeof(expbuf),
- modbuf, sizeof(modbuf),
- resbuf, sizeof(resbuf))) != HAL_OK)
+ (err = hal_modexp(precalc, &args)) != HAL_OK)
goto fail;
fp_read_unsigned_bin(res, resbuf, sizeof(resbuf));
@@ -236,6 +256,83 @@ static hal_error_t modexp(hal_core_t *core,
memset(expbuf, 0, sizeof(expbuf));
memset(modbuf, 0, sizeof(modbuf));
memset(resbuf, 0, sizeof(resbuf));
+ memset(&args, 0, sizeof(args));
+ return err;
+}
+
+static hal_error_t modexp2(const int precalc,
+ const fp_int * const msg,
+ hal_core_t *core1,
+ const fp_int * const exp1,
+ const fp_int * const mod1,
+ fp_int * res1,
+ uint8_t *coeff1, const size_t coeff1_len,
+ uint8_t *mont1, const size_t mont1_len,
+ hal_core_t *core2,
+ const fp_int * const exp2,
+ const fp_int * const mod2,
+ fp_int * res2,
+ uint8_t *coeff2, const size_t coeff2_len,
+ uint8_t *mont2, const size_t mont2_len)
+{
+ hal_error_t err = HAL_OK;
+
+ if (msg == NULL ||
+ exp1 == NULL || mod1 == NULL || res1 == NULL || coeff1 == NULL || mont1 == NULL ||
+ exp2 == NULL || mod2 == NULL || res2 == NULL || coeff2 == NULL || mont2 == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
+
+ const size_t msg_len = (fp_unsigned_bin_size(unconst_fp_int(msg)) + 3) & ~3;
+ const size_t exp1_len = (fp_unsigned_bin_size(unconst_fp_int(exp1)) + 3) & ~3;
+ const size_t mod1_len = (fp_unsigned_bin_size(unconst_fp_int(mod1)) + 3) & ~3;
+ const size_t exp2_len = (fp_unsigned_bin_size(unconst_fp_int(exp2)) + 3) & ~3;
+ const size_t mod2_len = (fp_unsigned_bin_size(unconst_fp_int(mod2)) + 3) & ~3;
+
+ uint8_t msgbuf[msg_len];
+ uint8_t expbuf1[exp1_len], modbuf1[mod1_len], resbuf1[mod1_len];
+ uint8_t expbuf2[exp2_len], modbuf2[mod2_len], resbuf2[mod2_len];
+
+ hal_modexp_arg_t args1 = {
+ .core = core1,
+ .msg = msgbuf, .msg_len = sizeof(msgbuf),
+ .exp = expbuf1, .exp_len = sizeof(expbuf1),
+ .mod = modbuf1, .mod_len = sizeof(modbuf1),
+ .result = resbuf1, .result_len = sizeof(resbuf1),
+ .coeff = coeff1, .coeff_len = coeff1_len,
+ .mont = mont1, .mont_len = mont1_len
+ };
+
+ hal_modexp_arg_t args2 = {
+ .core = core2,
+ .msg = msgbuf, .msg_len = sizeof(msgbuf),
+ .exp = expbuf2, .exp_len = sizeof(expbuf2),
+ .mod = modbuf2, .mod_len = sizeof(modbuf2),
+ .result = resbuf2, .result_len = sizeof(resbuf2),
+ .coeff = coeff2, .coeff_len = coeff2_len,
+ .mont = mont2, .mont_len = mont2_len
+ };
+
+ if ((err = unpack_fp(msg, msgbuf, sizeof(msgbuf))) != HAL_OK ||
+ (err = unpack_fp(exp1, expbuf1, sizeof(expbuf1))) != HAL_OK ||
+ (err = unpack_fp(mod1, modbuf1, sizeof(modbuf1))) != HAL_OK ||
+ (err = unpack_fp(exp2, expbuf2, sizeof(expbuf2))) != HAL_OK ||
+ (err = unpack_fp(mod2, modbuf2, sizeof(modbuf2))) != HAL_OK ||
+ (err = hal_modexp2(precalc, &args1, &args2)) != HAL_OK)
+ goto fail;
+
+ fp_read_unsigned_bin(res1, resbuf1, sizeof(resbuf1));
+ fp_read_unsigned_bin(res2, resbuf2, sizeof(resbuf2));
+
+ fail:
+ memset(msgbuf, 0, sizeof(msgbuf));
+ memset(expbuf1, 0, sizeof(expbuf1));
+ memset(modbuf1, 0, sizeof(modbuf1));
+ memset(resbuf1, 0, sizeof(resbuf1));
+ memset(&args1, 0, sizeof(args1));
+ memset(expbuf2, 0, sizeof(expbuf2));
+ memset(modbuf2, 0, sizeof(modbuf2));
+ memset(resbuf2, 0, sizeof(resbuf2));
+ memset(&args2, 0, sizeof(args2));
return err;
}
@@ -249,10 +346,14 @@ static hal_error_t modexp(hal_core_t *core,
*/
static hal_error_t modexp(const hal_core_t *core, /* ignored */
+ const int precalc, /* ignored */
const fp_int * const msg,
const fp_int * const exp,
const fp_int * const mod,
- fp_int *res)
+ fp_int *res,
+ uint8_t *coeff, const size_t coeff_len, /* ignored */
+ uint8_t *mont, const size_t mont_len) /* ignored */
+
{
hal_error_t err = HAL_OK;
FP_CHECK(fp_exptmod(unconst_fp_int(msg), unconst_fp_int(exp), unconst_fp_int(mod), res));
@@ -260,6 +361,28 @@ static hal_error_t modexp(const hal_core_t *core, /* ignored */
return err;
}
+static hal_error_t modexp2(const int precalc, /* ignored */
+ const fp_int * const msg,
+ hal_core_t *core1, /* ignored */
+ const fp_int * const exp1,
+ const fp_int * const mod1,
+ fp_int * res1,
+ uint8_t *coeff1, const size_t coeff1_len, /* ignored */
+ uint8_t *mont1, const size_t mont1_len, /* ignored */
+ hal_core_t *core2, /* ignored */
+ const fp_int * const exp2,
+ const fp_int * const mod2,
+ fp_int * res2,
+ uint8_t *coeff2, const size_t coeff2_len, /* ignored */
+ uint8_t *mont2, const size_t mont2_len) /* ignored */
+{
+ hal_error_t err = HAL_OK;
+ FP_CHECK(fp_exptmod(unconst_fp_int(msg), unconst_fp_int(exp1), unconst_fp_int(mod1), res1));
+ FP_CHECK(fp_exptmod(unconst_fp_int(msg), unconst_fp_int(exp2), unconst_fp_int(mod2), res2));
+ fail:
+ return err;
+}
+
#endif /* HAL_RSA_SIGN_USE_MODEXP */
/*
@@ -281,7 +404,12 @@ static hal_error_t modexp(const hal_core_t *core, /* ignored */
int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
{
- return modexp(NULL, a, b, c, d) == HAL_OK ? FP_OKAY : FP_VAL;
+ const size_t len = (fp_unsigned_bin_size(unconst_fp_int(b)) + 3) & ~3;
+ uint8_t C[len], F[len];
+ const hal_error_t err = modexp(NULL, 0, a, b, c, d, C, sizeof(C), F, sizeof(F));
+ memset(C, 0, sizeof(C));
+ memset(F, 0, sizeof(F));
+ return err == HAL_OK ? FP_OKAY : FP_VAL;
}
#endif /* HAL_RSA_SIGN_USE_MODEXP && HAL_RSA_KEYGEN_USE_MODEXP */
@@ -292,10 +420,12 @@ int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
* try. Come back to this if it looks like a bottleneck.
*/
-static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t * const key, fp_int *bf, fp_int *ubf)
+static hal_error_t create_blinding_factors(hal_core_t *core, hal_rsa_key_t *key, fp_int *bf, fp_int *ubf)
{
- assert(key != NULL && bf != NULL && ubf != NULL);
+ if (key == NULL || bf == NULL || ubf == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
+ const int precalc = !(key->flags & RSA_FLAG_PRECALC_N_DONE);
uint8_t rnd[fp_unsigned_bin_size(unconst_fp_int(key->n))];
hal_error_t err = HAL_OK;
@@ -306,9 +436,13 @@ static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t
fp_read_unsigned_bin(bf, rnd, sizeof(rnd));
fp_copy(bf, ubf);
- if ((err = modexp(core, bf, key->e, key->n, bf)) != HAL_OK)
+ if ((err = modexp(core, precalc, bf, key->e, key->n, bf,
+ key->nC, sizeof(key->nC), key->nF, sizeof(key->nF))) != HAL_OK)
goto fail;
+ if (precalc)
+ key->flags |= RSA_FLAG_PRECALC_N_DONE | RSA_FLAG_NEEDS_SAVING;
+
FP_CHECK(fp_invmod(ubf, unconst_fp_int(key->n), ubf));
fail:
@@ -320,10 +454,12 @@ static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t
* RSA decryption via Chinese Remainder Theorem (Garner's formula).
*/
-static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp_int *msg, fp_int *sig)
+static hal_error_t rsa_crt(hal_core_t *core1, hal_core_t *core2, hal_rsa_key_t *key, fp_int *msg, fp_int *sig)
{
- assert(key != NULL && msg != NULL && sig != NULL);
+ if (key == NULL || msg == NULL || sig == NULL)
+ return HAL_ERROR_IMPOSSIBLE;
+ const int precalc = !(key->flags & RSA_FLAG_PRECALC_PQ_DONE);
hal_error_t err = HAL_OK;
fp_int t[1] = INIT_FP_INT;
fp_int m1[1] = INIT_FP_INT;
@@ -335,7 +471,7 @@ static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp
* Handle blinding if requested.
*/
if (blinding) {
- if ((err = create_blinding_factors(core, key, bf, ubf)) != HAL_OK)
+ if ((err = create_blinding_factors(core1, key, bf, ubf)) != HAL_OK)
goto fail;
FP_CHECK(fp_mulmod(msg, bf, unconst_fp_int(key->n), msg));
}
@@ -344,10 +480,14 @@ static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp
* m1 = msg ** dP mod p
* m2 = msg ** dQ mod q
*/
- if ((err = modexp(core, msg, key->dP, key->p, m1)) != HAL_OK ||
- (err = modexp(core, msg, key->dQ, key->q, m2)) != HAL_OK)
+ if ((err = modexp2(precalc, msg,
+ core1, key->dP, key->p, m1, key->pC, sizeof(key->pC), key->pF, sizeof(key->pF),
+ core2, key->dQ, key->q, m2, key->qC, sizeof(key->qC), key->qF, sizeof(key->qF))) != HAL_OK)
goto fail;
+ if (precalc)
+ key->flags |= RSA_FLAG_PRECALC_PQ_DONE | RSA_FLAG_NEEDS_SAVING;
+
/*
* t = m1 - m2.
*/
@@ -392,7 +532,7 @@ static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp
*/
hal_error_t hal_rsa_encrypt(hal_core_t *core,
- const hal_rsa_key_t * const key,
+ hal_rsa_key_t *key,
const uint8_t * const input, const size_t input_len,
uint8_t * output, const size_t output_len)
{
@@ -401,23 +541,29 @@ hal_error_t hal_rsa_encrypt(hal_core_t *core,
if (key == NULL || input == NULL || output == NULL || input_len > output_len)
return HAL_ERROR_BAD_ARGUMENTS;
+ const int precalc = !(key->flags & RSA_FLAG_PRECALC_N_DONE);
fp_int i[1] = INIT_FP_INT;
fp_int o[1] = INIT_FP_INT;
fp_read_unsigned_bin(i, unconst_uint8_t(input), input_len);
- if ((err = modexp(core, i, key->e, key->n, o)) != HAL_OK ||
- (err = unpack_fp(o, output, output_len)) != HAL_OK)
- goto fail;
+ err = modexp(core, precalc, i, key->e, key->n, o,
+ key->nC, sizeof(key->nC), key->nF, sizeof(key->nF));
+
+ if (err == HAL_OK && precalc)
+ key->flags |= RSA_FLAG_PRECALC_N_DONE | RSA_FLAG_NEEDS_SAVING;
+
+ if (err == HAL_OK)
+ err = unpack_fp(o, output, output_len);
- fail:
fp_zero(i);
fp_zero(o);
return err;
}
-hal_error_t hal_rsa_decrypt(hal_core_t *core,
- const hal_rsa_key_t * const key,
+hal_error_t hal_rsa_decrypt(hal_core_t *core1,
+ hal_core_t *core2,
+ hal_rsa_key_t *key,
const uint8_t * const input, const size_t input_len,
uint8_t * output, const size_t output_len)
{
@@ -436,10 +582,17 @@ hal_error_t hal_rsa_decrypt(hal_core_t *core,
* just do brute force ModExp.
*/
- if (fp_iszero(key->p) || fp_iszero(key->q) || fp_iszero(key->u) || fp_iszero(key->dP) || fp_iszero(key->dQ))
- err = modexp(core, i, key->d, key->n, o);
- else
- err = rsa_crt(core, key, i, o);
+ if (!fp_iszero(key->p) && !fp_iszero(key->q) && !fp_iszero(key->u) &&
+ !fp_iszero(key->dP) && !fp_iszero(key->dQ))
+ err = rsa_crt(core1, core2, key, i, o);
+
+ else {
+ const int precalc = !(key->flags & RSA_FLAG_PRECALC_N_DONE);
+ err = modexp(core1, precalc, i, key->d, key->n, o, key->nC, sizeof(key->nC),
+ key->nF, sizeof(key->nF));
+ if (err == HAL_OK && precalc)
+ key->flags |= RSA_FLAG_PRECALC_N_DONE | RSA_FLAG_NEEDS_SAVING;
+ }
if (err != HAL_OK || (err = unpack_fp(o, output, output_len)) != HAL_OK)
goto fail;
@@ -762,6 +915,8 @@ hal_error_t hal_rsa_key_gen(hal_core_t *core,
FP_CHECK(fp_mod(key->d, q_1, key->dQ)); /* dQ = d % (q-1) */
FP_CHECK(fp_invmod(key->q, key->p, key->u)); /* u = (1/q) % p */
+ key->flags |= RSA_FLAG_NEEDS_SAVING;
+
*key_ = key;
/* Fall through to cleanup */
@@ -775,10 +930,26 @@ hal_error_t hal_rsa_key_gen(hal_core_t *core,
}
/*
+ * Whether a key contains new data that need saving (newly generated
+ * key, updated speedup components, whatever).
+ */
+
+int hal_rsa_key_needs_saving(const hal_rsa_key_t * const key)
+{
+ return key != NULL && (key->flags & RSA_FLAG_NEEDS_SAVING);
+}
+
+/*
* Just enough ASN.1 to read and write PKCS #1.5 RSAPrivateKey syntax
* (RFC 2313 section 7.2) wrapped in a PKCS #8 PrivateKeyInfo (RFC 5208).
*
* RSAPrivateKey fields in the required order.
+ *
+ * The "extra" fields are additional key components specific to the
+ * systolic modexpa7 core. We represent these in ASN.1 as OPTIONAL
+ * fields using IMPLICIT PRIVATE tags, since this is neither
+ * standardized nor meaningful to anybody else. Underlying encoding
+ * is INTEGER or OCTET STRING (currently the latter).
*/
#define RSAPrivateKey_fields \
@@ -792,8 +963,17 @@ hal_error_t hal_rsa_key_gen(hal_core_t *core,
_(key->dQ); \
_(key->u);
-hal_error_t hal_rsa_private_key_to_der(const hal_rsa_key_t * const key,
- uint8_t *der, size_t *der_len, const size_t der_max)
+#define RSAPrivateKey_extra_fields \
+ _(ASN1_PRIVATE + 0, nC, RSA_FLAG_PRECALC_N_DONE); \
+ _(ASN1_PRIVATE + 1, nF, RSA_FLAG_PRECALC_N_DONE); \
+ _(ASN1_PRIVATE + 2, pC, RSA_FLAG_PRECALC_PQ_DONE); \
+ _(ASN1_PRIVATE + 3, pF, RSA_FLAG_PRECALC_PQ_DONE); \
+ _(ASN1_PRIVATE + 4, qC, RSA_FLAG_PRECALC_PQ_DONE); \
+ _(ASN1_PRIVATE + 5, qF, RSA_FLAG_PRECALC_PQ_DONE);
+
+hal_error_t hal_rsa_private_key_to_der_internal(const hal_rsa_key_t * const key,
+ const int include_extra,
+ uint8_t *der, size_t *der_len, const size_t der_max)
{
hal_error_t err = HAL_OK;
@@ -808,10 +988,32 @@ hal_error_t hal_rsa_private_key_to_der(const hal_rsa_key_t * const key,
size_t hlen = 0, vlen = 0;
-#define _(x) { size_t n; if ((err = hal_asn1_encode_integer(x, NULL, &n, der_max - vlen)) != HAL_OK) return err; vlen += n; }
+#define _(x) \
+ { \
+ size_t n = 0; \
+ err = hal_asn1_encode_integer(x, NULL, &n, der_max - vlen); \
+ if (err != HAL_OK) \
+ return err; \
+ vlen += n; \
+ }
+
RSAPrivateKey_fields;
#undef _
+#define _(x,y,z) \
+ if ((key->flags & z) != 0) { \
+ size_t n = 0; \
+ if ((err = hal_asn1_encode_header(x, sizeof(key->y), NULL, \
+ &n, 0)) != HAL_OK) \
+ return err; \
+ vlen += n + sizeof(key->y); \
+ }
+
+ if (include_extra) {
+ RSAPrivateKey_extra_fields;
+ }
+#undef _
+
if ((err = hal_asn1_encode_header(ASN1_SEQUENCE, vlen, NULL, &hlen, 0)) != HAL_OK)
return err;
@@ -832,18 +1034,51 @@ hal_error_t hal_rsa_private_key_to_der(const hal_rsa_key_t * const key,
uint8_t *d = der + hlen;
memset(d, 0, vlen);
-#define _(x) { size_t n; if ((err = hal_asn1_encode_integer(x, d, &n, vlen)) != HAL_OK) return err; d += n; vlen -= n; }
+#define _(x) \
+ { \
+ size_t n = 0; \
+ err = hal_asn1_encode_integer(x, d, &n, vlen); \
+ if (err != HAL_OK) \
+ return err; \
+ d += n; \
+ vlen -= n; \
+ }
+
RSAPrivateKey_fields;
#undef _
+#define _(x,y,z) \
+ if ((key->flags & z) != 0) { \
+ size_t n = 0; \
+ if ((err = hal_asn1_encode_header(x, sizeof(key->y), d, \
+ &n, vlen)) != HAL_OK) \
+ return err; \
+ d += n; \
+ vlen -= n; \
+ memcpy(d, key->y, sizeof(key->y)); \
+ d += sizeof(key->y); \
+ vlen -= sizeof(key->y); \
+ }
+
+ if (include_extra) {
+ RSAPrivateKey_extra_fields;
+ }
+#undef _
+
return hal_asn1_encode_pkcs8_privatekeyinfo(hal_asn1_oid_rsaEncryption, hal_asn1_oid_rsaEncryption_len,
NULL, 0, der, d - der, der, der_len, der_max);
}
-size_t hal_rsa_private_key_to_der_len(const hal_rsa_key_t * const key)
+hal_error_t hal_rsa_private_key_to_der(const hal_rsa_key_t * const key,
+ uint8_t *der, size_t *der_len, const size_t der_max)
+{
+ return hal_rsa_private_key_to_der_internal(key, 0, der, der_len, der_max);
+}
+
+hal_error_t hal_rsa_private_key_to_der_extra(const hal_rsa_key_t * const key,
+ uint8_t *der, size_t *der_len, const size_t der_max)
{
- size_t len = 0;
- return hal_rsa_private_key_to_der(key, NULL, &len, 0) == HAL_OK ? len : 0;
+ return hal_rsa_private_key_to_der_internal(key, 1, der, der_len, der_max);
}
hal_error_t hal_rsa_private_key_from_der(hal_rsa_key_t **key_,
@@ -881,12 +1116,48 @@ hal_error_t hal_rsa_private_key_from_der(hal_rsa_key_t **key_,
fp_int version[1] = INIT_FP_INT;
-#define _(x) { size_t n; if ((err = hal_asn1_decode_integer(x, d, &n, vlen)) != HAL_OK) return err; d += n; vlen -= n; }
+#define _(x) \
+ { \
+ size_t n; \
+ err = hal_asn1_decode_integer(x, d, &n, vlen); \
+ if (err != HAL_OK) \
+ return err; \
+ d += n; \
+ vlen -= n; \
+ }
+
RSAPrivateKey_fields;
#undef _
- if (d != privkey + privkey_len || !fp_iszero(version))
+#define _(x,y,z) \
+ if (hal_asn1_peek(x, d, vlen)) { \
+ size_t hl = 0, vl = 0; \
+ if ((err = hal_asn1_decode_header(x, d, vlen, &hl, &vl)) != HAL_OK) \
+ return err; \
+ if (vl > sizeof(key->y)) { \
+ hal_log(HAL_LOG_DEBUG, "extra factor %s too big (%lu > %lu)", \
+ #y, (unsigned long) vl, (unsigned long) sizeof(key->y)); \
+ return HAL_ERROR_ASN1_PARSE_FAILED; \
+ } \
+ memcpy(key->y, d + hl, vl); \
+ key->flags |= z; \
+ d += hl + vl; \
+ vlen -= hl + vl; \
+ }
+
+ RSAPrivateKey_extra_fields;
+#undef _
+
+ if (d != privkey + privkey_len) {
+ hal_log(HAL_LOG_DEBUG, "not at end of buffer (0x%lx != 0x%lx)",
+ (unsigned long) d, (unsigned long) privkey + privkey_len);
return HAL_ERROR_ASN1_PARSE_FAILED;
+ }
+
+ if (!fp_iszero(version)) {
+ hal_log(HAL_LOG_DEBUG, "nonzero version");
+ return HAL_ERROR_ASN1_PARSE_FAILED;
+ }
*key_ = key;
diff --git a/tests/test-rsa.c b/tests/test-rsa.c
index f4e7a8f..176ba03 100644
--- a/tests/test-rsa.c
+++ b/tests/test-rsa.c
@@ -56,12 +56,21 @@ static int test_modexp(hal_core_t *core,
const rsa_tc_bn_t * const exp, /* Exponent */
const rsa_tc_bn_t * const val) /* Expected result */
{
- uint8_t result[tc->n.len];
+ uint8_t result[tc->n.len], C[tc->n.len], F[tc->n.len];
printf("%s test for %lu-bit RSA key\n", kind, (unsigned long) tc->size);
- if (hal_modexp(core, msg->val, msg->len, exp->val, exp->len,
- tc->n.val, tc->n.len, result, sizeof(result)) != HAL_OK)
+ hal_modexp_arg_t args = {
+ .core = core,
+ .msg = msg->val, .msg_len = msg->len,
+ .exp = exp->val, .exp_len = exp->len,
+ .mod = tc->n.val, .mod_len = tc->n.len,
+ .result = result, .result_len = sizeof(result),
+ .coeff = C, .coeff_len = sizeof(C),
+ .mont = F, .mont_len = sizeof(F)
+ };
+
+ if (hal_modexp(1, &args) != HAL_OK)
return printf("ModExp failed\n"), 0;
if (memcmp(result, val->val, val->len))
@@ -98,7 +107,7 @@ static int test_decrypt(hal_core_t *core,
uint8_t result[tc->n.len];
- if ((err = hal_rsa_decrypt(core, key, tc->m.val, tc->m.len, result, sizeof(result))) != HAL_OK)
+ if ((err = hal_rsa_decrypt(core, NULL, key, tc->m.val, tc->m.len, result, sizeof(result))) != HAL_OK)
printf("RSA CRT failed: %s\n", hal_error_string(err));
const int mismatch = (err == HAL_OK && memcmp(result, tc->s.val, tc->s.len) != 0);
@@ -165,7 +174,7 @@ static int test_gen(hal_core_t *core,
uint8_t result[tc->n.len];
- if ((err = hal_rsa_decrypt(core, key1, tc->m.val, tc->m.len, result, sizeof(result))) != HAL_OK)
+ if ((err = hal_rsa_decrypt(core, NULL, key1, tc->m.val, tc->m.len, result, sizeof(result))) != HAL_OK)
printf("RSA CRT failed: %s\n", hal_error_string(err));
snprintf(fn, sizeof(fn), "test-rsa-sig-%04lu.der", (unsigned long) tc->size);
diff --git a/tests/test-trng.c b/tests/test-trng.c
index f570752..45dec56 100644
--- a/tests/test-trng.c
+++ b/tests/test-trng.c
@@ -43,6 +43,7 @@
#include <sys/time.h>
#include <hal.h>
+#include <hal_internal.h>
#include <verilog_constants.h>
#ifndef WAIT_FOR_CSPRNG_VALID
diff --git a/unit-tests.py b/unit-tests.py
index 824d495..514aace 100644
--- a/unit-tests.py
+++ b/unit-tests.py
@@ -1279,6 +1279,7 @@ class TestPKeyAttribute(TestCaseLoggedIn):
self.load_and_fill(0, n_attrs = 64)
def test_attribute_bloat_volatile_many(self):
+ self.skipUnlessAll("bloat tests with large flash blocks exceed XDR limits, sigh")
with self.assertRaises(HAL_ERROR_RESULT_TOO_LONG):
self.load_and_fill(0, n_attrs = 128)
@@ -1286,6 +1287,7 @@ class TestPKeyAttribute(TestCaseLoggedIn):
self.load_and_fill(0, n_attrs = 6, n_fill = 256)
def test_attribute_bloat_volatile_big(self):
+ self.skipUnlessAll("bloat tests with large flash blocks exceed XDR limits, sigh")
with self.assertRaises(HAL_ERROR_RESULT_TOO_LONG):
self.load_and_fill(0, n_attrs = 6, n_fill = 512)
@@ -1293,6 +1295,7 @@ class TestPKeyAttribute(TestCaseLoggedIn):
self.load_and_fill(HAL_KEY_FLAG_TOKEN, n_attrs = 64)
def test_attribute_bloat_token_many(self):
+ self.skipUnlessAll("bloat tests with large flash blocks exceed XDR limits, sigh")
with self.assertRaises(HAL_ERROR_RESULT_TOO_LONG):
self.load_and_fill(HAL_KEY_FLAG_TOKEN, n_attrs = 128)
@@ -1300,6 +1303,7 @@ class TestPKeyAttribute(TestCaseLoggedIn):
self.load_and_fill(HAL_KEY_FLAG_TOKEN, n_attrs = 6, n_fill = 256)
def test_attribute_bloat_token_big(self):
+ self.skipUnlessAll("bloat tests with large flash blocks exceed XDR limits, sigh")
with self.assertRaises(HAL_ERROR_RESULT_TOO_LONG):
self.load_and_fill(HAL_KEY_FLAG_TOKEN, n_attrs = 6, n_fill = 512)
diff --git a/verilog_constants.h b/verilog_constants.h
index c9bb566..1b00b96 100644
--- a/verilog_constants.h
+++ b/verilog_constants.h
@@ -222,23 +222,38 @@
#define MODEXPS6_ADDR_MESSAGE (MODEXPS6_ADDR_OPERANDS + 1 * MODEXPS6_OPERAND_WORDS)
#define MODEXPS6_ADDR_EXPONENT (MODEXPS6_ADDR_OPERANDS + 2 * MODEXPS6_OPERAND_WORDS)
#define MODEXPS6_ADDR_RESULT (MODEXPS6_ADDR_OPERANDS + 3 * MODEXPS6_OPERAND_WORDS)
+#define MODEXPS6_MODE_CONSTANT_TIME (0)
+#define MODEXPS6_MODE_FAST_PUBLIC (1)
/*
* ModExpA7 core. MODEXPA7_OPERAND_BITS is size in bits of largest
* supported modulus.
+ *
+ * I prefer the way Pavel wrote the constants for this in his sample
+ * code to what I've done here, but let's get the thing working before
+ * worrying about the yaks' pedicures.
*/
-#define MODEXPA7_OPERAND_BITS (4096)
-#define MODEXPA7_OPERAND_WORDS (MODEXPA7_OPERAND_BITS / 32)
-#define MODEXPA7_ADDR_REGISTERS (0 * MODEXPA7_OPERAND_WORDS)
-#define MODEXPA7_ADDR_OPERANDS (4 * MODEXPA7_OPERAND_WORDS)
-#define MODEXPA7_ADDR_MODE (MODEXPA7_ADDR_REGISTERS + 0x10)
-#define MODEXPA7_ADDR_MODULUS_WIDTH (MODEXPA7_ADDR_REGISTERS + 0x11)
-#define MODEXPA7_ADDR_EXPONENT_WIDTH (MODEXPA7_ADDR_REGISTERS + 0x12)
-#define MODEXPA7_ADDR_MODULUS (MODEXPA7_ADDR_OPERANDS + 0 * MODEXPA7_OPERAND_WORDS)
-#define MODEXPA7_ADDR_MESSAGE (MODEXPA7_ADDR_OPERANDS + 1 * MODEXPA7_OPERAND_WORDS)
-#define MODEXPA7_ADDR_EXPONENT (MODEXPA7_ADDR_OPERANDS + 2 * MODEXPA7_OPERAND_WORDS)
-#define MODEXPA7_ADDR_RESULT (MODEXPA7_ADDR_OPERANDS + 3 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_OPERAND_BITS (4096)
+#define MODEXPA7_OPERAND_BYTES (MODEXPA7_OPERAND_BITS / 8)
+#define MODEXPA7_OPERAND_WORDS (MODEXPA7_OPERAND_BITS / 32)
+#define MODEXPA7_ADDR_REGISTERS (0 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_OPERANDS (8 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MODE (MODEXPA7_ADDR_REGISTERS + 0x10)
+#define MODEXPA7_ADDR_MODULUS_BITS (MODEXPA7_ADDR_REGISTERS + 0x11)
+#define MODEXPA7_ADDR_EXPONENT_BITS (MODEXPA7_ADDR_REGISTERS + 0x12)
+#define MODEXPA7_ADDR_BUFFER_BITS (MODEXPA7_ADDR_REGISTERS + 0x13)
+#define MODEXPA7_ADDR_ARRAY_BITS (MODEXPA7_ADDR_REGISTERS + 0x14)
+#define MODEXPA7_ADDR_MODULUS (MODEXPA7_ADDR_OPERANDS + 0 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MESSAGE (MODEXPA7_ADDR_OPERANDS + 1 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_EXPONENT (MODEXPA7_ADDR_OPERANDS + 2 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_RESULT (MODEXPA7_ADDR_OPERANDS + 3 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MODULUS_COEFF_OUT (MODEXPA7_ADDR_OPERANDS + 4 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MODULUS_COEFF_IN (MODEXPA7_ADDR_OPERANDS + 5 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MONTGOMERY_FACTOR_OUT (MODEXPA7_ADDR_OPERANDS + 6 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_ADDR_MONTGOMERY_FACTOR_IN (MODEXPA7_ADDR_OPERANDS + 7 * MODEXPA7_OPERAND_WORDS)
+#define MODEXPA7_MODE_CRT (1 << 1)
+#define MODEXPA7_MODE_PLAIN (0 << 1)
/*
* ECDSA P-256 point multiplier core. ECDSA256_OPERAND_BITS is size