diff options
-rw-r--r-- | Makefile | 7 | ||||
-rw-r--r-- | asn1_internal.h | 10 | ||||
-rw-r--r-- | core.c | 39 | ||||
-rw-r--r-- | cryptech/libhal.py | 2 | ||||
-rw-r--r-- | hal.h | 54 | ||||
-rw-r--r-- | hal_internal.h | 63 | ||||
-rw-r--r-- | hal_io.c | 114 | ||||
-rw-r--r-- | hal_io_eim.c | 54 | ||||
-rw-r--r-- | hal_io_fmc.c | 56 | ||||
-rw-r--r-- | hal_io_i2c.c | 47 | ||||
-rw-r--r-- | ks.c | 123 | ||||
-rw-r--r-- | ks.h | 2 | ||||
-rw-r--r-- | ks_token.c | 20 | ||||
-rw-r--r-- | modexp.c | 300 | ||||
-rw-r--r-- | rpc_pkey.c | 27 | ||||
-rw-r--r-- | rsa.c | 371 | ||||
-rw-r--r-- | tests/test-rsa.c | 19 | ||||
-rw-r--r-- | tests/test-trng.c | 1 | ||||
-rw-r--r-- | unit-tests.py | 4 | ||||
-rw-r--r-- | verilog_constants.h | 37 |
20 files changed, 951 insertions, 399 deletions
@@ -109,12 +109,13 @@ CORE_OBJ = core.o csprng.o pbkdf2.o aes_keywrap.o modexp.o mkmif.o ${IO_OBJ} # i2c: Older I2C bus from Novena # fmc: FMC bus from dev-bridge and alpha boards +IO_OBJ = hal_io.o ifeq "${IO_BUS}" "eim" - IO_OBJ = hal_io_eim.o novena-eim.o + IO_OBJ += hal_io_eim.o novena-eim.o else ifeq "${IO_BUS}" "i2c" - IO_OBJ = hal_io_i2c.o + IO_OBJ += hal_io_i2c.o else ifeq "${IO_BUS}" "fmc" - IO_OBJ = hal_io_fmc.o + IO_OBJ += hal_io_fmc.o endif # If we're building for STM32, position-independent code leads to some diff --git a/asn1_internal.h b/asn1_internal.h index fe2f293..3de8bd6 100644 --- a/asn1_internal.h +++ b/asn1_internal.h @@ -151,6 +151,16 @@ extern hal_error_t hal_asn1_decode_pkcs8_encryptedprivatekeyinfo(const uint8_t * extern hal_error_t hal_asn1_guess_key_type(hal_key_type_t *type, hal_curve_name_t *curve, const uint8_t *const der, const size_t der_len); +/* + * Peek ahead for an OPTIONAL attribute. + */ + +static inline int hal_asn1_peek(const uint8_t tag, + const uint8_t * const der, size_t der_max) +{ + return der != NULL && der_max > 0 && der[0] == tag; +} + #endif /* _HAL_ASN1_INTERNAL_H_ */ /* @@ -97,7 +97,7 @@ static int name_matches(const hal_core_t *const core, const char * const name) static const struct { const char *name; hal_addr_t extra; } gaps[] = { { "csprng", 11 * CORE_SIZE }, /* empty slots after csprng */ { "modexps6", 3 * CORE_SIZE }, /* ModexpS6 uses four slots */ - { "modexpa7", 3 * CORE_SIZE }, /* ModexpA7 uses four slots */ + { "modexpa7", 7 * CORE_SIZE }, /* ModexpA7 uses eight slots */ }; static hal_core_t *head = NULL; @@ -203,15 +203,17 @@ hal_core_t *hal_core_find(const char *name, hal_core_t *core) hal_error_t hal_core_alloc(const char *name, hal_core_t **pcore) { - hal_core_t *core; - hal_error_t err = HAL_ERROR_CORE_NOT_FOUND; + /* + * This used to allow name == NULL iff *core != NULL, but the + * semantics were fragile and in practice we always pass a name + * anyway, so simplify by requiring name != NULL, 
always. + */ - if (name == NULL && (pcore == NULL || *pcore == NULL)) + if (name == NULL || pcore == NULL) return HAL_ERROR_BAD_ARGUMENTS; - core = *pcore; - if (name == NULL) - name = core->info.name; + hal_error_t err = HAL_ERROR_CORE_NOT_FOUND; + hal_core_t *core = *pcore; if (core != NULL) { /* if we can reallocate the same core, do it now */ @@ -221,24 +223,23 @@ hal_error_t hal_core_alloc(const char *name, hal_core_t **pcore) hal_critical_section_end(); return HAL_OK; } - /* else fall through to search */ + /* else forget that core and fall through to search */ + *pcore = NULL; } while (1) { hal_critical_section_start(); for (core = hal_core_iterate(NULL); core != NULL; core = core->next) { - if (name_matches(core, name)) { - if (core->busy) { - err = HAL_ERROR_CORE_BUSY; - continue; - } - else { - err = HAL_OK; - *pcore = core; - core->busy = 1; - break; - } + if (!name_matches(core, name)) + continue; + if (core->busy) { + err = HAL_ERROR_CORE_BUSY; + continue; } + err = HAL_OK; + *pcore = core; + core->busy = 1; + break; } hal_critical_section_end(); if (err == HAL_ERROR_CORE_BUSY) diff --git a/cryptech/libhal.py b/cryptech/libhal.py index 8666d15..acd1abb 100644 --- a/cryptech/libhal.py +++ b/cryptech/libhal.py @@ -403,7 +403,7 @@ class PKey(Handle): return result def export_pkey(self, pkey): - return self.hsm.pkey_export(pkey = pkey, kekek = self, pkcs8_max = 2560, kek_max = 512) + return self.hsm.pkey_export(pkey = pkey, kekek = self, pkcs8_max = 5480, kek_max = 512) def import_pkey(self, pkcs8, kek, flags = 0): return self.hsm.pkey_import(kekek = self, pkcs8 = pkcs8, kek = kek, flags = flags) @@ -201,11 +201,8 @@ typedef struct hal_core hal_core_t; extern void hal_io_set_debug(int onoff); extern hal_error_t hal_io_write(const hal_core_t *core, hal_addr_t offset, const uint8_t *buf, size_t len); extern hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf, size_t len); -extern hal_error_t hal_io_init(const hal_core_t *core); 
-extern hal_error_t hal_io_next(const hal_core_t *core); -extern hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count); -extern hal_error_t hal_io_wait_ready(const hal_core_t *core); -extern hal_error_t hal_io_wait_valid(const hal_core_t *core); +extern hal_error_t hal_io_wait(const hal_core_t *core, const uint8_t status, int *count); +extern hal_error_t hal_io_wait2(const hal_core_t *core1, const hal_core_t *core2, const uint8_t status, int *count); /* * Core management functions. @@ -372,16 +369,25 @@ extern hal_error_t hal_pbkdf2(hal_core_t *core, unsigned iterations_desired); /* - * Modular exponentiation. + * Modular exponentiation. This takes a ridiculous number of + * arguments of very similar types, making it easy to confuse them, + * particularly when performing two modexp operations in parallel, so + * we encapsulate the arguments in a structure. */ -extern void hal_modexp_set_debug(const int onoff); +typedef struct { + hal_core_t *core; + const uint8_t *msg; size_t msg_len; /* Message */ + const uint8_t *exp; size_t exp_len; /* Exponent */ + const uint8_t *mod; size_t mod_len; /* Modulus */ + uint8_t *result; size_t result_len; /* Result of exponentiation */ + uint8_t *coeff; size_t coeff_len; /* Modulus coefficient (r/w) */ + uint8_t *mont; size_t mont_len; /* Montgomery factor (r/w)*/ +} hal_modexp_arg_t; -extern hal_error_t hal_modexp(hal_core_t *core, - const uint8_t * const msg, const size_t msg_len, /* Message */ - const uint8_t * const exp, const size_t exp_len, /* Exponent */ - const uint8_t * const mod, const size_t mod_len, /* Modulus */ - uint8_t * result, const size_t result_len); +extern void hal_modexp_set_debug(const int onoff); +extern hal_error_t hal_modexp( const int precalc, hal_modexp_arg_t *args); +extern hal_error_t hal_modexp2(const int precalc, hal_modexp_arg_t *args1, hal_modexp_arg_t *args2); /* * Master Key Memory Interface @@ -459,12 +465,13 @@ extern hal_error_t hal_rsa_key_get_public_exponent(const 
hal_rsa_key_t * const k extern void hal_rsa_key_clear(hal_rsa_key_t *key); extern hal_error_t hal_rsa_encrypt(hal_core_t *core, - const hal_rsa_key_t * const key, + hal_rsa_key_t *key, const uint8_t * const input, const size_t input_len, uint8_t * output, const size_t output_len); -extern hal_error_t hal_rsa_decrypt(hal_core_t *core, - const hal_rsa_key_t * const key, +extern hal_error_t hal_rsa_decrypt(hal_core_t *core1, + hal_core_t *core2, + hal_rsa_key_t *key, const uint8_t * const input, const size_t input_len, uint8_t * output, const size_t output_len); @@ -477,7 +484,8 @@ extern hal_error_t hal_rsa_key_gen(hal_core_t *core, extern hal_error_t hal_rsa_private_key_to_der(const hal_rsa_key_t * const key, uint8_t *der, size_t *der_len, const size_t der_max); -extern size_t hal_rsa_private_key_to_der_len(const hal_rsa_key_t * const key); +extern hal_error_t hal_rsa_private_key_to_der_extra(const hal_rsa_key_t * const key, + uint8_t *der, size_t *der_len, const size_t der_max); extern hal_error_t hal_rsa_private_key_from_der(hal_rsa_key_t **key, void *keybuf, const size_t keybuf_len, @@ -492,6 +500,20 @@ extern hal_error_t hal_rsa_public_key_from_der(hal_rsa_key_t **key, void *keybuf, const size_t keybuf_len, const uint8_t * const der, const size_t der_len); +extern int hal_rsa_key_needs_saving(const hal_rsa_key_t * const key); + +static inline size_t hal_rsa_private_key_to_der_len(const hal_rsa_key_t * const key) +{ + size_t len = 0; + return hal_rsa_private_key_to_der(key, NULL, &len, 0) == HAL_OK ? len : 0; +} + +static inline size_t hal_rsa_private_key_to_der_extra_len(const hal_rsa_key_t * const key) +{ + size_t len = 0; + return hal_rsa_private_key_to_der_extra(key, NULL, &len, 0) == HAL_OK ? len : 0; +} + /* * ECDSA. 
*/ diff --git a/hal_internal.h b/hal_internal.h index 2486fd2..ac51cfb 100644 --- a/hal_internal.h +++ b/hal_internal.h @@ -69,6 +69,53 @@ inline uint32_t htonl(uint32_t w) #endif /* + * Low-level I/O convenience functions, moved here from hal.h + * because they use symbols defined in verilog_constants.h. + */ + +static inline hal_error_t hal_io_zero(const hal_core_t *core) +{ + const uint8_t buf[4] = { 0, 0, 0, 0 }; + return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); +} + +static inline hal_error_t hal_io_init(const hal_core_t *core) +{ + const uint8_t buf[4] = { 0, 0, 0, CTRL_INIT }; + return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); +} + +static inline hal_error_t hal_io_next(const hal_core_t *core) +{ + const uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT }; + return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); +} + +static inline hal_error_t hal_io_wait_ready(const hal_core_t *core) +{ + int limit = -1; + return hal_io_wait(core, STATUS_READY, &limit); +} + +static inline hal_error_t hal_io_wait_valid(const hal_core_t *core) +{ + int limit = -1; + return hal_io_wait(core, STATUS_VALID, &limit); +} + +static inline hal_error_t hal_io_wait_ready2(const hal_core_t *core1, const hal_core_t *core2) +{ + int limit = -1; + return hal_io_wait2(core1, core2, STATUS_READY, &limit); +} + +static inline hal_error_t hal_io_wait_valid2(const hal_core_t *core1, const hal_core_t *core2) +{ + int limit = -1; + return hal_io_wait2(core1, core2, STATUS_VALID, &limit); +} + +/* * Static memory allocation on start-up. Don't use this except where * really necessary. By design, there's no way to free this, we don't * want to have to manage a heap. Intent is just to allow allocation @@ -370,7 +417,19 @@ static inline hal_crc32_t hal_crc32_finalize(hal_crc32_t crc) * moment we take the easy way out and cap this at 4096-bit RSA. 
*/ +#if 0 #define HAL_KS_WRAPPED_KEYSIZE ((2373 + 15) & ~7) +#else +#warning Temporary test hack to HAL_KS_WRAPPED_KEYSIZE, clean this up +// +// See how much of the problem we're having with pkey support for the +// new modexpa7 components is just this buffer size being too small. +// +#define HAL_KS_WRAPPED_KEYSIZE ((2373 + 6 * 4096 / 8 + 6 * 4 + 15) & ~7) +#if HAL_KS_WRAPPED_KEYSIZE + 8 > 4096 +#warning HAL_KS_WRAPPED_KEYSIZE is too big for a single 4096-octet block +#endif +#endif /* * PINs. @@ -531,6 +590,10 @@ extern hal_error_t hal_ks_get_attributes(hal_ks_t *ks, extern hal_error_t hal_ks_logout(hal_ks_t *ks, const hal_client_handle_t client); +extern hal_error_t hal_ks_rewrite_der(hal_ks_t *ks, + hal_pkey_slot_t *slot, + const uint8_t * const der, const size_t der_len); + /* * RPC lowest-level send and receive routines. These are blocking, and * transport-specific (sockets, USB). diff --git a/hal_io.c b/hal_io.c new file mode 100644 index 0000000..f885712 --- /dev/null +++ b/hal_io.c @@ -0,0 +1,114 @@ +/* + * hal_io.c + * -------- + * This module contains common code to talk to the FPGA over the bus du jour. + * + * Author: Paul Selkirk, Rob Austein + * Copyright (c) 2014-2017, NORDUnet A/S All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of the NORDUnet nor the names of its contributors may + * be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdint.h> + +#include "hal.h" +#include "hal_internal.h" + +#ifndef HAL_IO_TIMEOUT +#define HAL_IO_TIMEOUT 100000000 +#endif + +static inline hal_error_t test_status(const hal_core_t *core, + const uint8_t status, + int *done) +{ + if (done == NULL) + return HAL_ERROR_IMPOSSIBLE; + + if (*done || core == NULL) + return HAL_OK; + + uint8_t buf[4]; + + const hal_error_t err = hal_io_read(core, ADDR_STATUS, buf, sizeof(buf)); + + if (err == HAL_OK) + *done = (buf[3] & status) != 0; + + return err; +} + +hal_error_t hal_io_wait2(const hal_core_t *core1, + const hal_core_t *core2, + const uint8_t status, + int *count) +{ + int done1 = 0, done2 = 0; + hal_error_t err; + + if (core1 == NULL) + return HAL_ERROR_BAD_ARGUMENTS; + + if (core2 == NULL) + done2 = 1; + + if (count && *count == -1) + *count = HAL_IO_TIMEOUT; + + for (int i = 1; ; ++i) { + + if (count && (*count > 0) && (i >= *count)) + return HAL_ERROR_IO_TIMEOUT; + + hal_task_yield(); + + if ((err = test_status(core1, status, &done1)) != HAL_OK || + (err = test_status(core2, status, &done2)) != HAL_OK) + return err; + + if (done1 && done2) { + if (count) + *count = i; + return HAL_OK; + 
} + } +} + +hal_error_t hal_io_wait(const hal_core_t *core, + const uint8_t status, + int *count) +{ + return hal_io_wait2(core, NULL, status, count); +} + +/* + * Local variables: + * indent-tabs-mode: nil + * c-basic-offset: 2 + * End: + */ diff --git a/hal_io_eim.c b/hal_io_eim.c index 5824f5b..040cb2b 100644 --- a/hal_io_eim.c +++ b/hal_io_eim.c @@ -43,11 +43,7 @@ static int debug = 0; static int inited = 0; -#ifndef EIM_IO_TIMEOUT -#define EIM_IO_TIMEOUT 100000000 -#endif - -static hal_error_t init(void) +static inline hal_error_t init(void) { if (inited) return HAL_OK; @@ -61,7 +57,7 @@ static hal_error_t init(void) /* translate cryptech register number to EIM address */ -static hal_addr_t eim_offset(hal_addr_t offset) +static inline hal_addr_t eim_offset(hal_addr_t offset) { return EIM_BASE_ADDR + (offset << 2); } @@ -134,52 +130,6 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf, return HAL_OK; } -hal_error_t hal_io_init(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_INIT }; - return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); -} - -hal_error_t hal_io_next(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT }; - return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); -} - -hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count) -{ - hal_error_t err; - uint8_t buf[4]; - int i; - - for (i = 1; ; ++i) { - - if (count && (*count > 0) && (i >= *count)) - return HAL_ERROR_IO_TIMEOUT; - - if ((err = hal_io_read(core, ADDR_STATUS, buf, sizeof(buf))) != HAL_OK) - return err; - - if ((buf[3] & status) != 0) { - if (count) - *count = i; - return HAL_OK; - } - } -} - -hal_error_t hal_io_wait_ready(const hal_core_t *core) -{ - int limit = EIM_IO_TIMEOUT; - return hal_io_wait(core, STATUS_READY, &limit); -} - -hal_error_t hal_io_wait_valid(const hal_core_t *core) -{ - int limit = EIM_IO_TIMEOUT; - return hal_io_wait(core, STATUS_VALID, &limit); -} - /* * Local variables: * 
indent-tabs-mode: nil diff --git a/hal_io_fmc.c b/hal_io_fmc.c index 76d6883..0d49f1e 100644 --- a/hal_io_fmc.c +++ b/hal_io_fmc.c @@ -47,11 +47,7 @@ static int debug = 0; static int inited = 0; -#ifndef FMC_IO_TIMEOUT -#define FMC_IO_TIMEOUT 100000000 -#endif - -static hal_error_t init(void) +static inline hal_error_t init(void) { if (!inited) { fmc_init(); @@ -62,7 +58,7 @@ static hal_error_t init(void) /* Translate cryptech register number to FMC address. */ -static hal_addr_t fmc_offset(hal_addr_t offset) +static inline hal_addr_t fmc_offset(hal_addr_t offset) { return offset << 2; } @@ -136,54 +132,6 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t *buf, return HAL_OK; } -hal_error_t hal_io_init(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_INIT }; - return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); -} - -hal_error_t hal_io_next(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT }; - return hal_io_write(core, ADDR_CTRL, buf, sizeof(buf)); -} - -hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count) -{ - hal_error_t err; - uint8_t buf[4]; - int i; - - for (i = 1; ; ++i) { - - if (count && (*count > 0) && (i >= *count)) - return HAL_ERROR_IO_TIMEOUT; - - hal_task_yield(); - - if ((err = hal_io_read(core, ADDR_STATUS, buf, sizeof(buf))) != HAL_OK) - return err; - - if ((buf[3] & status) != 0) { - if (count) - *count = i; - return HAL_OK; - } - } -} - -hal_error_t hal_io_wait_ready(const hal_core_t *core) -{ - int limit = FMC_IO_TIMEOUT; - return hal_io_wait(core, STATUS_READY, &limit); -} - -hal_error_t hal_io_wait_valid(const hal_core_t *core) -{ - int limit = FMC_IO_TIMEOUT; - return hal_io_wait(core, STATUS_VALID, &limit); -} - /* * Local variables: * indent-tabs-mode: nil diff --git a/hal_io_i2c.c b/hal_io_i2c.c index e7dbbb6..8596174 100644 --- a/hal_io_i2c.c +++ b/hal_io_i2c.c @@ -301,53 +301,6 @@ hal_error_t hal_io_read(const hal_core_t *core, hal_addr_t offset, uint8_t 
*buf, return HAL_OK; } -hal_error_t hal_io_init(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_INIT }; - return hal_io_write(core, ADDR_CTRL, buf, 4); -} - -hal_error_t hal_io_next(const hal_core_t *core) -{ - uint8_t buf[4] = { 0, 0, 0, CTRL_NEXT }; - return hal_io_write(core, ADDR_CTRL, buf, 4); -} - -hal_error_t hal_io_wait(const hal_core_t *core, uint8_t status, int *count) -{ - hal_error_t err; - uint8_t buf[4]; - int i; - - for (i = 1; ; ++i) { - - if (count && (*count > 0) && (i >= *count)) - return HAL_ERROR_IO_TIMEOUT; - - if ((err = hal_io_read(core, ADDR_STATUS, buf, 4)) != HAL_OK) - return err; - - if (buf[3] & status) { - if (count) - *count = i; - return HAL_OK; - - } - } -} - -hal_error_t hal_io_wait_ready(const hal_core_t *core) -{ - int limit = 10; - return hal_io_wait(core, STATUS_READY, &limit); -} - -hal_error_t hal_io_wait_valid(const hal_core_t *core) -{ - int limit = 10; - return hal_io_wait(core, STATUS_VALID, &limit); -} - /* * Local variables: * indent-tabs-mode: nil @@ -520,6 +520,46 @@ static inline int acceptable_key_type(const hal_key_type_t type) } } +/* + * Internal bits of constructing a new key block. 
+ */ + +static hal_error_t construct_key_block(hal_ks_block_t *block, + hal_pkey_slot_t *slot, + const uint8_t * const der, const size_t der_len) +{ + if (block == NULL || slot == NULL || der == NULL || der_len == 0) + return HAL_ERROR_IMPOSSIBLE; + + hal_ks_key_block_t *k = &block->key; + hal_error_t err = HAL_OK; + uint8_t kek[KEK_LENGTH]; + size_t kek_len; + + memset(block, 0xFF, sizeof(*block)); + + block->header.block_type = HAL_KS_BLOCK_TYPE_KEY; + block->header.block_status = HAL_KS_BLOCK_STATUS_LIVE; + + k->name = slot->name; + k->type = slot->type; + k->curve = slot->curve; + k->flags = slot->flags; + k->der_len = SIZEOF_KS_KEY_BLOCK_DER; + k->attributes_len = 0; + + if ((err = hal_mkm_get_kek(kek, &kek_len, sizeof(kek))) == HAL_OK) + err = hal_aes_keywrap(NULL, kek, kek_len, der, der_len, k->der, &k->der_len); + + memset(kek, 0, sizeof(kek)); + + return err; +} + +/* + * Store a key block. + */ + hal_error_t hal_ks_store(hal_ks_t *ks, hal_pkey_slot_t *slot, const uint8_t * const der, const size_t der_len) @@ -529,9 +569,6 @@ hal_error_t hal_ks_store(hal_ks_t *ks, hal_error_t err = HAL_OK; hal_ks_block_t *block; - hal_ks_key_block_t *k; - uint8_t kek[KEK_LENGTH]; - size_t kek_len; unsigned b; hal_ks_lock(); @@ -541,35 +578,16 @@ hal_error_t hal_ks_store(hal_ks_t *ks, goto done; } - k = &block->key; - if ((err = hal_ks_index_add(ks, &slot->name, &b, &slot->hint)) != HAL_OK) goto done; hal_ks_cache_mark_used(ks, block, b); - memset(block, 0xFF, sizeof(*block)); - - block->header.block_type = HAL_KS_BLOCK_TYPE_KEY; - block->header.block_status = HAL_KS_BLOCK_STATUS_LIVE; - - k->name = slot->name; - k->type = slot->type; - k->curve = slot->curve; - k->flags = slot->flags; - k->der_len = SIZEOF_KS_KEY_BLOCK_DER; - k->attributes_len = 0; - if (ks->used < ks->size) err = hal_ks_block_erase_maybe(ks, ks->index[ks->used]); if (err == HAL_OK) - err = hal_mkm_get_kek(kek, &kek_len, sizeof(kek)); - - if (err == HAL_OK) - err = hal_aes_keywrap(NULL, kek, kek_len, der, 
der_len, k->der, &k->der_len); - - memset(kek, 0, sizeof(kek)); + err = construct_key_block(block, slot, der, der_len); if (err == HAL_OK) err = hal_ks_block_write(ks, b, block); @@ -933,6 +951,65 @@ hal_error_t hal_ks_get_attributes(hal_ks_t *ks, return err; } +hal_error_t hal_ks_rewrite_der(hal_ks_t *ks, + hal_pkey_slot_t *slot, + const uint8_t * const der, const size_t der_len) +{ + if (ks == NULL || slot == NULL || der == NULL || der_len == 0 || !acceptable_key_type(slot->type)) + return HAL_ERROR_BAD_ARGUMENTS; + + hal_ks_block_t *block = NULL; + hal_error_t err = HAL_OK; + unsigned b; + + hal_ks_lock(); + + { + if ((err = hal_ks_index_find(ks, &slot->name, &b, &slot->hint)) != HAL_OK || + (err = hal_ks_block_test_owner(ks, b, slot->client, slot->session)) != HAL_OK || + (err = hal_ks_block_read_cached(ks, b, &block)) != HAL_OK) + goto done; + + hal_ks_cache_mark_used(ks, block, b); + + size_t bytes_len = 0, attributes_len = 0; + unsigned *count = NULL; + uint8_t *bytes = NULL; + + if ((err = locate_attributes(block, &bytes, &bytes_len, &count)) != HAL_OK || + (err = hal_ks_attribute_scan(bytes, bytes_len, NULL, *count, &attributes_len)) != HAL_OK) + goto done; + + if (der_len + attributes_len > SIZEOF_KS_KEY_BLOCK_DER) { + err = HAL_ERROR_RESULT_TOO_LONG; + goto done; + } + + uint8_t attributes[attributes_len > 0 ? 
attributes_len : 1]; + hal_ks_key_block_t *k = &block->key; + unsigned attributes_count = *count; + + memcpy(attributes, bytes, attributes_len); + + if ((err = construct_key_block(block, slot, der, der_len)) != HAL_OK) + goto done; + + if (k->der_len + attributes_len > SIZEOF_KS_KEY_BLOCK_DER) { + err = HAL_ERROR_IMPOSSIBLE; + goto done; + } + + memcpy(k->der + k->der_len, attributes, attributes_len); + k->attributes_len = attributes_count; + + err = hal_ks_block_update(ks, b, block, &slot->name, &slot->hint); + } + + done: + hal_ks_unlock(); + return err; +} + /* * Local variables: * indent-tabs-mode: nil @@ -46,7 +46,7 @@ */ #ifndef HAL_KS_BLOCK_SIZE -#define HAL_KS_BLOCK_SIZE (4096) +#define HAL_KS_BLOCK_SIZE (4096 * 2) #endif /* @@ -60,12 +60,13 @@ #define KS_TOKEN_CACHE_SIZE 4 #endif -#define NUM_FLASH_BLOCKS KEYSTORE_NUM_SUBSECTORS - #if HAL_KS_BLOCK_SIZE % KEYSTORE_SUBSECTOR_SIZE != 0 #error Keystore block size is not a multiple of flash subsector size #endif +#define NUM_FLASH_BLOCKS ((KEYSTORE_NUM_SUBSECTORS * KEYSTORE_SUBSECTOR_SIZE) / HAL_KS_BLOCK_SIZE) +#define SUBSECTORS_PER_BLOCK (HAL_KS_BLOCK_SIZE / KEYSTORE_SUBSECTOR_SIZE) + /* * Keystore database. 
*/ @@ -90,7 +91,7 @@ typedef struct { static inline uint32_t ks_token_offset(const unsigned blockno) { - return blockno * KEYSTORE_SUBSECTOR_SIZE; + return blockno * HAL_KS_BLOCK_SIZE; } /* @@ -102,7 +103,7 @@ static inline uint32_t ks_token_offset(const unsigned blockno) static hal_error_t ks_token_read(hal_ks_t *ks, const unsigned blockno, hal_ks_block_t *block) { - if (ks != hal_ks_token || block == NULL || blockno >= NUM_FLASH_BLOCKS || sizeof(*block) != KEYSTORE_SUBSECTOR_SIZE) + if (ks != hal_ks_token || block == NULL || blockno >= NUM_FLASH_BLOCKS || sizeof(*block) != HAL_KS_BLOCK_SIZE) return HAL_ERROR_IMPOSSIBLE; if (keystore_read_data(ks_token_offset(blockno), @@ -192,8 +193,13 @@ static hal_error_t ks_token_erase(hal_ks_t *ks, const unsigned blockno) if (ks != hal_ks_token || blockno >= NUM_FLASH_BLOCKS) return HAL_ERROR_IMPOSSIBLE; - if (keystore_erase_subsector(blockno) != CMSIS_HAL_OK) - return HAL_ERROR_KEYSTORE_ACCESS; + unsigned subsector = blockno * SUBSECTORS_PER_BLOCK; + const unsigned end = (blockno + 1) * SUBSECTORS_PER_BLOCK; + + do { + if (keystore_erase_subsector(subsector) != CMSIS_HAL_OK) + return HAL_ERROR_KEYSTORE_ACCESS; + } while (++subsector < end); return HAL_OK; } @@ -232,7 +238,7 @@ static hal_error_t ks_token_erase_maybe(hal_ks_t *ks, const unsigned blockno) static hal_error_t ks_token_write(hal_ks_t *ks, const unsigned blockno, hal_ks_block_t *block) { - if (ks != hal_ks_token || block == NULL || blockno >= NUM_FLASH_BLOCKS || sizeof(*block) != KEYSTORE_SUBSECTOR_SIZE) + if (ks != hal_ks_token || block == NULL || blockno >= NUM_FLASH_BLOCKS || sizeof(*block) != HAL_KS_BLOCK_SIZE) return HAL_ERROR_IMPOSSIBLE; hal_error_t err = ks_token_erase_maybe(ks, blockno); @@ -43,7 +43,6 @@ #include <stdio.h> #include <stdint.h> -#include <assert.h> #include "hal.h" #include "hal_internal.h" @@ -60,175 +59,270 @@ void hal_modexp_set_debug(const int onoff) } /* - * Check a result, report on failure if debugging, pass failures up - * the 
chain. + * Get value of an ordinary register. */ -#define check(_expr_) \ - do { \ - hal_error_t _err = (_expr_); \ - if (_err != HAL_OK && debug) \ - printf("%s failed: %s\n", #_expr_, hal_error_string(_err)); \ - if (_err != HAL_OK) \ - return _err; \ - } while (0) +static hal_error_t inline get_register(const hal_core_t *core, + const hal_addr_t addr, + uint32_t *value) +{ + hal_error_t err; + uint8_t w[4]; + + if (value == NULL) + return HAL_ERROR_IMPOSSIBLE; + + if ((err = hal_io_read(core, addr, w, sizeof(w))) != HAL_OK) + return err; + + *value = (w[0] << 0) | (w[1] << 8) | (w[2] << 16) | (w[3] << 24); + + return HAL_OK; +} /* - * Set an ordinary register. + * Set value of an ordinary register. */ -static hal_error_t set_register(const hal_core_t *core, - const hal_addr_t addr, - uint32_t value) +static hal_error_t inline set_register(const hal_core_t *core, + const hal_addr_t addr, + const uint32_t value) { - uint8_t w[4]; - int i; - - for (i = 3; i >= 0; i--) { - w[i] = value & 0xFF; - value >>= 8; - } + const uint8_t w[4] = { + ((value >> 24) & 0xFF), + ((value >> 16) & 0xFF), + ((value >> 8) & 0xFF), + ((value >> 0) & 0xFF) + }; return hal_io_write(core, addr, w, sizeof(w)); } /* * Get value of a data buffer. We reverse the order of 32-bit words - * in the buffer during the transfer to match what the modexps6 core + * in the buffer during the transfer to match what the modexpa7 core * expects. 
*/ -static hal_error_t get_buffer(const hal_core_t *core, - const hal_addr_t data_addr, - uint8_t *value, - const size_t length) +static inline hal_error_t get_buffer(const hal_core_t *core, + const hal_addr_t data_addr, + uint8_t *value, + const size_t length) { + hal_error_t err; size_t i; - assert(value != NULL && length % 4 == 0); + if (value == NULL || length % 4 != 0) + return HAL_ERROR_IMPOSSIBLE; for (i = 0; i < length; i += 4) - check(hal_io_read(core, data_addr + i/4, &value[length - 4 - i], 4)); + if ((err = hal_io_read(core, data_addr + i/4, &value[length - 4 - i], 4)) != HAL_OK) + return err; return HAL_OK; } /* * Set value of a data buffer. We reverse the order of 32-bit words - * in the buffer during the transfer to match what the modexps6 core + * in the buffer during the transfer to match what the modexpa7 core * expects. + * + * Do we need to zero the portion of the buffer we're not using + * explicitly (that is, the portion between `length` and the value of + * the core's MODEXPA7_ADDR_BUFFER_BITS register)? We've gotten away + * without doing this so far, but the core doesn't take an explicit + * length parameter for the message itself, instead it assumes that + * the message is either as long as or twice as long as the exponent, + * depending on the setting of the CRT mode bit. Maybe initializing + * the core clears the excess bits so there's no issue? Dunno. Have + * never seen a problem with this yet, just don't know why not.
*/ -static hal_error_t set_buffer(const hal_core_t *core, - const hal_addr_t data_addr, - const uint8_t * const value, - const size_t length) +static inline hal_error_t set_buffer(const hal_core_t *core, + const hal_addr_t data_addr, + const uint8_t * const value, + const size_t length) { + hal_error_t err; size_t i; - assert(value != NULL && length % 4 == 0); + if (value == NULL || length % 4 != 0) + return HAL_ERROR_IMPOSSIBLE; for (i = 0; i < length; i += 4) - check(hal_io_write(core, data_addr + i/4, &value[length - 4 - i], 4)); + if ((err = hal_io_write(core, data_addr + i/4, &value[length - 4 - i], 4)) != HAL_OK) + return err; return HAL_OK; } /* - * Run one modexp operation. + * Stuff moved out of modexp so we can run two cores in parallel more + * easily. We have to return to the jacket routine every time we kick + * a core into doing something, since only the jacket routines know + * how many cores we're running for any particular calculation. + * + * In theory we could do something clever where we don't wait for both + * cores to finish precalc before starting either of them on the main + * computation, but that way probably lies madness. */ -hal_error_t hal_modexp(hal_core_t *core, - const uint8_t * const msg, const size_t msg_len, /* Message */ - const uint8_t * const exp, const size_t exp_len, /* Exponent */ - const uint8_t * const mod, const size_t mod_len, /* Modulus */ - uint8_t *result, const size_t result_len) +static inline hal_error_t check_args(hal_modexp_arg_t *a) +{ + /* + * All data pointers must be set, exponent may not be longer than + * modulus, message may not be longer than twice the modulus (CRT + * mode), result buffer must not be shorter than modulus, and all + * input lengths must be a multiple of four bytes (the core is all + * about 32-bit words). 
+ */ + + if (a == NULL || + a->msg == NULL || a->msg_len > MODEXPA7_OPERAND_BYTES || a->msg_len > a->mod_len * 2 || + a->exp == NULL || a->exp_len > MODEXPA7_OPERAND_BYTES || a->exp_len > a->mod_len || + a->mod == NULL || a->mod_len > MODEXPA7_OPERAND_BYTES || + a->result == NULL || a->result_len > MODEXPA7_OPERAND_BYTES || a->result_len < a->mod_len || + a->coeff == NULL || a->coeff_len > MODEXPA7_OPERAND_BYTES || + a->mont == NULL || a->mont_len > MODEXPA7_OPERAND_BYTES || + ((a->msg_len | a->exp_len | a->mod_len) & 3) != 0) + return HAL_ERROR_BAD_ARGUMENTS; + + return HAL_OK; +} + +static inline hal_error_t setup_precalc(const int precalc, hal_modexp_arg_t *a) { hal_error_t err; /* - * All pointers must be set, neither message nor exponent may be - * longer than modulus, result buffer must not be shorter than - * modulus, and all input lengths must be a multiple of four. - * - * The multiple-of-four restriction is a pain, but the rest of the - * HAL code currently enforces the same restriction, and allowing - * arbitrary lengths would require some tedious shuffling to deal - * with alignment issues, so it's not worth trying to fix only here. + * Check that operand size is compatible with the core.
*/ - if (msg == NULL || exp == NULL || mod == NULL || result == NULL || - msg_len > mod_len || exp_len > mod_len || result_len < mod_len || - ((msg_len | exp_len | mod_len) & 3) != 0) + uint32_t operand_max = 0; + + if ((err = get_register(a->core, MODEXPA7_ADDR_BUFFER_BITS, &operand_max)) != HAL_OK) + return err; + + operand_max /= 8; + + if (a->msg_len > operand_max || + a->exp_len > operand_max || + a->mod_len > operand_max || + a->coeff_len > operand_max || + a->mont_len > operand_max) return HAL_ERROR_BAD_ARGUMENTS; - if (((err = hal_core_alloc(MODEXPS6_NAME, &core)) == HAL_ERROR_CORE_NOT_FOUND) && - ((err = hal_core_alloc(MODEXPA7_NAME, &core)) != HAL_OK)) + /* + * Set the modulus, then initiate calculation of modulus-dependent + * speedup factors if necessary, by edge-triggering the "init" bit, + * then return to caller so it can wait for precalc. + */ + + if ((err = set_register(a->core, MODEXPA7_ADDR_MODULUS_BITS, a->mod_len * 8)) != HAL_OK || + (err = set_buffer(a->core, MODEXPA7_ADDR_MODULUS, a->mod, a->mod_len)) != HAL_OK || + (precalc && (err = hal_io_zero(a->core)) != HAL_OK) || + (precalc && (err = hal_io_init(a->core)) != HAL_OK)) return err; -#undef check -#define check(_expr_) \ - do { \ - hal_error_t _err = (_expr_); \ - if (_err != HAL_OK && debug) \ - printf("%s failed: %s\n", #_expr_, hal_error_string(_err)); \ - if (_err != HAL_OK) { \ - hal_core_free(core); \ - return _err; \ - } \ - } while (0) + return HAL_OK; +} + +static inline hal_error_t setup_calc(const int precalc, hal_modexp_arg_t *a) +{ + hal_error_t err; /* - * We probably ought to take the mode (fast vs constant-time) as an - * argument, but for the moment we just guess that really short - * exponent means we're using the public key and can use fast mode, - * really short messages are Miller-Rabin tests and can also use - * fast mode, all other cases are something to do with the private - * key and therefore must use constant-time mode. 
- * - * Unclear whether it's worth trying to figure out exactly how long - * the operands are: assuming a multiple of eight is safe, but makes - * a bit more work for the core; checking to see how many bits are - * really set leaves the core sitting idle while the main CPU does - * these checks. No way to know which is faster without testing; - * take simple approach for the moment. + * Select CRT mode if and only if message is longer than exponent. */ - /* Select mode (1 = fast, 0 = safe) */ - check(set_register(core, MODEXPS6_ADDR_MODE, (exp_len <= 4 || msg_len <= 4))); + const uint32_t mode = a->msg_len > a->mod_len ? MODEXPA7_MODE_CRT : MODEXPA7_MODE_PLAIN; - /* Set modulus size in bits */ - check(set_register(core, MODEXPS6_ADDR_MODULUS_WIDTH, mod_len * 8)); + /* + * Copy out precalc results if necessary, then load everything and + * start the calculation by edge-triggering the "next" bit. If + * everything works, return to caller so it can wait for the + * calculation to complete. 
+ */ - /* Write new modulus */ - check(set_buffer(core, MODEXPS6_ADDR_MODULUS, mod, mod_len)); + if ((precalc && + (err = get_buffer(a->core, MODEXPA7_ADDR_MODULUS_COEFF_OUT, a->coeff, a->coeff_len)) != HAL_OK) || + (precalc && + (err = get_buffer(a->core, MODEXPA7_ADDR_MONTGOMERY_FACTOR_OUT, a->mont, a->mont_len)) != HAL_OK) || + (err = set_buffer(a->core, MODEXPA7_ADDR_MODULUS_COEFF_IN, a->coeff, a->coeff_len)) != HAL_OK || + (err = set_buffer(a->core, MODEXPA7_ADDR_MONTGOMERY_FACTOR_IN, a->mont, a->mont_len)) != HAL_OK || + (err = set_register(a->core, MODEXPA7_ADDR_MODE, mode)) != HAL_OK || + (err = set_buffer(a->core, MODEXPA7_ADDR_MESSAGE, a->msg, a->msg_len)) != HAL_OK || + (err = set_buffer(a->core, MODEXPA7_ADDR_EXPONENT, a->exp, a->exp_len)) != HAL_OK || + (err = set_register(a->core, MODEXPA7_ADDR_EXPONENT_BITS, a->exp_len * 8)) != HAL_OK || + (err = hal_io_zero(a->core)) != HAL_OK || + (err = hal_io_next(a->core)) != HAL_OK) + return err; - /* Pre-calcuate speed-up coefficient */ - check(hal_io_init(core)); + return HAL_OK; +} - /* Wait for calculation to complete */ - check(hal_io_wait_ready(core)); +static inline hal_error_t extract_result(hal_modexp_arg_t *a) +{ + /* + * Extract results from the main calculation and we're done. + * Hardly seems worth making this a separate function. + */ - /* Write new message */ - check(set_buffer(core, MODEXPS6_ADDR_MESSAGE, msg, msg_len)); + return get_buffer(a->core, MODEXPA7_ADDR_RESULT, a->result, a->mod_len); +} - /* Set new exponent length in bits */ - check(set_register(core, MODEXPS6_ADDR_EXPONENT_WIDTH, exp_len * 8)); +/* + * Run one modexp operation. 
+ */ - /* Set new exponent */ - check(set_buffer(core, MODEXPS6_ADDR_EXPONENT, exp, exp_len)); +hal_error_t hal_modexp(const int precalc, hal_modexp_arg_t *a) +{ + hal_error_t err; - /* Start calculation */ - check(hal_io_next(core)); + if ((err = check_args(a)) != HAL_OK) + return err; - /* Wait for result */ - check(hal_io_wait_valid(core)); + if ((err = hal_core_alloc(MODEXPA7_NAME, &a->core)) == HAL_OK && + (err = setup_precalc(precalc, a)) == HAL_OK && + (!precalc || + (err = hal_io_wait_ready(a->core)) == HAL_OK) && + (err = setup_calc(precalc, a)) == HAL_OK && + (err = hal_io_wait_valid(a->core)) == HAL_OK && + (err = extract_result(a)) == HAL_OK) + err = HAL_OK; + + hal_core_free(a->core); + return err; +} - /* Extract result */ - check(get_buffer(core, MODEXPS6_ADDR_RESULT, result, mod_len)); +/* + * Run two modexp operations in parallel. + */ - hal_core_free(core); - return HAL_OK; +hal_error_t hal_modexp2(const int precalc, hal_modexp_arg_t *a1, hal_modexp_arg_t *a2) +{ + hal_error_t err; + + if ((err = check_args(a1)) != HAL_OK || + (err = check_args(a2)) != HAL_OK) + return err; + + if ((err = hal_core_alloc(MODEXPA7_NAME, &a1->core)) == HAL_OK && + (err = hal_core_alloc(MODEXPA7_NAME, &a2->core)) == HAL_OK && + (err = setup_precalc(precalc, a1)) == HAL_OK && + (err = setup_precalc(precalc, a2)) == HAL_OK && + (!precalc || + (err = hal_io_wait_ready2(a1->core, a2->core)) == HAL_OK) && + (err = setup_calc(precalc, a1)) == HAL_OK && + (err = setup_calc(precalc, a2)) == HAL_OK && + (err = hal_io_wait_valid2(a1->core, a2->core)) == HAL_OK && + (err = extract_result(a1)) == HAL_OK && + (err = extract_result(a2)) == HAL_OK) + err = HAL_OK; + + hal_core_free(a1->core); + hal_core_free(a2->core); + return err; } /* @@ -734,7 +734,8 @@ static hal_error_t pkey_local_get_public_key(const hal_pkey_handle_t pkey, * algorithm-specific functions. 
*/ -static hal_error_t pkey_local_sign_rsa(uint8_t *keybuf, const size_t keybuf_len, +static hal_error_t pkey_local_sign_rsa(hal_pkey_slot_t *slot, + uint8_t *keybuf, const size_t keybuf_len, const uint8_t * const der, const size_t der_len, const hal_hash_handle_t hash, const uint8_t * input, size_t input_len, @@ -759,14 +760,25 @@ static hal_error_t pkey_local_sign_rsa(uint8_t *keybuf, const size_t keybuf_len, input = signature; } - if ((err = pkcs1_5_pad(input, input_len, signature, *signature_len, 0x01)) != HAL_OK || - (err = hal_rsa_decrypt(NULL, key, signature, *signature_len, signature, *signature_len)) != HAL_OK) + if ((err = pkcs1_5_pad(input, input_len, signature, *signature_len, 0x01)) != HAL_OK || + (err = hal_rsa_decrypt(NULL, NULL, key, signature, *signature_len, signature, *signature_len)) != HAL_OK) return err; + if (hal_rsa_key_needs_saving(key)) { + uint8_t pkcs8[hal_rsa_private_key_to_der_extra_len(key)]; + size_t pkcs8_len = 0; + if ((err = hal_rsa_private_key_to_der_extra(key, pkcs8, &pkcs8_len, sizeof(pkcs8))) == HAL_OK) + err = hal_ks_rewrite_der(ks_from_flags(slot->flags), slot, pkcs8, pkcs8_len); + memset(pkcs8, 0, sizeof(pkcs8)); + if (err != HAL_OK) + return err; + } + return HAL_OK; } -static hal_error_t pkey_local_sign_ecdsa(uint8_t *keybuf, const size_t keybuf_len, +static hal_error_t pkey_local_sign_ecdsa(hal_pkey_slot_t *slot, + uint8_t *keybuf, const size_t keybuf_len, const uint8_t * const der, const size_t der_len, const hal_hash_handle_t hash, const uint8_t * input, size_t input_len, @@ -813,7 +825,8 @@ static hal_error_t pkey_local_sign(const hal_pkey_handle_t pkey, if (slot == NULL) return HAL_ERROR_KEY_NOT_FOUND; - hal_error_t (*signer)(uint8_t *keybuf, const size_t keybuf_len, + hal_error_t (*signer)(hal_pkey_slot_t *slot, + uint8_t *keybuf, const size_t keybuf_len, const uint8_t * const der, const size_t der_len, const hal_hash_handle_t hash, const uint8_t * const input, const size_t input_len, @@ -840,7 +853,7 @@ static 
hal_error_t pkey_local_sign(const hal_pkey_handle_t pkey, hal_error_t err; if ((err = ks_fetch_from_flags(slot, der, &der_len, sizeof(der))) == HAL_OK) - err = signer(keybuf, sizeof(keybuf), der, der_len, hash, input, input_len, + err = signer(slot, keybuf, sizeof(keybuf), der, der_len, hash, input, input_len, signature, signature_len, signature_max); memset(keybuf, 0, sizeof(keybuf)); @@ -1263,7 +1276,7 @@ static hal_error_t pkey_local_import(const hal_client_handle_t client, goto fail; } - if ((err = hal_rsa_decrypt(NULL, rsa, data, data_len, der, data_len)) != HAL_OK) + if ((err = hal_rsa_decrypt(NULL, NULL, rsa, data, data_len, der, data_len)) != HAL_OK) goto fail; if ((err = hal_get_random(NULL, kek, sizeof(kek))) != HAL_OK) @@ -70,7 +70,6 @@ #include <stdlib.h> #include <stddef.h> #include <string.h> -#include <assert.h> #include "hal.h" #include "hal_internal.h" @@ -94,6 +93,15 @@ #endif /* + * How big to make the buffers for the modulus coefficient and + * Montgomery factor. This will almost certainly want tuning. + */ + +#ifndef HAL_RSA_MAX_OPERAND_LENGTH +#define HAL_RSA_MAX_OPERAND_LENGTH MODEXPA7_OPERAND_BYTES +#endif + +/* * Whether we want debug output. 
*/ @@ -123,7 +131,7 @@ void hal_rsa_set_blinding(const int onoff) */ struct hal_rsa_key { - hal_key_type_t type; /* What kind of key this is */ + hal_key_type_t type; /* What kind of key this is */ fp_int n[1]; /* The modulus */ fp_int e[1]; /* Public exponent */ fp_int d[1]; /* Private exponent */ @@ -132,8 +140,17 @@ struct hal_rsa_key { fp_int u[1]; /* 1/q mod p */ fp_int dP[1]; /* d mod (p - 1) */ fp_int dQ[1]; /* d mod (q - 1) */ + unsigned flags; /* Internal key flags */ + uint8_t /* ModExpA7 speedup factors */ + nC[HAL_RSA_MAX_OPERAND_LENGTH], nF[HAL_RSA_MAX_OPERAND_LENGTH], + pC[HAL_RSA_MAX_OPERAND_LENGTH/2], pF[HAL_RSA_MAX_OPERAND_LENGTH/2], + qC[HAL_RSA_MAX_OPERAND_LENGTH/2], qF[HAL_RSA_MAX_OPERAND_LENGTH/2]; }; +#define RSA_FLAG_NEEDS_SAVING (1 << 0) +#define RSA_FLAG_PRECALC_N_DONE (1 << 1) +#define RSA_FLAG_PRECALC_PQ_DONE (1 << 2) + const size_t hal_rsa_key_t_size = sizeof(hal_rsa_key_t); /* @@ -158,7 +175,7 @@ const size_t hal_rsa_key_t_size = sizeof(hal_rsa_key_t); case FP_OKAY: break; \ case FP_VAL: lose(HAL_ERROR_BAD_ARGUMENTS); \ case FP_MEM: lose(HAL_ERROR_ALLOCATION_FAILURE); \ - default: lose(HAL_ERROR_IMPOSSIBLE); \ + default: lose(HAL_ERROR_IMPOSSIBLE); \ } \ } while (0) @@ -171,7 +188,8 @@ static hal_error_t unpack_fp(const fp_int * const bn, uint8_t *buffer, const siz { hal_error_t err = HAL_OK; - assert(bn != NULL && buffer != NULL); + if (bn == NULL || buffer == NULL) + return HAL_ERROR_IMPOSSIBLE; const size_t bytes = fp_unsigned_bin_size(unconst_fp_int(bn)); @@ -193,22 +211,18 @@ static hal_error_t unpack_fp(const fp_int * const bn, uint8_t *buffer, const siz */ static hal_error_t modexp(hal_core_t *core, - const fp_int * msg, + const int precalc, + const fp_int * const msg, const fp_int * const exp, const fp_int * const mod, - fp_int *res) + fp_int *res, + uint8_t *coeff, const size_t coeff_len, + uint8_t *mont, const size_t mont_len) { hal_error_t err = HAL_OK; - assert(msg != NULL && exp != NULL && mod != NULL && res != NULL); - - 
fp_int reduced_msg[1] = INIT_FP_INT; - - if (fp_cmp_mag(unconst_fp_int(msg), unconst_fp_int(mod)) != FP_LT) { - fp_init(reduced_msg); - fp_mod(unconst_fp_int(msg), unconst_fp_int(mod), reduced_msg); - msg = reduced_msg; - } + if (msg == NULL || exp == NULL || mod == NULL || res == NULL || coeff == NULL || mont == NULL) + return HAL_ERROR_IMPOSSIBLE; const size_t msg_len = (fp_unsigned_bin_size(unconst_fp_int(msg)) + 3) & ~3; const size_t exp_len = (fp_unsigned_bin_size(unconst_fp_int(exp)) + 3) & ~3; @@ -219,14 +233,20 @@ static hal_error_t modexp(hal_core_t *core, uint8_t modbuf[mod_len]; uint8_t resbuf[mod_len]; + hal_modexp_arg_t args = { + .core = core, + .msg = msgbuf, .msg_len = sizeof(msgbuf), + .exp = expbuf, .exp_len = sizeof(expbuf), + .mod = modbuf, .mod_len = sizeof(modbuf), + .result = resbuf, .result_len = sizeof(resbuf), + .coeff = coeff, .coeff_len = coeff_len, + .mont = mont, .mont_len = mont_len + }; + if ((err = unpack_fp(msg, msgbuf, sizeof(msgbuf))) != HAL_OK || (err = unpack_fp(exp, expbuf, sizeof(expbuf))) != HAL_OK || (err = unpack_fp(mod, modbuf, sizeof(modbuf))) != HAL_OK || - (err = hal_modexp(core, - msgbuf, sizeof(msgbuf), - expbuf, sizeof(expbuf), - modbuf, sizeof(modbuf), - resbuf, sizeof(resbuf))) != HAL_OK) + (err = hal_modexp(precalc, &args)) != HAL_OK) goto fail; fp_read_unsigned_bin(res, resbuf, sizeof(resbuf)); @@ -236,6 +256,83 @@ static hal_error_t modexp(hal_core_t *core, memset(expbuf, 0, sizeof(expbuf)); memset(modbuf, 0, sizeof(modbuf)); memset(resbuf, 0, sizeof(resbuf)); + memset(&args, 0, sizeof(args)); + return err; +} + +static hal_error_t modexp2(const int precalc, + const fp_int * const msg, + hal_core_t *core1, + const fp_int * const exp1, + const fp_int * const mod1, + fp_int * res1, + uint8_t *coeff1, const size_t coeff1_len, + uint8_t *mont1, const size_t mont1_len, + hal_core_t *core2, + const fp_int * const exp2, + const fp_int * const mod2, + fp_int * res2, + uint8_t *coeff2, const size_t coeff2_len, + uint8_t 
*mont2, const size_t mont2_len) +{ + hal_error_t err = HAL_OK; + + if (msg == NULL || + exp1 == NULL || mod1 == NULL || res1 == NULL || coeff1 == NULL || mont1 == NULL || + exp2 == NULL || mod2 == NULL || res2 == NULL || coeff2 == NULL || mont2 == NULL) + return HAL_ERROR_IMPOSSIBLE; + + const size_t msg_len = (fp_unsigned_bin_size(unconst_fp_int(msg)) + 3) & ~3; + const size_t exp1_len = (fp_unsigned_bin_size(unconst_fp_int(exp1)) + 3) & ~3; + const size_t mod1_len = (fp_unsigned_bin_size(unconst_fp_int(mod1)) + 3) & ~3; + const size_t exp2_len = (fp_unsigned_bin_size(unconst_fp_int(exp2)) + 3) & ~3; + const size_t mod2_len = (fp_unsigned_bin_size(unconst_fp_int(mod2)) + 3) & ~3; + + uint8_t msgbuf[msg_len]; + uint8_t expbuf1[exp1_len], modbuf1[mod1_len], resbuf1[mod1_len]; + uint8_t expbuf2[exp2_len], modbuf2[mod2_len], resbuf2[mod2_len]; + + hal_modexp_arg_t args1 = { + .core = core1, + .msg = msgbuf, .msg_len = sizeof(msgbuf), + .exp = expbuf1, .exp_len = sizeof(expbuf1), + .mod = modbuf1, .mod_len = sizeof(modbuf1), + .result = resbuf1, .result_len = sizeof(resbuf1), + .coeff = coeff1, .coeff_len = coeff1_len, + .mont = mont1, .mont_len = mont1_len + }; + + hal_modexp_arg_t args2 = { + .core = core2, + .msg = msgbuf, .msg_len = sizeof(msgbuf), + .exp = expbuf2, .exp_len = sizeof(expbuf2), + .mod = modbuf2, .mod_len = sizeof(modbuf2), + .result = resbuf2, .result_len = sizeof(resbuf2), + .coeff = coeff2, .coeff_len = coeff2_len, + .mont = mont2, .mont_len = mont2_len + }; + + if ((err = unpack_fp(msg, msgbuf, sizeof(msgbuf))) != HAL_OK || + (err = unpack_fp(exp1, expbuf1, sizeof(expbuf1))) != HAL_OK || + (err = unpack_fp(mod1, modbuf1, sizeof(modbuf1))) != HAL_OK || + (err = unpack_fp(exp2, expbuf2, sizeof(expbuf2))) != HAL_OK || + (err = unpack_fp(mod2, modbuf2, sizeof(modbuf2))) != HAL_OK || + (err = hal_modexp2(precalc, &args1, &args2)) != HAL_OK) + goto fail; + + fp_read_unsigned_bin(res1, resbuf1, sizeof(resbuf1)); + fp_read_unsigned_bin(res2, resbuf2, 
sizeof(resbuf2)); + + fail: + memset(msgbuf, 0, sizeof(msgbuf)); + memset(expbuf1, 0, sizeof(expbuf1)); + memset(modbuf1, 0, sizeof(modbuf1)); + memset(resbuf1, 0, sizeof(resbuf1)); + memset(&args1, 0, sizeof(args1)); + memset(expbuf2, 0, sizeof(expbuf2)); + memset(modbuf2, 0, sizeof(modbuf2)); + memset(resbuf2, 0, sizeof(resbuf2)); + memset(&args2, 0, sizeof(args2)); return err; } @@ -249,10 +346,14 @@ static hal_error_t modexp(hal_core_t *core, */ static hal_error_t modexp(const hal_core_t *core, /* ignored */ + const int precalc, /* ignored */ const fp_int * const msg, const fp_int * const exp, const fp_int * const mod, - fp_int *res) + fp_int *res, + uint8_t *coeff, const size_t coeff_len, /* ignored */ + uint8_t *mont, const size_t mont_len) /* ignored */ + { hal_error_t err = HAL_OK; FP_CHECK(fp_exptmod(unconst_fp_int(msg), unconst_fp_int(exp), unconst_fp_int(mod), res)); @@ -260,6 +361,28 @@ static hal_error_t modexp(const hal_core_t *core, /* ignored */ return err; } +static hal_error_t modexp2(const int precalc, /* ignored */ + const fp_int * const msg, + hal_core_t *core1, /* ignored */ + const fp_int * const exp1, + const fp_int * const mod1, + fp_int * res1, + uint8_t *coeff1, const size_t coeff1_len, /* ignored */ + uint8_t *mont1, const size_t mont1_len, /* ignored */ + hal_core_t *core2, /* ignored */ + const fp_int * const exp2, + const fp_int * const mod2, + fp_int * res2, + uint8_t *coeff2, const size_t coeff2_len, /* ignored */ + uint8_t *mont2, const size_t mont2_len) /* ignored */ +{ + hal_error_t err = HAL_OK; + FP_CHECK(fp_exptmod(unconst_fp_int(msg), unconst_fp_int(exp1), unconst_fp_int(mod1), res1)); + FP_CHECK(fp_exptmod(unconst_fp_int(msg), unconst_fp_int(exp2), unconst_fp_int(mod2), res2)); + fail: + return err; +} + #endif /* HAL_RSA_SIGN_USE_MODEXP */ /* @@ -281,7 +404,12 @@ static hal_error_t modexp(const hal_core_t *core, /* ignored */ int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) { - return modexp(NULL, a, b, c, d) == 
HAL_OK ? FP_OKAY : FP_VAL; + const size_t len = (fp_unsigned_bin_size(unconst_fp_int(b)) + 3) & ~3; + uint8_t C[len], F[len]; + const hal_error_t err = modexp(NULL, 0, a, b, c, d, C, sizeof(C), F, sizeof(F)); + memset(C, 0, sizeof(C)); + memset(F, 0, sizeof(F)); + return err == HAL_OK ? FP_OKAY : FP_VAL; } #endif /* HAL_RSA_SIGN_USE_MODEXP && HAL_RSA_KEYGEN_USE_MODEXP */ @@ -292,10 +420,12 @@ int fp_exptmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) * try. Come back to this if it looks like a bottleneck. */ -static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t * const key, fp_int *bf, fp_int *ubf) +static hal_error_t create_blinding_factors(hal_core_t *core, hal_rsa_key_t *key, fp_int *bf, fp_int *ubf) { - assert(key != NULL && bf != NULL && ubf != NULL); + if (key == NULL || bf == NULL || ubf == NULL) + return HAL_ERROR_IMPOSSIBLE; + const int precalc = !(key->flags & RSA_FLAG_PRECALC_N_DONE); uint8_t rnd[fp_unsigned_bin_size(unconst_fp_int(key->n))]; hal_error_t err = HAL_OK; @@ -306,9 +436,13 @@ static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t fp_read_unsigned_bin(bf, rnd, sizeof(rnd)); fp_copy(bf, ubf); - if ((err = modexp(core, bf, key->e, key->n, bf)) != HAL_OK) + if ((err = modexp(core, precalc, bf, key->e, key->n, bf, + key->nC, sizeof(key->nC), key->nF, sizeof(key->nF))) != HAL_OK) goto fail; + if (precalc) + key->flags |= RSA_FLAG_PRECALC_N_DONE | RSA_FLAG_NEEDS_SAVING; + FP_CHECK(fp_invmod(ubf, unconst_fp_int(key->n), ubf)); fail: @@ -320,10 +454,12 @@ static hal_error_t create_blinding_factors(hal_core_t *core, const hal_rsa_key_t * RSA decryption via Chinese Remainder Theorem (Garner's formula). 
*/ -static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp_int *msg, fp_int *sig) +static hal_error_t rsa_crt(hal_core_t *core1, hal_core_t *core2, hal_rsa_key_t *key, fp_int *msg, fp_int *sig) { - assert(key != NULL && msg != NULL && sig != NULL); + if (key == NULL || msg == NULL || sig == NULL) + return HAL_ERROR_IMPOSSIBLE; + const int precalc = !(key->flags & RSA_FLAG_PRECALC_PQ_DONE); hal_error_t err = HAL_OK; fp_int t[1] = INIT_FP_INT; fp_int m1[1] = INIT_FP_INT; @@ -335,7 +471,7 @@ static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp * Handle blinding if requested. */ if (blinding) { - if ((err = create_blinding_factors(core, key, bf, ubf)) != HAL_OK) + if ((err = create_blinding_factors(core1, key, bf, ubf)) != HAL_OK) goto fail; FP_CHECK(fp_mulmod(msg, bf, unconst_fp_int(key->n), msg)); } @@ -344,10 +480,14 @@ static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp * m1 = msg ** dP mod p * m2 = msg ** dQ mod q */ - if ((err = modexp(core, msg, key->dP, key->p, m1)) != HAL_OK || - (err = modexp(core, msg, key->dQ, key->q, m2)) != HAL_OK) + if ((err = modexp2(precalc, msg, + core1, key->dP, key->p, m1, key->pC, sizeof(key->pC), key->pF, sizeof(key->pF), + core2, key->dQ, key->q, m2, key->qC, sizeof(key->qC), key->qF, sizeof(key->qF))) != HAL_OK) goto fail; + if (precalc) + key->flags |= RSA_FLAG_PRECALC_PQ_DONE | RSA_FLAG_NEEDS_SAVING; + /* * t = m1 - m2. 
*/ @@ -392,7 +532,7 @@ static hal_error_t rsa_crt(hal_core_t *core, const hal_rsa_key_t * const key, fp */ hal_error_t hal_rsa_encrypt(hal_core_t *core, - const hal_rsa_key_t * const key, + hal_rsa_key_t *key, const uint8_t * const input, const size_t input_len, uint8_t * output, const size_t output_len) { @@ -401,23 +541,29 @@ hal_error_t hal_rsa_encrypt(hal_core_t *core, if (key == NULL || input == NULL || output == NULL || input_len > output_len) return HAL_ERROR_BAD_ARGUMENTS; + const int precalc = !(key->flags & RSA_FLAG_PRECALC_N_DONE); fp_int i[1] = INIT_FP_INT; fp_int o[1] = INIT_FP_INT; fp_read_unsigned_bin(i, unconst_uint8_t(input), input_len); - if ((err = modexp(core, i, key->e, key->n, o)) != HAL_OK || - (err = unpack_fp(o, output, output_len)) != HAL_OK) - goto fail; + err = modexp(core, precalc, i, key->e, key->n, o, + key->nC, sizeof(key->nC), key->nF, sizeof(key->nF)); + + if (err == HAL_OK && precalc) + key->flags |= RSA_FLAG_PRECALC_N_DONE | RSA_FLAG_NEEDS_SAVING; + + if (err == HAL_OK) + err = unpack_fp(o, output, output_len); - fail: fp_zero(i); fp_zero(o); return err; } -hal_error_t hal_rsa_decrypt(hal_core_t *core, - const hal_rsa_key_t * const key, +hal_error_t hal_rsa_decrypt(hal_core_t *core1, + hal_core_t *core2, + hal_rsa_key_t *key, const uint8_t * const input, const size_t input_len, uint8_t * output, const size_t output_len) { @@ -436,10 +582,17 @@ hal_error_t hal_rsa_decrypt(hal_core_t *core, * just do brute force ModExp. 
*/ - if (fp_iszero(key->p) || fp_iszero(key->q) || fp_iszero(key->u) || fp_iszero(key->dP) || fp_iszero(key->dQ)) - err = modexp(core, i, key->d, key->n, o); - else - err = rsa_crt(core, key, i, o); + if (!fp_iszero(key->p) && !fp_iszero(key->q) && !fp_iszero(key->u) && + !fp_iszero(key->dP) && !fp_iszero(key->dQ)) + err = rsa_crt(core1, core2, key, i, o); + + else { + const int precalc = !(key->flags & RSA_FLAG_PRECALC_N_DONE); + err = modexp(core1, precalc, i, key->d, key->n, o, key->nC, sizeof(key->nC), + key->nF, sizeof(key->nF)); + if (err == HAL_OK && precalc) + key->flags |= RSA_FLAG_PRECALC_N_DONE | RSA_FLAG_NEEDS_SAVING; + } if (err != HAL_OK || (err = unpack_fp(o, output, output_len)) != HAL_OK) goto fail; @@ -762,6 +915,8 @@ hal_error_t hal_rsa_key_gen(hal_core_t *core, FP_CHECK(fp_mod(key->d, q_1, key->dQ)); /* dQ = d % (q-1) */ FP_CHECK(fp_invmod(key->q, key->p, key->u)); /* u = (1/q) % p */ + key->flags |= RSA_FLAG_NEEDS_SAVING; + *key_ = key; /* Fall through to cleanup */ @@ -775,10 +930,26 @@ hal_error_t hal_rsa_key_gen(hal_core_t *core, } /* + * Whether a key contains new data that need saving (newly generated + * key, updated speedup components, whatever). + */ + +int hal_rsa_key_needs_saving(const hal_rsa_key_t * const key) +{ + return key != NULL && (key->flags & RSA_FLAG_NEEDS_SAVING); +} + +/* * Just enough ASN.1 to read and write PKCS #1.5 RSAPrivateKey syntax * (RFC 2313 section 7.2) wrapped in a PKCS #8 PrivateKeyInfo (RFC 5208). * * RSAPrivateKey fields in the required order. + * + * The "extra" fields are additional key components specific to the + * systolic modexpa7 core. We represent these in ASN.1 as OPTIONAL + * fields using IMPLICIT PRIVATE tags, since this is neither + * standardized nor meaningful to anybody else. Underlying encoding + * is INTEGER or OCTET STRING (currently the latter). 
*/ #define RSAPrivateKey_fields \ @@ -792,8 +963,17 @@ hal_error_t hal_rsa_key_gen(hal_core_t *core, _(key->dQ); \ _(key->u); -hal_error_t hal_rsa_private_key_to_der(const hal_rsa_key_t * const key, - uint8_t *der, size_t *der_len, const size_t der_max) +#define RSAPrivateKey_extra_fields \ + _(ASN1_PRIVATE + 0, nC, RSA_FLAG_PRECALC_N_DONE); \ + _(ASN1_PRIVATE + 1, nF, RSA_FLAG_PRECALC_N_DONE); \ + _(ASN1_PRIVATE + 2, pC, RSA_FLAG_PRECALC_PQ_DONE); \ + _(ASN1_PRIVATE + 3, pF, RSA_FLAG_PRECALC_PQ_DONE); \ + _(ASN1_PRIVATE + 4, qC, RSA_FLAG_PRECALC_PQ_DONE); \ + _(ASN1_PRIVATE + 5, qF, RSA_FLAG_PRECALC_PQ_DONE); + +hal_error_t hal_rsa_private_key_to_der_internal(const hal_rsa_key_t * const key, + const int include_extra, + uint8_t *der, size_t *der_len, const size_t der_max) { hal_error_t err = HAL_OK; @@ -808,10 +988,32 @@ hal_error_t hal_rsa_private_key_to_der(const hal_rsa_key_t * const key, size_t hlen = 0, vlen = 0; -#define _(x) { size_t n; if ((err = hal_asn1_encode_integer(x, NULL, &n, der_max - vlen)) != HAL_OK) return err; vlen += n; } +#define _(x) \ + { \ + size_t n = 0; \ + err = hal_asn1_encode_integer(x, NULL, &n, der_max - vlen); \ + if (err != HAL_OK) \ + return err; \ + vlen += n; \ + } + RSAPrivateKey_fields; #undef _ +#define _(x,y,z) \ + if ((key->flags & z) != 0) { \ + size_t n = 0; \ + if ((err = hal_asn1_encode_header(x, sizeof(key->y), NULL, \ + &n, 0)) != HAL_OK) \ + return err; \ + vlen += n + sizeof(key->y); \ + } + + if (include_extra) { + RSAPrivateKey_extra_fields; + } +#undef _ + if ((err = hal_asn1_encode_header(ASN1_SEQUENCE, vlen, NULL, &hlen, 0)) != HAL_OK) return err; @@ -832,18 +1034,51 @@ hal_error_t hal_rsa_private_key_to_der(const hal_rsa_key_t * const key, uint8_t *d = der + hlen; memset(d, 0, vlen); -#define _(x) { size_t n; if ((err = hal_asn1_encode_integer(x, d, &n, vlen)) != HAL_OK) return err; d += n; vlen -= n; } +#define _(x) \ + { \ + size_t n = 0; \ + err = hal_asn1_encode_integer(x, d, &n, vlen); \ + if (err != 
HAL_OK) \ + return err; \ + d += n; \ + vlen -= n; \ + } + RSAPrivateKey_fields; #undef _ +#define _(x,y,z) \ + if ((key->flags & z) != 0) { \ + size_t n = 0; \ + if ((err = hal_asn1_encode_header(x, sizeof(key->y), d, \ + &n, vlen)) != HAL_OK) \ + return err; \ + d += n; \ + vlen -= n; \ + memcpy(d, key->y, sizeof(key->y)); \ + d += sizeof(key->y); \ + vlen -= sizeof(key->y); \ + } + + if (include_extra) { + RSAPrivateKey_extra_fields; + } +#undef _ + return hal_asn1_encode_pkcs8_privatekeyinfo(hal_asn1_oid_rsaEncryption, hal_asn1_oid_rsaEncryption_len, NULL, 0, der, d - der, der, der_len, der_max); } -size_t hal_rsa_private_key_to_der_len(const hal_rsa_key_t * const key) +hal_error_t hal_rsa_private_key_to_der(const hal_rsa_key_t * const key, + uint8_t *der, size_t *der_len, const size_t der_max) +{ + return hal_rsa_private_key_to_der_internal(key, 0, der, der_len, der_max); +} + +hal_error_t hal_rsa_private_key_to_der_extra(const hal_rsa_key_t * const key, + uint8_t *der, size_t *der_len, const size_t der_max) { - size_t len = 0; - return hal_rsa_private_key_to_der(key, NULL, &len, 0) == HAL_OK ? 
len : 0; + return hal_rsa_private_key_to_der_internal(key, 1, der, der_len, der_max); } hal_error_t hal_rsa_private_key_from_der(hal_rsa_key_t **key_, @@ -881,12 +1116,48 @@ hal_error_t hal_rsa_private_key_from_der(hal_rsa_key_t **key_, fp_int version[1] = INIT_FP_INT; -#define _(x) { size_t n; if ((err = hal_asn1_decode_integer(x, d, &n, vlen)) != HAL_OK) return err; d += n; vlen -= n; } +#define _(x) \ + { \ + size_t n; \ + err = hal_asn1_decode_integer(x, d, &n, vlen); \ + if (err != HAL_OK) \ + return err; \ + d += n; \ + vlen -= n; \ + } + RSAPrivateKey_fields; #undef _ - if (d != privkey + privkey_len || !fp_iszero(version)) +#define _(x,y,z) \ + if (hal_asn1_peek(x, d, vlen)) { \ + size_t hl = 0, vl = 0; \ + if ((err = hal_asn1_decode_header(x, d, vlen, &hl, &vl)) != HAL_OK) \ + return err; \ + if (vl > sizeof(key->y)) { \ + hal_log(HAL_LOG_DEBUG, "extra factor %s too big (%lu > %lu)", \ + #y, (unsigned long) vl, (unsigned long) sizeof(key->y)); \ + return HAL_ERROR_ASN1_PARSE_FAILED; \ + } \ + memcpy(key->y, d + hl, vl); \ + key->flags |= z; \ + d += hl + vl; \ + vlen -= hl + vl; \ + } + + RSAPrivateKey_extra_fields; +#undef _ + + if (d != privkey + privkey_len) { + hal_log(HAL_LOG_DEBUG, "not at end of buffer (0x%lx != 0x%lx)", + (unsigned long) d, (unsigned long) privkey + privkey_len); return HAL_ERROR_ASN1_PARSE_FAILED; + } + + if (!fp_iszero(version)) { + hal_log(HAL_LOG_DEBUG, "nonzero version"); + return HAL_ERROR_ASN1_PARSE_FAILED; + } *key_ = key; diff --git a/tests/test-rsa.c b/tests/test-rsa.c index f4e7a8f..176ba03 100644 --- a/tests/test-rsa.c +++ b/tests/test-rsa.c @@ -56,12 +56,21 @@ static int test_modexp(hal_core_t *core, const rsa_tc_bn_t * const exp, /* Exponent */ const rsa_tc_bn_t * const val) /* Expected result */ { - uint8_t result[tc->n.len]; + uint8_t result[tc->n.len], C[tc->n.len], F[tc->n.len]; printf("%s test for %lu-bit RSA key\n", kind, (unsigned long) tc->size); - if (hal_modexp(core, msg->val, msg->len, exp->val, exp->len, - 
tc->n.val, tc->n.len, result, sizeof(result)) != HAL_OK) + hal_modexp_arg_t args = { + .core = core, + .msg = msg->val, .msg_len = msg->len, + .exp = exp->val, .exp_len = exp->len, + .mod = tc->n.val, .mod_len = tc->n.len, + .result = result, .result_len = sizeof(result), + .coeff = C, .coeff_len = sizeof(C), + .mont = F, .mont_len = sizeof(F) + }; + + if (hal_modexp(1, &args) != HAL_OK) return printf("ModExp failed\n"), 0; if (memcmp(result, val->val, val->len)) @@ -98,7 +107,7 @@ static int test_decrypt(hal_core_t *core, uint8_t result[tc->n.len]; - if ((err = hal_rsa_decrypt(core, key, tc->m.val, tc->m.len, result, sizeof(result))) != HAL_OK) + if ((err = hal_rsa_decrypt(core, NULL, key, tc->m.val, tc->m.len, result, sizeof(result))) != HAL_OK) printf("RSA CRT failed: %s\n", hal_error_string(err)); const int mismatch = (err == HAL_OK && memcmp(result, tc->s.val, tc->s.len) != 0); @@ -165,7 +174,7 @@ static int test_gen(hal_core_t *core, uint8_t result[tc->n.len]; - if ((err = hal_rsa_decrypt(core, key1, tc->m.val, tc->m.len, result, sizeof(result))) != HAL_OK) + if ((err = hal_rsa_decrypt(core, NULL, key1, tc->m.val, tc->m.len, result, sizeof(result))) != HAL_OK) printf("RSA CRT failed: %s\n", hal_error_string(err)); snprintf(fn, sizeof(fn), "test-rsa-sig-%04lu.der", (unsigned long) tc->size); diff --git a/tests/test-trng.c b/tests/test-trng.c index f570752..45dec56 100644 --- a/tests/test-trng.c +++ b/tests/test-trng.c @@ -43,6 +43,7 @@ #include <sys/time.h> #include <hal.h> +#include <hal_internal.h> #include <verilog_constants.h> #ifndef WAIT_FOR_CSPRNG_VALID diff --git a/unit-tests.py b/unit-tests.py index 824d495..514aace 100644 --- a/unit-tests.py +++ b/unit-tests.py @@ -1279,6 +1279,7 @@ class TestPKeyAttribute(TestCaseLoggedIn): self.load_and_fill(0, n_attrs = 64) def test_attribute_bloat_volatile_many(self): + self.skipUnlessAll("bloat tests with large flash blocks exceed XDR limits, sigh") with self.assertRaises(HAL_ERROR_RESULT_TOO_LONG): 
self.load_and_fill(0, n_attrs = 128) @@ -1286,6 +1287,7 @@ class TestPKeyAttribute(TestCaseLoggedIn): self.load_and_fill(0, n_attrs = 6, n_fill = 256) def test_attribute_bloat_volatile_big(self): + self.skipUnlessAll("bloat tests with large flash blocks exceed XDR limits, sigh") with self.assertRaises(HAL_ERROR_RESULT_TOO_LONG): self.load_and_fill(0, n_attrs = 6, n_fill = 512) @@ -1293,6 +1295,7 @@ class TestPKeyAttribute(TestCaseLoggedIn): self.load_and_fill(HAL_KEY_FLAG_TOKEN, n_attrs = 64) def test_attribute_bloat_token_many(self): + self.skipUnlessAll("bloat tests with large flash blocks exceed XDR limits, sigh") with self.assertRaises(HAL_ERROR_RESULT_TOO_LONG): self.load_and_fill(HAL_KEY_FLAG_TOKEN, n_attrs = 128) @@ -1300,6 +1303,7 @@ class TestPKeyAttribute(TestCaseLoggedIn): self.load_and_fill(HAL_KEY_FLAG_TOKEN, n_attrs = 6, n_fill = 256) def test_attribute_bloat_token_big(self): + self.skipUnlessAll("bloat tests with large flash blocks exceed XDR limits, sigh") with self.assertRaises(HAL_ERROR_RESULT_TOO_LONG): self.load_and_fill(HAL_KEY_FLAG_TOKEN, n_attrs = 6, n_fill = 512) diff --git a/verilog_constants.h b/verilog_constants.h index c9bb566..1b00b96 100644 --- a/verilog_constants.h +++ b/verilog_constants.h @@ -222,23 +222,38 @@ #define MODEXPS6_ADDR_MESSAGE (MODEXPS6_ADDR_OPERANDS + 1 * MODEXPS6_OPERAND_WORDS) #define MODEXPS6_ADDR_EXPONENT (MODEXPS6_ADDR_OPERANDS + 2 * MODEXPS6_OPERAND_WORDS) #define MODEXPS6_ADDR_RESULT (MODEXPS6_ADDR_OPERANDS + 3 * MODEXPS6_OPERAND_WORDS) +#define MODEXPS6_MODE_CONSTANT_TIME (0) +#define MODEXPS6_MODE_FAST_PUBLIC (1) /* * ModExpA7 core. MODEXPA7_OPERAND_BITS is size in bits of largest * supported modulus. + * + * I prefer the way Pavel wrote the constants for this in his sample + * code to what I've done here, but let's get the thing working before + * worrying about the yaks' pedicures. 
*/ -#define MODEXPA7_OPERAND_BITS (4096) -#define MODEXPA7_OPERAND_WORDS (MODEXPA7_OPERAND_BITS / 32) -#define MODEXPA7_ADDR_REGISTERS (0 * MODEXPA7_OPERAND_WORDS) -#define MODEXPA7_ADDR_OPERANDS (4 * MODEXPA7_OPERAND_WORDS) -#define MODEXPA7_ADDR_MODE (MODEXPA7_ADDR_REGISTERS + 0x10) -#define MODEXPA7_ADDR_MODULUS_WIDTH (MODEXPA7_ADDR_REGISTERS + 0x11) -#define MODEXPA7_ADDR_EXPONENT_WIDTH (MODEXPA7_ADDR_REGISTERS + 0x12) -#define MODEXPA7_ADDR_MODULUS (MODEXPA7_ADDR_OPERANDS + 0 * MODEXPA7_OPERAND_WORDS) -#define MODEXPA7_ADDR_MESSAGE (MODEXPA7_ADDR_OPERANDS + 1 * MODEXPA7_OPERAND_WORDS) -#define MODEXPA7_ADDR_EXPONENT (MODEXPA7_ADDR_OPERANDS + 2 * MODEXPA7_OPERAND_WORDS) -#define MODEXPA7_ADDR_RESULT (MODEXPA7_ADDR_OPERANDS + 3 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_OPERAND_BITS (4096) +#define MODEXPA7_OPERAND_BYTES (MODEXPA7_OPERAND_BITS / 8) +#define MODEXPA7_OPERAND_WORDS (MODEXPA7_OPERAND_BITS / 32) +#define MODEXPA7_ADDR_REGISTERS (0 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_OPERANDS (8 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MODE (MODEXPA7_ADDR_REGISTERS + 0x10) +#define MODEXPA7_ADDR_MODULUS_BITS (MODEXPA7_ADDR_REGISTERS + 0x11) +#define MODEXPA7_ADDR_EXPONENT_BITS (MODEXPA7_ADDR_REGISTERS + 0x12) +#define MODEXPA7_ADDR_BUFFER_BITS (MODEXPA7_ADDR_REGISTERS + 0x13) +#define MODEXPA7_ADDR_ARRAY_BITS (MODEXPA7_ADDR_REGISTERS + 0x14) +#define MODEXPA7_ADDR_MODULUS (MODEXPA7_ADDR_OPERANDS + 0 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MESSAGE (MODEXPA7_ADDR_OPERANDS + 1 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_EXPONENT (MODEXPA7_ADDR_OPERANDS + 2 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_RESULT (MODEXPA7_ADDR_OPERANDS + 3 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MODULUS_COEFF_OUT (MODEXPA7_ADDR_OPERANDS + 4 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MODULUS_COEFF_IN (MODEXPA7_ADDR_OPERANDS + 5 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MONTGOMERY_FACTOR_OUT (MODEXPA7_ADDR_OPERANDS + 6 * 
MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_ADDR_MONTGOMERY_FACTOR_IN (MODEXPA7_ADDR_OPERANDS + 7 * MODEXPA7_OPERAND_WORDS) +#define MODEXPA7_MODE_CRT (1 << 1) +#define MODEXPA7_MODE_PLAIN (0 << 1) /* * ECDSA P-256 point multiplier core. ECDSA256_OPERAND_BITS is size |