/* * modexp.c * ---------- * Wrapper around Cryptech ModExp cores. * * This doesn't do full RSA, that's another module. This module's job * is just the I/O to get bits in and out of the ModExp core, including * compensating for a few known bugs that haven't been resolved yet. * * If at some point the interface to the ModExp core becomes simple * enough that this module is no longer needed, it will go away. * * Authors: Rob Austein * Copyright (c) 2015-2017, NORDUnet A/S All rights reserved. * Copyright: 2020, The Commons Conservancy Cryptech Project * SPDX-License-Identifier: BSD-3-Clause * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "hal.h" #include "hal_internal.h" /* * Enable use of the ModExpNG core, if present. */ static enum { unknown = -1, modexpa7_core = 0, modexpng_core = 1 } which_core = unknown; static inline hal_error_t init_modexp_core(void) { if (which_core != unknown) return HAL_OK; else if (hal_core_find(MODEXPNG_NAME, NULL) != NULL) return (which_core = modexpng_core), HAL_OK; else if (hal_core_find(MODEXPA7_NAME, NULL) != NULL) return (which_core = modexpa7_core), HAL_OK; else return HAL_ERROR_CORE_NOT_FOUND; } hal_error_t hal_modexp_use_modexpng(const int onoff) { if (onoff && hal_core_find(MODEXPNG_NAME, NULL) != NULL) return (which_core = modexpng_core), HAL_OK; else if (!onoff && hal_core_find(MODEXPA7_NAME, NULL) != NULL) return (which_core = modexpa7_core), HAL_OK; else return HAL_ERROR_CORE_NOT_FOUND; } int hal_modexp_using_modexpng(void) { return (init_modexp_core() == HAL_OK && which_core == modexpng_core); } /* * Whether we want debug output. */ static int debug = 0; void hal_modexp_set_debug(const int onoff) { debug = onoff; } /* * Get value of an ordinary register. */ static inline hal_error_t get_register(const hal_core_t *core, const hal_addr_t addr, uint32_t *value) { hal_error_t err; uint8_t w[4]; if (value == NULL) return HAL_ERROR_IMPOSSIBLE; if ((err = hal_io_read(core, addr, w, sizeof(w))) != HAL_OK) return err; *value = (w[0] << 0) | (w[1] << 8) | (w[2] << 16) | (w[3] << 24); return HAL_OK; } /* * Set value of an ordinary register. */ static inline hal_error_t set_register(const hal_core_t *core, const hal_addr_t addr, const uint32_t value) { const uint8_t w[4] = { ((value >> 24) & 0xFF), ((value >> 16) & 0xFF), ((value >> 8) & 0xFF), ((value >> 0) & 0xFF) }; return hal_io_write(core, addr, w, sizeof(w)); } /* * Get value of a data buffer. We reverse the order of 32-bit words * in the buffer during the transfer to match what the modexpa7 core * expects. */ static inline hal_error_t get_buffer(const hal_core_t *core, const hal_addr_t data_addr, uint8_t *value, const size_t length) { hal_error_t err; size_t i; if (value == NULL || length % 4 != 0) return HAL_ERROR_IMPOSSIBLE; for (i = 0; i < length; i += 4) if ((err = hal_io_read(core, data_addr + i/4, &value[length - 4 - i], 4)) != HAL_OK) return err; return HAL_OK; } /* * Set value of a data buffer. We reverse the order of 32-bit words * in the buffer during the transfer to match what the modexpa7 core * expects. * * Do we need to zero the portion of the buffer we're not using * explictly (that is, the portion between `length` and the value of * the core's MODEXPA7_ADDR_BUFFER_BITS register)? We've gotten away * without doing this so far, but the core doesn't take an explicit * length parameter for the message itself, instead it assumes that * the message is either as long as or twice as long as the exponent, * depending on the setting of the CRT mode bit. Maybe initializing * the core clears the excess bits so there's no issue? Dunno. Have * never seen a problem with this yet, just dont' know why not. */ static inline hal_error_t set_buffer(const hal_core_t *core, const hal_addr_t data_addr, const uint8_t * const value, const size_t length) { hal_error_t err; size_t i; if (value == NULL || length % 4 != 0) return HAL_ERROR_IMPOSSIBLE; for (i = 0; i < length; i += 4) if ((err = hal_io_write(core, data_addr + i/4, &value[length - 4 - i], 4)) != HAL_OK) return err; return HAL_OK; } /* * Stuff moved out of modexp so we can run two cores in parallel more * easily. We have to return to the jacket routine every time we kick * a core into doing something, since only the jacket routines know * how many cores we're running for any particular calculation. * * In theory we could do something clever where we don't wait for both * cores to finish precalc before starting either of them on the main * computation, but that way probably lies madness. */ static inline hal_error_t check_args(hal_modexp_arg_t *a) { /* * All data pointers must be set, exponent may not be longer than * modulus, message may not be longer than twice the modulus (CRT * mode), result buffer must not be shorter than modulus, and all * input lengths must be a multiple of four bytes (the core is all * about 32-bit words). */ if (a == NULL || a->msg == NULL || a->msg_len > MODEXPA7_OPERAND_BYTES || a->msg_len > a->mod_len * 2 || a->exp == NULL || a->exp_len > MODEXPA7_OPERAND_BYTES || a->exp_len > a->mod_len || a->mod == NULL || a->mod_len > MODEXPA7_OPERAND_BYTES || a->result == NULL || a->result_len > MODEXPA7_OPERAND_BYTES || a->result_len < a->mod_len || a->coeff == NULL || a->coeff_len > MODEXPA7_OPERAND_BYTES + 4 || a->mont == NULL || a->mont_len > MODEXPA7_OPERAND_BYTES || ((a->msg_len | a->exp_len | a->mod_len) & 3) != 0) return HAL_ERROR_BAD_ARGUMENTS; return HAL_OK; } static inline hal_error_t setup_precalc(const int precalc, hal_modexp_arg_t *a) { hal_error_t err; /* * Check that operand size is compatabible with the core. */ uint32_t operand_max = 0; if ((err = get_register(a->core, MODEXPA7_ADDR_BUFFER_BITS, &operand_max)) != HAL_OK) return err; operand_max /= 8; if (a->msg_len > operand_max || a->exp_len > operand_max || a->mod_len > operand_max || a->coeff_len > operand_max || a->mont_len > operand_max) return HAL_ERROR_BAD_ARGUMENTS; /* * Set the modulus, then initiate calculation of modulus-dependent * speedup factors if necessary, by edge-triggering the "init" bit, * then return to caller so it can wait for precalc. */ if ((err = set_register(a->core, MODEXPA7_ADDR_MODULUS_BITS, a->mod_len * 8)) != HAL_OK || (err = set_buffer(a->core, MODEXPA7_ADDR_MODULUS, a->mod, a->mod_len)) != HAL_OK || (precalc && (err = hal_io_zero(a->core)) != HAL_OK) || (precalc && (err = hal_io_init(a->core)) != HAL_OK)) return err; return HAL_OK; } static inline hal_error_t setup_calc(const int precalc, hal_modexp_arg_t *a) { hal_error_t err; /* * Select CRT mode if and only if message is longer than exponent. */ const uint32_t mode = a->msg_len > a->mod_len ? MODEXPA7_MODE_CRT : MODEXPA7_MODE_PLAIN; /* * Copy out precalc results if necessary, then load everything and * start the calculation by edge-triggering the "next" bit. If * everything works, return to caller so it can wait for the * calculation to complete. */ if ((precalc && (err = get_buffer(a->core, MODEXPA7_ADDR_MODULUS_COEFF_OUT, a->coeff, a->coeff_len)) != HAL_OK) || (precalc && (err = get_buffer(a->core, MODEXPA7_ADDR_MONTGOMERY_FACTOR_OUT, a->mont, a->mont_len)) != HAL_OK) || (err = set_buffer(a->core, MODEXPA7_ADDR_MODULUS_COEFF_IN, a->coeff, a->coeff_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPA7_ADDR_MONTGOMERY_FACTOR_IN, a->mont, a->mont_len)) != HAL_OK || (err = set_register(a->core, MODEXPA7_ADDR_MODE, mode)) != HAL_OK || (err = set_buffer(a->core, MODEXPA7_ADDR_MESSAGE, a->msg, a->msg_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPA7_ADDR_EXPONENT, a->exp, a->exp_len)) != HAL_OK || (err = set_register(a->core, MODEXPA7_ADDR_EXPONENT_BITS, a->exp_len * 8)) != HAL_OK || (err = hal_io_zero(a->core)) != HAL_OK || (err = hal_io_next(a->core)) != HAL_OK) return err; return HAL_OK; } static inline hal_error_t extract_result(hal_modexp_arg_t *a) { /* * Extract results from the main calculation and we're done. */ return get_buffer(a->core, MODEXPA7_ADDR_RESULT, a->result, a->mod_len); } /* * Run one modexp operation. */ hal_error_t hal_modexp(const int precalc, hal_modexp_arg_t *a) { hal_error_t err; if ((err = check_args(a)) != HAL_OK) return err; const int free_core = a->core == NULL; if ((!free_core || (err = hal_core_alloc(MODEXPA7_NAME, &a->core, NULL)) == HAL_OK) && (err = setup_precalc(precalc, a)) == HAL_OK && (!precalc || (err = hal_io_wait_ready(a->core)) == HAL_OK) && (err = setup_calc(precalc, a)) == HAL_OK && (err = hal_io_wait_valid(a->core)) == HAL_OK && (err = extract_result(a)) == HAL_OK) err = HAL_OK; if (free_core) { hal_core_free(a->core); a->core = NULL; } return err; } /* * Run two modexp operations in parallel. */ hal_error_t hal_modexp2(const int precalc, hal_modexp_arg_t *a1, hal_modexp_arg_t *a2) { int free_core = 0; hal_error_t err; if ((err = check_args(a1)) != HAL_OK || (err = check_args(a2)) != HAL_OK) return err; if (a1->core == NULL && a2->core == NULL) free_core = 1; else if (a1->core == NULL || a2->core == NULL) return HAL_ERROR_BAD_ARGUMENTS; if ((!free_core || (err = hal_core_alloc2(MODEXPA7_NAME, &a1->core, NULL, MODEXPA7_NAME, &a2->core, NULL)) == HAL_OK) && (err = setup_precalc(precalc, a1)) == HAL_OK && (err = setup_precalc(precalc, a2)) == HAL_OK && (!precalc || (err = hal_io_wait_ready2(a1->core, a2->core)) == HAL_OK) && (err = setup_calc(precalc, a1)) == HAL_OK && (err = setup_calc(precalc, a2)) == HAL_OK && (err = hal_io_wait_valid2(a1->core, a2->core)) == HAL_OK && (err = extract_result(a1)) == HAL_OK && (err = extract_result(a2)) == HAL_OK) err = HAL_OK; if (free_core) { hal_core_free(a1->core); hal_core_free(a2->core); a1->core = a2->core = NULL; } return err; } hal_error_t hal_modexpng(hal_modexpng_arg_t *a) { hal_error_t err; if ((err = check_args((hal_modexp_arg_t *)a)) != HAL_OK) return err; const int free_core = a->core == NULL; const uint32_t mode = (a->p == NULL) ? MODEXPNG_MODE_PLAIN : MODEXPNG_MODE_CRT; if ((free_core && (err = hal_core_alloc(MODEXPNG_NAME, &a->core, NULL)) != HAL_OK) || (err = hal_io_zero(a->core)) != HAL_OK || // <<<< (err = set_register(a->core, MODEXPNG_ADDR_MODE, mode)) != HAL_OK || (err = set_register(a->core, MODEXPNG_ADDR_MODULUS_BITS, a->mod_len * 8)) != HAL_OK || (err = set_register(a->core, MODEXPNG_ADDR_EXPONENT_BITS, a->exp_len * 8)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_M, a->msg, a->msg_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_N, a->mod, a->mod_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_N_FACTOR, a->mont, a->mont_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_N_COEFF, a->coeff, a->coeff_len)) != HAL_OK) goto fail; if (a->bf != NULL && a->ubf != NULL) { if ((err = set_buffer(a->core, MODEXPNG_ADDR_BANK_X, a->ubf, a->ubf_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_Y, a->bf, a->bf_len)) != HAL_OK) goto fail; } else { /* set blinding factors to (1,1) */ uint8_t one[a->mod_len]; memset(one, 0, sizeof(one)); one[sizeof(one) - 1] = 1; if ((err = set_buffer(a->core, MODEXPNG_ADDR_BANK_X, one, sizeof(one))) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_Y, one, sizeof(one))) != HAL_OK) goto fail; } if (mode == MODEXPNG_MODE_PLAIN) { if ((err = set_buffer(a->core, MODEXPNG_ADDR_BANK_D, a->exp, a->exp_len)) != HAL_OK) goto fail; } else { if ((err = set_buffer(a->core, MODEXPNG_ADDR_BANK_P, a->p, a->p_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_DP, a->dP, a->dP_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_P_FACTOR, a->pF, a->pF_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_P_COEFF, a->pC, a->pC_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_Q, a->q, a->q_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_DQ, a->dQ, a->dQ_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_Q_FACTOR, a->qF, a->qF_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_Q_COEFF, a->qC, a->qC_len)) != HAL_OK || (err = set_buffer(a->core, MODEXPNG_ADDR_BANK_QINV, a->qInv, a->qInv_len)) != HAL_OK) goto fail; } if ((err = hal_io_zero(a->core)) != HAL_OK || (err = hal_io_next(a->core)) != HAL_OK || (err = hal_io_wait_valid(a->core)) != HAL_OK || (err = get_buffer(a->core, MODEXPNG_ADDR_BANK_S, a->result, a->result_len)) != HAL_OK || ((a->bf != NULL && a->ubf != NULL) && ((err = get_buffer(a->core, MODEXPNG_ADDR_BANK_XM, a->ubf, a->ubf_len)) != HAL_OK || (err = get_buffer(a->core, MODEXPNG_ADDR_BANK_YM, a->bf, a->bf_len)) != HAL_OK))) goto fail; fail: if (free_core) { hal_core_free(a->core); a->core = NULL; } return err; } /* * Local variables: * indent-tabs-mode: nil * End: */