From a8c03ade40a65fea01a8ef2075d1e75e29bfd4bf Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Thu, 9 Mar 2017 00:39:59 -0500 Subject: Backport ECDSA core support to ksng branch. Support for the core/pkey/ecdsa{256,384} cores is cooked before the branch on which it was based. Oops. Time to backport. See pymux branch for original commit history. git should do the right thing when the pymux branch is cooked enough to merge back to the ksng or master branches. --- Makefile | 73 +++++++++++++++++++++------ ecdsa.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++ hal.h | 6 +++ verilog_constants.h | 30 ++++++++++- 4 files changed, 234 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index c0a136b..c360c6a 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2016, NORDUnet A/S +# Copyright (c) 2015-2017, NORDUnet A/S # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -41,24 +41,28 @@ LIB = libhal.a # Error checking on known control options, some of which allow the user entirely too much rope. -USAGE := "usage: ${MAKE} [IO_BUS=eim|i2c|fmc] [RPC_MODE=none|server|client-simple|client-mixed] [KS=mmap|flash] [RPC_TRANSPORT=none|loopback|serial|daemon] [MODEXP_CORE=no|yes]" +USAGE := "usage: ${MAKE} [IO_BUS=eim|i2c|fmc] [RPC_MODE=none|server|client-simple|client-mixed] [KS=mmap|flash] [RPC_TRANSPORT=none|loopback|serial|daemon] [MODEXP_CORE=no|yes] [HASH_CORES=no|yes] [ECDSA_CORES=no|yes]" IO_BUS ?= none KS ?= flash RPC_MODE ?= none RPC_TRANSPORT ?= none MODEXP_CORE ?= no +HASH_CORES ?= no +ECDSA_CORES ?= yes ifeq (,$(and \ $(filter none eim i2c fmc ,${IO_BUS}),\ $(filter none server client-simple client-mixed ,${RPC_MODE}),\ $(filter mmap flash ,${KS}),\ $(filter none loopback serial daemon ,${RPC_TRANSPORT}),\ - $(filter no yes ,${MODEXP_CORE}))) + $(filter no yes ,${MODEXP_CORE}),\ + $(filter no yes ,${HASH_CORES}),\ + $(filter no yes ,${ECDSA_CORES}))) $(error ${USAGE}) endif -$(info Building libhal with configuration IO_BUS=${IO_BUS} RPC_MODE=${RPC_MODE} KS=${KS} RPC_TRANSPORT=${RPC_TRANSPORT} MODEXP_CORE=${MODEXP_CORE}) +$(info Building libhal with configuration IO_BUS=${IO_BUS} RPC_MODE=${RPC_MODE} KS=${KS} RPC_TRANSPORT=${RPC_TRANSPORT} MODEXP_CORE=${MODEXP_CORE} HASH_CORES=${HASH_CORES} ECDSA_CORES=${ECDSA_CORES}) # Whether the RSA code should use the ModExp | ModExpS6 | ModExpA7 core. @@ -68,6 +72,24 @@ else RSA_USE_MODEXP_CORE := 0 endif +# Whether the hash code should use the SHA-1 / SHA-256 / SHA-512 cores. + +ifeq "${HASH_CORES}" "yes" + HASH_ONLY_USE_SOFT_CORES := 0 +else + HASH_ONLY_USE_SOFT_CORES := 1 +endif + +# Whether the ECDSA code should use the ECDSA256 and ECDSA384 cores. + +ifeq "${ECDSA_CORES}" "yes" + ECDSA_USE_ECDSA256_CORE := 1 + ECDSA_USE_ECDSA384_CORE := 1 +else + ECDSA_USE_ECDSA256_CORE := 0 + ECDSA_USE_ECDSA384_CORE := 0 +endif + # Object files to build, initialized with ones we always want. # There's a balance here between skipping files we don't strictly # need and reducing the number of unnecessary conditionals in this @@ -169,15 +191,30 @@ endif ifeq "${RPC_MODE}" "none" OBJ += ${CORE_OBJ} CFLAGS += -DHAL_RSA_USE_MODEXP=${RSA_USE_MODEXP_CORE} + CFLAGS += -DHAL_ONLY_USE_SOFTWARE_HASH_CORES=${HASH_ONLY_USE_SOFT_CORES} + CFLAGS += -DHAL_ECDSA_VERILOG_ECDSA256_MULTIPLIER=${ECDSA_USE_ECDSA256_CORE} + CFLAGS += -DHAL_ECDSA_VERILOG_ECDSA384_MULTIPLIER=${ECDSA_USE_ECDSA384_CORE} else ifeq "${RPC_MODE}" "server" OBJ += ${CORE_OBJ} ${RPC_SERVER_OBJ} - CFLAGS += -DRPC_CLIENT=RPC_CLIENT_LOCAL -DHAL_RSA_USE_MODEXP=${RSA_USE_MODEXP_CORE} + CFLAGS += -DRPC_CLIENT=RPC_CLIENT_LOCAL + CFLAGS += -DHAL_RSA_USE_MODEXP=${RSA_USE_MODEXP_CORE} + CFLAGS += -DHAL_ONLY_USE_SOFTWARE_HASH_CORES=${HASH_ONLY_USE_SOFT_CORES} + CFLAGS += -DHAL_ECDSA_VERILOG_ECDSA256_MULTIPLIER=${ECDSA_USE_ECDSA256_CORE} + CFLAGS += -DHAL_ECDSA_VERILOG_ECDSA384_MULTIPLIER=${ECDSA_USE_ECDSA384_CORE} else ifeq "${RPC_MODE}" "client-simple" OBJ += ${RPC_CLIENT_OBJ} - CFLAGS += -DRPC_CLIENT=RPC_CLIENT_REMOTE -DHAL_RSA_USE_MODEXP=0 -DHAL_ONLY_USE_SOFTWARE_HASH_CORES=1 + CFLAGS += -DRPC_CLIENT=RPC_CLIENT_REMOTE + CFLAGS += -DHAL_RSA_USE_MODEXP=0 + CFLAGS += -DHAL_ONLY_USE_SOFTWARE_HASH_CORES=1 + CFLAGS += -DHAL_ECDSA_VERILOG_ECDSA256_MULTIPLIER=0 + CFLAGS += -DHAL_ECDSA_VERILOG_ECDSA384_MULTIPLIER=0 else ifeq "${RPC_MODE}" "client-mixed" OBJ += ${RPC_CLIENT_OBJ} - CFLAGS += -DRPC_CLIENT=RPC_CLIENT_MIXED -DHAL_RSA_USE_MODEXP=0 -DHAL_ONLY_USE_SOFTWARE_HASH_CORES=1 + CFLAGS += -DRPC_CLIENT=RPC_CLIENT_MIXED + CFLAGS += -DHAL_RSA_USE_MODEXP=0 + CFLAGS += -DHAL_ONLY_USE_SOFTWARE_HASH_CORES=1 + CFLAGS += -DHAL_ECDSA_VERILOG_ECDSA256_MULTIPLIER=0 + CFLAGS += -DHAL_ECDSA_VERILOG_ECDSA384_MULTIPLIER=0 endif ifndef CRYPTECH_ROOT @@ -206,12 +243,19 @@ CFLAGS += -I${LIBTFM_BLD} CFLAGS += -DHAL_ENABLE_SOFTWARE_HASH_CORES=1 -export CFLAGS +# We used to "export CFLAGS" here, but for some reason that causes GNU +# make to duplicate its value, sometimes with conflicting settings. +# Weird, but this is complicated enough already, so we just pass +# CFLAGS explicitly in the small number of cases where we run a +# sub-make, below. + +#export CFLAGS + export RPC_MODE all: ${LIB} - cd tests; ${MAKE} $@ - cd utils; ${MAKE} $@ + ${MAKE} -C tests $@ CFLAGS='${CFLAGS}' + ${MAKE} -C utils $@ CFLAGS='${CFLAGS}' client: ${MAKE} RPC_MODE=client-simple RPC_TRANSPORT=daemon @@ -247,13 +291,12 @@ last_gasp_pin_internal.h: ./utils/last_gasp_default_pin >$@ test: all - export RPC_MODE - cd tests; ${MAKE} -k $@ + ${MAKE} -C tests -k $@ CFLAGS='${CFLAGS}' clean: - rm -f *.o ${LIB} cryptech_rpcd - cd tests; ${MAKE} $@ - cd utils; ${MAKE} $@ + rm -f *.o ${LIB} + ${MAKE} -C tests $@ CFLAGS='${CFLAGS}' + ${MAKE} -C utils $@ CFLAGS='${CFLAGS}' distclean: clean rm -f TAGS diff --git a/ecdsa.c b/ecdsa.c index 04e67b8..16d2b27 100644 --- a/ecdsa.c +++ b/ecdsa.c @@ -88,6 +88,18 @@ #define hal_get_random(core, buffer, length) hal_rpc_get_random(buffer, length) #endif +/* + * Whether to use the Verilog point multipliers. + */ + +#ifndef HAL_ECDSA_VERILOG_ECDSA256_MULTIPLIER +#define HAL_ECDSA_VERILOG_ECDSA256_MULTIPLIER 1 +#endif + +#ifndef HAL_ECDSA_VERILOG_ECDSA384_MULTIPLIER +#define HAL_ECDSA_VERILOG_ECDSA384_MULTIPLIER 1 +#endif + /* * Whether we want debug output. */ @@ -124,6 +136,7 @@ typedef struct { fp_digit rho; /* Montgomery reduction value */ const uint8_t *oid; /* OBJECT IDENTIFIER */ size_t oid_len; /* Length of OBJECT IDENTIFIER */ + hal_curve_name_t curve; /* Curve name */ } ecdsa_curve_t; /* @@ -206,6 +219,7 @@ static const ecdsa_curve_t * const get_curve(const hal_curve_name_t curve) fp_montgomery_calc_normalization(curve_p256.mu, curve_p256.q); curve_p256.oid = p256_oid; curve_p256.oid_len = sizeof(p256_oid); + curve_p256.curve = HAL_CURVE_P256; fp_read_unsigned_bin(curve_p384.q, unconst_uint8_t(p384_q), sizeof(p384_q)); fp_read_unsigned_bin(curve_p384.b, unconst_uint8_t(p384_b), sizeof(p384_b)); @@ -218,6 +232,7 @@ static const ecdsa_curve_t * const get_curve(const hal_curve_name_t curve) fp_montgomery_calc_normalization(curve_p384.mu, curve_p384.q); curve_p384.oid = p384_oid; curve_p384.oid_len = sizeof(p384_oid); + curve_p384.curve = HAL_CURVE_P384; fp_read_unsigned_bin(curve_p521.q, unconst_uint8_t(p521_q), sizeof(p521_q)); fp_read_unsigned_bin(curve_p521.b, unconst_uint8_t(p521_b), sizeof(p521_b)); @@ -230,6 +245,7 @@ static const ecdsa_curve_t * const get_curve(const hal_curve_name_t curve) fp_montgomery_calc_normalization(curve_p521.mu, curve_p521.q); curve_p521.oid = p521_oid; curve_p521.oid_len = sizeof(p521_oid); + curve_p521.curve = HAL_CURVE_P521; initialized = 1; } @@ -748,6 +764,113 @@ static inline hal_error_t get_random(void *buffer, const size_t length) #endif /* HAL_ECDSA_DEBUG_ONLY_STATIC_TEST_VECTOR_RANDOM */ +/* + * Use experimental Verilog base point multiplier cores to calculate + * public key given a private key. point_pick_random() has already + * selected a suitable private key for us, we just need to calculate + * the corresponding public key. + */ + +#if HAL_ECDSA_VERILOG_ECDSA256_MULTIPLIER || HAL_ECDSA_VERILOG_ECDSA384_MULTIPLIER + +typedef struct { + size_t bytes; + const char *name; + hal_addr_t k_addr; + hal_addr_t x_addr; + hal_addr_t y_addr; +} verilog_ecdsa_driver_t; + +static hal_error_t verilog_point_pick_random(const verilog_ecdsa_driver_t * const driver, + fp_int *k, + ec_point_t *P) +{ + assert(k != NULL && P != NULL); + + const size_t len = fp_unsigned_bin_size(k); + uint8_t b[driver->bytes]; + const uint8_t zero[4] = {0, 0, 0, 0}; + hal_core_t *core = NULL; + hal_error_t err; + + if (len > sizeof(b)) + return HAL_ERROR_RESULT_TOO_LONG; + + if ((err = hal_core_alloc(driver->name, &core)) != HAL_OK) + goto fail; + +#define check(_x_) do { if ((err = (_x_)) != HAL_OK) goto fail; } while (0) + + memset(b, 0, sizeof(b)); + fp_to_unsigned_bin(k, b + sizeof(b) - len); + + for (int i = 0; i < sizeof(b); i += 4) + check(hal_io_write(core, driver->k_addr + i/4, &b[sizeof(b) - 4 - i], 4)); + + check(hal_io_write(core, ADDR_CTRL, zero, sizeof(zero))); + check(hal_io_next(core)); + check(hal_io_wait_valid(core)); + + for (int i = 0; i < sizeof(b); i += 4) + check(hal_io_read(core, driver->x_addr + i/4, &b[sizeof(b) - 4 - i], 4)); + fp_read_unsigned_bin(P->x, b, sizeof(b)); + + for (int i = 0; i < sizeof(b); i += 4) + check(hal_io_read(core, driver->y_addr + i/4, &b[sizeof(b) - 4 - i], 4)); + fp_read_unsigned_bin(P->y, b, sizeof(b)); + + fp_set(P->z, 1); + +#undef check + + err = HAL_OK; + + fail: + hal_core_free(core); + memset(b, 0, sizeof(b)); + return err; +} + +#endif + +static inline hal_error_t verilog_p256_point_pick_random(fp_int *k, ec_point_t *P) +{ +#if HAL_ECDSA_VERILOG_ECDSA256_MULTIPLIER + + static const verilog_ecdsa_driver_t p256_driver = { + .name = ECDSA256_NAME, + .bytes = ECDSA256_OPERAND_BITS / 8, + .k_addr = ECDSA256_ADDR_K, + .x_addr = ECDSA256_ADDR_X, + .y_addr = ECDSA256_ADDR_Y + }; + + return verilog_point_pick_random(&p256_driver, k, P); + +#endif + + return HAL_ERROR_CORE_NOT_FOUND; +} + +static inline hal_error_t verilog_p384_point_pick_random(fp_int *k, ec_point_t *P) +{ +#if HAL_ECDSA_VERILOG_ECDSA384_MULTIPLIER + + static const verilog_ecdsa_driver_t p384_driver = { + .name = ECDSA384_NAME, + .bytes = ECDSA384_OPERAND_BITS / 8, + .k_addr = ECDSA384_ADDR_K, + .x_addr = ECDSA384_ADDR_X, + .y_addr = ECDSA384_ADDR_Y + }; + + return verilog_point_pick_random(&p384_driver, k, P); + +#endif + + return HAL_ERROR_CORE_NOT_FOUND; +} + /* * Pick a random point on the curve, return random scalar and * resulting point. @@ -792,6 +915,24 @@ static hal_error_t point_pick_random(const ecdsa_curve_t * const curve, memset(k_buf, 0, sizeof(k_buf)); +#if HAL_ECDSA_VERILOG_ECDSA256_MULTIPLIER || HAL_ECDSA_VERILOG_ECDSA384_MULTIPLIER + switch (curve->curve) { + + case HAL_CURVE_P256: + if ((err = verilog_p256_point_pick_random(k, P)) != HAL_ERROR_CORE_NOT_FOUND) + return err; + break; + + case HAL_CURVE_P384: + if ((err = verilog_p384_point_pick_random(k, P)) != HAL_ERROR_CORE_NOT_FOUND) + return err; + break; + + default: + break; + } +#endif + /* * Calculate P = kG and return. */ diff --git a/hal.h b/hal.h index 72b1d58..29b4dab 100644 --- a/hal.h +++ b/hal.h @@ -103,6 +103,12 @@ #define MKMIF_NAME "mkmif " #define MKMIF_VERSION "0.10" +#define ECDSA256_NAME "ecdsa256" +#define ECDSA256_VERSION "0.11" + +#define ECDSA384_NAME "ecdsa384" +#define ECDSA384_VERSION "0.11" + /* * C API error codes. Defined in this form so we can keep the tokens * and error strings together. See errorstrings.c. diff --git a/verilog_constants.h b/verilog_constants.h index f0ae070..c9bb566 100644 --- a/verilog_constants.h +++ b/verilog_constants.h @@ -8,7 +8,7 @@ * hand-edited. * * Authors: Joachim Strombergson, Paul Selkirk, Rob Austein - * Copyright (c) 2015-2016, NORDUnet A/S All rights reserved. + * Copyright (c) 2015-2017, NORDUnet A/S All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -240,6 +240,34 @@ #define MODEXPA7_ADDR_EXPONENT (MODEXPA7_ADDR_OPERANDS + 2 * MODEXPA7_OPERAND_WORDS) #define MODEXPA7_ADDR_RESULT (MODEXPA7_ADDR_OPERANDS + 3 * MODEXPA7_OPERAND_WORDS) +/* + * ECDSA P-256 point multiplier core. ECDSA256_OPERAND_BITS is size + * in bits of the (only) supported operand size (256 bits, imagine that). + * + * (Not sure which category EC Point Mulitiplier will end up in, but + * let's pretend it's "math".) + */ + +#define ECDSA256_OPERAND_BITS (256) +#define ECDSA256_ADDR_REGISTERS (0x00) +#define ECDSA256_ADDR_K (0x20) +#define ECDSA256_ADDR_X (0x28) +#define ECDSA256_ADDR_Y (0x30) + +/* + * ECDSA P-384 point multiplier core. ECDSA384_OPERAND_BITS is size + * in bits of the (only) supported operand size (384 bits, imagine that). + * + * (Not sure which category EC Point Mulitiplier will end up in, but + * let's pretend it's "math".) + */ + +#define ECDSA384_OPERAND_BITS (384) +#define ECDSA384_ADDR_REGISTERS (0x00) +#define ECDSA384_ADDR_K (0x40) +#define ECDSA384_ADDR_X (0x50) +#define ECDSA384_ADDR_Y (0x60) + /* * Utility cores. */ -- cgit v1.2.3