/* * modexpa7_driver_sample.c * ---------------------------------------------- * Demo program to test ModExpA7 core in hardware * * Authors: Pavel Shatov * Copyright (c) 2017, NORDUnet A/S * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of the NORDUnet nor the names of its contributors may * be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Note, that the test program needs a custom bitstream without * the core selector, where the DUT is at offset 0. */ // stm32 headers #include "stm-init.h" #include "stm-led.h" #include "stm-fmc.h" // test vectors #include "test/modexp_fpga_model_vectors.h" // locations of core registers #define CORE_ADDR_NAME0 (0x00 << 2) #define CORE_ADDR_NAME1 (0x01 << 2) #define CORE_ADDR_VERSION (0x02 << 2) #define CORE_ADDR_CONTROL (0x08 << 2) #define CORE_ADDR_STATUS (0x09 << 2) #define CORE_ADDR_MODE (0x10 << 2) #define CORE_ADDR_MODULUS_BITS (0x11 << 2) #define CORE_ADDR_EXPONENT_BITS (0x12 << 2) #define CORE_ADDR_BUFFER_BITS (0x13 << 2) #define CORE_ADDR_ARRAY_BITS (0x14 << 2) // operand bank size #define BANK_LENGTH 0x200 // 0x200 = 512 bytes = 4096 bits // locations of operand buffers #define CORE_ADDR_BANK_MODULUS (BANK_LENGTH * (8 + 0)) #define CORE_ADDR_BANK_MESSAGE (BANK_LENGTH * (8 + 1)) #define CORE_ADDR_BANK_EXPONENT (BANK_LENGTH * (8 + 2)) #define CORE_ADDR_BANK_RESULT (BANK_LENGTH * (8 + 3)) #define CORE_ADDR_BANK_MODULUS_COEFF_OUT (BANK_LENGTH * (8 + 4)) #define CORE_ADDR_BANK_MODULUS_COEFF_IN (BANK_LENGTH * (8 + 5)) #define CORE_ADDR_BANK_MONTGOMERY_FACTOR_OUT (BANK_LENGTH * (8 + 6)) #define CORE_ADDR_BANK_MONTGOMERY_FACTOR_IN (BANK_LENGTH * (8 + 7)) // bit maps #define CORE_CONTROL_BIT_INIT 0x00000001 #define CORE_CONTROL_BIT_NEXT 0x00000002 #define CORE_STATUS_BIT_READY 0x00000001 #define CORE_STATUS_BIT_VALID 0x00000002 #define CORE_MODE_BIT_CRT 0x00000002 /* * zero operands */ #define Z_384 \ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000} #define Z_192 \ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 0x00000000, 0x00000000} #define Z_512 \ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000} #define Z_256 \ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \ 0x00000000, 0x00000000, 0x00000000, 0x00000000} /* * test vectors */ static const uint32_t m_384[] = M_384; static const uint32_t n_384[] = N_384; static const uint32_t d_384[] = D_384; static const uint32_t s_384[] = S_384; static uint32_t n_coeff_384[] = Z_384; static uint32_t factor_384[] = Z_384; static const uint32_t m_512[] = M_512; static const uint32_t n_512[] = N_512; static const uint32_t d_512[] = D_512; static const uint32_t s_512[] = S_512; static uint32_t n_coeff_512[] = Z_512; static uint32_t factor_512[] = Z_512; static const uint32_t p_192[] = P_192; static const uint32_t q_192[] = Q_192; static const uint32_t dp_192[] = DP_192; static const uint32_t dq_192[] = DQ_192; static const uint32_t mp_192[] = MP_192; static const uint32_t mq_192[] = MQ_192; static uint32_t p_coeff_192[] = Z_192; static uint32_t q_coeff_192[] = Z_192; static uint32_t factor_p_192[] = Z_192; static uint32_t factor_q_192[] = Z_192; static const uint32_t p_256[] = P_256; static const uint32_t q_256[] = Q_256; static const uint32_t dp_256[] = DP_256; static const uint32_t dq_256[] = DQ_256; static const uint32_t mp_256[] = MP_256; static const uint32_t mq_256[] = MQ_256; static uint32_t p_coeff_256[] = Z_256; static uint32_t q_coeff_256[] = Z_256; static uint32_t factor_p_256[] = Z_256; static uint32_t factor_q_256[] = Z_256; /* * prototypes */ void toggle_yellow_led(void); void setup_modexpa7( const uint32_t *n, uint32_t *coeff, uint32_t *factor, size_t l); int test_modexpa7( const uint32_t *n, const uint32_t *m, const uint32_t *d, const uint32_t *s, const uint32_t *coeff, const uint32_t *factor, size_t l); int test_modexpa7_crt( const uint32_t *n, const uint32_t *m, const uint32_t *d, const uint32_t *s, const uint32_t *coeff, const uint32_t *factor, size_t l); /* * test routine */ int main() { int ok; stm_init(); fmc_init(); // turn on the green led led_on(LED_GREEN); led_off(LED_RED); led_off(LED_YELLOW); led_off(LED_BLUE); // check, that core is present uint32_t core_name0; uint32_t core_name1; uint32_t core_version; fmc_read_32(CORE_ADDR_NAME0, &core_name0); fmc_read_32(CORE_ADDR_NAME1, &core_name1); fmc_read_32(CORE_ADDR_VERSION, &core_version); // must be "mode", "xpa7", "0.25" if ( (core_name0 != 0x6D6F6465) || (core_name1 != 0x78706137) || (core_version != 0x302E3235)) { led_off(LED_GREEN); led_on(LED_RED); while (1); } // read compile-time settings uint32_t core_buffer_bits; uint32_t core_array_bits; // largest supported operand width, systolic array "power" fmc_read_32(CORE_ADDR_BUFFER_BITS, &core_buffer_bits); fmc_read_32(CORE_ADDR_ARRAY_BITS, &core_array_bits); // // do pre-computation for all the moduli and store speed-up quantities, // note that each key requires three precomputations: one for the entire // public key and two for each of the corresponding private key components // // we set the 'init' control bit, wait for `ready' status bit to go high, // then retrieve the calculated values from the corresponding "output" banks // // we turn off the green led and turn the yellow led during the process to // get an idea of how long it takes // led_off(LED_GREEN); led_on(LED_YELLOW); // 384-bit key and 192-bit primes setup_modexpa7(n_384, n_coeff_384, factor_384, 384); setup_modexpa7(p_192, p_coeff_192, factor_p_192, 192); setup_modexpa7(q_192, q_coeff_192, factor_q_192, 192); // 512-bit key and 256-bit primes setup_modexpa7(n_512, n_coeff_512, factor_512, 512); setup_modexpa7(p_256, p_coeff_256, factor_p_256, 256); setup_modexpa7(q_256, q_coeff_256, factor_q_256, 256); led_off(LED_YELLOW); led_on(LED_GREEN); // repeat forever while (1) { // fresh start ok = 1; { // try signing the message with the 384-bit test vector ok = ok && test_modexpa7(n_384, m_384, d_384, s_384, n_coeff_384, factor_384, 384); // try signing 384-bit base using 192-bit exponent ok = ok && test_modexpa7_crt(p_192, m_384, dp_192, mp_192, p_coeff_192, factor_p_192, 192); // try signing 384-bit base using 192-bit exponent ok = ok && test_modexpa7_crt(q_192, m_384, dq_192, mq_192, q_coeff_192, factor_q_192, 192); } { // try signing the message with the 512-bit test vector ok = ok && test_modexpa7(n_512, m_512, d_512, s_512, n_coeff_512, factor_512, 512); // try signing 512-bit base using 256-bit exponent ok = ok && test_modexpa7_crt(p_256, m_512, dp_256, mp_256, p_coeff_256, factor_p_256, 256); // try signing 512-bit base using 256-bit exponent ok = ok && test_modexpa7_crt(q_256, m_512, dq_256, mq_256, q_coeff_256, factor_q_256, 256); } // turn on the red led to indicate something went wrong if (!ok) { led_off(LED_GREEN); led_on(LED_RED); } // indicate, that we're alive doing something... toggle_yellow_led(); } } /* * Load new modulus and do all the necessary precomputations. */ void setup_modexpa7( const uint32_t *n, uint32_t *coeff, uint32_t *factor, size_t l) { size_t i, num_words; uint32_t num_bits; uint32_t reg_control, reg_status; uint32_t n_word; uint32_t coeff_word, factor_word; uint32_t dummy_num_cyc; // determine numbers of 32-bit words num_words = l >> 5; // set modulus width num_bits = l; fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits); // fill modulus bank (the least significant word is at the lowest offset) for (i=0; i> 5; // set modulus width, exponent width num_bits = l; fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits); fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits); // disable CRT mode mode = 0; fmc_write_32(CORE_ADDR_MODE, &mode); // fill modulus, message and exponent banks (the least significant // word is at the lowest offset), we also need to fill "input" core // banks with previously pre-calculated and saved modulus-dependent // speed-up coefficient and Montgomery factor for (i=0; i> 5; // set modulus width, exponent width num_bits = l; fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits); fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits); // enable CRT mode mode = CORE_MODE_BIT_CRT; fmc_write_32(CORE_ADDR_MODE, &mode); // fill modulus and exponent banks (the least significant word is at // the lowest offset), we also need to fill "input" core banks with // previously pre-calculated and saved modulus-dependent speed-up // coefficient and Montgomery factor for (i=0; i