//------------------------------------------------------------------------------ // // modexpng_driver_sample.c // ----------------------------------------------------- // Sample driver to test the "modexpng" core in hardware // // Authors: Pavel Shatov // // Copyright (c) 2019, NORDUnet A/S // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // - Neither the name of the NORDUnet nor the names of its contributors may be // used to endorse or promote products derived from this software without // specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // //------------------------------------------------------------------------------ // // note, that the test program needs a custom bitstream where // the core is located at offset 0 (without the core selector) // // stm32 headers #include "stm-init.h" #include "stm-led.h" #include "stm-fmc.h" // test vectors (generated by the supplied python math model) #include "modexpng_vector_1024.h" #include "modexpng_vector_2048.h" #include "modexpng_vector_4096.h" // reference code #include "modexpng_util.h" // locations of core registers #define CORE_ADDR_NAME0 (0x00 << 2) #define CORE_ADDR_NAME1 (0x01 << 2) #define CORE_ADDR_VERSION (0x02 << 2) #define CORE_ADDR_CONTROL (0x08 << 2) #define CORE_ADDR_STATUS (0x09 << 2) #define CORE_ADDR_MODE (0x10 << 2) #define CORE_ADDR_MODULUS_BITS (0x11 << 2) #define CORE_ADDR_EXPONENT_BITS (0x12 << 2) #define CORE_ADDR_BANK_BITS (0x13 << 2) #define CORE_ADDR_NUM_MULTS (0x14 << 2) // locations of data buffers #define CORE_ADDR_BANK_M (1 * 0x1000 + 0 * 0x200) #define CORE_ADDR_BANK_N (1 * 0x1000 + 1 * 0x200) #define CORE_ADDR_BANK_N_FACTOR (1 * 0x1000 + 2 * 0x200) #define CORE_ADDR_BANK_N_COEFF (1 * 0x1000 + 3 * 0x200) #define CORE_ADDR_BANK_X (1 * 0x1000 + 5 * 0x200) #define CORE_ADDR_BANK_Y (1 * 0x1000 + 6 * 0x200) #define CORE_ADDR_BANK_D (2 * 0x1000 + 0 * 0x200) #define CORE_ADDR_BANK_P (2 * 0x1000 + 1 * 0x200) #define CORE_ADDR_BANK_DP (2 * 0x1000 + 3 * 0x100) #define CORE_ADDR_BANK_P_FACTOR (2 * 0x1000 + 2 * 0x200) #define CORE_ADDR_BANK_P_COEFF (2 * 0x1000 + 3 * 0x200) #define CORE_ADDR_BANK_Q (2 * 0x1000 + 4 * 0x200) #define CORE_ADDR_BANK_DQ (2 * 0x1000 + 9 * 0x100) #define CORE_ADDR_BANK_Q_FACTOR (2 * 0x1000 + 5 * 0x200) #define CORE_ADDR_BANK_Q_COEFF (2 * 0x1000 + 6 * 0x200) #define CORE_ADDR_BANK_QINV (2 * 0x1000 + 7 * 0x200) #define CORE_ADDR_BANK_S (3 * 0x1000 + 0 * 0x200) #define CORE_ADDR_BANK_XM (3 * 0x1000 + 1 * 0x200) #define CORE_ADDR_BANK_YM (3 * 0x1000 + 2 * 0x200) // bit maps #define CORE_CONTROL_BIT_NEXT 0x00000002 #define CORE_STATUS_BIT_VALID 0x00000002 #define CORE_MODE_USING_CRT 0x00000002 #define CORE_MODE_WITHOUT_CRT 0x00000000 // // test vectors // static const uint32_t M_1024[] = M_1024_INIT; static const uint32_t N_1024[] = N_1024_INIT; static const uint32_t N_FACTOR_1024[] = N_FACTOR_1024_INIT; static const uint32_t N_COEFF_1024[] = N_COEFF_1024_INIT; static uint32_t X_1024[] = X_1024_INIT; static uint32_t Y_1024[] = Y_1024_INIT; static const uint32_t P_1024[] = P_1024_INIT; static const uint32_t Q_1024[] = Q_1024_INIT; static const uint32_t P_FACTOR_1024[] = P_FACTOR_1024_INIT; static const uint32_t Q_FACTOR_1024[] = Q_FACTOR_1024_INIT; static const uint32_t P_COEFF_1024[] = P_COEFF_1024_INIT; static const uint32_t Q_COEFF_1024[] = Q_COEFF_1024_INIT; static const uint32_t D_1024[] = D_1024_INIT; static const uint32_t DP_1024[] = DP_1024_INIT; static const uint32_t DQ_1024[] = DQ_1024_INIT; static const uint32_t QINV_1024[] = QINV_1024_INIT; static const uint32_t XM_1024[] = XM_1024_INIT; static const uint32_t YM_1024[] = YM_1024_INIT; static const uint32_t S_1024[] = S_1024_INIT; static const uint32_t M_2048[] = M_2048_INIT; static const uint32_t N_2048[] = N_2048_INIT; static const uint32_t N_FACTOR_2048[] = N_FACTOR_2048_INIT; static const uint32_t N_COEFF_2048[] = N_COEFF_2048_INIT; static uint32_t X_2048[] = X_2048_INIT; static uint32_t Y_2048[] = Y_2048_INIT; static const uint32_t P_2048[] = P_2048_INIT; static const uint32_t Q_2048[] = Q_2048_INIT; static const uint32_t P_FACTOR_2048[] = P_FACTOR_2048_INIT; static const uint32_t Q_FACTOR_2048[] = Q_FACTOR_2048_INIT; static const uint32_t P_COEFF_2048[] = P_COEFF_2048_INIT; static const uint32_t Q_COEFF_2048[] = Q_COEFF_2048_INIT; static const uint32_t D_2048[] = D_2048_INIT; static const uint32_t DP_2048[] = DP_2048_INIT; static const uint32_t DQ_2048[] = DQ_2048_INIT; static const uint32_t QINV_2048[] = QINV_2048_INIT; static const uint32_t XM_2048[] = XM_2048_INIT; static const uint32_t YM_2048[] = YM_2048_INIT; static const uint32_t S_2048[] = S_2048_INIT; static const uint32_t M_4096[] = M_4096_INIT; static const uint32_t N_4096[] = N_4096_INIT; static const uint32_t N_FACTOR_4096[] = N_FACTOR_4096_INIT; static const uint32_t N_COEFF_4096[] = N_COEFF_4096_INIT; static uint32_t X_4096[] = X_4096_INIT; static uint32_t Y_4096[] = Y_4096_INIT; static const uint32_t P_4096[] = P_4096_INIT; static const uint32_t Q_4096[] = Q_4096_INIT; static const uint32_t P_FACTOR_4096[] = P_FACTOR_4096_INIT; static const uint32_t Q_FACTOR_4096[] = Q_FACTOR_4096_INIT; static const uint32_t P_COEFF_4096[] = P_COEFF_4096_INIT; static const uint32_t Q_COEFF_4096[] = Q_COEFF_4096_INIT; static const uint32_t D_4096[] = D_4096_INIT; static const uint32_t DP_4096[] = DP_4096_INIT; static const uint32_t DQ_4096[] = DQ_4096_INIT; static const uint32_t QINV_4096[] = QINV_4096_INIT; static const uint32_t XM_4096[] = XM_4096_INIT; static const uint32_t YM_4096[] = YM_4096_INIT; static const uint32_t S_4096[] = S_4096_INIT; // // buffers // static uint32_t mod_rev[BUF_NUM_WORDS]; static uint32_t mod_factor_rev[BUF_NUM_WORDS]; static uint32_t mod_coeff_rev[BUF_NUM_WORDS+1]; // // prototypes // void toggle_yellow_led(void); int check_montgomery_factor(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_factor); int check_modulus_coeff(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_coeff); int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run, const uint32_t *m, const uint32_t *n, const uint32_t *n_factor, const uint32_t *n_coeff, uint32_t *x, uint32_t *y, const uint32_t *p, const uint32_t *q, const uint32_t *p_factor, const uint32_t *p_coeff, const uint32_t *q_factor, const uint32_t *q_coeff, const uint32_t *dp, const uint32_t *dq, const uint32_t *d, const uint32_t *qinv, const uint32_t *s, const uint32_t *xm, const uint32_t *ym); // // easier calls // #define sign_without_crt(k,f,m,n,nf,nc,x,y,d,s,xm,ym) \ _sign_handler (k,0,f,m,n,nf,nc,x,y,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,d,NULL,s,xm,ym) #define sign_using_crt(k,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,qinv,s,xm,ym) \ _sign_handler (k,1,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,NULL,qinv,s,xm,ym) // // test routine // int main() { int ok; int first_run; // initialize stm_init(); fmc_init(); // initialize led_on(LED_GREEN); led_off(LED_RED); led_off(LED_YELLOW); led_off(LED_BLUE); // make sure, that ModExpNG is there uint32_t core_name0; uint32_t core_name1; uint32_t core_version; fmc_read_32(CORE_ADDR_NAME0, &core_name0); fmc_read_32(CORE_ADDR_NAME1, &core_name1); fmc_read_32(CORE_ADDR_VERSION, &core_version); // "mode", "xpng" if ((core_name0 != 0x6D6F6465) || (core_name1 != 0x78706E67)) { led_off(LED_GREEN); led_on(LED_RED); while (1); } // check, that reference code works correctly ok = 1; ok = ok && check_montgomery_factor(1024, N_1024, N_FACTOR_1024); ok = ok && check_montgomery_factor( 512, P_1024, P_FACTOR_1024); ok = ok && check_montgomery_factor( 512, Q_1024, Q_FACTOR_1024); ok = ok && check_montgomery_factor(2048, N_2048, N_FACTOR_2048); ok = ok && check_montgomery_factor(1024, P_2048, P_FACTOR_2048); ok = ok && check_montgomery_factor(1024, Q_2048, Q_FACTOR_2048); ok = ok && check_montgomery_factor(4096, N_4096, N_FACTOR_4096); ok = ok && check_montgomery_factor(2048, P_4096, P_FACTOR_4096); ok = ok && check_montgomery_factor(2048, Q_4096, Q_FACTOR_4096); ok = ok && check_modulus_coeff(1024, N_1024, N_COEFF_1024); ok = ok && check_modulus_coeff( 512, P_1024, P_COEFF_1024); ok = ok && check_modulus_coeff( 512, Q_1024, Q_COEFF_1024); ok = ok && check_modulus_coeff(2048, N_2048, N_COEFF_2048); ok = ok && check_modulus_coeff(1024, P_2048, P_COEFF_2048); ok = ok && check_modulus_coeff(1024, Q_2048, Q_COEFF_2048); // ok = ok && check_modulus_coeff(4096, N_4096, N_COEFF_4096); // SLOW (~20 sec) ok = ok && check_modulus_coeff(2048, P_4096, P_COEFF_4096); ok = ok && check_modulus_coeff(2048, Q_4096, Q_COEFF_4096); if (!ok) { led_off(LED_GREEN); led_on(LED_RED); while (1); } // repeat forever ok = 1, first_run = 1; while (1) { ok = ok && sign_without_crt(1024, first_run, M_1024, N_1024, N_FACTOR_1024, N_COEFF_1024, X_1024, Y_1024, D_1024, S_1024, XM_1024, YM_1024); ok = ok && sign_without_crt(2048, first_run, M_2048, N_2048, N_FACTOR_2048, N_COEFF_2048, X_2048, Y_2048, D_2048, S_2048, XM_2048, YM_2048); ok = ok && sign_without_crt(4096, first_run, M_4096, N_4096, N_FACTOR_4096, N_COEFF_4096, X_4096, Y_4096, D_4096, S_4096, XM_4096, YM_4096); ok = ok && sign_using_crt(1024, first_run, M_1024, N_1024, N_FACTOR_1024, N_COEFF_1024, X_1024, Y_1024, P_1024, Q_1024, P_FACTOR_1024, P_COEFF_1024, Q_FACTOR_1024, Q_COEFF_1024, DP_1024, DQ_1024, QINV_1024, S_1024, XM_1024, YM_1024); ok = ok && sign_using_crt(2048, first_run, M_2048, N_2048, N_FACTOR_2048, N_COEFF_2048, X_2048, Y_2048, P_2048, Q_2048, P_FACTOR_2048, P_COEFF_2048, Q_FACTOR_2048, Q_COEFF_2048, DP_2048, DQ_2048, QINV_2048, S_2048, XM_2048, YM_2048); ok = ok && sign_using_crt(4096, first_run, M_4096, N_4096, N_FACTOR_4096, N_COEFF_4096, X_4096, Y_4096, P_4096, Q_4096, P_FACTOR_4096, P_COEFF_4096, Q_FACTOR_4096, Q_COEFF_4096, DP_4096, DQ_4096, QINV_4096, S_4096, XM_4096, YM_4096); if (!ok) { led_off(LED_GREEN); led_on(LED_RED); } first_run = 0; toggle_yellow_led(); } } int check_montgomery_factor(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_factor) { uint32_t i, j; uint32_t num_words = key_length / UINT32_BITS; // _calc_montgomery_factor() expects the least significant byte in [0], // but C array initialization places it in [N-1], so we need to // reverse the array before passing it to the function for (i=0, j=num_words-1; i> 3; uint32_t num_words_half = num_words >> 1; uint32_t reg_control, reg_status; uint32_t reg_mode; uint32_t reg_modulus_bits, reg_exponent_bits; // fill in all the necessary input values // d is only written when CRT is not enabled (we wipe it otherwise just in case) // note, that n_coeff is one word larger, than the modulus, so we need a single // extra write after the word-by-word loop for (i=0, j=num_words-1; i