diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-23 17:03:17 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-23 17:03:17 +0300 |
commit | 0f111bf1ea0e5310852c50448a7226db57e61d5b (patch) | |
tree | 9f05558be50dd93a523ee343d1ecd73235879cb9 /stm32/modexpng_driver_sample.c | |
parent | d50bb6039649a61b2048447a63a9670895e1e2be (diff) |
Added demo driver code for STM32.
Diffstat (limited to 'stm32/modexpng_driver_sample.c')
-rw-r--r-- | stm32/modexpng_driver_sample.c | 475 |
1 files changed, 475 insertions, 0 deletions
diff --git a/stm32/modexpng_driver_sample.c b/stm32/modexpng_driver_sample.c new file mode 100644 index 0000000..1f0a767 --- /dev/null +++ b/stm32/modexpng_driver_sample.c @@ -0,0 +1,475 @@ +// +// simple driver to test "modexpng" core in hardware +// + +// +// note, that the test program needs a custom bitstream where +// the core is located at offset 0 (without the core selector) +// + +// stm32 headers +#include "stm-init.h" +#include "stm-led.h" +#include "stm-fmc.h" + +// test vectors (generated by the supplied python math model) +#include "modexpng_vector_1024.h" +#include "modexpng_vector_2048.h" +#include "modexpng_vector_4096.h" + +// reference code +#include "modexpng_util.h" + +// locations of core registers +#define CORE_ADDR_NAME0 (0x00 << 2) +#define CORE_ADDR_NAME1 (0x01 << 2) +#define CORE_ADDR_VERSION (0x02 << 2) +#define CORE_ADDR_CONTROL (0x08 << 2) +#define CORE_ADDR_STATUS (0x09 << 2) +#define CORE_ADDR_MODE (0x10 << 2) +#define CORE_ADDR_MODULUS_BITS (0x11 << 2) +#define CORE_ADDR_EXPONENT_BITS (0x12 << 2) +#define CORE_ADDR_BANK_BITS (0x13 << 2) +#define CORE_ADDR_NUM_MULTS (0x14 << 2) + +// locations of data buffers +#define CORE_ADDR_BANK_M (1 * 0x1000 + 0 * 0x200) +#define CORE_ADDR_BANK_N (1 * 0x1000 + 1 * 0x200) +#define CORE_ADDR_BANK_N_FACTOR (1 * 0x1000 + 2 * 0x200) +#define CORE_ADDR_BANK_N_COEFF (1 * 0x1000 + 3 * 0x200) +#define CORE_ADDR_BANK_X (1 * 0x1000 + 5 * 0x200) +#define CORE_ADDR_BANK_Y (1 * 0x1000 + 6 * 0x200) + +#define CORE_ADDR_BANK_D (2 * 0x1000 + 0 * 0x200) +#define CORE_ADDR_BANK_P (2 * 0x1000 + 1 * 0x200) +#define CORE_ADDR_BANK_DP (2 * 0x1000 + 3 * 0x100) +#define CORE_ADDR_BANK_P_FACTOR (2 * 0x1000 + 2 * 0x200) +#define CORE_ADDR_BANK_P_COEFF (2 * 0x1000 + 3 * 0x200) +#define CORE_ADDR_BANK_Q (2 * 0x1000 + 4 * 0x200) +#define CORE_ADDR_BANK_DQ (2 * 0x1000 + 9 * 0x100) +#define CORE_ADDR_BANK_Q_FACTOR (2 * 0x1000 + 5 * 0x200) +#define CORE_ADDR_BANK_Q_COEFF (2 * 0x1000 + 6 * 0x200) +#define CORE_ADDR_BANK_QINV (2 * 0x1000 + 7 * 0x200) + +#define CORE_ADDR_BANK_S (3 * 0x1000 + 0 * 0x200) +#define CORE_ADDR_BANK_XM (3 * 0x1000 + 1 * 0x200) +#define CORE_ADDR_BANK_YM (3 * 0x1000 + 2 * 0x200) + +// bit maps +#define CORE_CONTROL_BIT_NEXT 0x00000002 +#define CORE_STATUS_BIT_VALID 0x00000002 + +#define CORE_MODE_USING_CRT 0x00000002 +#define CORE_MODE_WITHOUT_CRT 0x00000000 + + +// +// test vectors +// +static const uint32_t M_1024[] = M_1024_INIT; +static const uint32_t N_1024[] = N_1024_INIT; +static const uint32_t N_FACTOR_1024[] = N_FACTOR_1024_INIT; +static const uint32_t N_COEFF_1024[] = N_COEFF_1024_INIT; +static uint32_t X_1024[] = X_1024_INIT; +static uint32_t Y_1024[] = Y_1024_INIT; +static const uint32_t P_1024[] = P_1024_INIT; +static const uint32_t Q_1024[] = Q_1024_INIT; +static const uint32_t P_FACTOR_1024[] = P_FACTOR_1024_INIT; +static const uint32_t Q_FACTOR_1024[] = Q_FACTOR_1024_INIT; +static const uint32_t P_COEFF_1024[] = P_COEFF_1024_INIT; +static const uint32_t Q_COEFF_1024[] = Q_COEFF_1024_INIT; +static const uint32_t D_1024[] = D_1024_INIT; +static const uint32_t DP_1024[] = DP_1024_INIT; +static const uint32_t DQ_1024[] = DQ_1024_INIT; +static const uint32_t QINV_1024[] = QINV_1024_INIT; +static const uint32_t XM_1024[] = XM_1024_INIT; +static const uint32_t YM_1024[] = YM_1024_INIT; +static const uint32_t S_1024[] = S_1024_INIT; + +static const uint32_t M_2048[] = M_2048_INIT; +static const uint32_t N_2048[] = N_2048_INIT; +static const uint32_t N_FACTOR_2048[] = N_FACTOR_2048_INIT; +static const uint32_t N_COEFF_2048[] = N_COEFF_2048_INIT; +static uint32_t X_2048[] = X_2048_INIT; +static uint32_t Y_2048[] = Y_2048_INIT; +static const uint32_t P_2048[] = P_2048_INIT; +static const uint32_t Q_2048[] = Q_2048_INIT; +static const uint32_t P_FACTOR_2048[] = P_FACTOR_2048_INIT; +static const uint32_t Q_FACTOR_2048[] = Q_FACTOR_2048_INIT; +static const uint32_t P_COEFF_2048[] = P_COEFF_2048_INIT; +static const uint32_t Q_COEFF_2048[] = Q_COEFF_2048_INIT; +static const uint32_t D_2048[] = D_2048_INIT; +static const uint32_t DP_2048[] = DP_2048_INIT; +static const uint32_t DQ_2048[] = DQ_2048_INIT; +static const uint32_t QINV_2048[] = QINV_2048_INIT; +static const uint32_t XM_2048[] = XM_2048_INIT; +static const uint32_t YM_2048[] = YM_2048_INIT; +static const uint32_t S_2048[] = S_2048_INIT; + +static const uint32_t M_4096[] = M_4096_INIT; +static const uint32_t N_4096[] = N_4096_INIT; +static const uint32_t N_FACTOR_4096[] = N_FACTOR_4096_INIT; +static const uint32_t N_COEFF_4096[] = N_COEFF_4096_INIT; +static uint32_t X_4096[] = X_4096_INIT; +static uint32_t Y_4096[] = Y_4096_INIT; +static const uint32_t P_4096[] = P_4096_INIT; +static const uint32_t Q_4096[] = Q_4096_INIT; +static const uint32_t P_FACTOR_4096[] = P_FACTOR_4096_INIT; +static const uint32_t Q_FACTOR_4096[] = Q_FACTOR_4096_INIT; +static const uint32_t P_COEFF_4096[] = P_COEFF_4096_INIT; +static const uint32_t Q_COEFF_4096[] = Q_COEFF_4096_INIT; +static const uint32_t D_4096[] = D_4096_INIT; +static const uint32_t DP_4096[] = DP_4096_INIT; +static const uint32_t DQ_4096[] = DQ_4096_INIT; +static const uint32_t QINV_4096[] = QINV_4096_INIT; +static const uint32_t XM_4096[] = XM_4096_INIT; +static const uint32_t YM_4096[] = YM_4096_INIT; +static const uint32_t S_4096[] = S_4096_INIT; + + +// +// buffers +// +static uint32_t mod_rev[BUF_NUM_WORDS]; +static uint32_t mod_factor_rev[BUF_NUM_WORDS]; +static uint32_t mod_coeff_rev[BUF_NUM_WORDS+1]; + + +// +// prototypes +// +void toggle_yellow_led(void); + +int check_montgomery_factor(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_factor); +int check_modulus_coeff(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_coeff); + +int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run, + const uint32_t *m, const uint32_t *n, + const uint32_t *n_factor, const uint32_t *n_coeff, + uint32_t *x, uint32_t *y, + const uint32_t *p, const uint32_t *q, + const uint32_t *p_factor, const uint32_t *p_coeff, + const uint32_t *q_factor, const uint32_t *q_coeff, + const uint32_t *dp, const uint32_t *dq, + const uint32_t *d, + const uint32_t *qinv, + const uint32_t *s, + const uint32_t *xm, const uint32_t *ym); + +// +// easier calls +// +#define sign_without_crt(k,f,m,n,nf,nc,x,y,d,s,xm,ym) \ + _sign_handler (k,0,f,m,n,nf,nc,x,y,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,d,NULL,s,xm,ym) + +#define sign_using_crt(k,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,qinv,s,xm,ym) \ + _sign_handler (k,1,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,NULL,qinv,s,xm,ym) + + +// +// test routine +// +int main() +{ + int ok; + int first_run; + + // initialize + stm_init(); + fmc_init(); + + // initialize + led_on(LED_GREEN); + led_off(LED_RED); + led_off(LED_YELLOW); + led_off(LED_BLUE); + + // make sure, that ModExpNG is there + uint32_t core_name0; + uint32_t core_name1; + uint32_t core_version; + + fmc_read_32(CORE_ADDR_NAME0, &core_name0); + fmc_read_32(CORE_ADDR_NAME1, &core_name1); + fmc_read_32(CORE_ADDR_VERSION, &core_version); + + // "mode", "xpng" + if ((core_name0 != 0x6D6F6465) || (core_name1 != 0x78706E67)) + { led_off(LED_GREEN); + led_on(LED_RED); + while (1); + } + + // check, that reference code works correctly + ok = 1; + + ok = ok && check_montgomery_factor(1024, N_1024, N_FACTOR_1024); + ok = ok && check_montgomery_factor( 512, P_1024, P_FACTOR_1024); + ok = ok && check_montgomery_factor( 512, Q_1024, Q_FACTOR_1024); + ok = ok && check_montgomery_factor(2048, N_2048, N_FACTOR_2048); + ok = ok && check_montgomery_factor(1024, P_2048, P_FACTOR_2048); + ok = ok && check_montgomery_factor(1024, Q_2048, Q_FACTOR_2048); + ok = ok && check_montgomery_factor(4096, N_4096, N_FACTOR_4096); + ok = ok && check_montgomery_factor(2048, P_4096, P_FACTOR_4096); + ok = ok && check_montgomery_factor(2048, Q_4096, Q_FACTOR_4096); + + ok = ok && check_modulus_coeff(1024, N_1024, N_COEFF_1024); + ok = ok && check_modulus_coeff( 512, P_1024, P_COEFF_1024); + ok = ok && check_modulus_coeff( 512, Q_1024, Q_COEFF_1024); + ok = ok && check_modulus_coeff(2048, N_2048, N_COEFF_2048); + ok = ok && check_modulus_coeff(1024, P_2048, P_COEFF_2048); + ok = ok && check_modulus_coeff(1024, Q_2048, Q_COEFF_2048); +// ok = ok && check_modulus_coeff(4096, N_4096, N_COEFF_4096); // SLOW (~20 sec) + ok = ok && check_modulus_coeff(2048, P_4096, P_COEFF_4096); + ok = ok && check_modulus_coeff(2048, Q_4096, Q_COEFF_4096); + + if (!ok) + { led_off(LED_GREEN); + led_on(LED_RED); + while (1); + } + + // repeat forever + ok = 1, first_run = 1; + while (1) + { + ok = ok && sign_without_crt(1024, first_run, + M_1024, N_1024, N_FACTOR_1024, N_COEFF_1024, + X_1024, Y_1024, D_1024, S_1024, + XM_1024, YM_1024); + + ok = ok && sign_without_crt(2048, first_run, + M_2048, N_2048, N_FACTOR_2048, N_COEFF_2048, + X_2048, Y_2048, D_2048, S_2048, + XM_2048, YM_2048); + + ok = ok && sign_without_crt(4096, first_run, + M_4096, N_4096, N_FACTOR_4096, N_COEFF_4096, + X_4096, Y_4096, D_4096, S_4096, + XM_4096, YM_4096); + + ok = ok && sign_using_crt(1024, first_run, + M_1024, N_1024, N_FACTOR_1024, N_COEFF_1024, + X_1024, Y_1024, P_1024, Q_1024, + P_FACTOR_1024, P_COEFF_1024, Q_FACTOR_1024, Q_COEFF_1024, + DP_1024, DQ_1024, QINV_1024, S_1024, + XM_1024, YM_1024); + + ok = ok && sign_using_crt(2048, first_run, + M_2048, N_2048, N_FACTOR_2048, N_COEFF_2048, + X_2048, Y_2048, P_2048, Q_2048, + P_FACTOR_2048, P_COEFF_2048, Q_FACTOR_2048, Q_COEFF_2048, + DP_2048, DQ_2048, QINV_2048, S_2048, + XM_2048, YM_2048); + + ok = ok && sign_using_crt(4096, first_run, + M_4096, N_4096, N_FACTOR_4096, N_COEFF_4096, + X_4096, Y_4096, P_4096, Q_4096, + P_FACTOR_4096, P_COEFF_4096, Q_FACTOR_4096, Q_COEFF_4096, + DP_4096, DQ_4096, QINV_4096, S_4096, + XM_4096, YM_4096); + + if (!ok) + { led_off(LED_GREEN); + led_on(LED_RED); + } + + first_run = 0; + + toggle_yellow_led(); + } +} + +int check_montgomery_factor(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_factor) +{ + uint32_t i, j; + uint32_t num_words = key_length / UINT32_BITS; + + // _calc_montgomery_factor() expects the least significant byte in [0], + // but C array initialization places it in [N-1], so we need to + // reverse the array before passing it to the function + for (i=0, j=num_words-1; i<num_words; i++, j--) + mod_rev[i] = mod[j]; + + // compute Montgomery factor + _calc_montgomery_factor(num_words, mod_rev, mod_factor_rev); + + // we now need to compare the calculated factor to the reference value, + // _calc_montgomery_factor() places the least significant byte in [0], + // but C array initialization places the least significant byte of the + // reference value in [N-1], so we need to go in opposite directions + // when comparing + for (i=0, j=num_words-1; i<num_words; i++, j--) + if (mod_factor_rev[i] != mod_factor[j]) return 0; + + // everything went just fine + return 1; +} + + +int check_modulus_coeff(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_coeff) +{ + uint32_t i, j; + uint32_t num_words = key_length / UINT32_BITS; + + // _calc_modulus_coeff() expects the least significant byte in [0], + // but C array initialization places it in [N-1], so we need to + // reverse the array before passing it to the function + for (i=0, j=num_words-1; i<num_words; i++, j--) + mod_rev[i] = mod[j]; + + // compute modulus-dependent speed-up coefficient + _calc_modulus_coeff(num_words, mod_rev, mod_coeff_rev); + + // we now need to compare the calculated coefficient to the reference value, + // _calc_modulus_coeff() places the least significant byte in [0], + // but C array initialization places the least significant byte of the + // reference value in [N], so we need to go in opposite directions + // when comparing, also note, that we should process N+1 words, since the + // coefficient is slightly longer, than the modulus + for (i=0, j=num_words; i<=num_words; i++, j--) + if (mod_coeff_rev[i] != mod_coeff[j]) return 0; + + // everything went just fine + return 1; +} + + +int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run, + const uint32_t *m, const uint32_t *n, + const uint32_t *n_factor, const uint32_t *n_coeff, + uint32_t *x, uint32_t *y, + const uint32_t *p, const uint32_t *q, + const uint32_t *p_factor, const uint32_t *p_coeff, + const uint32_t *q_factor, const uint32_t *q_coeff, + const uint32_t *dp, const uint32_t *dq, + const uint32_t *d, + const uint32_t *qinv, + const uint32_t *s, + const uint32_t *xm, const uint32_t *ym) +{ + uint32_t i, j, num_cyc; + uint32_t num_words = (key_length / sizeof(uint32_t)) >> 3; + uint32_t num_words_half = num_words >> 1; + uint32_t reg_control, reg_status; + uint32_t reg_mode; + uint32_t reg_modulus_bits, reg_exponent_bits; + + // fill in all the necessary input values + // d is only written when CRT is not enabled (we wipe it otherwise just in case) + // note, that n_coeff is one word larger, than the modulus, so we need a single + // extra write after the word-by-word loop + for (i=0, j=num_words-1; i<num_words; i++, j--) + { fmc_write_32(CORE_ADDR_BANK_M + i * sizeof(uint32_t), m[j]); + fmc_write_32(CORE_ADDR_BANK_N + i * sizeof(uint32_t), n[j]); + fmc_write_32(CORE_ADDR_BANK_N_FACTOR + i * sizeof(uint32_t), n_factor[j]); + fmc_write_32(CORE_ADDR_BANK_N_COEFF + i * sizeof(uint32_t), n_coeff[j+1]); // mind the +1 + fmc_write_32(CORE_ADDR_BANK_X + i * sizeof(uint32_t), x[j]); + fmc_write_32(CORE_ADDR_BANK_Y + i * sizeof(uint32_t), y[j]); + if (!use_crt) fmc_write_32(CORE_ADDR_BANK_D + i * sizeof(uint32_t), d[j]); + else fmc_write_32(CORE_ADDR_BANK_D + i * sizeof(uint32_t), 0); + } + fmc_write_32(CORE_ADDR_BANK_N_COEFF + i * sizeof(uint32_t), n_coeff[0]); // j+1 is 0 by now, i is num_words + + // also fill in all the input values necessary for CRT mode + // again, we need to write a pair of extra words for p_coeff and q_coeff after the loop + if (use_crt) + { for (i=0, j=num_words_half-1; i<num_words_half; i++, j--) + { fmc_write_32(CORE_ADDR_BANK_P + i * sizeof(uint32_t), p[j]); + fmc_write_32(CORE_ADDR_BANK_Q + i * sizeof(uint32_t), q[j]); + fmc_write_32(CORE_ADDR_BANK_P_FACTOR + i * sizeof(uint32_t), p_factor[j]); + fmc_write_32(CORE_ADDR_BANK_P_COEFF + i * sizeof(uint32_t), p_coeff[j+1]); // mind the +1! + fmc_write_32(CORE_ADDR_BANK_Q_FACTOR + i * sizeof(uint32_t), q_factor[j]); + fmc_write_32(CORE_ADDR_BANK_Q_COEFF + i * sizeof(uint32_t), q_coeff[j+1]); // mind the +1! + fmc_write_32(CORE_ADDR_BANK_DP + i * sizeof(uint32_t), dp[j]); + fmc_write_32(CORE_ADDR_BANK_DQ + i * sizeof(uint32_t), dq[j]); + fmc_write_32(CORE_ADDR_BANK_QINV + i * sizeof(uint32_t), qinv[j]); + } + fmc_write_32(CORE_ADDR_BANK_P_COEFF + i * sizeof(uint32_t), p_coeff[0]); // j+1 is 0 by now, i is num_words_half + fmc_write_32(CORE_ADDR_BANK_Q_COEFF + i * sizeof(uint32_t), q_coeff[0]); // j+1 is 0 by now, i is num_words_half + } + + // set parameters (there's no need to divide key length by two when CRT is enabled, + // the core takes care of that by itself automatically) + reg_mode = use_crt ? CORE_MODE_USING_CRT : CORE_MODE_WITHOUT_CRT; + reg_modulus_bits = key_length; + reg_exponent_bits = key_length; + + fmc_write_32(CORE_ADDR_MODE, reg_mode); + fmc_write_32(CORE_ADDR_MODULUS_BITS, reg_modulus_bits); + fmc_write_32(CORE_ADDR_EXPONENT_BITS, reg_exponent_bits); + + // clear 'next' control bit, then set 'next' control bit again to trigger new operation + reg_control = 0; + fmc_write_32(CORE_ADDR_CONTROL, reg_control); + reg_control = CORE_CONTROL_BIT_NEXT; + fmc_write_32(CORE_ADDR_CONTROL, reg_control); + + // wait for 'ready' status bit to be set, also turn on the blue LED while the + // core is busy to allow precise measurement with a scope + num_cyc = 0; + do + { num_cyc++; + fmc_read_32(CORE_ADDR_STATUS, ®_status); + } + while (!(reg_status & CORE_STATUS_BIT_VALID)); + + // read back s, xm and ym word-by-word + // the first time the function is called, we compare the mutated blinding + // factors to the known correct reference values + // if the very first mutation was ok, we overwrite the currently used + // factors with the mutated ones, so the next time we sign, the new + // mutated factors will be used + // we obviously only know the mutated pair of factors beforehand during the very first call, + // so we don't verify them starting from the second call, but the signature should + // always stay the same, so we always verify it + uint32_t s_word, xm_word, ym_word; + for (i=0, j=num_words-1; i<num_words; i++, j--) + { fmc_read_32(CORE_ADDR_BANK_S + i * sizeof(uint32_t), &s_word); + fmc_read_32(CORE_ADDR_BANK_XM + i * sizeof(uint32_t), &xm_word); + fmc_read_32(CORE_ADDR_BANK_YM + i * sizeof(uint32_t), &ym_word); + + if (s_word != s[j]) return 0; + + if (first_run) + { if (xm_word != xm[j]) return 0; + if (ym_word != ym[j]) return 0; + } + else + { x[j] = xm_word; + y[j] = ym_word; + } + } + + // everything went just fine + return 1; +} + + +// +// toggle the yellow led to indicate that we're not stuck somewhere +// +void toggle_yellow_led(void) +{ + static int led_state = 0; + + led_state = !led_state; + + if (led_state) led_on(LED_YELLOW); + else led_off(LED_YELLOW); +} + + +// +// SysTick +// +void SysTick_Handler(void) +{ + HAL_IncTick(); + HAL_SYSTICK_IRQHandler(); +} + + +// +// End-of-File +// |