diff options
-rw-r--r-- | stm32/modexpng_driver_sample.c | 134 |
1 files changed, 87 insertions, 47 deletions
diff --git a/stm32/modexpng_driver_sample.c b/stm32/modexpng_driver_sample.c index f455b55..d87926a 100644 --- a/stm32/modexpng_driver_sample.c +++ b/stm32/modexpng_driver_sample.c @@ -199,15 +199,55 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run, #define sign_using_crt(k,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,qinv,s,xm,ym) \ _sign_handler (k,1,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,NULL,qinv,s,xm,ym) + - +// +// dirty workarounds +// +#define _ntohl(n) (((((unsigned long)(n) & 0xFF)) << 24)| \ + ((((unsigned long)(n) & 0xFF00)) << 8) | \ + ((((unsigned long)(n) & 0xFF0000)) >> 8) | \ + ((((unsigned long)(n) & 0xFF000000)) >> 24)) + +#define _htonl(n) (((((unsigned long)(n) & 0xFF)) << 24)| \ + ((((unsigned long)(n) & 0xFF00)) << 8) | \ + ((((unsigned long)(n) & 0xFF0000)) >> 8) | \ + ((((unsigned long)(n) & 0xFF000000)) >> 24)) + + +// +// Core Offset +// +#define MODEXPNG_CORE_NUM 0x26 + + +// +// more dirty workarounds +// +static void _fmc_read_32(uint32_t from_addr, uint32_t *to_ptr) +{ + uint32_t src_addr = FMC_FPGA_BASE_ADDR + (((256 << 2) * MODEXPNG_CORE_NUM + from_addr) & FMC_FPGA_ADDR_MASK); + uint32_t t = *((uint32_t *)src_addr); + *to_ptr = _ntohl(t); +} + +static void _fmc_write_32(uint32_t to_addr, uint32_t value) +{ + uint32_t t = _htonl(value); + uint32_t dst_addr = FMC_FPGA_BASE_ADDR + (((256 << 2) * MODEXPNG_CORE_NUM + to_addr) & FMC_FPGA_ADDR_MASK); + *(uint32_t *)dst_addr = t; +} + + // // test routine // int main() { - int ok; - int first_run; + int ok, first_run; + long long int iters; + + ok = sizeof iters; // initialize stm_init(); @@ -224,9 +264,9 @@ int main() uint32_t core_name1; uint32_t core_version; - fmc_read_32(CORE_ADDR_NAME0, &core_name0); - fmc_read_32(CORE_ADDR_NAME1, &core_name1); - fmc_read_32(CORE_ADDR_VERSION, &core_version); + _fmc_read_32(CORE_ADDR_NAME0, &core_name0); + _fmc_read_32(CORE_ADDR_NAME1, &core_name1); + _fmc_read_32(CORE_ADDR_VERSION, &core_version); // "mode", "xpng" if 
((core_name0 != 0x6D6F6465) || (core_name1 != 0x78706E67)) @@ -237,7 +277,7 @@ int main() // check, that reference code works correctly ok = 1; - + /**/ ok = ok && check_montgomery_factor(1024, N_1024, N_FACTOR_1024); ok = ok && check_montgomery_factor( 512, P_1024, P_FACTOR_1024); ok = ok && check_montgomery_factor( 512, Q_1024, Q_FACTOR_1024); @@ -247,17 +287,17 @@ int main() ok = ok && check_montgomery_factor(4096, N_4096, N_FACTOR_4096); ok = ok && check_montgomery_factor(2048, P_4096, P_FACTOR_4096); ok = ok && check_montgomery_factor(2048, Q_4096, Q_FACTOR_4096); - + /**//**/ ok = ok && check_modulus_coeff(1024, N_1024, N_COEFF_1024); ok = ok && check_modulus_coeff( 512, P_1024, P_COEFF_1024); ok = ok && check_modulus_coeff( 512, Q_1024, Q_COEFF_1024); ok = ok && check_modulus_coeff(2048, N_2048, N_COEFF_2048); ok = ok && check_modulus_coeff(1024, P_2048, P_COEFF_2048); ok = ok && check_modulus_coeff(1024, Q_2048, Q_COEFF_2048); -// ok = ok && check_modulus_coeff(4096, N_4096, N_COEFF_4096); // SLOW (~20 sec) +// ok = ok && check_modulus_coeff(4096, N_4096, N_COEFF_4096); // SLOW (~20 sec) ok = ok && check_modulus_coeff(2048, P_4096, P_COEFF_4096); ok = ok && check_modulus_coeff(2048, Q_4096, Q_COEFF_4096); - + /**/ if (!ok) { led_off(LED_GREEN); led_on(LED_RED); @@ -265,14 +305,14 @@ int main() } // repeat forever - ok = 1, first_run = 1; + ok = 1, first_run = 1, iters = 0; while (1) - { + { ok = ok && sign_without_crt(1024, first_run, M_1024, N_1024, N_FACTOR_1024, N_COEFF_1024, X_1024, Y_1024, D_1024, S_1024, XM_1024, YM_1024); - + ok = ok && sign_without_crt(2048, first_run, M_2048, N_2048, N_FACTOR_2048, N_COEFF_2048, X_2048, Y_2048, D_2048, S_2048, @@ -282,14 +322,14 @@ int main() M_4096, N_4096, N_FACTOR_4096, N_COEFF_4096, X_4096, Y_4096, D_4096, S_4096, XM_4096, YM_4096); - + ok = ok && sign_using_crt(1024, first_run, M_1024, N_1024, N_FACTOR_1024, N_COEFF_1024, X_1024, Y_1024, P_1024, Q_1024, P_FACTOR_1024, P_COEFF_1024, Q_FACTOR_1024, Q_COEFF_1024, 
DP_1024, DQ_1024, QINV_1024, S_1024, XM_1024, YM_1024); - + ok = ok && sign_using_crt(2048, first_run, M_2048, N_2048, N_FACTOR_2048, N_COEFF_2048, X_2048, Y_2048, P_2048, Q_2048, @@ -303,13 +343,13 @@ int main() P_FACTOR_4096, P_COEFF_4096, Q_FACTOR_4096, Q_COEFF_4096, DP_4096, DQ_4096, QINV_4096, S_4096, XM_4096, YM_4096); - + if (!ok) { led_off(LED_GREEN); led_on(LED_RED); } - first_run = 0; + first_run = 0, iters++; toggle_yellow_led(); } @@ -395,33 +435,33 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run, // note, that n_coeff is one word larger, than the modulus, so we need a single // extra write after the word-by-word loop for (i=0, j=num_words-1; i<num_words; i++, j--) - { fmc_write_32(CORE_ADDR_BANK_M + i * sizeof(uint32_t), m[j]); - fmc_write_32(CORE_ADDR_BANK_N + i * sizeof(uint32_t), n[j]); - fmc_write_32(CORE_ADDR_BANK_N_FACTOR + i * sizeof(uint32_t), n_factor[j]); - fmc_write_32(CORE_ADDR_BANK_N_COEFF + i * sizeof(uint32_t), n_coeff[j+1]); // mind the +1 - fmc_write_32(CORE_ADDR_BANK_X + i * sizeof(uint32_t), x[j]); - fmc_write_32(CORE_ADDR_BANK_Y + i * sizeof(uint32_t), y[j]); - if (!use_crt) fmc_write_32(CORE_ADDR_BANK_D + i * sizeof(uint32_t), d[j]); - else fmc_write_32(CORE_ADDR_BANK_D + i * sizeof(uint32_t), 0); + { _fmc_write_32(CORE_ADDR_BANK_M + i * sizeof(uint32_t), m[j]); + _fmc_write_32(CORE_ADDR_BANK_N + i * sizeof(uint32_t), n[j]); + _fmc_write_32(CORE_ADDR_BANK_N_FACTOR + i * sizeof(uint32_t), n_factor[j]); + _fmc_write_32(CORE_ADDR_BANK_N_COEFF + i * sizeof(uint32_t), n_coeff[j+1]); // mind the +1 + _fmc_write_32(CORE_ADDR_BANK_X + i * sizeof(uint32_t), x[j]); + _fmc_write_32(CORE_ADDR_BANK_Y + i * sizeof(uint32_t), y[j]); + if (!use_crt) _fmc_write_32(CORE_ADDR_BANK_D + i * sizeof(uint32_t), d[j]); + else _fmc_write_32(CORE_ADDR_BANK_D + i * sizeof(uint32_t), 0); } - fmc_write_32(CORE_ADDR_BANK_N_COEFF + i * sizeof(uint32_t), n_coeff[0]); // j+1 is 0 by now, i is num_words - + 
_fmc_write_32(CORE_ADDR_BANK_N_COEFF + i * sizeof(uint32_t), n_coeff[0]); // j+1 is 0 by now, i is num_words + // also fill in all the input values necessary for CRT mode // again, we need to write a pair of extra words for p_coeff and q_coeff after the loop if (use_crt) { for (i=0, j=num_words_half-1; i<num_words_half; i++, j--) - { fmc_write_32(CORE_ADDR_BANK_P + i * sizeof(uint32_t), p[j]); - fmc_write_32(CORE_ADDR_BANK_Q + i * sizeof(uint32_t), q[j]); - fmc_write_32(CORE_ADDR_BANK_P_FACTOR + i * sizeof(uint32_t), p_factor[j]); - fmc_write_32(CORE_ADDR_BANK_P_COEFF + i * sizeof(uint32_t), p_coeff[j+1]); // mind the +1! - fmc_write_32(CORE_ADDR_BANK_Q_FACTOR + i * sizeof(uint32_t), q_factor[j]); - fmc_write_32(CORE_ADDR_BANK_Q_COEFF + i * sizeof(uint32_t), q_coeff[j+1]); // mind the +1! - fmc_write_32(CORE_ADDR_BANK_DP + i * sizeof(uint32_t), dp[j]); - fmc_write_32(CORE_ADDR_BANK_DQ + i * sizeof(uint32_t), dq[j]); - fmc_write_32(CORE_ADDR_BANK_QINV + i * sizeof(uint32_t), qinv[j]); + { _fmc_write_32(CORE_ADDR_BANK_P + i * sizeof(uint32_t), p[j]); + _fmc_write_32(CORE_ADDR_BANK_Q + i * sizeof(uint32_t), q[j]); + _fmc_write_32(CORE_ADDR_BANK_P_FACTOR + i * sizeof(uint32_t), p_factor[j]); + _fmc_write_32(CORE_ADDR_BANK_P_COEFF + i * sizeof(uint32_t), p_coeff[j+1]); // mind the +1! + _fmc_write_32(CORE_ADDR_BANK_Q_FACTOR + i * sizeof(uint32_t), q_factor[j]); + _fmc_write_32(CORE_ADDR_BANK_Q_COEFF + i * sizeof(uint32_t), q_coeff[j+1]); // mind the +1! 
+ _fmc_write_32(CORE_ADDR_BANK_DP + i * sizeof(uint32_t), dp[j]); + _fmc_write_32(CORE_ADDR_BANK_DQ + i * sizeof(uint32_t), dq[j]); + _fmc_write_32(CORE_ADDR_BANK_QINV + i * sizeof(uint32_t), qinv[j]); } - fmc_write_32(CORE_ADDR_BANK_P_COEFF + i * sizeof(uint32_t), p_coeff[0]); // j+1 is 0 by now, i is num_words_half - fmc_write_32(CORE_ADDR_BANK_Q_COEFF + i * sizeof(uint32_t), q_coeff[0]); // j+1 is 0 by now, i is num_words_half + _fmc_write_32(CORE_ADDR_BANK_P_COEFF + i * sizeof(uint32_t), p_coeff[0]); // j+1 is 0 by now, i is num_words_half + _fmc_write_32(CORE_ADDR_BANK_Q_COEFF + i * sizeof(uint32_t), q_coeff[0]); // j+1 is 0 by now, i is num_words_half } // set parameters (there's no need to divide key length by two when CRT is enabled, @@ -430,22 +470,22 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run, reg_modulus_bits = key_length; reg_exponent_bits = key_length; - fmc_write_32(CORE_ADDR_MODE, reg_mode); - fmc_write_32(CORE_ADDR_MODULUS_BITS, reg_modulus_bits); - fmc_write_32(CORE_ADDR_EXPONENT_BITS, reg_exponent_bits); + _fmc_write_32(CORE_ADDR_MODE, reg_mode); + _fmc_write_32(CORE_ADDR_MODULUS_BITS, reg_modulus_bits); + _fmc_write_32(CORE_ADDR_EXPONENT_BITS, reg_exponent_bits); // clear 'next' control bit, then set 'next' control bit again to trigger new operation reg_control = 0; - fmc_write_32(CORE_ADDR_CONTROL, reg_control); + _fmc_write_32(CORE_ADDR_CONTROL, reg_control); reg_control = CORE_CONTROL_BIT_NEXT; - fmc_write_32(CORE_ADDR_CONTROL, reg_control); + _fmc_write_32(CORE_ADDR_CONTROL, reg_control); // wait for 'ready' status bit to be set, also turn on the blue LED while the // core is busy to allow precise measurement with a scope num_cyc = 0; do { num_cyc++; - fmc_read_32(CORE_ADDR_STATUS, &reg_status); + _fmc_read_32(CORE_ADDR_STATUS, &reg_status); } while (!(reg_status & CORE_STATUS_BIT_VALID)); @@ -460,9 +500,9 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run, // always stay the same, so 
we always verify it uint32_t s_word, xm_word, ym_word; for (i=0, j=num_words-1; i<num_words; i++, j--) - { fmc_read_32(CORE_ADDR_BANK_S + i * sizeof(uint32_t), &s_word); - fmc_read_32(CORE_ADDR_BANK_XM + i * sizeof(uint32_t), &xm_word); - fmc_read_32(CORE_ADDR_BANK_YM + i * sizeof(uint32_t), &ym_word); + { _fmc_read_32(CORE_ADDR_BANK_S + i * sizeof(uint32_t), &s_word); + _fmc_read_32(CORE_ADDR_BANK_XM + i * sizeof(uint32_t), &xm_word); + _fmc_read_32(CORE_ADDR_BANK_YM + i * sizeof(uint32_t), &ym_word); if (s_word != s[j]) return 0; |