aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- stm32/modexpng_driver_sample.c 134
1 files changed, 87 insertions, 47 deletions
diff --git a/stm32/modexpng_driver_sample.c b/stm32/modexpng_driver_sample.c
index f455b55..d87926a 100644
--- a/stm32/modexpng_driver_sample.c
+++ b/stm32/modexpng_driver_sample.c
@@ -199,15 +199,55 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run,
#define sign_using_crt(k,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,qinv,s,xm,ym) \
_sign_handler (k,1,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,NULL,qinv,s,xm,ym) // CRT wrapper: forwards to _sign_handler with use_crt = 1; the NULL between dq and qinv presumably fills the d slot (unused with CRT) - confirm against _sign_handler
+
-
+//
+// dirty workarounds: local byte-swap stand-ins for ntohl()/htonl()
+//
+#define _ntohl(n) (((((unsigned long)(n)) >> 24) & 0x000000FF) | /* byte 3 -> byte 0 */ \
+    ((((unsigned long)(n)) >>  8) & 0x0000FF00) |                 /* byte 2 -> byte 1 */ \
+    ((((unsigned long)(n)) <<  8) & 0x00FF0000) |                 /* byte 1 -> byte 2 */ \
+    ((((unsigned long)(n)) << 24) & 0xFF000000))                  // byte 0 -> byte 3; reverses the four low bytes, n is evaluated four times
+
+#define _htonl(n) (((((unsigned long)(n)) << 24) & 0xFF000000) | /* byte 0 -> byte 3 */ \
+    ((((unsigned long)(n)) <<  8) & 0x00FF0000) |                 /* byte 1 -> byte 2 */ \
+    ((((unsigned long)(n)) >>  8) & 0x0000FF00) |                 /* byte 2 -> byte 1 */ \
+    ((((unsigned long)(n)) >> 24) & 0x000000FF))                  // byte 3 -> byte 0; reverses the four low bytes, n is evaluated four times
+
+
+//
+// Core Offset
+//
+#define MODEXPNG_CORE_NUM 0x26 // core number; the _fmc_* helpers multiply it by (256 << 2) to form the core's byte offset
+
+
+//
+// more dirty workarounds: local FMC accessors that add the core offset and swap bytes
+//
+static void _fmc_read_32(uint32_t from_addr, uint32_t *to_ptr) // read the 32-bit word at byte offset from_addr (relative to core MODEXPNG_CORE_NUM) into *to_ptr
+{
+    uint32_t src_addr = FMC_FPGA_BASE_ADDR + (((256 << 2) * MODEXPNG_CORE_NUM + from_addr) & FMC_FPGA_ADDR_MASK); // FPGA base + masked (core offset + register offset)
+    uint32_t t = *((volatile uint32_t *)src_addr); // volatile: forces a real load on every call, so the status polling loop cannot be collapsed into a single read
+    *to_ptr = _ntohl(t); // byte-swap the raw word before handing it to the caller
+}
+
+static void _fmc_write_32(uint32_t to_addr, uint32_t value) // write value to the 32-bit word at byte offset to_addr (relative to core MODEXPNG_CORE_NUM)
+{
+    uint32_t t = _htonl(value); // byte-swap before the word goes out
+    uint32_t dst_addr = FMC_FPGA_BASE_ADDR + (((256 << 2) * MODEXPNG_CORE_NUM + to_addr) & FMC_FPGA_ADDR_MASK); // FPGA base + masked (core offset + register offset)
+    *(volatile uint32_t *)dst_addr = t; // volatile: every store must be emitted, so back-to-back writes to one register (clear, then set the control bit) are not merged or dropped
+}
+
+
//
// test routine
//
int main()
{
- int ok;
- int first_run;
+ int ok, first_run;
+ long long int iters;
+
+ ok = sizeof iters;
// initialize
stm_init();
@@ -224,9 +264,9 @@ int main()
uint32_t core_name1;
uint32_t core_version;
- fmc_read_32(CORE_ADDR_NAME0, &core_name0);
- fmc_read_32(CORE_ADDR_NAME1, &core_name1);
- fmc_read_32(CORE_ADDR_VERSION, &core_version);
+ _fmc_read_32(CORE_ADDR_NAME0, &core_name0);
+ _fmc_read_32(CORE_ADDR_NAME1, &core_name1);
+ _fmc_read_32(CORE_ADDR_VERSION, &core_version);
// "mode", "xpng"
if ((core_name0 != 0x6D6F6465) || (core_name1 != 0x78706E67))
@@ -237,7 +277,7 @@ int main()
// check, that reference code works correctly
ok = 1;
-
+ /**/
ok = ok && check_montgomery_factor(1024, N_1024, N_FACTOR_1024);
ok = ok && check_montgomery_factor( 512, P_1024, P_FACTOR_1024);
ok = ok && check_montgomery_factor( 512, Q_1024, Q_FACTOR_1024);
@@ -247,17 +287,17 @@ int main()
ok = ok && check_montgomery_factor(4096, N_4096, N_FACTOR_4096);
ok = ok && check_montgomery_factor(2048, P_4096, P_FACTOR_4096);
ok = ok && check_montgomery_factor(2048, Q_4096, Q_FACTOR_4096);
-
+ /**//**/
ok = ok && check_modulus_coeff(1024, N_1024, N_COEFF_1024);
ok = ok && check_modulus_coeff( 512, P_1024, P_COEFF_1024);
ok = ok && check_modulus_coeff( 512, Q_1024, Q_COEFF_1024);
ok = ok && check_modulus_coeff(2048, N_2048, N_COEFF_2048);
ok = ok && check_modulus_coeff(1024, P_2048, P_COEFF_2048);
ok = ok && check_modulus_coeff(1024, Q_2048, Q_COEFF_2048);
-// ok = ok && check_modulus_coeff(4096, N_4096, N_COEFF_4096); // SLOW (~20 sec)
+// ok = ok && check_modulus_coeff(4096, N_4096, N_COEFF_4096); // SLOW (~20 sec)
ok = ok && check_modulus_coeff(2048, P_4096, P_COEFF_4096);
ok = ok && check_modulus_coeff(2048, Q_4096, Q_COEFF_4096);
-
+ /**/
if (!ok)
{ led_off(LED_GREEN);
led_on(LED_RED);
@@ -265,14 +305,14 @@ int main()
}
// repeat forever
- ok = 1, first_run = 1;
+ ok = 1, first_run = 1, iters = 0;
while (1)
- {
+ {
ok = ok && sign_without_crt(1024, first_run,
M_1024, N_1024, N_FACTOR_1024, N_COEFF_1024,
X_1024, Y_1024, D_1024, S_1024,
XM_1024, YM_1024);
-
+
ok = ok && sign_without_crt(2048, first_run,
M_2048, N_2048, N_FACTOR_2048, N_COEFF_2048,
X_2048, Y_2048, D_2048, S_2048,
@@ -282,14 +322,14 @@ int main()
M_4096, N_4096, N_FACTOR_4096, N_COEFF_4096,
X_4096, Y_4096, D_4096, S_4096,
XM_4096, YM_4096);
-
+
ok = ok && sign_using_crt(1024, first_run,
M_1024, N_1024, N_FACTOR_1024, N_COEFF_1024,
X_1024, Y_1024, P_1024, Q_1024,
P_FACTOR_1024, P_COEFF_1024, Q_FACTOR_1024, Q_COEFF_1024,
DP_1024, DQ_1024, QINV_1024, S_1024,
XM_1024, YM_1024);
-
+
ok = ok && sign_using_crt(2048, first_run,
M_2048, N_2048, N_FACTOR_2048, N_COEFF_2048,
X_2048, Y_2048, P_2048, Q_2048,
@@ -303,13 +343,13 @@ int main()
P_FACTOR_4096, P_COEFF_4096, Q_FACTOR_4096, Q_COEFF_4096,
DP_4096, DQ_4096, QINV_4096, S_4096,
XM_4096, YM_4096);
-
+
if (!ok)
{ led_off(LED_GREEN);
led_on(LED_RED);
}
- first_run = 0;
+ first_run = 0, iters++;
toggle_yellow_led();
}
@@ -395,33 +435,33 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run,
// note, that n_coeff is one word larger, than the modulus, so we need a single
// extra write after the word-by-word loop
for (i=0, j=num_words-1; i<num_words; i++, j--)
- { fmc_write_32(CORE_ADDR_BANK_M + i * sizeof(uint32_t), m[j]);
- fmc_write_32(CORE_ADDR_BANK_N + i * sizeof(uint32_t), n[j]);
- fmc_write_32(CORE_ADDR_BANK_N_FACTOR + i * sizeof(uint32_t), n_factor[j]);
- fmc_write_32(CORE_ADDR_BANK_N_COEFF + i * sizeof(uint32_t), n_coeff[j+1]); // mind the +1
- fmc_write_32(CORE_ADDR_BANK_X + i * sizeof(uint32_t), x[j]);
- fmc_write_32(CORE_ADDR_BANK_Y + i * sizeof(uint32_t), y[j]);
- if (!use_crt) fmc_write_32(CORE_ADDR_BANK_D + i * sizeof(uint32_t), d[j]);
- else fmc_write_32(CORE_ADDR_BANK_D + i * sizeof(uint32_t), 0);
+ { _fmc_write_32(CORE_ADDR_BANK_M + i * sizeof(uint32_t), m[j]);
+ _fmc_write_32(CORE_ADDR_BANK_N + i * sizeof(uint32_t), n[j]);
+ _fmc_write_32(CORE_ADDR_BANK_N_FACTOR + i * sizeof(uint32_t), n_factor[j]);
+ _fmc_write_32(CORE_ADDR_BANK_N_COEFF + i * sizeof(uint32_t), n_coeff[j+1]); // mind the +1
+ _fmc_write_32(CORE_ADDR_BANK_X + i * sizeof(uint32_t), x[j]);
+ _fmc_write_32(CORE_ADDR_BANK_Y + i * sizeof(uint32_t), y[j]);
+ if (!use_crt) _fmc_write_32(CORE_ADDR_BANK_D + i * sizeof(uint32_t), d[j]);
+ else _fmc_write_32(CORE_ADDR_BANK_D + i * sizeof(uint32_t), 0);
}
- fmc_write_32(CORE_ADDR_BANK_N_COEFF + i * sizeof(uint32_t), n_coeff[0]); // j+1 is 0 by now, i is num_words
-
+ _fmc_write_32(CORE_ADDR_BANK_N_COEFF + i * sizeof(uint32_t), n_coeff[0]); // j+1 is 0 by now, i is num_words
+
// also fill in all the input values necessary for CRT mode
// again, we need to write a pair of extra words for p_coeff and q_coeff after the loop
if (use_crt)
{ for (i=0, j=num_words_half-1; i<num_words_half; i++, j--)
- { fmc_write_32(CORE_ADDR_BANK_P + i * sizeof(uint32_t), p[j]);
- fmc_write_32(CORE_ADDR_BANK_Q + i * sizeof(uint32_t), q[j]);
- fmc_write_32(CORE_ADDR_BANK_P_FACTOR + i * sizeof(uint32_t), p_factor[j]);
- fmc_write_32(CORE_ADDR_BANK_P_COEFF + i * sizeof(uint32_t), p_coeff[j+1]); // mind the +1!
- fmc_write_32(CORE_ADDR_BANK_Q_FACTOR + i * sizeof(uint32_t), q_factor[j]);
- fmc_write_32(CORE_ADDR_BANK_Q_COEFF + i * sizeof(uint32_t), q_coeff[j+1]); // mind the +1!
- fmc_write_32(CORE_ADDR_BANK_DP + i * sizeof(uint32_t), dp[j]);
- fmc_write_32(CORE_ADDR_BANK_DQ + i * sizeof(uint32_t), dq[j]);
- fmc_write_32(CORE_ADDR_BANK_QINV + i * sizeof(uint32_t), qinv[j]);
+ { _fmc_write_32(CORE_ADDR_BANK_P + i * sizeof(uint32_t), p[j]);
+ _fmc_write_32(CORE_ADDR_BANK_Q + i * sizeof(uint32_t), q[j]);
+ _fmc_write_32(CORE_ADDR_BANK_P_FACTOR + i * sizeof(uint32_t), p_factor[j]);
+ _fmc_write_32(CORE_ADDR_BANK_P_COEFF + i * sizeof(uint32_t), p_coeff[j+1]); // mind the +1!
+ _fmc_write_32(CORE_ADDR_BANK_Q_FACTOR + i * sizeof(uint32_t), q_factor[j]);
+ _fmc_write_32(CORE_ADDR_BANK_Q_COEFF + i * sizeof(uint32_t), q_coeff[j+1]); // mind the +1!
+ _fmc_write_32(CORE_ADDR_BANK_DP + i * sizeof(uint32_t), dp[j]);
+ _fmc_write_32(CORE_ADDR_BANK_DQ + i * sizeof(uint32_t), dq[j]);
+ _fmc_write_32(CORE_ADDR_BANK_QINV + i * sizeof(uint32_t), qinv[j]);
}
- fmc_write_32(CORE_ADDR_BANK_P_COEFF + i * sizeof(uint32_t), p_coeff[0]); // j+1 is 0 by now, i is num_words_half
- fmc_write_32(CORE_ADDR_BANK_Q_COEFF + i * sizeof(uint32_t), q_coeff[0]); // j+1 is 0 by now, i is num_words_half
+ _fmc_write_32(CORE_ADDR_BANK_P_COEFF + i * sizeof(uint32_t), p_coeff[0]); // j+1 is 0 by now, i is num_words_half
+ _fmc_write_32(CORE_ADDR_BANK_Q_COEFF + i * sizeof(uint32_t), q_coeff[0]); // j+1 is 0 by now, i is num_words_half
}
// set parameters (there's no need to divide key length by two when CRT is enabled,
@@ -430,22 +470,22 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run,
reg_modulus_bits = key_length;
reg_exponent_bits = key_length;
- fmc_write_32(CORE_ADDR_MODE, reg_mode);
- fmc_write_32(CORE_ADDR_MODULUS_BITS, reg_modulus_bits);
- fmc_write_32(CORE_ADDR_EXPONENT_BITS, reg_exponent_bits);
+ _fmc_write_32(CORE_ADDR_MODE, reg_mode);
+ _fmc_write_32(CORE_ADDR_MODULUS_BITS, reg_modulus_bits);
+ _fmc_write_32(CORE_ADDR_EXPONENT_BITS, reg_exponent_bits);
// clear 'next' control bit, then set 'next' control bit again to trigger new operation
reg_control = 0;
- fmc_write_32(CORE_ADDR_CONTROL, reg_control);
+ _fmc_write_32(CORE_ADDR_CONTROL, reg_control);
reg_control = CORE_CONTROL_BIT_NEXT;
- fmc_write_32(CORE_ADDR_CONTROL, reg_control);
+ _fmc_write_32(CORE_ADDR_CONTROL, reg_control);
// wait for 'ready' status bit to be set, also turn on the blue LED while the
// core is busy to allow precise measurement with a scope
num_cyc = 0;
do
{ num_cyc++;
- fmc_read_32(CORE_ADDR_STATUS, &reg_status);
+ _fmc_read_32(CORE_ADDR_STATUS, &reg_status);
}
while (!(reg_status & CORE_STATUS_BIT_VALID));
@@ -460,9 +500,9 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run,
// always stay the same, so we always verify it
uint32_t s_word, xm_word, ym_word;
for (i=0, j=num_words-1; i<num_words; i++, j--)
- { fmc_read_32(CORE_ADDR_BANK_S + i * sizeof(uint32_t), &s_word);
- fmc_read_32(CORE_ADDR_BANK_XM + i * sizeof(uint32_t), &xm_word);
- fmc_read_32(CORE_ADDR_BANK_YM + i * sizeof(uint32_t), &ym_word);
+ { _fmc_read_32(CORE_ADDR_BANK_S + i * sizeof(uint32_t), &s_word);
+ _fmc_read_32(CORE_ADDR_BANK_XM + i * sizeof(uint32_t), &xm_word);
+ _fmc_read_32(CORE_ADDR_BANK_YM + i * sizeof(uint32_t), &ym_word);
if (s_word != s[j]) return 0;