From e91ce0765d40b3524f76f96c35aec6c85ded4c5f Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Thu, 29 Jun 2017 01:15:40 +0300 Subject: Follow what Verilog does more precisely. --- modexp_fpga_model_montgomery.cpp | 103 ++++++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 39 deletions(-) diff --git a/modexp_fpga_model_montgomery.cpp b/modexp_fpga_model_montgomery.cpp index 34ef2b6..260f498 100644 --- a/modexp_fpga_model_montgomery.cpp +++ b/modexp_fpga_model_montgomery.cpp @@ -331,41 +331,49 @@ void montgomery_calc_n_coeff(const FPGA_WORD *N, FPGA_WORD *N_COEFF, size_t len) // The high-level algorithm is: // // 1. R = 1 -// 2. NN = ~N + 1 -// 3. for i=1 to len-1 -// 4. T = R * NN mod 2 ** len -// 5. if T[i] then -// 6. R = R + (1 << i) +// 2. B = 1 +// 3. NN = ~N + 1 +// 4. for i=1 to len-1 +// 5. B = B << 1 +// 6. T = R * NN mod 2 ** len +// 7. if T[i] then +// 8. R = R + B // //---------------------------------------------------------------- { size_t i, j, k; // counters - FPGA_WORD NN[MAX_OPERAND_WORDS]; // temporary buffers - FPGA_WORD T [MAX_OPERAND_WORDS]; // - FPGA_WORD R [MAX_OPERAND_WORDS]; // - FPGA_WORD R1[MAX_OPERAND_WORDS]; // + FPGA_WORD NN[MAX_OPERAND_WORDS]; // NN = ~N + 1 + FPGA_WORD T [MAX_OPERAND_WORDS]; // T = R * NN + FPGA_WORD R [MAX_OPERAND_WORDS]; // R + FPGA_WORD B [MAX_OPERAND_WORDS]; // B + FPGA_WORD RR[MAX_OPERAND_WORDS]; // RR = R + FPGA_WORD RB[MAX_OPERAND_WORDS]; // RB = R + B bool flag_update_r; // flag - FPGA_WORD nw, pwr; // + FPGA_WORD nw; // FPGA_WORD sum_c_in, sum_c_out; // - FPGA_WORD carry_in, carry_out; // + FPGA_WORD shift_c_in, shift_c_out; // FPGA_WORD mul_s, mul_c_in, mul_c_out; // // NN = -N mod 2 ** len = ~N + 1 mod 2 ** len - carry_in = 0; + sum_c_in = 0; for (i=0; i 0) ? 0 : 1; // nw = 1 - pe_add(~N[i], nw, carry_in, &NN[i], &carry_out); // NN = ~N + nw - carry_in = carry_out; // propagate carry + { nw = (i > 0) ? 0 : 1; // NW = 1 + pe_add(~N[i], nw, sum_c_in, &NN[i], &sum_c_out); // NN = ~N + nw + sum_c_in = sum_c_out; // propagate carry } // R = 1 + // B = 1 for (i=0; i 0) ? 0 : 1; + R[i] = (i > 0) ? 0 : 1, + B[i] = (i > 0) ? 0 : 1; // calculate T = R * NN + // calculate B = B << 1 + // calculate RB = R + B for (k=1; k<(len * sizeof(FPGA_WORD) * CHAR_BIT); k++) { // T = 0 @@ -374,42 +382,59 @@ void montgomery_calc_n_coeff(const FPGA_WORD *N, FPGA_WORD *N_COEFF, size_t len) // T = NN * R for (i=0; i> (sizeof(FPGA_WORD) * CHAR_BIT - 1); + B[j] <<= 1, B[j] |= shift_c_in; + pe_add(R[j], B[j], sum_c_in, &RB[j], &sum_c_out); + } + + // RR = R + if (i == 0) + RR[j] = R[j]; + // T = R * NN + pe_mul(R[j], NN[i], T[i+j], mul_c_in, &mul_s, &mul_c_out); T[i+j] = mul_s; - mul_c_in = mul_c_out; - } - } + // update flag + if ((i + j) == word_index) + flag_update_r = (T[i+j] & (1 << bit_index)) == (1 << bit_index); - // get word and index indices - size_t word_index = k / (CHAR_BIT * sizeof(FPGA_WORD)); - size_t bit_index = k & ((CHAR_BIT * sizeof(FPGA_WORD)) - 1); + // propagate adder and shifter carries + if (i == 0) + { shift_c_in = shift_c_out; + sum_c_in = sum_c_out; + } - // update bit mask - FPGA_WORD bit_mask = (1 << bit_index); - - sum_c_in = 0; // clear carry - flag_update_r = false; // reset flag - - // calculate R1 = R + 1 << (2 * len) - for (i=0; i