about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2017-06-29 01:15:40 +0300
committer Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2017-06-29 01:15:40 +0300
commit e91ce0765d40b3524f76f96c35aec6c85ded4c5f (patch)
tree 32a39699b85ea5ffd3c7ea918396457a78f5e688
parent 22f6cc0496f29d909c3f777d7c9b59559ab5723d (diff)
Follow what Verilog does more precisely.
-rw-r--r-- modexp_fpga_model_montgomery.cpp 103
1 files changed, 64 insertions, 39 deletions
diff --git a/modexp_fpga_model_montgomery.cpp b/modexp_fpga_model_montgomery.cpp
index 34ef2b6..260f498 100644
--- a/modexp_fpga_model_montgomery.cpp
+++ b/modexp_fpga_model_montgomery.cpp
@@ -331,41 +331,49 @@ void montgomery_calc_n_coeff(const FPGA_WORD *N, FPGA_WORD *N_COEFF, size_t len)
// The high-level algorithm is:
//
// 1. R = 1
-// 2. NN = ~N + 1
-// 3. for i=1 to len-1
-// 4. T = R * NN mod 2 ** len
-// 5. if T[i] then
-// 6. R = R + (1 << i)
+// 2. B = 1
+// 3. NN = ~N + 1
+// 4. for i=1 to len-1
+// 5. B = B << 1
+// 6. T = R * NN mod 2 ** len
+// 7. if T[i] then
+// 8. R = R + B
//
//----------------------------------------------------------------
{
size_t i, j, k; // counters
- FPGA_WORD NN[MAX_OPERAND_WORDS]; // temporary buffers
- FPGA_WORD T [MAX_OPERAND_WORDS]; //
- FPGA_WORD R [MAX_OPERAND_WORDS]; //
- FPGA_WORD R1[MAX_OPERAND_WORDS]; //
+ FPGA_WORD NN[MAX_OPERAND_WORDS]; // NN = ~N + 1
+ FPGA_WORD T [MAX_OPERAND_WORDS]; // T = R * NN
+ FPGA_WORD R [MAX_OPERAND_WORDS]; // R
+ FPGA_WORD B [MAX_OPERAND_WORDS]; // B
+ FPGA_WORD RR[MAX_OPERAND_WORDS]; // RR = R
+ FPGA_WORD RB[MAX_OPERAND_WORDS]; // RB = R + B
bool flag_update_r; // flag
- FPGA_WORD nw, pwr; //
+ FPGA_WORD nw; //
FPGA_WORD sum_c_in, sum_c_out; //
- FPGA_WORD carry_in, carry_out; //
+ FPGA_WORD shift_c_in, shift_c_out; //
FPGA_WORD mul_s, mul_c_in, mul_c_out; //
// NN = -N mod 2 ** len = ~N + 1 mod 2 ** len
- carry_in = 0;
+ sum_c_in = 0;
for (i=0; i<len; i++)
- { nw = (i > 0) ? 0 : 1; // nw = 1
- pe_add(~N[i], nw, carry_in, &NN[i], &carry_out); // NN = ~N + nw
- carry_in = carry_out; // propagate carry
+ { nw = (i > 0) ? 0 : 1; // NW = 1
+ pe_add(~N[i], nw, sum_c_in, &NN[i], &sum_c_out); // NN = ~N + nw
+ sum_c_in = sum_c_out; // propagate carry
}
// R = 1
+ // B = 1
for (i=0; i<len; i++)
- R[i] = (i > 0) ? 0 : 1;
+ R[i] = (i > 0) ? 0 : 1,
+ B[i] = (i > 0) ? 0 : 1;
// calculate T = R * NN
+ // calculate B = B << 1
+ // calculate RB = R + B
for (k=1; k<(len * sizeof(FPGA_WORD) * CHAR_BIT); k++)
{
// T = 0
@@ -374,42 +382,59 @@ void montgomery_calc_n_coeff(const FPGA_WORD *N, FPGA_WORD *N_COEFF, size_t len)
// T = NN * R
for (i=0; i<len; i++)
{
+ // reset adder and shifter carries
+ if (i == 0)
+ { shift_c_in = 0;
+ sum_c_in = 0;
+ }
+
+ // reset multiplier carry
mul_c_in = 0;
+ // get word and bit indices
+ size_t word_index = k / (CHAR_BIT * sizeof(FPGA_WORD));
+ size_t bit_index = k & ((CHAR_BIT * sizeof(FPGA_WORD)) - 1);
+
+ // update bit mask
+ FPGA_WORD bit_mask = (1 << bit_index);
+
+ // main calculation loop
for (j=0; j<(len-i); j++)
{
-
- pe_mul(R[j], NN[i], T[i+j], mul_c_in, &mul_s, &mul_c_out);
+ // B = B << 1
+ // RB = R + B
+ if (i == 0)
+ { shift_c_out = B[j] >> (sizeof(FPGA_WORD) * CHAR_BIT - 1);
+ B[j] <<= 1, B[j] |= shift_c_in;
+ pe_add(R[j], B[j], sum_c_in, &RB[j], &sum_c_out);
+ }
+
+ // RR = R
+ if (i == 0)
+ RR[j] = R[j];
+ // T = R * NN
+ pe_mul(R[j], NN[i], T[i+j], mul_c_in, &mul_s, &mul_c_out);
T[i+j] = mul_s;
- mul_c_in = mul_c_out;
- }
- }
+ // update flag
+ if ((i + j) == word_index)
+ flag_update_r = (T[i+j] & (1 << bit_index)) == (1 << bit_index);
- // get word and index indices
- size_t word_index = k / (CHAR_BIT * sizeof(FPGA_WORD));
- size_t bit_index = k & ((CHAR_BIT * sizeof(FPGA_WORD)) - 1);
+ // propagate adder and shifter carries
+ if (i == 0)
+ { shift_c_in = shift_c_out;
+ sum_c_in = sum_c_out;
+ }
- // update bit mask
- FPGA_WORD bit_mask = (1 << bit_index);
-
- sum_c_in = 0; // clear carry
- flag_update_r = false; // reset flag
-
- // calculate R1 = R + 1 << (2 * len)
- for (i=0; i<len; i++)
- {
- if (i == word_index) flag_update_r = (T[i] & bit_mask) == bit_mask;
-
- pwr = (i == word_index) ? bit_mask : 0;
- pe_add(R[i], pwr, sum_c_in, &R1[i], &sum_c_out);
- carry_in = carry_out;
+ // propagate multiplier carry
+ mul_c_in = mul_c_out;
+ }
}
// update r
for (i=0; i<len; i++)
- R[i] = flag_update_r ? R1[i] : R[i];
+ R[i] = flag_update_r ? RB[i] : RR[i];
}
// store output