aboutsummaryrefslogtreecommitdiff
path: root/fpga_modular.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'fpga_modular.cpp')
-rw-r--r--fpga_modular.cpp33
1 files changed, 20 insertions, 13 deletions
diff --git a/fpga_modular.cpp b/fpga_modular.cpp
index af485a0..9b01df0 100644
--- a/fpga_modular.cpp
+++ b/fpga_modular.cpp
@@ -605,7 +605,8 @@ void fpga_modular_mul_helper_reduce_p256(FPGA_WORD *c, FPGA_BUFFER *p)
// on the other hand is the output of the parallelized Comba multiplier, so it
// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS).
//
-// ...
+// To save FPGA resources, the calculation is done using only two adders and
+// one subtractor. The algorithm is split into five steps.
//
//------------------------------------------------------------------------------
#if USE_CURVE == 2
@@ -626,27 +627,33 @@ void fpga_modular_mul_helper_reduce_p384(FPGA_WORD *c, FPGA_BUFFER *p)
s9.words[11] = 0, s9.words[10] = 0, s9.words[ 9] = 0, s9.words[ 8] = 0, s9.words[ 7] = 0, s9.words[ 6] = 0, s9.words[ 5] = 0, s9.words[ 4] = c[23], s9.words[ 3] = c[22], s9.words[ 2] = c[21], s9.words[ 1] = c[20], s9.words[ 0] = 0;
s10.words[11] = 0, s10.words[10] = 0, s10.words[ 9] = 0, s10.words[ 8] = 0, s10.words[ 7] = 0, s10.words[ 6] = 0, s10.words[ 5] = 0, s10.words[ 4] = c[23], s10.words[ 3] = c[23], s10.words[ 2] = 0, s10.words[ 1] = 0, s10.words[ 0] = 0;
-
// intermediate results
- FPGA_BUFFER t1, t2, t3, t4;
+ FPGA_BUFFER sum0, sum1, difference;
/* Step 1. */
- fpga_modular_add(&s1, &s3, &t1); // t1 = s1 + s3
- fpga_modular_add(&s2, &s2, &t2); // t2 = 2*s2
- fpga_modular_add(&s4, &s5, &t3); // t3 = s4 + s5
- fpga_modular_add(&s6, &s7, &t4); // t4 = s6 + s7
+ fpga_modular_add(&s1, &s3, &sum0); // sum0 = s1 + s3
+ fpga_modular_add(&s2, &s2, &sum1); // sum1 = 2*s2
+ fpga_modular_sub(&ecdsa_zero, &s8, &difference); // difference = -s8
/* Step 2. */
- fpga_modular_add(&t1, &t2, &t1); // t1 = t1 + t2 = s1 + 2*s2 + 2*s3
- fpga_modular_add(&t3, &t4, &t2); // t2 = t3 + t4 = s4 + s5 + s6 + s7
- fpga_modular_add(&s8, &s9, &t3); // t3 = s8 + s9
+ fpga_modular_add(&sum0, &s4, &sum0); // sum0 = s1 + s3 + s4
+ fpga_modular_add(&sum1, &s5, &sum1); // sum1 = 2*s2 + s5
+ fpga_modular_sub(&difference, &s9, &difference); // difference = -(s8 + s9)
/* Step 3. */
- fpga_modular_add(&t1, &t2, &t1); // t1 = t1 + t2 = s1 + 2*s2 + 2*s3 + s4 + s5 + s6 + s7
- fpga_modular_add(&s10, &t3, &t2); // t2 = s10 + t3 = s8 + s9 + s10
+ fpga_modular_add(&sum0, &s6, &sum0); // sum0 = s1 + s3 + s4 + s6
+ fpga_modular_add(&sum1, &s7, &sum1); // sum1 = 2*s2 + s5 + s7
+ fpga_modular_sub(&difference, &s10, &difference); // difference = -(s8 + s9 + s10)
/* Step 4. */
- fpga_modular_sub(&t1, &t2, p); // p = t1 - t2 = s1 + 2*s2 + 2*s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10
+ fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7
+// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
+ fpga_modular_sub(&difference, &ecdsa_zero, &difference); // compulsory cycle to keep difference constant for next stage
+
+ /* Step 5. */
+ fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10
+// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
+// fpga_modular_add(<dummy>, <dummy>, &difference); // dummy cycle, result ignored
}
#endif