diff options
-rw-r--r-- | ecdsa_model.h    |  4 | ++--
-rw-r--r-- | fpga_modular.cpp | 33 | ++++++++++++++++++++-------------
2 files changed, 22 insertions, 15 deletions
diff --git a/ecdsa_model.h b/ecdsa_model.h
index 1e6a04c..fc7e571 100644
--- a/ecdsa_model.h
+++ b/ecdsa_model.h
@@ -45,7 +45,7 @@
// USE_CURVE == 2 -> P-384
//
//------------------------------------------------------------------------------
-#define USE_CURVE 1
+#define USE_CURVE 2
//------------------------------------------------------------------------------
@@ -112,7 +112,7 @@
#define P_384_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001}
/* Division Factor */
-#define P_384_DELTA {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff, 0x80000000, 0x0000000, 0x080000000}
+#define P_384_DELTA {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff, 0x80000000, 0x00000000, 0x80000000}
/* Base Point */
#define P_384_G_X {0xaa87ca22, 0xbe8b0537, 0x8eb1c71e, 0xf320ad74, 0x6e1d3b62, 0x8ba79b98, 0x59f741e0, 0x82542a38, 0x5502f25d, 0xbf55296c, 0x3a545e38, 0x72760ab7}
diff --git a/fpga_modular.cpp b/fpga_modular.cpp
index af485a0..9b01df0 100644
--- a/fpga_modular.cpp
+++ b/fpga_modular.cpp
@@ -605,7 +605,8 @@ void fpga_modular_mul_helper_reduce_p256(FPGA_WORD *c, FPGA_BUFFER *p)
// on the other hand is the output of the parallelized Comba multiplier, so it
// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS).
//
-// ...
+// To save FPGA resources, the calculation is done using only two adders and
+// one subtractor. The algorithm is split into five steps.
//
//------------------------------------------------------------------------------
#if USE_CURVE == 2
@@ -626,27 +627,33 @@ void fpga_modular_mul_helper_reduce_p384(FPGA_WORD *c, FPGA_BUFFER *p)
s9.words[11] = 0, s9.words[10] = 0, s9.words[ 9] = 0, s9.words[ 8] = 0, s9.words[ 7] = 0, s9.words[ 6] = 0, s9.words[ 5] = 0, s9.words[ 4] = c[23], s9.words[ 3] = c[22], s9.words[ 2] = c[21], s9.words[ 1] = c[20], s9.words[ 0] = 0;
s10.words[11] = 0, s10.words[10] = 0, s10.words[ 9] = 0, s10.words[ 8] = 0, s10.words[ 7] = 0, s10.words[ 6] = 0, s10.words[ 5] = 0, s10.words[ 4] = c[23], s10.words[ 3] = c[23], s10.words[ 2] = 0, s10.words[ 1] = 0, s10.words[ 0] = 0;
-
// intermediate results
- FPGA_BUFFER t1, t2, t3, t4;
+ FPGA_BUFFER sum0, sum1, difference;
/* Step 1. */
- fpga_modular_add(&s1, &s3, &t1); // t1 = s1 + s3
- fpga_modular_add(&s2, &s2, &t2); // t2 = 2*s2
- fpga_modular_add(&s4, &s5, &t3); // t3 = s4 + s5
- fpga_modular_add(&s6, &s7, &t4); // t4 = s6 + s7
+ fpga_modular_add(&s1, &s3, &sum0); // sum0 = s1 + s3
+ fpga_modular_add(&s2, &s2, &sum1); // sum1 = 2*s2
+ fpga_modular_sub(&ecdsa_zero, &s8, &difference); // difference = -s8
/* Step 2. */
- fpga_modular_add(&t1, &t2, &t1); // t1 = t1 + t2 = s1 + 2*s2 + 2*s3
- fpga_modular_add(&t3, &t4, &t2); // t2 = t3 + t4 = s4 + s5 + s6 + s7
- fpga_modular_add(&s8, &s9, &t3); // t3 = s8 + s9
+ fpga_modular_add(&sum0, &s4, &sum0); // sum0 = s1 + s3 + s4
+ fpga_modular_add(&sum1, &s5, &sum1); // sum1 = 2*s2 + s5
+ fpga_modular_sub(&difference, &s9, &difference); // difference = -(s8 + s9)
/* Step 3. */
- fpga_modular_add(&t1, &t2, &t1); // t1 = t1 + t2 = s1 + 2*s2 + 2*s3 + s4 + s5 + s6 + s7
- fpga_modular_add(&s10, &t3, &t2); // t2 = s10 + t3 = s8 + s9 + s10
+ fpga_modular_add(&sum0, &s6, &sum0); // sum0 = s1 + s3 + s4 + s6
+ fpga_modular_add(&sum1, &s7, &sum1); // sum1 = 2*s2 + s5 + s7
+ fpga_modular_sub(&difference, &s10, &difference); // difference = -(s8 + s9 + s10)
/* Step 4. */
- fpga_modular_sub(&t1, &t2, p); // p = t1 - t2 = s1 + 2*s2 + 2*s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10
+ fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7
+// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
+ fpga_modular_sub(&difference, &ecdsa_zero, &difference); // compulsory cycle to keep difference constant for next stage
+
+ /* Step 5. */
+ fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10
+// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
+// fpga_modular_add(<dummy>, <dummy>, &difference); // dummy cycle, result ignored
}
#endif
|