From cae8718217846cfaefcbfecd55f9a117731a8d99 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Mon, 6 Feb 2017 14:40:16 +0300 Subject: Minor cleanup * Fixed misplaced comma in 'ecdsa_model.h' * Rewrote P-384 reduction routine to match the style used in P-256 reduction --- fpga_modular.cpp | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'fpga_modular.cpp') diff --git a/fpga_modular.cpp b/fpga_modular.cpp index af485a0..9b01df0 100644 --- a/fpga_modular.cpp +++ b/fpga_modular.cpp @@ -605,7 +605,8 @@ void fpga_modular_mul_helper_reduce_p256(FPGA_WORD *c, FPGA_BUFFER *p) // on the other hand is the output of the parallelized Comba multiplier, so it // is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS). // -// ... +// To save FPGA resources, the calculation is done using only two adders and +// one subtractor. The algorithm is split into five steps. // //------------------------------------------------------------------------------ #if USE_CURVE == 2 @@ -626,27 +627,33 @@ void fpga_modular_mul_helper_reduce_p384(FPGA_WORD *c, FPGA_BUFFER *p) s9.words[11] = 0, s9.words[10] = 0, s9.words[ 9] = 0, s9.words[ 8] = 0, s9.words[ 7] = 0, s9.words[ 6] = 0, s9.words[ 5] = 0, s9.words[ 4] = c[23], s9.words[ 3] = c[22], s9.words[ 2] = c[21], s9.words[ 1] = c[20], s9.words[ 0] = 0; s10.words[11] = 0, s10.words[10] = 0, s10.words[ 9] = 0, s10.words[ 8] = 0, s10.words[ 7] = 0, s10.words[ 6] = 0, s10.words[ 5] = 0, s10.words[ 4] = c[23], s10.words[ 3] = c[23], s10.words[ 2] = 0, s10.words[ 1] = 0, s10.words[ 0] = 0; - // intermediate results - FPGA_BUFFER t1, t2, t3, t4; + FPGA_BUFFER sum0, sum1, difference; /* Step 1. 
*/ - fpga_modular_add(&s1, &s3, &t1); // t1 = s1 + s3 - fpga_modular_add(&s2, &s2, &t2); // t2 = 2*s2 - fpga_modular_add(&s4, &s5, &t3); // t3 = s4 + s5 - fpga_modular_add(&s6, &s7, &t4); // t4 = s6 + s7 + fpga_modular_add(&s1, &s3, &sum0); // sum0 = s1 + s3 + fpga_modular_add(&s2, &s2, &sum1); // sum1 = 2*s2 + fpga_modular_sub(&ecdsa_zero, &s8, &difference); // difference = -s8 /* Step 2. */ - fpga_modular_add(&t1, &t2, &t1); // t1 = t1 + t2 = s1 + 2*s2 + 2*s3 - fpga_modular_add(&t3, &t4, &t2); // t2 = t3 + t4 = s4 + s5 + s6 + s7 - fpga_modular_add(&s8, &s9, &t3); // t3 = s8 + s9 + fpga_modular_add(&sum0, &s4, &sum0); // sum0 = s1 + s3 + s4 + fpga_modular_add(&sum1, &s5, &sum1); // sum1 = 2*s2 + s5 + fpga_modular_sub(&difference, &s9, &difference); // difference = -(s8 + s9) /* Step 3. */ - fpga_modular_add(&t1, &t2, &t1); // t1 = t1 + t2 = s1 + 2*s2 + 2*s3 + s4 + s5 + s6 + s7 - fpga_modular_add(&s10, &t3, &t2); // t2 = s10 + t3 = s8 + s9 + s10 + fpga_modular_add(&sum0, &s6, &sum0); // sum0 = s1 + s3 + s4 + s6 + fpga_modular_add(&sum1, &s7, &sum1); // sum1 = 2*s2 + s5 + s7 + fpga_modular_sub(&difference, &s10, &difference); // difference = -(s8 + s9 + s10) /* Step 4. */ - fpga_modular_sub(&t1, &t2, p); // p = t1 - t2 = s1 + 2*s2 + 2*s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10 + fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7 +// fpga_modular_add(, , &sum1); // dummy cycle, result ignored + fpga_modular_sub(&difference, &ecdsa_zero, &difference); // compulsory cycle to keep difference constant for next stage + + /* Step 5. */ + fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10 +// fpga_modular_add(, , &sum1); // dummy cycle, result ignored +// fpga_modular_add(, , &difference); // dummy cycle, result ignored } #endif -- cgit v1.2.3