aboutsummaryrefslogtreecommitdiff
path: root/modexp_fpga_model_montgomery.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'modexp_fpga_model_montgomery.cpp')
-rw-r--r-- modexp_fpga_model_montgomery.cpp | 65
1 files changed, 50 insertions, 15 deletions
diff --git a/modexp_fpga_model_montgomery.cpp b/modexp_fpga_model_montgomery.cpp
index 260f498..e5237ff 100644
--- a/modexp_fpga_model_montgomery.cpp
+++ b/modexp_fpga_model_montgomery.cpp
@@ -40,6 +40,7 @@
//----------------------------------------------------------------
#include "modexp_fpga_model.h"
#include "modexp_fpga_model_pe.h"
+#include "modexp_fpga_systolic.h"
#include "modexp_fpga_model_montgomery.h"
@@ -67,18 +68,18 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
bool select_s; // flag
- FPGA_WORD t_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // accumulators
+ //FPGA_WORD t_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // accumulators
FPGA_WORD t_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
FPGA_WORD t_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- FPGA_WORD s_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // intermediate products
+ //FPGA_WORD s_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // intermediate products
FPGA_WORD s_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
FPGA_WORD s_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- FPGA_WORD c_in_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // input carries
+ //FPGA_WORD c_in_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // input carries
FPGA_WORD c_in_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
FPGA_WORD c_in_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- FPGA_WORD c_out_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // output carries
+ //FPGA_WORD c_out_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // output carries
FPGA_WORD c_out_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
FPGA_WORD c_out_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
@@ -103,18 +104,18 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
// initialize arrays of accumulators and carries to zeroes
for (i=0; i<num_systolic_cycles; i++)
for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
- c_in_ab[i][j] = 0, c_in_q [i][j] = 0, c_in_qn[i][j] = 0,
- t_ab[i][j] = 0, t_q [i][j] = 0, t_qn[i][j] = 0;
+ /*c_in_ab[i][j] = 0,*/ c_in_q [i][j] = 0, c_in_qn[i][j] = 0,
+ /*t_ab[i][j] = 0,*/ t_q [i][j] = 0, t_qn[i][j] = 0;
// initialize 1-bit carry and borrow to zeroes too
c_in_s = 0, b_in_sn = 0;
- // simultaneously calculate AB, Q, QN, S, SN
+ multiply_systolic(A, B, AB, len, 2 * len);
+
+ /*
+ ///////////////////////
for (i = 0; i < (2 * len); i++)
{
- // multiply entire B by current word of A to get AB
- // multiply entire N_COEFF by current word of AB to get Q
- // multiply entire N by current word of Q to get QN
for (k = 0; k < num_systolic_cycles; k++)
{
// simulate how a systolic array would work
@@ -124,8 +125,6 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
// current words of B, N_COEFF, N
FPGA_WORD Bj = (j_index < len) ? B [k * SYSTOLIC_NUM_WORDS + j] : 0;
- FPGA_WORD N_COEFFj = (j_index < len) ? N_COEFF[k * SYSTOLIC_NUM_WORDS + j] : 0;
- FPGA_WORD Nj = (j_index < len) ? N [k * SYSTOLIC_NUM_WORDS + j] : 0;
// current word of A
FPGA_WORD Aj_ab = (i < len) ? A[i] : 0;
@@ -135,6 +134,45 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
// store current word of AB
if ((k == 0) && (j == 0)) AB[i] = reduce_only ? A[i] : s_ab[0][0];
+ }
+
+ // propagate carries
+ for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
+ c_in_ab[k][j] = c_out_ab[k][j];
+
+ // update accumulators
+ for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
+ t_ab[k][j-1] = s_ab[k][j];
+
+ // update accumulators
+ if (k > 0)
+ t_ab[k-1][SYSTOLIC_NUM_WORDS-1] = s_ab[k][0];
+
+ }
+ }
+ */
+
+ ///////////////////////
+
+
+
+
+ // simultaneously calculate Q, QN, S, SN (AB was already produced by multiply_systolic() above)
+ for (i = 0; i < (2 * len); i++)
+ {
+ // (AB is no longer computed in this loop; it is only read here, word by word)
+ // multiply entire N_COEFF by current word of AB to get Q
+ // multiply entire N by current word of Q to get QN
+ for (k = 0; k < num_systolic_cycles; k++)
+ {
+ // simulate how a systolic array would work
+ for (j = 0; j < SYSTOLIC_NUM_WORDS; j++)
+ {
+ size_t j_index = k * SYSTOLIC_NUM_WORDS + j;
+
+ // current words of N_COEFF, N
+ FPGA_WORD N_COEFFj = (j_index < len) ? N_COEFF[k * SYSTOLIC_NUM_WORDS + j] : 0;
+ FPGA_WORD Nj = (j_index < len) ? N [k * SYSTOLIC_NUM_WORDS + j] : 0;
// current word of AB
FPGA_WORD Aj_q = (i < len) ? AB[i] : 0;
@@ -157,21 +195,18 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
// propagate carries
for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
- c_in_ab[k][j] = c_out_ab[k][j],
c_in_q [k][j] = c_out_q [k][j],
c_in_qn[k][j] = c_out_qn[k][j];
// update accumulators
for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
{
- t_ab[k][j-1] = s_ab[k][j];
t_q [k][j-1] = s_q [k][j];
t_qn[k][j-1] = s_qn[k][j];
}
// update accumulators
if (k > 0)
- t_ab[k-1][SYSTOLIC_NUM_WORDS-1] = s_ab[k][0],
t_q [k-1][SYSTOLIC_NUM_WORDS-1] = s_q [k][0],
t_qn[k-1][SYSTOLIC_NUM_WORDS-1] = s_qn[k][0];