about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2017-07-05 16:24:44 +0300
committer Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2017-07-05 16:24:44 +0300
commit ee41d5837267e2d6a2e2ae27751d8d03a80e3657 (patch)
tree 5be0ceee9f22fe2ff05ff401676b013b732f0d43
parent 6e36e87a88b8f96dd012158730a4300ade3608b6 (diff)
Turned systolic multiplication into a separate routine.
-rw-r--r-- modexp_fpga_model.cpp 8
-rw-r--r-- modexp_fpga_model.h 2
-rw-r--r-- modexp_fpga_model_montgomery.cpp 161
3 files changed, 26 insertions, 145 deletions
diff --git a/modexp_fpga_model.cpp b/modexp_fpga_model.cpp
index 455980b..e1c7f4e 100644
--- a/modexp_fpga_model.cpp
+++ b/modexp_fpga_model.cpp
@@ -113,7 +113,7 @@ int main()
printf("Trying to sign 384-bit message...\n\n");
ok = test_modexp(N_384_ROM, M_384_ROM, D_384_ROM, S_384_ROM, OPERAND_NUM_WORDS_384);
if (!ok) return EXIT_FAILURE;
- /*
+
printf("Trying to exponentiate 384-bit message with 192-bit prime P and exponent dP...\n\n");
ok = test_modexp_crt(P_384_ROM, M_384_ROM, DP_384_ROM, MP_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
if (!ok) return EXIT_FAILURE;
@@ -121,11 +121,11 @@ int main()
printf("Trying to exponentiate 384-bit message with 192-bit prime Q and exponent dQ...\n\n");
ok = test_modexp_crt(Q_384_ROM, M_384_ROM, DQ_384_ROM, MQ_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
if (!ok) return EXIT_FAILURE;
- */
+
printf("Trying to sign 512-bit message...\n\n");
ok = test_modexp(N_512_ROM, M_512_ROM, D_512_ROM, S_512_ROM, OPERAND_NUM_WORDS_512);
if (!ok) return EXIT_FAILURE;
- /*
+
printf("Trying to exponentiate 512-bit message with 256-bit prime P and exponent dP...\n\n");
ok = test_modexp_crt(P_512_ROM, M_512_ROM, DP_512_ROM, MP_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
if (!ok) return EXIT_FAILURE;
@@ -133,7 +133,7 @@ int main()
printf("Trying to exponentiate 512-bit message with 256-bit prime Q and exponent dQ...\n\n");
ok = test_modexp_crt(Q_512_ROM, M_512_ROM, DQ_512_ROM, MQ_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
if (!ok) return EXIT_FAILURE;
- */
+
return EXIT_SUCCESS;
}
diff --git a/modexp_fpga_model.h b/modexp_fpga_model.h
index f30a41b..2a91d32 100644
--- a/modexp_fpga_model.h
+++ b/modexp_fpga_model.h
@@ -31,7 +31,7 @@
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-//- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
diff --git a/modexp_fpga_model_montgomery.cpp b/modexp_fpga_model_montgomery.cpp
index e5237ff..5bc5ba4 100644
--- a/modexp_fpga_model_montgomery.cpp
+++ b/modexp_fpga_model_montgomery.cpp
@@ -64,156 +64,37 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
//
//----------------------------------------------------------------
{
- size_t i, j, k; // counters
+ size_t i; // counters
- bool select_s; // flag
+ FPGA_WORD AB[2 * MAX_OPERAND_WORDS]; // products
+ FPGA_WORD Q [ MAX_OPERAND_WORDS]; //
+ FPGA_WORD QN[2 * MAX_OPERAND_WORDS]; //
- //FPGA_WORD t_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // accumulators
- FPGA_WORD t_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- FPGA_WORD t_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
+ bool select_s; // flag
- //FPGA_WORD s_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // intermediate products
- FPGA_WORD s_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- FPGA_WORD s_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
+ FPGA_WORD c_in_s; // 1-bit carry and borrow
+ FPGA_WORD b_in_sn; //
+ FPGA_WORD c_out_s; //
+ FPGA_WORD b_out_sn; //
- //FPGA_WORD c_in_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // input carries
- FPGA_WORD c_in_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- FPGA_WORD c_in_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- //FPGA_WORD c_out_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // output carries
- FPGA_WORD c_out_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- FPGA_WORD c_out_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
+ FPGA_WORD S [2 * MAX_OPERAND_WORDS]; // final sum
+ FPGA_WORD SN[2 * MAX_OPERAND_WORDS]; // final difference
- FPGA_WORD c_in_s; // 1-bit carry and borrow
- FPGA_WORD b_in_sn; //
- FPGA_WORD c_out_s; //
- FPGA_WORD b_out_sn; //
+ // copy twice larger A into AB
+ if (reduce_only)
+ for (i=0; i<(2*len); i++)
+ AB[i] = A[i];
- FPGA_WORD AB[2 * MAX_OPERAND_WORDS]; // final products
- FPGA_WORD Q [2 * MAX_OPERAND_WORDS]; //
- FPGA_WORD QN[2 * MAX_OPERAND_WORDS]; //
+ if (!reduce_only) multiply_systolic(A, B, AB, len, 2 * len); // AB = A * B
+ multiply_systolic(AB, N_COEFF, Q, len, len); // Q = AB * N_COEFF
+ multiply_systolic(Q, N, QN, len, 2 * len); // QN = Q * N
- FPGA_WORD S [2 * MAX_OPERAND_WORDS]; // final sum
- FPGA_WORD SN[2 * MAX_OPERAND_WORDS]; // final difference
-
- // number of full systolic cycles needed to multiply entire B by one word of A
- size_t num_systolic_cycles = len / SYSTOLIC_NUM_WORDS;
-
- // adjust number of cycles
- if ((num_systolic_cycles * SYSTOLIC_NUM_WORDS) < len) num_systolic_cycles++;
-
- // initialize arrays of accumulators and carries to zeroes
- for (i=0; i<num_systolic_cycles; i++)
- for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
- /*c_in_ab[i][j] = 0,*/ c_in_q [i][j] = 0, c_in_qn[i][j] = 0,
- /*t_ab[i][j] = 0,*/ t_q [i][j] = 0, t_qn[i][j] = 0;
-
- // initialize 1-bit carry and borrow to zeroes too
+ // initialize 1-bit carry and borrow
c_in_s = 0, b_in_sn = 0;
- multiply_systolic(A, B, AB, len, 2 * len);
-
- /*
- ///////////////////////
+ // now it's time to simultaneously add and subtract
for (i = 0; i < (2 * len); i++)
- {
- for (k = 0; k < num_systolic_cycles; k++)
- {
- // simulate how a systolic array would work
- for (j = 0; j < SYSTOLIC_NUM_WORDS; j++)
- {
- size_t j_index = k * SYSTOLIC_NUM_WORDS + j;
-
- // current words of B, N_COEFF, N
- FPGA_WORD Bj = (j_index < len) ? B [k * SYSTOLIC_NUM_WORDS + j] : 0;
-
- // current word of A
- FPGA_WORD Aj_ab = (i < len) ? A[i] : 0;
-
- // AB = A * B
- pe_mul(Aj_ab, Bj, t_ab[k][j], c_in_ab[k][j], &s_ab[k][j], &c_out_ab[k][j]);
-
- // store current word of AB
- if ((k == 0) && (j == 0)) AB[i] = reduce_only ? A[i] : s_ab[0][0];
- }
-
- // propagate carries
- for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
- c_in_ab[k][j] = c_out_ab[k][j];
-
- // update accumulators
- for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
- t_ab[k][j-1] = s_ab[k][j];
-
- // update accumulators
- if (k > 0)
- t_ab[k-1][SYSTOLIC_NUM_WORDS-1] = s_ab[k][0];
-
- }
- }
- */
-
- ///////////////////////
-
-
-
-
- // simultaneously calculate AB, Q, QN, S, SN
- for (i = 0; i < (2 * len); i++)
- {
- // multiply entire B by current word of A to get AB
- // multiply entire N_COEFF by current word of AB to get Q
- // multiply entire N by current word of Q to get QN
- for (k = 0; k < num_systolic_cycles; k++)
- {
- // simulate how a systolic array would work
- for (j = 0; j < SYSTOLIC_NUM_WORDS; j++)
- {
- size_t j_index = k * SYSTOLIC_NUM_WORDS + j;
-
- // current words of B, N_COEFF, N
- FPGA_WORD N_COEFFj = (j_index < len) ? N_COEFF[k * SYSTOLIC_NUM_WORDS + j] : 0;
- FPGA_WORD Nj = (j_index < len) ? N [k * SYSTOLIC_NUM_WORDS + j] : 0;
-
- // current word of AB
- FPGA_WORD Aj_q = (i < len) ? AB[i] : 0;
-
- // Q = AB * N
- pe_mul(Aj_q, N_COEFFj, t_q[k][j], c_in_q[k][j], &s_q[k][j], &c_out_q[k][j]);
-
- // store current word of Q
- if ((k == 0) && (j == 0)) Q[i] = s_q[0][0];
-
- // current word of Q
- FPGA_WORD Aj_qn = (i < len) ? Q[i] : 0;
-
- // QN = Q * N
- pe_mul(Aj_qn, Nj, t_qn[k][j], c_in_qn[k][j], &s_qn[k][j], &c_out_qn[k][j]);
-
- // store next word of QN
- if ((k == 0) && (j == 0)) QN[i] = s_qn[0][0];
- }
-
- // propagate carries
- for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
- c_in_q [k][j] = c_out_q [k][j],
- c_in_qn[k][j] = c_out_qn[k][j];
-
- // update accumulators
- for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
- {
- t_q [k][j-1] = s_q [k][j];
- t_qn[k][j-1] = s_qn[k][j];
- }
-
- // update accumulators
- if (k > 0)
- t_q [k-1][SYSTOLIC_NUM_WORDS-1] = s_q [k][0],
- t_qn[k-1][SYSTOLIC_NUM_WORDS-1] = s_qn[k][0];
-
- }
-
- // now it's time to simultaneously add and subtract
-
+ {
// current operand words
FPGA_WORD QNi = QN[i];
FPGA_WORD Ni = (i < len) ? 0 : N[i-len];