summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2018-10-15 15:47:47 +0300
committer: Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2018-10-15 15:47:47 +0300
commit: 469110c8636aff88bde5a94f7a1596b88126547b (patch)
tree: 0cc3e2a78a3799ee5883ec61dc9ca8e6664bd509
parent: 8d002c47537fa5d1002df8353c134a9f5303cd7f (diff)
Finished porting modular multiplier.
-rw-r--r--  bench/tb_modular_multiplier.v  55
-rw-r--r--  rtl/modular_multiplier/ed25519_modular_multiplier.v  387
2 files changed, 331 insertions, 111 deletions
diff --git a/bench/tb_modular_multiplier.v b/bench/tb_modular_multiplier.v
index c3de537..46e8790 100644
--- a/bench/tb_modular_multiplier.v
+++ b/bench/tb_modular_multiplier.v
@@ -48,6 +48,9 @@ module tb_modular_multiplier;
//
localparam A1 = 256'h216936d3_cd6e53fe_c0a4e231_fdd6dc5c_692cc760_9525a7b2_c9562d60_8f25d51a; // GX
localparam B1 = 256'h66666666_66666666_66666666_66666666_66666666_66666666_66666666_66666658; // GY
+ localparam C1 = 256'h67875f0f_d78b7665_66ea4e8e_64abe37d_20f09f80_775152f5_6dde8ab3_a5b7dda3; // GT
+
+ localparam F = 256'hFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF; // FFF...F
//
@@ -163,7 +166,9 @@ module tb_modular_multiplier;
/* run tests */
$display("1. A1 * A1 = ...");
- test_modular_multiplier(A1, A1);
+ test_modular_multiplier(A1 , B1);
+ test_modular_multiplier(A1+B1, C1+C1);
+ test_modular_multiplier(F, F);
/* print result */
if (ok) $display("tb_modular_multiplier: SUCCESS");
@@ -188,11 +193,16 @@ module tb_modular_multiplier;
reg [255:0] b_shreg;
reg [255:0] p_shreg;
reg p_ok;
-
+ reg [511:0] ab;
+ reg [255:0] p_ref;
integer w;
begin
+ /* calculate reference value */
+ ab = {{256{1'b0}}, a} * {{256{1'b0}}, b};
+ p_ref = ab % {{31{8'hFF}}, 8'hDA};
+
/* initialize result */
p_ok = 0;
@@ -241,28 +251,29 @@ module tb_modular_multiplier;
/* wait for operation to complete */
while (!rdy) #`CLOCK_PERIOD;
-// /* read result */
-// for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-//
-// /* set address */
-// tb_qxy_addr = w[WORD_COUNTER_WIDTH-1:0];
-//
-// /* wait for 1 clock tick */
-// #10;
-//
-// /* store data word */
-// qx_shreg = {tb_qx_data, qx_shreg[255:32]};
-// qy_shreg = {tb_qy_data, qy_shreg[255:32]};
-//
-// end
-//
-// /* compare */
-// q_ok = (qx_shreg == qx) &&
-// (qy_shreg == qy);
-//
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_p_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #`CLOCK_PERIOD;
+
+ /* store data word */
+ p_shreg = {tb_p_data, p_shreg[255:32]};
+
+ end
+
+ /* compare */
+ p_ok = (p_shreg === p_ref);
+
/* display results */
if (p_ok) $display("test_modular_multiplier(): CORRECT RESULT");
- else $display("test_modular_multiplier(): WRONG RESULT");
+ else begin
+ $display("test_modular_multiplier(): WRONG RESULT");
+ $display("XOR: %x", p_shreg ^ p_ref);
+ end
/* update global flag */
ok = ok & p_ok;
diff --git a/rtl/modular_multiplier/ed25519_modular_multiplier.v b/rtl/modular_multiplier/ed25519_modular_multiplier.v
index 56c8537..012091a 100644
--- a/rtl/modular_multiplier/ed25519_modular_multiplier.v
+++ b/rtl/modular_multiplier/ed25519_modular_multiplier.v
@@ -120,11 +120,74 @@ module ed25519_modular_multiplier
//
// FSM
//
- localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 1 +
- 2 * OPERAND_NUM_WORDS + 1 +
- 2 * OPERAND_NUM_WORDS + 2 +
- 0 * OPERAND_NUM_WORDS + 2 +
- 0 * OPERAND_NUM_WORDS + 1;
+
+
+ localparam integer PHASE_INCREMENT_INDEX_A_OFFSET = 0;
+ localparam integer PHASE_INCREMENT_INDEX_A_DURATION = OPERAND_NUM_WORDS;
+
+ localparam integer PHASE_DECREMENT_INDEX_B_OFFSET = PHASE_INCREMENT_INDEX_A_DURATION;
+ localparam integer PHASE_DECREMENT_INDEX_B_DURATION = OPERAND_NUM_WORDS * 2;
+
+ localparam integer PHASE_STORE_MSB_SI_OFFSET = PHASE_DECREMENT_INDEX_B_OFFSET + 2;
+ localparam integer PHASE_STORE_MSB_SI_DURATION = OPERAND_NUM_WORDS * 2 - 1;
+
+ localparam integer PHASE_STORE_LSB_SI_OFFSET = PHASE_STORE_MSB_SI_OFFSET +
+ PHASE_STORE_MSB_SI_DURATION;
+ localparam integer PHASE_STORE_LSB_SI_DURATION = 1;
+
+ localparam integer PHASE_SHIFT_SI_OFFSET = PHASE_STORE_LSB_SI_OFFSET + 1;
+ localparam integer PHASE_SHIFT_SI_DURATION = OPERAND_NUM_WORDS * 2 - 1;
+
+ localparam integer PHASE_MASK_SUM_CW1_OFFSET = PHASE_SHIFT_SI_OFFSET + 1;
+ localparam integer PHASE_MASK_SUM_CW1_DURATION = 1;
+
+ localparam integer PHASE_STORE_LSB_C_OFFSET = PHASE_MASK_SUM_CW1_OFFSET + 1;
+ localparam integer PHASE_STORE_LSB_C_DURATION = OPERAND_NUM_WORDS;
+
+ localparam integer PHASE_STORE_MSB_C_OFFSET = PHASE_STORE_LSB_C_OFFSET +
+ PHASE_STORE_LSB_C_DURATION;
+ localparam integer PHASE_STORE_MSB_C_DURATION = OPERAND_NUM_WORDS;
+
+ localparam integer PHASE_MASK_B_R3_OFFSET = PHASE_STORE_MSB_C_OFFSET + 3;
+ localparam integer PHASE_MASK_B_R3_DURATION = 1;
+
+ localparam integer PHASE_CALCULATE_CARRY_MSB_S1_OFFSET = PHASE_STORE_MSB_C_OFFSET +
+ PHASE_STORE_MSB_C_DURATION + 4;
+ localparam integer PHASE_CALCULATE_CARRY_MSB_S1_DURATION = 1;
+
+ localparam integer PHASE_STORE_LSB_S1_OFFSET = PHASE_STORE_MSB_C_OFFSET + 4;
+ localparam integer PHASE_STORE_LSB_S1_DURATION = OPERAND_NUM_WORDS;
+
+ localparam integer PHASE_SHIFT_S1_OFFSET = PHASE_STORE_LSB_S1_OFFSET +
+ PHASE_STORE_LSB_S1_DURATION + 1;
+ localparam integer PHASE_SHIFT_S1_DURATION = OPERAND_NUM_WORDS;
+
+ localparam integer PHASE_CHANGE_LSB_B_P_OFFSET = PHASE_SHIFT_S1_OFFSET;
+ localparam integer PHASE_CHANGE_LSB_B_P_DURATION = 1;
+
+ localparam integer PHASE_SELECT_S2_OR_PN_OFFSET = PHASE_SHIFT_S1_OFFSET +
+ PHASE_SHIFT_S1_DURATION + 1;
+ localparam integer PHASE_SELECT_S2_OR_PN_DURATION = 1;
+
+ localparam integer PHASE_UPDATE_P_DOUT_OFFSET = PHASE_SHIFT_S1_OFFSET +
+ PHASE_SHIFT_S1_DURATION + 2;
+ localparam integer PHASE_UPDATE_P_DOUT_DURATION = OPERAND_NUM_WORDS;
+
+
+
+ localparam integer FSM_SHREG_WIDTH = PHASE_INCREMENT_INDEX_A_DURATION +
+ PHASE_DECREMENT_INDEX_B_DURATION +
+ 1 +
+ PHASE_STORE_LSB_SI_DURATION +
+ PHASE_SHIFT_SI_DURATION +
+ -1 +
+ PHASE_STORE_LSB_S1_DURATION +
+ PHASE_CALCULATE_CARRY_MSB_S1_DURATION +
+ PHASE_SHIFT_S1_DURATION +
+ 1 +
+ PHASE_SELECT_S2_OR_PN_DURATION +
+ PHASE_UPDATE_P_DOUT_DURATION +
+ 2;
localparam [FSM_SHREG_WIDTH-1:0] FSM_SHREG_INIT = {{(FSM_SHREG_WIDTH-1){1'b0}}, 1'b1};
@@ -132,39 +195,66 @@ module ed25519_modular_multiplier
assign rdy = fsm_shreg[0];
- wire [1*OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH-(0*OPERAND_NUM_WORDS+1):FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0)];
- wire [1*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH-(0*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+1)];
- wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_part_b = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+0)];
- wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_dec_index_b = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+0)];
- wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_enable_mac_ab = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+1):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+1)];
- wire [2*OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+1)];
- wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+2)];
- wire [2*OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+3):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+1)];
- wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+4):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+4)];
- wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+5):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+4)];
- wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+5):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+5)];
- wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_wait = fsm_shreg[FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+6):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+6)];
-
- wire inc_index_a = |fsm_shreg_inc_index_a;
- wire store_word_a = |fsm_shreg_store_word_a;
- wire store_part_b = |fsm_shreg_store_part_b;
- wire dec_index_b = |fsm_shreg_dec_index_b;
- wire enable_mac_ab = |fsm_shreg_enable_mac_ab;
- wire store_si_msb = |fsm_shreg_store_si_msb;
- wire store_si_lsb = |fsm_shreg_store_si_lsb;
- wire shift_si = |fsm_shreg_shift_si;
- wire mask_cw1_sum = |fsm_shreg_mask_cw1_sum;
- wire store_c_word = |fsm_shreg_store_c_word;
- wire reduce_start = |fsm_shreg_reduce_start;
- wire reduce_wait = |fsm_shreg_reduce_wait;
+
+
+
+
+
+
+ wire [PHASE_INCREMENT_INDEX_A_DURATION -1:0] fsm_shreg_increment_index_a = fsm_shreg[FSM_SHREG_WIDTH - PHASE_INCREMENT_INDEX_A_OFFSET - 1 -: PHASE_INCREMENT_INDEX_A_DURATION];
+ wire [PHASE_DECREMENT_INDEX_B_DURATION -1:0] fsm_shreg_decrement_index_b = fsm_shreg[FSM_SHREG_WIDTH - PHASE_DECREMENT_INDEX_B_OFFSET - 1 -: PHASE_DECREMENT_INDEX_B_DURATION];
+ wire [PHASE_STORE_MSB_SI_DURATION -1:0] fsm_shreg_store_msb_si = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_MSB_SI_OFFSET - 1 -: PHASE_STORE_MSB_SI_DURATION];
+ wire [PHASE_STORE_LSB_SI_DURATION -1:0] fsm_shreg_store_lsb_si = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_LSB_SI_OFFSET - 1 -: PHASE_STORE_LSB_SI_DURATION];
+ wire [PHASE_SHIFT_SI_DURATION -1:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - PHASE_SHIFT_SI_OFFSET - 1 -: PHASE_SHIFT_SI_DURATION];
+ wire [PHASE_MASK_SUM_CW1_DURATION -1:0] fsm_shreg_mask_sum_cw1 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_MASK_SUM_CW1_OFFSET - 1 -: PHASE_MASK_SUM_CW1_DURATION];
+ wire [PHASE_STORE_LSB_C_DURATION -1:0] fsm_shreg_store_lsb_c = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_LSB_C_OFFSET - 1 -: PHASE_STORE_LSB_C_DURATION];
+ wire [PHASE_STORE_MSB_C_DURATION -1:0] fsm_shreg_store_msb_c = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_MSB_C_OFFSET - 1 -: PHASE_STORE_MSB_C_DURATION];
+ wire [PHASE_MASK_B_R3_DURATION -1:0] fsm_shreg_mask_b_r3 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_MASK_B_R3_OFFSET - 1 -: PHASE_MASK_B_R3_DURATION];
+ wire [PHASE_CALCULATE_CARRY_MSB_S1_DURATION-1:0] fsm_shreg_calculate_carry_msb_s1 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_CALCULATE_CARRY_MSB_S1_OFFSET - 1 -: PHASE_CALCULATE_CARRY_MSB_S1_DURATION];
+ wire [PHASE_STORE_LSB_S1_DURATION -1:0] fsm_shreg_store_lsb_s1 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_LSB_S1_OFFSET - 1 -: PHASE_STORE_LSB_S1_DURATION];
+ wire [PHASE_SHIFT_S1_DURATION -1:0] fsm_shreg_shift_s1 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_SHIFT_S1_OFFSET - 1 -: PHASE_SHIFT_S1_DURATION];
+ wire [PHASE_CHANGE_LSB_B_P_DURATION -1:0] fsm_shreg_change_lsb_b_p = fsm_shreg[FSM_SHREG_WIDTH - PHASE_CHANGE_LSB_B_P_OFFSET - 1 -: PHASE_CHANGE_LSB_B_P_DURATION];
+ wire [PHASE_SELECT_S2_OR_PN_DURATION -1:0] fsm_shreg_select_s2_or_pn = fsm_shreg[FSM_SHREG_WIDTH - PHASE_SELECT_S2_OR_PN_OFFSET - 1 -: PHASE_SELECT_S2_OR_PN_DURATION];
+ wire [PHASE_UPDATE_P_DOUT_DURATION -1:0] fsm_shreg_update_p_dout = fsm_shreg[FSM_SHREG_WIDTH - PHASE_UPDATE_P_DOUT_OFFSET - 1 -: PHASE_UPDATE_P_DOUT_DURATION];
+
+ wire flag_increment_index_a = |fsm_shreg_increment_index_a;
+ wire flag_decrement_index_b = |fsm_shreg_decrement_index_b;
+ wire flag_store_msb_si = |fsm_shreg_store_msb_si;
+ wire flag_store_lsb_si = |fsm_shreg_store_lsb_si;
+ wire flag_shift_si = |fsm_shreg_shift_si;
+ wire flag_mask_sum_cw1 = |fsm_shreg_mask_sum_cw1;
+ wire flag_store_lsb_c = |fsm_shreg_store_lsb_c;
+ wire flag_store_msb_c = |fsm_shreg_store_msb_c;
+ wire flag_mask_b_r3 = |fsm_shreg_mask_b_r3;
+ wire flag_calculate_carry_msb_s1 = |fsm_shreg_calculate_carry_msb_s1;
+ wire flag_store_lsb_s1 = |fsm_shreg_store_lsb_s1;
+ wire flag_shift_s1 = |fsm_shreg_shift_s1;
+ wire flag_change_lsb_b_p = |fsm_shreg_change_lsb_b_p;
+ wire flag_select_s2_or_pn = |fsm_shreg_select_s2_or_pn;
+ wire flag_update_p_dout = |fsm_shreg_update_p_dout;
+
+ reg flag_store_word_a = 0;
+ reg flag_enable_mac_ab = 0;
+ reg flag_delay_msb_c = 0;
+ reg flag_mask_a_s2 = 0;
+ reg flag_mask_b_out_p = 0;
+ reg flag_store_s2 = 0;
+ reg flag_store_pn = 0;
+
+ always @(posedge clk) begin
+ flag_store_word_a <= flag_increment_index_a;
+ flag_enable_mac_ab <= flag_decrement_index_b;
+ flag_delay_msb_c <= flag_store_msb_c;
+ flag_mask_a_s2 <= flag_calculate_carry_msb_s1;
+ flag_mask_b_out_p <= flag_change_lsb_b_p;
+ flag_store_s2 <= flag_shift_s1;
+ flag_store_pn <= flag_store_s2;
+ end
//
// FSM Logic
- //
- wire reduce_done;
- wire fsm_freeze = reduce_wait && !reduce_done;
-
+ //
always @(posedge clk or negedge rst_n)
//
if (rst_n == 1'b0)
@@ -173,8 +263,8 @@ module ed25519_modular_multiplier
//
else begin
//
- if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- else if (!fsm_freeze) fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
end
@@ -184,7 +274,7 @@ module ed25519_modular_multiplier
always @(posedge clk)
//
if (rdy) index_a <= WORD_INDEX_ZERO;
- else if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
+ else if (flag_increment_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
//
@@ -193,7 +283,7 @@ module ed25519_modular_multiplier
always @(posedge clk)
//
if (rdy) index_b <= WORD_INDEX_LAST;
- else if (dec_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
+ else if (flag_decrement_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
//
@@ -201,10 +291,6 @@ module ed25519_modular_multiplier
//
reg [255:0] buf_a_wide;
- always @(posedge clk)
- //
- if (store_word_a) buf_a_wide <= {buf_a_wide[16+:256-3*16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256-2*16+:16]};
- else if (enable_mac_ab) buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
//
@@ -220,7 +306,7 @@ module ed25519_modular_multiplier
always @(posedge clk)
//
- if (dec_index_b) index_b_ff <= ~index_b_ff;
+ if (flag_decrement_index_b) index_b_ff <= ~index_b_ff;
else index_b_ff <= 1'b0;
@@ -231,7 +317,7 @@ module ed25519_modular_multiplier
always @(posedge clk)
//
- if (store_part_b) buf_b_narrow <= !index_b_ff ? b_din[31:16] : b_din[15:0];
+ if (flag_decrement_index_b) buf_b_narrow <= !index_b_ff ? b_din[31:16] : b_din[15:0];
//
@@ -241,7 +327,7 @@ module ed25519_modular_multiplier
always @(posedge clk)
//
- if (!enable_mac_ab) mac_clear <= {16{1'b1}};
+ if (!flag_enable_mac_ab) mac_clear <= {16{1'b1}};
else begin
if (mac_clear[0]) mac_clear <= 16'b0000000000000010;
else if (mac_clear[15]) mac_clear <= 16'b1111111111111111;
@@ -263,7 +349,7 @@ module ed25519_modular_multiplier
`ED25519_MAC16_PRIMITIVE mac16_inst
(
.clk (clk),
- .ce (enable_mac_ab),
+ .ce (flag_enable_mac_ab),
.clr (mac_clear[i]),
@@ -283,7 +369,6 @@ module ed25519_modular_multiplier
reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;
-
wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;
@@ -301,22 +386,25 @@ module ed25519_modular_multiplier
always @(posedge clk)
//
- if (shift_si) begin
+ if (flag_shift_si) begin
si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
end else begin
- if (store_si_msb) si_msb <= si_msb_new;
- if (store_si_lsb) si_lsb <= si_lsb_new;
+ if (flag_store_msb_si) si_msb <= si_msb_new;
+ if (flag_store_lsb_si) si_lsb <= si_lsb_new;
end
//
// Accumulators
//
- wire [46: 0] add47_cw0_s;
- wire [46: 0] add47_cw1_s;
-
+ wire [46:0] add47_cw0_s;
+ wire [46:0] add47_cw1_s;
+ wire [14:0] add47_cw1_s_masked = flag_mask_sum_cw1 ? {15{1'b0}} : add47_cw1_s[32+:15];
+
+ wire [46:0] add47_r3_b_masked = {{32{1'b0}}, flag_mask_b_r3 ? {15{1'b0}} : add47_r3_s[46:32]};
+
//
// cw0, cw1
//
@@ -325,7 +413,7 @@ module ed25519_modular_multiplier
always @(posedge clk)
//
- if (shift_si) si_prev_dly <= si_lsb[93:63];
+ if (flag_shift_si) si_prev_dly <= si_lsb[93:63];
else si_prev_dly <= {31{1'b0}};
always @(posedge clk)
@@ -336,7 +424,7 @@ module ed25519_modular_multiplier
wire [46:0] add47_cw0_b = {{16{1'b0}}, si_prev_dly};
wire [46:0] add47_cw1_a = add47_cw0_s;
- wire [46:0] add47_cw1_b = {{15{1'b0}}, si_next_dly, 1'b0, mask_cw1_sum ? {15{1'b0}} : add47_cw1_s[32+:15]};
+ wire [46:0] add47_cw1_b = {{15{1'b0}}, si_next_dly, 1'b0, add47_cw1_s_masked};
`ED25519_ADD47_PRIMITIVE add47_cw0_inst
(
@@ -353,60 +441,181 @@ module ed25519_modular_multiplier
.b (add47_cw1_b),
.s (add47_cw1_s)
);
-
+
//
// Full-Size Product
//
- reg [WORD_COUNTER_WIDTH:0] bram_c_addr;
+ wire [31:0] c_word_lower = add47_cw1_s[31:0];
+
+
+ wire [46:0] add47_r0_s;
+ wire [46:0] add47_r1_s;
+ wire [46:0] add47_r2_s;
+ wire [46:0] add47_r3_s;
+
+ reg [255:0] c_lsb_s1_shreg;
+ reg [ 31:0] c_msb_latch;
+
- wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
- wire [ 31:0] reduce_c_word;
+
+
always @(posedge clk)
//
- if (store_c_word) bram_c_addr <= bram_c_addr + 1'b1;
- else bram_c_addr <= {(2*WORD_COUNTER_WIDTH){1'b0}};
-
- bram_1rw_1ro_readfirst #
+ if (flag_store_msb_c) c_msb_latch <= c_word_lower;
+ else c_msb_latch <= {32{1'b0}};
+
+
+ reg [4:0] c_msb_latch_upper_dly;
+ reg [31:0] c_lsb_shreg_lower_dly;
+
+ always @(posedge clk)
+ //
+ if (flag_delay_msb_c) c_msb_latch_upper_dly <= c_msb_latch[31:27];
+ else c_msb_latch_upper_dly <= {5{1'b0}};
+
+
+ always @(posedge clk)
+ //
+ if (flag_store_msb_c) c_lsb_shreg_lower_dly <= c_lsb_s1_shreg[31:0];
+ else c_lsb_shreg_lower_dly <= {32{1'b0}};
+
+
+
+ reg [11:0] carry_msb_s1;
+
+ always @(posedge clk)
+ //
+ if (flag_calculate_carry_msb_s1) carry_msb_s1 <= {{6{1'b0}}, 6'd38} * {{6{1'b0}}, add47_r3_s[5:0]};
+
+
+ wire [46:0] add47_s2_a_masked = {{32{1'b0}}, flag_mask_a_s2 ? {3'b000, carry_msb_s1} : add47_s2_s[46:32]};
+
+ `ED25519_ADD47_PRIMITIVE add47_r0
(
- .MEM_WIDTH(32),
- .MEM_ADDR_BITS(WORD_COUNTER_WIDTH + 1)
- )
- bram_c_inst
+ .clk (clk),
+ .a ({{15{1'b0}}, c_msb_latch[30:0], c_msb_latch_upper_dly[4]}),
+ .b ({{15{1'b0}}, c_msb_latch[29:0], c_msb_latch_upper_dly[4:3]}),
+ .s (add47_r0_s)
+ );
+ `ED25519_ADD47_PRIMITIVE add47_r1
(
.clk (clk),
+ .a ({{15{1'b0}}, c_msb_latch[26:0], c_msb_latch_upper_dly[4:0]}),
+ .b ({{15{1'b0}}, c_lsb_shreg_lower_dly}),
+ .s (add47_r1_s)
+ );
+ `ED25519_ADD47_PRIMITIVE add47_r2
+ (
+ .clk (clk),
+ .a (add47_r0_s),
+ .b (add47_r1_s),
+ .s (add47_r2_s)
+ );
+ `ED25519_ADD47_PRIMITIVE add47_r3
+ (
+ .clk (clk),
+ .a (add47_r2_s),
+ .b (add47_r3_b_masked),
+ .s (add47_r3_s)
+ );
+
+
+
+ wire [46:0] add47_s2_s;
+ `ED25519_ADD47_PRIMITIVE add47_s2
+ (
+ .clk (clk),
+ .a (add47_s2_a_masked),
+ .b ({{15{1'b0}}, c_lsb_s1_shreg[31:0]}),
+ .s (add47_s2_s)
+ );
+
+
+ reg sub32_b_bit;
+
+ wire [31:0] sub32_b = {{26{1'b1}}, // ...*11*1*
+ sub32_b_bit, {2{1'b1}}, sub32_b_bit, 1'b1, sub32_b_bit};
+
+ always @(posedge clk)
+ //
+ if (!fsm_shreg_change_lsb_b_p) sub32_b_bit <= 1'b1;
+ else sub32_b_bit <= 1'b0;
+
+ wire [31:0] sub32_pn_d;
+ wire sub32_b_in;
+ wire sub32_b_out;
- .a_addr (bram_c_addr),
- .a_wr (store_c_word),
- .a_in (add47_cw1_s[31:0]),
- .a_out (),
-
- .b_addr (reduce_c_addr),
- .b_out (reduce_c_word)
+ assign sub32_b_in = sub32_b_out & !flag_mask_b_out_p;
+
+ `ED25519_SUB32_PRIMITIVE sub32_pn
+ (
+ .clk (clk),
+ .a (add47_s2_s[31:0]),
+ .b (sub32_b),
+ .d (sub32_pn_d),
+ .b_in (sub32_b_in),
+ .b_out (sub32_b_out)
);
+
+ wire [31:0] add47_r3_s_lower = add47_r3_s[31:0];
+
- //
- // Reduction Stage
- //
- ed25519_modular_reductor reductor_inst
- (
- .clk (clk),
- .rst_n (rst_n),
+ always @(posedge clk)
+ //
+ if (flag_store_word_a) buf_a_wide <= {buf_a_wide[16+:256-3*16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256-2*16+:16]};
+ else if (flag_enable_mac_ab) buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
+ else if (flag_store_s2) buf_a_wide <= {add47_s2_s[31:0], buf_a_wide[255:32]};
+ else if (flag_update_p_dout) buf_a_wide <= {{32{1'bX}}, buf_a_wide[255:32]};
- .ena (reduce_start),
- .rdy (reduce_done),
+
+ always @(posedge clk)
+ //
+ if (flag_store_lsb_c) c_lsb_s1_shreg <= {c_word_lower, c_lsb_s1_shreg[255:32]};
+ else if (flag_store_lsb_s1) c_lsb_s1_shreg <= {add47_r3_s_lower, c_lsb_s1_shreg[255:32]};
+ else if (flag_store_pn) c_lsb_s1_shreg <= {sub32_pn_d, c_lsb_s1_shreg[255:32]};
+ else if (flag_store_msb_c || flag_shift_s1) c_lsb_s1_shreg <= {{32{1'b0}}, c_lsb_s1_shreg[255:32]};
+ else if (flag_update_p_dout) c_lsb_s1_shreg <= {{32{1'b0}}, c_lsb_s1_shreg[255:32]};
- .x_addr (reduce_c_addr),
- .y_addr (p_addr),
- .y_wren (p_wren),
- .x_din (reduce_c_word),
- .y_dout (p_dout)
- );
+ reg sel_pn; // 0: output in S2, 1: output in PN
+
+ always @(posedge clk)
+ //
+ if (flag_select_s2_or_pn) sel_pn <= sub32_b_out & add47_s2_s[0];
+
+
+ reg [31:0] p_dout_reg;
+
+ assign p_dout = p_dout_reg;
+
+ always @(posedge clk)
+ //
+ if (flag_update_p_dout) p_dout_reg <= sel_pn ? c_lsb_s1_shreg[31:0] : buf_a_wide[31:0];
+ else p_dout_reg <= {32{1'bX}};
+
+ reg p_wren_reg = 0;
+
+ assign p_wren = p_wren_reg;
+
+ always @(posedge clk)
+ //
+ p_wren_reg <= flag_update_p_dout;
+
+ reg [WORD_COUNTER_WIDTH-1:0] p_addr_reg;
+
+ assign p_addr = p_addr_reg;
+
+ always @(posedge clk)
+ //
+ if (p_wren_reg) p_addr_reg <= WORD_INDEX_NEXT_OR_ZERO(p_addr_reg);
+ else p_addr_reg <= WORD_INDEX_ZERO;
+
+
endmodule