diff options
| field | value | date |
|---|---|---|
| author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2018-10-15 15:47:47 +0300 |
| committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2018-10-15 15:47:47 +0300 |
| commit | 469110c8636aff88bde5a94f7a1596b88126547b (patch) | |
| tree | 0cc3e2a78a3799ee5883ec61dc9ca8e6664bd509 /rtl | |
| parent | 8d002c47537fa5d1002df8353c134a9f5303cd7f (diff) | |
Finished porting modular multiplier.
Diffstat (limited to 'rtl')
-rw-r--r-- | rtl/modular_multiplier/ed25519_modular_multiplier.v | 387 |
1 file changed, 298 insertions, 89 deletions
diff --git a/rtl/modular_multiplier/ed25519_modular_multiplier.v b/rtl/modular_multiplier/ed25519_modular_multiplier.v index 56c8537..012091a 100644 --- a/rtl/modular_multiplier/ed25519_modular_multiplier.v +++ b/rtl/modular_multiplier/ed25519_modular_multiplier.v @@ -120,11 +120,74 @@ module ed25519_modular_multiplier // // FSM // - localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 1 + - 2 * OPERAND_NUM_WORDS + 1 + - 2 * OPERAND_NUM_WORDS + 2 + - 0 * OPERAND_NUM_WORDS + 2 + - 0 * OPERAND_NUM_WORDS + 1; + + + localparam integer PHASE_INCREMENT_INDEX_A_OFFSET = 0; + localparam integer PHASE_INCREMENT_INDEX_A_DURATION = OPERAND_NUM_WORDS; + + localparam integer PHASE_DECREMENT_INDEX_B_OFFSET = PHASE_INCREMENT_INDEX_A_DURATION; + localparam integer PHASE_DECREMENT_INDEX_B_DURATION = OPERAND_NUM_WORDS * 2; + + localparam integer PHASE_STORE_MSB_SI_OFFSET = PHASE_DECREMENT_INDEX_B_OFFSET + 2; + localparam integer PHASE_STORE_MSB_SI_DURATION = OPERAND_NUM_WORDS * 2 - 1; + + localparam integer PHASE_STORE_LSB_SI_OFFSET = PHASE_STORE_MSB_SI_OFFSET + + PHASE_STORE_MSB_SI_DURATION; + localparam integer PHASE_STORE_LSB_SI_DURATION = 1; + + localparam integer PHASE_SHIFT_SI_OFFSET = PHASE_STORE_LSB_SI_OFFSET + 1; + localparam integer PHASE_SHIFT_SI_DURATION = OPERAND_NUM_WORDS * 2 - 1; + + localparam integer PHASE_MASK_SUM_CW1_OFFSET = PHASE_SHIFT_SI_OFFSET + 1; + localparam integer PHASE_MASK_SUM_CW1_DURATION = 1; + + localparam integer PHASE_STORE_LSB_C_OFFSET = PHASE_MASK_SUM_CW1_OFFSET + 1; + localparam integer PHASE_STORE_LSB_C_DURATION = OPERAND_NUM_WORDS; + + localparam integer PHASE_STORE_MSB_C_OFFSET = PHASE_STORE_LSB_C_OFFSET + + PHASE_STORE_LSB_C_DURATION; + localparam integer PHASE_STORE_MSB_C_DURATION = OPERAND_NUM_WORDS; + + localparam integer PHASE_MASK_B_R3_OFFSET = PHASE_STORE_MSB_C_OFFSET + 3; + localparam integer PHASE_MASK_B_R3_DURATION = 1; + + localparam integer PHASE_CALCULATE_CARRY_MSB_S1_OFFSET = PHASE_STORE_MSB_C_OFFSET + + 
PHASE_STORE_MSB_C_DURATION + 4; + localparam integer PHASE_CALCULATE_CARRY_MSB_S1_DURATION = 1; + + localparam integer PHASE_STORE_LSB_S1_OFFSET = PHASE_STORE_MSB_C_OFFSET + 4; + localparam integer PHASE_STORE_LSB_S1_DURATION = OPERAND_NUM_WORDS; + + localparam integer PHASE_SHIFT_S1_OFFSET = PHASE_STORE_LSB_S1_OFFSET + + PHASE_STORE_LSB_S1_DURATION + 1; + localparam integer PHASE_SHIFT_S1_DURATION = OPERAND_NUM_WORDS; + + localparam integer PHASE_CHANGE_LSB_B_P_OFFSET = PHASE_SHIFT_S1_OFFSET; + localparam integer PHASE_CHANGE_LSB_B_P_DURATION = 1; + + localparam integer PHASE_SELECT_S2_OR_PN_OFFSET = PHASE_SHIFT_S1_OFFSET + + PHASE_SHIFT_S1_DURATION + 1; + localparam integer PHASE_SELECT_S2_OR_PN_DURATION = 1; + + localparam integer PHASE_UPDATE_P_DOUT_OFFSET = PHASE_SHIFT_S1_OFFSET + + PHASE_SHIFT_S1_DURATION + 2; + localparam integer PHASE_UPDATE_P_DOUT_DURATION = OPERAND_NUM_WORDS; + + + + localparam integer FSM_SHREG_WIDTH = PHASE_INCREMENT_INDEX_A_DURATION + + PHASE_DECREMENT_INDEX_B_DURATION + + 1 + + PHASE_STORE_LSB_SI_DURATION + + PHASE_SHIFT_SI_DURATION + + -1 + + PHASE_STORE_LSB_S1_DURATION + + PHASE_CALCULATE_CARRY_MSB_S1_DURATION + + PHASE_SHIFT_S1_DURATION + + 1 + + PHASE_SELECT_S2_OR_PN_DURATION + + PHASE_UPDATE_P_DOUT_DURATION + + 2; localparam [FSM_SHREG_WIDTH-1:0] FSM_SHREG_INIT = {{(FSM_SHREG_WIDTH-1){1'b0}}, 1'b1}; @@ -132,39 +195,66 @@ module ed25519_modular_multiplier assign rdy = fsm_shreg[0]; - wire [1*OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH-(0*OPERAND_NUM_WORDS+1):FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0)]; - wire [1*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH-(0*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+1)]; - wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_part_b = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+0)]; - wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_dec_index_b = 
fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+0)]; - wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_enable_mac_ab = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+1):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+1)]; - wire [2*OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+1)]; - wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+2)]; - wire [2*OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+3):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+1)]; - wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+4):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+4)]; - wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+5):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+4)]; - wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+5):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+5)]; - wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_wait = fsm_shreg[FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+6):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+6)]; - - wire inc_index_a = |fsm_shreg_inc_index_a; - wire store_word_a = |fsm_shreg_store_word_a; - wire store_part_b = |fsm_shreg_store_part_b; - wire dec_index_b = |fsm_shreg_dec_index_b; - wire enable_mac_ab = |fsm_shreg_enable_mac_ab; - wire store_si_msb = |fsm_shreg_store_si_msb; - wire store_si_lsb = |fsm_shreg_store_si_lsb; - wire shift_si = |fsm_shreg_shift_si; - wire mask_cw1_sum = |fsm_shreg_mask_cw1_sum; - wire store_c_word = |fsm_shreg_store_c_word; - wire reduce_start = |fsm_shreg_reduce_start; - wire reduce_wait = |fsm_shreg_reduce_wait; + + + + + + + wire [PHASE_INCREMENT_INDEX_A_DURATION -1:0] fsm_shreg_increment_index_a = fsm_shreg[FSM_SHREG_WIDTH - PHASE_INCREMENT_INDEX_A_OFFSET - 1 -: 
PHASE_INCREMENT_INDEX_A_DURATION]; + wire [PHASE_DECREMENT_INDEX_B_DURATION -1:0] fsm_shreg_decrement_index_b = fsm_shreg[FSM_SHREG_WIDTH - PHASE_DECREMENT_INDEX_B_OFFSET - 1 -: PHASE_DECREMENT_INDEX_B_DURATION]; + wire [PHASE_STORE_MSB_SI_DURATION -1:0] fsm_shreg_store_msb_si = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_MSB_SI_OFFSET - 1 -: PHASE_STORE_MSB_SI_DURATION]; + wire [PHASE_STORE_LSB_SI_DURATION -1:0] fsm_shreg_store_lsb_si = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_LSB_SI_OFFSET - 1 -: PHASE_STORE_LSB_SI_DURATION]; + wire [PHASE_SHIFT_SI_DURATION -1:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - PHASE_SHIFT_SI_OFFSET - 1 -: PHASE_SHIFT_SI_DURATION]; + wire [PHASE_MASK_SUM_CW1_DURATION -1:0] fsm_shreg_mask_sum_cw1 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_MASK_SUM_CW1_OFFSET - 1 -: PHASE_MASK_SUM_CW1_DURATION]; + wire [PHASE_STORE_LSB_C_DURATION -1:0] fsm_shreg_store_lsb_c = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_LSB_C_OFFSET - 1 -: PHASE_STORE_LSB_C_DURATION]; + wire [PHASE_STORE_MSB_C_DURATION -1:0] fsm_shreg_store_msb_c = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_MSB_C_OFFSET - 1 -: PHASE_STORE_MSB_C_DURATION]; + wire [PHASE_MASK_B_R3_DURATION -1:0] fsm_shreg_mask_b_r3 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_MASK_B_R3_OFFSET - 1 -: PHASE_MASK_B_R3_DURATION]; + wire [PHASE_CALCULATE_CARRY_MSB_S1_DURATION-1:0] fsm_shreg_calculate_carry_msb_s1 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_CALCULATE_CARRY_MSB_S1_OFFSET - 1 -: PHASE_CALCULATE_CARRY_MSB_S1_DURATION]; + wire [PHASE_STORE_LSB_S1_DURATION -1:0] fsm_shreg_store_lsb_s1 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_LSB_S1_OFFSET - 1 -: PHASE_STORE_LSB_S1_DURATION]; + wire [PHASE_SHIFT_S1_DURATION -1:0] fsm_shreg_shift_s1 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_SHIFT_S1_OFFSET - 1 -: PHASE_SHIFT_S1_DURATION]; + wire [PHASE_CHANGE_LSB_B_P_DURATION -1:0] fsm_shreg_change_lsb_b_p = fsm_shreg[FSM_SHREG_WIDTH - PHASE_CHANGE_LSB_B_P_OFFSET - 1 -: PHASE_CHANGE_LSB_B_P_DURATION]; + wire [PHASE_SELECT_S2_OR_PN_DURATION -1:0] 
fsm_shreg_select_s2_or_pn = fsm_shreg[FSM_SHREG_WIDTH - PHASE_SELECT_S2_OR_PN_OFFSET - 1 -: PHASE_SELECT_S2_OR_PN_DURATION]; + wire [PHASE_UPDATE_P_DOUT_DURATION -1:0] fsm_shreg_update_p_dout = fsm_shreg[FSM_SHREG_WIDTH - PHASE_UPDATE_P_DOUT_OFFSET - 1 -: PHASE_UPDATE_P_DOUT_DURATION]; + + wire flag_increment_index_a = |fsm_shreg_increment_index_a; + wire flag_decrement_index_b = |fsm_shreg_decrement_index_b; + wire flag_store_msb_si = |fsm_shreg_store_msb_si; + wire flag_store_lsb_si = |fsm_shreg_store_lsb_si; + wire flag_shift_si = |fsm_shreg_shift_si; + wire flag_mask_sum_cw1 = |fsm_shreg_mask_sum_cw1; + wire flag_store_lsb_c = |fsm_shreg_store_lsb_c; + wire flag_store_msb_c = |fsm_shreg_store_msb_c; + wire flag_mask_b_r3 = |fsm_shreg_mask_b_r3; + wire flag_calculate_carry_msb_s1 = |fsm_shreg_calculate_carry_msb_s1; + wire flag_store_lsb_s1 = |fsm_shreg_store_lsb_s1; + wire flag_shift_s1 = |fsm_shreg_shift_s1; + wire flag_change_lsb_b_p = |fsm_shreg_change_lsb_b_p; + wire flag_select_s2_or_pn = |fsm_shreg_select_s2_or_pn; + wire flag_update_p_dout = |fsm_shreg_update_p_dout; + + reg flag_store_word_a = 0; + reg flag_enable_mac_ab = 0; + reg flag_delay_msb_c = 0; + reg flag_mask_a_s2 = 0; + reg flag_mask_b_out_p = 0; + reg flag_store_s2 = 0; + reg flag_store_pn = 0; + + always @(posedge clk) begin + flag_store_word_a <= flag_increment_index_a; + flag_enable_mac_ab <= flag_decrement_index_b; + flag_delay_msb_c <= flag_store_msb_c; + flag_mask_a_s2 <= flag_calculate_carry_msb_s1; + flag_mask_b_out_p <= flag_change_lsb_b_p; + flag_store_s2 <= flag_shift_s1; + flag_store_pn <= flag_store_s2; + end // // FSM Logic - // - wire reduce_done; - wire fsm_freeze = reduce_wait && !reduce_done; - + // always @(posedge clk or negedge rst_n) // if (rst_n == 1'b0) @@ -173,8 +263,8 @@ module ed25519_modular_multiplier // else begin // - if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; - else if (!fsm_freeze) fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + if 
(rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; end @@ -184,7 +274,7 @@ module ed25519_modular_multiplier always @(posedge clk) // if (rdy) index_a <= WORD_INDEX_ZERO; - else if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); + else if (flag_increment_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); // @@ -193,7 +283,7 @@ module ed25519_modular_multiplier always @(posedge clk) // if (rdy) index_b <= WORD_INDEX_LAST; - else if (dec_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); + else if (flag_decrement_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); // @@ -201,10 +291,6 @@ module ed25519_modular_multiplier // reg [255:0] buf_a_wide; - always @(posedge clk) - // - if (store_word_a) buf_a_wide <= {buf_a_wide[16+:256-3*16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256-2*16+:16]}; - else if (enable_mac_ab) buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]}; // @@ -220,7 +306,7 @@ module ed25519_modular_multiplier always @(posedge clk) // - if (dec_index_b) index_b_ff <= ~index_b_ff; + if (flag_decrement_index_b) index_b_ff <= ~index_b_ff; else index_b_ff <= 1'b0; @@ -231,7 +317,7 @@ module ed25519_modular_multiplier always @(posedge clk) // - if (store_part_b) buf_b_narrow <= !index_b_ff ? b_din[31:16] : b_din[15:0]; + if (flag_decrement_index_b) buf_b_narrow <= !index_b_ff ? 
b_din[31:16] : b_din[15:0]; // @@ -241,7 +327,7 @@ module ed25519_modular_multiplier always @(posedge clk) // - if (!enable_mac_ab) mac_clear <= {16{1'b1}}; + if (!flag_enable_mac_ab) mac_clear <= {16{1'b1}}; else begin if (mac_clear[0]) mac_clear <= 16'b0000000000000010; else if (mac_clear[15]) mac_clear <= 16'b1111111111111111; @@ -263,7 +349,7 @@ module ed25519_modular_multiplier `ED25519_MAC16_PRIMITIVE mac16_inst ( .clk (clk), - .ce (enable_mac_ab), + .ce (flag_enable_mac_ab), .clr (mac_clear[i]), @@ -283,7 +369,6 @@ module ed25519_modular_multiplier reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb; reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb; - wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new; wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new; @@ -301,22 +386,25 @@ module ed25519_modular_multiplier always @(posedge clk) // - if (shift_si) begin + if (flag_shift_si) begin si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]}; si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]}; end else begin - if (store_si_msb) si_msb <= si_msb_new; - if (store_si_lsb) si_lsb <= si_lsb_new; + if (flag_store_msb_si) si_msb <= si_msb_new; + if (flag_store_lsb_si) si_lsb <= si_lsb_new; end // // Accumulators // - wire [46: 0] add47_cw0_s; - wire [46: 0] add47_cw1_s; - + wire [46:0] add47_cw0_s; + wire [46:0] add47_cw1_s; + wire [14:0] add47_cw1_s_masked = flag_mask_sum_cw1 ? {15{1'b0}} : add47_cw1_s[32+:15]; + + wire [46:0] add47_r3_b_masked = {{32{1'b0}}, flag_mask_b_r3 ? {15{1'b0}} : add47_r3_s[46:32]}; + // // cw0, cw1 // @@ -325,7 +413,7 @@ module ed25519_modular_multiplier always @(posedge clk) // - if (shift_si) si_prev_dly <= si_lsb[93:63]; + if (flag_shift_si) si_prev_dly <= si_lsb[93:63]; else si_prev_dly <= {31{1'b0}}; always @(posedge clk) @@ -336,7 +424,7 @@ module ed25519_modular_multiplier wire [46:0] add47_cw0_b = {{16{1'b0}}, si_prev_dly}; wire [46:0] add47_cw1_a = add47_cw0_s; - wire [46:0] add47_cw1_b = {{15{1'b0}}, si_next_dly, 1'b0, mask_cw1_sum ? 
{15{1'b0}} : add47_cw1_s[32+:15]}; + wire [46:0] add47_cw1_b = {{15{1'b0}}, si_next_dly, 1'b0, add47_cw1_s_masked}; `ED25519_ADD47_PRIMITIVE add47_cw0_inst ( @@ -353,60 +441,181 @@ module ed25519_modular_multiplier .b (add47_cw1_b), .s (add47_cw1_s) ); - + // // Full-Size Product // - reg [WORD_COUNTER_WIDTH:0] bram_c_addr; + wire [31:0] c_word_lower = add47_cw1_s[31:0]; + + + wire [46:0] add47_r0_s; + wire [46:0] add47_r1_s; + wire [46:0] add47_r2_s; + wire [46:0] add47_r3_s; + + reg [255:0] c_lsb_s1_shreg; + reg [ 31:0] c_msb_latch; + - wire [WORD_COUNTER_WIDTH:0] reduce_c_addr; - wire [ 31:0] reduce_c_word; + + always @(posedge clk) // - if (store_c_word) bram_c_addr <= bram_c_addr + 1'b1; - else bram_c_addr <= {(2*WORD_COUNTER_WIDTH){1'b0}}; - - bram_1rw_1ro_readfirst # + if (flag_store_msb_c) c_msb_latch <= c_word_lower; + else c_msb_latch <= {32{1'b0}}; + + + reg [4:0] c_msb_latch_upper_dly; + reg [31:0] c_lsb_shreg_lower_dly; + + always @(posedge clk) + // + if (flag_delay_msb_c) c_msb_latch_upper_dly <= c_msb_latch[31:27]; + else c_msb_latch_upper_dly <= {5{1'b0}}; + + + always @(posedge clk) + // + if (flag_store_msb_c) c_lsb_shreg_lower_dly <= c_lsb_s1_shreg[31:0]; + else c_lsb_shreg_lower_dly <= {32{1'b0}}; + + + + reg [11:0] carry_msb_s1; + + always @(posedge clk) + // + if (flag_calculate_carry_msb_s1) carry_msb_s1 <= {{6{1'b0}}, 6'd38} * {{6{1'b0}}, add47_r3_s[5:0]}; + + + wire [46:0] add47_s2_a_masked = {{32{1'b0}}, flag_mask_a_s2 ? 
{3'b000, carry_msb_s1} : add47_s2_s[46:32]}; + + `ED25519_ADD47_PRIMITIVE add47_r0 ( - .MEM_WIDTH(32), - .MEM_ADDR_BITS(WORD_COUNTER_WIDTH + 1) - ) - bram_c_inst + .clk (clk), + .a ({{15{1'b0}}, c_msb_latch[30:0], c_msb_latch_upper_dly[4]}), + .b ({{15{1'b0}}, c_msb_latch[29:0], c_msb_latch_upper_dly[4:3]}), + .s (add47_r0_s) + ); + `ED25519_ADD47_PRIMITIVE add47_r1 ( .clk (clk), + .a ({{15{1'b0}}, c_msb_latch[26:0], c_msb_latch_upper_dly[4:0]}), + .b ({{15{1'b0}}, c_lsb_shreg_lower_dly}), + .s (add47_r1_s) + ); + `ED25519_ADD47_PRIMITIVE add47_r2 + ( + .clk (clk), + .a (add47_r0_s), + .b (add47_r1_s), + .s (add47_r2_s) + ); + `ED25519_ADD47_PRIMITIVE add47_r3 + ( + .clk (clk), + .a (add47_r2_s), + .b (add47_r3_b_masked), + .s (add47_r3_s) + ); + + + + wire [46:0] add47_s2_s; + `ED25519_ADD47_PRIMITIVE add47_s2 + ( + .clk (clk), + .a (add47_s2_a_masked), + .b ({{15{1'b0}}, c_lsb_s1_shreg[31:0]}), + .s (add47_s2_s) + ); + + + reg sub32_b_bit; + + wire [31:0] sub32_b = {{26{1'b1}}, // ...*11*1* + sub32_b_bit, {2{1'b1}}, sub32_b_bit, 1'b1, sub32_b_bit}; + + always @(posedge clk) + // + if (!fsm_shreg_change_lsb_b_p) sub32_b_bit <= 1'b1; + else sub32_b_bit <= 1'b0; + + wire [31:0] sub32_pn_d; + wire sub32_b_in; + wire sub32_b_out; - .a_addr (bram_c_addr), - .a_wr (store_c_word), - .a_in (add47_cw1_s[31:0]), - .a_out (), - - .b_addr (reduce_c_addr), - .b_out (reduce_c_word) + assign sub32_b_in = sub32_b_out & !flag_mask_b_out_p; + + `ED25519_SUB32_PRIMITIVE sub32_pn + ( + .clk (clk), + .a (add47_s2_s[31:0]), + .b (sub32_b), + .d (sub32_pn_d), + .b_in (sub32_b_in), + .b_out (sub32_b_out) ); + + wire [31:0] add47_r3_s_lower = add47_r3_s[31:0]; + - // - // Reduction Stage - // - ed25519_modular_reductor reductor_inst - ( - .clk (clk), - .rst_n (rst_n), + always @(posedge clk) + // + if (flag_store_word_a) buf_a_wide <= {buf_a_wide[16+:256-3*16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256-2*16+:16]}; + else if (flag_enable_mac_ab) buf_a_wide <= {buf_a_wide[256-(16+1):0], 
buf_a_wide[256-16+:16]}; + else if (flag_store_s2) buf_a_wide <= {add47_s2_s[31:0], buf_a_wide[255:32]}; + else if (flag_update_p_dout) buf_a_wide <= {{32{1'bX}}, buf_a_wide[255:32]}; - .ena (reduce_start), - .rdy (reduce_done), + + always @(posedge clk) + // + if (flag_store_lsb_c) c_lsb_s1_shreg <= {c_word_lower, c_lsb_s1_shreg[255:32]}; + else if (flag_store_lsb_s1) c_lsb_s1_shreg <= {add47_r3_s_lower, c_lsb_s1_shreg[255:32]}; + else if (flag_store_pn) c_lsb_s1_shreg <= {sub32_pn_d, c_lsb_s1_shreg[255:32]}; + else if (flag_store_msb_c || flag_shift_s1) c_lsb_s1_shreg <= {{32{1'b0}}, c_lsb_s1_shreg[255:32]}; + else if (flag_update_p_dout) c_lsb_s1_shreg <= {{32{1'b0}}, c_lsb_s1_shreg[255:32]}; - .x_addr (reduce_c_addr), - .y_addr (p_addr), - .y_wren (p_wren), - .x_din (reduce_c_word), - .y_dout (p_dout) - ); + reg sel_pn; // 0: output in S2, 1: output in PN + + always @(posedge clk) + // + if (flag_select_s2_or_pn) sel_pn <= sub32_b_out & add47_s2_s[0]; + + + reg [31:0] p_dout_reg; + + assign p_dout = p_dout_reg; + + always @(posedge clk) + // + if (flag_update_p_dout) p_dout_reg <= sel_pn ? c_lsb_s1_shreg[31:0] : buf_a_wide[31:0]; + else p_dout_reg <= {32{1'bX}}; + + reg p_wren_reg = 0; + + assign p_wren = p_wren_reg; + + always @(posedge clk) + // + p_wren_reg <= flag_update_p_dout; + + reg [WORD_COUNTER_WIDTH-1:0] p_addr_reg; + + assign p_addr = p_addr_reg; + + always @(posedge clk) + // + if (p_wren_reg) p_addr_reg <= WORD_INDEX_NEXT_OR_ZERO(p_addr_reg); + else p_addr_reg <= WORD_INDEX_ZERO; + + endmodule |