From 46e2c534e7197b57f40ebf3e60bf4da5a0da7d3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Stro=CC=88mbergson?= Date: Wed, 17 Jun 2015 21:02:24 +0200 Subject: (1) Collapsed the sm and sa adder states. This improves performance for modexp by 3%. (2) Updated montprod testbench to not use the now removed states. (3) Minor code cleanup to make it easier to work with for further improvements. --- src/rtl/montprod.v | 165 +++++++++++++++++++++++---------------------------- src/tb/tb_montprod.v | 4 +- 2 files changed, 75 insertions(+), 94 deletions(-) diff --git a/src/rtl/montprod.v b/src/rtl/montprod.v index 7c7bf33..9748b09 100644 --- a/src/rtl/montprod.v +++ b/src/rtl/montprod.v @@ -70,14 +70,12 @@ module montprod( localparam CTRL_WAIT = 4'h2; localparam CTRL_LOOP_ITER = 4'h3; localparam CTRL_LOOP_BQ = 4'h4; - localparam CTRL_L_CALC_SM = 4'h5; - localparam CTRL_L_STALLPIPE_SM = 4'h6; - localparam CTRL_L_CALC_SA = 4'h7; - localparam CTRL_L_STALLPIPE_SA = 4'h8; - localparam CTRL_L_CALC_SDIV2 = 4'h9; - localparam CTRL_L_STALLPIPE_D2 = 4'hA; - localparam CTRL_L_STALLPIPE_ES = 4'hB; - localparam CTRL_EMIT_S = 4'hC; + localparam CTRL_L_CALC_SA = 4'h5; + localparam CTRL_L_STALLPIPE_SA = 4'h6; + localparam CTRL_L_CALC_SDIV2 = 4'h7; + localparam CTRL_L_STALLPIPE_D2 = 4'h8; + localparam CTRL_L_STALLPIPE_ES = 4'h9; + localparam CTRL_EMIT_S = 4'ha; localparam SMUX_0 = 2'h0; localparam SMUX_ADD_SM = 2'h1; @@ -117,7 +115,7 @@ module montprod( reg q_reg; reg b_new; reg b_reg; - reg update_bq; + reg bq_we; reg [12 : 0] loop_ctr_reg; reg [12 : 0] loop_ctr_new; @@ -125,9 +123,11 @@ module montprod( reg loop_ctr_set; reg loop_ctr_dec; - reg [07 : 0] B_word_index; //loop counter as a word index - reg [04 : 0] B_bit_index; //loop counter as a bit index - reg [04 : 0] B_bit_index_reg; //loop counter as a bit index + reg [07 : 0] b_word_index; //loop counter as a word index + + reg [04 : 0] b_bit_index_reg; + reg [04 : 0] b_bit_index_new; + reg [04 : 0] b_bit_index_we; reg [07 : 0] 
word_index_reg; //register of what word is being read reg [07 : 0] word_index_new; //calculation of what word to be read @@ -142,8 +142,10 @@ module montprod( reg shr_carry_in_reg; reg shr_carry_in_new; - reg reset_word_index_LSW; - reg reset_word_index_MSW; + reg reset_word_index_lsw; + reg reset_word_index_msw; + + reg [31 : 0] sa_adder_data_in; //---------------------------------------------------------------- @@ -155,10 +157,10 @@ module montprod( wire [31 : 0] add_result_sm; wire add_carry_out_sm; + reg shr_data_in; wire shr_carry_out; - wire [31 : 0] shr_adiv2; + wire [31 : 0] shr_data_out; - reg set_B_bit_index; //---------------------------------------------------------------- @@ -198,7 +200,7 @@ module montprod( adder32 s_adder_sa( - .a(s_mem_read_data), + .a(sa_adder_data_in), .b(opa_data), .carry_in(add_carry_in_sa_reg), .sum(add_result_sa), @@ -208,7 +210,7 @@ module montprod( shr32 shifter( .a(s_mem_read_data), .carry_in(shr_carry_in_reg), - .adiv2(shr_adiv2), + .adiv2(shr_data_out), .carry_out(shr_carry_out) ); @@ -225,7 +227,7 @@ module montprod( if (!reset_n) begin ready_reg <= 1'b0; - loop_ctr_reg <= 13'h0; + loop_ctr_reg <= 13'h0; word_index_reg <= 8'h0; word_index_prev_reg <= 8'h0; add_carry_in_sa_reg <= 1'b0; @@ -236,13 +238,14 @@ module montprod( s_mux_reg <= SMUX_0; s_mem_we_reg <= 1'b0; s_mem_wr_addr_reg <= 8'h0; - B_bit_index_reg <= 5'h0; + b_bit_index_reg <= 5'h0; montprod_ctrl_reg <= CTRL_IDLE; end else begin - s_mem_wr_addr_reg <= s_mem_addr; - s_mem_we_reg <= s_mem_we_new; + s_mem_wr_addr_reg <= s_mem_addr; + s_mem_we_reg <= s_mem_we_new; + s_mux_reg <= s_mux_new; word_index_reg <= word_index_new; word_index_prev_reg <= word_index_reg; @@ -251,14 +254,14 @@ module montprod( add_carry_in_sa_reg <= add_carry_in_sa_new; add_carry_in_sm_reg <= add_carry_in_sm_new; - B_bit_index_reg <= B_bit_index; + if (b_bit_index_we) + b_bit_index_reg <= b_bit_index_new; - if (update_bq) + if (bq_we) begin b_reg <= b_new; q_reg <= q_new; end - s_mux_reg <= 
s_mux_new; if (ready_we) ready_reg <= ready_new; @@ -279,7 +282,7 @@ module montprod( //---------------------------------------------------------------- always @* begin : prodcalc - opb_addr_reg = B_word_index; + opb_addr_reg = b_word_index; opm_addr_reg = word_index_reg; result_addr_reg = word_index_prev_reg; @@ -309,10 +312,10 @@ module montprod( endcase - if (reset_word_index_LSW == 1'b1) + if (reset_word_index_lsw == 1'b1) word_index_new = length_m1; - else if (reset_word_index_MSW == 1'b1) + else if (reset_word_index_msw == 1'b1) word_index_new = 8'h0; else if (montprod_ctrl_reg == CTRL_L_CALC_SDIV2) @@ -331,6 +334,7 @@ module montprod( shr_carry_in_new = 1'b0; s_mux_new = SMUX_0; s_mem_we_new = 1'b0; + sa_adder_data_in = 32'h0; case (montprod_ctrl_reg) CTRL_INIT_S: @@ -339,18 +343,11 @@ module montprod( s_mem_we_new = 1'b1; end - CTRL_L_CALC_SM: - begin - //s = (s + q*M + b*A) >>> 1;, if(q==1) S+= M. Takes (1..length) cycles. - s_mux_new = SMUX_ADD_SM; - s_mem_we_new = q_reg; - end - CTRL_L_CALC_SA: begin //s = (s + q*M + b*A) >>> 1;, if(b==1) S+= A. Takes (1..length) cycles. s_mux_new = SMUX_ADD_SA; - s_mem_we_new = b_reg; + s_mem_we_new = b_reg | q_reg; end CTRL_L_CALC_SDIV2: @@ -373,21 +370,26 @@ module montprod( SMUX_0: s_mem_new = 32'h0; - SMUX_ADD_SM: - begin - s_mem_new = add_result_sm; - add_carry_in_sm_new = add_carry_out_sm; - end - SMUX_ADD_SA: begin - s_mem_new = add_result_sa; + if (q_reg) + sa_adder_data_in = add_result_sm; + else + sa_adder_data_in = s_mem_read_data; + + if (b_reg) + s_mem_new = add_result_sa; + else + s_mem_new = add_result_sm; + add_carry_in_sa_new = add_carry_out_sa; + add_carry_in_sm_new = add_carry_out_sm; end SMUX_SHR: begin - s_mem_new = shr_adiv2; + shr_data_in = s_mem_read_data; + s_mem_new = shr_data_out; shr_carry_in_new = shr_carry_out; end @@ -400,12 +402,14 @@ module montprod( //---------------------------------------------------------------- // bq - // b: bit of B + // + // Extract b and q bits. 
+ // b: current bit of B used. // q = (s - b * A) & 1 //---------------------------------------------------------------- always @* begin : bq - b_new = opb_data[B_bit_index_reg]; + b_new = opb_data[b_bit_index_reg]; q_new = s_mem_read_data[0] ^ (opa_data[0] & b_new); end // bq @@ -417,11 +421,12 @@ module montprod( //---------------------------------------------------------------- always @* begin : loop_ctr - loop_ctr_new = loop_ctr_reg; - loop_ctr_we = 1'b0; + loop_ctr_new = 13'h0; + loop_ctr_we = 1'b0; length_m1 = length - 1'b1; - B_bit_index = B_bit_index_reg; - B_word_index = loop_ctr_reg[12:5]; + + b_bit_index_new = 5'h1f - loop_ctr_reg[4:0]; + b_word_index = loop_ctr_reg[12:5]; if (loop_ctr_set) begin @@ -434,11 +439,6 @@ module montprod( loop_ctr_new = loop_ctr_reg - 1'b1; loop_ctr_we = 1'b1; end - - if (set_B_bit_index) - begin - B_bit_index = 5'h1f - loop_ctr_reg[4:0]; - end end @@ -449,18 +449,18 @@ module montprod( //---------------------------------------------------------------- always @* begin : montprod_ctrl - ready_new = 1'b0; - ready_we = 1'b0; - montprod_ctrl_new = CTRL_IDLE; - montprod_ctrl_we = 1'b0; + ready_new = 1'b0; + ready_we = 1'b0; + montprod_ctrl_new = CTRL_IDLE; + montprod_ctrl_we = 1'b0; - loop_ctr_set = 1'b0; - loop_ctr_dec = 1'b0; - set_B_bit_index = 1'b0; - update_bq = 1'b0; + loop_ctr_set = 1'b0; + loop_ctr_dec = 1'b0; + b_bit_index_we = 1'b0; + bq_we = 1'b0; - reset_word_index_LSW = 1'b0; - reset_word_index_MSW = 1'b0; + reset_word_index_lsw = 1'b0; + reset_word_index_msw = 1'b0; case (montprod_ctrl_reg) @@ -472,7 +472,7 @@ module montprod( ready_we = 1'b1; montprod_ctrl_new = CTRL_INIT_S; montprod_ctrl_we = 1'b1; - reset_word_index_LSW = 1'b1; + reset_word_index_lsw = 1'b1; end else begin @@ -501,42 +501,25 @@ module montprod( // Also abort loop if done. 
CTRL_LOOP_ITER: begin - set_B_bit_index = 1'b1; - reset_word_index_LSW = 1'b1; + b_bit_index_we = 1'b1; + reset_word_index_lsw = 1'b1; montprod_ctrl_new = CTRL_LOOP_BQ; montprod_ctrl_we = 1'b1; end CTRL_LOOP_BQ: begin - reset_word_index_LSW = 1'b1; - update_bq = 1'b1; - montprod_ctrl_new = CTRL_L_CALC_SM; - montprod_ctrl_we = 1'b1; - end - - CTRL_L_CALC_SM: - begin - if (word_index_reg == 8'h0) - begin - reset_word_index_LSW = 1'b1; - montprod_ctrl_we = 1'b1; - montprod_ctrl_new = CTRL_L_STALLPIPE_SM; - end - end - - CTRL_L_STALLPIPE_SM: - begin + reset_word_index_lsw = 1'b1; + bq_we = 1'b1; montprod_ctrl_new = CTRL_L_CALC_SA; montprod_ctrl_we = 1'b1; - reset_word_index_LSW = 1'b1; end CTRL_L_CALC_SA: begin if (word_index_reg == 8'h0) begin - reset_word_index_LSW = 1'b1; + reset_word_index_lsw = 1'b1; montprod_ctrl_new = CTRL_L_STALLPIPE_SA; montprod_ctrl_we = 1'b1; end @@ -546,7 +529,7 @@ module montprod( begin montprod_ctrl_new = CTRL_L_CALC_SDIV2; montprod_ctrl_we = 1'b1; - reset_word_index_MSW = 1'b1; + reset_word_index_msw = 1'b1; end CTRL_L_CALC_SDIV2: @@ -560,10 +543,10 @@ module montprod( CTRL_L_STALLPIPE_D2: begin - loop_ctr_dec = 1'b1; + loop_ctr_dec = 1'b1; montprod_ctrl_new = CTRL_LOOP_ITER; montprod_ctrl_we = 1'b1; - reset_word_index_LSW = 1'b1; + reset_word_index_lsw = 1'b1; if (loop_ctr_reg == 0) begin diff --git a/src/tb/tb_montprod.v b/src/tb/tb_montprod.v index bf9333e..850f8d9 100644 --- a/src/tb/tb_montprod.v +++ b/src/tb/tb_montprod.v @@ -226,7 +226,7 @@ module tb_montprod(); if (SHOW_BQ_DEBUG) begin if (dut.montprod_ctrl_reg == dut.CTRL_L_CALC_SM) - $display("====================> B: %x Q: %x B_bit_index_reg: %x <=====================", dut.b_reg, dut.q_reg, dut.B_bit_index_reg); + $display("====================> B: %x Q: %x b_bit_index_reg: %x <=====================", dut.b_reg, dut.q_reg, dut.b_bit_index_reg); end end @@ -247,8 +247,6 @@ module tb_montprod(); $display("FSM: LOOP_ITER"); dut.CTRL_LOOP_BQ: $display("FSM: LOOP_BQ"); - 
dut.CTRL_L_CALC_SM: - $display("FSM: LOOP_CALC_SM"); dut.CTRL_L_CALC_SA: $display("FSM: LOOP_CALC_SA"); dut.CTRL_L_STALLPIPE_SA: -- cgit v1.2.3