From 157d5dedd90fede9ea392e2aeda6562d839a30e1 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Wed, 20 Nov 2019 14:34:36 +0300 Subject: Small change to the reductor module to try to get past 180 MHz. Previously BRAM outputs were going directly into a LUT-based ternary adder which was causing timing problems. Added a layer of flip-flops, so instead of BRAM -> LUT -> FF we have BRAM -> FF -> LUT -> FF. This increases core latency by (number_of_supporting_modular_multiplications + number_of_exponent_bits) ticks. --- rtl/modexpng_reductor.v | 168 ++++++++++++++++++++++++------------------------ 1 file changed, 83 insertions(+), 85 deletions(-) diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v index dd9cfd9..7404eba 100644 --- a/rtl/modexpng_reductor.v +++ b/rtl/modexpng_reductor.v @@ -174,116 +174,114 @@ module modexpng_reductor // - // Pipeline (Delay Match) + // Pipeline rd_wide_* // - reg rcmb_xy_valid_dly1 = 1'b0; - reg rcmb_xy_valid_dly2 = 1'b0; - reg rcmb_xy_valid_dly3 = 1'b0; + reg [WORD_EXT_W -1:0] rd_wide_x_din_aux_pipe; + reg [WORD_EXT_W -1:0] rd_wide_y_din_aux_pipe; - reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1; - reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2; - reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3; + always @(posedge clk) + // + {rd_wide_x_din_aux_pipe, rd_wide_y_din_aux_pipe} <= + {rd_wide_x_din_aux, rd_wide_y_din_aux } ; - reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly1; - reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly2; - reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly3; - reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly1; - reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2; - reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3; - reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1; - reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2; - reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3; + // + // Delay rcmb_final_* to match rd_wide_* + // + reg rcmb_xy_valid_dly1_x = 1'b0; + reg rcmb_xy_valid_dly2_x = 1'b0; + reg rcmb_xy_valid_dly3_x = 1'b0; + reg rcmb_xy_valid_dly4_x = 1'b0; + + reg [BANK_ADDR_W -1:0] 
rcmb_xy_bank_dly1_x; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2_x; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3_x; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly4_x; + + reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly1_x; + reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly2_x; + reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly3_x; + reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly4_x; + + reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly1_x; + reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2_x; + reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3_x; + reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly4_x; + + reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1_x; + reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2_x; + reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3_x; + reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly4_x; always @(posedge clk or negedge rst_n) // - if (!rst_n) begin - rcmb_xy_valid_dly1 <= 1'b0; - rcmb_xy_valid_dly2 <= 1'b0; - rcmb_xy_valid_dly3 <= 1'b0; - end else begin - rcmb_xy_valid_dly1 <= rcmb_final_xy_valid; - rcmb_xy_valid_dly2 <= rcmb_xy_valid_dly1; - rcmb_xy_valid_dly3 <= rcmb_xy_valid_dly2; - end + if (!rst_n) {rcmb_xy_valid_dly4_x, rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x} <= 4'b0000; + else {rcmb_xy_valid_dly4_x, rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x} <= + {rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x, rcmb_final_xy_valid } ; always @(posedge clk) begin // - if (rcmb_final_xy_valid) begin - rcmb_xy_bank_dly1 <= rcmb_final_xy_bank; - rcmb_xy_addr_dly1 <= rcmb_final_xy_addr; - rcmb_x_dout_dly1 <= rcmb_final_x_din; - rcmb_y_dout_dly1 <= rcmb_final_y_din; - end - // - if (rcmb_xy_valid_dly1) begin - rcmb_xy_bank_dly2 <= rcmb_xy_bank_dly1; - rcmb_xy_addr_dly2 <= rcmb_xy_addr_dly1; - rcmb_x_dout_dly2 <= rcmb_x_dout_dly1; - rcmb_y_dout_dly2 <= rcmb_y_dout_dly1; - end - // - if (rcmb_xy_valid_dly2) begin - rcmb_xy_bank_dly3 <= rcmb_xy_bank_dly2; - rcmb_xy_addr_dly3 <= rcmb_xy_addr_dly2; - rcmb_x_dout_dly3 <= rcmb_x_dout_dly2; - rcmb_y_dout_dly3 <= rcmb_y_dout_dly2; - end + if (rcmb_final_xy_valid) 
{rcmb_xy_bank_dly1_x, rcmb_xy_addr_dly1_x, rcmb_x_dout_dly1_x, rcmb_y_dout_dly1_x} <= + {rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_din, rcmb_final_y_din } ; + if (rcmb_xy_valid_dly1_x) {rcmb_xy_bank_dly2_x, rcmb_xy_addr_dly2_x, rcmb_x_dout_dly2_x, rcmb_y_dout_dly2_x} <= + {rcmb_xy_bank_dly1_x, rcmb_xy_addr_dly1_x, rcmb_x_dout_dly1_x, rcmb_y_dout_dly1_x} ; + if (rcmb_xy_valid_dly2_x) {rcmb_xy_bank_dly3_x, rcmb_xy_addr_dly3_x, rcmb_x_dout_dly3_x, rcmb_y_dout_dly3_x} <= + {rcmb_xy_bank_dly2_x, rcmb_xy_addr_dly2_x, rcmb_x_dout_dly2_x, rcmb_y_dout_dly2_x} ; + if (rcmb_xy_valid_dly3_x) {rcmb_xy_bank_dly4_x, rcmb_xy_addr_dly4_x, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x} <= + {rcmb_xy_bank_dly3_x, rcmb_xy_addr_dly3_x, rcmb_x_dout_dly3_x, rcmb_y_dout_dly3_x} ; // end // + // LSB Carry Logic // - // - reg [ CARRY_W -1:0] rcmb_x_lsb_carry; - reg [ WORD_W -1:0] rcmb_x_lsb_dummy; - reg [WORD_EXT_W -1:0] rcmb_x_lsb_dout; - - reg [ CARRY_W -1:0] rcmb_y_lsb_carry; - reg [ WORD_W -1:0] rcmb_y_lsb_dummy; - reg [WORD_EXT_W -1:0] rcmb_y_lsb_dout; + reg [ CARRY_W -1:0] rcmb_x_lsb_carry; + reg [ CARRY_W -1:0] rcmb_y_lsb_carry; + reg [ WORD_W -1:0] rcmb_x_lsb_dummy; + reg [ WORD_W -1:0] rcmb_y_lsb_dummy; + wire [WORD_EXT_W -1:0] rcmb_x_lsb_carry_ext = {WORD_ZERO, rcmb_x_lsb_carry}; + wire [WORD_EXT_W -1:0] rcmb_y_lsb_carry_ext = {WORD_ZERO, rcmb_y_lsb_carry}; + + task calc_rcmb_xy_lsb_carry; + begin + {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly4_x + rd_wide_x_din_aux_pipe + rcmb_x_lsb_carry_ext; + {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly4_x + rd_wide_y_din_aux_pipe + rcmb_y_lsb_carry_ext; + end + endtask // - // Carry Computation + // LSB Carry Computation // always @(posedge clk) // if (ena) begin + // rcmb_x_lsb_carry <= CARRY_ZERO; rcmb_y_lsb_carry <= CARRY_ZERO; - end else if (rcmb_xy_valid_dly3) // - case (rcmb_xy_bank_dly3) - - BANK_RCMB_ML: begin - {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry; - 
{rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry; - end - - BANK_RCMB_MH: - if (rcmb_xy_addr_dly3 == OP_ADDR_ZERO) begin - {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry; - {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry; - end - + end else if (rcmb_xy_valid_dly4_x) + // + case (rcmb_xy_bank_dly4_x) + BANK_RCMB_ML: calc_rcmb_xy_lsb_carry; + BANK_RCMB_MH: if (rcmb_xy_addr_dly4_x == OP_ADDR_ZERO) calc_rcmb_xy_lsb_carry; endcase // + // MSB Sum Logic // - // - wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_din_aux; - wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_din_aux; + wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly4_x + rd_wide_x_din_aux_pipe; + wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly4_x + rd_wide_y_din_aux_pipe; - wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + {WORD_ZERO, rcmb_x_lsb_carry}; - wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + {WORD_ZERO, rcmb_y_lsb_carry}; + wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + rcmb_x_lsb_carry_ext; + wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + rcmb_y_lsb_carry_ext; // - // + // MSB Sum Computation // always @(posedge clk or negedge rst_n) // @@ -295,22 +293,22 @@ module modexpng_reductor clear_rdct_wide; clear_rdct_narrow; // - if (rcmb_xy_valid_dly3) + if (rcmb_xy_valid_dly4_x) // - case (rcmb_xy_bank_dly3) + case (rcmb_xy_bank_dly4_x) BANK_RCMB_MH: - if (rcmb_xy_addr_dly3 == OP_ADDR_ONE) begin + if (rcmb_xy_addr_dly4_x == OP_ADDR_ONE) begin set_rdct_wide (sel_wide_out, OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry); set_rdct_narrow(sel_narrow_out, OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry); - end else if (rcmb_xy_addr_dly3 > OP_ADDR_ONE) begin - set_rdct_wide (sel_wide_out, rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y); - set_rdct_narrow(sel_narrow_out, rcmb_xy_addr_dly3 - 1'b1, 
sum_rdct_x, sum_rdct_y); + end else if (rcmb_xy_addr_dly4_x > OP_ADDR_ONE) begin + set_rdct_wide (sel_wide_out, rcmb_xy_addr_dly4_x - 1'b1, sum_rdct_x, sum_rdct_y); + set_rdct_narrow(sel_narrow_out, rcmb_xy_addr_dly4_x - 1'b1, sum_rdct_x, sum_rdct_y); end BANK_RCMB_EXT: begin - set_rdct_wide (sel_wide_out, word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3); - set_rdct_narrow(sel_narrow_out, word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3); + set_rdct_wide (sel_wide_out, word_index_last, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x); + set_rdct_narrow(sel_narrow_out, word_index_last, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x); end endcase @@ -335,10 +333,10 @@ module modexpng_reductor always @(posedge clk or negedge rst_n) // - if (!rst_n) busy_next <= 1'b0; + if (!rst_n) busy_next <= 1'b0; else begin - if (rdy && ena) busy_next <= 1'b1; - if (!rdy && rcmb_xy_valid_dly3 && (rcmb_xy_bank_dly3 == BANK_RCMB_EXT)) busy_next <= 1'b0; + if (rdy && ena) busy_next <= 1'b1; + if (!rdy && rcmb_xy_valid_dly4_x && (rcmb_xy_bank_dly4_x == BANK_RCMB_EXT)) busy_next <= 1'b0; end -- cgit v1.2.3