From 2791a17430c5b0c3291be3824aa8cdf07f305e92 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 11 Feb 2020 15:54:22 +0300 Subject: More elegant way to do partial product recombination: * take advantage of the cascade paths between DSP slices * decrease latency of operation --- rtl/modexpng_recombinator_block.v | 83 ++++++++++----------------------------- 1 file changed, 21 insertions(+), 62 deletions(-) (limited to 'rtl/modexpng_recombinator_block.v') diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v index e3cb50f..62d84e1 100644 --- a/rtl/modexpng_recombinator_block.v +++ b/rtl/modexpng_recombinator_block.v @@ -496,10 +496,8 @@ module modexpng_recombinator_block reg rcmb_xy_lsb_ce = 1'b0; reg rcmb_xy_lsb_ce_aux = 1'b0; - reg rcmb_xy_lsb_ce_aux_dly = 1'b0; reg [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000; - wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0]; - wire rcmb_xy_lsb_ce_combined_ext = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0] | rcmb_xy_lsb_ce_aux_dly; + wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0]; reg rcmb_xy_lsb_clr; wire rcmb_xy_lsb_cry = !xy_valid_latch_lsb && rcmb_xy_lsb_ce_purge[1]; @@ -512,9 +510,7 @@ module modexpng_recombinator_block reg rcmb_xy_msb_ce = 1'b0; reg [ 1:0] rcmb_xy_msb_ce_purge = 2'b00; - reg rcmb_xy_msb_ce_purge0_rectangle_dly = 1'b0; - wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0]; - wire rcmb_xy_msb_ce_combined_ext = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0] | rcmb_xy_msb_ce_purge0_rectangle_dly; + wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0]; reg rcmb_xy_msb_clr; reg [ MAC_W -1:0] rcmb_x_msb_din; @@ -522,46 +518,44 @@ module modexpng_recombinator_block wire [WORD_W -1:0] rcmb_x_msb_dout; wire [WORD_W -1:0] rcmb_y_msb_dout; - always @(posedge clk) rcmb_xy_lsb_ce_aux_dly <= rcmb_xy_lsb_ce_aux; - always @(posedge clk) rcmb_xy_msb_ce_purge0_rectangle_dly <= rcmb_mode == RCMB_MODE_RECTANGLE ? rcmb_xy_msb_ce_purge[0] : 1'b0; - - modexpng_recombinator_cell recomb_x_lsb_new + modexpng_recombinator_cell recomb_x_lsb ( .clk (clk), - .ce (rcmb_xy_lsb_ce_combined_ext), + .ce (rcmb_xy_lsb_ce_combined), .clr (rcmb_xy_lsb_clr), .din (rcmb_x_lsb_din), .dout (rcmb_x_lsb_dout), .doutw (rcmb_x_lsb_doutw) ); - modexpng_recombinator_cell recomb_y_lsb_new + modexpng_recombinator_cell recomb_y_lsb ( .clk (clk), - .ce (rcmb_xy_lsb_ce_combined_ext), + .ce (rcmb_xy_lsb_ce_combined), .clr (rcmb_xy_lsb_clr), .din (rcmb_y_lsb_din), .dout (rcmb_y_lsb_dout), .doutw (rcmb_y_lsb_doutw) ); - modexpng_recombinator_cell recomb_x_msb_new + modexpng_recombinator_cell recomb_x_msb ( .clk (clk), - .ce (rcmb_xy_msb_ce_combined_ext), + .ce (rcmb_xy_msb_ce_combined), .clr (rcmb_xy_msb_clr), .din (rcmb_x_msb_din), .dout (rcmb_x_msb_dout), .doutw () ); - modexpng_recombinator_cell recomb_y_msb_new + modexpng_recombinator_cell recomb_y_msb ( .clk (clk), - .ce (rcmb_xy_msb_ce_combined_ext), + .ce (rcmb_xy_msb_ce_combined), .clr (rcmb_xy_msb_clr), .din (rcmb_y_msb_din), .dout (rcmb_y_msb_dout), .doutw () ); + always @(posedge clk) begin // @@ -596,8 +590,8 @@ module modexpng_recombinator_block rcmb_x_lsb_din <= dsp_x_p_latch[NUM_MULTS_AUX-1]; rcmb_y_lsb_din <= dsp_y_p_latch[NUM_MULTS_AUX-1]; end else if (rcmb_xy_lsb_cry) begin - rcmb_x_lsb_din <= rcmb_x_msb_carry_1; - rcmb_y_lsb_din <= rcmb_y_msb_carry_1; + rcmb_x_lsb_din <= {{(MAC_W-WORD_W){1'b0}}, rcmb_x_msb_carry_1}; + rcmb_y_lsb_din <= {{(MAC_W-WORD_W){1'b0}}, rcmb_y_msb_carry_1}; end else begin rcmb_x_lsb_din <= {MAC_W{1'b0}}; rcmb_y_lsb_din <= {MAC_W{1'b0}}; @@ -759,52 +753,17 @@ module modexpng_recombinator_block end - reg rcmb_xy_lsb_ce_combined_dly1 = 1'b0; - reg rcmb_xy_msb_ce_combined_dly1 = 1'b0; - - reg rcmb_xy_lsb_ce_combined_dly2 = 1'b0; - reg rcmb_xy_msb_ce_combined_dly2 = 1'b0; - - reg rcmb_xy_lsb_ce_combined_dly3 = 1'b0; - reg rcmb_xy_msb_ce_combined_dly3 = 1'b0; - - reg rcmb_xy_lsb_ce_combined_dly4 = 1'b0; - reg rcmb_xy_msb_ce_combined_dly4 = 1'b0; - - reg rcmb_xy_lsb_ce_combined_dly5 = 1'b0; - reg rcmb_xy_msb_ce_combined_dly5 = 1'b0; - - reg rcmb_xy_lsb_ce_combined_dly6 = 1'b0; - reg rcmb_xy_msb_ce_combined_dly6 = 1'b0; + reg [4:1] rcmb_xy_lsb_ce_combined_dly; + reg [4:1] rcmb_xy_msb_ce_combined_dly; always @(posedge clk or negedge rst_n) // if (!rst_n) begin - rcmb_xy_lsb_ce_combined_dly1 <= 1'b0; - rcmb_xy_msb_ce_combined_dly1 <= 1'b0; - rcmb_xy_lsb_ce_combined_dly2 <= 1'b0; - rcmb_xy_msb_ce_combined_dly2 <= 1'b0; - rcmb_xy_lsb_ce_combined_dly3 <= 1'b0; - rcmb_xy_msb_ce_combined_dly3 <= 1'b0; - rcmb_xy_lsb_ce_combined_dly4 <= 1'b0; - rcmb_xy_msb_ce_combined_dly4 <= 1'b0; - rcmb_xy_lsb_ce_combined_dly5 <= 1'b0; - rcmb_xy_msb_ce_combined_dly5 <= 1'b0; - rcmb_xy_lsb_ce_combined_dly6 <= 1'b0; - rcmb_xy_msb_ce_combined_dly6 <= 1'b0; + rcmb_xy_lsb_ce_combined_dly <= 4'b0000; + rcmb_xy_msb_ce_combined_dly <= 4'b0000; end else begin - rcmb_xy_lsb_ce_combined_dly1 <= rcmb_xy_lsb_ce_combined; - rcmb_xy_msb_ce_combined_dly1 <= rcmb_xy_msb_ce_combined; - rcmb_xy_lsb_ce_combined_dly2 <= rcmb_xy_lsb_ce_combined_dly1; - rcmb_xy_msb_ce_combined_dly2 <= rcmb_xy_msb_ce_combined_dly1; - rcmb_xy_lsb_ce_combined_dly3 <= rcmb_xy_lsb_ce_combined_dly2; - rcmb_xy_msb_ce_combined_dly3 <= rcmb_xy_msb_ce_combined_dly2; - rcmb_xy_lsb_ce_combined_dly4 <= rcmb_xy_lsb_ce_combined_dly3; - rcmb_xy_msb_ce_combined_dly4 <= rcmb_xy_msb_ce_combined_dly3; - rcmb_xy_lsb_ce_combined_dly5 <= rcmb_xy_lsb_ce_combined_dly4; - rcmb_xy_msb_ce_combined_dly5 <= rcmb_xy_msb_ce_combined_dly4; - rcmb_xy_lsb_ce_combined_dly6 <= rcmb_xy_lsb_ce_combined_dly5; - rcmb_xy_msb_ce_combined_dly6 <= rcmb_xy_msb_ce_combined_dly5; + rcmb_xy_lsb_ce_combined_dly <= {rcmb_xy_lsb_ce_combined_dly[3:1], rcmb_xy_lsb_ce_combined}; + rcmb_xy_msb_ce_combined_dly <= {rcmb_xy_msb_ce_combined_dly[3:1], rcmb_xy_msb_ce_combined}; end reg rcmb_xy_lsb_valid = 1'b0; @@ -816,8 +775,8 @@ module modexpng_recombinator_block rcmb_xy_lsb_valid <= 1'b0; rcmb_xy_msb_valid <= 1'b0; end else begin - rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly6; - rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly6; + rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly[4]; + rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly[4]; end -- cgit v1.2.3