From 77d11487d8987e13403f426537dc9bc59141f3f3 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Mon, 3 Feb 2020 22:49:25 +0300 Subject: New partial product recombination algorithm. --- rtl/modexpng_recombinator_block.v | 219 ++++++++++++++++++++++---------------- rtl/modexpng_recombinator_cell.v | 188 +++++++++++++++++++++++++------- 2 files changed, 278 insertions(+), 129 deletions(-) diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v index cc89db0..e3cb50f 100644 --- a/rtl/modexpng_recombinator_block.v +++ b/rtl/modexpng_recombinator_block.v @@ -153,7 +153,7 @@ module modexpng_recombinator_block // index - latch reg [MAC_INDEX_W-1:0] xy_index_latch_lsb; - // purge - index + // purge - latch reg xy_purge_latch_lsb = 1'b0; reg xy_purge_latch_msb = 1'b0; @@ -496,21 +496,25 @@ module modexpng_recombinator_block reg rcmb_xy_lsb_ce = 1'b0; reg rcmb_xy_lsb_ce_aux = 1'b0; + reg rcmb_xy_lsb_ce_aux_dly = 1'b0; reg [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000; - wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0]; + wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0]; + wire rcmb_xy_lsb_ce_combined_ext = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0] | rcmb_xy_lsb_ce_aux_dly; reg rcmb_xy_lsb_clr; wire rcmb_xy_lsb_cry = !xy_valid_latch_lsb && rcmb_xy_lsb_ce_purge[1]; - reg [ MAC_W -1:0] rcmb_x_lsb_din; - reg [ MAC_W -1:0] rcmb_y_lsb_din; - wire [WORD_W -1:0] rcmb_x_lsb_dout; - wire [WORD_W -1:0] rcmb_y_lsb_dout; - wire [WORD_EXT_W -2:0] rcmb_x_lsb_dout_ext; - wire [WORD_EXT_W -2:0] rcmb_y_lsb_dout_ext; + reg [ MAC_W -1:0] rcmb_x_lsb_din; + reg [ MAC_W -1:0] rcmb_y_lsb_din; + wire [WORD_W -1:0] rcmb_x_lsb_dout; + wire [WORD_W -1:0] rcmb_y_lsb_dout; + wire [WORD_W :0] rcmb_x_lsb_doutw; + wire [WORD_W :0] rcmb_y_lsb_doutw; reg rcmb_xy_msb_ce = 1'b0; reg [ 1:0] rcmb_xy_msb_ce_purge = 2'b00; - wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0]; + reg rcmb_xy_msb_ce_purge0_rectangle_dly = 1'b0; + wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0]; + wire rcmb_xy_msb_ce_combined_ext = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0] | rcmb_xy_msb_ce_purge0_rectangle_dly; reg rcmb_xy_msb_clr; reg [ MAC_W -1:0] rcmb_x_msb_din; @@ -518,42 +522,45 @@ module modexpng_recombinator_block wire [WORD_W -1:0] rcmb_x_msb_dout; wire [WORD_W -1:0] rcmb_y_msb_dout; - modexpng_recombinator_cell recomb_x_lsb + always @(posedge clk) rcmb_xy_lsb_ce_aux_dly <= rcmb_xy_lsb_ce_aux; + always @(posedge clk) rcmb_xy_msb_ce_purge0_rectangle_dly <= rcmb_mode == RCMB_MODE_RECTANGLE ? rcmb_xy_msb_ce_purge[0] : 1'b0; + + modexpng_recombinator_cell recomb_x_lsb_new ( - .clk (clk), - .ce (rcmb_xy_lsb_ce_combined), - .clr (rcmb_xy_lsb_clr), - .din (rcmb_x_lsb_din), - .dout (rcmb_x_lsb_dout), - .dout_ext (rcmb_x_lsb_dout_ext) + .clk (clk), + .ce (rcmb_xy_lsb_ce_combined_ext), + .clr (rcmb_xy_lsb_clr), + .din (rcmb_x_lsb_din), + .dout (rcmb_x_lsb_dout), + .doutw (rcmb_x_lsb_doutw) ); - modexpng_recombinator_cell recomb_y_lsb + modexpng_recombinator_cell recomb_y_lsb_new ( - .clk (clk), - .ce (rcmb_xy_lsb_ce_combined), - .clr (rcmb_xy_lsb_clr), - .din (rcmb_y_lsb_din), - .dout (rcmb_y_lsb_dout), - .dout_ext (rcmb_y_lsb_dout_ext) + .clk (clk), + .ce (rcmb_xy_lsb_ce_combined_ext), + .clr (rcmb_xy_lsb_clr), + .din (rcmb_y_lsb_din), + .dout (rcmb_y_lsb_dout), + .doutw (rcmb_y_lsb_doutw) ); - modexpng_recombinator_cell recomb_x_msb + modexpng_recombinator_cell recomb_x_msb_new ( - .clk (clk), - .ce (rcmb_xy_msb_ce_combined), - .clr (rcmb_xy_msb_clr), - .din (rcmb_x_msb_din), - .dout (rcmb_x_msb_dout), - .dout_ext () + .clk (clk), + .ce (rcmb_xy_msb_ce_combined_ext), + .clr (rcmb_xy_msb_clr), + .din (rcmb_x_msb_din), + .dout (rcmb_x_msb_dout), + .doutw () ); - modexpng_recombinator_cell recomb_y_msb + modexpng_recombinator_cell recomb_y_msb_new ( - .clk (clk), - .ce (rcmb_xy_msb_ce_combined), - .clr (rcmb_xy_msb_clr), - .din (rcmb_y_msb_din), - .dout (rcmb_y_msb_dout), - .dout_ext () + .clk (clk), + .ce (rcmb_xy_msb_ce_combined_ext), + .clr (rcmb_xy_msb_clr), + .din (rcmb_y_msb_din), + .dout (rcmb_y_msb_dout), + .doutw () ); always @(posedge clk) begin @@ -704,7 +711,6 @@ module modexpng_recombinator_block xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[NUM_MULTS-1:1]}; end // - // for (i=1; i<6; i=i+1) begin xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1]; xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1]; @@ -753,17 +759,52 @@ module modexpng_recombinator_block end - reg rcmb_xy_lsb_ce_combined_dly = 1'b0; - reg rcmb_xy_msb_ce_combined_dly = 1'b0; + reg rcmb_xy_lsb_ce_combined_dly1 = 1'b0; + reg rcmb_xy_msb_ce_combined_dly1 = 1'b0; + + reg rcmb_xy_lsb_ce_combined_dly2 = 1'b0; + reg rcmb_xy_msb_ce_combined_dly2 = 1'b0; + + reg rcmb_xy_lsb_ce_combined_dly3 = 1'b0; + reg rcmb_xy_msb_ce_combined_dly3 = 1'b0; + + reg rcmb_xy_lsb_ce_combined_dly4 = 1'b0; + reg rcmb_xy_msb_ce_combined_dly4 = 1'b0; + + reg rcmb_xy_lsb_ce_combined_dly5 = 1'b0; + reg rcmb_xy_msb_ce_combined_dly5 = 1'b0; + + reg rcmb_xy_lsb_ce_combined_dly6 = 1'b0; + reg rcmb_xy_msb_ce_combined_dly6 = 1'b0; always @(posedge clk or negedge rst_n) // if (!rst_n) begin - rcmb_xy_lsb_ce_combined_dly <= 1'b0; - rcmb_xy_msb_ce_combined_dly <= 1'b0; + rcmb_xy_lsb_ce_combined_dly1 <= 1'b0; + rcmb_xy_msb_ce_combined_dly1 <= 1'b0; + rcmb_xy_lsb_ce_combined_dly2 <= 1'b0; + rcmb_xy_msb_ce_combined_dly2 <= 1'b0; + rcmb_xy_lsb_ce_combined_dly3 <= 1'b0; + rcmb_xy_msb_ce_combined_dly3 <= 1'b0; + rcmb_xy_lsb_ce_combined_dly4 <= 1'b0; + rcmb_xy_msb_ce_combined_dly4 <= 1'b0; + rcmb_xy_lsb_ce_combined_dly5 <= 1'b0; + rcmb_xy_msb_ce_combined_dly5 <= 1'b0; + rcmb_xy_lsb_ce_combined_dly6 <= 1'b0; + rcmb_xy_msb_ce_combined_dly6 <= 1'b0; end else begin - rcmb_xy_lsb_ce_combined_dly <= rcmb_xy_lsb_ce_combined; - rcmb_xy_msb_ce_combined_dly <= rcmb_xy_msb_ce_combined; + rcmb_xy_lsb_ce_combined_dly1 <= rcmb_xy_lsb_ce_combined; + rcmb_xy_msb_ce_combined_dly1 <= rcmb_xy_msb_ce_combined; + rcmb_xy_lsb_ce_combined_dly2 <= rcmb_xy_lsb_ce_combined_dly1; + rcmb_xy_msb_ce_combined_dly2 <= rcmb_xy_msb_ce_combined_dly1; + rcmb_xy_lsb_ce_combined_dly3 <= rcmb_xy_lsb_ce_combined_dly2; + rcmb_xy_msb_ce_combined_dly3 <= rcmb_xy_msb_ce_combined_dly2; + rcmb_xy_lsb_ce_combined_dly4 <= rcmb_xy_lsb_ce_combined_dly3; + rcmb_xy_msb_ce_combined_dly4 <= rcmb_xy_msb_ce_combined_dly3; + rcmb_xy_lsb_ce_combined_dly5 <= rcmb_xy_lsb_ce_combined_dly4; + rcmb_xy_msb_ce_combined_dly5 <= rcmb_xy_msb_ce_combined_dly4; + rcmb_xy_lsb_ce_combined_dly6 <= rcmb_xy_lsb_ce_combined_dly5; + rcmb_xy_msb_ce_combined_dly6 <= rcmb_xy_msb_ce_combined_dly5; end reg rcmb_xy_lsb_valid = 1'b0; @@ -775,8 +816,8 @@ module modexpng_recombinator_block rcmb_xy_lsb_valid <= 1'b0; rcmb_xy_msb_valid <= 1'b0; end else begin - rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly; - rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly; + rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly6; + rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly6; end @@ -811,17 +852,17 @@ module modexpng_recombinator_block reg cnt_lsb_wrapped; reg cnt_msb_wrapped; - reg [31:0] rcmb_xy_msb_delay_0; - reg [31:0] rcmb_xy_msb_delay_1; - reg [31:0] rcmb_xy_msb_delay_2; + reg [WORD_W-1:0] rcmb_xy_msb_dly_0_x, rcmb_xy_msb_dly_0_y; + reg [WORD_W-1:0] rcmb_xy_msb_dly_1_x, rcmb_xy_msb_dly_1_y; + reg [WORD_W-1:0] rcmb_xy_msb_dly_2_x, rcmb_xy_msb_dly_2_y; - reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_0 = OP_ADDR_ZERO; - reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_1 = OP_ADDR_ZERO; - reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_2 = OP_ADDR_ZERO; + reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_0 = OP_ADDR_ZERO; + reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_1 = OP_ADDR_ZERO; + reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_2 = OP_ADDR_ZERO; - reg rcmb_msb_flag_delay_0 = 1'b0; - reg rcmb_msb_flag_delay_1 = 1'b0; - reg rcmb_msb_flag_delay_2 = 1'b0; + reg rcmb_msb_flag_dly_0 = 1'b0; + reg rcmb_msb_flag_dly_1 = 1'b0; + reg rcmb_msb_flag_dly_2 = 1'b0; // @@ -870,24 +911,24 @@ module modexpng_recombinator_block input flag; begin // - rcmb_xy_msb_delay_0 <= {dout_y, dout_x}; - rcmb_xy_msb_delay_1 <= rcmb_xy_msb_delay_0; - rcmb_xy_msb_delay_2 <= rcmb_xy_msb_delay_1; + {rcmb_xy_msb_dly_0_x, rcmb_xy_msb_dly_0_y} <= {dout_x, dout_y}; + {rcmb_xy_msb_dly_1_x, rcmb_xy_msb_dly_1_y} <= {rcmb_xy_msb_dly_0_x, rcmb_xy_msb_dly_0_y}; + {rcmb_xy_msb_dly_2_x, rcmb_xy_msb_dly_2_y} <= {rcmb_xy_msb_dly_1_x, rcmb_xy_msb_dly_1_y}; // - rcmb_msb_cnt_delay_0 <= cnt; - rcmb_msb_cnt_delay_1 <= rcmb_msb_cnt_delay_0; - rcmb_msb_cnt_delay_2 <= rcmb_msb_cnt_delay_1; + rcmb_msb_cnt_dly_0 <= cnt; + rcmb_msb_cnt_dly_1 <= rcmb_msb_cnt_dly_0; + rcmb_msb_cnt_dly_2 <= rcmb_msb_cnt_dly_1; // - rcmb_msb_flag_delay_0 <= flag; - rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0; - rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1; + rcmb_msb_flag_dly_0 <= flag; + rcmb_msb_flag_dly_1 <= rcmb_msb_flag_dly_0; + rcmb_msb_flag_dly_2 <= rcmb_msb_flag_dly_1; // end endtask task _update_rcmb_msb_carry; - input [WORD_W -1:0] dout_x; - input [WORD_W -1:0] dout_y; + input [WORD_W-1:0] dout_x; + input [WORD_W-1:0] dout_y; begin rcmb_x_msb_carry_0 <= dout_x; rcmb_y_msb_carry_0 <= dout_y; @@ -1031,8 +1072,8 @@ module modexpng_recombinator_block always @(posedge clk) // if (ena) begin - clr_cnt_lsb(); - clr_cnt_msb(); + clr_cnt_lsb; + clr_cnt_msb; end else if (!rdy) // case (rcmb_mode) @@ -1040,22 +1081,22 @@ module modexpng_recombinator_block RCMB_MODE_TRIANGLE: recombine_triangle(); RCMB_MODE_RECTANGLE: recombine_rectangle(); endcase - - + + // // Padding // wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_pad = {CARRY_ZERO, rcmb_x_lsb_dout}; wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_pad = {CARRY_ZERO, rcmb_y_lsb_dout}; - wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_ext_pad = {1'b0, rcmb_x_lsb_dout_ext}; - wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_ext_pad = {1'b0, rcmb_y_lsb_dout_ext}; + wire [WORD_EXT_W-1:0] rcmb_x_lsb_doutw_pad = {1'b0, rcmb_x_lsb_doutw}; + wire [WORD_EXT_W-1:0] rcmb_y_lsb_doutw_pad = {1'b0, rcmb_y_lsb_doutw}; wire [WORD_EXT_W-1:0] rcmb_x_msb_dout_pad = {CARRY_ZERO, rcmb_x_msb_dout}; wire [WORD_EXT_W-1:0] rcmb_y_msb_dout_pad = {CARRY_ZERO, rcmb_y_msb_dout}; - wire [WORD_EXT_W-1:0] rcmb_x_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[15: 0]}; - wire [WORD_EXT_W-1:0] rcmb_y_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[31:16]}; + wire [WORD_EXT_W-1:0] rcmb_xy_msb_dly_2_x_pad = {CARRY_ZERO, rcmb_xy_msb_dly_2_x}; + wire [WORD_EXT_W-1:0] rcmb_xy_msb_dly_2_y_pad = {CARRY_ZERO, rcmb_xy_msb_dly_2_y}; // @@ -1079,25 +1120,25 @@ module modexpng_recombinator_block // case (rcmb_xy_valid) // - 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); + 2'b00: if (rcmb_msb_flag_dly_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_dly_2, rcmb_xy_msb_dly_2_x_pad, rcmb_xy_msb_dly_2_y_pad); else clear_wide; // - 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); // 2'b10: if (cnt_msb < OP_ADDR_TWO) clear_wide; - else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); // - 2'b11: if (!cnt_lsb_wrapped) set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b11: if (!cnt_lsb_wrapped) set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); else begin - if (cnt_lsb == OP_ADDR_ZERO) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); - else set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad); + if (cnt_lsb == OP_ADDR_ZERO) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + else set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_doutw_pad, rcmb_y_lsb_doutw_pad); end // endcase // case (rcmb_xy_valid) // - 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0); + 2'b00: if (rcmb_msb_flag_dly_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0); // 2'b01: if (rcmb_xy_lsb_cry) pop_rcmb_msb_carry; // @@ -1120,7 +1161,7 @@ module modexpng_recombinator_block begin // case (rcmb_xy_valid) - 2'b01: inc_cnt_lsb(); + 2'b01: inc_cnt_lsb; endcase // case (rcmb_xy_valid) @@ -1156,7 +1197,7 @@ module modexpng_recombinator_block // case (rcmb_xy_valid) // - 2'b00: if (rcmb_msb_flag_delay_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); + 2'b00: if (rcmb_msb_flag_dly_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_dly_2, rcmb_xy_msb_dly_2_x_pad, rcmb_xy_msb_dly_2_y_pad); else clear_rdct; // 2'b01: set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); @@ -1166,14 +1207,14 @@ module modexpng_recombinator_block else set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); end else set_rdct(BANK_RCMB_EXT, OP_ADDR_ZERO, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); // - 2'b11: if (cnt_lsb == OP_ADDR_ZERO) set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); - else set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad); + 2'b11: if (cnt_lsb == OP_ADDR_ZERO) set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + else set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_doutw_pad, rcmb_y_lsb_doutw_pad); // endcase // case (rcmb_xy_valid) // - 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0); + 2'b00: if (rcmb_msb_flag_dly_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0); // 2'b01: if (rcmb_xy_lsb_cry) pop_rcmb_msb_carry; // @@ -1200,9 +1241,9 @@ module modexpng_recombinator_block else if (!rdy_reg) // case (rcmb_mode) - RCMB_MODE_SQUARE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_delay_2) rdy_adv <= ~rcmb_msb_flag_delay_1; endcase - RCMB_MODE_TRIANGLE: case (rcmb_xy_valid) 2'b01: rdy_adv <= cnt_lsb_wrapped; endcase - RCMB_MODE_RECTANGLE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_delay_2) rdy_adv <= ~rcmb_msb_flag_delay_1; endcase + RCMB_MODE_SQUARE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_dly_2) rdy_adv <= ~rcmb_msb_flag_dly_1; endcase + RCMB_MODE_TRIANGLE: case (rcmb_xy_valid) 2'b01: rdy_adv <= cnt_lsb_wrapped; endcase + RCMB_MODE_RECTANGLE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_dly_2) rdy_adv <= ~rcmb_msb_flag_dly_1; endcase endcase diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v index 45fc68c..0c9ab00 100644 --- a/rtl/modexpng_recombinator_cell.v +++ b/rtl/modexpng_recombinator_cell.v @@ -34,7 +34,7 @@ module modexpng_recombinator_cell ( clk, ce, clr, - din, dout, dout_ext + din, dout, doutw ); @@ -54,63 +54,172 @@ module modexpng_recombinator_cell input clr; input [ MAC_W -1:0] din; output [WORD_W -1:0] dout; - output [WORD_W :0] dout_ext; + output [WORD_W :0] doutw; // // din <=> {z[13:0], y[15:0], x[15:0]} // - wire [WORD_W -3:0] din_z = din[3 * WORD_W -3 : 2 * WORD_W]; // [45:32] - wire [WORD_W -1:0] din_y = din[2 * WORD_W -1 : WORD_W]; // [31:16] - wire [WORD_W -1:0] din_x = din[ WORD_W -1 : 0]; // [15: 0] - + wire [WORD_W -1:0] din_z = {2'b00, din[3 * WORD_W -3 : 2 * WORD_W]}; // [47:46][45:32] + wire [WORD_W -1:0] din_y = { din[2 * WORD_W -1 : WORD_W]}; // [31:16] + wire [WORD_W -1:0] din_x = { din[ WORD_W -1 : 0]}; // [15: 0] + // - // Delayed Clock Enable + // Delayed Clock Enables + // + reg ce_dly1 = 1'b0, ce_dly2 = 1'b0, ce_dly3 = 1'b0, ce_dly4 = 1'b0, ce_dly5 = 1'b0, ce_dly6 = 1'b0; + always @(posedge clk) {ce_dly1, ce_dly2, ce_dly3, ce_dly4, ce_dly5, ce_dly6} <= {ce, ce_dly1, ce_dly2, ce_dly3, ce_dly4, ce_dly5}; + + + // + // Delayed Clear // - reg ce_dly = 1'b0; - always @(posedge clk) ce_dly <= ce; + reg clr_dly1, clr_dly2, clr_dly3, clr_dly4; + always @(posedge clk) {clr_dly1, clr_dly2, clr_dly3, clr_dly4} <= {clr, clr_dly1, clr_dly2, clr_dly3}; + + // + // Phase Flip-Flop + // + reg phase_ff, phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4, phase_ff_dly5; + always @(posedge clk) + if (ce) phase_ff <= ~phase_ff; + else if (clr) phase_ff <= 1'b0; + + always @(posedge clk) + {phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4, phase_ff_dly5} <= {phase_ff, phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4}; + // - // DSP Slice Buses + // Shift Registers // - wire [DSP48E1_A_W-1:0] a_int; - wire [DSP48E1_B_W-1:0] b_int; - wire [DSP48E1_C_W-1:0] c_int; - wire [DSP48E1_P_W-1:0] p_int; + reg [WORD_W-1:0] din_x_dly1; + reg [WORD_W-1:0] din_y_dly1; + reg [WORD_W-1:0] din_z_dly1; + reg [WORD_W-1:0] din_z_dly2; - assign {a_int, b_int} = {{(DSP48E1_C_W-(2*WORD_W+1)){1'b0}}, din_z, din_y[WORD_W-1], din_y}; - assign {c_int} = {{(DSP48E1_C_W-(2*WORD_W+1)){1'b0}}, WORD_ZERO, din_x[WORD_W-1], din_x}; + always @(posedge clk) begin + // + if (ce) {din_x_dly1, din_y_dly1, din_z_dly1} <= {din_x, din_y, din_z}; + else if (clr) {din_x_dly1, din_y_dly1, din_z_dly1} <= {WORD_ZERO, WORD_ZERO, WORD_ZERO}; + // + if (ce) {din_z_dly2} <= {din_z_dly1}; + else if (clr) {din_z_dly2} <= {WORD_ZERO}; + // + end + + // + // DSP Input Registers // - // Combinational OPMODE Switch + reg [2 * WORD_W-1:0] master_ab_reg; + reg [2 * WORD_W-1:0] master_c_reg; + + reg [ WORD_W+1:0] slave_ab_reg; + reg [ WORD_W+1:0] slave_ab_next_reg; + + + // + // DSP Cascade Bus + // + wire [DSP48E1_P_W-1:0] master_slave_p_int; + + + // + // DSP Output Buses // - reg [DSP48E1_OPMODE_W-1:0] opmode; + wire [DSP48E1_P_W-1:0] master_p_int; + wire [DSP48E1_P_W-1:0] slave_p_int; + - always @(clr) - // - case (clr) - 1'b1: opmode = DSP48E1_OPMODE_Z0_YC_X0; - 1'b0: opmode = DSP48E1_OPMODE_ZP17_YC_XAB; - endcase + // + // DSP Input Mapping + // + wire [DSP48E1_C_W-1:0] master_ab_int = {{(DSP48E1_C_W - 2 * WORD_W){1'b0}}, master_ab_reg}; + wire [DSP48E1_C_W-1:0] master_c_int = {{(DSP48E1_C_W - 2 * WORD_W){1'b0}}, master_c_reg}; + + wire [DSP48E1_C_W-1:0] slave_ab_int = {{(DSP48E1_C_W - (WORD_W+3)){1'b0}}, slave_ab_reg[WORD_W+1:WORD_W], 1'b1, slave_ab_reg[WORD_W-1:0]}; + wire [DSP48E1_C_W-1:0] slave_c_int = {DSP48E1_C_W{1'b0}}; // - // DSP Slice Instance + // Master DSP Input Logic // - `MODEXPNG_DSP_SLICE_ADDSUB #(.AB_REG(2)) dsp_inst + always @(posedge clk) + // + if (ce) begin + master_ab_reg <= !phase_ff ? {din_y, din_y_dly1} : {din_x, din_x_dly1}; + master_c_reg <= !phase_ff ? {din_z_dly1, din_z_dly2} : {WORD_DNC, WORD_DNC}; + end else begin + master_ab_reg <= {WORD_DNC, WORD_DNC}; + master_c_reg <= {WORD_DNC, WORD_DNC}; + end + + + // + // Slave DSP Input Logic + // + always @(posedge clk) begin + // + slave_ab_reg <= {(WORD_W+2){1'bX}}; + slave_ab_next_reg <= {(WORD_W+2){1'bX}}; + // + if (ce_dly3 && phase_ff_dly3) slave_ab_next_reg <= {master_p_int[2*WORD_W+1:WORD_W]}; + // + if (ce_dly3 && phase_ff_dly3) slave_ab_reg <= {2'b00, master_p_int[WORD_W-1:0]}; + if (ce_dly4 && phase_ff_dly4) slave_ab_reg <= slave_ab_next_reg; + // + end + + + // + // OPMODE Logic + // + reg [DSP48E1_OPMODE_W-1:0] master_opmode; + reg [DSP48E1_OPMODE_W-1:0] slave_opmode; + + always @(posedge clk) begin + // + if (ce) master_opmode <= !phase_ff ? DSP48E1_OPMODE_Z0_YC_XAB : DSP48E1_OPMODE_ZP_Y0_XAB; + else master_opmode <= DSP48E1_OPMODE_DNC; + // + if (ce_dly4) slave_opmode <= clr_dly4 ? DSP48E1_OPMODE_Z0_Y0_XAB : DSP48E1_OPMODE_ZP17_Y0_XAB; + else slave_opmode <= DSP48E1_OPMODE_DNC; + // + end + + + // + // DSP Slice Instances + // + `MODEXPNG_DSP_SLICE_ADDSUB dsp_master_inst + ( + .clk (clk), + .ce_abc (ce_dly1), + .ce_p (ce_dly2), + .ce_ctrl (ce_dly1), + .ab (master_ab_int), + .c (master_c_int), + .p (master_p_int), + .op_mode (master_opmode), + .alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN), + .carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN), + .casc_p_in (), + .casc_p_out (), + .carry_out () + ); + + `MODEXPNG_DSP_SLICE_ADDSUB dsp_slave_inst ( .clk (clk), - .ce_ab1 (ce), - .ce_ab2 (ce_dly), - .ce_c (ce), - .ce_p (ce_dly), - .ce_ctrl (ce), - .ab ({a_int, b_int}), - .c (c_int), - .p (p_int), - .op_mode (opmode), + .ce_abc (ce_dly5), + .ce_p (ce_dly6), + .ce_ctrl (ce_dly5), + .ab (slave_ab_int), + .c (slave_c_int), + .p (slave_p_int), + .op_mode (slave_opmode), .alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN), .carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN), .casc_p_in (), @@ -120,10 +229,9 @@ module modexpng_recombinator_cell // - // Output Mapping - // - assign dout = {p_int[WORD_W-1:0]}; - assign dout_ext = {p_int[WORD_W+1], dout}; - + // Output Register + // + assign dout = {slave_p_int[WORD_W-1:0]}; + assign doutw = {slave_p_int[WORD_W+1], dout}; endmodule -- cgit v1.2.3