From c5516257cb32d5608e028a36d48f8d24a6446e48 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 21 Jan 2020 00:05:18 +0300 Subject: Refactored modular reductor module. --- rtl/modexpng_reductor.v | 388 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 266 insertions(+), 122 deletions(-) diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v index 7404eba..1e6dcc1 100644 --- a/rtl/modexpng_reductor.v +++ b/rtl/modexpng_reductor.v @@ -46,6 +46,8 @@ module modexpng_reductor // Headers // `include "modexpng_parameters.vh" + `include "modexpng_dsp48e1.vh" + `include "modexpng_dsp_slice_primitives.vh" // @@ -97,7 +99,7 @@ module modexpng_reductor reg [ WORD_EXT_W -1:0] narrow_x_dout; reg [ WORD_EXT_W -1:0] narrow_y_dout; reg narrow_xy_valid = 1'b0; - + // // Mapping @@ -162,7 +164,7 @@ module modexpng_reductor input [ WORD_EXT_W -1:0] dout_x; input [ WORD_EXT_W -1:0] dout_y; _update_rdct_narrow(bank, addr, dout_x, dout_y, 1'b1); - endtask + endtask task clear_rdct_wide; _update_rdct_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0); @@ -181,179 +183,321 @@ module modexpng_reductor always @(posedge clk) // - {rd_wide_x_din_aux_pipe, rd_wide_y_din_aux_pipe} <= - {rd_wide_x_din_aux, rd_wide_y_din_aux } ; + {rd_wide_y_din_aux_pipe, rd_wide_x_din_aux_pipe} <= {rd_wide_y_din_aux, rd_wide_x_din_aux}; + + // + // Loop counter shared by the for-loops of the delay lines below + // + integer i; // // Delay rcmb_final_* to match rd_wide_* // - reg rcmb_xy_valid_dly1_x = 1'b0; - reg rcmb_xy_valid_dly2_x = 1'b0; - reg rcmb_xy_valid_dly3_x = 1'b0; - reg rcmb_xy_valid_dly4_x = 1'b0; - - reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1_x; - reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2_x; - reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3_x; - reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly4_x; - - reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly1_x; - reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly2_x; - reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly3_x; - reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly4_x; - - reg [ WORD_EXT_W -1:0] 
rcmb_x_dout_dly1_x; - reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2_x; - reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3_x; - reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly4_x; - - reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1_x; - reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2_x; - reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3_x; - reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly4_x; + reg rcmb_xy_valid_dly[1:6]; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly [1:6]; + reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly [1:6]; + reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly [1:4]; + reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly [1:4]; + + initial for (i=1; i<=6; i=i+1) rcmb_xy_valid_dly[i] = 1'b0; always @(posedge clk or negedge rst_n) // - if (!rst_n) {rcmb_xy_valid_dly4_x, rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x} <= 4'b0000; - else {rcmb_xy_valid_dly4_x, rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x} <= - {rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x, rcmb_final_xy_valid } ; + if (!rst_n) for (i=1; i<=6; i=i+1) rcmb_xy_valid_dly[i] <= 1'b0; + else begin + rcmb_xy_valid_dly[1] <= rcmb_final_xy_valid; + for (i=2; i<=6; i=i+1) rcmb_xy_valid_dly[i] <= rcmb_xy_valid_dly[i-1]; + end always @(posedge clk) begin // - if (rcmb_final_xy_valid) {rcmb_xy_bank_dly1_x, rcmb_xy_addr_dly1_x, rcmb_x_dout_dly1_x, rcmb_y_dout_dly1_x} <= - {rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_din, rcmb_final_y_din } ; - if (rcmb_xy_valid_dly1_x) {rcmb_xy_bank_dly2_x, rcmb_xy_addr_dly2_x, rcmb_x_dout_dly2_x, rcmb_y_dout_dly2_x} <= - {rcmb_xy_bank_dly1_x, rcmb_xy_addr_dly1_x, rcmb_x_dout_dly1_x, rcmb_y_dout_dly1_x} ; - if (rcmb_xy_valid_dly2_x) {rcmb_xy_bank_dly3_x, rcmb_xy_addr_dly3_x, rcmb_x_dout_dly3_x, rcmb_y_dout_dly3_x} <= - {rcmb_xy_bank_dly2_x, rcmb_xy_addr_dly2_x, rcmb_x_dout_dly2_x, rcmb_y_dout_dly2_x} ; - if (rcmb_xy_valid_dly3_x) {rcmb_xy_bank_dly4_x, rcmb_xy_addr_dly4_x, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x} <= - {rcmb_xy_bank_dly3_x, rcmb_xy_addr_dly3_x, rcmb_x_dout_dly3_x, 
rcmb_y_dout_dly3_x} ; + {rcmb_xy_bank_dly[1], rcmb_xy_addr_dly[1], rcmb_x_dout_dly[1], rcmb_y_dout_dly[1]} <= {rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_din, rcmb_final_y_din }; + for (i=2; i<=6; i=i+1) {rcmb_xy_bank_dly[i], rcmb_xy_addr_dly[i] } <= {rcmb_xy_bank_dly[i-1], rcmb_xy_addr_dly[i-1] }; + for (i=2; i<=4; i=i+1) { rcmb_x_dout_dly[i], rcmb_y_dout_dly[i]} <= { rcmb_x_dout_dly[i-1], rcmb_y_dout_dly[i-1]}; // end - + // - // LSB Carry Logic + // Internal Busy Flag Logic (busy_now is the last stage of a 5-bit shift register fed by busy_next) // - reg [ CARRY_W -1:0] rcmb_x_lsb_carry; - reg [ CARRY_W -1:0] rcmb_y_lsb_carry; - reg [ WORD_W -1:0] rcmb_x_lsb_dummy; - reg [ WORD_W -1:0] rcmb_y_lsb_dummy; - wire [WORD_EXT_W -1:0] rcmb_x_lsb_carry_ext = {WORD_ZERO, rcmb_x_lsb_carry}; - wire [WORD_EXT_W -1:0] rcmb_y_lsb_carry_ext = {WORD_ZERO, rcmb_y_lsb_carry}; + reg busy_next = 1'b0; + reg [4:0] busy_now_shreg = 5'b00000; + wire busy_now = busy_now_shreg[4]; - task calc_rcmb_xy_lsb_carry; - begin - {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly4_x + rd_wide_x_din_aux_pipe + rcmb_x_lsb_carry_ext; - {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly4_x + rd_wide_y_din_aux_pipe + rcmb_y_lsb_carry_ext; + always @(posedge clk or negedge rst_n) + // shift register is preloaded with all ones when rdy && ena, otherwise busy_next is shifted in + if (!rst_n) busy_now_shreg <= 5'b00000; + else begin + if (rdy && ena) busy_now_shreg <= 5'b11111; + else busy_now_shreg <= {busy_now_shreg[3:0], busy_next}; end - endtask + always @(posedge clk or negedge rst_n) + // busy_next is set when rdy && ena and cleared once a valid BANK_RCMB_EXT word is seen at delay stage 4 + if (!rst_n) busy_next <= 1'b0; + else begin + if (rdy && ena) busy_next <= 1'b1; + if (!rdy && rcmb_xy_valid_dly[4] && (rcmb_xy_bank_dly[4] == BANK_RCMB_EXT)) busy_next <= 1'b0; + end + // - // LSB Carry Computation + // Ready Flag Logic (rdy drops when rdy && ena and is set again once busy_now is low) // - always @(posedge clk) + reg rdy_reg = 1'b1; + + assign rdy = rdy_reg; + + always @(posedge clk or negedge rst_n) // - if (ena) begin - // - rcmb_x_lsb_carry <= CARRY_ZERO; - rcmb_y_lsb_carry <= CARRY_ZERO; - // - end else if (rcmb_xy_valid_dly4_x) - // - case (rcmb_xy_bank_dly4_x) - BANK_RCMB_ML: 
calc_rcmb_xy_lsb_carry; - BANK_RCMB_MH: if (rcmb_xy_addr_dly4_x == OP_ADDR_ZERO) calc_rcmb_xy_lsb_carry; - endcase + if (!rst_n) rdy_reg <= 1'b1; + else begin + if (rdy && ena) rdy_reg <= 1'b0; + if (!rdy && !busy_now) rdy_reg <= 1'b1; + end + + + // + // Pipelined Flags (each dlyN flag is registered from rcmb_xy_addr_dly[N-1], so it lines up with delay stage N) + // + reg rcmb_xy_addr_dly3_is_zero; + reg rcmb_xy_addr_dly3_is_one; + reg rcmb_xy_addr_dly3_gt_one; + reg rcmb_xy_addr_dly5_is_one; + reg rcmb_xy_addr_dly5_gt_one; + reg rcmb_xy_addr_dly6_is_zero; + + always @(posedge clk) begin + rcmb_xy_addr_dly3_is_zero <= rcmb_xy_addr_dly[2] == OP_ADDR_ZERO; + rcmb_xy_addr_dly3_is_one <= rcmb_xy_addr_dly[2] == OP_ADDR_ONE; + rcmb_xy_addr_dly3_gt_one <= rcmb_xy_addr_dly[2] > OP_ADDR_ONE; + rcmb_xy_addr_dly5_is_one <= rcmb_xy_addr_dly[4] == OP_ADDR_ONE; + rcmb_xy_addr_dly5_gt_one <= rcmb_xy_addr_dly[4] > OP_ADDR_ONE; + rcmb_xy_addr_dly6_is_zero <= rcmb_xy_addr_dly[5] == OP_ADDR_ZERO; + end + - // - // MSB Sum Logic + // LSB Math (adder slices dsp_lsb_x and dsp_lsb_y, their cascade outputs feed the MSB slices) // - wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly4_x + rd_wide_x_din_aux_pipe; - wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly4_x + rd_wide_y_din_aux_pipe; + reg lsb_ce = 1'b0; + reg lsb_ce_dly = 1'b0; + reg [DSP48E1_OPMODE_W -1:0] lsb_opmode; - wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + rcmb_x_lsb_carry_ext; - wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + rcmb_y_lsb_carry_ext; + wire [DSP48E1_P_W -1:0] lsb_px; + wire [DSP48E1_P_W -1:0] lsb_py; + wire [DSP48E1_C_W -1:0] lsb_ax = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rcmb_x_dout_dly[4][WORD_EXT_W-1:WORD_W], 1'b1, rcmb_x_dout_dly[4][WORD_W-1:0]}; + wire [DSP48E1_C_W -1:0] lsb_ay = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rcmb_y_dout_dly[4][WORD_EXT_W-1:WORD_W], 1'b1, rcmb_y_dout_dly[4][WORD_W-1:0]}; + wire [DSP48E1_C_W -1:0] lsb_bx = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_wide_x_din_aux_pipe[WORD_EXT_W-1:WORD_W], 1'b0, rd_wide_x_din_aux_pipe[WORD_W-1:0]}; + wire [DSP48E1_C_W -1:0] lsb_by = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, 
rd_wide_y_din_aux_pipe[WORD_EXT_W-1:WORD_W], 1'b0, rd_wide_y_din_aux_pipe[WORD_W-1:0]}; + + wire [DSP48E1_P_W -1:0] lsb2msb_px_casc; + wire [DSP48E1_P_W -1:0] lsb2msb_py_casc; + + `MODEXPNG_DSP_SLICE_ADDSUB dsp_lsb_x + ( + .clk (clk), + .ce_abc (lsb_ce), + .ce_p (lsb_ce_dly), + .ce_ctrl (lsb_ce), + .x (lsb_ax), + .y (lsb_bx), + .p (lsb_px), + .op_mode (lsb_opmode), + .alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN), + .carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN), + .casc_p_in (), + .casc_p_out (lsb2msb_px_casc), + .carry_out () + ); + `MODEXPNG_DSP_SLICE_ADDSUB dsp_lsb_y + ( + .clk (clk), + .ce_abc (lsb_ce), + .ce_p (lsb_ce_dly), + .ce_ctrl (lsb_ce), + .x (lsb_ay), + .y (lsb_by), + .p (lsb_py), + .op_mode (lsb_opmode), + .alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN), + .carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN), + .casc_p_in (), + .casc_p_out (lsb2msb_py_casc), + .carry_out () + ); - // - // MSB Sum Computation - // always @(posedge clk or negedge rst_n) // - if (!rst_n) begin - clear_rdct_wide; - clear_rdct_narrow; - end else begin - // - clear_rdct_wide; - clear_rdct_narrow; - // - if (rcmb_xy_valid_dly4_x) + if (!rst_n) lsb_ce <= 1'b0; + else begin + lsb_ce <= 1'b0; + if (rcmb_xy_valid_dly[3]) // - case (rcmb_xy_bank_dly4_x) - - BANK_RCMB_MH: - if (rcmb_xy_addr_dly4_x == OP_ADDR_ONE) begin - set_rdct_wide (sel_wide_out, OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry); - set_rdct_narrow(sel_narrow_out, OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry); - end else if (rcmb_xy_addr_dly4_x > OP_ADDR_ONE) begin - set_rdct_wide (sel_wide_out, rcmb_xy_addr_dly4_x - 1'b1, sum_rdct_x, sum_rdct_y); - set_rdct_narrow(sel_narrow_out, rcmb_xy_addr_dly4_x - 1'b1, sum_rdct_x, sum_rdct_y); - end - - BANK_RCMB_EXT: begin - set_rdct_wide (sel_wide_out, word_index_last, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x); - set_rdct_narrow(sel_narrow_out, word_index_last, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x); - end - + case (rcmb_xy_bank_dly[3]) + BANK_RCMB_ML: lsb_ce <= 1'b1; + 
BANK_RCMB_MH: if (rcmb_xy_addr_dly3_is_zero) lsb_ce <= 1'b1; endcase // end + always @(posedge clk) begin + // the default assignment is disabled below, so lsb_opmode keeps its previous value when no case branch assigns it + //lsb_opmode <= DSP48E1_OPMODE_DNC; + // + if (rcmb_xy_valid_dly[3]) + // + case (rcmb_xy_bank_dly[3]) + BANK_RCMB_ML: if (rcmb_xy_addr_dly3_is_zero) lsb_opmode <= DSP48E1_OPMODE_Z0_YC_XAB; + else lsb_opmode <= DSP48E1_OPMODE_ZP17_YC_XAB; + BANK_RCMB_MH: if (rcmb_xy_addr_dly3_is_zero) lsb_opmode <= DSP48E1_OPMODE_ZP17_YC_XAB; + endcase + // + end + + always @(posedge clk or negedge rst_n) + // lsb_ce delayed by one clock, wired to ce_p of the LSB slices + if (!rst_n) lsb_ce_dly <= 1'b0; + else lsb_ce_dly <= lsb_ce; + // - // Internal Busy Flag Logic + // MSB Math (adder slices dsp_msb_x and dsp_msb_y, casc_p_in is driven by the LSB slices) // - reg busy_next = 1'b0; - reg [2:0] busy_now_shreg = 3'b000; - wire busy_now = busy_now_shreg[2]; + reg msb_ce = 1'b0; + reg msb_ce_dly1 = 1'b0; + reg msb_ce_dly2 = 1'b0; + reg [DSP48E1_OPMODE_W -1:0] msb_opmode; + + wire [DSP48E1_P_W -1:0] msb_px; + wire [DSP48E1_P_W -1:0] msb_py; + wire [DSP48E1_C_W -1:0] msb_ax = {{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rcmb_x_dout_dly[4][WORD_EXT_W-1:WORD_W], rcmb_x_dout_dly[4][WORD_W-1:0]}; + wire [DSP48E1_C_W -1:0] msb_ay = {{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rcmb_y_dout_dly[4][WORD_EXT_W-1:WORD_W], rcmb_y_dout_dly[4][WORD_W-1:0]}; + wire [DSP48E1_C_W -1:0] msb_bx = {{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_wide_x_din_aux_pipe[WORD_EXT_W-1:WORD_W], rd_wide_x_din_aux_pipe[WORD_W-1:0]}; + wire [DSP48E1_C_W -1:0] msb_by = {{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_wide_y_din_aux_pipe[WORD_EXT_W-1:WORD_W], rd_wide_y_din_aux_pipe[WORD_W-1:0]}; + + `MODEXPNG_DSP_SLICE_ADDSUB dsp_msb_x + ( + .clk (clk), + .ce_abc (msb_ce), + .ce_p (msb_ce_dly1), + .ce_ctrl (msb_ce), + .x (msb_ax), + .y (msb_bx), + .p (msb_px), + .op_mode (msb_opmode), + .alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN), + .carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN), + .casc_p_in (lsb2msb_px_casc), + .casc_p_out (), + .carry_out () + ); + + `MODEXPNG_DSP_SLICE_ADDSUB dsp_msb_y + ( + .clk (clk), + .ce_abc (msb_ce), + .ce_p (msb_ce_dly1), + .ce_ctrl (msb_ce), + .x 
(msb_ay), + .y (msb_by), + .p (msb_py), + .op_mode (msb_opmode), + .alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN), + .carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN), + .casc_p_in (lsb2msb_py_casc), + .casc_p_out (), + .carry_out () + ); + always @(posedge clk or negedge rst_n) // - if (!rst_n) busy_now_shreg <= 3'b000; + if (!rst_n) msb_ce <= 1'b0; else begin - if (rdy && ena) busy_now_shreg <= 3'b111; - else busy_now_shreg <= {busy_now_shreg[1:0], busy_next}; - end - + msb_ce <= 1'b0; + if (rcmb_xy_valid_dly[3]) + // + case (rcmb_xy_bank_dly[3]) + BANK_RCMB_MH: if (!rcmb_xy_addr_dly3_is_zero) msb_ce <= 1'b1; + BANK_RCMB_EXT: msb_ce <= 1'b1; + endcase + // + end + + always @(posedge clk) begin + // msb_opmode defaults to DSP48E1_OPMODE_DNC every clock unless a case branch below assigns it + msb_opmode <= DSP48E1_OPMODE_DNC; + // + if (rcmb_xy_valid_dly[3]) + // + case (rcmb_xy_bank_dly[3]) + BANK_RCMB_MH: if (rcmb_xy_addr_dly3_is_one) msb_opmode <= DSP48E1_OPMODE_ZPCIN17_YC_XAB; + else if (rcmb_xy_addr_dly3_gt_one) msb_opmode <= DSP48E1_OPMODE_Z0_YC_XAB; + BANK_RCMB_EXT: msb_opmode <= DSP48E1_OPMODE_Z0_Y0_XAB; + endcase + // + end + always @(posedge clk or negedge rst_n) // - if (!rst_n) busy_next <= 1'b0; - else begin - if (rdy && ena) busy_next <= 1'b1; - if (!rdy && rcmb_xy_valid_dly4_x && (rcmb_xy_bank_dly4_x == BANK_RCMB_EXT)) busy_next <= 1'b0; - end + if (!rst_n) {msb_ce_dly2, msb_ce_dly1} <= {2'b00}; + else {msb_ce_dly2, msb_ce_dly1} <= {msb_ce_dly1, msb_ce}; // - // Ready Flag Logic + // Output Logic (the output address is registered on msb_ce_dly1, the set_rdct_* tasks are called on msb_ce_dly2) // - reg rdy_reg = 1'b1; + reg [OP_ADDR_W -1:0] wide_xy_addr_next; + reg [OP_ADDR_W -1:0] narrow_xy_addr_next; - assign rdy = rdy_reg; + always @(posedge clk) + // registers the output address from the bank and address held at delay stage 5 + if (msb_ce_dly1) + // + case (rcmb_xy_bank_dly[5]) + + BANK_RCMB_MH: + if (rcmb_xy_addr_dly5_is_one) + {wide_xy_addr_next, narrow_xy_addr_next} <= {OP_ADDR_ZERO, OP_ADDR_ZERO}; + else if (rcmb_xy_addr_dly5_gt_one) + {wide_xy_addr_next, narrow_xy_addr_next} <= {rcmb_xy_addr_dly[5] - 1'b1, rcmb_xy_addr_dly[5] - 1'b1}; + + BANK_RCMB_EXT: + {wide_xy_addr_next, narrow_xy_addr_next} <= 
{word_index_last, word_index_last}; + + endcase always @(posedge clk or negedge rst_n) // - if (!rst_n) rdy_reg <= 1'b1; - else begin - if (rdy && ena) rdy_reg <= 1'b0; - if (!rdy && !busy_now) rdy_reg <= 1'b1; - end - + if (!rst_n) begin + clear_rdct_wide; + clear_rdct_narrow; + end else begin + // the clear tasks are called first on every clock; NOTE(review): presumably a set_rdct_* call in a matching case below overrides them, confirm against the task bodies + clear_rdct_wide; + clear_rdct_narrow; + // + if (msb_ce_dly2) + // + case (rcmb_xy_bank_dly[6]) + // + BANK_RCMB_MH: if (!rcmb_xy_addr_dly6_is_zero) begin + set_rdct_wide (sel_wide_out, wide_xy_addr_next, msb_px[WORD_EXT_W-1:0], msb_py[WORD_EXT_W-1:0]); + set_rdct_narrow(sel_narrow_out, narrow_xy_addr_next, msb_px[WORD_EXT_W-1:0], msb_py[WORD_EXT_W-1:0]); + end + // + BANK_RCMB_EXT: begin + set_rdct_wide (sel_wide_out, wide_xy_addr_next, msb_px[WORD_EXT_W-1:0], msb_py[WORD_EXT_W-1:0]); + set_rdct_narrow(sel_narrow_out, narrow_xy_addr_next, msb_px[WORD_EXT_W-1:0], msb_py[WORD_EXT_W-1:0]); + end + // + endcase + // + end endmodule -- cgit v1.2.3