From ab061afd20523bdb0342613f4eb343daee6571c6 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Thu, 16 Jan 2020 21:47:01 +0300 Subject: This commit modifies the REGULAR_ADD_UNEVEN micro-operation to use DSP slices for addition instead of fabric logic. This opcode is only necessary when in CRT mode and is executed once per entire exponentiation to recombine the two "easier" exponentiations. This was the final change necessary to get rid of using fabric math in the general worker module. --- rtl/modexpng_general_worker.v | 310 ++++++++++++------------------------------ 1 file changed, 88 insertions(+), 222 deletions(-) diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v index 6652f14..6618b5f 100644 --- a/rtl/modexpng_general_worker.v +++ b/rtl/modexpng_general_worker.v @@ -30,7 +30,7 @@ // //====================================================================== -module modexpng_general_worker +module modexpng_general_worker_new ( clk, rst_n, ena, rdy, @@ -387,7 +387,8 @@ module modexpng_general_worker case (opcode) // UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_MODULAR_SUBTRACT_X: + UOP_OPCODE_MODULAR_SUBTRACT_X, + UOP_OPCODE_REGULAR_ADD_UNEVEN: // case (wrk_fsm_state) WRK_FSM_STATE_BUSY2, @@ -395,8 +396,7 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_POST4: enable_narrow_wr_en; endcase // - UOP_OPCODE_MERGE_LH, - UOP_OPCODE_REGULAR_ADD_UNEVEN: + UOP_OPCODE_MERGE_LH: // case (wrk_fsm_state) WRK_FSM_STATE_BUSY1, @@ -754,7 +754,8 @@ module modexpng_general_worker case (opcode) // UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_MODULAR_SUBTRACT_X: + UOP_OPCODE_MODULAR_SUBTRACT_X, + UOP_OPCODE_REGULAR_ADD_UNEVEN: // case (wrk_fsm_state) WRK_FSM_STATE_BUSY2, @@ -762,8 +763,7 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_POST4: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[4], rd_narrow_addr_y_dly[4]); endcase // - UOP_OPCODE_MERGE_LH, - UOP_OPCODE_REGULAR_ADD_UNEVEN: + UOP_OPCODE_MERGE_LH: // case (wrk_fsm_state) WRK_FSM_STATE_BUSY1, @@ -802,6 +802,22 @@ module modexpng_general_worker endcase // end + + + // + // UOP_OPCODE_REGULAR_ADD_UNEVEN + // + reg regular_add_uneven_flag; + + always @(posedge clk) + // + case (opcode) + UOP_OPCODE_REGULAR_ADD_UNEVEN: + case (wrk_fsm_state) + WRK_FSM_STATE_LATENCY_PRE4: regular_add_uneven_flag <= 1'b0; + WRK_FSM_STATE_BUSY2: if (rd_wide_addr_is_last_half_dly[2]) regular_add_uneven_flag <= 1'b1; + endcase + endcase // @@ -853,7 +869,8 @@ module modexpng_general_worker // UOP_OPCODE_PROPAGATE_CARRIES, UOP_OPCODE_MODULAR_SUBTRACT_X, - UOP_OPCODE_MODULAR_SUBTRACT_Y: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2}; + UOP_OPCODE_MODULAR_SUBTRACT_Y, + UOP_OPCODE_REGULAR_ADD_UNEVEN: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2}; default: {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0}; // endcase @@ -929,6 +946,30 @@ module modexpng_general_worker // end // + UOP_OPCODE_REGULAR_ADD_UNEVEN: begin + // + if (rd_narrow_ena_x_dly2) begin + if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x <= DSP48E1_OPMODE_Z0_YC_XAB; + else begin + if (!regular_add_uneven_flag) dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_XAB; + else dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_X0; + end + dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN; + dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN; + end + // + if (rd_narrow_ena_y_dly2) begin + if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y <= DSP48E1_OPMODE_Z0_YC_XAB; + else begin + if (!regular_add_uneven_flag) dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_XAB; + else dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_X0; + end + dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN; + dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN; + end + // + end + // endcase // end @@ -988,14 +1029,32 @@ module modexpng_general_worker dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]}; dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; - dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_y_din_x[WORD_W-1:0]}; + dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]}; end // if (rd_narrow_ena_y_dly2) begin dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]}; dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; - dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_y_din_y[WORD_W-1:0]}; + dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]}; + end + // + end + // + UOP_OPCODE_REGULAR_ADD_UNEVEN: begin + // + if (rd_narrow_ena_x_dly2) begin + dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_x_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_x [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_x [WORD_W-1:0]}; + dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; + dsp_y_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_x [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_x [WORD_W-1:0]}; + end + // + if (rd_narrow_ena_y_dly2) begin + dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_x_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_y [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_y [WORD_W-1:0]}; + dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; + dsp_y_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_y [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_y [WORD_W-1:0]}; end // end @@ -1022,7 +1081,7 @@ module modexpng_general_worker .carry_in_sel (dsp_carry_in_sel_x), .casc_p_in (), .casc_p_out (), - .carryout (dsp_carry_out_x) + .carry_out (dsp_carry_out_x) ); `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_x @@ -1039,7 +1098,7 @@ module modexpng_general_worker .carry_in_sel (dsp_carry_in_sel_x), .casc_p_in (), .casc_p_out (), - .carryout () + .carry_out () ); `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_y @@ -1056,7 +1115,7 @@ module modexpng_general_worker .carry_in_sel (dsp_carry_in_sel_y), .casc_p_in (), .casc_p_out (), - .carryout (dsp_carry_out_y) + .carry_out (dsp_carry_out_y) ); `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_y @@ -1073,140 +1132,43 @@ module modexpng_general_worker .carry_in_sel (dsp_carry_in_sel_y), .casc_p_in (), .casc_p_out (), - .carryout () + .carry_out () ); // // UOP_OPCODE_MODULAR_SUBTRACT_X // - reg modular_subtract_x_brw_flag; - reg modular_subtract_y_brw_flag; // // IMPORTANT: DSP48E1 turns out to have a very non-obvious feature: when doing _subtraction_, // the CARRYOUT[3] is _NOT_ equivalent to the borrow flag! See "CARRYOUT/CARRYCASCOUT" // section of Appendix A on pp. 55-56 of UG479 for more details. // - always @(posedge clk) - // - case (opcode) - UOP_OPCODE_MODULAR_SUBTRACT_X: - case (wrk_fsm_state) - WRK_FSM_STATE_LATENCY_POST4: - //{modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {1'bX, 1'bZ}; - {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y}; - endcase - endcase - - - //reg modular_subtract_x_brw_r; - //reg modular_subtract_y_brw_r; - - //reg modular_subtract_x_cry_r; - //reg modular_subtract_y_cry_r; - - //wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r}; - //wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r}; - - //wire [WORD_W:0] modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r}; - //wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_cry_r}; - - //reg [WORD_W:0] modular_subtract_x_w_brw_r; - //reg [WORD_W:0] modular_subtract_y_w_brw_r; - - //reg [WORD_W:0] modular_subtract_x_w_cry_r; - //reg [WORD_W:0] modular_subtract_y_w_cry_r; - - //wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W]; - //wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W]; - - //wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W]; - //wire modular_subtract_y_w_cry_msb = modular_subtract_y_w_cry_r[WORD_W]; - - //wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0]; - //wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0]; - - //wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0]; - //wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0]; - //wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb}; - //wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb}; + reg modular_subtract_x_brw_flag; + reg modular_subtract_y_brw_flag; - //wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb}; - //wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb}; - reg [WORD_EXT_W -1:0] modular_subtract_x_mux; reg [WORD_EXT_W -1:0] modular_subtract_y_mux; wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]}; wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]}; - - //task _modular_subtract_update_brw; - //input x_brw, y_brw; - //{modular_subtract_x_brw_r, modular_subtract_y_brw_r} <= {x_brw, y_brw}; - //endtask - - //task _modular_subtract_update_cry; - //input x_cry, y_cry; - //{modular_subtract_x_cry_r, modular_subtract_y_cry_r} <= {x_cry, y_cry}; - //endtask - - //task modular_subtract_clear_brw; _modular_subtract_update_brw( 1'b0, 1'b0); endtask - //task modular_subtract_store_brw; _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb); endtask - - //task modular_subtract_clear_cry; _modular_subtract_update_cry( 1'b0, 1'b0); endtask - //task modular_subtract_store_cry; _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb); endtask - - //task _modular_subtract_update_diff_w_brw; - //input [WORD_W:0] x_diff_w_brw, y_diff_w_brw; - //{modular_subtract_x_w_brw_r, modular_subtract_y_w_brw_r} <= {x_diff_w_brw, y_diff_w_brw}; - //endtask - - //task _modular_subtract_update_sum_w_cry; - //input [WORD_W:0] x_sum_w_cry, y_sum_w_cry; - //{modular_subtract_x_w_cry_r, modular_subtract_y_w_cry_r} <= {x_sum_w_cry, y_sum_w_cry}; - //endtask - - //task modular_subtract_store_diff_w_brw; _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw); endtask - //task modular_subtract_store_sum_w_cry; _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry); endtask - always @(posedge clk) // case (opcode) - // - //UOP_OPCODE_MODULAR_SUBTRACT_X: - // - //case (wrk_fsm_state) - // - //WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_brw; - //WRK_FSM_STATE_BUSY1, - //WRK_FSM_STATE_LATENCY_POST1, - //WRK_FSM_STATE_LATENCY_POST3: modular_subtract_store_brw; // we need the very last borrow here too! - // - //WRK_FSM_STATE_LATENCY_PRE4, - //WRK_FSM_STATE_BUSY2, - //WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_diff_w_brw; - // - //endcase - // - //UOP_OPCODE_MODULAR_SUBTRACT_Y: - // - //case (wrk_fsm_state) - // - //WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_cry; - //WRK_FSM_STATE_BUSY1, - //WRK_FSM_STATE_LATENCY_POST1: modular_subtract_store_cry; - // - //WRK_FSM_STATE_LATENCY_PRE4, - //WRK_FSM_STATE_BUSY2, - //WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_sum_w_cry; - // - //endcase - // + UOP_OPCODE_MODULAR_SUBTRACT_X: + case (wrk_fsm_state) + WRK_FSM_STATE_LATENCY_POST4: + {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y}; + endcase + endcase + + always @(posedge clk) + // + case (opcode) UOP_OPCODE_MODULAR_SUBTRACT_Z: - // case (wrk_fsm_state) // WRK_FSM_STATE_LATENCY_PRE4, @@ -1215,96 +1177,10 @@ module modexpng_general_worker // begin modular_subtract_x_mux <= !modular_subtract_x_brw_flag ? rd_narrow_x_din_x_dly1 : rd_wide_x_din_x_dly1; modular_subtract_y_mux <= !modular_subtract_y_brw_flag ? rd_narrow_x_din_y_dly1 : rd_wide_x_din_y_dly1; end - // - endcase - // + endcase endcase - // - // UOP_OPCODE_REGULAR_ADD_UNEVEN - // - reg [CARRY_W -1:0] regular_add_uneven_x_x_cry_r; - reg [CARRY_W -1:0] regular_add_uneven_y_x_cry_r; - reg [CARRY_W -1:0] regular_add_uneven_x_y_cry_r; - reg [CARRY_W -1:0] regular_add_uneven_y_y_cry_r; - - wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_msb_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_x_cry_r}; - wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_msb_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_x_cry_r}; - wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_msb_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_y_cry_r}; - wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_msb_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_y_cry_r}; - - wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_lsb_w_cry = regular_add_uneven_x_x_msb_w_cry + rd_wide_x_din_x_dly1; - wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_lsb_w_cry = regular_add_uneven_y_x_msb_w_cry + rd_wide_y_din_x_dly1; - wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_lsb_w_cry = regular_add_uneven_x_y_msb_w_cry + rd_wide_x_din_y_dly1; - wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_lsb_w_cry = regular_add_uneven_y_y_msb_w_cry + rd_wide_y_din_y_dly1; - - reg [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_r; - reg [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_r; - reg [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_r; - reg [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_r; - - wire [CARRY_W -1:0] regular_add_uneven_x_x_w_cry_msb = regular_add_uneven_x_x_w_cry_r[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] regular_add_uneven_y_x_w_cry_msb = regular_add_uneven_y_x_w_cry_r[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] regular_add_uneven_x_y_w_cry_msb = regular_add_uneven_x_y_w_cry_r[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] regular_add_uneven_y_y_w_cry_msb = regular_add_uneven_y_y_w_cry_r[WORD_EXT_W -1:WORD_W]; - - wire [WORD_W -1:0] regular_add_uneven_x_x_w_cry_lsb = regular_add_uneven_x_x_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] regular_add_uneven_y_x_w_cry_lsb = regular_add_uneven_y_x_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] regular_add_uneven_x_y_w_cry_lsb = regular_add_uneven_x_y_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] regular_add_uneven_y_y_w_cry_lsb = regular_add_uneven_y_y_w_cry_r[WORD_W -1:0]; - - wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_x_w_cry_lsb}; - wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_x_w_cry_lsb}; - wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_y_w_cry_lsb}; - wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_y_w_cry_lsb}; - - reg regular_add_uneven_store_lsb_now; - - task _regular_add_uneven_update_cry; - input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry; - { regular_add_uneven_x_x_cry_r, regular_add_uneven_y_x_cry_r, regular_add_uneven_x_y_cry_r, regular_add_uneven_y_y_cry_r} <= - { x_x_cry, y_x_cry, x_y_cry, y_y_cry}; - endtask - - task regular_add_uneven_clear_cry; _regular_add_uneven_update_cry( CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); endtask - task regular_add_uneven_store_cry; _regular_add_uneven_update_cry(regular_add_uneven_x_x_w_cry_msb, regular_add_uneven_y_x_w_cry_msb, regular_add_uneven_x_y_w_cry_msb, regular_add_uneven_y_y_w_cry_msb); endtask - - task _regular_add_uneven_update_sum_w_cry; - input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry; - { regular_add_uneven_x_x_w_cry_r, regular_add_uneven_y_x_w_cry_r, regular_add_uneven_x_y_w_cry_r, regular_add_uneven_y_y_w_cry_r} <= - { x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry}; - endtask - - task regular_add_uneven_store_sum_lsb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_lsb_w_cry, regular_add_uneven_y_x_lsb_w_cry, regular_add_uneven_x_y_lsb_w_cry, regular_add_uneven_y_y_lsb_w_cry); endtask - - task regular_add_uneven_store_sum_msb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_msb_w_cry, regular_add_uneven_y_x_msb_w_cry, regular_add_uneven_x_y_msb_w_cry, regular_add_uneven_y_y_msb_w_cry); endtask - - always @(posedge clk) - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_LATENCY_PRE3: regular_add_uneven_store_lsb_now <= 1'b1; - WRK_FSM_STATE_BUSY1: if (rd_wide_addr_is_last_half_dly[3]) regular_add_uneven_store_lsb_now <= 1'b0; - // - endcase - - always @(posedge clk) - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_LATENCY_PRE3: regular_add_uneven_clear_cry; - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1: regular_add_uneven_store_cry; - // - WRK_FSM_STATE_LATENCY_PRE4: regular_add_uneven_store_sum_lsb_w_cry; - WRK_FSM_STATE_BUSY2: if (regular_add_uneven_store_lsb_now) regular_add_uneven_store_sum_lsb_w_cry; - else regular_add_uneven_store_sum_msb_w_cry; - WRK_FSM_STATE_LATENCY_POST2: regular_add_uneven_store_sum_msb_w_cry; - // - endcase - - // // FSM Process // @@ -1424,7 +1300,8 @@ module modexpng_general_worker case (opcode) // UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_MODULAR_SUBTRACT_X: + UOP_OPCODE_MODULAR_SUBTRACT_X, + UOP_OPCODE_REGULAR_ADD_UNEVEN: // case (wrk_fsm_state) // @@ -1524,17 +1401,6 @@ module modexpng_general_worker // endcase // - UOP_OPCODE_REGULAR_ADD_UNEVEN: - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST3: - // - update_narrow_dout(regular_add_uneven_x_x_w_cry_reduced, regular_add_uneven_y_x_w_cry_reduced, regular_add_uneven_x_y_w_cry_reduced, regular_add_uneven_y_y_w_cry_reduced); - // - endcase endcase // end -- cgit v1.2.3