about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru>, 2020-01-16 21:47:01 +0300
committer: Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru>, 2020-01-16 21:47:01 +0300
commit: ab061afd20523bdb0342613f4eb343daee6571c6 (patch)
tree: 3f1287887b4730c2b0a0225692a9992f235921f6
parent: e5f4454e3ac52fa761f301e7d11ad144cd23d590 (diff)
This commit modifies the REGULAR_ADD_UNEVEN micro-operation to use DSP slices
for addition instead of fabric logic. This opcode is only needed in CRT mode, where it is executed once per exponentiation to recombine the results of the two "easier" exponentiations. This was the last change needed to stop using fabric math in the general worker module.
-rw-r--r-- rtl/modexpng_general_worker.v 310
1 file changed, 88 insertions, 222 deletions
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index 6652f14..6618b5f 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -30,7 +30,7 @@
//
//======================================================================
-module modexpng_general_worker
+module modexpng_general_worker_new
(
clk, rst_n,
ena, rdy,
@@ -387,7 +387,8 @@ module modexpng_general_worker
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_MODULAR_SUBTRACT_X:
+ UOP_OPCODE_MODULAR_SUBTRACT_X,
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
case (wrk_fsm_state)
WRK_FSM_STATE_BUSY2,
@@ -395,8 +396,7 @@ module modexpng_general_worker
WRK_FSM_STATE_LATENCY_POST4: enable_narrow_wr_en;
endcase
//
- UOP_OPCODE_MERGE_LH,
- UOP_OPCODE_REGULAR_ADD_UNEVEN:
+ UOP_OPCODE_MERGE_LH:
//
case (wrk_fsm_state)
WRK_FSM_STATE_BUSY1,
@@ -754,7 +754,8 @@ module modexpng_general_worker
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_MODULAR_SUBTRACT_X:
+ UOP_OPCODE_MODULAR_SUBTRACT_X,
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
case (wrk_fsm_state)
WRK_FSM_STATE_BUSY2,
@@ -762,8 +763,7 @@ module modexpng_general_worker
WRK_FSM_STATE_LATENCY_POST4: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[4], rd_narrow_addr_y_dly[4]);
endcase
//
- UOP_OPCODE_MERGE_LH,
- UOP_OPCODE_REGULAR_ADD_UNEVEN:
+ UOP_OPCODE_MERGE_LH:
//
case (wrk_fsm_state)
WRK_FSM_STATE_BUSY1,
@@ -802,6 +802,22 @@ module modexpng_general_worker
endcase
//
end
+
+
+ //
+ // UOP_OPCODE_REGULAR_ADD_UNEVEN
+ //
+ reg regular_add_uneven_flag;
+
+ always @(posedge clk)
+ //
+ case (opcode)
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_LATENCY_PRE4: regular_add_uneven_flag <= 1'b0;
+ WRK_FSM_STATE_BUSY2: if (rd_wide_addr_is_last_half_dly[2]) regular_add_uneven_flag <= 1'b1;
+ endcase
+ endcase
//
@@ -853,7 +869,8 @@ module modexpng_general_worker
//
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_MODULAR_SUBTRACT_X,
- UOP_OPCODE_MODULAR_SUBTRACT_Y: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2};
+ UOP_OPCODE_MODULAR_SUBTRACT_Y,
+ UOP_OPCODE_REGULAR_ADD_UNEVEN: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2};
default: {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0};
//
endcase
@@ -929,6 +946,30 @@ module modexpng_general_worker
//
end
//
+ UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
+ //
+ if (rd_narrow_ena_x_dly2) begin
+ if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x <= DSP48E1_OPMODE_Z0_YC_XAB;
+ else begin
+ if (!regular_add_uneven_flag) dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_XAB;
+ else dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_X0;
+ end
+ dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
+ dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
+ end
+ //
+ if (rd_narrow_ena_y_dly2) begin
+ if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y <= DSP48E1_OPMODE_Z0_YC_XAB;
+ else begin
+ if (!regular_add_uneven_flag) dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_XAB;
+ else dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_X0;
+ end
+ dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
+ dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
+ end
+ //
+ end
+ //
endcase
//
end
@@ -988,14 +1029,32 @@ module modexpng_general_worker
dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]};
dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
- dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_y_din_x[WORD_W-1:0]};
+ dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]};
end
//
if (rd_narrow_ena_y_dly2) begin
dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]};
dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
- dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_y_din_y[WORD_W-1:0]};
+ dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]};
+ end
+ //
+ end
+ //
+ UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
+ //
+ if (rd_narrow_ena_x_dly2) begin
+ dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
+ dsp_x_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_x [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_x [WORD_W-1:0]};
+ dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
+ dsp_y_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_x [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_x [WORD_W-1:0]};
+ end
+ //
+ if (rd_narrow_ena_y_dly2) begin
+ dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
+ dsp_x_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_y [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_y [WORD_W-1:0]};
+ dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
+ dsp_y_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_y [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_y [WORD_W-1:0]};
end
//
end
@@ -1022,7 +1081,7 @@ module modexpng_general_worker
.carry_in_sel (dsp_carry_in_sel_x),
.casc_p_in (),
.casc_p_out (),
- .carryout (dsp_carry_out_x)
+ .carry_out (dsp_carry_out_x)
);
`MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_x
@@ -1039,7 +1098,7 @@ module modexpng_general_worker
.carry_in_sel (dsp_carry_in_sel_x),
.casc_p_in (),
.casc_p_out (),
- .carryout ()
+ .carry_out ()
);
`MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_y
@@ -1056,7 +1115,7 @@ module modexpng_general_worker
.carry_in_sel (dsp_carry_in_sel_y),
.casc_p_in (),
.casc_p_out (),
- .carryout (dsp_carry_out_y)
+ .carry_out (dsp_carry_out_y)
);
`MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_y
@@ -1073,140 +1132,43 @@ module modexpng_general_worker
.carry_in_sel (dsp_carry_in_sel_y),
.casc_p_in (),
.casc_p_out (),
- .carryout ()
+ .carry_out ()
);
//
// UOP_OPCODE_MODULAR_SUBTRACT_X
//
- reg modular_subtract_x_brw_flag;
- reg modular_subtract_y_brw_flag;
//
// IMPORTANT: DSP48E1 turns out to have a very non-obvious feature: when doing _subtraction_,
// the CARRYOUT[3] is _NOT_ equivalent to the borrow flag! See "CARRYOUT/CARRYCASCOUT"
// section of Appendix A on pp. 55-56 of UG479 for more details.
//
- always @(posedge clk)
- //
- case (opcode)
- UOP_OPCODE_MODULAR_SUBTRACT_X:
- case (wrk_fsm_state)
- WRK_FSM_STATE_LATENCY_POST4:
- //{modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {1'bX, 1'bZ};
- {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y};
- endcase
- endcase
-
-
- //reg modular_subtract_x_brw_r;
- //reg modular_subtract_y_brw_r;
-
- //reg modular_subtract_x_cry_r;
- //reg modular_subtract_y_cry_r;
-
- //wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r};
- //wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r};
-
- //wire [WORD_W:0] modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r};
- //wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_cry_r};
-
- //reg [WORD_W:0] modular_subtract_x_w_brw_r;
- //reg [WORD_W:0] modular_subtract_y_w_brw_r;
-
- //reg [WORD_W:0] modular_subtract_x_w_cry_r;
- //reg [WORD_W:0] modular_subtract_y_w_cry_r;
-
- //wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W];
- //wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W];
-
- //wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W];
- //wire modular_subtract_y_w_cry_msb = modular_subtract_y_w_cry_r[WORD_W];
-
- //wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0];
- //wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0];
-
- //wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0];
- //wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0];
- //wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb};
- //wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb};
+ reg modular_subtract_x_brw_flag;
+ reg modular_subtract_y_brw_flag;
- //wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb};
- //wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb};
-
reg [WORD_EXT_W -1:0] modular_subtract_x_mux;
reg [WORD_EXT_W -1:0] modular_subtract_y_mux;
wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]};
wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]};
-
- //task _modular_subtract_update_brw;
- //input x_brw, y_brw;
- //{modular_subtract_x_brw_r, modular_subtract_y_brw_r} <= {x_brw, y_brw};
- //endtask
-
- //task _modular_subtract_update_cry;
- //input x_cry, y_cry;
- //{modular_subtract_x_cry_r, modular_subtract_y_cry_r} <= {x_cry, y_cry};
- //endtask
-
- //task modular_subtract_clear_brw; _modular_subtract_update_brw( 1'b0, 1'b0); endtask
- //task modular_subtract_store_brw; _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb); endtask
-
- //task modular_subtract_clear_cry; _modular_subtract_update_cry( 1'b0, 1'b0); endtask
- //task modular_subtract_store_cry; _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb); endtask
-
- //task _modular_subtract_update_diff_w_brw;
- //input [WORD_W:0] x_diff_w_brw, y_diff_w_brw;
- //{modular_subtract_x_w_brw_r, modular_subtract_y_w_brw_r} <= {x_diff_w_brw, y_diff_w_brw};
- //endtask
-
- //task _modular_subtract_update_sum_w_cry;
- //input [WORD_W:0] x_sum_w_cry, y_sum_w_cry;
- //{modular_subtract_x_w_cry_r, modular_subtract_y_w_cry_r} <= {x_sum_w_cry, y_sum_w_cry};
- //endtask
-
- //task modular_subtract_store_diff_w_brw; _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw); endtask
- //task modular_subtract_store_sum_w_cry; _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry); endtask
-
always @(posedge clk)
//
case (opcode)
- //
- //UOP_OPCODE_MODULAR_SUBTRACT_X:
- //
- //case (wrk_fsm_state)
- //
- //WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_brw;
- //WRK_FSM_STATE_BUSY1,
- //WRK_FSM_STATE_LATENCY_POST1,
- //WRK_FSM_STATE_LATENCY_POST3: modular_subtract_store_brw; // we need the very last borrow here too!
- //
- //WRK_FSM_STATE_LATENCY_PRE4,
- //WRK_FSM_STATE_BUSY2,
- //WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_diff_w_brw;
- //
- //endcase
- //
- //UOP_OPCODE_MODULAR_SUBTRACT_Y:
- //
- //case (wrk_fsm_state)
- //
- //WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_cry;
- //WRK_FSM_STATE_BUSY1,
- //WRK_FSM_STATE_LATENCY_POST1: modular_subtract_store_cry;
- //
- //WRK_FSM_STATE_LATENCY_PRE4,
- //WRK_FSM_STATE_BUSY2,
- //WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_sum_w_cry;
- //
- //endcase
- //
+ UOP_OPCODE_MODULAR_SUBTRACT_X:
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_LATENCY_POST4:
+ {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y};
+ endcase
+ endcase
+
+ always @(posedge clk)
+ //
+ case (opcode)
UOP_OPCODE_MODULAR_SUBTRACT_Z:
- //
case (wrk_fsm_state)
//
WRK_FSM_STATE_LATENCY_PRE4,
@@ -1215,97 +1177,11 @@ module modexpng_general_worker
//
begin modular_subtract_x_mux <= !modular_subtract_x_brw_flag ? rd_narrow_x_din_x_dly1 : rd_wide_x_din_x_dly1;
modular_subtract_y_mux <= !modular_subtract_y_brw_flag ? rd_narrow_x_din_y_dly1 : rd_wide_x_din_y_dly1; end
- //
- endcase
- //
+ endcase
endcase
//
- // UOP_OPCODE_REGULAR_ADD_UNEVEN
- //
- reg [CARRY_W -1:0] regular_add_uneven_x_x_cry_r;
- reg [CARRY_W -1:0] regular_add_uneven_y_x_cry_r;
- reg [CARRY_W -1:0] regular_add_uneven_x_y_cry_r;
- reg [CARRY_W -1:0] regular_add_uneven_y_y_cry_r;
-
- wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_msb_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_x_cry_r};
- wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_msb_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_x_cry_r};
- wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_msb_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_y_cry_r};
- wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_msb_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_y_cry_r};
-
- wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_lsb_w_cry = regular_add_uneven_x_x_msb_w_cry + rd_wide_x_din_x_dly1;
- wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_lsb_w_cry = regular_add_uneven_y_x_msb_w_cry + rd_wide_y_din_x_dly1;
- wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_lsb_w_cry = regular_add_uneven_x_y_msb_w_cry + rd_wide_x_din_y_dly1;
- wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_lsb_w_cry = regular_add_uneven_y_y_msb_w_cry + rd_wide_y_din_y_dly1;
-
- reg [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_r;
- reg [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_r;
- reg [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_r;
- reg [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_r;
-
- wire [CARRY_W -1:0] regular_add_uneven_x_x_w_cry_msb = regular_add_uneven_x_x_w_cry_r[WORD_EXT_W -1:WORD_W];
- wire [CARRY_W -1:0] regular_add_uneven_y_x_w_cry_msb = regular_add_uneven_y_x_w_cry_r[WORD_EXT_W -1:WORD_W];
- wire [CARRY_W -1:0] regular_add_uneven_x_y_w_cry_msb = regular_add_uneven_x_y_w_cry_r[WORD_EXT_W -1:WORD_W];
- wire [CARRY_W -1:0] regular_add_uneven_y_y_w_cry_msb = regular_add_uneven_y_y_w_cry_r[WORD_EXT_W -1:WORD_W];
-
- wire [WORD_W -1:0] regular_add_uneven_x_x_w_cry_lsb = regular_add_uneven_x_x_w_cry_r[WORD_W -1:0];
- wire [WORD_W -1:0] regular_add_uneven_y_x_w_cry_lsb = regular_add_uneven_y_x_w_cry_r[WORD_W -1:0];
- wire [WORD_W -1:0] regular_add_uneven_x_y_w_cry_lsb = regular_add_uneven_x_y_w_cry_r[WORD_W -1:0];
- wire [WORD_W -1:0] regular_add_uneven_y_y_w_cry_lsb = regular_add_uneven_y_y_w_cry_r[WORD_W -1:0];
-
- wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_x_w_cry_lsb};
- wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_x_w_cry_lsb};
- wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_y_w_cry_lsb};
- wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_y_w_cry_lsb};
-
- reg regular_add_uneven_store_lsb_now;
-
- task _regular_add_uneven_update_cry;
- input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry;
- { regular_add_uneven_x_x_cry_r, regular_add_uneven_y_x_cry_r, regular_add_uneven_x_y_cry_r, regular_add_uneven_y_y_cry_r} <=
- { x_x_cry, y_x_cry, x_y_cry, y_y_cry};
- endtask
-
- task regular_add_uneven_clear_cry; _regular_add_uneven_update_cry( CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); endtask
- task regular_add_uneven_store_cry; _regular_add_uneven_update_cry(regular_add_uneven_x_x_w_cry_msb, regular_add_uneven_y_x_w_cry_msb, regular_add_uneven_x_y_w_cry_msb, regular_add_uneven_y_y_w_cry_msb); endtask
-
- task _regular_add_uneven_update_sum_w_cry;
- input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry;
- { regular_add_uneven_x_x_w_cry_r, regular_add_uneven_y_x_w_cry_r, regular_add_uneven_x_y_w_cry_r, regular_add_uneven_y_y_w_cry_r} <=
- { x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry};
- endtask
-
- task regular_add_uneven_store_sum_lsb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_lsb_w_cry, regular_add_uneven_y_x_lsb_w_cry, regular_add_uneven_x_y_lsb_w_cry, regular_add_uneven_y_y_lsb_w_cry); endtask
-
- task regular_add_uneven_store_sum_msb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_msb_w_cry, regular_add_uneven_y_x_msb_w_cry, regular_add_uneven_x_y_msb_w_cry, regular_add_uneven_y_y_msb_w_cry); endtask
-
- always @(posedge clk)
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_LATENCY_PRE3: regular_add_uneven_store_lsb_now <= 1'b1;
- WRK_FSM_STATE_BUSY1: if (rd_wide_addr_is_last_half_dly[3]) regular_add_uneven_store_lsb_now <= 1'b0;
- //
- endcase
-
- always @(posedge clk)
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_LATENCY_PRE3: regular_add_uneven_clear_cry;
- WRK_FSM_STATE_BUSY1,
- WRK_FSM_STATE_LATENCY_POST1: regular_add_uneven_store_cry;
- //
- WRK_FSM_STATE_LATENCY_PRE4: regular_add_uneven_store_sum_lsb_w_cry;
- WRK_FSM_STATE_BUSY2: if (regular_add_uneven_store_lsb_now) regular_add_uneven_store_sum_lsb_w_cry;
- else regular_add_uneven_store_sum_msb_w_cry;
- WRK_FSM_STATE_LATENCY_POST2: regular_add_uneven_store_sum_msb_w_cry;
- //
- endcase
-
-
- //
// FSM Process
//
always @(posedge clk or negedge rst_n)
@@ -1424,7 +1300,8 @@ module modexpng_general_worker
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_MODULAR_SUBTRACT_X:
+ UOP_OPCODE_MODULAR_SUBTRACT_X,
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
case (wrk_fsm_state)
//
@@ -1524,17 +1401,6 @@ module modexpng_general_worker
//
endcase
//
- UOP_OPCODE_REGULAR_ADD_UNEVEN:
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY1,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST3:
- //
- update_narrow_dout(regular_add_uneven_x_x_w_cry_reduced, regular_add_uneven_y_x_w_cry_reduced, regular_add_uneven_x_y_w_cry_reduced, regular_add_uneven_y_y_w_cry_reduced);
- //
- endcase
endcase
//
end