diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2020-01-16 21:38:04 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2020-01-16 21:38:04 +0300 |
commit | e5f4454e3ac52fa761f301e7d11ad144cd23d590 (patch) | |
tree | accbdc37b3abfdc4b0ac5cdc85fdae8a70289ccb /rtl | |
parent | 6a0438e33fa300822216c259668180f177ac0343 (diff) |
Reworked modular subtraction micro-operation. Previously it used a "two-pass"
bank address space sweep: during the first pass (a-b) and (a-b+n) were
computed, and during the second pass either the former or the latter quantity
was written to the output bank (depending on the very last borrow flag value).
This is no longer possible, since the FSM now only generates one "interleaved"
address space sweep. The solution is to split one complex modular subtraction
operation into simpler sub-operations. Currently modular subtraction is
achieved by running a sequence of three micro-operations:
* MODULAR_SUBTRACT_X computes (a-b) and latches the final borrow flag
* MODULAR_SUBTRACT_Y computes (a-b+n)
* MODULAR_SUBTRACT_Z writes either (a-b) or (a-b+n) into the output bank
depending on the latched value of the borrow flag
Unfortunately we can't compute both (a-b) and (a-b+n) during one address space
sweep, since the fully pipelined adder/subtractor DSP slice has a 2-cycle latency.
Diffstat (limited to 'rtl')
-rw-r--r-- | rtl/modexpng_general_worker.v | 569 |
1 files changed, 332 insertions, 237 deletions
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v index 684af5a..6652f14 100644 --- a/rtl/modexpng_general_worker.v +++ b/rtl/modexpng_general_worker.v @@ -245,8 +245,8 @@ module modexpng_general_worker reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:4]; reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:4]; - reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:3]; - reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:3]; + reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:4]; + reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:4]; reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly1; reg [WORD_EXT_W -1:0] rd_wide_y_din_x_dly1; @@ -277,8 +277,8 @@ module modexpng_general_worker {rd_narrow_addr_x_dly[4], rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0]} <= {rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0], rd_narrow_addr_x}; {rd_narrow_addr_y_dly[4], rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0]} <= {rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0], rd_narrow_addr_y}; // - {rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0]} <= {rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0], rd_wide_addr_x}; - {rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0]} <= {rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0], rd_wide_addr_y}; + {rd_wide_addr_x_dly[4], rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0]} <= {rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0], rd_wide_addr_x}; + {rd_wide_addr_y_dly[4], rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0]} <= {rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0], rd_wide_addr_y}; // 
{rd_narrow_ena_x_dly2, rd_narrow_ena_x_dly1} <= {rd_narrow_ena_x_dly1, rd_narrow_ena_x}; {rd_narrow_ena_y_dly2, rd_narrow_ena_y_dly1} <= {rd_narrow_ena_y_dly1, rd_narrow_ena_y}; @@ -386,15 +386,15 @@ module modexpng_general_worker // case (opcode) // - UOP_OPCODE_PROPAGATE_CARRIES: + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_MODULAR_SUBTRACT_X: // case (wrk_fsm_state) WRK_FSM_STATE_BUSY2, WRK_FSM_STATE_LATENCY_POST2, WRK_FSM_STATE_LATENCY_POST4: enable_narrow_wr_en; endcase - // - UOP_OPCODE_MODULAR_SUBTRACT_X, + // UOP_OPCODE_MERGE_LH, UOP_OPCODE_REGULAR_ADD_UNEVEN: // @@ -415,10 +415,17 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_POST3: begin enable_wide_wr_en; enable_narrow_wr_en; end endcase // - UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_OPCODE_MODULAR_SUBTRACT_Y: // case (wrk_fsm_state) + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2, + WRK_FSM_STATE_LATENCY_POST4: enable_wide_wr_en; + endcase + // + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state) WRK_FSM_STATE_BUSY1, WRK_FSM_STATE_LATENCY_POST1, WRK_FSM_STATE_LATENCY_POST3: enable_wide_wr_en; @@ -746,7 +753,8 @@ module modexpng_general_worker // case (opcode) // - UOP_OPCODE_PROPAGATE_CARRIES: + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_MODULAR_SUBTRACT_X: // case (wrk_fsm_state) WRK_FSM_STATE_BUSY2, @@ -754,7 +762,6 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_POST4: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[4], rd_narrow_addr_y_dly[4]); endcase // - UOP_OPCODE_MODULAR_SUBTRACT_X, UOP_OPCODE_MERGE_LH, UOP_OPCODE_REGULAR_ADD_UNEVEN: // @@ -787,29 +794,28 @@ module modexpng_general_worker UOP_OPCODE_MODULAR_SUBTRACT_Y: // case (wrk_fsm_state) - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST3: update_wr_wide_bank_addr(sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]); + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2, + WRK_FSM_STATE_LATENCY_POST4: 
update_wr_wide_bank_addr(sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[4], rd_wide_addr_y_dly[4]); endcase // endcase // end - - - + + // // DSP Slice Array // - wire [DSP48E1_C_W-1:0] dsp_x_x_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_x_din_x_dly1}; - wire [DSP48E1_C_W-1:0] dsp_y_x_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_y_din_x_dly1}; - wire [DSP48E1_C_W-1:0] dsp_x_y_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_x_din_y_dly1}; - wire [DSP48E1_C_W-1:0] dsp_y_y_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_y_din_y_dly1}; + reg [DSP48E1_C_W-1:0] dsp_x_x_x; + reg [DSP48E1_C_W-1:0] dsp_y_x_x; + reg [DSP48E1_C_W-1:0] dsp_x_y_x; + reg [DSP48E1_C_W-1:0] dsp_y_y_x; - wire [DSP48E1_C_W-1:0] dsp_x_x_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_x_din_x_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_x_din_x_dly1[WORD_W-1:0]}; - wire [DSP48E1_C_W-1:0] dsp_y_x_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_y_din_x_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_y_din_x_dly1[WORD_W-1:0]}; - wire [DSP48E1_C_W-1:0] dsp_x_y_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_x_din_y_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_x_din_y_dly1[WORD_W-1:0]}; - wire [DSP48E1_C_W-1:0] dsp_y_y_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_y_din_y_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_y_din_y_dly1[WORD_W-1:0]}; + reg [DSP48E1_C_W-1:0] dsp_x_x_y; + reg [DSP48E1_C_W-1:0] dsp_y_x_y; + reg [DSP48E1_C_W-1:0] dsp_x_y_y; + reg [DSP48E1_C_W-1:0] dsp_y_y_y; wire [DSP48E1_P_W-1:0] dsp_x_x_p; wire [DSP48E1_P_W-1:0] dsp_y_x_p; @@ -821,213 +827,314 @@ module modexpng_general_worker wire [WORD_EXT_W-1:0] dsp_x_y_p_reduced = {CARRY_ZERO, dsp_x_y_p[WORD_W-1:0]}; wire [WORD_EXT_W-1:0] dsp_y_y_p_reduced = {CARRY_ZERO, dsp_y_y_p[WORD_W-1:0]}; - reg dsp_ce_x = 1'b0; - reg dsp_ce_y = 1'b0; - reg dsp_ce_x_dly = 1'b0; - reg dsp_ce_y_dly = 1'b0; - reg [DSP48E1_OPMODE_W-1:0] dsp_opmode_x; - reg [DSP48E1_OPMODE_W-1:0] dsp_opmode_y; + reg dsp_ce_x = 1'b0; 
+ reg dsp_ce_y = 1'b0; + reg dsp_ce_x_dly = 1'b0; + reg dsp_ce_y_dly = 1'b0; + reg [ DSP48E1_OPMODE_W -1:0] dsp_op_mode_x; + reg [ DSP48E1_OPMODE_W -1:0] dsp_op_mode_y; + reg [ DSP48E1_ALUMODE_W -1:0] dsp_alu_mode_x; + reg [ DSP48E1_ALUMODE_W -1:0] dsp_alu_mode_y; + reg [DSP48E1_CARRYINSEL_W -1:0] dsp_carry_in_sel_x; + reg [DSP48E1_CARRYINSEL_W -1:0] dsp_carry_in_sel_y; + wire dsp_carry_out_x; + wire dsp_carry_out_y; + + + // + // DSP - CE + // + always @(posedge clk) {dsp_ce_x_dly, dsp_ce_y_dly} <= {dsp_ce_x, dsp_ce_y}; always @(posedge clk or negedge rst_n) // if (!rst_n) {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0}; else case (opcode) // - UOP_OPCODE_PROPAGATE_CARRIES: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2}; - default: {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0}; + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_MODULAR_SUBTRACT_X, + UOP_OPCODE_MODULAR_SUBTRACT_Y: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2}; + default: {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0}; // endcase + + // + // DSP - OPMODE, ALUMODE, CARRYINSEL + // always @(posedge clk) begin // - dsp_opmode_x <= {DSP48E1_OPMODE_W{1'bX}}; - dsp_opmode_y <= {DSP48E1_OPMODE_W{1'bX}}; + dsp_op_mode_x <= DSP48E1_OPMODE_DNC; + dsp_op_mode_y <= DSP48E1_OPMODE_DNC; + // + dsp_alu_mode_x <= DSP48E1_ALUMODE_DNC; + dsp_alu_mode_y <= DSP48E1_ALUMODE_DNC; // - if (rd_narrow_ena_x_dly2) + dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_DNC; + dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_DNC; + // + case (opcode) // - case (opcode) + UOP_OPCODE_PROPAGATE_CARRIES: begin // - UOP_OPCODE_PROPAGATE_CARRIES: if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_opmode_x <= DSP48E1_OPMODE_Z0_YC_X0; - else dsp_opmode_x <= DSP48E1_OPMODE_ZP17_YC_X0; + if (rd_narrow_ena_x_dly2) begin + if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x <= DSP48E1_OPMODE_Z0_YC_X0; + else dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_X0; + dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN; + dsp_carry_in_sel_x <= 
DSP48E1_CARRYINSEL_CARRYIN; + end // - endcase - // - if (rd_narrow_ena_y_dly2) + if (rd_narrow_ena_y_dly2) begin + if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y <= DSP48E1_OPMODE_Z0_YC_X0; + else dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_X0; + dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN; + dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN; + end + // + end // - case (opcode) + UOP_OPCODE_MODULAR_SUBTRACT_X: begin // - UOP_OPCODE_PROPAGATE_CARRIES: if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_opmode_y <= DSP48E1_OPMODE_Z0_YC_X0; - else dsp_opmode_y <= DSP48E1_OPMODE_ZP17_YC_X0; + if (rd_narrow_ena_x_dly2) begin + dsp_op_mode_x <= DSP48E1_OPMODE_ZC_Y0_XAB; + dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_MINUS_X_AND_Y_AND_CIN; + if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN; + else dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYCASCOUT; + end // - endcase + if (rd_narrow_ena_y_dly2) begin + dsp_op_mode_y <= DSP48E1_OPMODE_ZC_Y0_XAB; + dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_MINUS_X_AND_Y_AND_CIN; + if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN; + else dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYCASCOUT; + end + // + end + // + UOP_OPCODE_MODULAR_SUBTRACT_Y: begin + // + if (rd_narrow_ena_x_dly2) begin + dsp_op_mode_x <= DSP48E1_OPMODE_ZC_Y0_XAB; + dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN; + if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN; + else dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYCASCOUT; + end + // + if (rd_narrow_ena_y_dly2) begin + dsp_op_mode_y <= DSP48E1_OPMODE_ZC_Y0_XAB; + dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN; + if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN; + else dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYCASCOUT; + end + // + end + // + endcase // end - always @(posedge clk) {dsp_ce_x_dly, dsp_ce_y_dly} <= 
{dsp_ce_x, dsp_ce_y}; + // + // DSP Feed Logic + // + always @(posedge clk) begin + // + dsp_x_x_x <= {DSP48E1_C_W{1'bX}}; + dsp_x_x_y <= {DSP48E1_C_W{1'bX}}; + dsp_y_x_x <= {DSP48E1_C_W{1'bX}}; + dsp_y_x_y <= {DSP48E1_C_W{1'bX}}; + dsp_x_y_x <= {DSP48E1_C_W{1'bX}}; + dsp_x_y_y <= {DSP48E1_C_W{1'bX}}; + dsp_y_y_x <= {DSP48E1_C_W{1'bX}}; + dsp_y_y_y <= {DSP48E1_C_W{1'bX}}; + // + case (opcode) + // + UOP_OPCODE_PROPAGATE_CARRIES: begin + // + if (rd_narrow_ena_x_dly2) begin + dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; + end + // + if (rd_narrow_ena_y_dly2) begin + dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; + end + // + end + // + UOP_OPCODE_MODULAR_SUBTRACT_X: begin + // + if (rd_narrow_ena_x_dly2) begin + dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; + dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; + end + // + if (rd_narrow_ena_y_dly2) begin + dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; + dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; + end + // + end + // + UOP_OPCODE_MODULAR_SUBTRACT_Y: begin + // + if (rd_narrow_ena_x_dly2) begin + dsp_x_x_y <= 
{{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]}; + dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_y_din_x[WORD_W-1:0]}; + end + // + if (rd_narrow_ena_y_dly2) begin + dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]}; + dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_y_din_y[WORD_W-1:0]}; + end + // + end + // + endcase + // + end + + + // + // DSP Slices + // `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_x ( - .clk (clk), - .ce_abc (dsp_ce_x), - .ce_p (dsp_ce_x_dly), - .ce_opmode (dsp_ce_x), - .x (dsp_x_x_x), - .y (dsp_x_x_y), - .p (dsp_x_x_p), - .opmode (dsp_opmode_x), - .casc_p_in (), - .casc_p_out () + .clk (clk), + .ce_abc (dsp_ce_x), + .ce_p (dsp_ce_x_dly), + .ce_ctrl (dsp_ce_x), + .x (dsp_x_x_x), + .y (dsp_x_x_y), + .p (dsp_x_x_p), + .op_mode (dsp_op_mode_x), + .alu_mode (dsp_alu_mode_x), + .carry_in_sel (dsp_carry_in_sel_x), + .casc_p_in (), + .casc_p_out (), + .carryout (dsp_carry_out_x) ); `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_x ( - .clk (clk), - .ce_abc (dsp_ce_x), - .ce_p (dsp_ce_x_dly), - .ce_opmode (dsp_ce_x), - .x (dsp_y_x_x), - .y (dsp_y_x_y), - .p (dsp_y_x_p), - .opmode (dsp_opmode_x), - .casc_p_in (), - .casc_p_out () + .clk (clk), + .ce_abc (dsp_ce_x), + .ce_p (dsp_ce_x_dly), + .ce_ctrl (dsp_ce_x), + .x (dsp_y_x_x), + .y (dsp_y_x_y), + .p (dsp_y_x_p), + .op_mode (dsp_op_mode_x), + .alu_mode (dsp_alu_mode_x), + .carry_in_sel (dsp_carry_in_sel_x), + .casc_p_in (), + .casc_p_out (), + .carryout () ); `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_y ( - .clk (clk), - .ce_abc (dsp_ce_y), - .ce_p (dsp_ce_y_dly), - .ce_opmode (dsp_ce_y), - .x (dsp_x_y_x), - .y (dsp_x_y_y), - .p 
(dsp_x_y_p), - .opmode (dsp_opmode_y), - .casc_p_in (), - .casc_p_out () + .clk (clk), + .ce_abc (dsp_ce_y), + .ce_p (dsp_ce_y_dly), + .ce_ctrl (dsp_ce_y), + .x (dsp_x_y_x), + .y (dsp_x_y_y), + .p (dsp_x_y_p), + .op_mode (dsp_op_mode_y), + .alu_mode (dsp_alu_mode_y), + .carry_in_sel (dsp_carry_in_sel_y), + .casc_p_in (), + .casc_p_out (), + .carryout (dsp_carry_out_y) ); `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_y ( - .clk (clk), - .ce_abc (dsp_ce_y), - .ce_p (dsp_ce_y_dly), - .ce_opmode (dsp_ce_y), - .x (dsp_y_y_x), - .y (dsp_y_y_y), - .p (dsp_y_y_p), - .opmode (dsp_opmode_y), - .casc_p_in (), - .casc_p_out () + .clk (clk), + .ce_abc (dsp_ce_y), + .ce_p (dsp_ce_y_dly), + .ce_ctrl (dsp_ce_y), + .x (dsp_y_y_x), + .y (dsp_y_y_y), + .p (dsp_y_y_p), + .op_mode (dsp_op_mode_y), + .alu_mode (dsp_alu_mode_y), + .carry_in_sel (dsp_carry_in_sel_y), + .casc_p_in (), + .casc_p_out (), + .carryout () ); // - // UOP_OPCODE_PROPAGATE_CARRIES - // - reg [CARRY_W -1:0] propagate_carries_x_x_cry_r; - reg [CARRY_W -1:0] propagate_carries_y_x_cry_r; - reg [CARRY_W -1:0] propagate_carries_x_y_cry_r; - reg [CARRY_W -1:0] propagate_carries_y_y_cry_r; - - wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_x_cry_r}; - wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_x_cry_r}; - wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_y_cry_r}; - wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_y_cry_r}; - - reg [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_r; - reg [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_r; - reg [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_r; - reg [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_r; - - wire [CARRY_W -1:0] propagate_carries_x_x_w_cry_msb = propagate_carries_x_x_w_cry_r[WORD_EXT_W -1:WORD_W]; - wire 
[CARRY_W -1:0] propagate_carries_y_x_w_cry_msb = propagate_carries_y_x_w_cry_r[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] propagate_carries_x_y_w_cry_msb = propagate_carries_x_y_w_cry_r[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] propagate_carries_y_y_w_cry_msb = propagate_carries_y_y_w_cry_r[WORD_EXT_W -1:WORD_W]; - - wire [WORD_W -1:0] propagate_carries_x_x_w_cry_lsb = propagate_carries_x_x_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] propagate_carries_y_x_w_cry_lsb = propagate_carries_y_x_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] propagate_carries_x_y_w_cry_lsb = propagate_carries_x_y_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] propagate_carries_y_y_w_cry_lsb = propagate_carries_y_y_w_cry_r[WORD_W -1:0]; - - wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_x_w_cry_lsb}; - wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_x_w_cry_lsb}; - wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_y_w_cry_lsb}; - wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_y_w_cry_lsb}; - - task _propagate_carries_update_cry; - input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry; - { propagate_carries_x_x_cry_r, propagate_carries_y_x_cry_r, propagate_carries_x_y_cry_r, propagate_carries_y_y_cry_r} <= - { x_x_cry, y_x_cry, x_y_cry, y_y_cry}; - endtask - - task propagate_carries_clear_cry; _propagate_carries_update_cry( CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); endtask - task propagate_carries_store_cry; _propagate_carries_update_cry(propagate_carries_x_x_w_cry_msb, propagate_carries_y_x_w_cry_msb, propagate_carries_x_y_w_cry_msb, propagate_carries_y_y_w_cry_msb); endtask - - task _propagate_carries_update_sum_w_cry; - input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry; - { propagate_carries_x_x_w_cry_r, propagate_carries_y_x_w_cry_r, 
propagate_carries_x_y_w_cry_r, propagate_carries_y_y_w_cry_r} <= - { x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry}; - endtask - - task propagate_carries_store_sum_w_cry; _propagate_carries_update_sum_w_cry(propagate_carries_x_x_w_cry, propagate_carries_y_x_w_cry, propagate_carries_x_y_w_cry, propagate_carries_y_y_w_cry); endtask + // UOP_OPCODE_MODULAR_SUBTRACT_X + // + reg modular_subtract_x_brw_flag; + reg modular_subtract_y_brw_flag; + // + // IMPORTANT: DSP48E1 turns out to have a very non-obvious feature: when doing _subtraction_, + // the CARRYOUT[3] is _NOT_ equivalent to the borrow flag! See "CARRYOUT/CARRYCASCOUT" + // section of Appendix A on pp. 55-56 of UG479 for more details. + // always @(posedge clk) // - if (opcode == UOP_OPCODE_PROPAGATE_CARRIES) - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_LATENCY_PRE3: propagate_carries_clear_cry; - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1: propagate_carries_store_cry; - // - WRK_FSM_STATE_LATENCY_PRE4, - WRK_FSM_STATE_BUSY2, - WRK_FSM_STATE_LATENCY_POST2: propagate_carries_store_sum_w_cry; - // + case (opcode) + UOP_OPCODE_MODULAR_SUBTRACT_X: + case (wrk_fsm_state) + WRK_FSM_STATE_LATENCY_POST4: + //{modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {1'bX, 1'bZ}; + {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y}; + endcase endcase + + //reg modular_subtract_x_brw_r; + //reg modular_subtract_y_brw_r; - // - // UOP_OPCODE_MODULAR_SUBTRACT_X - // UOP_OPCODE_MODULAR_SUBTRACT_Y - // - reg modular_subtract_x_brw_r; - reg modular_subtract_y_brw_r; - - reg modular_subtract_x_cry_r; - reg modular_subtract_y_cry_r; + //reg modular_subtract_x_cry_r; + //reg modular_subtract_y_cry_r; - wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r}; - wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - 
rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r}; + //wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r}; + //wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r}; - wire [WORD_W:0] modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r}; - wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_brw_r}; + //wire [WORD_W:0] modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r}; + //wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_cry_r}; - reg [WORD_W:0] modular_subtract_x_w_brw_r; - reg [WORD_W:0] modular_subtract_y_w_brw_r; + //reg [WORD_W:0] modular_subtract_x_w_brw_r; + //reg [WORD_W:0] modular_subtract_y_w_brw_r; - reg [WORD_W:0] modular_subtract_x_w_cry_r; - reg [WORD_W:0] modular_subtract_y_w_cry_r; + //reg [WORD_W:0] modular_subtract_x_w_cry_r; + //reg [WORD_W:0] modular_subtract_y_w_cry_r; - wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W]; - wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W]; + //wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W]; + //wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W]; - wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W]; - wire modular_subtract_y_w_cry_msb = modular_subtract_y_w_cry_r[WORD_W]; + //wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W]; + //wire modular_subtract_y_w_cry_msb = 
modular_subtract_y_w_cry_r[WORD_W]; - wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0]; - wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0]; + //wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0]; + //wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0]; - wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0]; + //wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0]; + //wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0]; - wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb}; - wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb}; + //wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb}; + //wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb}; - wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb}; - wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb}; + //wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb}; + //wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb}; reg [WORD_EXT_W -1:0] modular_subtract_x_mux; reg [WORD_EXT_W -1:0] modular_subtract_y_mux; @@ -1035,68 +1142,68 @@ module modexpng_general_worker wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]}; wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, 
modular_subtract_y_mux[WORD_W-1:0]}; - task _modular_subtract_update_brw; - input x_brw, y_brw; - {modular_subtract_x_brw_r, modular_subtract_y_brw_r} <= {x_brw, y_brw}; - endtask + //task _modular_subtract_update_brw; + //input x_brw, y_brw; + //{modular_subtract_x_brw_r, modular_subtract_y_brw_r} <= {x_brw, y_brw}; + //endtask - task _modular_subtract_update_cry; - input x_cry, y_cry; - {modular_subtract_x_cry_r, modular_subtract_y_cry_r} <= {x_cry, y_cry}; - endtask + //task _modular_subtract_update_cry; + //input x_cry, y_cry; + //{modular_subtract_x_cry_r, modular_subtract_y_cry_r} <= {x_cry, y_cry}; + //endtask - task modular_subtract_clear_brw; _modular_subtract_update_brw( 1'b0, 1'b0); endtask - task modular_subtract_store_brw; _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb); endtask + //task modular_subtract_clear_brw; _modular_subtract_update_brw( 1'b0, 1'b0); endtask + //task modular_subtract_store_brw; _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb); endtask - task modular_subtract_clear_cry; _modular_subtract_update_cry( 1'b0, 1'b0); endtask - task modular_subtract_store_cry; _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb); endtask + //task modular_subtract_clear_cry; _modular_subtract_update_cry( 1'b0, 1'b0); endtask + //task modular_subtract_store_cry; _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb); endtask - task _modular_subtract_update_diff_w_brw; - input [WORD_W:0] x_diff_w_brw, y_diff_w_brw; - {modular_subtract_x_w_brw_r, modular_subtract_y_w_brw_r} <= {x_diff_w_brw, y_diff_w_brw}; - endtask + //task _modular_subtract_update_diff_w_brw; + //input [WORD_W:0] x_diff_w_brw, y_diff_w_brw; + //{modular_subtract_x_w_brw_r, modular_subtract_y_w_brw_r} <= {x_diff_w_brw, y_diff_w_brw}; + //endtask - task _modular_subtract_update_sum_w_cry; - input [WORD_W:0] x_sum_w_cry, y_sum_w_cry; - 
{modular_subtract_x_w_cry_r, modular_subtract_y_w_cry_r} <= {x_sum_w_cry, y_sum_w_cry}; - endtask + //task _modular_subtract_update_sum_w_cry; + //input [WORD_W:0] x_sum_w_cry, y_sum_w_cry; + //{modular_subtract_x_w_cry_r, modular_subtract_y_w_cry_r} <= {x_sum_w_cry, y_sum_w_cry}; + //endtask - task modular_subtract_store_diff_w_brw; _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw); endtask + //task modular_subtract_store_diff_w_brw; _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw); endtask - task modular_subtract_store_sum_w_cry; _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry); endtask + //task modular_subtract_store_sum_w_cry; _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry); endtask always @(posedge clk) // case (opcode) // - UOP_OPCODE_MODULAR_SUBTRACT_X: + //UOP_OPCODE_MODULAR_SUBTRACT_X: // - case (wrk_fsm_state) + //case (wrk_fsm_state) // - WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_brw; - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST3: modular_subtract_store_brw; // we need the very last borrow here too! + //WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_brw; + //WRK_FSM_STATE_BUSY1, + //WRK_FSM_STATE_LATENCY_POST1, + //WRK_FSM_STATE_LATENCY_POST3: modular_subtract_store_brw; // we need the very last borrow here too! 
// - WRK_FSM_STATE_LATENCY_PRE4, - WRK_FSM_STATE_BUSY2, - WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_diff_w_brw; + //WRK_FSM_STATE_LATENCY_PRE4, + //WRK_FSM_STATE_BUSY2, + //WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_diff_w_brw; // - endcase + //endcase // - UOP_OPCODE_MODULAR_SUBTRACT_Y: + //UOP_OPCODE_MODULAR_SUBTRACT_Y: // - case (wrk_fsm_state) + //case (wrk_fsm_state) // - WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_cry; - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1: modular_subtract_store_cry; + //WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_cry; + //WRK_FSM_STATE_BUSY1, + //WRK_FSM_STATE_LATENCY_POST1: modular_subtract_store_cry; // - WRK_FSM_STATE_LATENCY_PRE4, - WRK_FSM_STATE_BUSY2, - WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_sum_w_cry; + //WRK_FSM_STATE_LATENCY_PRE4, + //WRK_FSM_STATE_BUSY2, + //WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_sum_w_cry; // - endcase + //endcase // UOP_OPCODE_MODULAR_SUBTRACT_Z: // @@ -1106,8 +1213,8 @@ module modexpng_general_worker WRK_FSM_STATE_BUSY2, WRK_FSM_STATE_LATENCY_POST2: // - begin modular_subtract_x_mux <= !modular_subtract_x_brw_r ? rd_narrow_x_din_x_dly1 : rd_wide_x_din_x_dly1; - modular_subtract_y_mux <= !modular_subtract_y_brw_r ? rd_narrow_x_din_y_dly1 : rd_wide_x_din_y_dly1; end + begin modular_subtract_x_mux <= !modular_subtract_x_brw_flag ? rd_narrow_x_din_x_dly1 : rd_wide_x_din_x_dly1; + modular_subtract_y_mux <= !modular_subtract_y_brw_flag ? 
rd_narrow_x_din_y_dly1 : rd_wide_x_din_y_dly1; end // endcase // @@ -1316,7 +1423,8 @@ module modexpng_general_worker // case (opcode) // - UOP_OPCODE_PROPAGATE_CARRIES: + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_MODULAR_SUBTRACT_X: // case (wrk_fsm_state) // @@ -1324,7 +1432,6 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_POST2, WRK_FSM_STATE_LATENCY_POST4: // - //update_narrow_dout(propagate_carries_x_x_w_cry_reduced, propagate_carries_y_x_w_cry_reduced, propagate_carries_x_y_w_cry_reduced, propagate_carries_y_y_w_cry_reduced); update_narrow_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced); // endcase @@ -1380,27 +1487,15 @@ module modexpng_general_worker // endcase // - UOP_OPCODE_MODULAR_SUBTRACT_X: - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST3: - // - update_narrow_dout(modular_subtract_x_w_brw_reduced, modular_subtract_x_w_brw_reduced, modular_subtract_y_w_brw_reduced, modular_subtract_y_w_brw_reduced); - // - endcase - // UOP_OPCODE_MODULAR_SUBTRACT_Y: // case (wrk_fsm_state) // - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST3: - // - update_wide_dout(modular_subtract_x_w_cry_reduced, modular_subtract_x_w_cry_reduced, modular_subtract_y_w_cry_reduced, modular_subtract_y_w_cry_reduced); + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2, + WRK_FSM_STATE_LATENCY_POST4: + // + update_wide_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced); // endcase // |