diff options
Diffstat (limited to 'rtl/modexpng_general_worker.v')
-rw-r--r-- | rtl/modexpng_general_worker.v | 569 |
1 files changed, 332 insertions, 237 deletions
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v index 684af5a..6652f14 100644 --- a/rtl/modexpng_general_worker.v +++ b/rtl/modexpng_general_worker.v @@ -245,8 +245,8 @@ module modexpng_general_worker reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:4]; reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:4]; - reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:3]; - reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:3]; + reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:4]; + reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:4]; reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly1; reg [WORD_EXT_W -1:0] rd_wide_y_din_x_dly1; @@ -277,8 +277,8 @@ module modexpng_general_worker {rd_narrow_addr_x_dly[4], rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0]} <= {rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0], rd_narrow_addr_x}; {rd_narrow_addr_y_dly[4], rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0]} <= {rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0], rd_narrow_addr_y}; // - {rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0]} <= {rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0], rd_wide_addr_x}; - {rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0]} <= {rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0], rd_wide_addr_y}; + {rd_wide_addr_x_dly[4], rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0]} <= {rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0], rd_wide_addr_x}; + {rd_wide_addr_y_dly[4], rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0]} <= {rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0], rd_wide_addr_y}; // {rd_narrow_ena_x_dly2, rd_narrow_ena_x_dly1} <= {rd_narrow_ena_x_dly1, rd_narrow_ena_x}; {rd_narrow_ena_y_dly2, rd_narrow_ena_y_dly1} <= {rd_narrow_ena_y_dly1, rd_narrow_ena_y}; @@ -386,15 +386,15 @@ module modexpng_general_worker // case (opcode) // - UOP_OPCODE_PROPAGATE_CARRIES: + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_MODULAR_SUBTRACT_X: // case (wrk_fsm_state) WRK_FSM_STATE_BUSY2, WRK_FSM_STATE_LATENCY_POST2, WRK_FSM_STATE_LATENCY_POST4: enable_narrow_wr_en; endcase - // - UOP_OPCODE_MODULAR_SUBTRACT_X, + // UOP_OPCODE_MERGE_LH, UOP_OPCODE_REGULAR_ADD_UNEVEN: // @@ -415,10 +415,17 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_POST3: begin enable_wide_wr_en; enable_narrow_wr_en; end endcase // - UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_OPCODE_MODULAR_SUBTRACT_Y: // case (wrk_fsm_state) + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2, + WRK_FSM_STATE_LATENCY_POST4: enable_wide_wr_en; + endcase + // + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state) WRK_FSM_STATE_BUSY1, WRK_FSM_STATE_LATENCY_POST1, WRK_FSM_STATE_LATENCY_POST3: enable_wide_wr_en; @@ -746,7 +753,8 @@ module modexpng_general_worker // case (opcode) // - UOP_OPCODE_PROPAGATE_CARRIES: + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_MODULAR_SUBTRACT_X: // case (wrk_fsm_state) WRK_FSM_STATE_BUSY2, @@ -754,7 +762,6 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_POST4: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[4], rd_narrow_addr_y_dly[4]); endcase // - UOP_OPCODE_MODULAR_SUBTRACT_X, UOP_OPCODE_MERGE_LH, UOP_OPCODE_REGULAR_ADD_UNEVEN: // @@ -787,29 +794,28 @@ module modexpng_general_worker UOP_OPCODE_MODULAR_SUBTRACT_Y: // case (wrk_fsm_state) - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST3: update_wr_wide_bank_addr(sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]); + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2, + WRK_FSM_STATE_LATENCY_POST4: update_wr_wide_bank_addr(sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[4], rd_wide_addr_y_dly[4]); endcase // endcase // end - - - + + // // DSP Slice Array // - wire [DSP48E1_C_W-1:0] dsp_x_x_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_x_din_x_dly1}; - wire [DSP48E1_C_W-1:0] dsp_y_x_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_y_din_x_dly1}; - wire [DSP48E1_C_W-1:0] dsp_x_y_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_x_din_y_dly1}; - wire [DSP48E1_C_W-1:0] dsp_y_y_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_y_din_y_dly1}; + reg [DSP48E1_C_W-1:0] dsp_x_x_x; + reg [DSP48E1_C_W-1:0] dsp_y_x_x; + reg [DSP48E1_C_W-1:0] dsp_x_y_x; + reg [DSP48E1_C_W-1:0] dsp_y_y_x; - wire [DSP48E1_C_W-1:0] dsp_x_x_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_x_din_x_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_x_din_x_dly1[WORD_W-1:0]}; - wire [DSP48E1_C_W-1:0] dsp_y_x_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_y_din_x_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_y_din_x_dly1[WORD_W-1:0]}; - wire [DSP48E1_C_W-1:0] dsp_x_y_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_x_din_y_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_x_din_y_dly1[WORD_W-1:0]}; - wire [DSP48E1_C_W-1:0] dsp_y_y_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_y_din_y_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_y_din_y_dly1[WORD_W-1:0]}; + reg [DSP48E1_C_W-1:0] dsp_x_x_y; + reg [DSP48E1_C_W-1:0] dsp_y_x_y; + reg [DSP48E1_C_W-1:0] dsp_x_y_y; + reg [DSP48E1_C_W-1:0] dsp_y_y_y; wire [DSP48E1_P_W-1:0] dsp_x_x_p; wire [DSP48E1_P_W-1:0] dsp_y_x_p; @@ -821,213 +827,314 @@ module modexpng_general_worker wire [WORD_EXT_W-1:0] dsp_x_y_p_reduced = {CARRY_ZERO, dsp_x_y_p[WORD_W-1:0]}; wire [WORD_EXT_W-1:0] dsp_y_y_p_reduced = {CARRY_ZERO, dsp_y_y_p[WORD_W-1:0]}; - reg dsp_ce_x = 1'b0; - reg dsp_ce_y = 1'b0; - reg dsp_ce_x_dly = 1'b0; - reg dsp_ce_y_dly = 1'b0; - reg [DSP48E1_OPMODE_W-1:0] dsp_opmode_x; - reg [DSP48E1_OPMODE_W-1:0] dsp_opmode_y; + reg dsp_ce_x = 1'b0; + reg dsp_ce_y = 1'b0; + reg dsp_ce_x_dly = 1'b0; + reg dsp_ce_y_dly = 1'b0; + reg [ DSP48E1_OPMODE_W -1:0] dsp_op_mode_x; + reg [ DSP48E1_OPMODE_W -1:0] dsp_op_mode_y; + reg [ DSP48E1_ALUMODE_W -1:0] dsp_alu_mode_x; + reg [ DSP48E1_ALUMODE_W -1:0] dsp_alu_mode_y; + reg [DSP48E1_CARRYINSEL_W -1:0] dsp_carry_in_sel_x; + reg [DSP48E1_CARRYINSEL_W -1:0] dsp_carry_in_sel_y; + wire dsp_carry_out_x; + wire dsp_carry_out_y; + + + // + // DSP - CE + // + always @(posedge clk) {dsp_ce_x_dly, dsp_ce_y_dly} <= {dsp_ce_x, dsp_ce_y}; always @(posedge clk or negedge rst_n) // if (!rst_n) {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0}; else case (opcode) // - UOP_OPCODE_PROPAGATE_CARRIES: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2}; - default: {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0}; + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_MODULAR_SUBTRACT_X, + UOP_OPCODE_MODULAR_SUBTRACT_Y: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2}; + default: {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0}; // endcase + + // + // DSP - OPMODE, ALUMODE, CARRYINSEL + // always @(posedge clk) begin // - dsp_opmode_x <= {DSP48E1_OPMODE_W{1'bX}}; - dsp_opmode_y <= {DSP48E1_OPMODE_W{1'bX}}; + dsp_op_mode_x <= DSP48E1_OPMODE_DNC; + dsp_op_mode_y <= DSP48E1_OPMODE_DNC; + // + dsp_alu_mode_x <= DSP48E1_ALUMODE_DNC; + dsp_alu_mode_y <= DSP48E1_ALUMODE_DNC; // - if (rd_narrow_ena_x_dly2) + dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_DNC; + dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_DNC; + // + case (opcode) // - case (opcode) + UOP_OPCODE_PROPAGATE_CARRIES: begin // - UOP_OPCODE_PROPAGATE_CARRIES: if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_opmode_x <= DSP48E1_OPMODE_Z0_YC_X0; - else dsp_opmode_x <= DSP48E1_OPMODE_ZP17_YC_X0; + if (rd_narrow_ena_x_dly2) begin + if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x <= DSP48E1_OPMODE_Z0_YC_X0; + else dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_X0; + dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN; + dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN; + end // - endcase - // - if (rd_narrow_ena_y_dly2) + if (rd_narrow_ena_y_dly2) begin + if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y <= DSP48E1_OPMODE_Z0_YC_X0; + else dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_X0; + dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN; + dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN; + end + // + end // - case (opcode) + UOP_OPCODE_MODULAR_SUBTRACT_X: begin // - UOP_OPCODE_PROPAGATE_CARRIES: if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_opmode_y <= DSP48E1_OPMODE_Z0_YC_X0; - else dsp_opmode_y <= DSP48E1_OPMODE_ZP17_YC_X0; + if (rd_narrow_ena_x_dly2) begin + dsp_op_mode_x <= DSP48E1_OPMODE_ZC_Y0_XAB; + dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_MINUS_X_AND_Y_AND_CIN; + if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN; + else dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYCASCOUT; + end // - endcase + if (rd_narrow_ena_y_dly2) begin + dsp_op_mode_y <= DSP48E1_OPMODE_ZC_Y0_XAB; + dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_MINUS_X_AND_Y_AND_CIN; + if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN; + else dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYCASCOUT; + end + // + end + // + UOP_OPCODE_MODULAR_SUBTRACT_Y: begin + // + if (rd_narrow_ena_x_dly2) begin + dsp_op_mode_x <= DSP48E1_OPMODE_ZC_Y0_XAB; + dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN; + if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN; + else dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYCASCOUT; + end + // + if (rd_narrow_ena_y_dly2) begin + dsp_op_mode_y <= DSP48E1_OPMODE_ZC_Y0_XAB; + dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN; + if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN; + else dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYCASCOUT; + end + // + end + // + endcase // end - always @(posedge clk) {dsp_ce_x_dly, dsp_ce_y_dly} <= {dsp_ce_x, dsp_ce_y}; + // + // DSP Feed Logic + // + always @(posedge clk) begin + // + dsp_x_x_x <= {DSP48E1_C_W{1'bX}}; + dsp_x_x_y <= {DSP48E1_C_W{1'bX}}; + dsp_y_x_x <= {DSP48E1_C_W{1'bX}}; + dsp_y_x_y <= {DSP48E1_C_W{1'bX}}; + dsp_x_y_x <= {DSP48E1_C_W{1'bX}}; + dsp_x_y_y <= {DSP48E1_C_W{1'bX}}; + dsp_y_y_x <= {DSP48E1_C_W{1'bX}}; + dsp_y_y_y <= {DSP48E1_C_W{1'bX}}; + // + case (opcode) + // + UOP_OPCODE_PROPAGATE_CARRIES: begin + // + if (rd_narrow_ena_x_dly2) begin + dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; + end + // + if (rd_narrow_ena_y_dly2) begin + dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; + end + // + end + // + UOP_OPCODE_MODULAR_SUBTRACT_X: begin + // + if (rd_narrow_ena_x_dly2) begin + dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; + dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; + end + // + if (rd_narrow_ena_y_dly2) begin + dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; + dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; + end + // + end + // + UOP_OPCODE_MODULAR_SUBTRACT_Y: begin + // + if (rd_narrow_ena_x_dly2) begin + dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]}; + dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_y_din_x[WORD_W-1:0]}; + end + // + if (rd_narrow_ena_y_dly2) begin + dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]}; + dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_y_din_y[WORD_W-1:0]}; + end + // + end + // + endcase + // + end + + + // + // DSP Slices + // `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_x ( - .clk (clk), - .ce_abc (dsp_ce_x), - .ce_p (dsp_ce_x_dly), - .ce_opmode (dsp_ce_x), - .x (dsp_x_x_x), - .y (dsp_x_x_y), - .p (dsp_x_x_p), - .opmode (dsp_opmode_x), - .casc_p_in (), - .casc_p_out () + .clk (clk), + .ce_abc (dsp_ce_x), + .ce_p (dsp_ce_x_dly), + .ce_ctrl (dsp_ce_x), + .x (dsp_x_x_x), + .y (dsp_x_x_y), + .p (dsp_x_x_p), + .op_mode (dsp_op_mode_x), + .alu_mode (dsp_alu_mode_x), + .carry_in_sel (dsp_carry_in_sel_x), + .casc_p_in (), + .casc_p_out (), + .carryout (dsp_carry_out_x) ); `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_x ( - .clk (clk), - .ce_abc (dsp_ce_x), - .ce_p (dsp_ce_x_dly), - .ce_opmode (dsp_ce_x), - .x (dsp_y_x_x), - .y (dsp_y_x_y), - .p (dsp_y_x_p), - .opmode (dsp_opmode_x), - .casc_p_in (), - .casc_p_out () + .clk (clk), + .ce_abc (dsp_ce_x), + .ce_p (dsp_ce_x_dly), + .ce_ctrl (dsp_ce_x), + .x (dsp_y_x_x), + .y (dsp_y_x_y), + .p (dsp_y_x_p), + .op_mode (dsp_op_mode_x), + .alu_mode (dsp_alu_mode_x), + .carry_in_sel (dsp_carry_in_sel_x), + .casc_p_in (), + .casc_p_out (), + .carryout () ); `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_y ( - .clk (clk), - .ce_abc (dsp_ce_y), - .ce_p (dsp_ce_y_dly), - .ce_opmode (dsp_ce_y), - .x (dsp_x_y_x), - .y (dsp_x_y_y), - .p (dsp_x_y_p), - .opmode (dsp_opmode_y), - .casc_p_in (), - .casc_p_out () + .clk (clk), + .ce_abc (dsp_ce_y), + .ce_p (dsp_ce_y_dly), + .ce_ctrl (dsp_ce_y), + .x (dsp_x_y_x), + .y (dsp_x_y_y), + .p (dsp_x_y_p), + .op_mode (dsp_op_mode_y), + .alu_mode (dsp_alu_mode_y), + .carry_in_sel (dsp_carry_in_sel_y), + .casc_p_in (), + .casc_p_out (), + .carryout (dsp_carry_out_y) ); `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_y ( - .clk (clk), - .ce_abc (dsp_ce_y), - .ce_p (dsp_ce_y_dly), - .ce_opmode (dsp_ce_y), - .x (dsp_y_y_x), - .y (dsp_y_y_y), - .p (dsp_y_y_p), - .opmode (dsp_opmode_y), - .casc_p_in (), - .casc_p_out () + .clk (clk), + .ce_abc (dsp_ce_y), + .ce_p (dsp_ce_y_dly), + .ce_ctrl (dsp_ce_y), + .x (dsp_y_y_x), + .y (dsp_y_y_y), + .p (dsp_y_y_p), + .op_mode (dsp_op_mode_y), + .alu_mode (dsp_alu_mode_y), + .carry_in_sel (dsp_carry_in_sel_y), + .casc_p_in (), + .casc_p_out (), + .carryout () ); // - // UOP_OPCODE_PROPAGATE_CARRIES - // - reg [CARRY_W -1:0] propagate_carries_x_x_cry_r; - reg [CARRY_W -1:0] propagate_carries_y_x_cry_r; - reg [CARRY_W -1:0] propagate_carries_x_y_cry_r; - reg [CARRY_W -1:0] propagate_carries_y_y_cry_r; - - wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_x_cry_r}; - wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_x_cry_r}; - wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_y_cry_r}; - wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_y_cry_r}; - - reg [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_r; - reg [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_r; - reg [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_r; - reg [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_r; - - wire [CARRY_W -1:0] propagate_carries_x_x_w_cry_msb = propagate_carries_x_x_w_cry_r[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] propagate_carries_y_x_w_cry_msb = propagate_carries_y_x_w_cry_r[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] propagate_carries_x_y_w_cry_msb = propagate_carries_x_y_w_cry_r[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] propagate_carries_y_y_w_cry_msb = propagate_carries_y_y_w_cry_r[WORD_EXT_W -1:WORD_W]; - - wire [WORD_W -1:0] propagate_carries_x_x_w_cry_lsb = propagate_carries_x_x_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] propagate_carries_y_x_w_cry_lsb = propagate_carries_y_x_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] propagate_carries_x_y_w_cry_lsb = propagate_carries_x_y_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] propagate_carries_y_y_w_cry_lsb = propagate_carries_y_y_w_cry_r[WORD_W -1:0]; - - wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_x_w_cry_lsb}; - wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_x_w_cry_lsb}; - wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_y_w_cry_lsb}; - wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_y_w_cry_lsb}; - - task _propagate_carries_update_cry; - input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry; - { propagate_carries_x_x_cry_r, propagate_carries_y_x_cry_r, propagate_carries_x_y_cry_r, propagate_carries_y_y_cry_r} <= - { x_x_cry, y_x_cry, x_y_cry, y_y_cry}; - endtask - - task propagate_carries_clear_cry; _propagate_carries_update_cry( CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); endtask - task propagate_carries_store_cry; _propagate_carries_update_cry(propagate_carries_x_x_w_cry_msb, propagate_carries_y_x_w_cry_msb, propagate_carries_x_y_w_cry_msb, propagate_carries_y_y_w_cry_msb); endtask - - task _propagate_carries_update_sum_w_cry; - input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry; - { propagate_carries_x_x_w_cry_r, propagate_carries_y_x_w_cry_r, propagate_carries_x_y_w_cry_r, propagate_carries_y_y_w_cry_r} <= - { x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry}; - endtask - - task propagate_carries_store_sum_w_cry; _propagate_carries_update_sum_w_cry(propagate_carries_x_x_w_cry, propagate_carries_y_x_w_cry, propagate_carries_x_y_w_cry, propagate_carries_y_y_w_cry); endtask + // UOP_OPCODE_MODULAR_SUBTRACT_X + // + reg modular_subtract_x_brw_flag; + reg modular_subtract_y_brw_flag; + // + // IMPORTANT: DSP48E1 turns out to have a very non-obvious feature: when doing _subtraction_, + // the CARRYOUT[3] is _NOT_ equivalent to the borrow flag! See "CARRYOUT/CARRYCASCOUT" + // section of Appendix A on pp. 55-56 of UG479 for more details. + // always @(posedge clk) // - if (opcode == UOP_OPCODE_PROPAGATE_CARRIES) - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_LATENCY_PRE3: propagate_carries_clear_cry; - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1: propagate_carries_store_cry; - // - WRK_FSM_STATE_LATENCY_PRE4, - WRK_FSM_STATE_BUSY2, - WRK_FSM_STATE_LATENCY_POST2: propagate_carries_store_sum_w_cry; - // + case (opcode) + UOP_OPCODE_MODULAR_SUBTRACT_X: + case (wrk_fsm_state) + WRK_FSM_STATE_LATENCY_POST4: + //{modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {1'bX, 1'bZ}; + {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y}; + endcase endcase + + //reg modular_subtract_x_brw_r; + //reg modular_subtract_y_brw_r; - // - // UOP_OPCODE_MODULAR_SUBTRACT_X - // UOP_OPCODE_MODULAR_SUBTRACT_Y - // - reg modular_subtract_x_brw_r; - reg modular_subtract_y_brw_r; - - reg modular_subtract_x_cry_r; - reg modular_subtract_y_cry_r; + //reg modular_subtract_x_cry_r; + //reg modular_subtract_y_cry_r; - wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r}; - wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r}; + //wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r}; + //wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r}; - wire [WORD_W:0] modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r}; - wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_brw_r}; + //wire [WORD_W:0] modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r}; + //wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_cry_r}; - reg [WORD_W:0] modular_subtract_x_w_brw_r; - reg [WORD_W:0] modular_subtract_y_w_brw_r; + //reg [WORD_W:0] modular_subtract_x_w_brw_r; + //reg [WORD_W:0] modular_subtract_y_w_brw_r; - reg [WORD_W:0] modular_subtract_x_w_cry_r; - reg [WORD_W:0] modular_subtract_y_w_cry_r; + //reg [WORD_W:0] modular_subtract_x_w_cry_r; + //reg [WORD_W:0] modular_subtract_y_w_cry_r; - wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W]; - wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W]; + //wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W]; + //wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W]; - wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W]; - wire modular_subtract_y_w_cry_msb = modular_subtract_y_w_cry_r[WORD_W]; + //wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W]; + //wire modular_subtract_y_w_cry_msb = modular_subtract_y_w_cry_r[WORD_W]; - wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0]; - wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0]; + //wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0]; + //wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0]; - wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0]; - wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0]; + //wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0]; + //wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0]; - wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb}; - wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb}; + //wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb}; + //wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb}; - wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb}; - wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb}; + //wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb}; + //wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb}; reg [WORD_EXT_W -1:0] modular_subtract_x_mux; reg [WORD_EXT_W -1:0] modular_subtract_y_mux; @@ -1035,68 +1142,68 @@ module modexpng_general_worker wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]}; wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]}; - task _modular_subtract_update_brw; - input x_brw, y_brw; - {modular_subtract_x_brw_r, modular_subtract_y_brw_r} <= {x_brw, y_brw}; - endtask + //task _modular_subtract_update_brw; + //input x_brw, y_brw; + //{modular_subtract_x_brw_r, modular_subtract_y_brw_r} <= {x_brw, y_brw}; + //endtask - task _modular_subtract_update_cry; - input x_cry, y_cry; - {modular_subtract_x_cry_r, modular_subtract_y_cry_r} <= {x_cry, y_cry}; - endtask + //task _modular_subtract_update_cry; + //input x_cry, y_cry; + //{modular_subtract_x_cry_r, modular_subtract_y_cry_r} <= {x_cry, y_cry}; + //endtask - task modular_subtract_clear_brw; _modular_subtract_update_brw( 1'b0, 1'b0); endtask - task modular_subtract_store_brw; _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb); endtask + //task modular_subtract_clear_brw; _modular_subtract_update_brw( 1'b0, 1'b0); endtask + //task modular_subtract_store_brw; _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb); endtask - task modular_subtract_clear_cry; _modular_subtract_update_cry( 1'b0, 1'b0); endtask - task modular_subtract_store_cry; _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb); endtask + //task modular_subtract_clear_cry; _modular_subtract_update_cry( 1'b0, 1'b0); endtask + //task modular_subtract_store_cry; _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb); endtask - task _modular_subtract_update_diff_w_brw; - input [WORD_W:0] x_diff_w_brw, y_diff_w_brw; - {modular_subtract_x_w_brw_r, modular_subtract_y_w_brw_r} <= {x_diff_w_brw, y_diff_w_brw}; - endtask + //task _modular_subtract_update_diff_w_brw; + //input [WORD_W:0] x_diff_w_brw, y_diff_w_brw; + //{modular_subtract_x_w_brw_r, modular_subtract_y_w_brw_r} <= {x_diff_w_brw, y_diff_w_brw}; + //endtask - task _modular_subtract_update_sum_w_cry; - input [WORD_W:0] x_sum_w_cry, y_sum_w_cry; - {modular_subtract_x_w_cry_r, modular_subtract_y_w_cry_r} <= {x_sum_w_cry, y_sum_w_cry}; - endtask + //task _modular_subtract_update_sum_w_cry; + //input [WORD_W:0] x_sum_w_cry, y_sum_w_cry; + //{modular_subtract_x_w_cry_r, modular_subtract_y_w_cry_r} <= {x_sum_w_cry, y_sum_w_cry}; + //endtask - task modular_subtract_store_diff_w_brw; _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw); endtask + //task modular_subtract_store_diff_w_brw; _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw); endtask - task modular_subtract_store_sum_w_cry; _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry); endtask + //task modular_subtract_store_sum_w_cry; _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry); endtask always @(posedge clk) // case (opcode) // - UOP_OPCODE_MODULAR_SUBTRACT_X: + //UOP_OPCODE_MODULAR_SUBTRACT_X: // - case (wrk_fsm_state) + //case (wrk_fsm_state) // - WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_brw; - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST3: modular_subtract_store_brw; // we need the very last borrow here too! + //WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_brw; + //WRK_FSM_STATE_BUSY1, + //WRK_FSM_STATE_LATENCY_POST1, + //WRK_FSM_STATE_LATENCY_POST3: modular_subtract_store_brw; // we need the very last borrow here too! // - WRK_FSM_STATE_LATENCY_PRE4, - WRK_FSM_STATE_BUSY2, - WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_diff_w_brw; + //WRK_FSM_STATE_LATENCY_PRE4, + //WRK_FSM_STATE_BUSY2, + //WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_diff_w_brw; // - endcase + //endcase // - UOP_OPCODE_MODULAR_SUBTRACT_Y: + //UOP_OPCODE_MODULAR_SUBTRACT_Y: // - case (wrk_fsm_state) + //case (wrk_fsm_state) // - WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_cry; - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1: modular_subtract_store_cry; + //WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_cry; + //WRK_FSM_STATE_BUSY1, + //WRK_FSM_STATE_LATENCY_POST1: modular_subtract_store_cry; // - WRK_FSM_STATE_LATENCY_PRE4, - WRK_FSM_STATE_BUSY2, - WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_sum_w_cry; + //WRK_FSM_STATE_LATENCY_PRE4, + //WRK_FSM_STATE_BUSY2, + //WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_sum_w_cry; // - endcase + //endcase // UOP_OPCODE_MODULAR_SUBTRACT_Z: // @@ -1106,8 +1213,8 @@ module modexpng_general_worker WRK_FSM_STATE_BUSY2, WRK_FSM_STATE_LATENCY_POST2: // - begin modular_subtract_x_mux <= !modular_subtract_x_brw_r ? rd_narrow_x_din_x_dly1 : rd_wide_x_din_x_dly1; - modular_subtract_y_mux <= !modular_subtract_y_brw_r ? rd_narrow_x_din_y_dly1 : rd_wide_x_din_y_dly1; end + begin modular_subtract_x_mux <= !modular_subtract_x_brw_flag ? rd_narrow_x_din_x_dly1 : rd_wide_x_din_x_dly1; + modular_subtract_y_mux <= !modular_subtract_y_brw_flag ? rd_narrow_x_din_y_dly1 : rd_wide_x_din_y_dly1; end // endcase // @@ -1316,7 +1423,8 @@ module modexpng_general_worker // case (opcode) // - UOP_OPCODE_PROPAGATE_CARRIES: + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_MODULAR_SUBTRACT_X: // case (wrk_fsm_state) // @@ -1324,7 +1432,6 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_POST2, WRK_FSM_STATE_LATENCY_POST4: // - //update_narrow_dout(propagate_carries_x_x_w_cry_reduced, propagate_carries_y_x_w_cry_reduced, propagate_carries_x_y_w_cry_reduced, propagate_carries_y_y_w_cry_reduced); update_narrow_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced); // endcase @@ -1380,27 +1487,15 @@ module modexpng_general_worker // endcase // - UOP_OPCODE_MODULAR_SUBTRACT_X: - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST3: - // - update_narrow_dout(modular_subtract_x_w_brw_reduced, modular_subtract_x_w_brw_reduced, modular_subtract_y_w_brw_reduced, modular_subtract_y_w_brw_reduced); - // - endcase - // UOP_OPCODE_MODULAR_SUBTRACT_Y: // case (wrk_fsm_state) // - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST3: - // - update_wide_dout(modular_subtract_x_w_cry_reduced, modular_subtract_x_w_cry_reduced, modular_subtract_y_w_cry_reduced, modular_subtract_y_w_cry_reduced); + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2, + WRK_FSM_STATE_LATENCY_POST4: + // + update_wide_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced); // endcase // |