diff options
Diffstat (limited to 'rtl')
-rw-r--r-- | rtl/modexpng_general_worker.v | 172 |
1 file changed, 161 insertions, 11 deletions
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v index 0620bd6..684af5a 100644 --- a/rtl/modexpng_general_worker.v +++ b/rtl/modexpng_general_worker.v @@ -54,7 +54,9 @@ module modexpng_general_worker // `include "modexpng_parameters.vh" `include "modexpng_microcode.vh" - + `include "modexpng_dsp48e1.vh" + `include "modexpng_dsp_slice_primitives.vh" + // // Ports @@ -240,8 +242,8 @@ module modexpng_general_worker // // Delays // - reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:3]; - reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:3]; + reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:4]; + reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:4]; reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:3]; reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:3]; @@ -255,6 +257,11 @@ module modexpng_general_worker reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly1; reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly1; + reg rd_narrow_ena_x_dly1 = 1'b0; + reg rd_narrow_ena_y_dly1 = 1'b0; + reg rd_narrow_ena_x_dly2 = 1'b0; + reg rd_narrow_ena_y_dly2 = 1'b0; + always @(posedge clk) begin // {rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x}; @@ -267,12 +274,15 @@ module modexpng_general_worker {rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y}; {rd_narrow_y_din_y_dly1} <= {wrk_rd_narrow_y_din_y}; // - {rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0]} <= {rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0], rd_narrow_addr_x}; - {rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0]} <= {rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0], rd_narrow_addr_y}; + {rd_narrow_addr_x_dly[4], rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0]} <= {rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0], rd_narrow_addr_x}; + {rd_narrow_addr_y_dly[4], rd_narrow_addr_y_dly[3], 
rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0]} <= {rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0], rd_narrow_addr_y}; // {rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0]} <= {rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0], rd_wide_addr_x}; {rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0]} <= {rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0], rd_wide_addr_y}; // + {rd_narrow_ena_x_dly2, rd_narrow_ena_x_dly1} <= {rd_narrow_ena_x_dly1, rd_narrow_ena_x}; + {rd_narrow_ena_y_dly2, rd_narrow_ena_y_dly1} <= {rd_narrow_ena_y_dly1, rd_narrow_ena_y}; + // end @@ -376,7 +386,14 @@ module modexpng_general_worker // case (opcode) // - UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_PROPAGATE_CARRIES: + // + case (wrk_fsm_state) + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2, + WRK_FSM_STATE_LATENCY_POST4: enable_narrow_wr_en; + endcase + // UOP_OPCODE_MODULAR_SUBTRACT_X, UOP_OPCODE_MERGE_LH, UOP_OPCODE_REGULAR_ADD_UNEVEN: @@ -729,7 +746,14 @@ module modexpng_general_worker // case (opcode) // - UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_PROPAGATE_CARRIES: + // + case (wrk_fsm_state) + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2, + WRK_FSM_STATE_LATENCY_POST4: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[4], rd_narrow_addr_y_dly[4]); + endcase + // UOP_OPCODE_MODULAR_SUBTRACT_X, UOP_OPCODE_MERGE_LH, UOP_OPCODE_REGULAR_ADD_UNEVEN: @@ -773,6 +797,131 @@ module modexpng_general_worker end + + // + // DSP Slice Array + // + wire [DSP48E1_C_W-1:0] dsp_x_x_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_x_din_x_dly1}; + wire [DSP48E1_C_W-1:0] dsp_y_x_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_y_din_x_dly1}; + wire [DSP48E1_C_W-1:0] dsp_x_y_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_x_din_y_dly1}; + wire 
[DSP48E1_C_W-1:0] dsp_y_y_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_y_din_y_dly1}; + + wire [DSP48E1_C_W-1:0] dsp_x_x_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_x_din_x_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_x_din_x_dly1[WORD_W-1:0]}; + wire [DSP48E1_C_W-1:0] dsp_y_x_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_y_din_x_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_y_din_x_dly1[WORD_W-1:0]}; + wire [DSP48E1_C_W-1:0] dsp_x_y_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_x_din_y_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_x_din_y_dly1[WORD_W-1:0]}; + wire [DSP48E1_C_W-1:0] dsp_y_y_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_y_din_y_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_y_din_y_dly1[WORD_W-1:0]}; + + wire [DSP48E1_P_W-1:0] dsp_x_x_p; + wire [DSP48E1_P_W-1:0] dsp_y_x_p; + wire [DSP48E1_P_W-1:0] dsp_x_y_p; + wire [DSP48E1_P_W-1:0] dsp_y_y_p; + + wire [WORD_EXT_W-1:0] dsp_x_x_p_reduced = {CARRY_ZERO, dsp_x_x_p[WORD_W-1:0]}; + wire [WORD_EXT_W-1:0] dsp_y_x_p_reduced = {CARRY_ZERO, dsp_y_x_p[WORD_W-1:0]}; + wire [WORD_EXT_W-1:0] dsp_x_y_p_reduced = {CARRY_ZERO, dsp_x_y_p[WORD_W-1:0]}; + wire [WORD_EXT_W-1:0] dsp_y_y_p_reduced = {CARRY_ZERO, dsp_y_y_p[WORD_W-1:0]}; + + reg dsp_ce_x = 1'b0; + reg dsp_ce_y = 1'b0; + reg dsp_ce_x_dly = 1'b0; + reg dsp_ce_y_dly = 1'b0; + reg [DSP48E1_OPMODE_W-1:0] dsp_opmode_x; + reg [DSP48E1_OPMODE_W-1:0] dsp_opmode_y; + + always @(posedge clk or negedge rst_n) + // + if (!rst_n) {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0}; + else case (opcode) + // + UOP_OPCODE_PROPAGATE_CARRIES: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2}; + default: {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0}; + // + endcase + + always @(posedge clk) begin + // + dsp_opmode_x <= {DSP48E1_OPMODE_W{1'bX}}; + dsp_opmode_y <= {DSP48E1_OPMODE_W{1'bX}}; + // + if (rd_narrow_ena_x_dly2) + // + case (opcode) + // + UOP_OPCODE_PROPAGATE_CARRIES: if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_opmode_x <= 
DSP48E1_OPMODE_Z0_YC_X0; + else dsp_opmode_x <= DSP48E1_OPMODE_ZP17_YC_X0; + // + endcase + // + if (rd_narrow_ena_y_dly2) + // + case (opcode) + // + UOP_OPCODE_PROPAGATE_CARRIES: if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_opmode_y <= DSP48E1_OPMODE_Z0_YC_X0; + else dsp_opmode_y <= DSP48E1_OPMODE_ZP17_YC_X0; + // + endcase + // + end + + always @(posedge clk) {dsp_ce_x_dly, dsp_ce_y_dly} <= {dsp_ce_x, dsp_ce_y}; + + `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_x + ( + .clk (clk), + .ce_abc (dsp_ce_x), + .ce_p (dsp_ce_x_dly), + .ce_opmode (dsp_ce_x), + .x (dsp_x_x_x), + .y (dsp_x_x_y), + .p (dsp_x_x_p), + .opmode (dsp_opmode_x), + .casc_p_in (), + .casc_p_out () + ); + + `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_x + ( + .clk (clk), + .ce_abc (dsp_ce_x), + .ce_p (dsp_ce_x_dly), + .ce_opmode (dsp_ce_x), + .x (dsp_y_x_x), + .y (dsp_y_x_y), + .p (dsp_y_x_p), + .opmode (dsp_opmode_x), + .casc_p_in (), + .casc_p_out () + ); + + `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_y + ( + .clk (clk), + .ce_abc (dsp_ce_y), + .ce_p (dsp_ce_y_dly), + .ce_opmode (dsp_ce_y), + .x (dsp_x_y_x), + .y (dsp_x_y_y), + .p (dsp_x_y_p), + .opmode (dsp_opmode_y), + .casc_p_in (), + .casc_p_out () + ); + + `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_y + ( + .clk (clk), + .ce_abc (dsp_ce_y), + .ce_p (dsp_ce_y_dly), + .ce_opmode (dsp_ce_y), + .x (dsp_y_y_x), + .y (dsp_y_y_y), + .p (dsp_y_y_p), + .opmode (dsp_opmode_y), + .casc_p_in (), + .casc_p_out () + ); + + // // UOP_OPCODE_PROPAGATE_CARRIES // @@ -1171,11 +1320,12 @@ module modexpng_general_worker // case (wrk_fsm_state) // - WRK_FSM_STATE_BUSY1, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST3: + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2, + WRK_FSM_STATE_LATENCY_POST4: // - update_narrow_dout(propagate_carries_x_x_w_cry_reduced, propagate_carries_y_x_w_cry_reduced, propagate_carries_x_y_w_cry_reduced, propagate_carries_y_y_w_cry_reduced); + //update_narrow_dout(propagate_carries_x_x_w_cry_reduced, propagate_carries_y_x_w_cry_reduced, 
propagate_carries_x_y_w_cry_reduced, propagate_carries_y_y_w_cry_reduced); + update_narrow_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced); // endcase // |