about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- rtl/modexpng_general_worker.v 172
1 files changed, 161 insertions, 11 deletions
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index 0620bd6..684af5a 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -54,7 +54,9 @@ module modexpng_general_worker
//
`include "modexpng_parameters.vh"
`include "modexpng_microcode.vh"
-
+ `include "modexpng_dsp48e1.vh"
+ `include "modexpng_dsp_slice_primitives.vh"
+
//
// Ports
@@ -240,8 +242,8 @@ module modexpng_general_worker
//
// Delays
//
- reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:3];
- reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:3];
+ reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:4];
+ reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:4];
reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:3];
reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:3];
@@ -255,6 +257,11 @@ module modexpng_general_worker
reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly1;
reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly1;
+ reg rd_narrow_ena_x_dly1 = 1'b0; // rd_narrow_ena_x delayed by one clock
+ reg rd_narrow_ena_y_dly1 = 1'b0; // rd_narrow_ena_y delayed by one clock
+ reg rd_narrow_ena_x_dly2 = 1'b0; // rd_narrow_ena_x delayed by two clocks
+ reg rd_narrow_ena_y_dly2 = 1'b0; // rd_narrow_ena_y delayed by two clocks
+
always @(posedge clk) begin
//
{rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x};
@@ -267,12 +274,15 @@ module modexpng_general_worker
{rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y};
{rd_narrow_y_din_y_dly1} <= {wrk_rd_narrow_y_din_y};
//
- {rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0]} <= {rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0], rd_narrow_addr_x};
- {rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0]} <= {rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0], rd_narrow_addr_y};
+ {rd_narrow_addr_x_dly[4], rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0]} <= {rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0], rd_narrow_addr_x};
+ {rd_narrow_addr_y_dly[4], rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0]} <= {rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0], rd_narrow_addr_y};
//
{rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0]} <= {rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0], rd_wide_addr_x};
{rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0]} <= {rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0], rd_wide_addr_y};
//
+ {rd_narrow_ena_x_dly2, rd_narrow_ena_x_dly1} <= {rd_narrow_ena_x_dly1, rd_narrow_ena_x};
+ {rd_narrow_ena_y_dly2, rd_narrow_ena_y_dly1} <= {rd_narrow_ena_y_dly1, rd_narrow_ena_y};
+ //
end
@@ -376,7 +386,14 @@ module modexpng_general_worker
//
case (opcode)
//
- UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_PROPAGATE_CARRIES:
+ //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY2,
+ WRK_FSM_STATE_LATENCY_POST2,
+ WRK_FSM_STATE_LATENCY_POST4: enable_narrow_wr_en;
+ endcase
+ //
UOP_OPCODE_MODULAR_SUBTRACT_X,
UOP_OPCODE_MERGE_LH,
UOP_OPCODE_REGULAR_ADD_UNEVEN:
@@ -729,7 +746,14 @@ module modexpng_general_worker
//
case (opcode)
//
- UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_PROPAGATE_CARRIES:
+ //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY2,
+ WRK_FSM_STATE_LATENCY_POST2,
+ WRK_FSM_STATE_LATENCY_POST4: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[4], rd_narrow_addr_y_dly[4]);
+ endcase
+ //
UOP_OPCODE_MODULAR_SUBTRACT_X,
UOP_OPCODE_MERGE_LH,
UOP_OPCODE_REGULAR_ADD_UNEVEN:
@@ -773,6 +797,131 @@ module modexpng_general_worker
end
+
+ // Operand and result nets for the four DSP slices; dsp_<a>_<b>_<port> carries data from rd_narrow_<a>_din_<b>_dly1, <port> is x, y or p
+ // DSP Slice Array
+ // x operands are driven with 'bX; the commented-out expressions would zero-extend the delayed narrow read words to DSP48E1_C_W bits
+ wire [DSP48E1_C_W-1:0] dsp_x_x_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_x_din_x_dly1};
+ wire [DSP48E1_C_W-1:0] dsp_y_x_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_y_din_x_dly1};
+ wire [DSP48E1_C_W-1:0] dsp_x_y_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_x_din_y_dly1};
+ wire [DSP48E1_C_W-1:0] dsp_y_y_x = 'bX;//{{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_narrow_y_din_y_dly1};
+ // y operands: delayed narrow read word with a constant 1'b1 inserted between bits [WORD_EXT_W-1:WORD_W] and [WORD_W-1:0], zero-padded to DSP48E1_C_W bits. NOTE(review): the reason for the inserted 1'b1 is not visible here - confirm
+ wire [DSP48E1_C_W-1:0] dsp_x_x_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_x_din_x_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_x_din_x_dly1[WORD_W-1:0]};
+ wire [DSP48E1_C_W-1:0] dsp_y_x_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_y_din_x_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_y_din_x_dly1[WORD_W-1:0]};
+ wire [DSP48E1_C_W-1:0] dsp_x_y_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_x_din_y_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_x_din_y_dly1[WORD_W-1:0]};
+ wire [DSP48E1_C_W-1:0] dsp_y_y_y = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_narrow_y_din_y_dly1[WORD_EXT_W-1:WORD_W], 1'b1, rd_narrow_y_din_y_dly1[WORD_W-1:0]};
+ // p: result nets connected to the .p ports of the slice instances
+ wire [DSP48E1_P_W-1:0] dsp_x_x_p;
+ wire [DSP48E1_P_W-1:0] dsp_y_x_p;
+ wire [DSP48E1_P_W-1:0] dsp_x_y_p;
+ wire [DSP48E1_P_W-1:0] dsp_y_y_p;
+ // reduced results: CARRY_ZERO concatenated above the low WORD_W bits of each slice output
+ wire [WORD_EXT_W-1:0] dsp_x_x_p_reduced = {CARRY_ZERO, dsp_x_x_p[WORD_W-1:0]};
+ wire [WORD_EXT_W-1:0] dsp_y_x_p_reduced = {CARRY_ZERO, dsp_y_x_p[WORD_W-1:0]};
+ wire [WORD_EXT_W-1:0] dsp_x_y_p_reduced = {CARRY_ZERO, dsp_x_y_p[WORD_W-1:0]};
+ wire [WORD_EXT_W-1:0] dsp_y_y_p_reduced = {CARRY_ZERO, dsp_y_y_p[WORD_W-1:0]};
+
+ reg dsp_ce_x = 1'b0; // clock enable wired to .ce_abc and .ce_opmode of the two slices that use dsp_opmode_x
+ reg dsp_ce_y = 1'b0; // clock enable wired to .ce_abc and .ce_opmode of the two slices that use dsp_opmode_y
+ reg dsp_ce_x_dly = 1'b0; // dsp_ce_x delayed by one clock, wired to .ce_p
+ reg dsp_ce_y_dly = 1'b0; // dsp_ce_y delayed by one clock, wired to .ce_p
+ reg [DSP48E1_OPMODE_W-1:0] dsp_opmode_x; // registered opmode shared by slices dst_inst_x_x and dst_inst_y_x
+ reg [DSP48E1_OPMODE_W-1:0] dsp_opmode_y; // registered opmode shared by slices dst_inst_x_y and dst_inst_y_y
+
+ always @(posedge clk or negedge rst_n)
+ // DSP clock enables: cleared asynchronously while rst_n is low, otherwise updated on every rising clock edge
+ if (!rst_n) {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0};
+ else case (opcode)
+ // only UOP_OPCODE_PROPAGATE_CARRIES enables the slices, following the narrow read enables delayed by two clocks; every other opcode forces the enables low
+ UOP_OPCODE_PROPAGATE_CARRIES: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2};
+ default: {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0};
+ //
+ endcase
+
+ always @(posedge clk) begin
+ // default: both opmodes become X unless a branch below assigns them in the same clock (the later non-blocking assignment wins)
+ dsp_opmode_x <= {DSP48E1_OPMODE_W{1'bX}};
+ dsp_opmode_y <= {DSP48E1_OPMODE_W{1'bX}};
+ // x side: opmode is only chosen while the twice-delayed narrow read enable is set
+ if (rd_narrow_ena_x_dly2)
+ //
+ case (opcode)
+ // rd_narrow_addr_x_dly[1] equal to OP_ADDR_ZERO selects DSP48E1_OPMODE_Z0_YC_X0, any other address selects DSP48E1_OPMODE_ZP17_YC_X0
+ UOP_OPCODE_PROPAGATE_CARRIES: if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_opmode_x <= DSP48E1_OPMODE_Z0_YC_X0;
+ else dsp_opmode_x <= DSP48E1_OPMODE_ZP17_YC_X0;
+ // NOTE(review): the constant names suggest Z=0 versus Z=P>>17 with Y=C and X=0 - confirm against modexpng_dsp48e1.vh
+ endcase
+ // y side: same selection using rd_narrow_ena_y_dly2 and rd_narrow_addr_y_dly[1]
+ if (rd_narrow_ena_y_dly2)
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_PROPAGATE_CARRIES: if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_opmode_y <= DSP48E1_OPMODE_Z0_YC_X0;
+ else dsp_opmode_y <= DSP48E1_OPMODE_ZP17_YC_X0;
+ //
+ endcase
+ //
+ end
+
+ always @(posedge clk) {dsp_ce_x_dly, dsp_ce_y_dly} <= {dsp_ce_x, dsp_ce_y}; // one-clock delayed copies of the clock enables, used for the .ce_p ports
+
+ `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_x // slice for data from rd_narrow_x_din_x_dly1; macro presumably defined in modexpng_dsp_slice_primitives.vh - TODO confirm
+ (
+ .clk (clk),
+ .ce_abc (dsp_ce_x), // enabled by dsp_ce_x
+ .ce_p (dsp_ce_x_dly), // enabled one clock after ce_abc
+ .ce_opmode (dsp_ce_x),
+ .x (dsp_x_x_x), // driven with 'bX
+ .y (dsp_x_x_y), // narrow word with the inserted 1'b1
+ .p (dsp_x_x_p), // low WORD_W bits feed dsp_x_x_p_reduced
+ .opmode (dsp_opmode_x),
+ .casc_p_in (), // cascade ports are left unconnected
+ .casc_p_out ()
+ );
+
+ `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_x // slice for data from rd_narrow_y_din_x_dly1; macro presumably defined in modexpng_dsp_slice_primitives.vh - TODO confirm
+ (
+ .clk (clk),
+ .ce_abc (dsp_ce_x), // enabled by dsp_ce_x
+ .ce_p (dsp_ce_x_dly), // enabled one clock after ce_abc
+ .ce_opmode (dsp_ce_x),
+ .x (dsp_y_x_x), // driven with 'bX
+ .y (dsp_y_x_y), // narrow word with the inserted 1'b1
+ .p (dsp_y_x_p), // low WORD_W bits feed dsp_y_x_p_reduced
+ .opmode (dsp_opmode_x),
+ .casc_p_in (), // cascade ports are left unconnected
+ .casc_p_out ()
+ );
+
+ `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_y // slice for data from rd_narrow_x_din_y_dly1; macro presumably defined in modexpng_dsp_slice_primitives.vh - TODO confirm
+ (
+ .clk (clk),
+ .ce_abc (dsp_ce_y), // enabled by dsp_ce_y
+ .ce_p (dsp_ce_y_dly), // enabled one clock after ce_abc
+ .ce_opmode (dsp_ce_y),
+ .x (dsp_x_y_x), // driven with 'bX
+ .y (dsp_x_y_y), // narrow word with the inserted 1'b1
+ .p (dsp_x_y_p), // low WORD_W bits feed dsp_x_y_p_reduced
+ .opmode (dsp_opmode_y),
+ .casc_p_in (), // cascade ports are left unconnected
+ .casc_p_out ()
+ );
+
+ `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_y // slice for data from rd_narrow_y_din_y_dly1; macro presumably defined in modexpng_dsp_slice_primitives.vh - TODO confirm
+ (
+ .clk (clk),
+ .ce_abc (dsp_ce_y), // enabled by dsp_ce_y
+ .ce_p (dsp_ce_y_dly), // enabled one clock after ce_abc
+ .ce_opmode (dsp_ce_y),
+ .x (dsp_y_y_x), // driven with 'bX
+ .y (dsp_y_y_y), // narrow word with the inserted 1'b1
+ .p (dsp_y_y_p), // low WORD_W bits feed dsp_y_y_p_reduced
+ .opmode (dsp_opmode_y),
+ .casc_p_in (), // cascade ports are left unconnected
+ .casc_p_out ()
+ );
+
+
//
// UOP_OPCODE_PROPAGATE_CARRIES
//
@@ -1171,11 +1320,12 @@ module modexpng_general_worker
//
case (wrk_fsm_state)
//
- WRK_FSM_STATE_BUSY1,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST3:
+ WRK_FSM_STATE_BUSY2,
+ WRK_FSM_STATE_LATENCY_POST2,
+ WRK_FSM_STATE_LATENCY_POST4:
//
- update_narrow_dout(propagate_carries_x_x_w_cry_reduced, propagate_carries_y_x_w_cry_reduced, propagate_carries_x_y_w_cry_reduced, propagate_carries_y_y_w_cry_reduced);
+ //update_narrow_dout(propagate_carries_x_x_w_cry_reduced, propagate_carries_y_x_w_cry_reduced, propagate_carries_x_y_w_cry_reduced, propagate_carries_y_y_w_cry_reduced);
+ update_narrow_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced);
//
endcase
//