aboutsummaryrefslogtreecommitdiff
path: root/rtl
diff options
context:
space:
mode:
Diffstat (limited to 'rtl')
-rw-r--r-- rtl/modexpng_general_worker.v 1896
1 files changed, 838 insertions, 1058 deletions
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index eadd284..0620bd6 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -127,67 +127,46 @@ module modexpng_general_worker
//
// FSM Declaration
//
- localparam [5:0] WRK_FSM_STATE_IDLE = 6'h00;
-
- localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1 = 6'h01;
- localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2 = 6'h02;
- localparam [5:0] WRK_FSM_STATE_BUSY = 6'h03;
- localparam [5:0] WRK_FSM_STATE_LATENCY_POST1 = 6'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
- localparam [5:0] WRK_FSM_STATE_LATENCY_POST2 = 6'h06;
-
- localparam [5:0] WRK_FSM_STATE_STOP = 6'h07;
-
- localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 6'h10;
- localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 6'h11;
- localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 6'h12;
- localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 6'h13;
- localparam [5:0] WRK_FSM_STATE_BUSY_M1 = 6'h14;
- localparam [5:0] WRK_FSM_STATE_BUSY_M2 = 6'h15;
- localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 6'h16;
- localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 6'h17;
- localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 6'h18;
- localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 6'h19;
-
- localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_TP = 6'h20;
- localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_TP = 6'h21;
- localparam [5:0] WRK_FSM_STATE_LATENCY_PRE3_TP = 6'h22;
- localparam [5:0] WRK_FSM_STATE_LATENCY_PRE4_TP = 6'h23;
- localparam [5:0] WRK_FSM_STATE_BUSY_TP = 6'h24;
- localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_TP = 6'h25;
- localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_TP = 6'h26;
- localparam [5:0] WRK_FSM_STATE_LATENCY_POST3_TP = 6'h27;
- localparam [5:0] WRK_FSM_STATE_LATENCY_POST4_TP = 6'h28;
- localparam [5:0] WRK_FSM_STATE_HOLDOFF_TP = 6'h29;
-
- reg [5:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
- reg [5:0] wrk_fsm_state_next_one_pass; // single address space sweep
- reg [5:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y)
- reg [5:0] wrk_fsm_state_next_two_pass; // two address space sweeps
- reg wrk_fsm_two_pass_pass; // 0=first pass, 1=second pass
- reg wrk_fsm_two_pass_pass_dly; // 0=first pass, 1=second pass
-
-
- // TODO: Comment on how narrow/wide address increment works (narrow is one long sweep, wide is two twice shorter sweeps)
+
+ localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0; // also the initial value of wrk_fsm_state
+
+ localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1; // as next state: source read enables are asserted
+ localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2; // as next state: reads enabled only for COPY/CROSS_LADDERS_X2Y
+ localparam [3:0] WRK_FSM_STATE_LATENCY_PRE3 = 4'h3; // as next state: source read enables are asserted
+ localparam [3:0] WRK_FSM_STATE_LATENCY_PRE4 = 4'h4; // as next state: reads enabled only for COPY/CROSS_LADDERS_X2Y
+
+ localparam [3:0] WRK_FSM_STATE_BUSY1 = 4'hA; // NOTE: encoded 0xA, i.e. not in declaration order; reads (as next state) and writes (as current state) are enabled
+ localparam [3:0] WRK_FSM_STATE_BUSY2 = 4'hB; // as next state: reads enabled only for COPY/CROSS_LADDERS_X2Y
+ localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; // destination write enables are asserted in this state
+ localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6; // write-enable logic does not select this state
+ localparam [3:0] WRK_FSM_STATE_LATENCY_POST3 = 4'h7; // destination write enables are asserted in this state
+ localparam [3:0] WRK_FSM_STATE_LATENCY_POST4 = 4'h8; // write-enable logic does not select this state
+
+ localparam [3:0] WRK_FSM_STATE_STOP = 4'hF;
+
+ reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE; // current state; selects the destination write enables
+ reg [3:0] wrk_fsm_state_next; // next state; selects the source read enables and read addresses (driven outside this view)
+
//
// Control Signals
//
- reg rd_wide_xy_ena_x = 1'b0;
- reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_x;
- reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_x;
+ reg rd_wide_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_wide_bank_x;
+ reg [ OP_ADDR_W -1:0] rd_wide_addr_x;
- reg rd_narrow_xy_ena_x = 1'b0;
- reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_x;
- reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x;
+ reg rd_narrow_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_narrow_bank_x;
+ reg [ OP_ADDR_W -1:0] rd_narrow_addr_x;
- reg rd_wide_xy_ena_y = 1'b0;
- reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_y;
- reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_y;
+ reg rd_wide_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_wide_bank_y;
+ reg [ OP_ADDR_W -1:0] rd_wide_addr_y;
- reg rd_narrow_xy_ena_y = 1'b0;
- reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_y;
- reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y;
+ reg rd_narrow_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_narrow_bank_y;
+ reg [ OP_ADDR_W -1:0] rd_narrow_addr_y;
reg wr_wide_xy_ena_x = 1'b0;
reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_x;
@@ -217,21 +196,21 @@ module modexpng_general_worker
//
// Mapping
//
- assign wrk_rd_wide_xy_ena_x = rd_wide_xy_ena_x;
- assign wrk_rd_wide_xy_bank_x = rd_wide_xy_bank_x;
- assign wrk_rd_wide_xy_addr_x = rd_wide_xy_addr_x;
+ assign wrk_rd_wide_xy_ena_x = rd_wide_ena_x;
+ assign wrk_rd_wide_xy_bank_x = rd_wide_bank_x;
+ assign wrk_rd_wide_xy_addr_x = rd_wide_addr_x;
- assign wrk_rd_narrow_xy_ena_x = rd_narrow_xy_ena_x;
- assign wrk_rd_narrow_xy_bank_x = rd_narrow_xy_bank_x;
- assign wrk_rd_narrow_xy_addr_x = rd_narrow_xy_addr_x;
+ assign wrk_rd_narrow_xy_ena_x = rd_narrow_ena_x;
+ assign wrk_rd_narrow_xy_bank_x = rd_narrow_bank_x;
+ assign wrk_rd_narrow_xy_addr_x = rd_narrow_addr_x;
- assign wrk_rd_wide_xy_ena_y = rd_wide_xy_ena_y;
- assign wrk_rd_wide_xy_bank_y = rd_wide_xy_bank_y;
- assign wrk_rd_wide_xy_addr_y = rd_wide_xy_addr_y;
+ assign wrk_rd_wide_xy_ena_y = rd_wide_ena_y;
+ assign wrk_rd_wide_xy_bank_y = rd_wide_bank_y;
+ assign wrk_rd_wide_xy_addr_y = rd_wide_addr_y;
- assign wrk_rd_narrow_xy_ena_y = rd_narrow_xy_ena_y;
- assign wrk_rd_narrow_xy_bank_y = rd_narrow_xy_bank_y;
- assign wrk_rd_narrow_xy_addr_y = rd_narrow_xy_addr_y;
+ assign wrk_rd_narrow_xy_ena_y = rd_narrow_ena_y;
+ assign wrk_rd_narrow_xy_bank_y = rd_narrow_bank_y;
+ assign wrk_rd_narrow_xy_addr_y = rd_narrow_addr_y;
assign wrk_wr_wide_xy_ena_x = wr_wide_xy_ena_x;
assign wrk_wr_wide_xy_bank_x = wr_wide_xy_bank_x;
@@ -260,172 +239,111 @@ module modexpng_general_worker
//
// Delays
- //
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly3;
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly4;
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly3;
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly4;
-
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1;
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2;
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly3;
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly4;
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1;
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2;
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly3;
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly4;
-
- reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1;
- reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly2;
- reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly3;
- //reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly4;
-
- reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1;
- reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly2;
- reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly3;
- //reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly4;
-
- reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1;
- reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2;
- reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly3;
- reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly1;
- reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly2;
-
- reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1;
- reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2;
- reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3;
- reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly1;
- reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly2;
+ //
+ reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:3]; // 4-stage shift register of rd_narrow_addr_x, [0] is the newest sample
+ reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:3]; // 4-stage shift register of rd_narrow_addr_y, [0] is the newest sample
+
+ reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:3]; // 4-stage shift register of rd_wide_addr_x, [0] is the newest sample
+ reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:3]; // 4-stage shift register of rd_wide_addr_y, [0] is the newest sample
+
+ reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly1; // wrk_rd_wide_x_din_x registered once
+ reg [WORD_EXT_W -1:0] rd_wide_y_din_x_dly1; // wrk_rd_wide_y_din_x registered once
+ reg [WORD_EXT_W -1:0] rd_wide_x_din_y_dly1; // wrk_rd_wide_x_din_y registered once
+ reg [WORD_EXT_W -1:0] rd_wide_y_din_y_dly1; // wrk_rd_wide_y_din_y registered once
+ reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly1; // wrk_rd_narrow_x_din_x registered once
+ reg [WORD_EXT_W -1:0] rd_narrow_y_din_x_dly1; // wrk_rd_narrow_y_din_x registered once
+ reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly1; // wrk_rd_narrow_x_din_y registered once
+ reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly1; // wrk_rd_narrow_y_din_y registered once
always @(posedge clk) begin
//
- {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
- {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
+ {rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x};
+ {rd_wide_y_din_x_dly1} <= {wrk_rd_wide_y_din_x};
+ {rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y};
+ {rd_wide_y_din_y_dly1} <= {wrk_rd_wide_y_din_y};
//
- {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
- {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
+ {rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x};
+ {rd_narrow_y_din_x_dly1} <= {wrk_rd_narrow_y_din_x};
+ {rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y};
+ {rd_narrow_y_din_y_dly1} <= {wrk_rd_narrow_y_din_y};
//
- {/*wrk_rd_wide_x_din_x_dly4,*/ wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {/*wrk_rd_wide_x_din_x_dly3,*/ wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x};
- {/*wrk_rd_wide_x_din_y_dly4,*/ wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {/*wrk_rd_wide_x_din_y_dly3,*/ wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y};
+ {rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0]} <= {rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0], rd_narrow_addr_x};
+ {rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0]} <= {rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0], rd_narrow_addr_y};
//
- {wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x};
- {wrk_rd_narrow_y_din_x_dly2, wrk_rd_narrow_y_din_x_dly1} <= {wrk_rd_narrow_y_din_x_dly1, wrk_rd_narrow_y_din_x};
- {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y};
- {wrk_rd_narrow_y_din_y_dly2, wrk_rd_narrow_y_din_y_dly1} <= {wrk_rd_narrow_y_din_y_dly1, wrk_rd_narrow_y_din_y};
+ {rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0]} <= {rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0], rd_wide_addr_x};
+ {rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0]} <= {rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0], rd_wide_addr_y};
//
end
-
-
+
+
//
// Source Read Enable Logic
//
+ task _update_wide_rd_en; input _en; {rd_wide_ena_x, rd_wide_ena_y } <= {2{_en}}; endtask // drive both wide read enables (X and Y) to _en
+ task _update_narrow_rd_en; input _en; {rd_narrow_ena_x, rd_narrow_ena_y} <= {2{_en}}; endtask // drive both narrow read enables (X and Y) to _en
- task _update_wide_xy_rd_en; input _en; {rd_wide_xy_ena_x, rd_wide_xy_ena_y } <= {2{_en}}; endtask
- task _update_narrow_xy_rd_en; input _en; {rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{_en}}; endtask
+ task enable_wide_rd_en; _update_wide_rd_en(1'b1); endtask // assert rd_wide_ena_x and rd_wide_ena_y
+ task disable_wide_rd_en; _update_wide_rd_en(1'b0); endtask // deassert rd_wide_ena_x and rd_wide_ena_y
- task enable_wide_xy_rd_en; _update_wide_xy_rd_en(1'b1); endtask
- task disable_wide_xy_rd_en; _update_wide_xy_rd_en(1'b0); endtask
-
- task enable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b1); endtask
- task disable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b0); endtask
+ task enable_narrow_rd_en; _update_narrow_rd_en(1'b1); endtask // assert rd_narrow_ena_x and rd_narrow_ena_y
+ task disable_narrow_rd_en; _update_narrow_rd_en(1'b0); endtask // deassert rd_narrow_ena_x and rd_narrow_ena_y
always @(posedge clk or negedge rst_n)
//
if (!rst_n) begin
//
- disable_wide_xy_rd_en;
- disable_narrow_xy_rd_en;
+ disable_wide_rd_en;
+ disable_narrow_rd_en;
//
end else begin
//
- disable_wide_xy_rd_en;
- disable_narrow_xy_rd_en;
- //
- // one_pass
+ disable_wide_rd_en;
+ disable_narrow_rd_en;
//
- case (wrk_fsm_state_next_one_pass)
+ case (opcode)
//
- WRK_FSM_STATE_LATENCY_PRE1,
- WRK_FSM_STATE_LATENCY_PRE2,
- WRK_FSM_STATE_BUSY:
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_MODULAR_REDUCE_INIT,
+ UOP_OPCODE_MODULAR_SUBTRACT_X:
//
- case (opcode)
- //
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_MODULAR_REDUCE_INIT:
- //
- enable_narrow_xy_rd_en;
- //
- UOP_OPCODE_COPY_CRT_Y2X: begin
- //
- enable_wide_xy_rd_en;
- enable_narrow_xy_rd_en;
- //
- end
- //
- UOP_OPCODE_MERGE_LH:
- //
- enable_wide_xy_rd_en;
- //
+ case (wrk_fsm_state_next)
+ WRK_FSM_STATE_LATENCY_PRE1,
+ WRK_FSM_STATE_LATENCY_PRE3,
+ WRK_FSM_STATE_BUSY1: enable_narrow_rd_en;
endcase
//
- endcase
- //
- // one_pass_meander
- //
- case (wrk_fsm_state_next_one_pass_meander)
- //
- WRK_FSM_STATE_LATENCY_PRE1_M1,
- WRK_FSM_STATE_LATENCY_PRE1_M2,
- WRK_FSM_STATE_LATENCY_PRE2_M1,
- WRK_FSM_STATE_LATENCY_PRE2_M2,
- WRK_FSM_STATE_BUSY_M1,
- WRK_FSM_STATE_BUSY_M2:
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_MODULAR_SUBTRACT_Y,
+ UOP_OPCODE_MODULAR_SUBTRACT_Z,
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
- case (opcode)
- //
- UOP_OPCODE_COPY_LADDERS_X2Y,
- UOP_OPCODE_CROSS_LADDERS_X2Y: begin
- //
- enable_wide_xy_rd_en;
- enable_narrow_xy_rd_en;
- //
- end
- //
- UOP_OPCODE_REGULAR_ADD_UNEVEN:
- //
- enable_narrow_xy_rd_en;
- //
+ case (wrk_fsm_state_next)
+ WRK_FSM_STATE_LATENCY_PRE1,
+ WRK_FSM_STATE_LATENCY_PRE3,
+ WRK_FSM_STATE_BUSY1: begin enable_wide_rd_en; enable_narrow_rd_en; end
endcase
//
- endcase
- //
- // two_pass
- //
- case (wrk_fsm_state_next_two_pass)
- //
- WRK_FSM_STATE_LATENCY_PRE1_TP,
- WRK_FSM_STATE_LATENCY_PRE2_TP,
- WRK_FSM_STATE_LATENCY_PRE3_TP,
- WRK_FSM_STATE_LATENCY_PRE4_TP,
- WRK_FSM_STATE_BUSY_TP:
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y:
//
- case (opcode)
- UOP_OPCODE_MODULAR_SUBTRACT:
- //
- if (!wrk_fsm_two_pass_pass) begin
- enable_wide_xy_rd_en;
- enable_narrow_xy_rd_en;
- end else
- enable_narrow_xy_rd_en;
- //
+ case (wrk_fsm_state_next)
+ WRK_FSM_STATE_LATENCY_PRE1,
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_LATENCY_PRE3,
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_BUSY2: begin enable_wide_rd_en; enable_narrow_rd_en; end
endcase
//
+ UOP_OPCODE_MERGE_LH:
+ //
+ case (wrk_fsm_state_next)
+ WRK_FSM_STATE_LATENCY_PRE1,
+ WRK_FSM_STATE_LATENCY_PRE3,
+ WRK_FSM_STATE_BUSY1: enable_wide_rd_en;
+ endcase
+ //
endcase
//
end
@@ -435,490 +353,330 @@ module modexpng_general_worker
// Destination Write Enable Logic
//
- task _update_wide_xy_wr_en; input _en; {wr_wide_xy_ena_x, wr_wide_xy_ena_y } <= {2{_en}}; endtask
- task _update_narrow_xy_wr_en; input _en; {wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{_en}}; endtask
+ task _update_wide_wr_en; input _en; {wr_wide_xy_ena_x, wr_wide_xy_ena_y } <= {2{_en}}; endtask // drive both wide write enables to _en; NOTE: the write regs keep the _xy_ infix that the read regs dropped
+ task _update_narrow_wr_en; input _en; {wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{_en}}; endtask // drive both narrow write enables to _en
- task enable_wide_xy_wr_en; _update_wide_xy_wr_en(1'b1); endtask
- task disable_wide_xy_wr_en; _update_wide_xy_wr_en(1'b0); endtask
+ task enable_wide_wr_en; _update_wide_wr_en(1'b1); endtask // assert wr_wide_xy_ena_x and wr_wide_xy_ena_y
+ task disable_wide_wr_en; _update_wide_wr_en(1'b0); endtask // deassert wr_wide_xy_ena_x and wr_wide_xy_ena_y
- task enable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b1); endtask
- task disable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b0); endtask
+ task enable_narrow_wr_en; _update_narrow_wr_en(1'b1); endtask // assert wr_narrow_xy_ena_x and wr_narrow_xy_ena_y
+ task disable_narrow_wr_en; _update_narrow_wr_en(1'b0); endtask // deassert wr_narrow_xy_ena_x and wr_narrow_xy_ena_y
always @(posedge clk or negedge rst_n)
//
if (!rst_n) begin
//
- disable_wide_xy_wr_en;
- disable_narrow_xy_wr_en;
+ disable_wide_wr_en;
+ disable_narrow_wr_en;
//
end else begin
//
- disable_wide_xy_wr_en;
- disable_narrow_xy_wr_en;
+ disable_wide_wr_en;
+ disable_narrow_wr_en;
//
- // one_pass
- //
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2:
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_MODULAR_SUBTRACT_X,
+ UOP_OPCODE_MERGE_LH,
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
- case (opcode)
- //
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_MERGE_LH:
- //
- enable_narrow_xy_wr_en;
- //
- UOP_OPCODE_COPY_CRT_Y2X: begin
- //
- enable_wide_xy_wr_en;
- enable_narrow_xy_wr_en;
- //
- end
- //
- UOP_OPCODE_MODULAR_REDUCE_INIT:
- //
- enable_wide_xy_wr_en;
- //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3: enable_narrow_wr_en;
endcase
//
- endcase
- //
- // one_pass_meander
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2,
- WRK_FSM_STATE_LATENCY_POST2_M2:
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y,
+ UOP_OPCODE_MODULAR_SUBTRACT_Z:
//
- case (opcode)
- //
- UOP_OPCODE_COPY_LADDERS_X2Y,
- UOP_OPCODE_CROSS_LADDERS_X2Y: begin
- //
- enable_wide_xy_wr_en;
- enable_narrow_xy_wr_en;
- //
- end
- //
- UOP_OPCODE_REGULAR_ADD_UNEVEN:
- //
- enable_narrow_xy_wr_en;
- //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3: begin enable_wide_wr_en; enable_narrow_wr_en; end
endcase
//
- endcase
- //
- // two_pass
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY_TP,
- WRK_FSM_STATE_LATENCY_POST1_TP,
- WRK_FSM_STATE_LATENCY_POST2_TP,
- WRK_FSM_STATE_LATENCY_POST3_TP,
- WRK_FSM_STATE_LATENCY_POST4_TP:
+ UOP_OPCODE_MODULAR_REDUCE_INIT,
+ UOP_OPCODE_MODULAR_SUBTRACT_Y:
//
- case (opcode)
- //
- UOP_OPCODE_MODULAR_SUBTRACT:
- //
- if (!wrk_fsm_two_pass_pass)
- enable_narrow_xy_wr_en;
- else begin
- enable_wide_xy_wr_en;
- enable_narrow_xy_wr_en;
- end
- //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3: enable_wide_wr_en;
endcase
- //
+ //
endcase
//
end
-
+
//
- // Source to Destination Data Logic
+ // Source Read Address Logic
//
+ reg [OP_ADDR_W -1:0] rd_wide_addr_next; // address that advance_rd_wide_addr loads into rd_wide_addr_x/y
+ reg [OP_ADDR_W -1:0] rd_narrow_addr_next; // address that advance_rd_narrow_addr loads into rd_narrow_addr_x/y
+
+ reg rd_wide_addr_is_last = 1'b0; // set by advance_rd_wide_addr when the loaded address equals word_index_last
+ reg rd_narrow_addr_is_last = 1'b0; // set by advance_rd_narrow_addr when the loaded address equals word_index_last
+ reg rd_wide_addr_is_last_half = 1'b0; // as above, compared against word_index_last_half
+ reg rd_narrow_addr_is_last_half = 1'b0; // as above, compared against word_index_last_half
+
+ reg rd_wide_addr_next_is_last = 1'b0; // registered wrap condition used by advance_rd_wide_addr_next
+ reg rd_narrow_addr_next_is_last = 1'b0; // registered wrap condition used by advance_rd_narrow_addr_next
+
+ reg rd_wide_addr_next_is_last_half = 1'b0; // registered wrap condition used by advance_rd_wide_addr_next_half
+ reg rd_narrow_addr_next_is_last_half = 1'b0; // registered wrap condition used by advance_rd_narrow_addr_next_half
+
+ reg [3:0] rd_wide_addr_is_last_half_dly = 4'h0; // shift history of rd_wide_addr_is_last_half, bit 0 is the newest sample
+ reg [3:0] rd_narrow_addr_is_last_half_dly = 4'h0; // shift history of rd_narrow_addr_is_last_half, bit 0 is the newest sample
+
always @(posedge clk) begin
//
- update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
- update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
- //
- // one_pass
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2:
- //
- case (opcode)
- //
- UOP_OPCODE_PROPAGATE_CARRIES:
- //
- update_narrow_dout(rd_narrow_x_din_x_w_cry_reduced,
- rd_narrow_y_din_x_w_cry_reduced,
- rd_narrow_x_din_y_w_cry_reduced,
- rd_narrow_y_din_y_w_cry_reduced);
- //
- UOP_OPCODE_COPY_CRT_Y2X: begin
- //
- update_wide_dout(wrk_rd_wide_x_din_y,
- wrk_rd_wide_y_din_y,
- wrk_rd_wide_x_din_y,
- wrk_rd_wide_y_din_y);
- //
- update_narrow_dout(wrk_rd_narrow_x_din_y,
- wrk_rd_narrow_y_din_y,
- wrk_rd_narrow_x_din_y,
- wrk_rd_narrow_y_din_y);
- //
- end
- //
- UOP_OPCODE_MODULAR_REDUCE_INIT:
- //
- update_wide_dout(wrk_rd_narrow_x_din_x,
- wrk_rd_narrow_y_din_x,
- wrk_rd_narrow_x_din_y,
- wrk_rd_narrow_y_din_y);
- //
- UOP_OPCODE_MERGE_LH:
- //
- update_narrow_dout(wrk_rd_wide_x_din_x,
- wrk_rd_wide_y_din_x,
- wrk_rd_wide_x_din_y,
- wrk_rd_wide_y_din_y);
- //
- endcase
- //
- endcase
- //
- // one_pass_meander
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2,
- WRK_FSM_STATE_LATENCY_POST2_M2:
- //
- case (opcode)
- //
- UOP_OPCODE_COPY_LADDERS_X2Y: begin
- //
- update_wide_dout(wrk_rd_wide_x_din_x_dly3,
- wrk_rd_wide_x_din_x_dly2,
- wrk_rd_wide_x_din_y_dly3,
- wrk_rd_wide_x_din_y_dly2);
- //
- update_narrow_dout(wrk_rd_narrow_x_din_x_dly3,
- wrk_rd_narrow_x_din_x_dly2,
- wrk_rd_narrow_x_din_y_dly3,
- wrk_rd_narrow_x_din_y_dly2);
- //
- end
- //
- UOP_OPCODE_CROSS_LADDERS_X2Y: begin
- //
- update_wide_dout(wrk_rd_wide_x_din_x_dly3,
- wrk_rd_wide_x_din_y_dly2,
- wrk_rd_wide_x_din_y_dly3,
- wrk_rd_wide_x_din_x_dly2);
- //
- update_narrow_dout(wrk_rd_narrow_x_din_x_dly3,
- wrk_rd_narrow_x_din_y_dly2,
- wrk_rd_narrow_x_din_y_dly3,
- wrk_rd_narrow_x_din_x_dly2);
- //
- end
- //
- UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
- //
- update_narrow_dout(regadd_x_x_trunc,
- regadd_y_x_trunc,
- regadd_x_y_trunc,
- regadd_y_y_trunc);
- //
- end
- //
- endcase
- //
- endcase
- //
- // two_pass
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY_TP,
- WRK_FSM_STATE_LATENCY_POST1_TP,
- WRK_FSM_STATE_LATENCY_POST2_TP,
- WRK_FSM_STATE_LATENCY_POST3_TP,
- WRK_FSM_STATE_LATENCY_POST4_TP:
- //
- case (opcode)
- //
- UOP_OPCODE_MODULAR_SUBTRACT:
- //
- if (!wrk_fsm_two_pass_pass)
- update_narrow_dout(modsub_x_ab_dly_trunc, modsub_x_abn_trunc, modsub_y_ab_dly_trunc, modsub_y_abn_trunc);
- else begin
- update_wide_dout (modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux);
- update_narrow_dout(modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux);
- end
- //
- endcase
- //
- endcase
+ rd_wide_addr_is_last_half_dly <= {rd_wide_addr_is_last_half_dly[2:0], rd_wide_addr_is_last_half};
+ rd_narrow_addr_is_last_half_dly <= {rd_narrow_addr_is_last_half_dly[2:0], rd_narrow_addr_is_last_half};
//
end
-
- //
- // Source Read Address Logic
- //
-
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_xy_next;
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_xy_next;
-
- reg rd_wide_xy_addr_xy_next_last_seen;
- reg rd_wide_xy_addr_xy_next_last_seen_dly1;
- reg rd_wide_xy_addr_xy_next_last_seen_dly2;
-
- wire rd_wide_xy_addr_xy_next_is_last = rd_wide_xy_addr_xy_next == word_index_last_half;
- wire rd_narrow_xy_addr_xy_next_is_last = rd_narrow_xy_addr_xy_next == word_index_last;
+ task preset_rd_wide_bank_addr; // load the same {bank, addr} into the X and Y wide read ports and clear the position flags
+ input [BANK_ADDR_W -1:0] bank;
+ input [ OP_ADDR_W -1:0] addr;
+ begin
+ {rd_wide_bank_x, rd_wide_addr_x} <= {bank, addr};
+ {rd_wide_bank_y, rd_wide_addr_y} <= {bank, addr};
+ rd_wide_addr_is_last <= 1'b0; // cleared unconditionally, addr is not compared against word_index_last
+ rd_wide_addr_is_last_half <= 1'b0; // cleared unconditionally, addr is not compared against word_index_last_half
+ end
+ endtask
- task update_rd_wide_bank_addr;
+ task preset_rd_narrow_bank_addr; // load the same {bank, addr} into the X and Y narrow read ports and clear the position flags
input [BANK_ADDR_W -1:0] bank;
input [ OP_ADDR_W -1:0] addr;
begin
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {bank, addr};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {bank, addr};
+ {rd_narrow_bank_x, rd_narrow_addr_x} <= {bank, addr};
+ {rd_narrow_bank_y, rd_narrow_addr_y} <= {bank, addr};
+ rd_narrow_addr_is_last <= 1'b0; // cleared unconditionally, addr is not compared against word_index_last
+ rd_narrow_addr_is_last_half <= 1'b0; // cleared unconditionally, addr is not compared against word_index_last_half
+ end
+ endtask
+
+ task preset_rd_wide_addr_next; // seed the wide look-ahead address and clear its wrap flags
+ input [OP_ADDR_W -1:0] addr;
+ begin
+ rd_wide_addr_next <= addr;
+ rd_wide_addr_next_is_last <= 1'b0; // NOTE(review): cleared even if addr equals word_index_last - presumably operands are always longer than the preset address; confirm
+ rd_wide_addr_next_is_last_half <= 1'b0; // NOTE(review): same assumption for word_index_last_half
end
endtask
- task update_rd_wide_bank;
- input [BANK_ADDR_W -1:0] bank;
+ task preset_rd_narrow_addr_next; // seed the narrow look-ahead address and clear its wrap flags
+ input [OP_ADDR_W -1:0] addr;
begin
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {bank, rd_wide_xy_addr_x};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {bank, rd_wide_xy_addr_y};
+ rd_narrow_addr_next <= addr;
+ rd_narrow_addr_next_is_last <= 1'b0; // NOTE(review): cleared even if addr equals word_index_last - presumably operands are always longer than the preset address; confirm
+ rd_narrow_addr_next_is_last_half <= 1'b0; // NOTE(review): same assumption for word_index_last_half
end
endtask
- task update_rd_narrow_bank_addr;
+ task keep_rd_wide_bank; // explicit hold of both wide read banks
+ begin
+ {rd_wide_bank_x} <= {rd_wide_bank_x}; // self-assignment; when called after the BANK_DNC default in the read-address always block, this later assignment takes effect
+ {rd_wide_bank_y} <= {rd_wide_bank_y};
+ end
+ endtask
+
+ task switch_rd_wide_bank; // point both wide read ports (X and Y) at a new bank; addresses are not touched here
input [BANK_ADDR_W -1:0] bank;
- input [ OP_ADDR_W -1:0] addr;
begin
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {bank, addr};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {bank, addr};
+ {rd_wide_bank_x} <= {bank};
+ {rd_wide_bank_y} <= {bank};
+ end
+ endtask
+
+ task keep_rd_wide_addr; // explicit hold of both wide read addresses
+ begin
+ {rd_wide_addr_x} <= {rd_wide_addr_x}; // NOTE(review): rd_wide_addr_is_last* are not held here, so a preceding preset leaves them cleared - confirm intended
+ {rd_wide_addr_y} <= {rd_wide_addr_y};
end
endtask
- task update_rd_narrow_bank;
+ task advance_rd_wide_addr; // move both wide read addresses to rd_wide_addr_next and refresh the position flags
+ begin
+ {rd_wide_addr_x} <= {rd_wide_addr_next};
+ {rd_wide_addr_y} <= {rd_wide_addr_next};
+ rd_wide_addr_is_last <= rd_wide_addr_next == word_index_last; // flag describes the address being loaded this cycle
+ rd_wide_addr_is_last_half <= rd_wide_addr_next == word_index_last_half; // same, against the half-length index
+ end
+ endtask
+
+ task keep_rd_narrow_bank; // explicit hold of both narrow read banks
+ begin
+ {rd_narrow_bank_x} <= {rd_narrow_bank_x}; // self-assignment; when called after the BANK_DNC default in the read-address always block, this later assignment takes effect
+ {rd_narrow_bank_y} <= {rd_narrow_bank_y};
+ end
+ endtask
+
+ task switch_rd_narrow_bank; // point both narrow read ports (X and Y) at a new bank; addresses are not touched here
input [BANK_ADDR_W -1:0] bank;
begin
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {bank, rd_narrow_xy_addr_x};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {bank, rd_narrow_xy_addr_y};
+ {rd_narrow_bank_x} <= {bank};
+ {rd_narrow_bank_y} <= {bank};
end
endtask
- task update_rd_wide_addr_next;
- input [OP_ADDR_W -1:0] addr;
- rd_wide_xy_addr_xy_next <= addr;
+ task keep_rd_narrow_addr; // explicit hold of both narrow read addresses
+ begin
+ {rd_narrow_addr_x} <= {rd_narrow_addr_x}; // NOTE(review): rd_narrow_addr_is_last* are not held here, so a preceding preset leaves them cleared - confirm intended
+ {rd_narrow_addr_y} <= {rd_narrow_addr_y};
+ end
+ endtask
+
+ task advance_rd_narrow_addr; // move both narrow read addresses to rd_narrow_addr_next and refresh the position flags
+ begin
+ {rd_narrow_addr_x} <= {rd_narrow_addr_next};
+ {rd_narrow_addr_y} <= {rd_narrow_addr_next};
+ rd_narrow_addr_is_last <= rd_narrow_addr_next == word_index_last; // flag describes the address being loaded this cycle
+ rd_narrow_addr_is_last_half <= rd_narrow_addr_next == word_index_last_half; // same, against the half-length index
+ end
+ endtask
+
+ task update_rd_wide_addr_flags; // recompute the registered wrap flags for rd_wide_addr_next
+ begin
+ rd_wide_addr_next_is_last <= rd_wide_addr_next == (word_index_last - 1'b1); // compares the pre-increment value against (last - 1): the advance_*_next callers increment in the same clock, so the flag matches the post-increment address
+ rd_wide_addr_next_is_last_half <= rd_wide_addr_next == (word_index_last_half - 1'b1); // same look-ahead, against the half-length index
+ end
endtask
- task update_rd_narrow_addr_next;
- input [OP_ADDR_W -1:0] addr;
- rd_narrow_xy_addr_xy_next <= addr;
+ task update_rd_narrow_addr_flags; // recompute the registered wrap flags for rd_narrow_addr_next
+ begin
+ rd_narrow_addr_next_is_last <= rd_narrow_addr_next == (word_index_last - 1'b1); // compares the pre-increment value against (last - 1): the advance_*_next callers increment in the same clock, so the flag matches the post-increment address
+ rd_narrow_addr_next_is_last_half <= rd_narrow_addr_next == (word_index_last_half - 1'b1); // same look-ahead, against the half-length index
+ end
endtask
task advance_rd_wide_addr_next;
- rd_wide_xy_addr_xy_next <= !rd_wide_xy_addr_xy_next_is_last ? rd_wide_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO;
+ begin // full-length sweep of the wide look-ahead address
+ rd_wide_addr_next <= !rd_wide_addr_next_is_last ? rd_wide_addr_next + 1'b1 : OP_ADDR_ZERO; // increment, or wrap to OP_ADDR_ZERO when the registered is_last flag is set
+ update_rd_wide_addr_flags; // refresh both wrap flags in the same clock
+ end
endtask
task advance_rd_narrow_addr_next;
- rd_narrow_xy_addr_xy_next <= !rd_narrow_xy_addr_xy_next_is_last ? rd_narrow_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO;
+ begin // full-length sweep of the narrow look-ahead address
+ rd_narrow_addr_next <= !rd_narrow_addr_next_is_last ? rd_narrow_addr_next + 1'b1 : OP_ADDR_ZERO; // increment, or wrap to OP_ADDR_ZERO when the registered is_last flag is set
+ update_rd_narrow_addr_flags; // refresh both wrap flags in the same clock
+ end
+ endtask
+
+ task advance_rd_wide_addr_next_half; // half-length sweep of the wide look-ahead address
+ begin
+ rd_wide_addr_next <= !rd_wide_addr_next_is_last_half ? rd_wide_addr_next + 1'b1 : OP_ADDR_ZERO; // increment, or wrap to OP_ADDR_ZERO when the registered is_last_half flag is set
+ update_rd_wide_addr_flags; // refresh both wrap flags in the same clock
+ end
+ endtask
+
+ task advance_rd_narrow_addr_next_half; // half-length sweep of the narrow look-ahead address
+ begin
+ rd_narrow_addr_next <= !rd_narrow_addr_next_is_last_half ? rd_narrow_addr_next + 1'b1 : OP_ADDR_ZERO; // increment, or wrap to OP_ADDR_ZERO when the registered is_last_half flag is set
+ update_rd_narrow_addr_flags; // refresh both wrap flags in the same clock
+ end
endtask
-
- always @(posedge clk)
- //
- case (opcode)
- UOP_OPCODE_MERGE_LH:
- case (wrk_fsm_state_next_one_pass)
- WRK_FSM_STATE_LATENCY_PRE1:
- rd_wide_xy_addr_xy_next_last_seen <= 1'b0;
- WRK_FSM_STATE_BUSY:
- if (!rd_wide_xy_addr_xy_next_last_seen && rd_wide_xy_addr_xy_next_is_last)
- rd_wide_xy_addr_xy_next_last_seen <= 1'b1;
- endcase
- UOP_OPCODE_REGULAR_ADD_UNEVEN:
- case (wrk_fsm_state_next_one_pass_meander)
- WRK_FSM_STATE_LATENCY_PRE1_M1: begin
- rd_wide_xy_addr_xy_next_last_seen <= 1'b0;
- rd_wide_xy_addr_xy_next_last_seen_dly1 <= 1'b0;
- rd_wide_xy_addr_xy_next_last_seen_dly2 <= 1'b0;
- end
- WRK_FSM_STATE_BUSY_M1: begin
- if (!rd_wide_xy_addr_xy_next_last_seen && rd_wide_xy_addr_xy_next_is_last)
- rd_wide_xy_addr_xy_next_last_seen <= 1'b1;
- rd_wide_xy_addr_xy_next_last_seen_dly1 <= rd_wide_xy_addr_xy_next_last_seen;
- rd_wide_xy_addr_xy_next_last_seen_dly2 <= rd_wide_xy_addr_xy_next_last_seen_dly1;
- end
- endcase
- endcase
always @(posedge clk) begin
//
- update_rd_wide_bank_addr (BANK_DNC, OP_ADDR_DNC);
- update_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC);
+ preset_rd_wide_bank_addr (BANK_DNC, OP_ADDR_DNC);
+ preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC);
//
- // one_pass
- //
- case (wrk_fsm_state_next_one_pass)
+ case (opcode)
//
- WRK_FSM_STATE_LATENCY_PRE1:
- //
- case (opcode)
- //
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X,
- UOP_OPCODE_MODULAR_REDUCE_INIT: begin
- //
- update_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
- update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
- //
- end
- //
- UOP_OPCODE_MERGE_LH: begin
- update_rd_wide_bank_addr (BANK_WIDE_L, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
- update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
- end
- //
- endcase
- //
- WRK_FSM_STATE_LATENCY_PRE2,
- WRK_FSM_STATE_BUSY:
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_MODULAR_SUBTRACT_X:
//
- case (opcode)
- //
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X: begin
- //
- update_rd_wide_bank_addr (sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
- update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
- //
- end
- //
- UOP_OPCODE_MODULAR_REDUCE_INIT: begin
- //
- update_rd_wide_bank_addr (sel_wide_in, rd_wide_xy_addr_xy_next ); advance_rd_wide_addr_next ;
- update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
- //
- end
- //
- UOP_OPCODE_MERGE_LH: begin
- //
- if (!rd_wide_xy_addr_xy_next_last_seen) update_rd_wide_bank_addr (BANK_WIDE_L, rd_wide_xy_addr_xy_next );
- else update_rd_wide_bank_addr (BANK_WIDE_H, rd_wide_xy_addr_xy_next );
- advance_rd_wide_addr_next ;
- update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
- //
- end
- //
+ case (wrk_fsm_state_next)
+ WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
+ WRK_FSM_STATE_LATENCY_PRE3,
+ WRK_FSM_STATE_BUSY1: begin keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY2: keep_rd_narrow_bank;
endcase
//
- endcase
- //
- // one_pass_meander
- //
- case (wrk_fsm_state_next_one_pass_meander)
- //
- WRK_FSM_STATE_LATENCY_PRE1_M1:
- case (opcode)
- UOP_OPCODE_COPY_LADDERS_X2Y,
- UOP_OPCODE_CROSS_LADDERS_X2Y: begin
- update_rd_wide_bank_addr (sel_wide_out, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
- update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
- end
- UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
- update_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
- update_rd_narrow_bank_addr(sel_wide_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
- end
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_MODULAR_SUBTRACT_Z,
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
+ //
+ case (wrk_fsm_state_next)
+ WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
+ preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
+ WRK_FSM_STATE_LATENCY_PRE3,
+ WRK_FSM_STATE_BUSY1: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next;
+ keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY2: begin keep_rd_wide_bank; keep_rd_narrow_bank; end
endcase
//
- WRK_FSM_STATE_LATENCY_PRE2_M1,
- WRK_FSM_STATE_BUSY_M1:
- case (opcode)
- UOP_OPCODE_COPY_LADDERS_X2Y,
- UOP_OPCODE_CROSS_LADDERS_X2Y: begin
- update_rd_wide_bank_addr (sel_wide_out, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
- update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
- end
- UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
- update_rd_wide_bank_addr (sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
- update_rd_narrow_bank_addr(sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
- end
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state_next)
+ WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_DNC, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
+ preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
+ WRK_FSM_STATE_LATENCY_PRE3,
+ WRK_FSM_STATE_BUSY1: begin advance_rd_wide_addr; advance_rd_wide_addr_next_half;
+ keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY2: keep_rd_narrow_bank;
endcase
//
- WRK_FSM_STATE_LATENCY_PRE1_M2,
- WRK_FSM_STATE_LATENCY_PRE2_M2,
- WRK_FSM_STATE_BUSY_M2:
- case (opcode)
- UOP_OPCODE_COPY_LADDERS_X2Y,
- UOP_OPCODE_CROSS_LADDERS_X2Y: begin
- update_rd_wide_bank (sel_wide_in );
- update_rd_narrow_bank(sel_narrow_in);
- end
- UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
- update_rd_wide_bank (sel_narrow_in);
- update_rd_narrow_bank(sel_narrow_in);
- end
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state_next)
+ WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
+ preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
+ WRK_FSM_STATE_LATENCY_PRE2: begin switch_rd_wide_bank (sel_wide_out); keep_rd_wide_addr;
+ switch_rd_narrow_bank(sel_narrow_out); keep_rd_narrow_addr; end
+ WRK_FSM_STATE_LATENCY_PRE3,
+ WRK_FSM_STATE_BUSY1: begin advance_rd_wide_addr; advance_rd_wide_addr_next; switch_rd_wide_bank(sel_wide_in);
+ advance_rd_narrow_addr; advance_rd_narrow_addr_next; switch_rd_narrow_bank(sel_narrow_in); end
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY2: begin keep_rd_wide_addr; switch_rd_wide_bank (sel_wide_out);
+ keep_rd_narrow_addr; switch_rd_narrow_bank(sel_narrow_out); end
endcase
//
- endcase
- //
- // two_pass
- //
- case (wrk_fsm_state_next_two_pass)
- //
- WRK_FSM_STATE_LATENCY_PRE1_TP:
+ UOP_OPCODE_MODULAR_SUBTRACT_Y:
//
- case (opcode)
- //
- UOP_OPCODE_MODULAR_SUBTRACT:
- //
- if (!wrk_fsm_two_pass_pass) begin
- update_rd_wide_bank_addr (BANK_WIDE_N, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
- update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
- end else begin
- update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
- end
- //
+ case (wrk_fsm_state_next)
+ WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_WIDE_N, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
+ preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
+ WRK_FSM_STATE_LATENCY_PRE3,
+ WRK_FSM_STATE_BUSY1: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next;
+ keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY2: begin keep_rd_wide_bank; keep_rd_narrow_bank; end
endcase
+ //
+ UOP_OPCODE_MERGE_LH:
//
- WRK_FSM_STATE_LATENCY_PRE2_TP,
- WRK_FSM_STATE_LATENCY_PRE3_TP,
- WRK_FSM_STATE_LATENCY_PRE4_TP,
- WRK_FSM_STATE_BUSY_TP:
- //
- case (opcode)
- //
- UOP_OPCODE_MODULAR_SUBTRACT:
- //
- if (!wrk_fsm_two_pass_pass) begin
- update_rd_wide_bank_addr (BANK_WIDE_N, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
- update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
- end else begin
- update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
- end
- //
+ case (wrk_fsm_state_next)
+ WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_WIDE_L, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
+ preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
+ WRK_FSM_STATE_LATENCY_PRE3: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next_half;
+ advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
+ WRK_FSM_STATE_BUSY1: begin if (!rd_wide_addr_is_last_half_dly[0]) keep_rd_wide_bank;
+ else switch_rd_wide_bank(BANK_WIDE_H);
+ advance_rd_wide_addr; advance_rd_wide_addr_next_half;
+ advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY2: keep_rd_wide_bank;
endcase
- //
+ //
endcase
//
end
@@ -927,13 +685,21 @@ module modexpng_general_worker
//
// Destination Write Address Logic
//
-
- wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half;
- wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half;
-
- wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_x = uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H;
- wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_y = uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H;
+ // UOP_OPCODE_MODULAR_REDUCE_INIT: registered selection of the wide destination bank.
+ // Stage 1 registers an address comparison per lane, stage 2 registers the bank picked from that flag.
+ reg modular_reduce_init_first_half_x, modular_reduce_init_first_half_y;
+ reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_x, modular_reduce_init_sel_wide_out_y;
+ always @(posedge clk) begin
+ // x lane: flag is set while the delayed narrow address has not passed word_index_last_half
+ modular_reduce_init_first_half_x <= (rd_narrow_addr_x_dly[1] <= word_index_last_half);
+ modular_reduce_init_sel_wide_out_x <= modular_reduce_init_first_half_x ? BANK_WIDE_L : BANK_WIDE_H;
+ // y lane: same two stages
+ modular_reduce_init_first_half_y <= (rd_narrow_addr_y_dly[1] <= word_index_last_half);
+ modular_reduce_init_sel_wide_out_y <= modular_reduce_init_first_half_y ? BANK_WIDE_L : BANK_WIDE_H;
+ //
+ end
+
task update_wr_wide_bank_addr;
input [BANK_ADDR_W -1:0] x_bank;
input [BANK_ADDR_W -1:0] y_bank;
@@ -955,120 +721,351 @@ module modexpng_general_worker
{wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {y_bank, y_addr};
end
endtask
-
+
always @(posedge clk) begin
//
update_wr_wide_bank_addr (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
//
- // one_pass
- //
- case (wrk_fsm_state)
+ case (opcode) // destination write bank/address per opcode; the DNC defaults above stay unless a branch below overrides them
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2:
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_MODULAR_SUBTRACT_X,
+ UOP_OPCODE_MERGE_LH,
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
- case (opcode)
- //
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_COPY_CRT_Y2X: begin
- update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
- update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
- end
- //
- UOP_OPCODE_MODULAR_REDUCE_INIT:
- update_wr_wide_bank_addr(uop_modular_reduce_init_bank_x, uop_modular_reduce_init_bank_y, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_y_dly2);
- //
- UOP_OPCODE_MERGE_LH:
- update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
- //
+ case (wrk_fsm_state) // narrow-only writers: address is the narrow read address delayed by dly[3]
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
endcase
+ // the next group writes both the narrow and the wide destination, each with its own delayed read address
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y,
+ UOP_OPCODE_MODULAR_SUBTRACT_Z:
//
- endcase
- //
- // one_pass_meander
- //
- case (wrk_fsm_state)
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3: begin update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
+ update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3] ); end
+ endcase
//
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2,
- WRK_FSM_STATE_LATENCY_POST2_M2:
- //
- case (opcode)
- UOP_OPCODE_COPY_LADDERS_X2Y,
- UOP_OPCODE_CROSS_LADDERS_X2Y: begin
- update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
- update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
- end
- UOP_OPCODE_REGULAR_ADD_UNEVEN:
- update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ // wide-only write; the per-lane bank comes from the registered modular_reduce_init_sel_wide_out_* signals
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3: update_wr_wide_bank_addr(modular_reduce_init_sel_wide_out_x, modular_reduce_init_sel_wide_out_y, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);
endcase
+ // wide-only write into sel_wide_out for both lanes
+ UOP_OPCODE_MODULAR_SUBTRACT_Y:
//
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3: update_wr_wide_bank_addr(sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);
+ endcase
+ // opcodes not listed above keep the DNC defaults
endcase
//
- // two_pass
+ end
+
+
+ //
+ // UOP_OPCODE_PROPAGATE_CARRIES
+ //
+ reg [CARRY_W -1:0] propagate_carries_x_x_cry_r;
+ reg [CARRY_W -1:0] propagate_carries_y_x_cry_r;
+ reg [CARRY_W -1:0] propagate_carries_x_y_cry_r;
+ reg [CARRY_W -1:0] propagate_carries_y_y_cry_r;
+
+ wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_x_cry_r};
+ wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_x_cry_r};
+ wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_y_cry_r};
+ wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_y_cry_r};
+
+ reg [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_r;
+ reg [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_r;
+ reg [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_r;
+ reg [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_r;
+
+ wire [CARRY_W -1:0] propagate_carries_x_x_w_cry_msb = propagate_carries_x_x_w_cry_r[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] propagate_carries_y_x_w_cry_msb = propagate_carries_y_x_w_cry_r[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] propagate_carries_x_y_w_cry_msb = propagate_carries_x_y_w_cry_r[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] propagate_carries_y_y_w_cry_msb = propagate_carries_y_y_w_cry_r[WORD_EXT_W -1:WORD_W];
+
+ wire [WORD_W -1:0] propagate_carries_x_x_w_cry_lsb = propagate_carries_x_x_w_cry_r[WORD_W -1:0];
+ wire [WORD_W -1:0] propagate_carries_y_x_w_cry_lsb = propagate_carries_y_x_w_cry_r[WORD_W -1:0];
+ wire [WORD_W -1:0] propagate_carries_x_y_w_cry_lsb = propagate_carries_x_y_w_cry_r[WORD_W -1:0];
+ wire [WORD_W -1:0] propagate_carries_y_y_w_cry_lsb = propagate_carries_y_y_w_cry_r[WORD_W -1:0];
+
+ wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_x_w_cry_lsb};
+ wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_x_w_cry_lsb};
+ wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_y_w_cry_lsb};
+ wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_y_w_cry_lsb};
+
+ task _propagate_carries_update_cry; // load the four lane carry registers on the same clock edge
+ input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry;
+ begin propagate_carries_x_x_cry_r <= x_x_cry; propagate_carries_y_x_cry_r <= y_x_cry;
+ propagate_carries_x_y_cry_r <= x_y_cry; propagate_carries_y_y_cry_r <= y_y_cry; end
+ endtask
+ // clear_cry zeroes the carries; store_cry reloads them from the carry field of the registered sums
+ task propagate_carries_clear_cry; _propagate_carries_update_cry(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); endtask
+ task propagate_carries_store_cry; _propagate_carries_update_cry(propagate_carries_x_x_w_cry_msb, propagate_carries_y_x_w_cry_msb, propagate_carries_x_y_w_cry_msb, propagate_carries_y_y_w_cry_msb); endtask
+ //
+ task _propagate_carries_update_sum_w_cry; // load the four lane sum registers on the same clock edge
+ input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry;
+ begin propagate_carries_x_x_w_cry_r <= x_x_sum_w_cry; propagate_carries_y_x_w_cry_r <= y_x_sum_w_cry;
+ propagate_carries_x_y_w_cry_r <= x_y_sum_w_cry; propagate_carries_y_y_w_cry_r <= y_y_sum_w_cry; end
+ endtask
+ // store_sum_w_cry captures the combinational word-plus-carry sums
+ task propagate_carries_store_sum_w_cry; _propagate_carries_update_sum_w_cry(propagate_carries_x_x_w_cry, propagate_carries_y_x_w_cry, propagate_carries_x_y_w_cry, propagate_carries_y_y_w_cry); endtask
+
+ always @(posedge clk) // UOP_OPCODE_PROPAGATE_CARRIES: sequencing of the carry and sum registers
//
- case (wrk_fsm_state)
+ if (opcode == UOP_OPCODE_PROPAGATE_CARRIES) // this block leaves the registers untouched under any other opcode
//
- WRK_FSM_STATE_BUSY_TP,
- WRK_FSM_STATE_LATENCY_POST1_TP,
- WRK_FSM_STATE_LATENCY_POST2_TP,
- WRK_FSM_STATE_LATENCY_POST3_TP,
- WRK_FSM_STATE_LATENCY_POST4_TP:
+ case (wrk_fsm_state)
//
- case (opcode)
+ WRK_FSM_STATE_LATENCY_PRE3: propagate_carries_clear_cry; // carries start from CARRY_ZERO
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1: propagate_carries_store_cry; // carry <= bits [WORD_EXT_W-1:WORD_W] of the registered sum
+ // the sum registers are loaded in the states that alternate with the carry updates above
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY2,
+ WRK_FSM_STATE_LATENCY_POST2: propagate_carries_store_sum_w_cry; // sum <= delayed narrow word + stored carry
+ // any other state: no register is written
+ endcase
+
+
+ // Borrow/carry datapath for the modular-subtract micro-operations (x and y lanes):
+ // UOP_OPCODE_MODULAR_SUBTRACT_X
+ // UOP_OPCODE_MODULAR_SUBTRACT_Y
+ // _X chains a one-bit borrow through *_brw_r, _Y chains a one-bit carry through *_cry_r.
+ reg modular_subtract_x_brw_r;
+ reg modular_subtract_y_brw_r;
+ // carry flags, one per lane
+ reg modular_subtract_x_cry_r;
+ reg modular_subtract_y_cry_r;
+ // difference = delayed narrow X word - delayed narrow Y word - stored borrow; bit WORD_W is the outgoing borrow
+ wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r};
+ wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r};
+ // sum = delayed narrow X word + delayed wide X word + stored carry; each lane adds its own *_cry_r, never the borrow flag
+ wire [WORD_W:0] modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r};
+ wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_cry_r};
+
+ reg [WORD_W:0] modular_subtract_x_w_brw_r;
+ reg [WORD_W:0] modular_subtract_y_w_brw_r;
+
+ reg [WORD_W:0] modular_subtract_x_w_cry_r;
+ reg [WORD_W:0] modular_subtract_y_w_cry_r;
+
+ wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W];
+ wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W];
+
+ wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W];
+ wire modular_subtract_y_w_cry_msb = modular_subtract_y_w_cry_r[WORD_W];
+
+ wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0];
+ wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0];
+
+ wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0];
+ wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0];
+
+ wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb};
+ wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb};
+
+ wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb};
+ wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb};
+
+ reg [WORD_EXT_W -1:0] modular_subtract_x_mux;
+ reg [WORD_EXT_W -1:0] modular_subtract_y_mux;
+
+ wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]};
+ wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]};
+
+ task _modular_subtract_update_brw; // load both lane borrow flags on the same clock edge
+ input x_brw, y_brw;
+ begin modular_subtract_x_brw_r <= x_brw; modular_subtract_y_brw_r <= y_brw; end
+ endtask
+ //
+ task _modular_subtract_update_cry; // load both lane carry flags on the same clock edge
+ input x_cry, y_cry;
+ begin modular_subtract_x_cry_r <= x_cry; modular_subtract_y_cry_r <= y_cry; end
+ endtask
+ // borrow flags: clear to zero, or reload from the top bit of the registered differences
+ task modular_subtract_clear_brw; _modular_subtract_update_brw(1'b0, 1'b0); endtask
+ task modular_subtract_store_brw; _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb); endtask
+ // carry flags: clear to zero, or reload from the top bit of the registered sums
+ task modular_subtract_clear_cry; _modular_subtract_update_cry(1'b0, 1'b0); endtask
+ task modular_subtract_store_cry; _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb); endtask
+ //
+ task _modular_subtract_update_diff_w_brw; // load both lane difference registers
+ input [WORD_W:0] x_diff_w_brw, y_diff_w_brw;
+ begin modular_subtract_x_w_brw_r <= x_diff_w_brw; modular_subtract_y_w_brw_r <= y_diff_w_brw; end
+ endtask
+ //
+ task _modular_subtract_update_sum_w_cry; // load both lane sum registers
+ input [WORD_W:0] x_sum_w_cry, y_sum_w_cry;
+ begin modular_subtract_x_w_cry_r <= x_sum_w_cry; modular_subtract_y_w_cry_r <= y_sum_w_cry; end
+ endtask
+ // capture the combinational differences (with borrow bit)
+ task modular_subtract_store_diff_w_brw; _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw); endtask
+ // capture the combinational sums (with carry bit)
+ task modular_subtract_store_sum_w_cry; _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry); endtask
+
+ always @(posedge clk)
+ // Register sequencing for the three modular-subtract micro-operations; other opcodes write nothing here.
+ case (opcode)
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT_X:
+ // _X: borrow flags and difference registers are loaded in alternating states
+ case (wrk_fsm_state)
//
- UOP_OPCODE_MODULAR_SUBTRACT:
- //
- if (!wrk_fsm_two_pass_pass) begin
- update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
- end else begin
- update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
- update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
- end
+ WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_brw;
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3: modular_subtract_store_brw; // we need the very last borrow here too!
+ // difference registers: loaded from the combinational *_w_brw values
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY2,
+ WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_diff_w_brw;
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT_Y:
+ // _Y: same alternation for the carry flags and sum registers; unlike _X there is no POST3 flag update
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_cry;
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1: modular_subtract_store_cry;
+ // sum registers: loaded from the combinational *_w_cry values
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY2,
+ WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_sum_w_cry;
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT_Z:
+ // _Z: per lane, take the delayed narrow X word when the stored borrow flag is clear, else the delayed wide X word
+ case (wrk_fsm_state)
+ // this block does not write the borrow flags under _Y or _Z
+ WRK_FSM_STATE_LATENCY_PRE4,
+ WRK_FSM_STATE_BUSY2,
+ WRK_FSM_STATE_LATENCY_POST2:
//
- endcase
+ begin modular_subtract_x_mux <= !modular_subtract_x_brw_r ? rd_narrow_x_din_x_dly1 : rd_wide_x_din_x_dly1;
+ modular_subtract_y_mux <= !modular_subtract_y_brw_r ? rd_narrow_x_din_y_dly1 : rd_wide_x_din_y_dly1; end
+ //
+ endcase
+ //
+ endcase
+
+
+ //
+ // UOP_OPCODE_REGULAR_ADD_UNEVEN
+ //
+ reg [CARRY_W -1:0] regular_add_uneven_x_x_cry_r;
+ reg [CARRY_W -1:0] regular_add_uneven_y_x_cry_r;
+ reg [CARRY_W -1:0] regular_add_uneven_x_y_cry_r;
+ reg [CARRY_W -1:0] regular_add_uneven_y_y_cry_r;
+
+ wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_msb_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_x_cry_r};
+ wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_msb_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_x_cry_r};
+ wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_msb_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_y_cry_r};
+ wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_msb_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_y_cry_r};
+
+ wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_lsb_w_cry = regular_add_uneven_x_x_msb_w_cry + rd_wide_x_din_x_dly1;
+ wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_lsb_w_cry = regular_add_uneven_y_x_msb_w_cry + rd_wide_y_din_x_dly1;
+ wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_lsb_w_cry = regular_add_uneven_x_y_msb_w_cry + rd_wide_x_din_y_dly1;
+ wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_lsb_w_cry = regular_add_uneven_y_y_msb_w_cry + rd_wide_y_din_y_dly1;
+
+ reg [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_r;
+ reg [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_r;
+ reg [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_r;
+ reg [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_r;
+
+ wire [CARRY_W -1:0] regular_add_uneven_x_x_w_cry_msb = regular_add_uneven_x_x_w_cry_r[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] regular_add_uneven_y_x_w_cry_msb = regular_add_uneven_y_x_w_cry_r[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] regular_add_uneven_x_y_w_cry_msb = regular_add_uneven_x_y_w_cry_r[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] regular_add_uneven_y_y_w_cry_msb = regular_add_uneven_y_y_w_cry_r[WORD_EXT_W -1:WORD_W];
+
+ wire [WORD_W -1:0] regular_add_uneven_x_x_w_cry_lsb = regular_add_uneven_x_x_w_cry_r[WORD_W -1:0];
+ wire [WORD_W -1:0] regular_add_uneven_y_x_w_cry_lsb = regular_add_uneven_y_x_w_cry_r[WORD_W -1:0];
+ wire [WORD_W -1:0] regular_add_uneven_x_y_w_cry_lsb = regular_add_uneven_x_y_w_cry_r[WORD_W -1:0];
+ wire [WORD_W -1:0] regular_add_uneven_y_y_w_cry_lsb = regular_add_uneven_y_y_w_cry_r[WORD_W -1:0];
+
+ wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_x_w_cry_lsb};
+ wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_x_w_cry_lsb};
+ wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_y_w_cry_lsb};
+ wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_y_w_cry_lsb};
+
+ reg regular_add_uneven_store_lsb_now;
+
+ task _regular_add_uneven_update_cry;
+ input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry;
+ { regular_add_uneven_x_x_cry_r, regular_add_uneven_y_x_cry_r, regular_add_uneven_x_y_cry_r, regular_add_uneven_y_y_cry_r} <=
+ { x_x_cry, y_x_cry, x_y_cry, y_y_cry};
+ endtask
+
+ task regular_add_uneven_clear_cry; _regular_add_uneven_update_cry( CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); endtask
+ task regular_add_uneven_store_cry; _regular_add_uneven_update_cry(regular_add_uneven_x_x_w_cry_msb, regular_add_uneven_y_x_w_cry_msb, regular_add_uneven_x_y_w_cry_msb, regular_add_uneven_y_y_w_cry_msb); endtask
+
+ task _regular_add_uneven_update_sum_w_cry;
+ input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry;
+ { regular_add_uneven_x_x_w_cry_r, regular_add_uneven_y_x_w_cry_r, regular_add_uneven_x_y_w_cry_r, regular_add_uneven_y_y_w_cry_r} <=
+ { x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry};
+ endtask
+
+ task regular_add_uneven_store_sum_lsb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_lsb_w_cry, regular_add_uneven_y_x_lsb_w_cry, regular_add_uneven_x_y_lsb_w_cry, regular_add_uneven_y_y_lsb_w_cry); endtask
+
+ task regular_add_uneven_store_sum_msb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_msb_w_cry, regular_add_uneven_y_x_msb_w_cry, regular_add_uneven_x_y_msb_w_cry, regular_add_uneven_y_y_msb_w_cry); endtask
+
+ always @(posedge clk)
+ // Flag that selects which sum (lsb or msb variant) gets registered in BUSY2: set in PRE3, cleared in BUSY1.
+ case (wrk_fsm_state)
+ // NOTE(review): not qualified by opcode, unlike the PROPAGATE_CARRIES sequencer - confirm this is intended
+ WRK_FSM_STATE_LATENCY_PRE3: regular_add_uneven_store_lsb_now <= 1'b1;
+ WRK_FSM_STATE_BUSY1: if (rd_wide_addr_is_last_half_dly[3]) regular_add_uneven_store_lsb_now <= 1'b0;
//
endcase
+
+ always @(posedge clk) // REGULAR_ADD_UNEVEN: sequencing of the carry and sum registers
//
- end
+ case (wrk_fsm_state)
+ // NOTE(review): not qualified by opcode, unlike the PROPAGATE_CARRIES sequencer - confirm this is intended
+ WRK_FSM_STATE_LATENCY_PRE3: regular_add_uneven_clear_cry;
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1: regular_add_uneven_store_cry;
+ // "lsb" sum = narrow word + carry + wide word, "msb" sum = narrow word + carry only
+ WRK_FSM_STATE_LATENCY_PRE4: regular_add_uneven_store_sum_lsb_w_cry;
+ WRK_FSM_STATE_BUSY2: if (regular_add_uneven_store_lsb_now) regular_add_uneven_store_sum_lsb_w_cry;
+ else regular_add_uneven_store_sum_msb_w_cry;
+ WRK_FSM_STATE_LATENCY_POST2: regular_add_uneven_store_sum_msb_w_cry;
+ // any other state: no register is written
+ endcase
//
// FSM Process
//
-
always @(posedge clk or negedge rst_n)
//
if (!rst_n) wrk_fsm_state <= WRK_FSM_STATE_IDLE;
- else case (opcode)
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X,
- UOP_OPCODE_MODULAR_REDUCE_INIT,
- UOP_OPCODE_MERGE_LH: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
- UOP_OPCODE_COPY_LADDERS_X2Y,
- UOP_OPCODE_CROSS_LADDERS_X2Y,
- UOP_OPCODE_REGULAR_ADD_UNEVEN: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
- UOP_OPCODE_MODULAR_SUBTRACT: wrk_fsm_state <= wrk_fsm_state_next_two_pass;
- default: wrk_fsm_state <= WRK_FSM_STATE_IDLE;
- endcase
-
-
+ else wrk_fsm_state <= wrk_fsm_state_next; // asynchronous active-low reset to IDLE above; otherwise one shared next-state value, independent of opcode
+
+
//
// Busy Exit Logic
- //
-
- reg wrk_fsm_done_one_pass = 1'b0;
- reg wrk_fsm_done_one_pass_meander = 1'b0;
- reg wrk_fsm_done_two_pass = 1'b0;
+ //
+ reg wrk_fsm_done = 1'b0;
always @(posedge clk) begin
//
- wrk_fsm_done_one_pass <= 1'b0;
- wrk_fsm_done_one_pass_meander <= 1'b0;
- wrk_fsm_done_two_pass <= 1'b0;
+ wrk_fsm_done <= 1'b0;
//
case (opcode)
//
@@ -1076,47 +1073,22 @@ module modexpng_general_worker
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_COPY_CRT_Y2X,
UOP_OPCODE_MODULAR_REDUCE_INIT,
- UOP_OPCODE_MERGE_LH:
- //
- case (wrk_fsm_state)
- WRK_FSM_STATE_BUSY:
- if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_one_pass <= 1'b1;
- endcase
- //
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y,
+ UOP_OPCODE_MODULAR_SUBTRACT_X,
+ UOP_OPCODE_MODULAR_SUBTRACT_Y,
+ UOP_OPCODE_MODULAR_SUBTRACT_Z,
+ UOP_OPCODE_MERGE_LH,
UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
case (wrk_fsm_state)
- WRK_FSM_STATE_BUSY_M2:
- if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1;
- WRK_FSM_STATE_BUSY_M1:
- wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander;
+ WRK_FSM_STATE_BUSY1:
+ if (rd_narrow_addr_is_last) wrk_fsm_done <= 1'b1;
endcase
- //
- UOP_OPCODE_MODULAR_SUBTRACT:
- //
- case (wrk_fsm_state)
- WRK_FSM_STATE_BUSY_TP:
- if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_two_pass <= 1'b1;
- endcase
- //
//
endcase
//
end
-
-
- //
- // FSM Helper Logic
- //
- always @(posedge clk)
- //
- case (wrk_fsm_state)
- WRK_FSM_STATE_IDLE: if (ena) {wrk_fsm_two_pass_pass, wrk_fsm_two_pass_pass_dly} <= {1'b0, 1'b0};
- WRK_FSM_STATE_LATENCY_POST4_TP: wrk_fsm_two_pass_pass <= 1'b1;
- WRK_FSM_STATE_HOLDOFF_TP: wrk_fsm_two_pass_pass_dly <= 1'b1;
- endcase
//
@@ -1125,64 +1097,26 @@ module modexpng_general_worker
always @* begin
//
case (wrk_fsm_state)
- WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE ;
- WRK_FSM_STATE_LATENCY_PRE1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_PRE2 ;
- WRK_FSM_STATE_LATENCY_PRE2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_BUSY ;
- WRK_FSM_STATE_BUSY: wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ;
- WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_POST2 ;
- WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_STOP ;
- WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
- default: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
- endcase
- //
- end
-
- always @* begin
- //
- case (wrk_fsm_state)
- WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass_meander = ena ? WRK_FSM_STATE_LATENCY_PRE1_M1 : WRK_FSM_STATE_IDLE ;
- //
- WRK_FSM_STATE_LATENCY_PRE1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE1_M2 ;
- WRK_FSM_STATE_LATENCY_PRE1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M1 ;
- WRK_FSM_STATE_LATENCY_PRE2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M2 ;
- WRK_FSM_STATE_LATENCY_PRE2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M1 ;
- WRK_FSM_STATE_BUSY_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M2 ;
- WRK_FSM_STATE_BUSY_M2: wrk_fsm_state_next_one_pass_meander = wrk_fsm_done_one_pass_meander ? WRK_FSM_STATE_LATENCY_POST1_M1 : WRK_FSM_STATE_BUSY_M1 ;
- WRK_FSM_STATE_LATENCY_POST1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST1_M2 ;
- WRK_FSM_STATE_LATENCY_POST1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M1 ;
- WRK_FSM_STATE_LATENCY_POST2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M2 ;
- WRK_FSM_STATE_LATENCY_POST2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_STOP ;
- //
- WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ;
- //
- default: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ;
+ WRK_FSM_STATE_IDLE: wrk_fsm_state_next = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE ; // stay idle until ena is high
+ WRK_FSM_STATE_LATENCY_PRE1: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE2 ; // PRE1..PRE4 advance unconditionally
+ WRK_FSM_STATE_LATENCY_PRE2: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE3 ;
+ WRK_FSM_STATE_LATENCY_PRE3: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE4 ;
+ WRK_FSM_STATE_LATENCY_PRE4: wrk_fsm_state_next = WRK_FSM_STATE_BUSY1 ;
+ WRK_FSM_STATE_BUSY1: wrk_fsm_state_next = WRK_FSM_STATE_BUSY2 ; // BUSY1 is always followed by BUSY2
+ WRK_FSM_STATE_BUSY2: wrk_fsm_state_next = wrk_fsm_done ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY1 ; // loop BUSY1/BUSY2 until wrk_fsm_done is set
+ WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST2 ; // POST1..POST4 advance unconditionally
+ WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST3 ;
+ WRK_FSM_STATE_LATENCY_POST3: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST4 ;
+ WRK_FSM_STATE_LATENCY_POST4: wrk_fsm_state_next = WRK_FSM_STATE_STOP ;
+ WRK_FSM_STATE_STOP: wrk_fsm_state_next = WRK_FSM_STATE_IDLE ; // STOP lasts one state, then back to IDLE
+ default: wrk_fsm_state_next = WRK_FSM_STATE_IDLE ; // any unlisted encoding falls back to IDLE
endcase
//
end
-
- always @* begin
- //
- case (wrk_fsm_state)
- WRK_FSM_STATE_IDLE: wrk_fsm_state_next_two_pass = ena ? WRK_FSM_STATE_LATENCY_PRE1_TP : WRK_FSM_STATE_IDLE;
- WRK_FSM_STATE_LATENCY_PRE1_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE2_TP ;
- WRK_FSM_STATE_LATENCY_PRE2_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE3_TP ;
- WRK_FSM_STATE_LATENCY_PRE3_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE4_TP ;
- WRK_FSM_STATE_LATENCY_PRE4_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_BUSY_TP ;
- WRK_FSM_STATE_BUSY_TP: wrk_fsm_state_next_two_pass = wrk_fsm_done_two_pass ? WRK_FSM_STATE_LATENCY_POST1_TP : WRK_FSM_STATE_BUSY_TP;
- WRK_FSM_STATE_LATENCY_POST1_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST2_TP ;
- WRK_FSM_STATE_LATENCY_POST2_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST3_TP ;
- WRK_FSM_STATE_LATENCY_POST3_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST4_TP ;
- WRK_FSM_STATE_LATENCY_POST4_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_HOLDOFF_TP ;
- WRK_FSM_STATE_HOLDOFF_TP: wrk_fsm_state_next_two_pass = wrk_fsm_two_pass_pass_dly ? WRK_FSM_STATE_STOP : WRK_FSM_STATE_LATENCY_PRE1_TP;
- WRK_FSM_STATE_STOP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_IDLE ;
- default: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_IDLE ;
- endcase
- //
- end
-
-
+
+
//
- // Ready Logic
+ // Ready Flag Logic
//
reg rdy_reg = 1'b1;
@@ -1198,321 +1132,167 @@ module modexpng_general_worker
//
- // UOP_OPCODE_PROPAGATE_CARRIES
+ // Source to Destination Data Logic
//
- reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r;
- reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r;
- reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
- reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r;
-
- wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
- wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
- wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
- wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};
-
- wire [CARRY_W -1:0] rd_narrow_x_din_x_w_cry_msb = rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W];
- wire [CARRY_W -1:0] rd_narrow_y_din_x_w_cry_msb = rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W];
- wire [CARRY_W -1:0] rd_narrow_x_din_y_w_cry_msb = rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W];
- wire [CARRY_W -1:0] rd_narrow_y_din_y_w_cry_msb = rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W];
-
- wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]};
- wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]};
- wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]};
- wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]};
-
+ reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly2; // rd_wide_*_dly2: WORD_EXT_W-bit registers, each loaded from its rd_wide_*_dly1 counterpart in the always block below
+ reg [WORD_EXT_W -1:0] rd_wide_y_din_x_dly2;
+ reg [WORD_EXT_W -1:0] rd_wide_x_din_y_dly2;
+ reg [WORD_EXT_W -1:0] rd_wide_y_din_y_dly2;
+ reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly2; // rd_narrow_*_dly2: same arrangement for the four rd_narrow_* signals
+ reg [WORD_EXT_W -1:0] rd_narrow_y_din_x_dly2;
+ reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly2;
+ reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly2;
+
+ always @(posedge clk) begin // no reset or enable term: the transfer below happens on every rising edge of clk
+ {rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2 } <= {rd_wide_x_din_x_dly1, rd_wide_y_din_x_dly1, rd_wide_x_din_y_dly1, rd_wide_y_din_y_dly1 }; // NOTE(review): *_dly1 are declared outside this hunk; presumably the previous delay stage of the read data -- confirm
+ {rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2} <= {rd_narrow_x_din_x_dly1, rd_narrow_y_din_x_dly1, rd_narrow_x_din_y_dly1, rd_narrow_y_din_y_dly1}; // element order is identical on both sides, so each *_dly2 follows the same-named *_dly1
+ end
+
task update_wide_dout;
input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y;
{wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <=
- { x_x, y_x, x_y, y_y };
+ { x_x, y_x, x_y, y_y}; // positional, nonblocking: x_x -> wr_wide_x_dout_x, y_x -> wr_wide_y_dout_x, x_y -> wr_wide_x_dout_y, y_y -> wr_wide_y_dout_y
endtask
task update_narrow_dout;
input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y;
{wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <=
- { x_x, y_x, x_y, y_y };
- endtask
-
- task update_narrow_carries;
- input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry;
- {rd_narrow_x_din_x_cry_r, rd_narrow_y_din_x_cry_r, rd_narrow_x_din_y_cry_r, rd_narrow_y_din_y_cry_r} <=
- { x_x_cry, y_x_cry, x_y_cry, y_y_cry };
+ { x_x, y_x, x_y, y_y}; // positional, nonblocking: x_x -> wr_narrow_x_dout_x, y_x -> wr_narrow_y_dout_x, x_y -> wr_narrow_x_dout_y, y_y -> wr_narrow_y_dout_y
endtask
-
- always @(posedge clk)
- //
- if (opcode == UOP_OPCODE_PROPAGATE_CARRIES)
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_LATENCY_PRE2:
- //
- update_narrow_carries(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO);
- //
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1:
- //
- update_narrow_carries(rd_narrow_x_din_x_w_cry_msb,
- rd_narrow_y_din_x_w_cry_msb,
- rd_narrow_x_din_y_w_cry_msb,
- rd_narrow_y_din_y_w_cry_msb);
- //
- endcase
-
- //
- // UOP_OPCODE_MODULAR_SUBTRACT
- //
-
- reg [WORD_W:0] modsub_x_ab;
- reg [WORD_W:0] modsub_y_ab;
-
- reg [WORD_W:0] modsub_x_ab_dly;
- reg [WORD_W:0] modsub_y_ab_dly;
-
- reg [WORD_W:0] modsub_x_abn;
- reg [WORD_W:0] modsub_y_abn;
-
- reg modsub_x_ab_mask_now;
- reg modsub_y_ab_mask_now;
-
- reg modsub_x_abn_mask_now;
- reg modsub_y_abn_mask_now;
-
- reg modsub_x_borrow_r;
- reg modsub_y_borrow_r;
-
- wire modsub_x_ab_masked = modsub_x_ab_mask_now ? 1'b0 : modsub_x_ab[WORD_W];
- wire modsub_y_ab_masked = modsub_y_ab_mask_now ? 1'b0 : modsub_y_ab[WORD_W];
-
- wire modsub_x_abn_masked = modsub_x_abn_mask_now ? 1'b0 : modsub_x_abn[WORD_W];
- wire modsub_y_abn_masked = modsub_y_abn_mask_now ? 1'b0 : modsub_y_abn[WORD_W];
-
- wire [WORD_W:0] modsub_x_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
- wire [WORD_W:0] modsub_y_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
- wire [WORD_W:0] modsub_x_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
- wire [WORD_W:0] modsub_y_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
-
- wire [WORD_W:0] modsub_x_wide_x_lsb_pad = {1'b0, wrk_rd_wide_x_din_x_dly1[WORD_W-1:0]};
- wire [WORD_W:0] modsub_x_wide_y_lsb_pad = {1'b0, wrk_rd_wide_x_din_y_dly1[WORD_W-1:0]};
-
- wire [WORD_EXT_W -1:0] modsub_x_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_x_ab_dly[WORD_W-1:0]};
- wire [WORD_EXT_W -1:0] modsub_y_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_y_ab_dly[WORD_W-1:0]};
-
- wire [WORD_EXT_W -1:0] modsub_x_abn_trunc = {{CARRY_W{1'b0}}, modsub_x_abn[WORD_W-1:0]};
- wire [WORD_EXT_W -1:0] modsub_y_abn_trunc = {{CARRY_W{1'b0}}, modsub_y_abn[WORD_W-1:0]};
-
- wire [WORD_EXT_W -1:0] modsub_x_mux = !modsub_x_borrow_r ? wrk_rd_narrow_x_din_x_dly2 : wrk_rd_narrow_y_din_x_dly2;
- wire [WORD_EXT_W -1:0] modsub_y_mux = !modsub_y_borrow_r ? wrk_rd_narrow_x_din_y_dly2 : wrk_rd_narrow_y_din_y_dly2;
-
- wire [WORD_W:0] modsub_x_ab_lsb_pad = {1'b0, modsub_x_ab[WORD_W-1:0]};
- wire [WORD_W:0] modsub_y_ab_lsb_pad = {1'b0, modsub_y_ab[WORD_W-1:0]};
-
- task update_modsub_ab;
- begin
- modsub_x_ab <= modsub_x_narrow_x_lsb_pad - modsub_y_narrow_x_lsb_pad - modsub_x_ab_masked;
- modsub_y_ab <= modsub_x_narrow_y_lsb_pad - modsub_y_narrow_y_lsb_pad - modsub_y_ab_masked;
- end
- endtask
-
- task update_modsub_abn;
- begin
- modsub_x_abn <= modsub_x_ab_lsb_pad + modsub_x_wide_x_lsb_pad + modsub_x_abn_masked;
- modsub_y_abn <= modsub_y_ab_lsb_pad + modsub_x_wide_y_lsb_pad + modsub_y_abn_masked;
- end
- endtask
-
- always @(posedge clk)
- //
- if (opcode == UOP_OPCODE_MODULAR_SUBTRACT)
- //
- case (wrk_fsm_state)
- WRK_FSM_STATE_LATENCY_POST4_TP:
- if (!wrk_fsm_two_pass_pass)
- {modsub_x_borrow_r, modsub_y_borrow_r} <= {modsub_x_ab_dly[WORD_W], modsub_y_ab_dly[WORD_W]};
- endcase
-
- always @(posedge clk) begin
- modsub_x_ab_dly <= modsub_x_ab;
- modsub_y_ab_dly <= modsub_y_ab;
- end
-
always @(posedge clk) begin
//
- modsub_x_ab <= {1'bX, WORD_DNC};
- modsub_y_ab <= {1'bX, WORD_DNC};
- //
- modsub_x_abn <= {1'bX, WORD_DNC};
- modsub_y_abn <= {1'bX, WORD_DNC};
+ update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC); // default for all four wide outputs; a later nonblocking assignment in this block overrides it
+ update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC); // default for all four narrow outputs, overridden the same way
//
- if (opcode == UOP_OPCODE_MODULAR_SUBTRACT)
+ case (opcode) // no default arm: any opcode not listed below leaves both output groups at WORD_EXT_DNC
//
- case (wrk_fsm_state)
+ UOP_OPCODE_PROPAGATE_CARRIES:
//
- WRK_FSM_STATE_LATENCY_PRE3_TP:
- update_modsub_ab;
-
- WRK_FSM_STATE_LATENCY_PRE4_TP,
- WRK_FSM_STATE_BUSY_TP,
- WRK_FSM_STATE_LATENCY_POST1_TP,
- WRK_FSM_STATE_LATENCY_POST2_TP: begin
- update_modsub_ab;
- update_modsub_abn;
- end
- //
- WRK_FSM_STATE_LATENCY_POST3_TP:
+ case (wrk_fsm_state) // every arm uses the same three states; in all other states the defaults above stand
//
- update_modsub_abn;
- //
- endcase
- //
- end
-
- always @(posedge clk) begin
- //
- modsub_x_ab_mask_now <= 1'b0;
- modsub_y_ab_mask_now <= 1'b0;
- //
- modsub_x_abn_mask_now <= 1'b0;
- modsub_y_abn_mask_now <= 1'b0;
- //
- if (opcode == UOP_OPCODE_MODULAR_SUBTRACT)
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3:
+ // narrow outputs <= propagate_carries_*_w_cry_reduced; wide outputs keep the WORD_EXT_DNC default
+ update_narrow_dout(propagate_carries_x_x_w_cry_reduced, propagate_carries_y_x_w_cry_reduced, propagate_carries_x_y_w_cry_reduced, propagate_carries_y_y_w_cry_reduced);
+ //
+ endcase
//
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_LATENCY_PRE2_TP: begin
- modsub_x_ab_mask_now <= 1'b1;
- modsub_y_ab_mask_now <= 1'b1;
- end
+ UOP_OPCODE_COPY_CRT_Y2X:
//
- WRK_FSM_STATE_LATENCY_PRE3_TP: begin
- modsub_x_abn_mask_now <= 1'b1;
- modsub_y_abn_mask_now <= 1'b1;
- end
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3:
+ // narrow and wide alike: every *_dout_x and *_dout_y output is driven from the corresponding *_din_y_dly2 signal
+ begin update_narrow_dout(rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2);
+ update_wide_dout (rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2); end
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
//
- endcase
- //
- end
-
-
- //
- // UOP_OPCODE_ADD_UNEVEN
- //
- reg [WORD_W:0] regadd_x_x;
- reg [WORD_W:0] regadd_y_x;
- reg [WORD_W:0] regadd_x_y;
- reg [WORD_W:0] regadd_y_y;
-
- reg regadd_x_x_cry;
- reg regadd_y_x_cry;
- reg regadd_x_y_cry;
- reg regadd_y_y_cry;
-
- wire [WORD_EXT_W-1:0] regadd_x_x_trunc = {{CARRY_W{1'b0}}, regadd_x_x[WORD_W-1:0]};
- wire [WORD_EXT_W-1:0] regadd_y_x_trunc = {{CARRY_W{1'b0}}, regadd_y_x[WORD_W-1:0]};
- wire [WORD_EXT_W-1:0] regadd_x_y_trunc = {{CARRY_W{1'b0}}, regadd_x_y[WORD_W-1:0]};
- wire [WORD_EXT_W-1:0] regadd_y_y_trunc = {{CARRY_W{1'b0}}, regadd_y_y[WORD_W-1:0]};
-
- //wire regadd_x_x_masked = regadd_xy_ab_x_mask_now ? 1'b0 : regadd_x_x[WORD_W];
- //wire regadd_y_x_masked = regadd_xy_ab_x_mask_now ? 1'b0 : regadd_y_x[WORD_W];
- //wire regadd_x_y_masked = regadd_xy_ab_y_mask_now ? 1'b0 : regadd_x_y[WORD_W];
- //wire regadd_y_y_masked = regadd_xy_ab_y_mask_now ? 1'b0 : regadd_y_y[WORD_W];
- /**/
- reg [WORD_W:0] regadd_x_x_a_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_x_dly2[WORD_W-1:0]};
- reg [WORD_W:0] regadd_x_x_b_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_x_dly1[WORD_W-1:0]};
- reg [WORD_W:0] regadd_y_x_a_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_x_dly2[WORD_W-1:0]};
- reg [WORD_W:0] regadd_y_x_b_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_x_dly1[WORD_W-1:0]};
- reg [WORD_W:0] regadd_x_y_a_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_y_dly2[WORD_W-1:0]};
- reg [WORD_W:0] regadd_x_y_b_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_y_dly1[WORD_W-1:0]};
- reg [WORD_W:0] regadd_y_y_a_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_y_dly2[WORD_W-1:0]};
- reg [WORD_W:0] regadd_y_y_b_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_y_dly1[WORD_W-1:0]};
- /**/
- //WRK_FSM_STATE_BUSY_M1,
- //WRK_FSM_STATE_LATENCY_POST1_M1,
- //WRK_FSM_STATE_LATENCY_POST2_M1:
-
- always @(posedge clk) begin
- //
- regadd_x_x_a_lsb_pad <= {1'bX, WORD_DNC};
- regadd_x_x_b_lsb_pad <= {1'bX, WORD_DNC};
- regadd_y_x_a_lsb_pad <= {1'bX, WORD_DNC};
- regadd_y_x_b_lsb_pad <= {1'bX, WORD_DNC};
- regadd_x_y_a_lsb_pad <= {1'bX, WORD_DNC};
- regadd_x_y_b_lsb_pad <= {1'bX, WORD_DNC};
- regadd_y_y_a_lsb_pad <= {1'bX, WORD_DNC};
- regadd_y_y_b_lsb_pad <= {1'bX, WORD_DNC};
- //
- if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN)
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3:
+ // wide outputs <= same-position rd_narrow_*_dly2 signals; narrow outputs keep the WORD_EXT_DNC default
+ update_wide_dout(rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2);
+ //
+ endcase
//
- case (wrk_fsm_state)
+ UOP_OPCODE_COPY_LADDERS_X2Y:
//
- WRK_FSM_STATE_LATENCY_PRE2_M2,
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2: begin
- regadd_x_x_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_x_din_x_dly1[WORD_W-1:0] : WORD_ZERO};
- regadd_x_x_b_lsb_pad <= {1'b0, wrk_rd_narrow_x_din_x [WORD_W-1:0] };
- regadd_y_x_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_y_din_x_dly1[WORD_W-1:0] : WORD_ZERO};
- regadd_y_x_b_lsb_pad <= {1'b0, wrk_rd_narrow_y_din_x [WORD_W-1:0] };
- regadd_x_y_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_x_din_y_dly1[WORD_W-1:0] : WORD_ZERO};
- regadd_x_y_b_lsb_pad <= {1'b0, wrk_rd_narrow_x_din_y [WORD_W-1:0] };
- regadd_y_y_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_y_din_y_dly1[WORD_W-1:0] : WORD_ZERO};
- regadd_y_y_b_lsb_pad <= {1'b0, wrk_rd_narrow_y_din_y [WORD_W-1:0] };
- end
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3:
+ // only rd_*_x_din_* sources are read: wr_*_x_dout_* <= their _dly1 copy, wr_*_y_dout_* <= their _dly2 copy. NOTE(review): the _dly1/_dly2 mix looks deliberate -- confirm
+ begin update_wide_dout (rd_wide_x_din_x_dly1, rd_wide_x_din_x_dly2, rd_wide_x_din_y_dly1, rd_wide_x_din_y_dly2);
+ update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_x_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_y_dly2); end
+ //
+ endcase
+ //
+ UOP_OPCODE_CROSS_LADDERS_X2Y:
//
- endcase
- end
-
- always @(posedge clk) begin
- //
- regadd_x_x <= {1'bX, WORD_DNC};
- regadd_y_x <= {1'bX, WORD_DNC};
- regadd_x_y <= {1'bX, WORD_DNC};
- regadd_y_y <= {1'bX, WORD_DNC};
- //
- if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN)
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3:
+ // same sources as COPY_LADDERS_X2Y, but the wr_*_y_dout_* outputs take the _dly2 copy of the opposite din (din_y for dout_x, din_x for dout_y)
+ begin update_wide_dout (rd_wide_x_din_x_dly1, rd_wide_x_din_y_dly2, rd_wide_x_din_y_dly1, rd_wide_x_din_x_dly2);
+ update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_y_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_x_dly2); end
+ //
+ endcase
//
- case (wrk_fsm_state)
+ UOP_OPCODE_MODULAR_SUBTRACT_X:
//
- WRK_FSM_STATE_BUSY_M1,
- WRK_FSM_STATE_LATENCY_POST1_M1,
- WRK_FSM_STATE_LATENCY_POST2_M1: begin
- regadd_x_x <= regadd_x_x_a_lsb_pad + regadd_x_x_b_lsb_pad + regadd_x_x_cry;
- regadd_y_x <= regadd_y_x_a_lsb_pad + regadd_y_x_b_lsb_pad + regadd_y_x_cry;
- regadd_x_y <= regadd_x_y_a_lsb_pad + regadd_x_y_b_lsb_pad + regadd_x_y_cry;
- regadd_y_y <= regadd_y_y_a_lsb_pad + regadd_y_y_b_lsb_pad + regadd_y_y_cry;
- end
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3:
+ // narrow outputs only: both *_dout_x <= modular_subtract_x_w_brw_reduced, both *_dout_y <= modular_subtract_y_w_brw_reduced
+ update_narrow_dout(modular_subtract_x_w_brw_reduced, modular_subtract_x_w_brw_reduced, modular_subtract_y_w_brw_reduced, modular_subtract_y_w_brw_reduced);
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT_Y:
//
- endcase
- //
- end
-
- always @(posedge clk) begin
- //
- regadd_x_x_cry <= 1'bX;
- regadd_y_x_cry <= 1'bX;
- regadd_x_y_cry <= 1'bX;
- regadd_y_y_cry <= 1'bX;
- //
- if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN)
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3:
+ // wide outputs only: both *_dout_x <= modular_subtract_x_w_cry_reduced, both *_dout_y <= modular_subtract_y_w_cry_reduced
+ update_wide_dout(modular_subtract_x_w_cry_reduced, modular_subtract_x_w_cry_reduced, modular_subtract_y_w_cry_reduced, modular_subtract_y_w_cry_reduced);
+ //
+ endcase
//
- case (wrk_fsm_state)
+ UOP_OPCODE_MODULAR_SUBTRACT_Z:
//
- WRK_FSM_STATE_LATENCY_PRE2_M2: begin
- regadd_x_x_cry <= 1'b0;
- regadd_y_x_cry <= 1'b0;
- regadd_x_y_cry <= 1'b0;
- regadd_y_y_cry <= 1'b0;
- end
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3:
+ // wide and narrow get the same values: both *_dout_x <= modular_subtract_x_mux_reduced, both *_dout_y <= modular_subtract_y_mux_reduced
+ begin update_wide_dout (modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced);
+ update_narrow_dout(modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced); end
+ //
+ endcase
+ //
+ UOP_OPCODE_MERGE_LH:
//
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2: begin
- regadd_x_x_cry <= regadd_x_x[WORD_W];
- regadd_y_x_cry <= regadd_y_x[WORD_W];
- regadd_x_y_cry <= regadd_x_y[WORD_W];
- regadd_y_y_cry <= regadd_y_y[WORD_W];
- end
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3:
+ // narrow outputs <= same-position rd_wide_*_dly2 signals; wide outputs keep the WORD_EXT_DNC default
+ update_narrow_dout(rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2);
+ //
+ endcase
+ //
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
- endcase
- //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY1,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST3:
+ // narrow outputs <= regular_add_uneven_*_w_cry_reduced; wide outputs keep the WORD_EXT_DNC default
+ update_narrow_dout(regular_add_uneven_x_x_w_cry_reduced, regular_add_uneven_y_x_w_cry_reduced, regular_add_uneven_x_y_w_cry_reduced, regular_add_uneven_y_y_w_cry_reduced);
+ //
+ endcase
+ endcase
//
end
+
endmodule