about | summary | refs | log | tree | commit | diff
path: root/rtl/modexpng_general_worker.v
diff options
context:
space:
mode:
Diffstat (limited to 'rtl/modexpng_general_worker.v')
-rw-r--r-- rtl/modexpng_general_worker.v 402
1 files changed, 364 insertions, 38 deletions
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index c35f0b3..269ef98 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -14,6 +14,7 @@ module modexpng_general_worker
opcode,
word_index_last,
+ word_index_last_half,
wrk_rd_wide_xy_ena_x,
wrk_rd_wide_xy_bank_x,
@@ -88,6 +89,7 @@ module modexpng_general_worker
input [ UOP_OPCODE_W -1:0] opcode;
input [ OP_ADDR_W -1:0] word_index_last;
+ input [ OP_ADDR_W -1:0] word_index_last_half;
output wrk_rd_wide_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x;
@@ -141,18 +143,35 @@ module modexpng_general_worker
//
// FSM Declaration
//
// Removed: previous 4-bit state encoding and state registers.
- localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0;
- localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1;
- localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2;
- localparam [3:0] WRK_FSM_STATE_BUSY = 4'h3;
- localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
- localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
- localparam [3:0] WRK_FSM_STATE_STOP = 4'h7;
// Added: 5-bit state encoding. Codes 5'h00..5'h07 keep the numeric values of the
// former 4-bit states; codes 5'h10..5'h19 are the new *_M1 / *_M2 states.
+ localparam [4:0] WRK_FSM_STATE_IDLE = 5'h00;
- reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
- reg [3:0] wrk_fsm_state_next_one_pass; // single address space sweep
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1 = 5'h01;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2 = 5'h02;
+ localparam [4:0] WRK_FSM_STATE_BUSY = 5'h03;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST1 = 5'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST2 = 5'h06;
+
+ localparam [4:0] WRK_FSM_STATE_STOP = 5'h07;
+
// Each PRE/BUSY/POST stage has an _M1 and an _M2 variant; these are the states
// produced by wrk_fsm_state_next_one_pass_meander.
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 5'h10;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 5'h11;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 5'h12;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 5'h13;
+ localparam [4:0] WRK_FSM_STATE_BUSY_M1 = 5'h14;
+ localparam [4:0] WRK_FSM_STATE_BUSY_M2 = 5'h15;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 5'h16;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 5'h17;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 5'h18;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 5'h19;
+
// Current state register plus one combinational next-state candidate per pass type;
// the opcode decides which candidate is loaded into wrk_fsm_state.
+ reg [4:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
+ reg [4:0] wrk_fsm_state_next_one_pass; // single address space sweep
+ reg [4:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y)
+ // TODO: Document how the narrow/wide address increment works (narrow is one long sweep, wide is two sweeps, each half as long)
+
+
//
// Control Signals
//
@@ -244,32 +263,62 @@ module modexpng_general_worker
//
// Delays
//
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly3;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly4;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly3;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly4;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly3;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly4;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly3;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly4;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly3;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly3;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly3;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3;
+
// Delay lines, advanced on every rising edge of clk:
//   - wide and narrow read addresses (x and y) are kept delayed by 1..4 cycles
//     in *_dly1 .. *_dly4;
//   - wrk_rd_wide_x_din_{x,y} and wrk_rd_narrow_x_din_{x,y} are kept delayed by
//     1..3 cycles in *_dly1 .. *_dly3.
// Each *_dly1 register captures its source signal and each *_dlyN captures *_dly(N-1).
always @(posedge clk) begin
//
- //{rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
- //{rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
// Wide read addresses: stages 1-2 (previously commented out, now active).
+ {rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
+ {rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
+ //
// Wide read addresses: stages 3-4, fed from stage 2.
+ {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2};
+ {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2};
//
// Narrow read addresses: stages 1-2.
{rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
{rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
//
// Narrow read addresses: stages 3-4, fed from stage 2.
+ {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2};
+ {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2};
+ //
// Wide read data (x source): three-stage delay.
+ {wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x};
+ {wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y};
+ //
// Narrow read data (x source): three-stage delay.
+ {wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x};
+ {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y};
+ //
end
-
-
- //
- // Handy Wires
- //
- wire rd_narrow_xy_addr_x_next_is_last;
- wire rd_narrow_xy_addr_y_next_is_last;
//
@@ -310,7 +359,8 @@ module modexpng_general_worker
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW:
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
//
case (wrk_fsm_state_next_one_pass)
//
@@ -333,12 +383,30 @@ module modexpng_general_worker
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_BUSY: begin
//
- enable_narrow_xy_rd_en;
enable_wide_xy_rd_en;
+ enable_narrow_xy_rd_en;
//
end
//
endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state_next_one_pass_meander)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1,
+ WRK_FSM_STATE_LATENCY_PRE1_M2,
+ WRK_FSM_STATE_LATENCY_PRE2_M1,
+ WRK_FSM_STATE_LATENCY_PRE2_M2,
+ WRK_FSM_STATE_BUSY_M1,
+ WRK_FSM_STATE_BUSY_M2: begin
+ //
+ enable_wide_xy_rd_en;
+ enable_narrow_xy_rd_en;
+ //
+ end
+ //
+ endcase
//
endcase
//
@@ -389,8 +457,7 @@ module modexpng_general_worker
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST2:
//
- enable_narrow_xy_wr_en;
- //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+ enable_narrow_xy_wr_en;
//
//
endcase
@@ -405,7 +472,34 @@ module modexpng_general_worker
//
enable_wide_xy_wr_en;
enable_narrow_xy_wr_en;
- //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2:
+ //
+ enable_wide_xy_wr_en;
+ //
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ //
+ enable_wide_xy_wr_en;
+ enable_narrow_xy_wr_en;
//
end
//
@@ -424,7 +518,7 @@ module modexpng_general_worker
reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r;
- wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};
@@ -497,6 +591,45 @@ module modexpng_general_worker
end
//
endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ //
+ wr_wide_x_dout_x <= wrk_rd_wide_x_din_x_dly3;
+ wr_wide_y_dout_x <= wrk_rd_wide_x_din_x_dly2;
+ wr_wide_x_dout_y <= wrk_rd_wide_x_din_y_dly3;
+ wr_wide_y_dout_y <= wrk_rd_wide_x_din_y_dly2;
+ //
+ wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_x_dly3;
+ wr_narrow_y_dout_x <= wrk_rd_narrow_x_din_x_dly2;
+ wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y_dly3;
+ wr_narrow_y_dout_y <= wrk_rd_narrow_x_din_y_dly2;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin
+ //
+ wr_wide_x_dout_x <= wrk_rd_narrow_x_din_x;
+ wr_wide_y_dout_x <= wrk_rd_narrow_y_din_x;
+ wr_wide_x_dout_y <= wrk_rd_narrow_x_din_y;
+ wr_wide_y_dout_y <= wrk_rd_narrow_y_din_y;
+ //
+ end
+ //
+ endcase
//
endcase
//
@@ -506,6 +639,9 @@ module modexpng_general_worker
//
// Write Address Logic
//
// High while the narrow read address, delayed by two clock cycles, is less than or
// equal to word_index_last_half. For UOP_OPCODE_MODULAR_REDUCE_INIT the write-address
// logic uses these flags to pick the destination wide bank: BANK_WIDE_L when set,
// BANK_WIDE_H otherwise.
+ wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half;
+ wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half;
+
always @(posedge clk) begin
//
{wr_wide_xy_bank_x, wr_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC};
@@ -534,22 +670,64 @@ module modexpng_general_worker
//
endcase
//
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin
+ //
+ wr_wide_xy_bank_x <= uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H;
+ wr_wide_xy_bank_y <= uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H;
+ //
+ wr_wide_xy_addr_x <= rd_wide_xy_addr_x_dly2;
+ wr_wide_xy_addr_y <= rd_wide_xy_addr_y_dly2;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ //
+ {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly4};
+ {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly4};
+ //
+ {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly4};
+ {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly4};
+ //
+ end
+ //
+ endcase
+ //
//
endcase
//
end
-
-
+
+
//
// Read Address Logic
//
// "Next" read-address registers: the value each read address takes on the following
// address step. The wide pair is new in this change; the narrow pair already existed.
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_next;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_next;
+
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next;
// The narrow *_is_last wires used to be declared earlier in the file and assigned
// here; they are now declared and driven in one place, next to the new wide variants.
- assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
- assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
// Wide "next" addresses are compared against word_index_last_half ...
+ wire rd_wide_xy_addr_x_next_is_last = rd_wide_xy_addr_x_next == word_index_last_half;
+ wire rd_wide_xy_addr_y_next_is_last = rd_wide_xy_addr_y_next == word_index_last_half;
+
// ... while narrow "next" addresses are compared against word_index_last.
+ wire rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
+ wire rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
- always @(posedge clk) begin
+ always @(posedge clk) begin // TODO: Maybe split into two blocks (read address / next address)??
//
{rd_wide_xy_bank_x, rd_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ??
{rd_wide_xy_bank_y, rd_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
@@ -572,6 +750,9 @@ module modexpng_general_worker
{rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
{rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
//
+ rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+ //
rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
//
@@ -586,11 +767,113 @@ module modexpng_general_worker
{rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
{rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
//
+ rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
+ rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+ //
+ rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+ rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state_next_one_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
+ //
+ rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x_next};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y_next};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
+ //
+ rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
+ rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+ //
+ rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+ rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state_next_one_pass_meander)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, OP_ADDR_ZERO};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, OP_ADDR_ZERO};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, OP_ADDR_ZERO};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, OP_ADDR_ZERO};
+ //
+ rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M2: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE2_M1,
+ WRK_FSM_STATE_BUSY_M1: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_next};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_next};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_next};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_next};
+ //
+ rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
+ rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+ //
rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
//
end
//
+ WRK_FSM_STATE_LATENCY_PRE2_M2,
+ WRK_FSM_STATE_BUSY_M2: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
+ //
+ end
+ //
endcase
//
//
@@ -608,7 +891,9 @@ module modexpng_general_worker
else case (opcode)
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+ UOP_OPCODE_COPY_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
default: wrk_fsm_state <= WRK_FSM_STATE_IDLE;
endcase
@@ -616,17 +901,20 @@ module modexpng_general_worker
//
// Busy Exit Logic
//
- reg wrk_fsm_done_one_pass = 1'b0;
+ reg wrk_fsm_done_one_pass = 1'b0;
+ reg wrk_fsm_done_one_pass_meander = 1'b0;
always @(posedge clk) begin
//
- wrk_fsm_done_one_pass <= 1'b0;
+ wrk_fsm_done_one_pass <= 1'b0;
+ wrk_fsm_done_one_pass_meander <= 1'b0;
//
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X: begin
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
//
if (wrk_fsm_state == WRK_FSM_STATE_BUSY) begin
//
@@ -637,6 +925,20 @@ module modexpng_general_worker
//
end
//
+ UOP_OPCODE_COPY_LADDERS_X2Y: begin
+ //
+ if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M2) begin
+ //
+ if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; // TODO: Check, whether both are necessary...
+ if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1;
+ //
+ end
+ //
+ if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M1)
+ wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander;
+ //
+ end
+ //
endcase
//
end
@@ -654,7 +956,31 @@ module modexpng_general_worker
WRK_FSM_STATE_BUSY: wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ;
WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_POST2 ;
WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_STOP ;
- WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ default: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ endcase
+ //
+ end
+
// Combinational next-state function for the "meander" pass (the candidate loaded into
// wrk_fsm_state when the opcode is UOP_OPCODE_COPY_LADDERS_X2Y).
// Sequence: IDLE -(ena)-> PRE1_M1 -> PRE1_M2 -> PRE2_M1 -> PRE2_M2 -> BUSY_M1 -> BUSY_M2,
// then BUSY_M2 returns to BUSY_M1 until wrk_fsm_done_one_pass_meander is set, after which
// POST1_M1 -> POST1_M2 -> POST2_M1 -> POST2_M2 -> STOP -> IDLE.
+ always @* begin
+ //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass_meander = ena ? WRK_FSM_STATE_LATENCY_PRE1_M1 : WRK_FSM_STATE_IDLE ;
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE1_M2 ;
+ WRK_FSM_STATE_LATENCY_PRE1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M1 ;
+ WRK_FSM_STATE_LATENCY_PRE2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M2 ;
+ WRK_FSM_STATE_LATENCY_PRE2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M1 ;
+ WRK_FSM_STATE_BUSY_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M2 ;
// The only conditional transition besides IDLE: leave the BUSY loop once the done flag is set.
+ WRK_FSM_STATE_BUSY_M2: wrk_fsm_state_next_one_pass_meander = wrk_fsm_done_one_pass_meander ? WRK_FSM_STATE_LATENCY_POST1_M1 : WRK_FSM_STATE_BUSY_M1 ;
+ WRK_FSM_STATE_LATENCY_POST1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST1_M2 ;
+ WRK_FSM_STATE_LATENCY_POST1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M1 ;
+ WRK_FSM_STATE_LATENCY_POST2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M2 ;
+ WRK_FSM_STATE_LATENCY_POST2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_STOP ;
+ //
+ WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ;
+ //
// Any state not listed above (e.g. the non-meander states) maps to IDLE.
+ default: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ;
endcase
//
end