aboutsummaryrefslogtreecommitdiff
path: root/rtl/modexpng_general_worker.v
diff options
context:
space:
mode:
Diffstat (limited to 'rtl/modexpng_general_worker.v')
-rw-r--r-- rtl/modexpng_general_worker.v 679
1 files changed, 679 insertions, 0 deletions
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
new file mode 100644
index 0000000..c35f0b3
--- /dev/null
+++ b/rtl/modexpng_general_worker.v
@@ -0,0 +1,679 @@
+module modexpng_general_worker // handles the PROPAGATE_CARRIES, OUTPUT_FROM_NARROW and COPY_CRT_Y2X opcodes
+(
+ clk, // clock: every register in this module updates on its rising edge
+ rst, // reset, sampled synchronously inside the clocked blocks
+
+ ena, // start request, examined while the FSM is in IDLE
+ rdy, // registered ready flag
+
+ sel_narrow_in, // bank driven on the narrow read ports
+ sel_narrow_out, // bank driven on the narrow write ports
+ sel_wide_in, // bank driven on the wide read ports
+ sel_wide_out, // bank driven on the wide write ports
+
+ opcode, // selects which of the handled operations runs
+
+ word_index_last, // compared with the next read address to end the sweep
+
+ wrk_rd_wide_xy_ena_x, // wide read port, "_x" set: enable/bank/address out, two data words in
+ wrk_rd_wide_xy_bank_x,
+ wrk_rd_wide_xy_addr_x,
+ wrk_rd_wide_x_din_x,
+ wrk_rd_wide_y_din_x,
+
+ wrk_rd_narrow_xy_ena_x, // narrow read port, "_x" set
+ wrk_rd_narrow_xy_bank_x,
+ wrk_rd_narrow_xy_addr_x,
+ wrk_rd_narrow_x_din_x,
+ wrk_rd_narrow_y_din_x,
+
+ wrk_rd_wide_xy_ena_y, // wide read port, "_y" set
+ wrk_rd_wide_xy_bank_y,
+ wrk_rd_wide_xy_addr_y,
+ wrk_rd_wide_x_din_y,
+ wrk_rd_wide_y_din_y,
+
+ wrk_rd_narrow_xy_ena_y, // narrow read port, "_y" set
+ wrk_rd_narrow_xy_bank_y,
+ wrk_rd_narrow_xy_addr_y,
+ wrk_rd_narrow_x_din_y,
+ wrk_rd_narrow_y_din_y,
+
+ wrk_wr_wide_xy_ena_x, // wide write port, "_x" set: enable/bank/address and two data words out
+ wrk_wr_wide_xy_bank_x,
+ wrk_wr_wide_xy_addr_x,
+ wrk_wr_wide_x_dout_x,
+ wrk_wr_wide_y_dout_x,
+
+ wrk_wr_narrow_xy_ena_x, // narrow write port, "_x" set
+ wrk_wr_narrow_xy_bank_x,
+ wrk_wr_narrow_xy_addr_x,
+ wrk_wr_narrow_x_dout_x,
+ wrk_wr_narrow_y_dout_x,
+
+ wrk_wr_wide_xy_ena_y, // wide write port, "_y" set
+ wrk_wr_wide_xy_bank_y,
+ wrk_wr_wide_xy_addr_y,
+ wrk_wr_wide_x_dout_y,
+ wrk_wr_wide_y_dout_y,
+
+ wrk_wr_narrow_xy_ena_y, // narrow write port, "_y" set
+ wrk_wr_narrow_xy_bank_y,
+ wrk_wr_narrow_xy_addr_y,
+ wrk_wr_narrow_x_dout_y,
+ wrk_wr_narrow_y_dout_y
+);
+
+ //
+ // Headers
+ //
+ `include "modexpng_parameters.vh"
+ `include "modexpng_microcode.vh"
+
+
+ //
+ // Ports
+ // NOTE(review): the *_W width parameters are not defined in this file; presumably they come from the included headers.
+ input clk; // all sequential logic is clocked on posedge clk
+ input rst; // tested as "if (rst)" inside the clocked blocks (active high, synchronous)
+
+ input ena; // moves the FSM from IDLE to LATENCY_PRE1
+ output rdy; // continuous copy of rdy_reg
+
+ input [ BANK_ADDR_W -1:0] sel_narrow_in; // bank for narrow reads
+ input [ BANK_ADDR_W -1:0] sel_narrow_out; // bank for narrow writes
+ input [ BANK_ADDR_W -1:0] sel_wide_in; // bank for wide reads
+ input [ BANK_ADDR_W -1:0] sel_wide_out; // bank for wide writes
+
+ input [ UOP_OPCODE_W -1:0] opcode; // opcodes other than the three handled ones keep the FSM in IDLE
+
+ input [ OP_ADDR_W -1:0] word_index_last; // end-of-sweep reference for the read address counter
+
+ output wrk_rd_wide_xy_ena_x; // wide read, "_x" port set
+ output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x;
+ output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x;
+ input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_x; // not referenced by any logic in this module
+ input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_x; // not referenced by any logic in this module
+
+ output wrk_rd_narrow_xy_ena_x; // narrow read, "_x" port set
+ output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x;
+ output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x;
+ input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x; // carry-propagation input
+ input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x; // carry-propagation input
+
+ output wrk_rd_wide_xy_ena_y; // wide read, "_y" port set
+ output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y;
+ output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y;
+ input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_y; // source word for COPY_CRT_Y2X
+ input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_y; // source word for COPY_CRT_Y2X
+
+ output wrk_rd_narrow_xy_ena_y; // narrow read, "_y" port set
+ output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y;
+ output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y;
+ input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y; // carry-propagation input and COPY_CRT_Y2X source
+ input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y; // carry-propagation input and COPY_CRT_Y2X source
+
+ output wrk_wr_wide_xy_ena_x; // wide write, "_x" port set
+ output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x;
+ output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x;
+ output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_x;
+ output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_x;
+
+ output wrk_wr_narrow_xy_ena_x; // narrow write, "_x" port set
+ output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x;
+ output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x;
+ output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_x;
+ output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_x;
+
+ output wrk_wr_wide_xy_ena_y; // wide write, "_y" port set
+ output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y;
+ output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y;
+ output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_y;
+ output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_y;
+
+ output wrk_wr_narrow_xy_ena_y; // narrow write, "_y" port set
+ output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y;
+ output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y;
+ output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_y;
+ output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_y;
+
+
+ //
+ // FSM Declaration
+ // One sweep visits IDLE -> PRE1 -> PRE2 -> BUSY -> POST1 -> POST2 -> STOP -> IDLE.
+ localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0;
+ localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1;
+ localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2;
+ localparam [3:0] WRK_FSM_STATE_BUSY = 4'h3;
+ localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
+ localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
+ localparam [3:0] WRK_FSM_STATE_STOP = 4'h7;
+ // Encodings 4'h4 and 4'h8..4'hF are not assigned to any state.
+ reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE; // current state, registered on posedge clk
+ reg [3:0] wrk_fsm_state_next_one_pass; // single address space sweep
+
+
+ //
+ // Control Signals
+ // Internal registers behind the output ports; only the enables carry an initial value (0).
+ reg rd_wide_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_x;
+
+ reg rd_narrow_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x;
+
+ reg rd_wide_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_y;
+
+ reg rd_narrow_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y;
+
+ reg wr_wide_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_x;
+ reg [ WORD_EXT_W -1:0] wr_wide_x_dout_x;
+ reg [ WORD_EXT_W -1:0] wr_wide_y_dout_x;
+
+ reg wr_narrow_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_x;
+ reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_x;
+ reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_x;
+
+ reg wr_wide_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_y;
+ reg [ WORD_EXT_W -1:0] wr_wide_x_dout_y;
+ reg [ WORD_EXT_W -1:0] wr_wide_y_dout_y;
+
+ reg wr_narrow_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y;
+ reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_y;
+ reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_y;
+
+
+ //
+ // Mapping
+ // Each wrk_* output port is a continuous assignment from the internal register of the same name.
+ assign wrk_rd_wide_xy_ena_x = rd_wide_xy_ena_x;
+ assign wrk_rd_wide_xy_bank_x = rd_wide_xy_bank_x;
+ assign wrk_rd_wide_xy_addr_x = rd_wide_xy_addr_x;
+
+ assign wrk_rd_narrow_xy_ena_x = rd_narrow_xy_ena_x;
+ assign wrk_rd_narrow_xy_bank_x = rd_narrow_xy_bank_x;
+ assign wrk_rd_narrow_xy_addr_x = rd_narrow_xy_addr_x;
+
+ assign wrk_rd_wide_xy_ena_y = rd_wide_xy_ena_y;
+ assign wrk_rd_wide_xy_bank_y = rd_wide_xy_bank_y;
+ assign wrk_rd_wide_xy_addr_y = rd_wide_xy_addr_y;
+
+ assign wrk_rd_narrow_xy_ena_y = rd_narrow_xy_ena_y;
+ assign wrk_rd_narrow_xy_bank_y = rd_narrow_xy_bank_y;
+ assign wrk_rd_narrow_xy_addr_y = rd_narrow_xy_addr_y;
+
+ assign wrk_wr_wide_xy_ena_x = wr_wide_xy_ena_x;
+ assign wrk_wr_wide_xy_bank_x = wr_wide_xy_bank_x;
+ assign wrk_wr_wide_xy_addr_x = wr_wide_xy_addr_x;
+ assign wrk_wr_wide_x_dout_x = wr_wide_x_dout_x;
+ assign wrk_wr_wide_y_dout_x = wr_wide_y_dout_x;
+
+ assign wrk_wr_narrow_xy_ena_x = wr_narrow_xy_ena_x;
+ assign wrk_wr_narrow_xy_bank_x = wr_narrow_xy_bank_x;
+ assign wrk_wr_narrow_xy_addr_x = wr_narrow_xy_addr_x;
+ assign wrk_wr_narrow_x_dout_x = wr_narrow_x_dout_x;
+ assign wrk_wr_narrow_y_dout_x = wr_narrow_y_dout_x;
+
+ assign wrk_wr_wide_xy_ena_y = wr_wide_xy_ena_y;
+ assign wrk_wr_wide_xy_bank_y = wr_wide_xy_bank_y;
+ assign wrk_wr_wide_xy_addr_y = wr_wide_xy_addr_y;
+ assign wrk_wr_wide_x_dout_y = wr_wide_x_dout_y;
+ assign wrk_wr_wide_y_dout_y = wr_wide_y_dout_y;
+
+ assign wrk_wr_narrow_xy_ena_y = wr_narrow_xy_ena_y;
+ assign wrk_wr_narrow_xy_bank_y = wr_narrow_xy_bank_y;
+ assign wrk_wr_narrow_xy_addr_y = wr_narrow_xy_addr_y;
+ assign wrk_wr_narrow_x_dout_y = wr_narrow_x_dout_y;
+ assign wrk_wr_narrow_y_dout_y = wr_narrow_y_dout_y;
+
+
+ //
+ // Delays: two-stage pipeline on the narrow read addresses
+ //
+ // Stage 1 samples rd_narrow_xy_addr_{x,y}; stage 2 samples stage 1, so
+ // *_dly2 is the address that was presented two clock edges earlier.
+ // No equivalent pipeline is kept for the wide read addresses: the
+ // rd_wide_xy_addr_{x,y}_dly{1,2} registers are intentionally absent.
+
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2;
+
+ always @(posedge clk) begin
+     // first stage: capture the address currently on the read port
+     rd_narrow_xy_addr_x_dly1 <= rd_narrow_xy_addr_x;
+     rd_narrow_xy_addr_y_dly1 <= rd_narrow_xy_addr_y;
+     // second stage: non-blocking, so it sees the pre-edge value of stage 1
+     rd_narrow_xy_addr_x_dly2 <= rd_narrow_xy_addr_x_dly1;
+     rd_narrow_xy_addr_y_dly2 <= rd_narrow_xy_addr_y_dly1;
+     //
+ end
+
+
+ //
+ // Handy Wires
+ // Declared here, driven in the read address section: next read address == word_index_last.
+ wire rd_narrow_xy_addr_x_next_is_last;
+ wire rd_narrow_xy_addr_y_next_is_last;
+
+
+ //
+ // Read Enable Logic
+ //
+ // The four read strobes are registered and fall back to 0 on every clock.
+ // They are raised only while the upcoming FSM state is PRE1, PRE2 or BUSY:
+ //   PROPAGATE_CARRIES, OUTPUT_FROM_NARROW -> narrow strobes
+ //   COPY_CRT_Y2X                          -> narrow and wide strobes
+ // The _x and _y strobe of a pair are always driven to the same value.
+ //
+
+ task _update_wide_xy_rd_en;
+     input _en;
+     begin
+         rd_wide_xy_ena_x <= _en;
+         rd_wide_xy_ena_y <= _en;
+     end
+ endtask
+
+ task _update_narrow_xy_rd_en;
+     input _en;
+     begin
+         rd_narrow_xy_ena_x <= _en;
+         rd_narrow_xy_ena_y <= _en;
+     end
+ endtask
+
+ task enable_wide_xy_rd_en;
+     _update_wide_xy_rd_en(1'b1);
+ endtask
+
+ task disable_wide_xy_rd_en;
+     _update_wide_xy_rd_en(1'b0);
+ endtask
+
+ task enable_narrow_xy_rd_en;
+     _update_narrow_xy_rd_en(1'b1);
+ endtask
+
+ task disable_narrow_xy_rd_en;
+     _update_narrow_xy_rd_en(1'b0);
+ endtask
+
+ always @(posedge clk)
+     if (rst) begin
+         // synchronous reset: every read strobe low
+         disable_wide_xy_rd_en;
+         disable_narrow_xy_rd_en;
+     end else begin
+         // per-cycle default; the nested case below may override it
+         disable_wide_xy_rd_en;
+         disable_narrow_xy_rd_en;
+         //
+         case (wrk_fsm_state_next_one_pass)
+             WRK_FSM_STATE_LATENCY_PRE1,
+             WRK_FSM_STATE_LATENCY_PRE2,
+             WRK_FSM_STATE_BUSY:
+                 //
+                 case (opcode)
+                     UOP_OPCODE_PROPAGATE_CARRIES,
+                     UOP_OPCODE_OUTPUT_FROM_NARROW:
+                         enable_narrow_xy_rd_en;
+                     //
+                     UOP_OPCODE_COPY_CRT_Y2X: begin
+                         enable_narrow_xy_rd_en;
+                         enable_wide_xy_rd_en;
+                     end
+                     //
+                 endcase
+         endcase
+         //
+     end
+
+
+ //
+ // Write Enable Logic
+ //
+ // The four write strobes are registered and fall back to 0 on every clock.
+ // They are raised only while the current FSM state is BUSY, POST1 or POST2:
+ //   PROPAGATE_CARRIES -> narrow strobes
+ //   COPY_CRT_Y2X      -> wide and narrow strobes
+ // No other opcode raises a write strobe in this block.
+ //
+
+ task _update_wide_xy_wr_en;
+     input _en;
+     begin
+         wr_wide_xy_ena_x <= _en;
+         wr_wide_xy_ena_y <= _en;
+     end
+ endtask
+
+ task _update_narrow_xy_wr_en;
+     input _en;
+     begin
+         wr_narrow_xy_ena_x <= _en;
+         wr_narrow_xy_ena_y <= _en;
+     end
+ endtask
+
+ task enable_wide_xy_wr_en;
+     _update_wide_xy_wr_en(1'b1);
+ endtask
+
+ task disable_wide_xy_wr_en;
+     _update_wide_xy_wr_en(1'b0);
+ endtask
+
+ task enable_narrow_xy_wr_en;
+     _update_narrow_xy_wr_en(1'b1);
+ endtask
+
+ task disable_narrow_xy_wr_en;
+     _update_narrow_xy_wr_en(1'b0);
+ endtask
+
+ always @(posedge clk)
+     if (rst) begin
+         // synchronous reset: every write strobe low
+         disable_wide_xy_wr_en;
+         disable_narrow_xy_wr_en;
+     end else begin
+         // per-cycle default; the nested case below may override it
+         disable_wide_xy_wr_en;
+         disable_narrow_xy_wr_en;
+         //
+         case (wrk_fsm_state)
+             WRK_FSM_STATE_BUSY,
+             WRK_FSM_STATE_LATENCY_POST1,
+             WRK_FSM_STATE_LATENCY_POST2:
+                 //
+                 case (opcode)
+                     UOP_OPCODE_PROPAGATE_CARRIES:
+                         enable_narrow_xy_wr_en;
+                     //
+                     UOP_OPCODE_COPY_CRT_Y2X: begin
+                         enable_wide_xy_wr_en;
+                         enable_narrow_xy_wr_en;
+                     end
+                 endcase
+         endcase
+         //
+     end
+
+
+ //
+ // Data Logic
+ // Registered write data for PROPAGATE_CARRIES (carry chain) and COPY_CRT_Y2X (plain copy).
+ reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r;
+ reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r;
+ reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
+ reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r;
+ // *_w_cry: incoming narrow word plus the saved carry, zero-extended by WORD_W bits at the top.
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
+ wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
+ wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};
+ // *_w_cry_reduced: low WORD_W bits of the sum with CARRY_W zero bits placed above them.
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]};
+ wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]};
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]};
+ wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]};
+
+ always @(posedge clk) begin
+ // Default: every write-data register gets WORD_EXT_DNC; the case arms below override it.
+ wr_wide_x_dout_x <= WORD_EXT_DNC;
+ wr_wide_y_dout_x <= WORD_EXT_DNC;
+ wr_wide_x_dout_y <= WORD_EXT_DNC;
+ wr_wide_y_dout_y <= WORD_EXT_DNC;
+ wr_narrow_x_dout_x <= WORD_EXT_DNC;
+ wr_narrow_y_dout_x <= WORD_EXT_DNC;
+ wr_narrow_x_dout_y <= WORD_EXT_DNC;
+ wr_narrow_y_dout_y <= WORD_EXT_DNC;
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_PROPAGATE_CARRIES:
+ //
+ case (wrk_fsm_state)
+ // PRE2: load CARRY_ZERO into the four carry registers before the first sum is used.
+ WRK_FSM_STATE_LATENCY_PRE2: begin
+ rd_narrow_x_din_x_cry_r <= CARRY_ZERO;
+ rd_narrow_y_din_x_cry_r <= CARRY_ZERO;
+ rd_narrow_x_din_y_cry_r <= CARRY_ZERO;
+ rd_narrow_y_din_y_cry_r <= CARRY_ZERO;
+ end
+ // BUSY/POST1/POST2: keep the sum's bits above WORD_W as the next carry, write out the reduced word.
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin // TODO: post2 doesn't need update of carry, since that's the last word
+ // NOTE(review): assumes WORD_EXT_W == WORD_W + CARRY_W so this slice fits the carry register; confirm in the parameters header.
+ rd_narrow_x_din_x_cry_r <= rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W];
+ rd_narrow_y_din_x_cry_r <= rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W];
+ rd_narrow_x_din_y_cry_r <= rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+ rd_narrow_y_din_y_cry_r <= rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+ //
+ wr_narrow_x_dout_x <= rd_narrow_x_din_x_w_cry_reduced;
+ wr_narrow_y_dout_x <= rd_narrow_y_din_x_w_cry_reduced;
+ wr_narrow_x_dout_y <= rd_narrow_x_din_y_w_cry_reduced;
+ wr_narrow_y_dout_y <= rd_narrow_y_din_y_w_cry_reduced;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_CRT_Y2X:
+ //
+ case (wrk_fsm_state)
+ // Both the _x and the _y write ports receive the words read on the _y read ports.
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin
+ // wide words
+ wr_wide_x_dout_x <= wrk_rd_wide_x_din_y;
+ wr_wide_y_dout_x <= wrk_rd_wide_y_din_y;
+ wr_wide_x_dout_y <= wrk_rd_wide_x_din_y;
+ wr_wide_y_dout_y <= wrk_rd_wide_y_din_y;
+ // narrow words
+ wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_y;
+ wr_narrow_y_dout_x <= wrk_rd_narrow_y_din_y;
+ wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y;
+ wr_narrow_y_dout_y <= wrk_rd_narrow_y_din_y;
+ //
+ end
+ //
+ endcase
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Write Address Logic
+ //
+ // Write banks/addresses default to BANK_DNC/OP_ADDR_DNC. In BUSY, POST1 and
+ // POST2 of PROPAGATE_CARRIES or COPY_CRT_Y2X they become the selected output
+ // bank plus the narrow read address delayed by two clocks. The wide write
+ // ports reuse the delayed *narrow* read address as well.
+ always @(posedge clk) begin
+     // defaults
+     {wr_wide_xy_bank_x,   wr_wide_xy_addr_x  } <= {BANK_DNC, OP_ADDR_DNC};
+     {wr_wide_xy_bank_y,   wr_wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
+     {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
+     {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+     //
+     case (wrk_fsm_state)
+         WRK_FSM_STATE_BUSY,
+         WRK_FSM_STATE_LATENCY_POST1,
+         WRK_FSM_STATE_LATENCY_POST2:
+             case (opcode)
+                 UOP_OPCODE_PROPAGATE_CARRIES,
+                 UOP_OPCODE_COPY_CRT_Y2X: begin
+                     // bank select
+                     wr_wide_xy_bank_x   <= sel_wide_out;
+                     wr_wide_xy_bank_y   <= sel_wide_out;
+                     wr_narrow_xy_bank_x <= sel_narrow_out;
+                     wr_narrow_xy_bank_y <= sel_narrow_out;
+                     // word address
+                     wr_wide_xy_addr_x   <= rd_narrow_xy_addr_x_dly2;
+                     wr_wide_xy_addr_y   <= rd_narrow_xy_addr_y_dly2;
+                     wr_narrow_xy_addr_x <= rd_narrow_xy_addr_x_dly2;
+                     wr_narrow_xy_addr_y <= rd_narrow_xy_addr_y_dly2;
+                 end
+             endcase
+     endcase
+ end
+
+
+ //
+ // Read Address Logic
+ // One counter pair (rd_narrow_xy_addr_*_next) feeds both the wide and the narrow read addresses.
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next;
+ // End-of-sweep flags: the next address to be issued equals word_index_last.
+ assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
+ assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
+
+ always @(posedge clk) begin
+ // Default: banks/addresses are BANK_DNC/OP_ADDR_DNC unless a case arm below overrides them.
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ??
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_COPY_CRT_Y2X:
+ //
+ case (wrk_fsm_state_next_one_pass)
+ // Arms are selected by the upcoming state (wrk_fsm_state_next_one_pass), not the current one.
+ WRK_FSM_STATE_LATENCY_PRE1: begin
+ // First read of a sweep: address OP_ADDR_ZERO; the counters are preloaded with OP_ADDR_ONE.
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
+ //
+ rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY: begin
+ // Following reads: issue the counter value, then advance the counter by one.
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_narrow_xy_addr_x_next};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_narrow_xy_addr_y_next};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
+ //
+ rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+ rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+ //
+ end
+ //
+ endcase
+ // The *_next counters are assigned only in the two arms above; otherwise they hold their value.
+ //
+ endcase
+ //
+ end
+
+
+ // FSM Process
+ // Synchronous reset to IDLE. The computed next state is taken only for the
+ // three handled opcodes; any other opcode parks the FSM in IDLE.
+ always @(posedge clk)
+     if (rst) wrk_fsm_state <= WRK_FSM_STATE_IDLE;
+     else begin
+         wrk_fsm_state <= WRK_FSM_STATE_IDLE;
+         case (opcode)
+             UOP_OPCODE_PROPAGATE_CARRIES, UOP_OPCODE_OUTPUT_FROM_NARROW,
+             UOP_OPCODE_COPY_CRT_Y2X: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+         endcase
+     end
+
+
+ //
+ // Busy Exit Logic
+ //
+ // wrk_fsm_done_one_pass is a registered flag. It is set on a clock edge where
+ // the FSM is in BUSY, one of the three handled opcodes is selected and either
+ // next-read-address comparator reports word_index_last; otherwise it is 0.
+ //
+ reg wrk_fsm_done_one_pass = 1'b0;
+
+ always @(posedge clk) begin
+     //
+     wrk_fsm_done_one_pass <= 1'b0;
+     //
+     if (wrk_fsm_state == WRK_FSM_STATE_BUSY)
+         //
+         case (opcode)
+             //
+             UOP_OPCODE_PROPAGATE_CARRIES,
+             UOP_OPCODE_OUTPUT_FROM_NARROW,
+             UOP_OPCODE_COPY_CRT_Y2X:
+                 //
+                 if (rd_narrow_xy_addr_x_next_is_last || rd_narrow_xy_addr_y_next_is_last)
+                     wrk_fsm_done_one_pass <= 1'b1;
+             //
+         endcase
+     //
+ end
+
+
+ // FSM Transition Logic
+ // Combinational next-state function for a single address sweep.
+ // The default arm assigns the unused encodings (4'h4, 4'h8..4'hF) so that the
+ // always @* block is fully specified and no latch is inferred for the output.
+ always @* begin
+     case (wrk_fsm_state)
+         WRK_FSM_STATE_IDLE:          wrk_fsm_state_next_one_pass = ena                   ? WRK_FSM_STATE_LATENCY_PRE1  : WRK_FSM_STATE_IDLE ;
+         WRK_FSM_STATE_LATENCY_PRE1:  wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_LATENCY_PRE2                       ;
+         WRK_FSM_STATE_LATENCY_PRE2:  wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_BUSY                               ;
+         WRK_FSM_STATE_BUSY:          wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ;
+         WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_LATENCY_POST2                      ;
+         WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_STOP                               ;
+         WRK_FSM_STATE_STOP:          wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_IDLE                               ;
+         default:                     wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_IDLE                               ; // illegal state: recover to IDLE
+     endcase
+ end
+
+
+ //
+ // Ready Logic
+ //
+ // rdy is registered: reset sets it, in IDLE it follows the inverse of ena,
+ // in STOP it is set again, and in every other state it holds its value.
+ reg rdy_reg = 1'b1;
+
+ assign rdy = rdy_reg;
+
+ always @(posedge clk)
+     //
+     if (rst)                                      rdy_reg <= 1'b1;
+     else if (wrk_fsm_state == WRK_FSM_STATE_IDLE) rdy_reg <= ~ena;
+     else if (wrk_fsm_state == WRK_FSM_STATE_STOP) rdy_reg <= 1'b1;
+
+
+endmodule