//======================================================================
//
// rtl/modexpng_general_worker.v
//
// Provenance (taken from the patch header this file was extracted from):
//   Commit:  e340b1489b08905e3d8acd17686e178028de7922
//   Author:  Pavel V. Shatov (Meister)
//   Date:    Thu, 3 Oct 2019 16:47:39 +0300
//   Subject: Added more micro-operations, also added "general worker"
//            module. The worker is basically a block memory data mover,
//            but it can also do some supporting operations required for
//            the Garner's formula part of the exponentiation.
//
// Overview
// --------
// The worker performs one address sweep (word 0 .. word_index_last) over
// the operand memories per invocation. Which read/write ports are active
// and what data is written depends on `opcode`:
//
//   UOP_OPCODE_PROPAGATE_CARRIES
//     Reads the narrow banks selected by sel_narrow_in, adds the carry
//     kept from the previous word, writes the lower WORD_W bits
//     (zero-extended to WORD_EXT_W) to the narrow banks selected by
//     sel_narrow_out and keeps the upper bits as the next carry.
//
//   UOP_OPCODE_OUTPUT_FROM_NARROW
//     Only generates narrow read enables/addresses; this module does not
//     write anything for this opcode.
//
//   UOP_OPCODE_COPY_CRT_Y2X
//     Reads wide and narrow banks and writes the data received on the
//     "_y" read ports to both the "_x" and the "_y" write ports.
//
// Handshake: `rdy` drops when `ena` is seen in the IDLE state and is
// raised again when the FSM reaches the STOP state.
//
// Timing: read-side signals are derived from the *next* FSM state, the
// write-side signals from the *current* FSM state, and write addresses
// are the read addresses delayed by two clock cycles.
// NOTE(review): this presumably matches a two-cycle read latency of the
// operand memories -- confirm against the storage module.
//
//======================================================================

module modexpng_general_worker
(
    clk,
    rst,

    ena,
    rdy,

    sel_narrow_in,
    sel_narrow_out,
    sel_wide_in,
    sel_wide_out,

    opcode,

    word_index_last,

    wrk_rd_wide_xy_ena_x,
    wrk_rd_wide_xy_bank_x,
    wrk_rd_wide_xy_addr_x,
    wrk_rd_wide_x_din_x,
    wrk_rd_wide_y_din_x,

    wrk_rd_narrow_xy_ena_x,
    wrk_rd_narrow_xy_bank_x,
    wrk_rd_narrow_xy_addr_x,
    wrk_rd_narrow_x_din_x,
    wrk_rd_narrow_y_din_x,

    wrk_rd_wide_xy_ena_y,
    wrk_rd_wide_xy_bank_y,
    wrk_rd_wide_xy_addr_y,
    wrk_rd_wide_x_din_y,
    wrk_rd_wide_y_din_y,

    wrk_rd_narrow_xy_ena_y,
    wrk_rd_narrow_xy_bank_y,
    wrk_rd_narrow_xy_addr_y,
    wrk_rd_narrow_x_din_y,
    wrk_rd_narrow_y_din_y,

    wrk_wr_wide_xy_ena_x,
    wrk_wr_wide_xy_bank_x,
    wrk_wr_wide_xy_addr_x,
    wrk_wr_wide_x_dout_x,
    wrk_wr_wide_y_dout_x,

    wrk_wr_narrow_xy_ena_x,
    wrk_wr_narrow_xy_bank_x,
    wrk_wr_narrow_xy_addr_x,
    wrk_wr_narrow_x_dout_x,
    wrk_wr_narrow_y_dout_x,

    wrk_wr_wide_xy_ena_y,
    wrk_wr_wide_xy_bank_y,
    wrk_wr_wide_xy_addr_y,
    wrk_wr_wide_x_dout_y,
    wrk_wr_wide_y_dout_y,

    wrk_wr_narrow_xy_ena_y,
    wrk_wr_narrow_xy_bank_y,
    wrk_wr_narrow_xy_addr_y,
    wrk_wr_narrow_x_dout_y,
    wrk_wr_narrow_y_dout_y
);

    //
    // Headers
    //
    // These provide the widths (BANK_ADDR_W, OP_ADDR_W, WORD_W, WORD_EXT_W,
    // CARRY_W, UOP_OPCODE_W), the "don't care"/constant values (*_DNC,
    // OP_ADDR_ZERO, OP_ADDR_ONE, CARRY_ZERO) and the UOP_OPCODE_* values
    // used below.
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_microcode.vh"


    //
    // Ports
    //
    input                                 clk;
    input                                 rst;              // synchronous reset

    input                                 ena;              // start request, sampled in IDLE
    output                                rdy;              // low while an operation is in progress

    input  [         BANK_ADDR_W -1:0]    sel_narrow_in;    // narrow bank to read from
    input  [         BANK_ADDR_W -1:0]    sel_narrow_out;   // narrow bank to write to
    input  [         BANK_ADDR_W -1:0]    sel_wide_in;      // wide bank to read from
    input  [         BANK_ADDR_W -1:0]    sel_wide_out;     // wide bank to write to

    input  [        UOP_OPCODE_W -1:0]    opcode;           // operation selector (UOP_OPCODE_*)

    input  [           OP_ADDR_W -1:0]    word_index_last;  // index of the last word of the sweep

    output                                wrk_rd_wide_xy_ena_x;
    output [         BANK_ADDR_W -1:0]    wrk_rd_wide_xy_bank_x;
    output [           OP_ADDR_W -1:0]    wrk_rd_wide_xy_addr_x;
    input  [          WORD_EXT_W -1:0]    wrk_rd_wide_x_din_x;
    input  [          WORD_EXT_W -1:0]    wrk_rd_wide_y_din_x;

    output                                wrk_rd_narrow_xy_ena_x;
    output [         BANK_ADDR_W -1:0]    wrk_rd_narrow_xy_bank_x;
    output [           OP_ADDR_W -1:0]    wrk_rd_narrow_xy_addr_x;
    input  [          WORD_EXT_W -1:0]    wrk_rd_narrow_x_din_x;
    input  [          WORD_EXT_W -1:0]    wrk_rd_narrow_y_din_x;

    output                                wrk_rd_wide_xy_ena_y;
    output [         BANK_ADDR_W -1:0]    wrk_rd_wide_xy_bank_y;
    output [           OP_ADDR_W -1:0]    wrk_rd_wide_xy_addr_y;
    input  [          WORD_EXT_W -1:0]    wrk_rd_wide_x_din_y;
    input  [          WORD_EXT_W -1:0]    wrk_rd_wide_y_din_y;

    output                                wrk_rd_narrow_xy_ena_y;
    output [         BANK_ADDR_W -1:0]    wrk_rd_narrow_xy_bank_y;
    output [           OP_ADDR_W -1:0]    wrk_rd_narrow_xy_addr_y;
    input  [          WORD_EXT_W -1:0]    wrk_rd_narrow_x_din_y;
    input  [          WORD_EXT_W -1:0]    wrk_rd_narrow_y_din_y;

    output                                wrk_wr_wide_xy_ena_x;
    output [         BANK_ADDR_W -1:0]    wrk_wr_wide_xy_bank_x;
    output [           OP_ADDR_W -1:0]    wrk_wr_wide_xy_addr_x;
    output [          WORD_EXT_W -1:0]    wrk_wr_wide_x_dout_x;
    output [          WORD_EXT_W -1:0]    wrk_wr_wide_y_dout_x;

    output                                wrk_wr_narrow_xy_ena_x;
    output [         BANK_ADDR_W -1:0]    wrk_wr_narrow_xy_bank_x;
    output [           OP_ADDR_W -1:0]    wrk_wr_narrow_xy_addr_x;
    output [          WORD_EXT_W -1:0]    wrk_wr_narrow_x_dout_x;
    output [          WORD_EXT_W -1:0]    wrk_wr_narrow_y_dout_x;

    output                                wrk_wr_wide_xy_ena_y;
    output [         BANK_ADDR_W -1:0]    wrk_wr_wide_xy_bank_y;
    output [           OP_ADDR_W -1:0]    wrk_wr_wide_xy_addr_y;
    output [          WORD_EXT_W -1:0]    wrk_wr_wide_x_dout_y;
    output [          WORD_EXT_W -1:0]    wrk_wr_wide_y_dout_y;

    output                                wrk_wr_narrow_xy_ena_y;
    output [         BANK_ADDR_W -1:0]    wrk_wr_narrow_xy_bank_y;
    output [           OP_ADDR_W -1:0]    wrk_wr_narrow_xy_addr_y;
    output [          WORD_EXT_W -1:0]    wrk_wr_narrow_x_dout_y;
    output [          WORD_EXT_W -1:0]    wrk_wr_narrow_y_dout_y;


    //
    // FSM Declaration
    //
    localparam [3:0] WRK_FSM_STATE_IDLE          = 4'h0;
    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1  = 4'h1;
    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2  = 4'h2;
    localparam [3:0] WRK_FSM_STATE_BUSY          = 4'h3;
    localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
    localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
    localparam [3:0] WRK_FSM_STATE_STOP          = 4'h7;

    reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
    reg [3:0] wrk_fsm_state_next_one_pass;  // single address space sweep


    //
    // Control Signals
    //
    reg                       rd_wide_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0]    rd_wide_xy_bank_x;
    reg [  OP_ADDR_W -1:0]    rd_wide_xy_addr_x;

    reg                       rd_narrow_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0]    rd_narrow_xy_bank_x;
    reg [  OP_ADDR_W -1:0]    rd_narrow_xy_addr_x;

    reg                       rd_wide_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0]    rd_wide_xy_bank_y;
    reg [  OP_ADDR_W -1:0]    rd_wide_xy_addr_y;

    reg                       rd_narrow_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0]    rd_narrow_xy_bank_y;
    reg [  OP_ADDR_W -1:0]    rd_narrow_xy_addr_y;

    reg                       wr_wide_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0]    wr_wide_xy_bank_x;
    reg [  OP_ADDR_W -1:0]    wr_wide_xy_addr_x;
    reg [ WORD_EXT_W -1:0]    wr_wide_x_dout_x;
    reg [ WORD_EXT_W -1:0]    wr_wide_y_dout_x;

    reg                       wr_narrow_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0]    wr_narrow_xy_bank_x;
    reg [  OP_ADDR_W -1:0]    wr_narrow_xy_addr_x;
    reg [ WORD_EXT_W -1:0]    wr_narrow_x_dout_x;
    reg [ WORD_EXT_W -1:0]    wr_narrow_y_dout_x;

    reg                       wr_wide_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0]    wr_wide_xy_bank_y;
    reg [  OP_ADDR_W -1:0]    wr_wide_xy_addr_y;
    reg [ WORD_EXT_W -1:0]    wr_wide_x_dout_y;
    reg [ WORD_EXT_W -1:0]    wr_wide_y_dout_y;

    reg                       wr_narrow_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0]    wr_narrow_xy_bank_y;
    reg [  OP_ADDR_W -1:0]    wr_narrow_xy_addr_y;
    reg [ WORD_EXT_W -1:0]    wr_narrow_x_dout_y;
    reg [ WORD_EXT_W -1:0]    wr_narrow_y_dout_y;


    //
    // Mapping (internal registers -> output ports)
    //
    assign wrk_rd_wide_xy_ena_x    = rd_wide_xy_ena_x;
    assign wrk_rd_wide_xy_bank_x   = rd_wide_xy_bank_x;
    assign wrk_rd_wide_xy_addr_x   = rd_wide_xy_addr_x;

    assign wrk_rd_narrow_xy_ena_x  = rd_narrow_xy_ena_x;
    assign wrk_rd_narrow_xy_bank_x = rd_narrow_xy_bank_x;
    assign wrk_rd_narrow_xy_addr_x = rd_narrow_xy_addr_x;

    assign wrk_rd_wide_xy_ena_y    = rd_wide_xy_ena_y;
    assign wrk_rd_wide_xy_bank_y   = rd_wide_xy_bank_y;
    assign wrk_rd_wide_xy_addr_y   = rd_wide_xy_addr_y;

    assign wrk_rd_narrow_xy_ena_y  = rd_narrow_xy_ena_y;
    assign wrk_rd_narrow_xy_bank_y = rd_narrow_xy_bank_y;
    assign wrk_rd_narrow_xy_addr_y = rd_narrow_xy_addr_y;

    assign wrk_wr_wide_xy_ena_x    = wr_wide_xy_ena_x;
    assign wrk_wr_wide_xy_bank_x   = wr_wide_xy_bank_x;
    assign wrk_wr_wide_xy_addr_x   = wr_wide_xy_addr_x;
    assign wrk_wr_wide_x_dout_x    = wr_wide_x_dout_x;
    assign wrk_wr_wide_y_dout_x    = wr_wide_y_dout_x;

    assign wrk_wr_narrow_xy_ena_x  = wr_narrow_xy_ena_x;
    assign wrk_wr_narrow_xy_bank_x = wr_narrow_xy_bank_x;
    assign wrk_wr_narrow_xy_addr_x = wr_narrow_xy_addr_x;
    assign wrk_wr_narrow_x_dout_x  = wr_narrow_x_dout_x;
    assign wrk_wr_narrow_y_dout_x  = wr_narrow_y_dout_x;

    assign wrk_wr_wide_xy_ena_y    = wr_wide_xy_ena_y;
    assign wrk_wr_wide_xy_bank_y   = wr_wide_xy_bank_y;
    assign wrk_wr_wide_xy_addr_y   = wr_wide_xy_addr_y;
    assign wrk_wr_wide_x_dout_y    = wr_wide_x_dout_y;
    assign wrk_wr_wide_y_dout_y    = wr_wide_y_dout_y;

    assign wrk_wr_narrow_xy_ena_y  = wr_narrow_xy_ena_y;
    assign wrk_wr_narrow_xy_bank_y = wr_narrow_xy_bank_y;
    assign wrk_wr_narrow_xy_addr_y = wr_narrow_xy_addr_y;
    assign wrk_wr_narrow_x_dout_y  = wr_narrow_x_dout_y;
    assign wrk_wr_narrow_y_dout_y  = wr_narrow_y_dout_y;


    //
    // Delays
    //
    // Two-stage delay line for the narrow read addresses. The delayed
    // values are used as write addresses for both the narrow and the wide
    // write ports; the read address logic below always drives the wide
    // and the narrow read addresses with identical values, so no separate
    // delay line is kept for the wide addresses.
    //
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2;

    always @(posedge clk) begin
        //
        {rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
        {rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
        //
    end


    //
    // Handy Wires
    //
    // High when the next read address equals word_index_last (assigned in
    // the read address section below).
    //
    wire rd_narrow_xy_addr_x_next_is_last;
    wire rd_narrow_xy_addr_y_next_is_last;


    //
    // Read Enable Logic
    //
    // The helper tasks set the X and Y read enables of one memory type
    // together. Read enables follow the *next* FSM state, so they are
    // asserted during the PRE1, PRE2 and BUSY states.
    //

    task _update_wide_xy_rd_en;   input _en; {rd_wide_xy_ena_x,   rd_wide_xy_ena_y  } <= {2{_en}}; endtask
    task _update_narrow_xy_rd_en; input _en; {rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{_en}}; endtask

    task enable_wide_xy_rd_en;    _update_wide_xy_rd_en(1'b1);   endtask
    task disable_wide_xy_rd_en;   _update_wide_xy_rd_en(1'b0);   endtask

    task enable_narrow_xy_rd_en;  _update_narrow_xy_rd_en(1'b1); endtask
    task disable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b0); endtask

    always @(posedge clk)
        //
        if (rst) begin
            //
            disable_wide_xy_rd_en;
            disable_narrow_xy_rd_en;
            //
        end else begin
            //
            // default: everything off, selectively overridden below
            // (the last non-blocking assignment wins)
            //
            disable_wide_xy_rd_en;
            disable_narrow_xy_rd_en;
            //
            case (opcode)
                //
                UOP_OPCODE_PROPAGATE_CARRIES,
                UOP_OPCODE_OUTPUT_FROM_NARROW:
                    //
                    case (wrk_fsm_state_next_one_pass)
                        //
                        WRK_FSM_STATE_LATENCY_PRE1,
                        WRK_FSM_STATE_LATENCY_PRE2,
                        WRK_FSM_STATE_BUSY:
                            //
                            enable_narrow_xy_rd_en;
                        //
                    endcase
                //
                UOP_OPCODE_COPY_CRT_Y2X:
                    //
                    case (wrk_fsm_state_next_one_pass)
                        //
                        WRK_FSM_STATE_LATENCY_PRE1,
                        WRK_FSM_STATE_LATENCY_PRE2,
                        WRK_FSM_STATE_BUSY: begin
                            //
                            enable_narrow_xy_rd_en;
                            enable_wide_xy_rd_en;
                            //
                        end
                        //
                    endcase
                //
            endcase
            //
        end


    //
    // Write Enable Logic
    //
    // Write enables follow the *current* FSM state, so they are asserted
    // one cycle after the BUSY, POST1 and POST2 states are entered.
    //

    task _update_wide_xy_wr_en;   input _en; {wr_wide_xy_ena_x,   wr_wide_xy_ena_y  } <= {2{_en}}; endtask
    task _update_narrow_xy_wr_en; input _en; {wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{_en}}; endtask

    task enable_wide_xy_wr_en;    _update_wide_xy_wr_en(1'b1);   endtask
    task disable_wide_xy_wr_en;   _update_wide_xy_wr_en(1'b0);   endtask

    task enable_narrow_xy_wr_en;  _update_narrow_xy_wr_en(1'b1); endtask
    task disable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b0); endtask

    always @(posedge clk)
        //
        if (rst) begin
            //
            disable_wide_xy_wr_en;
            disable_narrow_xy_wr_en;
            //
        end else begin
            //
            // default: everything off, selectively overridden below
            //
            disable_wide_xy_wr_en;
            disable_narrow_xy_wr_en;
            //
            case (opcode)
                //
                UOP_OPCODE_PROPAGATE_CARRIES:
                    //
                    case (wrk_fsm_state)
                        //
                        WRK_FSM_STATE_BUSY,
                        WRK_FSM_STATE_LATENCY_POST1,
                        WRK_FSM_STATE_LATENCY_POST2:
                            //
                            enable_narrow_xy_wr_en;
                        //
                    endcase
                //
                UOP_OPCODE_COPY_CRT_Y2X:
                    //
                    case (wrk_fsm_state)
                        //
                        WRK_FSM_STATE_BUSY,
                        WRK_FSM_STATE_LATENCY_POST1,
                        WRK_FSM_STATE_LATENCY_POST2: begin
                            //
                            enable_wide_xy_wr_en;
                            enable_narrow_xy_wr_en;
                            //
                        end
                        //
                    endcase
                //
            endcase
            //
        end


    //
    // Data Logic
    //

    // carries kept between consecutive words (PROPAGATE_CARRIES only)
    reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r;
    reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r;
    reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
    reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r;

    // incoming word plus the carry from the previous word
    wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
    wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
    wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
    wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};

    // same sum with the carry part cleared, i.e. only the lower WORD_W bits
    wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]};
    wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]};
    wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]};
    wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]};

    always @(posedge clk) begin
        //
        // default: drive "don't care" values when nothing is written
        //
        wr_wide_x_dout_x   <= WORD_EXT_DNC;
        wr_wide_y_dout_x   <= WORD_EXT_DNC;
        wr_wide_x_dout_y   <= WORD_EXT_DNC;
        wr_wide_y_dout_y   <= WORD_EXT_DNC;
        wr_narrow_x_dout_x <= WORD_EXT_DNC;
        wr_narrow_y_dout_x <= WORD_EXT_DNC;
        wr_narrow_x_dout_y <= WORD_EXT_DNC;
        wr_narrow_y_dout_y <= WORD_EXT_DNC;
        //
        case (opcode)
            //
            UOP_OPCODE_PROPAGATE_CARRIES:
                //
                case (wrk_fsm_state)
                    //
                    // clear the carries right before the first word is processed
                    //
                    WRK_FSM_STATE_LATENCY_PRE2: begin
                        rd_narrow_x_din_x_cry_r <= CARRY_ZERO;
                        rd_narrow_y_din_x_cry_r <= CARRY_ZERO;
                        rd_narrow_x_din_y_cry_r <= CARRY_ZERO;
                        rd_narrow_y_din_y_cry_r <= CARRY_ZERO;
                    end
                    //
                    WRK_FSM_STATE_BUSY,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST2: begin // TODO: post2 doesn't need update of carry, since that's the last word
                        //
                        // upper bits of the sum become the carry for the next word
                        //
                        rd_narrow_x_din_x_cry_r <= rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W];
                        rd_narrow_y_din_x_cry_r <= rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W];
                        rd_narrow_x_din_y_cry_r <= rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W];
                        rd_narrow_y_din_y_cry_r <= rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W];
                        //
                        // lower bits of the sum are written back
                        //
                        wr_narrow_x_dout_x <= rd_narrow_x_din_x_w_cry_reduced;
                        wr_narrow_y_dout_x <= rd_narrow_y_din_x_w_cry_reduced;
                        wr_narrow_x_dout_y <= rd_narrow_x_din_y_w_cry_reduced;
                        wr_narrow_y_dout_y <= rd_narrow_y_din_y_w_cry_reduced;
                        //
                    end
                    //
                endcase
            //
            UOP_OPCODE_COPY_CRT_Y2X:
                //
                case (wrk_fsm_state)
                    //
                    WRK_FSM_STATE_BUSY,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST2: begin
                        //
                        // data from the "_y" read ports goes to both the
                        // "_x" and the "_y" write ports
                        //
                        wr_wide_x_dout_x   <= wrk_rd_wide_x_din_y;
                        wr_wide_y_dout_x   <= wrk_rd_wide_y_din_y;
                        wr_wide_x_dout_y   <= wrk_rd_wide_x_din_y;
                        wr_wide_y_dout_y   <= wrk_rd_wide_y_din_y;
                        //
                        wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_y;
                        wr_narrow_y_dout_x <= wrk_rd_narrow_y_din_y;
                        wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y;
                        wr_narrow_y_dout_y <= wrk_rd_narrow_y_din_y;
                        //
                    end
                    //
                endcase
            //
        endcase
        //
    end


    //
    // Write Address Logic
    //
    // Write addresses are the read addresses delayed by two cycles. The
    // wide bank/address are driven for PROPAGATE_CARRIES as well, but the
    // wide write enables stay low for that opcode (see above).
    //
    always @(posedge clk) begin
        //
        {wr_wide_xy_bank_x,   wr_wide_xy_addr_x  } <= {BANK_DNC, OP_ADDR_DNC};
        {wr_wide_xy_bank_y,   wr_wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
        {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
        {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
        //
        case (opcode)
            //
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_COPY_CRT_Y2X:
                //
                case (wrk_fsm_state)
                    //
                    WRK_FSM_STATE_BUSY,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST2: begin
                        //
                        {wr_wide_xy_bank_x,   wr_wide_xy_addr_x  } <= {sel_wide_out,   rd_narrow_xy_addr_x_dly2};
                        {wr_wide_xy_bank_y,   wr_wide_xy_addr_y  } <= {sel_wide_out,   rd_narrow_xy_addr_y_dly2};
                        //
                        {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly2};
                        {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly2};
                        //
                    end
                    //
                endcase
            //
        endcase
        //
    end


    //
    // Read Address Logic
    //
    // The first address (zero) is issued when the FSM is about to enter
    // PRE1, after that the address is incremented every cycle while the
    // FSM is about to be in PRE2 or BUSY.
    //
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next;

    assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
    assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;

    always @(posedge clk) begin
        //
        {rd_wide_xy_bank_x,   rd_wide_xy_addr_x  } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ??
        {rd_wide_xy_bank_y,   rd_wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
        //
        case (opcode)
            //
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_OUTPUT_FROM_NARROW,
            UOP_OPCODE_COPY_CRT_Y2X:
                //
                case (wrk_fsm_state_next_one_pass)
                    //
                    WRK_FSM_STATE_LATENCY_PRE1: begin
                        //
                        {rd_wide_xy_bank_x,   rd_wide_xy_addr_x  } <= {sel_wide_in,   OP_ADDR_ZERO};
                        {rd_wide_xy_bank_y,   rd_wide_xy_addr_y  } <= {sel_wide_in,   OP_ADDR_ZERO};
                        //
                        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
                        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
                        //
                        rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
                        rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
                        //
                    end
                    //
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_BUSY: begin
                        //
                        {rd_wide_xy_bank_x,   rd_wide_xy_addr_x  } <= {sel_wide_in,   rd_narrow_xy_addr_x_next};
                        {rd_wide_xy_bank_y,   rd_wide_xy_addr_y  } <= {sel_wide_in,   rd_narrow_xy_addr_y_next};
                        //
                        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
                        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
                        //
                        rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
                        rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
                        //
                    end
                    //
                endcase
            //
        endcase
        //
    end


    //
    // FSM Process
    //
    // The state only advances for the opcodes handled by this module, any
    // other opcode forces the FSM back to IDLE.
    //
    always @(posedge clk)
        //
        if (rst) wrk_fsm_state <= WRK_FSM_STATE_IDLE;
        else case (opcode)
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_OUTPUT_FROM_NARROW,
            UOP_OPCODE_COPY_CRT_Y2X:        wrk_fsm_state <= wrk_fsm_state_next_one_pass;
            default:                        wrk_fsm_state <= WRK_FSM_STATE_IDLE;
        endcase


    //
    // Busy Exit Logic
    //
    // One-cycle flag raised in the BUSY state when the next read address
    // reaches word_index_last; it makes the FSM leave BUSY.
    //
    reg wrk_fsm_done_one_pass = 1'b0;

    always @(posedge clk) begin
        //
        wrk_fsm_done_one_pass <= 1'b0;
        //
        case (opcode)
            //
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_OUTPUT_FROM_NARROW,
            UOP_OPCODE_COPY_CRT_Y2X: begin
                //
                if (wrk_fsm_state == WRK_FSM_STATE_BUSY) begin
                    //
                    if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass <= 1'b1; // TODO: Check, whether both are necessary...
                    if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass <= 1'b1;
                    //
                end
                //
            end
            //
        endcase
        //
    end


    //
    // FSM Transition Logic
    //
    // Purely combinational next-state function. The state register is four
    // bits wide, but only seven encodings are used (4'h4 is skipped on
    // purpose, 4'h8..4'hF are unused). The default branch gives every
    // unused encoding a defined successor: without it this block would
    // infer a latch on wrk_fsm_state_next_one_pass and the FSM would have
    // no defined way out of an illegal state.
    //
    always @* begin
        //
        case (wrk_fsm_state)
            WRK_FSM_STATE_IDLE:          wrk_fsm_state_next_one_pass = ena                   ? WRK_FSM_STATE_LATENCY_PRE1  : WRK_FSM_STATE_IDLE ;
            WRK_FSM_STATE_LATENCY_PRE1:  wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_LATENCY_PRE2                       ;
            WRK_FSM_STATE_LATENCY_PRE2:  wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_BUSY                               ;
            WRK_FSM_STATE_BUSY:          wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ;
            WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_LATENCY_POST2                      ;
            WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_STOP                               ;
            WRK_FSM_STATE_STOP:          wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_IDLE                               ;
            default:                     wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_IDLE                               ;
        endcase
        //
    end


    //
    // Ready Logic
    //
    // rdy drops as soon as ena is seen in IDLE and is restored in STOP;
    // in all other states the flag keeps its value.
    //
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    always @(posedge clk)
        //
        if (rst) rdy_reg <= 1'b1;
        else case (wrk_fsm_state)
            WRK_FSM_STATE_IDLE: rdy_reg <= ~ena;
            WRK_FSM_STATE_STOP: rdy_reg <= 1'b1;
        endcase


endmodule