diff options
Diffstat (limited to 'rtl/modexpng_io_manager.v')
-rw-r--r-- | rtl/modexpng_io_manager.v | 527 |
1 files changed, 527 insertions, 0 deletions
//
// modexpng_io_manager.v
//
// I/O manager: on `ena`, walks operand addresses 0 .. word_index_last of the
// bank selected by `sel_in` on input port 1 or 2 (chosen by `sel_aux`), and
// forwards every word that comes back to one of four write ports
// (wide/narrow x X/Y, chosen by `opcode` and `sel_crt`) in bank `sel_out`.
//
// Sequencing is done by a small FSM:
//
//   IDLE -> LATENCY_PRE1 -> LATENCY_PRE2 -> BUSY ... -> EXTRA
//        -> LATENCY_POST1 -> LATENCY_POST2 -> IDLE
//
// The read side (input enables and addresses) is driven from the *next*
// state, the write side (ext_* enables, addresses and data) from the
// *current* state, with the write address taken from the read address delayed
// by two clock cycles.
// NOTE(review): the two-cycle delay presumably matches the read latency of
// the input ports -- confirm against the memory wrapper.
//
// When `sel_in` is BANK_IN_1_N_COEFF one additional word is read in EXTRA and
// written to {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF} during LATENCY_POST2
// (narrow opcode only).
//
// `rdy` is high after reset, drops when `ena` is seen in IDLE and rises again
// once the FSM has passed through LATENCY_POST2.
//
module modexpng_io_manager
(
    clk,
    rst,

    ena,
    rdy,

    sel_crt,
    sel_aux,
    sel_in,
    sel_out,

    opcode,

    word_index_last,

    ext_wide_xy_ena_x,
    ext_wide_xy_bank_x,
    ext_wide_xy_addr_x,
    ext_wide_x_din_x,
    ext_wide_y_din_x,

    ext_narrow_xy_ena_x,
    ext_narrow_xy_bank_x,
    ext_narrow_xy_addr_x,
    ext_narrow_x_din_x,
    ext_narrow_y_din_x,

    ext_wide_xy_ena_y,
    ext_wide_xy_bank_y,
    ext_wide_xy_addr_y,
    ext_wide_x_din_y,
    ext_wide_y_din_y,

    ext_narrow_xy_ena_y,
    ext_narrow_xy_bank_y,
    ext_narrow_xy_addr_y,
    ext_narrow_x_din_y,
    ext_narrow_y_din_y,

    io_in_1_en,
    io_in_1_addr,
    io_in_1_dout,

    io_in_2_en,
    io_in_2_addr,
    io_in_2_dout,

    io_out_en,
    io_out_we,
    io_out_addr,
    io_out_din
);

    //
    // Headers
    //
    // All widths (UOP_*_W, BANK_ADDR_W, OP_ADDR_W, WORD_W, WORD_EXT_W), opcode
    // and bank constants and the *_DNC "don't care" values used below come
    // from these two project headers.
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_microcode.vh"


    //
    // Ports
    //
    input                                     clk;
    input                                     rst;    // synchronous reset (sampled on posedge clk)

    input                                     ena;    // start request, sampled in IDLE
    output                                    rdy;    // high while no operation is in flight

    input  [                UOP_CRT_W   -1:0] sel_crt;    // selects X or Y write ports
    input  [                UOP_AUX_W   -1:0] sel_aux;    // selects input port 1 or 2
    input  [              BANK_ADDR_W   -1:0] sel_in;     // source bank
    input  [              BANK_ADDR_W   -1:0] sel_out;    // destination bank

    input  [             UOP_OPCODE_W   -1:0] opcode;

    input  [                OP_ADDR_W   -1:0] word_index_last;    // compared against the read address to end BUSY

    output                                    ext_wide_xy_ena_x;
    output [              BANK_ADDR_W   -1:0] ext_wide_xy_bank_x;
    output [                OP_ADDR_W   -1:0] ext_wide_xy_addr_x;
    output [               WORD_EXT_W   -1:0] ext_wide_x_din_x;
    output [               WORD_EXT_W   -1:0] ext_wide_y_din_x;

    output                                    ext_narrow_xy_ena_x;
    output [              BANK_ADDR_W   -1:0] ext_narrow_xy_bank_x;
    output [                OP_ADDR_W   -1:0] ext_narrow_xy_addr_x;
    output [               WORD_EXT_W   -1:0] ext_narrow_x_din_x;
    output [               WORD_EXT_W   -1:0] ext_narrow_y_din_x;

    output                                    ext_wide_xy_ena_y;
    output [              BANK_ADDR_W   -1:0] ext_wide_xy_bank_y;
    output [                OP_ADDR_W   -1:0] ext_wide_xy_addr_y;
    output [               WORD_EXT_W   -1:0] ext_wide_x_din_y;
    output [               WORD_EXT_W   -1:0] ext_wide_y_din_y;

    output                                    ext_narrow_xy_ena_y;
    output [              BANK_ADDR_W   -1:0] ext_narrow_xy_bank_y;
    output [                OP_ADDR_W   -1:0] ext_narrow_xy_addr_y;
    output [               WORD_EXT_W   -1:0] ext_narrow_x_din_y;
    output [               WORD_EXT_W   -1:0] ext_narrow_y_din_y;

    output                                    io_in_1_en;
    output [BANK_ADDR_W + OP_ADDR_W     -1:0] io_in_1_addr;    // {bank, operand word index}
    input  [                   WORD_W   -1:0] io_in_1_dout;

    output                                    io_in_2_en;
    output [BANK_ADDR_W + OP_ADDR_W     -1:0] io_in_2_addr;    // {bank, operand word index}
    input  [                   WORD_W   -1:0] io_in_2_dout;

    output                                    io_out_en;
    output                                    io_out_we;
    output [BANK_ADDR_W + OP_ADDR_W     -1:0] io_out_addr;     // {bank, operand word index}
    output [                   WORD_W   -1:0] io_out_din;


    //
    // FSM Declaration
    //
    // Seven of the eight 3-bit encodings are used; 3'b111 is unused and is
    // mapped back to IDLE by the transition logic.
    //
    localparam [2:0] IO_FSM_STATE_IDLE          = 3'b000;
    localparam [2:0] IO_FSM_STATE_LATENCY_PRE1  = 3'b001;
    localparam [2:0] IO_FSM_STATE_LATENCY_PRE2  = 3'b010;
    localparam [2:0] IO_FSM_STATE_BUSY          = 3'b011;
    localparam [2:0] IO_FSM_STATE_EXTRA         = 3'b100;
    localparam [2:0] IO_FSM_STATE_LATENCY_POST1 = 3'b101;
    localparam [2:0] IO_FSM_STATE_LATENCY_POST2 = 3'b110;

    reg [2:0] io_fsm_state = IO_FSM_STATE_IDLE;
    reg [2:0] io_fsm_state_next;


    //
    // Control Signals (input / output ports)
    //
    reg                     in_1_en = 1'b0;
    reg [BANK_ADDR_W  -1:0] in_1_addr_bank;
    reg [  OP_ADDR_W  -1:0] in_1_addr_op;

    reg                     in_2_en = 1'b0;
    reg [BANK_ADDR_W  -1:0] in_2_addr_bank;
    reg [  OP_ADDR_W  -1:0] in_2_addr_op;

    // NOTE(review): out_en is only ever cleared (on reset), and out_we and
    // out_din are not assigned anywhere in this module; only the output
    // address registers are driven. Presumably the output direction is not
    // implemented yet -- confirm before relying on the io_out_* port.
    reg                     out_en = 1'b0;
    reg                     out_we;
    reg [BANK_ADDR_W  -1:0] out_addr_bank;
    reg [  OP_ADDR_W  -1:0] out_addr_op;
    reg [     WORD_W  -1:0] out_din;


    //
    // Control Signals (wide / narrow write ports, X and Y)
    //
    reg                     wide_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W  -1:0] wide_xy_bank_x;
    reg [  OP_ADDR_W  -1:0] wide_xy_addr_x;
    reg [ WORD_EXT_W  -1:0] wide_x_din_x;
    reg [ WORD_EXT_W  -1:0] wide_y_din_x;

    reg                     narrow_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W  -1:0] narrow_xy_bank_x;
    reg [  OP_ADDR_W  -1:0] narrow_xy_addr_x;
    reg [ WORD_EXT_W  -1:0] narrow_x_din_x;
    reg [ WORD_EXT_W  -1:0] narrow_y_din_x;

    reg                     wide_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W  -1:0] wide_xy_bank_y;
    reg [  OP_ADDR_W  -1:0] wide_xy_addr_y;
    reg [ WORD_EXT_W  -1:0] wide_x_din_y;
    reg [ WORD_EXT_W  -1:0] wide_y_din_y;

    reg                     narrow_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W  -1:0] narrow_xy_bank_y;
    reg [  OP_ADDR_W  -1:0] narrow_xy_addr_y;
    reg [ WORD_EXT_W  -1:0] narrow_x_din_y;
    reg [ WORD_EXT_W  -1:0] narrow_y_din_y;


    //
    // Mapping (input / output ports)
    //
    assign io_in_1_en   = in_1_en;
    assign io_in_1_addr = {in_1_addr_bank, in_1_addr_op};

    assign io_in_2_en   = in_2_en;
    assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op};

    assign io_out_en    = out_en;
    assign io_out_we    = out_we;
    assign io_out_addr  = {out_addr_bank, out_addr_op};
    assign io_out_din   = out_din;


    //
    // Mapping (wide / narrow write ports)
    //
    assign ext_wide_xy_ena_x    = wide_xy_ena_x;
    assign ext_wide_xy_bank_x   = wide_xy_bank_x;
    assign ext_wide_xy_addr_x   = wide_xy_addr_x;
    assign ext_wide_x_din_x     = wide_x_din_x;
    assign ext_wide_y_din_x     = wide_y_din_x;

    assign ext_narrow_xy_ena_x  = narrow_xy_ena_x;
    assign ext_narrow_xy_bank_x = narrow_xy_bank_x;
    assign ext_narrow_xy_addr_x = narrow_xy_addr_x;
    assign ext_narrow_x_din_x   = narrow_x_din_x;
    assign ext_narrow_y_din_x   = narrow_y_din_x;

    assign ext_wide_xy_ena_y    = wide_xy_ena_y;
    assign ext_wide_xy_bank_y   = wide_xy_bank_y;
    assign ext_wide_xy_addr_y   = wide_xy_addr_y;
    assign ext_wide_x_din_y     = wide_x_din_y;
    assign ext_wide_y_din_y     = wide_y_din_y;

    assign ext_narrow_xy_ena_y  = narrow_xy_ena_y;
    assign ext_narrow_xy_bank_y = narrow_xy_bank_y;
    assign ext_narrow_xy_addr_y = narrow_xy_addr_y;
    assign ext_narrow_x_din_y   = narrow_x_din_y;
    assign ext_narrow_y_din_y   = narrow_y_din_y;


    //
    // Delays
    //
    // Two-stage shift register on each read word index; the *_dly2 value is
    // what gets used as the write address further down.
    //
    reg [OP_ADDR_W -1:0] in_1_addr_op_dly1;
    reg [OP_ADDR_W -1:0] in_1_addr_op_dly2;
    reg [OP_ADDR_W -1:0] in_2_addr_op_dly1;
    reg [OP_ADDR_W -1:0] in_2_addr_op_dly2;

    always @(posedge clk) begin
        //
        {in_1_addr_op_dly2, in_1_addr_op_dly1} <= {in_1_addr_op_dly1, in_1_addr_op};
        {in_2_addr_op_dly2, in_2_addr_op_dly1} <= {in_2_addr_op_dly1, in_2_addr_op};
        //
    end


    //
    // Handy Wires
    //
    wire opcode_is_input    = (opcode == UOP_OPCODE_INPUT_TO_WIDE) ||
                              (opcode == UOP_OPCODE_INPUT_TO_NARROW);

    wire opcode_is_wide     = (opcode == UOP_OPCODE_INPUT_TO_WIDE  );
    wire opcode_is_narrow   = (opcode == UOP_OPCODE_INPUT_TO_NARROW);

    // the N_COEFF bank triggers one additional read in the EXTRA state
    wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF);

    wire sel_crt_is_x       = sel_crt == UOP_CRT_X;
    wire sel_crt_is_y       = sel_crt == UOP_CRT_Y;

    wire sel_aux_is_1       = sel_aux == UOP_AUX_1;
    wire sel_aux_is_2       = sel_aux == UOP_AUX_2;

    wire in_1_addr_op_is_last;
    wire in_2_addr_op_is_last;

    wire in_1_addr_next_op_is_last;
    wire in_2_addr_next_op_is_last;


    //
    // Enable Logic (read side)
    //
    // Keyed on the *next* state so the registered enable is already valid
    // when the FSM enters the corresponding state.
    //
    always @(posedge clk)
        //
        if (rst) begin
            in_1_en <= 1'b0;
            in_2_en <= 1'b0;
            out_en  <= 1'b0;
        end else case (io_fsm_state_next)
            //
            IO_FSM_STATE_LATENCY_PRE1,
            IO_FSM_STATE_LATENCY_PRE2,
            IO_FSM_STATE_BUSY: begin
                in_1_en <= opcode_is_input && sel_aux_is_1;
                in_2_en <= opcode_is_input && sel_aux_is_2;
            end
            //
            IO_FSM_STATE_EXTRA: begin
                // only port 1 can perform the additional read
                in_1_en <= opcode_is_input && sel_aux_is_1 && sel_in_needs_extra;
                in_2_en <= 1'b0;
            end
            //
            default: begin
                in_1_en <= 1'b0;
                in_2_en <= 1'b0;
            end
            //
        endcase


    //
    // Enable Logic (write side)
    //
    // Keyed on the *current* state. During LATENCY_POST2 only the narrow
    // ports may be enabled, and only for the additional (N_COEFF) word.
    //
    always @(posedge clk)
        //
        if (rst) begin
            //
            wide_xy_ena_x   <= 1'b0;
            wide_xy_ena_y   <= 1'b0;
            narrow_xy_ena_x <= 1'b0;
            narrow_xy_ena_y <= 1'b0;
            //
        end else case (io_fsm_state)
            //
            IO_FSM_STATE_BUSY,
            IO_FSM_STATE_EXTRA,
            IO_FSM_STATE_LATENCY_POST1: begin
                wide_xy_ena_x   <= opcode_is_wide   && sel_crt_is_x;
                wide_xy_ena_y   <= opcode_is_wide   && sel_crt_is_y;
                narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x;
                narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y;
            end
            //
            IO_FSM_STATE_LATENCY_POST2: begin
                wide_xy_ena_x   <= 1'b0;
                wide_xy_ena_y   <= 1'b0;
                narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra;
                narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra;
            end
            //
            default: begin
                wide_xy_ena_x   <= 1'b0;
                wide_xy_ena_y   <= 1'b0;
                narrow_xy_ena_x <= 1'b0;
                narrow_xy_ena_y <= 1'b0;
            end
            //
        endcase


    //
    // Data Logic
    //
    // The word from the selected input port is zero-extended from WORD_W to
    // WORD_EXT_W bits and copied to both the "x" and the "y" data output of
    // the addressed write port. All data outputs default to WORD_EXT_DNC
    // every cycle; the later non-blocking assignments override that default.
    //
    wire [WORD_EXT_W -1:0] io_in_dout_mux =
        {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_dout : io_in_2_dout};

    always @(posedge clk) begin
        //
        wide_x_din_x   <= WORD_EXT_DNC;
        wide_y_din_x   <= WORD_EXT_DNC;
        wide_x_din_y   <= WORD_EXT_DNC;
        wide_y_din_y   <= WORD_EXT_DNC;
        narrow_x_din_x <= WORD_EXT_DNC;
        narrow_y_din_x <= WORD_EXT_DNC;
        narrow_x_din_y <= WORD_EXT_DNC;
        narrow_y_din_y <= WORD_EXT_DNC;
        //
        case (io_fsm_state)
            //
            IO_FSM_STATE_BUSY,
            IO_FSM_STATE_EXTRA,
            IO_FSM_STATE_LATENCY_POST1: begin
                if (opcode_is_wide   && sel_crt_is_x) {wide_x_din_x,   wide_y_din_x  } <= {2{io_in_dout_mux}};
                if (opcode_is_wide   && sel_crt_is_y) {wide_x_din_y,   wide_y_din_y  } <= {2{io_in_dout_mux}};
                if (opcode_is_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
                if (opcode_is_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
            end
            //
            IO_FSM_STATE_LATENCY_POST2: begin
                if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
                if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
            end
            //
        endcase
        //
    end


    //
    // Address Logic (write side)
    //
    // Regular words go to bank `sel_out` at the read index delayed by two
    // cycles; the additional word goes to the fixed location
    // {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF}. Addresses default to the
    // "don't care" constants whenever nothing is written.
    //
    wire [OP_ADDR_W -1:0] in_addr_op_dly2_mux =
        sel_aux_is_1 ? in_1_addr_op_dly2 : in_2_addr_op_dly2;

    always @(posedge clk) begin
        //
        {wide_xy_bank_x,   wide_xy_addr_x  } <= {BANK_DNC, OP_ADDR_DNC};
        {wide_xy_bank_y,   wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
        {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
        {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
        //
        case (io_fsm_state)
            //
            IO_FSM_STATE_BUSY,
            IO_FSM_STATE_EXTRA,
            IO_FSM_STATE_LATENCY_POST1: begin
                if (opcode_is_wide   && sel_crt_is_x) {wide_xy_bank_x,   wide_xy_addr_x  } <= {sel_out, in_addr_op_dly2_mux};
                if (opcode_is_wide   && sel_crt_is_y) {wide_xy_bank_y,   wide_xy_addr_y  } <= {sel_out, in_addr_op_dly2_mux};
                if (opcode_is_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux};
                if (opcode_is_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux};
            end
            //
            IO_FSM_STATE_LATENCY_POST2: begin
                if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
                if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
            end
            //
        endcase
        //
    end


    //
    // Address Logic (read side)
    //
    // Each address register has a companion *_next register holding the
    // address one step ahead, so the "last word" comparison can be made one
    // cycle early.
    // NOTE(review): the increment is applied to the full {bank, index}
    // vector, so an index overflow would carry into the bank bits --
    // presumably word_index_last always keeps the index in range; confirm.
    //
    reg  [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next;
    reg  [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next;
    reg  [BANK_ADDR_W + OP_ADDR_W -1:0] out_addr_next;

    wire [OP_ADDR_W -1:0] in_1_addr_next_op = in_1_addr_next[OP_ADDR_W -1:0];
    wire [OP_ADDR_W -1:0] in_2_addr_next_op = in_2_addr_next[OP_ADDR_W -1:0];
    wire [OP_ADDR_W -1:0] out_addr_next_op  = out_addr_next [OP_ADDR_W -1:0];

    assign in_1_addr_op_is_last      = in_1_addr_op      == word_index_last;
    assign in_2_addr_op_is_last      = in_2_addr_op      == word_index_last;

    assign in_1_addr_next_op_is_last = in_1_addr_next_op == word_index_last;
    assign in_2_addr_next_op_is_last = in_2_addr_next_op == word_index_last;

    always @(posedge clk)
        //
        case (io_fsm_state_next)
            //
            IO_FSM_STATE_LATENCY_PRE1: begin
                // start of a transfer: current address = word 0, next = word 1
                {in_1_addr_bank, in_1_addr_op} <= {sel_in,  OP_ADDR_ZERO};
                {in_2_addr_bank, in_2_addr_op} <= {sel_in,  OP_ADDR_ZERO};
                {out_addr_bank,  out_addr_op } <= {sel_out, OP_ADDR_ZERO};
                //
                in_1_addr_next <= {sel_in,  OP_ADDR_ONE};
                in_2_addr_next <= {sel_in,  OP_ADDR_ONE};
                out_addr_next  <= {sel_out, OP_ADDR_ONE};
                //
            end
            //
            IO_FSM_STATE_LATENCY_PRE2,
            IO_FSM_STATE_BUSY: begin
                // advance all three address pairs by one word
                {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
                {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next;
                {out_addr_bank,  out_addr_op } <= out_addr_next;
                //
                in_1_addr_next <= in_1_addr_next + 1'b1;
                in_2_addr_next <= in_2_addr_next + 1'b1;
                out_addr_next  <= out_addr_next  + 1'b1;
                //
            end
            //
            IO_FSM_STATE_EXTRA:
                // one more step on port 1 only, for the additional word
                if (opcode_is_input && sel_aux_is_1 && sel_in_needs_extra) begin
                    //
                    {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
                    //
                    in_1_addr_next <= in_1_addr_next + 1'b1;
                    //
                end
            //
        endcase


    //
    // FSM Process
    //
    always @(posedge clk)
        //
        if (rst) io_fsm_state <= IO_FSM_STATE_IDLE;
        else     io_fsm_state <= io_fsm_state_next;


    //
    // Busy Exit Logic
    //
    // Registered "done" flag: set for one cycle when, while in BUSY, the
    // look-ahead address of the selected input port equals word_index_last.
    // NOTE(review): the flag can only be set for an input opcode with
    // sel_aux equal to UOP_AUX_1 or UOP_AUX_2; with any other combination
    // the FSM never leaves BUSY. Presumably the caller never asserts `ena`
    // in that situation -- confirm.
    //
    reg io_fsm_done = 1'b0;

    always @(posedge clk) begin
        //
        io_fsm_done <= 1'b0;
        //
        if (io_fsm_state == IO_FSM_STATE_BUSY) begin
            //
            if (opcode_is_input) begin
                if (sel_aux_is_1 && in_1_addr_next_op_is_last) io_fsm_done <= 1'b1;
                if (sel_aux_is_2 && in_2_addr_next_op_is_last) io_fsm_done <= 1'b1;
            end
            //
        end
        //
    end


    //
    // FSM Transition Logic
    //
    // Purely combinational. The default branch guarantees that
    // io_fsm_state_next is assigned for every value of io_fsm_state (so no
    // latch is inferred) and sends the unused encoding 3'b111 back to IDLE.
    //
    always @* begin
        //
        case (io_fsm_state)
            IO_FSM_STATE_IDLE:          io_fsm_state_next = ena         ? IO_FSM_STATE_LATENCY_PRE1 : IO_FSM_STATE_IDLE ;
            IO_FSM_STATE_LATENCY_PRE1:  io_fsm_state_next =               IO_FSM_STATE_LATENCY_PRE2                     ;
            IO_FSM_STATE_LATENCY_PRE2:  io_fsm_state_next =               IO_FSM_STATE_BUSY                             ;
            IO_FSM_STATE_BUSY:          io_fsm_state_next = io_fsm_done ? IO_FSM_STATE_EXTRA        : IO_FSM_STATE_BUSY ;
            IO_FSM_STATE_EXTRA:         io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST1                    ;
            IO_FSM_STATE_LATENCY_POST1: io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST2                    ;
            IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next =               IO_FSM_STATE_IDLE                             ;
            default:                    io_fsm_state_next =               IO_FSM_STATE_IDLE                             ;
        endcase
        //
    end


    //
    // Ready Logic
    //
    // rdy_reg keeps its value in every state not listed below (clocked
    // process, so the missing default is a hold, not a latch).
    //
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    always @(posedge clk)
        //
        if (rst) rdy_reg <= 1'b1;
        else case (io_fsm_state)
            IO_FSM_STATE_IDLE:          rdy_reg <= ~ena;
            IO_FSM_STATE_LATENCY_POST2: rdy_reg <= 1'b1;
        endcase


endmodule