aboutsummaryrefslogtreecommitdiff
path: root/rtl/modexpng_io_manager.v
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-03 16:40:25 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-03 16:40:25 +0300
commitaffada8d5da7426d22035360c3674ab3b3311ab5 (patch)
treea3db075dc03033db45e3ad5279badf2da48b4566 /rtl/modexpng_io_manager.v
parent0b4b42da734c1164b65a334351274f946b2d4dcb (diff)
Reworked storage architecture (moved I/O memory to a separate module, since there's
only one instance of input/output values, while storage manager has dual storage space for P and Q multipliers). Started working on microcoded layer, added input operation and modular multiplication.
Diffstat (limited to 'rtl/modexpng_io_manager.v')
-rw-r--r--rtl/modexpng_io_manager.v527
1 files changed, 527 insertions, 0 deletions
diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v
new file mode 100644
index 0000000..81f582f
--- /dev/null
+++ b/rtl/modexpng_io_manager.v
@@ -0,0 +1,527 @@
+//
+// modexpng_io_manager
+//
+// Copies operand words read from one of two I/O input memories (io_in_1_*,
+// io_in_2_*) into the "wide" or "narrow" storage write ports (ext_*), for
+// either the X or the Y set of ports. A small FSM sequences each transfer:
+// `ena` starts it while the FSM is idle, `rdy` goes low for its duration.
+//
+// NOTE(review): the io_out_* port is declared, but within this module only
+// its enable (cleared on reset) and its address are ever assigned; out_we and
+// out_din are never written.
+//
+module modexpng_io_manager
+(
+ // clock and synchronous reset
+ clk,
+ rst,
+
+ // start / ready handshake
+ ena,
+ rdy,
+
+ // selectors: X/Y port set, input memory 1/2, source bank, destination bank
+ sel_crt,
+ sel_aux,
+ sel_in,
+ sel_out,
+
+ // micro-operation opcode (only the two INPUT_TO_* opcodes are decoded here)
+ opcode,
+
+ // operand word index compared against the read address to end a transfer
+ word_index_last,
+
+ // write port towards "wide" storage, X set
+ ext_wide_xy_ena_x,
+ ext_wide_xy_bank_x,
+ ext_wide_xy_addr_x,
+ ext_wide_x_din_x,
+ ext_wide_y_din_x,
+
+ // write port towards "narrow" storage, X set
+ ext_narrow_xy_ena_x,
+ ext_narrow_xy_bank_x,
+ ext_narrow_xy_addr_x,
+ ext_narrow_x_din_x,
+ ext_narrow_y_din_x,
+
+ // write port towards "wide" storage, Y set
+ ext_wide_xy_ena_y,
+ ext_wide_xy_bank_y,
+ ext_wide_xy_addr_y,
+ ext_wide_x_din_y,
+ ext_wide_y_din_y,
+
+ // write port towards "narrow" storage, Y set
+ ext_narrow_xy_ena_y,
+ ext_narrow_xy_bank_y,
+ ext_narrow_xy_addr_y,
+ ext_narrow_x_din_y,
+ ext_narrow_y_din_y,
+
+ // read port of I/O input memory 1
+ io_in_1_en,
+ io_in_1_addr,
+ io_in_1_dout,
+
+ // read port of I/O input memory 2
+ io_in_2_en,
+ io_in_2_addr,
+ io_in_2_dout,
+
+ // write port of I/O output memory
+ io_out_en,
+ io_out_we,
+ io_out_addr,
+ io_out_din
+);
+
+ //
+ // Headers
+ //
+ `include "modexpng_parameters.vh"
+ `include "modexpng_microcode.vh"
+
+
+ //
+ // Ports
+ //
+ // All widths come from parameters in the included headers.
+ //
+ // clk: every register in this module updates on its rising edge
+ // rst: tested inside the clocked processes, i.e. synchronous, active-high
+ input clk;
+ input rst;
+
+ // ena is sampled while the FSM is idle; rdy drops in response and is
+ // raised again from the last FSM state
+ input ena;
+ output rdy;
+
+ // sel_crt: compared against UOP_CRT_X / UOP_CRT_Y to pick the *_x or *_y ports
+ // sel_aux: compared against UOP_AUX_1 / UOP_AUX_2 to pick io_in_1 or io_in_2
+ // sel_in:  bank part of the input memory read address
+ // sel_out: bank part of the storage write address (and of io_out_addr)
+ input [ UOP_CRT_W -1:0] sel_crt;
+ input [ UOP_AUX_W -1:0] sel_aux;
+ input [ BANK_ADDR_W -1:0] sel_in;
+ input [ BANK_ADDR_W -1:0] sel_out;
+
+ // compared against UOP_OPCODE_INPUT_TO_WIDE and UOP_OPCODE_INPUT_TO_NARROW
+ input [ UOP_OPCODE_W -1:0] opcode;
+
+ // a transfer leaves the BUSY state once the upcoming read index equals this
+ input [ OP_ADDR_W -1:0] word_index_last;
+
+ // "wide" storage write port, X set; the same word is driven on both din buses
+ output ext_wide_xy_ena_x;
+ output [ BANK_ADDR_W -1:0] ext_wide_xy_bank_x;
+ output [ OP_ADDR_W -1:0] ext_wide_xy_addr_x;
+ output [ WORD_EXT_W -1:0] ext_wide_x_din_x;
+ output [ WORD_EXT_W -1:0] ext_wide_y_din_x;
+
+ // "narrow" storage write port, X set
+ output ext_narrow_xy_ena_x;
+ output [ BANK_ADDR_W -1:0] ext_narrow_xy_bank_x;
+ output [ OP_ADDR_W -1:0] ext_narrow_xy_addr_x;
+ output [ WORD_EXT_W -1:0] ext_narrow_x_din_x;
+ output [ WORD_EXT_W -1:0] ext_narrow_y_din_x;
+
+ // "wide" storage write port, Y set
+ output ext_wide_xy_ena_y;
+ output [ BANK_ADDR_W -1:0] ext_wide_xy_bank_y;
+ output [ OP_ADDR_W -1:0] ext_wide_xy_addr_y;
+ output [ WORD_EXT_W -1:0] ext_wide_x_din_y;
+ output [ WORD_EXT_W -1:0] ext_wide_y_din_y;
+
+ // "narrow" storage write port, Y set
+ output ext_narrow_xy_ena_y;
+ output [ BANK_ADDR_W -1:0] ext_narrow_xy_bank_y;
+ output [ OP_ADDR_W -1:0] ext_narrow_xy_addr_y;
+ output [ WORD_EXT_W -1:0] ext_narrow_x_din_y;
+ output [ WORD_EXT_W -1:0] ext_narrow_y_din_y;
+
+ // I/O input memory 1 read port; address is {bank, operand word index}
+ output io_in_1_en;
+ output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr;
+ input [ WORD_W -1:0] io_in_1_dout;
+
+ // I/O input memory 2 read port; address is {bank, operand word index}
+ output io_in_2_en;
+ output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr;
+ input [ WORD_W -1:0] io_in_2_dout;
+
+ // I/O output memory write port; address is {bank, operand word index}
+ output io_out_en;
+ output io_out_we;
+ output [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr;
+ output [ WORD_W -1:0] io_out_din;
+
+
+ //
+ // FSM Declaration
+ //
+ // Seven of the eight 3-bit encodings are used; 3'b111 is unassigned.
+ //
+ // IDLE          - waits for ena
+ // LATENCY_PRE1  - first of two unconditional states before BUSY; read
+ //                 addresses and enables are already being driven here
+ //                 (presumably to cover input memory read latency - TODO confirm)
+ // LATENCY_PRE2  - second unconditional state before BUSY
+ // BUSY          - held until io_fsm_done is set
+ // EXTRA         - one cycle; input memory 1 gets one more read when sel_in
+ //                 equals BANK_IN_1_N_COEFF
+ // LATENCY_POST1 - first of two unconditional states before IDLE; storage
+ //                 writes are still being issued
+ // LATENCY_POST2 - last state; rdy is raised again from here
+ //
+ localparam [2:0] IO_FSM_STATE_IDLE = 3'b000;
+ localparam [2:0] IO_FSM_STATE_LATENCY_PRE1 = 3'b001;
+ localparam [2:0] IO_FSM_STATE_LATENCY_PRE2 = 3'b010;
+ localparam [2:0] IO_FSM_STATE_BUSY = 3'b011;
+ localparam [2:0] IO_FSM_STATE_EXTRA = 3'b100;
+ localparam [2:0] IO_FSM_STATE_LATENCY_POST1 = 3'b101;
+ localparam [2:0] IO_FSM_STATE_LATENCY_POST2 = 3'b110;
+
+ // current state (registered) and next state (assigned by the combinational
+ // transition process)
+ reg [2:0] io_fsm_state = IO_FSM_STATE_IDLE;
+ reg [2:0] io_fsm_state_next;
+
+
+ //
+ // Control Signals (I/O memory side)
+ //
+ // Registers behind the io_in_1_*, io_in_2_* and io_out_* ports. Addresses
+ // are kept as separate bank and operand-word-index parts.
+ //
+ reg in_1_en = 1'b0;
+ reg [BANK_ADDR_W -1:0] in_1_addr_bank;
+ reg [ OP_ADDR_W -1:0] in_1_addr_op;
+
+ reg in_2_en = 1'b0;
+ reg [BANK_ADDR_W -1:0] in_2_addr_bank;
+ reg [ OP_ADDR_W -1:0] in_2_addr_op;
+
+ // NOTE(review): out_we and out_din are never assigned anywhere in this
+ // module, and out_en is only ever cleared (on reset).
+ reg out_en = 1'b0;
+ reg out_we;
+ reg [BANK_ADDR_W -1:0] out_addr_bank;
+ reg [ OP_ADDR_W -1:0] out_addr_op;
+ reg [ WORD_W -1:0] out_din;
+
+
+ //
+ // Control Signals (storage side)
+ //
+ // Registers behind the ext_* ports: one enable/bank/address/data group per
+ // combination of wide/narrow storage and X/Y port set. Only the enables
+ // have an initial value.
+ //
+ reg wide_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] wide_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] wide_xy_addr_x;
+ reg [ WORD_EXT_W -1:0] wide_x_din_x;
+ reg [ WORD_EXT_W -1:0] wide_y_din_x;
+
+ reg narrow_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] narrow_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] narrow_xy_addr_x;
+ reg [ WORD_EXT_W -1:0] narrow_x_din_x;
+ reg [ WORD_EXT_W -1:0] narrow_y_din_x;
+
+ reg wide_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] wide_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] wide_xy_addr_y;
+ reg [ WORD_EXT_W -1:0] wide_x_din_y;
+ reg [ WORD_EXT_W -1:0] wide_y_din_y;
+
+ reg narrow_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] narrow_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] narrow_xy_addr_y;
+ reg [ WORD_EXT_W -1:0] narrow_x_din_y;
+ reg [ WORD_EXT_W -1:0] narrow_y_din_y;
+
+
+    //
+    // Output Mapping: I/O memory ports
+    //
+    // Each port is a plain copy of its internal register. Memory addresses
+    // are formed as {bank, operand word index}.
+    //
+    assign io_in_1_en   = in_1_en;
+    assign io_in_2_en   = in_2_en;
+    assign io_out_en    = out_en;
+    assign io_out_we    = out_we;
+    assign io_out_din   = out_din;
+
+    assign io_in_1_addr = {in_1_addr_bank, in_1_addr_op};
+    assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op};
+    assign io_out_addr  = {out_addr_bank,  out_addr_op};
+
+
+    //
+    // Output Mapping: storage write ports
+    //
+    // Grouped by signal kind (enable, bank, address, data) across the four
+    // wide/narrow x X/Y ports.
+    //
+    assign ext_wide_xy_ena_x    = wide_xy_ena_x;
+    assign ext_wide_xy_ena_y    = wide_xy_ena_y;
+    assign ext_narrow_xy_ena_x  = narrow_xy_ena_x;
+    assign ext_narrow_xy_ena_y  = narrow_xy_ena_y;
+
+    assign ext_wide_xy_bank_x   = wide_xy_bank_x;
+    assign ext_wide_xy_bank_y   = wide_xy_bank_y;
+    assign ext_narrow_xy_bank_x = narrow_xy_bank_x;
+    assign ext_narrow_xy_bank_y = narrow_xy_bank_y;
+
+    assign ext_wide_xy_addr_x   = wide_xy_addr_x;
+    assign ext_wide_xy_addr_y   = wide_xy_addr_y;
+    assign ext_narrow_xy_addr_x = narrow_xy_addr_x;
+    assign ext_narrow_xy_addr_y = narrow_xy_addr_y;
+
+    assign ext_wide_x_din_x     = wide_x_din_x;
+    assign ext_wide_y_din_x     = wide_y_din_x;
+    assign ext_wide_x_din_y     = wide_x_din_y;
+    assign ext_wide_y_din_y     = wide_y_din_y;
+
+    assign ext_narrow_x_din_x   = narrow_x_din_x;
+    assign ext_narrow_y_din_x   = narrow_y_din_x;
+    assign ext_narrow_x_din_y   = narrow_x_din_y;
+    assign ext_narrow_y_din_y   = narrow_y_din_y;
+
+
+    //
+    // Read Address Delay Line
+    //
+    // Two-stage shift register per input port: *_dly1 holds the operand word
+    // index from one clock earlier, *_dly2 the one from two clocks earlier.
+    // No reset is applied; the stages simply follow in_*_addr_op.
+    //
+    reg [OP_ADDR_W -1:0] in_1_addr_op_dly1;
+    reg [OP_ADDR_W -1:0] in_1_addr_op_dly2;
+    reg [OP_ADDR_W -1:0] in_2_addr_op_dly1;
+    reg [OP_ADDR_W -1:0] in_2_addr_op_dly2;
+
+    always @(posedge clk) begin
+        // input port 1
+        in_1_addr_op_dly1 <= in_1_addr_op;
+        in_1_addr_op_dly2 <= in_1_addr_op_dly1;
+        // input port 2
+        in_2_addr_op_dly1 <= in_2_addr_op;
+        in_2_addr_op_dly2 <= in_2_addr_op_dly1;
+    end
+
+
+    //
+    // Decoded Selectors
+    //
+    // One-bit flags decoded from the opcode and selector inputs, shared by
+    // the clocked processes of this module.
+    //
+
+    // which of the two input opcodes is being executed (if any)
+    wire opcode_is_wide   = (opcode == UOP_OPCODE_INPUT_TO_WIDE);
+    wire opcode_is_narrow = (opcode == UOP_OPCODE_INPUT_TO_NARROW);
+    wire opcode_is_input  = opcode_is_wide || opcode_is_narrow;
+
+    // this source bank gets one additional word transferred after the operand
+    wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF);
+
+    // destination port set
+    wire sel_crt_is_x = (sel_crt == UOP_CRT_X);
+    wire sel_crt_is_y = (sel_crt == UOP_CRT_Y);
+
+    // source input memory
+    wire sel_aux_is_1 = (sel_aux == UOP_AUX_1);
+    wire sel_aux_is_2 = (sel_aux == UOP_AUX_2);
+
+    // end-of-operand flags, driven by assign statements further down
+    wire in_1_addr_op_is_last,      in_2_addr_op_is_last;
+    wire in_1_addr_next_op_is_last, in_2_addr_next_op_is_last;
+
+
+    //
+    // Input Memory Read Enables
+    //
+    // Decoded from io_fsm_state_next, so each enable is registered on the
+    // same clock edge that moves the FSM into the corresponding state.
+    // out_en is only ever cleared by reset; no state sets it.
+    //
+    always @(posedge clk) begin
+        if (rst) begin
+            in_1_en <= 1'b0;
+            in_2_en <= 1'b0;
+            out_en  <= 1'b0;
+        end else begin
+            // both reads are off unless a state below overrides this
+            in_1_en <= 1'b0;
+            in_2_en <= 1'b0;
+            case (io_fsm_state_next)
+                // regular operand words: read from the selected input memory
+                IO_FSM_STATE_LATENCY_PRE1,
+                IO_FSM_STATE_LATENCY_PRE2,
+                IO_FSM_STATE_BUSY: begin
+                    in_1_en <= opcode_is_input && sel_aux_is_1;
+                    in_2_en <= opcode_is_input && sel_aux_is_2;
+                end
+                // the additional word exists only for input memory 1
+                IO_FSM_STATE_EXTRA:
+                    in_1_en <= opcode_is_input && sel_aux_is_1 && sel_in_needs_extra;
+                default: ;
+            endcase
+        end
+    end
+
+    //
+    // Storage Write Enables
+    //
+    // Decoded from the current FSM state. During BUSY, EXTRA and
+    // LATENCY_POST1 the port chosen by opcode and sel_crt is enabled; during
+    // LATENCY_POST2 only a narrow port can be enabled, and only when the
+    // source bank carries the additional word.
+    //
+    always @(posedge clk) begin
+        if (rst) begin
+            wide_xy_ena_x   <= 1'b0;
+            wide_xy_ena_y   <= 1'b0;
+            narrow_xy_ena_x <= 1'b0;
+            narrow_xy_ena_y <= 1'b0;
+        end else begin
+            // all ports disabled unless a state below overrides this
+            wide_xy_ena_x   <= 1'b0;
+            wide_xy_ena_y   <= 1'b0;
+            narrow_xy_ena_x <= 1'b0;
+            narrow_xy_ena_y <= 1'b0;
+            case (io_fsm_state)
+                IO_FSM_STATE_BUSY,
+                IO_FSM_STATE_EXTRA,
+                IO_FSM_STATE_LATENCY_POST1: begin
+                    wide_xy_ena_x   <= opcode_is_wide   && sel_crt_is_x;
+                    wide_xy_ena_y   <= opcode_is_wide   && sel_crt_is_y;
+                    narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x;
+                    narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y;
+                end
+                IO_FSM_STATE_LATENCY_POST2: begin
+                    narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra;
+                    narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra;
+                end
+                default: ;
+            endcase
+        end
+    end
+
+
+    //
+    // Storage Write Data
+    //
+    // The word read from the selected input memory is zero-extended from
+    // WORD_W to WORD_EXT_W bits and driven on both the x and y data buses of
+    // the selected storage port. All other data registers get WORD_EXT_DNC.
+    //
+    wire [WORD_EXT_W -1:0] io_in_dout_mux =
+        {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_dout : io_in_2_dout};
+
+    always @(posedge clk) begin
+        // defaults, overridden below for the one port being written
+        wide_x_din_x   <= WORD_EXT_DNC;
+        wide_y_din_x   <= WORD_EXT_DNC;
+        wide_x_din_y   <= WORD_EXT_DNC;
+        wide_y_din_y   <= WORD_EXT_DNC;
+        narrow_x_din_x <= WORD_EXT_DNC;
+        narrow_y_din_x <= WORD_EXT_DNC;
+        narrow_x_din_y <= WORD_EXT_DNC;
+        narrow_y_din_y <= WORD_EXT_DNC;
+
+        if ((io_fsm_state == IO_FSM_STATE_BUSY)  ||
+            (io_fsm_state == IO_FSM_STATE_EXTRA) ||
+            (io_fsm_state == IO_FSM_STATE_LATENCY_POST1)) begin
+            // regular operand words
+            if (opcode_is_wide && sel_crt_is_x) begin
+                wide_x_din_x <= io_in_dout_mux;
+                wide_y_din_x <= io_in_dout_mux;
+            end
+            if (opcode_is_wide && sel_crt_is_y) begin
+                wide_x_din_y <= io_in_dout_mux;
+                wide_y_din_y <= io_in_dout_mux;
+            end
+            if (opcode_is_narrow && sel_crt_is_x) begin
+                narrow_x_din_x <= io_in_dout_mux;
+                narrow_y_din_x <= io_in_dout_mux;
+            end
+            if (opcode_is_narrow && sel_crt_is_y) begin
+                narrow_x_din_y <= io_in_dout_mux;
+                narrow_y_din_y <= io_in_dout_mux;
+            end
+        end else if (io_fsm_state == IO_FSM_STATE_LATENCY_POST2) begin
+            // the additional word, narrow storage only
+            if (opcode_is_narrow && sel_in_needs_extra) begin
+                if (sel_crt_is_x) begin
+                    narrow_x_din_x <= io_in_dout_mux;
+                    narrow_y_din_x <= io_in_dout_mux;
+                end
+                if (sel_crt_is_y) begin
+                    narrow_x_din_y <= io_in_dout_mux;
+                    narrow_y_din_y <= io_in_dout_mux;
+                end
+            end
+        end
+    end
+
+
+    //
+    // Storage Write Address
+    //
+    // Regular words go to bank sel_out at the read index delayed by two
+    // clocks. The additional word goes to the fixed location
+    // {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF}. Ports that are not written get
+    // the don't-care bank and address.
+    //
+    wire [OP_ADDR_W -1:0] in_addr_op_dly2_mux =
+        sel_aux_is_1 ? in_1_addr_op_dly2 : in_2_addr_op_dly2;
+
+    always @(posedge clk) begin
+        // defaults, overridden below for the one port being written
+        {wide_xy_bank_x,   wide_xy_addr_x  } <= {BANK_DNC, OP_ADDR_DNC};
+        {wide_xy_bank_y,   wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
+        {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
+        {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+
+        if ((io_fsm_state == IO_FSM_STATE_BUSY)  ||
+            (io_fsm_state == IO_FSM_STATE_EXTRA) ||
+            (io_fsm_state == IO_FSM_STATE_LATENCY_POST1)) begin
+            // regular operand words
+            if (opcode_is_wide && sel_crt_is_x) begin
+                wide_xy_bank_x <= sel_out;
+                wide_xy_addr_x <= in_addr_op_dly2_mux;
+            end
+            if (opcode_is_wide && sel_crt_is_y) begin
+                wide_xy_bank_y <= sel_out;
+                wide_xy_addr_y <= in_addr_op_dly2_mux;
+            end
+            if (opcode_is_narrow && sel_crt_is_x) begin
+                narrow_xy_bank_x <= sel_out;
+                narrow_xy_addr_x <= in_addr_op_dly2_mux;
+            end
+            if (opcode_is_narrow && sel_crt_is_y) begin
+                narrow_xy_bank_y <= sel_out;
+                narrow_xy_addr_y <= in_addr_op_dly2_mux;
+            end
+        end else if (io_fsm_state == IO_FSM_STATE_LATENCY_POST2) begin
+            // the additional word, narrow storage only
+            if (opcode_is_narrow && sel_in_needs_extra) begin
+                if (sel_crt_is_x)
+                    {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
+                if (sel_crt_is_y)
+                    {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
+            end
+        end
+    end
+
+
+    //
+    // I/O Memory Address Counters
+    //
+    // Each port keeps its current address {bank, op} plus a precomputed
+    // "next" address. Like the read enables, the counters are decoded from
+    // io_fsm_state_next. The +1 is applied to the full {bank, op} vector.
+    // In every state not listed below the registers hold their value.
+    //
+    reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next;
+    reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next;
+    reg [BANK_ADDR_W + OP_ADDR_W -1:0] out_addr_next;
+
+    // operand word index part of the "next" addresses
+    wire [OP_ADDR_W -1:0] in_1_addr_next_op = in_1_addr_next[OP_ADDR_W -1:0];
+    wire [OP_ADDR_W -1:0] in_2_addr_next_op = in_2_addr_next[OP_ADDR_W -1:0];
+    wire [OP_ADDR_W -1:0] out_addr_next_op  = out_addr_next [OP_ADDR_W -1:0];
+
+    // end-of-operand flags for the current and the upcoming read index
+    assign in_1_addr_op_is_last      = (in_1_addr_op      == word_index_last);
+    assign in_2_addr_op_is_last      = (in_2_addr_op      == word_index_last);
+    assign in_1_addr_next_op_is_last = (in_1_addr_next_op == word_index_last);
+    assign in_2_addr_next_op_is_last = (in_2_addr_next_op == word_index_last);
+
+    always @(posedge clk) begin
+        if (io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1) begin
+            // start of a transfer: word 0 now, word 1 queued
+            {in_1_addr_bank, in_1_addr_op} <= {sel_in, OP_ADDR_ZERO};
+            in_1_addr_next                 <= {sel_in, OP_ADDR_ONE};
+
+            {in_2_addr_bank, in_2_addr_op} <= {sel_in, OP_ADDR_ZERO};
+            in_2_addr_next                 <= {sel_in, OP_ADDR_ONE};
+
+            {out_addr_bank, out_addr_op }  <= {sel_out, OP_ADDR_ZERO};
+            out_addr_next                  <= {sel_out, OP_ADDR_ONE};
+        end else if ((io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE2) ||
+                     (io_fsm_state_next == IO_FSM_STATE_BUSY)) begin
+            // steady state: take the queued address, queue the one after it
+            {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
+            in_1_addr_next                 <= in_1_addr_next + 1'b1;
+
+            {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next;
+            in_2_addr_next                 <= in_2_addr_next + 1'b1;
+
+            {out_addr_bank, out_addr_op }  <= out_addr_next;
+            out_addr_next                  <= out_addr_next + 1'b1;
+        end else if ((io_fsm_state_next == IO_FSM_STATE_EXTRA) &&
+                     opcode_is_input && sel_aux_is_1 && sel_in_needs_extra) begin
+            // one more step on input memory 1 only, for the additional word
+            {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
+            in_1_addr_next                 <= in_1_addr_next + 1'b1;
+        end
+    end
+
+
+
+    //
+    // FSM State Register
+    //
+    // Synchronous reset to IDLE; otherwise the state follows the
+    // combinationally computed io_fsm_state_next.
+    //
+    always @(posedge clk) begin
+        if (rst) begin
+            io_fsm_state <= IO_FSM_STATE_IDLE;
+        end else begin
+            io_fsm_state <= io_fsm_state_next;
+        end
+    end
+
+
+    //
+    // Busy Exit Logic
+    //
+    // io_fsm_done is a registered flag. It is set for the following cycle
+    // when, in the BUSY state of an input opcode, the queued read index of
+    // the selected input memory equals word_index_last. In every other case
+    // it is cleared.
+    //
+    reg io_fsm_done = 1'b0;
+
+    always @(posedge clk)
+        if ((io_fsm_state == IO_FSM_STATE_BUSY) && opcode_is_input &&
+            ((sel_aux_is_1 && in_1_addr_next_op_is_last) ||
+             (sel_aux_is_2 && in_2_addr_next_op_is_last)))
+            io_fsm_done <= 1'b1;
+        else
+            io_fsm_done <= 1'b0;
+
+
+    //
+    // FSM Transition Logic
+    //
+    // Purely combinational next-state function of the current state, ena and
+    // io_fsm_done:
+    //
+    //   IDLE -> PRE1 (on ena) -> PRE2 -> BUSY (until io_fsm_done) -> EXTRA
+    //        -> POST1 -> POST2 -> IDLE
+    //
+    // io_fsm_state is 3 bits wide but only seven encodings are named. The
+    // default assignment and the default branch make sure
+    // io_fsm_state_next is driven on every path, so no latch is inferred,
+    // and the unused encoding 3'b111 falls back to IDLE instead of holding
+    // a stale next state.
+    //
+    always @* begin
+        //
+        io_fsm_state_next = IO_FSM_STATE_IDLE;
+        //
+        case (io_fsm_state)
+            IO_FSM_STATE_IDLE:          io_fsm_state_next = ena         ? IO_FSM_STATE_LATENCY_PRE1 : IO_FSM_STATE_IDLE;
+            IO_FSM_STATE_LATENCY_PRE1:  io_fsm_state_next =               IO_FSM_STATE_LATENCY_PRE2;
+            IO_FSM_STATE_LATENCY_PRE2:  io_fsm_state_next =               IO_FSM_STATE_BUSY;
+            IO_FSM_STATE_BUSY:          io_fsm_state_next = io_fsm_done ? IO_FSM_STATE_EXTRA        : IO_FSM_STATE_BUSY;
+            IO_FSM_STATE_EXTRA:         io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST1;
+            IO_FSM_STATE_LATENCY_POST1: io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST2;
+            IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next =               IO_FSM_STATE_IDLE;
+            default:                    io_fsm_state_next =               IO_FSM_STATE_IDLE;
+        endcase
+        //
+    end
+
+
+    //
+    // Ready Flag
+    //
+    // rdy is high after reset. While the FSM is idle it tracks the inverse of
+    // ena, so it drops when a transfer is requested; it is raised again from
+    // LATENCY_POST2. In all other states it keeps its value.
+    //
+    reg rdy_reg = 1'b1;
+
+    assign rdy = rdy_reg;
+
+    always @(posedge clk) begin
+        if (rst)
+            rdy_reg <= 1'b1;
+        else if (io_fsm_state == IO_FSM_STATE_IDLE)
+            rdy_reg <= ~ena;
+        else if (io_fsm_state == IO_FSM_STATE_LATENCY_POST2)
+            rdy_reg <= 1'b1;
+    end
+
+
+endmodule