From e340b1489b08905e3d8acd17686e178028de7922 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Thu, 3 Oct 2019 16:47:39 +0300 Subject: Added more micro-operations, also added "general worker" module. The worker is basically a block memory data mover, but it can also do some supporting operations required for the Garner's formula part of the exponentiation. --- rtl/modexpng_io_manager.v | 347 ++++++++++++++++++++++++++-------------------- 1 file changed, 197 insertions(+), 150 deletions(-) (limited to 'rtl/modexpng_io_manager.v') diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v index 81f582f..dfbd676 100644 --- a/rtl/modexpng_io_manager.v +++ b/rtl/modexpng_io_manager.v @@ -15,42 +15,45 @@ module modexpng_io_manager word_index_last, - ext_wide_xy_ena_x, - ext_wide_xy_bank_x, - ext_wide_xy_addr_x, - ext_wide_x_din_x, - ext_wide_y_din_x, - - ext_narrow_xy_ena_x, - ext_narrow_xy_bank_x, - ext_narrow_xy_addr_x, - ext_narrow_x_din_x, - ext_narrow_y_din_x, - - ext_wide_xy_ena_y, - ext_wide_xy_bank_y, - ext_wide_xy_addr_y, - ext_wide_x_din_y, - ext_wide_y_din_y, - - ext_narrow_xy_ena_y, - ext_narrow_xy_bank_y, - ext_narrow_xy_addr_y, - ext_narrow_x_din_y, - ext_narrow_y_din_y, + io_wide_xy_ena_x, + io_wide_xy_bank_x, + io_wide_xy_addr_x, + io_wide_x_din_x, + io_wide_y_din_x, + + io_narrow_xy_ena_x, + io_narrow_xy_bank_x, + io_narrow_xy_addr_x, + io_narrow_x_din_x, + io_narrow_y_din_x, + + io_wide_xy_ena_y, + io_wide_xy_bank_y, + io_wide_xy_addr_y, + io_wide_x_din_y, + io_wide_y_din_y, + + io_narrow_xy_ena_y, + io_narrow_xy_bank_y, + io_narrow_xy_addr_y, + io_narrow_x_din_y, + io_narrow_y_din_y, io_in_1_en, io_in_1_addr, - io_in_1_dout, + io_in_1_din, io_in_2_en, io_in_2_addr, - io_in_2_dout, + io_in_2_din, io_out_en, io_out_we, io_out_addr, - io_out_din + io_out_dout, + + wrk_narrow_x_din_x_trunc, + wrk_narrow_x_din_y_trunc ); // @@ -78,42 +81,45 @@ module modexpng_io_manager input [ OP_ADDR_W -1:0] word_index_last; - output ext_wide_xy_ena_x; - output [ BANK_ADDR_W -1:0] ext_wide_xy_bank_x; - output [ OP_ADDR_W -1:0] ext_wide_xy_addr_x; - output [ WORD_EXT_W -1:0] ext_wide_x_din_x; - output [ WORD_EXT_W -1:0] ext_wide_y_din_x; - - output ext_narrow_xy_ena_x; - output [ BANK_ADDR_W -1:0] ext_narrow_xy_bank_x; - output [ OP_ADDR_W -1:0] ext_narrow_xy_addr_x; - output [ WORD_EXT_W -1:0] ext_narrow_x_din_x; - output [ WORD_EXT_W -1:0] ext_narrow_y_din_x; - - output ext_wide_xy_ena_y; - output [ BANK_ADDR_W -1:0] ext_wide_xy_bank_y; - output [ OP_ADDR_W -1:0] ext_wide_xy_addr_y; - output [ WORD_EXT_W -1:0] ext_wide_x_din_y; - output [ WORD_EXT_W -1:0] ext_wide_y_din_y; - - output ext_narrow_xy_ena_y; - output [ BANK_ADDR_W -1:0] ext_narrow_xy_bank_y; - output [ OP_ADDR_W -1:0] ext_narrow_xy_addr_y; - output [ WORD_EXT_W -1:0] ext_narrow_x_din_y; - output [ WORD_EXT_W -1:0] ext_narrow_y_din_y; + output io_wide_xy_ena_x; + output [ BANK_ADDR_W -1:0] io_wide_xy_bank_x; + output [ OP_ADDR_W -1:0] io_wide_xy_addr_x; + output [ WORD_EXT_W -1:0] io_wide_x_din_x; + output [ WORD_EXT_W -1:0] io_wide_y_din_x; + + output io_narrow_xy_ena_x; + output [ BANK_ADDR_W -1:0] io_narrow_xy_bank_x; + output [ OP_ADDR_W -1:0] io_narrow_xy_addr_x; + output [ WORD_EXT_W -1:0] io_narrow_x_din_x; + output [ WORD_EXT_W -1:0] io_narrow_y_din_x; + + output io_wide_xy_ena_y; + output [ BANK_ADDR_W -1:0] io_wide_xy_bank_y; + output [ OP_ADDR_W -1:0] io_wide_xy_addr_y; + output [ WORD_EXT_W -1:0] io_wide_x_din_y; + output [ WORD_EXT_W -1:0] io_wide_y_din_y; + + output io_narrow_xy_ena_y; + output [ BANK_ADDR_W -1:0] io_narrow_xy_bank_y; + output [ OP_ADDR_W -1:0] io_narrow_xy_addr_y; + output [ WORD_EXT_W -1:0] io_narrow_x_din_y; + output [ WORD_EXT_W -1:0] io_narrow_y_din_y; output io_in_1_en; output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr; - input [ WORD_W -1:0] io_in_1_dout; + input [ WORD_W -1:0] io_in_1_din; output io_in_2_en; output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr; - input [ WORD_W -1:0] io_in_2_dout; + input [ WORD_W -1:0] io_in_2_din; output io_out_en; output io_out_we; output [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr; - output [ WORD_W -1:0] io_out_din; + output [ WORD_W -1:0] io_out_dout; + + output [ WORD_W -1:0] wrk_narrow_x_din_x_trunc; + output [ WORD_W -1:0] wrk_narrow_x_din_y_trunc; // @@ -126,6 +132,7 @@ module modexpng_io_manager localparam [2:0] IO_FSM_STATE_EXTRA = 3'b100; localparam [2:0] IO_FSM_STATE_LATENCY_POST1 = 3'b101; localparam [2:0] IO_FSM_STATE_LATENCY_POST2 = 3'b110; + localparam [2:0] IO_FSM_STATE_STOP = 3'b111; reg [2:0] io_fsm_state = IO_FSM_STATE_IDLE; reg [2:0] io_fsm_state_next; @@ -143,10 +150,11 @@ module modexpng_io_manager reg [ OP_ADDR_W -1:0] in_2_addr_op; reg out_en = 1'b0; - reg out_we; reg [BANK_ADDR_W -1:0] out_addr_bank; reg [ OP_ADDR_W -1:0] out_addr_op; - reg [ WORD_W -1:0] out_din; + reg [ WORD_W -1:0] out_dout; + + reg [ OP_ADDR_W -1:0] dummy_addr_op; // @@ -187,51 +195,54 @@ module modexpng_io_manager assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op}; assign io_out_en = out_en; - assign io_out_we = out_we; + assign io_out_we = io_out_en; // we can only write! assign io_out_addr = {out_addr_bank, out_addr_op}; - assign io_out_din = out_din; + assign io_out_dout = out_dout; // // Mapping // - assign ext_wide_xy_ena_x = wide_xy_ena_x; - assign ext_wide_xy_bank_x = wide_xy_bank_x; - assign ext_wide_xy_addr_x = wide_xy_addr_x; - assign ext_wide_x_din_x = wide_x_din_x; - assign ext_wide_y_din_x = wide_y_din_x; + assign io_wide_xy_ena_x = wide_xy_ena_x; + assign io_wide_xy_bank_x = wide_xy_bank_x; + assign io_wide_xy_addr_x = wide_xy_addr_x; + assign io_wide_x_din_x = wide_x_din_x; + assign io_wide_y_din_x = wide_y_din_x; - assign ext_narrow_xy_ena_x = narrow_xy_ena_x; - assign ext_narrow_xy_bank_x = narrow_xy_bank_x; - assign ext_narrow_xy_addr_x = narrow_xy_addr_x; - assign ext_narrow_x_din_x = narrow_x_din_x; - assign ext_narrow_y_din_x = narrow_y_din_x; + assign io_narrow_xy_ena_x = narrow_xy_ena_x; + assign io_narrow_xy_bank_x = narrow_xy_bank_x; + assign io_narrow_xy_addr_x = narrow_xy_addr_x; + assign io_narrow_x_din_x = narrow_x_din_x; + assign io_narrow_y_din_x = narrow_y_din_x; - assign ext_wide_xy_ena_y = wide_xy_ena_y; - assign ext_wide_xy_bank_y = wide_xy_bank_y; - assign ext_wide_xy_addr_y = wide_xy_addr_y; - assign ext_wide_x_din_y = wide_x_din_y; - assign ext_wide_y_din_y = wide_y_din_y; + assign io_wide_xy_ena_y = wide_xy_ena_y; + assign io_wide_xy_bank_y = wide_xy_bank_y; + assign io_wide_xy_addr_y = wide_xy_addr_y; + assign io_wide_x_din_y = wide_x_din_y; + assign io_wide_y_din_y = wide_y_din_y; - assign ext_narrow_xy_ena_y = narrow_xy_ena_y; - assign ext_narrow_xy_bank_y = narrow_xy_bank_y; - assign ext_narrow_xy_addr_y = narrow_xy_addr_y; - assign ext_narrow_x_din_y = narrow_x_din_y; - assign ext_narrow_y_din_y = narrow_y_din_y; + assign io_narrow_xy_ena_y = narrow_xy_ena_y; + assign io_narrow_xy_bank_y = narrow_xy_bank_y; + assign io_narrow_xy_addr_y = narrow_xy_addr_y; + assign io_narrow_x_din_y = narrow_x_din_y; + assign io_narrow_y_din_y = narrow_y_din_y; // // Delays // - reg [ OP_ADDR_W -1:0] in_1_addr_op_dly1; - reg [ OP_ADDR_W -1:0] in_1_addr_op_dly2; - reg [ OP_ADDR_W -1:0] in_2_addr_op_dly1; - reg [ OP_ADDR_W -1:0] in_2_addr_op_dly2; + reg [OP_ADDR_W -1:0] in_1_addr_op_dly1; + reg [OP_ADDR_W -1:0] in_1_addr_op_dly2; + reg [OP_ADDR_W -1:0] in_2_addr_op_dly1; + reg [OP_ADDR_W -1:0] in_2_addr_op_dly2; + reg [OP_ADDR_W -1:0] dummy_addr_op_dly1; + reg [OP_ADDR_W -1:0] dummy_addr_op_dly2; always @(posedge clk) begin // - {in_1_addr_op_dly2, in_1_addr_op_dly1} <= {in_1_addr_op_dly1, in_1_addr_op}; - {in_2_addr_op_dly2, in_2_addr_op_dly1} <= {in_2_addr_op_dly1, in_2_addr_op}; + {in_1_addr_op_dly2, in_1_addr_op_dly1} <= {in_1_addr_op_dly1, in_1_addr_op}; + {in_2_addr_op_dly2, in_2_addr_op_dly1} <= {in_2_addr_op_dly1, in_2_addr_op}; + {dummy_addr_op_dly2, dummy_addr_op_dly1} <= {dummy_addr_op_dly1, dummy_addr_op}; // end @@ -241,10 +252,14 @@ module modexpng_io_manager // wire opcode_is_input = (opcode == UOP_OPCODE_INPUT_TO_WIDE) || (opcode == UOP_OPCODE_INPUT_TO_NARROW); - wire opcode_is_wide = (opcode == UOP_OPCODE_INPUT_TO_WIDE ); - wire opcode_is_narrow = (opcode == UOP_OPCODE_INPUT_TO_NARROW); + wire opcode_is_output = opcode == UOP_OPCODE_OUTPUT_FROM_NARROW; + + wire opcode_is_input_wide = opcode == UOP_OPCODE_INPUT_TO_WIDE; + wire opcode_is_input_narrow = opcode == UOP_OPCODE_INPUT_TO_NARROW; - wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF); + wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF) || + (sel_in == BANK_IN_2_P_COEFF) || + (sel_in == BANK_IN_2_Q_COEFF) ; wire sel_crt_is_x = sel_crt == UOP_CRT_X; wire sel_crt_is_y = sel_crt == UOP_CRT_Y; @@ -252,22 +267,18 @@ module modexpng_io_manager wire sel_aux_is_1 = sel_aux == UOP_AUX_1; wire sel_aux_is_2 = sel_aux == UOP_AUX_2; - wire in_1_addr_op_is_last; - wire in_2_addr_op_is_last; - - wire in_1_addr_next_op_is_last; - wire in_2_addr_next_op_is_last; - + wire in_1_addr_op_next_is_last; + wire in_2_addr_op_next_is_last; + wire dummy_addr_op_next_is_last; // - // Enable Logic + // Source Enable Logic // always @(posedge clk) // if (rst) begin in_1_en <= 1'b0; in_2_en <= 1'b0; - out_en <= 1'b0; end else case (io_fsm_state_next) // IO_FSM_STATE_LATENCY_PRE1, @@ -279,7 +290,7 @@ module modexpng_io_manager // IO_FSM_STATE_EXTRA: begin in_1_en <= opcode_is_input && sel_aux_is_1 && sel_in_needs_extra; - in_2_en <= 1'b0; + in_2_en <= opcode_is_input && sel_aux_is_2 && sel_in_needs_extra; end // default: begin @@ -290,7 +301,7 @@ module modexpng_io_manager endcase // - // Enable Logic + // Destination Enable Logic // always @(posedge clk) // @@ -301,38 +312,52 @@ module modexpng_io_manager narrow_xy_ena_x <= 1'b0; narrow_xy_ena_y <= 1'b0; // + out_en <= 1'b0; + // end else case (io_fsm_state) // IO_FSM_STATE_BUSY, IO_FSM_STATE_EXTRA, IO_FSM_STATE_LATENCY_POST1: begin - wide_xy_ena_x <= opcode_is_wide && sel_crt_is_x; - wide_xy_ena_y <= opcode_is_wide && sel_crt_is_y; - narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x; - narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y; + // + wide_xy_ena_x <= opcode_is_input_wide && sel_crt_is_x; + wide_xy_ena_y <= opcode_is_input_wide && sel_crt_is_y; + narrow_xy_ena_x <= opcode_is_input_narrow && sel_crt_is_x; + narrow_xy_ena_y <= opcode_is_input_narrow && sel_crt_is_y; + // + out_en <= opcode_is_output; + // end // IO_FSM_STATE_LATENCY_POST2: begin + // wide_xy_ena_x <= 1'b0; wide_xy_ena_y <= 1'b0; - narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra; - narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra; + narrow_xy_ena_x <= opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra; + narrow_xy_ena_y <= opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra; + // + out_en <= opcode_is_output; + // end // default: begin + // wide_xy_ena_x <= 1'b0; wide_xy_ena_y <= 1'b0; narrow_xy_ena_x <= 1'b0; narrow_xy_ena_y <= 1'b0; + // + out_en <= 1'b0; + // end // endcase // - // Data Logic + // Output Data Logic // - wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_dout : io_in_2_dout}; + wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_din : io_in_2_din}; always @(posedge clk) begin // @@ -345,20 +370,30 @@ module modexpng_io_manager narrow_x_din_y <= WORD_EXT_DNC; narrow_y_din_y <= WORD_EXT_DNC; // + out_dout <= WORD_DNC; + // case (io_fsm_state) // IO_FSM_STATE_BUSY, IO_FSM_STATE_EXTRA, IO_FSM_STATE_LATENCY_POST1: begin - if (opcode_is_wide && sel_crt_is_x) {wide_x_din_x, wide_y_din_x} <= {2{io_in_dout_mux}}; - if (opcode_is_wide && sel_crt_is_y) {wide_x_din_y, wide_y_din_y} <= {2{io_in_dout_mux}}; - if (opcode_is_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}}; - if (opcode_is_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}}; + // + if (opcode_is_input_wide && sel_crt_is_x) {wide_x_din_x, wide_y_din_x} <= {2{io_in_dout_mux}}; // TODO: Make external ports smaller (WORD_W, not WORD_EXT_W)?? + if (opcode_is_input_wide && sel_crt_is_y) {wide_x_din_y, wide_y_din_y} <= {2{io_in_dout_mux}}; + if (opcode_is_input_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}}; + if (opcode_is_input_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}}; + // + if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_trunc : wrk_narrow_x_din_y_trunc; + // end // IO_FSM_STATE_LATENCY_POST2: begin - if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}}; - if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}}; + // + if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}}; + if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}}; + // + if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_trunc : wrk_narrow_x_din_y_trunc; + // end // endcase @@ -367,7 +402,7 @@ module modexpng_io_manager // - // Address Logic + // Destination Address Logic // wire [OP_ADDR_W -1:0] in_addr_op_dly2_mux = sel_aux_is_1 ? in_1_addr_op_dly2 : in_2_addr_op_dly2; @@ -378,21 +413,24 @@ module modexpng_io_manager {wide_xy_bank_y, wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC}; {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC}; {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC}; + {out_addr_bank, out_addr_op } <= {BANK_DNC, OP_ADDR_DNC}; // case (io_fsm_state) // IO_FSM_STATE_BUSY, IO_FSM_STATE_EXTRA, IO_FSM_STATE_LATENCY_POST1: begin - if (opcode_is_wide && sel_crt_is_x) {wide_xy_bank_x, wide_xy_addr_x } <= {sel_out, in_addr_op_dly2_mux}; - if (opcode_is_wide && sel_crt_is_y) {wide_xy_bank_y, wide_xy_addr_y } <= {sel_out, in_addr_op_dly2_mux}; - if (opcode_is_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux}; - if (opcode_is_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux}; + if (opcode_is_input_wide && sel_crt_is_x) {wide_xy_bank_x, wide_xy_addr_x } <= {sel_out, in_addr_op_dly2_mux}; + if (opcode_is_input_wide && sel_crt_is_y) {wide_xy_bank_y, wide_xy_addr_y } <= {sel_out, in_addr_op_dly2_mux}; + if (opcode_is_input_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux}; + if (opcode_is_input_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux}; + if (opcode_is_output ) {out_addr_bank, out_addr_op} <= {sel_out, dummy_addr_op_dly2}; end // IO_FSM_STATE_LATENCY_POST2: begin - if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF}; - if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF}; + if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF }; + if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF }; + if (opcode_is_output ) {out_addr_bank, out_addr_op } <= {sel_out, dummy_addr_op_dly2}; end // endcase @@ -401,21 +439,19 @@ module modexpng_io_manager // - // Address Logic + // Source Address Logic // reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next; reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next; - reg [BANK_ADDR_W + OP_ADDR_W -1:0] out_addr_next; + reg [ OP_ADDR_W -1:0] dummy_addr_next; - wire [OP_ADDR_W -1:0] in_1_addr_next_op = in_1_addr_next[OP_ADDR_W -1:0]; - wire [OP_ADDR_W -1:0] in_2_addr_next_op = in_2_addr_next[OP_ADDR_W -1:0]; - wire [OP_ADDR_W -1:0] out_addr_next_op = out_addr_next [OP_ADDR_W -1:0]; + wire [OP_ADDR_W -1:0] in_1_addr_op_next = in_1_addr_next[OP_ADDR_W -1:0]; + wire [OP_ADDR_W -1:0] in_2_addr_op_next = in_2_addr_next[OP_ADDR_W -1:0]; + wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next; - assign in_1_addr_op_is_last = in_1_addr_op == word_index_last; - assign in_2_addr_op_is_last = in_2_addr_op == word_index_last; - - assign in_1_addr_next_op_is_last = in_1_addr_next_op == word_index_last; - assign in_2_addr_next_op_is_last = in_2_addr_next_op == word_index_last; + assign in_1_addr_op_next_is_last = in_1_addr_op_next == word_index_last; + assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last; + assign dummy_addr_op_next_is_last = dummy_addr_op_next == word_index_last; always @(posedge clk) // @@ -423,36 +459,42 @@ module modexpng_io_manager // IO_FSM_STATE_LATENCY_PRE1: begin // - {in_1_addr_bank, in_1_addr_op} <= {sel_in, OP_ADDR_ZERO}; - {in_2_addr_bank, in_2_addr_op} <= {sel_in, OP_ADDR_ZERO}; - {out_addr_bank, out_addr_op } <= {sel_out, OP_ADDR_ZERO}; + {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO}; + {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO}; + { dummy_addr_op} <= { OP_ADDR_ZERO}; // - in_1_addr_next <= {sel_in, OP_ADDR_ONE}; - in_2_addr_next <= {sel_in, OP_ADDR_ONE}; - out_addr_next <= {sel_out, OP_ADDR_ONE}; + in_1_addr_next <= {sel_in, OP_ADDR_ONE}; + in_2_addr_next <= {sel_in, OP_ADDR_ONE}; + dummy_addr_next <= { OP_ADDR_ONE}; // end // IO_FSM_STATE_LATENCY_PRE2, IO_FSM_STATE_BUSY: begin // - {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next; - {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next; - {out_addr_bank, out_addr_op } <= out_addr_next; + {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next; + {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next; + { dummy_addr_op} <= dummy_addr_next; // - in_1_addr_next <= in_1_addr_next + 1'b1; - in_2_addr_next <= in_2_addr_next + 1'b1; - out_addr_next <= out_addr_next + 1'b1; + in_1_addr_next <= in_1_addr_next + 1'b1; + in_2_addr_next <= in_2_addr_next + 1'b1; + dummy_addr_next <= dummy_addr_next + 1'b1; // end // IO_FSM_STATE_EXTRA: // - if (opcode_is_input && sel_aux_is_1 && (sel_in == BANK_IN_1_N_COEFF)) begin + if (opcode_is_input && sel_in_needs_extra) begin // - {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next; + if (sel_aux_is_1) begin + {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next; + in_1_addr_next <= in_1_addr_next + 1'b1; + end // - in_1_addr_next <= in_1_addr_next + 1'b1; + if (sel_aux_is_2) begin + {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next; + in_2_addr_next <= in_2_addr_next + 1'b1; + end // end // @@ -481,28 +523,33 @@ module modexpng_io_manager if (io_fsm_state == IO_FSM_STATE_BUSY) begin // if (opcode_is_input) begin - if (sel_aux_is_1 && in_1_addr_next_op_is_last) io_fsm_done <= 1'b1; - if (sel_aux_is_2 && in_2_addr_next_op_is_last) io_fsm_done <= 1'b1; + if (sel_aux_is_1 && in_1_addr_op_next_is_last) io_fsm_done <= 1'b1; + if (sel_aux_is_2 && in_2_addr_op_next_is_last) io_fsm_done <= 1'b1; + end else if (opcode_is_output) begin + if (dummy_addr_op_next_is_last) io_fsm_done <= 1'b1; end - + // end // end - + // // FSM Transition Logic // + wire [2:0] io_fsm_state_after_busy = opcode_is_input ? IO_FSM_STATE_EXTRA : IO_FSM_STATE_LATENCY_POST1; + always @* begin // case (io_fsm_state) IO_FSM_STATE_IDLE: io_fsm_state_next = ena ? IO_FSM_STATE_LATENCY_PRE1 : IO_FSM_STATE_IDLE ; IO_FSM_STATE_LATENCY_PRE1: io_fsm_state_next = IO_FSM_STATE_LATENCY_PRE2 ; IO_FSM_STATE_LATENCY_PRE2: io_fsm_state_next = IO_FSM_STATE_BUSY ; - IO_FSM_STATE_BUSY: io_fsm_state_next = io_fsm_done ? IO_FSM_STATE_EXTRA : IO_FSM_STATE_BUSY ; + IO_FSM_STATE_BUSY: io_fsm_state_next = io_fsm_done ? io_fsm_state_after_busy : IO_FSM_STATE_BUSY ; IO_FSM_STATE_EXTRA: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST1 ; IO_FSM_STATE_LATENCY_POST1: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST2 ; - IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next = IO_FSM_STATE_IDLE ; + IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next = IO_FSM_STATE_STOP ; + IO_FSM_STATE_STOP: io_fsm_state_next = IO_FSM_STATE_IDLE ; endcase // end @@ -517,10 +564,10 @@ module modexpng_io_manager always @(posedge clk) // - if (rst) rdy_reg <= 1'b1; + if (rst) rdy_reg <= 1'b1; else case (io_fsm_state) - IO_FSM_STATE_IDLE: rdy_reg <= ~ena; - IO_FSM_STATE_LATENCY_POST2: rdy_reg <= 1'b1; + IO_FSM_STATE_IDLE: rdy_reg <= ~ena; + IO_FSM_STATE_STOP: rdy_reg <= 1'b1; endcase -- cgit v1.2.3