//======================================================================
//
// Copyright (c) 2019, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================
module modexpng_io_manager
(
clk,
rst_n,
ena,
rdy,
sel_crt,
sel_aux,
sel_in,
sel_out,
opcode,
word_index_last,
io_wide_xy_ena_x,
io_wide_xy_bank_x,
io_wide_xy_addr_x,
io_wide_x_din_x,
io_wide_y_din_x,
io_narrow_xy_ena_x,
io_narrow_xy_bank_x,
io_narrow_xy_addr_x,
io_narrow_x_din_x,
io_narrow_y_din_x,
io_wide_xy_ena_y,
io_wide_xy_bank_y,
io_wide_xy_addr_y,
io_wide_x_din_y,
io_wide_y_din_y,
io_narrow_xy_ena_y,
io_narrow_xy_bank_y,
io_narrow_xy_addr_y,
io_narrow_x_din_y,
io_narrow_y_din_y,
io_in_1_en,
io_in_1_addr,
io_in_1_din,
io_in_2_en,
io_in_2_addr,
io_in_2_din,
io_out_en,
io_out_we,
io_out_addr,
io_out_dout,
wrk_narrow_x_din_x_lsb,
wrk_narrow_y_din_x_lsb,
wrk_narrow_x_din_y_lsb,
wrk_narrow_y_din_y_lsb,
ladder_steps,
ladder_d,
ladder_p,
ladder_q,
ladder_done
);
//
// Headers
//
`include "modexpng_parameters.vh"
`include "modexpng_microcode.vh"
//
// Ports
//
input clk;
input rst_n;
input ena;
output rdy;
input [ UOP_CRT_W -1:0] sel_crt;
input [ UOP_AUX_W -1:0] sel_aux;
input [ BANK_ADDR_W -1:0] sel_in;
input [ BANK_ADDR_W -1:0] sel_out;
input [ UOP_OPCODE_W -1:0] opcode;
input [ OP_ADDR_W -1:0] word_index_last;
output io_wide_xy_ena_x;
output [ BANK_ADDR_W -1:0] io_wide_xy_bank_x;
output [ OP_ADDR_W -1:0] io_wide_xy_addr_x;
output [ WORD_EXT_W -1:0] io_wide_x_din_x;
output [ WORD_EXT_W -1:0] io_wide_y_din_x;
output io_narrow_xy_ena_x;
output [ BANK_ADDR_W -1:0] io_narrow_xy_bank_x;
output [ OP_ADDR_W -1:0] io_narrow_xy_addr_x;
output [ WORD_EXT_W -1:0] io_narrow_x_din_x;
output [ WORD_EXT_W -1:0] io_narrow_y_din_x;
output io_wide_xy_ena_y;
output [ BANK_ADDR_W -1:0] io_wide_xy_bank_y;
output [ OP_ADDR_W -1:0] io_wide_xy_addr_y;
output [ WORD_EXT_W -1:0] io_wide_x_din_y;
output [ WORD_EXT_W -1:0] io_wide_y_din_y;
output io_narrow_xy_ena_y;
output [ BANK_ADDR_W -1:0] io_narrow_xy_bank_y;
output [ OP_ADDR_W -1:0] io_narrow_xy_addr_y;
output [ WORD_EXT_W -1:0] io_narrow_x_din_y;
output [ WORD_EXT_W -1:0] io_narrow_y_din_y;
output io_in_1_en;
output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr;
input [ WORD_W -1:0] io_in_1_din;
output io_in_2_en;
output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr;
input [ WORD_W -1:0] io_in_2_din;
output io_out_en;
output io_out_we;
output [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr;
output [ WORD_W -1:0] io_out_dout;
input [ WORD_W -1:0] wrk_narrow_x_din_x_lsb;
input [ WORD_W -1:0] wrk_narrow_y_din_x_lsb;
input [ WORD_W -1:0] wrk_narrow_x_din_y_lsb;
input [ WORD_W -1:0] wrk_narrow_y_din_y_lsb;
input [ BIT_INDEX_W -1:0] ladder_steps;
output ladder_d;
output ladder_p;
output ladder_q;
output ladder_done;
//
// FSM Declaration
//
// FSM state encodings. The numeric codes are NOT in traversal order; the
// transition logic of this module walks the states as
//   IDLE -> LATENCY_PRE1..PRE4 -> (BUSY1 -> BUSY2)* -> [EXTRA1 -> EXTRA2]
//        -> LATENCY_POST1..POST4 -> STOP -> IDLE
// Codes 4'h9 and 4'hE are not assigned to any state.
localparam [3:0] IO_FSM_STATE_IDLE = 4'h0;
localparam [3:0] IO_FSM_STATE_LATENCY_PRE1 = 4'h1;
localparam [3:0] IO_FSM_STATE_LATENCY_PRE2 = 4'h2;
localparam [3:0] IO_FSM_STATE_LATENCY_PRE3 = 4'h3;
localparam [3:0] IO_FSM_STATE_LATENCY_PRE4 = 4'h4;
localparam [3:0] IO_FSM_STATE_BUSY1 = 4'hA;
localparam [3:0] IO_FSM_STATE_BUSY2 = 4'hB;
localparam [3:0] IO_FSM_STATE_EXTRA1 = 4'hC;
localparam [3:0] IO_FSM_STATE_EXTRA2 = 4'hD;
localparam [3:0] IO_FSM_STATE_LATENCY_POST1 = 4'h5;
localparam [3:0] IO_FSM_STATE_LATENCY_POST2 = 4'h6;
localparam [3:0] IO_FSM_STATE_LATENCY_POST3 = 4'h7;
localparam [3:0] IO_FSM_STATE_LATENCY_POST4 = 4'h8;
localparam [3:0] IO_FSM_STATE_STOP = 4'hF;
// Current state register (initialised to IDLE).
reg [3:0] io_fsm_state = IO_FSM_STATE_IDLE;
// Combinational next-state value; driven by the FSM transition logic.
reg [3:0] io_fsm_state_next;
// State entered when the BUSY1/BUSY2 loop terminates.
wire [3:0] io_fsm_state_after_busy;
//
// Control Signals (io_in_1_*, io_in_2_*, io_out_*)
//
reg in_1_en = 1'b0;
reg [BANK_ADDR_W -1:0] in_1_addr_bank;
reg [ OP_ADDR_W -1:0] in_1_addr_op;
reg in_2_en = 1'b0;
reg [BANK_ADDR_W -1:0] in_2_addr_bank;
reg [ OP_ADDR_W -1:0] in_2_addr_op;
reg out_en = 1'b0;
reg [BANK_ADDR_W -1:0] out_addr_bank;
reg [ OP_ADDR_W -1:0] out_addr_op;
reg [ WORD_W -1:0] out_dout;
reg [ OP_ADDR_W -1:0] dummy_addr_op;
//
// Control Signals (io_wide_*, io_narrow_*)
//
reg wide_xy_ena_x = 1'b0;
reg [BANK_ADDR_W -1:0] wide_xy_bank_x;
reg [ OP_ADDR_W -1:0] wide_xy_addr_x;
reg [ WORD_EXT_W -1:0] wide_x_din_x;
reg [ WORD_EXT_W -1:0] wide_y_din_x;
reg narrow_xy_ena_x = 1'b0;
reg [BANK_ADDR_W -1:0] narrow_xy_bank_x;
reg [ OP_ADDR_W -1:0] narrow_xy_addr_x;
reg [ WORD_EXT_W -1:0] narrow_x_din_x;
reg [ WORD_EXT_W -1:0] narrow_y_din_x;
reg wide_xy_ena_y = 1'b0;
reg [BANK_ADDR_W -1:0] wide_xy_bank_y;
reg [ OP_ADDR_W -1:0] wide_xy_addr_y;
reg [ WORD_EXT_W -1:0] wide_x_din_y;
reg [ WORD_EXT_W -1:0] wide_y_din_y;
reg narrow_xy_ena_y = 1'b0;
reg [BANK_ADDR_W -1:0] narrow_xy_bank_y;
reg [ OP_ADDR_W -1:0] narrow_xy_addr_y;
reg [ WORD_EXT_W -1:0] narrow_x_din_y;
reg [ WORD_EXT_W -1:0] narrow_y_din_y;
//
// Mapping (io_in_1_*, io_in_2_*, io_out_*)
//
assign io_in_1_en = in_1_en;
assign io_in_1_addr = {in_1_addr_bank, in_1_addr_op};
assign io_in_2_en = in_2_en;
assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op};
assign io_out_en = out_en;
assign io_out_we = io_out_en; // we can only write!
assign io_out_addr = {out_addr_bank, out_addr_op};
assign io_out_dout = out_dout;
//
// Mapping (io_wide_*, io_narrow_*)
//
assign io_wide_xy_ena_x = wide_xy_ena_x;
assign io_wide_xy_bank_x = wide_xy_bank_x;
assign io_wide_xy_addr_x = wide_xy_addr_x;
assign io_wide_x_din_x = wide_x_din_x;
assign io_wide_y_din_x = wide_y_din_x;
assign io_narrow_xy_ena_x = narrow_xy_ena_x;
assign io_narrow_xy_bank_x = narrow_xy_bank_x;
assign io_narrow_xy_addr_x = narrow_xy_addr_x;
assign io_narrow_x_din_x = narrow_x_din_x;
assign io_narrow_y_din_x = narrow_y_din_x;
assign io_wide_xy_ena_y = wide_xy_ena_y;
assign io_wide_xy_bank_y = wide_xy_bank_y;
assign io_wide_xy_addr_y = wide_xy_addr_y;
assign io_wide_x_din_y = wide_x_din_y;
assign io_wide_y_din_y = wide_y_din_y;
assign io_narrow_xy_ena_y = narrow_xy_ena_y;
assign io_narrow_xy_bank_y = narrow_xy_bank_y;
assign io_narrow_xy_addr_y = narrow_xy_addr_y;
assign io_narrow_x_din_y = narrow_x_din_y;
assign io_narrow_y_din_y = narrow_y_din_y;
//
// Delays
//
reg [OP_ADDR_W -1:0] in_1_addr_op_dly1;
reg [OP_ADDR_W -1:0] in_2_addr_op_dly1;
reg [OP_ADDR_W -1:0] dummy_addr_op_dly1;
reg [WORD_W -1:0] io_in_1_din_dly1;
reg [WORD_W -1:0] io_in_2_din_dly1;
reg [WORD_W -1:0] wrk_narrow_x_din_x_lsb_dly1;
reg [WORD_W -1:0] wrk_narrow_y_din_x_lsb_dly1;
reg [WORD_W -1:0] wrk_narrow_x_din_y_lsb_dly1;
reg [WORD_W -1:0] wrk_narrow_y_din_y_lsb_dly1;
// First pipeline stage: every *_dly1 register is a plain one-clock delayed
// copy of its source signal (no reset, no enable).
always @(posedge clk) begin
    // word addresses issued to the two input ports and the dummy counter
    in_1_addr_op_dly1  <= in_1_addr_op;
    in_2_addr_op_dly1  <= in_2_addr_op;
    dummy_addr_op_dly1 <= dummy_addr_op;
    // data words returned by the two input ports
    io_in_1_din_dly1 <= io_in_1_din;
    io_in_2_din_dly1 <= io_in_2_din;
    // least significant words coming from the narrow storage inputs
    wrk_narrow_x_din_x_lsb_dly1 <= wrk_narrow_x_din_x_lsb;
    wrk_narrow_y_din_x_lsb_dly1 <= wrk_narrow_y_din_x_lsb;
    wrk_narrow_x_din_y_lsb_dly1 <= wrk_narrow_x_din_y_lsb;
    wrk_narrow_y_din_y_lsb_dly1 <= wrk_narrow_y_din_y_lsb;
end
//
// Handy Wires
//
// Decoded views of the micro-operation fields (opcode, sel_in, sel_crt,
// sel_aux). All of these are purely combinational.
// opcode_is_input: either of the two INPUT_TO_* opcodes.
wire opcode_is_input = (opcode == UOP_OPCODE_INPUT_TO_WIDE) || (opcode == UOP_OPCODE_INPUT_TO_NARROW);
wire opcode_is_output = opcode == UOP_OPCODE_OUTPUT_FROM_NARROW;
wire opcode_is_ladder_init = opcode == UOP_OPCODE_LADDER_INIT;
wire opcode_is_ladder_step = opcode == UOP_OPCODE_LADDER_STEP;
// opcode_is_ladder: LADDER_INIT or LADDER_STEP.
wire opcode_is_ladder = opcode_is_ladder_init || opcode_is_ladder_step;
wire opcode_is_input_wide = opcode == UOP_OPCODE_INPUT_TO_WIDE;
wire opcode_is_input_narrow = opcode == UOP_OPCODE_INPUT_TO_NARROW;
// sel_in_needs_extra: the selected input bank is one of the three *_COEFF
// banks; the EXTRA1 state transfers one additional word for these banks.
wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF) ||
(sel_in == BANK_IN_2_P_COEFF) ||
(sel_in == BANK_IN_2_Q_COEFF) ;
wire sel_crt_is_x = sel_crt == UOP_CRT_X;
wire sel_crt_is_y = sel_crt == UOP_CRT_Y;
wire sel_aux_is_1 = sel_aux == UOP_AUX_1;
wire sel_aux_is_2 = sel_aux == UOP_AUX_2;
//
// Ladder Init/Step Logic
//
`ifdef MODEXPNG_ENABLE_DEBUG
//`define DEFINE_MODEXPNG_TRUNCATE_LADDER XXX /*specify step index to abort ladder*/
`ifdef DEFINE_MODEXPNG_TRUNCATE_LADDER
localparam [BIT_INDEX_W-1:0] MODEXPNG_TRUNCATE_LADDER = `DEFINE_MODEXPNG_TRUNCATE_LADDER;
`endif
`endif
reg ladder_d_r;
reg ladder_p_r;
reg ladder_q_r;
reg ladder_done_r = 1'b0;
assign ladder_d = ladder_d_r;
assign ladder_p = ladder_p_r;
assign ladder_q = ladder_q_r;
assign ladder_done = ladder_done_r;
reg [BIT_INDEX_W -1:0] ladder_index;
reg [BIT_INDEX_W -1:0] ladder_index_next;
wire [ OP_ADDR_W -1:0] ladder_index_msb = ladder_index[BIT_INDEX_W-1-: OP_ADDR_W];
wire [ WORD_MUX_W -1:0] ladder_index_lsb = ladder_index[ WORD_MUX_W-1-:WORD_MUX_W];
wire ladder_index_is_zero = ladder_index == BIT_INDEX_ZERO;
// Ladder bit index bookkeeping. Updated only on the clock edge that takes the
// FSM into LATENCY_PRE1, i.e. once per started operation.
always @(posedge clk) begin
//
if (io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1) begin
//
// LADDER_INIT: load the index from ladder_steps, precompute index-1 and
// clear the done flag.
if (opcode_is_ladder_init) begin
ladder_index <= ladder_steps;
ladder_index_next <= ladder_steps - 1'b1;
ladder_done_r <= 1'b0;
end
//
// LADDER_STEP: advance to the precomputed index. ladder_index_is_zero is
// sampled before this edge's update, so the done flag is raised by the
// step that starts while ladder_index is already zero.
if (opcode_is_ladder_step) begin
ladder_index <= ladder_index_next;
ladder_index_next <= ladder_index_next - 1'b1;
if (ladder_index_is_zero) ladder_done_r <= 1'b1;
end
//
end
//
// Debug-only early termination: force the done flag when a LADDER_STEP
// reaches STOP with ladder_index equal to MODEXPNG_TRUNCATE_LADDER.
`ifdef MODEXPNG_ENABLE_DEBUG
`ifdef DEFINE_MODEXPNG_TRUNCATE_LADDER
if ((io_fsm_state_next == IO_FSM_STATE_STOP) && opcode_is_ladder_step && (ladder_index == MODEXPNG_TRUNCATE_LADDER))
ladder_done_r <= 1'b1;
`endif
`endif
//
end
//
// Ladder Mux
//
reg ladder_dpq_mux_dly1;
reg ladder_dpq_mux_dly2;
wire ladder_dpq_mux = ladder_dpq_mux_dly2;
// Combinational bit selector: picks the bit of the delayed input-port-2 word
// addressed by the low part of the ladder index.
always @* begin
    case (ladder_index_lsb)
        4'd0:  ladder_dpq_mux_dly1 = io_in_2_din_dly1[ 0];
        4'd1:  ladder_dpq_mux_dly1 = io_in_2_din_dly1[ 1];
        4'd2:  ladder_dpq_mux_dly1 = io_in_2_din_dly1[ 2];
        4'd3:  ladder_dpq_mux_dly1 = io_in_2_din_dly1[ 3];
        4'd4:  ladder_dpq_mux_dly1 = io_in_2_din_dly1[ 4];
        4'd5:  ladder_dpq_mux_dly1 = io_in_2_din_dly1[ 5];
        4'd6:  ladder_dpq_mux_dly1 = io_in_2_din_dly1[ 6];
        4'd7:  ladder_dpq_mux_dly1 = io_in_2_din_dly1[ 7];
        4'd8:  ladder_dpq_mux_dly1 = io_in_2_din_dly1[ 8];
        4'd9:  ladder_dpq_mux_dly1 = io_in_2_din_dly1[ 9];
        4'd10: ladder_dpq_mux_dly1 = io_in_2_din_dly1[10];
        4'd11: ladder_dpq_mux_dly1 = io_in_2_din_dly1[11];
        4'd12: ladder_dpq_mux_dly1 = io_in_2_din_dly1[12];
        4'd13: ladder_dpq_mux_dly1 = io_in_2_din_dly1[13];
        4'd14: ladder_dpq_mux_dly1 = io_in_2_din_dly1[14];
        4'd15: ladder_dpq_mux_dly1 = io_in_2_din_dly1[15];
    endcase
end
// Register the selected bit; ladder_dpq_mux is an alias of this register.
always @(posedge clk) begin
    ladder_dpq_mux_dly2 <= ladder_dpq_mux_dly1;
end
// Capture the selected ladder bit into the d/p/q output registers. Only
// ladder opcodes update them; which register is written depends on the
// current FSM state (BUSY1 -> d, LATENCY_POST1 -> p, LATENCY_POST3 -> q).
always @(posedge clk) begin
    if (opcode_is_ladder) begin
        if (io_fsm_state == IO_FSM_STATE_BUSY1)
            ladder_d_r <= ladder_dpq_mux;
        else if (io_fsm_state == IO_FSM_STATE_LATENCY_POST1)
            ladder_p_r <= ladder_dpq_mux;
        else if (io_fsm_state == IO_FSM_STATE_LATENCY_POST3)
            ladder_q_r <= ladder_dpq_mux;
    end
end
//
// Source Enable Logic
//
// Read enables for the two input ports (asynchronous active-low reset).
// The case is keyed on io_fsm_state_next, so these registers change on the
// same clock edge as io_fsm_state itself.
always @(posedge clk or negedge rst_n)
//
if (!rst_n) begin
in_1_en <= 1'b0;
in_2_en <= 1'b0;
end else case (io_fsm_state_next)
//
// Regular word reads: port 1 for input opcodes with AUX_1; port 2 for input
// opcodes with AUX_2 and for every ladder opcode.
IO_FSM_STATE_LATENCY_PRE1,
IO_FSM_STATE_LATENCY_PRE3,
IO_FSM_STATE_BUSY1: begin
in_1_en <= opcode_is_input && sel_aux_is_1;
in_2_en <= (opcode_is_input && sel_aux_is_2) || opcode_is_ladder;
end
//
// Additional read, issued only when the selected bank needs the extra word.
IO_FSM_STATE_EXTRA1: begin
in_1_en <= opcode_is_input && sel_aux_is_1 && sel_in_needs_extra;
in_2_en <= opcode_is_input && sel_aux_is_2 && sel_in_needs_extra;
end
//
// All other states: both ports disabled.
default: begin
in_1_en <= 1'b0;
in_2_en <= 1'b0;
end
//
endcase
//
// Destination Enable Logic
//
// Enables for the wide/narrow destinations and for the output port
// (asynchronous active-low reset). The case is keyed on the current
// io_fsm_state, so the enables become visible one clock after the state.
always @(posedge clk or negedge rst_n)
//
if (!rst_n) begin
//
wide_xy_ena_x <= 1'b0;
wide_xy_ena_y <= 1'b0;
narrow_xy_ena_x <= 1'b0;
narrow_xy_ena_y <= 1'b0;
//
out_en <= 1'b0;
//
end else case (io_fsm_state)
//
// Regular transfers: exactly the destination selected by the opcode
// (wide/narrow/output) and by sel_crt (x/y) is enabled.
IO_FSM_STATE_BUSY1,
IO_FSM_STATE_EXTRA1,
IO_FSM_STATE_LATENCY_POST1: begin
//
wide_xy_ena_x <= opcode_is_input_wide && sel_crt_is_x;
wide_xy_ena_y <= opcode_is_input_wide && sel_crt_is_y;
narrow_xy_ena_x <= opcode_is_input_narrow && sel_crt_is_x;
narrow_xy_ena_y <= opcode_is_input_narrow && sel_crt_is_y;
//
out_en <= opcode_is_output;
//
end
//
// LATENCY_POST3: wide destinations stay off; the narrow destination is
// enabled only for banks that need the extra word; output still enabled.
IO_FSM_STATE_LATENCY_POST3: begin
//
wide_xy_ena_x <= 1'b0;
wide_xy_ena_y <= 1'b0;
narrow_xy_ena_x <= opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra;
narrow_xy_ena_y <= opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra;
//
out_en <= opcode_is_output;
//
end
//
// All other states: everything disabled.
default: begin
//
wide_xy_ena_x <= 1'b0;
wide_xy_ena_y <= 1'b0;
narrow_xy_ena_x <= 1'b0;
narrow_xy_ena_y <= 1'b0;
//
out_en <= 1'b0;
//
end
//
endcase
//
// Output Data Logic
//
reg [ WORD_W -1:0] io_in_dout_mux_dly2;
wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, io_in_dout_mux_dly2};
reg [WORD_W -1:0] wrk_narrow_din_x_lsb_mux_dly2;
reg [WORD_W -1:0] wrk_narrow_din_y_lsb_mux_dly2;
wire [WORD_W -1:0] wrk_narrow_din_x_lsb_mux = wrk_narrow_din_x_lsb_mux_dly2;
wire [WORD_W -1:0] wrk_narrow_din_y_lsb_mux = wrk_narrow_din_y_lsb_mux_dly2;
// Second pipeline stage: select, according to sel_aux, which of the two
// delayed sources is forwarded.
//   io_in_dout_mux_dly2           - input port 1 vs. input port 2 data
//   wrk_narrow_din_x_lsb_mux_dly2 - narrow x vs. narrow y word ("x" side)
//   wrk_narrow_din_y_lsb_mux_dly2 - narrow x vs. narrow y word ("y" side)
// All three are flip-flops that are read by another clocked process, so they
// must be written with non-blocking assignments; a blocking assignment here
// would make the value seen by the reader depend on simulator process
// ordering and could differ from the synthesised hardware.
always @(posedge clk) begin
    //
    io_in_dout_mux_dly2 <= sel_aux_is_1 ? io_in_1_din_dly1 : io_in_2_din_dly1;
    //
    wrk_narrow_din_x_lsb_mux_dly2 <= sel_aux_is_1 ? wrk_narrow_x_din_x_lsb_dly1 : wrk_narrow_y_din_x_lsb_dly1;
    wrk_narrow_din_y_lsb_mux_dly2 <= sel_aux_is_1 ? wrk_narrow_x_din_y_lsb_dly1 : wrk_narrow_y_din_y_lsb_dly1;
    //
end
// Data registers for the wide/narrow destinations and the output port.
// Every register is first defaulted to its "don't care" constant each clock;
// the case below overrides the default for the selected destination only
// (the last non-blocking assignment in the block wins).
always @(posedge clk) begin
//
wide_x_din_x <= WORD_EXT_DNC;
wide_y_din_x <= WORD_EXT_DNC;
wide_x_din_y <= WORD_EXT_DNC;
wide_y_din_y <= WORD_EXT_DNC;
narrow_x_din_x <= WORD_EXT_DNC;
narrow_y_din_x <= WORD_EXT_DNC;
narrow_x_din_y <= WORD_EXT_DNC;
narrow_y_din_y <= WORD_EXT_DNC;
//
out_dout <= WORD_DNC;
//
case (io_fsm_state)
//
// Regular transfers: the same (zero-extended) input word is written to both
// the x_din and y_din register of the selected destination; for output
// opcodes, sel_crt chooses between the two narrow LSB muxes.
IO_FSM_STATE_BUSY1,
IO_FSM_STATE_EXTRA1,
IO_FSM_STATE_LATENCY_POST1: begin
//
if (opcode_is_input_wide && sel_crt_is_x) {wide_x_din_x, wide_y_din_x} <= {2{io_in_dout_mux}};
if (opcode_is_input_wide && sel_crt_is_y) {wide_x_din_y, wide_y_din_y} <= {2{io_in_dout_mux}};
if (opcode_is_input_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
if (opcode_is_input_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
//
if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_din_x_lsb_mux : wrk_narrow_din_y_lsb_mux;
//
end
//
// LATENCY_POST3: only the narrow destination is written, and only for banks
// that need the extra word; output data is driven as in the regular case.
IO_FSM_STATE_LATENCY_POST3: begin
//
if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
//
if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_din_x_lsb_mux : wrk_narrow_din_y_lsb_mux;
//
end
//
endcase
//
end
//
// Destination Address Logic
//
reg [OP_ADDR_W -1:0] in_addr_op_dly2_mux;
reg [OP_ADDR_W -1:0] in_addr_op_dly3_mux;
reg [OP_ADDR_W -1:0] in_addr_op_dly4_mux;
wire [OP_ADDR_W -1:0] in_addr_op_mux = in_addr_op_dly4_mux;
reg [OP_ADDR_W -1:0] dummy_addr_op_dly2;
reg [OP_ADDR_W -1:0] dummy_addr_op_dly3;
reg [OP_ADDR_W -1:0] dummy_addr_op_dly4;
// Address delay lines (stages 2..4). The source word address is selected by
// sel_aux at stage 2 and then shifted twice more; the dummy address is
// shifted through three plain register stages.
always @(posedge clk) begin
    // stage 2: choose between the delayed port 1 / port 2 word address
    in_addr_op_dly2_mux <= sel_aux_is_1 ? in_1_addr_op_dly1 : in_2_addr_op_dly1;
    // stages 3 and 4: shift register
    {in_addr_op_dly4_mux, in_addr_op_dly3_mux} <= {in_addr_op_dly3_mux, in_addr_op_dly2_mux};
    // dummy address: stages 2, 3 and 4 as one shift register
    {dummy_addr_op_dly4, dummy_addr_op_dly3, dummy_addr_op_dly2} <=
        {dummy_addr_op_dly3, dummy_addr_op_dly2, dummy_addr_op_dly1};
end
// Bank/word address registers for the wide/narrow destinations and the
// output port. All are defaulted to the "don't care" constants each clock and
// overridden below for the selected destination only.
always @(posedge clk) begin
//
{wide_xy_bank_x, wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC};
{wide_xy_bank_y, wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
{narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
{narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
{out_addr_bank, out_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
//
case (io_fsm_state)
//
// Regular transfers: bank comes from sel_out; the word address is the
// delayed source address (inputs) or the delayed dummy address (output).
IO_FSM_STATE_BUSY1,
IO_FSM_STATE_EXTRA1,
IO_FSM_STATE_LATENCY_POST1: begin
if (opcode_is_input_wide && sel_crt_is_x) {wide_xy_bank_x, wide_xy_addr_x } <= {sel_out, in_addr_op_mux };
if (opcode_is_input_wide && sel_crt_is_y) {wide_xy_bank_y, wide_xy_addr_y } <= {sel_out, in_addr_op_mux };
if (opcode_is_input_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_mux };
if (opcode_is_input_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_mux };
if (opcode_is_output ) {out_addr_bank, out_addr_op} <= {sel_out, dummy_addr_op_dly4};
end
//
// LATENCY_POST3: the extra word goes to the fixed location
// {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF} instead of a sel_out-based address.
IO_FSM_STATE_LATENCY_POST3: begin
if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF };
if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF };
if (opcode_is_output ) {out_addr_bank, out_addr_op } <= {sel_out, dummy_addr_op_dly4};
end
//
endcase
//
end
//
// Source Address Logic
//
reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next;
reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next;
reg [ OP_ADDR_W -1:0] dummy_addr_next;
wire [OP_ADDR_W -1:0] in_1_addr_op_next = in_1_addr_next[OP_ADDR_W -1:0];
wire [OP_ADDR_W -1:0] in_2_addr_op_next = in_2_addr_next[OP_ADDR_W -1:0];
wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next;
reg in_1_addr_op_is_last = 1'b0;
reg in_2_addr_op_is_last = 1'b0;
reg dummy_addr_op_is_last = 1'b0;
// Bank/word address registers for the two input ports, plus the dummy word
// counter. Defaulted to the "don't care" constants each clock and overridden
// below. Keyed on io_fsm_state_next, so the addresses change on the same
// clock edge as io_fsm_state.
always @(posedge clk) begin
//
{in_1_addr_bank, in_1_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
{in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
{ dummy_addr_op} <= { OP_ADDR_DNC};
//
case (io_fsm_state_next)
//
// First address: word zero of the bank given by sel_in. For ladder opcodes
// port 2 is left at the "don't care" address.
IO_FSM_STATE_LATENCY_PRE1: begin
//
{in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO};
if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO};
else {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC };
{ dummy_addr_op} <= { OP_ADDR_ZERO};
//
end
//
// Second address: taken from the *_addr_next registers, except that ladder
// opcodes address bank BANK_IN_2_D at the word given by ladder_index_msb.
IO_FSM_STATE_LATENCY_PRE3: begin
//
{in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
else {in_2_addr_bank, in_2_addr_op } <= {BANK_IN_2_D, ladder_index_msb};
{ dummy_addr_op} <= dummy_addr_next;
//
end
//
// Busy loop: always follow the *_addr_next registers.
IO_FSM_STATE_BUSY1: begin
//
{in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
{in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
{ dummy_addr_op} <= dummy_addr_next;
//
end
//
// Extra word: only for input opcodes on banks that need it, and only on the
// port selected by sel_aux.
IO_FSM_STATE_EXTRA1:
//
if (opcode_is_input && sel_in_needs_extra) begin
//
if (sel_aux_is_1) {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
if (sel_aux_is_2) {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next;
//
end
//
endcase
//
end
// Look-ahead address registers: hold the address that the source address
// logic will issue next. They keep their value in states not listed below.
always @(posedge clk)
//
case (io_fsm_state_next)
//
// Start of an operation: preload word one of the bank given by sel_in.
IO_FSM_STATE_LATENCY_PRE1: begin
//
in_1_addr_next <= {sel_in, OP_ADDR_ONE};
in_2_addr_next <= {sel_in, OP_ADDR_ONE};
dummy_addr_next <= { OP_ADDR_ONE};
//
end
//
// Increment; for ladder opcodes port 2 is instead pointed at bank
// BANK_IN_2_P, using ladder_index_msb with its top address bit forced to 1.
IO_FSM_STATE_LATENCY_PRE3: begin
//
in_1_addr_next <= in_1_addr_next + 1'b1;
if (!opcode_is_ladder) in_2_addr_next <= in_2_addr_next + 1'b1;
else in_2_addr_next <= {BANK_IN_2_P, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]};
dummy_addr_next <= dummy_addr_next + 1'b1;
//
end
//
// Increment; for ladder opcodes port 2 is instead pointed at bank
// BANK_IN_2_Q, using ladder_index_msb with its top address bit forced to 1.
IO_FSM_STATE_BUSY1: begin
//
in_1_addr_next <= in_1_addr_next + 1'b1;
if (!opcode_is_ladder) in_2_addr_next <= in_2_addr_next + 1'b1;
else in_2_addr_next <= {BANK_IN_2_Q, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]};
dummy_addr_next <= dummy_addr_next + 1'b1;
//
end
//
// Extra word: increment only the port selected by sel_aux, and only for
// input opcodes on banks that need the extra word.
// NOTE(review): the increment is applied to the full {bank, word} vector, so
// a carry out of the word address would change the bank bits - confirm this
// cannot happen for the supported operand sizes.
IO_FSM_STATE_EXTRA1:
//
if (opcode_is_input && sel_in_needs_extra) begin
//
if (sel_aux_is_1) in_1_addr_next <= in_1_addr_next + 1'b1;
if (sel_aux_is_2) in_2_addr_next <= in_2_addr_next + 1'b1;
//
end
//
endcase
// "Last word" flags. They are evaluated only on the clock edge entering
// BUSY1, by comparing the look-ahead word addresses against word_index_last;
// on every other edge they are cleared.
always @(posedge clk) begin
    if (io_fsm_state_next == IO_FSM_STATE_BUSY1) begin
        in_1_addr_op_is_last  <= in_1_addr_op_next  == word_index_last;
        in_2_addr_op_is_last  <= in_2_addr_op_next  == word_index_last;
        dummy_addr_op_is_last <= dummy_addr_op_next == word_index_last;
    end else begin
        in_1_addr_op_is_last  <= 1'b0;
        in_2_addr_op_is_last  <= 1'b0;
        dummy_addr_op_is_last <= 1'b0;
    end
end
//
// FSM Process
//
// State register: asynchronous active-low reset to IDLE, otherwise load the
// combinational next-state value on every rising clock edge.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        io_fsm_state <= IO_FSM_STATE_IDLE;
    end else begin
        io_fsm_state <= io_fsm_state_next;
    end
end
//
// Busy Exit Logic
//
reg io_fsm_done = 1'b0;
// Busy-loop exit flag. Cleared by default every clock and set only while the
// FSM is in BUSY1, so it is valid during the following BUSY2 state where the
// transition logic samples it.
always @(posedge clk) begin
//
io_fsm_done <= 1'b0;
//
if (io_fsm_state == IO_FSM_STATE_BUSY1) begin
//
// Input opcodes: finish when the port selected by sel_aux reached the last
// word. Output and ladder opcodes: finish on the dummy word counter.
if (opcode_is_input) begin
if (sel_aux_is_1 && in_1_addr_op_is_last) io_fsm_done <= 1'b1;
if (sel_aux_is_2 && in_2_addr_op_is_last) io_fsm_done <= 1'b1;
end else if (opcode_is_output || opcode_is_ladder) begin
if (dummy_addr_op_is_last) io_fsm_done <= 1'b1;
end
//
end
//
end
//
// FSM Transition Logic
//
// After the busy loop, input opcodes pass through EXTRA1/EXTRA2 before the
// post-latency states; all other opcodes go straight to LATENCY_POST1.
assign io_fsm_state_after_busy = opcode_is_input ? IO_FSM_STATE_EXTRA1 : IO_FSM_STATE_LATENCY_POST1;
// Combinational next-state function. Only two transitions are conditional:
// IDLE leaves on ena, and BUSY2 either loops back to BUSY1 or exits when
// io_fsm_done is set. Every other state advances unconditionally.
always @* begin
//
case (io_fsm_state)
IO_FSM_STATE_IDLE: io_fsm_state_next = ena ? IO_FSM_STATE_LATENCY_PRE1 : IO_FSM_STATE_IDLE ;
IO_FSM_STATE_LATENCY_PRE1: io_fsm_state_next = IO_FSM_STATE_LATENCY_PRE2 ;
IO_FSM_STATE_LATENCY_PRE2: io_fsm_state_next = IO_FSM_STATE_LATENCY_PRE3 ;
IO_FSM_STATE_LATENCY_PRE3: io_fsm_state_next = IO_FSM_STATE_LATENCY_PRE4 ;
IO_FSM_STATE_LATENCY_PRE4: io_fsm_state_next = IO_FSM_STATE_BUSY1 ;
IO_FSM_STATE_BUSY1: io_fsm_state_next = IO_FSM_STATE_BUSY2 ;
IO_FSM_STATE_BUSY2: io_fsm_state_next = io_fsm_done ? io_fsm_state_after_busy : IO_FSM_STATE_BUSY1;
IO_FSM_STATE_EXTRA1: io_fsm_state_next = IO_FSM_STATE_EXTRA2 ;
IO_FSM_STATE_EXTRA2: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST1 ;
IO_FSM_STATE_LATENCY_POST1: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST2 ;
IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST3 ;
IO_FSM_STATE_LATENCY_POST3: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST4 ;
IO_FSM_STATE_LATENCY_POST4: io_fsm_state_next = IO_FSM_STATE_STOP ;
IO_FSM_STATE_STOP: io_fsm_state_next = IO_FSM_STATE_IDLE ;
// Unassigned encodings recover to IDLE.
default: io_fsm_state_next = IO_FSM_STATE_IDLE ;
endcase
//
end
//
// Ready Logic
//
// Ready flag (drives the rdy port): high after reset, dropped in IDLE as soon
// as ena is seen, raised again when the FSM passes through STOP. It holds its
// value in every other state.
reg rdy_reg = 1'b1;
assign rdy = rdy_reg;
always @(posedge clk or negedge rst_n) begin
    if (!rst_n)
        rdy_reg <= 1'b1;
    else if (io_fsm_state == IO_FSM_STATE_IDLE)
        rdy_reg <= ~ena;
    else if (io_fsm_state == IO_FSM_STATE_STOP)
        rdy_reg <= 1'b1;
end
//
// Optional Debug Facility
//
`ifdef MODEXPNG_ENABLE_DEBUG
`include "modexpng_io_manager_debug.vh"
`endif
endmodule