// Source path: rtl/modexpng_io_manager.v
// Blob: 466f1ea7917b05b72ab1adac66f331d84b3156c1































                                                                           


                          
          












                    






















                        


                 
                


                 
                



                

                
                           
                           
                           
                           





                 












                                                 
                                                   












                                                             






















                                                                 


                                                          
                                                         


                                                          
                                                         



                                                         

                                                         
                                                                    
                                                                    
                                                                    
                                                                    





                                                          




                      

















                                                         
 
    











                                          

                                         


                                         







































                                                         
                                                          
                                                       
                                   




              




                                                
 




                                                  
 




                                                
 




                                                  




             
                                           
                                           
    









                                                  

                               











                                                                  








                                                                                                          

                                                                    



                                                                             

                                                                        
 


                                                              





                                             
    





















                                                                                       
                                                                   


















                                                                


                                              
    
                                                

                              















                                                                
               




                                                   



                           
                                 

                                                                    
                                         

                                                                   
                                         




                                                                   
      
                          
      
                                          
          
                         

                            

                                         


                                        

                                                                                 

               
                                        
                                                                                 
                                                                                 








                                
 
      
                               
          
                                          
          
                         





                                    

                           

                                    


                                               







                                                                          

               
                                               
                  

                                        




                                                                                                


                          
                  



                                        


                               





               
                        
      

                                                                                                
 













                                                                                                                                 
 










                                       

                                   

                           


                                               
                  
                                                                                                                    



                                                                                                                    
                                                                                                                     
                  

               
                                               



                                                                                                                                          
                                                                                                                     
                  







                           
                                
      



















                                                                                    






                                                                        
                                                                        


                           







                                                                                                                                  

               
                                               

                                                                                                                                                                
                                                                                                                                                                





                           
 
      
                           


                                                       
                                                       
 


                                                                              
    



                                     





                                                                   

                                
                                              
                  

                                                                                                   
                                                                                                   
                                                                                                   
                 

               
                                              




                                                                                                          
                  




































                                                                                       



                                                                                                               


                  
                                       
                  



                                                                                                               


                  
                                  
                  
                                                                
                      

                                                                              
                       
















                                                                               

               

          
    



                  
                                          
          
                                                        
                                                      










                               
                                                       

                                      

                                                                              
                                                                    
                                                                              
               
              



           
    


                           
                                                                                                            
    


                           














                                                                                                                                











                         
                                          
          
                                                 
                                

                                                   


               
      
                              
      
                                
                                           
          

 
         
//======================================================================
//
// Copyright (c) 2019, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
//   be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================

module modexpng_io_manager
(
    clk,
    rst_n,
    
    ena,
    rdy,
    
    sel_crt,
    sel_aux,
    sel_in,
    sel_out,
    
    opcode,
    
    word_index_last,
    
    io_wide_xy_ena_x,
    io_wide_xy_bank_x,
    io_wide_xy_addr_x,
    io_wide_x_din_x,
    io_wide_y_din_x,

    io_narrow_xy_ena_x,
    io_narrow_xy_bank_x,
    io_narrow_xy_addr_x,
    io_narrow_x_din_x,
    io_narrow_y_din_x,

    io_wide_xy_ena_y,
    io_wide_xy_bank_y,
    io_wide_xy_addr_y,
    io_wide_x_din_y,
    io_wide_y_din_y,

    io_narrow_xy_ena_y,
    io_narrow_xy_bank_y,
    io_narrow_xy_addr_y,
    io_narrow_x_din_y,
    io_narrow_y_din_y,
    
    io_in_1_en,
    io_in_1_addr,
    io_in_1_din,
    
    io_in_2_en,
    io_in_2_addr,
    io_in_2_din,
    
    io_out_en,
    io_out_we,
    io_out_addr,
    io_out_dout,
    
    wrk_narrow_x_din_x_lsb,
    wrk_narrow_y_din_x_lsb,
    wrk_narrow_x_din_y_lsb,
    wrk_narrow_y_din_y_lsb,
    
    ladder_steps,
    ladder_d,
    ladder_p,
    ladder_q,
    ladder_done
);

    //
    // Headers
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_microcode.vh"

    
    //
    // Ports
    //
    input                                    clk;
    input                                    rst_n;

    input                                    ena;
    output                                   rdy;
    
    input  [              UOP_CRT_W    -1:0] sel_crt;
    input  [              UOP_AUX_W    -1:0] sel_aux;
    input  [              BANK_ADDR_W  -1:0] sel_in; 
    input  [              BANK_ADDR_W  -1:0] sel_out;
    
    input  [              UOP_OPCODE_W -1:0] opcode;
    
    input  [              OP_ADDR_W    -1:0] word_index_last;
    
    output                                   io_wide_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] io_wide_xy_bank_x;
    output [              OP_ADDR_W    -1:0] io_wide_xy_addr_x;
    output [              WORD_EXT_W   -1:0] io_wide_x_din_x;
    output [              WORD_EXT_W   -1:0] io_wide_y_din_x;

    output                                   io_narrow_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] io_narrow_xy_bank_x;
    output [              OP_ADDR_W    -1:0] io_narrow_xy_addr_x;
    output [              WORD_EXT_W   -1:0] io_narrow_x_din_x;
    output [              WORD_EXT_W   -1:0] io_narrow_y_din_x;
    
    output                                   io_wide_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] io_wide_xy_bank_y;
    output [              OP_ADDR_W    -1:0] io_wide_xy_addr_y;
    output [              WORD_EXT_W   -1:0] io_wide_x_din_y;
    output [              WORD_EXT_W   -1:0] io_wide_y_din_y;

    output                                   io_narrow_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] io_narrow_xy_bank_y;
    output [              OP_ADDR_W    -1:0] io_narrow_xy_addr_y;
    output [              WORD_EXT_W   -1:0] io_narrow_x_din_y;
    output [              WORD_EXT_W   -1:0] io_narrow_y_din_y;

    output                                   io_in_1_en;
    output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_in_1_addr;
    input  [              WORD_W       -1:0] io_in_1_din;
    
    output                                   io_in_2_en;
    output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_in_2_addr;
    input  [              WORD_W       -1:0] io_in_2_din;
    
    output                                   io_out_en;
    output                                   io_out_we;
    output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_out_addr;
    output [              WORD_W       -1:0] io_out_dout;
    
    input  [              WORD_W       -1:0] wrk_narrow_x_din_x_lsb;
    input  [              WORD_W       -1:0] wrk_narrow_y_din_x_lsb;
    input  [              WORD_W       -1:0] wrk_narrow_x_din_y_lsb;
    input  [              WORD_W       -1:0] wrk_narrow_y_din_y_lsb;
    
    input  [              BIT_INDEX_W  -1:0] ladder_steps;
    output                                   ladder_d;
    output                                   ladder_p;
    output                                   ladder_q;
    output                                   ladder_done;


    //
    // FSM Declaration
    //
    localparam [3:0] IO_FSM_STATE_IDLE_X          = 4'h0;
    localparam [3:0] IO_FSM_STATE_LATENCY_PRE1_X  = 4'h1;
    localparam [3:0] IO_FSM_STATE_LATENCY_PRE2_X  = 4'h2;
    localparam [3:0] IO_FSM_STATE_LATENCY_PRE3_X  = 4'h3;
    localparam [3:0] IO_FSM_STATE_LATENCY_PRE4_X  = 4'h4;
    localparam [3:0] IO_FSM_STATE_BUSY1_X         = 4'hA;
    localparam [3:0] IO_FSM_STATE_BUSY2_X         = 4'hB;
    localparam [3:0] IO_FSM_STATE_EXTRA1_X        = 4'hC;
    localparam [3:0] IO_FSM_STATE_EXTRA2_X        = 4'hD;
    localparam [3:0] IO_FSM_STATE_LATENCY_POST1_X = 4'h5;
    localparam [3:0] IO_FSM_STATE_LATENCY_POST2_X = 4'h6;
    localparam [3:0] IO_FSM_STATE_LATENCY_POST3_X = 4'h7;
    localparam [3:0] IO_FSM_STATE_LATENCY_POST4_X = 4'h8;
    localparam [3:0] IO_FSM_STATE_STOP_X          = 4'hF;
    
    reg  [3:0] io_fsm_state = IO_FSM_STATE_IDLE_X;
    reg  [3:0] io_fsm_state_next;
    wire [3:0] io_fsm_state_after_busy;

    
    //
    // Control Signals
    //
    reg                    in_1_en = 1'b0;
    reg [BANK_ADDR_W -1:0] in_1_addr_bank;
    reg [  OP_ADDR_W -1:0] in_1_addr_op;
    
    reg                    in_2_en = 1'b0;
    reg [BANK_ADDR_W -1:0] in_2_addr_bank;
    reg [  OP_ADDR_W -1:0] in_2_addr_op;
    
    reg                    out_en = 1'b0;
    reg [BANK_ADDR_W -1:0] out_addr_bank;
    reg [  OP_ADDR_W -1:0] out_addr_op;
    reg [     WORD_W -1:0] out_dout;
    
    reg [  OP_ADDR_W -1:0] dummy_addr_op;
    
    
    //
    // Control Signals
    //
    reg                    wide_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0] wide_xy_bank_x;
    reg [  OP_ADDR_W -1:0] wide_xy_addr_x;
    reg [ WORD_EXT_W -1:0] wide_x_din_x;
    reg [ WORD_EXT_W -1:0] wide_y_din_x;

    reg                    narrow_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0] narrow_xy_bank_x;
    reg [  OP_ADDR_W -1:0] narrow_xy_addr_x;
    reg [ WORD_EXT_W -1:0] narrow_x_din_x;
    reg [ WORD_EXT_W -1:0] narrow_y_din_x;
    
    reg                    wide_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0] wide_xy_bank_y;
    reg [  OP_ADDR_W -1:0] wide_xy_addr_y;
    reg [ WORD_EXT_W -1:0] wide_x_din_y;
    reg [ WORD_EXT_W -1:0] wide_y_din_y;

    reg                    narrow_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0] narrow_xy_bank_y;
    reg [  OP_ADDR_W -1:0] narrow_xy_addr_y;
    reg [ WORD_EXT_W -1:0] narrow_x_din_y;
    reg [ WORD_EXT_W -1:0] narrow_y_din_y;


    //
    // Mapping
    //    
    // External input ports: read enable plus {bank, word} address.
    assign io_in_1_en   = in_1_en;
    assign io_in_2_en   = in_2_en;
    assign io_in_1_addr = {in_1_addr_bank, in_1_addr_op};
    assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op};

    // External output port. The write strobe simply follows the enable,
    // because this module only ever writes to that port.
    assign io_out_en    = out_en;
    assign io_out_we    = out_en;
    assign io_out_addr  = {out_addr_bank, out_addr_op};
    assign io_out_dout  = out_dout;


    //
    // Mapping of the internal wide/narrow write ports (X and Y halves)
    //

    // write enables
    assign io_wide_xy_ena_x    = wide_xy_ena_x;
    assign io_wide_xy_ena_y    = wide_xy_ena_y;
    assign io_narrow_xy_ena_x  = narrow_xy_ena_x;
    assign io_narrow_xy_ena_y  = narrow_xy_ena_y;

    // bank selects
    assign io_wide_xy_bank_x   = wide_xy_bank_x;
    assign io_wide_xy_bank_y   = wide_xy_bank_y;
    assign io_narrow_xy_bank_x = narrow_xy_bank_x;
    assign io_narrow_xy_bank_y = narrow_xy_bank_y;

    // word addresses
    assign io_wide_xy_addr_x   = wide_xy_addr_x;
    assign io_wide_xy_addr_y   = wide_xy_addr_y;
    assign io_narrow_xy_addr_x = narrow_xy_addr_x;
    assign io_narrow_xy_addr_y = narrow_xy_addr_y;

    // write data
    assign io_wide_x_din_x     = wide_x_din_x;
    assign io_wide_y_din_x     = wide_y_din_x;
    assign io_wide_x_din_y     = wide_x_din_y;
    assign io_wide_y_din_y     = wide_y_din_y;
    assign io_narrow_x_din_x   = narrow_x_din_x;
    assign io_narrow_y_din_x   = narrow_y_din_x;
    assign io_narrow_x_din_y   = narrow_x_din_y;
    assign io_narrow_y_din_y   = narrow_y_din_y;


    //
    // Delays
    //    
    reg [OP_ADDR_W -1:0] in_1_addr_op_dly1;
    reg [OP_ADDR_W -1:0] in_2_addr_op_dly1;
    
    reg [OP_ADDR_W -1:0] dummy_addr_op_dly1;

    reg [WORD_W -1:0] io_in_1_din_dly1;
    reg [WORD_W -1:0] io_in_2_din_dly1;

    reg [WORD_W -1:0] wrk_narrow_x_din_x_lsb_dly1;
    reg [WORD_W -1:0] wrk_narrow_y_din_x_lsb_dly1;
    reg [WORD_W -1:0] wrk_narrow_x_din_y_lsb_dly1;
    reg [WORD_W -1:0] wrk_narrow_y_din_y_lsb_dly1;

    // First delay stage: every signal below is registered exactly once.
    // The *_dly1 copies are consumed by the muxes and the deeper delay
    // lines further down in this module.
    always @(posedge clk) begin
        // source and dummy word addresses
        in_1_addr_op_dly1  <= in_1_addr_op;
        in_2_addr_op_dly1  <= in_2_addr_op;
        dummy_addr_op_dly1 <= dummy_addr_op;
        // data words returned on the two input ports
        io_in_1_din_dly1   <= io_in_1_din;
        io_in_2_din_dly1   <= io_in_2_din;
        // narrow-bank LSB words (X and Y halves, x/y operands)
        wrk_narrow_x_din_x_lsb_dly1 <= wrk_narrow_x_din_x_lsb;
        wrk_narrow_y_din_x_lsb_dly1 <= wrk_narrow_y_din_x_lsb;
        wrk_narrow_x_din_y_lsb_dly1 <= wrk_narrow_x_din_y_lsb;
        wrk_narrow_y_din_y_lsb_dly1 <= wrk_narrow_y_din_y_lsb;
    end


    //
    // Handy Wires
    //
    // Opcode decoding: one flag per micro-operation handled here.
    wire opcode_is_input_wide   = (opcode == UOP_OPCODE_INPUT_TO_WIDE);
    wire opcode_is_input_narrow = (opcode == UOP_OPCODE_INPUT_TO_NARROW);
    wire opcode_is_output       = (opcode == UOP_OPCODE_OUTPUT_FROM_NARROW);
    wire opcode_is_ladder_init  = (opcode == UOP_OPCODE_LADDER_INIT);
    wire opcode_is_ladder_step  = (opcode == UOP_OPCODE_LADDER_STEP);

    // Grouped flags derived from the individual decodes above.
    wire opcode_is_input  = opcode_is_input_wide  || opcode_is_input_narrow;
    wire opcode_is_ladder = opcode_is_ladder_init || opcode_is_ladder_step;

    // Source banks for which the EXTRA1 read and the POST3 write
    // to BANK_NARROW_EXT are enabled.
    wire sel_in_needs_extra = (sel_in == BANK_IN_2_Q_COEFF) ||
                              (sel_in == BANK_IN_2_P_COEFF) ||
                              (sel_in == BANK_IN_1_N_COEFF) ;

    // CRT half (X / Y) selected by the current micro-operation.
    wire sel_crt_is_x = (sel_crt == UOP_CRT_X);
    wire sel_crt_is_y = (sel_crt == UOP_CRT_Y);

    // Auxiliary selector (1 / 2) of the current micro-operation.
    wire sel_aux_is_1 = (sel_aux == UOP_AUX_1);
    wire sel_aux_is_2 = (sel_aux == UOP_AUX_2);
    

    //
    // Ladder Init/Step Logic
    //
    reg ladder_d_r;
    reg ladder_p_r;
    reg ladder_q_r;
    reg ladder_done_r = 1'b0;
    
    assign ladder_d    = ladder_d_r;
    assign ladder_p    = ladder_p_r;
    assign ladder_q    = ladder_q_r;
    assign ladder_done = ladder_done_r;
    
    reg  [BIT_INDEX_W -1:0] ladder_index;
    reg  [BIT_INDEX_W -1:0] ladder_index_next;
    wire [  OP_ADDR_W -1:0] ladder_index_msb = ladder_index[BIT_INDEX_W-1-: OP_ADDR_W];
    wire [ WORD_MUX_W -1:0] ladder_index_lsb = ladder_index[ WORD_MUX_W-1-:WORD_MUX_W];
    wire                    ladder_index_is_zero = ladder_index == BIT_INDEX_ZERO; 
    
    // Ladder bit counter. It is only updated on the clock edge where the FSM
    // is about to enter LATENCY_PRE1:
    //   - LADDER_INIT loads the index from ladder_steps, pre-computes the
    //     following index (ladder_steps - 1) and clears the done flag;
    //   - LADDER_STEP advances to the pre-computed index, decrements the
    //     look-ahead copy and raises the done flag when the index held
    //     *before* this update is zero.
    // The done flag is never cleared by a step, only by the next init.
    always @(posedge clk) begin
        if (io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1_X) begin
            // (re)start of a ladder
            if (opcode_is_ladder_init) begin
                ladder_done_r     <= 1'b0;
                ladder_index      <= ladder_steps;
                ladder_index_next <= ladder_steps - 1'b1;
            end
            // next bit of a ladder
            if (opcode_is_ladder_step) begin
                if (ladder_index_is_zero)
                    ladder_done_r <= 1'b1;
                ladder_index      <= ladder_index_next;
                ladder_index_next <= ladder_index_next - 1'b1;
            end
        end
    end


    //
    // Ladder Mux
    //
    // ladder_dpq_mux_dly1: combinational pick of one bit out of the delayed
    //                      word from input port 2, selected by the low bits
    //                      of the ladder index.
    // ladder_dpq_mux_dly2: registered copy of that bit.
    // ladder_dpq_mux     : name under which the registered bit is consumed.
    reg  ladder_dpq_mux_dly1;
    reg  ladder_dpq_mux_dly2;
    wire ladder_dpq_mux = ladder_dpq_mux_dly2;

    // An indexed bit-select replaces the former hand-written 16-entry case.
    // That case hard-coded 4-bit labels against a WORD_MUX_W-wide selector
    // and had no default branch, so it would have inferred a latch if
    // WORD_MUX_W were ever wider than four bits. The select below gives the
    // same result for every known selector value and scales with
    // WORD_MUX_W / WORD_W.
    always @*
        //
        ladder_dpq_mux_dly1 = io_in_2_din_dly1[ladder_index_lsb];

    always @(posedge clk)
        //
        ladder_dpq_mux_dly2 <= ladder_dpq_mux_dly1;
    
    // Capture of the three ladder control bits. While a ladder opcode
    // (init or step) is active, the registered mux bit is latched into
    //   ladder_d in state BUSY1,
    //   ladder_p in state LATENCY_POST1,
    //   ladder_q in state LATENCY_POST3.
    // In every other state, or for any other opcode, the bits hold.
    always @(posedge clk) begin
        if (opcode_is_ladder) begin
            if (io_fsm_state == IO_FSM_STATE_BUSY1_X)
                ladder_d_r <= ladder_dpq_mux;
            if (io_fsm_state == IO_FSM_STATE_LATENCY_POST1_X)
                ladder_p_r <= ladder_dpq_mux;
            if (io_fsm_state == IO_FSM_STATE_LATENCY_POST3_X)
                ladder_q_r <= ladder_dpq_mux;
        end
    end
    
    
    //
    // Source Enable Logic
    //
    // Read enables of the two input ports, decided from the state the FSM
    // is about to enter (io_fsm_state_next):
    //   - PRE1 / PRE3 / BUSY1: port 1 is read for input opcodes with
    //     aux = 1; port 2 is read for input opcodes with aux = 2 and for
    //     both ladder opcodes;
    //   - EXTRA1: same port selection, input opcodes only, and only when
    //     sel_in_needs_extra is set;
    //   - any other state: both ports idle.
    // Both enables are cleared asynchronously while rst_n is low.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            in_1_en <= 1'b0;
            in_2_en <= 1'b0;
        end else begin
            // idle unless overridden by one of the states below
            in_1_en <= 1'b0;
            in_2_en <= 1'b0;
            //
            case (io_fsm_state_next)
                //
                IO_FSM_STATE_LATENCY_PRE1_X,
                IO_FSM_STATE_LATENCY_PRE3_X,
                IO_FSM_STATE_BUSY1_X: begin
                    in_1_en <= sel_aux_is_1 && opcode_is_input;
                    in_2_en <= opcode_is_ladder || (sel_aux_is_2 && opcode_is_input);
                end
                //
                IO_FSM_STATE_EXTRA1_X: begin
                    in_1_en <= sel_in_needs_extra && sel_aux_is_1 && opcode_is_input;
                    in_2_en <= sel_in_needs_extra && sel_aux_is_2 && opcode_is_input;
                end
                //
            endcase
        end
    end


    //
    // Destination Enable Logic
    //    
    // Write enables of the four internal write ports and of the external
    // output port, decided from the current FSM state:
    //   - BUSY1 / EXTRA1 / POST1: the wide or narrow port of the selected
    //     CRT half is enabled for the matching input opcode; the output
    //     port is enabled for the output opcode;
    //   - POST3: wide ports stay off; the narrow port of the selected CRT
    //     half is enabled only when sel_in_needs_extra is set; the output
    //     port is still enabled for the output opcode;
    //   - any other state: everything off.
    // All enables are cleared asynchronously while rst_n is low.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            out_en          <= 1'b0;
            narrow_xy_ena_y <= 1'b0;
            narrow_xy_ena_x <= 1'b0;
            wide_xy_ena_y   <= 1'b0;
            wide_xy_ena_x   <= 1'b0;
        end else begin
            // everything off unless overridden by one of the states below
            out_en          <= 1'b0;
            narrow_xy_ena_y <= 1'b0;
            narrow_xy_ena_x <= 1'b0;
            wide_xy_ena_y   <= 1'b0;
            wide_xy_ena_x   <= 1'b0;
            //
            case (io_fsm_state)
                //
                IO_FSM_STATE_BUSY1_X,
                IO_FSM_STATE_EXTRA1_X,
                IO_FSM_STATE_LATENCY_POST1_X: begin
                    wide_xy_ena_x   <= sel_crt_is_x && opcode_is_input_wide;
                    wide_xy_ena_y   <= sel_crt_is_y && opcode_is_input_wide;
                    narrow_xy_ena_x <= sel_crt_is_x && opcode_is_input_narrow;
                    narrow_xy_ena_y <= sel_crt_is_y && opcode_is_input_narrow;
                    out_en          <= opcode_is_output;
                end
                //
                IO_FSM_STATE_LATENCY_POST3_X: begin
                    narrow_xy_ena_x <= sel_in_needs_extra && sel_crt_is_x && opcode_is_input_narrow;
                    narrow_xy_ena_y <= sel_in_needs_extra && sel_crt_is_y && opcode_is_input_narrow;
                    out_en          <= opcode_is_output;
                end
                //
            endcase
        end
    end


    //
    // Output Data Logic
    //
    reg  [    WORD_W -1:0] io_in_dout_mux_dly2;
    wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, io_in_dout_mux_dly2}; 

    reg [WORD_W -1:0] wrk_narrow_din_x_lsb_mux_dly2; 
    reg [WORD_W -1:0] wrk_narrow_din_y_lsb_mux_dly2;
    
    wire [WORD_W -1:0] wrk_narrow_din_x_lsb_mux = wrk_narrow_din_x_lsb_mux_dly2; 
    wire [WORD_W -1:0] wrk_narrow_din_y_lsb_mux = wrk_narrow_din_y_lsb_mux_dly2;

    // Second delay stage with source selection:
    //   - io_in_dout_mux_dly2 takes the delayed word of input port 1 when
    //     aux = 1, otherwise that of input port 2;
    //   - wrk_narrow_din_{x,y}_lsb_mux_dly2 take the delayed narrow "x" LSB
    //     word when aux = 1, otherwise the narrow "y" LSB word, separately
    //     for the X and Y halves.
    //
    // All three registers use non-blocking assignments. The two
    // wrk_narrow_* registers were previously written with blocking "=",
    // although they are read (through the wrk_narrow_din_*_lsb_mux wires)
    // by another always @(posedge clk) block. That made the value seen by
    // the reader depend on simulator block ordering and could differ from
    // the synthesised flip-flop behaviour.
    always @(posedge clk) begin
        //
        io_in_dout_mux_dly2 <= sel_aux_is_1 ? io_in_1_din_dly1 : io_in_2_din_dly1;
        //
        wrk_narrow_din_x_lsb_mux_dly2 <= sel_aux_is_1 ? wrk_narrow_x_din_x_lsb_dly1 : wrk_narrow_y_din_x_lsb_dly1;
        wrk_narrow_din_y_lsb_mux_dly2 <= sel_aux_is_1 ? wrk_narrow_x_din_y_lsb_dly1 : wrk_narrow_y_din_y_lsb_dly1;
        //
    end

    // Write-data generation. Every data register is first loaded with its
    // *_DNC constant (presumably a "do not care" filler, defined in the
    // included headers); a later non-blocking assignment in the same clock
    // edge overrides that default where a real transfer takes place:
    //   - BUSY1 / EXTRA1 / POST1: input opcodes copy the zero-extended
    //     input word into both the x and y data of the selected
    //     wide/narrow port; the output opcode forwards the narrow LSB word
    //     of the selected CRT half;
    //   - POST3: only the narrow port is written, and only when
    //     sel_in_needs_extra is set; the output opcode is handled as above.
    always @(posedge clk) begin
        // defaults
        out_dout       <= WORD_DNC;
        narrow_y_din_y <= WORD_EXT_DNC;
        narrow_x_din_y <= WORD_EXT_DNC;
        narrow_y_din_x <= WORD_EXT_DNC;
        narrow_x_din_x <= WORD_EXT_DNC;
        wide_y_din_y   <= WORD_EXT_DNC;
        wide_x_din_y   <= WORD_EXT_DNC;
        wide_y_din_x   <= WORD_EXT_DNC;
        wide_x_din_x   <= WORD_EXT_DNC;

        if ((io_fsm_state == IO_FSM_STATE_BUSY1_X        ) ||
            (io_fsm_state == IO_FSM_STATE_EXTRA1_X       ) ||
            (io_fsm_state == IO_FSM_STATE_LATENCY_POST1_X)) begin
            // regular transfer
            if (opcode_is_input_wide) begin
                if (sel_crt_is_x) begin
                    wide_x_din_x <= io_in_dout_mux;
                    wide_y_din_x <= io_in_dout_mux;
                end
                if (sel_crt_is_y) begin
                    wide_x_din_y <= io_in_dout_mux;
                    wide_y_din_y <= io_in_dout_mux;
                end
            end
            if (opcode_is_input_narrow) begin
                if (sel_crt_is_x) begin
                    narrow_x_din_x <= io_in_dout_mux;
                    narrow_y_din_x <= io_in_dout_mux;
                end
                if (sel_crt_is_y) begin
                    narrow_x_din_y <= io_in_dout_mux;
                    narrow_y_din_y <= io_in_dout_mux;
                end
            end
            if (opcode_is_output)
                out_dout <= sel_crt_is_x ? wrk_narrow_din_x_lsb_mux : wrk_narrow_din_y_lsb_mux;
        end else if (io_fsm_state == IO_FSM_STATE_LATENCY_POST3_X) begin
            // extra word, narrow destination only
            if (opcode_is_input_narrow && sel_in_needs_extra) begin
                if (sel_crt_is_x) begin
                    narrow_x_din_x <= io_in_dout_mux;
                    narrow_y_din_x <= io_in_dout_mux;
                end
                if (sel_crt_is_y) begin
                    narrow_x_din_y <= io_in_dout_mux;
                    narrow_y_din_y <= io_in_dout_mux;
                end
            end
            if (opcode_is_output)
                out_dout <= sel_crt_is_x ? wrk_narrow_din_x_lsb_mux : wrk_narrow_din_y_lsb_mux;
        end
    end


    //
    // Destination Address Logic
    //
    reg  [OP_ADDR_W -1:0] in_addr_op_dly2_mux;
    reg  [OP_ADDR_W -1:0] in_addr_op_dly3_mux;
    reg  [OP_ADDR_W -1:0] in_addr_op_dly4_mux;
    wire [OP_ADDR_W -1:0] in_addr_op_mux = in_addr_op_dly4_mux;
    
    reg  [OP_ADDR_W -1:0] dummy_addr_op_dly2;
    reg  [OP_ADDR_W -1:0] dummy_addr_op_dly3;
    reg  [OP_ADDR_W -1:0] dummy_addr_op_dly4;

    // Address delay lines (stages 2..4). Stage 2 of the source address also
    // selects between the two input ports: port 1 when aux = 1, otherwise
    // port 2. The dummy address is shifted through three plain registers.
    // All registers involved are OP_ADDR_W bits wide, so the concatenated
    // shifts below are equivalent to one assignment per stage.
    always @(posedge clk) begin
        // source address: select, then shift
        in_addr_op_dly2_mux <= sel_aux_is_1 ? in_1_addr_op_dly1 : in_2_addr_op_dly1;
        {in_addr_op_dly4_mux, in_addr_op_dly3_mux} <= {in_addr_op_dly3_mux, in_addr_op_dly2_mux};
        // dummy address: plain shift
        {dummy_addr_op_dly4, dummy_addr_op_dly3, dummy_addr_op_dly2} <=
            {dummy_addr_op_dly3, dummy_addr_op_dly2, dummy_addr_op_dly1};
    end

    // Destination bank/address generation. All targets default to the
    // {BANK_DNC, OP_ADDR_DNC} pair on every clock edge; a later non-blocking
    // assignment in the same edge overrides the default:
    //   - BUSY1 / EXTRA1 / POST1: input opcodes write to bank sel_out at
    //     the delayed source address; the output opcode writes to bank
    //     sel_out at the delayed dummy address;
    //   - POST3: with sel_in_needs_extra set, the narrow port of the
    //     selected CRT half is pointed at
    //     {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF}; the output opcode is
    //     handled as above.
    always @(posedge clk) begin
        // defaults
        {out_addr_bank,    out_addr_op     } <= {BANK_DNC, OP_ADDR_DNC};
        {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
        {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
        {wide_xy_bank_y,   wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
        {wide_xy_bank_x,   wide_xy_addr_x  } <= {BANK_DNC, OP_ADDR_DNC};

        if ((io_fsm_state == IO_FSM_STATE_BUSY1_X        ) ||
            (io_fsm_state == IO_FSM_STATE_EXTRA1_X       ) ||
            (io_fsm_state == IO_FSM_STATE_LATENCY_POST1_X)) begin
            // regular transfer
            if (opcode_is_input_wide) begin
                if (sel_crt_is_x) begin
                    wide_xy_bank_x <= sel_out;
                    wide_xy_addr_x <= in_addr_op_mux;
                end
                if (sel_crt_is_y) begin
                    wide_xy_bank_y <= sel_out;
                    wide_xy_addr_y <= in_addr_op_mux;
                end
            end
            if (opcode_is_input_narrow) begin
                if (sel_crt_is_x) begin
                    narrow_xy_bank_x <= sel_out;
                    narrow_xy_addr_x <= in_addr_op_mux;
                end
                if (sel_crt_is_y) begin
                    narrow_xy_bank_y <= sel_out;
                    narrow_xy_addr_y <= in_addr_op_mux;
                end
            end
            if (opcode_is_output) begin
                out_addr_bank <= sel_out;
                out_addr_op   <= dummy_addr_op_dly4;
            end
        end else if (io_fsm_state == IO_FSM_STATE_LATENCY_POST3_X) begin
            // extra word, narrow destination only
            if (opcode_is_input_narrow && sel_in_needs_extra) begin
                if (sel_crt_is_x)
                    {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
                if (sel_crt_is_y)
                    {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
            end
            if (opcode_is_output) begin
                out_addr_bank <= sel_out;
                out_addr_op   <= dummy_addr_op_dly4;
            end
        end
    end
        

    //
    // Source Address Logic
    //
    // Look-ahead address registers: each holds the value that the source
    // address process loads into the corresponding address output. The in_1
    // and in_2 registers carry bank and operand address concatenated; the
    // dummy register carries an operand address only.
    reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next; 
    reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next;
    reg [              OP_ADDR_W -1:0] dummy_addr_next;

    // Operand-address part (low OP_ADDR_W bits) of each look-ahead register.
    wire [OP_ADDR_W -1:0] in_1_addr_op_next  = in_1_addr_next[OP_ADDR_W -1:0];
    wire [OP_ADDR_W -1:0] in_2_addr_op_next  = in_2_addr_next[OP_ADDR_W -1:0];
    wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next;  
    
    // Flags set when a look-ahead operand address equals word_index_last
    // (see the process that drives them); initialised to 0.
    reg in_1_addr_op_is_last  = 1'b0;
    reg in_2_addr_op_is_last  = 1'b0;
    reg dummy_addr_op_is_last = 1'b0;
        
    // Registered bank/address outputs for the read (source) side.
    // All three outputs default to the DNC values every clock and are only
    // overridden in the states listed below; in every other state the
    // defaults are what gets registered. The decode uses io_fsm_state_next,
    // so a value registered here becomes visible in the same cycle in which
    // io_fsm_state takes on that state.
    always @(posedge clk) begin
        //
        {in_1_addr_bank, in_1_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
        {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
        {                dummy_addr_op} <= {          OP_ADDR_DNC};
        //
        case (io_fsm_state_next)
            //
            // First address: bank sel_in, operand address OP_ADDR_ZERO.
            // For the ladder opcode, source 2 stays at the DNC values here.
            IO_FSM_STATE_LATENCY_PRE1_X: begin
                //
                                       {in_1_addr_bank, in_1_addr_op } <= {sel_in,   OP_ADDR_ZERO};
                if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= {sel_in,   OP_ADDR_ZERO};
                else                   {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC };
                                       {                dummy_addr_op} <= {          OP_ADDR_ZERO};
               //
            end
            //
            // Second address comes from the look-ahead registers, except that
            // the ladder opcode addresses {BANK_IN_2_D, ladder_index_msb} on
            // source 2 instead.
            IO_FSM_STATE_LATENCY_PRE3_X: begin
                //
                                       {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
                if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
                else                   {in_2_addr_bank, in_2_addr_op } <= {BANK_IN_2_D, ladder_index_msb};
                                       {                dummy_addr_op} <= dummy_addr_next;
                //
            end
            //
            // Steady state: all three outputs take their look-ahead values
            // (for the ladder opcode in_2_addr_next is loaded with ladder
            // specific values by the look-ahead update process).
            IO_FSM_STATE_BUSY1_X: begin
                //
                {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
                {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
                {                dummy_addr_op} <= dummy_addr_next;
                //
            end
            //
            // One additional read for input opcodes with sel_in_needs_extra
            // set; only the source selected by sel_aux_is_1 / sel_aux_is_2 is
            // driven, the other outputs keep the DNC defaults.
            IO_FSM_STATE_EXTRA1_X:
                //
                if (opcode_is_input && sel_in_needs_extra) begin
                    //
                    if (sel_aux_is_1) {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
                    if (sel_aux_is_2) {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next;
                    // 
                end
            //
        endcase
        //
    end
    
    // Update of the look-ahead address registers.
    // There is no default assignment, so in any state not listed below the
    // registers simply hold their previous value.
    // NOTE(review): in_1_addr_next / in_2_addr_next are incremented as whole
    // {bank, operand address} vectors, so a carry out of the operand address
    // bits would spill into the bank bits -- presumably the word count never
    // gets that far; confirm against word_index_last.
    always @(posedge clk)
        //
        case (io_fsm_state_next)
            //
            // Preload with operand address OP_ADDR_ONE in bank sel_in (the
            // address outputs themselves are loaded with OP_ADDR_ZERO in this
            // same state by the output register process).
            IO_FSM_STATE_LATENCY_PRE1_X: begin
                //
                in_1_addr_next  <= {sel_in, OP_ADDR_ONE};
                in_2_addr_next  <= {sel_in, OP_ADDR_ONE};
                dummy_addr_next <= {        OP_ADDR_ONE};
                //
            end
            //
            // Advance by one. For the ladder opcode, source 2 is instead
            // loaded with bank BANK_IN_2_P and an operand address made of a
            // leading 1 followed by the low bits of ladder_index_msb.
            IO_FSM_STATE_LATENCY_PRE3_X: begin
                //
                                       in_1_addr_next  <= in_1_addr_next  + 1'b1;
                if (!opcode_is_ladder) in_2_addr_next  <= in_2_addr_next  + 1'b1;
                else                   in_2_addr_next  <= {BANK_IN_2_P, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]};
                                       dummy_addr_next <= dummy_addr_next + 1'b1;
                //
            end
            //
            // Advance by one. For the ladder opcode, source 2 is loaded with
            // the same operand address as above but in bank BANK_IN_2_Q.
            IO_FSM_STATE_BUSY1_X: begin
                //
                                       in_1_addr_next  <= in_1_addr_next  + 1'b1;
                if (!opcode_is_ladder) in_2_addr_next  <= in_2_addr_next  + 1'b1;
                else                   in_2_addr_next  <= {BANK_IN_2_Q, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]};
                                       dummy_addr_next <= dummy_addr_next + 1'b1;
                //
            end
            //
            // Extra read for input opcodes with sel_in_needs_extra: only the
            // look-ahead register of the selected source advances.
            IO_FSM_STATE_EXTRA1_X:
                //
                if (opcode_is_input && sel_in_needs_extra) begin
                    //
                    if (sel_aux_is_1) in_1_addr_next <= in_1_addr_next + 1'b1;
                    if (sel_aux_is_2) in_2_addr_next <= in_2_addr_next + 1'b1;
                    // 
                end            
            //
        endcase
    
    // "Last word" detection.
    // While the FSM is about to be in BUSY1, each flag records whether the
    // corresponding look-ahead operand address equals word_index_last.
    // In every other cycle all three flags are cleared.
    always @(posedge clk) begin
        if (io_fsm_state_next == IO_FSM_STATE_BUSY1_X) begin
            in_1_addr_op_is_last  <= (in_1_addr_op_next  == word_index_last);
            in_2_addr_op_is_last  <= (in_2_addr_op_next  == word_index_last);
            dummy_addr_op_is_last <= (dummy_addr_op_next == word_index_last);
        end else begin
            {in_1_addr_op_is_last, in_2_addr_op_is_last, dummy_addr_op_is_last} <= 3'b000;
        end
    end
    

    //
    // FSM Process
    //
    // State register: asynchronous active-low reset to IDLE, otherwise
    // loads the combinationally computed next state on every rising clock.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            io_fsm_state <= IO_FSM_STATE_IDLE_X;
        end else begin
            io_fsm_state <= io_fsm_state_next;
        end
    end
    
    
    //
    // Busy Exit Logic
    //
    // io_fsm_done is a registered flag, evaluated only while the FSM is in
    // BUSY1 and cleared in every other cycle. The transition logic samples
    // it in BUSY2 to decide whether to leave the busy loop.
    reg io_fsm_done = 1'b0;

    always @(posedge clk) begin
        // cleared by default; the branches below may set it for this cycle
        io_fsm_done <= 1'b0;

        case (io_fsm_state)
            IO_FSM_STATE_BUSY1_X: begin
                if (opcode_is_input) begin
                    // input opcodes: watch the "last" flag of the selected source
                    if (sel_aux_is_1) begin
                        if (in_1_addr_op_is_last) io_fsm_done <= 1'b1;
                    end
                    if (sel_aux_is_2) begin
                        if (in_2_addr_op_is_last) io_fsm_done <= 1'b1;
                    end
                end else if (opcode_is_output || opcode_is_ladder) begin
                    // output and ladder opcodes: watch the dummy address counter
                    if (dummy_addr_op_is_last) io_fsm_done <= 1'b1;
                end
            end
        endcase
    end
    
    
    //
    // FSM Transition Logic
    //
    // State entered when the busy loop finishes: input opcodes go through
    // the EXTRA states first, everything else proceeds to the post-latency
    // states directly.
    assign io_fsm_state_after_busy = opcode_is_input ? IO_FSM_STATE_EXTRA1_X : IO_FSM_STATE_LATENCY_POST1_X;

    // Next-state function. The sequence is linear apart from three points:
    // IDLE waits for ena, BUSY2 loops back to BUSY1 until io_fsm_done is set,
    // and the busy exit target depends on the opcode (see above).
    always @* begin
        // STOP and any unlisted state encoding fall back to IDLE
        io_fsm_state_next = IO_FSM_STATE_IDLE_X;

        case (io_fsm_state)
            IO_FSM_STATE_IDLE_X:
                io_fsm_state_next = ena ? IO_FSM_STATE_LATENCY_PRE1_X : IO_FSM_STATE_IDLE_X;

            // pre-latency chain
            IO_FSM_STATE_LATENCY_PRE1_X:  io_fsm_state_next = IO_FSM_STATE_LATENCY_PRE2_X;
            IO_FSM_STATE_LATENCY_PRE2_X:  io_fsm_state_next = IO_FSM_STATE_LATENCY_PRE3_X;
            IO_FSM_STATE_LATENCY_PRE3_X:  io_fsm_state_next = IO_FSM_STATE_LATENCY_PRE4_X;
            IO_FSM_STATE_LATENCY_PRE4_X:  io_fsm_state_next = IO_FSM_STATE_BUSY1_X;

            // two-cycle busy loop
            IO_FSM_STATE_BUSY1_X:
                io_fsm_state_next = IO_FSM_STATE_BUSY2_X;
            IO_FSM_STATE_BUSY2_X:
                io_fsm_state_next = io_fsm_done ? io_fsm_state_after_busy : IO_FSM_STATE_BUSY1_X;

            // extra cycles (reached via io_fsm_state_after_busy)
            IO_FSM_STATE_EXTRA1_X:        io_fsm_state_next = IO_FSM_STATE_EXTRA2_X;
            IO_FSM_STATE_EXTRA2_X:        io_fsm_state_next = IO_FSM_STATE_LATENCY_POST1_X;

            // post-latency chain
            IO_FSM_STATE_LATENCY_POST1_X: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST2_X;
            IO_FSM_STATE_LATENCY_POST2_X: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST3_X;
            IO_FSM_STATE_LATENCY_POST3_X: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST4_X;
            IO_FSM_STATE_LATENCY_POST4_X: io_fsm_state_next = IO_FSM_STATE_STOP_X;
        endcase
    end


    //
    // Ready Logic
    //
    // Ready flag: high after reset, dropped in IDLE as soon as ena is seen,
    // raised again in STOP; it holds its value in all other states.
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rdy_reg <= 1'b1;
        else if (io_fsm_state == IO_FSM_STATE_IDLE_X)
            rdy_reg <= ~ena;
        else if (io_fsm_state == IO_FSM_STATE_STOP_X)
            rdy_reg <= 1'b1;
    end


    //
    // Optional Debug Facility
    //
    `ifdef MODEXPNG_ENABLE_DEBUG
    `include "modexpng_io_manager_debug.vh"
    `endif


endmodule