aboutsummaryrefslogblamecommitdiff
path: root/rtl/modexpng_io_manager.v
blob: a5cd1db688d371f5e1612781cb227c190fb8ec1f (plain) (tree)































                                                                           


                          
          












                    






















                        


                 
                


                 
                



                

                
                           
                           
                           
                           





                 












                                                 
                                                   












                                                             






















                                                                 


                                                          
                                                         


                                                          
                                                         



                                                         

                                                         
                                                                    
                                                                    
                                                                    
                                                                    





                                                          











                                                         
                                                         
















                                               

                                         


                                         







































                                                         
                                                          
                                                       
                                   




              




                                                
 




                                                  
 




                                                
 




                                                  




             





                                            


                               


                                                                                        








                                                                                                          

                                                                    



                                                                             

                                                                        
 


                                                              






                                             

                                   
                                  
                                    
 
















































































                                                                                       
      
                          
      
                                          
          
                         

                            




                                         

                                                                                 



                                                                                 
                                                                                 









                                
                               
          
                                          
          
                         





                                    

                           




                                             







                                                                          


                                             
                  

                                        




                                                                                                


                          
                  



                                        


                               





               
                        
      
                                                                                                                    
 


                                                                                                                             










                                       

                                   




                                             
                  
                                                                                                                    



                                                                                                                    
                                                                                                                           
                  


                                             



                                                                                                                                          
                                                                                                                           
                  







                           
                                









                                                                        
                                                                        





                                             




                                                                                                                                   


                                             


                                                                                                                                                                







                           
                           


                                                       
                                                       
 


                                                                              
    
                                                                              
                                                                              
                                                                          
                                                                               
    








                                                                   




                                            
















                                                                                                          
                  



                                                                                                               


                  

                                    


                                                                   
                  



                                                                                                               




                               
                                                                
                      



                                                                         
                      



                                                                         



                       

          




                  
                                          
          

                                                      













                                                    

                                                                                   
                                                                    
                                                                                   
               
              



           
    


                           

                                                                                                           





                                                                                                                          
                                                                                                                          

                                                                                                      

                                                                                                      











                         
                                          
          
                                                 
                                

                                                 


               


                  
                                

                         








                                                                                                   

 
         
//======================================================================
//
// Copyright (c) 2019, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
//   be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================

// I/O manager: moves words between the external input/output memories
// (io_in_1, io_in_2, io_out) and the internal "wide"/"narrow" operand
// storage of the X and Y halves, and extracts the ladder control bits
// (d, p, q) from the second input memory.
module modexpng_io_manager
(
    // clock and active-low asynchronous reset
    clk,
    rst_n,
    
    // NOTE(review): presumably a start/ready handshake with the FSM -- confirm
    ena,
    rdy,
    
    // micro-operation selectors: destination half (X/Y), source memory (1/2),
    // source bank, destination bank
    sel_crt,
    sel_aux,
    sel_in,
    sel_out,
    
    // micro-operation opcode (input / output / ladder init / ladder step)
    opcode,
    
    // index of the last operand word, compared against the next source address
    word_index_last,
    
    // write port: wide storage, X half
    io_wide_xy_ena_x,
    io_wide_xy_bank_x,
    io_wide_xy_addr_x,
    io_wide_x_din_x,
    io_wide_y_din_x,

    // write port: narrow storage, X half
    io_narrow_xy_ena_x,
    io_narrow_xy_bank_x,
    io_narrow_xy_addr_x,
    io_narrow_x_din_x,
    io_narrow_y_din_x,

    // write port: wide storage, Y half
    io_wide_xy_ena_y,
    io_wide_xy_bank_y,
    io_wide_xy_addr_y,
    io_wide_x_din_y,
    io_wide_y_din_y,

    // write port: narrow storage, Y half
    io_narrow_xy_ena_y,
    io_narrow_xy_bank_y,
    io_narrow_xy_addr_y,
    io_narrow_x_din_y,
    io_narrow_y_din_y,
    
    // read port: input memory 1
    io_in_1_en,
    io_in_1_addr,
    io_in_1_din,
    
    // read port: input memory 2 (also the source of the ladder bits)
    io_in_2_en,
    io_in_2_addr,
    io_in_2_din,
    
    // write port: output memory (we mirrors en)
    io_out_en,
    io_out_we,
    io_out_addr,
    io_out_dout,
    
    // narrow storage words forwarded to the output memory
    wrk_narrow_x_din_x_lsb,
    wrk_narrow_y_din_x_lsb,
    wrk_narrow_x_din_y_lsb,
    wrk_narrow_y_din_y_lsb,
    
    // ladder control: initial bit index in, extracted bits and done flag out
    ladder_steps,
    ladder_d,
    ladder_p,
    ladder_q,
    ladder_done
);

    //
    // Headers
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_microcode.vh"

    
    //
    // Ports
    //
    // All widths come from the included parameter/microcode headers.
    //
    input                                    clk;       // all sequential logic is posedge-clocked
    input                                    rst_n;     // asynchronous, active low (clears the enable registers)

    input                                    ena;       // NOTE(review): presumably starts an operation -- confirm in FSM transition logic
    output                                   rdy;       // NOTE(review): presumably high when idle -- confirm in FSM transition logic
    
    input  [              UOP_CRT_W    -1:0] sel_crt;   // UOP_CRT_X / UOP_CRT_Y: which half (_x or _y ports) is the destination
    input  [              UOP_AUX_W    -1:0] sel_aux;   // UOP_AUX_1 / UOP_AUX_2: which input memory (or x/y narrow word on output) is the source
    input  [              BANK_ADDR_W  -1:0] sel_in;    // source bank placed in the upper bits of io_in_*_addr
    input  [              BANK_ADDR_W  -1:0] sel_out;   // destination bank for storage writes and io_out_addr
    
    input  [              UOP_OPCODE_W -1:0] opcode;    // decoded against the UOP_OPCODE_* constants
    
    input  [              OP_ADDR_W    -1:0] word_index_last; // compared with the next word address to flag the last word
    
    // wide storage write port, X half (the same input word drives both din buses)
    output                                   io_wide_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] io_wide_xy_bank_x;
    output [              OP_ADDR_W    -1:0] io_wide_xy_addr_x;
    output [              WORD_EXT_W   -1:0] io_wide_x_din_x;
    output [              WORD_EXT_W   -1:0] io_wide_y_din_x;

    // narrow storage write port, X half
    output                                   io_narrow_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] io_narrow_xy_bank_x;
    output [              OP_ADDR_W    -1:0] io_narrow_xy_addr_x;
    output [              WORD_EXT_W   -1:0] io_narrow_x_din_x;
    output [              WORD_EXT_W   -1:0] io_narrow_y_din_x;
    
    // wide storage write port, Y half
    output                                   io_wide_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] io_wide_xy_bank_y;
    output [              OP_ADDR_W    -1:0] io_wide_xy_addr_y;
    output [              WORD_EXT_W   -1:0] io_wide_x_din_y;
    output [              WORD_EXT_W   -1:0] io_wide_y_din_y;

    // narrow storage write port, Y half
    output                                   io_narrow_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] io_narrow_xy_bank_y;
    output [              OP_ADDR_W    -1:0] io_narrow_xy_addr_y;
    output [              WORD_EXT_W   -1:0] io_narrow_x_din_y;
    output [              WORD_EXT_W   -1:0] io_narrow_y_din_y;

    // input memory 1 read port; address is {bank, word index}
    output                                   io_in_1_en;
    output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_in_1_addr;
    input  [              WORD_W       -1:0] io_in_1_din;
    
    // input memory 2 read port; address is {bank, word index}
    output                                   io_in_2_en;
    output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_in_2_addr;
    input  [              WORD_W       -1:0] io_in_2_din;
    
    // output memory write port; io_out_we is tied to io_out_en
    output                                   io_out_en;
    output                                   io_out_we;
    output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_out_addr;
    output [              WORD_W       -1:0] io_out_dout;
    
    // narrow storage words (x/y operand, X/Y half) copied to io_out_dout
    // when the opcode is UOP_OPCODE_OUTPUT_FROM_NARROW
    input  [              WORD_W       -1:0] wrk_narrow_x_din_x_lsb;
    input  [              WORD_W       -1:0] wrk_narrow_y_din_x_lsb;
    input  [              WORD_W       -1:0] wrk_narrow_x_din_y_lsb;
    input  [              WORD_W       -1:0] wrk_narrow_y_din_y_lsb;
    
    input  [              BIT_INDEX_W  -1:0] ladder_steps; // bit index loaded on UOP_OPCODE_LADDER_INIT
    output                                   ladder_d;     // bits sampled from io_in_2_din during ladder opcodes
    output                                   ladder_p;
    output                                   ladder_q;
    output                                   ladder_done;  // set when a ladder step is issued with the bit index already at zero


    //
    // FSM Declaration
    //
    // Eight states, numbered in declaration order (0..7). The state names
    // suggest two latency cycles before and after the BUSY phase, plus an
    // optional EXTRA cycle -- NOTE(review): confirm against the transition logic.
    //
    localparam [2:0] IO_FSM_STATE_IDLE          = 3'd0;
    localparam [2:0] IO_FSM_STATE_LATENCY_PRE1  = 3'd1;
    localparam [2:0] IO_FSM_STATE_LATENCY_PRE2  = 3'd2;
    localparam [2:0] IO_FSM_STATE_BUSY          = 3'd3;
    localparam [2:0] IO_FSM_STATE_EXTRA         = 3'd4;
    localparam [2:0] IO_FSM_STATE_LATENCY_POST1 = 3'd5;
    localparam [2:0] IO_FSM_STATE_LATENCY_POST2 = 3'd6;
    localparam [2:0] IO_FSM_STATE_STOP          = 3'd7;

    // current state (powers up in IDLE) and combinational next state
    reg [2:0] io_fsm_state_next;
    reg [2:0] io_fsm_state = IO_FSM_STATE_IDLE;


    //
    // Control Signals (external memories)
    //
    // Registers behind the io_in_1 / io_in_2 / io_out ports. Each address is
    // kept as a separate bank part and word-index part; the two are
    // concatenated when mapped onto the ports. Enables power up deasserted.
    //
    reg                    in_1_en = 1'b0;
    reg [BANK_ADDR_W -1:0] in_1_addr_bank;
    reg [  OP_ADDR_W -1:0] in_1_addr_op;
    
    reg                    in_2_en = 1'b0;
    reg [BANK_ADDR_W -1:0] in_2_addr_bank;
    reg [  OP_ADDR_W -1:0] in_2_addr_op;
    
    reg                    out_en = 1'b0;
    reg [BANK_ADDR_W -1:0] out_addr_bank;
    reg [  OP_ADDR_W -1:0] out_addr_op;
    reg [     WORD_W -1:0] out_dout;
    
    // word counter with no memory port of its own; its delayed copy supplies
    // the word index of io_out_addr for output opcodes
    reg [  OP_ADDR_W -1:0] dummy_addr_op;
    
    
    //
    // Control Signals (internal wide/narrow storage)
    //
    // Registers behind the io_wide_* / io_narrow_* write ports, one set per
    // half (_x / _y). Enables power up deasserted.
    //
    reg                    wide_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0] wide_xy_bank_x;
    reg [  OP_ADDR_W -1:0] wide_xy_addr_x;
    reg [ WORD_EXT_W -1:0] wide_x_din_x;
    reg [ WORD_EXT_W -1:0] wide_y_din_x;

    reg                    narrow_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0] narrow_xy_bank_x;
    reg [  OP_ADDR_W -1:0] narrow_xy_addr_x;
    reg [ WORD_EXT_W -1:0] narrow_x_din_x;
    reg [ WORD_EXT_W -1:0] narrow_y_din_x;
    
    reg                    wide_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0] wide_xy_bank_y;
    reg [  OP_ADDR_W -1:0] wide_xy_addr_y;
    reg [ WORD_EXT_W -1:0] wide_x_din_y;
    reg [ WORD_EXT_W -1:0] wide_y_din_y;

    reg                    narrow_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0] narrow_xy_bank_y;
    reg [  OP_ADDR_W -1:0] narrow_xy_addr_y;
    reg [ WORD_EXT_W -1:0] narrow_x_din_y;
    reg [ WORD_EXT_W -1:0] narrow_y_din_y;


    //
    // Mapping (external memories)
    //
    // Output ports are plain copies of the internal registers; the memory
    // addresses are formed as {bank, word index}.
    //
    assign io_in_1_en   = in_1_en;
    assign io_in_2_en   = in_2_en;
    assign io_out_en    = out_en;
    assign io_out_we    = out_en;   // the output port is write-only, so we == en

    assign io_in_1_addr = {in_1_addr_bank, in_1_addr_op};
    assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op};
    assign io_out_addr  = {out_addr_bank,  out_addr_op };

    assign io_out_dout  = out_dout;


    //
    // Mapping (internal wide/narrow storage)
    //
    // Grouped by signal kind: enables, banks, addresses, data.
    //
    assign io_wide_xy_ena_x    = wide_xy_ena_x;
    assign io_wide_xy_ena_y    = wide_xy_ena_y;
    assign io_narrow_xy_ena_x  = narrow_xy_ena_x;
    assign io_narrow_xy_ena_y  = narrow_xy_ena_y;

    assign io_wide_xy_bank_x   = wide_xy_bank_x;
    assign io_wide_xy_bank_y   = wide_xy_bank_y;
    assign io_narrow_xy_bank_x = narrow_xy_bank_x;
    assign io_narrow_xy_bank_y = narrow_xy_bank_y;

    assign io_wide_xy_addr_x   = wide_xy_addr_x;
    assign io_wide_xy_addr_y   = wide_xy_addr_y;
    assign io_narrow_xy_addr_x = narrow_xy_addr_x;
    assign io_narrow_xy_addr_y = narrow_xy_addr_y;

    assign io_wide_x_din_x     = wide_x_din_x;
    assign io_wide_y_din_x     = wide_y_din_x;
    assign io_wide_x_din_y     = wide_x_din_y;
    assign io_wide_y_din_y     = wide_y_din_y;

    assign io_narrow_x_din_x   = narrow_x_din_x;
    assign io_narrow_y_din_x   = narrow_y_din_x;
    assign io_narrow_x_din_y   = narrow_x_din_y;
    assign io_narrow_y_din_y   = narrow_y_din_y;


    //
    // Delays
    //
    // Two-stage shift registers that hold the source word indices one and two
    // clock cycles after they were issued. The *_dly2 copies are used as the
    // destination word index once the corresponding data word is written.
    //
    reg [OP_ADDR_W -1:0] in_1_addr_op_dly1,  in_1_addr_op_dly2;
    reg [OP_ADDR_W -1:0] in_2_addr_op_dly1,  in_2_addr_op_dly2;
    reg [OP_ADDR_W -1:0] dummy_addr_op_dly1, dummy_addr_op_dly2;

    always @(posedge clk) begin
        // first stage: sample the live word indices
        in_1_addr_op_dly1  <= in_1_addr_op;
        in_2_addr_op_dly1  <= in_2_addr_op;
        dummy_addr_op_dly1 <= dummy_addr_op;
        // second stage: shift the first stage along
        in_1_addr_op_dly2  <= in_1_addr_op_dly1;
        in_2_addr_op_dly2  <= in_2_addr_op_dly1;
        dummy_addr_op_dly2 <= dummy_addr_op_dly1;
    end


    //
    // Handy Wires
    //

    // opcode decode
    wire opcode_is_input_wide   = (opcode == UOP_OPCODE_INPUT_TO_WIDE);
    wire opcode_is_input_narrow = (opcode == UOP_OPCODE_INPUT_TO_NARROW);
    wire opcode_is_input        = opcode_is_input_wide || opcode_is_input_narrow;

    wire opcode_is_output       = (opcode == UOP_OPCODE_OUTPUT_FROM_NARROW);

    wire opcode_is_ladder_init  = (opcode == UOP_OPCODE_LADDER_INIT);
    wire opcode_is_ladder_step  = (opcode == UOP_OPCODE_LADDER_STEP);
    wire opcode_is_ladder       = opcode_is_ladder_init || opcode_is_ladder_step;

    // source banks that get one additional word written to the narrow
    // extension address after the regular words
    wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF) ||
                              (sel_in == BANK_IN_2_P_COEFF) ||
                              (sel_in == BANK_IN_2_Q_COEFF);

    // destination half select
    wire sel_crt_is_x = (sel_crt == UOP_CRT_X);
    wire sel_crt_is_y = (sel_crt == UOP_CRT_Y);

    // source memory select
    wire sel_aux_is_1 = (sel_aux == UOP_AUX_1);
    wire sel_aux_is_2 = (sel_aux == UOP_AUX_2);

    // "next address is the last word" flags, driven in the source address logic
    wire in_1_addr_op_next_is_last;
    wire in_2_addr_op_next_is_last;
    wire in_2_addr_op_next_is_one;    // NOTE(review): its assign is commented out, so this wire is undriven
    wire dummy_addr_op_next_is_last;


    //
    // Ladder Init/Step Logic
    //
    // ladder_index is the bit index currently being processed and
    // ladder_index_next is kept one below it. Both are updated on the cycle
    // the FSM is about to enter LATENCY_PRE1:
    //   - LADDER_INIT loads the index from ladder_steps and clears "done";
    //   - LADDER_STEP moves on to the next (lower) index and raises "done"
    //     if the index was already zero when the step was issued.
    //
    reg ladder_d_r;
    reg ladder_p_r;
    reg ladder_q_r;
    reg ladder_done_r = 1'b0;

    assign ladder_done = ladder_done_r;
    assign ladder_d    = ladder_d_r;
    assign ladder_p    = ladder_p_r;
    assign ladder_q    = ladder_q_r;

    reg  [BIT_INDEX_W -1:0] ladder_index;
    reg  [BIT_INDEX_W -1:0] ladder_index_next;

    // upper bits select the word, lower bits select the bit within the word
    wire [  OP_ADDR_W -1:0] ladder_index_msb     = ladder_index[BIT_INDEX_W-1 : BIT_INDEX_W-OP_ADDR_W];
    wire [ WORD_MUX_W -1:0] ladder_index_lsb     = ladder_index[WORD_MUX_W-1 : 0];
    wire                    ladder_index_is_zero = (ladder_index == BIT_INDEX_ZERO);

    always @(posedge clk) begin
        //
        if ((io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1) && opcode_is_ladder_init) begin
            ladder_done_r     <= 1'b0;
            ladder_index      <= ladder_steps;
            ladder_index_next <= ladder_steps - 1'b1;
        end
        //
        if ((io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1) && opcode_is_ladder_step) begin
            if (ladder_index_is_zero)
                ladder_done_r <= 1'b1;
            ladder_index      <= ladder_index_next;
            ladder_index_next <= ladder_index_next - 1'b1;
        end
        //
    end


    //
    // Ladder Mux
    //
    // Picks out of the word currently on io_in_2_din the single bit addressed
    // by ladder_index_lsb (the in-word part of the ladder bit index).
    //
    // An indexed bit-select is used instead of an enumerated case so that the
    // mux follows WORD_MUX_W rather than assuming a fixed 4-bit selector, and
    // so that an unknown selector yields X in simulation instead of silently
    // holding the previous value (the enumerated case had no default).
    //
    reg ladder_dpq_mux;
    
    always @*
        //
        ladder_dpq_mux = io_in_2_din[ladder_index_lsb];

    // Ladder bit capture: during ladder opcodes the muxed bit is sampled three
    // times, once per state, into the d, p and q registers. Outside ladder
    // opcodes, and in all other states, the registers hold their value.
    // NOTE(review): the source address logic reads BANK_IN_2_D then
    // BANK_IN_2_P; the third read presumably targets the Q bank -- confirm.
    always @(posedge clk)
        //
        if (opcode_is_ladder) begin
            //
            if (io_fsm_state == IO_FSM_STATE_BUSY)
                ladder_d_r <= ladder_dpq_mux;
            //
            if (io_fsm_state == IO_FSM_STATE_LATENCY_POST1)
                ladder_p_r <= ladder_dpq_mux;
            //
            if (io_fsm_state == IO_FSM_STATE_LATENCY_POST2)
                ladder_q_r <= ladder_dpq_mux;
            //
        end
    
    
    //
    // Source Enable Logic
    //
    // Read enables of the two input memories, registered from the *next* FSM
    // state so they are valid during that state:
    //   - PRE1/PRE2/BUSY: memory 1 is read for input opcodes with AUX_1;
    //     memory 2 is read for input opcodes with AUX_2 and for ladder opcodes;
    //   - EXTRA: the selected memory is read only for banks that need the
    //     additional word;
    //   - any other state: both enables are low.
    //
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n)
            {in_1_en, in_2_en} <= 2'b00;
        else if ((io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1) ||
                 (io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE2) ||
                 (io_fsm_state_next == IO_FSM_STATE_BUSY        )) begin
            in_1_en <= sel_aux_is_1 && opcode_is_input;
            in_2_en <= opcode_is_ladder || (sel_aux_is_2 && opcode_is_input);
        end else if (io_fsm_state_next == IO_FSM_STATE_EXTRA) begin
            in_1_en <= sel_in_needs_extra && sel_aux_is_1 && opcode_is_input;
            in_2_en <= sel_in_needs_extra && sel_aux_is_2 && opcode_is_input;
        end else
            {in_1_en, in_2_en} <= 2'b00;

    //
    // Destination Enable Logic
    //
    // Write enables of the wide/narrow storage ports and of the output
    // memory, registered from the *current* FSM state:
    //   - BUSY/EXTRA/POST1: the port matching the opcode (wide/narrow) and
    //     the selected half (X/Y) is enabled; out_en follows the output opcode;
    //   - POST2: only the narrow ports may stay enabled, and only for banks
    //     that need the additional word; out_en still follows the output opcode;
    //   - any other state: everything is disabled.
    //
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            //
            out_en          <= 1'b0;
            narrow_xy_ena_y <= 1'b0;
            narrow_xy_ena_x <= 1'b0;
            wide_xy_ena_y   <= 1'b0;
            wide_xy_ena_x   <= 1'b0;
            //
        end else if ((io_fsm_state == IO_FSM_STATE_BUSY         ) ||
                     (io_fsm_state == IO_FSM_STATE_EXTRA        ) ||
                     (io_fsm_state == IO_FSM_STATE_LATENCY_POST1)) begin
            //
            wide_xy_ena_x   <= sel_crt_is_x && opcode_is_input_wide;
            wide_xy_ena_y   <= sel_crt_is_y && opcode_is_input_wide;
            narrow_xy_ena_x <= sel_crt_is_x && opcode_is_input_narrow;
            narrow_xy_ena_y <= sel_crt_is_y && opcode_is_input_narrow;
            //
            out_en          <= opcode_is_output;
            //
        end else if (io_fsm_state == IO_FSM_STATE_LATENCY_POST2) begin
            //
            wide_xy_ena_x   <= 1'b0;
            wide_xy_ena_y   <= 1'b0;
            narrow_xy_ena_x <= sel_in_needs_extra && sel_crt_is_x && opcode_is_input_narrow;
            narrow_xy_ena_y <= sel_in_needs_extra && sel_crt_is_y && opcode_is_input_narrow;
            //
            out_en          <= opcode_is_output;
            //
        end else begin
            //
            out_en          <= 1'b0;
            narrow_xy_ena_y <= 1'b0;
            narrow_xy_ena_x <= 1'b0;
            wide_xy_ena_y   <= 1'b0;
            wide_xy_ena_x   <= 1'b0;
            //
        end


    //
    // Output Data Logic
    //
    // Data registers of all write ports. Every register defaults to the
    // "don't care" constant each cycle and is overridden only for the port
    // selected by the opcode and the X/Y half in the active FSM states.
    //

    // word from the selected input memory, zero-extended to the storage width
    wire [WORD_EXT_W -1:0] io_in_dout_mux;
    assign io_in_dout_mux = sel_aux_is_1 ? {{(WORD_EXT_W-WORD_W){1'b0}}, io_in_1_din}
                                         : {{(WORD_EXT_W-WORD_W){1'b0}}, io_in_2_din};

    // narrow storage word (x or y operand) of each half, chosen by sel_aux
    wire [WORD_W -1:0] wrk_narrow_xy_din_x_mux_lsb;
    wire [WORD_W -1:0] wrk_narrow_xy_din_y_mux_lsb;
    assign wrk_narrow_xy_din_x_mux_lsb = sel_aux_is_1 ? wrk_narrow_x_din_x_lsb : wrk_narrow_y_din_x_lsb;
    assign wrk_narrow_xy_din_y_mux_lsb = sel_aux_is_1 ? wrk_narrow_x_din_y_lsb : wrk_narrow_y_din_y_lsb;

    always @(posedge clk) begin
        //
        // defaults
        //
        out_dout       <= WORD_DNC;
        //
        narrow_y_din_y <= WORD_EXT_DNC;
        narrow_x_din_y <= WORD_EXT_DNC;
        narrow_y_din_x <= WORD_EXT_DNC;
        narrow_x_din_x <= WORD_EXT_DNC;
        wide_y_din_y   <= WORD_EXT_DNC;
        wide_x_din_y   <= WORD_EXT_DNC;
        wide_y_din_x   <= WORD_EXT_DNC;
        wide_x_din_x   <= WORD_EXT_DNC;
        //
        case (io_fsm_state)
            //
            IO_FSM_STATE_BUSY,
            IO_FSM_STATE_EXTRA,
            IO_FSM_STATE_LATENCY_POST1: begin
                //
                // input opcodes: the same word goes to both din buses of the port
                //
                if (opcode_is_input_wide) begin
                    if (sel_crt_is_x) begin
                        wide_x_din_x <= io_in_dout_mux;
                        wide_y_din_x <= io_in_dout_mux;
                    end
                    if (sel_crt_is_y) begin
                        wide_x_din_y <= io_in_dout_mux;
                        wide_y_din_y <= io_in_dout_mux;
                    end
                end
                //
                if (opcode_is_input_narrow) begin
                    if (sel_crt_is_x) begin
                        narrow_x_din_x <= io_in_dout_mux;
                        narrow_y_din_x <= io_in_dout_mux;
                    end
                    if (sel_crt_is_y) begin
                        narrow_x_din_y <= io_in_dout_mux;
                        narrow_y_din_y <= io_in_dout_mux;
                    end
                end
                //
                // output opcode: forward the narrow word of the selected half
                //
                if (opcode_is_output)
                    out_dout <= sel_crt_is_x ? wrk_narrow_xy_din_x_mux_lsb : wrk_narrow_xy_din_y_mux_lsb;
                //
            end
            //
            IO_FSM_STATE_LATENCY_POST2: begin
                //
                // only the additional word of "extra" banks is written here
                //
                if (opcode_is_input_narrow && sel_in_needs_extra) begin
                    if (sel_crt_is_x) begin
                        narrow_x_din_x <= io_in_dout_mux;
                        narrow_y_din_x <= io_in_dout_mux;
                    end
                    if (sel_crt_is_y) begin
                        narrow_x_din_y <= io_in_dout_mux;
                        narrow_y_din_y <= io_in_dout_mux;
                    end
                end
                //
                if (opcode_is_output)
                    out_dout <= sel_crt_is_x ? wrk_narrow_xy_din_x_mux_lsb : wrk_narrow_xy_din_y_mux_lsb;
                //
            end
            //
        endcase
        //
    end


    //
    // Destination Address Logic
    //
    // Bank/address registers of all write ports. Everything defaults to the
    // "don't care" constants each cycle. In the active states the selected
    // port gets bank sel_out and the source word index delayed by two cycles;
    // in POST2 the narrow port of "extra" banks is pointed at the dedicated
    // extension bank/address instead.
    //

    // twice-delayed word index of whichever input memory is selected
    wire [OP_ADDR_W -1:0] in_addr_op_dly2_mux;
    assign in_addr_op_dly2_mux = sel_aux_is_1 ? in_1_addr_op_dly2 : in_2_addr_op_dly2;

    always @(posedge clk) begin
        //
        // defaults
        //
        {out_addr_bank,    out_addr_op     } <= {BANK_DNC, OP_ADDR_DNC};
        {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
        {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
        {wide_xy_bank_y,   wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
        {wide_xy_bank_x,   wide_xy_addr_x  } <= {BANK_DNC, OP_ADDR_DNC};
        //
        case (io_fsm_state)
            //
            IO_FSM_STATE_BUSY,
            IO_FSM_STATE_EXTRA,
            IO_FSM_STATE_LATENCY_POST1: begin
                //
                if (opcode_is_input_wide) begin
                    if (sel_crt_is_x) begin
                        wide_xy_bank_x <= sel_out;
                        wide_xy_addr_x <= in_addr_op_dly2_mux;
                    end
                    if (sel_crt_is_y) begin
                        wide_xy_bank_y <= sel_out;
                        wide_xy_addr_y <= in_addr_op_dly2_mux;
                    end
                end
                //
                if (opcode_is_input_narrow) begin
                    if (sel_crt_is_x) begin
                        narrow_xy_bank_x <= sel_out;
                        narrow_xy_addr_x <= in_addr_op_dly2_mux;
                    end
                    if (sel_crt_is_y) begin
                        narrow_xy_bank_y <= sel_out;
                        narrow_xy_addr_y <= in_addr_op_dly2_mux;
                    end
                end
                //
                if (opcode_is_output) begin
                    out_addr_bank <= sel_out;
                    out_addr_op   <= dummy_addr_op_dly2;
                end
                //
            end
            //
            IO_FSM_STATE_LATENCY_POST2: begin
                //
                if (opcode_is_input_narrow && sel_in_needs_extra) begin
                    if (sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
                    if (sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
                end
                //
                if (opcode_is_output) begin
                    out_addr_bank <= sel_out;
                    out_addr_op   <= dummy_addr_op_dly2;
                end
                //
            end
            //
        endcase
        //
    end
        
    
    //
    // Source Address Logic
    //
    // "Next address" registers for the two input memories ({bank, word index})
    // and for the memory-less dummy word counter, plus flags telling whether
    // the next word index equals word_index_last.
    //
    reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next;
    reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next;
    reg [              OP_ADDR_W -1:0] dummy_addr_next;

    // word-index part (low OP_ADDR_W bits) of each next address
    wire [OP_ADDR_W -1:0] in_1_addr_op_next;
    wire [OP_ADDR_W -1:0] in_2_addr_op_next;
    wire [OP_ADDR_W -1:0] dummy_addr_op_next;

    assign in_1_addr_op_next  = in_1_addr_next[0 +: OP_ADDR_W];
    assign in_2_addr_op_next  = in_2_addr_next[0 +: OP_ADDR_W];
    assign dummy_addr_op_next = dummy_addr_next;

    assign in_1_addr_op_next_is_last  = (in_1_addr_op_next  == word_index_last);
    assign in_2_addr_op_next_is_last  = (in_2_addr_op_next  == word_index_last);
//  assign in_2_addr_op_next_is_one   = in_2_addr_op_next  == OP_ADDR_ONE;
    assign dummy_addr_op_next_is_last = (dummy_addr_op_next == word_index_last);
    
    // Registers the read addresses (in_1, in_2, dummy) and their look-ahead
    // "next" values. The case below is keyed on io_fsm_state_next, i.e. on the
    // state the FSM register is loaded with at this same clock edge.
    always @(posedge clk) begin
        //
        // Defaults: every address register falls back to the "don't care"
        // values unless one of the case branches below overrides it (the last
        // non-blocking assignment to a register wins).
        //
        {in_1_addr_bank, in_1_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
        {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
        {                dummy_addr_op} <= {          OP_ADDR_DNC};
        //
        in_1_addr_next  <= {BANK_DNC, OP_ADDR_DNC};
        in_2_addr_next  <= {BANK_DNC, OP_ADDR_DNC};
        dummy_addr_next <= {          OP_ADDR_DNC};
        //
        case (io_fsm_state_next)
            //
            // PRE1: present word 0 of the selected input bank and prime the
            // look-ahead pointers with word 1.
            //
            IO_FSM_STATE_LATENCY_PRE1: begin
                //
                // for ladder opcodes in_2 stays at "don't care" in this state
                //
                                       {in_1_addr_bank, in_1_addr_op } <= {sel_in,   OP_ADDR_ZERO};
                if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= {sel_in,   OP_ADDR_ZERO};
                else                   {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
                                       {                dummy_addr_op} <= {          OP_ADDR_ZERO};
                //
                in_1_addr_next  <= {sel_in, OP_ADDR_ONE};
                in_2_addr_next  <= {sel_in, OP_ADDR_ONE};
                dummy_addr_next <= {        OP_ADDR_ONE};
                //
            end
            //
            // PRE2: present the look-ahead values and advance them by one.
            // For ladder opcodes in_2 is instead pointed at BANK_IN_2_D /
            // ladder_index_msb, and its look-ahead is set to BANK_IN_2_P with
            // the top operand-address bit forced to 1.
            //
            IO_FSM_STATE_LATENCY_PRE2: begin
                //
                                       {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
                if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
                else                   {in_2_addr_bank, in_2_addr_op } <= {BANK_IN_2_D, ladder_index_msb};
                                       {                dummy_addr_op} <= dummy_addr_next;
                //
                // NOTE(review): the +1 is applied to the packed {bank, op}
                // vector, so an operand-address overflow would carry into the
                // bank bits -- presumably never reached; confirm.
                //
                                       in_1_addr_next  <= in_1_addr_next  + 1'b1;
                if (!opcode_is_ladder) in_2_addr_next  <= in_2_addr_next  + 1'b1;
                else                   in_2_addr_next  <= {BANK_IN_2_P, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]};
                                       dummy_addr_next <= dummy_addr_next + 1'b1;
                //
            end
            //
            // BUSY: keep presenting the look-ahead values and incrementing
            // them. For ladder opcodes the in_2 look-ahead is held at
            // BANK_IN_2_Q with the top operand-address bit forced to 1.
            //
            IO_FSM_STATE_BUSY: begin
                //
                {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
                {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
                {                dummy_addr_op} <= dummy_addr_next;
                //
                                       in_1_addr_next  <= in_1_addr_next  + 1'b1;
                if (!opcode_is_ladder) in_2_addr_next  <= in_2_addr_next  + 1'b1;
                else                   in_2_addr_next  <= {BANK_IN_2_Q, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]};
                                       dummy_addr_next <= dummy_addr_next + 1'b1;
                //
            end
            //
            // EXTRA: only for input opcodes whose source needs an extra word;
            // advance just the channel picked by sel_aux_is_1 / sel_aux_is_2.
            // Everything not assigned here keeps the "don't care" defaults.
            //
            IO_FSM_STATE_EXTRA:
                //
                if (opcode_is_input && sel_in_needs_extra) begin
                    //
                    if (sel_aux_is_1) begin
                        {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
                        in_1_addr_next <= in_1_addr_next + 1'b1;
                    end
                    //
                    if (sel_aux_is_2) begin
                        {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next;
                        in_2_addr_next <= in_2_addr_next + 1'b1;
                    end                    
                    // 
                end
            //
            // all remaining states: defaults above apply
            //
        endcase
        //
    end


    //
    // FSM Process
    //
    // State register: asynchronous active-low reset to IDLE, otherwise the
    // register simply follows the combinational next-state value.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            io_fsm_state <= IO_FSM_STATE_IDLE;
        end else begin
            io_fsm_state <= io_fsm_state_next;
        end
    end
    
    
    //
    // Busy Exit Logic
    //
    // Completion flag consumed by the transition logic to leave BUSY.
    reg io_fsm_done = 1'b0;

    always @(posedge clk) begin
        // Cleared on every clock unless one of the conditions below sets it.
        io_fsm_done <= 1'b0;

        case (io_fsm_state)
            IO_FSM_STATE_BUSY:
                if (opcode_is_input) begin
                    // Input transfer: the selected channel's look-ahead
                    // pointer has reached the last word.
                    if ((sel_aux_is_1 && in_1_addr_op_next_is_last) ||
                        (sel_aux_is_2 && in_2_addr_op_next_is_last))
                        io_fsm_done <= 1'b1;
                end else if (opcode_is_output || opcode_is_ladder) begin
                    // Output / ladder: the dummy pointer marks the end.
                    if (dummy_addr_op_next_is_last)
                        io_fsm_done <= 1'b1;
                end
        endcase
    end
    
    
    //
    // FSM Transition Logic
    //
    // State entered once BUSY completes: input opcodes pass through EXTRA
    // first, every other opcode goes straight to LATENCY_POST1.
    wire [2:0] io_fsm_state_after_busy = opcode_is_input ? IO_FSM_STATE_EXTRA : IO_FSM_STATE_LATENCY_POST1;
    
    // Combinational next-state function. The sequence is linear
    // (IDLE -> PRE1 -> PRE2 -> BUSY -> [EXTRA] -> POST1 -> POST2 -> STOP -> IDLE);
    // only IDLE (waits for ena) and BUSY (waits for io_fsm_done) can hold.
    always @* begin
        //
        case (io_fsm_state)
            IO_FSM_STATE_IDLE:          io_fsm_state_next = ena         ? IO_FSM_STATE_LATENCY_PRE1  : IO_FSM_STATE_IDLE ;
            IO_FSM_STATE_LATENCY_PRE1:  io_fsm_state_next =               IO_FSM_STATE_LATENCY_PRE2  ;
            IO_FSM_STATE_LATENCY_PRE2:  io_fsm_state_next =               IO_FSM_STATE_BUSY          ;
            IO_FSM_STATE_BUSY:          io_fsm_state_next = io_fsm_done ? io_fsm_state_after_busy    : IO_FSM_STATE_BUSY ;
            IO_FSM_STATE_EXTRA:         io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST1 ;
            IO_FSM_STATE_LATENCY_POST1: io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST2 ;
            IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next =               IO_FSM_STATE_STOP          ;
            IO_FSM_STATE_STOP:          io_fsm_state_next =               IO_FSM_STATE_IDLE          ;
            // Assign on every path so no latch can be inferred and an
            // unknown/unlisted state code recovers to IDLE instead of
            // holding the previous next-state value.
            default:                    io_fsm_state_next =               IO_FSM_STATE_IDLE          ;
        endcase
        //
    end


    //
    // Ready Logic
    //
    // Ready flag: high after reset, dropped when ena is seen in IDLE,
    // raised again in STOP, held unchanged in every other state.
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rdy_reg <= 1'b1;
        else if (io_fsm_state == IO_FSM_STATE_IDLE)
            rdy_reg <= ~ena;
        else if (io_fsm_state == IO_FSM_STATE_STOP)
            rdy_reg <= 1'b1;
    end


    //
    // BEGIN DEBUG
    //
    `ifdef MODEXPNG_ENABLE_DEBUG
    // Simulation-only trace, printed while the FSM is in STOP: a table
    // header for the ladder-init opcode, one row per ladder-step opcode.
    always @(posedge clk) begin
        if (io_fsm_state == IO_FSM_STATE_STOP) begin
            if (opcode_is_ladder_init) begin
                $display("[step] | D | P | Q");
                $display("-------+---+---+---");
            end else begin
                if (opcode_is_ladder_step) begin
                    $display("[%4d] | %d | %d | %d", ladder_index, ladder_d_r, ladder_p_r, ladder_q_r);
                end
            end
        end
    end
    `endif


endmodule