aboutsummaryrefslogblamecommitdiff
path: root/rtl/modexpng_general_worker.v
blob: 13b7aacff104153d0f1715ea13aa494f9731010d (plain) (tree)































                                                                           
                              
 
               


                                  
           








                                                                                                                             

  
 




                                     


                                               




                                                 
                                                   







                                                             
        


                                                             
                                                                  




















































                                                                     














                                                        
    




                                                        
 


                      


                                                
 


                                                  
 


                                                
 


                                                  




























                                                     


                                                     
 


                                                       
    


                                                     
 


                                                       



























                                                          
      

                                                   
 

                                                 








                                                   
    




                                                      

                               



                                                        
          



                                                            
          

                                                                                                                                                                                                                                                                
          

                                                                                                                                                                                                                                            
          


                                                                                                
       

    
      
                               
      

                                                                                                 
    

                                                              
    

                                                                  
    
                                          
          
                         
              

                                 
              

                      

                                 
              
                         
                  



                                               
                      



                                                                 
                           
                  



                                              
                      



                                                                                               
                           
                  

                                             
                      






                                                                                               
                           
                  







                                                               





                   
                                     

      

                                                                                                       
    

                                                              
    

                                                                  
    
                                          
          
                         
              

                                 


                      

                                 
              
                         
                  
                                             

                                              





                                                                         
                                  
                                    
                      



                                                                         
                           
                  



                                              
                      



                                                                                                      

                           
                                              
                      
                                        







                                                                       


                                                                       
                           
                    

                   
           
   

      
                                
      










                                             
    





                                                     

                               

                                                                                                               


          









                                                             
    
                                    


                                      












                                                                 


           









                                                     
             

                                                 


           
                             
                                      
             
























                                                                                   


           
                               

                                      

                                         


           




















                                                                                                 

           




                                                                                                     


                                   



                                                                                                      


                                     

















                                                                                                                 
            
 

                               

                                                          
          
                     
              


                                          
                  






                                                                                                                                                           
                       
              












                                                                                                                                                           

                       










                                                                                                                                                           

                       













                                                                                                                                                                           

                       
                                          
                  








                                                                                                                                                           
                       

                                
                  











                                                                                                                                                         
                       
              


               

 
      
                                      
      



                                                              
 









                                                                                                           









                                                                       
    









                                                                           
 
                               
          

                                                                                 
          
                     
              
                                         

                                          






                                                                                                                                                              
                                
                  



                                                                                                                                                              
                       
              









                                                                                                                                                                        
                       









                                                                                                                                                                                                
                  
                                    


                                                                                                                                                    

                       

               
       















                                                                                                               

 


                      



                                     
    



                                     










                                                                                   

















                                                                               





                                                         

                                          

                                                                                                                
                                                                                


               



                                        

                               




                                              
          



                                                     
              
                                               
                  





                                                                                                                              
                  







                                                                                                                              
              
                                                
                  





                                                                                                                               
                  


























                                                                                                                               























                                                                                                                              
               


          
    





















































                                                                                                                                                             
                                                                                                 





                                                                                                   


















                                                                                                                                                             











                   
                                           
     
                              
                                   

                                       

                                    





                                             
                                         

      
                                           
     
                              
                                   

                                       

                                    





                                             
                          

      
                                           
     
                              
                                   

                                       

                                    





                                                 
                                         

      
                                           
     
                              
                                   

                                       

                                    





                                             
                          


      
      

                                    
 




                                                                                                     
 

                                    
 




                                                                                                                  
 


                         









                                                                                                                           
                                          




                                                
                          

                                                                                                                                         
                       



               

                  
                                          
          
                                                        


                                                        

                      

                            


                               
                             




                                          
                                    
                                           
                                        
                                         



                                          
                                          
                  
                                    

                                                                         
                       
              


               

 





                            












                                                                                                                                


               

 
      
                       




                         
                                          
          
                                                  





                                                  
      
                                       
      













                                                                                                                                                                                                              


                                                                                   
                                                                                 




                                                                                           
                                                                                         
           
 

                               

                                                                                   
          
                     
              
                                         

                                          
                  
                                    
                      


                                                
                          
                                                                                                                       

                       
              
                                    
                  











                                                                                                                                                         
                  








                                                                                                                                         
              
                                        
                  











                                                                                                                                                     
                  









                                                                                                                                                     
              
                                          
                  

                                    




                                                                                                                     

                                       
              
                                          
                  











                                                                                                                                                                                     
                  









                                                                                                                                   

               

       
 
         
//======================================================================
//
// Copyright (c) 2019, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
//   be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================

//
// modexpng_general_worker -- port list (non-ANSI style).
//
// Directions and widths of every name listed here are given in the "Ports"
// section further down. Summary of what those declarations show:
//
//   clk, rst_n               clock and reset inputs (rst_n is tested as
//                            active-low by the logic in this module)
//   ena, rdy                 single-bit input / output pair
//                            (presumably a start/ready handshake -- confirm)
//   sel_{narrow,wide}_{in,out}
//                            BANK_ADDR_W-bit inputs (bank selectors, judging
//                            by the width -- confirm against the FSM logic)
//   opcode                   UOP_OPCODE_W-bit input, compared against the
//                            UOP_OPCODE_* constants
//   word_index_last[_half]   OP_ADDR_W-bit inputs
//   wrk_rd_*                 read ports: enable/bank/address are outputs,
//                            the x/y data words (WORD_EXT_W bits) are inputs
//   wrk_wr_*                 write ports: enable/bank/address and the x/y
//                            data words are all outputs
//
// Every read and write port exists in a "wide" and a "narrow" flavour, and
// each flavour is instantiated twice (trailing _x and _y).
//
module modexpng_general_worker
(
    clk, rst_n,
    ena, rdy,
    sel_narrow_in, sel_narrow_out,
    sel_wide_in,   sel_wide_out,
    opcode,
    word_index_last, word_index_last_half,
    wrk_rd_wide_xy_ena_x,   wrk_rd_wide_xy_bank_x,   wrk_rd_wide_xy_addr_x,   wrk_rd_wide_x_din_x,    wrk_rd_wide_y_din_x,
    wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x, wrk_rd_narrow_x_din_x,  wrk_rd_narrow_y_din_x,
    wrk_rd_wide_xy_ena_y,   wrk_rd_wide_xy_bank_y,   wrk_rd_wide_xy_addr_y,   wrk_rd_wide_x_din_y,    wrk_rd_wide_y_din_y,
    wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y, wrk_rd_narrow_x_din_y,  wrk_rd_narrow_y_din_y,
    wrk_wr_wide_xy_ena_x,   wrk_wr_wide_xy_bank_x,   wrk_wr_wide_xy_addr_x,   wrk_wr_wide_x_dout_x,   wrk_wr_wide_y_dout_x,
    wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x, wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x,
    wrk_wr_wide_xy_ena_y,   wrk_wr_wide_xy_bank_y,   wrk_wr_wide_xy_addr_y,   wrk_wr_wide_x_dout_y,   wrk_wr_wide_y_dout_y,
    wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y, wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y
);


    //
    // Headers
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_microcode.vh"
    `include "modexpng_dsp48e1.vh"
    `include "modexpng_dsp_slice_primitives.vh"
    
    
    //
    // Ports
    //
    // Clock (the always blocks in this module trigger on its rising edge)
    // and reset (used as an asynchronous, active-low reset: negedge rst_n).
    input                                    clk;
    input                                    rst_n;

    // Single-bit control input and status output.
    // NOTE(review): presumably "start operation" / "operation finished" --
    // confirm against the FSM transition logic.
    input                                    ena;
    output                                   rdy;
    
    // Bank selectors, BANK_ADDR_W bits each, one per storage flavour
    // (narrow/wide) and direction (in/out).
    input  [              BANK_ADDR_W  -1:0] sel_narrow_in; 
    input  [              BANK_ADDR_W  -1:0] sel_narrow_out; 
    input  [              BANK_ADDR_W  -1:0] sel_wide_in; 
    input  [              BANK_ADDR_W  -1:0] sel_wide_out; 
        
    // Micro-operation code; matched against the UOP_OPCODE_* constants.
    input  [              UOP_OPCODE_W -1:0] opcode;
    
    // Word indices, OP_ADDR_W bits wide (same width as the port addresses).
    input  [              OP_ADDR_W    -1:0] word_index_last;
    input  [              OP_ADDR_W    -1:0] word_index_last_half;
    
    // Read port, wide, "_x" instance: request (enable/bank/address) goes out,
    // two WORD_EXT_W-bit data words (x and y) come back in.
    output                                   wrk_rd_wide_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] wrk_rd_wide_xy_bank_x;
    output [              OP_ADDR_W    -1:0] wrk_rd_wide_xy_addr_x;
    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_x_din_x;
    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_y_din_x;

    // Read port, narrow, "_x" instance.
    output                                   wrk_rd_narrow_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] wrk_rd_narrow_xy_bank_x;
    output [              OP_ADDR_W    -1:0] wrk_rd_narrow_xy_addr_x;
    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_x_din_x;
    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_y_din_x;
    
    // Read port, wide, "_y" instance.
    output                                   wrk_rd_wide_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] wrk_rd_wide_xy_bank_y;
    output [              OP_ADDR_W    -1:0] wrk_rd_wide_xy_addr_y;
    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_x_din_y;
    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_y_din_y;

    // Read port, narrow, "_y" instance.
    output                                   wrk_rd_narrow_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] wrk_rd_narrow_xy_bank_y;
    output [              OP_ADDR_W    -1:0] wrk_rd_narrow_xy_addr_y;
    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_x_din_y;
    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_y_din_y;

    // Write port, wide, "_x" instance: enable/bank/address plus two
    // WORD_EXT_W-bit data words (x and y), all driven by this module.
    output                                   wrk_wr_wide_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] wrk_wr_wide_xy_bank_x;
    output [              OP_ADDR_W    -1:0] wrk_wr_wide_xy_addr_x;
    output [              WORD_EXT_W   -1:0] wrk_wr_wide_x_dout_x;
    output [              WORD_EXT_W   -1:0] wrk_wr_wide_y_dout_x;

    // Write port, narrow, "_x" instance.
    output                                   wrk_wr_narrow_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] wrk_wr_narrow_xy_bank_x;
    output [              OP_ADDR_W    -1:0] wrk_wr_narrow_xy_addr_x;
    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_x_dout_x;
    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_y_dout_x;
    
    // Write port, wide, "_y" instance.
    output                                   wrk_wr_wide_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] wrk_wr_wide_xy_bank_y;
    output [              OP_ADDR_W    -1:0] wrk_wr_wide_xy_addr_y;
    output [              WORD_EXT_W   -1:0] wrk_wr_wide_x_dout_y;
    output [              WORD_EXT_W   -1:0] wrk_wr_wide_y_dout_y;

    // Write port, narrow, "_y" instance.
    output                                   wrk_wr_narrow_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] wrk_wr_narrow_xy_bank_y;
    output [              OP_ADDR_W    -1:0] wrk_wr_narrow_xy_addr_y;
    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_x_dout_y;
    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_y_dout_y;


    //
    // FSM Declaration
    //

    // 4-bit state encoding, listed in ascending numeric order.
    // Codes 4'h9 and 4'hC..4'hE are not assigned to any state.
    localparam [3:0]
        WRK_FSM_STATE_IDLE          = 4'h0,
        WRK_FSM_STATE_LATENCY_PRE1  = 4'h1,
        WRK_FSM_STATE_LATENCY_PRE2  = 4'h2,
        WRK_FSM_STATE_LATENCY_PRE3  = 4'h3,
        WRK_FSM_STATE_LATENCY_PRE4  = 4'h4,
        WRK_FSM_STATE_LATENCY_POST1 = 4'h5,
        WRK_FSM_STATE_LATENCY_POST2 = 4'h6,
        WRK_FSM_STATE_LATENCY_POST3 = 4'h7,
        WRK_FSM_STATE_LATENCY_POST4 = 4'h8,
        WRK_FSM_STATE_BUSY1         = 4'hA,
        WRK_FSM_STATE_BUSY2         = 4'hB,
        WRK_FSM_STATE_STOP          = 4'hF;

    // Next-state value and the current-state register; the current state
    // starts out as IDLE via its declaration initialiser.
    reg [3:0] wrk_fsm_state_next;
    reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;


    //
    // Control Signals
    //
    // ---- Read side --------------------------------------------------------
    // One enable / bank / address triple per read port
    // (wide|narrow) x (_x|_y). Only the enables carry an initial value.
    reg                    rd_wide_ena_x   = 1'b0,
                           rd_narrow_ena_x = 1'b0,
                           rd_wide_ena_y   = 1'b0,
                           rd_narrow_ena_y = 1'b0;

    reg [BANK_ADDR_W -1:0] rd_wide_bank_x,
                           rd_narrow_bank_x,
                           rd_wide_bank_y,
                           rd_narrow_bank_y;

    reg [  OP_ADDR_W -1:0] rd_wide_addr_x,
                           rd_narrow_addr_x,
                           rd_wide_addr_y,
                           rd_narrow_addr_y;

    // ---- Write side -------------------------------------------------------
    // One enable / bank / address / x-data / y-data group per write port
    // (wide|narrow) x (_x|_y). Only the enables carry an initial value.
    reg                    wr_wide_xy_ena_x   = 1'b0,
                           wr_narrow_xy_ena_x = 1'b0,
                           wr_wide_xy_ena_y   = 1'b0,
                           wr_narrow_xy_ena_y = 1'b0;

    reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_x,
                           wr_narrow_xy_bank_x,
                           wr_wide_xy_bank_y,
                           wr_narrow_xy_bank_y;

    reg [  OP_ADDR_W -1:0] wr_wide_xy_addr_x,
                           wr_narrow_xy_addr_x,
                           wr_wide_xy_addr_y,
                           wr_narrow_xy_addr_y;

    reg [ WORD_EXT_W -1:0] wr_wide_x_dout_x,
                           wr_wide_y_dout_x,
                           wr_narrow_x_dout_x,
                           wr_narrow_y_dout_x,
                           wr_wide_x_dout_y,
                           wr_wide_y_dout_y,
                           wr_narrow_x_dout_y,
                           wr_narrow_y_dout_y;


    //
    // Mapping
    //
    // Each module output is a direct copy of the identically-shaped internal
    // register. Fields inside every concatenation line up one-to-one
    // (same order, same widths on both sides).

    // Read ports: {enable, bank, address}
    assign {wrk_rd_wide_xy_ena_x,   wrk_rd_wide_xy_bank_x,   wrk_rd_wide_xy_addr_x} =
           {rd_wide_ena_x,          rd_wide_bank_x,          rd_wide_addr_x};

    assign {wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x} =
           {rd_narrow_ena_x,        rd_narrow_bank_x,        rd_narrow_addr_x};

    assign {wrk_rd_wide_xy_ena_y,   wrk_rd_wide_xy_bank_y,   wrk_rd_wide_xy_addr_y} =
           {rd_wide_ena_y,          rd_wide_bank_y,          rd_wide_addr_y};

    assign {wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y} =
           {rd_narrow_ena_y,        rd_narrow_bank_y,        rd_narrow_addr_y};

    // Write ports: {enable, bank, address, x data, y data}
    assign {wrk_wr_wide_xy_ena_x,   wrk_wr_wide_xy_bank_x,   wrk_wr_wide_xy_addr_x,
            wrk_wr_wide_x_dout_x,   wrk_wr_wide_y_dout_x} =
           {wr_wide_xy_ena_x,       wr_wide_xy_bank_x,       wr_wide_xy_addr_x,
            wr_wide_x_dout_x,       wr_wide_y_dout_x};

    assign {wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x,
            wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x} =
           {wr_narrow_xy_ena_x,     wr_narrow_xy_bank_x,     wr_narrow_xy_addr_x,
            wr_narrow_x_dout_x,     wr_narrow_y_dout_x};

    assign {wrk_wr_wide_xy_ena_y,   wrk_wr_wide_xy_bank_y,   wrk_wr_wide_xy_addr_y,
            wrk_wr_wide_x_dout_y,   wrk_wr_wide_y_dout_y} =
           {wr_wide_xy_ena_y,       wr_wide_xy_bank_y,       wr_wide_xy_addr_y,
            wr_wide_x_dout_y,       wr_wide_y_dout_y};

    assign {wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y,
            wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y} =
           {wr_narrow_xy_ena_y,     wr_narrow_xy_bank_y,     wr_narrow_xy_addr_y,
            wr_narrow_x_dout_y,     wr_narrow_y_dout_y};
   
   
    //
    // Delays
    //
    // Five-entry address delay lines (index 0 = newest, 4 = oldest),
    // one per read port.
    reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:4],
                         rd_narrow_addr_y_dly[0:4],
                         rd_wide_addr_x_dly[0:4],
                         rd_wide_addr_y_dly[0:4];

    // Registered copies of the eight read-data inputs.
    reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly1,
                          rd_wide_y_din_x_dly1,
                          rd_wide_x_din_y_dly1,
                          rd_wide_y_din_y_dly1,
                          rd_narrow_x_din_x_dly1,
                          rd_narrow_y_din_x_dly1,
                          rd_narrow_x_din_y_dly1,
                          rd_narrow_y_din_y_dly1;

    // One- and two-cycle delayed narrow read enables, initialised to 0.
    reg                   rd_narrow_ena_x_dly1 = 1'b0,
                          rd_narrow_ena_y_dly1 = 1'b0,
                          rd_narrow_ena_x_dly2 = 1'b0,
                          rd_narrow_ena_y_dly2 = 1'b0;
    
    // Free-running delay lines (no reset, no enable): everything below
    // advances by one stage on every rising clock edge. All assignments are
    // non-blocking, so each stage samples the previous stage's old value.
    always @(posedge clk) begin

        // Read data: one register stage per incoming word.
        rd_wide_x_din_x_dly1   <= wrk_rd_wide_x_din_x;
        rd_wide_y_din_x_dly1   <= wrk_rd_wide_y_din_x;
        rd_wide_x_din_y_dly1   <= wrk_rd_wide_x_din_y;
        rd_wide_y_din_y_dly1   <= wrk_rd_wide_y_din_y;

        rd_narrow_x_din_x_dly1 <= wrk_rd_narrow_x_din_x;
        rd_narrow_y_din_x_dly1 <= wrk_rd_narrow_y_din_x;
        rd_narrow_x_din_y_dly1 <= wrk_rd_narrow_x_din_y;
        rd_narrow_y_din_y_dly1 <= wrk_rd_narrow_y_din_y;

        // Narrow read addresses: five-stage shift, entry 0 is the newest.
        rd_narrow_addr_x_dly[0] <= rd_narrow_addr_x;
        rd_narrow_addr_x_dly[1] <= rd_narrow_addr_x_dly[0];
        rd_narrow_addr_x_dly[2] <= rd_narrow_addr_x_dly[1];
        rd_narrow_addr_x_dly[3] <= rd_narrow_addr_x_dly[2];
        rd_narrow_addr_x_dly[4] <= rd_narrow_addr_x_dly[3];

        rd_narrow_addr_y_dly[0] <= rd_narrow_addr_y;
        rd_narrow_addr_y_dly[1] <= rd_narrow_addr_y_dly[0];
        rd_narrow_addr_y_dly[2] <= rd_narrow_addr_y_dly[1];
        rd_narrow_addr_y_dly[3] <= rd_narrow_addr_y_dly[2];
        rd_narrow_addr_y_dly[4] <= rd_narrow_addr_y_dly[3];

        // Wide read addresses: same five-stage shift.
        rd_wide_addr_x_dly[0] <= rd_wide_addr_x;
        rd_wide_addr_x_dly[1] <= rd_wide_addr_x_dly[0];
        rd_wide_addr_x_dly[2] <= rd_wide_addr_x_dly[1];
        rd_wide_addr_x_dly[3] <= rd_wide_addr_x_dly[2];
        rd_wide_addr_x_dly[4] <= rd_wide_addr_x_dly[3];

        rd_wide_addr_y_dly[0] <= rd_wide_addr_y;
        rd_wide_addr_y_dly[1] <= rd_wide_addr_y_dly[0];
        rd_wide_addr_y_dly[2] <= rd_wide_addr_y_dly[1];
        rd_wide_addr_y_dly[3] <= rd_wide_addr_y_dly[2];
        rd_wide_addr_y_dly[4] <= rd_wide_addr_y_dly[3];

        // Narrow read enables: two-stage delay.
        rd_narrow_ena_x_dly1 <= rd_narrow_ena_x;
        rd_narrow_ena_x_dly2 <= rd_narrow_ena_x_dly1;

        rd_narrow_ena_y_dly1 <= rd_narrow_ena_y;
        rd_narrow_ena_y_dly2 <= rd_narrow_ena_y_dly1;

    end
    
  
    //
    // Source Read Enable Logic
    //
    // Drive both wide read enables (X and Y) to the same value.
    task _update_wide_rd_en;
        input _en;
        begin
            rd_wide_ena_x <= _en;
            rd_wide_ena_y <= _en;
        end
    endtask

    // Drive both narrow read enables (X and Y) to the same value.
    task _update_narrow_rd_en;
        input _en;
        begin
            rd_narrow_ena_x <= _en;
            rd_narrow_ena_y <= _en;
        end
    endtask

    // Convenience wrappers around the two helpers above.
    task enable_wide_rd_en;
        begin
            _update_wide_rd_en(1'b1);
        end
    endtask

    task disable_wide_rd_en;
        begin
            _update_wide_rd_en(1'b0);
        end
    endtask

    task enable_narrow_rd_en;
        begin
            _update_narrow_rd_en(1'b1);
        end
    endtask

    task disable_narrow_rd_en;
        begin
            _update_narrow_rd_en(1'b0);
        end
    endtask
    
    // Registered decode of the source read enables.
    // The active-low asynchronous reset clears all four enables. On every clock edge
    // both enable pairs are first scheduled low and are then re-asserted only when the
    // opcode and the upcoming FSM state (wrk_fsm_state_next) match an entry below;
    // with non-blocking assignments the last one scheduled wins.
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            //
            disable_wide_rd_en;
            disable_narrow_rd_en;
            //
        end else begin
            //
            // default: nothing is read
            disable_wide_rd_en;
            disable_narrow_rd_en;
            //
            case (opcode)
                //
                // narrow enables only, asserted when the next state is PRE1, PRE3 or BUSY1
                UOP_OPCODE_PROPAGATE_CARRIES,
                UOP_OPCODE_OUTPUT_FROM_NARROW,
                UOP_OPCODE_MODULAR_REDUCE_INIT,
                UOP_OPCODE_MODULAR_SUBTRACT_X:
                    //
                    case (wrk_fsm_state_next)
                        WRK_FSM_STATE_LATENCY_PRE1,
                        WRK_FSM_STATE_LATENCY_PRE3,
                        WRK_FSM_STATE_BUSY1: enable_narrow_rd_en;
                    endcase
                //
                // wide and narrow enables together, same three states
                UOP_OPCODE_COPY_CRT_Y2X,
                UOP_OPCODE_MODULAR_SUBTRACT_Y,
                UOP_OPCODE_MODULAR_SUBTRACT_Z,
                UOP_OPCODE_REGULAR_ADD_UNEVEN:
                    //
                    case (wrk_fsm_state_next)
                        WRK_FSM_STATE_LATENCY_PRE1,
                        WRK_FSM_STATE_LATENCY_PRE3,
                        WRK_FSM_STATE_BUSY1: begin enable_wide_rd_en; enable_narrow_rd_en;  end
                    endcase
                //
                // wide and narrow enables in all PRE1..PRE4 and BUSY1/BUSY2 states
                UOP_OPCODE_COPY_LADDERS_X2Y,
                UOP_OPCODE_CROSS_LADDERS_X2Y:
                    //
                    case (wrk_fsm_state_next)
                        WRK_FSM_STATE_LATENCY_PRE1,
                        WRK_FSM_STATE_LATENCY_PRE2,
                        WRK_FSM_STATE_LATENCY_PRE3,
                        WRK_FSM_STATE_LATENCY_PRE4,
                        WRK_FSM_STATE_BUSY1,
                        WRK_FSM_STATE_BUSY2: begin enable_wide_rd_en; enable_narrow_rd_en;  end
                    endcase
                //
                // wide enables only
                UOP_OPCODE_MERGE_LH:
                    //
                    case (wrk_fsm_state_next)
                        WRK_FSM_STATE_LATENCY_PRE1,
                        WRK_FSM_STATE_LATENCY_PRE3,
                        WRK_FSM_STATE_BUSY1: enable_wide_rd_en;
                    endcase                
                //
            endcase
            //
        end


    //
    // Destination Write Enable Logic
    //
    
    // Drive both wide write enables (X and Y) to the same value.
    task _update_wide_wr_en;
        input _en;
        begin
            wr_wide_xy_ena_x <= _en;
            wr_wide_xy_ena_y <= _en;
        end
    endtask

    // Drive both narrow write enables (X and Y) to the same value.
    task _update_narrow_wr_en;
        input _en;
        begin
            wr_narrow_xy_ena_x <= _en;
            wr_narrow_xy_ena_y <= _en;
        end
    endtask

    // Convenience wrappers around the two helpers above.
    task enable_wide_wr_en;
        begin
            _update_wide_wr_en(1'b1);
        end
    endtask

    task disable_wide_wr_en;
        begin
            _update_wide_wr_en(1'b0);
        end
    endtask

    task enable_narrow_wr_en;
        begin
            _update_narrow_wr_en(1'b1);
        end
    endtask

    task disable_narrow_wr_en;
        begin
            _update_narrow_wr_en(1'b0);
        end
    endtask
    
    // Registered decode of the destination write enables.
    // Same structure as the read enable decode, but keyed on the *current* FSM state
    // (wrk_fsm_state): reset clears everything, each clock schedules all enables low
    // and the matching opcode/state entry below overrides that default.
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            //
            disable_wide_wr_en;
            disable_narrow_wr_en;
            //
        end else begin
            //
            // default: nothing is written
            disable_wide_wr_en;
            disable_narrow_wr_en;
            //
            case (opcode)
                //
                // narrow write in BUSY2, POST2 and POST4
                UOP_OPCODE_PROPAGATE_CARRIES,
                UOP_OPCODE_MODULAR_SUBTRACT_X,
                UOP_OPCODE_REGULAR_ADD_UNEVEN:
                    //
                    case (wrk_fsm_state)
                        WRK_FSM_STATE_BUSY2,
                        WRK_FSM_STATE_LATENCY_POST2,
                        WRK_FSM_STATE_LATENCY_POST4: enable_narrow_wr_en;
                    endcase
                //                
                // narrow write in BUSY1, POST1 and POST3
                UOP_OPCODE_MERGE_LH:
                    //
                    case (wrk_fsm_state)
                        WRK_FSM_STATE_BUSY1,
                        WRK_FSM_STATE_LATENCY_POST1,
                        WRK_FSM_STATE_LATENCY_POST3: enable_narrow_wr_en;
                    endcase
                //
                // wide and narrow write in BUSY1, POST1 and POST3
                UOP_OPCODE_COPY_CRT_Y2X,
                UOP_OPCODE_COPY_LADDERS_X2Y,
                UOP_OPCODE_CROSS_LADDERS_X2Y,
                UOP_OPCODE_MODULAR_SUBTRACT_Z:
                    //
                    case (wrk_fsm_state)
                        WRK_FSM_STATE_BUSY1,
                        WRK_FSM_STATE_LATENCY_POST1,
                        WRK_FSM_STATE_LATENCY_POST3: begin enable_wide_wr_en; enable_narrow_wr_en; end
                    endcase
                //
                // wide write in BUSY2, POST2 and POST4
                UOP_OPCODE_MODULAR_SUBTRACT_Y:
                    //
                    case (wrk_fsm_state)
                        WRK_FSM_STATE_BUSY2,
                        WRK_FSM_STATE_LATENCY_POST2,
                        WRK_FSM_STATE_LATENCY_POST4: enable_wide_wr_en;
                    endcase
                //
                // wide write in BUSY1, POST1 and POST3
                UOP_OPCODE_MODULAR_REDUCE_INIT:
                    //
                    case (wrk_fsm_state)
                        WRK_FSM_STATE_BUSY1,
                        WRK_FSM_STATE_LATENCY_POST1,
                        WRK_FSM_STATE_LATENCY_POST3: enable_wide_wr_en;
                    endcase
                //  
            endcase
            //
        end
   

    //
    // Source Read Address Logic
    //
    // Address that the advance_rd_*_addr tasks load into the read address ports.
    reg [OP_ADDR_W -1:0] rd_wide_addr_next;
    reg [OP_ADDR_W -1:0] rd_narrow_addr_next;

    // Set by advance_rd_*_addr when the address being loaded equals word_index_last.
    reg rd_wide_addr_is_last   = 1'b0;
    reg rd_narrow_addr_is_last = 1'b0;

    // Set by advance_rd_*_addr when the address being loaded equals word_index_last_half.
    reg rd_wide_addr_is_last_half   = 1'b0;
    reg rd_narrow_addr_is_last_half = 1'b0;

    // Look-ahead flags maintained by the update_rd_*_addr_flags tasks; they steer the
    // wrap-around of rd_*_addr_next.
    reg rd_wide_addr_next_is_last   = 1'b0;
    reg rd_narrow_addr_next_is_last = 1'b0;

    reg rd_wide_addr_next_is_last_half   = 1'b0;
    reg rd_narrow_addr_next_is_last_half = 1'b0;

    // Four-deep history of the "last half" flags, bit 0 is the most recent sample.
    reg [3:0] rd_wide_addr_is_last_half_dly   = 4'h0;
    reg [3:0] rd_narrow_addr_is_last_half_dly = 4'h0;

    always @(posedge clk) begin
        // shift the history up by one position ...
        rd_wide_addr_is_last_half_dly[3:1]   <= rd_wide_addr_is_last_half_dly[2:0];
        rd_narrow_addr_is_last_half_dly[3:1] <= rd_narrow_addr_is_last_half_dly[2:0];
        // ... and capture the current flag in bit 0
        rd_wide_addr_is_last_half_dly[0]     <= rd_wide_addr_is_last_half;
        rd_narrow_addr_is_last_half_dly[0]   <= rd_narrow_addr_is_last_half;
    end

    // Load the same bank and address into both wide read ports (X and Y) and clear
    // the "last" / "last half" position flags.
    task preset_rd_wide_bank_addr;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        begin
            rd_wide_addr_is_last_half <= 1'b0;
            rd_wide_addr_is_last      <= 1'b0;
            //
            rd_wide_bank_x <= bank;
            rd_wide_addr_x <= addr;
            //
            rd_wide_bank_y <= bank;
            rd_wide_addr_y <= addr;
        end
    endtask

    // Narrow counterpart of preset_rd_wide_bank_addr.
    task preset_rd_narrow_bank_addr;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        begin
            rd_narrow_addr_is_last_half <= 1'b0;
            rd_narrow_addr_is_last      <= 1'b0;
            //
            rd_narrow_bank_x <= bank;
            rd_narrow_addr_x <= addr;
            //
            rd_narrow_bank_y <= bank;
            rd_narrow_addr_y <= addr;
        end
    endtask
      
    // Seed the wide look-ahead address and clear both of its look-ahead flags.
    task preset_rd_wide_addr_next;
        input [OP_ADDR_W -1:0] addr;
        begin
            rd_wide_addr_next_is_last_half <= 1'b0;
            rd_wide_addr_next_is_last      <= 1'b0;
            rd_wide_addr_next              <= addr;
        end
    endtask

    // Seed the narrow look-ahead address and clear both of its look-ahead flags.
    task preset_rd_narrow_addr_next;
        input [OP_ADDR_W -1:0] addr;
        begin
            rd_narrow_addr_next_is_last_half <= 1'b0;
            rd_narrow_addr_next_is_last      <= 1'b0;
            rd_narrow_addr_next              <= addr;
        end
    endtask
    
    // Re-assign the wide bank selects to themselves; used to override an earlier
    // assignment scheduled in the same clocked block.
    task keep_rd_wide_bank;
        begin
            rd_wide_bank_x <= rd_wide_bank_x;
            rd_wide_bank_y <= rd_wide_bank_y;
        end
    endtask

    // Point both wide read ports at the given bank.
    task switch_rd_wide_bank;
        input [BANK_ADDR_W -1:0] bank;
        begin
            rd_wide_bank_x <= bank;
            rd_wide_bank_y <= bank;
        end
    endtask

    // Re-assign the wide read addresses to themselves (position flags are not touched).
    task keep_rd_wide_addr;
        begin
            rd_wide_addr_x <= rd_wide_addr_x;
            rd_wide_addr_y <= rd_wide_addr_y;
        end
    endtask

    // Move both wide read addresses to rd_wide_addr_next and flag whether that
    // address is the last word or the last word of the first half.
    task advance_rd_wide_addr;
        begin
            rd_wide_addr_is_last_half <= (rd_wide_addr_next == word_index_last_half);
            rd_wide_addr_is_last      <= (rd_wide_addr_next == word_index_last);
            //
            rd_wide_addr_x <= rd_wide_addr_next;
            rd_wide_addr_y <= rd_wide_addr_next;
        end
    endtask
    
    // Re-assign the narrow bank selects to themselves; used to override an earlier
    // assignment scheduled in the same clocked block.
    task keep_rd_narrow_bank;
        begin
            rd_narrow_bank_x <= rd_narrow_bank_x;
            rd_narrow_bank_y <= rd_narrow_bank_y;
        end
    endtask

    // Point both narrow read ports at the given bank.
    task switch_rd_narrow_bank;
        input [BANK_ADDR_W -1:0] bank;
        begin
            rd_narrow_bank_x <= bank;
            rd_narrow_bank_y <= bank;
        end
    endtask

    // Re-assign the narrow read addresses to themselves (position flags are not touched).
    task keep_rd_narrow_addr;
        begin
            rd_narrow_addr_x <= rd_narrow_addr_x;
            rd_narrow_addr_y <= rd_narrow_addr_y;
        end
    endtask

    // Move both narrow read addresses to rd_narrow_addr_next and flag whether that
    // address is the last word or the last word of the first half.
    task advance_rd_narrow_addr;
        begin
            rd_narrow_addr_is_last_half <= (rd_narrow_addr_next == word_index_last_half);
            rd_narrow_addr_is_last      <= (rd_narrow_addr_next == word_index_last);
            //
            rd_narrow_addr_x <= rd_narrow_addr_next;
            rd_narrow_addr_y <= rd_narrow_addr_next;
        end
    endtask
    
    // Recompute the wide look-ahead flags from the current (pre-edge) value of
    // rd_wide_addr_next: a flag is raised when that value is one below the
    // corresponding end index.
    task update_rd_wide_addr_flags;
        begin
            rd_wide_addr_next_is_last_half <= (rd_wide_addr_next == (word_index_last_half - 1'b1));
            rd_wide_addr_next_is_last      <= (rd_wide_addr_next == (word_index_last      - 1'b1));
        end
    endtask

    // Narrow counterpart of update_rd_wide_addr_flags.
    task update_rd_narrow_addr_flags;
        begin
            rd_narrow_addr_next_is_last_half <= (rd_narrow_addr_next == (word_index_last_half - 1'b1));
            rd_narrow_addr_next_is_last      <= (rd_narrow_addr_next == (word_index_last      - 1'b1));
        end
    endtask
    
    // Step the wide look-ahead address; wraps to OP_ADDR_ZERO once the
    // "next is last" flag is set. Look-ahead flags are refreshed alongside.
    task advance_rd_wide_addr_next;
        begin
            update_rd_wide_addr_flags;
            rd_wide_addr_next <= rd_wide_addr_next_is_last ? OP_ADDR_ZERO : rd_wide_addr_next + 1'b1;
        end
    endtask

    // Step the narrow look-ahead address; wraps to OP_ADDR_ZERO once the
    // "next is last" flag is set.
    task advance_rd_narrow_addr_next;
        begin
            update_rd_narrow_addr_flags;
            rd_narrow_addr_next <= rd_narrow_addr_next_is_last ? OP_ADDR_ZERO : rd_narrow_addr_next + 1'b1;
        end
    endtask

    // Same as advance_rd_wide_addr_next, but the wrap is controlled by the
    // "next is last half" flag instead.
    task advance_rd_wide_addr_next_half;
        begin
            update_rd_wide_addr_flags;
            rd_wide_addr_next <= rd_wide_addr_next_is_last_half ? OP_ADDR_ZERO : rd_wide_addr_next + 1'b1;
        end
    endtask

    // Same as advance_rd_narrow_addr_next, but the wrap is controlled by the
    // "next is last half" flag instead.
    task advance_rd_narrow_addr_next_half;
        begin
            update_rd_narrow_addr_flags;
            rd_narrow_addr_next <= rd_narrow_addr_next_is_last_half ? OP_ADDR_ZERO : rd_narrow_addr_next + 1'b1;
        end
    endtask

    // Source read bank/address sequencing.
    // Every clock the wide and narrow ports are first preset to BANK_DNC / OP_ADDR_DNC
    // (which also clears the is_last / is_last_half flags); the opcode and the upcoming
    // FSM state then override that default through the tasks above. Because all task
    // bodies use non-blocking assignments, a later task call overrides an earlier one
    // for the same register.
    always @(posedge clk) begin
        //
        preset_rd_wide_bank_addr  (BANK_DNC, OP_ADDR_DNC);
        preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC);
        //
        case (opcode)
            //
            // narrow port only: start at word 0 in PRE1, step in PRE3/BUSY1,
            // hold only the bank in PRE2/PRE4/BUSY2 (the address falls back to the preset)
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_OUTPUT_FROM_NARROW,
            UOP_OPCODE_MODULAR_SUBTRACT_X:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1:        begin keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:              keep_rd_narrow_bank;
                endcase
            //
            // wide (sel_wide_in) and narrow (sel_narrow_in) ports stepped in lock-step
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_MODULAR_SUBTRACT_Z,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr  (sel_wide_in,   OP_ADDR_ZERO); preset_rd_wide_addr_next  (OP_ADDR_ONE);
                                                      preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1:        begin keep_rd_wide_bank;   advance_rd_wide_addr;   advance_rd_wide_addr_next;
                                                      keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:        begin keep_rd_wide_bank; keep_rd_narrow_bank; end
                endcase
            //
            // the wide bank stays at BANK_DNC here; only the wide address counter runs and
            // it wraps on the "last half" flag, while the narrow address wraps on "last"
            UOP_OPCODE_MODULAR_REDUCE_INIT:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr  (BANK_DNC,      OP_ADDR_ZERO); preset_rd_wide_addr_next  (OP_ADDR_ONE);
                                                      preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1:        begin                      advance_rd_wide_addr;   advance_rd_wide_addr_next_half;
                                                      keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:              keep_rd_narrow_bank;
                endcase
            //
            // banks alternate between the *_in selection (PRE1/PRE3/BUSY1) and the *_out
            // selection (PRE2/PRE4/BUSY2); the address is held during the *_out states
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr  (sel_wide_in,   OP_ADDR_ZERO); preset_rd_wide_addr_next  (OP_ADDR_ONE);
                                                      preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE2: begin switch_rd_wide_bank  (sel_wide_out);   keep_rd_wide_addr; 
                                                      switch_rd_narrow_bank(sel_narrow_out); keep_rd_narrow_addr; end                                                      
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1:        begin advance_rd_wide_addr;   advance_rd_wide_addr_next;   switch_rd_wide_bank(sel_wide_in);
                                                      advance_rd_narrow_addr; advance_rd_narrow_addr_next; switch_rd_narrow_bank(sel_narrow_in); end
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:        begin keep_rd_wide_addr;   switch_rd_wide_bank  (sel_wide_out);
                                                      keep_rd_narrow_addr; switch_rd_narrow_bank(sel_narrow_out); end                                                      
                endcase
            //
            // wide port is fixed to BANK_WIDE_N, narrow port uses sel_narrow_in
            UOP_OPCODE_MODULAR_SUBTRACT_Y:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr  (BANK_WIDE_N,   OP_ADDR_ZERO); preset_rd_wide_addr_next  (OP_ADDR_ONE);
                                                      preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1:        begin keep_rd_wide_bank;   advance_rd_wide_addr;   advance_rd_wide_addr_next;
                                                      keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:        begin keep_rd_wide_bank; keep_rd_narrow_bank; end
                endcase
            //
            // wide port starts in BANK_WIDE_L and is switched to BANK_WIDE_H in the BUSY1
            // step where rd_wide_addr_is_last_half_dly[0] is set; the wide address wraps on
            // the "last half" flag. The narrow bank is left at BANK_DNC, only its address runs.
            UOP_OPCODE_MERGE_LH:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr  (BANK_WIDE_L, OP_ADDR_ZERO); preset_rd_wide_addr_next  (OP_ADDR_ONE);
                                                      preset_rd_narrow_bank_addr(BANK_DNC,    OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE3: begin keep_rd_wide_bank; advance_rd_wide_addr;   advance_rd_wide_addr_next_half;
                                                                         advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_BUSY1:        begin if (!rd_wide_addr_is_last_half_dly[0]) keep_rd_wide_bank;
                                                      else                                   switch_rd_wide_bank(BANK_WIDE_H);
                                                      advance_rd_wide_addr;   advance_rd_wide_addr_next_half;
                                                      advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2: keep_rd_wide_bank;
                endcase
            //
        endcase
        //
    end


    //
    // Destination Write Address Logic
    //
    // Two-stage pipeline that picks the wide destination bank for
    // UOP_OPCODE_MODULAR_REDUCE_INIT, separately for the X and Y ports.
    reg                    modular_reduce_init_first_half_x;
    reg                    modular_reduce_init_first_half_y;
    reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_x;
    reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_y;

    always @(posedge clk) begin
        // Stage 1: is the delayed narrow address at or below the last index of the
        // first half? The outer "<=" is the non-blocking assignment, the inner
        // (parenthesised) one is the less-than-or-equal comparison.
        modular_reduce_init_first_half_x <= (rd_narrow_addr_x_dly[1] <= word_index_last_half);
        modular_reduce_init_first_half_y <= (rd_narrow_addr_y_dly[1] <= word_index_last_half);

        // Stage 2: first half goes to BANK_WIDE_L, everything else to BANK_WIDE_H.
        modular_reduce_init_sel_wide_out_x <= (modular_reduce_init_first_half_x ? BANK_WIDE_L : BANK_WIDE_H);
        modular_reduce_init_sel_wide_out_y <= (modular_reduce_init_first_half_y ? BANK_WIDE_L : BANK_WIDE_H);
    end
    
    // Set bank and address of the wide write ports; X and Y are given independently.
    task update_wr_wide_bank_addr;
        input [BANK_ADDR_W -1:0] x_bank;
        input [BANK_ADDR_W -1:0] y_bank;
        input [  OP_ADDR_W -1:0] x_addr;
        input [  OP_ADDR_W -1:0] y_addr;
        begin
            // X port
            wr_wide_xy_bank_x <= x_bank;
            wr_wide_xy_addr_x <= x_addr;
            // Y port
            wr_wide_xy_bank_y <= y_bank;
            wr_wide_xy_addr_y <= y_addr;
        end
    endtask

    // Set bank and address of the narrow write ports; X and Y are given independently.
    task update_wr_narrow_bank_addr;
        input [BANK_ADDR_W -1:0] x_bank;
        input [BANK_ADDR_W -1:0] y_bank;
        input [  OP_ADDR_W -1:0] x_addr;
        input [  OP_ADDR_W -1:0] y_addr;
        begin
            // X port
            wr_narrow_xy_bank_x <= x_bank;
            wr_narrow_xy_addr_x <= x_addr;
            // Y port
            wr_narrow_xy_bank_y <= y_bank;
            wr_narrow_xy_addr_y <= y_addr;
        end
    endtask

    // Destination write bank/address sequencing.
    // Both write ports default to BANK_DNC / OP_ADDR_DNC every clock. In the states
    // listed per opcode, the write address is taken from a tap of the read address
    // delay lines (rd_*_addr_*_dly[3] or [4]), so a word is written back to the same
    // index it was read from. The state lists match those of the write enable decode
    // that precedes this block.
    always @(posedge clk) begin
        //
        update_wr_wide_bank_addr  (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
        update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
        //
        case (opcode)
            //
            // narrow destination, address from delay tap [4]
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_MODULAR_SUBTRACT_X,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY2,
                    WRK_FSM_STATE_LATENCY_POST2,
                    WRK_FSM_STATE_LATENCY_POST4: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[4], rd_narrow_addr_y_dly[4]);
                endcase
            //
            // narrow destination, address from delay tap [3]
            UOP_OPCODE_MERGE_LH:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
                endcase
            //
            // narrow and wide destinations, both from delay tap [3]
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y,
            UOP_OPCODE_MODULAR_SUBTRACT_Z:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3: begin update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
                                                       update_wr_wide_bank_addr  (sel_wide_out,   sel_wide_out,   rd_wide_addr_x_dly[3],   rd_wide_addr_y_dly[3]  ); end
                endcase
            //
            // wide destination; the bank comes from the registered modular_reduce_init_sel_wide_out_* selects
            UOP_OPCODE_MODULAR_REDUCE_INIT:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3: update_wr_wide_bank_addr(modular_reduce_init_sel_wide_out_x, modular_reduce_init_sel_wide_out_y, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);
                endcase
            //
            // wide destination, address from delay tap [4]
            UOP_OPCODE_MODULAR_SUBTRACT_Y:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY2,
                    WRK_FSM_STATE_LATENCY_POST2,
                    WRK_FSM_STATE_LATENCY_POST4: update_wr_wide_bank_addr(sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[4], rd_wide_addr_y_dly[4]);
                endcase
            //
        endcase
        //
    end
    
    
    //
    // UOP_OPCODE_REGULAR_ADD_UNEVEN
    //
    // Sticky flag used by the DSP mode decode for UOP_OPCODE_REGULAR_ADD_UNEVEN.
    // While that opcode is active the flag is cleared in LATENCY_PRE4 and set in
    // BUSY2 once bit 2 of the delayed wide "last half" history is high; in every
    // other situation it keeps its value.
    reg regular_add_uneven_flag;

    always @(posedge clk) begin
        case (opcode)
            UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                case (wrk_fsm_state)
                    WRK_FSM_STATE_LATENCY_PRE4: begin
                        regular_add_uneven_flag <= 1'b0;
                    end
                    WRK_FSM_STATE_BUSY2: begin
                        if (rd_wide_addr_is_last_half_dly[2])
                            regular_add_uneven_flag <= 1'b1;
                    end
                    default: ; // hold
                endcase
            end
            default: ; // hold
        endcase
    end


    //
    // DSP Slice Array
    //
    // DSP operand registers, each DSP48E1_C_W bits wide; they are loaded by the
    // DSP feed logic further down.
    // NOTE(review): the name pattern looks like dsp_<slice>_<channel>_<operand>, but the
    // DSP instances are not visible here - confirm which slice port each one drives.
    reg [DSP48E1_C_W-1:0] dsp_x_x_x; 
    reg [DSP48E1_C_W-1:0] dsp_y_x_x;
    reg [DSP48E1_C_W-1:0] dsp_x_y_x;
    reg [DSP48E1_C_W-1:0] dsp_y_y_x;
    
    reg [DSP48E1_C_W-1:0] dsp_x_x_y; 
    reg [DSP48E1_C_W-1:0] dsp_y_x_y;
    reg [DSP48E1_C_W-1:0] dsp_x_y_y;
    reg [DSP48E1_C_W-1:0] dsp_y_y_y;

    // DSP result buses, DSP48E1_P_W bits wide (presumably the slice P outputs - TODO confirm).
    wire [DSP48E1_P_W-1:0] dsp_x_x_p;
    wire [DSP48E1_P_W-1:0] dsp_y_x_p;
    wire [DSP48E1_P_W-1:0] dsp_x_y_p;
    wire [DSP48E1_P_W-1:0] dsp_y_y_p;
    
    // Results cut down to a memory word: the low WORD_W bits of each result with
    // CARRY_ZERO placed in the bits above them.
    wire [WORD_EXT_W-1:0] dsp_x_x_p_reduced = {CARRY_ZERO, dsp_x_x_p[WORD_W-1:0]}; 
    wire [WORD_EXT_W-1:0] dsp_y_x_p_reduced = {CARRY_ZERO, dsp_y_x_p[WORD_W-1:0]};
    wire [WORD_EXT_W-1:0] dsp_x_y_p_reduced = {CARRY_ZERO, dsp_x_y_p[WORD_W-1:0]};
    wire [WORD_EXT_W-1:0] dsp_y_y_p_reduced = {CARRY_ZERO, dsp_y_y_p[WORD_W-1:0]};
    
    // Per-channel (x / y) DSP control: clock enables with a one-clock delayed copy,
    // plus the OPMODE / ALUMODE / CARRYINSEL values produced by the decode below.
    reg                             dsp_ce_x = 1'b0;
    reg                             dsp_ce_y = 1'b0;
    reg                             dsp_ce_x_dly = 1'b0;
    reg                             dsp_ce_y_dly = 1'b0;
    reg [    DSP48E1_OPMODE_W -1:0] dsp_op_mode_x;
    reg [    DSP48E1_OPMODE_W -1:0] dsp_op_mode_y;
    reg [   DSP48E1_ALUMODE_W -1:0] dsp_alu_mode_x;
    reg [   DSP48E1_ALUMODE_W -1:0] dsp_alu_mode_y;
    reg [DSP48E1_CARRYINSEL_W -1:0] dsp_carry_in_sel_x;
    reg [DSP48E1_CARRYINSEL_W -1:0] dsp_carry_in_sel_y;
    // Carry outputs; their driver is outside this part of the file.
    wire                            dsp_carry_out_x;
    wire                            dsp_carry_out_y;
    

    //
    // DSP - CE
    //
    // One-clock delayed copies of the DSP clock enables.
    always @(posedge clk) begin
        dsp_ce_x_dly <= dsp_ce_x;
        dsp_ce_y_dly <= dsp_ce_y;
    end

    // DSP clock enables: cleared by the asynchronous active-low reset and low by
    // default; for the four arithmetic opcodes below they follow the narrow read
    // enables delayed by two clocks.
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            dsp_ce_x <= 1'b0;
            dsp_ce_y <= 1'b0;
        end else begin
            // default, overridden below for the opcodes that use the DSP slices
            dsp_ce_x <= 1'b0;
            dsp_ce_y <= 1'b0;
            //
            case (opcode)
                UOP_OPCODE_PROPAGATE_CARRIES,
                UOP_OPCODE_MODULAR_SUBTRACT_X,
                UOP_OPCODE_MODULAR_SUBTRACT_Y,
                UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                    dsp_ce_x <= rd_narrow_ena_x_dly2;
                    dsp_ce_y <= rd_narrow_ena_y_dly2;
                end
            endcase
        end
    
    
    //
    // DSP - OPMODE, ALUMODE, CARRYINSEL
    //
    // Registered decode of the DSP control words for the x and y channels.
    // All three controls default to their *_DNC constants each clock and are only
    // overridden for the opcodes below, and then only while the matching two-clock
    // delayed narrow read enable is high. The first word of an operand is detected
    // by comparing the delayed narrow read address (tap [1]) against OP_ADDR_ZERO.
    //
    // Reading hint: inside each "if (rd_narrow_ena_*_dly2) begin ... end" the
    // assignments that are merely aligned under the if/else are NOT part of the
    // else branch - they are unconditional statements of the enclosing begin/end.
    always @(posedge clk) begin
        //
        dsp_op_mode_x <= DSP48E1_OPMODE_DNC;
        dsp_op_mode_y <= DSP48E1_OPMODE_DNC;
        //
        dsp_alu_mode_x <= DSP48E1_ALUMODE_DNC;
        dsp_alu_mode_y <= DSP48E1_ALUMODE_DNC;
        //
        dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_DNC;
        dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_DNC;
        //
        case (opcode)
            //
            // OPMODE depends on first word vs. later words; ALUMODE and CARRYINSEL are fixed
            UOP_OPCODE_PROPAGATE_CARRIES: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x      <= DSP48E1_OPMODE_Z0_YC_X0;
                    else                                         dsp_op_mode_x      <= DSP48E1_OPMODE_ZP17_YC_X0;
                                                                 dsp_alu_mode_x     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                                                                 dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y      <= DSP48E1_OPMODE_Z0_YC_X0;
                    else                                         dsp_op_mode_y      <= DSP48E1_OPMODE_ZP17_YC_X0;
                                                                 dsp_alu_mode_y     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                                                                 dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
                end
                //
            end                
            //
            // fixed OPMODE and ALUMODE (Z minus ...); the carry source is CARRYIN for
            // the first word and CARRYCASCOUT for every later word
            UOP_OPCODE_MODULAR_SUBTRACT_X: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                                                                 dsp_op_mode_x      <= DSP48E1_OPMODE_ZC_Y0_XAB;
                                                                 dsp_alu_mode_x     <= DSP48E1_ALUMODE_Z_MINUS_X_AND_Y_AND_CIN;
                    if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
                    else                                         dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                                                                 dsp_op_mode_y      <= DSP48E1_OPMODE_ZC_Y0_XAB;
                                                                 dsp_alu_mode_y     <= DSP48E1_ALUMODE_Z_MINUS_X_AND_Y_AND_CIN;
                    if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
                    else                                         dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
                end
                //
            end
            //
            // same as MODULAR_SUBTRACT_X except that ALUMODE is the Z plus ... variant
            UOP_OPCODE_MODULAR_SUBTRACT_Y: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                                                                 dsp_op_mode_x      <= DSP48E1_OPMODE_ZC_Y0_XAB;
                                                                 dsp_alu_mode_x     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                    if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
                    else                                         dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                                                                 dsp_op_mode_y      <= DSP48E1_OPMODE_ZC_Y0_XAB;
                                                                 dsp_alu_mode_y     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                    if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
                    else                                         dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
                end
                //
            end
            //
            // three-way OPMODE choice: first word, later words while regular_add_uneven_flag
            // is clear, and later words once the flag is set (X input switched to X0)
            UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x      <= DSP48E1_OPMODE_Z0_YC_XAB;
                    else begin
                        if (!regular_add_uneven_flag)            dsp_op_mode_x      <= DSP48E1_OPMODE_ZP17_YC_XAB;
                        else                                     dsp_op_mode_x      <= DSP48E1_OPMODE_ZP17_YC_X0;
                    end                 
                                                                 dsp_alu_mode_x     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                                                                 dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y      <= DSP48E1_OPMODE_Z0_YC_XAB;
                    else begin
                        if (!regular_add_uneven_flag)            dsp_op_mode_y      <= DSP48E1_OPMODE_ZP17_YC_XAB;
                        else                                     dsp_op_mode_y      <= DSP48E1_OPMODE_ZP17_YC_X0;
                    end                 
                                                                 dsp_alu_mode_y     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                                                                 dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
                end
                //
            end
            //            
        endcase
        //
    end
    
    
    //
    // DSP Feed Logic
    //
    // Registers the operands of the four add/subtract DSP slices.
    //
    // As wired at the slice instances in this module, every dsp_*_*_x register
    // drives a slice's .ab port and every dsp_*_*_y register drives its .c port.
    // The middle letter of the name selects the channel (gated by
    // rd_narrow_ena_x_dly2 or rd_narrow_ena_y_dly2 respectively).
    //
    // All eight operands default to all-X on every clock and only receive a
    // defined value when the current opcode has a branch below AND the
    // corresponding delayed read enable is set.
    always @(posedge clk) begin
        // default: operands are "don't care" (X) unless overridden below
        dsp_x_x_x <= {DSP48E1_C_W{1'bX}};
        dsp_x_x_y <= {DSP48E1_C_W{1'bX}};
        dsp_y_x_x <= {DSP48E1_C_W{1'bX}};
        dsp_y_x_y <= {DSP48E1_C_W{1'bX}};
        dsp_x_y_x <= {DSP48E1_C_W{1'bX}};
        dsp_x_y_y <= {DSP48E1_C_W{1'bX}};
        dsp_y_y_x <= {DSP48E1_C_W{1'bX}};
        dsp_y_y_y <= {DSP48E1_C_W{1'bX}};
        //
        case (opcode)
            //
            // Only the .c operands are loaded here (from the narrow read data);
            // the .ab operands keep their X default. Layout, MSB first:
            // zero padding, bits [WORD_EXT_W-1:WORD_W], a constant 1, bits [WORD_W-1:0].
            // NOTE(review): the inserted 1'b1 presumably lets a carry out of the
            // low word ripple into the upper bits - confirm against the op-mode logic.
            UOP_OPCODE_PROPAGATE_CARRIES: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                end
                //
            end
            //
            // .c  <= zero-extended low word of the narrow "x" read data,
            // .ab <= zero-extended low word of the narrow "y" read data.
            // Both slices of a channel are given identical operands.
            UOP_OPCODE_MODULAR_SUBTRACT_X: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                    dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                    dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                end
                //
            end
            //
            // .c  <= low word of the narrow "x" read data, padded with ONES above it,
            // .ab <= low word of the wide "x" read data, padded with zeroes.
            // Both slices of a channel are given identical operands.
            UOP_OPCODE_MODULAR_SUBTRACT_Y: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]};
                    dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]};
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]};
                    dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]};
                end
                //
            end
            //
            // .c  <= narrow read data with a constant 1 inserted above the low word,
            // .ab <= wide read data with a constant 0 inserted at the same position.
            // Here the "x" slice takes the *_x_din_* sources and the "y" slice
            // takes the *_y_din_* sources.
            UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_x_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_x  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_x  [WORD_W-1:0]};
                    dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                    dsp_y_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_x  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_x  [WORD_W-1:0]};
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_x_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_y  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_y  [WORD_W-1:0]};
                    dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                    dsp_y_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_y  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_y  [WORD_W-1:0]};
                end
                //
            end
            //
        endcase
        //
    end
    
     
    //
    // DSP Slices
    //   
    // Add/subtract slice "x" of channel "x". Its carry output is exported
    // as dsp_carry_out_x; the cascade ports are left unconnected.
    `MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_x_x
    (
        // clock and clock enables
        .clk            (clk),
        .ce_abc         (dsp_ce_x),
        .ce_ctrl        (dsp_ce_x),
        .ce_p           (dsp_ce_x_dly),

        // operation control (shared by both slices of channel "x")
        .op_mode        (dsp_op_mode_x),
        .alu_mode       (dsp_alu_mode_x),
        .carry_in_sel   (dsp_carry_in_sel_x),

        // operands and result
        .ab             (dsp_x_x_x),
        .c              (dsp_x_x_y),
        .p              (dsp_x_x_p),
        .carry_out      (dsp_carry_out_x),

        // cascade path is not used
        .casc_p_in      (),
        .casc_p_out     ()
    );
    
    // Add/subtract slice "y" of channel "x". Neither its carry output nor
    // its cascade ports are connected.
    `MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_y_x
    (
        // clock and clock enables
        .clk            (clk),
        .ce_abc         (dsp_ce_x),
        .ce_ctrl        (dsp_ce_x),
        .ce_p           (dsp_ce_x_dly),

        // operation control (shared by both slices of channel "x")
        .op_mode        (dsp_op_mode_x),
        .alu_mode       (dsp_alu_mode_x),
        .carry_in_sel   (dsp_carry_in_sel_x),

        // operands and result
        .ab             (dsp_y_x_x),
        .c              (dsp_y_x_y),
        .p              (dsp_y_x_p),

        // carry and cascade paths are not used
        .carry_out      (),
        .casc_p_in      (),
        .casc_p_out     ()
    );
    
    // Add/subtract slice "x" of channel "y". Its carry output is exported
    // as dsp_carry_out_y; the cascade ports are left unconnected.
    `MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_x_y
    (
        // clock and clock enables
        .clk            (clk),
        .ce_abc         (dsp_ce_y),
        .ce_ctrl        (dsp_ce_y),
        .ce_p           (dsp_ce_y_dly),

        // operation control (shared by both slices of channel "y")
        .op_mode        (dsp_op_mode_y),
        .alu_mode       (dsp_alu_mode_y),
        .carry_in_sel   (dsp_carry_in_sel_y),

        // operands and result
        .ab             (dsp_x_y_x),
        .c              (dsp_x_y_y),
        .p              (dsp_x_y_p),
        .carry_out      (dsp_carry_out_y),

        // cascade path is not used
        .casc_p_in      (),
        .casc_p_out     ()
    );
    
    // Add/subtract slice "y" of channel "y". Neither its carry output nor
    // its cascade ports are connected.
    `MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_y_y
    (
        // clock and clock enables
        .clk            (clk),
        .ce_abc         (dsp_ce_y),
        .ce_ctrl        (dsp_ce_y),
        .ce_p           (dsp_ce_y_dly),

        // operation control (shared by both slices of channel "y")
        .op_mode        (dsp_op_mode_y),
        .alu_mode       (dsp_alu_mode_y),
        .carry_in_sel   (dsp_carry_in_sel_y),

        // operands and result
        .ab             (dsp_y_y_x),
        .c              (dsp_y_y_y),
        .p              (dsp_y_y_p),

        // carry and cascade paths are not used
        .carry_out      (),
        .casc_p_in      (),
        .casc_p_out     ()
    );
    
    
    //
    // UOP_OPCODE_MODULAR_SUBTRACT_X / UOP_OPCODE_MODULAR_SUBTRACT_Z support logic
    //    

    //
    // IMPORTANT: DSP48E1 turns out to have a very non-obvious feature: when doing _subtraction_,
    //            the CARRYOUT[3] is _NOT_ equivalent to the borrow flag! See "CARRYOUT/CARRYCASCOUT"
    //            section of Appendix A on pp. 55-56 of UG479 for more details.
    //

    // Per-channel borrow flags (one for channel "x", one for channel "y")
    reg modular_subtract_x_brw_flag,
        modular_subtract_y_brw_flag;

    // Per-channel registered word selections
    reg  [WORD_EXT_W -1:0] modular_subtract_x_mux,
                           modular_subtract_y_mux;

    // Low WORD_W bits of each selection with CARRY_W zero bits prepended
    wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced;
    wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced;

    assign modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]};
    assign modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]};

    // Borrow flag capture: in the LATENCY_POST4 state of
    // UOP_OPCODE_MODULAR_SUBTRACT_X both flags are loaded with the inverted
    // DSP carry outputs. In every other opcode/state they hold their value.
    always @(posedge clk) begin
        if ((opcode == UOP_OPCODE_MODULAR_SUBTRACT_X) &&
            (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST4))
            // kept as a single concatenated assignment on purpose
            {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y};
    end
           
    // Word selection for UOP_OPCODE_MODULAR_SUBTRACT_Z: in the LATENCY_PRE4,
    // BUSY2 and LATENCY_POST2 states each channel registers either the
    // wide word (borrow flag set) or the narrow word (borrow flag clear).
    always @(posedge clk) begin
        if (opcode == UOP_OPCODE_MODULAR_SUBTRACT_Z) begin
            case (wrk_fsm_state)
                WRK_FSM_STATE_LATENCY_PRE4,
                WRK_FSM_STATE_BUSY2,
                WRK_FSM_STATE_LATENCY_POST2: begin
                    modular_subtract_x_mux <= modular_subtract_x_brw_flag ? rd_wide_x_din_x_dly1 : rd_narrow_x_din_x_dly1;
                    modular_subtract_y_mux <= modular_subtract_y_brw_flag ? rd_wide_x_din_y_dly1 : rd_narrow_x_din_y_dly1;
                end
            endcase
        end
    end


    //
    // FSM Process
    //
    // State register: asynchronous active-low reset to IDLE, otherwise
    // loads the combinationally computed next state on every clock.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            wrk_fsm_state <= WRK_FSM_STATE_IDLE;
        end else begin
            wrk_fsm_state <= wrk_fsm_state_next;
        end
    end


    //
    // Busy Exit Logic
    //    
    // Registered "leave the busy loop" flag; sampled by the FSM transition
    // logic in the BUSY2 state.
    reg wrk_fsm_done = 1'b0;

    always @(posedge clk) begin
        // cleared by default on every clock
        wrk_fsm_done <= 1'b0;

        // raised only while in BUSY1 with the last narrow address flagged,
        // and only for the opcodes listed here
        if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1) && rd_narrow_addr_is_last) begin
            case (opcode)
                UOP_OPCODE_PROPAGATE_CARRIES,
                UOP_OPCODE_OUTPUT_FROM_NARROW,
                UOP_OPCODE_COPY_CRT_Y2X,
                UOP_OPCODE_MODULAR_REDUCE_INIT,
                UOP_OPCODE_COPY_LADDERS_X2Y,
                UOP_OPCODE_CROSS_LADDERS_X2Y,
                UOP_OPCODE_MODULAR_SUBTRACT_X,
                UOP_OPCODE_MODULAR_SUBTRACT_Y,
                UOP_OPCODE_MODULAR_SUBTRACT_Z,
                UOP_OPCODE_MERGE_LH,
                UOP_OPCODE_REGULAR_ADD_UNEVEN:
                    wrk_fsm_done <= 1'b1;
            endcase
        end
    end


    //
    // FSM Transition Logic
    //
    // Next-state function. The sequence is linear:
    //   IDLE -> PRE1..PRE4 -> (BUSY1 -> BUSY2)* -> POST1..POST4 -> STOP -> IDLE
    // IDLE waits for `ena`; BUSY2 loops back to BUSY1 until wrk_fsm_done is set.
    always @* begin
        // STOP and any state not listed below fall back to IDLE
        wrk_fsm_state_next = WRK_FSM_STATE_IDLE;

        case (wrk_fsm_state)
            WRK_FSM_STATE_IDLE:
                wrk_fsm_state_next = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE;

            WRK_FSM_STATE_LATENCY_PRE1:  wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE2;
            WRK_FSM_STATE_LATENCY_PRE2:  wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE3;
            WRK_FSM_STATE_LATENCY_PRE3:  wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE4;
            WRK_FSM_STATE_LATENCY_PRE4:  wrk_fsm_state_next = WRK_FSM_STATE_BUSY1;

            WRK_FSM_STATE_BUSY1:         wrk_fsm_state_next = WRK_FSM_STATE_BUSY2;
            WRK_FSM_STATE_BUSY2:
                wrk_fsm_state_next = wrk_fsm_done ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY1;

            WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST2;
            WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST3;
            WRK_FSM_STATE_LATENCY_POST3: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST4;
            WRK_FSM_STATE_LATENCY_POST4: wrk_fsm_state_next = WRK_FSM_STATE_STOP;
        endcase
    end


    //
    // Ready Flag Logic
    //
    // Registered ready flag driving the `rdy` output. It is high after
    // reset, drops in IDLE when `ena` is seen, and is raised again in STOP.
    // In all other states it holds its value.
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rdy_reg <= 1'b1;
        else if (wrk_fsm_state == WRK_FSM_STATE_IDLE)
            rdy_reg <= ~ena;
        else if (wrk_fsm_state == WRK_FSM_STATE_STOP)
            rdy_reg <= 1'b1;
    end


    //
    // Source to Destination Data Logic
    //
    // One extra clock of delay on the *_dly1 read data buses
    reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly2,   rd_wide_y_din_x_dly2,
                          rd_wide_x_din_y_dly2,   rd_wide_y_din_y_dly2;
    reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2,
                          rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2;

    // wide buses (transferred as one concatenated vector)
    always @(posedge clk)
        {rd_wide_x_din_x_dly2,
         rd_wide_y_din_x_dly2,
         rd_wide_x_din_y_dly2,
         rd_wide_y_din_y_dly2} <= {rd_wide_x_din_x_dly1,
                                   rd_wide_y_din_x_dly1,
                                   rd_wide_x_din_y_dly1,
                                   rd_wide_y_din_y_dly1};

    // narrow buses (transferred as one concatenated vector)
    always @(posedge clk)
        {rd_narrow_x_din_x_dly2,
         rd_narrow_y_din_x_dly2,
         rd_narrow_x_din_y_dly2,
         rd_narrow_y_din_y_dly2} <= {rd_narrow_x_din_x_dly1,
                                     rd_narrow_y_din_x_dly1,
                                     rd_narrow_x_din_y_dly1,
                                     rd_narrow_y_din_y_dly1};

    // Schedules a non-blocking update of the four wide write-data outputs.
    // Argument order: x_x, y_x, x_y, y_y (matching wr_wide_<a>_dout_<b>).
    task update_wide_dout;
        input [WORD_EXT_W-1:0] x_x;
        input [WORD_EXT_W-1:0] y_x;
        input [WORD_EXT_W-1:0] x_y;
        input [WORD_EXT_W-1:0] y_y;
        begin
            {wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <= {x_x, y_x, x_y, y_y};
        end
    endtask
    
    // Schedules a non-blocking update of the four narrow write-data outputs.
    // Argument order: x_x, y_x, x_y, y_y (matching wr_narrow_<a>_dout_<b>).
    task update_narrow_dout;
        input [WORD_EXT_W-1:0] x_x;
        input [WORD_EXT_W-1:0] y_x;
        input [WORD_EXT_W-1:0] x_y;
        input [WORD_EXT_W-1:0] y_y;
        begin
            {wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <= {x_x, y_x, x_y, y_y};
        end
    endtask

    // Drives the wide and narrow write-data outputs.
    //
    // Every clock both output groups are first set to WORD_EXT_DNC (presumably
    // the "don't care" word constant - confirm at its declaration). The branch
    // for the current opcode then overrides one or both groups, but only in the
    // FSM states listed for it; since the task calls are non-blocking
    // assignments, the later call wins. Opcodes that take their data from the
    // DSP outputs write in BUSY2/LATENCY_POST2/LATENCY_POST4, all others write
    // in BUSY1/LATENCY_POST1/LATENCY_POST3.
    always @(posedge clk) begin
        // defaults, overridden below when the opcode/state pair matches
        update_wide_dout  (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
        update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
        //
        case (opcode)
            //
            // narrow outputs <= reduced DSP results; wide outputs keep the default
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_MODULAR_SUBTRACT_X,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                //
                case (wrk_fsm_state)
                    //
                    WRK_FSM_STATE_BUSY2,
                    WRK_FSM_STATE_LATENCY_POST2,
                    WRK_FSM_STATE_LATENCY_POST4:
                        //
                        update_narrow_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced);
                    // 
                endcase
            //
            // both channels of the narrow and wide outputs <= channel "y" read data
            UOP_OPCODE_COPY_CRT_Y2X:
                //
                case (wrk_fsm_state)
                    //
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3:
                        //
                        begin update_narrow_dout(rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2);        
                              update_wide_dout  (rd_wide_x_din_y_dly2,   rd_wide_y_din_y_dly2,   rd_wide_x_din_y_dly2,   rd_wide_y_din_y_dly2); end
                    //
                endcase
            //
            // wide outputs <= narrow read data; narrow outputs keep the default
            UOP_OPCODE_MODULAR_REDUCE_INIT:
                //
                case (wrk_fsm_state)
                    //
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3:
                        //
                        update_wide_dout(rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2);
                    // 
                endcase
            //
            // per channel: "x" output <= x read data delayed by 1, "y" output <= the
            // same x read data delayed by 2 (no channel crossing)
            UOP_OPCODE_COPY_LADDERS_X2Y:
                //
                case (wrk_fsm_state)
                    //
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3:
                        //
                        begin update_wide_dout  (rd_wide_x_din_x_dly1,   rd_wide_x_din_x_dly2,   rd_wide_x_din_y_dly1,   rd_wide_x_din_y_dly2);
                              update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_x_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_y_dly2); end
                   //
                endcase
            //
            // as COPY_LADDERS_X2Y, except that the "y" outputs take the x read data
            // (delayed by 2) of the OPPOSITE channel
            UOP_OPCODE_CROSS_LADDERS_X2Y:
                //
                case (wrk_fsm_state)
                    //
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3:
                        //
                        begin update_wide_dout  (rd_wide_x_din_x_dly1,   rd_wide_x_din_y_dly2,   rd_wide_x_din_y_dly1,   rd_wide_x_din_x_dly2);
                              update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_y_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_x_dly2); end
                  //
                endcase
            //
            // wide outputs <= reduced DSP results; narrow outputs keep the default
            UOP_OPCODE_MODULAR_SUBTRACT_Y:
                //
                case (wrk_fsm_state)
                    //
                    WRK_FSM_STATE_BUSY2,
                    WRK_FSM_STATE_LATENCY_POST2,
                    WRK_FSM_STATE_LATENCY_POST4:
                        // 
                        update_wide_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced);
                    //
                endcase                
            //
            // all outputs of a channel <= that channel's reduced mux selection
            UOP_OPCODE_MODULAR_SUBTRACT_Z:
                //
                case (wrk_fsm_state)
                    //
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3:
                        //
                        begin update_wide_dout  (modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced);
                              update_narrow_dout(modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced); end
                    // 
                endcase
            //
            // narrow outputs <= wide read data; wide outputs keep the default
            UOP_OPCODE_MERGE_LH:
                //
                case (wrk_fsm_state)
                    //
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3:
                        //
                        update_narrow_dout(rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2);
                        // 
                endcase
            //
        endcase
        //
    end


endmodule