aboutsummaryrefslogblamecommitdiff
path: root/rtl/modexpng_general_worker.v
blob: 0620bd62246c7f4979162187397ace305007108c (plain) (tree)































                                                                           

                              
               


                                  
           








                                                                                                                             

  
 










                                                 
                                                   







                                                             
        


                                                             
                                                                  




















































                                                                     














                                                        
    




                                                        
 


                      


                                                
 


                                                  
 


                                                
 


                                                  




























                                                     


                                                     
 


                                                       
    


                                                     
 


                                                       



























                                                          














                                                   


                               



                                                        
          



                                                            
          

                                                                                                                                                                                                              
          

                                                                                                                                                                                              
          
       

    
      
                               
      

                                                                                                 
    

                                                              
    

                                                                  
    
                                          
          
                         
              

                                 
              

                      

                                 
              
                         
                  



                                               
                      



                                                                 
                           
                  



                                              
                      



                                                                                               
                           
                  

                                             
                      






                                                                                               
                           
                  







                                                               





                   
                                     

      

                                                                                                       
    

                                                              
    

                                                                  
    
                                          
          
                         
              

                                 


                      

                                 
              
                         
                  



                                              
                      



                                                                         
                           
                  



                                              
                      



                                                                                                      

                           

                                               
                      



                                                                       
                           
                    

                   
           
   

      
                                
      










                                             
    





                                                     

                               

                                                                                                               


          









                                                             
    
                                    


                                      












                                                                 


           









                                                     
             

                                                 


           
                             
                                      
             
























                                                                                   


           
                               

                                      

                                         


           




















                                                                                                 

           




                                                                                                     


                                   



                                                                                                      


                                     

















                                                                                                                 
            
 

                               

                                                          
          
                     
              


                                          
                  






                                                                                                                                                           
                       
              












                                                                                                                                                           

                       










                                                                                                                                                           

                       













                                                                                                                                                                           

                       
                                          
                  








                                                                                                                                                           
                       

                                
                  











                                                                                                                                                         
                       
              


               

 
      
                                      
      



                                                              
 









                                                                                                           









                                                                       
    









                                                                           
 
                               
          

                                                                                 
          
                     
              



                                          
                  



                                                                                                                                                              
                       
              









                                                                                                                                                                        
                       









                                                                                                                                                                                                
                  





                                                                                                                                                    

               





















































                                                                                                                                                                                                                
          





























































































                                                                                                                                                                
              
                                          
                  
                                    
                      































                                                                                                                      
                          








































































                                                                                                                                                                                                                                      
                  
                   

                         
          











                                                                                                                          
 



                  
                                          
          
                                                        


                                                        

                      

                            


                               
                             




                                          
                                    
                                           
                                        
                                         



                                          
                                          
                  
                                    

                                                                         
                       
              


               

 





                            












                                                                                                                                


               

 
      
                       




                         
                                          
          
                                                  





                                                  
      
                                       
      













                                                                                                                                                                                                              


                                                                                   
                                                                                 




                                                                                           
                                                                                         
           
 

                               

                                                                                   
          
                     
              
                                         
                  
                                    
                      






                                                                                                                                                                                               
              
                                    
                  











                                                                                                                                                         
                  








                                                                                                                                         
              
                                        
                  











                                                                                                                                                     
                  









                                                                                                                                                     
              
                                          
                  










                                                                                                                                                                                   
                  








                                                                                                                                                                                 
              
                                          
                  











                                                                                                                                                                                     
                  










                                                                                                                                   
                  










                                                                                                                                                                                                   

       
 
         
//======================================================================
//
// Copyright (c) 2019, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
//   be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================

//----------------------------------------------------------------------
// modexpng_general_worker
//
// Port overview (directions and widths are given by the declarations
// in the "Ports" section of the module body):
//
//   clk, rst_n             - clock and active-low reset (the enable
//                            registers are cleared while rst_n is low)
//   ena, rdy               - start input / ready output
//                            NOTE(review): the handshake itself is
//                            implemented outside this excerpt
//   sel_narrow_in/out,
//   sel_wide_in/out        - bank selectors (BANK_ADDR_W bits each)
//   opcode                 - micro-operation code; selects which read
//                            and write enables are pulsed
//   word_index_last,
//   word_index_last_half   - word indices (OP_ADDR_W bits each);
//                            presumably the last word of the operand
//                            and of its half -- TODO confirm
//   wrk_rd_{wide,narrow}_* - read request (ena/bank/addr) outputs and
//                            read data (x_din/y_din) inputs
//   wrk_wr_{wide,narrow}_* - write request (ena/bank/addr) and write
//                            data (x_dout/y_dout) outputs
//
// Every rd/wr port group exists twice, with an _x and an _y suffix.
//----------------------------------------------------------------------
module modexpng_general_worker
(
    clk, rst_n,
    ena, rdy,
    sel_narrow_in, sel_narrow_out,
    sel_wide_in,   sel_wide_out,
    opcode,
    word_index_last, word_index_last_half,
    wrk_rd_wide_xy_ena_x,   wrk_rd_wide_xy_bank_x,   wrk_rd_wide_xy_addr_x,   wrk_rd_wide_x_din_x,    wrk_rd_wide_y_din_x,
    wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x, wrk_rd_narrow_x_din_x,  wrk_rd_narrow_y_din_x,
    wrk_rd_wide_xy_ena_y,   wrk_rd_wide_xy_bank_y,   wrk_rd_wide_xy_addr_y,   wrk_rd_wide_x_din_y,    wrk_rd_wide_y_din_y,
    wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y, wrk_rd_narrow_x_din_y,  wrk_rd_narrow_y_din_y,
    wrk_wr_wide_xy_ena_x,   wrk_wr_wide_xy_bank_x,   wrk_wr_wide_xy_addr_x,   wrk_wr_wide_x_dout_x,   wrk_wr_wide_y_dout_x,
    wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x, wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x,
    wrk_wr_wide_xy_ena_y,   wrk_wr_wide_xy_bank_y,   wrk_wr_wide_xy_addr_y,   wrk_wr_wide_x_dout_y,   wrk_wr_wide_y_dout_y,
    wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y, wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y
);


    //
    // Headers
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_microcode.vh"

    
    //
    // Ports
    //
    // Clock and active-low reset (the enable logic below is sensitive to
    // "negedge rst_n").
    input                                    clk;
    input                                    rst_n;

    // Start / ready pair.
    // NOTE(review): the logic driving rdy is outside this excerpt.
    input                                    ena;
    output                                   rdy;
    
    // Bank selectors, BANK_ADDR_W bits each.
    // Presumably source (in) and destination (out) banks for the narrow and
    // wide storages -- TODO confirm against the address logic.
    input  [              BANK_ADDR_W  -1:0] sel_narrow_in; 
    input  [              BANK_ADDR_W  -1:0] sel_narrow_out; 
    input  [              BANK_ADDR_W  -1:0] sel_wide_in; 
    input  [              BANK_ADDR_W  -1:0] sel_wide_out; 
        
    // Micro-operation code; compared against the UOP_OPCODE_* constants.
    input  [              UOP_OPCODE_W -1:0] opcode;
    
    // Word indices, OP_ADDR_W bits each.
    input  [              OP_ADDR_W    -1:0] word_index_last;
    input  [              OP_ADDR_W    -1:0] word_index_last_half;
    
    // Read port, wide, _x instance: request out, X/Y data words in.
    output                                   wrk_rd_wide_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] wrk_rd_wide_xy_bank_x;
    output [              OP_ADDR_W    -1:0] wrk_rd_wide_xy_addr_x;
    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_x_din_x;
    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_y_din_x;

    // Read port, narrow, _x instance.
    output                                   wrk_rd_narrow_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] wrk_rd_narrow_xy_bank_x;
    output [              OP_ADDR_W    -1:0] wrk_rd_narrow_xy_addr_x;
    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_x_din_x;
    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_y_din_x;
    
    // Read port, wide, _y instance.
    output                                   wrk_rd_wide_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] wrk_rd_wide_xy_bank_y;
    output [              OP_ADDR_W    -1:0] wrk_rd_wide_xy_addr_y;
    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_x_din_y;
    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_y_din_y;

    // Read port, narrow, _y instance.
    output                                   wrk_rd_narrow_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] wrk_rd_narrow_xy_bank_y;
    output [              OP_ADDR_W    -1:0] wrk_rd_narrow_xy_addr_y;
    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_x_din_y;
    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_y_din_y;

    // Write port, wide, _x instance: request and X/Y data words out.
    output                                   wrk_wr_wide_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] wrk_wr_wide_xy_bank_x;
    output [              OP_ADDR_W    -1:0] wrk_wr_wide_xy_addr_x;
    output [              WORD_EXT_W   -1:0] wrk_wr_wide_x_dout_x;
    output [              WORD_EXT_W   -1:0] wrk_wr_wide_y_dout_x;

    // Write port, narrow, _x instance.
    output                                   wrk_wr_narrow_xy_ena_x;
    output [              BANK_ADDR_W  -1:0] wrk_wr_narrow_xy_bank_x;
    output [              OP_ADDR_W    -1:0] wrk_wr_narrow_xy_addr_x;
    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_x_dout_x;
    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_y_dout_x;
    
    // Write port, wide, _y instance.
    output                                   wrk_wr_wide_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] wrk_wr_wide_xy_bank_y;
    output [              OP_ADDR_W    -1:0] wrk_wr_wide_xy_addr_y;
    output [              WORD_EXT_W   -1:0] wrk_wr_wide_x_dout_y;
    output [              WORD_EXT_W   -1:0] wrk_wr_wide_y_dout_y;

    // Write port, narrow, _y instance.
    output                                   wrk_wr_narrow_xy_ena_y;
    output [              BANK_ADDR_W  -1:0] wrk_wr_narrow_xy_bank_y;
    output [              OP_ADDR_W    -1:0] wrk_wr_narrow_xy_addr_y;
    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_x_dout_y;
    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_y_dout_y;


    //
    // FSM Declaration
    //

    // 4-bit state encodings of the worker FSM.
    // Note that the numeric values do not follow the listing order:
    // BUSY1/BUSY2 are 4'hA/4'hB while the LATENCY_POST* states are
    // 4'h5..4'h8. Encoding 4'h9 and 4'hC..4'hE are not assigned here.

    localparam [3:0] WRK_FSM_STATE_IDLE          = 4'h0;
    
    // NOTE(review): the PRE/POST names suggest pipeline fill and drain
    // cycles around the BUSY states; the transition logic is outside this
    // excerpt -- confirm there.
    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1  = 4'h1;
    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2  = 4'h2;
    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE3  = 4'h3;
    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE4  = 4'h4;
    
    localparam [3:0] WRK_FSM_STATE_BUSY1         = 4'hA;
    localparam [3:0] WRK_FSM_STATE_BUSY2         = 4'hB;
    
    localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5;
    localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
    localparam [3:0] WRK_FSM_STATE_LATENCY_POST3 = 4'h7;
    localparam [3:0] WRK_FSM_STATE_LATENCY_POST4 = 4'h8;
    
    localparam [3:0] WRK_FSM_STATE_STOP          = 4'hF;

    // Current state (initialised to IDLE) and next-state value.
    // The read-enable logic decodes wrk_fsm_state_next, the write-enable
    // logic decodes wrk_fsm_state.
    reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
    reg [3:0] wrk_fsm_state_next;


    //
    // Control Signals
    //
    // Internal registers behind the wrk_rd_* / wrk_wr_* output ports; the
    // "Mapping" section connects each of them to the port of the same name.
    // Only the enable bits carry an initial value (1'b0); bank, address and
    // data registers are declared without one.

    // Read request, wide, _x
    reg                    rd_wide_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0] rd_wide_bank_x;
    reg [  OP_ADDR_W -1:0] rd_wide_addr_x; 

    // Read request, narrow, _x
    reg                    rd_narrow_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0] rd_narrow_bank_x;
    reg [  OP_ADDR_W -1:0] rd_narrow_addr_x; 

    // Read request, wide, _y
    reg                    rd_wide_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0] rd_wide_bank_y;
    reg [  OP_ADDR_W -1:0] rd_wide_addr_y; 

    // Read request, narrow, _y
    reg                    rd_narrow_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0] rd_narrow_bank_y;
    reg [  OP_ADDR_W -1:0] rd_narrow_addr_y; 
    
    // Write request and data, wide, _x
    reg                    wr_wide_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_x;
    reg [  OP_ADDR_W -1:0] wr_wide_xy_addr_x;
    reg [ WORD_EXT_W -1:0] wr_wide_x_dout_x;
    reg [ WORD_EXT_W -1:0] wr_wide_y_dout_x;

    // Write request and data, narrow, _x
    reg                    wr_narrow_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x;
    reg [  OP_ADDR_W -1:0] wr_narrow_xy_addr_x;
    reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_x;
    reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_x;

    // Write request and data, wide, _y
    reg                    wr_wide_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_y;
    reg [  OP_ADDR_W -1:0] wr_wide_xy_addr_y;
    reg [ WORD_EXT_W -1:0] wr_wide_x_dout_y;
    reg [ WORD_EXT_W -1:0] wr_wide_y_dout_y;

    // Write request and data, narrow, _y
    reg                    wr_narrow_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y;
    reg [  OP_ADDR_W -1:0] wr_narrow_xy_addr_y;
    reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_y;
    reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_y;


    //
    // Mapping
    //
    // Read request ports: each group is driven as {enable, bank, address}
    // straight from the internal control register of the same name.
    // Field widths on both sides are identical (1, BANK_ADDR_W, OP_ADDR_W).
    assign {wrk_rd_wide_xy_ena_x, wrk_rd_wide_xy_bank_x, wrk_rd_wide_xy_addr_x} =
           {rd_wide_ena_x, rd_wide_bank_x, rd_wide_addr_x};

    assign {wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x} =
           {rd_narrow_ena_x, rd_narrow_bank_x, rd_narrow_addr_x};

    assign {wrk_rd_wide_xy_ena_y, wrk_rd_wide_xy_bank_y, wrk_rd_wide_xy_addr_y} =
           {rd_wide_ena_y, rd_wide_bank_y, rd_wide_addr_y};

    assign {wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y} =
           {rd_narrow_ena_y, rd_narrow_bank_y, rd_narrow_addr_y};

    // Write request ports: {enable, bank, address, X data, Y data}.
    // Field widths: 1, BANK_ADDR_W, OP_ADDR_W, WORD_EXT_W, WORD_EXT_W.
    assign {wrk_wr_wide_xy_ena_x, wrk_wr_wide_xy_bank_x, wrk_wr_wide_xy_addr_x,
            wrk_wr_wide_x_dout_x, wrk_wr_wide_y_dout_x} =
           {wr_wide_xy_ena_x, wr_wide_xy_bank_x, wr_wide_xy_addr_x,
            wr_wide_x_dout_x, wr_wide_y_dout_x};

    assign {wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x,
            wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x} =
           {wr_narrow_xy_ena_x, wr_narrow_xy_bank_x, wr_narrow_xy_addr_x,
            wr_narrow_x_dout_x, wr_narrow_y_dout_x};

    assign {wrk_wr_wide_xy_ena_y, wrk_wr_wide_xy_bank_y, wrk_wr_wide_xy_addr_y,
            wrk_wr_wide_x_dout_y, wrk_wr_wide_y_dout_y} =
           {wr_wide_xy_ena_y, wr_wide_xy_bank_y, wr_wide_xy_addr_y,
            wr_wide_x_dout_y, wr_wide_y_dout_y};

    assign {wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y,
            wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y} =
           {wr_narrow_xy_ena_y, wr_narrow_xy_bank_y, wr_narrow_xy_addr_y,
            wr_narrow_x_dout_y, wr_narrow_y_dout_y};
   
   
    //
    // Delays
    //
    // Four-entry history of each read address. The clocked block that
    // follows shifts a new address into entry [0] every cycle, so entry [k]
    // holds the address from k+1 cycles ago.
    reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:3];
    reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:3];

    reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:3];
    reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:3];
    
    // One-cycle registered copies of the eight read-data input buses.
    reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly1;
    reg [WORD_EXT_W -1:0] rd_wide_y_din_x_dly1;
    reg [WORD_EXT_W -1:0] rd_wide_x_din_y_dly1;
    reg [WORD_EXT_W -1:0] rd_wide_y_din_y_dly1;    
    reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly1;
    reg [WORD_EXT_W -1:0] rd_narrow_y_din_x_dly1;
    reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly1;
    reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly1;
    
    always @(posedge clk) begin
        //
        // Register every read-data input once (no reset, no enable).
        //
        rd_wide_x_din_x_dly1   <= wrk_rd_wide_x_din_x;
        rd_wide_y_din_x_dly1   <= wrk_rd_wide_y_din_x;
        rd_wide_x_din_y_dly1   <= wrk_rd_wide_x_din_y;
        rd_wide_y_din_y_dly1   <= wrk_rd_wide_y_din_y;
        rd_narrow_x_din_x_dly1 <= wrk_rd_narrow_x_din_x;
        rd_narrow_y_din_x_dly1 <= wrk_rd_narrow_y_din_x;
        rd_narrow_x_din_y_dly1 <= wrk_rd_narrow_x_din_y;
        rd_narrow_y_din_y_dly1 <= wrk_rd_narrow_y_din_y;
        //
        // Four-stage address shift registers: stage 0 takes the current
        // read address, every later stage takes its predecessor. All
        // assignments are non-blocking, so each stage sees the value its
        // predecessor held before this clock edge.
        //
        rd_narrow_addr_x_dly[0] <= rd_narrow_addr_x;
        rd_narrow_addr_x_dly[1] <= rd_narrow_addr_x_dly[0];
        rd_narrow_addr_x_dly[2] <= rd_narrow_addr_x_dly[1];
        rd_narrow_addr_x_dly[3] <= rd_narrow_addr_x_dly[2];
        //
        rd_narrow_addr_y_dly[0] <= rd_narrow_addr_y;
        rd_narrow_addr_y_dly[1] <= rd_narrow_addr_y_dly[0];
        rd_narrow_addr_y_dly[2] <= rd_narrow_addr_y_dly[1];
        rd_narrow_addr_y_dly[3] <= rd_narrow_addr_y_dly[2];
        //
        rd_wide_addr_x_dly[0] <= rd_wide_addr_x;
        rd_wide_addr_x_dly[1] <= rd_wide_addr_x_dly[0];
        rd_wide_addr_x_dly[2] <= rd_wide_addr_x_dly[1];
        rd_wide_addr_x_dly[3] <= rd_wide_addr_x_dly[2];
        //
        rd_wide_addr_y_dly[0] <= rd_wide_addr_y;
        rd_wide_addr_y_dly[1] <= rd_wide_addr_y_dly[0];
        rd_wide_addr_y_dly[2] <= rd_wide_addr_y_dly[1];
        rd_wide_addr_y_dly[3] <= rd_wide_addr_y_dly[2];
        //
    end
    
  
    //
    // Source Read Enable Logic
    //
    // Drive both wide read enables (_x and _y) to the same value.
    task _update_wide_rd_en;
        input _en;
        begin
            rd_wide_ena_x <= _en;
            rd_wide_ena_y <= _en;
        end
    endtask

    // Drive both narrow read enables (_x and _y) to the same value.
    task _update_narrow_rd_en;
        input _en;
        begin
            rd_narrow_ena_x <= _en;
            rd_narrow_ena_y <= _en;
        end
    endtask

    // Convenience wrappers: assert / deassert the wide read enables.
    task enable_wide_rd_en;
        _update_wide_rd_en(1'b1);
    endtask

    task disable_wide_rd_en;
        _update_wide_rd_en(1'b0);
    endtask

    // Convenience wrappers: assert / deassert the narrow read enables.
    task enable_narrow_rd_en;
        _update_narrow_rd_en(1'b1);
    endtask

    task disable_narrow_rd_en;
        _update_narrow_rd_en(1'b0);
    endtask
    
    always @(posedge clk or negedge rst_n) begin
        //
        if (!rst_n) begin
            //
            // Asynchronous reset: no read requests.
            //
            disable_wide_rd_en;
            disable_narrow_rd_en;
            //
        end else begin
            //
            // Default for this cycle is "disabled". An enable_* call further
            // down overrides it, because the last non-blocking assignment
            // to a register within one block is the one that takes effect.
            //
            disable_wide_rd_en;
            disable_narrow_rd_en;
            //
            // The enables are registered, so they are decoded from the NEXT
            // FSM state and line up with the cycle in which that state is
            // current.
            //
            case (opcode)
                //
                // Narrow read only; next state PRE1, PRE3 or BUSY1.
                //
                UOP_OPCODE_PROPAGATE_CARRIES,
                UOP_OPCODE_OUTPUT_FROM_NARROW,
                UOP_OPCODE_MODULAR_REDUCE_INIT,
                UOP_OPCODE_MODULAR_SUBTRACT_X:
                    if ((wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE1) ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE3) ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_BUSY1))
                        enable_narrow_rd_en;
                //
                // Wide and narrow read; next state PRE1, PRE3 or BUSY1.
                //
                UOP_OPCODE_COPY_CRT_Y2X,
                UOP_OPCODE_MODULAR_SUBTRACT_Y,
                UOP_OPCODE_MODULAR_SUBTRACT_Z,
                UOP_OPCODE_REGULAR_ADD_UNEVEN:
                    if ((wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE1) ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE3) ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_BUSY1)) begin
                        enable_wide_rd_en;
                        enable_narrow_rd_en;
                    end
                //
                // Wide and narrow read; any of PRE1..PRE4, BUSY1 or BUSY2.
                //
                UOP_OPCODE_COPY_LADDERS_X2Y,
                UOP_OPCODE_CROSS_LADDERS_X2Y:
                    if ((wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE1) ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE2) ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE3) ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE4) ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_BUSY1)        ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_BUSY2)) begin
                        enable_wide_rd_en;
                        enable_narrow_rd_en;
                    end
                //
                // Wide read only; next state PRE1, PRE3 or BUSY1.
                //
                UOP_OPCODE_MERGE_LH:
                    if ((wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE1) ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE3) ||
                        (wrk_fsm_state_next == WRK_FSM_STATE_BUSY1))
                        enable_wide_rd_en;
                //
                // Any other opcode: both read enables stay low.
                //
            endcase
            //
        end
        //
    end


    //
    // Destination Write Enable Logic
    //
    
    // Drive both wide write enables (_x and _y) to the same value.
    task _update_wide_wr_en;
        input _en;
        begin
            wr_wide_xy_ena_x <= _en;
            wr_wide_xy_ena_y <= _en;
        end
    endtask

    // Drive both narrow write enables (_x and _y) to the same value.
    task _update_narrow_wr_en;
        input _en;
        begin
            wr_narrow_xy_ena_x <= _en;
            wr_narrow_xy_ena_y <= _en;
        end
    endtask

    // Convenience wrappers: assert / deassert the wide write enables.
    task enable_wide_wr_en;
        _update_wide_wr_en(1'b1);
    endtask

    task disable_wide_wr_en;
        _update_wide_wr_en(1'b0);
    endtask

    // Convenience wrappers: assert / deassert the narrow write enables.
    task enable_narrow_wr_en;
        _update_narrow_wr_en(1'b1);
    endtask

    task disable_narrow_wr_en;
        _update_narrow_wr_en(1'b0);
    endtask
    
    always @(posedge clk or negedge rst_n) begin
        //
        if (!rst_n) begin
            //
            // Asynchronous reset: no write requests.
            //
            disable_wide_wr_en;
            disable_narrow_wr_en;
            //
        end else begin
            //
            // Default for this cycle is "disabled"; a later enable_* call in
            // the same block overrides it (last non-blocking assignment
            // wins).
            //
            disable_wide_wr_en;
            disable_narrow_wr_en;
            //
            // Every opcode group writes in the same three CURRENT states
            // (BUSY1, LATENCY_POST1, LATENCY_POST3); only the choice of
            // which enables to raise depends on the opcode.
            //
            if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1)         ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST3))
                //
                case (opcode)
                    //
                    // Narrow write only.
                    //
                    UOP_OPCODE_PROPAGATE_CARRIES,
                    UOP_OPCODE_MODULAR_SUBTRACT_X,
                    UOP_OPCODE_MERGE_LH,
                    UOP_OPCODE_REGULAR_ADD_UNEVEN:
                        enable_narrow_wr_en;
                    //
                    // Wide and narrow write.
                    //
                    UOP_OPCODE_COPY_CRT_Y2X,
                    UOP_OPCODE_COPY_LADDERS_X2Y,
                    UOP_OPCODE_CROSS_LADDERS_X2Y,
                    UOP_OPCODE_MODULAR_SUBTRACT_Z: begin
                        enable_wide_wr_en;
                        enable_narrow_wr_en;
                    end
                    //
                    // Wide write only.
                    //
                    UOP_OPCODE_MODULAR_REDUCE_INIT,
                    UOP_OPCODE_MODULAR_SUBTRACT_Y:
                        enable_wide_wr_en;
                    //
                    // Any other opcode: both write enables stay low.
                    //
                endcase
            //
        end
        //
    end
   

    //
    // Source Read Address Logic
    //
    // Address that the advance_rd_*_addr tasks will load into the read
    // address registers on their next invocation.
    reg [OP_ADDR_W -1:0] rd_wide_addr_next;
    reg [OP_ADDR_W -1:0] rd_narrow_addr_next;

    // Set by advance_rd_*_addr when the address being loaded equals
    // word_index_last; cleared by preset_rd_*_bank_addr.
    reg rd_wide_addr_is_last = 1'b0;
    reg rd_narrow_addr_is_last = 1'b0;
    
    // Same as above, but compared against word_index_last_half.
    reg rd_wide_addr_is_last_half = 1'b0;
    reg rd_narrow_addr_is_last_half = 1'b0;

    // Set by update_rd_*_addr_flags when rd_*_addr_next is one below
    // word_index_last; makes advance_rd_*_addr_next wrap to OP_ADDR_ZERO.
    reg rd_wide_addr_next_is_last = 1'b0;
    reg rd_narrow_addr_next_is_last = 1'b0;
    
    // Same as above for word_index_last_half; consumed by the *_next_half
    // advance tasks.
    reg rd_wide_addr_next_is_last_half = 1'b0;
    reg rd_narrow_addr_next_is_last_half = 1'b0;
    
    // 4-deep history of the is_last_half flags (bit 0 is the most recent).
    reg [3:0] rd_wide_addr_is_last_half_dly = 4'h0;
    reg [3:0] rd_narrow_addr_is_last_half_dly = 4'h0;

    // Shift the is_last_half flags into their 4-bit history registers:
    // the newest sample enters at bit 0, older samples move towards bit 3.
    always @(posedge clk) begin
        rd_wide_addr_is_last_half_dly[3:1]   <= rd_wide_addr_is_last_half_dly[2:0];
        rd_wide_addr_is_last_half_dly[0]     <= rd_wide_addr_is_last_half;

        rd_narrow_addr_is_last_half_dly[3:1] <= rd_narrow_addr_is_last_half_dly[2:0];
        rd_narrow_addr_is_last_half_dly[0]   <= rd_narrow_addr_is_last_half;
    end

    // Load the same bank/address pair into both wide read ports (X and Y)
    // and clear the wide "is last" / "is last half" flags.
    task preset_rd_wide_bank_addr;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        begin
            {rd_wide_addr_is_last, rd_wide_addr_is_last_half} <= 2'b00;
            {rd_wide_bank_y, rd_wide_addr_y} <= {bank, addr};
            {rd_wide_bank_x, rd_wide_addr_x} <= {bank, addr};
        end
    endtask

    // Narrow-port counterpart of preset_rd_wide_bank_addr.
    task preset_rd_narrow_bank_addr;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        begin
            {rd_narrow_addr_is_last, rd_narrow_addr_is_last_half} <= 2'b00;
            {rd_narrow_bank_y, rd_narrow_addr_y} <= {bank, addr};
            {rd_narrow_bank_x, rd_narrow_addr_x} <= {bank, addr};
        end
    endtask
      
    // Seed the queued wide read address and clear its look-ahead flags.
    task preset_rd_wide_addr_next;
        input [OP_ADDR_W -1:0] addr;
        begin
            {rd_wide_addr_next_is_last, rd_wide_addr_next_is_last_half} <= 2'b00;
            rd_wide_addr_next <= addr;
        end
    endtask

    // Seed the queued narrow read address and clear its look-ahead flags.
    task preset_rd_narrow_addr_next;
        input [OP_ADDR_W -1:0] addr;
        begin
            {rd_narrow_addr_next_is_last, rd_narrow_addr_next_is_last_half} <= 2'b00;
            rd_narrow_addr_next <= addr;
        end
    endtask
    
    // Hold the wide read bank selectors at their current values.
    task keep_rd_wide_bank;
        begin
            rd_wide_bank_x <= rd_wide_bank_x;
            rd_wide_bank_y <= rd_wide_bank_y;
        end
    endtask

    // Point both wide read ports at the given bank.
    task switch_rd_wide_bank;
        input [BANK_ADDR_W -1:0] bank;
        begin
            rd_wide_bank_x <= bank;
            rd_wide_bank_y <= bank;
        end
    endtask

    // Hold the wide read addresses at their current values.
    task keep_rd_wide_addr;
        begin
            rd_wide_addr_x <= rd_wide_addr_x;
            rd_wide_addr_y <= rd_wide_addr_y;
        end
    endtask

    // Load the queued address into both wide read ports and flag whether it
    // is the last word / the last word of the first half.
    task advance_rd_wide_addr;
        begin
            rd_wide_addr_is_last      <= (rd_wide_addr_next == word_index_last);
            rd_wide_addr_is_last_half <= (rd_wide_addr_next == word_index_last_half);
            rd_wide_addr_x            <= rd_wide_addr_next;
            rd_wide_addr_y            <= rd_wide_addr_next;
        end
    endtask
    
    // Hold the narrow read bank selectors at their current values.
    task keep_rd_narrow_bank;
        begin
            rd_narrow_bank_x <= rd_narrow_bank_x;
            rd_narrow_bank_y <= rd_narrow_bank_y;
        end
    endtask

    // Point both narrow read ports at the given bank.
    task switch_rd_narrow_bank;
        input [BANK_ADDR_W -1:0] bank;
        begin
            rd_narrow_bank_x <= bank;
            rd_narrow_bank_y <= bank;
        end
    endtask

    // Hold the narrow read addresses at their current values.
    task keep_rd_narrow_addr;
        begin
            rd_narrow_addr_x <= rd_narrow_addr_x;
            rd_narrow_addr_y <= rd_narrow_addr_y;
        end
    endtask

    // Load the queued address into both narrow read ports and flag whether
    // it is the last word / the last word of the first half.
    task advance_rd_narrow_addr;
        begin
            rd_narrow_addr_is_last      <= (rd_narrow_addr_next == word_index_last);
            rd_narrow_addr_is_last_half <= (rd_narrow_addr_next == word_index_last_half);
            rd_narrow_addr_x            <= rd_narrow_addr_next;
            rd_narrow_addr_y            <= rd_narrow_addr_next;
        end
    endtask
    
    // Look-ahead flags for the queued wide address: each is set when the
    // current queued value is one below the respective end index, i.e. the
    // value queued after the pending increment will be that end index.
    task update_rd_wide_addr_flags;
        begin
            rd_wide_addr_next_is_last_half <= (rd_wide_addr_next == (word_index_last_half - 1'b1));
            rd_wide_addr_next_is_last      <= (rd_wide_addr_next == (word_index_last      - 1'b1));
        end
    endtask

    // Narrow-port counterpart of update_rd_wide_addr_flags.
    task update_rd_narrow_addr_flags;
        begin
            rd_narrow_addr_next_is_last_half <= (rd_narrow_addr_next == (word_index_last_half - 1'b1));
            rd_narrow_addr_next_is_last      <= (rd_narrow_addr_next == (word_index_last      - 1'b1));
        end
    endtask
    
    // Step the queued wide address by one word, wrapping to OP_ADDR_ZERO
    // once the queued value has reached the last word.
    task advance_rd_wide_addr_next;
        begin
            update_rd_wide_addr_flags;
            rd_wide_addr_next <= rd_wide_addr_next_is_last ? OP_ADDR_ZERO : (rd_wide_addr_next + 1'b1);
        end
    endtask

    // Narrow-port counterpart of advance_rd_wide_addr_next.
    task advance_rd_narrow_addr_next;
        begin
            update_rd_narrow_addr_flags;
            rd_narrow_addr_next <= rd_narrow_addr_next_is_last ? OP_ADDR_ZERO : (rd_narrow_addr_next + 1'b1);
        end
    endtask

    // Like advance_rd_wide_addr_next, but wraps at the end of the first
    // half (word_index_last_half) instead of the full operand.
    task advance_rd_wide_addr_next_half;
        begin
            update_rd_wide_addr_flags;
            rd_wide_addr_next <= rd_wide_addr_next_is_last_half ? OP_ADDR_ZERO : (rd_wide_addr_next + 1'b1);
        end
    endtask

    // Narrow-port counterpart of advance_rd_wide_addr_next_half.
    task advance_rd_narrow_addr_next_half;
        begin
            update_rd_narrow_addr_flags;
            rd_narrow_addr_next <= rd_narrow_addr_next_is_last_half ? OP_ADDR_ZERO : (rd_narrow_addr_next + 1'b1);
        end
    endtask

    // Source read bank/address sequencing, decoded on wrk_fsm_state_next.
    //
    // Every clock the read ports are first parked on BANK_DNC / OP_ADDR_DNC
    // (which also clears the is_last / is_last_half flags); the per-opcode
    // branches below then override whatever must be driven. Because all
    // assignments are non-blocking, the last task invoked for a given register
    // wins, so the order "defaults first, overrides second" is significant.
    //
    // Common pattern:
    //   LATENCY_PRE1             - start at OP_ADDR_ZERO, queue OP_ADDR_ONE
    //   LATENCY_PRE3 / BUSY1     - load the queued address, step the queue
    //   LATENCY_PRE2/PRE4, BUSY2 - hold the bank (address falls back to DNC
    //                              unless explicitly kept)
    always @(posedge clk) begin
        //
        preset_rd_wide_bank_addr  (BANK_DNC, OP_ADDR_DNC);
        preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC);
        //
        case (opcode)
            //
            // narrow port only
            //
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_OUTPUT_FROM_NARROW,
            UOP_OPCODE_MODULAR_SUBTRACT_X:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1:        begin keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:              keep_rd_narrow_bank;
                endcase
            //
            // wide and narrow ports stepped in lockstep over the full operand
            //
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_MODULAR_SUBTRACT_Z,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr  (sel_wide_in,   OP_ADDR_ZERO); preset_rd_wide_addr_next  (OP_ADDR_ONE);
                                                      preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1:        begin keep_rd_wide_bank;   advance_rd_wide_addr;   advance_rd_wide_addr_next;
                                                      keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:        begin keep_rd_wide_bank; keep_rd_narrow_bank; end
                endcase
            //
            // wide bank is left at BANK_DNC here: only the wide address counter
            // is run, and it wraps at the half-operand boundary
            //
            UOP_OPCODE_MODULAR_REDUCE_INIT:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr  (BANK_DNC,      OP_ADDR_ZERO); preset_rd_wide_addr_next  (OP_ADDR_ONE);
                                                      preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1:        begin                      advance_rd_wide_addr;   advance_rd_wide_addr_next_half;
                                                      keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:              keep_rd_narrow_bank;
                endcase
            //
            // both ports alternate between the input bank (PRE1/PRE3/BUSY1) and
            // the output bank (PRE2/PRE4/BUSY2); the address is held while the
            // output bank is selected
            //
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr  (sel_wide_in,   OP_ADDR_ZERO); preset_rd_wide_addr_next  (OP_ADDR_ONE);
                                                      preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE2: begin switch_rd_wide_bank  (sel_wide_out);   keep_rd_wide_addr; 
                                                      switch_rd_narrow_bank(sel_narrow_out); keep_rd_narrow_addr; end                                                      
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1:        begin advance_rd_wide_addr;   advance_rd_wide_addr_next;   switch_rd_wide_bank(sel_wide_in);
                                                      advance_rd_narrow_addr; advance_rd_narrow_addr_next; switch_rd_narrow_bank(sel_narrow_in); end
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:        begin keep_rd_wide_addr;   switch_rd_wide_bank  (sel_wide_out);
                                                      keep_rd_narrow_addr; switch_rd_narrow_bank(sel_narrow_out); end                                                      
                endcase
            //
            // wide port reads the fixed bank BANK_WIDE_N, narrow port reads
            // the selected input bank
            //
            UOP_OPCODE_MODULAR_SUBTRACT_Y:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr  (BANK_WIDE_N,   OP_ADDR_ZERO); preset_rd_wide_addr_next  (OP_ADDR_ONE);
                                                      preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1:        begin keep_rd_wide_bank;   advance_rd_wide_addr;   advance_rd_wide_addr_next;
                                                      keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:        begin keep_rd_wide_bank; keep_rd_narrow_bank; end
                endcase
            //
            // wide reads start in BANK_WIDE_L with the address wrapping at the
            // half-operand boundary; once rd_wide_addr_is_last_half_dly[0] is
            // seen in BUSY1 the bank is switched to BANK_WIDE_H. The narrow
            // bank stays at BANK_DNC; only the narrow address counter is run.
            //
            UOP_OPCODE_MERGE_LH:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr  (BANK_WIDE_L, OP_ADDR_ZERO); preset_rd_wide_addr_next  (OP_ADDR_ONE);
                                                      preset_rd_narrow_bank_addr(BANK_DNC,    OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
                    WRK_FSM_STATE_LATENCY_PRE3: begin keep_rd_wide_bank; advance_rd_wide_addr;   advance_rd_wide_addr_next_half;
                                                                         advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_BUSY1:        begin if (!rd_wide_addr_is_last_half_dly[0]) keep_rd_wide_bank;
                                                      else                                   switch_rd_wide_bank(BANK_WIDE_H);
                                                      advance_rd_wide_addr;   advance_rd_wide_addr_next_half;
                                                      advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2: keep_rd_wide_bank;
                endcase
            //
        endcase
        //
    end


    //
    // Destination Write Address Logic
    //
    // Destination wide bank for MODULAR_REDUCE_INIT, per channel: while the
    // (delayed) narrow read address is still within the first half of the
    // operand the write goes to BANK_WIDE_L, afterwards to BANK_WIDE_H.
    // The decision is pipelined over two register stages.
    reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_x;
    reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_y;
    reg                    modular_reduce_init_first_half_x;
    reg                    modular_reduce_init_first_half_y;

    always @(posedge clk) begin
        // stage 1: "address <= last index of first half" comparison
        // (the inner "<=" is a less-or-equal compare, not an assignment)
        modular_reduce_init_first_half_x <= (rd_narrow_addr_x_dly[1] <= word_index_last_half);
        modular_reduce_init_first_half_y <= (rd_narrow_addr_y_dly[1] <= word_index_last_half);

        // stage 2: translate the comparison result into a bank selector
        modular_reduce_init_sel_wide_out_x <= !modular_reduce_init_first_half_x ? BANK_WIDE_H : BANK_WIDE_L;
        modular_reduce_init_sel_wide_out_y <= !modular_reduce_init_first_half_y ? BANK_WIDE_H : BANK_WIDE_L;
    end
    
    // Set the wide write port bank/address, independently for the X and Y
    // channels.
    task update_wr_wide_bank_addr;
        input [BANK_ADDR_W -1:0] x_bank;
        input [BANK_ADDR_W -1:0] y_bank;
        input [  OP_ADDR_W -1:0] x_addr;
        input [  OP_ADDR_W -1:0] y_addr;
        begin
            // Y channel
            {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {y_bank, y_addr};
            // X channel
            {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {x_bank, x_addr};
        end
    endtask

    // Set the narrow write port bank/address, independently for the X and Y
    // channels.
    task update_wr_narrow_bank_addr;
        input [BANK_ADDR_W -1:0] x_bank;
        input [BANK_ADDR_W -1:0] y_bank;
        input [  OP_ADDR_W -1:0] x_addr;
        input [  OP_ADDR_W -1:0] y_addr;
        begin
            // Y channel
            {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {y_bank, y_addr};
            // X channel
            {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {x_bank, x_addr};
        end
    endtask

    // Destination write bank/address selection. Both write ports are parked
    // on BANK_DNC / OP_ADDR_DNC by default; in BUSY1 / LATENCY_POST1 /
    // LATENCY_POST3 the ports used by the current opcode are pointed at their
    // target bank, with the write address taken from entry [3] of the delayed
    // read address.
    always @(posedge clk) begin
        // defaults, overridden below when a write is due
        update_wr_wide_bank_addr  (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
        update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);

        case (wrk_fsm_state)
            WRK_FSM_STATE_BUSY1,
            WRK_FSM_STATE_LATENCY_POST1,
            WRK_FSM_STATE_LATENCY_POST3:
                case (opcode)
                    // narrow destination only
                    UOP_OPCODE_PROPAGATE_CARRIES,
                    UOP_OPCODE_MODULAR_SUBTRACT_X,
                    UOP_OPCODE_MERGE_LH,
                    UOP_OPCODE_REGULAR_ADD_UNEVEN:
                        update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
                    // narrow and wide destinations
                    UOP_OPCODE_COPY_CRT_Y2X,
                    UOP_OPCODE_COPY_LADDERS_X2Y,
                    UOP_OPCODE_CROSS_LADDERS_X2Y,
                    UOP_OPCODE_MODULAR_SUBTRACT_Z:
                        begin
                            update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
                            update_wr_wide_bank_addr  (sel_wide_out,   sel_wide_out,   rd_wide_addr_x_dly[3],   rd_wide_addr_y_dly[3]  );
                        end
                    // wide destination, bank chosen per channel (L or H half)
                    UOP_OPCODE_MODULAR_REDUCE_INIT:
                        update_wr_wide_bank_addr(modular_reduce_init_sel_wide_out_x, modular_reduce_init_sel_wide_out_y, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);
                    // wide destination only
                    UOP_OPCODE_MODULAR_SUBTRACT_Y:
                        update_wr_wide_bank_addr(sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);
                endcase
        endcase
    end
 
    
    //
    // UOP_OPCODE_PROPAGATE_CARRIES
    //
    // Carry carried over from the previous word, one register per
    // (operand, channel) combination.
    reg [CARRY_W -1:0] propagate_carries_x_x_cry_r;
    reg [CARRY_W -1:0] propagate_carries_y_x_cry_r;
    reg [CARRY_W -1:0] propagate_carries_x_y_cry_r;
    reg [CARRY_W -1:0] propagate_carries_y_y_cry_r;
    
    // Current (delayed) narrow read word plus the stored carry, zero-extended
    // by WORD_W bits (presumably WORD_EXT_W == WORD_W + CARRY_W -- TODO confirm).
    wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_x_cry_r};
    wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_x_cry_r};
    wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_y_cry_r};
    wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_y_cry_r};
    
    // Registered copies of the sums above.
    reg [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_r;
    reg [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_r;
    reg [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_r;
    reg [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_r;
    
    // Bits above WORD_W of the registered sum: the carry out to the next word.
    wire [CARRY_W -1:0] propagate_carries_x_x_w_cry_msb = propagate_carries_x_x_w_cry_r[WORD_EXT_W -1:WORD_W];
    wire [CARRY_W -1:0] propagate_carries_y_x_w_cry_msb = propagate_carries_y_x_w_cry_r[WORD_EXT_W -1:WORD_W];
    wire [CARRY_W -1:0] propagate_carries_x_y_w_cry_msb = propagate_carries_x_y_w_cry_r[WORD_EXT_W -1:WORD_W];
    wire [CARRY_W -1:0] propagate_carries_y_y_w_cry_msb = propagate_carries_y_y_w_cry_r[WORD_EXT_W -1:WORD_W];
    
    // Low WORD_W bits of the registered sum: the word with the carry removed.
    wire [WORD_W -1:0] propagate_carries_x_x_w_cry_lsb = propagate_carries_x_x_w_cry_r[WORD_W -1:0];
    wire [WORD_W -1:0] propagate_carries_y_x_w_cry_lsb = propagate_carries_y_x_w_cry_r[WORD_W -1:0];
    wire [WORD_W -1:0] propagate_carries_x_y_w_cry_lsb = propagate_carries_x_y_w_cry_r[WORD_W -1:0];
    wire [WORD_W -1:0] propagate_carries_y_y_w_cry_lsb = propagate_carries_y_y_w_cry_r[WORD_W -1:0];
    
    // The low word padded back to WORD_EXT_W bits with CARRY_W zero bits on top.
    wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_x_w_cry_lsb};
    wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_x_w_cry_lsb};
    wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_y_w_cry_lsb};
    wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_y_w_cry_lsb};
    
    // Load all four propagate-carries carry registers at once.
    task _propagate_carries_update_cry;
        input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry;
        begin
            propagate_carries_x_x_cry_r <= x_x_cry;
            propagate_carries_y_x_cry_r <= y_x_cry;
            propagate_carries_x_y_cry_r <= x_y_cry;
            propagate_carries_y_y_cry_r <= y_y_cry;
        end
    endtask

    // Reset the carry registers to zero (start of a new operand).
    task propagate_carries_clear_cry;
        begin
            _propagate_carries_update_cry(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO);
        end
    endtask

    // Capture the carry-out bits of the registered sums for the next word.
    task propagate_carries_store_cry;
        begin
            _propagate_carries_update_cry(propagate_carries_x_x_w_cry_msb,
                                          propagate_carries_y_x_w_cry_msb,
                                          propagate_carries_x_y_w_cry_msb,
                                          propagate_carries_y_y_w_cry_msb);
        end
    endtask

    // Load all four propagate-carries sum registers at once.
    task _propagate_carries_update_sum_w_cry;
        input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry;
        begin
            propagate_carries_x_x_w_cry_r <= x_x_sum_w_cry;
            propagate_carries_y_x_w_cry_r <= y_x_sum_w_cry;
            propagate_carries_x_y_w_cry_r <= x_y_sum_w_cry;
            propagate_carries_y_y_w_cry_r <= y_y_sum_w_cry;
        end
    endtask

    // Register the combinational "word + carry" sums.
    task propagate_carries_store_sum_w_cry;
        begin
            _propagate_carries_update_sum_w_cry(propagate_carries_x_x_w_cry,
                                                propagate_carries_y_x_w_cry,
                                                propagate_carries_x_y_w_cry,
                                                propagate_carries_y_y_w_cry);
        end
    endtask

    // Sequencing of the propagate-carries datapath. The carry registers are
    // cleared in LATENCY_PRE3 and reloaded in BUSY1 / LATENCY_POST1; the sum
    // registers are loaded in the alternate states LATENCY_PRE4 / BUSY2 /
    // LATENCY_POST2. Nothing is updated for any other opcode.
    always @(posedge clk)
        case (opcode)
            UOP_OPCODE_PROPAGATE_CARRIES:
                case (wrk_fsm_state)
                    // carry register
                    WRK_FSM_STATE_LATENCY_PRE3:
                        propagate_carries_clear_cry;
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1:
                        propagate_carries_store_cry;
                    // sum register
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2,
                    WRK_FSM_STATE_LATENCY_POST2:
                        propagate_carries_store_sum_w_cry;
                endcase
        endcase
    
    
    //
    // UOP_OPCODE_MODULAR_SUBTRACT_X
    // UOP_OPCODE_MODULAR_SUBTRACT_Y
    //
    // Running borrow of the word-serial subtraction (MODULAR_SUBTRACT_X),
    // one bit per channel. Also read by MODULAR_SUBTRACT_Z to pick the result.
    reg modular_subtract_x_brw_r;
    reg modular_subtract_y_brw_r;
    
    // Running carry of the word-serial addition (MODULAR_SUBTRACT_Y),
    // one bit per channel.
    reg modular_subtract_x_cry_r;
    reg modular_subtract_y_cry_r;

    // narrow_x - narrow_y - borrow_in; bit WORD_W of the result is the borrow out.
    wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r};
    wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r};

    // narrow_x + wide_x + carry_in; bit WORD_W of the result is the carry out.
    // Each channel must chain on its own *carry* register: the Y channel used
    // to add modular_subtract_y_brw_r here, which injected the stale borrow
    // left over from MODULAR_SUBTRACT_X into every word and ignored the carry
    // that modular_subtract_store_cry saves in modular_subtract_y_cry_r.
    wire [WORD_W:0] modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r};
    wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_cry_r};

    // Registered difference (word plus borrow-out bit), per channel.
    reg [WORD_W:0] modular_subtract_x_w_brw_r;
    reg [WORD_W:0] modular_subtract_y_w_brw_r;

    // Registered sum (word plus carry-out bit), per channel.
    reg [WORD_W:0] modular_subtract_x_w_cry_r;
    reg [WORD_W:0] modular_subtract_y_w_cry_r;
    
    // Borrow out of the registered difference.
    wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W];
    wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W];

    // Carry out of the registered sum.
    wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W];
    wire modular_subtract_y_w_cry_msb = modular_subtract_y_w_cry_r[WORD_W];
    
    // Low WORD_W bits of the registered difference.
    wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0];
    wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0];

    // Low WORD_W bits of the registered sum.
    wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0];
    wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0];

    // Difference word padded to WORD_EXT_W bits with CARRY_W zero bits on top.
    wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb};
    wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb};

    // Sum word padded to WORD_EXT_W bits with CARRY_W zero bits on top.
    wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb};
    wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb};
    
    // Word selected by MODULAR_SUBTRACT_Z (narrow or wide read data,
    // depending on the stored borrow), per channel.
    reg  [WORD_EXT_W -1:0] modular_subtract_x_mux;
    reg  [WORD_EXT_W -1:0] modular_subtract_y_mux;
    
    // Selected word with everything above WORD_W forced to zero.
    wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]};
    wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]};
    
    // Load both borrow registers.
    task _modular_subtract_update_brw;
        input x_brw, y_brw;
        begin
            modular_subtract_x_brw_r <= x_brw;
            modular_subtract_y_brw_r <= y_brw;
        end
    endtask

    // Load both carry registers.
    task _modular_subtract_update_cry;
        input x_cry, y_cry;
        begin
            modular_subtract_x_cry_r <= x_cry;
            modular_subtract_y_cry_r <= y_cry;
        end
    endtask

    // Borrow chain: reset at the start, then follow the registered difference.
    task modular_subtract_clear_brw;
        begin
            _modular_subtract_update_brw(1'b0, 1'b0);
        end
    endtask

    task modular_subtract_store_brw;
        begin
            _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb);
        end
    endtask

    // Carry chain: reset at the start, then follow the registered sum.
    task modular_subtract_clear_cry;
        begin
            _modular_subtract_update_cry(1'b0, 1'b0);
        end
    endtask

    task modular_subtract_store_cry;
        begin
            _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb);
        end
    endtask

    // Load both registered differences.
    task _modular_subtract_update_diff_w_brw;
        input [WORD_W:0] x_diff_w_brw, y_diff_w_brw;
        begin
            modular_subtract_x_w_brw_r <= x_diff_w_brw;
            modular_subtract_y_w_brw_r <= y_diff_w_brw;
        end
    endtask

    // Load both registered sums.
    task _modular_subtract_update_sum_w_cry;
        input [WORD_W:0] x_sum_w_cry, y_sum_w_cry;
        begin
            modular_subtract_x_w_cry_r <= x_sum_w_cry;
            modular_subtract_y_w_cry_r <= y_sum_w_cry;
        end
    endtask

    // Register the combinational difference / sum of the current word.
    task modular_subtract_store_diff_w_brw;
        begin
            _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw);
        end
    endtask

    task modular_subtract_store_sum_w_cry;
        begin
            _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry);
        end
    endtask
    
    // Sequencing of the three modular-subtract micro-operations:
    //   _X : word-serial subtraction with borrow chain
    //   _Y : word-serial addition with carry chain
    //   _Z : per-word selection between narrow and wide read data, steered
    //        by the borrow left in the borrow registers
    always @(posedge clk)
        case (opcode)

            UOP_OPCODE_MODULAR_SUBTRACT_X:
                case (wrk_fsm_state)
                    // borrow register
                    WRK_FSM_STATE_LATENCY_PRE3:
                        modular_subtract_clear_brw;
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3:
                        // POST3 is included so the very last borrow is captured too
                        modular_subtract_store_brw;
                    // difference register
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2,
                    WRK_FSM_STATE_LATENCY_POST2:
                        modular_subtract_store_diff_w_brw;
                endcase

            UOP_OPCODE_MODULAR_SUBTRACT_Y:
                case (wrk_fsm_state)
                    // carry register
                    WRK_FSM_STATE_LATENCY_PRE3:
                        modular_subtract_clear_cry;
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1:
                        modular_subtract_store_cry;
                    // sum register
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2,
                    WRK_FSM_STATE_LATENCY_POST2:
                        modular_subtract_store_sum_w_cry;
                endcase

            UOP_OPCODE_MODULAR_SUBTRACT_Z:
                case (wrk_fsm_state)
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2,
                    WRK_FSM_STATE_LATENCY_POST2:
                        begin
                            // borrow set -> take the wide word, otherwise the narrow word
                            modular_subtract_x_mux <= modular_subtract_x_brw_r ? rd_wide_x_din_x_dly1 : rd_narrow_x_din_x_dly1;
                            modular_subtract_y_mux <= modular_subtract_y_brw_r ? rd_wide_x_din_y_dly1 : rd_narrow_x_din_y_dly1;
                        end
                endcase

        endcase


    //
    // UOP_OPCODE_REGULAR_ADD_UNEVEN
    //
    // Carry carried over from the previous word, one register per
    // (operand, channel) combination.
    reg [CARRY_W -1:0] regular_add_uneven_x_x_cry_r;
    reg [CARRY_W -1:0] regular_add_uneven_y_x_cry_r;
    reg [CARRY_W -1:0] regular_add_uneven_x_y_cry_r;
    reg [CARRY_W -1:0] regular_add_uneven_y_y_cry_r;
    
    // "msb" flavour of the sum: narrow read word plus stored carry only.
    wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_msb_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_x_cry_r};
    wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_msb_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_x_cry_r};
    wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_msb_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_y_cry_r};
    wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_msb_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_y_cry_r};
    
    // "lsb" flavour of the sum: the above plus the wide read word.
    wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_lsb_w_cry = regular_add_uneven_x_x_msb_w_cry + rd_wide_x_din_x_dly1;
    wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_lsb_w_cry = regular_add_uneven_y_x_msb_w_cry + rd_wide_y_din_x_dly1;
    wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_lsb_w_cry = regular_add_uneven_x_y_msb_w_cry + rd_wide_x_din_y_dly1;
    wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_lsb_w_cry = regular_add_uneven_y_y_msb_w_cry + rd_wide_y_din_y_dly1;
    
    // Registered sum (either flavour, selected by the sequencing logic).
    reg [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_r;
    reg [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_r;
    reg [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_r;
    reg [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_r;
    
    // Bits above WORD_W of the registered sum: the carry out to the next word.
    wire [CARRY_W -1:0] regular_add_uneven_x_x_w_cry_msb = regular_add_uneven_x_x_w_cry_r[WORD_EXT_W -1:WORD_W];
    wire [CARRY_W -1:0] regular_add_uneven_y_x_w_cry_msb = regular_add_uneven_y_x_w_cry_r[WORD_EXT_W -1:WORD_W];
    wire [CARRY_W -1:0] regular_add_uneven_x_y_w_cry_msb = regular_add_uneven_x_y_w_cry_r[WORD_EXT_W -1:WORD_W];
    wire [CARRY_W -1:0] regular_add_uneven_y_y_w_cry_msb = regular_add_uneven_y_y_w_cry_r[WORD_EXT_W -1:WORD_W];
    
    // Low WORD_W bits of the registered sum.
    wire [WORD_W -1:0] regular_add_uneven_x_x_w_cry_lsb = regular_add_uneven_x_x_w_cry_r[WORD_W -1:0];
    wire [WORD_W -1:0] regular_add_uneven_y_x_w_cry_lsb = regular_add_uneven_y_x_w_cry_r[WORD_W -1:0];
    wire [WORD_W -1:0] regular_add_uneven_x_y_w_cry_lsb = regular_add_uneven_x_y_w_cry_r[WORD_W -1:0];
    wire [WORD_W -1:0] regular_add_uneven_y_y_w_cry_lsb = regular_add_uneven_y_y_w_cry_r[WORD_W -1:0];
    
    // The low word padded back to WORD_EXT_W bits with CARRY_W zero bits on top.
    wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_x_w_cry_lsb};
    wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_x_w_cry_lsb};
    wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_y_w_cry_lsb};
    wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_y_w_cry_lsb};
    
    // High while the "lsb" flavour (with the wide word added) must be stored;
    // dropped once the delayed wide is_last_half flag is seen.
    reg regular_add_uneven_store_lsb_now;
    
    // Load all four regular-add-uneven carry registers at once.
    task _regular_add_uneven_update_cry;
        input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry;
        begin
            regular_add_uneven_x_x_cry_r <= x_x_cry;
            regular_add_uneven_y_x_cry_r <= y_x_cry;
            regular_add_uneven_x_y_cry_r <= x_y_cry;
            regular_add_uneven_y_y_cry_r <= y_y_cry;
        end
    endtask

    // Reset the carry registers to zero (start of a new operand).
    task regular_add_uneven_clear_cry;
        begin
            _regular_add_uneven_update_cry(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO);
        end
    endtask

    // Capture the carry-out bits of the registered sums for the next word.
    task regular_add_uneven_store_cry;
        begin
            _regular_add_uneven_update_cry(regular_add_uneven_x_x_w_cry_msb,
                                           regular_add_uneven_y_x_w_cry_msb,
                                           regular_add_uneven_x_y_w_cry_msb,
                                           regular_add_uneven_y_y_w_cry_msb);
        end
    endtask

    // Load all four regular-add-uneven sum registers at once.
    task _regular_add_uneven_update_sum_w_cry;
        input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry;
        begin
            regular_add_uneven_x_x_w_cry_r <= x_x_sum_w_cry;
            regular_add_uneven_y_x_w_cry_r <= y_x_sum_w_cry;
            regular_add_uneven_x_y_w_cry_r <= x_y_sum_w_cry;
            regular_add_uneven_y_y_w_cry_r <= y_y_sum_w_cry;
        end
    endtask

    // Register the sums that include the wide operand word.
    task regular_add_uneven_store_sum_lsb_w_cry;
        begin
            _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_lsb_w_cry,
                                                 regular_add_uneven_y_x_lsb_w_cry,
                                                 regular_add_uneven_x_y_lsb_w_cry,
                                                 regular_add_uneven_y_y_lsb_w_cry);
        end
    endtask

    // Register the sums of narrow word and carry only.
    task regular_add_uneven_store_sum_msb_w_cry;
        begin
            _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_msb_w_cry,
                                                 regular_add_uneven_y_x_msb_w_cry,
                                                 regular_add_uneven_x_y_msb_w_cry,
                                                 regular_add_uneven_y_y_msb_w_cry);
        end
    endtask

    // LSB/MSB capture selector: raised in LATENCY_PRE3, dropped in BUSY1 once
    // rd_wide_addr_is_last_half_dly[3] is seen; holds its value in every other state.
    always @(posedge clk) begin
        case (wrk_fsm_state)
            WRK_FSM_STATE_LATENCY_PRE3:
                regular_add_uneven_store_lsb_now <= 1'b1;
            WRK_FSM_STATE_BUSY1:
                if (rd_wide_addr_is_last_half_dly[3])
                    regular_add_uneven_store_lsb_now <= 1'b0;
        endcase
    end
            
    // Per-state updates of the carry registers and the sum-with-carry registers;
    // states not listed leave both register groups unchanged.
    always @(posedge clk) begin
        case (wrk_fsm_state)
            // --- carry registers ---
            WRK_FSM_STATE_LATENCY_PRE3:
                regular_add_uneven_clear_cry;
            WRK_FSM_STATE_BUSY1,
            WRK_FSM_STATE_LATENCY_POST1:
                regular_add_uneven_store_cry;
            // --- sum-with-carry registers ---
            WRK_FSM_STATE_LATENCY_PRE4:
                regular_add_uneven_store_sum_lsb_w_cry;
            WRK_FSM_STATE_BUSY2: begin
                // LSB variant while the selector is set, MSB variant afterwards
                if (regular_add_uneven_store_lsb_now)
                    regular_add_uneven_store_sum_lsb_w_cry;
                else
                    regular_add_uneven_store_sum_msb_w_cry;
            end
            WRK_FSM_STATE_LATENCY_POST2:
                regular_add_uneven_store_sum_msb_w_cry;
        endcase
    end


    //
    // FSM Process
    //
    // State register: asynchronous active-low reset to IDLE, otherwise takes the
    // combinationally computed next state on every rising clock edge.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            wrk_fsm_state <= WRK_FSM_STATE_IDLE;
        else
            wrk_fsm_state <= wrk_fsm_state_next;
    end


    //
    // Busy Exit Logic
    //    
    reg wrk_fsm_done = 1'b0;

    // Registered exit flag. It is cleared on every clock and set only when the FSM
    // is in BUSY1, rd_narrow_addr_is_last is high and the opcode is one of those
    // listed below; the transition logic samples it in BUSY2.
    always @(posedge clk) begin
        wrk_fsm_done <= 1'b0;
        case (wrk_fsm_state)
            WRK_FSM_STATE_BUSY1:
                if (rd_narrow_addr_is_last)
                    case (opcode)
                        UOP_OPCODE_PROPAGATE_CARRIES,
                        UOP_OPCODE_OUTPUT_FROM_NARROW,
                        UOP_OPCODE_COPY_CRT_Y2X,
                        UOP_OPCODE_MODULAR_REDUCE_INIT,
                        UOP_OPCODE_COPY_LADDERS_X2Y,
                        UOP_OPCODE_CROSS_LADDERS_X2Y,
                        UOP_OPCODE_MODULAR_SUBTRACT_X,
                        UOP_OPCODE_MODULAR_SUBTRACT_Y,
                        UOP_OPCODE_MODULAR_SUBTRACT_Z,
                        UOP_OPCODE_MERGE_LH,
                        UOP_OPCODE_REGULAR_ADD_UNEVEN:
                            wrk_fsm_done <= 1'b1;
                    endcase
        endcase
    end


    //
    // FSM Transition Logic
    //
    // Next-state decode. The sequence is linear (PRE1..PRE4, BUSY1/BUSY2 loop,
    // POST1..POST4, STOP) with two decisions: IDLE waits for ena, and BUSY2 leaves
    // the loop when wrk_fsm_done is set.
    always @* begin
        // Fallback for any state value not listed below
        wrk_fsm_state_next = WRK_FSM_STATE_IDLE;
        case (wrk_fsm_state)
            WRK_FSM_STATE_IDLE:
                wrk_fsm_state_next = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE;
            WRK_FSM_STATE_LATENCY_PRE1:
                wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE2;
            WRK_FSM_STATE_LATENCY_PRE2:
                wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE3;
            WRK_FSM_STATE_LATENCY_PRE3:
                wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE4;
            WRK_FSM_STATE_LATENCY_PRE4:
                wrk_fsm_state_next = WRK_FSM_STATE_BUSY1;
            WRK_FSM_STATE_BUSY1:
                wrk_fsm_state_next = WRK_FSM_STATE_BUSY2;
            WRK_FSM_STATE_BUSY2:
                wrk_fsm_state_next = wrk_fsm_done ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY1;
            WRK_FSM_STATE_LATENCY_POST1:
                wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST2;
            WRK_FSM_STATE_LATENCY_POST2:
                wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST3;
            WRK_FSM_STATE_LATENCY_POST3:
                wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST4;
            WRK_FSM_STATE_LATENCY_POST4:
                wrk_fsm_state_next = WRK_FSM_STATE_STOP;
            WRK_FSM_STATE_STOP:
                wrk_fsm_state_next = WRK_FSM_STATE_IDLE;
        endcase
    end


    //
    // Ready Flag Logic
    //
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    // rdy is high after reset, follows ~ena while the FSM is in IDLE, is set again
    // in STOP, and holds its value in every other state.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            rdy_reg <= 1'b1;
        end else begin
            case (wrk_fsm_state)
                WRK_FSM_STATE_IDLE: rdy_reg <= ~ena;
                WRK_FSM_STATE_STOP: rdy_reg <= 1'b1;
            endcase
        end
    end


    //
    // Source to Destination Data Logic
    //
    // Second delay stage of the read data: each *_dly2 register holds the value
    // its *_dly1 counterpart had one clock earlier.
    reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly2,   rd_wide_y_din_x_dly2,
                          rd_wide_x_din_y_dly2,   rd_wide_y_din_y_dly2;
    reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2,
                          rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2;

    always @(posedge clk) begin
        // wide bank
        {rd_wide_x_din_x_dly2,
         rd_wide_y_din_x_dly2,
         rd_wide_x_din_y_dly2,
         rd_wide_y_din_y_dly2} <= {rd_wide_x_din_x_dly1,
                                   rd_wide_y_din_x_dly1,
                                   rd_wide_x_din_y_dly1,
                                   rd_wide_y_din_y_dly1};
        // narrow bank
        {rd_narrow_x_din_x_dly2,
         rd_narrow_y_din_x_dly2,
         rd_narrow_x_din_y_dly2,
         rd_narrow_y_din_y_dly2} <= {rd_narrow_x_din_x_dly1,
                                     rd_narrow_y_din_x_dly1,
                                     rd_narrow_x_din_y_dly1,
                                     rd_narrow_y_din_y_dly1};
    end

    // Drives the four wide write-data registers (order: x_x, y_x, x_y, y_y) with a
    // single non-blocking assignment.
    task update_wide_dout;
        input [WORD_EXT_W-1:0] x_x;
        input [WORD_EXT_W-1:0] y_x;
        input [WORD_EXT_W-1:0] x_y;
        input [WORD_EXT_W-1:0] y_y;
        begin
            {wr_wide_x_dout_x,
             wr_wide_y_dout_x,
             wr_wide_x_dout_y,
             wr_wide_y_dout_y} <= {x_x, y_x, x_y, y_y};
        end
    endtask
    
    // Drives the four narrow write-data registers (order: x_x, y_x, x_y, y_y) with
    // a single non-blocking assignment.
    task update_narrow_dout;
        input [WORD_EXT_W-1:0] x_x;
        input [WORD_EXT_W-1:0] y_x;
        input [WORD_EXT_W-1:0] x_y;
        input [WORD_EXT_W-1:0] y_y;
        begin
            {wr_narrow_x_dout_x,
             wr_narrow_y_dout_x,
             wr_narrow_x_dout_y,
             wr_narrow_y_dout_y} <= {x_x, y_x, x_y, y_y};
        end
    endtask

    // Write-data selection. Both output groups default to WORD_EXT_DNC on every
    // clock; a later call in the same clock overrides that default. All opcodes
    // handled here produce data in the same three states (BUSY1, LATENCY_POST1,
    // LATENCY_POST3), so the state is checked once and the opcode picks the source.
    always @(posedge clk) begin
        update_wide_dout  (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
        update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);

        case (wrk_fsm_state)
            WRK_FSM_STATE_BUSY1,
            WRK_FSM_STATE_LATENCY_POST1,
            WRK_FSM_STATE_LATENCY_POST3:
                case (opcode)

                    // narrow <- carry-propagated words
                    UOP_OPCODE_PROPAGATE_CARRIES:
                        update_narrow_dout(propagate_carries_x_x_w_cry_reduced,
                                           propagate_carries_y_x_w_cry_reduced,
                                           propagate_carries_x_y_w_cry_reduced,
                                           propagate_carries_y_y_w_cry_reduced);

                    // both *_x and *_y outputs <- the *_din_y inputs (narrow and wide)
                    UOP_OPCODE_COPY_CRT_Y2X: begin
                        update_narrow_dout(rd_narrow_x_din_y_dly2,
                                           rd_narrow_y_din_y_dly2,
                                           rd_narrow_x_din_y_dly2,
                                           rd_narrow_y_din_y_dly2);
                        update_wide_dout  (rd_wide_x_din_y_dly2,
                                           rd_wide_y_din_y_dly2,
                                           rd_wide_x_din_y_dly2,
                                           rd_wide_y_din_y_dly2);
                    end

                    // wide <- narrow, lane for lane
                    UOP_OPCODE_MODULAR_REDUCE_INIT:
                        update_wide_dout(rd_narrow_x_din_x_dly2,
                                         rd_narrow_y_din_x_dly2,
                                         rd_narrow_x_din_y_dly2,
                                         rd_narrow_y_din_y_dly2);

                    // x outputs <- x inputs (dly1), y outputs <- x inputs (dly2)
                    UOP_OPCODE_COPY_LADDERS_X2Y: begin
                        update_wide_dout  (rd_wide_x_din_x_dly1,
                                           rd_wide_x_din_x_dly2,
                                           rd_wide_x_din_y_dly1,
                                           rd_wide_x_din_y_dly2);
                        update_narrow_dout(rd_narrow_x_din_x_dly1,
                                           rd_narrow_x_din_x_dly2,
                                           rd_narrow_x_din_y_dly1,
                                           rd_narrow_x_din_y_dly2);
                    end

                    // as COPY_LADDERS_X2Y, but the y outputs take the opposite (_x/_y) x input
                    UOP_OPCODE_CROSS_LADDERS_X2Y: begin
                        update_wide_dout  (rd_wide_x_din_x_dly1,
                                           rd_wide_x_din_y_dly2,
                                           rd_wide_x_din_y_dly1,
                                           rd_wide_x_din_x_dly2);
                        update_narrow_dout(rd_narrow_x_din_x_dly1,
                                           rd_narrow_x_din_y_dly2,
                                           rd_narrow_x_din_y_dly1,
                                           rd_narrow_x_din_x_dly2);
                    end

                    // narrow <- *_w_brw_reduced results
                    UOP_OPCODE_MODULAR_SUBTRACT_X:
                        update_narrow_dout(modular_subtract_x_w_brw_reduced,
                                           modular_subtract_x_w_brw_reduced,
                                           modular_subtract_y_w_brw_reduced,
                                           modular_subtract_y_w_brw_reduced);

                    // wide <- *_w_cry_reduced results
                    UOP_OPCODE_MODULAR_SUBTRACT_Y:
                        update_wide_dout(modular_subtract_x_w_cry_reduced,
                                         modular_subtract_x_w_cry_reduced,
                                         modular_subtract_y_w_cry_reduced,
                                         modular_subtract_y_w_cry_reduced);

                    // wide and narrow <- *_mux_reduced results
                    UOP_OPCODE_MODULAR_SUBTRACT_Z: begin
                        update_wide_dout  (modular_subtract_x_mux_reduced,
                                           modular_subtract_x_mux_reduced,
                                           modular_subtract_y_mux_reduced,
                                           modular_subtract_y_mux_reduced);
                        update_narrow_dout(modular_subtract_x_mux_reduced,
                                           modular_subtract_x_mux_reduced,
                                           modular_subtract_y_mux_reduced,
                                           modular_subtract_y_mux_reduced);
                    end

                    // narrow <- wide, lane for lane
                    UOP_OPCODE_MERGE_LH:
                        update_narrow_dout(rd_wide_x_din_x_dly2,
                                           rd_wide_y_din_x_dly2,
                                           rd_wide_x_din_y_dly2,
                                           rd_wide_y_din_y_dly2);

                    // narrow <- uneven-add results
                    UOP_OPCODE_REGULAR_ADD_UNEVEN:
                        update_narrow_dout(regular_add_uneven_x_x_w_cry_reduced,
                                           regular_add_uneven_y_x_w_cry_reduced,
                                           regular_add_uneven_x_y_w_cry_reduced,
                                           regular_add_uneven_y_y_w_cry_reduced);

                endcase
        endcase
    end


endmodule