// Source path: rtl/modexpng_recombinator_block.v
// Repository blob: f6e23e525a699973080c2427fc13ce4ae53bb563
// (The blank lines that follow are residue of the web "blame" view this file was captured from.)































                                                                           

                                  
               









                                                                                                           

  
 


              
                                     
                                       
 
 










                                                                  
    

                                                                    
 




                                                                     
 




                                                                       
 




                                                                       




              

                                                     




              

                                                      

             
                                              
                                  

                                                                








                                
                                          
          

                                                    






                


                            

             

                                                          

            
                                       

            

                            

                    
                                  

                  
                                

                     

                                                                

                    
                                             




                                        












                                                  
    










                                                




                                



                                                                                         

                  



                     
              









                                                   

       


                                                                
                                                      




                                                                                                                    

               


                                                       
                                                      

                                                                                                                                                     

               


                                                         
                                                      

                                                                                                                                                       

               


                                                          
                                                      




                                                                                                                                                 

               


                                                       
                                                      




                                                                                  

               






                                                                                      
                                                      














                                                                             

               


                                                        
                                                      

                                                                                                                                                    

               


                                                          
                                                      

                                                                                                                                                      

               


















                                                                                                                            






                                                                                   

          


                                                                
                                                      














                                                                             

               


                                                       
                                                      

                                                                                                                                                  

               


                                                         
                                                      

                                                                                                                                                    

               


                                                          
                                                         














                                                                                                                                  

               


                                                                 
                                                      




                                                                                                                                                                                      

               


                                                       
                                                      

                                                                                                                                                   

               


                                                          
                                                      

                                                                                                                                                      

               


                                                       
                                                      





                                                                                   

               


                                                          
                                                      





                                                                                                                                

               


                                                        
                                                      





                                                                                                                                     

               


                                                           
                                                      





                                                                                                                                        

               


                                                       
                                                      





                                                                                                                      

               


                                                          
                                                      





                                                                                                                                                                   

               







                                         

                                      
                                          


                                                                                                        
                                                                                                                   
 





                                                   





                                                                                   



                                       


                                           







                                            


                                           







                                            



                                           







                                            



                                           







                                            







                                                 

                                                                                          
          

                                                                                                                                             


          















                                                                

                                                             
                      

                                            







                                               

                                            






                             



                                                       












                                                                                                                                                 



                                                         







                                                                                                                                  
                                                          



                                             



                                                          
















                                                                                                                                                    

                                                            


                                             
                                                          




























                                                 
                                                                              




















                                                           
                                         
                              
                                            


                                                           

                                                      



                                        
                                         









                                                                    

                                                                                 



               













                                                                   


                                 
                                          
          
                         


                                              

                                                             


                   













                                                  
 




                                                
 
 




                                 
    

                        




                                   


                                                             
 


                                     
    



                          
















                                                  




                  

                       
 
                         
 

                         

                                    




                   
                                



                                      

















                                                           

                                   
             



                                                     



                      




                                        
             



                                    




                                   




                                        
             



                                      
                                     
           


                      




                                        
             



                                    




                                   




                                                       


                    




                                                         


                  




                                                       


                    
                                                                              


                      
                                                                                


                    
                                                                              


                      


                                                     


                      


                                                     


                     

                                                                                      

           




                                                                                      






                        
                     
                                         


                     
                                         
           
       
    


                   







                            


                                                           

                   











                                                                                
 

                                                                                            
    




                                                                       
 



                                  




                                


                                    



                                

                                                                                                                                                    
                  
                                                                                                                                 
                  

                                                                                                                                 
                  




                                                                                                                                         




                                




                                                                                                                 

                                                                                                                          
                                                                                           








                          


                                    




                                
                                      



                                







                                                                                                                                   







                      


                                     




                                


                                    
                   
              
                                
                  

                                                                                                                                                 
                  
                                                                                                                               








                                                                                                                                      
                               
              
                                











                                                                                                                                                






                   


                       

                         

                                 

                            


                                                                                                                                      
                   

 
         
//======================================================================
//
// Copyright (c) 2019, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
//   be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================

module modexpng_recombinator_block
(
    clk, rst_n,
    ena, rdy,
    fsm_state_next,
    word_index_last,
    dsp_xy_ce_p,
    dsp_x_p, dsp_y_p,
    col_index, col_index_last,
    rd_narrow_xy_addr, rd_narrow_xy_bank,
    rcmb_wide_xy_bank,   rcmb_wide_xy_addr,   rcmb_wide_x_dout,   rcmb_wide_y_dout,   rcmb_wide_xy_valid,
    rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid,
    rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
);


    //
    // Headers
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_mmm_dual_fsm.vh"


    // Clock: the always blocks visible in this file are triggered on its rising edge.
    input                                         clk;
    // Active-low reset; used asynchronously (negedge rst_n) by the dsp_xy_ce_p delay register.
    input                                         rst_n;
    // When high, rcmb_mode is reloaded from fsm_state_next on the clock edge.
    input                                         ena;
    // NOTE(review): presumably a "ready/done" flag -- its driver is not in this part of the file.
    output                                        rdy;
    // Upcoming FSM state; decoded to select the recombination mode.
    input  [MMM_FSM_STATE_W                 -1:0] fsm_state_next;
    input  [      OP_ADDR_W                 -1:0] word_index_last;
    // Registered once into dsp_xy_ce_p_dly1.
    input                                         dsp_xy_ce_p;
    // Flattened buses: NUM_MULTS_AUX words of MAC_W bits each, word z at bits [z*MAC_W +: MAC_W]
    // (see gen_dsp_xy_p_split).
    input  [          MAC_W * NUM_MULTS_AUX -1:0] dsp_x_p;
    input  [          MAC_W * NUM_MULTS_AUX -1:0] dsp_y_p;
    input  [    COL_INDEX_W                 -1:0] col_index;
    input  [    COL_INDEX_W                 -1:0] col_index_last;
    
    // NOTE(review): presumably the bank/address currently being read from the narrow
    // operand memory -- confirm against the instantiating module.
    input  [    BANK_ADDR_W                 -1:0] rd_narrow_xy_bank;
    input  [      OP_ADDR_W                 -1:0] rd_narrow_xy_addr;

    // "rcmb wide" output group: bank, address, X/Y data words and a valid strobe.
    output [    BANK_ADDR_W                 -1:0] rcmb_wide_xy_bank;
    output [      OP_ADDR_W                 -1:0] rcmb_wide_xy_addr;
    output [     WORD_EXT_W                 -1:0] rcmb_wide_x_dout;
    output [     WORD_EXT_W                 -1:0] rcmb_wide_y_dout;
    output                                        rcmb_wide_xy_valid;

    // "rcmb narrow" output group: same shape as the wide group.
    output [    BANK_ADDR_W                 -1:0] rcmb_narrow_xy_bank;
    output [      OP_ADDR_W                 -1:0] rcmb_narrow_xy_addr;
    output [     WORD_EXT_W                 -1:0] rcmb_narrow_x_dout;
    output [     WORD_EXT_W                 -1:0] rcmb_narrow_y_dout;
    output                                        rcmb_narrow_xy_valid;

    // "rdct narrow" output group: same shape as the two groups above.
    output [    BANK_ADDR_W                 -1:0] rdct_narrow_xy_bank;
    output [      OP_ADDR_W                 -1:0] rdct_narrow_xy_addr;
    output [     WORD_EXT_W                 -1:0] rdct_narrow_x_dout;
    output [     WORD_EXT_W                 -1:0] rdct_narrow_y_dout;
    output                                        rdct_narrow_xy_valid;


    //
    // Latches
    //
    // One MAC_W-bit storage word per auxiliary multiplier (NUM_MULTS_AUX entries) for
    // the X and Y channels. No initial value is given here; the logic that writes
    // these arrays is not in this part of the file.
    reg [MAC_W-1:0] dsp_x_p_latch[0:NUM_MULTS_AUX-1];
    reg [MAC_W-1:0] dsp_y_p_latch[0:NUM_MULTS_AUX-1];


    //
    // Mapping
    //
    // Per-multiplier views of the flattened dsp_x_p / dsp_y_p input buses:
    // entry z is the MAC_W-bit slice that starts at bit z*MAC_W.
    wire [MAC_W-1:0] dsp_x_p_split[0:NUM_MULTS_AUX-1];
    wire [MAC_W-1:0] dsp_y_p_split[0:NUM_MULTS_AUX-1];

    genvar z;
    generate
        for (z = 0; z < NUM_MULTS_AUX; z = z + 1) begin : gen_dsp_xy_p_split
            // bits [(z+1)*MAC_W-1 : z*MAC_W] of each bus belong to multiplier z
            assign dsp_y_p_split[z] = dsp_y_p[(z+1)*MAC_W-1 : z*MAC_W];
            assign dsp_x_p_split[z] = dsp_x_p[(z+1)*MAC_W-1 : z*MAC_W];
        end
    endgenerate


    //
    // Delays
    //
    // dsp_xy_ce_p delayed by one clock. Cleared asynchronously while rst_n is low;
    // the declaration initializer also gives it a power-up value of 0.
    reg dsp_xy_ce_p_dly1 = 1'b0;

    always @(posedge clk or negedge rst_n)
    begin
        if (~rst_n)
            dsp_xy_ce_p_dly1 <= 1'b0;
        else
            dsp_xy_ce_p_dly1 <= dsp_xy_ce_p;
    end


    //
    // Registers
    //
    
    // valid / aux flags (power-up value 0 via the declaration initializers)
    // NOTE(review): "_lsb" / "_msb" presumably refer to the lower and upper parts of a
    // recombined word -- the logic driving these registers is not in this part of the file.
    reg xy_valid_lsb = 1'b0;
    reg xy_aux_lsb   = 1'b0;
    reg xy_valid_msb = 1'b0;
    
    // bitmap: one bit per multiplier (NUM_MULTS wide), power-up value all zeros
    reg [NUM_MULTS-1:0] xy_bitmap_lsb = {NUM_MULTS{1'b0}};
    reg [NUM_MULTS-1:0] xy_bitmap_msb = {NUM_MULTS{1'b0}};
    
    // index: MAC_INDEX_W bits, declared without an initial value
    reg [MAC_INDEX_W-1:0] xy_index_lsb;
    
    // purge flags
    reg xy_purge_lsb = 1'b0;
    reg xy_purge_msb = 1'b0;
    
    // valid - latch
    reg xy_valid_latch_lsb = 1'b0;
    
    // aux - latch
    reg xy_aux_latch_lsb = 1'b0;
    
    // bitmap - latch
    reg [NUM_MULTS-1:0] xy_bitmap_latch_lsb = {NUM_MULTS{1'b0}};
    reg [NUM_MULTS-1:0] xy_bitmap_latch_msb = {NUM_MULTS{1'b0}};

    // index - latch (declared without an initial value, like xy_index_lsb)
    reg [MAC_INDEX_W-1:0] xy_index_latch_lsb;
    
    // purge - latch
    reg       xy_purge_latch_lsb = 1'b0;
    reg       xy_purge_latch_msb = 1'b0;


    //
    // Anticipatory Values
    //
    // Each *_adv signal is a six-entry array indexed 1..6 (note: the index starts at 1,
    // not 0). Every entry is given a start-up value by the initial block further down.
    // NOTE(review): presumably entry k holds the value expected k steps ahead -- confirm
    // against the logic that fills these arrays, which is not in this part of the file.
    reg                    xy_valid_lsb_adv [1:6];
    reg                    xy_valid_msb_adv [1:6];
    reg                    xy_aux_lsb_adv   [1:6];
    reg [NUM_MULTS   -1:0] xy_bitmap_lsb_adv[1:6];
    reg [NUM_MULTS   -1:0] xy_bitmap_msb_adv[1:6];
    reg [MAC_INDEX_W -1:0] xy_index_lsb_adv [1:6];
    reg [MAC_INDEX_W -1:0] xy_index_msb_adv [1:6];
    reg                    xy_purge_lsb_adv [1:6];
    reg                    xy_purge_msb_adv [1:6];
    
    
    //
    // Recombination Mode
    //
    // Encoding of the recombination mode register.
    localparam [1:0] RCMB_MODE_UNUSED    = 2'd0;
    localparam [1:0] RCMB_MODE_SQUARE    = 2'd1;
    localparam [1:0] RCMB_MODE_TRIANGLE  = 2'd2;
    localparam [1:0] RCMB_MODE_RECTANGLE = 2'd3;

    reg [1:0] rcmb_mode = RCMB_MODE_UNUSED;

    // While ena is high the mode is re-evaluated every clock from fsm_state_next:
    // one of the three *_COL_0_BUSY states selects the matching mode, any other
    // state selects UNUSED. With ena low the register holds its value.
    always @(posedge clk)
    begin
        if (ena)
        begin
            // default first; the last non-blocking assignment in the block wins
            rcmb_mode <= RCMB_MODE_UNUSED;
            case (fsm_state_next)
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:    rcmb_mode <= RCMB_MODE_SQUARE;
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:  rcmb_mode <= RCMB_MODE_TRIANGLE;
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= RCMB_MODE_RECTANGLE;
                default:                                 ; // keep RCMB_MODE_UNUSED
            endcase
        end
    end


    //
    // Initialization
    //
    // Start-up values for all six entries (1..6) of every *_adv array:
    // flags and bitmaps start cleared, indices start as all-X.
    integer i;
    initial
    begin
        for (i = 1; i <= 6; i = i + 1)
        begin
            // single-bit flags
            xy_valid_lsb_adv [i] = 1'b0;
            xy_valid_msb_adv [i] = 1'b0;
            xy_aux_lsb_adv   [i] = 1'b0;
            xy_purge_lsb_adv [i] = 1'b0;
            xy_purge_msb_adv [i] = 1'b0;
            // per-multiplier bitmaps
            xy_bitmap_lsb_adv[i] = {NUM_MULTS{1'b0}};
            xy_bitmap_msb_adv[i] = {NUM_MULTS{1'b0}};
            // indices are left undefined
            xy_index_lsb_adv [i] = {MAC_INDEX_W{1'bX}};
            xy_index_msb_adv [i] = {MAC_INDEX_W{1'bX}};
        end
    end
    
    // Returns 1 when the upper address field narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W]
    // equals col_index_value, otherwise 0.
    // col_index_last_value and narrow_xy_bank_value are accepted but not used here.
    function                     calc_square_triangle_valid_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            // default: no match
            calc_square_triangle_valid_lsb = 1'b0;
            if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
                calc_square_triangle_valid_lsb = 1'b1;
        end
    endfunction

    // Square-mode alias: passes all four arguments, unchanged and in order, to
    // calc_square_triangle_valid_lsb and returns its result.
    function                     calc_square_valid_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            calc_square_valid_lsb =
                calc_square_triangle_valid_lsb(
                    col_index_value,
                    col_index_last_value,
                    narrow_xy_bank_value,
                    narrow_xy_addr_value
                );
        end
    endfunction

    // Triangle-mode alias: passes all four arguments, unchanged and in order, to
    // calc_square_triangle_valid_lsb and returns its result.
    function                     calc_triangle_valid_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            calc_triangle_valid_lsb =
                calc_square_triangle_valid_lsb(
                    col_index_value,
                    col_index_last_value,
                    narrow_xy_bank_value,
                    narrow_xy_addr_value
                );
        end
    endfunction
    
    // Rectangle-mode valid flag: when the upper address field
    // narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] equals col_index_value, the result
    // is (narrow_xy_bank_value != BANK_NARROW_EXT); without an address match it is 0.
    // col_index_last_value is accepted but not used here.
    function                     calc_rectangle_valid_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            // default: address does not belong to the current column
            calc_rectangle_valid_lsb = 1'b0;
            if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
                calc_rectangle_valid_lsb = (narrow_xy_bank_value != BANK_NARROW_EXT);
        end
    endfunction

    // Returns 1 when narrow_xy_bank_value is BANK_NARROW_EXT, otherwise 0.
    // Only the bank argument is examined; the column and address arguments are unused.
    function                     calc_triangle_aux_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            calc_triangle_aux_lsb = 1'b0;
            if (narrow_xy_bank_value == BANK_NARROW_EXT)
                calc_triangle_aux_lsb = 1'b1;
        end
    endfunction
    
    //
    // The one-hot decode below is sized by NUM_MULTS and MAC_INDEX_W instead of
    // literal 8-bit / 3-bit constants.
    // TODO: any lookup table in this file that still uses literal widths needs the
    //       same treatment before anything different from NUM_MULTS = 8 is used.
    //
    // Returns a NUM_MULTS-bit one-hot bitmap: when the upper address field
    // narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] equals col_index_value, the bit
    // selected by the low MAC_INDEX_W address bits is set; otherwise the result is
    // all zeros. Assumes NUM_MULTS == 2**MAC_INDEX_W so every index maps to a bit.
    // col_index_last_value and narrow_xy_bank_value are accepted but not used here.
    //
    function  [  NUM_MULTS -1:0] calc_square_triangle_bitmap_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        //
        begin
            // Assign a defined default first so the result never depends on a
            // previous call (the old case statement had no default branch).
            calc_square_triangle_bitmap_lsb = {NUM_MULTS{1'b0}};
            if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
                calc_square_triangle_bitmap_lsb[narrow_xy_addr_value[MAC_INDEX_W-1:0]] = 1'b1;
        end
        //
    endfunction

    // Square-mode alias: passes all four arguments, unchanged and in order, to
    // calc_square_triangle_bitmap_lsb and returns its result.
    function  [  NUM_MULTS -1:0] calc_square_bitmap_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            calc_square_bitmap_lsb =
                calc_square_triangle_bitmap_lsb(
                    col_index_value,
                    col_index_last_value,
                    narrow_xy_bank_value,
                    narrow_xy_addr_value
                );
        end
    endfunction

    // Triangle-mode alias: passes all four arguments, unchanged and in order, to
    // calc_square_triangle_bitmap_lsb and returns its result.
    function  [  NUM_MULTS -1:0] calc_triangle_bitmap_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            calc_triangle_bitmap_lsb =
                calc_square_triangle_bitmap_lsb(
                    col_index_value,
                    col_index_last_value,
                    narrow_xy_bank_value,
                    narrow_xy_addr_value
                );
        end
    endfunction

    //
    // Rectangle-mode one-hot bitmap, NUM_MULTS bits wide. The bit selected by the low
    // MAC_INDEX_W address bits is set only when BOTH conditions hold:
    //   - narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] equals col_index_value, and
    //   - narrow_xy_bank_value is not BANK_NARROW_EXT.
    // In every other case the result is all zeros.
    // The decode is sized by NUM_MULTS / MAC_INDEX_W rather than literal 8-bit / 3-bit
    // constants; it assumes NUM_MULTS == 2**MAC_INDEX_W so every index maps to a bit.
    // col_index_last_value is accepted but not used here.
    //
    function  [  NUM_MULTS -1:0] calc_rectangle_bitmap_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        //
        begin
            // Assign a defined default first so the result never depends on a
            // previous call (the old case statement had no default branch).
            calc_rectangle_bitmap_lsb = {NUM_MULTS{1'b0}};
            if ((narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
                calc_rectangle_bitmap_lsb[narrow_xy_addr_value[MAC_INDEX_W-1:0]] = 1'b1;
        end
        //
    endfunction
       
       /*
        * These can be simplified (the difference between square/triangle and
        * rectangle is that the bank is checked or not). A universal function would
        * accept a parameter that tells it whether it should check the bank or not.
        * Let's do it later, too early to optimize now, it seems.
        */
       
    //
    // Returns the low MAC_INDEX_W bits of narrow_xy_addr_value when the upper address
    // field narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] equals col_index_value;
    // otherwise the result is all-X (don't care).
    // The previous 8-entry table mapped every 3-bit value to itself, so a direct copy
    // is equivalent and no longer ties the function to MAC_INDEX_W = 3.
    // col_index_last_value and narrow_xy_bank_value are accepted but not used here.
    //
    function  [MAC_INDEX_W -1:0] calc_square_triangle_index_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        //
        begin
            // don't-care unless the address belongs to the current column
            calc_square_triangle_index_lsb = {MAC_INDEX_W{1'bX}};
            if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
                calc_square_triangle_index_lsb = narrow_xy_addr_value[MAC_INDEX_W-1:0];
        end
        //
    endfunction

    //
    // Square-mode index: passes all four arguments straight through to
    // calc_square_triangle_index_lsb and returns its result.
    //
    function  [MAC_INDEX_W -1:0] calc_square_index_lsb;
        input [COL_INDEX_W -1:0] col_index_value, col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            calc_square_index_lsb =
                calc_square_triangle_index_lsb(col_index_value,
                                               col_index_last_value,
                                               narrow_xy_bank_value,
                                               narrow_xy_addr_value);
        end
    endfunction

    //
    // Triangle-mode index: passes all four arguments straight through to
    // calc_square_triangle_index_lsb and returns its result.
    //
    function  [MAC_INDEX_W -1:0] calc_triangle_index_lsb;
        input [COL_INDEX_W -1:0] col_index_value, col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            calc_triangle_index_lsb =
                calc_square_triangle_index_lsb(col_index_value,
                                               col_index_last_value,
                                               narrow_xy_bank_value,
                                               narrow_xy_addr_value);
        end
    endfunction

    //
    // Rectangle-mode index: returns the low MAC_INDEX_W bits of the word
    // address when the upper address bits select col_index_value AND the bank
    // is not BANK_NARROW_EXT. In every other case the result is a don't-care
    // (all X).
    //
    // col_index_last_value is not used by this function.
    //
    function  [MAC_INDEX_W -1:0] calc_rectangle_index_lsb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] slim_bram_xy_bank_value;
        input [  OP_ADDR_W -1:0] slim_bram_xy_addr_value;
        //
        // the low address bits already are the index, so they are forwarded
        // as-is and the width follows MAC_INDEX_W instead of a fixed 3 bits
        //
        if ((slim_bram_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) && (slim_bram_xy_bank_value != BANK_NARROW_EXT))
                        calc_rectangle_index_lsb = slim_bram_xy_addr_value[MAC_INDEX_W-1:0];
        else            calc_rectangle_index_lsb = {MAC_INDEX_W{1'bX}};
        //
    endfunction
    
    //
    // LSB purge flag shared by the square and rectangle modes: set when the
    // upper address bits match col_index_value and also match
    // col_index_last_value; zero otherwise. The bank argument is not used.
    //
    function                     calc_square_rectangle_purge_lsb;
        input [COL_INDEX_W -1:0] col_index_value, col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            // default: no purge
            calc_square_rectangle_purge_lsb = 1'b0;
            // word belongs to the current column: purge only in the last column
            if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
                calc_square_rectangle_purge_lsb = (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_last_value);
        end
    endfunction

    //
    // Square-mode LSB purge flag: thin wrapper that forwards its arguments to
    // calc_square_rectangle_purge_lsb.
    //
    function                     calc_square_purge_lsb;
        input [COL_INDEX_W -1:0] col_index_value, col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            calc_square_purge_lsb =
                calc_square_rectangle_purge_lsb(col_index_value,
                                                col_index_last_value,
                                                narrow_xy_bank_value,
                                                narrow_xy_addr_value);
        end
    endfunction

    //
    // Rectangle-mode LSB purge flag: thin wrapper that forwards its arguments
    // to calc_square_rectangle_purge_lsb.
    //
    function                     calc_rectangle_purge_lsb;
        input [COL_INDEX_W -1:0] col_index_value, col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        begin
            calc_rectangle_purge_lsb =
                calc_square_rectangle_purge_lsb(col_index_value,
                                                col_index_last_value,
                                                narrow_xy_bank_value,
                                                narrow_xy_addr_value);
        end
    endfunction

    //
    // Square-mode MSB valid flag: set when the narrow address equals
    // index_last_value, zero otherwise. The column and bank arguments are
    // not used.
    //
    function                     calc_square_valid_msb;
        input [COL_INDEX_W -1:0] col_index_value, col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value, index_last_value;
        begin
            calc_square_valid_msb = 1'b0;
            if (narrow_xy_addr_value == index_last_value)
                calc_square_valid_msb = 1'b1;
        end
    endfunction

    //
    // Rectangle-mode MSB valid flag: set when the narrow address equals
    // OP_ADDR_ONE and the bank is BANK_NARROW_EXT, zero otherwise. The column
    // arguments and index_last_value are not used.
    //
    function                     calc_rectangle_valid_msb;
        input [COL_INDEX_W -1:0] col_index_value, col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value, index_last_value;
        begin
            calc_rectangle_valid_msb = 1'b0;
            if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT))
                calc_rectangle_valid_msb = 1'b1;
        end
    endfunction
    
    //
    // Square-mode MSB bitmap (one bit per multiplier).
    //
    // When the narrow address equals index_last_value, all bits except the
    // topmost are set; the topmost bit is set only when the current column is
    // not the last one. For any other address the bitmap is all zeroes.
    //
    // The literals are sized from NUM_MULTS rather than a fixed 8 bits.
    // narrow_xy_bank_value is not used.
    //
    function  [  NUM_MULTS -1:0] calc_square_bitmap_msb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        input [  OP_ADDR_W -1:0] index_last_value;
        //
        if (narrow_xy_addr_value == index_last_value) calc_square_bitmap_msb = {col_index_value != col_index_last_value, {(NUM_MULTS-1){1'b1}}};
        else                                          calc_square_bitmap_msb = {NUM_MULTS{1'b0}};
        //
    endfunction

    //
    // Rectangle-mode MSB bitmap (one bit per multiplier).
    //
    // All bits are set when the narrow address equals OP_ADDR_ONE and the bank
    // is BANK_NARROW_EXT; otherwise the bitmap is all zeroes.
    //
    // The literals are sized from NUM_MULTS rather than a fixed 8 bits.
    // The column arguments and index_last_value are not used.
    //
    function  [  NUM_MULTS -1:0] calc_rectangle_bitmap_msb;
        input [COL_INDEX_W -1:0] col_index_value;
        input [COL_INDEX_W -1:0] col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
        input [  OP_ADDR_W -1:0] index_last_value;
        //
        if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT)) calc_rectangle_bitmap_msb = {NUM_MULTS{1'b1}};
        else                                                                                    calc_rectangle_bitmap_msb = {NUM_MULTS{1'b0}};
        //
    endfunction

    //
    // Square-mode MSB purge flag: when the narrow address equals
    // index_last_value the flag tells whether the current column is the last
    // one; for any other address it is zero. The bank argument is not used.
    //
    function                     calc_square_purge_msb;
        input [COL_INDEX_W -1:0] col_index_value, col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value, index_last_value;
        begin
            calc_square_purge_msb = 1'b0;
            if (narrow_xy_addr_value == index_last_value)
                calc_square_purge_msb = (col_index_value == col_index_last_value);
        end
    endfunction

    //
    // Rectangle-mode MSB purge flag: when the narrow address equals
    // OP_ADDR_ONE in bank BANK_NARROW_EXT the flag tells whether the current
    // column is the last one; otherwise it is zero. index_last_value is not
    // used.
    //
    function                     calc_rectangle_purge_msb;
        input [COL_INDEX_W -1:0] col_index_value, col_index_last_value;
        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
        input [  OP_ADDR_W -1:0] narrow_xy_addr_value, index_last_value;
        begin
            calc_rectangle_purge_msb = 1'b0;
            if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT))
                calc_rectangle_purge_msb = (col_index_value == col_index_last_value);
        end
    endfunction


    //
    // Recombinator Cell Instances
    //
    // Two-stage history of words written by the shift_rcmb_msb_carry task;
    // the *_carry_1 registers drive the .cin ports of the two LSB cells.
    reg [WORD_W -1:0] rcmb_x_msb_carry_0;
    reg [WORD_W -1:0] rcmb_y_msb_carry_0;
    reg [WORD_W -1:0] rcmb_x_msb_carry_1;
    reg [WORD_W -1:0] rcmb_y_msb_carry_1;
    
    // LSB cell control:
    //   *_ce, *_ce_aux  - registered copies of xy_valid_latch_lsb / xy_aux_latch_lsb
    //   *_ce_purge      - 3-bit window, loaded with 3'b111 and shifted right every clock
    //   *_ce_combined   - actual clock enable: any of ce, ce_aux or purge window bit 0
    //   *_clr           - set on ena, released after the first rcmb_xy_lsb_ce
    //   *_cry           - high while purge bit 2 is clear and bit 1 or bit 0 is set
    reg         rcmb_xy_lsb_ce = 1'b0;
    reg         rcmb_xy_lsb_ce_aux = 1'b0;
    reg  [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000;
    wire        rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
    reg         rcmb_xy_lsb_clr;
    wire        rcmb_xy_lsb_cry = !rcmb_xy_lsb_ce_purge[2] && (rcmb_xy_lsb_ce_purge[1] || rcmb_xy_lsb_ce_purge[0]);

    // LSB cell data: MAC_W-wide inputs, WORD_W-wide outputs plus the extended outputs
    reg  [ MAC_W     -1:0] rcmb_x_lsb_din;
    reg  [ MAC_W     -1:0] rcmb_y_lsb_din;
    wire [WORD_W     -1:0] rcmb_x_lsb_dout;
    wire [WORD_W     -1:0] rcmb_y_lsb_dout;
    wire [WORD_EXT_W -2:0] rcmb_x_lsb_dout_ext;
    wire [WORD_EXT_W -2:0] rcmb_y_lsb_dout_ext;    

    // MSB cell control: clock enable is rcmb_xy_msb_ce or bit 0 of a 2-bit purge window
    reg         rcmb_xy_msb_ce = 1'b0;
    reg  [ 1:0] rcmb_xy_msb_ce_purge = 2'b00;
    wire        rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
    reg         rcmb_xy_msb_clr;
    
    // MSB cell data
    reg  [ MAC_W -1:0] rcmb_x_msb_din;
    reg  [ MAC_W -1:0] rcmb_y_msb_din;
    wire [WORD_W -1:0] rcmb_x_msb_dout;
    wire [WORD_W -1:0] rcmb_y_msb_dout;
    
    // Four instances of modexpng_recombinator_cell: X and Y channels, each with
    // an LSB and an MSB cell. X and Y share the same ce/clr/cry controls.
    //
    // LSB cell, X channel: carry input comes from rcmb_x_msb_carry_1,
    // both dout and dout_ext are used.
    modexpng_recombinator_cell recomb_x_lsb
    (
        .clk      (clk),
        .ce       (rcmb_xy_lsb_ce_combined),
        .clr      (rcmb_xy_lsb_clr),
        .cry      (rcmb_xy_lsb_cry),
        .cin      (rcmb_x_msb_carry_1),
        .din      (rcmb_x_lsb_din),
        .dout     (rcmb_x_lsb_dout),
        .dout_ext (rcmb_x_lsb_dout_ext)
    );
    // LSB cell, Y channel: same wiring as the X channel with the Y signals.
    modexpng_recombinator_cell recomb_y_lsb
    (
        .clk      (clk),
        .ce       (rcmb_xy_lsb_ce_combined),
        .clr      (rcmb_xy_lsb_clr),
        .cry      (rcmb_xy_lsb_cry),
        .cin      (rcmb_y_msb_carry_1),
        .din      (rcmb_y_lsb_din),
        .dout     (rcmb_y_lsb_dout),
        .dout_ext (rcmb_y_lsb_dout_ext)
    );

    // MSB cell, X channel: cry is tied low, cin is tied to WORD_ZERO and
    // dout_ext is left unconnected.
    modexpng_recombinator_cell recomb_x_msb
    (
        .clk      (clk),
        .ce       (rcmb_xy_msb_ce_combined),
        .clr      (rcmb_xy_msb_clr),
        .cry      (1'b0),
        .cin      (WORD_ZERO),
        .din      (rcmb_x_msb_din),
        .dout     (rcmb_x_msb_dout),
        .dout_ext ()
    );
    
    // MSB cell, Y channel: same wiring as the X channel with the Y signals.
    modexpng_recombinator_cell recomb_y_msb
    (
        .clk      (clk),
        .ce       (rcmb_xy_msb_ce_combined),
        .clr      (rcmb_xy_msb_clr),
        .cry      (1'b0),
        .cin      (WORD_ZERO),
        .din      (rcmb_y_msb_din),
        .dout     (rcmb_y_msb_dout),
        .dout_ext ()
    );

    //
    // Clock-enable registers of the recombinator cells and their purge windows.
    //
    always @(posedge clk) begin
        //
        // plain one-cycle registered enables
        //
        rcmb_xy_msb_ce     <= xy_bitmap_latch_msb[0];
        rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb;
        rcmb_xy_lsb_ce     <= xy_valid_latch_lsb;
        //
        // LSB purge window: drains to the right by default, is refilled with
        // all ones whenever the latched LSB purge flag is set
        //
        rcmb_xy_lsb_ce_purge <= rcmb_xy_lsb_ce_purge >> 1;
        if (xy_purge_latch_lsb)
            rcmb_xy_lsb_ce_purge <= 3'b111;
        //
        // MSB purge window: refilled when the purge flag is set and bit 0 is
        // the only one of the two lowest MSB bitmap bits that is still set
        //
        rcmb_xy_msb_ce_purge <= rcmb_xy_msb_ce_purge >> 1;
        if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1])
            rcmb_xy_msb_ce_purge <= 2'b11;
        //
    end

    //
    // Clear flags of the recombinator cells: raised by ena, each one is
    // dropped again once its own clock enable has been seen.
    //
    always @(posedge clk) begin
        //
        // release on first enable ...
        //
        if (rcmb_xy_lsb_ce) rcmb_xy_lsb_clr <= 1'b0;
        if (rcmb_xy_msb_ce) rcmb_xy_msb_clr <= 1'b0;
        //
        // ... unless a new operation starts, which takes priority
        // (the later non-blocking assignment wins)
        //
        if (ena) begin
            rcmb_xy_lsb_clr <= 1'b1;
            rcmb_xy_msb_clr <= 1'b1;
        end
        //
    end

    //
    // Data input of the LSB cells: the latched product selected by
    // xy_index_latch_lsb when the valid flag is set, else the product in the
    // auxiliary slot when the aux flag is set, else zero.
    //
    always @(posedge clk) begin
        //
        // nothing selected -> feed zeroes
        //
        rcmb_x_lsb_din <= {MAC_W{1'b0}};
        rcmb_y_lsb_din <= {MAC_W{1'b0}};
        //
        if (xy_valid_latch_lsb) begin
            rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb];
            rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb];
        end
        else if (xy_aux_latch_lsb) begin
            rcmb_x_lsb_din <= dsp_x_p_latch[NUM_MULTS_AUX-1];
            rcmb_y_lsb_din <= dsp_y_p_latch[NUM_MULTS_AUX-1];
        end
        //
    end

    //
    // Data input of the MSB cells: entry 0 of the latched products while
    // bit 0 of the latched MSB bitmap is set, zero otherwise.
    //
    always @(posedge clk) begin
        //
        rcmb_x_msb_din <= {MAC_W{1'b0}};
        rcmb_y_msb_din <= {MAC_W{1'b0}};
        //
        if (xy_bitmap_latch_msb[0]) begin
            rcmb_x_msb_din <= dsp_x_p_latch[0];
            rcmb_y_msb_din <= dsp_y_p_latch[0];
        end
        //
    end


    //
    // Head (stage 6) of the control delay line. Depending on the next FSM
    // state, the mode-specific calc_* functions are evaluated on the current
    // column index and narrow read address; in all other states the stage is
    // loaded with idle values.
    //
    always @(posedge clk) begin
        //
        // idle values; the case below overrides whatever a mode computes
        // (the later non-blocking assignment to the same element wins)
        //
        xy_valid_lsb_adv [6] <= 1'b0;
        xy_aux_lsb_adv   [6] <= 1'b0;
        xy_bitmap_lsb_adv[6] <= {NUM_MULTS{1'b0}};
        xy_index_lsb_adv [6] <= {MAC_INDEX_W{1'bX}};
        xy_purge_lsb_adv [6] <= 1'b0;
        //
        xy_valid_msb_adv [6] <= 1'b0;
        xy_bitmap_msb_adv[6] <= {NUM_MULTS{1'b0}};
        xy_purge_msb_adv [6] <= 1'b0;
        //
        case (fsm_state_next)
            //
            // square: LSB and MSB parts, no auxiliary product
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
                xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
                xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
                xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
            end
            //
            // triangle: LSB part with auxiliary product, no purge, no MSB part
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
                xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_aux_lsb_adv   [6] <= calc_triangle_aux_lsb   (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            end
            //
            // rectangle: LSB and MSB parts, no auxiliary product
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
                xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
                xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
                xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
            end
            //
            // any other state keeps the idle values assigned above
            //
            default: ;
            //
        endcase
        //
    end


    //
    // Control delay line: stages 5..1 shift towards stage 1, whose contents
    // are registered into the xy_* flags and then into the xy_*_latch_* flags.
    //
    always @(posedge clk) begin
        //
        // shift stages [i+1] -> [i]
        //
        for (i=1; i<6; i=i+1) begin
            xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1];
            xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1];
            xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1];
            //
            xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
            xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
            xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
            xy_aux_lsb_adv   [i] <= xy_aux_lsb_adv   [i+1];
            xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
        end
        //
        // LSB part: stage 1 -> flags -> latched flags
        //
        {xy_valid_lsb, xy_aux_lsb, xy_purge_lsb} <= {xy_valid_lsb_adv[1], xy_aux_lsb_adv[1], xy_purge_lsb_adv[1]};
        xy_bitmap_lsb <= xy_bitmap_lsb_adv[1];
        xy_index_lsb  <= xy_index_lsb_adv [1];
        //
        {xy_valid_latch_lsb, xy_aux_latch_lsb, xy_purge_latch_lsb} <= {xy_valid_lsb, xy_aux_lsb, xy_purge_lsb};
        xy_bitmap_latch_lsb <= xy_bitmap_lsb;
        xy_index_latch_lsb  <= xy_index_lsb;
        //
        // MSB part: stage 1 -> flags
        //
        xy_valid_msb  <= xy_valid_msb_adv [1];
        xy_bitmap_msb <= xy_bitmap_msb_adv[1];
        xy_purge_msb  <= xy_purge_msb_adv [1];
        //
        // latched MSB bitmap shifts right by default and is reloaded, together
        // with the latched purge flag, whenever xy_valid_msb is set
        //
        xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[NUM_MULTS-1:1]};
        if (xy_valid_msb) begin
            xy_bitmap_latch_msb <= xy_bitmap_msb;
            xy_purge_latch_msb  <= xy_purge_msb;
        end
        //
    end

    //
    // Latched DSP products (dsp_x_p_latch / dsp_y_p_latch).
    //
    // While bit 1 of the latched MSB bitmap is set, the first NUM_MULTS
    // entries shift down by one position (entry i takes entry i+1) and the top
    // entry becomes don't-care. Otherwise, when dsp_xy_ce_p_dly1 is set, new
    // products are captured from dsp_*_p_split into the entries selected by
    // the LSB bitmap or, if xy_valid_msb is set, by the MSB bitmap; the
    // auxiliary slot NUM_MULTS_AUX-1 is captured when xy_aux_lsb is set.
    //
    always @(posedge clk)
        //
        if (xy_bitmap_latch_msb[1])   // only shift 7 times
            //
            for (i=0; i<NUM_MULTS; i=i+1)
                //            
                if (i < (NUM_MULTS-1)) begin
                    dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
                    dsp_y_p_latch[i] <= dsp_y_p_latch[i+1];
                end else begin
                    // nothing left to shift into the topmost entry
                    dsp_x_p_latch[i] <= {MAC_W{1'bX}};
                    dsp_y_p_latch[i] <= {MAC_W{1'bX}};
                end
            //
        else if (dsp_xy_ce_p_dly1) begin
            //
            for (i=0; i<NUM_MULTS; i=i+1)
                //
                // both branches capture the same product, only the selecting bitmap differs
                if (xy_bitmap_lsb[i]) begin
                    dsp_x_p_latch[i] <= dsp_x_p_split[i];
                    dsp_y_p_latch[i] <= dsp_y_p_split[i];
                end else if (xy_valid_msb && xy_bitmap_msb[i]) begin
                    dsp_x_p_latch[i] <= dsp_x_p_split[i];
                    dsp_y_p_latch[i] <= dsp_y_p_split[i];
                end
            //
            // auxiliary product has its own slot
            if (xy_aux_lsb) begin
                dsp_x_p_latch[NUM_MULTS_AUX-1] <= dsp_x_p_split[NUM_MULTS_AUX-1];
                dsp_y_p_latch[NUM_MULTS_AUX-1] <= dsp_y_p_split[NUM_MULTS_AUX-1];
            end
            //
        end


    //
    // One-cycle delayed copies of the combined clock enables of the LSB and
    // MSB cells; cleared asynchronously while rst_n is low.
    //
    reg rcmb_xy_lsb_ce_combined_dly = 1'b0,
        rcmb_xy_msb_ce_combined_dly = 1'b0;

    always @(posedge clk or negedge rst_n) begin
        //
        if (!rst_n)
            {rcmb_xy_msb_ce_combined_dly, rcmb_xy_lsb_ce_combined_dly} <= 2'b00;
        else
            {rcmb_xy_msb_ce_combined_dly, rcmb_xy_lsb_ce_combined_dly} <= {rcmb_xy_msb_ce_combined, rcmb_xy_lsb_ce_combined};
        //
    end

    //
    // Output-valid flags of the LSB and MSB cells: the delayed clock enables
    // registered once more; cleared asynchronously while rst_n is low.
    //
    reg rcmb_xy_lsb_valid = 1'b0,
        rcmb_xy_msb_valid = 1'b0;

    always @(posedge clk or negedge rst_n) begin
        //
        if (!rst_n)
            {rcmb_xy_msb_valid, rcmb_xy_lsb_valid} <= 2'b00;
        else
            {rcmb_xy_msb_valid, rcmb_xy_lsb_valid} <= {rcmb_xy_msb_ce_combined_dly, rcmb_xy_lsb_ce_combined_dly};
        //
    end


    //
    // Output Registers
    //
    // Wide output: written only through the _update_wide task
    reg [BANK_ADDR_W -1:0] wide_xy_bank;
    reg [  OP_ADDR_W -1:0] wide_xy_addr;
    reg [ WORD_EXT_W -1:0] wide_x_dout;
    reg [ WORD_EXT_W -1:0] wide_y_dout;
    reg                    wide_xy_valid = 1'b0;

    // Narrow output: written only through the _update_narrow task
    reg [BANK_ADDR_W -1:0] narrow_xy_bank;
    reg [  OP_ADDR_W -1:0] narrow_xy_addr;
    reg [ WORD_EXT_W -1:0] narrow_x_dout;
    reg [ WORD_EXT_W -1:0] narrow_y_dout;
    reg                    narrow_xy_valid = 1'b0;

    // Reductor output: written only through the _update_rdct task
    reg [BANK_ADDR_W -1:0] rdct_xy_bank;
    reg [  OP_ADDR_W -1:0] rdct_xy_addr;
    reg [ WORD_EXT_W -1:0] rdct_x_dout;
    reg [ WORD_EXT_W -1:0] rdct_y_dout;
    reg                    rdct_xy_valid = 1'b0;


    //
    // Internal Counters
    //
    // Word counters for the LSB and MSB output streams; they count up to
    // word_index_last and then restart from zero (see inc_cnt_lsb/inc_cnt_msb)
    reg [OP_ADDR_W -1:0] cnt_lsb;
    reg [OP_ADDR_W -1:0] cnt_msb;
    
    // Set once the corresponding counter has restarted from zero,
    // cleared by clr_cnt_lsb/clr_cnt_msb
    reg cnt_lsb_wrapped;
    reg cnt_msb_wrapped;
    
    // Three-stage delay line for MSB words. Each entry stores {dout_y, dout_x}
    // with both halves WORD_W bits wide, hence the 2*WORD_W width.
    reg [2*WORD_W -1:0] rcmb_xy_msb_delay_0;
    reg [2*WORD_W -1:0] rcmb_xy_msb_delay_1;
    reg [2*WORD_W -1:0] rcmb_xy_msb_delay_2;
    
    // Counter value travelling alongside each delayed MSB word
    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_0 = OP_ADDR_ZERO;
    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_1 = OP_ADDR_ZERO;
    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_2 = OP_ADDR_ZERO;

    // Flag travelling alongside each delayed MSB word (1 = entry holds data)
    reg rcmb_msb_flag_delay_0 = 1'b0;
    reg rcmb_msb_flag_delay_1 = 1'b0;
    reg rcmb_msb_flag_delay_2 = 1'b0;
    
    
    //
    // Output Port Mapping
    //
    // wide output registers -> rcmb_wide_* outputs
    assign rcmb_wide_xy_bank  = wide_xy_bank;
    assign rcmb_wide_xy_addr  = wide_xy_addr;
    assign rcmb_wide_x_dout   = wide_x_dout;
    assign rcmb_wide_y_dout   = wide_y_dout;
    assign rcmb_wide_xy_valid = wide_xy_valid;

    // narrow output registers -> rcmb_narrow_* outputs
    assign rcmb_narrow_xy_bank  = narrow_xy_bank;
    assign rcmb_narrow_xy_addr  = narrow_xy_addr;
    assign rcmb_narrow_x_dout   = narrow_x_dout;
    assign rcmb_narrow_y_dout   = narrow_y_dout;
    assign rcmb_narrow_xy_valid = narrow_xy_valid;

    // reductor output registers -> rdct_narrow_* outputs
    assign rdct_narrow_xy_bank  = rdct_xy_bank;
    assign rdct_narrow_xy_addr  = rdct_xy_addr;
    assign rdct_narrow_x_dout   = rdct_x_dout;
    assign rdct_narrow_y_dout   = rdct_y_dout;
    assign rdct_narrow_xy_valid = rdct_xy_valid;


    //
    // Ready Logic
    //
    //
    // rdy drops in the cycle after ena and otherwise follows rdy_adv with one
    // clock of delay.
    //
    reg rdy_adv = 1'b1;
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    always @(posedge clk) begin
        //
        rdy_reg <= rdy_adv;
        //
        // a new operation overrides the value above
        //
        if (ena) rdy_reg <= 1'b0;
        //
    end


    //
    // Helper Tasks
    //
    //
    // Pushes one entry (word pair, counter value, flag) into the three-stage
    // MSB delay line and moves the older entries one stage further.
    //
    task advance_rcmb_msb_delay;
        input [   WORD_W -1:0] dout_x, dout_y;
        input [OP_ADDR_W -1:0] cnt;
        input                  flag;
        begin
            //
            // stage 1 -> stage 2
            //
            rcmb_xy_msb_delay_2   <= rcmb_xy_msb_delay_1;
            rcmb_msb_cnt_delay_2  <= rcmb_msb_cnt_delay_1;
            rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1;
            //
            // stage 0 -> stage 1
            //
            rcmb_xy_msb_delay_1   <= rcmb_xy_msb_delay_0;
            rcmb_msb_cnt_delay_1  <= rcmb_msb_cnt_delay_0;
            rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0;
            //
            // new entry -> stage 0 (Y word in the upper half, X word in the lower)
            //
            rcmb_xy_msb_delay_0   <= {dout_y, dout_x};
            rcmb_msb_cnt_delay_0  <= cnt;
            rcmb_msb_flag_delay_0 <= flag;
            //
        end
    endtask
         
    //
    // Pushes a new X/Y word pair into the two-stage carry registers; the
    // previous stage-0 pair moves on to stage 1.
    //
    task shift_rcmb_msb_carry;
        input [WORD_W -1:0] dout_x, dout_y;
        begin
            // stage 0 -> stage 1
            {rcmb_y_msb_carry_1, rcmb_x_msb_carry_1} <= {rcmb_y_msb_carry_0, rcmb_x_msb_carry_0};
            // new pair -> stage 0
            {rcmb_y_msb_carry_0, rcmb_x_msb_carry_0} <= {dout_y, dout_x};
        end
    endtask
    
    //
    // Loads all five wide output registers in one go.
    //
    task _update_wide;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        input [ WORD_EXT_W -1:0] dout_x, dout_y;
        input                    valid;
        begin
            {wide_xy_bank, wide_xy_addr, wide_x_dout, wide_y_dout, wide_xy_valid}
                <= {bank, addr, dout_x, dout_y, valid};
        end
    endtask
    
    //
    // Loads all five narrow output registers in one go.
    //
    task _update_narrow;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        input [ WORD_EXT_W -1:0] dout_x, dout_y;
        input                    valid;
        begin
            {narrow_xy_bank, narrow_xy_addr, narrow_x_dout, narrow_y_dout, narrow_xy_valid}
                <= {bank, addr, dout_x, dout_y, valid};
        end
    endtask

    //
    // Loads all five reductor output registers in one go.
    //
    task _update_rdct;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        input [ WORD_EXT_W -1:0] dout_x, dout_y;
        input                    valid;
        begin
            {rdct_xy_bank, rdct_xy_addr, rdct_x_dout, rdct_y_dout, rdct_xy_valid}
                <= {bank, addr, dout_x, dout_y, valid};
        end
    endtask
            
    //
    // Loads the wide output registers with the given entry and marks it valid.
    //
    task set_wide;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        input [ WORD_EXT_W -1:0] dout_x, dout_y;
        begin
            _update_wide(bank, addr, dout_x, dout_y, 1'b1);
        end
    endtask
    
    //
    // Loads the narrow output registers with the given entry and marks it valid.
    //
    task set_narrow;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        input [ WORD_EXT_W -1:0] dout_x, dout_y;
        begin
            _update_narrow(bank, addr, dout_x, dout_y, 1'b1);
        end
    endtask
    
    //
    // Loads the reductor output registers with the given entry and marks it valid.
    //
    task set_rdct;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        input [ WORD_EXT_W -1:0] dout_x, dout_y;
        begin
            _update_rdct(bank, addr, dout_x, dout_y, 1'b1);
        end
    endtask
    
    //
    // Deasserts the wide valid flag; bank, address and data become don't-care.
    //
    task clear_wide;
        begin
            _update_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
        end
    endtask

    //
    // Deasserts the narrow valid flag; bank, address and data become don't-care.
    //
    task clear_narrow;
        begin
            _update_narrow(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
        end
    endtask

    //
    // Deasserts the reductor valid flag; bank, address and data become don't-care.
    //
    task clear_rdct;
        begin
            _update_rdct(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
        end
    endtask
    
    //
    // Loads the LSB word counter and its wrapped flag.
    //
    task _set_cnt_lsb;
        input [OP_ADDR_W-1:0] cnt;
        input                 wrapped;
        begin
            cnt_lsb         <= cnt;
            cnt_lsb_wrapped <= wrapped;
        end
    endtask
    
    //
    // Loads the MSB word counter and its wrapped flag.
    //
    task _set_cnt_msb;
        input [OP_ADDR_W-1:0] cnt;
        input                 wrapped;
        begin
            cnt_msb         <= cnt;
            cnt_msb_wrapped <= wrapped;
        end
    endtask

    //
    // Steps the LSB word counter: after word_index_last it restarts at zero
    // and raises the wrapped flag, otherwise it increments and the flag keeps
    // its value.
    //
    task inc_cnt_lsb;
        begin
            if (cnt_lsb == word_index_last)
                _set_cnt_lsb(OP_ADDR_ZERO, 1'b1);
            else
                _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped);
        end
    endtask
    
    //
    // Steps the MSB word counter: after word_index_last it restarts at zero
    // and raises the wrapped flag, otherwise it increments and the flag keeps
    // its value.
    //
    task inc_cnt_msb;
        begin
            if (cnt_msb == word_index_last)
                _set_cnt_msb(OP_ADDR_ZERO, 1'b1);
            else
                _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped);
        end
    endtask

    //
    // Steps the LSB and the MSB word counters in the same clock cycle.
    //
    task inc_cnt_both;
        begin
            inc_cnt_msb;
            inc_cnt_lsb;
        end
    endtask
    
    //
    // Resets the LSB word counter to zero and drops its wrapped flag.
    //
    task clr_cnt_lsb;
        begin
            _set_cnt_lsb(OP_ADDR_ZERO, 1'b0);
        end
    endtask
    
    //
    // Resets the MSB word counter to zero and drops its wrapped flag.
    //
    task clr_cnt_msb;
        begin
            _set_cnt_msb(OP_ADDR_ZERO, 1'b0);
        end
    endtask
       
    
    //
    // Main Process
    //
    //
    // On ena both word counters are cleared. After that, for as long as rdy is
    // low, the recombination task matching rcmb_mode runs once per clock.
    //
    // Tasks without arguments are enabled without parentheses: an empty
    // argument list is only accepted by SystemVerilog, and the rest of this
    // file already uses the bare form.
    //
    always @(posedge clk)
        //
        if (ena) begin
            clr_cnt_lsb;
            clr_cnt_msb;
        end else if (!rdy)
            //
            // any other rcmb_mode value leaves all registers untouched
            //
            case (rcmb_mode)
                RCMB_MODE_SQUARE:    recombine_square;
                RCMB_MODE_TRIANGLE:  recombine_triangle;
                RCMB_MODE_RECTANGLE: recombine_rectangle;
            endcase
           
           
    //
    // Padding
    //
    // LSB cell outputs, extended to WORD_EXT_W bits with CARRY_ZERO on top
    wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_pad = {CARRY_ZERO, rcmb_x_lsb_dout};
    wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_pad = {CARRY_ZERO, rcmb_y_lsb_dout};
    
    // extended LSB cell outputs are one bit short of WORD_EXT_W, pad with a single zero
    wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_ext_pad = {1'b0, rcmb_x_lsb_dout_ext};
    wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_ext_pad = {1'b0, rcmb_y_lsb_dout_ext};

    // MSB cell outputs, extended to WORD_EXT_W bits with CARRY_ZERO on top
    wire [WORD_EXT_W-1:0] rcmb_x_msb_dout_pad = {CARRY_ZERO, rcmb_x_msb_dout};
    wire [WORD_EXT_W-1:0] rcmb_y_msb_dout_pad = {CARRY_ZERO, rcmb_y_msb_dout};

    // last stage of the MSB delay line holds {dout_y, dout_x}, each half
    // WORD_W bits wide: X is the lower half, Y the upper half
    wire [WORD_EXT_W-1:0] rcmb_x_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[  WORD_W-1:     0]};
    wire [WORD_EXT_W-1:0] rcmb_y_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[2*WORD_W-1:WORD_W]};
    
    
    //
    // Handy Signal
    //
    // bit 1 = MSB cell output valid, bit 0 = LSB cell output valid
    wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid}; 


    //
    // Recombination Task - Square
    //
    //
    // One clock step of square-mode recombination, selected by
    // rcmb_xy_valid = {msb valid, lsb valid}. Three things happen in parallel:
    //   1) the word counters of the valid streams are stepped,
    //   2) the wide output registers are loaded or cleared,
    //   3) the carry registers and the MSB delay line are advanced.
    // The counter tasks use non-blocking assignments, so every cnt_* value
    // read below is the one from before the step.
    //
    task recombine_square;
        //
        begin
            //
            // 1) word counters
            case (rcmb_xy_valid)
                2'b01: inc_cnt_lsb; 
                2'b10: inc_cnt_msb;
                2'b11: inc_cnt_both;
            endcase            
            //
            // 2) wide output:
            //    00 - drain the MSB delay line into BANK_WIDE_H while its last stage is flagged
            //    01 - LSB word goes to BANK_WIDE_L
            //    10 - MSB word goes to BANK_WIDE_H, except for the first two words (no write)
            //    11 - LSB word goes to BANK_WIDE_L until the LSB counter has wrapped, then to
            //         BANK_WIDE_H (plain output at address zero, extended output elsewhere)
            case (rcmb_xy_valid)
                //
                2'b00: if (rcmb_msb_flag_delay_2)       set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
                       else                             clear_wide;
                //
                2'b01:                                  set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
                //
                2'b10: if (cnt_msb < OP_ADDR_TWO)       clear_wide;                        
                       else                             set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
                //
                2'b11: if (!cnt_lsb_wrapped)            set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
                       else begin
                           if (cnt_lsb == OP_ADDR_ZERO) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
                           else                         set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad);
                       end
                //
            endcase            
            //
            // 3) carry registers and MSB delay line:
            //    00 - push an unflagged entry while the last delay stage is flagged
            //    01 - shift the carry registers while the LSB cells take a carry in
            //    10 - the first two MSB words are stored in the carry registers
            //    11 - MSB word is queued in the delay line together with its counter value
            case (rcmb_xy_valid)
                //
                2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
                //
                2'b01: if (rcmb_xy_lsb_cry)       shift_rcmb_msb_carry(WORD_DNC, WORD_DNC);
                //
                2'b10: if (cnt_msb < OP_ADDR_TWO) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
                //
                2'b11: begin                      advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
                       if (rcmb_xy_lsb_cry)       shift_rcmb_msb_carry(WORD_DNC, WORD_DNC);
                       end
                //
            endcase
            //        
        end
        //
    endtask
    
    
    //
    // Recombination Task - Triangle
    //
    //
    // One clock step of triangle-mode recombination, selected by
    // rcmb_xy_valid = {msb valid, lsb valid}. Only the LSB stream produces
    // output in this mode: each valid LSB word is written to BANK_NARROW_Q at
    // the current LSB counter value until the counter has wrapped; after that
    // the word goes to address OP_ADDR_EXT_Q of BANK_NARROW_EXT. In every
    // other case the narrow output is cleared.
    //
    // The counter task uses non-blocking assignments, so cnt_lsb and
    // cnt_lsb_wrapped read below are the values from before the step.
    //
    task recombine_triangle;
        //
        begin
            //
            // tasks without arguments are enabled without parentheses (an empty
            // argument list is only accepted by SystemVerilog)
            //
            case (rcmb_xy_valid)
                2'b01: inc_cnt_lsb;
            endcase
            //
            case (rcmb_xy_valid)
                //
                2'b01: if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q,   cnt_lsb,       rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
                       else                  set_narrow(BANK_NARROW_EXT, OP_ADDR_EXT_Q, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
                //
                2'b00,
                2'b10,
                2'b11:                       clear_narrow;
                //
            endcase
            //
        end
        //
    endtask


    //
    // Recombination Task - Rectangle
    //
    //
    // One clock step of rectangle-mode recombination, selected by
    // rcmb_xy_valid = {msb valid, lsb valid}. Three things happen in parallel:
    //   1) the word counters of the valid streams are stepped,
    //   2) the reductor output registers are loaded or cleared,
    //   3) the carry registers and the MSB delay line are advanced.
    // The counter tasks use non-blocking assignments, so every cnt_* value
    // read below is the one from before the step.
    //
    task recombine_rectangle;
        //
        begin
            //
            // 1) word counters
            case (rcmb_xy_valid)
                2'b01: inc_cnt_lsb; 
                2'b10: inc_cnt_msb;
                2'b11: inc_cnt_both;
            endcase
            //
            // 2) reductor output:
            //    00 - drain the MSB delay line into BANK_RCMB_MH while its last stage is flagged
            //    01 - LSB word goes to BANK_RCMB_ML
            //    10 - before the MSB counter wraps: MSB word goes to BANK_RCMB_MH, except for
            //         the first two words (no write); after the wrap: MSB word goes to
            //         address zero of BANK_RCMB_EXT
            //    11 - LSB word goes to BANK_RCMB_MH (plain output at address zero,
            //         extended output elsewhere)
            case (rcmb_xy_valid)
                //
                2'b00:  if (rcmb_msb_flag_delay_2)  set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
                        else                        clear_rdct;
                //
                2'b01:                              set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); 
                //
                2'b10: if (!cnt_msb_wrapped) begin 
                       if (cnt_msb < OP_ADDR_TWO)   clear_rdct;                        
                       else                         set_rdct(BANK_RCMB_MH,  cnt_msb,      rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
                       end else                     set_rdct(BANK_RCMB_EXT, OP_ADDR_ZERO, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
                //          
                2'b11: if (cnt_lsb == OP_ADDR_ZERO) set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_pad,     rcmb_y_lsb_dout_pad);
                       else                         set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad);
                //
            endcase            
            //
            // 3) carry registers and MSB delay line:
            //    00 - push an unflagged entry while the last delay stage is flagged
            //    01 - shift the carry registers while the LSB cells take a carry in
            //    10 - before the wrap the first two MSB words are stored in the carry
            //         registers; after the wrap an unflagged entry is pushed into the delay line
            //    11 - MSB word is queued in the delay line together with its counter value
            case (rcmb_xy_valid)
                //
                2'b00: if (rcmb_msb_flag_delay_2)                       advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
                //
                2'b01: if (rcmb_xy_lsb_cry)                             shift_rcmb_msb_carry(WORD_DNC, WORD_DNC);
                //
                2'b10: begin 
                       if ((cnt_msb < OP_ADDR_TWO) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
                       if (cnt_msb_wrapped)                             advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
                       end
                //
                2'b11:                                                  advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
                //
            endcase
            //
        end
        //
    endtask
    
    
    //
    // Completion Logic
    //
    //
    // rdy_adv is dropped by ena and re-evaluated while rdy_reg is low:
    //   square / rectangle - with no cell output valid and the last stage of
    //                        the MSB delay line flagged, it becomes the
    //                        inverse of the stage-1 flag
    //   triangle           - on every valid LSB-only word it takes the value
    //                        of the LSB wrapped flag
    //
    always @(posedge clk) begin
        //
        if (ena)
            rdy_adv <= 1'b0;
        else if (!rdy_reg)
            //
            case (rcmb_mode)
                //
                RCMB_MODE_SQUARE,
                RCMB_MODE_RECTANGLE:
                    if ((rcmb_xy_valid == 2'b00) && rcmb_msb_flag_delay_2)
                        rdy_adv <= ~rcmb_msb_flag_delay_1;
                //
                RCMB_MODE_TRIANGLE:
                    if (rcmb_xy_valid == 2'b01)
                        rdy_adv <= cnt_lsb_wrapped;
                //
            endcase
        //
    end


endmodule