aboutsummaryrefslogblamecommitdiff
path: root/rtl/modexpng_part_recombinator.v
blob: 957ba8ee611c1d57b4e8986aab368b7373b3f5cd (plain) (tree)
1
2
3
4
5
6
7
8
9








                                 

                                         

                                                                                                                              


















                                                

                              


                                        
                                        
 










                                                   




              

                                       




              

                                   

             
                                              


























                                                                            

                                 






















                                       



                                     


















                                             
                                  






                                     






                                



                                                                          


                  



                                       
                                 







                                         
                                          












                                                                




















                                                                
                                                         











                                                                  














                                                                                    























                                                                 
























                                                                      

























                                                                                                                   
       























                                                                
























                                                                

























                                                                                                                














                                                                                             













                                                                                                













                                                            















                                                                                                
















                                                                                    















                                                                                                      














                                                                                















                                                                                                

                                     
                                  
                                             
                                                                                                    
                               







                                                                                
    























                                            
                                             




























                                                                                 

                                               




















                                                                                                             
                                             









                                                                                                                         














                                                                                                                                  

















                                                                                                                                               
              


                                             
                                             















                                                                  
                                                                  




                                                                                
                                                                              

















                                                                                 
                                                           




















                                                           
                                       






                                                         




                                                     









                                                          

                                    



                                                 











                                                  










                                             














                                                                      













                               



                                       
                          
             
              






                                                               




                                                               





















                                                               
                                                


           













                                                  










                                                                    









                                                                     




                                                                               





                                                                                
    
                           
                        
                            
             

                                               

           

                           
                        
                            
             

                                               


               
                          
             



                                                                                   

           

                          
             



                                                                                   


           
                          
             
                                          

           

                          
             
                                          











                                                                                        

                               




                                           
                                            




                          
          



                                     

                                           
                            

                                       





                                     







                                                                                                                                                                                        




                                     




                                                                                                                             









                            
          



                                     

                                           



                                     




                                                                                                                                                



                      
          

           















































                                                                                                                                                           

    



                                
                                    
              












                                                                        
                              






                                                                                                        
                  













                                                                                        


                      

 
    
                               
         
// Recombinator stage of the modexpng multiplier (per the module name).
// NOTE(review): only the first part of the module body is visible in this
// section; comments below describe what that visible part does with each port
// and hedge where the driver/consumer is outside the view.
module modexpng_part_recombinator
(
    clk,
    rdy,
    fsm_state_next,
    index_last,
    dsp_x_ce_p, dsp_y_ce_p,
    ena_x,   ena_y,
    dsp_x_p, dsp_y_p,
    col_index, col_index_last,
    slim_bram_xy_addr, slim_bram_xy_bank,
    rcmb_fat_bram_xy_bank,  rcmb_fat_bram_xy_addr,  rcmb_fat_bram_x_dout,  rcmb_fat_bram_y_dout,  rcmb_fat_bram_xy_dout_valid,
    rcmb_slim_bram_xy_bank, rcmb_slim_bram_xy_addr, rcmb_slim_bram_x_dout, rcmb_slim_bram_y_dout, rcmb_slim_bram_xy_dout_valid
);


    //
    // Headers
    //
    // These includes supply the FSM_STATE_* constants, FSM_STATE_WIDTH,
    // NUM_MULTS and BANK_SLIM_EXT used below (exact file-to-symbol mapping is
    // not visible from here).
    `include "../rtl/modexpng_mmm_fsm.vh"
    `include "../rtl/modexpng_parameters.vh"
    `include "../rtl/modexpng_parameters_x8.vh"


    input                        clk;               // single clock for every register in this module section
    output                       rdy;               // driver is outside the visible part of the module
    input  [FSM_STATE_WIDTH-1:0] fsm_state_next;    // compared against FSM_STATE_MULT_* to pick square/triangle/rectangle handling
    input [7:0]                  index_last;        // compared with slim_bram_xy_addr by the square "msb" helper functions
    input                        dsp_x_ce_p;        // delayed by one cycle into dsp_x_ce_p_dly1
    input                        dsp_y_ce_p;        // delayed by one cycle into dsp_y_ce_p_dly1
    input                        ena_x;             // ena_x and ena_y together gate the rcmb_mode update
    input                        ena_y;             //   and set the recombinator clear flags
    input  [9*47-1:0] dsp_x_p;                      // nine packed 47-bit words, unpacked into dsp_x_p_split[]
    input  [9*47-1:0] dsp_y_p;                      // nine packed 47-bit words, unpacked into dsp_y_p_split[]
    input  [     4:0] col_index;                    // compared with slim_bram_xy_addr[7:3] by the calc_* functions
    input  [     4:0] col_index_last;               // compared with col_index / address bits to derive "purge" flags
    input  [     7:0] slim_bram_xy_addr;            // [7:3] is matched against col_index, [2:0] selects one of eight positions
    input  [     1:0] slim_bram_xy_bank;            // compared with BANK_SLIM_EXT in triangle/rectangle handling

    // "fat" BRAM write-side outputs (drivers are outside the visible part)
    output [     2:0] rcmb_fat_bram_xy_bank;
    output [     7:0] rcmb_fat_bram_xy_addr;
    output [    17:0] rcmb_fat_bram_x_dout;
    output [    17:0] rcmb_fat_bram_y_dout;
    output            rcmb_fat_bram_xy_dout_valid;

    // "slim" BRAM write-side outputs (drivers are outside the visible part)
    output [     2:0] rcmb_slim_bram_xy_bank;
    output [     7:0] rcmb_slim_bram_xy_addr;
    output [    17:0] rcmb_slim_bram_x_dout;
    output [    17:0] rcmb_slim_bram_y_dout;
    output            rcmb_slim_bram_xy_dout_valid;


    //
    // Latches
    //
    // Nine 47-bit holding registers per channel. They are read by the
    // recombinator input muxes further down; the logic that writes them is
    // not visible in this part of the file.
    reg  [1*47-1:0] dsp_x_p_latch[0:8];
    reg  [1*47-1:0] dsp_y_p_latch[0:8];


    //
    // Mapping
    //
    // Per-word views of the packed dsp_x_p / dsp_y_p input buses.
    wire [46:0] dsp_x_p_split[0:8];
    wire [46:0] dsp_y_p_split[0:8];
    
    // Word z of each split array is bits [47*z +: 47] of the packed bus.
    // NOTE(review): the loop bound is NUM_MULTS+1 while the arrays and buses
    // are hard-sized for nine words, so this presumably relies on
    // NUM_MULTS == 8 - confirm against the parameter header.
    genvar z;
    generate for (z=0; z<(NUM_MULTS+1); z=z+1)
        begin : gen_dsp_xy_p_split
            assign dsp_x_p_split[z] = dsp_x_p[47*z+:47];
            assign dsp_y_p_split[z] = dsp_y_p[47*z+:47];
        end
    endgenerate


    //
    // Delays
    //
    // One-cycle delayed copies of the DSP product clock enables.
    reg dsp_y_ce_p_dly1 = 1'b0;
    reg dsp_x_ce_p_dly1 = 1'b0;

    always @(posedge clk) begin
        dsp_x_ce_p_dly1 <= dsp_x_ce_p;
        dsp_y_ce_p_dly1 <= dsp_y_ce_p;
    end


    //
    // Registers
    //
    
    // valid
    // First register rank: loaded every clock from stage [1] of the xy_*_adv
    // arrays declared below (the same value is copied to both x_ and y_).
    reg       x_valid_lsb = 1'b0;
    reg       y_valid_lsb = 1'b0;
    reg       x_aux_lsb   = 1'b0;
    reg       y_aux_lsb   = 1'b0;
    reg       x_valid_msb = 1'b0;
    reg       y_valid_msb = 1'b0;
    
    // bitmap (8-bit masks; the lsb variant is one-hot as produced by the calc_*_bitmap_lsb functions)
    reg [7:0] x_bitmap_lsb = {8{1'b0}};
    reg [7:0] y_bitmap_lsb = {8{1'b0}};
    reg [7:0] x_bitmap_msb = {8{1'b0}};
    reg [7:0] y_bitmap_msb = {8{1'b0}};
    
    // index (don't-care at power-up; only meaningful together with the matching valid flag)
    reg [2:0] x_index_lsb = 3'dX;
    reg [2:0] y_index_lsb = 3'dX;
    
    // purge
    reg       x_purge_lsb = 1'b0;
    reg       y_purge_lsb = 1'b0;
    reg       x_purge_msb = 1'b0;
    reg       y_purge_msb = 1'b0;
    
    // Second register rank ("latch"): one-cycle delayed copies of the
    // registers above.
    // valid - latch
    reg       x_valid_latch_lsb = 1'b0;
    reg       y_valid_latch_lsb = 1'b0;
    
    // aux - latch
    reg       x_aux_latch_lsb = 1'b0;
    reg       y_aux_latch_lsb = 1'b0;
    
    // bitmap - latch
    reg [7:0] x_bitmap_latch_lsb = {8{1'b0}};
    reg [7:0] y_bitmap_latch_lsb = {8{1'b0}};
    reg [7:0] x_bitmap_latch_msb = {8{1'b0}};
    reg [7:0] y_bitmap_latch_msb = {8{1'b0}};

    // index - latch
    reg [2:0] x_index_latch_lsb = 3'dX;
    reg [2:0] y_index_latch_lsb = 3'dX;
    
    // purge - latch
    reg       x_purge_latch_lsb = 1'b0;
    reg       y_purge_latch_lsb = 1'b0;
    reg       x_purge_latch_msb = 1'b0;
    reg       y_purge_latch_msb = 1'b0;

    // "Advance" arrays, indexed 1..6. Stage [6] is loaded from the decoded
    // fsm_state_next / address inputs and stage [1] feeds the x_/y_ registers
    // above. NOTE(review): the stage-to-stage transfer is outside the visible
    // part of the file - presumably a 6 -> 1 shift; confirm.
    reg       xy_valid_lsb_adv[1:6];
    reg       xy_valid_msb_adv[1:6];
    reg       xy_aux_lsb_adv[1:6];
    reg [7:0] xy_bitmap_lsb_adv[1:6];
    reg [7:0] xy_bitmap_msb_adv[1:6];
    reg [2:0] xy_index_lsb_adv[1:6];
    reg [2:0] xy_index_msb_adv[1:6];
    reg       xy_purge_lsb_adv[1:6];
    reg       xy_purge_msb_adv[1:6];
    
    // Operation mode captured while ena_x && ena_y:
    // 1 = square, 2 = triangle, 3 = rectangle, 0 = any other next state.
    reg [1:0] rcmb_mode;
       
    // Capture the multiplication flavour while both enables are asserted:
    //   1 = square, 2 = triangle, 3 = rectangle, 0 = any other next state.
    // rcmb_mode holds its value when the enables are not both high.
    always @(posedge clk) begin
        if (ena_x && ena_y) begin
            if      (fsm_state_next == FSM_STATE_MULT_SQUARE_COL_0_BUSY)    rcmb_mode <= 2'd1;
            else if (fsm_state_next == FSM_STATE_MULT_TRIANGLE_COL_0_BUSY)  rcmb_mode <= 2'd2;
            else if (fsm_state_next == FSM_STATE_MULT_RECTANGLE_COL_0_BUSY) rcmb_mode <= 2'd3;
            else                                                            rcmb_mode <= 2'd0;
        end
    end

               
    // Power-up values for the xy_*_adv arrays.
    //
    // The arrays are declared [1:6], so the loop has to cover stage 6 as
    // well: that stage is only written on the first clock edge, and leaving
    // it uninitialised lets an X be sampled into the following stages in
    // simulation. Index fields stay at X (don't-care) because they are only
    // meaningful together with the corresponding valid flag.
    integer i;
    initial for (i=1; i<=6; i=i+1) begin
        xy_valid_lsb_adv[i] = 1'b0;
        xy_valid_msb_adv[i] = 1'b0;
        xy_aux_lsb_adv[i] = 1'b0;
        xy_bitmap_lsb_adv[i] = {8{1'b0}};
        xy_bitmap_msb_adv[i] = {8{1'b0}};
        xy_index_lsb_adv[i] = 3'dX;
        xy_index_msb_adv[i] = 3'dX;
        xy_purge_lsb_adv[i] = 1'b0;
        xy_purge_msb_adv[i] = 1'b0;
    end
    
    // Square mode, LSB path: returns 1 when the upper five address bits
    // (slim_bram_xy_addr_value[7:3]) equal col_index_value, otherwise 0.
    // col_index_last_value is accepted for a uniform call signature but is
    // not used.
    function        calc_square_valid_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        begin
            calc_square_valid_lsb = 1'b0;
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                calc_square_valid_lsb = 1'b1;
        end
    endfunction
    
    // Triangle mode, LSB path: 1 when slim_bram_xy_addr_value[7:3] equals
    // col_index_value, else 0. col_index_last_value is unused.
    function        calc_triangle_valid_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        reg         column_hit;
        begin
            column_hit = (slim_bram_xy_addr_value[7:3] == col_index_value);
            if (column_hit) calc_triangle_valid_lsb = 1'b1;
            else            calc_triangle_valid_lsb = 1'b0;
        end
    endfunction

    // Triangle mode auxiliary flag: 1 exactly when the bank input equals
    // BANK_SLIM_EXT, else 0. The column and address inputs are not used
    // (an address-based variant of this condition existed earlier but was
    // disabled by the original author).
    function        calc_triangle_aux_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [1:0] slim_bram_xy_bank_value;
        begin
            calc_triangle_aux_lsb = 1'b0;
            if (slim_bram_xy_bank_value == BANK_SLIM_EXT)
                calc_triangle_aux_lsb = 1'b1;
        end
    endfunction
    
    // Rectangle mode, LSB path: valid when the address column
    // (slim_bram_xy_addr_value[7:3]) equals col_index_value AND the bank is
    // not BANK_SLIM_EXT; 0 for any other column.
    function        calc_rectangle_valid_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [1:0] slim_bram_xy_bank_value;
        begin
            calc_rectangle_valid_lsb = 1'b0;
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                calc_rectangle_valid_lsb = (slim_bram_xy_bank_value != BANK_SLIM_EXT);
        end
    endfunction
    
    // Square mode, LSB path: one-hot bitmap of the position selected by
    // slim_bram_xy_addr_value[2:0] when the address column ([7:3]) equals
    // col_index_value; all zeroes otherwise.
    //
    // The one-hot code is produced with a shift instead of an eight-entry
    // case table. Besides being shorter, this propagates X when the low
    // address bits are unknown; the default-less case left the (static)
    // function result at whatever the previous call had returned.
    // col_index_last_value is unused.
    function  [7:0] calc_square_bitmap_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        begin
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                calc_square_bitmap_lsb = 8'b00000001 << slim_bram_xy_addr_value[2:0];
            else
                calc_square_bitmap_lsb = {8{1'b0}};
        end
    endfunction
    
    // Triangle mode, LSB path: one-hot bitmap of the position selected by
    // slim_bram_xy_addr_value[2:0] when the address column ([7:3]) equals
    // col_index_value; all zeroes otherwise.
    //
    // Implemented as a shift rather than a default-less case table, so an
    // unknown selector yields X instead of the stale result of the previous
    // call. col_index_last_value is unused.
    function  [7:0] calc_triangle_bitmap_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        begin
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                calc_triangle_bitmap_lsb = 8'b00000001 << slim_bram_xy_addr_value[2:0];
            else
                calc_triangle_bitmap_lsb = {8{1'b0}};
        end
    endfunction

    // Rectangle mode, LSB path: one-hot bitmap of the position selected by
    // slim_bram_xy_addr_value[2:0] when the address column ([7:3]) equals
    // col_index_value and the bank is not BANK_SLIM_EXT; all zeroes
    // otherwise.
    //
    // Implemented as a shift rather than a default-less case table, so an
    // unknown selector yields X instead of the stale result of the previous
    // call. col_index_last_value is unused.
    function  [7:0] calc_rectangle_bitmap_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [1:0] slim_bram_xy_bank_value;
        begin
            if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT))
                calc_rectangle_bitmap_lsb = 8'b00000001 << slim_bram_xy_addr_value[2:0];
            else
                calc_rectangle_bitmap_lsb = {8{1'b0}};
        end
    endfunction
       
    // Square mode, LSB path: position index of the addressed entry. When the
    // address column ([7:3]) equals col_index_value the result is simply the
    // low three address bits; otherwise it is a don't-care (X).
    //
    // The original eight-entry case mapped every 3-bit value onto itself, so
    // it is replaced by the slice. This also lets unknown address bits show
    // up as X instead of the stale result of the previous call.
    // col_index_last_value is unused.
    function  [2:0] calc_square_index_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        begin
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                calc_square_index_lsb = slim_bram_xy_addr_value[2:0];
            else
                calc_square_index_lsb = 3'dX;
        end
    endfunction

    // Triangle mode, LSB path: position index of the addressed entry. When
    // the address column ([7:3]) equals col_index_value the result is the
    // low three address bits; otherwise it is a don't-care (X).
    //
    // Replaces an identity case table (no default) with the direct slice, so
    // unknown address bits propagate as X instead of returning the stale
    // result of the previous call. col_index_last_value is unused.
    function  [2:0] calc_triangle_index_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        begin
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                calc_triangle_index_lsb = slim_bram_xy_addr_value[2:0];
            else
                calc_triangle_index_lsb = 3'dX;
        end
    endfunction

    // Rectangle mode, LSB path: position index of the addressed entry. When
    // the address column ([7:3]) equals col_index_value and the bank is not
    // BANK_SLIM_EXT the result is the low three address bits; otherwise it
    // is a don't-care (X).
    //
    // Replaces an identity case table (no default) with the direct slice, so
    // unknown address bits propagate as X instead of returning the stale
    // result of the previous call. col_index_last_value is unused.
    function  [2:0] calc_rectangle_index_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [1:0] slim_bram_xy_bank_value;
        begin
            if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT))
                calc_rectangle_index_lsb = slim_bram_xy_addr_value[2:0];
            else
                calc_rectangle_index_lsb = 3'dX;
        end
    endfunction
    
    // Square mode, LSB purge flag: raised only when the address column
    // ([7:3]) equals col_index_value and that same column also equals
    // col_index_last_value; 0 in every other case.
    function        calc_square_purge_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        reg   [4:0] addr_column;
        begin
            addr_column           = slim_bram_xy_addr_value[7:3];
            calc_square_purge_lsb = 1'b0;
            if (addr_column == col_index_value)
                calc_square_purge_lsb = (addr_column == col_index_last_value);
        end
    endfunction

    // Rectangle mode, LSB purge flag: raised only when the address column
    // ([7:3]) equals col_index_value and that same column also equals
    // col_index_last_value; 0 in every other case.
    function        calc_rectangle_purge_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        begin
            calc_rectangle_purge_lsb = 1'b0;
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                calc_rectangle_purge_lsb = (slim_bram_xy_addr_value[7:3] == col_index_last_value);
        end
    endfunction

    // Square mode, MSB path: 1 when the full 8-bit address equals
    // index_last_value, else 0. The two column inputs are unused.
    function        calc_square_valid_msb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [7:0] index_last_value;
        begin
            calc_square_valid_msb = 1'b0;
            if (slim_bram_xy_addr_value == index_last_value)
                calc_square_valid_msb = 1'b1;
        end
    endfunction

    // Rectangle mode, MSB path: 1 only for address 1 in bank BANK_SLIM_EXT,
    // else 0. The column inputs and index_last_value are unused.
    function        calc_rectangle_valid_msb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [1:0] slim_bram_xy_bank_value;
        input [7:0] index_last_value;
        begin
            calc_rectangle_valid_msb = 1'b0;
            if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
                calc_rectangle_valid_msb = 1'b1;
        end
    endfunction
    
    // Square mode, MSB bitmap. When the address equals index_last_value the
    // low seven bits are all ones and bit 7 is set unless col_index_value
    // equals col_index_last_value; for any other address the bitmap is zero.
    function  [7:0] calc_square_bitmap_msb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [7:0] index_last_value;
        begin
            calc_square_bitmap_msb = 8'b00000000;
            if (slim_bram_xy_addr_value == index_last_value)
                calc_square_bitmap_msb = {(col_index_value != col_index_last_value), 7'b1111111};
        end
    endfunction

    // Rectangle mode, MSB bitmap: all ones for address 1 in bank
    // BANK_SLIM_EXT, all zeroes otherwise. The column inputs and
    // index_last_value are unused.
    function  [7:0] calc_rectangle_bitmap_msb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [1:0] slim_bram_xy_bank_value;
        input [7:0] index_last_value;
        begin
            calc_rectangle_bitmap_msb = 8'b00000000;
            if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
                calc_rectangle_bitmap_msb = 8'b11111111;
        end
    endfunction

    // Square mode, MSB purge flag: when the address equals index_last_value
    // the flag reports whether col_index_value equals col_index_last_value;
    // for any other address it is 0.
    function        calc_square_purge_msb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [7:0] index_last_value;
        begin
            calc_square_purge_msb = 1'b0;
            if (slim_bram_xy_addr_value == index_last_value)
                calc_square_purge_msb = (col_index_value == col_index_last_value);
        end
    endfunction

    // Rectangle mode, MSB purge flag: for address 1 in bank BANK_SLIM_EXT the
    // flag reports whether col_index_value equals col_index_last_value; it is
    // 0 otherwise. index_last_value is unused.
    function        calc_rectangle_purge_msb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [1:0] slim_bram_xy_bank_value;
        input [7:0] index_last_value;
        begin
            calc_rectangle_purge_msb = 1'b0;
            if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
                calc_rectangle_purge_msb = (col_index_value == col_index_last_value);
        end
    endfunction

    
    // Control and data signals for the LSB recombinator block instance.
    reg         recomb_lsb_ce = 1'b0;           // one-cycle delayed x_valid_latch_lsb
    reg         recomb_lsb_ce_aux;              // one-cycle delayed x_aux_latch_lsb (no power-up value given)
    reg  [ 2:0] recomb_lsb_ce_purge = 3'b000;   // right-shifting purge window; bit 0 drives the enable
    wire        recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_aux | recomb_lsb_ce_purge[0];
    reg         recomb_lsb_clr;                 // set while ena_x & ena_y, cleared once recomb_lsb_ce is seen

    reg  [46:0] recomb_lsb_din;                 // 47-bit word fed to the LSB recombinator
    wire [15:0] recomb_lsb_dout;                // 16-bit result from the LSB recombinator

    // Control and data signals for the MSB recombinator block instance.
    reg         recomb_msb_ce = 1'b0;           // one-cycle delayed x_bitmap_latch_msb[0]
    reg  [ 1:0] recomb_msb_ce_purge = 2'b00;    // right-shifting purge window; bit 0 drives the enable
    wire        recomb_msb_ce_combined = recomb_msb_ce | recomb_msb_ce_purge[0];
    reg         recomb_msb_clr;                 // set while ena_x & ena_y, cleared once recomb_msb_ce is seen
    
    reg  [46:0] recomb_msb_din;                 // 47-bit word fed to the MSB recombinator
    wire [15:0] recomb_msb_dout;                // 16-bit result from the MSB recombinator
    
    // LSB recombinator for the X channel: takes one 47-bit word per enabled
    // cycle and produces a 16-bit output. The block itself is defined in
    // another file; its internal behaviour is not visible here.
    modexpng_recombinator_block recomb_x_lsb
    (
        .clk    (clk),
        .ce     (recomb_lsb_ce_combined),
        .clr    (recomb_lsb_clr),
        .din    (recomb_lsb_din),
        .dout   (recomb_lsb_dout)
    );

    // MSB recombinator for the X channel, wired the same way with the
    // msb-side enable, clear and data signals.
    modexpng_recombinator_block recomb_x_msb
    (
        .clk    (clk),
        .ce     (recomb_msb_ce_combined),
        .clr    (recomb_msb_clr),
        .din    (recomb_msb_din),
        .dout   (recomb_msb_dout)
    );

    // Clock-enable generation for the two X recombinator blocks.
    //
    //  * recomb_lsb_ce / recomb_lsb_ce_aux follow x_valid_latch_lsb /
    //    x_aux_latch_lsb with one cycle of delay; recomb_msb_ce follows
    //    x_bitmap_latch_msb[0] the same way.
    //  * recomb_lsb_ce_purge is loaded with 3'b111 on x_purge_latch_lsb and
    //    then shifts right, so its bit 0 stays high for three cycles after the
    //    last load.
    //  * recomb_msb_ce_purge is loaded with 2'b11 when the MSB purge flag is
    //    seen together with bitmap bit 0 set and bit 1 clear, then shifts
    //    right (bit 0 high for two cycles).
    //
    // Every assignment here is non-blocking. The MSB purge load used to be a
    // blocking assignment, which mixed assignment kinds on one register in a
    // clocked block and made the value seen by recomb_msb_ce_combined in the
    // same time step depend on simulator event ordering.
    always @(posedge clk) begin
        //
        recomb_lsb_ce <= x_valid_latch_lsb;
        recomb_lsb_ce_aux <= x_aux_latch_lsb;
        recomb_msb_ce <= x_bitmap_latch_msb[0];
        //
        if (x_purge_latch_lsb)
            recomb_lsb_ce_purge <= 3'b111;
        else
            recomb_lsb_ce_purge <= {1'b0, recomb_lsb_ce_purge[2:1]};
        //
        if (x_purge_latch_msb && x_bitmap_latch_msb[0] && !x_bitmap_latch_msb[1])
            recomb_msb_ce_purge <= 2'b11;
        else
            recomb_msb_ce_purge <= {1'b0, recomb_msb_ce_purge[1]};
        //
    end


    // Clear flags for the recombinator blocks: both are raised while
    // ena_x & ena_y is asserted, and each one is dropped again on the first
    // cycle in which its own clock enable (recomb_*_ce) is high.
    always @(posedge clk) begin
        // LSB side
        if (ena_x & ena_y)      recomb_lsb_clr <= 1'b1;
        else if (recomb_lsb_ce) recomb_lsb_clr <= 1'b0;
        // MSB side
        if (ena_x & ena_y)      recomb_msb_clr <= 1'b1;
        else if (recomb_msb_ce) recomb_msb_clr <= 1'b0;
    end

    // Input mux of the LSB recombinator, in priority order:
    //   valid -> latched product word selected by x_index_latch_lsb
    //   aux   -> latched product word 8
    //   else  -> zero
    always @(posedge clk) begin
        case (1'b1)
            x_valid_latch_lsb: recomb_lsb_din <= dsp_x_p_latch[x_index_latch_lsb];
            x_aux_latch_lsb:   recomb_lsb_din <= dsp_x_p_latch[8];
            default:           recomb_lsb_din <= {47{1'b0}};
        endcase
    end

    // Input mux of the MSB recombinator: latched product word 0 while bit 0
    // of the latched MSB bitmap is set, zero otherwise.
    always @(posedge clk) begin
        recomb_msb_din <= {47{1'b0}};
        if (x_bitmap_latch_msb[0])
            recomb_msb_din <= dsp_x_p_latch[0];
    end


    // Loader for stage [6] of the xy_*_adv arrays.
    //
    // Every clock the stage is first given its idle values; the branch that
    // matches fsm_state_next then overrides only the fields that mode
    // actually computes (the last non-blocking assignment to a field wins).
    //   square    : all lsb fields except aux, plus all msb fields
    //   triangle  : lsb valid / aux / bitmap / index only
    //   rectangle : all lsb fields except aux, plus all msb fields
    always @(posedge clk) begin
        //
        // idle values
        //
        xy_valid_lsb_adv [6] <= 1'b0;
        xy_aux_lsb_adv   [6] <= 1'b0;
        xy_bitmap_lsb_adv[6] <= {8{1'b0}};
        xy_index_lsb_adv [6] <= 3'dX;
        xy_purge_lsb_adv [6] <= 1'b0;
        xy_valid_msb_adv [6] <= 1'b0;
        xy_bitmap_msb_adv[6] <= {8{1'b0}};
        xy_purge_msb_adv [6] <= 1'b0;
        //
        // mode-specific overrides
        //
        case (fsm_state_next)
            //
            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
                xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, slim_bram_xy_addr);
                xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr);
                xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, slim_bram_xy_addr);
                xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
                xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, slim_bram_xy_addr, index_last);
                xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, index_last);
                xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, slim_bram_xy_addr, index_last);
            end
            //
            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
                // valid / bitmap / index do not look at the bank here; only aux does
                xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr);
                xy_aux_lsb_adv   [6] <= calc_triangle_aux_lsb   (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
                xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr);
                xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr);
            end
            //
            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
                xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
                xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
                xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
                xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
                xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
                xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
                xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
            end
            //
            default: ; // any other state: keep the idle values assigned above
            //
        endcase
    end


    // Output stage of the xy_*_adv control pipelines.
    // Every clock: stage [1] of each pipeline is copied into both the x_* and
    // y_* control registers, the LSB controls are delayed one further cycle
    // into the *_latch_lsb registers, the X MSB bitmap/purge latch is updated,
    // and stages [1]..[5] each take the value of the next-higher stage.
    // Stage [6] is not written here; it is loaded by the FSM-state case
    // statement that precedes this block.
    always @(posedge clk) begin
        //
        // LSB controls: X and Y receive the same stage-[1] value
        {y_valid_lsb,  x_valid_lsb}  <= {2{xy_valid_lsb_adv [1]}};
        {y_aux_lsb,    x_aux_lsb}    <= {2{xy_aux_lsb_adv   [1]}};
        {y_bitmap_lsb, x_bitmap_lsb} <= {2{xy_bitmap_lsb_adv[1]}};
        {y_index_lsb,  x_index_lsb}  <= {2{xy_index_lsb_adv [1]}};
        {y_purge_lsb,  x_purge_lsb}  <= {2{xy_purge_lsb_adv [1]}};
        //
        // one-cycle delayed copies of the LSB controls
        {y_valid_latch_lsb,  x_valid_latch_lsb}  <= {y_valid_lsb,  x_valid_lsb};
        {y_aux_latch_lsb,    x_aux_latch_lsb}    <= {y_aux_lsb,    x_aux_lsb};
        {y_bitmap_latch_lsb, x_bitmap_latch_lsb} <= {y_bitmap_lsb, x_bitmap_lsb};
        {y_index_latch_lsb,  x_index_latch_lsb}  <= {y_index_lsb,  x_index_lsb};
        {y_purge_latch_lsb,  x_purge_latch_lsb}  <= {y_purge_lsb,  x_purge_lsb};
        //
        // MSB controls: X and Y receive the same stage-[1] value
        {y_valid_msb,  x_valid_msb}  <= {2{xy_valid_msb_adv[1]}};
        {y_bitmap_msb, x_bitmap_msb} <= {2{xy_bitmap_msb_adv[1]}};
        {y_purge_msb,  x_purge_msb}  <= {2{xy_purge_msb_adv[1]}};
        //
        // X MSB latch: capture bitmap and purge flag while x_valid_msb is set;
        // otherwise shift the latched bitmap right by one position, filling
        // bit 7 with zero (the purge latch keeps its value in that case).
        // Only the X-side latch is written in this block.
        if (x_valid_msb) begin
            x_bitmap_latch_msb <= x_bitmap_msb;
            x_purge_latch_msb  <= x_purge_msb;
        end else begin
            x_bitmap_latch_msb <= {1'b0, x_bitmap_latch_msb[7:1]};
        end
        //
        //
        // advance the pipelines: stage [i] <= stage [i+1] for i = 1..5
        for (i=1; i<6; i=i+1) begin
            xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
            xy_aux_lsb_adv   [i] <= xy_aux_lsb_adv   [i+1];
            xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
            xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
            xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
            //
            xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1];
            xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1];
            xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1];
        end
        //
    end

    // dsp_x_p_latch update.
    //  * While bit 1 of the latched MSB bitmap is set, entries 0..7 act as a
    //    shift register: entry i takes entry i+1, entry 7 becomes don't-care.
    //    Because bit 1 (not bit 0) is tested, the shift stops one step before
    //    the bitmap is fully shifted out ("only shift 7 times").
    //  * Otherwise, when dsp_x_ce_p_dly1 is set, every entry selected by the
    //    LSB bitmap, or by the MSB bitmap while x_valid_msb is set, captures
    //    the matching dsp_x_p_split word; entry 8 captures when x_aux_lsb is set.
    always @(posedge clk) begin
        if (x_bitmap_latch_msb[1]) begin
            // shift phase
            for (i=0; i<8; i=i+1) begin
                if (i < 7) dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
                else       dsp_x_p_latch[i] <= {47{1'bX}};
            end
        end else if (dsp_x_ce_p_dly1) begin
            // capture phase: both select conditions load the same source word
            for (i=0; i<8; i=i+1) begin
                if (x_bitmap_lsb[i] || (x_valid_msb && x_bitmap_msb[i]))
                    dsp_x_p_latch[i] <= dsp_x_p_split[i];
            end
            // auxiliary entry
            if (x_aux_lsb)
                dsp_x_p_latch[8] <= dsp_x_p_split[8];
        end
    end

    // Registered (one clock later) copies of the combined LSB/MSB
    // recombinator clock-enable signals; both power up as 0.
    reg recomb_x_msb_dout_valid = 1'b0;
    reg recomb_x_lsb_dout_valid = 1'b0;

    always @(posedge clk) begin
        recomb_x_msb_dout_valid <= recomb_msb_ce_combined;
        recomb_x_lsb_dout_valid <= recomb_lsb_ce_combined;
    end
        

    // Registers behind the rcmb_fat_bram_* nets (bank, address, X/Y data words
    // and a valid strobe). Written only through the *_fat_bram_regs tasks.
    reg [ 2:0] fat_bram_xy_bank_reg;
    reg [ 7:0] fat_bram_xy_addr_reg;
    reg [17:0] fat_bram_x_dout_reg;
    reg [17:0] fat_bram_y_dout_reg;
    reg        fat_bram_xy_dout_valid_reg = 1'b0;

    // Registers behind the rcmb_slim_bram_* nets; same layout as the "fat" set.
    // Written only through the *_slim_bram_regs tasks.
    reg [ 2:0] slim_bram_xy_bank_reg;
    reg [ 7:0] slim_bram_xy_addr_reg;
    reg [17:0] slim_bram_x_dout_reg;
    reg [17:0] slim_bram_y_dout_reg;
    reg        slim_bram_xy_dout_valid_reg = 1'b0;

    // Word counters for the LSB and MSB streams (see inc_bram_cnt_* and
    // clr_bram_cnt_*): they count up and restart from zero after reaching
    // index_last.
    reg [ 7:0] bram_xy_cnt_lsb;
    reg [ 7:0] bram_xy_cnt_msb;
    
    // Set when the matching counter restarts from index_last; cleared together
    // with the counter by clr_bram_cnt_*.
    reg        bram_xy_cnt_lsb_wrapped;
    reg        bram_xy_cnt_msb_wrapped;

    // Two-deep shift register of MSB words (see shift_recomb_msb_dout_carry);
    // stage 1 is the term added to recomb_lsb_dout in the 2'b11 branches of
    // recombine_square and recombine_rectangle.
    reg [15:0] recomb_msb_dout_carry_0;
    reg [15:0] recomb_msb_dout_carry_1;
    
    // Three-deep delay line (see advance_recomb_msb_dout_delay) carrying an MSB
    // data word ...
    reg [15:0] recomb_msb_dout_delay_0;
    reg [15:0] recomb_msb_dout_delay_1;
    reg [15:0] recomb_msb_dout_delay_2;
    
    // ... the counter value that accompanied it ...
    reg [ 7:0] recomb_msb_cnt_delay_0 = 8'd0;
    reg [ 7:0] recomb_msb_cnt_delay_1 = 8'd0;
    reg [ 7:0] recomb_msb_cnt_delay_2 = 8'd0;

    // ... and a flag marking the entry as occupied (1) or empty (0).
    reg        recomb_msb_flag_delay_0;
    reg        recomb_msb_flag_delay_1;
    reg        recomb_msb_flag_delay_2;

    // Drive the rcmb_fat_bram_* nets straight from the registers above.
    assign rcmb_fat_bram_xy_bank       = fat_bram_xy_bank_reg;
    assign rcmb_fat_bram_xy_addr       = fat_bram_xy_addr_reg;
    assign rcmb_fat_bram_x_dout        = fat_bram_x_dout_reg;
    assign rcmb_fat_bram_y_dout        = fat_bram_y_dout_reg;
    assign rcmb_fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg;

    // Drive the rcmb_slim_bram_* nets straight from the registers above.
    assign rcmb_slim_bram_xy_bank       = slim_bram_xy_bank_reg;
    assign rcmb_slim_bram_xy_addr       = slim_bram_xy_addr_reg;
    assign rcmb_slim_bram_x_dout        = slim_bram_x_dout_reg;
    assign rcmb_slim_bram_y_dout        = slim_bram_y_dout_reg;
    assign rcmb_slim_bram_xy_dout_valid = slim_bram_xy_dout_valid_reg;
    
    // Ready flag. rdy_reg drops to 0 on the clock where ena_x and ena_y are
    // both set; on every other clock it follows rdy_adv. Both registers
    // power up as 1.
    reg rdy_adv = 1'b1;
    reg rdy_reg = 1'b1;

    always @(posedge clk) begin
        if (ena_x & ena_y) rdy_reg <= 1'b0;
        else               rdy_reg <= rdy_adv;
    end

    assign rdy = rdy_reg;

            
    // Advance the three-deep MSB delay line by one step.
    //   dout : data word pushed into stage 0
    //   cnt  : counter value pushed into stage 0
    //   flag : occupancy flag pushed into stage 0
    // Stage 0 moves to stage 1 and stage 1 to stage 2; the old stage 2 is dropped.
    task advance_recomb_msb_dout_delay;
        input [15:0] dout;
        input [ 7:0] cnt;
        input        flag;
        begin
            // data words
            {recomb_msb_dout_delay_2, recomb_msb_dout_delay_1, recomb_msb_dout_delay_0} <=
                {recomb_msb_dout_delay_1, recomb_msb_dout_delay_0, dout};
            // counter values
            {recomb_msb_cnt_delay_2, recomb_msb_cnt_delay_1, recomb_msb_cnt_delay_0} <=
                {recomb_msb_cnt_delay_1, recomb_msb_cnt_delay_0, cnt};
            // occupancy flags
            {recomb_msb_flag_delay_2, recomb_msb_flag_delay_1, recomb_msb_flag_delay_0} <=
                {recomb_msb_flag_delay_1, recomb_msb_flag_delay_0, flag};
        end
    endtask
         
    // Push one word into the two-deep carry shift register:
    // dout enters stage 0 and the previous stage 0 moves to stage 1.
    task shift_recomb_msb_dout_carry;
        input [15:0] dout;
        begin
            {recomb_msb_dout_carry_1, recomb_msb_dout_carry_0} <=
                {recomb_msb_dout_carry_0, dout};
        end
    endtask
    
    // Low-level writer for the "fat" BRAM output registers: loads bank,
    // address, X/Y data words and the valid strobe in a single step.
    task _update_fat_bram_regs;
        input [ 2:0] bank;
        input [ 7:0] addr;
        input [17:0] dout_x;
        input [17:0] dout_y;
        input        valid;
        begin
            // 3 + 8 + 18 + 18 + 1 bits on both sides
            {fat_bram_xy_bank_reg,
             fat_bram_xy_addr_reg,
             fat_bram_x_dout_reg,
             fat_bram_y_dout_reg,
             fat_bram_xy_dout_valid_reg} <= {bank, addr, dout_x, dout_y, valid};
        end
    endtask
    
    // Low-level writer for the "slim" BRAM output registers: loads bank,
    // address, X/Y data words and the valid strobe in a single step.
    task _update_slim_bram_regs;
        input [ 2:0] bank;
        input [ 7:0] addr;
        input [17:0] dout_x;
        input [17:0] dout_y;
        input        valid;
        begin
            // 3 + 8 + 18 + 18 + 1 bits on both sides
            {slim_bram_xy_bank_reg,
             slim_bram_xy_addr_reg,
             slim_bram_x_dout_reg,
             slim_bram_y_dout_reg,
             slim_bram_xy_dout_valid_reg} <= {bank, addr, dout_x, dout_y, valid};
        end
    endtask
            
    // Present a word on the "fat" BRAM output registers with the valid
    // strobe asserted.
    task set_fat_bram_regs;
        input [ 2:0] bank;
        input [ 7:0] addr;
        input [17:0] dout_x;
        input [17:0] dout_y;
        begin
            fat_bram_xy_dout_valid_reg <= 1'b1;
            fat_bram_xy_bank_reg       <= bank;
            fat_bram_xy_addr_reg       <= addr;
            fat_bram_x_dout_reg        <= dout_x;
            fat_bram_y_dout_reg        <= dout_y;
        end
    endtask
    
    // Present a word on the "slim" BRAM output registers with the valid
    // strobe asserted.
    task set_slim_bram_regs;
        input [ 2:0] bank;
        input [ 7:0] addr;
        input [17:0] dout_x;
        input [17:0] dout_y;
        begin
            slim_bram_xy_dout_valid_reg <= 1'b1;
            slim_bram_xy_bank_reg       <= bank;
            slim_bram_xy_addr_reg       <= addr;
            slim_bram_x_dout_reg        <= dout_x;
            slim_bram_y_dout_reg        <= dout_y;
        end
    endtask
    
    // Idle the "fat" BRAM output registers: valid strobe low, all other
    // fields set to don't-care (X).
    task clear_fat_bram_regs;
        begin
            fat_bram_xy_dout_valid_reg <= 1'b0;
            fat_bram_xy_bank_reg       <= 3'bXXX;
            fat_bram_xy_addr_reg       <= 8'hXX;
            fat_bram_x_dout_reg        <= {18{1'bX}};
            fat_bram_y_dout_reg        <= {18{1'bX}};
        end
    endtask

    // Idle the "slim" BRAM output registers: valid strobe low, all other
    // fields set to don't-care (X).
    task clear_slim_bram_regs;
        begin
            slim_bram_xy_dout_valid_reg <= 1'b0;
            slim_bram_xy_bank_reg       <= 3'bXXX;
            slim_bram_xy_addr_reg       <= 8'hXX;
            slim_bram_x_dout_reg        <= {18{1'bX}};
            slim_bram_y_dout_reg        <= {18{1'bX}};
        end
    endtask
    
    // Low-level writer for the LSB word counter and its "wrapped" flag.
    task _set_bram_cnt_lsb;
        input [7:0] cnt;
        input       wrapped;
        begin
            {bram_xy_cnt_lsb_wrapped, bram_xy_cnt_lsb} <= {wrapped, cnt};
        end
    endtask
    
    // Low-level writer for the MSB word counter and its "wrapped" flag.
    task _set_bram_cnt_msb;
        input [7:0] cnt;
        input       wrapped;
        begin
            {bram_xy_cnt_msb_wrapped, bram_xy_cnt_msb} <= {wrapped, cnt};
        end
    endtask

    // Step the LSB word counter. When it equals index_last it restarts from
    // zero and the "wrapped" flag is set; otherwise it increments by one and
    // the flag keeps its current value.
    task inc_bram_cnt_lsb;
        begin
            if (bram_xy_cnt_lsb == index_last) begin
                bram_xy_cnt_lsb         <= 8'd0;
                bram_xy_cnt_lsb_wrapped <= 1'b1;
            end else begin
                bram_xy_cnt_lsb         <= bram_xy_cnt_lsb + 1'b1;
                bram_xy_cnt_lsb_wrapped <= bram_xy_cnt_lsb_wrapped;
            end
        end
    endtask
    
    // Step the MSB word counter. When it equals index_last it restarts from
    // zero and the "wrapped" flag is set; otherwise it increments by one and
    // the flag keeps its current value.
    task inc_bram_cnt_msb;
        begin
            if (bram_xy_cnt_msb == index_last) begin
                bram_xy_cnt_msb         <= 8'd0;
                bram_xy_cnt_msb_wrapped <= 1'b1;
            end else begin
                bram_xy_cnt_msb         <= bram_xy_cnt_msb + 1'b1;
                bram_xy_cnt_msb_wrapped <= bram_xy_cnt_msb_wrapped;
            end
        end
    endtask
    
    // Reset the LSB word counter to zero and drop its "wrapped" flag.
    task clr_bram_cnt_lsb;
        begin
            bram_xy_cnt_lsb_wrapped <= 1'b0;
            bram_xy_cnt_lsb         <= 8'd0;
        end
    endtask
    
    // Reset the MSB word counter to zero and drop its "wrapped" flag.
    task clr_bram_cnt_msb;
        begin
            bram_xy_cnt_msb_wrapped <= 1'b0;
            bram_xy_cnt_msb         <= 8'd0;
        end
    endtask
    
    
   
    

    // Two-bit "what is valid this cycle" code: bit 1 = MSB word valid,
    // bit 0 = LSB word valid.
    wire [1:0] rcmb_xy_dout_valid;
    assign rcmb_xy_dout_valid = {recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid};

    // Per-clock dispatcher. When ena_x and ena_y are both set, the two word
    // counters are cleared; on every other clock the handler selected by
    // rcmb_mode runs (no handler for rcmb_mode == 0). The dispatch is not
    // gated by the ready flag (the original author left this as an open
    // question: "if not ready???").
    always @(posedge clk) begin
        if (ena_x & ena_y) begin
            clr_bram_cnt_lsb();
            clr_bram_cnt_msb();
        end else begin
            case (rcmb_mode)
                2'd3: recombine_rectangle();
                2'd2: recombine_triangle();
                2'd1: recombine_square();
            endcase
        end
    end
           
    // One clock of output handling for rcmb_mode == 1.
    // All actions for a given rcmb_xy_dout_valid code are grouped in a single
    // branch; every task call below uses nonblocking assignments and touches
    // its own set of registers, so their relative order is irrelevant.
    task recombine_square;
        begin
            case (rcmb_xy_dout_valid)
                //
                // nothing valid: drain the delay line if its last stage is
                // occupied, otherwise idle the fat BRAM outputs
                2'b00: begin
                    if (recomb_msb_flag_delay_2) begin
                        set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
                        advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
                    end else begin
                        clear_fat_bram_regs();
                    end
                end
                //
                // LSB word only: count it and write it to BANK_FAT_ABL
                2'b01: begin
                    inc_bram_cnt_lsb();
                    set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
                end
                //
                // MSB word only: words with counter values 0 and 1 go into
                // the carry shift register, later ones are written to BANK_FAT_ABH
                2'b10: begin
                    inc_bram_cnt_msb();
                    if (bram_xy_cnt_msb < 8'd2) begin
                        clear_fat_bram_regs();
                        shift_recomb_msb_dout_carry(recomb_msb_dout);
                    end else begin
                        set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
                    end
                end
                //
                // both valid: the MSB word always enters the delay line; the
                // LSB word is written as-is to BANK_FAT_ABL before the LSB
                // counter has wrapped, and afterwards is summed with carry
                // stage 1 and written to BANK_FAT_ABH
                2'b11: begin
                    inc_bram_cnt_lsb();
                    inc_bram_cnt_msb();
                    advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1);
                    if (bram_xy_cnt_lsb_wrapped) begin
                        set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}});
                        shift_recomb_msb_dout_carry({16{1'bX}});
                    end else begin
                        set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
                    end
                end
                //
                // selector contains X/Z: idle the outputs (marked "DEBUG!!!"
                // in the original)
                default: clear_fat_bram_regs();
                //
            endcase
        end
    endtask
    
    
    // One clock of output handling for rcmb_mode == 2.
    // Only an LSB-only cycle (2'b01) produces a write; every other fully
    // known code idles the slim BRAM outputs.
    task recombine_triangle;
        begin
            case (rcmb_xy_dout_valid)
                //
                // LSB word: count it, then write it to BANK_SLIM_Q at the
                // counter address before the counter has wrapped, or to
                // BANK_SLIM_EXT address 1 afterwards
                2'b01: begin
                    inc_bram_cnt_lsb();
                    if (!bram_xy_cnt_lsb_wrapped)
                        set_slim_bram_regs(BANK_SLIM_Q,   bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
                    else
                        set_slim_bram_regs(BANK_SLIM_EXT, 8'd1, {2'b00, recomb_lsb_dout}, {18{1'bX}});
                end
                //
                2'b00, 2'b10, 2'b11: clear_slim_bram_regs();
                //
            endcase
        end
    endtask


    // One clock of output handling for rcmb_mode == 3.
    // All actions for a given rcmb_xy_dout_valid code are grouped in a single
    // branch; every task call below uses nonblocking assignments and touches
    // its own set of registers, so their relative order is irrelevant.
    task recombine_rectangle;
        begin
            case (rcmb_xy_dout_valid)
                //
                // nothing valid: drain the delay line if its last stage is
                // occupied, otherwise idle the fat BRAM outputs
                2'b00: begin
                    if (recomb_msb_flag_delay_2) begin
                        set_fat_bram_regs(BANK_FAT_MH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
                        advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
                    end else begin
                        clear_fat_bram_regs();
                    end
                end
                //
                // LSB word only: count it and write it to BANK_FAT_ML
                2'b01: begin
                    inc_bram_cnt_lsb();
                    set_fat_bram_regs(BANK_FAT_ML, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
                end
                //
                // MSB word only
                2'b10: begin
                    inc_bram_cnt_msb();
                    // output registers: before the MSB counter has wrapped,
                    // counter values 0 and 1 produce no write and later ones
                    // go to BANK_FAT_MH; after the wrap the word goes to
                    // BANK_FAT_EXT address 0
                    if (!bram_xy_cnt_msb_wrapped) begin
                        if (bram_xy_cnt_msb < 8'd2)
                            clear_fat_bram_regs();
                        else
                            set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
                    end else begin
                        set_fat_bram_regs(BANK_FAT_EXT, 8'd0, {2'b00, recomb_msb_dout}, {18{1'bX}});
                    end
                    // carry register: collects the words with counter values
                    // 0 and 1 before the wrap
                    if ((bram_xy_cnt_msb < 8'd2) && !bram_xy_cnt_msb_wrapped)
                        shift_recomb_msb_dout_carry(recomb_msb_dout);
                    // delay line: pushed with an empty entry after the wrap
                    if (bram_xy_cnt_msb_wrapped)
                        advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
                end
                //
                // both valid: LSB word plus carry stage 1 is written to
                // BANK_FAT_MH, the MSB word enters the delay line and the
                // carry register shifts in a don't-care word
                2'b11: begin
                    inc_bram_cnt_lsb();
                    inc_bram_cnt_msb();
                    set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}});
                    advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1);
                    shift_recomb_msb_dout_carry({16{1'bX}});
                end
                //
            endcase
        end
    endtask
    
    
    // Generation of rdy_adv, the value rdy_reg takes on the following clock.
    //  * Cleared on the clock where ena_x and ena_y are both set.
    //  * While rdy_reg is low:
    //      modes 1 and 3 - on an idle cycle (no word valid) with the last
    //                      delay-line stage occupied, rdy_adv becomes the
    //                      inverse of the stage-1 flag, i.e. it goes high once
    //                      the entry being drained is the last occupied one;
    //      mode 2        - on an LSB-only cycle, rdy_adv takes the current
    //                      value of the LSB counter's "wrapped" flag.
    //  * In every other situation rdy_adv keeps its value.
    always @(posedge clk) begin
        if (ena_x & ena_y) begin
            rdy_adv <= 1'b0;
        end else if (!rdy_reg) begin
            case (rcmb_mode)
                //
                2'd1, 2'd3: begin
                    if (rcmb_xy_dout_valid == 2'b00) begin
                        if (recomb_msb_flag_delay_2)
                            rdy_adv <= ~recomb_msb_flag_delay_1;
                    end
                end
                //
                2'd2: begin
                    if (rcmb_xy_dout_valid == 2'b01)
                        rdy_adv <= bram_xy_cnt_lsb_wrapped;
                end
                //
            endcase
        end
    end


    
        // add ready for mode=3
endmodule