aboutsummaryrefslogblamecommitdiff
path: root/rtl/modexpng_mmm_dual.v
blob: fd39943a42bd9ae3892d66b1e519c820c6edfb81 (plain) (tree)
1
2
3
4

                                                                        

                                                            










                                                                         


                                                                         














                                                                           
                        
 
               

             



                           
                  
                
                  
    

                               





                        



                      



                      

                    


















                         
                      





              
                                     
                                       




            

                                                     
    

                                                   
    





                                                                      
        

                                                             
    










                                                                   
 




                                                                 
 




                                                                  
 




                                                                    
 




                                                             
    

                                                        




                      

                                                               
 




                                                               




                  
                                          
          

                                                    




                                











                                                                       
    
    



                                             
                                                 
                                              


                                                  


                                                

             
                                               
                                   
                                                                               


               
    


                     




                                                                                  




                                    
    






                                     


                                                          


                                                                            



                                        


                                                          


                                                                                                            





















                                                   

                                                                                                               
                                                                                 
    







                               
                                                 
                                                                                                     
              
                                                                                  













                                                           
                                                   
                                                                                                           
              
                                                                                      














                                                               
                                                    
                                                                                                              
              
                                                                                        











                                                                 
                                          
          
                                          





                                 
                                                     





                                                                                        


                                                     
                                                                                                                                     
                  


                                                       
                                                                                                                                                                                      
                  


                                                        
                                                                                                                                                                       
                  
                                                       






                                 




                                                     
                                                                                      
                  




                                                       
                                                                                                                                                                                        
                  




                                                        
                                                                                                                                                                                  
                  
                                                    


                           

                            

                                 


                                                     
                                                     


                                                       
                                                       


                                                        







                                                                                                                                                 
                                                        
                                                                                                      
                  
                                               









                                 
                                                               


                                          
                                                                 


                   




                                                                                                 


               
                                          
          
                         





                                    
                                              


                                     









                                                                                                                     
                      


                                                         
                                                         


                                                           
                                                           


                                                            


                                                                                                                                    
                      





                                 





                                                                                           
                  


                                                     
                                                     


                                                       


                                                                                                                                 
                  




                                                        


                                                                                                                     
                  




                                 
                  




                                                     

                                                                                  


                                                       

                                                                                    
                                                       

                                                                                    




                                                            



                                                                                     




                                 
                  




                                                     

                                                                                      



                                                                                        
                  
                                                       

                                                                                        





                                                            










                                                                          




                                 
                  
















                                                        



                                                                             




                                 
                  




                                                       



                                                                                 




                                                        



                                                                                       















                                             

                                           
                                           


                                           
    
                                                                                      
    

                                                        
 

                                                        
    
                                                               
 

                                                                    
    

                                                        



                                       


                                              
                          
        




                                     
 
                                    
        


                              



                                              
                          
        




                                     
 
                                    
        


                              
      







                                  
                                          
          
                         






















                                                                      
    



                       
                             
    
                               

                        

                                                                           
               

                        

                                                                               
                                                                               


               
 
    
      
                                           
      


                                                                                                                   
  

                                                                                             
  
                               
          





                                     

                                      





                                                       
                          
                  

                                                                


                                                                                  

                                
           

          
 







                                                                         
        




                                                              

                                                                                                                                                                                   

               




                                                                  

                                                                                                                                                                                      




                             
              
                                                 
                                                 
                                                   
                                                   
                                                    







                                                                                                                                   
                                                    



                                                                                                                                         


                               
          




                                                 
          
       

 







                                                  

                                        
        

                                           
        
                                                 
        
                                                  
        


                                              
        

                                                 
        

                                                 
        




                                                     
        




                                                       
        




                                               






                                
                                          
          

                                                                                            












                                                                                              




                                                                                                                                             
     


                   
                                            

                        
                                                                                                                                                    
                        


                                                                                                                                                                      
            


                                                                                                                                                                      
            
                                                                                                                                                                   
 


                                                                                                                                                                             
            


                                                                                                                                                                        
            
                                                                                                                                                                     
 

                                                                                                                                
                                                                                                                                                                              
            


                                                                                                                                                                         
            
                                                                                                                                                                      
            
                                                                                                                                                             
            
                                                                                                                                
            
                                                                                                                                












                                    
                                          
          
                                                                         
                             
                                                                         
                                                                         


               






                         
                                          
          
                                                                           
                  
                                                                           
                                                                            
           





                                




                                                     
          
    
    
         
//======================================================================
//
// Copyright: 2019, The Commons Conservancy Cryptech Project
// SPDX-License-Identifier: BSD-3-Clause
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// - Neither the name of the copyright holder nor the names of its
//   contributors may be used to endorse or promote products derived from
//   this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================

module modexpng_mmm_dual
(
    clk, rst_n,
    
    ena, rdy,
        
    ladder_mode,
    word_index_last,
    word_index_last_minus1,
    force_unity_b,
    only_reduce,
    just_multiply,
    
    sel_wide_in, sel_narrow_in,
    
    rd_wide_xy_ena,
    rd_wide_xy_ena_aux,
    rd_wide_xy_bank,
    rd_wide_xy_bank_aux,
    rd_wide_xy_addr,
    rd_wide_xy_addr_aux,
    rd_wide_x_din,
    rd_wide_y_din,
    rd_wide_x_din_aux,
    rd_wide_y_din_aux,
    
    rd_narrow_xy_ena,
    rd_narrow_xy_bank,
    rd_narrow_xy_addr,
    rd_narrow_x_din,
    rd_narrow_y_din,
    
    rcmb_wide_xy_bank,
    rcmb_wide_xy_addr,
    rcmb_wide_x_dout,
    rcmb_wide_y_dout,
    rcmb_wide_xy_valid,
    
    rcmb_narrow_xy_bank,
    rcmb_narrow_xy_addr,
    rcmb_narrow_x_dout,
    rcmb_narrow_y_dout,
    rcmb_narrow_xy_valid,
    
    rcmb_xy_bank,
    rcmb_xy_addr,
    rcmb_x_dout,
    rcmb_y_dout,
    rcmb_xy_valid,
    
    rdct_ena, rdct_rdy
);


    //
    // Headers
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_mmm_dual_fsm.vh"


    //
    // Ports
    //
    input                                      clk;
    input                                      rst_n;
    
    input                                      ena;
    output                                     rdy;
    
    input                                      ladder_mode;
    input  [  OP_ADDR_W                  -1:0] word_index_last;
    input  [  OP_ADDR_W                  -1:0] word_index_last_minus1;
    input                                      force_unity_b;
    input                                      only_reduce;
    input                                      just_multiply;
        
    input  [BANK_ADDR_W                  -1:0] sel_wide_in;
    input  [BANK_ADDR_W                  -1:0] sel_narrow_in;
    
    output                                     rd_wide_xy_ena;
    output                                     rd_wide_xy_ena_aux;
    output [BANK_ADDR_W                  -1:0] rd_wide_xy_bank;
    output [BANK_ADDR_W                  -1:0] rd_wide_xy_bank_aux;
    
    output [  OP_ADDR_W * NUM_MULTS_HALF -1:0] rd_wide_xy_addr;
    output [  OP_ADDR_W                  -1:0] rd_wide_xy_addr_aux;
    input  [ WORD_EXT_W * NUM_MULTS_HALF -1:0] rd_wide_x_din;
    input  [ WORD_EXT_W * NUM_MULTS_HALF -1:0] rd_wide_y_din;    
    input  [ WORD_EXT_W                  -1:0] rd_wide_x_din_aux;
    input  [ WORD_EXT_W                  -1:0] rd_wide_y_din_aux;

    output                                     rd_narrow_xy_ena;
    output [BANK_ADDR_W                  -1:0] rd_narrow_xy_bank;
    output [  OP_ADDR_W                  -1:0] rd_narrow_xy_addr;
    input  [ WORD_EXT_W                  -1:0] rd_narrow_x_din;
    input  [ WORD_EXT_W                  -1:0] rd_narrow_y_din;

    output [BANK_ADDR_W                  -1:0] rcmb_wide_xy_bank;
    output [  OP_ADDR_W                  -1:0] rcmb_wide_xy_addr;
    output [ WORD_EXT_W                  -1:0] rcmb_wide_x_dout;
    output [ WORD_EXT_W                  -1:0] rcmb_wide_y_dout;
    output                                     rcmb_wide_xy_valid;

    output [BANK_ADDR_W                  -1:0] rcmb_narrow_xy_bank;
    output [  OP_ADDR_W                  -1:0] rcmb_narrow_xy_addr;
    output [ WORD_EXT_W                  -1:0] rcmb_narrow_x_dout;
    output [ WORD_EXT_W                  -1:0] rcmb_narrow_y_dout;
    output                                     rcmb_narrow_xy_valid;

    output [BANK_ADDR_W                  -1:0] rcmb_xy_bank;
    output [  OP_ADDR_W                  -1:0] rcmb_xy_addr;
    output [ WORD_EXT_W                  -1:0] rcmb_x_dout;
    output [ WORD_EXT_W                  -1:0] rcmb_y_dout;
    output                                     rcmb_xy_valid;
    
    output                                     rdct_ena;
    input                                      rdct_rdy;

    
    //
    // FSM Declaration
    //
    // Current FSM state; declared with an initial value of MMM_FSM_STATE_IDLE.
    reg  [MMM_FSM_STATE_W -1:0] fsm_state = MMM_FSM_STATE_IDLE;
    // State that fsm_state takes on the next rising clock edge (when not in
    // reset). Declared as reg, so presumably assigned from a procedural block
    // that is not in this part of the file -- TODO confirm.
    reg  [MMM_FSM_STATE_W -1:0] fsm_state_next;

    // Candidate successor states, one per decision point named in the suffix.
    // NOTE(review): their drivers are not in this part of the file; presumably
    // they are selected from the mode inputs and completion flags -- confirm.
    wire [MMM_FSM_STATE_W -1:0] fsm_state_after_idle;    
    wire [MMM_FSM_STATE_W -1:0] fsm_state_after_mult_square;
    wire [MMM_FSM_STATE_W -1:0] fsm_state_after_mult_triangle;
    wire [MMM_FSM_STATE_W -1:0] fsm_state_after_mult_rectangle;
    wire [MMM_FSM_STATE_W -1:0] fsm_state_after_square_holdoff;

    
    //
    // FSM Process
    //
    always @(posedge clk or negedge rst_n)
    begin
        // Asynchronous, active-low reset parks the machine in IDLE; on any
        // other rising clock edge the precomputed next state is adopted.
        if (!rst_n)
            fsm_state <= MMM_FSM_STATE_IDLE;
        else
            fsm_state <= fsm_state_next;
    end

        
    //
    // Storage Control Interface
    //
    // Wide storage read controls (forwarded to the rd_wide_xy_* output ports).
    // Both enables are declared with an initial value of 0. wide_xy_addr holds
    // NUM_MULTS_HALF separate OP_ADDR_W-bit addresses; the *_aux signals form
    // an additional set with a single address.
    reg                     wide_xy_ena = 1'b0;
    reg                     wide_xy_ena_aux = 1'b0;
    reg  [BANK_ADDR_W -1:0] wide_xy_bank;
    reg  [BANK_ADDR_W -1:0] wide_xy_bank_aux;
    reg  [  OP_ADDR_W -1:0] wide_xy_addr[0:NUM_MULTS_HALF-1];
    reg  [  OP_ADDR_W -1:0] wide_xy_addr_aux;
    
    // Narrow storage read controls (forwarded to the rd_narrow_xy_* output
    // ports). The enable is declared with an initial value of 0.
    reg                     narrow_xy_ena = 1'b0;
    reg  [BANK_ADDR_W -1:0] narrow_xy_bank;
    reg  [  OP_ADDR_W -1:0] narrow_xy_addr;
    // NOTE(review): not driven in this part of the file; presumably a delayed
    // copy of narrow_xy_addr -- confirm.
    reg  [  OP_ADDR_W -1:0] narrow_xy_addr_dly;
    // narrow_xy_addr plus one, truncated to OP_ADDR_W bits (wraps to zero
    // after the all-ones address).
    wire [  OP_ADDR_W -1:0] narrow_xy_addr_inc = narrow_xy_addr + 1'b1;
    
    
    //
    // Output Port Mapping
    //
    // Every storage read-control output is a direct copy of the internal
    // register of the same name (without the rd_ prefix).
    assign
        // wide storage, primary port set
        rd_wide_xy_ena      = wide_xy_ena,
        rd_wide_xy_bank     = wide_xy_bank,
        // wide storage, auxiliary port set
        rd_wide_xy_ena_aux  = wide_xy_ena_aux,
        rd_wide_xy_bank_aux = wide_xy_bank_aux,
        rd_wide_xy_addr_aux = wide_xy_addr_aux,
        // narrow storage
        rd_narrow_xy_ena    = narrow_xy_ena,
        rd_narrow_xy_bank   = narrow_xy_bank,
        rd_narrow_xy_addr   = narrow_xy_addr;

    // Flatten the wide_xy_addr array onto the rd_wide_xy_addr bus: entry z
    // occupies bits [(z+1)*OP_ADDR_W-1 : z*OP_ADDR_W], so entry 0 sits in the
    // least significant slice.
    genvar z;
    generate
        for (z = 0; z < NUM_MULTS_HALF; z = z + 1) begin : gen_rd_wide_xy_addr
            assign rd_wide_xy_addr[(z + 1) * OP_ADDR_W - 1 : z * OP_ADDR_W] = wide_xy_addr[z];
        end
    endgenerate
        
    
    //
    // Column Counter
    //
    reg [COL_INDEX_W -1:0] col_index;      // column currently being processed
    reg [COL_INDEX_W -1:0] col_index_prev; // col_index as it was one clock earlier
    reg [COL_INDEX_W -1:0] col_index_last; // index of the final column
    reg [COL_INDEX_W -1:0] col_index_next; // precomputed successor of col_index
    reg                    col_is_last;    // set while the final column is active

    // One-cycle delay line for the column index (no reset).
    always @(posedge clk)
    begin
        col_index_prev <= col_index;
    end

    
    //
    // Column Counter Increment Logic
    //
    always @(posedge clk)
    begin
        // The update is keyed on the state the FSM is about to enter, so the
        // new column values are already in place when that state becomes
        // current. In any state other than the six listed below, all four
        // registers hold their values.
        if ((fsm_state_next == MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT)   ||
            (fsm_state_next == MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT) ||
            (fsm_state_next == MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT))
        begin
            // First column of a pass: restart the counter and latch the last
            // column index from the upper bits of word_index_last.
            col_index_last <= word_index_last[OP_ADDR_W-1:MAC_INDEX_W];
            col_index      <= COL_INDEX_ZERO;
            col_index_next <= COL_INDEX_ONE;
            col_is_last    <= 1'b0;
        end
        else if ((fsm_state_next == MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT)   ||
                 (fsm_state_next == MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT) ||
                 (fsm_state_next == MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT))
        begin
            // Subsequent column: step to the precomputed index, flag the final
            // column, and precompute the following index (wrapping to zero
            // after the last column).
            col_index      <= col_index_next;
            col_index_next <= col_index_next == col_index_last ? COL_INDEX_ZERO : col_index_next + 1'b1;
            col_is_last    <= col_index_next == col_index_last;
        end
    end


    //
    // Completion Flags
    //
    // For each multiplication phase (square, triangle, rectangle):
    //   *_almost_done_comb - combinational compare of narrow_xy_addr against
    //                        the phase's target address (assigns below)
    //   *_almost_done_flop - registered copy of the compare, captured only
    //                        while the FSM is in that phase's BUSY states and
    //                        cleared otherwise
    //   *_surely_done_flop - *_almost_done_flop delayed by one clock
    //   *_tardy_done_flop  - *_surely_done_flop delayed by one more clock
    //                        (triangle and rectangle only)
    // All flops are declared with an initial value of 0.
    wire square_almost_done_comb;
    reg  square_almost_done_flop = 1'b0;
    reg  square_surely_done_flop = 1'b0;

    wire triangle_almost_done_comb;
    reg  triangle_almost_done_flop = 1'b0;
    reg  triangle_surely_done_flop = 1'b0;
    reg  triangle_tardy_done_flop = 1'b0;

    wire rectangle_almost_done_comb;
    reg  rectangle_almost_done_flop = 1'b0;        
    reg  rectangle_surely_done_flop = 1'b0;
    reg  rectangle_tardy_done_flop = 1'b0;

    // Square and rectangle compare against the full word_index_last_minus1.
    // Triangle compares against an address whose upper bits are the current
    // column index and whose lower MAC_INDEX_W bits come from
    // word_index_last_minus1, so its target moves with the column.
    assign square_almost_done_comb    = narrow_xy_addr == word_index_last_minus1;
    assign triangle_almost_done_comb  = narrow_xy_addr == {col_index, word_index_last_minus1[MAC_INDEX_W-1:0]};
    assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
    

    //
    // Square Completion Flags
    //
    always @(posedge clk)
    begin
        // Second stage: one-clock delayed copy of the first stage.
        square_surely_done_flop <= square_almost_done_flop;

        // First stage: sample the address compare only while a square
        // column is busy; force the flag low in every other state.
        if ((fsm_state == MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY) ||
            (fsm_state == MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY))
            square_almost_done_flop <= square_almost_done_comb;
        else
            square_almost_done_flop <= 1'b0;
    end

    //
    // Triangle Completion Flags
    //
    always @(posedge clk)
    begin
        // Second and third stages: each is the previous stage delayed by
        // one clock.
        triangle_tardy_done_flop  <= triangle_surely_done_flop;
        triangle_surely_done_flop <= triangle_almost_done_flop;

        // First stage: sample the address compare only while a triangle
        // column is busy; force the flag low in every other state.
        if ((fsm_state == MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY) ||
            (fsm_state == MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY))
            triangle_almost_done_flop <= triangle_almost_done_comb;
        else
            triangle_almost_done_flop <= 1'b0;
    end
      
    //
    // Rectangle Completion Flags
    //
    always @(posedge clk)
    begin
        // Second and third stages: each is the previous stage delayed by
        // one clock.
        rectangle_tardy_done_flop  <= rectangle_surely_done_flop;
        rectangle_surely_done_flop <= rectangle_almost_done_flop;

        // First stage: sample the address compare only while a rectangle
        // column is busy; force the flag low in every other state.
        if ((fsm_state == MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY) ||
            (fsm_state == MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY))
            rectangle_almost_done_flop <= rectangle_almost_done_comb;
        else
            rectangle_almost_done_flop <= 1'b0;
    end


    //
    // Narrow Storage Control Logic
    //
    //
    // Narrow read-port control: address, bank and enable are all decoded from
    // fsm_state_next (not fsm_state).
    //
    // The address and bank registers have no reset value. They are kept in a
    // clock-only process of their own: placing them in the else-branch of the
    // asynchronously reset process that drives the enable would make rst_n
    // behave as a clock enable for them (they would be forced to hold while
    // reset is asserted), which is not intended and costs extra control logic.
    //
    always @(posedge clk) begin
        //
        // Narrow Address
        //
        // INIT states load address zero. TRIG/BUSY states step to
        // narrow_xy_addr_inc (declared elsewhere; presumably the current
        // address plus one - confirm) until the phase's "done" flops fire,
        // at which point a fixed address is loaded instead.
        //
        case (fsm_state_next)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= OP_ADDR_ZERO;
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr_inc : OP_ADDR_ZERO;
            //
            // triangle: in the last column the wrap to zero is also taken one
            // cycle later (surely_done), matching the extra EXT-bank read below
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? OP_ADDR_ZERO : narrow_xy_addr_inc;
            //
            // rectangle: wraps to address one (not zero) on both done flops
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? OP_ADDR_ONE : narrow_xy_addr_inc;
            //
            default: narrow_xy_addr <= OP_ADDR_DNC;
            //
        endcase
        //
        // Narrow Bank
        //
        // square reads the bank chosen by sel_narrow_in; triangle and
        // rectangle read fixed banks and switch to BANK_NARROW_EXT while
        // their done flops are set (triangle only in the last column).
        //
        case (fsm_state_next)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in;
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? BANK_NARROW_EXT : BANK_NARROW_COEFF;
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? BANK_NARROW_EXT : BANK_NARROW_Q;
            //
            default: narrow_xy_bank <= BANK_DNC;
            //
        endcase
        //
    end

    //
    // Narrow Enable - the only narrow control register with a reset value.
    //
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) narrow_xy_ena <= 1'b0;
        else
            //
            // always on in INIT/TRIG; in BUSY it is dropped once the phase's
            // done flop is set (which flop depends on the phase and, for
            // triangle, on whether this is the last column)
            //
            case (fsm_state_next)
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop;
                //
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop;
                //
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop;
                //
                default: narrow_xy_ena <= 1'b0;
                //
            endcase


    //
    // Wide Storage Control Logic
    //

    // Constant look-up table of MAC_INDEX_W-bit start offsets, one entry per
    // index 0..NUM_MULTS_HALF-1. The generate loop below steps z through the
    // odd numbers 1, 3, 5, ... below NUM_MULTS and ties entry (z-1)/2 to the
    // low MAC_INDEX_W bits of z, i.e. entry k holds 2*k+1.
    // (z is not declared in this part of the file; presumably a genvar - confirm.)
    wire [MAC_INDEX_W-1:0] wide_offset_rom[0:NUM_MULTS_HALF-1];
    
    generate for (z=1; z<NUM_MULTS; z=z+2)
        begin : gen_wide_offset_rom
            assign wide_offset_rom[(z-1)/2] = z[MAC_INDEX_W-1:0];
        end
    endgenerate    

    //
    // Next value of a down-counting wide read address.
    //
    //   wide_xy_addr_current - address presented in the current cycle
    //   wide_xy_addr_last    - value to reload once the counter can no
    //                          longer be decremented
    //
    // Returns current-1 while current is above OP_ADDR_ZERO, otherwise the
    // reload value.
    //
    function  [OP_ADDR_W-1:0] wide_xy_addr_next;
        input [OP_ADDR_W-1:0] wide_xy_addr_current;
        input [OP_ADDR_W-1:0] wide_xy_addr_last;
        begin
            // start from the wrap-around case ...
            wide_xy_addr_next = wide_xy_addr_last;
            // ... and override it whenever there is still room to count down
            if (wide_xy_addr_current > OP_ADDR_ZERO)
                wide_xy_addr_next = wide_xy_addr_current - 1'b1;
        end
    endfunction
    
    integer j;

    //
    // Wide read-port control: addresses and banks (main and aux ports).
    //
    // These registers have no reset value, so they are clocked in a process
    // without rst_n in its sensitivity list. Leaving them in the else-branch of
    // the asynchronously reset process that drives the enables would turn rst_n
    // into a clock enable for every address/bank flop.
    //
    always @(posedge clk) begin
        //
        // Wide Address
        //
        // Column-0 INIT loads each address with its wide_offset_rom entry
        // (upper bits zero), column-N INIT puts col_index_next in front of
        // that offset; every TRIG/BUSY cycle then walks the address through
        // wide_xy_addr_next() with word_index_last as the reload value.
        //
        for (j=0; j<NUM_MULTS_HALF; j=j+1)
            //
            case (fsm_state_next)
                //
                // another way to code this is to extend the look-up table to 8 entries and just use shifts
                // instead of subtractions (this requires further research, let's maybe not do it right now)
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {OP_ADDR_ZERO, wide_offset_rom[j]};
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
                //
                default: wide_xy_addr[j] <= OP_ADDR_DNC;
                //
            endcase
        //
        // Wide Aux Address
        //
        // square/triangle: starts at address one and counts like the main
        // port. rectangle: follows rcmb_xy_addr whenever rcmb_xy_valid is set.
        //
        case (fsm_state_next)
            //
            // there's a potentially more efficient way to code the switch (see above)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= OP_ADDR_ONE;
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= OP_ADDR_DNC;
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : OP_ADDR_DNC;
            //
            default: wide_xy_addr_aux <= OP_ADDR_DNC;
            //
        endcase
        //
        // Wide Bank
        //
        case (fsm_state_next)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in;
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L;
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L;
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N;
            //
            default: wide_xy_bank <= BANK_DNC;
            //
        endcase
        //
        // Wide Aux Bank
        //
        // triangle: H bank during INIT/TRIG, L bank during BUSY.
        // rectangle: translated from rcmb_xy_bank while rcmb_xy_valid is set,
        // don't-care otherwise (including unlisted rcmb_xy_bank values).
        //
        case (fsm_state_next)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in;
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H;
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L;
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: begin
                wide_xy_bank_aux <= BANK_DNC;
                if (rcmb_xy_valid)
                    case (rcmb_xy_bank)
                        BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
                        BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
                    endcase
                end
                //
            default: wide_xy_bank_aux <= BANK_DNC;
            //
        endcase
        //
    end

    //
    // Wide enables (main and aux) - the only wide control registers that are
    // cleared by the asynchronous reset.
    //
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            wide_xy_ena <= 1'b0;
            wide_xy_ena_aux <= 1'b0;
        end else begin
            //
            // Wide Enable
            //
            case (fsm_state_next)
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1;
                //
                default: wide_xy_ena <= 1'b0;
                //
            endcase
            //
            // Wide Aux Enable
            //
            // on throughout the triangle phase; during rectangle it tracks
            // rcmb_xy_valid; off in every other state (square included)
            //
            case (fsm_state_next)
                //
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1;
                //
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;
                //
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;
                //
                default: wide_xy_ena_aux <= 1'b0;
                //
            endcase
            //
        end
        
        
    //
    // Delay Lines
    //
    always @(posedge clk) begin
        // one-cycle delayed copy of the narrow read address (no reset)
        narrow_xy_addr_dly <= narrow_xy_addr;
    end

    
    //
    // DSP Array Logic
    //
    // Clock-enable strobes; the same set is wired to the ce_* ports of both
    // DSP array instances (X and Y). All start out deasserted.
    reg             dsp_xy_ce_a     = 1'b0;
    reg             dsp_xy_ce_b     = 1'b0;
    reg             dsp_xy_ce_b_dly = 1'b0;
    reg             dsp_xy_ce_m     = 1'b0;
    reg             dsp_xy_ce_p     = 1'b0;
    reg             dsp_xy_ce_mode  = 1'b0;
    
    // One mode bit per multiplier (including the aux one), initialised to all
    // ones; shared by both arrays via their mode_z port.
    reg  [             NUM_MULTS_AUX      -1:0] dsp_xy_mode_z = {NUM_MULTS_AUX{1'b1}};
    
    // "A" operand buses: wide-storage read data, see the assigns below.
    wire [WORD_EXT_W * NUM_MULTS_HALF_AUX -1:0] dsp_x_a;
    wire [WORD_EXT_W * NUM_MULTS_HALF_AUX -1:0] dsp_y_a;

    // "B" operand words, registered by the carry recombination logic.
    reg  [WORD_W                          -1:0] dsp_x_b;
    reg  [WORD_W                          -1:0] dsp_y_b;
    
    // Bits above WORD_W of the previous narrow word, added into the next one.
    reg  [CARRY_W                         -1:0] dsp_xy_b_carry;

    // Product/accumulator outputs, MAC_W bits per multiplier.
    wire [MAC_W      * NUM_MULTS_AUX      -1:0] dsp_x_p;            
    wire [MAC_W      * NUM_MULTS_AUX      -1:0] dsp_y_p;
    
    // Aux read data occupies the upper part of the A bus, main read data the rest.
    assign dsp_x_a = {rd_wide_x_din_aux, rd_wide_x_din};
    assign dsp_y_a = {rd_wide_y_din_aux, rd_wide_y_din};
            
    // One-cycle delayed copy of ce_b; the DSP control logic derives ce_m and
    // ce_mode from it.
    always @(posedge clk)
        //
        dsp_xy_ce_b_dly <= dsp_xy_ce_b;

    // DSP array for the X operand. Connections are by name, so their order
    // carries no meaning; the clock-enable and mode inputs are the ones shared
    // with the Y array, only a/b/p are X-specific.
    modexpng_dsp_array_block dsp_array_block_x
    (
        .clk        (clk),

        // X-specific data path
        .a          (dsp_x_a),
        .b          (dsp_x_b),
        .p          (dsp_x_p),

        // per-multiplier mode bits
        .mode_z     (dsp_xy_mode_z),

        // clock enables
        .ce_mode    (dsp_xy_ce_mode),
        .ce_a       (dsp_xy_ce_a),
        .ce_b       (dsp_xy_ce_b),
        .ce_m       (dsp_xy_ce_m),
        .ce_p       (dsp_xy_ce_p)
    );

    // DSP array for the Y operand. Connections are by name, so their order
    // carries no meaning; the clock-enable and mode inputs are the ones shared
    // with the X array, only a/b/p are Y-specific.
    modexpng_dsp_array_block dsp_array_block_y
    (
        .clk        (clk),

        // Y-specific data path
        .a          (dsp_y_a),
        .b          (dsp_y_b),
        .p          (dsp_y_p),

        // per-multiplier mode bits
        .mode_z     (dsp_xy_mode_z),

        // clock enables
        .ce_mode    (dsp_xy_ce_mode),
        .ce_a       (dsp_xy_ce_a),
        .ce_b       (dsp_xy_ce_b),
        .ce_m       (dsp_xy_ce_m),
        .ce_p       (dsp_xy_ce_p)
    );
   

    //
    // DSP Control Logic
    //
    // narrow_xy_ena delayed by one and by two clock cycles
    reg narrow_xy_ena_dly1 = 1'b0;
    reg narrow_xy_ena_dly2 = 1'b0;

    // The DSP clock enables are a chain of delayed copies of the narrow read
    // enable: ce_a covers both delay taps, ce_b follows the second tap, and
    // ce_m / ce_mode / ce_p trail behind ce_b (via dsp_xy_ce_b_dly and ce_m).
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            // everything idle out of reset
            {narrow_xy_ena_dly2, narrow_xy_ena_dly1} <= 2'b00;
            {dsp_xy_ce_a, dsp_xy_ce_b, dsp_xy_ce_m, dsp_xy_ce_p, dsp_xy_ce_mode} <= 5'b00000;
        end else begin
            // delay taps
            narrow_xy_ena_dly2 <= narrow_xy_ena_dly1;
            narrow_xy_ena_dly1 <= narrow_xy_ena;
            // enables derived from the taps
            dsp_xy_ce_b    <= narrow_xy_ena_dly2;
            dsp_xy_ce_a    <= narrow_xy_ena_dly2 | narrow_xy_ena_dly1;
            // enables derived from the delayed ce_b
            dsp_xy_ce_mode <= dsp_xy_ce_b_dly;
            dsp_xy_ce_m    <= dsp_xy_ce_b_dly;
            dsp_xy_ce_p    <= dsp_xy_ce_m;
        end
    end
        
    
    //
    // DSP Feed Logic
    //
    reg dsp_merge_xy_b;
    reg dsp_merge_xy_b_first;

    // Sticky flag: set when the FSM is in the column-0 TRIG state of the
    // square phase, cleared in the column-0 TRIG state of the triangle phase,
    // and left untouched in every other state.
    always @(posedge clk) begin
        if      (fsm_state == MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG)   dsp_merge_xy_b <= 1'b1;
        else if (fsm_state == MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG) dsp_merge_xy_b <= 1'b0;
    end

    // Single-cycle flag: high for the cycle that follows any square-phase
    // TRIG state (column 0 or column N), low otherwise.
    always @(posedge clk) begin
        if ((fsm_state == MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG) ||
            (fsm_state == MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG))
            dsp_merge_xy_b_first <= 1'b1;
        else
            dsp_merge_xy_b_first <= 1'b0;
    end

    
    //
    // On-the-fly Carry Recombination Logic
    //
    // narrow read data with the carry saved from the previous word added in
    wire [WORD_EXT_W-1:0] rd_narrow_x_din_carry;
    wire [WORD_EXT_W-1:0] rd_narrow_y_din_carry;
    assign rd_narrow_x_din_carry = rd_narrow_x_din + {WORD_ZERO, dsp_xy_b_carry};
    assign rd_narrow_y_din_carry = rd_narrow_y_din + {WORD_ZERO, dsp_xy_b_carry};

    // ladder_mode picks the Y sum, otherwise the X sum is used
    wire [WORD_EXT_W-1:0] rd_narrow_xy_din_carry_mux;
    assign rd_narrow_xy_din_carry_mux = ladder_mode ? rd_narrow_y_din_carry : rd_narrow_x_din_carry;

    // with force_unity_b set the operand is replaced by the constant
    // WORD_ONE (first word) / WORD_ZERO (all following words)
    wire [WORD_W-1:0] rd_narrow_xy_dout_carry_mux_or_unity;
    assign rd_narrow_xy_dout_carry_mux_or_unity = force_unity_b ?
        (dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO) : rd_narrow_xy_din_carry_mux[WORD_W-1:0];

    // B-operand registers: three mutually exclusive cases per clock
    always @(posedge clk)
        if (narrow_xy_ena_dly2 && !dsp_merge_xy_b) begin
            // plain mode: X and Y get their own narrow words, no carry kept
            dsp_x_b        <= rd_narrow_x_din[WORD_W-1:0];
            dsp_y_b        <= rd_narrow_y_din[WORD_W-1:0];
            dsp_xy_b_carry <= CARRY_ZERO;
        end else if (narrow_xy_ena_dly2) begin
            // merge mode: both arrays get the same word, upper bits are
            // remembered as the carry for the next word
            dsp_x_b        <= rd_narrow_xy_dout_carry_mux_or_unity;
            dsp_y_b        <= rd_narrow_xy_dout_carry_mux_or_unity;
            dsp_xy_b_carry <= rd_narrow_xy_din_carry_mux[WORD_EXT_W-1:WORD_W];
        end else begin
            // no valid narrow data this cycle
            dsp_x_b        <= WORD_DNC;
            dsp_y_b        <= WORD_DNC;
            dsp_xy_b_carry <= CARRY_ZERO;
        end


    //
    // DSP Mode Logic
    //
    // Pipeline registers in front of dsp_xy_mode_z. The value is computed into
    // _adv4 and then moves _adv4 -> _adv3 -> _adv2 -> _adv1 -> dsp_xy_mode_z,
    // one stage per clock. All stages start out as all ones.
    reg  [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv1 = {NUM_MULTS_AUX{1'b1}};
    reg  [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv2 = {NUM_MULTS_AUX{1'b1}};
    reg  [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv3 = {NUM_MULTS_AUX{1'b1}};
    reg  [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv4 = {NUM_MULTS_AUX{1'b1}};
        
    //
    // Mode mask for the square phase.
    //
    // Returns all ones unless the upper address bits (above MAC_INDEX_W)
    // equal col_index_value; in that case the single bit selected by the low
    // MAC_INDEX_W address bits is cleared.
    //
    function  [NUM_MULTS_AUX     -1:0] calc_mac_mode_z_square;
        input [NUM_MULTS_HALF_AUX-1:0] col_index_value;
        input [OP_ADDR_W         -1:0] narrow_xy_addr_value;
        begin
            // default: leave every bit set
            calc_mac_mode_z_square = {NUM_MULTS_AUX{1'b1}};
            // address lies in the given column: clear the one-hot position
            if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
                calc_mac_mode_z_square = ~({{NUM_MULTS_AUX-1{1'b0}}, 1'b1} << narrow_xy_addr_value[MAC_INDEX_W-1:0]);
        end
    endfunction
    
    //
    // Mode mask for the rectangle phase.
    //
    // Returns all ones unless the upper address bits (above MAC_INDEX_W)
    // equal col_index_value; in that case the single bit selected by the low
    // MAC_INDEX_W address bits is cleared.
    //
    function  [NUM_MULTS_AUX      -1:0] calc_mac_mode_z_rectangle;
        input [NUM_MULTS_HALF_AUX -1:0] col_index_value;
        input [OP_ADDR_W          -1:0] narrow_xy_addr_value;
        begin
            // default: leave every bit set
            calc_mac_mode_z_rectangle = {NUM_MULTS_AUX{1'b1}};
            // address lies in the given column: clear the one-hot position
            if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
                calc_mac_mode_z_rectangle = ~({{NUM_MULTS_AUX-1{1'b0}}, 1'b1} << narrow_xy_addr_value[MAC_INDEX_W-1:0]);
        end
    endfunction
        
    // Head of the mode_z pipeline, decoded from fsm_state_next:
    //   * square / rectangle BUSY - mask computed from the previous column
    //     index and the delayed narrow address
    //   * any TRIG state          - all zeros
    //   * everything else         - all ones (this includes triangle BUSY)
    always @(posedge clk) begin
        case (fsm_state_next)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:
                dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
                dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:
                dsp_xy_mode_z_adv4 <= {NUM_MULTS_AUX{1'b0}};
            //
            default:
                dsp_xy_mode_z_adv4 <= {NUM_MULTS_AUX{1'b1}};
            //
        endcase
    end

    // Shift the mode word one stage closer to the DSP arrays every clock:
    // adv4 -> adv3 -> adv2 -> adv1 -> dsp_xy_mode_z.
    always @(posedge clk)
        {dsp_xy_mode_z, dsp_xy_mode_z_adv1, dsp_xy_mode_z_adv2, dsp_xy_mode_z_adv3} <=
            {dsp_xy_mode_z_adv1, dsp_xy_mode_z_adv2, dsp_xy_mode_z_adv3, dsp_xy_mode_z_adv4};


    //
    // Recombinator
    //
    reg  rcmb_ena = 1'b0;   // start strobe, driven by the enable logic further down
    wire rcmb_rdy;          // polled by the FSM in its HOLDOFF states

    // Recombinator instance. Ports are connected by name, so the grouping
    // below is purely for readability. Port directions are defined by the
    // recombinator module itself, which is not part of this file.
    modexpng_recombinator_block recombinator_block
    (
        // clock / reset
        .clk                    (clk),
        .rst_n                  (rst_n),

        // handshake
        .ena                    (rcmb_ena),
        .rdy                    (rcmb_rdy),

        // sequencing information from this module
        .fsm_state_next         (fsm_state_next),
        .col_index              (col_index),
        .col_index_last         (col_index_last),
        .word_index_last        (word_index_last),

        // narrow read address/bank currently being issued
        .rd_narrow_xy_addr      (narrow_xy_addr),
        .rd_narrow_xy_bank      (narrow_xy_bank),

        // DSP array products and their clock enable
        .dsp_xy_ce_p            (dsp_xy_ce_p),
        .dsp_x_p                (dsp_x_p),
        .dsp_y_p                (dsp_y_p),

        // rcmb_wide_* port group
        .rcmb_wide_xy_valid     (rcmb_wide_xy_valid),
        .rcmb_wide_xy_bank      (rcmb_wide_xy_bank),
        .rcmb_wide_xy_addr      (rcmb_wide_xy_addr),
        .rcmb_wide_x_dout       (rcmb_wide_x_dout),
        .rcmb_wide_y_dout       (rcmb_wide_y_dout),

        // rcmb_narrow_* port group
        .rcmb_narrow_xy_valid   (rcmb_narrow_xy_valid),
        .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank),
        .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr),
        .rcmb_narrow_x_dout     (rcmb_narrow_x_dout),
        .rcmb_narrow_y_dout     (rcmb_narrow_y_dout),

        // rdct_narrow_* port group (tied to the local rcmb_xy_* / rcmb_*_dout nets)
        .rdct_narrow_xy_valid   (rcmb_xy_valid),
        .rdct_narrow_xy_bank    (rcmb_xy_bank),
        .rdct_narrow_xy_addr    (rcmb_xy_addr),
        .rdct_narrow_x_dout     (rcmb_x_dout),
        .rdct_narrow_y_dout     (rcmb_y_dout)
    );
    
    
    //
    // Recombinator Enable Logic
    //    
    // rcmb_ena is registered high when ce_a is asserted while none of the
    // downstream enables (ce_b, ce_m, ce_p) is asserted yet.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rcmb_ena <= 1'b0;
        else
            rcmb_ena <= dsp_xy_ce_a & ~(dsp_xy_ce_b | dsp_xy_ce_m | dsp_xy_ce_p);
    end

        
    //
    // Handy Completion Flags
    //    
    // Per-phase "done" conditions used by the FSM to leave its BUSY states.
    wire square_done;
    wire triangle_done;
    wire rectangle_done;

    assign square_done    = square_surely_done_flop;
    // triangle uses the later ("tardy") flop only in the last column
    assign triangle_done  = col_is_last ? triangle_tardy_done_flop : triangle_surely_done_flop;
    assign rectangle_done = rectangle_tardy_done_flop;
    

    //
    // FSM Transition Logic
    //
    // Conditional branch targets used by the next-state process.

    // leaving IDLE: only_reduce skips the square phase
    assign fsm_state_after_idle =
        only_reduce    ? MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT
                       : MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT;

    // leaving a column-N BUSY state: next column, or HOLDOFF after the last one
    assign fsm_state_after_mult_square =
        !col_is_last   ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT
                       : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;

    assign fsm_state_after_mult_triangle =
        !col_is_last   ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT
                       : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;

    assign fsm_state_after_mult_rectangle =
        !col_is_last   ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT
                       : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;

    // leaving the square HOLDOFF: just_multiply ends the operation here
    assign fsm_state_after_square_holdoff =
        !just_multiply ? MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT
                       : MMM_FSM_STATE_STOP;
     

    // Combinational next-state function.
    //
    // Each of the three phases (square, triangle, rectangle) runs
    // INIT -> TRIG -> BUSY for column 0 and then again for the following
    // columns; a BUSY state holds until the phase's *_done flag is set.
    // Column-0 BUSY always continues with the column-N INIT state, column-N
    // BUSY continues with fsm_state_after_mult_* (next column or HOLDOFF).
    // HOLDOFF states wait for rcmb_rdy, WAIT_REDUCTOR waits for rdct_rdy.
    // Unlisted state encodings fall back to IDLE.
    always @* begin
        //
        fsm_state_next = MMM_FSM_STATE_IDLE;
        //
        case (fsm_state)
            // wait for ena
            MMM_FSM_STATE_IDLE:                      fsm_state_next = ena ?            fsm_state_after_idle                    : MMM_FSM_STATE_IDLE;
                        
            // square phase, column 0
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:    fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG    ;
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG:    fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY    ;
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:    fsm_state_next = square_done ?    MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT    : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY;
            
            // square phase, following columns
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:    fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG    ;
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:    fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY    ;
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    fsm_state_next = square_done ?    fsm_state_after_mult_square             : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
            
            // wait for the recombinator, then either stop or go on to triangle
            MMM_FSM_STATE_MULT_SQUARE_HOLDOFF:       fsm_state_next = rcmb_rdy ?       fsm_state_after_square_holdoff          : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;

            // triangle phase, column 0
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:  fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG  ;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG:  fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY  ;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:  fsm_state_next = triangle_done ?  MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT  : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;     
            
            // triangle phase, following columns
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:  fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG  ;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY  ;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  fsm_state_next = triangle_done ?  fsm_state_after_mult_triangle           : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
            
            // wait for the recombinator, then start the rectangle phase
            MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF:     fsm_state_next = rcmb_rdy ?       MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;

            // rectangle phase, column 0
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;     
            
            // rectangle phase, following columns
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle          : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
            
            // wait for the recombinator, then for the reductor
            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:    fsm_state_next = rcmb_rdy ?       MMM_FSM_STATE_WAIT_REDUCTOR             : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;
            
            MMM_FSM_STATE_WAIT_REDUCTOR:             fsm_state_next = rdct_rdy ?       MMM_FSM_STATE_STOP                      : MMM_FSM_STATE_WAIT_REDUCTOR;
            
            // STOP lasts a single cycle
            MMM_FSM_STATE_STOP:                      fsm_state_next =                  MMM_FSM_STATE_IDLE                      ;
            
            default:                                 fsm_state_next =                  MMM_FSM_STATE_IDLE                      ;

        endcase
        //
    end


    //
    // Reductor Control Logic
    //
    reg rdct_ena_reg = 1'b0;

    assign rdct_ena = rdct_ena_reg;

    // rdct_ena is registered high while the FSM sits in the rectangle
    // column-0 INIT state and low in every other state.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rdct_ena_reg <= 1'b0;
        else if (fsm_state == MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT)
            rdct_ena_reg <= 1'b1;
        else
            rdct_ena_reg <= 1'b0;
    end
    
    
    //
    // Ready Logic
    //
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    // rdy is high out of reset, drops when ena is seen while ready, and is
    // raised again once the FSM reaches STOP. The two conditions can never
    // hold together (one needs rdy, the other !rdy).
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rdy_reg <= 1'b1;
        else if (rdy && ena)
            rdy_reg <= 1'b0;
        else if (!rdy && (fsm_state == MMM_FSM_STATE_STOP))
            rdy_reg <= 1'b1;
    end


    //
    // Debug
    //
    `ifdef MODEXPNG_ENABLE_DEBUG
    // Debug-only counter: number of clock cycles in which dsp_xy_ce_m was
    // asserted, kept as a real.
    real load_cyc_mult = 0.0;
    always @(posedge clk) begin
        if (dsp_xy_ce_m) begin
            load_cyc_mult <= load_cyc_mult + 1.0;
        end
    end
    `endif
    
    
endmodule