aboutsummaryrefslogblamecommitdiff
path: root/src/rtl/modexpa7_exponentiator.v
blob: 532c112eb03b6d35ab3f62aa33b91b3c13d35b1a (plain) (tree)


























































                                                                                                               
                 

                                                                                                             

                                                                     
                                                                     







                                                                            
                                                                            



                                                                                   



                                                                                                           
                                                                             






                                                                           



































                                                                         
           







                                                                 
           







                                                                 
           









                                                                         




               



                                        
           
                                             


               


                                                     
           



                                                                 
           
                                     
           
                                         
           





















                                                                             



               
 



                                          
                                                        



























                                                                           
                                                                                      

                                                                           
                                                                                                         






                                    
                                                                   




                                                                                
                                                                   
                                                                                            











                                                   
                                                                                                                

                                              


                                                                                                       



                                                        
                                                        
















                                                                                                 
                                                                                                 








                                                                                                 











                                                                                                            



                                                       
                                     






                                     
                                   

















                                     
                        





                         



                                      






                                                                                                       
                                                                                     
                            

                                                                                  
 
                                                                                     
                            

                                                                               
 
                                                                                     
                            

                                                                               
 
                                                                                     
                            

                                                                               
 
                                                                                     
                            

                                                                                  
 
                                                                                     
                            

                                                                                  

 































                                                                                                         








                                                
                                                                                  

                                              





                                                                                                           




                                      
                                                                                                   




                                      
                                                                                  

                                              






                                                                                                       




                                      

                                                                                  




                                      
                           




                                                                                     
                           

                                                                                     
                           

                                                                                     




                                      

                                                                                   
                           
                                                                                   

                                              
                                                                                                           




                                      

                                                                                   
                           
                                                                                   

                                              
                                                                                                           
                           

                                                                                   
                           
                                                                                   

                                              
                                                                                                           




                                      
                           

                                                                                       
                           

                                                                                       
                           

                                                                                       
                           







                                                                                       




                                      
                                                                                      

                                              
                                                                                                                       




                                      
                                                                                      

                                              





                                                                                                                       
                           





                                                                                                                       

                                              
                                                                                                                       









                                             



                                              

                                                                                                        

                        


                                      

                                              

                                                
                                              

                                              

                                                                                                        




                                      
                                                         

                                                                                                         




                                      


                                                         

                                                                                                         




                                      

                                              

                                                



                                                


                                                         

                                                                                                         









                                         

                             
                                      
                                              

                                                                                                                  


                                 

                                      

                                              



                                                                                       
                           
                                              
                                                                                        
                           
                                              
                                                                                        
                           
                                                                                                                  




                                      
                                              

                                                                                                             




                                      

                                                                             

                                              

                                                                                                                 




                                      
                           




                                                                                  





                                                                                   
                           

                                                                             

                                              

                                                                                                                 








                                     
                         
















                                                                              
                                                           
 




                                                                
 



                                                                       



                                                                      
                                                                    














                                                                              

                                                           




                                                                
 



                                                                       



                                                                      
                                                                    




                              

                                              











                                                                                                         

                         





                                               
                                                                         







                                                                                             
                                                     


                                 






















                                                                                                                                                                                                 






















                                                                                                                                                                                                 







































                                                                                                                                                                                                                       






                           
//======================================================================
//
// modexpa7_exponentiator.v
// -----------------------------------------------------------------------------
// Modular Montgomery Exponentiator.
//
// Authors: Pavel Shatov
//
// Copyright (c) 2017, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
//   be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================

module modexpa7_exponentiator #
	(
			//
			// This sets the address width of the memory buffers. Internal data
			// width is 32 bits, so e.g. for 2048-bit operands the buffers must
			// store 2048 / 32 = 64 words, and these need a 6-bit address bus,
			// because 2 ** 6 = 64.
			//
		parameter	OPERAND_ADDR_WIDTH		= 4,
		
			//
			// Explain.
			//
		parameter	SYSTOLIC_ARRAY_POWER		= 2
	)
	(
		input											clk,
		input											rst_n,

		input											ena,
		output										rdy,
		
		input											crt,
		
		output	[OPERAND_ADDR_WIDTH-1:0]	m_bram_addr,
		output	[OPERAND_ADDR_WIDTH-1:0]	d_bram_addr,
		output	[OPERAND_ADDR_WIDTH-1:0]	f_bram_addr,
		output	[OPERAND_ADDR_WIDTH-1:0]	n1_bram_addr,
		output	[OPERAND_ADDR_WIDTH-1:0]	n2_bram_addr,
		output	[OPERAND_ADDR_WIDTH-1:0]	n_coeff1_bram_addr,
		output	[OPERAND_ADDR_WIDTH-1:0]	n_coeff2_bram_addr,
		output	[OPERAND_ADDR_WIDTH-1:0]	r_bram_addr,

		input		[                32-1:0]	m_bram_out,
		input		[                32-1:0]	d_bram_out,
		input		[                32-1:0]	f_bram_out,
		input		[                32-1:0]	n1_bram_out,
		input		[                32-1:0]	n2_bram_out,
		input		[                32-1:0]	n_coeff1_bram_out,
		input		[                32-1:0]	n_coeff2_bram_out,

		output	[                32-1:0]	r_bram_in,
		output										r_bram_wr,

		input		[OPERAND_ADDR_WIDTH-1:0]	m_num_words,
		input		[OPERAND_ADDR_WIDTH+4:0]	d_num_bits
	);
	
	
		//
		// FSM Declaration
		//
	// State encoding: every numbered phase uses its own upper nibble and keeps
	// the step number (1..4) in the lower nibble. The per-phase data movement
	// quoted in the comments is taken from the operation summary that follows
	// this state list.
	//
	// idle state; fsm_state is initialised to this value
	localparam	[ 7: 0]	FSM_STATE_EXP_IDLE		= 8'h00;
	//
	// EXP_INIT: P1, P2, P3 <= TP; T1, T2 <= 1
	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_1		= 8'hA1;
	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_2		= 8'hA2;
	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_3		= 8'hA3;
	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_4		= 8'hA4;

	// EXP_LOAD: T0 <= T1
	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_1		= 8'hB1;
	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_2		= 8'hB2;
	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_3		= 8'hB3;
	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_4		= 8'hB4;

	// EXP_CALC: PP = P1 * P2; TP = T2 * P3
	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_1		= 8'hC1;
	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_2		= 8'hC2;
	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_3		= 8'hC3;

	// EXP_FILL: P1, P2, P3 <= PP; T1, T2 <= D[i] ? TP : T0
	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_1		= 8'hD1;
	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_2		= 8'hD2;
	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_3		= 8'hD3;
	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_4		= 8'hD4;

	// EXP_NEXT: the bit counter is advanced on the EXP_NEXT -> EXP_LOAD_1 transition
	localparam	[ 7: 0]	FSM_STATE_EXP_NEXT		= 8'hE0;

	// EXP_SAVE: R <= T1
	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_1		= 8'hF1;
	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_2		= 8'hF2;
	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_3		= 8'hF3;
	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_4		= 8'hF4;
	//
	// MUL_INIT: P1, P2, P3 <= F; T2 <= M
	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_1		= 8'h11;
	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_2		= 8'h12;
	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_3		= 8'h13;
	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_4		= 8'h14;

	// MUL_CALC: TP = T2 * P3
	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_1		= 8'h21;
	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_2		= 8'h22;
	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_3		= 8'h23;
	//
	// CRT_INIT_A: T2 <= M
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_A_1	= 8'h31;
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_A_2	= 8'h32;
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_A_3	= 8'h33;
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_A_4	= 8'h34;

	// CRT_CALC_A: TP = T2 * P3 ("reduce only")
	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_A_1	= 8'h41;
	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_A_2	= 8'h42;
	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_A_3	= 8'h43;
	//
	// CRT_INIT_B: P1, P2, P3 <= F; T2 <= TP
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_B_1	= 8'h51;
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_B_2	= 8'h52;
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_B_3	= 8'h53;
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_B_4	= 8'h54;

	// CRT_CALC_B: TP = T2 * P3
	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_B_1	= 8'h61;
	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_B_2	= 8'h62;
	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_B_3	= 8'h63;
	//
	// CRT_INIT_C: T2 <= TP
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_C_1	= 8'h71;
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_C_2	= 8'h72;
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_C_3	= 8'h73;
	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_C_4	= 8'h74;

	// CRT_CALC_C: TP = T2 * P3
	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_C_1	= 8'h81;
	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_C_2	= 8'h82;
	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_C_3	= 8'h83;
	//
	// EXP_STOP: the ready flag is set again while the FSM is in this state
	localparam	[ 7: 0]	FSM_STATE_EXP_STOP		= 8'hFF;


	/*
	 *  //
	 *
	 *  MUL_INIT:		P1 <= F
	 *             	P2 <= F
	 *             	P3 <= F
	 *            		T2 <= M
	 *
	 *  MUL_CALC:		TP = T2 * P3
	 *
	 *  //
	 *
	 *  CRT_INIT_A:	T2 <= M
	 *
	 *  CRT_CALC_A:	TP = T2 * P3 ("reduce only")
	 *
	 *  CRT_INIT_B:  	P1 <= F
	 *						P2 <= F
	 *						P3 <= F
	 *						T2 <= TP
	 *
	 *  CRT_CALC_B:	TP = T2 * P3
	 *
	 *  CRT_INIT_C:  	T2 <= TP
	 *
	 *  CRT_CALC_C:	TP = T2 * P3
	 *
	 *  //
	 *
	 *  EXP_INIT:		P1 <= TP
	 *						P2 <= TP
	 *						P3 <= TP
	 *						T1 <= 1
	 *						T2 <= 1
	 *
	 *  EXP_LOAD:		T0 <= T1
	 *
	 *  EXP_CALC:		PP = P1 * P2
	 *						TP = T2 * P3
	 *
	 *  EXP_FILL:		P1 <= PP
	 *						P2 <= PP
	 *						P3 <= PP
	 *						T1 <= D[i] ? TP : T0
	 *						T2 <= D[i] ? TP : T0
	 *
	 *  EXP_SAVE:		R  <=  T1
	 *
	 *  //
	 *
	 */

	
		//
		// FSM State / Next State
		//
	/* current state (starts out idle) and the combinationally selected next state */
	reg	[7:0]	fsm_state = FSM_STATE_EXP_IDLE;
	reg	[7:0]	fsm_next_state;


		//
		// Enable Delay and Trigger
		//

		/* copy of the enable input from the previous clock cycle */
	reg	ena_dly = 1'b0;

	always @(posedge clk)
		ena_dly <= ena;

		/* rising edge detector: enable is high now, but was low one cycle ago */
	wire	ena_trig = ena & ~ena_dly;
	
	
		//
		// Ready Flag Logic
		//
	/* registered ready flag: set by reset, cleared when an operation is
	 * triggered from the idle state, set again in the stop state */
	reg	rdy_reg = 1'b1;
	assign	rdy = rdy_reg;

	always @(posedge clk or negedge rst_n)
		//
		if (!rst_n)
				/* asynchronous, active-low reset: report ready */
			rdy_reg <= 1'b1;
		else
			case (fsm_state)
					/* idle: flag drops in the cycle the start trigger is seen */
				FSM_STATE_EXP_IDLE:	rdy_reg <= ~ena_trig;
					/* stop: operation is finished, raise the flag */
				FSM_STATE_EXP_STOP:	rdy_reg <= 1'b1;
					/* every other state: flag keeps its value */
			endcase
		
		
		//
		// Parameters Latch
		//
	/* copies of the m_num_words and d_num_bits inputs, held for the whole operation */
	reg	[OPERAND_ADDR_WIDTH-1:0]	m_num_words_latch;
	reg	[OPERAND_ADDR_WIDTH+4:0]	d_num_bits_latch;

		/* capture both size inputs when a new operation is triggered from idle */
	always @(posedge clk)
		//
		if (ena_trig && (fsm_state == FSM_STATE_EXP_IDLE)) begin
			m_num_words_latch <= m_num_words;
			d_num_bits_latch  <= d_num_bits;
		end
			

		//
		// Block Memory Addresses
		//
		
		/*
		 * Explain what every memory does.
		 *
		 */
		
		/* the very first addresses */
	/* lowest word address */
	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_zero			= {OPERAND_ADDR_WIDTH{1'b0}};

		/*
		 * highest word addresses: the latched m_num_words value itself, and
		 * (2 * m_num_words + 1) truncated to the address width for the
		 * "_crt" variant
		 */
	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_last			= m_num_words_latch;
	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_last_crt	= {m_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1};

		/* address pointers driven by the clocked address control logic */
	reg	[OPERAND_ADDR_WIDTH-1:0]	m_addr, d_addr, f_addr, r_addr;
	reg	[OPERAND_ADDR_WIDTH-1:0]	t0_addr, t1_addr;
	reg	[OPERAND_ADDR_WIDTH-1:0]	t2_addr_wr, p_addr_wr;
	reg	[OPERAND_ADDR_WIDTH-1:0]	pp_addr_rd, tp_addr_rd;

		/* address pointers declared as nets (not driven in this section) */
	wire	[OPERAND_ADDR_WIDTH-1:0]	t2_addr_rd;
	wire	[OPERAND_ADDR_WIDTH-1:0]	p1_addr_rd, p2_addr_rd, p3_addr_rd;
	wire	[OPERAND_ADDR_WIDTH-1:0]	pp_addr_wr, tp_addr_wr;

		/* every register pointer plus one (wraps at the address width) */
	wire	[OPERAND_ADDR_WIDTH-1:0]	m_addr_next			= m_addr     + 1'b1;
	wire	[OPERAND_ADDR_WIDTH-1:0]	d_addr_next			= d_addr     + 1'b1;
	wire	[OPERAND_ADDR_WIDTH-1:0]	f_addr_next			= f_addr     + 1'b1;
	wire	[OPERAND_ADDR_WIDTH-1:0]	r_addr_next			= r_addr     + 1'b1;
	wire	[OPERAND_ADDR_WIDTH-1:0]	t0_addr_next		= t0_addr    + 1'b1;
	wire	[OPERAND_ADDR_WIDTH-1:0]	t1_addr_next		= t1_addr    + 1'b1;
	wire	[OPERAND_ADDR_WIDTH-1:0]	t2_addr_wr_next	= t2_addr_wr + 1'b1;
	wire	[OPERAND_ADDR_WIDTH-1:0]	p_addr_wr_next		= p_addr_wr  + 1'b1;
	wire	[OPERAND_ADDR_WIDTH-1:0]	pp_addr_rd_next	= pp_addr_rd + 1'b1;
	wire	[OPERAND_ADDR_WIDTH-1:0]	tp_addr_rd_next	= tp_addr_rd + 1'b1;

		/* "pointer sits on the last address" flags */
	wire	m_addr_done				= (m_addr     == bram_addr_last);
	wire	m_addr_done_crt		= (m_addr     == bram_addr_last_crt);
	wire	d_addr_done				= (d_addr     == bram_addr_last);
	wire	f_addr_done				= (f_addr     == bram_addr_last);
	wire	r_addr_done				= (r_addr     == bram_addr_last);
	wire	t0_addr_done			= (t0_addr    == bram_addr_last);
	wire	t1_addr_done			= (t1_addr    == bram_addr_last);
	wire	t2_addr_wr_done		= (t2_addr_wr == bram_addr_last);
	wire	t2_addr_wr_done_crt	= (t2_addr_wr == bram_addr_last_crt);
	wire	p_addr_wr_done			= (p_addr_wr  == bram_addr_last);
	wire	pp_addr_rd_done		= (pp_addr_rd == bram_addr_last);
	wire	tp_addr_rd_done		= (tp_addr_rd == bram_addr_last);

		/* external buffer address ports are driven straight from the pointers */
	assign	m_bram_addr = m_addr;
	assign	d_bram_addr = d_addr;
	assign	f_bram_addr = f_addr;
	assign	r_bram_addr = r_addr;
	
		//
		// Internal Memories
		//

		/* memory inputs */
	/* write data: registers for R, T0, T1, T2 and the shared P port; nets for PP and TP */
	reg	[31:0]	r_data_in, t0_data_in, t1_data_in, t2_data_in, p_data_in;
	wire	[31:0]	pp_data_in, tp_data_in;

		/* read data of the internal memories */
	wire	[31:0]	t0_data_out, t1_data_out, t2_data_out;
	wire	[31:0]	p1_data_out, p2_data_out, p3_data_out;
	wire	[31:0]	pp_data_out, tp_data_out;

		/* write enables: registers for R, T0, T1, T2 and P; nets for PP and TP */
	reg	r_wren, t0_wren, t1_wren, t2_wren, p_wren;
	wire	pp_wren, tp_wren;

		/* result buffer write port */
	assign	r_bram_in = r_data_in;
	assign	r_bram_wr = r_wren;

		/* T0, T1: one combined read/write port each */
	bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_t0
	(
		.clk		(clk),
		.a_addr	(t0_addr),
		.a_wr		(t0_wren),
		.a_in		(t0_data_in),
		.a_out	(t0_data_out)
	);

	bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_t1
	(
		.clk		(clk),
		.a_addr	(t1_addr),
		.a_wr		(t1_wren),
		.a_in		(t1_data_in),
		.a_out	(t1_data_out)
	);

		/* T2: port A is used for writing only (a_out left open), port B for reading */
	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_t2
	(
		.clk		(clk),
		.a_addr	(t2_addr_wr),
		.a_wr		(t2_wren),
		.a_in		(t2_data_in),
		.a_out	(),
		.b_addr	(t2_addr_rd),
		.b_out	(t2_data_out)
	);

		/* P1, P2, P3: identical port A connections, individual read ports */
	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_p1
	(
		.clk		(clk),
		.a_addr	(p_addr_wr),
		.a_wr		(p_wren),
		.a_in		(p_data_in),
		.a_out	(),
		.b_addr	(p1_addr_rd),
		.b_out	(p1_data_out)
	);

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_p2
	(
		.clk		(clk),
		.a_addr	(p_addr_wr),
		.a_wr		(p_wren),
		.a_in		(p_data_in),
		.a_out	(),
		.b_addr	(p2_addr_rd),
		.b_out	(p2_data_out)
	);

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_p3
	(
		.clk		(clk),
		.a_addr	(p_addr_wr),
		.a_wr		(p_wren),
		.a_in		(p_data_in),
		.a_out	(),
		.b_addr	(p3_addr_rd),
		.b_out	(p3_data_out)
	);

		/* PP, TP: port A driven by the pp_* / tp_* write nets, port B read here */
	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_pp
	(
		.clk		(clk),
		.a_addr	(pp_addr_wr),
		.a_wr		(pp_wren),
		.a_in		(pp_data_in),
		.a_out	(),
		.b_addr	(pp_addr_rd),
		.b_out	(pp_data_out)
	);

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_tp
	(
		.clk		(clk),
		.a_addr	(tp_addr_wr),
		.a_wr		(tp_wren),
		.a_in		(tp_data_in),
		.a_out	(),
		.b_addr	(tp_addr_rd),
		.b_out	(tp_data_out)
	);


		//
		// Bit Counter
		//
	/* bit index into D: the upper bits select the 32-bit word, the lower five the bit */
	reg	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt;

	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_zero = {(OPERAND_ADDR_WIDTH+5){1'b0}};
	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_last = d_num_bits_latch;
	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_next = bit_cnt + 1'b1;

		/* counter has reached the latched d_num_bits value */
	wire	bit_cnt_done = (bit_cnt == bit_cnt_last);

		/*
		 * the counter only changes on a transition into EXP_LOAD_1: it is
		 * cleared when coming from EXP_INIT_4 and stepped (saturating at the
		 * last value) when coming from EXP_NEXT
		 */
	always @(posedge clk)
		//
		if (fsm_next_state == FSM_STATE_EXP_LOAD_1) begin
			if      (fsm_state == FSM_STATE_EXP_INIT_4)	bit_cnt <= bit_cnt_zero;
			else if (fsm_state == FSM_STATE_EXP_NEXT)		bit_cnt <= bit_cnt_done ? bit_cnt : bit_cnt_next;
		end


		//
		// Flags
		//
	/* bit of the current D word selected by the low five bits of bit_cnt,
	 * sampled on the transition into EXP_CALC_3 */
	reg	flag_update_r;

	always @(posedge clk) begin
		//
		if (fsm_next_state == FSM_STATE_EXP_CALC_3) begin
			flag_update_r <= d_bram_out[bit_cnt[4:0]];
		end
		//
	end
			

		//
		// Memory Address Control Logic
		//
	// Clocked update of all address pointers, keyed on fsm_next_state so that
	// each pointer already holds its new value when the FSM enters that state.
	// Two stepping patterns are used:
	//   - reset to zero for state *_1, then advance for *_2..*_4 and hold
	//     once the last address has been reached;
	//   - reset to zero for state *_3, then increment for *_4 with no
	//     saturation check.
	// A pointer keeps its value in every state not listed in its case.
	// NOTE(review): the two-state lag between the *_1 and *_3 resets presumably
	// matches the read latency of the source memory - confirm.
	always @(posedge clk) begin
		//
		// m_addr: walks 0..bram_addr_last during MUL_INIT and
		// 0..bram_addr_last_crt during CRT_INIT_A
		//
		case (fsm_next_state)
			FSM_STATE_MUL_INIT_1: 		m_addr <= bram_addr_zero;
			FSM_STATE_MUL_INIT_2,
			FSM_STATE_MUL_INIT_3,
			FSM_STATE_MUL_INIT_4:		m_addr <= !m_addr_done ? m_addr_next : m_addr;
			//
			FSM_STATE_CRT_INIT_A_1: 	m_addr <= bram_addr_zero;
			FSM_STATE_CRT_INIT_A_2,
			FSM_STATE_CRT_INIT_A_3,
			FSM_STATE_CRT_INIT_A_4:		m_addr <= !m_addr_done_crt ? m_addr_next : m_addr;
		endcase
		//
		// d_addr: word index of the current bit, i.e. bit_cnt without its
		// five least significant bits
		//
		case (fsm_next_state)
			FSM_STATE_EXP_CALC_1:		d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5];
		endcase
		//
		// f_addr: walks 0..bram_addr_last during MUL_INIT and CRT_INIT_B
		//
		case (fsm_next_state)
			FSM_STATE_MUL_INIT_1: 		f_addr <= bram_addr_zero;
			FSM_STATE_MUL_INIT_2,
			FSM_STATE_MUL_INIT_3,
			FSM_STATE_MUL_INIT_4:		f_addr <= !f_addr_done ? f_addr_next : f_addr;
			//
			FSM_STATE_CRT_INIT_B_1: 	f_addr <= bram_addr_zero;
			FSM_STATE_CRT_INIT_B_2,
			FSM_STATE_CRT_INIT_B_3,
			FSM_STATE_CRT_INIT_B_4:		f_addr <= !f_addr_done ? f_addr_next : f_addr;
			//
		endcase
		//
		// r_addr: zeroed for EXP_SAVE_3, incremented for EXP_SAVE_4
		//
		case (fsm_next_state)
			FSM_STATE_EXP_SAVE_3:		r_addr <= bram_addr_zero;
			FSM_STATE_EXP_SAVE_4:		r_addr <= r_addr_next;
		endcase
		//
		// p_addr_wr: shared port A address of P1, P2 and P3; zeroed for *_3 and
		// incremented for *_4 of MUL_INIT, CRT_INIT_B, EXP_INIT and EXP_FILL
		//
		case (fsm_next_state)
			//
			FSM_STATE_MUL_INIT_3:		p_addr_wr <= bram_addr_zero;
			FSM_STATE_MUL_INIT_4:		p_addr_wr <= p_addr_wr_next;
			//
			FSM_STATE_CRT_INIT_B_3:		p_addr_wr <= bram_addr_zero;
			FSM_STATE_CRT_INIT_B_4:		p_addr_wr <= p_addr_wr_next;
			//
			FSM_STATE_EXP_INIT_3:		p_addr_wr <= bram_addr_zero;
			FSM_STATE_EXP_INIT_4:		p_addr_wr <= p_addr_wr_next;
			//
			FSM_STATE_EXP_FILL_3:		p_addr_wr <= bram_addr_zero;
			FSM_STATE_EXP_FILL_4:		p_addr_wr <= p_addr_wr_next;
		endcase
		//
		// t0_addr: single-port T0 address; *_3/*_4 stepping during EXP_LOAD,
		// saturating *_1..*_4 walk during EXP_FILL
		//
		case (fsm_next_state)
			FSM_STATE_EXP_LOAD_3:		t0_addr <= bram_addr_zero;
			FSM_STATE_EXP_LOAD_4:		t0_addr <= t0_addr_next;
			//
			FSM_STATE_EXP_FILL_1:		t0_addr <= bram_addr_zero;
			FSM_STATE_EXP_FILL_2,
			FSM_STATE_EXP_FILL_3,
			FSM_STATE_EXP_FILL_4:		t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr;
		endcase		
		//
		// t1_addr: single-port T1 address; *_3/*_4 stepping during EXP_INIT and
		// EXP_FILL, saturating *_1..*_4 walk during EXP_LOAD and EXP_SAVE
		//
		case (fsm_next_state)
			FSM_STATE_EXP_INIT_3:		t1_addr <= bram_addr_zero;
			FSM_STATE_EXP_INIT_4:		t1_addr <= t1_addr_next;
			//
			FSM_STATE_EXP_LOAD_1:		t1_addr <= bram_addr_zero;
			FSM_STATE_EXP_LOAD_2,
			FSM_STATE_EXP_LOAD_3,
			FSM_STATE_EXP_LOAD_4:		t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
			//
			FSM_STATE_EXP_FILL_3:		t1_addr <= bram_addr_zero;
			FSM_STATE_EXP_FILL_4:		t1_addr <= t1_addr_next;
			//
			FSM_STATE_EXP_SAVE_1:		t1_addr <= bram_addr_zero;
			FSM_STATE_EXP_SAVE_2,
			FSM_STATE_EXP_SAVE_3,
			FSM_STATE_EXP_SAVE_4:		t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
		endcase
		//
		// t2_addr_wr: port A address of T2; zeroed for *_3 and incremented for
		// *_4 of every phase listed below
		//
		case (fsm_next_state)
			//
			FSM_STATE_MUL_INIT_3:		t2_addr_wr <= bram_addr_zero;
			FSM_STATE_MUL_INIT_4:		t2_addr_wr <= t2_addr_wr_next;
			//
			FSM_STATE_CRT_INIT_A_3:		t2_addr_wr <= bram_addr_zero;
			FSM_STATE_CRT_INIT_A_4:		t2_addr_wr <= t2_addr_wr_next;
			//
			FSM_STATE_CRT_INIT_B_3:		t2_addr_wr <= bram_addr_zero;
			FSM_STATE_CRT_INIT_B_4:		t2_addr_wr <= t2_addr_wr_next;
			//
			FSM_STATE_CRT_INIT_C_3:		t2_addr_wr <= bram_addr_zero;
			FSM_STATE_CRT_INIT_C_4:		t2_addr_wr <= t2_addr_wr_next;
			//
			FSM_STATE_EXP_INIT_3:		t2_addr_wr <= bram_addr_zero;
			FSM_STATE_EXP_INIT_4:		t2_addr_wr <= t2_addr_wr_next;
			//
			FSM_STATE_EXP_FILL_3:		t2_addr_wr <= bram_addr_zero;
			FSM_STATE_EXP_FILL_4:		t2_addr_wr <= t2_addr_wr_next;
		endcase		
		//
		// pp_addr_rd: port B address of PP; walks 0..bram_addr_last during EXP_FILL
		//
		case (fsm_next_state)
			FSM_STATE_EXP_FILL_1:		pp_addr_rd <= bram_addr_zero;
			FSM_STATE_EXP_FILL_2,
			FSM_STATE_EXP_FILL_3,
			FSM_STATE_EXP_FILL_4:		pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
		endcase
		//
		// tp_addr_rd: port B address of TP; walks 0..bram_addr_last during
		// EXP_INIT, CRT_INIT_B, CRT_INIT_C and EXP_FILL
		//
		case (fsm_next_state)
			FSM_STATE_EXP_INIT_1: 		tp_addr_rd <= bram_addr_zero;
			FSM_STATE_EXP_INIT_2,
			FSM_STATE_EXP_INIT_3,
			FSM_STATE_EXP_INIT_4:		tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
			//
			FSM_STATE_CRT_INIT_B_1: 	tp_addr_rd <= bram_addr_zero;
			FSM_STATE_CRT_INIT_B_2,
			FSM_STATE_CRT_INIT_B_3,
			FSM_STATE_CRT_INIT_B_4:		tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
			//
			FSM_STATE_CRT_INIT_C_1: 	tp_addr_rd <= bram_addr_zero;
			FSM_STATE_CRT_INIT_C_2,
			FSM_STATE_CRT_INIT_C_3,
			FSM_STATE_CRT_INIT_C_4:		tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
			//
			FSM_STATE_EXP_FILL_1:		tp_addr_rd <= bram_addr_zero;
			FSM_STATE_EXP_FILL_2,
			FSM_STATE_EXP_FILL_3,
			FSM_STATE_EXP_FILL_4:		tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
		endcase
		//
	end


		//
		// Memory Write Enable Logic
		//
	always @(posedge clk) begin
		//
		// Every write enable is deasserted by default. The case statements
		// below override that default (for non-blocking assignments to the
		// same register the last one executed wins) whenever the state the
		// FSM is about to enter is one that stores data.
		//
		r_wren  <= 1'b0;
		p_wren  <= 1'b0;
		t0_wren <= 1'b0;
		t1_wren <= 1'b0;
		t2_wren <= 1'b0;
		//
		// r_wren: enabled for EXP_SAVE_3/4
		//
		case (fsm_next_state)
			FSM_STATE_EXP_SAVE_3,
			FSM_STATE_EXP_SAVE_4:		r_wren <= 1'b1;
			default:					;
		endcase
		//
		// p_wren: enabled for the *_3/*_4 steps of MUL_INIT, CRT_INIT_B,
		// EXP_INIT and EXP_FILL
		//
		case (fsm_next_state)
			FSM_STATE_MUL_INIT_3,   FSM_STATE_MUL_INIT_4,
			FSM_STATE_CRT_INIT_B_3, FSM_STATE_CRT_INIT_B_4,
			FSM_STATE_EXP_INIT_3,   FSM_STATE_EXP_INIT_4,
			FSM_STATE_EXP_FILL_3,   FSM_STATE_EXP_FILL_4:	p_wren <= 1'b1;
			default:					;
		endcase
		//
		// t0_wren: enabled for EXP_LOAD_3/4
		//
		case (fsm_next_state)
			FSM_STATE_EXP_LOAD_3,
			FSM_STATE_EXP_LOAD_4:		t0_wren <= 1'b1;
			default:					;
		endcase
		//
		// t1_wren: enabled for EXP_INIT_3/4 and EXP_FILL_3/4
		//
		case (fsm_next_state)
			FSM_STATE_EXP_INIT_3,   FSM_STATE_EXP_INIT_4,
			FSM_STATE_EXP_FILL_3,   FSM_STATE_EXP_FILL_4:	t1_wren <= 1'b1;
			default:					;
		endcase
		//
		// t2_wren: enabled for the *_3/*_4 steps of every INIT phase
		// (MUL, CRT A/B/C, EXP) and of EXP_FILL
		//
		case (fsm_next_state)
			FSM_STATE_MUL_INIT_3,   FSM_STATE_MUL_INIT_4,
			FSM_STATE_CRT_INIT_A_3, FSM_STATE_CRT_INIT_A_4,
			FSM_STATE_CRT_INIT_B_3, FSM_STATE_CRT_INIT_B_4,
			FSM_STATE_CRT_INIT_C_3, FSM_STATE_CRT_INIT_C_4,
			FSM_STATE_EXP_INIT_3,   FSM_STATE_EXP_INIT_4,
			FSM_STATE_EXP_FILL_3,   FSM_STATE_EXP_FILL_4:	t2_wren <= 1'b1;
			default:					;
		endcase
		//
	end
	
	
		//
		// Memory Input Selector
		//
	always @(posedge clk) begin
		//
		// Outside of the states listed below the data inputs are don't-care,
		// so each register first receives X and is then overridden (last
		// non-blocking assignment wins) when the upcoming state needs a
		// real value.
		//
		r_data_in	<= 32'dX;
		p_data_in	<= 32'dX;
		t0_data_in	<= 32'dX;
		t1_data_in	<= 32'dX;
		t2_data_in	<= 32'dX;
		//
		// r_data_in: takes t1 read data while saving
		//
		case (fsm_next_state)
			FSM_STATE_EXP_SAVE_3,
			FSM_STATE_EXP_SAVE_4:		r_data_in <= t1_data_out;
			default:					;
		endcase
		//
		// p_data_in: f_bram_out (MUL_INIT, CRT_INIT_B), tp_data_out (EXP_INIT)
		// or pp_data_out (EXP_FILL)
		//
		case (fsm_next_state)
			FSM_STATE_MUL_INIT_3,   FSM_STATE_MUL_INIT_4,
			FSM_STATE_CRT_INIT_B_3, FSM_STATE_CRT_INIT_B_4:	p_data_in <= f_bram_out;
			//
			FSM_STATE_EXP_INIT_3,   FSM_STATE_EXP_INIT_4:	p_data_in <= tp_data_out;
			//
			FSM_STATE_EXP_FILL_3,   FSM_STATE_EXP_FILL_4:	p_data_in <= pp_data_out;
			//
			default:					;
		endcase
		//
		// t0_data_in: takes t1 read data while loading
		//
		case (fsm_next_state)
			FSM_STATE_EXP_LOAD_3,
			FSM_STATE_EXP_LOAD_4:		t0_data_in <= t1_data_out;
			default:					;
		endcase
		//
		// t1_data_in: EXP_INIT feeds 1 on its step 3 and 0 on its step 4;
		// EXP_FILL picks tp_data_out or t0_data_out depending on flag_update_r
		//
		case (fsm_next_state)
			FSM_STATE_EXP_INIT_3:		t1_data_in <= 32'd1;
			FSM_STATE_EXP_INIT_4:		t1_data_in <= 32'd0;
			//
			FSM_STATE_EXP_FILL_3,
			FSM_STATE_EXP_FILL_4:		t1_data_in <= flag_update_r ? tp_data_out : t0_data_out;
			//
			default:					;
		endcase
		//
		// t2_data_in: m_bram_out (MUL_INIT, CRT_INIT_A), tp_data_out
		// (CRT_INIT_B, CRT_INIT_C), otherwise the same values as t1_data_in
		// for EXP_INIT and EXP_FILL
		//
		case (fsm_next_state)
			FSM_STATE_MUL_INIT_3,   FSM_STATE_MUL_INIT_4,
			FSM_STATE_CRT_INIT_A_3, FSM_STATE_CRT_INIT_A_4:	t2_data_in <= m_bram_out;
			//
			FSM_STATE_CRT_INIT_B_3, FSM_STATE_CRT_INIT_B_4,
			FSM_STATE_CRT_INIT_C_3, FSM_STATE_CRT_INIT_C_4:	t2_data_in <= tp_data_out;
			//
			FSM_STATE_EXP_INIT_3:		t2_data_in <= 32'd1;
			FSM_STATE_EXP_INIT_4:		t2_data_in <= 32'd0;
			//
			FSM_STATE_EXP_FILL_3,
			FSM_STATE_EXP_FILL_4:		t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
			//
			default:					;
		endcase
		//
	end
	
	
		//
		// Double Multiplier
		//
	// Control registers shared by both multiplier instances.
	reg	mul_ena;			// start strobe
	reg	mul_crt;			// drives the reduce_only inputs

	// Ready flags, one per multiplier instance.
	wire	mul_rdy_pp;
	wire	mul_rdy_tp;

	// High only while both multipliers report ready.
	wire	mul_rdy_all;
	assign	mul_rdy_all = mul_rdy_pp & mul_rdy_tp;

	//
	// Multiplier "pp": operand addresses go to the p1/p2 memories, the
	// result port is wired to the pp memory, and the n1 / n_coeff1 ports
	// supply the modulus-related inputs.
	//
	modexpa7_systolic_multiplier #
	(
		.SYSTOLIC_ARRAY_POWER	(SYSTOLIC_ARRAY_POWER),
		.OPERAND_ADDR_WIDTH		(OPERAND_ADDR_WIDTH)
	)
	mul_pp
	(
		// clock / reset
		.clk						(clk),
		.rst_n					(rst_n),

		// handshake and mode (shared with mul_tp)
		.ena						(mul_ena),
		.rdy						(mul_rdy_pp),
		.reduce_only			(mul_crt),

		// operand length
		.n_num_words			(m_num_words_latch),

		// operand A
		.a_bram_addr			(p1_addr_rd),
		.a_bram_out				(p1_data_out),

		// operand B
		.b_bram_addr			(p2_addr_rd),
		.b_bram_out				(p2_data_out),

		// N
		.n_bram_addr			(n1_bram_addr),
		.n_bram_out				(n1_bram_out),

		// N coefficient
		.n_coeff_bram_addr	(n_coeff1_bram_addr),
		.n_coeff_bram_out		(n_coeff1_bram_out),

		// result
		.r_bram_addr			(pp_addr_wr),
		.r_bram_in				(pp_data_in),
		.r_bram_wr				(pp_wren)
	);

	//
	// Multiplier "tp": operand addresses go to the t2/p3 memories, the
	// result port is wired to the tp memory, and the n2 / n_coeff2 ports
	// supply the modulus-related inputs.
	//
	modexpa7_systolic_multiplier #
	(
		.SYSTOLIC_ARRAY_POWER	(SYSTOLIC_ARRAY_POWER),
		.OPERAND_ADDR_WIDTH		(OPERAND_ADDR_WIDTH)
	)
	mul_tp
	(
		// clock / reset
		.clk						(clk),
		.rst_n					(rst_n),

		// handshake and mode (shared with mul_pp)
		.ena						(mul_ena),
		.rdy						(mul_rdy_tp),
		.reduce_only			(mul_crt),

		// operand length
		.n_num_words			(m_num_words_latch),

		// operand A
		.a_bram_addr			(t2_addr_rd),
		.a_bram_out				(t2_data_out),

		// operand B
		.b_bram_addr			(p3_addr_rd),
		.b_bram_out				(p3_data_out),

		// N
		.n_bram_addr			(n2_bram_addr),
		.n_bram_out				(n2_bram_out),

		// N coefficient
		.n_coeff_bram_addr	(n_coeff2_bram_addr),
		.n_coeff_bram_out		(n_coeff2_bram_out),

		// result
		.r_bram_addr			(tp_addr_wr),
		.r_bram_in				(tp_data_in),
		.r_bram_wr				(tp_wren)
	);
	
	
	//
	// mul_ena is registered from fsm_next_state: it is set on the clock
	// edge that moves the FSM into any *_CALC_1 state and cleared on
	// every other edge.
	//
	always @(posedge clk) begin
		//
		mul_ena <= 1'b0;
		//
		case (fsm_next_state)
			FSM_STATE_MUL_CALC_1,
			FSM_STATE_CRT_CALC_A_1,
			FSM_STATE_CRT_CALC_B_1,
			FSM_STATE_CRT_CALC_C_1,
			FSM_STATE_EXP_CALC_1:		mul_ena <= 1'b1;
			default:					;
		endcase
		//
	end

	//
	// mul_crt (wired to the multipliers' reduce_only inputs) is set only
	// on the clock edge that moves the FSM into CRT_CALC_A_1 and cleared
	// on every other edge.
	//
	always @(posedge clk) begin
		//
		if (fsm_next_state == FSM_STATE_CRT_CALC_A_1)
			mul_crt <= 1'b1;
		else
			mul_crt <= 1'b0;
		//
	end
			

		//
		// FSM Process
		//
	//
	// State register: active-low asynchronous reset parks the FSM in
	// EXP_IDLE, otherwise the combinationally computed next state is
	// latched on every rising clock edge.
	//
	always @(posedge clk or negedge rst_n) begin
		//
		if (!rst_n)
			fsm_state <= FSM_STATE_EXP_IDLE;
		else
			fsm_state <= fsm_next_state;
		//
	end
	
	
		//
		// FSM Transition Logic
		//
	always @* begin
		//
		// Combinational next-state function. The case statement below has no
		// default branch, so any fsm_state value that is not listed falls
		// back to EXP_STOP (which in turn leads to EXP_IDLE).
		//
		fsm_next_state = FSM_STATE_EXP_STOP;
		//
		case (fsm_state)
			//
			// Non-CRT path, INIT phase: steps 1..3 are single-cycle, step 4
			// repeats until t2_addr_wr_done is set.
			//
			FSM_STATE_MUL_INIT_1:										fsm_next_state = FSM_STATE_MUL_INIT_2;
			FSM_STATE_MUL_INIT_2:										fsm_next_state = FSM_STATE_MUL_INIT_3;
			FSM_STATE_MUL_INIT_3:										fsm_next_state = FSM_STATE_MUL_INIT_4;
			FSM_STATE_MUL_INIT_4:	if (t2_addr_wr_done)			fsm_next_state = FSM_STATE_MUL_CALC_1;
											else								fsm_next_state = FSM_STATE_MUL_INIT_4;
			//
			// Non-CRT path, CALC phase: step 2 waits for mul_rdy_tp only,
			// then control continues with EXP_INIT.
			//
			FSM_STATE_MUL_CALC_1:										fsm_next_state = FSM_STATE_MUL_CALC_2;
			FSM_STATE_MUL_CALC_2:	if (mul_rdy_tp)				fsm_next_state = FSM_STATE_MUL_CALC_3;
											else								fsm_next_state = FSM_STATE_MUL_CALC_2;
			FSM_STATE_MUL_CALC_3:										fsm_next_state = FSM_STATE_EXP_INIT_1;
			//
			// CRT path, stage A INIT: unlike stages B and C, step 4 here
			// waits on t2_addr_wr_done_crt.
			//
			FSM_STATE_CRT_INIT_A_1:										fsm_next_state = FSM_STATE_CRT_INIT_A_2;
			FSM_STATE_CRT_INIT_A_2:										fsm_next_state = FSM_STATE_CRT_INIT_A_3;
			FSM_STATE_CRT_INIT_A_3:										fsm_next_state = FSM_STATE_CRT_INIT_A_4;
			FSM_STATE_CRT_INIT_A_4:	if (t2_addr_wr_done_crt)	fsm_next_state = FSM_STATE_CRT_CALC_A_1;
											else								fsm_next_state = FSM_STATE_CRT_INIT_A_4;

			//
			// CRT path, stage A CALC: waits for mul_rdy_tp, then stage B.
			//
			FSM_STATE_CRT_CALC_A_1:										fsm_next_state = FSM_STATE_CRT_CALC_A_2;
			FSM_STATE_CRT_CALC_A_2:	if (mul_rdy_tp)				fsm_next_state = FSM_STATE_CRT_CALC_A_3;
											else								fsm_next_state = FSM_STATE_CRT_CALC_A_2;
			FSM_STATE_CRT_CALC_A_3:										fsm_next_state = FSM_STATE_CRT_INIT_B_1;
			//
			// CRT path, stage B INIT: step 4 repeats until t2_addr_wr_done.
			//
			FSM_STATE_CRT_INIT_B_1:										fsm_next_state = FSM_STATE_CRT_INIT_B_2;
			FSM_STATE_CRT_INIT_B_2:										fsm_next_state = FSM_STATE_CRT_INIT_B_3;
			FSM_STATE_CRT_INIT_B_3:										fsm_next_state = FSM_STATE_CRT_INIT_B_4;
			FSM_STATE_CRT_INIT_B_4:	if (t2_addr_wr_done)			fsm_next_state = FSM_STATE_CRT_CALC_B_1;
											else								fsm_next_state = FSM_STATE_CRT_INIT_B_4;
			//
			// CRT path, stage B CALC: waits for mul_rdy_tp, then stage C.
			//
			FSM_STATE_CRT_CALC_B_1:										fsm_next_state = FSM_STATE_CRT_CALC_B_2;
			FSM_STATE_CRT_CALC_B_2:	if (mul_rdy_tp)				fsm_next_state = FSM_STATE_CRT_CALC_B_3;
											else								fsm_next_state = FSM_STATE_CRT_CALC_B_2;
			FSM_STATE_CRT_CALC_B_3:										fsm_next_state = FSM_STATE_CRT_INIT_C_1;
			//
			// CRT path, stage C INIT: step 4 repeats until t2_addr_wr_done.
			//
			FSM_STATE_CRT_INIT_C_1:										fsm_next_state = FSM_STATE_CRT_INIT_C_2;
			FSM_STATE_CRT_INIT_C_2:										fsm_next_state = FSM_STATE_CRT_INIT_C_3;
			FSM_STATE_CRT_INIT_C_3:										fsm_next_state = FSM_STATE_CRT_INIT_C_4;
			FSM_STATE_CRT_INIT_C_4:	if (t2_addr_wr_done)			fsm_next_state = FSM_STATE_CRT_CALC_C_1;
											else								fsm_next_state = FSM_STATE_CRT_INIT_C_4;
			//
			// CRT path, stage C CALC: waits for mul_rdy_tp, then joins the
			// non-CRT path at EXP_INIT.
			//
			FSM_STATE_CRT_CALC_C_1:										fsm_next_state = FSM_STATE_CRT_CALC_C_2;
			FSM_STATE_CRT_CALC_C_2:	if (mul_rdy_tp)				fsm_next_state = FSM_STATE_CRT_CALC_C_3;
											else								fsm_next_state = FSM_STATE_CRT_CALC_C_2;
			FSM_STATE_CRT_CALC_C_3:										fsm_next_state = FSM_STATE_EXP_INIT_1;
			//
			// Idle: stay here until ena_trig; the crt input then selects
			// between the CRT path and the non-CRT path.
			//
			FSM_STATE_EXP_IDLE:		if (ena_trig)					fsm_next_state = crt ?
																					FSM_STATE_CRT_INIT_A_1 : FSM_STATE_MUL_INIT_1;
											else								fsm_next_state = FSM_STATE_EXP_IDLE;
			//
			// EXP_INIT: step 4 repeats until t1_addr_done.
			//
			FSM_STATE_EXP_INIT_1:										fsm_next_state = FSM_STATE_EXP_INIT_2;
			FSM_STATE_EXP_INIT_2:										fsm_next_state = FSM_STATE_EXP_INIT_3;
			FSM_STATE_EXP_INIT_3:										fsm_next_state = FSM_STATE_EXP_INIT_4;
			FSM_STATE_EXP_INIT_4:	if (t1_addr_done)				fsm_next_state = FSM_STATE_EXP_LOAD_1;
											else								fsm_next_state = FSM_STATE_EXP_INIT_4;
			//
			// EXP_LOAD (start of each loop iteration): step 4 repeats until
			// t0_addr_done.
			//
			FSM_STATE_EXP_LOAD_1:										fsm_next_state = FSM_STATE_EXP_LOAD_2;
			FSM_STATE_EXP_LOAD_2:										fsm_next_state = FSM_STATE_EXP_LOAD_3;
			FSM_STATE_EXP_LOAD_3:										fsm_next_state = FSM_STATE_EXP_LOAD_4;
			FSM_STATE_EXP_LOAD_4:	if (t0_addr_done)				fsm_next_state = FSM_STATE_EXP_CALC_1;
											else								fsm_next_state = FSM_STATE_EXP_LOAD_4;
			//
			// EXP_CALC: the only CALC phase that waits for both multipliers
			// (mul_rdy_all) rather than for mul_rdy_tp alone.
			//
			FSM_STATE_EXP_CALC_1:										fsm_next_state = FSM_STATE_EXP_CALC_2;
			FSM_STATE_EXP_CALC_2:	if (mul_rdy_all)				fsm_next_state = FSM_STATE_EXP_CALC_3;
											else								fsm_next_state = FSM_STATE_EXP_CALC_2;
			FSM_STATE_EXP_CALC_3:										fsm_next_state = FSM_STATE_EXP_FILL_1;
			//
			// EXP_FILL: step 4 repeats until p_addr_wr_done.
			//
			FSM_STATE_EXP_FILL_1:										fsm_next_state = FSM_STATE_EXP_FILL_2;
			FSM_STATE_EXP_FILL_2:										fsm_next_state = FSM_STATE_EXP_FILL_3;
			FSM_STATE_EXP_FILL_3:										fsm_next_state = FSM_STATE_EXP_FILL_4;
			FSM_STATE_EXP_FILL_4:	if (p_addr_wr_done)			fsm_next_state = FSM_STATE_EXP_NEXT;
											else								fsm_next_state = FSM_STATE_EXP_FILL_4;			
			//
			// EXP_NEXT: loop back to EXP_LOAD until bit_cnt_done, then save.
			//
			FSM_STATE_EXP_NEXT:		if (bit_cnt_done)				fsm_next_state = FSM_STATE_EXP_SAVE_1;
											else								fsm_next_state = FSM_STATE_EXP_LOAD_1;
			//
			// EXP_SAVE: step 4 repeats until r_addr_done.
			//
			FSM_STATE_EXP_SAVE_1:										fsm_next_state = FSM_STATE_EXP_SAVE_2;
			FSM_STATE_EXP_SAVE_2:										fsm_next_state = FSM_STATE_EXP_SAVE_3;
			FSM_STATE_EXP_SAVE_3:										fsm_next_state = FSM_STATE_EXP_SAVE_4;
			FSM_STATE_EXP_SAVE_4:	if (r_addr_done)				fsm_next_state = FSM_STATE_EXP_STOP;
											else								fsm_next_state = FSM_STATE_EXP_SAVE_4;
			//
			// EXP_STOP: single-cycle state, always returns to idle.
			//
			FSM_STATE_EXP_STOP:											fsm_next_state = FSM_STATE_EXP_IDLE;
			//
		endcase
		//
	end
			

endmodule