aboutsummaryrefslogblamecommitdiff
path: root/src/rtl/modexpa7_top.v
blob: ad101ddcd7232aefb4851eed8396e09adf69f121 (plain) (tree)




































                                                                           
                                                   










                                                                                                              

                                                                                                                 


































































































































































































































































































































                                                                                                                                                           

                                                                           












































































































                                                                                                                                                                                                   
//======================================================================
//
// Copyright (c) 2016, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
//   be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================

`timescale 1ns / 1ps

module modexpa7_top #
	(
		parameter OPERAND_ADDR_WIDTH		= 7,
		parameter SYSTOLIC_ARRAY_POWER	= 1
	)
	(
		input											clk,
		input											rst_n,
		
		input											init,
		output										ready,

		input											next,
		output										valid,

		input											crt_mode,

		input		[OPERAND_ADDR_WIDTH-1:0]	modulus_num_words,
		input		[OPERAND_ADDR_WIDTH+4:0]	exponent_num_bits,

		input 										bus_cs,
		input 										bus_we,
		input		[OPERAND_ADDR_WIDTH+1:0]	bus_addr,
		input		[                32-1:0]	bus_data_wr,
		output 	[                32-1:0]	bus_data_rd
	);
	
	
		/*
		 * FSM Declaration
		 */
		 
	localparam	[ 2: 0]	FSM_STATE_IDLE					= 3'd0;
	
	localparam	[ 2: 0]	FSM_STATE_PRECALC_START		= 3'd1;
	localparam	[ 2: 0]	FSM_STATE_PRECALC_CRUNCH	= 3'd2;
	localparam	[ 2: 0]	FSM_STATE_PRECALC_FINAL		= 3'd3;
		
	localparam	[ 2: 0]	FSM_STATE_EXPONENT_START	= 3'd4;
	localparam	[ 2: 0]	FSM_STATE_EXPONENT_CRUNCH	= 3'd5;
	localparam	[ 2: 0]	FSM_STATE_EXPONENT_FINAL	= 3'd6;
	
	localparam	[ 7: 0]	FSM_STATE_STOP					= 3'd7;
	
	
		/*
		 * FSM State / Next State
		 */
		 
	reg	[ 7: 0]	fsm_state = FSM_STATE_IDLE;
	reg	[ 7: 0]	fsm_next_state;


		/*
		 * Enable Delay (Trigger)
		 */
		 
   reg init_dly = 1'b0;
   reg next_dly = 1'b0;

		// delay init and next by one clock cycle
   always @(posedge clk) init_dly <= init;
	always @(posedge clk) next_dly <= next;

		// trigger new operation when one of the control inputs goes from low to high
   wire init_trig = init && !init_dly;
   wire next_trig = next && !next_dly;
	
	
		/*
		 * Ready and Valid Flags Logic
		 */
		 
	reg ready_reg = 1'b0;
	reg valid_reg = 1'b0;
	
	assign ready = ready_reg;
	assign valid = valid_reg;

		// ready flag logic
   always @(posedge clk or negedge rst_n)
		//
		if (rst_n == 1'b0)							ready_reg <= 1'b0;	// reset flag to default state
		else case (fsm_state)
			FSM_STATE_IDLE:	if (init_trig)		ready_reg <= 1'b0;	// clear flag when operation is started
			FSM_STATE_STOP:	if (!ready_reg)	ready_reg <= 1'b1;	// set flag after operation is finished
		endcase

		// valid flag logic
   always @(posedge clk or negedge rst_n)
		//
		if (rst_n == 1'b0)							valid_reg <= 1'b0;	// reset flag to default state
		else case (fsm_state)
			FSM_STATE_IDLE:	if (next_trig)		valid_reg <= 1'b0;	// clear flag when operation is started
			FSM_STATE_STOP:	if (!valid_reg)	valid_reg <= 1'b1;	// set flag after operation is finished
		endcase

	
		/*
		 * Parameters Latch
		 */
	reg	[OPERAND_ADDR_WIDTH-1:0]	modulus_num_words_latch;
	reg	[OPERAND_ADDR_WIDTH+4:0]	exponent_num_bits_latch;

		// save number of words in modulus when pre-calculation has been triggered,
		// i.e. user has apparently loaded a new modulus into the core
	always @(posedge clk)
		//
		if (fsm_next_state == FSM_STATE_PRECALC_START)
			modulus_num_words_latch <= modulus_num_words;

		// save number of bits in exponent when exponentiation has been triggered,
		// i.e. user has loaded a new message into the core and wants exponentiate
	always @(posedge clk)
		//
		if (fsm_next_state == FSM_STATE_EXPONENT_START)
			exponent_num_bits_latch <= exponent_num_bits;

	
		/*
		 * Split bus address into bank/word parts.
		 */
	wire	[                 2 - 1 : 0]	bus_addr_bank = bus_addr[OPERAND_ADDR_WIDTH+1:OPERAND_ADDR_WIDTH];
	wire	[OPERAND_ADDR_WIDTH - 1 : 0]	bus_addr_word = bus_addr[OPERAND_ADDR_WIDTH-1:0];
	
	
		/*
		 * Define bank offsets.
		 */
	localparam	[ 1: 0]	BANK_MODULUS	= 2'b00;	// 0
	localparam	[ 1: 0]	BANK_MESSAGE	= 2'b01;	// 1
	localparam	[ 1: 0]	BANK_EXPONENT	= 2'b10;	// 2
	localparam	[ 1: 0]	BANK_RESULT		= 2'b11;	// 3
	
	
		/*
		 * Instantiate user-accessible memories.
		 *
		 * We have four block memories: N for modulus, M for message, D for exponent
		 * and R for result. Memories N, M and D and writeable from the user's side,
		 * memory R is writeable from the core's side and is read-only by user.
		 *
		 * Note, that the core does squaring and multiplication simultaneously, so
		 * there are two identical systolic multipliers inside. It's better to have two
		 * copies of modulus to give router some freeding in placing the multipliers,
		 * that's why there are actually two identical block memories N1 and N2 instead of N.
		 * User reads from the first one, but writes to both of them. Note that the synthesis
		 * tool might get too clever and find out that N1 and N2 are identical and decide
		 * to throw one of them away, use (* KEEP="TRUE" *) or something like that then.
		 *
		 * We also need N3 and N4, because during pre-computation F and N_COEFF are calculated
		 * at the same time, so we need two more copies of modulus to allow different words
		 * of it to be read at the same time.
		 */
	
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_n1_addr;
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_n2_addr;
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_n3_addr;
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_n4_addr;
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_m_addr;
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_d_addr;
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_r_addr;
	
	wire	[                32-1:0]	core_n1_data;
	wire	[                32-1:0]	core_n2_data;
	wire	[                32-1:0]	core_n3_data;
	wire	[                32-1:0]	core_n4_data;
	wire	[                32-1:0]	core_m_data;
	wire	[                32-1:0]	core_d_data;
	wire	[                32-1:0]	core_r_data;
	
	wire	[                32-1:0]	user_n_data;
	wire	[                32-1:0]	user_m_data;
	wire	[                32-1:0]	user_d_data;
	wire	[                32-1:0]	user_r_data;
	
	wire										core_r_wren;
	wire										user_n_wren = bus_cs && bus_we && (bus_addr_bank == BANK_MODULUS);
	wire										user_m_wren = bus_cs && bus_we && (bus_addr_bank == BANK_MESSAGE);
	wire										user_d_wren = bus_cs && bus_we && (bus_addr_bank == BANK_EXPONENT);
	
	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_n1 (.clk(clk),
		.a_addr(bus_addr_word), .a_out(user_n_data), .a_wr(user_n_wren), .a_in(bus_data_wr), 
		.b_addr(core_n1_addr),  .b_out(core_n1_data));

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_n2 (.clk(clk),
		.a_addr(bus_addr_word), .a_out(), .a_wr(user_n_wren), .a_in(bus_data_wr), 
		.b_addr(core_n2_addr),  .b_out(core_n2_data));

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_n3 (.clk(clk),
		.a_addr(bus_addr_word), .a_out(), .a_wr(user_n_wren), .a_in(bus_data_wr), 
		.b_addr(core_n3_addr),  .b_out(core_n3_data));

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_n4 (.clk(clk),
		.a_addr(bus_addr_word), .a_out(), .a_wr(user_n_wren), .a_in(bus_data_wr), 
		.b_addr(core_n4_addr),  .b_out(core_n4_data));

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_m (.clk(clk),
		.a_addr(bus_addr_word), .a_out(user_m_data), .a_wr(user_m_wren), .a_in(bus_data_wr), 
		.b_addr(core_m_addr),   .b_out(core_m_data));

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_d (.clk(clk),
		.a_addr(bus_addr_word), .a_out(user_d_data), .a_wr(user_d_wren), .a_in(bus_data_wr), 
		.b_addr(core_d_addr), .b_out(core_d_data));

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_r (.clk(clk),
		.a_addr(core_r_addr), .a_out(), .a_wr(core_r_wren), .a_in(core_r_data), 
		.b_addr(bus_addr_word), .b_out(user_r_data));

		
		/*
		 * Instantiate internal memories.
		 *
		 * We have two block memories: F for Montgomery factor and N_COEFF for modulus-dependent
		 * coefficient, they are written to during pre-calculation and read from during exponentiation.
		 *
		 * Note, that there are actually two identical block memories N_COEFF1 and N_COEFF2 instead of
		 * just one N_COEFF, read the explanation above. F is only used by one of the multipliers, so
		 * we don't need F1 and F2.
		 */

	wire	[OPERAND_ADDR_WIDTH-1:0]	core_f_addr_wr;
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_f_addr_rd;
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_n_coeff_addr_wr;
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_n_coeff1_addr_rd;
	wire	[OPERAND_ADDR_WIDTH-1:0]	core_n_coeff2_addr_rd;
	
	wire	[                32-1:0]	core_f_data_wr;
	wire	[                32-1:0]	core_f_data_rd;
	wire	[                32-1:0]	core_n_coeff_data_wr;
	wire	[                32-1:0]	core_n_coeff1_data_rd;
	wire	[                32-1:0]	core_n_coeff2_data_rd;
		
	wire										core_f_wren;
	wire										core_n_coeff_wren;

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_f (.clk(clk),
		.a_addr(core_f_addr_wr), .a_out(), .a_wr(core_f_wren), .a_in(core_f_data_wr),
		.b_addr(core_f_addr_rd), .b_out(core_f_data_rd));

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_n_coeff1 (.clk(clk),
		.a_addr(core_n_coeff_addr_wr), .a_out(), .a_wr(core_n_coeff_wren), .a_in(core_n_coeff_data_wr),
		.b_addr(core_n_coeff1_addr_rd), .b_out(core_n_coeff1_data_rd));

	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
	bram_n_coeff2 (.clk(clk),
		.a_addr(core_n_coeff_addr_wr), .a_out(), .a_wr(core_n_coeff_wren), .a_in(core_n_coeff_data_wr),
		.b_addr(core_n_coeff2_addr_rd), .b_out(core_n_coeff2_data_rd));
		

		/*
		 * Montgomery factor calculation module.
		 */
	(* EQUIVALENT_REGISTER_REMOVAL="NO" *)
	reg	precalc_f_ena = 1'b0;
	wire	precalc_r_rdy;
	
	modexpa7_factor #
	(
		.OPERAND_ADDR_WIDTH	(OPERAND_ADDR_WIDTH)
	)
	precalc_f
	(
		.clk				(clk),
		.rst_n			(rst_n),

		.ena				(precalc_f_ena),
		.rdy				(precalc_r_rdy),

		.n_bram_addr	(core_n3_addr),
		.f_bram_addr	(core_f_addr_wr),

		.n_bram_out		(core_n3_data),

		.f_bram_in		(core_f_data_wr),
		.f_bram_wr		(core_f_wren),

		.n_num_words	(modulus_num_words_latch)
	);
	
	
		/*
		 * Modulus-depentent coefficient calculation module.
		 */
	(* EQUIVALENT_REGISTER_REMOVAL="NO" *)	 
	reg	precalc_n_coeff_ena = 1'b0;
	wire	precalc_n_coeff_rdy;
	
	modexpa7_n_coeff #
	(
		.OPERAND_ADDR_WIDTH	(OPERAND_ADDR_WIDTH)
	)
	precalc_n_coeff
	(
		.clk						(clk),
		.rst_n					(rst_n),

		.ena						(precalc_n_coeff_ena),
		.rdy						(precalc_n_coeff_rdy),

		.n_bram_addr			(core_n4_addr),
		.n_coeff_bram_addr	(core_n_coeff_addr_wr),

		.n_bram_out				(core_n4_data),

		.n_coeff_bram_in		(core_n_coeff_data_wr),
		.n_coeff_bram_wr		(core_n_coeff_wren),

		.n_num_words			(modulus_num_words_latch)
	);		 

		/*
		 * Exponentiation module.
		 */
		 
	reg	exponent_ena = 1'b0;
	wire	exponent_rdy;
		 
	modexpa7_exponentiator #
	(
		.OPERAND_ADDR_WIDTH		(OPERAND_ADDR_WIDTH),
		.SYSTOLIC_ARRAY_POWER	(SYSTOLIC_ARRAY_POWER)
	)
	exponent_r
	(
		.clk						(clk),
		.rst_n					(rst_n),

		.ena						(exponent_ena),
		.rdy						(exponent_rdy),
		
		.crt						(crt_mode),
		
		.m_bram_addr			(core_m_addr),
		.d_bram_addr			(core_d_addr),
		.f_bram_addr			(core_f_addr_rd),
		.n1_bram_addr			(core_n1_addr),
		.n2_bram_addr			(core_n2_addr),
		.n_coeff1_bram_addr	(core_n_coeff1_addr_rd),
		.n_coeff2_bram_addr	(core_n_coeff2_addr_rd),
		.r_bram_addr			(core_r_addr),

		.m_bram_out				(core_m_data),
		.d_bram_out				(core_d_data),
		.f_bram_out				(core_f_data_rd),
		.n1_bram_out			(core_n1_data),
		.n2_bram_out			(core_n2_data),
		.n_coeff1_bram_out	(core_n_coeff1_data_rd),
		.n_coeff2_bram_out	(core_n_coeff2_data_rd),

		.r_bram_in				(core_r_data),
		.r_bram_wr				(core_r_wren),

		.m_num_words			(modulus_num_words_latch),
		.d_num_bits				(exponent_num_bits_latch)
	);
	
	
		/*
		 * Sub-Module Enable Logic
		 */
		 
	always @(posedge clk) begin
		precalc_f_ena			<= (fsm_next_state == FSM_STATE_PRECALC_START)  ? 1'b1 : 1'b0;
		precalc_n_coeff_ena	<= (fsm_next_state == FSM_STATE_PRECALC_START)  ? 1'b1 : 1'b0;
		exponent_ena			<= (fsm_next_state == FSM_STATE_EXPONENT_START) ? 1'b1 : 1'b0;
	end
		
		
	
	
		/*
		 * FSM Process
		 */
		 
	always @(posedge clk or negedge rst_n)
		//
		if (rst_n == 1'b0)	fsm_state <= FSM_STATE_IDLE;
		else						fsm_state <= fsm_next_state;
	
	
		/*
		 * FSM Transition Logic
		 */
		 
		 // handy flag that tells whether both pre-calculations modules are idle
	wire	precalc_rdy = precalc_n_coeff_rdy && precalc_r_rdy;
	
	always @* begin
		//
		fsm_next_state = FSM_STATE_STOP;
		//
		case (fsm_state)
			//
			FSM_STATE_IDLE:					if (init_trig)			fsm_next_state = FSM_STATE_PRECALC_START;		// init has priority over next
													else if (next_trig)	fsm_next_state = FSM_STATE_EXPONENT_START;
													else						fsm_next_state = FSM_STATE_IDLE;
			//
			FSM_STATE_PRECALC_START:									fsm_next_state = FSM_STATE_PRECALC_CRUNCH;
			FSM_STATE_PRECALC_CRUNCH:		if (precalc_rdy)		fsm_next_state = FSM_STATE_PRECALC_FINAL;
													else						fsm_next_state = FSM_STATE_PRECALC_CRUNCH;
			FSM_STATE_PRECALC_FINAL:									fsm_next_state = FSM_STATE_STOP;
			//
			FSM_STATE_EXPONENT_START:									fsm_next_state = FSM_STATE_EXPONENT_CRUNCH;
			FSM_STATE_EXPONENT_CRUNCH:		if (exponent_rdy)		fsm_next_state = FSM_STATE_EXPONENT_FINAL;
													else						fsm_next_state = FSM_STATE_EXPONENT_CRUNCH;
			FSM_STATE_EXPONENT_FINAL:									fsm_next_state = FSM_STATE_STOP;
			//
			FSM_STATE_STOP:												fsm_next_state = FSM_STATE_IDLE;
			//
		endcase
		//
	end


		/*
		 * Bus read mux.
		 */
		 
		// delay bus_addr_bank by 1 clock cycle to remember from where we've just been reading
	reg	[1: 0]	bus_addr_bank_dly;
	always @(posedge clk)
		if (bus_cs) bus_addr_bank_dly <= bus_addr_bank;

		// map mux to output port
	reg	[31: 0]	bus_data_rd_mux;
	assign bus_data_rd = bus_data_rd_mux;

		// select the right data word
	always @(*)
		//
		case (bus_addr_bank_dly)
			//
			BANK_MODULUS:		bus_data_rd_mux = user_n_data;
			BANK_MESSAGE:		bus_data_rd_mux = user_m_data;
			BANK_EXPONENT:		bus_data_rd_mux = user_d_data;
			BANK_RESULT:		bus_data_rd_mux = user_r_data;
			//
		endcase
		

endmodule