// (repository web-viewer navigation residue) about | summary | refs | log | blame | commit | diff
// path: root/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v
// blob: c3dccd5f44a035250e6e5ecce30dd29156c937f0 (plain) (tree)

































                                                                                              
                             




















































































































































































































































































































































































                                                                                                                                                                                   
`timescale 1ns / 1ps

//------------------------------------------------------------------------------
// modinv_helper_invert_precalc
//
// Word-serial pre-calculation helper. Starting from four multi-word operands
// (r, s, u, v), each BUFFER_NUM_WORDS words of 32 bits, one pass of this module
// streams the operands through and produces nine derived multi-word values on
// separate write ports:
//
//   r_dbl          = r shifted left by one bit   (low word first, bit 31 of
//   s_dbl          = s shifted left by one bit    each word carried into bit 0
//                                                 of the next word)
//   r_plus_s       = r + s                       (32-bit adder, carry chained)
//   u_minus_v      = u - v                       (32-bit subtractor, borrow chained)
//   v_minus_u      = v - u
//   u_half         = u shifted right by one bit  (high word first, bit 0 of
//   v_half         = v shifted right by one bit   each word carried into bit 31
//                                                 of the next lower word; the
//                                                 top bit is filled with 0)
//   u_minus_v_half = u_minus_v_din shifted right by one bit
//   v_minus_u_half = v_minus_u_din shifted right by one bit
//
// Control:
//   * proc_cnt is cleared asynchronously while rst_n is low.
//   * rdy is high exactly while proc_cnt == 0.
//   * While rdy is high, ena starts a pass; the counter then free-runs for
//     PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4 counts and wraps back to 0.
//     If ena is still high when the counter is back at 0, another pass starts.
//
// All address and write-enable activity is derived from comparing proc_cnt
// against fixed start/stop thresholds (inclusive windows).
//
// NOTE(review): the one-cycle offset between the read-address window and the
// first write-enable window suggests the operand memories are expected to have
// a one-clock synchronous read latency, and the further one-cycle offset of
// the "out2" window suggests one clock of latency through adder32_wrapper /
// subtractor32_wrapper. Neither the memories nor the wrappers are visible
// here -- confirm against their definitions.
//
// NOTE(review): u_minus_v_din / v_minus_u_din are presumably the read-back data
// of the memories written through u_minus_v_dout / v_minus_u_dout (they share
// the u_minus_v_addr / v_minus_u_addr ports) -- confirm at the instantiation.
//------------------------------------------------------------------------------
module modinv_helper_invert_precalc
	(
		clk, rst_n,
		ena, rdy,
		
		r_addr, r_din,
		s_addr, s_din,
		u_addr, u_din,
		v_addr, v_din,
		
		r_dbl_addr,          r_dbl_wren,          r_dbl_dout,
		s_dbl_addr,          s_dbl_wren,          s_dbl_dout,
		r_plus_s_addr,       r_plus_s_wren,       r_plus_s_dout,
		u_half_addr,         u_half_wren,         u_half_dout,
		v_half_addr,         v_half_wren,         v_half_dout,
		u_minus_v_addr,      u_minus_v_wren,      u_minus_v_dout,      u_minus_v_din,
		v_minus_u_addr,      v_minus_u_wren,      v_minus_u_dout,      v_minus_u_din,
		u_minus_v_half_addr, u_minus_v_half_wren, u_minus_v_half_dout,
		v_minus_u_half_addr, v_minus_u_half_wren, v_minus_u_half_dout
	);
	

		//
		// Parameters
		//
		// BUFFER_NUM_WORDS - number of 32-bit words per operand
		// BUFFER_ADDR_BITS - width of every address port; must be wide enough
		//                    to hold BUFFER_NUM_WORDS - 1 (not checked here)
		//
	parameter BUFFER_NUM_WORDS		= 9;
	parameter BUFFER_ADDR_BITS		= 4;
	
	
		//
		// clog2
		//
		// The included file is expected to define the clog2() function that
		// is used below to size the cycle counter.
		//
`include "../modinv_clog2.v"
	
	
		//
		// Constants
		//
		// PROC_NUM_CYCLES - number of counter states in one pass (including
		//                   the idle/ready state 0)
		// PROC_CNT_BITS   - width of the cycle counter
		//
	localparam PROC_NUM_CYCLES	= 2 * BUFFER_NUM_WORDS + 4;
	localparam PROC_CNT_BITS	= clog2(PROC_NUM_CYCLES);
	
	
		//
		// Ports
		//
		// clk, rst_n - clock and active-low asynchronous reset (reset only
		//              affects the cycle counter)
		// ena, rdy   - start request and idle flag
		//
	input		wire									clk;
	input		wire									rst_n;
	input		wire									ena;
	output	wire									rdy;

		// read addresses of the four source operands (all driven by addr_in)
	output	wire	[BUFFER_ADDR_BITS-1:0]	r_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	s_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	u_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	v_addr;
		
		// read data of the four source operands
	input		wire	[              32-1:0]	r_din;
	input		wire	[              32-1:0]	s_din;
	input		wire	[              32-1:0]	u_din;
	input		wire	[              32-1:0]	v_din;
		
		// addresses of the nine result ports
	output	wire	[BUFFER_ADDR_BITS-1:0]	r_dbl_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	s_dbl_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	r_plus_s_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	u_half_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	v_half_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	u_minus_v_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	v_minus_u_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	u_minus_v_half_addr;
	output	wire	[BUFFER_ADDR_BITS-1:0]	v_minus_u_half_addr;
		
		// write data of the nine result ports
	output	wire	[              32-1:0]	r_dbl_dout;
	output	wire	[              32-1:0]	s_dbl_dout;
	output	wire	[              32-1:0]	r_plus_s_dout;
	output	wire	[              32-1:0]	u_half_dout;
	output	wire	[              32-1:0]	v_half_dout;
	output	wire	[              32-1:0]	u_minus_v_dout;
	output	wire	[              32-1:0]	v_minus_u_dout;
	output	wire	[              32-1:0]	u_minus_v_half_dout;
	output	wire	[              32-1:0]	v_minus_u_half_dout;
		
		// write enables of the nine result ports
	output	wire									r_dbl_wren;
	output	wire									s_dbl_wren;
	output	wire									r_plus_s_wren;
	output	wire									u_half_wren;
	output	wire									v_half_wren;
	output	wire									u_minus_v_wren;
	output	wire									v_minus_u_wren;
	output	wire									u_minus_v_half_wren;
	output	wire									v_minus_u_half_wren;
	
		// data inputs associated with the u_minus_v / v_minus_u ports; they
		// feed the *_half right-shift outputs
	input		wire	[              32-1:0]	u_minus_v_din;
	input		wire	[              32-1:0]	v_minus_u_din;
	


		//
		// Counter
		//
		// proc_cnt counts 0 .. PROC_NUM_CYCLES-1; proc_cnt_next increments
		// and wraps back to zero after the maximum value.
		//
	reg	[PROC_CNT_BITS-1:0]	proc_cnt;

	wire	[PROC_CNT_BITS-1:0]	proc_cnt_max	= PROC_NUM_CYCLES - 1;
	wire	[PROC_CNT_BITS-1:0]	proc_cnt_zero	= {PROC_CNT_BITS{1'b0}};
	wire	[PROC_CNT_BITS-1:0]	proc_cnt_next	= (proc_cnt < proc_cnt_max) ?
																	proc_cnt + 1'b1 : proc_cnt_zero;
	
		//
		// Addresses
		//
		// Five address registers are shared between the ports:
		//   addr_in   - source read address (r, s, u, v)
		//   addr_out1 - r_dbl, s_dbl
		//   addr_out2 - r_plus_s, u_minus_v, v_minus_u
		//   addr_out3 - u_half, v_half
		//   addr_out4 - u_minus_v_half, v_minus_u_half
		//
		// *_next wraps from the last word back to zero.
		// addr_in_prev and addr_out2_prev saturate at zero, whereas
		// addr_out3_prev and addr_out4_prev wrap from zero to the last word.
		//
	reg	[BUFFER_ADDR_BITS-1:0]	addr_in;

	wire	[BUFFER_ADDR_BITS-1:0]	addr_in_last	= BUFFER_NUM_WORDS - 1;
	wire	[BUFFER_ADDR_BITS-1:0]	addr_in_zero	= {BUFFER_ADDR_BITS{1'b0}};
	wire	[BUFFER_ADDR_BITS-1:0]	addr_in_next	= (addr_in < addr_in_last) ?
																		addr_in + 1'b1 : addr_in_zero;
	wire	[BUFFER_ADDR_BITS-1:0]	addr_in_prev	= (addr_in > addr_in_zero) ?
																		addr_in - 1'b1 : addr_in_zero;
																		
	reg	[BUFFER_ADDR_BITS-1:0]	addr_out1;
	
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out1_last	= BUFFER_NUM_WORDS - 1;
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out1_zero	= {BUFFER_ADDR_BITS{1'b0}};
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out1_next	= (addr_out1 < addr_out1_last) ?
																		addr_out1 + 1'b1 : addr_out1_zero;
																		
	reg	[BUFFER_ADDR_BITS-1:0]	addr_out2;
	
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out2_last	= BUFFER_NUM_WORDS - 1;
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out2_zero	= {BUFFER_ADDR_BITS{1'b0}};
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out2_next	= (addr_out2 < addr_out2_last) ?
																		addr_out2 + 1'b1 : addr_out2_zero;
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out2_prev	= (addr_out2 > addr_out2_zero) ?
																		addr_out2 - 1'b1 : addr_out2_zero;
																		
	reg	[BUFFER_ADDR_BITS-1:0]	addr_out3;
	
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out3_last	= BUFFER_NUM_WORDS - 1;
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out3_zero	= {BUFFER_ADDR_BITS{1'b0}};
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out3_prev	= (addr_out3 > addr_out3_zero) ?
																		addr_out3 - 1'b1 : addr_out3_last;

	reg	[BUFFER_ADDR_BITS-1:0]	addr_out4;
	
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out4_last	= BUFFER_NUM_WORDS - 1;
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out4_zero	= {BUFFER_ADDR_BITS{1'b0}};
	wire	[BUFFER_ADDR_BITS-1:0]	addr_out4_prev	= (addr_out4 > addr_out4_zero) ?
																		addr_out4 - 1'b1 : addr_out4_last;

	
		// all four source operands are read in lock-step
	assign r_addr					= addr_in;
	assign s_addr					= addr_in;
	assign u_addr					= addr_in;
	assign v_addr					= addr_in;
		
		// result ports that are written in the same cycles share an address
	assign r_dbl_addr				= addr_out1;
	assign s_dbl_addr				= addr_out1;
	assign r_plus_s_addr			= addr_out2;
	assign u_half_addr			= addr_out3;
	assign v_half_addr			= addr_out3;
	assign u_minus_v_addr		= addr_out2;
	assign v_minus_u_addr		= addr_out2;
	assign u_minus_v_half_addr	= addr_out4;
	assign v_minus_u_half_addr	= addr_out4;
	
		
		//
		// Ready Flag
		//
		// The module is idle exactly while the cycle counter sits at zero.
		//
	assign rdy = (proc_cnt == proc_cnt_zero);
	
	
		//
		// Address Increment/Decrement Logic
		//
		// Each strobe is high while proc_cnt lies inside an inclusive
		// [start, stop] window. With N = BUFFER_NUM_WORDS the windows are:
		//
		//   inc_addr_in   :     1 ..  N-1   (addr_in walks 0 -> N-1)
		//   dec_addr_in   :     N .. 2N-2   (addr_in walks N-1 -> 0)
		//   inc_addr_out1 :     2 ..  N+1
		//   inc_addr_out2 :     3 ..  N+1
		//   dec_addr_out2 :   N+3 .. 2N+1
		//   dec_addr_out3 :   N+1 .. 2N
		//   dec_addr_out4 :   N+4 .. 2N+3
		//
	wire	inc_addr_in;
	wire	dec_addr_in;
	wire	inc_addr_out1;
	wire	inc_addr_out2;
	wire	dec_addr_out2;
	wire	dec_addr_out3;
	wire	dec_addr_out4;

	wire	[PROC_CNT_BITS-1:0]	cnt_inc_addr_in_start	= 0 * BUFFER_NUM_WORDS + 1;
	wire	[PROC_CNT_BITS-1:0]	cnt_inc_addr_in_stop		= 1 * BUFFER_NUM_WORDS - 1;
	
	wire	[PROC_CNT_BITS-1:0]	cnt_inc_addr_out1_start	= 0 * BUFFER_NUM_WORDS + 2;
	wire	[PROC_CNT_BITS-1:0]	cnt_inc_addr_out1_stop	= 1 * BUFFER_NUM_WORDS + 1;
	
	wire	[PROC_CNT_BITS-1:0]	cnt_inc_addr_out2_start	= 0 * BUFFER_NUM_WORDS + 3;
	wire	[PROC_CNT_BITS-1:0]	cnt_inc_addr_out2_stop	= 1 * BUFFER_NUM_WORDS + 1;
	
	wire	[PROC_CNT_BITS-1:0]	cnt_dec_addr_out2_start	= 1 * BUFFER_NUM_WORDS + 3;
	wire	[PROC_CNT_BITS-1:0]	cnt_dec_addr_out2_stop	= 2 * BUFFER_NUM_WORDS + 1;	

	wire	[PROC_CNT_BITS-1:0]	cnt_dec_addr_in_start	= 1 * BUFFER_NUM_WORDS + 0;
	wire	[PROC_CNT_BITS-1:0]	cnt_dec_addr_in_stop		= 2 * BUFFER_NUM_WORDS - 2;	
	
	wire	[PROC_CNT_BITS-1:0]	cnt_dec_addr_out3_start	= 1 * BUFFER_NUM_WORDS + 1;
	wire	[PROC_CNT_BITS-1:0]	cnt_dec_addr_out3_stop	= 2 * BUFFER_NUM_WORDS + 0;
	
	wire	[PROC_CNT_BITS-1:0]	cnt_dec_addr_out4_start	= 1 * BUFFER_NUM_WORDS + 4;
	wire	[PROC_CNT_BITS-1:0]	cnt_dec_addr_out4_stop	= 2 * BUFFER_NUM_WORDS + 3;	

	assign inc_addr_in   = (proc_cnt >= cnt_inc_addr_in_start)   && (proc_cnt <= cnt_inc_addr_in_stop);
	assign dec_addr_in   = (proc_cnt >= cnt_dec_addr_in_start)   && (proc_cnt <= cnt_dec_addr_in_stop);
	assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
	assign inc_addr_out2 = (proc_cnt >= cnt_inc_addr_out2_start) && (proc_cnt <= cnt_inc_addr_out2_stop);
	assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
	assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
	assign dec_addr_out4 = (proc_cnt >= cnt_dec_addr_out4_start) && (proc_cnt <= cnt_dec_addr_out4_stop);
	
	
		// Address registers (no reset of their own: they are re-initialised on
		// every clock while rdy is high, and rdy is high after reset because
		// proc_cnt is cleared).
		//
		// While busy:
		//   addr_in, addr_out2   - step up, step down, or hold
		//   addr_out1            - step up, otherwise forced back to zero
		//   addr_out3, addr_out4 - step down, otherwise forced back to the
		//                          last word
	always @(posedge clk) begin
		//
		if (rdy) begin
			//
			addr_in 		<= addr_in_zero;
			addr_out1	<= addr_out1_zero;
			addr_out2	<= addr_out2_zero;
			addr_out3	<= addr_out3_last;
			addr_out4	<= addr_out4_last;
			//
		end else begin
			//
			if (inc_addr_in)				addr_in <= addr_in_next;
			else if (dec_addr_in)		addr_in <= addr_in_prev;
			//
			if (inc_addr_out1)			addr_out1 <= addr_out1_next;
			else								addr_out1 <= addr_out1_zero;
			//
			if (inc_addr_out2)			addr_out2 <= addr_out2_next;
			else if (dec_addr_out2)		addr_out2 <= addr_out2_prev;
			//
			if (dec_addr_out3)			addr_out3 <= addr_out3_prev;
			else								addr_out3 <= addr_out3_last;
			//
			if (dec_addr_out4)			addr_out4 <= addr_out4_prev;
			else								addr_out4 <= addr_out4_last;
			//
		end
		//
	end
	
	
		//
		// Write Enable Logic
		//
		// Inclusive proc_cnt windows, with N = BUFFER_NUM_WORDS (each window
		// is exactly N cycles long, i.e. one write per word):
		//
		//   wren_out1 :     2 ..  N+1   (r_dbl, s_dbl)
		//   wren_out2 :     3 ..  N+2   (r_plus_s, u_minus_v, v_minus_u)
		//   wren_out3 :   N+1 .. 2N     (u_half, v_half)
		//   wren_out4 :   N+4 .. 2N+3   (u_minus_v_half, v_minus_u_half)
		//
	wire	wren_out1;
	wire	wren_out2;
	wire	wren_out3;
	wire	wren_out4;

	wire	[PROC_CNT_BITS-1:0]	cnt_wren_out1_start	= 0 * BUFFER_NUM_WORDS + 2;
	wire	[PROC_CNT_BITS-1:0]	cnt_wren_out1_stop	= 1 * BUFFER_NUM_WORDS + 1;
	
	wire	[PROC_CNT_BITS-1:0]	cnt_wren_out2_start	= 0 * BUFFER_NUM_WORDS + 3;
	wire	[PROC_CNT_BITS-1:0]	cnt_wren_out2_stop	= 1 * BUFFER_NUM_WORDS + 2;
	
	wire	[PROC_CNT_BITS-1:0]	cnt_wren_out3_start	= 1 * BUFFER_NUM_WORDS + 1;
	wire	[PROC_CNT_BITS-1:0]	cnt_wren_out3_stop	= 2 * BUFFER_NUM_WORDS + 0;
	
	wire	[PROC_CNT_BITS-1:0]	cnt_wren_out4_start	= 1 * BUFFER_NUM_WORDS + 4;
	wire	[PROC_CNT_BITS-1:0]	cnt_wren_out4_stop	= 2 * BUFFER_NUM_WORDS + 3;

	assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
	assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
	assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
	assign wren_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt <= cnt_wren_out4_stop);

	assign r_dbl_wren				= wren_out1;
	assign s_dbl_wren				= wren_out1;
	assign r_plus_s_wren			= wren_out2;
	assign u_half_wren			= wren_out3;
	assign v_half_wren			= wren_out3;
	assign u_minus_v_wren		= wren_out2;
	assign v_minus_u_wren		= wren_out2;
	assign u_minus_v_half_wren	= wren_out4;
	assign v_minus_u_half_wren	= wren_out4;


		//
		// Adder (r + s)
		//
		// The carry output is fed back to the carry input through the mask
		// below, chaining the 32-bit additions across consecutive words.
		// NOTE(review): adder32_wrapper is defined elsewhere; it takes clk, so
		// it is presumably registered -- confirm its latency.
		//
	wire	[31: 0]	add32_r_plus_s_sum_out;
	wire				add32_r_plus_s_carry_in;
	wire				add32_r_plus_s_carry_out;
	
	adder32_wrapper add32_r_plus_s
	(
		.clk		(clk),
		.a			(r_din),
		.b			(s_din),
		.s			(add32_r_plus_s_sum_out),
		.c_in		(add32_r_plus_s_carry_in),
		.c_out	(add32_r_plus_s_carry_out)
	);
	
		//
		// Subtractor (u - v)
		//
		// Borrow output is fed back to the borrow input through the mask
		// below, in the same way as the adder's carry.
		//
	wire	[31: 0]	sub32_u_minus_v_difference_out;
	wire				sub32_u_minus_v_borrow_in;
	wire				sub32_u_minus_v_borrow_out;
	
	subtractor32_wrapper sub32_u_minus_v
	(
		.clk		(clk),
		.a			(u_din),
		.b			(v_din),
		.d			(sub32_u_minus_v_difference_out),
		.b_in		(sub32_u_minus_v_borrow_in),
		.b_out	(sub32_u_minus_v_borrow_out)
	);
	
		//
		// Subtractor (v - u)
		//
		// Same as above with the operands swapped.
		//
	wire	[31: 0]	sub32_v_minus_u_difference_out;
	wire				sub32_v_minus_u_borrow_in;
	wire				sub32_v_minus_u_borrow_out;
	
	subtractor32_wrapper sub32_v_minus_u
	(
		.clk		(clk),
		.a			(v_din),
		.b			(u_din),
		.d			(sub32_v_minus_u_difference_out),
		.b_in		(sub32_v_minus_u_borrow_in),
		.b_out	(sub32_v_minus_u_borrow_out)
	);
	
	
		//
		// Carry & Borrow Masking Logic
		//
		// mask_carry_borrow is a registered flag, so it lags proc_cnt by one
		// clock. It is cleared for the cycles following proc_cnt values in
		// [cnt_wren_out1_start, cnt_wren_out1_stop) and set otherwise. While
		// it is set, the carry/borrow fed back into the adder and subtractors
		// is forced to zero, so every pass starts without a stale
		// carry/borrow.
		//
	reg	mask_carry_borrow;
	
	always @(posedge clk)
		//
		mask_carry_borrow <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
			1'b0 : 1'b1;
		
	assign add32_r_plus_s_carry_in   = add32_r_plus_s_carry_out   & ~mask_carry_borrow;
	assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_carry_borrow;
	assign sub32_v_minus_u_borrow_in = sub32_v_minus_u_borrow_out & ~mask_carry_borrow;
	
	
		//
		// Carry Bits
		//
		// One-bit registers that hold the bit shifted out of the current
		// input word so that it can be shifted into the next word one clock
		// later:
		//   *_dbl_carry  capture bit 31 (left shift, words in ascending order)
		//   *_half_carry capture bit 0  (right shift, words in descending order)
		//
		// The capture windows use "< stop" rather than "<= stop": outside the
		// window the registers are loaded with zero, so the first word of
		// each stream is combined with a zero carry.
		//
	reg	r_dbl_carry;
	reg	s_dbl_carry;
	reg	u_half_carry;
	reg	v_half_carry;
	reg	u_minus_v_half_carry;
	reg	v_minus_u_half_carry;
	
	always @(posedge clk) begin
		
		r_dbl_carry					<= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
											r_din[31] : 1'b0;
								
		s_dbl_carry					<= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
											s_din[31] : 1'b0;
								
		u_half_carry				<= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
											u_din[0] : 1'b0;
		
		v_half_carry				<= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
											v_din[0] : 1'b0;
									
		u_minus_v_half_carry		<= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
											u_minus_v_din[0] : 1'b0;
		
		v_minus_u_half_carry		<= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
											v_minus_u_din[0] : 1'b0;

	end
	
	
		//
		// Data Mapper
		//
		// *_dbl  : current word shifted left by one, previous word's bit 31
		//          inserted at bit 0
		// *_half : current word shifted right by one, previous word's bit 0
		//          inserted at bit 31
		// sums and differences come straight from the adder/subtractors
		//
	assign r_dbl_dout				= {r_din[30:0], r_dbl_carry};
	assign s_dbl_dout				= {s_din[30:0], s_dbl_carry};
	assign r_plus_s_dout			= add32_r_plus_s_sum_out;
	assign u_half_dout			= {u_half_carry, u_din[31:1]};
	assign v_half_dout			= {v_half_carry, v_din[31:1]};
	assign u_minus_v_dout		= sub32_u_minus_v_difference_out;
	assign v_minus_u_dout		= sub32_v_minus_u_difference_out;
	assign u_minus_v_half_dout	= {u_minus_v_half_carry, u_minus_v_din[31:1]};
	assign v_minus_u_half_dout	= {v_minus_u_half_carry, v_minus_u_din[31:1]};
	
	
		//
		// Primary Counter Logic
		//
		// Asynchronous active-low reset to zero. Once a pass is running
		// (!rdy) the counter advances every clock regardless of ena; while
		// idle (rdy) it only leaves zero when ena is high.
		//
	always @(posedge clk or negedge rst_n)
		//
		if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
		else begin
			if (!rdy)		proc_cnt <= proc_cnt_next;
			else if (ena)	proc_cnt <= proc_cnt_next;
		end


endmodule