// cgit page residue: about | summary | refs | log | blame | commit | diff
// path: root/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v
// blob: b64b8e734c4f9c97528a0337f89e9c7941431e1b (plain) (tree)







































































































































































































































































































































                                                                                                                                                                           
`timescale 1ns / 1ps

//
// modinv_helper_reduce_precalc
//
// Fixed-length sequencer built around one cycle counter (proc_cnt).
//
//   * rdy is high while proc_cnt is zero.  Sampling ena high while rdy is
//     high starts a run; the counter then free-runs up to
//     PROC_NUM_CYCLES - 1 and wraps back to zero.
//   * rst_n is an asynchronous, active-low reset of proc_cnt only; every
//     other register is re-armed synchronously while rdy is high or is
//     simply overwritten during a run.
//   * During a run the module drives five address pointers (s, q, r, u, v)
//     and three write strobes (r, u, v) in fixed windows of proc_cnt:
//       - r_dout is the sum output of the 32-bit adder fed with s_din and
//         the (optionally zeroed) q_din;
//       - u_dout is s_din shifted right by one bit, bit 31 being filled
//         from a registered copy of an earlier s_din[0];
//       - v_dout is r_din shifted right by one bit, bit 31 being filled
//         from a registered copy of an earlier r_din[0].
//   * s_is_odd and k_is_nul are sampled once per run, when proc_cnt equals
//     T_SAMPLE_FLAGS.
//
// NOTE(review): read latency of the attached memories and the latency of
// adder32_wrapper are not visible in this file; the window offsets below
// presumably account for them -- confirm against those modules.
//
module modinv_helper_reduce_precalc
  (
   clk, rst_n,
   ena, rdy,

   k,

   s_is_odd, k_is_nul,

   r_addr, r_din, r_wren, r_dout,
   s_addr, s_din,
   u_addr,        u_wren, u_dout,
   v_addr,        v_wren, v_dout,
   q_addr, q_din
   );


   //
   // Parameters
   //
   parameter OPERAND_NUM_WORDS = 8;    // number of words addressed through q_addr
   parameter OPERAND_ADDR_BITS = 3;    // width of q_addr
   parameter BUFFER_NUM_WORDS  = 9;    // number of words addressed through r/s/u/v_addr
   parameter BUFFER_ADDR_BITS  = 4;    // width of r/s/u/v_addr
   parameter K_NUM_BITS        = 10;   // width of k


   //
   // clog2() helper (pulled in from a shared file)
   //
`include "../modinv_clog2.v"


   //
   // Length of one run and width of the cycle counter
   //
   localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;
   localparam PROC_CNT_BITS   = clog2(PROC_NUM_CYCLES);


   //
   // Ports
   //
   input  wire                         clk;
   input  wire                         rst_n;      // async, active-low, resets proc_cnt
   input  wire                         ena;        // start request, honoured while rdy
   output wire                         rdy;        // high while the counter is at zero

   input  wire [K_NUM_BITS-1:0]        k;

   output wire                         s_is_odd;   // registered copy of s_din[0]
   output wire                         k_is_nul;   // registered result of (k == 0)

   output wire [BUFFER_ADDR_BITS-1:0]  r_addr;
   input  wire [31:0]                  r_din;
   output wire                         r_wren;
   output wire [31:0]                  r_dout;

   output wire [BUFFER_ADDR_BITS-1:0]  s_addr;
   input  wire [31:0]                  s_din;

   output wire [BUFFER_ADDR_BITS-1:0]  u_addr;
   output wire                         u_wren;
   output wire [31:0]                  u_dout;

   output wire [BUFFER_ADDR_BITS-1:0]  v_addr;
   output wire                         v_wren;
   output wire [31:0]                  v_dout;

   output wire [OPERAND_ADDR_BITS-1:0] q_addr;
   input  wire [31:0]                  q_din;


   //
   // Sized constants.  Each one carries an explicit range so that its value
   // is truncated to the width of the register it is compared against.
   //
   localparam [PROC_CNT_BITS-1:0]     CNT_IDLE     = {PROC_CNT_BITS{1'b0}};
   localparam [PROC_CNT_BITS-1:0]     CNT_LAST     = PROC_NUM_CYCLES - 1;

   localparam [BUFFER_ADDR_BITS-1:0]  BUF_IDX_ZERO = {BUFFER_ADDR_BITS{1'b0}};
   localparam [BUFFER_ADDR_BITS-1:0]  BUF_IDX_LAST = BUFFER_NUM_WORDS - 1;

   localparam [OPERAND_ADDR_BITS-1:0] OP_IDX_ZERO  = {OPERAND_ADDR_BITS{1'b0}};
   localparam [OPERAND_ADDR_BITS-1:0] OP_IDX_LAST  = OPERAND_NUM_WORDS - 1;


   //
   // Timetable: values of proc_cnt that open/close each activity window
   //
   localparam [PROC_CNT_BITS-1:0] T_SAMPLE_FLAGS = 2;

   // s pointer: counts up first, then back down
   localparam [PROC_CNT_BITS-1:0] T_S_INC_FIRST  = 1;
   localparam [PROC_CNT_BITS-1:0] T_S_INC_LAST   = BUFFER_NUM_WORDS - 1;
   localparam [PROC_CNT_BITS-1:0] T_S_DEC_FIRST  = BUFFER_NUM_WORDS;
   localparam [PROC_CNT_BITS-1:0] T_S_DEC_LAST   = 2 * BUFFER_NUM_WORDS - 2;

   // q pointer: counts up only
   localparam [PROC_CNT_BITS-1:0] T_Q_INC_FIRST  = 1;
   localparam [PROC_CNT_BITS-1:0] T_Q_INC_LAST   = OPERAND_NUM_WORDS;

   // r pointer: counts up first, then back down; r write strobe
   localparam [PROC_CNT_BITS-1:0] T_R_INC_FIRST  = 3;
   localparam [PROC_CNT_BITS-1:0] T_R_INC_LAST   = BUFFER_NUM_WORDS + 1;
   localparam [PROC_CNT_BITS-1:0] T_R_DEC_FIRST  = BUFFER_NUM_WORDS + 3;
   localparam [PROC_CNT_BITS-1:0] T_R_DEC_LAST   = 2 * BUFFER_NUM_WORDS + 1;
   localparam [PROC_CNT_BITS-1:0] T_R_WR_FIRST   = 3;
   localparam [PROC_CNT_BITS-1:0] T_R_WR_LAST    = BUFFER_NUM_WORDS + 2;

   // u and v: one window each, shared by pointer decrement and write strobe
   localparam [PROC_CNT_BITS-1:0] T_U_FIRST      = BUFFER_NUM_WORDS + 1;
   localparam [PROC_CNT_BITS-1:0] T_U_LAST       = 2 * BUFFER_NUM_WORDS;
   localparam [PROC_CNT_BITS-1:0] T_V_FIRST      = BUFFER_NUM_WORDS + 4;
   localparam [PROC_CNT_BITS-1:0] T_V_LAST       = 2 * BUFFER_NUM_WORDS + 3;


   //
   // Cycle counter
   //
   reg [PROC_CNT_BITS-1:0] proc_cnt;

   assign rdy = (proc_cnt == CNT_IDLE);

   // Idle: wait for ena.  Busy: advance every clock, wrap after CNT_LAST.
   always @(posedge clk or negedge rst_n)
     //
     if (!rst_n)
       proc_cnt <= CNT_IDLE;
     else if (!rdy || ena)
       proc_cnt <= (proc_cnt < CNT_LAST) ? proc_cnt + 1'b1 : CNT_IDLE;


   //
   // Window decoders (closed intervals of proc_cnt)
   //
   wire s_ptr_up   = (proc_cnt >= T_S_INC_FIRST) && (proc_cnt <= T_S_INC_LAST);
   wire s_ptr_down = (proc_cnt >= T_S_DEC_FIRST) && (proc_cnt <= T_S_DEC_LAST);
   wire q_ptr_up   = (proc_cnt >= T_Q_INC_FIRST) && (proc_cnt <= T_Q_INC_LAST);
   wire r_ptr_up   = (proc_cnt >= T_R_INC_FIRST) && (proc_cnt <= T_R_INC_LAST);
   wire r_ptr_down = (proc_cnt >= T_R_DEC_FIRST) && (proc_cnt <= T_R_DEC_LAST);
   wire r_window   = (proc_cnt >= T_R_WR_FIRST)  && (proc_cnt <= T_R_WR_LAST);
   wire u_window   = (proc_cnt >= T_U_FIRST)     && (proc_cnt <= T_U_LAST);
   wire v_window   = (proc_cnt >= T_V_FIRST)     && (proc_cnt <= T_V_LAST);

   // Same windows with the final count excluded; they gate the bits that
   // are handed from one word to the next.
   wire r_chain    = (proc_cnt >= T_R_WR_FIRST)  && (proc_cnt <  T_R_WR_LAST);
   wire u_chain    = (proc_cnt >= T_U_FIRST)     && (proc_cnt <  T_U_LAST);
   wire v_chain    = (proc_cnt >= T_V_FIRST)     && (proc_cnt <  T_V_LAST);


   //
   // Address pointers
   //
   reg [BUFFER_ADDR_BITS-1:0]  s_ptr;
   reg [OPERAND_ADDR_BITS-1:0] q_ptr;
   reg [BUFFER_ADDR_BITS-1:0]  r_ptr;
   reg [BUFFER_ADDR_BITS-1:0]  u_ptr;
   reg [BUFFER_ADDR_BITS-1:0]  v_ptr;

   assign s_addr = s_ptr;
   assign q_addr = q_ptr;
   assign r_addr = r_ptr;
   assign u_addr = u_ptr;
   assign v_addr = v_ptr;

   // s: armed at zero; up-count wraps to zero after the last index,
   // down-count stops at zero; holds its value outside both windows.
   always @(posedge clk)
     //
     if (rdy)
       s_ptr <= BUF_IDX_ZERO;
     else if (s_ptr_up)
       s_ptr <= (s_ptr < BUF_IDX_LAST) ? s_ptr + 1'b1 : BUF_IDX_ZERO;
     else if (s_ptr_down)
       s_ptr <= (s_ptr > BUF_IDX_ZERO) ? s_ptr - 1'b1 : BUF_IDX_ZERO;

   // q: armed at zero; up-count wraps to zero after the last index;
   // forced back to zero whenever its window is closed.
   always @(posedge clk)
     //
     if (rdy)
       q_ptr <= OP_IDX_ZERO;
     else if (q_ptr_up)
       q_ptr <= (q_ptr < OP_IDX_LAST) ? q_ptr + 1'b1 : OP_IDX_ZERO;
     else
       q_ptr <= OP_IDX_ZERO;

   // r: same up/down/hold behaviour as s, on its own windows.
   always @(posedge clk)
     //
     if (rdy)
       r_ptr <= BUF_IDX_ZERO;
     else if (r_ptr_up)
       r_ptr <= (r_ptr < BUF_IDX_LAST) ? r_ptr + 1'b1 : BUF_IDX_ZERO;
     else if (r_ptr_down)
       r_ptr <= (r_ptr > BUF_IDX_ZERO) ? r_ptr - 1'b1 : BUF_IDX_ZERO;

   // u: armed at the last index; down-count wraps from zero to the last
   // index; forced to the last index whenever its window is closed.
   always @(posedge clk)
     //
     if (rdy)
       u_ptr <= BUF_IDX_LAST;
     else if (u_window)
       u_ptr <= (u_ptr > BUF_IDX_ZERO) ? u_ptr - 1'b1 : BUF_IDX_LAST;
     else
       u_ptr <= BUF_IDX_LAST;

   // v: identical scheme to u, on the v window.
   always @(posedge clk)
     //
     if (rdy)
       v_ptr <= BUF_IDX_LAST;
     else if (v_window)
       v_ptr <= (v_ptr > BUF_IDX_ZERO) ? v_ptr - 1'b1 : BUF_IDX_LAST;
     else
       v_ptr <= BUF_IDX_LAST;


   //
   // Write strobes
   //
   assign r_wren = r_window;
   assign u_wren = u_window;
   assign v_wren = v_window;


   //
   // Addend masking
   //
   // q_zero is a one-clock-delayed flag telling that the s pointer sat on
   // the last buffer index; while it is set the adder sees zero instead of
   // q_din.
   // NOTE(review): presumably this covers the buffer word that has no
   // counterpart in the shorter q operand -- confirm.
   //
   reg         q_zero;
   wire [31:0] q_addend = q_zero ? {32{1'b0}} : q_din;

   always @(posedge clk)
     q_zero <= (s_ptr == BUF_IDX_LAST);


   //
   // Adder (s + q)
   //
   // The adder's carry output is fed back to its carry input only while
   // r_chain is high; at every other count the carry input is zero.
   //
   wire [31:0] sum_s_plus_q;
   wire        carry_from_adder;
   wire        carry_to_adder = carry_from_adder & r_chain;

   adder32_wrapper add32_r_plus_s
     (
      .clk   (clk),
      .a     (s_din),
      .b     (q_addend),
      .s     (sum_s_plus_q),
      .c_in  (carry_to_adder),
      .c_out (carry_from_adder)
      );


   //
   // Shifted-out bits
   //
   // Each register captures bit 0 of the word currently on the input bus
   // and presents it one clock later as bit 31 of the outgoing word.  It is
   // cleared outside its chain window.
   //
   reg u_msb_fill;   // from s_din[0]
   reg v_msb_fill;   // from r_din[0]

   always @(posedge clk) begin
      //
      u_msb_fill <= u_chain ? s_din[0] : 1'b0;
      v_msb_fill <= v_chain ? r_din[0] : 1'b0;
      //
   end


   //
   // Output data
   //
   assign r_dout = sum_s_plus_q;
   assign u_dout = {u_msb_fill, s_din[31:1]};
   assign v_dout = {v_msb_fill, r_din[31:1]};


   //
   // Output flags (captured once per run, held otherwise; no reset)
   //
   reg s_is_odd_reg;
   reg k_is_nul_reg;

   always @(posedge clk)
     //
     if (proc_cnt == T_SAMPLE_FLAGS) begin
        s_is_odd_reg <= s_din[0];
        k_is_nul_reg <= (k == {K_NUM_BITS{1'b0}});
     end

   assign s_is_odd = s_is_odd_reg;
   assign k_is_nul = k_is_nul_reg;


endmodule