`timescale 1ns / 1ps
//
// modinv_helper_reduce_precalc
//
// Sequencer that, once started via 'ena', walks a multi-word buffer S and an
// operand Q (32-bit words) and produces three results:
//
//   R <- sum of S and Q from the 32-bit adder (carry chained between words)
//   U <- {s_half_carry, s_din[31:1]}         (S shifted right by one bit)
//   V <- {s_plus_q_half_carry, r_din[31:1]}  (R shifted right by one bit)
//
// It also latches two flags at a fixed cycle: s_is_odd (bit 0 of the S word
// on s_din at that moment) and k_is_nul (k == 0).
//
// Handshake: 'rdy' is high while the cycle counter is zero. Asserting 'ena'
// while 'rdy' is high starts one pass of PROC_NUM_CYCLES clocks, after which
// the counter wraps to zero and 'rdy' goes high again.
//
// NOTE(review): the schedule constants below appear to assume one clock of
// read latency on the S/Q/R memories and a fixed latency through
// adder32_wrapper; neither is visible in this file -- confirm before
// changing any of the start/stop offsets.
//
module modinv_helper_reduce_precalc
    (
        clk, rst_n,
        ena, rdy,
        k,
        s_is_odd, k_is_nul,
        r_addr, r_din, r_wren, r_dout,
        s_addr, s_din,
        u_addr, u_wren, u_dout,
        v_addr, v_wren, v_dout,
        q_addr, q_din
    );

    //
    // Parameters
    //
    parameter OPERAND_NUM_WORDS = 8;    // number of 32-bit words addressed via q_addr
    parameter OPERAND_ADDR_BITS = 3;    // width of q_addr
    parameter BUFFER_NUM_WORDS  = 9;    // number of 32-bit words addressed via r/s/u/v_addr
    parameter BUFFER_ADDR_BITS  = 4;    // width of r/s/u/v_addr
    parameter K_NUM_BITS        = 10;   // width of k

    //
    // clog2
    //
    // Forward slash is used as the path separator: a backslash only resolves
    // on Windows hosts, whereas '/' is accepted on both Windows and POSIX.
    //
    `include "../modinv_clog2.v"

    //
    // Constants
    //
    // One pass takes 2 * BUFFER_NUM_WORDS + 4 clocks (counter value 0 is the
    // idle/ready state).
    //
    localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;
    localparam PROC_CNT_BITS   = clog2(PROC_NUM_CYCLES);

    //
    // Ports
    //
    input  wire                         clk;        // clock
    input  wire                         rst_n;      // asynchronous reset, active low (counter only)

    input  wire                         ena;        // start request, sampled while rdy is high
    output wire                         rdy;        // high while idle (proc_cnt == 0)

    input  wire [       K_NUM_BITS-1:0] k;          // compared against zero for k_is_nul

    output wire                         s_is_odd;   // latched s_din[0]
    output wire                         k_is_nul;   // latched (k == 0)

    output wire [ BUFFER_ADDR_BITS-1:0] r_addr;
    output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
    output wire [ BUFFER_ADDR_BITS-1:0] u_addr;
    output wire [ BUFFER_ADDR_BITS-1:0] v_addr;
    output wire [OPERAND_ADDR_BITS-1:0] q_addr;

    input  wire [               32-1:0] r_din;
    input  wire [               32-1:0] s_din;
    input  wire [               32-1:0] q_din;

    output wire                         r_wren;
    output wire                         u_wren;
    output wire                         v_wren;

    output wire [               32-1:0] r_dout;
    output wire [               32-1:0] u_dout;
    output wire [               32-1:0] v_dout;

    //
    // Counter
    //
    // proc_cnt_next counts up to PROC_NUM_CYCLES - 1 and then wraps to zero.
    //
    reg  [PROC_CNT_BITS-1:0] proc_cnt;

    wire [PROC_CNT_BITS-1:0] proc_cnt_max  = PROC_NUM_CYCLES - 1;
    wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
    wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
                                             proc_cnt + 1'b1 : proc_cnt_zero;

    //
    // Addresses
    //
    //   addr_in_buf -> s_addr  (counts up, then back down)
    //   addr_in_op  -> q_addr  (counts up only)
    //   addr_out1   -> r_addr  (counts up while R is written, then back down)
    //   addr_out2   -> u_addr  (counts down from the last word)
    //   addr_out3   -> v_addr  (counts down from the last word)
    //
    reg  [ BUFFER_ADDR_BITS-1:0] addr_in_buf;
    reg  [OPERAND_ADDR_BITS-1:0] addr_in_op;
    reg  [ BUFFER_ADDR_BITS-1:0] addr_out1;
    reg  [ BUFFER_ADDR_BITS-1:0] addr_out2;
    reg  [ BUFFER_ADDR_BITS-1:0] addr_out3;

    // S read address: *_next wraps from last to zero, *_prev saturates at zero
    wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_last = BUFFER_NUM_WORDS - 1;
    wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_zero = {BUFFER_ADDR_BITS{1'b0}};
    wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_next = (addr_in_buf < addr_in_buf_last) ?
                                                    addr_in_buf + 1'b1 : addr_in_buf_zero;
    wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_prev = (addr_in_buf > addr_in_buf_zero) ?
                                                    addr_in_buf - 1'b1 : addr_in_buf_zero;

    // Q read address: wraps from last to zero
    wire [OPERAND_ADDR_BITS-1:0] addr_in_op_last  = OPERAND_NUM_WORDS - 1;
    wire [OPERAND_ADDR_BITS-1:0] addr_in_op_zero  = {OPERAND_ADDR_BITS{1'b0}};
    wire [OPERAND_ADDR_BITS-1:0] addr_in_op_next  = (addr_in_op < addr_in_op_last) ?
                                                    addr_in_op + 1'b1 : addr_in_op_zero;

    // R address: *_next wraps from last to zero, *_prev saturates at zero
    wire [ BUFFER_ADDR_BITS-1:0] addr_out1_last   = BUFFER_NUM_WORDS - 1;
    wire [ BUFFER_ADDR_BITS-1:0] addr_out1_zero   = {BUFFER_ADDR_BITS{1'b0}};
    wire [ BUFFER_ADDR_BITS-1:0] addr_out1_next   = (addr_out1 < addr_out1_last) ?
                                                    addr_out1 + 1'b1 : addr_out1_zero;
    wire [ BUFFER_ADDR_BITS-1:0] addr_out1_prev   = (addr_out1 > addr_out1_zero) ?
                                                    addr_out1 - 1'b1 : addr_out1_zero;

    // U address: *_prev wraps from zero back to last (unlike R/S, which saturate)
    wire [ BUFFER_ADDR_BITS-1:0] addr_out2_last   = BUFFER_NUM_WORDS - 1;
    wire [ BUFFER_ADDR_BITS-1:0] addr_out2_zero   = {BUFFER_ADDR_BITS{1'b0}};
    wire [ BUFFER_ADDR_BITS-1:0] addr_out2_prev   = (addr_out2 > addr_out2_zero) ?
                                                    addr_out2 - 1'b1 : addr_out2_last;

    // V address: *_prev wraps from zero back to last
    wire [ BUFFER_ADDR_BITS-1:0] addr_out3_last   = BUFFER_NUM_WORDS - 1;
    wire [ BUFFER_ADDR_BITS-1:0] addr_out3_zero   = {BUFFER_ADDR_BITS{1'b0}};
    wire [ BUFFER_ADDR_BITS-1:0] addr_out3_prev   = (addr_out3 > addr_out3_zero) ?
                                                    addr_out3 - 1'b1 : addr_out3_last;

    assign s_addr = addr_in_buf;
    assign q_addr = addr_in_op;
    assign r_addr = addr_out1;
    assign u_addr = addr_out2;
    assign v_addr = addr_out3;

    //
    // Ready Flag
    //
    assign rdy = (proc_cnt == proc_cnt_zero);

    //
    // Address Increment/Decrement Logic
    //
    // Each address register moves only while proc_cnt lies inside its
    // inclusive [start, stop] window. All windows are expressed relative to
    // BUFFER_NUM_WORDS / OPERAND_NUM_WORDS so they track the parameters.
    //
    wire inc_addr_buf_in;
    wire dec_addr_buf_in;
    wire inc_addr_op_in;
    wire inc_addr_out1;
    wire dec_addr_out1;
    wire dec_addr_out2;
    wire dec_addr_out3;

    // cycle at which s_is_odd / k_is_nul are latched
    wire [PROC_CNT_BITS-1:0] cnt_calc_flags            = 0 * BUFFER_NUM_WORDS  + 2;

    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_start = 0 * BUFFER_NUM_WORDS  + 1;
    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_stop  = 1 * BUFFER_NUM_WORDS  - 1;

    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_start = 1 * BUFFER_NUM_WORDS  + 0;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_stop  = 2 * BUFFER_NUM_WORDS  - 2;

    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_start  = 0 * OPERAND_NUM_WORDS + 1;
    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_stop   = 1 * OPERAND_NUM_WORDS + 0;

    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start   = 0 * BUFFER_NUM_WORDS  + 3;
    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop    = 1 * BUFFER_NUM_WORDS  + 1;

    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_start   = 1 * BUFFER_NUM_WORDS  + 3;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_stop    = 2 * BUFFER_NUM_WORDS  + 1;

    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start   = 1 * BUFFER_NUM_WORDS  + 1;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop    = 2 * BUFFER_NUM_WORDS  + 0;

    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start   = 1 * BUFFER_NUM_WORDS  + 4;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop    = 2 * BUFFER_NUM_WORDS  + 3;

    assign inc_addr_buf_in = (proc_cnt >= cnt_inc_addr_buf_in_start) && (proc_cnt <= cnt_inc_addr_buf_in_stop);
    assign dec_addr_buf_in = (proc_cnt >= cnt_dec_addr_buf_in_start) && (proc_cnt <= cnt_dec_addr_buf_in_stop);
    assign inc_addr_op_in  = (proc_cnt >= cnt_inc_addr_op_in_start)  && (proc_cnt <= cnt_inc_addr_op_in_stop);
    assign inc_addr_out1   = (proc_cnt >= cnt_inc_addr_out1_start)   && (proc_cnt <= cnt_inc_addr_out1_stop);
    assign dec_addr_out1   = (proc_cnt >= cnt_dec_addr_out1_start)   && (proc_cnt <= cnt_dec_addr_out1_stop);
    assign dec_addr_out2   = (proc_cnt >= cnt_dec_addr_out2_start)   && (proc_cnt <= cnt_dec_addr_out2_stop);
    assign dec_addr_out3   = (proc_cnt >= cnt_dec_addr_out3_start)   && (proc_cnt <= cnt_dec_addr_out3_stop);

    always @(posedge clk) begin
        //
        if (rdy) begin
            //
            // Idle: park every address at its starting position. These
            // registers have no reset of their own; they are initialised here
            // because proc_cnt resets to zero, which makes rdy high.
            //
            addr_in_buf <= addr_in_buf_zero;
            addr_in_op  <= addr_in_op_zero;
            addr_out1   <= addr_out1_zero;
            addr_out2   <= addr_out2_last;
            addr_out3   <= addr_out3_last;
            //
        end else begin
            //
            // S and R addresses hold their value outside of their windows...
            //
            if (inc_addr_buf_in)      addr_in_buf <= addr_in_buf_next;
            else if (dec_addr_buf_in) addr_in_buf <= addr_in_buf_prev;
            //
            // ...whereas Q, U and V addresses snap back to their start value.
            //
            if (inc_addr_op_in)       addr_in_op  <= addr_in_op_next;
            else                      addr_in_op  <= addr_in_op_zero;
            //
            if (inc_addr_out1)        addr_out1   <= addr_out1_next;
            else if (dec_addr_out1)   addr_out1   <= addr_out1_prev;
            //
            if (dec_addr_out2)        addr_out2   <= addr_out2_prev;
            else                      addr_out2   <= addr_out2_last;
            //
            if (dec_addr_out3)        addr_out3   <= addr_out3_prev;
            else                      addr_out3   <= addr_out3_last;
            //
        end
        //
    end

    //
    // Write Enable Logic
    //
    // Each window spans BUFFER_NUM_WORDS consecutive cycles, i.e. one write
    // per buffer word.
    //
    wire wren_out1;
    wire wren_out2;
    wire wren_out3;

    wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 3;
    wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop  = 1 * BUFFER_NUM_WORDS + 2;

    wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 1 * BUFFER_NUM_WORDS + 1;
    wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop  = 2 * BUFFER_NUM_WORDS + 0;

    wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 4;
    wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop  = 2 * BUFFER_NUM_WORDS + 3;

    assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
    assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
    assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);

    assign r_wren = wren_out1;
    assign u_wren = wren_out2;
    assign v_wren = wren_out3;

    //
    // Adder (s + q)
    //
    // NOTE(review): the instance is named add32_r_plus_s although it adds
    // s_din and the (masked) q_din. The name is kept as-is because renaming
    // an instance changes hierarchical paths that constraints or testbenches
    // outside this file may reference.
    //
    wire [31: 0] q_din_masked;
    wire [31: 0] add32_s_plus_q_sum_out;
    wire         add32_s_plus_q_carry_in;
    wire         add32_s_plus_q_carry_out;

    adder32_wrapper add32_r_plus_s
        (
            .clk    (clk),
            .a      (s_din),
            .b      (q_din_masked),
            .s      (add32_s_plus_q_sum_out),
            .c_in   (add32_s_plus_q_carry_in),
            .c_out  (add32_s_plus_q_carry_out)
        );

    //
    // Carry Masking Logic
    //
    // The adder's carry-out is fed back as carry-in only while proc_cnt is in
    // [cnt_wren_out1_start, cnt_wren_out1_stop); at all other times the
    // carry-in is forced to zero.
    //
    wire mask_carry;

    assign mask_carry = ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? 1'b0 : 1'b1;

    //
    // Addend Masking Logic
    //
    // q_din is replaced by zero on the cycle after s_addr pointed at the last
    // buffer word. With the default parameters S has one more word than Q, so
    // this presumably supplies a zero addend for the extra top word of S
    // (assumes one clock of memory read latency -- TODO confirm).
    //
    reg q_din_mask;

    always @(posedge clk)
        q_din_mask <= (addr_in_buf == addr_in_buf_last) ? 1'b1 : 1'b0;

    assign q_din_masked = q_din_mask ? {32{1'b0}} : q_din;

    assign add32_s_plus_q_carry_in = add32_s_plus_q_carry_out & ~mask_carry;

    //
    // Carry Bits
    //
    // Registered bit 0 of the word currently on s_din / r_din. Because the
    // U and V addresses run from the last word downwards, the registered bit
    // becomes bit 31 of the next (lower) word written, which implements a
    // multi-word shift right by one. Outside of the respective window the
    // bit is cleared, so the first word written gets a zero in bit 31.
    //
    reg s_half_carry;
    reg s_plus_q_half_carry;

    always @(posedge clk) begin
        //
        s_half_carry        <= ((proc_cnt >= cnt_wren_out2_start) && (proc_cnt < cnt_wren_out2_stop)) ?
                               s_din[0] : 1'b0;
        //
        s_plus_q_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
                               r_din[0] : 1'b0;
        //
    end

    //
    // Data Mapper
    //
    assign r_dout = add32_s_plus_q_sum_out;
    assign u_dout = {s_half_carry,        s_din[31:1]};
    assign v_dout = {s_plus_q_half_carry, r_din[31:1]};

    //
    // Primary Counter Logic
    //
    // The counter advances on every clock while busy (!rdy); while idle it
    // advances only when 'ena' is asserted, which starts a new pass.
    //
    always @(posedge clk or negedge rst_n)
        //
        if (rst_n == 1'b0)    proc_cnt <= proc_cnt_zero;
        else if (!rdy || ena) proc_cnt <= proc_cnt_next;

    //
    // Output Flags
    //
    // Both flags are captured once per pass, when proc_cnt == cnt_calc_flags,
    // and hold their value until the next pass. They have no reset, so they
    // are undefined until the first pass has reached that cycle.
    //
    reg s_is_odd_reg;
    reg k_is_nul_reg;

    assign s_is_odd = s_is_odd_reg;
    assign k_is_nul = k_is_nul_reg;

    always @(posedge clk)
        //
        if (proc_cnt == cnt_calc_flags) begin
            s_is_odd_reg <= s_din[0];
            k_is_nul_reg <= (k == {K_NUM_BITS{1'b0}}) ? 1'b1 : 1'b0;
        end

endmodule