`timescale 1ns / 1ps
//
// modinv_helper_invert_precalc
//
// Sequencer plus a small datapath that streams the multi-word operands R, S,
// U and V (32-bit words, BUFFER_NUM_WORDS words each) out of their source
// buffers and writes nine derived quantities into destination buffers:
//
//   r_dbl, s_dbl                     R, S shifted left by one bit
//   r_plus_s                         output of the (R, S) adder
//   u_half, v_half                   U, V shifted right by one bit
//   u_minus_v, v_minus_u             outputs of the two subtractors
//   u_minus_v_half, v_minus_u_half   the two differences, read back through
//                                    u_minus_v_din / v_minus_u_din, shifted
//                                    right by one bit
//
// Handshake: rdy is high while the cycle counter is zero. Asserting ena while
// rdy is high starts one pass; the counter then runs through all
// PROC_NUM_CYCLES states on its own and returns to zero. If ena is still high
// on the cycle the counter returns to zero, a new pass starts immediately.
//
// Word order: the left-shift/add/subtract pass walks addresses upwards and
// carries bit 31 of one word into bit 0 of the next, i.e. lower addresses are
// treated as less significant words. The right-shift passes walk addresses
// downwards and carry bit 0 of one word into bit 31 of the next.
//
// NOTE(review): the one-cycle offsets between the read-address windows and
// the matching write windows suggest that the buffers have a one-cycle read
// latency and that adder32_wrapper / subtractor32_wrapper register their
// results - confirm against the memory and wrapper implementations, which are
// not part of this file.
//
module modinv_helper_invert_precalc
(
    clk, rst_n,
    ena, rdy,
    r_addr, r_din,
    s_addr, s_din,
    u_addr, u_din,
    v_addr, v_din,
    r_dbl_addr, r_dbl_wren, r_dbl_dout,
    s_dbl_addr, s_dbl_wren, s_dbl_dout,
    r_plus_s_addr, r_plus_s_wren, r_plus_s_dout,
    u_half_addr, u_half_wren, u_half_dout,
    v_half_addr, v_half_wren, v_half_dout,
    u_minus_v_addr, u_minus_v_wren, u_minus_v_dout, u_minus_v_din,
    v_minus_u_addr, v_minus_u_wren, v_minus_u_dout, v_minus_u_din,
    u_minus_v_half_addr, u_minus_v_half_wren, u_minus_v_half_dout,
    v_minus_u_half_addr, v_minus_u_half_wren, v_minus_u_half_dout
);

    //
    // Parameters
    //
    parameter BUFFER_NUM_WORDS = 9;     // number of 32-bit words per operand
    parameter BUFFER_ADDR_BITS = 4;     // width of every buffer address bus

    //
    // clog2
    //
    // Forward slash is used as the path separator so that the include
    // resolves on non-Windows hosts as well (a backslash only works on
    // Windows). The included file is expected to provide clog2().
    //
    `include "../modinv_clog2.v"

    //
    // Constants
    //
    localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;     // counter states per pass
    localparam PROC_CNT_BITS   = clog2(PROC_NUM_CYCLES);

    //
    // Ports
    //
    input   wire                            clk;
    input   wire                            rst_n;      // asynchronous, active low (counter only)
    input   wire                            ena;
    output  wire                            rdy;

    // source operands (read side)
    output  wire    [BUFFER_ADDR_BITS-1:0]  r_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  s_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  u_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  v_addr;
    input   wire    [              32-1:0]  r_din;
    input   wire    [              32-1:0]  s_din;
    input   wire    [              32-1:0]  u_din;
    input   wire    [              32-1:0]  v_din;

    // destination buffers: addresses
    output  wire    [BUFFER_ADDR_BITS-1:0]  r_dbl_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  s_dbl_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  r_plus_s_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  u_half_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  v_half_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  u_minus_v_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  v_minus_u_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  u_minus_v_half_addr;
    output  wire    [BUFFER_ADDR_BITS-1:0]  v_minus_u_half_addr;

    // destination buffers: write data
    output  wire    [              32-1:0]  r_dbl_dout;
    output  wire    [              32-1:0]  s_dbl_dout;
    output  wire    [              32-1:0]  r_plus_s_dout;
    output  wire    [              32-1:0]  u_half_dout;
    output  wire    [              32-1:0]  v_half_dout;
    output  wire    [              32-1:0]  u_minus_v_dout;
    output  wire    [              32-1:0]  v_minus_u_dout;
    output  wire    [              32-1:0]  u_minus_v_half_dout;
    output  wire    [              32-1:0]  v_minus_u_half_dout;

    // destination buffers: write enables
    output  wire                            r_dbl_wren;
    output  wire                            s_dbl_wren;
    output  wire                            r_plus_s_wren;
    output  wire                            u_half_wren;
    output  wire                            v_half_wren;
    output  wire                            u_minus_v_wren;
    output  wire                            v_minus_u_wren;
    output  wire                            u_minus_v_half_wren;
    output  wire                            v_minus_u_half_wren;

    // read-back data of the two difference buffers (source of the *_half results)
    input   wire    [              32-1:0]  u_minus_v_din;
    input   wire    [              32-1:0]  v_minus_u_din;

    //
    // Counter
    //
    // proc_cnt is the single time base of the module: every address window,
    // write-enable window and carry window below is a range of proc_cnt values.
    //
    reg     [PROC_CNT_BITS-1:0] proc_cnt;
    wire    [PROC_CNT_BITS-1:0] proc_cnt_max  = PROC_NUM_CYCLES - 1;
    wire    [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
    wire    [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
                                                proc_cnt + 1'b1 : proc_cnt_zero;

    //
    // Addresses
    //
    // addr_in   - shared read pointer for r, s, u and v; counts up, then down.
    //             Its decrement saturates at zero.
    // addr_out1 - write pointer for r_dbl / s_dbl; counts up only.
    // addr_out2 - pointer for r_plus_s, u_minus_v and v_minus_u; counts up,
    //             then down. Its decrement saturates at zero.
    // addr_out3 - write pointer for u_half / v_half; counts down only and
    //             wraps from zero back to the last word.
    // addr_out4 - write pointer for u_minus_v_half / v_minus_u_half; counts
    //             down only and wraps from zero back to the last word.
    //
    reg     [BUFFER_ADDR_BITS-1:0]  addr_in;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_in_last = BUFFER_NUM_WORDS - 1;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
    wire    [BUFFER_ADDR_BITS-1:0]  addr_in_next = (addr_in < addr_in_last) ?
                                                   addr_in + 1'b1 : addr_in_zero;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_in_prev = (addr_in > addr_in_zero) ?
                                                   addr_in - 1'b1 : addr_in_zero;

    reg     [BUFFER_ADDR_BITS-1:0]  addr_out1;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out1_last = BUFFER_NUM_WORDS - 1;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}};
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out1_next = (addr_out1 < addr_out1_last) ?
                                                     addr_out1 + 1'b1 : addr_out1_zero;

    reg     [BUFFER_ADDR_BITS-1:0]  addr_out2;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out2_last = BUFFER_NUM_WORDS - 1;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}};
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out2_next = (addr_out2 < addr_out2_last) ?
                                                     addr_out2 + 1'b1 : addr_out2_zero;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out2_prev = (addr_out2 > addr_out2_zero) ?
                                                     addr_out2 - 1'b1 : addr_out2_zero;

    reg     [BUFFER_ADDR_BITS-1:0]  addr_out3;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out3_last = BUFFER_NUM_WORDS - 1;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}};
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out3_prev = (addr_out3 > addr_out3_zero) ?
                                                     addr_out3 - 1'b1 : addr_out3_last;

    reg     [BUFFER_ADDR_BITS-1:0]  addr_out4;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out4_last = BUFFER_NUM_WORDS - 1;
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out4_zero = {BUFFER_ADDR_BITS{1'b0}};
    wire    [BUFFER_ADDR_BITS-1:0]  addr_out4_prev = (addr_out4 > addr_out4_zero) ?
                                                     addr_out4 - 1'b1 : addr_out4_last;

    // all four source operands are read in lock-step
    assign r_addr = addr_in;
    assign s_addr = addr_in;
    assign u_addr = addr_in;
    assign v_addr = addr_in;

    assign r_dbl_addr          = addr_out1;
    assign s_dbl_addr          = addr_out1;
    assign r_plus_s_addr       = addr_out2;
    assign u_half_addr         = addr_out3;
    assign v_half_addr         = addr_out3;
    // Each difference buffer has a single address port on this module, so
    // addr_out2 is the address both while the differences are written
    // (ascending) and later while addr_out2 descends again.
    // NOTE(review): presumably the descending sweep is what fetches
    // u_minus_v_din / v_minus_u_din for the *_half results - confirm against
    // the buffer wiring in the parent module.
    assign u_minus_v_addr      = addr_out2;
    assign v_minus_u_addr      = addr_out2;
    assign u_minus_v_half_addr = addr_out4;
    assign v_minus_u_half_addr = addr_out4;

    //
    // Ready Flag
    //
    assign rdy = (proc_cnt == proc_cnt_zero);

    //
    // Address Increment/Decrement Logic
    //
    // Each pointer moves only while proc_cnt is inside its [start, stop]
    // window (both bounds inclusive).
    //
    wire inc_addr_in;
    wire dec_addr_in;
    wire inc_addr_out1;
    wire inc_addr_out2;
    wire dec_addr_out2;
    wire dec_addr_out3;
    wire dec_addr_out4;

    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start   = 0 * BUFFER_NUM_WORDS + 1;
    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop    = 1 * BUFFER_NUM_WORDS - 1;
    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 2;
    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop  = 1 * BUFFER_NUM_WORDS + 1;
    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_start = 0 * BUFFER_NUM_WORDS + 3;
    wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_stop  = 1 * BUFFER_NUM_WORDS + 1;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 3;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop  = 2 * BUFFER_NUM_WORDS + 1;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start   = 1 * BUFFER_NUM_WORDS + 0;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop    = 2 * BUFFER_NUM_WORDS - 2;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 1;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop  = 2 * BUFFER_NUM_WORDS + 0;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_start = 1 * BUFFER_NUM_WORDS + 4;
    wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_stop  = 2 * BUFFER_NUM_WORDS + 3;

    assign inc_addr_in   = (proc_cnt >= cnt_inc_addr_in_start)   && (proc_cnt <= cnt_inc_addr_in_stop);
    assign dec_addr_in   = (proc_cnt >= cnt_dec_addr_in_start)   && (proc_cnt <= cnt_dec_addr_in_stop);
    assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
    assign inc_addr_out2 = (proc_cnt >= cnt_inc_addr_out2_start) && (proc_cnt <= cnt_inc_addr_out2_stop);
    assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
    assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
    assign dec_addr_out4 = (proc_cnt >= cnt_dec_addr_out4_start) && (proc_cnt <= cnt_dec_addr_out4_stop);

    // The pointers have no asynchronous reset; they are (re)initialised
    // synchronously on every clock edge where rdy is high.
    always @(posedge clk) begin
        //
        if (rdy) begin
            // idle: ascending pointers park at word 0, descending-only
            // pointers park at the last word
            addr_in   <= addr_in_zero;
            addr_out1 <= addr_out1_zero;
            addr_out2 <= addr_out2_zero;
            addr_out3 <= addr_out3_last;
            addr_out4 <= addr_out4_last;
            //
        end else begin
            // addr_in and addr_out2 hold their value between their up and
            // down windows (no final else branch)
            if      (inc_addr_in) addr_in <= addr_in_next;
            else if (dec_addr_in) addr_in <= addr_in_prev;
            //
            // addr_out1 is forced back to zero whenever it is not counting
            if (inc_addr_out1) addr_out1 <= addr_out1_next;
            else               addr_out1 <= addr_out1_zero;
            //
            if      (inc_addr_out2) addr_out2 <= addr_out2_next;
            else if (dec_addr_out2) addr_out2 <= addr_out2_prev;
            //
            // addr_out3 / addr_out4 are forced back to the last word whenever
            // they are not counting
            if (dec_addr_out3) addr_out3 <= addr_out3_prev;
            else               addr_out3 <= addr_out3_last;
            //
            if (dec_addr_out4) addr_out4 <= addr_out4_prev;
            else               addr_out4 <= addr_out4_last;
            //
        end
        //
    end

    //
    // Write Enable Logic
    //
    // One enable per output pointer; each is high for BUFFER_NUM_WORDS
    // consecutive counter values ([start, stop], both inclusive).
    //
    wire wren_out1;
    wire wren_out2;
    wire wren_out3;
    wire wren_out4;

    wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 2;
    wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop  = 1 * BUFFER_NUM_WORDS + 1;
    wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 0 * BUFFER_NUM_WORDS + 3;
    wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop  = 1 * BUFFER_NUM_WORDS + 2;
    wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 1;
    wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop  = 2 * BUFFER_NUM_WORDS + 0;
    wire [PROC_CNT_BITS-1:0] cnt_wren_out4_start = 1 * BUFFER_NUM_WORDS + 4;
    wire [PROC_CNT_BITS-1:0] cnt_wren_out4_stop  = 2 * BUFFER_NUM_WORDS + 3;

    assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
    assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
    assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
    assign wren_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt <= cnt_wren_out4_stop);

    assign r_dbl_wren          = wren_out1;
    assign s_dbl_wren          = wren_out1;
    assign r_plus_s_wren       = wren_out2;
    assign u_half_wren         = wren_out3;
    assign v_half_wren         = wren_out3;
    assign u_minus_v_wren      = wren_out2;
    assign v_minus_u_wren      = wren_out2;
    assign u_minus_v_half_wren = wren_out4;
    assign v_minus_u_half_wren = wren_out4;

    //
    // Carry Windows
    //
    // Same start as the matching write-enable window, but the last counter
    // value is excluded (note "<" instead of "<="): the word handled on the
    // final cycle of a window has no successor to hand a bit to. The
    // registers driven from these windows update one clock later.
    //
    wire carry_window_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop);
    wire carry_window_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop);
    wire carry_window_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop);

    //
    // Adder (r + s)
    //
    // carry_out is looped back to carry_in (see masking logic below), so
    // consecutive 32-bit words chain into one multi-word addition.
    //
    wire [31: 0] add32_r_plus_s_sum_out;
    wire         add32_r_plus_s_carry_in;
    wire         add32_r_plus_s_carry_out;

    adder32_wrapper add32_r_plus_s
    (
        .clk    (clk),
        .a      (r_din),
        .b      (s_din),
        .s      (add32_r_plus_s_sum_out),
        .c_in   (add32_r_plus_s_carry_in),
        .c_out  (add32_r_plus_s_carry_out)
    );

    //
    // Subtractor (u - v)
    //
    wire [31: 0] sub32_u_minus_v_difference_out;
    wire         sub32_u_minus_v_borrow_in;
    wire         sub32_u_minus_v_borrow_out;

    subtractor32_wrapper sub32_u_minus_v
    (
        .clk    (clk),
        .a      (u_din),
        .b      (v_din),
        .d      (sub32_u_minus_v_difference_out),
        .b_in   (sub32_u_minus_v_borrow_in),
        .b_out  (sub32_u_minus_v_borrow_out)
    );

    //
    // Subtractor (v - u)
    //
    wire [31: 0] sub32_v_minus_u_difference_out;
    wire         sub32_v_minus_u_borrow_in;
    wire         sub32_v_minus_u_borrow_out;

    subtractor32_wrapper sub32_v_minus_u
    (
        .clk    (clk),
        .a      (v_din),
        .b      (u_din),
        .d      (sub32_v_minus_u_difference_out),
        .b_in   (sub32_v_minus_u_borrow_in),
        .b_out  (sub32_v_minus_u_borrow_out)
    );

    //
    // Carry & Borrow Masking Logic
    //
    // While the mask is set, the fed-back carry/borrow is forced to zero, so
    // nothing left over from outside the window enters the first word of a
    // new chain. The mask is registered, so it is clear on the clock after
    // each cycle in which carry_window_out1 is high.
    //
    reg mask_carry_borrow;

    always @(posedge clk)
        //
        mask_carry_borrow <= !carry_window_out1;

    assign add32_r_plus_s_carry_in   = add32_r_plus_s_carry_out   & ~mask_carry_borrow;
    assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_carry_borrow;
    assign sub32_v_minus_u_borrow_in = sub32_v_minus_u_borrow_out & ~mask_carry_borrow;

    //
    // Carry Bits
    //
    // Each register holds the bit that falls off the word currently on the
    // data input, to be shifted into the word that follows on the next clock:
    // bit 31 for the left shifts, bit 0 for the right shifts. Outside the
    // corresponding carry window the register is cleared, so the first word
    // of every sweep gets a zero shifted in.
    //
    reg r_dbl_carry;
    reg s_dbl_carry;
    reg u_half_carry;
    reg v_half_carry;
    reg u_minus_v_half_carry;
    reg v_minus_u_half_carry;

    always @(posedge clk) begin
        r_dbl_carry          <= carry_window_out1 ? r_din[31]        : 1'b0;
        s_dbl_carry          <= carry_window_out1 ? s_din[31]        : 1'b0;
        u_half_carry         <= carry_window_out3 ? u_din[0]         : 1'b0;
        v_half_carry         <= carry_window_out3 ? v_din[0]         : 1'b0;
        u_minus_v_half_carry <= carry_window_out4 ? u_minus_v_din[0] : 1'b0;
        v_minus_u_half_carry <= carry_window_out4 ? v_minus_u_din[0] : 1'b0;
    end

    //
    // Data Mapper
    //
    // left shift by one: drop bit 31, insert the saved bit at position 0
    assign r_dbl_dout          = {r_din[30:0], r_dbl_carry};
    assign s_dbl_dout          = {s_din[30:0], s_dbl_carry};
    assign r_plus_s_dout       = add32_r_plus_s_sum_out;
    // right shift by one: drop bit 0, insert the saved bit at position 31
    assign u_half_dout         = {u_half_carry, u_din[31:1]};
    assign v_half_dout         = {v_half_carry, v_din[31:1]};
    assign u_minus_v_dout      = sub32_u_minus_v_difference_out;
    assign v_minus_u_dout      = sub32_v_minus_u_difference_out;
    assign u_minus_v_half_dout = {u_minus_v_half_carry, u_minus_v_din[31:1]};
    assign v_minus_u_half_dout = {v_minus_u_half_carry, v_minus_u_din[31:1]};

    //
    // Primary Counter Logic
    //
    // The counter advances on every clock while a pass is in progress (!rdy)
    // and leaves the idle state only when ena is asserted. It is the only
    // register in this module with an asynchronous reset.
    //
    always @(posedge clk or negedge rst_n)
        //
        if (rst_n == 1'b0)    proc_cnt <= proc_cnt_zero;
        else if (!rdy || ena) proc_cnt <= proc_cnt_next;

endmodule