diff options
Diffstat (limited to 'rtl/modular')
-rw-r--r-- | rtl/modular/modular_adder.v | 298 | ||||
-rw-r--r-- | rtl/modular/modular_invertor/helper/modinv_helper_copy.v | 148 | ||||
-rw-r--r-- | rtl/modular/modular_invertor/helper/modinv_helper_init.v | 172 | ||||
-rw-r--r-- | rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v | 286 | ||||
-rw-r--r-- | rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v | 408 | ||||
-rw-r--r-- | rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v | 257 | ||||
-rw-r--r-- | rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v | 328 | ||||
-rw-r--r-- | rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v | 153 | ||||
-rw-r--r-- | rtl/modular/modular_invertor/modinv_clog2.v | 10 | ||||
-rw-r--r-- | rtl/modular/modular_invertor/modular_invertor.v | 981 | ||||
-rw-r--r-- | rtl/modular/modular_multiplier_256.v | 402 | ||||
-rw-r--r-- | rtl/modular/modular_reductor_256.v | 666 | ||||
-rw-r--r-- | rtl/modular/modular_subtractor.v | 292 |
13 files changed, 4401 insertions, 0 deletions
diff --git a/rtl/modular/modular_adder.v b/rtl/modular/modular_adder.v new file mode 100644 index 0000000..5641feb --- /dev/null +++ b/rtl/modular/modular_adder.v @@ -0,0 +1,298 @@ +//------------------------------------------------------------------------------ +// +// modular_adder.v +// ----------------------------------------------------------------------------- +// Modular adder.
+// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------
+
+module modular_adder
+ (
+ clk, rst_n,
+ ena, rdy,
+ ab_addr, n_addr, s_addr, s_wren, + a_din, b_din, n_din, s_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter OPERAND_NUM_WORDS = 8;
+ parameter WORD_COUNTER_WIDTH = 3;
+
+
+ // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+ // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction
+
+
+ // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output +
+ output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] s_addr; // index of current S word
+ output wire s_wren; // store current S word now
+ + input wire [ 31:0] a_din; // A + input wire [ 31:0] b_din; // B + input wire [ 31:0] n_din; // N + output wire [ 31:0] s_dout; // S = (A + B) mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] index_ab;
+ reg [WORD_COUNTER_WIDTH-1:0] index_n;
+ reg [WORD_COUNTER_WIDTH-1:0] index_s;
+
+ /* map registers to output ports */
+ assign ab_addr = index_ab;
+ assign n_addr = index_n;
+ assign s_addr = index_s;
+
+
+ //
+ // Adder
+ //
+ wire [31: 0] add32_s;
+ wire add32_c_in;
+ wire add32_c_out;
+
+ adder32_wrapper adder32
+ ( + .clk (clk), + .a (a_din), + .b (b_din), + .s (add32_s), + .c_in (add32_c_in), + .c_out (add32_c_out) + );
+
+
+ //
+ // Subtractor
+ //
+ wire [31: 0] sub32_d;
+ wire sub32_b_in;
+ wire sub32_b_out;
+
+ subtractor32_wrapper subtractor32
+ ( + .clk (clk), + .a (add32_s), + .b (n_din), + .d (sub32_d), + .b_in (sub32_b_in), + .b_out (sub32_b_out) + );
+
+
+ //
+ // FSM
+ //
+
+ localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)];
+
+ wire fsm_latch_msb_carry = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
+ wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
+
+ wire inc_index_ab = |fsm_shreg_inc_index_ab;
+ wire inc_index_n = |fsm_shreg_inc_index_n;
+ wire store_sum_ab = |fsm_shreg_store_sum_ab;
+ wire store_sum_ab_n = |fsm_shreg_store_sum_ab_n;
+ wire store_data_s = |fsm_shreg_store_data_s;
+ wire inc_index_s = |fsm_shreg_inc_index_s;
+
+ always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+
+
+
+
+
+ //
+ // Carry & Borrow Masking Logic
+ //
+ reg add32_c_mask;
+ reg sub32_b_mask;
+
+ always @(posedge clk) begin
+ //
+ add32_c_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
+ sub32_b_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
+ //
+ end
+
+ assign add32_c_in = add32_c_out & ~add32_c_mask;
+ assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
+
+
+ //
+ // Carry & Borrow Latch Logic
+ //
+ reg add32_carry_latch;
+ reg sub32_borrow_latch;
+
+ always @(posedge clk) begin
+ //
+ if (fsm_latch_msb_carry) add32_carry_latch <= add32_c_out;
+ if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out;
+ //
+ end
+
+
+ //
+ // Intermediate Results
+ //
+ reg [32*OPERAND_NUM_WORDS-1:0] s_ab;
+ reg [32*OPERAND_NUM_WORDS-1:0] s_ab_n;
+
+ always @(posedge clk)
+ //
+ if (store_data_s) begin
+ //
+ s_ab <= {{32{1'bX}}, s_ab[32*OPERAND_NUM_WORDS-1:32]};
+ s_ab_n <= {{32{1'bX}}, s_ab_n[32*OPERAND_NUM_WORDS-1:32]};
+ //
+ end else begin
+ //
+ if (store_sum_ab) s_ab <= {add32_s, s_ab[32*OPERAND_NUM_WORDS-1:32]};
+ if (store_sum_ab_n) s_ab_n <= {sub32_d, s_ab_n[32*OPERAND_NUM_WORDS-1:32]};
+ //
+ end
+
+
+ // + // Word Index Increment Logic + //
+ always @(posedge clk) + //
+ if (rdy) begin
+ //
+ index_ab <= WORD_INDEX_ZERO;
+ index_n <= WORD_INDEX_ZERO;
+ index_s <= WORD_INDEX_ZERO;
+ //
+ end else begin
+ //
+ if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab);
+ if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n);
+ if (inc_index_s) index_s <= WORD_INDEX_NEXT_OR_ZERO(index_s);
+ //
+ end
+
+
+ //
+ // Output Sum Selector
+ //
+ wire mux_select_ab = sub32_borrow_latch && !add32_carry_latch;
+
+
+ //
+ // Output Data and Write Enable Logic
+ //
+ reg s_wren_reg;
+ reg [31: 0] s_dout_reg;
+ wire [31: 0] s_dout_mux = mux_select_ab ? s_ab[31:0] : s_ab_n[31:0];
+
+ assign s_wren = s_wren_reg;
+ assign s_dout = s_dout_reg;
+
+ always @(posedge clk) + //
+ if (rdy) begin
+ //
+ s_wren_reg <= 1'b0;
+ s_dout_reg <= {32{1'bX}};
+ //
+ end else begin
+ //
+ s_wren_reg <= store_data_s;
+ s_dout_reg <= store_data_s ? s_dout_mux : {32{1'bX}};
+ //
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v b/rtl/modular/modular_invertor/helper/modinv_helper_copy.v new file mode 100644 index 0000000..07c1b4f --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_copy.v @@ -0,0 +1,148 @@ +`timescale 1ns / 1ps
+
+module modinv_helper_copy
+ (
+ clk, rst_n,
+ ena, rdy,
+ s_addr, s_din,
+ a1_addr, a1_wren, a1_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter OPERAND_NUM_WORDS = 8;
+ parameter OPERAND_ADDR_BITS = 3;
+
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 2;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+
+ input wire ena;
+ output wire rdy;
+
+ output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [OPERAND_ADDR_BITS-1:0] a1_addr;
+
+ output wire a1_wren;
+
+ input wire [ 31:0] s_din;
+
+ output wire [ 31:0] a1_dout;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [OPERAND_ADDR_BITS-1:0] addr_s;
+
+ wire [OPERAND_ADDR_BITS-1:0] addr_s_max = OPERAND_NUM_WORDS - 1;
+ wire [OPERAND_ADDR_BITS-1:0] addr_s_zero = {OPERAND_ADDR_BITS{1'b0}};
+ wire [OPERAND_ADDR_BITS-1:0] addr_s_next = (addr_s < addr_s_max) ?
+ addr_s + 1'b1 : addr_s_zero;
+
+ reg [OPERAND_ADDR_BITS-1:0] addr_a1;
+
+ wire [OPERAND_ADDR_BITS-1:0] addr_a1_max = OPERAND_NUM_WORDS - 1;
+ wire [OPERAND_ADDR_BITS-1:0] addr_a1_zero = {OPERAND_ADDR_BITS{1'b0}};
+ wire [OPERAND_ADDR_BITS-1:0] addr_a1_next = (addr_a1 < addr_a1_max) ?
+ addr_a1 + 1'b1 : addr_a1_zero;
+
+ assign s_addr = {{(BUFFER_ADDR_BITS - OPERAND_ADDR_BITS){1'b0}}, addr_s};
+ assign a1_addr = addr_a1;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment Logic
+ //
+ wire inc_addr_s;
+ wire inc_addr_a1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_start = 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_stop = OPERAND_NUM_WORDS + 0;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_stop = OPERAND_NUM_WORDS + 1;
+
+ assign inc_addr_s = (proc_cnt >= cnt_inc_addr_s_start) && (proc_cnt <= cnt_inc_addr_s_stop);
+ assign inc_addr_a1 = (proc_cnt >= cnt_inc_addr_a1_start) && (proc_cnt <= cnt_inc_addr_a1_stop);
+
+ always @(posedge clk) begin
+ //
+ if (inc_addr_s) addr_s <= addr_s_next;
+ else addr_s <= addr_s_zero;
+ //
+ if (inc_addr_a1) addr_a1 <= addr_a1_next;
+ else addr_a1 <= addr_a1_zero;
+ //
+ end
+
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_a1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_a1_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_a1_stop = OPERAND_NUM_WORDS + 1;
+
+ assign wren_a1 = (proc_cnt >= cnt_wren_a1_start) && (proc_cnt <= cnt_wren_a1_stop);
+
+ assign a1_wren = wren_a1;
+
+
+ //
+ // Data Logic
+ //
+ assign a1_dout = s_din;
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_init.v b/rtl/modular/modular_invertor/helper/modinv_helper_init.v new file mode 100644 index 0000000..0468134 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_init.v @@ -0,0 +1,172 @@ +`timescale 1ns / 1ps
+
+module modinv_helper_init
+ (
+ clk, rst_n,
+ ena, rdy,
+ a_addr, a_din,
+ q_addr, q_din,
+ r_addr, r_wren, r_dout,
+ s_addr, s_wren, s_dout,
+ u_addr, u_wren, u_dout,
+ v_addr, v_wren, v_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter OPERAND_NUM_WORDS = 8;
+ parameter OPERAND_ADDR_BITS = 3;
+
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 3;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ output wire [OPERAND_ADDR_BITS-1:0] a_addr;
+ output wire [OPERAND_ADDR_BITS-1:0] q_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] r_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] v_addr;
+
+ output wire r_wren;
+ output wire s_wren;
+ output wire u_wren;
+ output wire v_wren;
+
+ input wire [ 31:0] a_din;
+ input wire [ 31:0] q_din;
+ output wire [ 31:0] r_dout;
+ output wire [ 31:0] s_dout;
+ output wire [ 31:0] u_dout;
+ output wire [ 31:0] v_dout;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [OPERAND_ADDR_BITS-1:0] addr_aq;
+
+ wire [OPERAND_ADDR_BITS-1:0] addr_aq_max = OPERAND_NUM_WORDS - 1;
+ wire [OPERAND_ADDR_BITS-1:0] addr_aq_zero = {OPERAND_ADDR_BITS{1'b0}};
+ wire [OPERAND_ADDR_BITS-1:0] addr_aq_next = (addr_aq < addr_aq_max) ?
+ addr_aq + 1'b1 : addr_aq_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_rsuv;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_max = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_next = (addr_rsuv < addr_rsuv_max) ?
+ addr_rsuv + 1'b1 : addr_rsuv_zero;
+
+ assign a_addr = addr_aq;
+ assign q_addr = addr_aq;
+
+ assign r_addr = addr_rsuv;
+ assign s_addr = addr_rsuv;
+ assign u_addr = addr_rsuv;
+ assign v_addr = addr_rsuv;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment Logic
+ //
+ wire inc_addr_aq;
+ wire inc_addr_rsuv;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_start = 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_stop = OPERAND_NUM_WORDS;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_stop = BUFFER_NUM_WORDS + 1;
+
+ assign inc_addr_aq = (proc_cnt >= cnt_inc_addr_aq_start) && (proc_cnt <= cnt_inc_addr_aq_stop);
+ assign inc_addr_rsuv = (proc_cnt >= cnt_inc_addr_rsuv_start) && (proc_cnt <= cnt_inc_addr_rsuv_stop);
+
+ always @(posedge clk) begin
+ //
+ if (inc_addr_aq) addr_aq <= addr_aq_next;
+ else addr_aq <= addr_aq_zero;
+ //
+ if (inc_addr_rsuv) addr_rsuv <= addr_rsuv_next;
+ else addr_rsuv <= addr_rsuv_zero;
+ //
+ end
+
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_rsuv;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_stop = BUFFER_NUM_WORDS + 1;
+
+ assign wren_rsuv = (proc_cnt >= cnt_wren_rsuv_start) && (proc_cnt <= cnt_wren_rsuv_stop);
+
+ assign r_wren = wren_rsuv;
+ assign s_wren = wren_rsuv;
+ assign u_wren = wren_rsuv;
+ assign v_wren = wren_rsuv;
+
+
+ //
+ // Data Logic
+ //
+ assign r_dout = 32'd0;
+ assign s_dout = (proc_cnt == cnt_wren_rsuv_start) ? 32'd1 : 32'd0;
+ assign u_dout = (proc_cnt != cnt_wren_rsuv_stop) ? q_din : 32'd0;
+ assign v_dout = (proc_cnt != cnt_wren_rsuv_stop) ? a_din : 32'd0;
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v new file mode 100644 index 0000000..6b65eb1 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v @@ -0,0 +1,286 @@ +`timescale 1ns / 1ps
+
+module modinv_helper_invert_compare
+ (
+ clk, rst_n,
+ ena, rdy,
+
+ u_addr, u_din,
+ v_addr, v_din,
+
+ u_gt_v, v_eq_1, + u_is_even, v_is_even
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = 1 * BUFFER_NUM_WORDS + 10;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ output wire [BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_addr;
+
+ input wire [ 32-1:0] u_din;
+ input wire [ 32-1:0] v_din;
+
+ output wire u_gt_v;
+ output wire v_eq_1;
+ output wire u_is_even;
+ output wire v_is_even;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [BUFFER_ADDR_BITS-1:0] addr_in;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ?
+ addr_in - 1'b1 : addr_in_last;
+
+ assign u_addr = addr_in;
+ assign v_addr = addr_in;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Decrement Logic
+ //
+ wire dec_addr_in;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 0 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 1 * BUFFER_NUM_WORDS + 0;
+
+ assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop);
+
+ always @(posedge clk)
+ //
+ if (rdy) addr_in <= addr_in_last;
+ else if (dec_addr_in) addr_in <= addr_in_prev;
+
+
+ //
+ // Comparison Stage Flags
+ //
+ wire calc_leg;
+ wire calc_leg_final;
+ wire calc_parity;
+
+ wire [PROC_CNT_BITS-1:0] cnt_calc_leg_start = 0 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_calc_leg_stop = 1 * BUFFER_NUM_WORDS + 2;
+ wire [PROC_CNT_BITS-1:0] cnt_calc_parity = 1 * BUFFER_NUM_WORDS + 1;
+
+ assign calc_leg = (proc_cnt >= cnt_calc_leg_start) && (proc_cnt <= cnt_calc_leg_stop);
+ assign calc_leg_final = (proc_cnt == cnt_calc_leg_stop);
+ assign calc_parity = (proc_cnt == cnt_calc_parity);
+
+
+ //
+ // Dummy Input
+ //
+ reg sub32_din_1_lsb;
+ wire [31: 0] sub32_din_1 = {{31{1'b0}}, sub32_din_1_lsb};
+
+ always @(posedge clk)
+ //
+ sub32_din_1_lsb <= (addr_in == addr_in_zero) ? 1'b1 : 1'b0;
+
+
+ // + // Subtractor (u - v) + // + wire [31: 0] sub32_u_minus_v_difference_out; + wire sub32_u_minus_v_borrow_in; + wire sub32_u_minus_v_borrow_out; + + subtractor32_wrapper sub32_u_minus_v + ( + .clk (clk), + .a (u_din), + .b (v_din), + .d (sub32_u_minus_v_difference_out), + .b_in (sub32_u_minus_v_borrow_in), + .b_out (sub32_u_minus_v_borrow_out) + );
+
+
+ // + // Subtractor (v - 1) + // + wire [31: 0] sub32_v_minus_1_difference_out; + wire sub32_v_minus_1_borrow_in; + wire sub32_v_minus_1_borrow_out; + + subtractor32_wrapper sub32_v_minus_1 + ( + .clk (clk), + .a (v_din), + .b (sub32_din_1), + .d (sub32_v_minus_1_difference_out), + .b_in (sub32_v_minus_1_borrow_in), + .b_out (sub32_v_minus_1_borrow_out) + );
+
+
+
+ //
+ // Borrow Masking Logic
+ //
+ reg mask_borrow;
+
+ always @(posedge clk)
+ //
+ mask_borrow <= ((proc_cnt > cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop)) ?
+ 1'b0 : 1'b1;
+
+ assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_borrow;
+ assign sub32_v_minus_1_borrow_in = sub32_v_minus_1_borrow_out & ~mask_borrow;
+
+
+ //
+ // Comparison Logic
+ //
+ reg cmp_u_v_l;
+ reg cmp_u_v_e;
+ reg cmp_u_v_g;
+
+ reg cmp_v_1_l;
+ reg cmp_v_1_e;
+ reg cmp_v_1_g;
+
+ wire cmp_unresolved_u_v = !(cmp_u_v_l || cmp_u_v_g);
+ wire cmp_unresolved_v_1 = !(cmp_v_1_l || cmp_v_1_g);
+
+ wire cmp_u_v_borrow_is_set = (sub32_u_minus_v_borrow_out == 1'b1) ? 1'b1 : 1'b0;
+ wire cmp_u_v_difference_is_nonzero = (sub32_u_minus_v_difference_out != 32'd0) ? 1'b1 : 1'b0;
+
+ wire cmp_v_1_borrow_is_set = (sub32_v_minus_1_borrow_out == 1'b1) ? 1'b1 : 1'b0;
+ wire cmp_v_1_difference_is_nonzero = (sub32_v_minus_1_difference_out != 32'd0) ? 1'b1 : 1'b0;
+
+ reg u_is_even_reg;
+ reg v_is_even_reg;
+
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ if (ena) begin
+ //
+ cmp_u_v_l <= 1'b0;
+ cmp_u_v_e <= 1'b0;
+ cmp_u_v_g <= 1'b0;
+ //
+ cmp_v_1_l <= 1'b0;
+ cmp_v_1_e <= 1'b0;
+ cmp_v_1_g <= 1'b0;
+ //
+ u_is_even_reg <= 1'bX;
+ v_is_even_reg <= 1'bX;
+ //
+ end
+ //
+ end else begin
+ //
+ // parity
+ //
+ if (calc_parity) begin
+ u_is_even_reg <= ~u_din[0];
+ v_is_even_reg <= ~v_din[0];
+ end
+ //
+ // u <> v
+ //
+ if (cmp_unresolved_u_v && calc_leg) begin
+ //
+ if (cmp_u_v_borrow_is_set)
+ cmp_u_v_l <= 1'b1;
+ //
+ if (!cmp_u_v_borrow_is_set && cmp_u_v_difference_is_nonzero)
+ cmp_u_v_g <= 1'b1;
+ //
+ if (!cmp_u_v_borrow_is_set && !cmp_u_v_difference_is_nonzero && calc_leg_final)
+ cmp_u_v_e <= 1'b1;
+ //
+ end
+ //
+ // v <> 1
+ //
+ if (cmp_unresolved_v_1 && calc_leg) begin
+ //
+ if (cmp_v_1_borrow_is_set)
+ cmp_v_1_l <= 1'b1;
+ //
+ if (!cmp_v_1_borrow_is_set && cmp_v_1_difference_is_nonzero)
+ cmp_v_1_g <= 1'b1;
+ //
+ if (!cmp_v_1_borrow_is_set && !cmp_v_1_difference_is_nonzero && calc_leg_final)
+ cmp_v_1_e <= 1'b1;
+ //
+ end
+ //
+ end
+
+
+ //
+ // Output Flags
+ //
+ assign u_gt_v = !cmp_u_v_l && !cmp_u_v_e && cmp_u_v_g;
+ assign v_eq_1 = !cmp_v_1_l && cmp_v_1_e && !cmp_v_1_g;
+
+ assign u_is_even = u_is_even_reg;
+ assign v_is_even = v_is_even_reg;
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v new file mode 100644 index 0000000..ab15563 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v @@ -0,0 +1,408 @@ +`timescale 1ns / 1ps
+
+module modinv_helper_invert_precalc
+ (
+ clk, rst_n,
+ ena, rdy,
+
+ r_addr, r_din,
+ s_addr, s_din,
+ u_addr, u_din,
+ v_addr, v_din,
+
+ r_dbl_addr, r_dbl_wren, r_dbl_dout,
+ s_dbl_addr, s_dbl_wren, s_dbl_dout,
+ r_plus_s_addr, r_plus_s_wren, r_plus_s_dout,
+ u_half_addr, u_half_wren, u_half_dout,
+ v_half_addr, v_half_wren, v_half_dout,
+ u_minus_v_addr, u_minus_v_wren, u_minus_v_dout, u_minus_v_din,
+ v_minus_u_addr, v_minus_u_wren, v_minus_u_dout, v_minus_u_din,
+ u_minus_v_half_addr, u_minus_v_half_wren, u_minus_v_half_dout,
+ v_minus_u_half_addr, v_minus_u_half_wren, v_minus_u_half_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ output wire [BUFFER_ADDR_BITS-1:0] r_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_addr;
+
+ input wire [ 32-1:0] r_din;
+ input wire [ 32-1:0] s_din;
+ input wire [ 32-1:0] u_din;
+ input wire [ 32-1:0] v_din;
+
+ output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr;
+
+ output wire [ 32-1:0] r_dbl_dout;
+ output wire [ 32-1:0] s_dbl_dout;
+ output wire [ 32-1:0] r_plus_s_dout;
+ output wire [ 32-1:0] u_half_dout;
+ output wire [ 32-1:0] v_half_dout;
+ output wire [ 32-1:0] u_minus_v_dout;
+ output wire [ 32-1:0] v_minus_u_dout;
+ output wire [ 32-1:0] u_minus_v_half_dout;
+ output wire [ 32-1:0] v_minus_u_half_dout;
+
+ output wire r_dbl_wren;
+ output wire s_dbl_wren;
+ output wire r_plus_s_wren;
+ output wire u_half_wren;
+ output wire v_half_wren;
+ output wire u_minus_v_wren;
+ output wire v_minus_u_wren;
+ output wire u_minus_v_half_wren;
+ output wire v_minus_u_half_wren;
+
+ input wire [ 32-1:0] u_minus_v_din;
+ input wire [ 32-1:0] v_minus_u_din;
+
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [BUFFER_ADDR_BITS-1:0] addr_in;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_last) ?
+ addr_in + 1'b1 : addr_in_zero;
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ?
+ addr_in - 1'b1 : addr_in_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out1;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ?
+ addr_out1 + 1'b1 : addr_out1_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out2;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_next = (addr_out2 < addr_out2_last) ?
+ addr_out2 + 1'b1 : addr_out2_zero;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ?
+ addr_out2 - 1'b1 : addr_out2_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out3;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ?
+ addr_out3 - 1'b1 : addr_out3_last;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out4;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out4_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out4_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out4_prev = (addr_out4 > addr_out4_zero) ?
+ addr_out4 - 1'b1 : addr_out4_last;
+
+
+ assign r_addr = addr_in;
+ assign s_addr = addr_in;
+ assign u_addr = addr_in;
+ assign v_addr = addr_in;
+
+ assign r_dbl_addr = addr_out1;
+ assign s_dbl_addr = addr_out1;
+ assign r_plus_s_addr = addr_out2;
+ assign u_half_addr = addr_out3;
+ assign v_half_addr = addr_out3;
+ assign u_minus_v_addr = addr_out2;
+ assign v_minus_u_addr = addr_out2;
+ assign u_minus_v_half_addr = addr_out4;
+ assign v_minus_u_half_addr = addr_out4;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment/Decrement Logic
+ //
+ wire inc_addr_in;
+ wire dec_addr_in;
+ wire inc_addr_out1;
+ wire inc_addr_out2;
+ wire dec_addr_out2;
+ wire dec_addr_out3;
+ wire dec_addr_out4;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 0 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = 1 * BUFFER_NUM_WORDS - 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 2;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_start = 0 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_stop = 1 * BUFFER_NUM_WORDS + 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 1 * BUFFER_NUM_WORDS + 0;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 2 * BUFFER_NUM_WORDS - 2;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 0;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_start = 1 * BUFFER_NUM_WORDS + 4;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_stop = 2 * BUFFER_NUM_WORDS + 3;
+
+ assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
+ assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop);
+ assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
+ assign inc_addr_out2 = (proc_cnt >= cnt_inc_addr_out2_start) && (proc_cnt <= cnt_inc_addr_out2_stop);
+ assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
+ assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
+ assign dec_addr_out4 = (proc_cnt >= cnt_dec_addr_out4_start) && (proc_cnt <= cnt_dec_addr_out4_stop);
+
+
+ always @(posedge clk) begin
+ //
+ if (rdy) begin
+ //
+ addr_in <= addr_in_zero;
+ addr_out1 <= addr_out1_zero;
+ addr_out2 <= addr_out2_zero;
+ addr_out3 <= addr_out3_last;
+ addr_out4 <= addr_out4_last;
+ //
+ end else begin
+ //
+ if (inc_addr_in) addr_in <= addr_in_next;
+ else if (dec_addr_in) addr_in <= addr_in_prev;
+ //
+ if (inc_addr_out1) addr_out1 <= addr_out1_next;
+ else addr_out1 <= addr_out1_zero;
+ //
+ if (inc_addr_out2) addr_out2 <= addr_out2_next;
+ else if (dec_addr_out2) addr_out2 <= addr_out2_prev;
+ //
+ if (dec_addr_out3) addr_out3 <= addr_out3_prev;
+ else addr_out3 <= addr_out3_last;
+ //
+ if (dec_addr_out4) addr_out4 <= addr_out4_prev;
+ else addr_out4 <= addr_out4_last;
+ //
+ end
+ //
+ end
+
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_out1;
+ wire wren_out2;
+ wire wren_out3;
+ wire wren_out4;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 2;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 0 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 1 * BUFFER_NUM_WORDS + 2;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 0;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out4_start = 1 * BUFFER_NUM_WORDS + 4;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out4_stop = 2 * BUFFER_NUM_WORDS + 3;
+
+ assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
+ assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
+ assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
+ assign wren_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt <= cnt_wren_out4_stop);
+
+ assign r_dbl_wren = wren_out1;
+ assign s_dbl_wren = wren_out1;
+ assign r_plus_s_wren = wren_out2;
+ assign u_half_wren = wren_out3;
+ assign v_half_wren = wren_out3;
+ assign u_minus_v_wren = wren_out2;
+ assign v_minus_u_wren = wren_out2;
+ assign u_minus_v_half_wren = wren_out4;
+ assign v_minus_u_half_wren = wren_out4;
+
+
+ // + // Adder (r + s) + // + wire [31: 0] add32_r_plus_s_sum_out; + wire add32_r_plus_s_carry_in; + wire add32_r_plus_s_carry_out; + + adder32_wrapper add32_r_plus_s + ( + .clk (clk), + .a (r_din), + .b (s_din), + .s (add32_r_plus_s_sum_out), + .c_in (add32_r_plus_s_carry_in), + .c_out (add32_r_plus_s_carry_out) + );
+
+ // + // Subtractor (u - v) + // + wire [31: 0] sub32_u_minus_v_difference_out; + wire sub32_u_minus_v_borrow_in; + wire sub32_u_minus_v_borrow_out; + + subtractor32_wrapper sub32_u_minus_v + ( + .clk (clk), + .a (u_din), + .b (v_din), + .d (sub32_u_minus_v_difference_out), + .b_in (sub32_u_minus_v_borrow_in), + .b_out (sub32_u_minus_v_borrow_out) + );
+
+ // + // Subtractor (v - u) + // + wire [31: 0] sub32_v_minus_u_difference_out; + wire sub32_v_minus_u_borrow_in; + wire sub32_v_minus_u_borrow_out; + + subtractor32_wrapper sub32_v_minus_u + ( + .clk (clk), + .a (v_din), + .b (u_din), + .d (sub32_v_minus_u_difference_out), + .b_in (sub32_v_minus_u_borrow_in), + .b_out (sub32_v_minus_u_borrow_out) + );
+
+
+ //
+ // Carry & Borrow Masking Logic
+ //
+ reg mask_carry_borrow;
+
+ always @(posedge clk)
+ //
+ mask_carry_borrow <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
+ 1'b0 : 1'b1;
+
+ assign add32_r_plus_s_carry_in = add32_r_plus_s_carry_out & ~mask_carry_borrow;
+ assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_carry_borrow;
+ assign sub32_v_minus_u_borrow_in = sub32_v_minus_u_borrow_out & ~mask_carry_borrow;
+
+
+ //
+ // Carry Bits
+ //
+ reg r_dbl_carry;
+ reg s_dbl_carry;
+ reg u_half_carry;
+ reg v_half_carry;
+ reg u_minus_v_half_carry;
+ reg v_minus_u_half_carry;
+
+ always @(posedge clk) begin
+
+ r_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
+ r_din[31] : 1'b0;
+
+ s_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
+ s_din[31] : 1'b0;
+
+ u_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
+ u_din[0] : 1'b0;
+
+ v_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
+ v_din[0] : 1'b0;
+
+ u_minus_v_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
+ u_minus_v_din[0] : 1'b0;
+
+ v_minus_u_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
+ v_minus_u_din[0] : 1'b0;
+
+ end
+
+
+ //
+ // Data Mapper
+ //
+ assign r_dbl_dout = {r_din[30:0], r_dbl_carry};
+ assign s_dbl_dout = {s_din[30:0], s_dbl_carry};
+ assign r_plus_s_dout = add32_r_plus_s_sum_out;
+ assign u_half_dout = {u_half_carry, u_din[31:1]};
+ assign v_half_dout = {v_half_carry, v_din[31:1]};
+ assign u_minus_v_dout = sub32_u_minus_v_difference_out;
+ assign v_minus_u_dout = sub32_v_minus_u_difference_out;
+ assign u_minus_v_half_dout = {u_minus_v_half_carry, u_minus_v_din[31:1]};
+ assign v_minus_u_half_dout = {v_minus_u_half_carry, v_minus_u_din[31:1]};
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v new file mode 100644 index 0000000..0cd6ac5 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v @@ -0,0 +1,257 @@ +`timescale 1ns / 1ps
+
+module modinv_helper_invert_update
+ (
+ clk, rst_n,
+ ena, rdy,
+
+ u_gt_v, v_eq_1, + u_is_even, v_is_even,
+
+ r_addr, r_wren, r_dout,
+ s_addr, s_wren, s_dout,
+ u_addr, u_wren, u_dout,
+ v_addr, v_wren, v_dout,
+
+ r_dbl_addr, r_dbl_din,
+ s_dbl_addr, s_dbl_din,
+ r_plus_s_addr, r_plus_s_din,
+ u_half_addr, u_half_din,
+ v_half_addr, v_half_din,
+ u_minus_v_half_addr, u_minus_v_half_din,
+ v_minus_u_half_addr, v_minus_u_half_din
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ input wire u_gt_v;
+ input wire v_eq_1; + input wire u_is_even;
+ input wire v_is_even;
+
+ output wire [BUFFER_ADDR_BITS-1:0] r_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_addr;
+
+ output wire r_wren;
+ output wire s_wren;
+ output wire u_wren;
+ output wire v_wren;
+
+ output wire [ 32-1:0] r_dout;
+ output wire [ 32-1:0] s_dout;
+ output wire [ 32-1:0] u_dout;
+ output wire [ 32-1:0] v_dout;
+
+ output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr;
+
+ input wire [ 32-1:0] r_dbl_din;
+ input wire [ 32-1:0] s_dbl_din;
+ input wire [ 32-1:0] r_plus_s_din;
+ input wire [ 32-1:0] u_half_din;
+ input wire [ 32-1:0] v_half_din;
+ input wire [ 32-1:0] u_minus_v_half_din;
+ input wire [ 32-1:0] v_minus_u_half_din;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [BUFFER_ADDR_BITS-1:0] addr_in;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ?
+ addr_in + 1'b1 : addr_in_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ?
+ addr_out + 1'b1 : addr_out_zero;
+
+ assign r_addr = addr_out;
+ assign s_addr = addr_out;
+ assign u_addr = addr_out;
+ assign v_addr = addr_out;
+
+ assign r_dbl_addr = addr_in;
+ assign s_dbl_addr = addr_in;
+ assign r_plus_s_addr = addr_in;
+ assign u_half_addr = addr_in;
+ assign v_half_addr = addr_in;
+ assign u_minus_v_half_addr = addr_in;
+ assign v_minus_u_half_addr = addr_in;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment Logic
+ //
+ wire inc_addr_in;
+ wire inc_addr_out;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1;
+
+ assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
+ assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop);
+
+ always @(posedge clk) begin
+ //
+ if (inc_addr_in) addr_in <= addr_in_next;
+ else addr_in <= addr_in_zero;
+ //
+ if (inc_addr_out) addr_out <= addr_out_next;
+ else addr_out <= addr_out_zero;
+ //
+ end
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_out;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1;
+
+ assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop);
+
+ reg r_wren_allow;
+ reg s_wren_allow;
+ reg u_wren_allow;
+ reg v_wren_allow;
+
+ assign r_wren = wren_out && r_wren_allow && !v_eq_1 && !rdy;
+ assign s_wren = wren_out && s_wren_allow && !v_eq_1 && !rdy;
+ assign u_wren = wren_out && u_wren_allow && !v_eq_1 && !rdy;
+ assign v_wren = wren_out && v_wren_allow && !v_eq_1 && !rdy;
+
+
+ //
+ // Data Logic
+ //
+ reg [31: 0] r_dout_mux;
+ reg [31: 0] s_dout_mux;
+ reg [31: 0] u_dout_mux;
+ reg [31: 0] v_dout_mux;
+
+ assign r_dout = r_dout_mux;
+ assign s_dout = s_dout_mux;
+ assign u_dout = u_dout_mux;
+ assign v_dout = v_dout_mux;
+
+ always @(*) begin
+ //
+ // r, s, u, v
+ //
+ if (u_is_even) begin
+ //
+ u_dout_mux = u_half_din;
+ v_dout_mux = {32{1'bX}};
+ r_dout_mux = {32{1'bX}};
+ s_dout_mux = s_dbl_din;
+ //
+ u_wren_allow = 1'b1;
+ v_wren_allow = 1'b0;
+ r_wren_allow = 1'b0;
+ s_wren_allow = 1'b1;
+ //
+ end else begin
+ //
+ if (v_is_even) begin
+ //
+ u_dout_mux = {32{1'bX}};
+ v_dout_mux = v_half_din;
+ r_dout_mux = r_dbl_din;
+ s_dout_mux = {32{1'bX}};
+ //
+ u_wren_allow = 1'b0;
+ v_wren_allow = 1'b1;
+ r_wren_allow = 1'b1;
+ s_wren_allow = 1'b0;
+ //
+ end else begin
+ //
+ u_dout_mux = u_gt_v ? u_minus_v_half_din : {32{1'bX}};
+ v_dout_mux = u_gt_v ? {32{1'bX}} : v_minus_u_half_din;
+ r_dout_mux = u_gt_v ? r_plus_s_din : r_dbl_din;
+ s_dout_mux = u_gt_v ? s_dbl_din : r_plus_s_din;
+ //
+ u_wren_allow = u_gt_v;
+ v_wren_allow = !u_gt_v;
+ r_wren_allow = 1'b1;
+ s_wren_allow = 1'b1;
+ //
+ end
+ //
+ end
+ //
+ end
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v new file mode 100644 index 0000000..fb858a6 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v @@ -0,0 +1,328 @@ +`timescale 1ns / 1ps
+
+module modinv_helper_reduce_precalc
+ (
+ clk, rst_n,
+ ena, rdy,
+
+ k,
+
+ s_is_odd, k_is_nul,
+
+ r_addr, r_din, r_wren, r_dout,
+ s_addr, s_din,
+ u_addr, u_wren, u_dout,
+ v_addr, v_wren, v_dout,
+ q_addr, q_din
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter OPERAND_NUM_WORDS = 8;
+ parameter OPERAND_ADDR_BITS = 3;
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+ parameter K_NUM_BITS = 10;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ input wire [ K_NUM_BITS-1:0] k;
+
+ output wire s_is_odd;
+ output wire k_is_nul;
+
+ output wire [ BUFFER_ADDR_BITS-1:0] r_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] v_addr;
+ output wire [OPERAND_ADDR_BITS-1:0] q_addr;
+
+ input wire [ 32-1:0] r_din;
+ input wire [ 32-1:0] s_din;
+ input wire [ 32-1:0] q_din;
+
+ output wire r_wren;
+ output wire u_wren;
+ output wire v_wren;
+
+ output wire [ 32-1:0] r_dout;
+ output wire [ 32-1:0] u_dout;
+ output wire [ 32-1:0] v_dout;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [ BUFFER_ADDR_BITS-1:0] addr_in_buf;
+ reg [OPERAND_ADDR_BITS-1:0] addr_in_op;
+ reg [ BUFFER_ADDR_BITS-1:0] addr_out1;
+ reg [ BUFFER_ADDR_BITS-1:0] addr_out2;
+ reg [ BUFFER_ADDR_BITS-1:0] addr_out3; + + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_last = BUFFER_NUM_WORDS - 1; + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_next = (addr_in_buf < addr_in_buf_last) ? + addr_in_buf + 1'b1 : addr_in_buf_zero; + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_prev = (addr_in_buf > addr_in_buf_zero) ? + addr_in_buf - 1'b1 : addr_in_buf_zero; +
+ wire [OPERAND_ADDR_BITS-1:0] addr_in_op_last = OPERAND_NUM_WORDS - 1; + wire [OPERAND_ADDR_BITS-1:0] addr_in_op_zero = {OPERAND_ADDR_BITS{1'b0}}; + wire [OPERAND_ADDR_BITS-1:0] addr_in_op_next = (addr_in_op < addr_in_op_last) ? + addr_in_op + 1'b1 : addr_in_op_zero;
+ + wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ? + addr_out1 + 1'b1 : addr_out1_zero; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_prev = (addr_out1 > addr_out1_zero) ? + addr_out1 - 1'b1 : addr_out1_zero; + + wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ? + addr_out2 - 1'b1 : addr_out2_last; +
+ wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ? + addr_out3 - 1'b1 : addr_out3_last; +
+
+ assign s_addr = addr_in_buf;
+ assign q_addr = addr_in_op;
+ assign r_addr = addr_out1;
+ assign u_addr = addr_out2;
+ assign v_addr = addr_out3;
+
+
+ // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); +
+
+ // + // Address Increment/Decrement Logic + // + wire inc_addr_buf_in; + wire dec_addr_buf_in;
+ wire inc_addr_op_in;
+ wire inc_addr_out1;
+ wire dec_addr_out1;
+ wire dec_addr_out2;
+ wire dec_addr_out3;
+
+ wire [PROC_CNT_BITS-1:0] cnt_calc_flags = 0 * BUFFER_NUM_WORDS + 2; +
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_start = 0 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_stop = 1 * BUFFER_NUM_WORDS - 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_start = 1 * BUFFER_NUM_WORDS + 0; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_stop = 2 * BUFFER_NUM_WORDS - 2;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_start = 0 * OPERAND_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_stop = 1 * OPERAND_NUM_WORDS + 0;
+ + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_start = 1 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_stop = 2 * BUFFER_NUM_WORDS + 1; +
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 0;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 4; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 3;
+
+ assign inc_addr_buf_in = (proc_cnt >= cnt_inc_addr_buf_in_start) && (proc_cnt <= cnt_inc_addr_buf_in_stop); + assign dec_addr_buf_in = (proc_cnt >= cnt_dec_addr_buf_in_start) && (proc_cnt <= cnt_dec_addr_buf_in_stop);
+ assign inc_addr_op_in = (proc_cnt >= cnt_inc_addr_op_in_start) && (proc_cnt <= cnt_inc_addr_op_in_stop); + assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop); + assign dec_addr_out1 = (proc_cnt >= cnt_dec_addr_out1_start) && (proc_cnt <= cnt_dec_addr_out1_stop); + assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
+ assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop); +
+ always @(posedge clk) begin + // + if (rdy) begin + // + addr_in_buf <= addr_in_buf_zero;
+ addr_in_op <= addr_in_op_zero;
+ addr_out1 <= addr_out1_zero;
+ addr_out2 <= addr_out2_last;
+ addr_out3 <= addr_out3_last; + // + end else begin + // + if (inc_addr_buf_in) addr_in_buf <= addr_in_buf_next; + else if (dec_addr_buf_in) addr_in_buf <= addr_in_buf_prev;
+ //
+ if (inc_addr_op_in) addr_in_op <= addr_in_op_next;
+ else addr_in_op <= addr_in_op_zero; + //
+ if (inc_addr_out1) addr_out1 <= addr_out1_next; + else if (dec_addr_out1) addr_out1 <= addr_out1_prev;
+ //
+ if (dec_addr_out2) addr_out2 <= addr_out2_prev; + else addr_out2 <= addr_out2_last;
+ //
+ if (dec_addr_out3) addr_out3 <= addr_out3_prev; + else addr_out3 <= addr_out3_last;
+ // + end + // + end +
+
+ // + // Write Enable Logic + // + wire wren_out1;
+ wire wren_out2;
+ wire wren_out3; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 2; +
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 1 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 2 * BUFFER_NUM_WORDS + 0; +
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 4; + wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 3; + + assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop); + assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
+ assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop); + + assign r_wren = wren_out1; + assign u_wren = wren_out2;
+ assign v_wren = wren_out3;
+
+ // + // Adder (s + q) + //
+ wire [31: 0] q_din_masked; + wire [31: 0] add32_s_plus_q_sum_out; + wire add32_s_plus_q_carry_in; + wire add32_s_plus_q_carry_out; + + adder32_wrapper add32_r_plus_s + ( + .clk (clk), + .a (s_din), + .b (q_din_masked), + .s (add32_s_plus_q_sum_out), + .c_in (add32_s_plus_q_carry_in), + .c_out (add32_s_plus_q_carry_out) + );
+
+
+ // + // Carry Masking Logic + // + wire mask_carry; +
+ assign mask_carry = ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? 1'b0 : 1'b1;
+
+
+ //
+ // Addend Masking Logic
+ //
+ reg q_din_mask;
+
+ always @(posedge clk)
+ q_din_mask <= (addr_in_buf == addr_in_buf_last) ? 1'b1 : 1'b0; +
+ assign q_din_masked = q_din_mask ? {32{1'b0}} : q_din;
+ + assign add32_s_plus_q_carry_in = add32_s_plus_q_carry_out & ~mask_carry; +
+
+ // + // Carry Bits + // + reg s_half_carry; + reg s_plus_q_half_carry; + + always @(posedge clk) begin
+ // + s_half_carry <= ((proc_cnt >= cnt_wren_out2_start) && (proc_cnt < cnt_wren_out2_stop)) ? + s_din[0] : 1'b0; + // + s_plus_q_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ? + r_din[0] : 1'b0;
+ // + end
+
+ // + // Data Mapper + // + assign r_dout = add32_s_plus_q_sum_out;
+ assign u_dout = {s_half_carry, s_din[31:1]}; + assign v_dout = {s_plus_q_half_carry, r_din[31:1]};
+
+ + //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+ //
+ // Output Flags
+ //
+ reg s_is_odd_reg;
+ reg k_is_nul_reg;
+
+ assign s_is_odd = s_is_odd_reg;
+ assign k_is_nul = k_is_nul_reg;
+
+ always @(posedge clk)
+ //
+ if (proc_cnt == cnt_calc_flags) begin
+ s_is_odd_reg <= s_din[0];
+ k_is_nul_reg <= (k == {K_NUM_BITS{1'b0}}) ? 1'b1 : 1'b0;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v new file mode 100644 index 0000000..ea5b854 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v @@ -0,0 +1,153 @@ +`timescale 1ns / 1ps
+
+module modinv_helper_reduce_update
+ (
+ clk, rst_n,
+ ena, rdy,
+
+ s_is_odd, k_is_nul,
+
+ s_addr, s_wren, s_dout,
+ u_addr, u_din,
+ v_addr, v_din
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ input wire s_is_odd;
+ input wire k_is_nul; +
+ output wire [BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_addr;
+
+ output wire s_wren;
+
+ output wire [ 32-1:0] s_dout;
+
+ input wire [ 32-1:0] u_din;
+ input wire [ 32-1:0] v_din;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [BUFFER_ADDR_BITS-1:0] addr_in;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ?
+ addr_in + 1'b1 : addr_in_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ?
+ addr_out + 1'b1 : addr_out_zero;
+
+ assign s_addr = addr_out;
+ assign u_addr = addr_in;
+ assign v_addr = addr_in;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment Logic
+ //
+ wire inc_addr_in;
+ wire inc_addr_out;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1;
+
+ assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
+ assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop);
+
+ always @(posedge clk) begin
+ //
+ if (inc_addr_in) addr_in <= addr_in_next;
+ else addr_in <= addr_in_zero;
+ //
+ if (inc_addr_out) addr_out <= addr_out_next;
+ else addr_out <= addr_out_zero;
+ //
+ end
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_out;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1;
+
+ assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop);
+
+ assign s_wren = wren_out && !k_is_nul; //s_wren_allow && !v_eq_1 && !rdy;
+
+
+ //
+ // Data Logic
+ //
+ assign s_dout = s_is_odd ? v_din : u_din;
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/modinv_clog2.v b/rtl/modular/modular_invertor/modinv_clog2.v new file mode 100644 index 0000000..2f7b64d --- /dev/null +++ b/rtl/modular/modular_invertor/modinv_clog2.v @@ -0,0 +1,10 @@ +function integer clog2;
+ input integer value;
+ integer result;
+ begin
+ value = value - 1;
+ for (result = 0; value > 0; result = result + 1)
+ value = value >> 1;
+ clog2 = result;
+ end
+endfunction
diff --git a/rtl/modular/modular_invertor/modular_invertor.v b/rtl/modular/modular_invertor/modular_invertor.v new file mode 100644 index 0000000..e9f2460 --- /dev/null +++ b/rtl/modular/modular_invertor/modular_invertor.v @@ -0,0 +1,981 @@ +//------------------------------------------------------------------------------ +// +// modular_invertor.v +// ----------------------------------------------------------------------------- +// Modular invertor.
+// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------
+
+module modular_invertor
+ (
+ clk, rst_n,
+ ena, rdy,
+ a_addr, q_addr, a1_addr, a1_wren, + a_din, q_din, a1_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter MAX_OPERAND_WIDTH = 256;
+
+
+ //
+ // clog2
+ //
+`include "modinv_clog2.v"
+
+
+ //
+ // More Parameters
+ //
+ localparam OPERAND_NUM_WORDS = MAX_OPERAND_WIDTH / 32;
+ localparam OPERAND_ADDR_BITS = clog2(OPERAND_NUM_WORDS);
+
+ localparam BUFFER_NUM_WORDS = OPERAND_NUM_WORDS + 1;
+ localparam BUFFER_ADDR_BITS = clog2(BUFFER_NUM_WORDS);
+
+ localparam LOOP_NUM_ROUNDS = 2 * MAX_OPERAND_WIDTH;
+ localparam ROUND_COUNTER_BITS = clog2(LOOP_NUM_ROUNDS);
+
+ localparam K_NUM_BITS = clog2(LOOP_NUM_ROUNDS + 1);
+
+
+ //
+ // Ports
+ // + input wire clk;
+ input wire rst_n;
+
+ input wire ena;
+ output wire rdy;
+
+ output wire [OPERAND_ADDR_BITS-1:0] a_addr;
+ output reg [OPERAND_ADDR_BITS-1:0] q_addr;
+ output wire [OPERAND_ADDR_BITS-1:0] a1_addr;
+ output wire a1_wren;
+ + input wire [32-1:0] a_din;
+ input wire [32-1:0] q_din;
+ output wire [32-1:0] a1_dout;
+
+
+ //
+ // "Redundant" Power of 2 (K)
+ //
+ reg [K_NUM_BITS-1:0] k;
+
+
+ //
+ // Buffers
+ //
+ reg [BUFFER_ADDR_BITS-1:0] buf_r_wr_addr;
+ reg [BUFFER_ADDR_BITS-1:0] buf_r_rd_addr;
+ reg buf_r_wr_en;
+ reg [ 32-1:0] buf_r_wr_din;
+ wire [ 32-1:0] buf_r_wr_dout;
+ wire [ 32-1:0] buf_r_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_r
+ ( .clk(clk), + .a_addr(buf_r_wr_addr), .a_out(buf_r_wr_dout), .a_wr(buf_r_wr_en), .a_in(buf_r_wr_din), + .b_addr(buf_r_rd_addr), .b_out(buf_r_rd_dout) + );
+
+ reg [BUFFER_ADDR_BITS-1:0] buf_s_wr_addr;
+ reg [BUFFER_ADDR_BITS-1:0] buf_s_rd_addr;
+ reg buf_s_wr_en;
+ reg [ 32-1:0] buf_s_wr_din;
+ wire [ 32-1:0] buf_s_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_s
+ ( .clk(clk), + .a_addr(buf_s_wr_addr), .a_out(), .a_wr(buf_s_wr_en), .a_in(buf_s_wr_din), + .b_addr(buf_s_rd_addr), .b_out(buf_s_rd_dout) + );
+
+ reg [BUFFER_ADDR_BITS-1:0] buf_u_wr_addr;
+ reg [BUFFER_ADDR_BITS-1:0] buf_u_rd_addr;
+ reg buf_u_wr_en;
+ reg [ 32-1:0] buf_u_wr_din;
+ wire [ 32-1:0] buf_u_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_u
+ ( .clk(clk), + .a_addr(buf_u_wr_addr), .a_out(), .a_wr(buf_u_wr_en), .a_in(buf_u_wr_din), + .b_addr(buf_u_rd_addr), .b_out(buf_u_rd_dout) + );
+
+ reg [BUFFER_ADDR_BITS-1:0] buf_v_wr_addr;
+ reg [BUFFER_ADDR_BITS-1:0] buf_v_rd_addr;
+ reg buf_v_wr_en;
+ reg [ 32-1:0] buf_v_wr_din;
+ wire [ 32-1:0] buf_v_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_v
+ ( .clk(clk), + .a_addr(buf_v_wr_addr), .a_out(), .a_wr(buf_v_wr_en), .a_in(buf_v_wr_din), + .b_addr(buf_v_rd_addr), .b_out(buf_v_rd_dout) + );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_rd_addr;
+ wire buf_r_dbl_wr_en;
+ wire [ 32-1:0] buf_r_dbl_wr_din;
+ wire [ 32-1:0] buf_r_dbl_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_r_dbl
+ ( .clk(clk), + .a_addr(buf_r_dbl_wr_addr), .a_out(), .a_wr(buf_r_dbl_wr_en), .a_in(buf_r_dbl_wr_din), + .b_addr(buf_r_dbl_rd_addr), .b_out(buf_r_dbl_rd_dout) + );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_rd_addr;
+ wire buf_s_dbl_wr_en;
+ wire [ 32-1:0] buf_s_dbl_wr_din;
+ wire [ 32-1:0] buf_s_dbl_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_s_dbl
+ ( .clk(clk), + .a_addr(buf_s_dbl_wr_addr), .a_out(), .a_wr(buf_s_dbl_wr_en), .a_in(buf_s_dbl_wr_din), + .b_addr(buf_s_dbl_rd_addr), .b_out(buf_s_dbl_rd_dout) + );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_rd_addr;
+ wire buf_r_plus_s_wr_en;
+ wire [ 32-1:0] buf_r_plus_s_wr_din;
+ wire [ 32-1:0] buf_r_plus_s_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_r_plus_s
+ ( .clk(clk), + .a_addr(buf_r_plus_s_wr_addr), .a_out(), .a_wr(buf_r_plus_s_wr_en), .a_in(buf_r_plus_s_wr_din), + .b_addr(buf_r_plus_s_rd_addr), .b_out(buf_r_plus_s_rd_dout) + );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_rd_addr;
+ wire buf_u_minus_v_wr_en;
+ wire [ 32-1:0] buf_u_minus_v_wr_din;
+ wire [ 32-1:0] buf_u_minus_v_wr_dout;
+
+ assign buf_u_minus_v_rd_addr = ~buf_u_minus_v_wr_addr;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_u_minus_v
+ ( .clk(clk), + .a_addr(buf_u_minus_v_wr_addr), .a_out(buf_u_minus_v_wr_dout), .a_wr(buf_u_minus_v_wr_en), .a_in(buf_u_minus_v_wr_din), + .b_addr(buf_u_minus_v_rd_addr), .b_out() + );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_rd_addr;
+ wire buf_v_minus_u_wr_en;
+ wire [ 32-1:0] buf_v_minus_u_wr_din;
+ wire [ 32-1:0] buf_v_minus_u_wr_dout;
+
+ assign buf_v_minus_u_rd_addr = ~buf_v_minus_u_wr_addr;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_v_minus_u
+ ( .clk(clk), + .a_addr(buf_v_minus_u_wr_addr), .a_out(buf_v_minus_u_wr_dout), .a_wr(buf_v_minus_u_wr_en), .a_in(buf_v_minus_u_wr_din), + .b_addr(buf_v_minus_u_rd_addr), .b_out() + );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_half_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_half_rd_addr;
+ wire buf_u_half_wr_en;
+ wire [ 32-1:0] buf_u_half_wr_din;
+ wire [ 32-1:0] buf_u_half_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_u_half
+ ( .clk(clk), + .a_addr(buf_u_half_wr_addr), .a_out(), .a_wr(buf_u_half_wr_en), .a_in(buf_u_half_wr_din), + .b_addr(buf_u_half_rd_addr), .b_out(buf_u_half_rd_dout) + );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_half_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_half_rd_addr;
+ wire buf_v_half_wr_en;
+ wire [ 32-1:0] buf_v_half_wr_din;
+ wire [ 32-1:0] buf_v_half_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_v_half
+ ( .clk(clk), + .a_addr(buf_v_half_wr_addr), .a_out(), .a_wr(buf_v_half_wr_en), .a_in(buf_v_half_wr_din), + .b_addr(buf_v_half_rd_addr), .b_out(buf_v_half_rd_dout) + );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_rd_addr;
+ wire buf_u_minus_v_half_wr_en;
+ wire [ 32-1:0] buf_u_minus_v_half_wr_din;
+ wire [ 32-1:0] buf_u_minus_v_half_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_u_minus_v_half
+ ( .clk(clk), + .a_addr(buf_u_minus_v_half_wr_addr), .a_out(), .a_wr(buf_u_minus_v_half_wr_en), .a_in(buf_u_minus_v_half_wr_din), + .b_addr(buf_u_minus_v_half_rd_addr), .b_out(buf_u_minus_v_half_rd_dout) + );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_rd_addr;
+ wire buf_v_minus_u_half_wr_en;
+ wire [ 32-1:0] buf_v_minus_u_half_wr_din;
+ wire [ 32-1:0] buf_v_minus_u_half_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + )
+ buf_v_minus_u_half
+ ( .clk(clk), + .a_addr(buf_v_minus_u_half_wr_addr), .a_out(), .a_wr(buf_v_minus_u_half_wr_en), .a_in(buf_v_minus_u_half_wr_din), + .b_addr(buf_v_minus_u_half_rd_addr), .b_out(buf_v_minus_u_half_rd_dout) + );
+
+
+ //
+ // Helper Modules
+ //
+ wire helper_init_ena;
+ wire helper_invert_precalc_ena;
+ wire helper_invert_compare_ena;
+ wire helper_invert_update_ena;
+ wire helper_reduce_precalc_ena;
+ wire helper_reduce_update_ena;
+ wire helper_copy_ena;
+
+ wire helper_init_rdy;
+ wire helper_invert_precalc_rdy;
+ wire helper_invert_compare_rdy;
+ wire helper_invert_update_rdy;
+ wire helper_reduce_precalc_rdy;
+ wire helper_reduce_update_rdy;
+ wire helper_copy_rdy;
+
+ wire helper_init_done = helper_init_rdy && !helper_init_ena;
+ wire helper_invert_precalc_done = helper_invert_precalc_rdy && !helper_invert_precalc_ena;
+ wire helper_invert_compare_done = helper_invert_compare_rdy && !helper_invert_compare_ena;
+ wire helper_invert_update_done = helper_invert_update_rdy && !helper_invert_update_ena;
+ wire helper_reduce_precalc_done = helper_reduce_precalc_rdy && !helper_reduce_precalc_ena;
+ wire helper_reduce_update_done = helper_reduce_update_rdy && !helper_reduce_update_ena;
+ wire helper_copy_done = helper_copy_rdy && !helper_copy_ena;
+
+
+ //
+ // Helper Module - Initialization
+ //
+ wire [ BUFFER_ADDR_BITS-1:0] helper_init_r_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_init_s_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_init_u_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_init_v_addr;
+ wire [OPERAND_ADDR_BITS-1:0] helper_init_q_addr;
+
+ wire helper_init_r_wren;
+ wire helper_init_s_wren;
+ wire helper_init_u_wren;
+ wire helper_init_v_wren;
+
+ wire [ 32-1:0] helper_init_r_data;
+ wire [ 32-1:0] helper_init_s_data;
+ wire [ 32-1:0] helper_init_u_data;
+ wire [ 32-1:0] helper_init_v_data;
+
+ modinv_helper_init #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
+
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_init
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_init_ena),
+ .rdy (helper_init_rdy),
+
+ .a_addr (a_addr),
+ .q_addr (helper_init_q_addr),
+
+ .r_addr (helper_init_r_addr),
+ .s_addr (helper_init_s_addr),
+ .u_addr (helper_init_u_addr),
+ .v_addr (helper_init_v_addr),
+
+ .q_din (q_din),
+ .a_din (a_din),
+
+ .r_dout (helper_init_r_data),
+ .s_dout (helper_init_s_data),
+ .u_dout (helper_init_u_data),
+ .v_dout (helper_init_v_data),
+
+ .r_wren (helper_init_r_wren),
+ .s_wren (helper_init_s_wren),
+ .u_wren (helper_init_u_wren),
+ .v_wren (helper_init_v_wren)
+ );
+
+
+ //
+ // Helper Module - Inversion Pre-Calculation
+ //
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_r_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_s_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_u_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_v_addr;
+
+ modinv_helper_invert_precalc #
+ (
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_invert_precalc
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_invert_precalc_ena),
+ .rdy (helper_invert_precalc_rdy),
+
+ .r_addr (helper_invert_precalc_r_addr),
+ .s_addr (helper_invert_precalc_s_addr),
+ .u_addr (helper_invert_precalc_u_addr),
+ .v_addr (helper_invert_precalc_v_addr),
+
+ .r_din (buf_r_rd_dout),
+ .s_din (buf_s_rd_dout),
+ .u_din (buf_u_rd_dout),
+ .v_din (buf_v_rd_dout),
+
+ .r_dbl_addr (buf_r_dbl_wr_addr),
+ .s_dbl_addr (buf_s_dbl_wr_addr),
+ .r_plus_s_addr (buf_r_plus_s_wr_addr),
+
+ .u_half_addr (buf_u_half_wr_addr),
+ .v_half_addr (buf_v_half_wr_addr),
+ .u_minus_v_addr (buf_u_minus_v_wr_addr),
+ .v_minus_u_addr (buf_v_minus_u_wr_addr),
+ .u_minus_v_half_addr (buf_u_minus_v_half_wr_addr),
+ .v_minus_u_half_addr (buf_v_minus_u_half_wr_addr),
+
+ .r_dbl_dout (buf_r_dbl_wr_din),
+ .s_dbl_dout (buf_s_dbl_wr_din),
+ .r_plus_s_dout (buf_r_plus_s_wr_din),
+
+ .u_half_dout (buf_u_half_wr_din),
+ .v_half_dout (buf_v_half_wr_din),
+ .u_minus_v_dout (buf_u_minus_v_wr_din),
+ .v_minus_u_dout (buf_v_minus_u_wr_din),
+ .u_minus_v_half_dout (buf_u_minus_v_half_wr_din),
+ .v_minus_u_half_dout (buf_v_minus_u_half_wr_din),
+
+ .r_dbl_wren (buf_r_dbl_wr_en),
+ .s_dbl_wren (buf_s_dbl_wr_en),
+ .r_plus_s_wren (buf_r_plus_s_wr_en),
+
+ .u_half_wren (buf_u_half_wr_en),
+ .v_half_wren (buf_v_half_wr_en),
+ .u_minus_v_wren (buf_u_minus_v_wr_en),
+ .v_minus_u_wren (buf_v_minus_u_wr_en),
+ .u_minus_v_half_wren (buf_u_minus_v_half_wr_en),
+ .v_minus_u_half_wren (buf_v_minus_u_half_wr_en),
+
+ .u_minus_v_din (buf_u_minus_v_wr_dout),
+ .v_minus_u_din (buf_v_minus_u_wr_dout)
+ );
+
+
+ //
+ // Helper Module - Inversion Comparison
+ //
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_u_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_v_addr;
+
+ wire flag_invert_u_gt_v;
+ wire flag_invert_v_eq_1; + wire flag_invert_u_is_even;
+ wire flag_invert_v_is_even;
+
+ modinv_helper_invert_compare #
+ (
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_invert_compare
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_invert_compare_ena),
+ .rdy (helper_invert_compare_rdy),
+
+ .u_addr (helper_invert_compare_u_addr),
+ .v_addr (helper_invert_compare_v_addr),
+
+ .u_din (buf_u_rd_dout),
+ .v_din (buf_v_rd_dout),
+
+ .u_gt_v (flag_invert_u_gt_v),
+ .v_eq_1 (flag_invert_v_eq_1), + .u_is_even (flag_invert_u_is_even),
+ .v_is_even (flag_invert_v_is_even)
+ );
+
+
+ //
+ // Helper Module - Inversion Update
+ //
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_r_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_s_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_u_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_v_addr;
+
+ wire helper_invert_update_r_wren;
+ wire helper_invert_update_s_wren;
+ wire helper_invert_update_u_wren;
+ wire helper_invert_update_v_wren;
+
+ wire [ 32-1:0] helper_invert_update_r_data;
+ wire [ 32-1:0] helper_invert_update_s_data;
+ wire [ 32-1:0] helper_invert_update_u_data;
+ wire [ 32-1:0] helper_invert_update_v_data;
+
+ modinv_helper_invert_update #
+ (
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_invert_update
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_invert_update_ena),
+ .rdy (helper_invert_update_rdy),
+
+ .u_gt_v (flag_invert_u_gt_v),
+ .v_eq_1 (flag_invert_v_eq_1), + .u_is_even (flag_invert_u_is_even),
+ .v_is_even (flag_invert_v_is_even),
+
+ .r_addr (helper_invert_update_r_addr),
+ .s_addr (helper_invert_update_s_addr),
+ .u_addr (helper_invert_update_u_addr),
+ .v_addr (helper_invert_update_v_addr),
+
+ .r_wren (helper_invert_update_r_wren),
+ .s_wren (helper_invert_update_s_wren),
+ .u_wren (helper_invert_update_u_wren),
+ .v_wren (helper_invert_update_v_wren),
+
+ .r_dout (helper_invert_update_r_data),
+ .s_dout (helper_invert_update_s_data),
+ .u_dout (helper_invert_update_u_data),
+ .v_dout (helper_invert_update_v_data),
+
+ .r_dbl_addr (buf_r_dbl_rd_addr),
+ .s_dbl_addr (buf_s_dbl_rd_addr),
+ .r_plus_s_addr (buf_r_plus_s_rd_addr),
+ .u_half_addr (buf_u_half_rd_addr),
+ .v_half_addr (buf_v_half_rd_addr),
+ .u_minus_v_half_addr (buf_u_minus_v_half_rd_addr),
+ .v_minus_u_half_addr (buf_v_minus_u_half_rd_addr),
+
+ .r_dbl_din (buf_r_dbl_rd_dout),
+ .s_dbl_din (buf_s_dbl_rd_dout),
+ .r_plus_s_din (buf_r_plus_s_rd_dout),
+ .u_half_din (buf_u_half_rd_dout),
+ .v_half_din (buf_v_half_rd_dout),
+ .u_minus_v_half_din (buf_u_minus_v_half_rd_dout),
+ .v_minus_u_half_din (buf_v_minus_u_half_rd_dout)
+ );
+
+
+ //
+ // Helper Module - Reduction Pre-Calculation
+ //
+ wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_r_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_s_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_u_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_v_addr;
+ wire [OPERAND_ADDR_BITS-1:0] helper_reduce_precalc_q_addr;
+
+ wire helper_reduce_precalc_r_wren;
+ wire helper_reduce_precalc_u_wren;
+ wire helper_reduce_precalc_v_wren;
+
+ wire [ 32-1:0] helper_reduce_precalc_r_data;
+ wire [ 32-1:0] helper_reduce_precalc_u_data;
+ wire [ 32-1:0] helper_reduce_precalc_v_data;
+
+ wire flag_reduce_s_is_odd;
+ wire flag_invert_k_is_nul; +
+ modinv_helper_reduce_precalc #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS),
+ .K_NUM_BITS (K_NUM_BITS)
+ )
+ helper_reduce_precalc
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_reduce_precalc_ena),
+ .rdy (helper_reduce_precalc_rdy),
+
+ .r_addr (helper_reduce_precalc_r_addr),
+ .s_addr (helper_reduce_precalc_s_addr),
+ .u_addr (helper_reduce_precalc_u_addr),
+ .v_addr (helper_reduce_precalc_v_addr),
+ .q_addr (helper_reduce_precalc_q_addr),
+
+ .k (k),
+
+ .s_is_odd (flag_reduce_s_is_odd),
+ .k_is_nul (flag_reduce_k_is_nul),
+
+ .r_din (buf_r_wr_dout),
+ .s_din (buf_s_rd_dout),
+ .q_din (q_din),
+
+ .r_wren (helper_reduce_precalc_r_wren),
+ .u_wren (helper_reduce_precalc_u_wren),
+ .v_wren (helper_reduce_precalc_v_wren),
+
+ .r_dout (helper_reduce_precalc_r_data),
+ .u_dout (helper_reduce_precalc_u_data),
+ .v_dout (helper_reduce_precalc_v_data)
+ );
+
+ //
+ // Helper Module - Reduction Update
+ //
+ wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_s_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_u_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_v_addr;
+
+ wire helper_reduce_update_s_wren;
+
+ wire [ 32-1:0] helper_reduce_update_s_data;
+
+ modinv_helper_reduce_update #
+ (
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_reduce_update
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_reduce_update_ena),
+ .rdy (helper_reduce_update_rdy),
+
+ .s_is_odd (flag_reduce_s_is_odd),
+ .k_is_nul (flag_reduce_k_is_nul),
+
+ .s_addr (helper_reduce_update_s_addr),
+ .u_addr (helper_reduce_update_u_addr),
+ .v_addr (helper_reduce_update_v_addr),
+
+ .s_wren (helper_reduce_update_s_wren),
+
+ .s_dout (helper_reduce_update_s_data),
+
+ .u_din (buf_u_rd_dout),
+ .v_din (buf_v_rd_dout)
+ );
+
+
+ //
+ // Helper Module - Copying
+ //
+ wire [BUFFER_ADDR_BITS-1:0] helper_copy_s_addr;
+
+ modinv_helper_copy #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
+
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_copy
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_copy_ena),
+ .rdy (helper_copy_rdy),
+
+ .s_addr (helper_copy_s_addr),
+ .a1_addr (a1_addr),
+
+ .s_din (buf_s_rd_dout),
+
+ .a1_dout (a1_dout),
+
+ .a1_wren (a1_wren)
+ );
+
+
+ //
+ // Round Counter
+ //
+ reg [ROUND_COUNTER_BITS-1:0] round_counter;
+ wire [ROUND_COUNTER_BITS-1:0] round_counter_max = LOOP_NUM_ROUNDS - 1;
+ wire [ROUND_COUNTER_BITS-1:0] round_counter_zero = {ROUND_COUNTER_BITS{1'b0}};
+ wire [ROUND_COUNTER_BITS-1:0] round_counter_next =
+ (round_counter < round_counter_max) ? round_counter + 1'b1 : round_counter_zero;
+
+
+ //
+ // FSM
+ //
+ localparam FSM_STATE_IDLE = 4'd0;
+
+ localparam FSM_STATE_INIT = 4'd1;
+
+ localparam FSM_STATE_INVERT_PRECALC = 4'd11;
+ localparam FSM_STATE_INVERT_COMPARE = 4'd12;
+ localparam FSM_STATE_INVERT_UPDATE = 4'd13;
+
+ localparam FSM_STATE_REDUCE_PRECALC = 4'd14;
+ localparam FSM_STATE_REDUCE_UPDATE = 4'd15;
+
+ localparam FSM_STATE_COPY = 4'd2;
+
+ localparam FSM_STATE_DONE = 4'd3;
+
+ reg [3:0] fsm_state = FSM_STATE_IDLE;
+ reg [3:0] fsm_state_dly = FSM_STATE_IDLE;
+
+ wire fsm_state_new = (fsm_state != fsm_state_dly);
+
+ wire [3:0] fsm_state_invert_next = (round_counter < round_counter_max) ?
+ FSM_STATE_INVERT_PRECALC : FSM_STATE_REDUCE_PRECALC;
+
+ wire [3:0] fsm_state_reduce_next = (round_counter < round_counter_max) ?
+ FSM_STATE_REDUCE_PRECALC : FSM_STATE_COPY;
+ + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; + else case (fsm_state)
+ FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_INIT : FSM_STATE_IDLE;
+ FSM_STATE_INIT: fsm_state <= helper_init_done ? FSM_STATE_INVERT_PRECALC : FSM_STATE_INIT;
+ FSM_STATE_INVERT_PRECALC: fsm_state <= helper_invert_precalc_done ? FSM_STATE_INVERT_COMPARE : FSM_STATE_INVERT_PRECALC;
+ FSM_STATE_INVERT_COMPARE: fsm_state <= helper_invert_compare_done ? FSM_STATE_INVERT_UPDATE : FSM_STATE_INVERT_COMPARE;
+ FSM_STATE_INVERT_UPDATE: fsm_state <= helper_invert_update_done ? fsm_state_invert_next : FSM_STATE_INVERT_UPDATE;
+ FSM_STATE_REDUCE_PRECALC: fsm_state <= helper_reduce_precalc_done ? FSM_STATE_REDUCE_UPDATE : FSM_STATE_REDUCE_PRECALC;
+ FSM_STATE_REDUCE_UPDATE: fsm_state <= helper_reduce_update_done ? fsm_state_reduce_next : FSM_STATE_REDUCE_UPDATE;
+ FSM_STATE_COPY: fsm_state <= helper_copy_done ? FSM_STATE_DONE : FSM_STATE_COPY;
+ FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE;
+ default: fsm_state <= FSM_STATE_IDLE; + endcase
+
+ always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state_dly <= FSM_STATE_IDLE; + else fsm_state_dly <= fsm_state;
+
+
+ assign helper_init_ena = (fsm_state == FSM_STATE_INIT) && fsm_state_new;
+ assign helper_invert_precalc_ena = (fsm_state == FSM_STATE_INVERT_PRECALC) && fsm_state_new;
+ assign helper_invert_compare_ena = (fsm_state == FSM_STATE_INVERT_COMPARE) && fsm_state_new;
+ assign helper_invert_update_ena = (fsm_state == FSM_STATE_INVERT_UPDATE) && fsm_state_new;
+ assign helper_reduce_precalc_ena = (fsm_state == FSM_STATE_REDUCE_PRECALC) && fsm_state_new;
+ assign helper_reduce_update_ena = (fsm_state == FSM_STATE_REDUCE_UPDATE) && fsm_state_new;
+ assign helper_copy_ena = (fsm_state == FSM_STATE_COPY) && fsm_state_new;
+
+
+ //
+ // Counter Increment
+ // + always @(posedge clk) begin
+ //
+ if ((fsm_state == FSM_STATE_INIT) && helper_init_done)
+ round_counter <= round_counter_zero;
+ //
+ if ((fsm_state == FSM_STATE_INVERT_UPDATE) && helper_invert_update_done)
+ round_counter <= round_counter_next;
+ //
+ if ((fsm_state == FSM_STATE_REDUCE_UPDATE) && helper_reduce_update_done)
+ round_counter <= round_counter_next;
+ //
+ end
+
+
+ //
+ // Q Address Selector
+ //
+ always @(*) begin
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: q_addr = helper_init_q_addr;
+ FSM_STATE_REDUCE_PRECALC: q_addr = helper_reduce_precalc_q_addr;
+ default: q_addr = {OPERAND_ADDR_BITS{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // Buffer Address Selector
+ //
+ always @(*) begin
+ //
+ // Write Ports
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_r_wr_addr = helper_init_r_addr;
+ FSM_STATE_INVERT_UPDATE: buf_r_wr_addr = helper_invert_update_r_addr;
+ FSM_STATE_REDUCE_PRECALC: buf_r_wr_addr = helper_reduce_precalc_r_addr;
+ default: buf_r_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_s_wr_addr = helper_init_s_addr;
+ FSM_STATE_INVERT_UPDATE: buf_s_wr_addr = helper_invert_update_s_addr;
+ FSM_STATE_REDUCE_UPDATE: buf_s_wr_addr = helper_reduce_update_s_addr;
+ default: buf_s_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_u_wr_addr = helper_init_u_addr;
+ FSM_STATE_INVERT_UPDATE: buf_u_wr_addr = helper_invert_update_u_addr;
+ FSM_STATE_REDUCE_PRECALC: buf_u_wr_addr = helper_reduce_precalc_u_addr;
+ default: buf_u_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_v_wr_addr = helper_init_v_addr;
+ FSM_STATE_INVERT_UPDATE: buf_v_wr_addr = helper_invert_update_v_addr;
+ FSM_STATE_REDUCE_PRECALC: buf_v_wr_addr = helper_reduce_precalc_v_addr;
+ default: buf_v_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ // Read Ports
+ //
+ case (fsm_state)
+ FSM_STATE_INVERT_PRECALC: buf_r_rd_addr = helper_invert_precalc_r_addr;
+ default: buf_r_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INVERT_PRECALC: buf_s_rd_addr = helper_invert_precalc_s_addr;
+ FSM_STATE_REDUCE_PRECALC: buf_s_rd_addr = helper_reduce_precalc_s_addr;
+ FSM_STATE_COPY: buf_s_rd_addr = helper_copy_s_addr;
+ default: buf_s_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INVERT_PRECALC: buf_u_rd_addr = helper_invert_precalc_u_addr;
+ FSM_STATE_INVERT_COMPARE: buf_u_rd_addr = helper_invert_compare_u_addr;
+ FSM_STATE_REDUCE_UPDATE: buf_u_rd_addr = helper_reduce_update_u_addr;
+ default: buf_u_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INVERT_PRECALC: buf_v_rd_addr = helper_invert_precalc_v_addr;
+ FSM_STATE_INVERT_COMPARE: buf_v_rd_addr = helper_invert_compare_v_addr;
+ FSM_STATE_REDUCE_UPDATE: buf_v_rd_addr = helper_reduce_update_v_addr;
+ default: buf_v_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // Buffer Write Enable Logic
+ //
+ always @(*) begin
+ //
+ // Write Ports
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_r_wr_en = helper_init_r_wren;
+ FSM_STATE_INVERT_UPDATE: buf_r_wr_en = helper_invert_update_r_wren;
+ FSM_STATE_REDUCE_PRECALC: buf_r_wr_en = helper_reduce_precalc_r_wren;
+ default: buf_r_wr_en = 1'b0;
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_s_wr_en = helper_init_s_wren;
+ FSM_STATE_INVERT_UPDATE: buf_s_wr_en = helper_invert_update_s_wren;
+ FSM_STATE_REDUCE_UPDATE: buf_s_wr_en = helper_reduce_update_s_wren;
+ default: buf_s_wr_en = 1'b0;
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_u_wr_en = helper_init_u_wren;
+ FSM_STATE_INVERT_UPDATE: buf_u_wr_en = helper_invert_update_u_wren;
+ FSM_STATE_REDUCE_PRECALC: buf_u_wr_en = helper_reduce_precalc_u_wren;
+ default: buf_u_wr_en = 1'b0;
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_v_wr_en = helper_init_v_wren;
+ FSM_STATE_INVERT_UPDATE: buf_v_wr_en = helper_invert_update_v_wren;
+ FSM_STATE_REDUCE_PRECALC: buf_v_wr_en = helper_reduce_precalc_v_wren;
+ default: buf_v_wr_en = 1'b0;
+ endcase
+ //
+ end
+
+
+ //
+ // Buffer Write Data Selector
+ //
+ always @(*) begin
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_r_wr_din = helper_init_r_data;
+ FSM_STATE_INVERT_UPDATE: buf_r_wr_din = helper_invert_update_r_data;
+ FSM_STATE_REDUCE_PRECALC: buf_r_wr_din = helper_reduce_precalc_r_data;
+ default: buf_r_wr_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_s_wr_din = helper_init_s_data;
+ FSM_STATE_INVERT_UPDATE: buf_s_wr_din = helper_invert_update_s_data;
+ FSM_STATE_REDUCE_UPDATE: buf_s_wr_din = helper_reduce_update_s_data;
+ default: buf_s_wr_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_u_wr_din = helper_init_u_data;
+ FSM_STATE_INVERT_UPDATE: buf_u_wr_din = helper_invert_update_u_data;
+ FSM_STATE_REDUCE_PRECALC: buf_u_wr_din = helper_reduce_precalc_u_data;
+ default: buf_u_wr_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_v_wr_din = helper_init_v_data;
+ FSM_STATE_INVERT_UPDATE: buf_v_wr_din = helper_invert_update_v_data;
+ FSM_STATE_REDUCE_PRECALC: buf_v_wr_din = helper_reduce_precalc_v_data;
+ default: buf_v_wr_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // Ready Logic
+ //
+ reg rdy_reg = 1'b1;
+
+ assign rdy = rdy_reg;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) rdy_reg <= 1'b1;
+ else begin
+
+ /* clear */
+ if (rdy && ena) rdy_reg <= 1'b0;
+
+ /* set */
+ if (!rdy && (fsm_state == FSM_STATE_DONE)) rdy_reg <= 1'b1;
+
+ end
+
+
+ //
+ // Store Redundant Power of 2 (K)
+ //
+ always @(posedge clk)
+ //
+ if (helper_init_ena)
+ k <= {K_NUM_BITS{1'b0}};
+ else begin
+
+ if (helper_invert_update_ena && !flag_invert_v_eq_1)
+ k <= k + 1'b1;
+
+ if (helper_reduce_update_ena && (k != {K_NUM_BITS{1'b0}}))
+ k <= k - 1'b1;
+
+ end
+
+endmodule
+
+
+//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_multiplier_256.v b/rtl/modular/modular_multiplier_256.v new file mode 100644 index 0000000..8487aee --- /dev/null +++ b/rtl/modular/modular_multiplier_256.v @@ -0,0 +1,402 @@ +//------------------------------------------------------------------------------
+//
+// modular_multiplier_256.v
+// -----------------------------------------------------------------------------
+// Modular multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_multiplier_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ a_addr, b_addr, n_addr, p_addr, p_wren,
+ a_din, b_din, n_din, p_dout
+ );
+
+
+ //
+ // Constants
+ //
+ localparam OPERAND_NUM_WORDS = 8;
+ localparam WORD_COUNTER_WIDTH = 3;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
+ WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
+ end
+ endfunction
+
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+ WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word
+ output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
+ output wire p_wren; // store current P word now
+
+ input wire [ 31:0] a_din; // A
+ input wire [ 31:0] b_din; // B
+ input wire [ 31:0] n_din; // N (must be P-256!)
+ output wire [ 31:0] p_dout; // P = A * B mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] index_a;
+ reg [WORD_COUNTER_WIDTH-1:0] index_b;
+
+ /* map registers to output ports */
+ assign a_addr = index_a;
+ assign b_addr = index_b;
+
+ //
+ // FSM
+ //
+ localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+ wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+ wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+ wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
+ wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
+ wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];
+
+ wire inc_index_a = |fsm_shreg_inc_index_a;
+ wire store_word_a = |fsm_shreg_store_word_a;
+ wire inc_index_b = |fsm_shreg_inc_index_b;
+ wire clear_mac_ab = |fsm_shreg_inc_index_b;
+ wire shift_wide_a = |fsm_shreg_inc_index_b;
+ wire enable_mac_ab = |fsm_shreg_inc_index_b;
+ wire store_si_msb = |fsm_shreg_store_si_msb;
+ wire store_si_lsb = fsm_shreg_store_si_lsb;
+ wire shift_si = |fsm_shreg_shift_si;
+ wire mask_cw1_sum = fsm_shreg_mask_cw1_sum;
+ wire store_c_word = |fsm_shreg_store_c_word;
+ wire reduce_start = fsm_shreg_reduce_start;
+ wire reduce_stop = fsm_shreg_reduce_stop;
+
+
+ //
+ // FSM Logic
+ //
+ wire reduce_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy)
+ fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else if (!reduce_stop || reduce_done)
+ fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ reg index_b_ff;
+
+ always @(posedge clk)
+ //
+ if (inc_index_b) index_b_ff <= ~index_b_ff;
+ else index_b_ff <= 1'b0;
+
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ index_a <= WORD_INDEX_ZERO;
+ index_b <= WORD_INDEX_LAST;
+ //
+ end else begin
+ //
+ if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
+ if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
+ //
+ end
+
+
+ //
+ // Wide Operand Buffer
+ //
+ reg [255:0] buf_a_wide;
+
+ always @(posedge clk)
+ //
+ if (store_word_a)
+ buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]};
+ else if (shift_wide_a)
+ buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
+
+
+ //
+ // Multiplier Array
+ //
+ wire mac_inhibit; // control signal to pause all accumulators
+
+ wire [46: 0] mac[0:15]; // outputs of all accumulators
+ reg [15: 0] mac_clear; // individual per-accumulator clear flag
+
+ assign mac_inhibit = ~enable_mac_ab;
+
+ always @(posedge clk)
+ //
+ if (!clear_mac_ab)
+ mac_clear <= {16{1'b1}};
+ else begin
+
+ if (mac_clear == {16{1'b1}})
+ mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}};
+ else
+ mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}};
+
+
+ end
+
+ //
+ // Array of parallel multipliers
+ //
+ genvar i;
+ generate for (i=0; i<16; i=i+1)
+ begin : gen_mac_array
+ //
+ mac16_wrapper mac16_inst
+ (
+ .clk (clk),
+ .ce (~mac_inhibit),
+
+ .clr (mac_clear[i]),
+
+ .a (buf_a_wide[16*i+:16]),
+ .b (index_b_ff ? b_din[15:0] : b_din[31:16]),
+ .s (mac[i])
+ );
+ //
+ end
+ endgenerate
+
+ //
+ // Intermediate Words
+ //
+ reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
+ reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;
+
+
+ wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
+ wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;
+
+ generate for (i=0; i<16; i=i+1)
+ begin : gen_si_lsb_new
+ assign si_lsb_new[47*i+:47] = mac[15-i];
+ end
+ endgenerate
+
+ generate for (i=1; i<16; i=i+1)
+ begin : gen_si_msb_new
+ assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47];
+ end
+ endgenerate
+
+ always @(posedge clk) begin
+ //
+ if (shift_si) begin
+ si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
+ si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
+ end else begin
+
+ if (store_si_msb)
+ si_msb <= si_msb_new;
+
+ if (store_si_lsb)
+ si_lsb <= si_lsb_new;
+ end
+
+ end
+
+
+ //
+ // Accumulators
+ //
+ wire [46: 0] add48_cw0_s;
+ wire [46: 0] add48_cw1_s;
+
+
+ //
+ // cw0, b, cw1, b
+ //
+ reg [30: 0] si_prev_dly;
+ reg [15: 0] si_next_dly;
+
+ always @(posedge clk)
+ //
+ if (shift_si)
+ si_prev_dly <= si_lsb[93:63];
+ else
+ si_prev_dly <= {31{1'b0}};
+
+ always @(posedge clk)
+ //
+ si_next_dly <= si_lsb[62:47];
+
+ wire [46: 0] add48_cw0_a = si_lsb[46:0];
+ wire [46: 0] add48_cw0_b = {{16{1'b0}}, si_prev_dly};
+
+ wire [46: 0] add48_cw1_a = add48_cw0_s;
+ wire [46: 0] add48_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add48_cw1_s[46:32]}};
+
+ adder47_wrapper add48_cw0_inst
+ (
+ .clk (clk),
+ .a (add48_cw0_a),
+ .b (add48_cw0_b),
+ .s (add48_cw0_s)
+ );
+
+ adder47_wrapper add48_cw1_inst
+ (
+ .clk (clk),
+ .a (add48_cw1_a),
+ .b (add48_cw1_b),
+ .s (add48_cw1_s)
+ );
+
+
+
+ //
+ // Full-Size Product
+ //
+ reg [WORD_COUNTER_WIDTH:0] bram_c_addr;
+
+ wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
+ wire [ 31:0] reduce_c_word;
+
+ always @(posedge clk)
+ //
+ if (store_c_word)
+ bram_c_addr <= bram_c_addr + 1'b1;
+ else
+ bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}};
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1)
+ )
+ bram_c_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_c_addr),
+ .a_wr (store_c_word),
+ .a_in (add48_cw1_s[31:0]),
+ .a_out (),
+
+ .b_addr (reduce_c_addr),
+ .b_out (reduce_c_word)
+ );
+
+
+ //
+ // Reduction Stage
+ //
+ modular_reductor_256 reduce_256_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_done),
+
+ .x_addr (reduce_c_addr),
+ .n_addr (n_addr),
+ .p_addr (p_addr),
+ .p_wren (p_wren),
+
+ .x_din (reduce_c_word),
+ .n_din (n_din),
+ .p_dout (p_dout)
+ );
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_reductor_256.v b/rtl/modular/modular_reductor_256.v new file mode 100644 index 0000000..774f42e --- /dev/null +++ b/rtl/modular/modular_reductor_256.v @@ -0,0 +1,666 @@ +//------------------------------------------------------------------------------ +// +// modular_reductor_256.v +// ----------------------------------------------------------------------------- +// Modular reductor.
+// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------
+
+module modular_reductor_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ x_addr, n_addr, p_addr, p_wren,
+ x_din, n_din, p_dout
+ );
+
+ // + // Constants + // + localparam OPERAND_NUM_WORDS = 8;
+ localparam WORD_COUNTER_WIDTH = 3;
+
+
+ // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1;
+
+
+ // + // Handy Functions + // + function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST; + input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? + WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end + endfunction
+
+
+ //
+ // Ports
+ // + input wire clk; // system clock
+ input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output
+ + output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word + output wire p_wren; // store current P word now
+ + input wire [ 31:0] x_din; // X
+ input wire [ 31:0] n_din; // N (must be P-256!)
+ output wire [ 31:0] p_dout; // P = X mod N
+
+
+ // + // Word Indices + // + reg [WORD_COUNTER_WIDTH:0] index_x;
+ + + /* map registers to output ports */ + assign x_addr = index_x;
+
+ + // + // FSM
+ //
+ localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
+ wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
+ wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5];
+
+ wire [5-1:0] fsm_shreg_reduce_stage_start;
+ wire [5-1:0] fsm_shreg_reduce_stage_stop;
+
+ genvar s;
+ generate for (s=0; s<5; s=s+1)
+ begin : gen_fsm_shreg_reduce_stages
+ assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1];
+ assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2];
+ end
+ endgenerate
+
+ wire inc_index_x = |fsm_shreg_inc_index_x;
+ wire store_word_z = |fsm_shreg_store_word_z;
+ wire reduce_start = |fsm_shreg_reduce_stage_start;
+ wire reduce_stop = |fsm_shreg_reduce_stage_stop;
+ wire store_p = fsm_shreg_reduce_stage_stop[0];
+
+
+ wire reduce_adder0_done;
+ wire reduce_adder1_done;
+ wire reduce_subtractor_done;
+
+ wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
+
+ always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy)
+ //
+ fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ // + else if (!reduce_stop || reduce_done_all)
+ //
+ fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end
+
+
+ // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) + // + index_x <= WORD_INDEX_LAST; + // + else if (inc_index_x)
+ //
+ index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
+
+
+ //
+ // Look-up Table
+ //
+
+ // TODO: Explain s5!!!
+
+ reg [9*WORD_COUNTER_WIDTH-1:0] z_addr; //
+ reg [9 -1:0] z_wren; //
+ reg [9 -1:0] z_mask; // mask input to store zero word
+ reg [9 -1:0] z_save; // save previous word once again
+
+ always @(posedge clk) + //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // s9 s8 s7 s6 s5 s4 s3 s2 s1
+ // || || || || || || || || ||
+ 4'd00: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd00};
+ 4'd01: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd01};
+ 4'd02: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd02};
+ 4'd03: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd03};
+ 4'd04: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd04};
+ 4'd05: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd05};
+ 4'd06: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd06};
+ 4'd07: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd07};
+ 4'd08: z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dxx};
+ 4'd09: z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dxx};
+ 4'd10: z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dxx};
+ 4'd11: z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dxx};
+ 4'd12: z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dxx};
+ 4'd13: z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx};
+ 4'd14: z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx};
+ 4'd15: z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx};
+ // + default: z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}};
+ // + endcase
+
+ always @(posedge clk)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd04: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd08: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd09: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd10: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd11: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ // + default: z_wren <= {9{1'b0}};
+ // + endcase
+
+ always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd08: z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd10: z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd11: z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0};
+ 4'd12: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+ 4'd13: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+ 4'd14: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd15: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ // + default: z_mask <= {9{1'bX}};
+ // + endcase
+ + always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd01: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ // + default: z_save <= {9{1'bX}};
+ // + endcase
+
+
+ //
+ // Intermediate Numbers
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] reduce_z_addr[1:9];
+ wire [ 32-1:0] reduce_z_dout[1:9];
+
+ reg [31: 0] x_din_dly;
+ always @(posedge clk)
+ //
+ x_din_dly <= x_din;
+
+
+ genvar z;
+ generate for (z=1; z<=9; z=z+1)
+ //
+ begin : gen_z_bram
+ //
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_c_inst + ( + .clk (clk), + + .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]), + .a_wr (z_wren[z-1] & store_word_z), + .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)), + .a_out (), + + .b_addr (reduce_z_addr[z]), + .b_out (reduce_z_dout[z]) + );
+ //
+ end
+ //
+ endgenerate
+
+
+
+
+ wire [ 32-1:0] bram_sum0_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr;
+ wire bram_sum0_wr_wren;
+
+ wire [ 32-1:0] bram_sum1_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr;
+ wire bram_sum1_wr_wren;
+
+ wire [ 32-1:0] bram_diff_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr;
+ wire bram_diff_wr_wren;
+
+ wire [ 32-1:0] bram_sum0_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr;
+
+ wire [ 32-1:0] bram_sum1_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr;
+
+ wire [ 32-1:0] bram_diff_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr;
+
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_sum0_inst + ( + .clk (clk), + + .a_addr (bram_sum0_wr_addr), + .a_wr (bram_sum0_wr_wren), + .a_in (bram_sum0_wr_din), + .a_out (), + + .b_addr (bram_sum0_rd_addr), + .b_out (bram_sum0_rd_dout) + );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_sum1_inst + ( + .clk (clk), + + .a_addr (bram_sum1_wr_addr), + .a_wr (bram_sum1_wr_wren), + .a_in (bram_sum1_wr_din), + .a_out (), + + .b_addr (bram_sum1_rd_addr), + .b_out (bram_sum1_rd_dout) + );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_diff_inst + ( + .clk (clk), + + .a_addr (bram_diff_wr_addr), + .a_wr (bram_diff_wr_wren), + .a_in (bram_diff_wr_din), + .a_out (), + + .b_addr (bram_diff_rd_addr), + .b_out (bram_diff_rd_dout) + );
+
+
+
+ wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr;
+
+ reg [ 32-1:0] adder0_a_din;
+ reg [ 32-1:0] adder0_b_din;
+
+ reg [ 32-1:0] adder1_a_din;
+ reg [ 32-1:0] adder1_b_din;
+
+ reg [ 32-1:0] subtractor_a_din;
+ reg [ 32-1:0] subtractor_b_din;
+
+ // n_addr - only 1 output, because all modules are in sync
+
+ modular_adder adder_inst0
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_adder0_done),
+
+ .ab_addr (adder0_ab_addr),
+ .n_addr (),
+ .s_addr (bram_sum0_wr_addr),
+ .s_wren (bram_sum0_wr_wren),
+ + .a_din (adder0_a_din),
+ .b_din (adder0_b_din),
+ .n_din (n_din),
+ .s_dout (bram_sum0_wr_din)
+ );
+
+ modular_adder adder_inst1
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_adder1_done),
+
+ .ab_addr (adder1_ab_addr),
+ .n_addr (),
+ .s_addr (bram_sum1_wr_addr),
+ .s_wren (bram_sum1_wr_wren),
+ + .a_din (adder1_a_din),
+ .b_din (adder1_b_din),
+ .n_din (n_din),
+ .s_dout (bram_sum1_wr_din)
+ );
+
+ modular_subtractor subtractor_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_subtractor_done),
+
+ .ab_addr (subtractor_ab_addr),
+ .n_addr (n_addr),
+ .d_addr (bram_diff_wr_addr),
+ .d_wren (bram_diff_wr_wren),
+ + .a_din (subtractor_a_din),
+ .b_din (subtractor_b_din),
+ .n_din (n_din),
+ .d_dout (bram_diff_wr_din)
+ );
+
+
+ //
+ // address
+ //
+ always @(*)
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ //
+ 5'b10000: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = adder0_ab_addr;
+ reduce_z_addr[3] = adder1_ab_addr;
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = subtractor_ab_addr;
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ 5'b01000: begin
+ reduce_z_addr[1] = adder0_ab_addr;
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = adder1_ab_addr;
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = subtractor_ab_addr;
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder1_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00100: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = adder0_ab_addr;
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = subtractor_ab_addr;
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder1_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00010: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = subtractor_ab_addr;
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder0_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00001: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = adder0_ab_addr;
+ end
+ //
+ default: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ endcase
+
+
+
+ //
+ // adder 0
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder0_a_din = reduce_z_dout[2];
+ 5'b01000: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00100: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00010: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00001: adder0_a_din = bram_sum0_rd_dout;
+ default: adder0_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder0_b_din = reduce_z_dout[2];
+ 5'b01000: adder0_b_din = reduce_z_dout[1];
+ 5'b00100: adder0_b_din = reduce_z_dout[5];
+ 5'b00010: adder0_b_din = bram_sum1_rd_dout;
+ 5'b00001: adder0_b_din = bram_diff_rd_dout;
+ default: adder0_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+ //
+ // adder 1
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder1_a_din = reduce_z_dout[3];
+ 5'b01000: adder1_a_din = bram_sum1_rd_dout;
+ 5'b00100: adder1_a_din = bram_sum1_rd_dout;
+ 5'b00010: adder1_a_din = {32{1'bX}};
+ 5'b00001: adder1_a_din = {32{1'bX}};
+ default: adder1_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder1_b_din = reduce_z_dout[3];
+ 5'b01000: adder1_b_din = reduce_z_dout[4];
+ 5'b00100: adder1_b_din = {32{1'b0}};
+ 5'b00010: adder1_b_din = {32{1'bX}};
+ 5'b00001: adder1_b_din = {32{1'bX}};
+ default: adder1_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // subtractor
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: subtractor_a_din = {32{1'b0}};
+ 5'b01000: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00100: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00010: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00001: subtractor_a_din = {32{1'bX}};
+ default: subtractor_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: subtractor_b_din = reduce_z_dout[6];
+ 5'b01000: subtractor_b_din = reduce_z_dout[7];
+ 5'b00100: subtractor_b_din = reduce_z_dout[8];
+ 5'b00010: subtractor_b_din = reduce_z_dout[9];
+ 5'b00001: subtractor_b_din = {32{1'bX}};
+ default: subtractor_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+
+
+
+ assign p_addr = bram_sum0_wr_addr; + assign p_wren = bram_sum0_wr_wren & store_p;
+ assign p_dout = bram_sum0_wr_din;
+
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_subtractor.v b/rtl/modular/modular_subtractor.v new file mode 100644 index 0000000..322aec4 --- /dev/null +++ b/rtl/modular/modular_subtractor.v @@ -0,0 +1,292 @@ +//------------------------------------------------------------------------------ +// +// modular_subtractor.v +// ----------------------------------------------------------------------------- +// Modular subtractor.
+// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------
+
+module modular_subtractor
+ (
+ clk, rst_n,
+ ena, rdy,
+ ab_addr, n_addr, d_addr, d_wren, + a_din, b_din, n_din, d_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter OPERAND_NUM_WORDS = 8;
+ parameter WORD_COUNTER_WIDTH = 3;
+
+
+ // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+ // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction
+
+
+ // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output +
+ output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] d_addr; // index of current D word
+ output wire d_wren; // store current D word now
+ + input wire [ 31:0] a_din; // A + input wire [ 31:0] b_din; // B + input wire [ 31:0] n_din; // N + output wire [ 31:0] d_dout; // D = (A - B) mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] index_ab;
+ reg [WORD_COUNTER_WIDTH-1:0] index_n;
+ reg [WORD_COUNTER_WIDTH-1:0] index_d;
+
+ /* map registers to output ports */
+ assign ab_addr = index_ab;
+ assign n_addr = index_n;
+ assign d_addr = index_d;
+
+
+ //
+ // Subtractor
+ //
+ wire [31: 0] sub32_d;
+ wire sub32_b_in;
+ wire sub32_b_out;
+
+ subtractor32_wrapper subtractor32
+ ( + .clk (clk), + .a (a_din), + .b (b_din), + .d (sub32_d), + .b_in (sub32_b_in), + .b_out (sub32_b_out) + );
+
+
+ //
+ // Adder
+ //
+ wire [31: 0] add32_s;
+ wire add32_c_in;
+ wire add32_c_out;
+
+ adder32_wrapper adder32
+ ( + .clk (clk), + .a (sub32_d), + .b (n_din), + .s (add32_s), + .c_in (add32_c_in), + .c_out (add32_c_out) + );
+
+
+ //
+ // FSM
+ //
+
+ localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)];
+
+ wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
+
+ wire inc_index_ab = |fsm_shreg_inc_index_ab;
+ wire inc_index_n = |fsm_shreg_inc_index_n;
+ wire store_dif_ab = |fsm_shreg_store_dif_ab;
+ wire store_dif_ab_n = |fsm_shreg_store_dif_ab_n;
+ wire store_data_d = |fsm_shreg_store_data_d;
+ wire inc_index_d = |fsm_shreg_inc_index_d;
+
+ always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Borrow & Carry Masking Logic
+ //
+ reg sub32_b_mask;
+ reg add32_c_mask;
+
+
+ always @(posedge clk) begin
+ //
+ sub32_b_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
+ add32_c_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
+ //
+ end
+
+ assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
+ assign add32_c_in = add32_c_out & ~add32_c_mask;
+
+
+
+ //
+ // Borrow & Carry Latch Logic
+ //
+ reg sub32_borrow_latch;
+
+ always @(posedge clk) begin
+ //
+ if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out;
+ //
+ end
+
+
+ //
+ // Intermediate Results
+ //
+ reg [32*OPERAND_NUM_WORDS-1:0] d_ab;
+ reg [32*OPERAND_NUM_WORDS-1:0] d_ab_n;
+
+ always @(posedge clk)
+ //
+ if (store_data_d) begin
+ //
+ d_ab <= {{32{1'bX}}, d_ab[32*OPERAND_NUM_WORDS-1:32]};
+ d_ab_n <= {{32{1'bX}}, d_ab_n[32*OPERAND_NUM_WORDS-1:32]};
+ //
+ end else begin
+ //
+ if (store_dif_ab) d_ab <= {sub32_d, d_ab[32*OPERAND_NUM_WORDS-1:32]};
+ if (store_dif_ab_n) d_ab_n <= {add32_s, d_ab_n[32*OPERAND_NUM_WORDS-1:32]};
+ //
+ end
+
+
+ // + // Word Index Increment Logic + //
+ always @(posedge clk) + //
+ if (rdy) begin
+ //
+ index_ab <= WORD_INDEX_ZERO;
+ index_n <= WORD_INDEX_ZERO;
+ index_d <= WORD_INDEX_ZERO;
+ //
+ end else begin
+ //
+ if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab);
+ if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n);
+ if (inc_index_d) index_d <= WORD_INDEX_NEXT_OR_ZERO(index_d);
+ //
+ end
+
+
+ //
+ // Output Sum Selector
+ //
+ wire mux_select_ab_n = sub32_borrow_latch;
+
+
+ //
+ // Output Data and Write Enable Logic
+ //
+ reg d_wren_reg;
+ reg [31: 0] d_dout_reg;
+ wire [31: 0] d_dout_mux = mux_select_ab_n ? d_ab_n[31:0] : d_ab[31:0];
+
+ assign d_wren = d_wren_reg;
+ assign d_dout = d_dout_reg;
+
+ always @(posedge clk) + //
+ if (rdy) begin
+ //
+ d_wren_reg <= 1'b0;
+ d_dout_reg <= {32{1'bX}};
+ //
+ end else begin
+ //
+ d_wren_reg <= store_data_d;
+ d_dout_reg <= store_data_d ? d_dout_mux : {32{1'bX}};
+ //
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------
|