From 25e338149fdb8e06c82d99600769a8498a85ef2c Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Mon, 31 Oct 2016 00:14:51 +0300 Subject: Initial commit of base point multiplier core for ECDSA curve P-256. --- bench/tb_curve_adder_256.v | 420 +++++++++ bench/tb_curve_doubler_256.v | 409 +++++++++ bench/tb_curve_multiplier_256.v | 281 ++++++ bench/tb_lowlevel_adder32.v | 175 ++++ bench/tb_lowlevel_adder47.v | 151 ++++ bench/tb_lowlevel_subtractor32.v | 174 ++++ bench/tb_modular_adder.v | 357 ++++++++ bench/tb_modular_invertor.v | 226 +++++ bench/tb_modular_multiplier_256.v | 366 ++++++++ bench/tb_modular_subtractor.v | 356 ++++++++ bench/tb_mw_comparator.v | 322 +++++++ bench/tb_mw_mover.v | 282 ++++++ rtl/curve/curve_dbl_add_256.v | 868 ++++++++++++++++++ rtl/curve/curve_mul_256.v | 720 +++++++++++++++ rtl/curve/rom/brom_p256_delta.v | 68 ++ rtl/curve/rom/brom_p256_g_x.v | 68 ++ rtl/curve/rom/brom_p256_g_y.v | 68 ++ rtl/curve/rom/brom_p256_h_x.v | 68 ++ rtl/curve/rom/brom_p256_h_y.v | 68 ++ rtl/curve/rom/brom_p256_one.v | 68 ++ rtl/curve/rom/brom_p256_q.v | 68 ++ rtl/curve/rom/brom_p256_zero.v | 70 ++ rtl/curve/uop/uop_add_rom.v | 66 ++ rtl/curve/uop/uop_conv_rom.v | 38 + rtl/curve/uop/uop_dbl_rom.v | 58 ++ rtl/curve/uop/uop_init_rom.v | 33 + rtl/curve/uop_ecdsa.v | 50 ++ rtl/ecdsa256.v | 160 ++++ rtl/ecdsa256_wrapper.v | 177 ++++ rtl/lowlevel/adder32_wrapper.v | 73 ++ rtl/lowlevel/adder47_wrapper.v | 69 ++ rtl/lowlevel/artix7/adder32_artix7.v | 96 ++ rtl/lowlevel/artix7/adder47_artix7.v | 91 ++ rtl/lowlevel/artix7/dsp48e1_wrapper.v | 159 ++++ rtl/lowlevel/artix7/mac16_artix7.v | 90 ++ rtl/lowlevel/artix7/subtractor32_artix7.v | 94 ++ rtl/lowlevel/ecdsa_lowlevel_settings.v | 17 + rtl/lowlevel/mac16_wrapper.v | 75 ++ rtl/lowlevel/subtractor32_wrapper.v | 72 ++ rtl/modular/modular_adder.v | 298 +++++++ .../modular_invertor/helper/modinv_helper_copy.v | 148 ++++ .../modular_invertor/helper/modinv_helper_init.v | 172 ++++ .../helper/modinv_helper_invert_compare.v | 286 ++++++ .../helper/modinv_helper_invert_precalc.v | 408 +++++++++ .../helper/modinv_helper_invert_update.v | 257 ++++++ .../helper/modinv_helper_reduce_precalc.v | 328 +++++++ .../helper/modinv_helper_reduce_update.v | 153 ++++ rtl/modular/modular_invertor/modinv_clog2.v | 10 + rtl/modular/modular_invertor/modular_invertor.v | 981 +++++++++++++++++++++ rtl/modular/modular_multiplier_256.v | 402 +++++++++ rtl/modular/modular_reductor_256.v | 666 ++++++++++++++ rtl/modular/modular_subtractor.v | 292 ++++++ rtl/multiword/mw_comparator.v | 220 +++++ rtl/multiword/mw_mover.v | 175 ++++ rtl/util/bram_1rw_1ro_readfirst.v | 101 +++ 55 files changed, 11968 insertions(+) create mode 100644 bench/tb_curve_adder_256.v create mode 100644 bench/tb_curve_doubler_256.v create mode 100644 bench/tb_curve_multiplier_256.v create mode 100644 bench/tb_lowlevel_adder32.v create mode 100644 bench/tb_lowlevel_adder47.v create mode 100644 bench/tb_lowlevel_subtractor32.v create mode 100644 bench/tb_modular_adder.v create mode 100644 bench/tb_modular_invertor.v create mode 100644 bench/tb_modular_multiplier_256.v create mode 100644 bench/tb_modular_subtractor.v create mode 100644 bench/tb_mw_comparator.v create mode 100644 bench/tb_mw_mover.v create mode 100644 rtl/curve/curve_dbl_add_256.v create mode 100644 rtl/curve/curve_mul_256.v create mode 100644 rtl/curve/rom/brom_p256_delta.v create mode 100644 rtl/curve/rom/brom_p256_g_x.v create mode 100644 rtl/curve/rom/brom_p256_g_y.v create mode 100644 rtl/curve/rom/brom_p256_h_x.v create mode 100644 rtl/curve/rom/brom_p256_h_y.v create mode 100644 rtl/curve/rom/brom_p256_one.v create mode 100644 rtl/curve/rom/brom_p256_q.v create mode 100644 rtl/curve/rom/brom_p256_zero.v create mode 100644 rtl/curve/uop/uop_add_rom.v create mode 100644 rtl/curve/uop/uop_conv_rom.v create mode 100644 rtl/curve/uop/uop_dbl_rom.v create mode 100644 rtl/curve/uop/uop_init_rom.v create mode 100644 rtl/curve/uop_ecdsa.v create mode 100644 rtl/ecdsa256.v create mode 100644 rtl/ecdsa256_wrapper.v create mode 100644 rtl/lowlevel/adder32_wrapper.v create mode 100644 rtl/lowlevel/adder47_wrapper.v create mode 100644 rtl/lowlevel/artix7/adder32_artix7.v create mode 100644 rtl/lowlevel/artix7/adder47_artix7.v create mode 100644 rtl/lowlevel/artix7/dsp48e1_wrapper.v create mode 100644 rtl/lowlevel/artix7/mac16_artix7.v create mode 100644 rtl/lowlevel/artix7/subtractor32_artix7.v create mode 100644 rtl/lowlevel/ecdsa_lowlevel_settings.v create mode 100644 rtl/lowlevel/mac16_wrapper.v create mode 100644 rtl/lowlevel/subtractor32_wrapper.v create mode 100644 rtl/modular/modular_adder.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_copy.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_init.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v create mode 100644 rtl/modular/modular_invertor/modinv_clog2.v create mode 100644 rtl/modular/modular_invertor/modular_invertor.v create mode 100644 rtl/modular/modular_multiplier_256.v create mode 100644 rtl/modular/modular_reductor_256.v create mode 100644 rtl/modular/modular_subtractor.v create mode 100644 rtl/multiword/mw_comparator.v create mode 100644 rtl/multiword/mw_mover.v create mode 100644 rtl/util/bram_1rw_1ro_readfirst.v diff --git a/bench/tb_curve_adder_256.v b/bench/tb_curve_adder_256.v new file mode 100644 index 0000000..a20743a --- /dev/null +++ b/bench/tb_curve_adder_256.v @@ -0,0 +1,420 @@ +//------------------------------------------------------------------------------ +// +// tb_curve_adder_256.v +// ----------------------------------------------------------------------------- +// Testbench for 256-bit curve point adder. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ +`timescale 1ns / 1ps +//------------------------------------------------------------------------------ + +module tb_curve_adder_256; + + + // + // Test Vectors + // + localparam [255:0] PX_1 = 256'ha536512112e4bb911ae72744a914761ddc53700f889c88e583e0edd45c179b08; + localparam [255:0] PY_1 = 256'h239e73bf40f4831ab71ccea072291893ac8582982ea6fec6bd6aaf36ac32d22e; + localparam [255:0] PZ_1 = 256'h32258ae04c5498bb34b29c54a7f95afc10c009540c51731eae164750ca385029; + + localparam [255:0] RX_1 = 256'he4fcdd1a151b405b2a567d20d7674031c6d5b207b0b5dcf277015d81784492d5; + localparam [255:0] RY_1 = 256'h4782c540b58988b07bb8e0c5ad3ff562dd45c075a39ee71896d5eb33702dd656; + localparam [255:0] RZ_1 = 256'hae637ff2fd5468780241afb3a8ebaeb8618e86b4a1a211b350546c9e6fea93d4; + + localparam [255:0] PX_2 = 256'he58a6470e038f6b261d5a9a72fb2bd96b6bad433ff7baea6a40b5facf5085189; + localparam [255:0] PY_2 = 256'h03dd8785b592307811ee5512e2d713c5dc65f60f01883340fe0f56f858a39474; + localparam [255:0] PZ_2 = 256'h1b4657b1e79c9074fbf7f63f96ce2854db4808afc72841fac623dc68d9bff64d; + + localparam [255:0] RX_2 = 256'hc354e99a827a3f1c30f29f6b1d72273eb0daaeb06bb373ed315e305b89d857ca; + localparam [255:0] RY_2 = 256'h0cb054f95589c1fcbe763df3b8d7badd568d5e93a667076dddfc70dcfab74948; + localparam [255:0] RZ_2 = 256'hd79d9170dd628aee82d149715a6ec6cc44426ccae236d2a146edbd15a564ea53; + + localparam [255:0] PX_3 = 256'hbf5fe30c79025a0b638b0fd62bf1349aee0a9fc7fc2719291b0c23535c16eb52; + localparam [255:0] PY_3 = 256'h8a637c7c0b9459de664d40a717e1abc0f843f03169fae943e0835cbe767da06b; + localparam [255:0] PZ_3 = 256'h0871d93601d654216912866514a788a92e8a9b6047611bf185d459e204727377; + + localparam [255:0] RX_3 = 256'h1ba6259b5b750e4d6e4f490f661646cd9491be16965f47044ac2688048e567c5; + localparam [255:0] RY_3 = 256'h80e55c16f403f8d7282bca628477771a45330567caa5aaab9a54919dbe05e3e4; + localparam [255:0] RZ_3 = 256'hb99663f045c9602b05f23aaaa508e6167d15740be900175dbeceb957a9dad951; + + localparam [255:0] PX_4 = 256'hxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx; + localparam [255:0] PY_4 = 256'hxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx; + localparam [255:0] PZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000000; + + localparam [255:0] RX_4 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x + localparam [255:0] RY_4 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y + localparam [255:0] RZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000001; + + localparam [255:0] PX_5 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x + localparam [255:0] PY_5 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y + localparam [255:0] PZ_5 = 256'h0000000000000000000000000000000000000000000000000000000000000001; + + localparam [255:0] RX_5 = 256'h29d05c193da77b710e86323538b77e1b11f904fea42998be16bd8d744ece7ad0; // H.x + localparam [255:0] RY_5 = 256'hb01cbd1c01e58065711814b583f061e9d431cca994cea1313449bf97c840ae07; // H.y + localparam [255:0] RZ_5 = 256'h0000000000000000000000000000000000000000000000000000000000000001; + + localparam [255:0] PX_6 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x + localparam [255:0] PY_6 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y + localparam [255:0] PZ_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001; + + localparam [255:0] RX_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001; + localparam [255:0] RY_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001; + localparam [255:0] RZ_6 = 256'h0000000000000000000000000000000000000000000000000000000000000000; + + localparam [255:0] Q = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff; + + + // + // Core Parameters + // + localparam WORD_COUNTER_WIDTH = 3; + localparam OPERAND_NUM_WORDS = 8; + + + // + // Clock (100 MHz) + // + reg clk = 1'b0; + always #5 clk = ~clk; + + + // + // Inputs, Outputs + // + reg rst_n; + reg ena; + wire rdy; + + + // + // Buffers (PX, PY, PZ, RX, RY, RZ, Q) + // + wire [WORD_COUNTER_WIDTH-1:0] core_px_addr; + wire [WORD_COUNTER_WIDTH-1:0] core_py_addr; + wire [WORD_COUNTER_WIDTH-1:0] core_pz_addr; + + wire [WORD_COUNTER_WIDTH-1:0] core_rx_addr; + wire [WORD_COUNTER_WIDTH-1:0] core_ry_addr; + wire [WORD_COUNTER_WIDTH-1:0] core_rz_addr; + + wire [WORD_COUNTER_WIDTH-1:0] core_q_addr; + + wire core_rx_wren; + wire core_ry_wren; + wire core_rz_wren; + + wire [ 32-1:0] core_px_data; + wire [ 32-1:0] core_py_data; + wire [ 32-1:0] core_pz_data; + + wire [ 32-1:0] core_rx_data_wr; + wire [ 32-1:0] core_ry_data_wr; + wire [ 32-1:0] core_rz_data_wr; + + wire [ 32-1:0] core_rx_data_rd; + wire [ 32-1:0] core_ry_data_rd; + wire [ 32-1:0] core_rz_data_rd; + + wire [ 32-1:0] core_q_data; + + reg [WORD_COUNTER_WIDTH-1:0] tb_xyzq_addr; + reg tb_xyzq_wren; + + reg [ 31:0] tb_px_data; + reg [ 31:0] tb_py_data; + reg [ 31:0] tb_pz_data; + wire [ 31:0] tb_rx_data; + wire [ 31:0] tb_ry_data; + wire [ 31:0] tb_rz_data; + reg [ 31:0] tb_q_data; + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_px + ( .clk(clk), + .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_px_data), .a_out(), + .b_addr(core_px_addr), .b_out(core_px_data) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_py + ( .clk(clk), + .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_py_data), .a_out(), + .b_addr(core_py_addr), .b_out(core_py_data) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_pz + ( .clk(clk), + .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_pz_data), .a_out(), + .b_addr(core_pz_addr), .b_out(core_pz_data) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_q + ( .clk(clk), + .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_q_data), .a_out(), + .b_addr(core_q_addr), .b_out(core_q_data) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_rx + ( .clk(clk), + .a_addr(core_rx_addr), .a_wr(core_rx_wren), .a_in(core_rx_data_wr), .a_out(core_rx_data_rd), + .b_addr(tb_xyzq_addr), .b_out(tb_rx_data) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_ry + ( .clk(clk), + .a_addr(core_ry_addr), .a_wr(core_ry_wren), .a_in(core_ry_data_wr), .a_out(core_ry_data_rd), + .b_addr(tb_xyzq_addr), .b_out(tb_ry_data) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_rz + ( .clk(clk), + .a_addr(core_rz_addr), .a_wr(core_rz_wren), .a_in(core_rz_data_wr), .a_out(core_rz_data_rd), + .b_addr(tb_xyzq_addr), .b_out(tb_rz_data) + ); + + + // + // Opcode + // + wire [ 5: 0] add_uop_addr; + wire [19: 0] add_uop; + + uop_add_rom add_rom + ( + .clk (clk), + .addr (add_uop_addr), + .data (add_uop) + ); + + // + // UUT + // + curve_dbl_add_256 uut + ( + .clk (clk), + .rst_n (rst_n), + + .ena (ena), + .rdy (rdy), + + .uop_addr (add_uop_addr), + .uop (add_uop), + + .px_addr (core_px_addr), + .py_addr (core_py_addr), + .pz_addr (core_pz_addr), + .rx_addr (core_rx_addr), + .ry_addr (core_ry_addr), + .rz_addr (core_rz_addr), + .q_addr (core_q_addr), + + .rx_wren (core_rx_wren), + .ry_wren (core_ry_wren), + .rz_wren (core_rz_wren), + + .px_din (core_px_data), + .py_din (core_py_data), + .pz_din (core_pz_data), + .rx_din (core_rx_data_rd), + .ry_din (core_ry_data_rd), + .rz_din (core_rz_data_rd), + .rx_dout (core_rx_data_wr), + .ry_dout (core_ry_data_wr), + .rz_dout (core_rz_data_wr), + .q_din (core_q_data) + ); + + + // + // Testbench Routine + // + reg ok = 1; + initial begin + + /* initialize control inputs */ + rst_n = 0; + ena = 0; + + /* wait for some time */ + #200; + + /* de-assert reset */ + rst_n = 1; + + /* wait for some time */ + #100; + + /* run tests */ + test_curve_adder(PX_1, PY_1, PZ_1, RX_1, RY_1, RZ_1); + test_curve_adder(PX_2, PY_2, PZ_2, RX_2, RY_2, RZ_2); + test_curve_adder(PX_3, PY_3, PZ_3, RX_3, RY_3, RZ_3); + test_curve_adder(PX_4, PY_4, PZ_4, RX_4, RY_4, RZ_4); + test_curve_adder(PX_5, PY_5, PZ_5, RX_5, RY_5, RZ_5); + test_curve_adder(PX_6, Q - PY_6, PZ_6, RX_6, RY_6, RZ_6); + + /* print result */ + if (ok) $display("tb_curve_adder_256: SUCCESS"); + else $display("tb_curve_adder_256: FAILURE"); + // + $finish; + // + end + + + // + // Test Task + // + reg t_ok; + + integer w; + + task test_curve_adder; + + input [255:0] px; + input [255:0] py; + input [255:0] pz; + + input [255:0] rx; + input [255:0] ry; + input [255:0] rz; + + reg [255:0] px_shreg; + reg [255:0] py_shreg; + reg [255:0] pz_shreg; + + reg [255:0] rx_shreg; + reg [255:0] ry_shreg; + reg [255:0] rz_shreg; + + reg [255:0] q_shreg; + + begin + + /* start filling memories */ + tb_xyzq_wren = 1; + + /* initialize shift registers */ + px_shreg = px; + py_shreg = py; + pz_shreg = pz; + q_shreg = Q; + + /* write all the words */ + for (w=0; w= {1'b0, n}) + s = s - {1'b0, n}; + + /* start operation */ + ena = 1; + + /* clear flag */ + #10 ena = 0; + + /* wait for operation to complete */ + while (!rdy) #10; + + /* read result */ + for (w=0; w y) ? 1 : 0; + + /* start operation */ + ena = 1; + + /* clear flag */ + #10 ena = 0; + + /* wait for operation to complete */ + while (!rdy) #10; + + /* compare */ + cmp_ok = (cmp_l == core_cmp_l) && (cmp_e == core_cmp_e) && (cmp_g == core_cmp_g); + + /* display results */ + $display("test_mw_comparator(): %s", cmp_ok ? "OK" : "ERROR"); + + /* update global flag */ + ok = ok && cmp_ok; + + end + + endtask + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/bench/tb_mw_mover.v b/bench/tb_mw_mover.v new file mode 100644 index 0000000..be767fc --- /dev/null +++ b/bench/tb_mw_mover.v @@ -0,0 +1,282 @@ +//------------------------------------------------------------------------------ +// +// tb_modular_mover.v +// ----------------------------------------------------------------------------- +// Testbench for multi-word data mover. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ +`timescale 1ns / 1ps +//------------------------------------------------------------------------------ + +module tb_mw_mover; + + + // + // Test Vectors + // + localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd; + localparam [255:0] X_2 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210; + localparam [255:0] X_3 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf; + localparam [255:0] X_4 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899; + + + // + // Core Parameters + // + localparam WORD_COUNTER_WIDTH = 3; + localparam OPERAND_NUM_WORDS = 8; + + + // + // Clock (100 MHz) + // + reg clk = 1'b0; + always #5 clk = ~clk; + + + // + // Inputs, Outputs + // + reg rst_n; + reg ena; + wire rdy; + + + // + // Buffers (X, Y) + // + wire [WORD_COUNTER_WIDTH-1:0] core_x_addr; + wire [WORD_COUNTER_WIDTH-1:0] core_y_addr; + wire core_y_wren; + + wire [ 32-1:0] core_x_data; + wire [ 32-1:0] core_y_data; + + reg [WORD_COUNTER_WIDTH-1:0] tb_x_addr; + reg [WORD_COUNTER_WIDTH-1:0] tb_y_addr; + reg tb_x_wren; + + reg [ 32-1:0] tb_x_data; + wire [ 32-1:0] tb_y_data; + + bram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (WORD_COUNTER_WIDTH) + ) + bram_x + ( + .clk (clk), + + .a_addr (tb_x_addr), + .a_wr (tb_x_wren), + .a_in (tb_x_data), + .a_out (), + + .b_addr (core_x_addr), + .b_out (core_x_data) + ); + + bram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (WORD_COUNTER_WIDTH) + ) + bram_d + ( + .clk (clk), + + .a_addr (core_y_addr), + .a_wr (core_y_wren), + .a_in (core_y_data), + .a_out (), + + .b_addr (tb_y_addr), + .b_out (tb_y_data) + ); + + + // + // UUT + // + mw_mover # + ( + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH), + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS) + ) + uut + ( + .clk (clk), + .rst_n (rst_n), + + .ena (ena), + .rdy (rdy), + + .x_addr (core_x_addr), + .y_addr (core_y_addr), + .y_wren (core_y_wren), + + .x_din (core_x_data), + .y_dout (core_y_data) + ); + + + // + // Testbench Routine + // + reg ok = 1; + initial begin + + /* initialize control inputs */ + rst_n = 0; + ena = 0; + + tb_x_wren = 0; + + /* wait for some time */ + #200; + + /* de-assert reset */ + rst_n = 1; + + /* wait for some time */ + #100; + + /* run tests */ + test_modular_mover(X_1); + test_modular_mover(X_2); + test_modular_mover(X_3); + test_modular_mover(X_4); + + /* print result */ + if (ok) $display("tb_modular_mover: SUCCESS"); + else $display("tb_modular_mover: FAILURE"); + // + $finish; + // + end + + + // + // Test Task + // + reg [255:0] y; + reg y_ok; + + integer w; + + reg [255:0] x_shreg; + reg [255:0] y_shreg; + + task test_modular_mover; + + input [255:0] x; + + begin + + /* start filling memories */ + tb_x_wren = 1; + + /* initialize shift registers */ + x_shreg = x; + + /* write all the words */ + for (w=0; w= cnt_inc_addr_s_start) && (proc_cnt <= cnt_inc_addr_s_stop); + assign inc_addr_a1 = (proc_cnt >= cnt_inc_addr_a1_start) && (proc_cnt <= cnt_inc_addr_a1_stop); + + always @(posedge clk) begin + // + if (inc_addr_s) addr_s <= addr_s_next; + else addr_s <= addr_s_zero; + // + if (inc_addr_a1) addr_a1 <= addr_a1_next; + else addr_a1 <= addr_a1_zero; + // + end + + + // + // Write Enable Logic + // + wire wren_a1; + + wire [PROC_CNT_BITS-1:0] cnt_wren_a1_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_wren_a1_stop = OPERAND_NUM_WORDS + 1; + + assign wren_a1 = (proc_cnt >= cnt_wren_a1_start) && (proc_cnt <= cnt_wren_a1_stop); + + assign a1_wren = wren_a1; + + + // + // Data Logic + // + assign a1_dout = s_din; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_init.v b/rtl/modular/modular_invertor/helper/modinv_helper_init.v new file mode 100644 index 0000000..0468134 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_init.v @@ -0,0 +1,172 @@ +`timescale 1ns / 1ps + +module modinv_helper_init + ( + clk, rst_n, + ena, rdy, + a_addr, a_din, + q_addr, q_din, + r_addr, r_wren, r_dout, + s_addr, s_wren, s_dout, + u_addr, u_wren, u_dout, + v_addr, v_wren, v_dout + ); + + + // + // Parameters + // + parameter OPERAND_NUM_WORDS = 8; + parameter OPERAND_ADDR_BITS = 3; + + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 3; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + output wire [OPERAND_ADDR_BITS-1:0] a_addr; + output wire [OPERAND_ADDR_BITS-1:0] q_addr; + output wire [ BUFFER_ADDR_BITS-1:0] r_addr; + output wire [ BUFFER_ADDR_BITS-1:0] s_addr; + output wire [ BUFFER_ADDR_BITS-1:0] u_addr; + output wire [ BUFFER_ADDR_BITS-1:0] v_addr; + + output wire r_wren; + output wire s_wren; + output wire u_wren; + output wire v_wren; + + input wire [ 31:0] a_din; + input wire [ 31:0] q_din; + output wire [ 31:0] r_dout; + output wire [ 31:0] s_dout; + output wire [ 31:0] u_dout; + output wire [ 31:0] v_dout; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [OPERAND_ADDR_BITS-1:0] addr_aq; + + wire [OPERAND_ADDR_BITS-1:0] addr_aq_max = OPERAND_NUM_WORDS - 1; + wire [OPERAND_ADDR_BITS-1:0] addr_aq_zero = {OPERAND_ADDR_BITS{1'b0}}; + wire [OPERAND_ADDR_BITS-1:0] addr_aq_next = (addr_aq < addr_aq_max) ? + addr_aq + 1'b1 : addr_aq_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_rsuv; + + wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_max = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_next = (addr_rsuv < addr_rsuv_max) ? + addr_rsuv + 1'b1 : addr_rsuv_zero; + + assign a_addr = addr_aq; + assign q_addr = addr_aq; + + assign r_addr = addr_rsuv; + assign s_addr = addr_rsuv; + assign u_addr = addr_rsuv; + assign v_addr = addr_rsuv; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment Logic + // + wire inc_addr_aq; + wire inc_addr_rsuv; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_start = 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_stop = OPERAND_NUM_WORDS; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_stop = BUFFER_NUM_WORDS + 1; + + assign inc_addr_aq = (proc_cnt >= cnt_inc_addr_aq_start) && (proc_cnt <= cnt_inc_addr_aq_stop); + assign inc_addr_rsuv = (proc_cnt >= cnt_inc_addr_rsuv_start) && (proc_cnt <= cnt_inc_addr_rsuv_stop); + + always @(posedge clk) begin + // + if (inc_addr_aq) addr_aq <= addr_aq_next; + else addr_aq <= addr_aq_zero; + // + if (inc_addr_rsuv) addr_rsuv <= addr_rsuv_next; + else addr_rsuv <= addr_rsuv_zero; + // + end + + + // + // Write Enable Logic + // + wire wren_rsuv; + + wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_stop = BUFFER_NUM_WORDS + 1; + + assign wren_rsuv = (proc_cnt >= cnt_wren_rsuv_start) && (proc_cnt <= cnt_wren_rsuv_stop); + + assign r_wren = wren_rsuv; + assign s_wren = wren_rsuv; + assign u_wren = wren_rsuv; + assign v_wren = wren_rsuv; + + + // + // Data Logic + // + assign r_dout = 32'd0; + assign s_dout = (proc_cnt == cnt_wren_rsuv_start) ? 32'd1 : 32'd0; + assign u_dout = (proc_cnt != cnt_wren_rsuv_stop) ? q_din : 32'd0; + assign v_dout = (proc_cnt != cnt_wren_rsuv_stop) ? a_din : 32'd0; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v new file mode 100644 index 0000000..6b65eb1 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v @@ -0,0 +1,286 @@ +`timescale 1ns / 1ps + +module modinv_helper_invert_compare + ( + clk, rst_n, + ena, rdy, + + u_addr, u_din, + v_addr, v_din, + + u_gt_v, v_eq_1, + u_is_even, v_is_even + ); + + + // + // Parameters + // + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = 1 * BUFFER_NUM_WORDS + 10; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + output wire [BUFFER_ADDR_BITS-1:0] u_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_addr; + + input wire [ 32-1:0] u_din; + input wire [ 32-1:0] v_din; + + output wire u_gt_v; + output wire v_eq_1; + output wire u_is_even; + output wire v_is_even; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [BUFFER_ADDR_BITS-1:0] addr_in; + + wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ? + addr_in - 1'b1 : addr_in_last; + + assign u_addr = addr_in; + assign v_addr = addr_in; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Decrement Logic + // + wire dec_addr_in; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 0 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 1 * BUFFER_NUM_WORDS + 0; + + assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop); + + always @(posedge clk) + // + if (rdy) addr_in <= addr_in_last; + else if (dec_addr_in) addr_in <= addr_in_prev; + + + // + // Comparison Stage Flags + // + wire calc_leg; + wire calc_leg_final; + wire calc_parity; + + wire [PROC_CNT_BITS-1:0] cnt_calc_leg_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_calc_leg_stop = 1 * BUFFER_NUM_WORDS + 2; + wire [PROC_CNT_BITS-1:0] cnt_calc_parity = 1 * BUFFER_NUM_WORDS + 1; + + assign calc_leg = (proc_cnt >= cnt_calc_leg_start) && (proc_cnt <= cnt_calc_leg_stop); + assign calc_leg_final = (proc_cnt == cnt_calc_leg_stop); + assign calc_parity = (proc_cnt == cnt_calc_parity); + + + // + // Dummy Input + // + reg sub32_din_1_lsb; + wire [31: 0] sub32_din_1 = {{31{1'b0}}, sub32_din_1_lsb}; + + always @(posedge clk) + // + sub32_din_1_lsb <= (addr_in == addr_in_zero) ? 1'b1 : 1'b0; + + + // + // Subtractor (u - v) + // + wire [31: 0] sub32_u_minus_v_difference_out; + wire sub32_u_minus_v_borrow_in; + wire sub32_u_minus_v_borrow_out; + + subtractor32_wrapper sub32_u_minus_v + ( + .clk (clk), + .a (u_din), + .b (v_din), + .d (sub32_u_minus_v_difference_out), + .b_in (sub32_u_minus_v_borrow_in), + .b_out (sub32_u_minus_v_borrow_out) + ); + + + // + // Subtractor (v - 1) + // + wire [31: 0] sub32_v_minus_1_difference_out; + wire sub32_v_minus_1_borrow_in; + wire sub32_v_minus_1_borrow_out; + + subtractor32_wrapper sub32_v_minus_1 + ( + .clk (clk), + .a (v_din), + .b (sub32_din_1), + .d (sub32_v_minus_1_difference_out), + .b_in (sub32_v_minus_1_borrow_in), + .b_out (sub32_v_minus_1_borrow_out) + ); + + + + // + // Borrow Masking Logic + // + reg mask_borrow; + + always @(posedge clk) + // + mask_borrow <= ((proc_cnt > cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop)) ? + 1'b0 : 1'b1; + + assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_borrow; + assign sub32_v_minus_1_borrow_in = sub32_v_minus_1_borrow_out & ~mask_borrow; + + + // + // Comparison Logic + // + reg cmp_u_v_l; + reg cmp_u_v_e; + reg cmp_u_v_g; + + reg cmp_v_1_l; + reg cmp_v_1_e; + reg cmp_v_1_g; + + wire cmp_unresolved_u_v = !(cmp_u_v_l || cmp_u_v_g); + wire cmp_unresolved_v_1 = !(cmp_v_1_l || cmp_v_1_g); + + wire cmp_u_v_borrow_is_set = (sub32_u_minus_v_borrow_out == 1'b1) ? 1'b1 : 1'b0; + wire cmp_u_v_difference_is_nonzero = (sub32_u_minus_v_difference_out != 32'd0) ? 1'b1 : 1'b0; + + wire cmp_v_1_borrow_is_set = (sub32_v_minus_1_borrow_out == 1'b1) ? 1'b1 : 1'b0; + wire cmp_v_1_difference_is_nonzero = (sub32_v_minus_1_difference_out != 32'd0) ? 1'b1 : 1'b0; + + reg u_is_even_reg; + reg v_is_even_reg; + + always @(posedge clk) + // + if (rdy) begin + // + if (ena) begin + // + cmp_u_v_l <= 1'b0; + cmp_u_v_e <= 1'b0; + cmp_u_v_g <= 1'b0; + // + cmp_v_1_l <= 1'b0; + cmp_v_1_e <= 1'b0; + cmp_v_1_g <= 1'b0; + // + u_is_even_reg <= 1'bX; + v_is_even_reg <= 1'bX; + // + end + // + end else begin + // + // parity + // + if (calc_parity) begin + u_is_even_reg <= ~u_din[0]; + v_is_even_reg <= ~v_din[0]; + end + // + // u <> v + // + if (cmp_unresolved_u_v && calc_leg) begin + // + if (cmp_u_v_borrow_is_set) + cmp_u_v_l <= 1'b1; + // + if (!cmp_u_v_borrow_is_set && cmp_u_v_difference_is_nonzero) + cmp_u_v_g <= 1'b1; + // + if (!cmp_u_v_borrow_is_set && !cmp_u_v_difference_is_nonzero && calc_leg_final) + cmp_u_v_e <= 1'b1; + // + end + // + // v <> 1 + // + if (cmp_unresolved_v_1 && calc_leg) begin + // + if (cmp_v_1_borrow_is_set) + cmp_v_1_l <= 1'b1; + // + if (!cmp_v_1_borrow_is_set && cmp_v_1_difference_is_nonzero) + cmp_v_1_g <= 1'b1; + // + if (!cmp_v_1_borrow_is_set && !cmp_v_1_difference_is_nonzero && calc_leg_final) + cmp_v_1_e <= 1'b1; + // + end + // + end + + + // + // Output Flags + // + assign u_gt_v = !cmp_u_v_l && !cmp_u_v_e && cmp_u_v_g; + assign v_eq_1 = !cmp_v_1_l && cmp_v_1_e && !cmp_v_1_g; + + assign u_is_even = u_is_even_reg; + assign v_is_even = v_is_even_reg; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v new file mode 100644 index 0000000..ab15563 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v @@ -0,0 +1,408 @@ +`timescale 1ns / 1ps + +module modinv_helper_invert_precalc + ( + clk, rst_n, + ena, rdy, + + r_addr, r_din, + s_addr, s_din, + u_addr, u_din, + v_addr, v_din, + + r_dbl_addr, r_dbl_wren, r_dbl_dout, + s_dbl_addr, s_dbl_wren, s_dbl_dout, + r_plus_s_addr, r_plus_s_wren, r_plus_s_dout, + u_half_addr, u_half_wren, u_half_dout, + v_half_addr, v_half_wren, v_half_dout, + u_minus_v_addr, u_minus_v_wren, u_minus_v_dout, u_minus_v_din, + v_minus_u_addr, v_minus_u_wren, v_minus_u_dout, v_minus_u_din, + u_minus_v_half_addr, u_minus_v_half_wren, u_minus_v_half_dout, + v_minus_u_half_addr, v_minus_u_half_wren, v_minus_u_half_dout + ); + + + // + // Parameters + // + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + output wire [BUFFER_ADDR_BITS-1:0] r_addr; + output wire [BUFFER_ADDR_BITS-1:0] s_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_addr; + + input wire [ 32-1:0] r_din; + input wire [ 32-1:0] s_din; + input wire [ 32-1:0] u_din; + input wire [ 32-1:0] v_din; + + output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr; + output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr; + output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr; + + output wire [ 32-1:0] r_dbl_dout; + output wire [ 32-1:0] s_dbl_dout; + output wire [ 32-1:0] r_plus_s_dout; + output wire [ 32-1:0] u_half_dout; + output wire [ 32-1:0] v_half_dout; + output wire [ 32-1:0] u_minus_v_dout; + output wire [ 32-1:0] v_minus_u_dout; + output wire [ 32-1:0] u_minus_v_half_dout; + output wire [ 32-1:0] v_minus_u_half_dout; + + output wire r_dbl_wren; + output wire s_dbl_wren; + output wire r_plus_s_wren; + output wire u_half_wren; + output wire v_half_wren; + output wire u_minus_v_wren; + output wire v_minus_u_wren; + output wire u_minus_v_half_wren; + output wire v_minus_u_half_wren; + + input wire [ 32-1:0] u_minus_v_din; + input wire [ 32-1:0] v_minus_u_din; + + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [BUFFER_ADDR_BITS-1:0] addr_in; + + wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_last) ? + addr_in + 1'b1 : addr_in_zero; + wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ? + addr_in - 1'b1 : addr_in_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_out1; + + wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ? + addr_out1 + 1'b1 : addr_out1_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_out2; + + wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_next = (addr_out2 < addr_out2_last) ? + addr_out2 + 1'b1 : addr_out2_zero; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ? + addr_out2 - 1'b1 : addr_out2_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_out3; + + wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ? + addr_out3 - 1'b1 : addr_out3_last; + + reg [BUFFER_ADDR_BITS-1:0] addr_out4; + + wire [BUFFER_ADDR_BITS-1:0] addr_out4_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out4_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out4_prev = (addr_out4 > addr_out4_zero) ? + addr_out4 - 1'b1 : addr_out4_last; + + + assign r_addr = addr_in; + assign s_addr = addr_in; + assign u_addr = addr_in; + assign v_addr = addr_in; + + assign r_dbl_addr = addr_out1; + assign s_dbl_addr = addr_out1; + assign r_plus_s_addr = addr_out2; + assign u_half_addr = addr_out3; + assign v_half_addr = addr_out3; + assign u_minus_v_addr = addr_out2; + assign v_minus_u_addr = addr_out2; + assign u_minus_v_half_addr = addr_out4; + assign v_minus_u_half_addr = addr_out4; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment/Decrement Logic + // + wire inc_addr_in; + wire dec_addr_in; + wire inc_addr_out1; + wire inc_addr_out2; + wire dec_addr_out2; + wire dec_addr_out3; + wire dec_addr_out4; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 0 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = 1 * BUFFER_NUM_WORDS - 1; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 2; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_stop = 1 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 1 * BUFFER_NUM_WORDS + 0; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 2 * BUFFER_NUM_WORDS - 2; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_start = 1 * BUFFER_NUM_WORDS + 4; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_stop = 2 * BUFFER_NUM_WORDS + 3; + + assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop); + assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop); + assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop); + assign inc_addr_out2 = (proc_cnt >= cnt_inc_addr_out2_start) && (proc_cnt <= cnt_inc_addr_out2_stop); + assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop); + assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop); + assign dec_addr_out4 = (proc_cnt >= cnt_dec_addr_out4_start) && (proc_cnt <= cnt_dec_addr_out4_stop); + + + always @(posedge clk) begin + // + if (rdy) begin + // + addr_in <= addr_in_zero; + addr_out1 <= addr_out1_zero; + addr_out2 <= addr_out2_zero; + addr_out3 <= addr_out3_last; + addr_out4 <= addr_out4_last; + // + end else begin + // + if (inc_addr_in) addr_in <= addr_in_next; + else if (dec_addr_in) addr_in <= addr_in_prev; + // + if (inc_addr_out1) addr_out1 <= addr_out1_next; + else addr_out1 <= addr_out1_zero; + // + if (inc_addr_out2) addr_out2 <= addr_out2_next; + else if (dec_addr_out2) addr_out2 <= addr_out2_prev; + // + if (dec_addr_out3) addr_out3 <= addr_out3_prev; + else addr_out3 <= addr_out3_last; + // + if (dec_addr_out4) addr_out4 <= addr_out4_prev; + else addr_out4 <= addr_out4_last; + // + end + // + end + + + // + // Write Enable Logic + // + wire wren_out1; + wire wren_out2; + wire wren_out3; + wire wren_out4; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 2; + wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 1 * BUFFER_NUM_WORDS + 2; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out4_start = 1 * BUFFER_NUM_WORDS + 4; + wire [PROC_CNT_BITS-1:0] cnt_wren_out4_stop = 2 * BUFFER_NUM_WORDS + 3; + + assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop); + assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop); + assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop); + assign wren_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt <= cnt_wren_out4_stop); + + assign r_dbl_wren = wren_out1; + assign s_dbl_wren = wren_out1; + assign r_plus_s_wren = wren_out2; + assign u_half_wren = wren_out3; + assign v_half_wren = wren_out3; + assign u_minus_v_wren = wren_out2; + assign v_minus_u_wren = wren_out2; + assign u_minus_v_half_wren = wren_out4; + assign v_minus_u_half_wren = wren_out4; + + + // + // Adder (r + s) + // + wire [31: 0] add32_r_plus_s_sum_out; + wire add32_r_plus_s_carry_in; + wire add32_r_plus_s_carry_out; + + adder32_wrapper add32_r_plus_s + ( + .clk (clk), + .a (r_din), + .b (s_din), + .s (add32_r_plus_s_sum_out), + .c_in (add32_r_plus_s_carry_in), + .c_out (add32_r_plus_s_carry_out) + ); + + // + // Subtractor (u - v) + // + wire [31: 0] sub32_u_minus_v_difference_out; + wire sub32_u_minus_v_borrow_in; + wire sub32_u_minus_v_borrow_out; + + subtractor32_wrapper sub32_u_minus_v + ( + .clk (clk), + .a (u_din), + .b (v_din), + .d (sub32_u_minus_v_difference_out), + .b_in (sub32_u_minus_v_borrow_in), + .b_out (sub32_u_minus_v_borrow_out) + ); + + // + // Subtractor (v - u) + // + wire [31: 0] sub32_v_minus_u_difference_out; + wire sub32_v_minus_u_borrow_in; + wire sub32_v_minus_u_borrow_out; + + subtractor32_wrapper sub32_v_minus_u + ( + .clk (clk), + .a (v_din), + .b (u_din), + .d (sub32_v_minus_u_difference_out), + .b_in (sub32_v_minus_u_borrow_in), + .b_out (sub32_v_minus_u_borrow_out) + ); + + + // + // Carry & Borrow Masking Logic + // + reg mask_carry_borrow; + + always @(posedge clk) + // + mask_carry_borrow <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? + 1'b0 : 1'b1; + + assign add32_r_plus_s_carry_in = add32_r_plus_s_carry_out & ~mask_carry_borrow; + assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_carry_borrow; + assign sub32_v_minus_u_borrow_in = sub32_v_minus_u_borrow_out & ~mask_carry_borrow; + + + // + // Carry Bits + // + reg r_dbl_carry; + reg s_dbl_carry; + reg u_half_carry; + reg v_half_carry; + reg u_minus_v_half_carry; + reg v_minus_u_half_carry; + + always @(posedge clk) begin + + r_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? + r_din[31] : 1'b0; + + s_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? + s_din[31] : 1'b0; + + u_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ? + u_din[0] : 1'b0; + + v_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ? + v_din[0] : 1'b0; + + u_minus_v_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ? + u_minus_v_din[0] : 1'b0; + + v_minus_u_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ? + v_minus_u_din[0] : 1'b0; + + end + + + // + // Data Mapper + // + assign r_dbl_dout = {r_din[30:0], r_dbl_carry}; + assign s_dbl_dout = {s_din[30:0], s_dbl_carry}; + assign r_plus_s_dout = add32_r_plus_s_sum_out; + assign u_half_dout = {u_half_carry, u_din[31:1]}; + assign v_half_dout = {v_half_carry, v_din[31:1]}; + assign u_minus_v_dout = sub32_u_minus_v_difference_out; + assign v_minus_u_dout = sub32_v_minus_u_difference_out; + assign u_minus_v_half_dout = {u_minus_v_half_carry, u_minus_v_din[31:1]}; + assign v_minus_u_half_dout = {v_minus_u_half_carry, v_minus_u_din[31:1]}; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v new file mode 100644 index 0000000..0cd6ac5 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v @@ -0,0 +1,257 @@ +`timescale 1ns / 1ps + +module modinv_helper_invert_update + ( + clk, rst_n, + ena, rdy, + + u_gt_v, v_eq_1, + u_is_even, v_is_even, + + r_addr, r_wren, r_dout, + s_addr, s_wren, s_dout, + u_addr, u_wren, u_dout, + v_addr, v_wren, v_dout, + + r_dbl_addr, r_dbl_din, + s_dbl_addr, s_dbl_din, + r_plus_s_addr, r_plus_s_din, + u_half_addr, u_half_din, + v_half_addr, v_half_din, + u_minus_v_half_addr, u_minus_v_half_din, + v_minus_u_half_addr, v_minus_u_half_din + ); + + + // + // Parameters + // + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + input wire u_gt_v; + input wire v_eq_1; + input wire u_is_even; + input wire v_is_even; + + output wire [BUFFER_ADDR_BITS-1:0] r_addr; + output wire [BUFFER_ADDR_BITS-1:0] s_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_addr; + + output wire r_wren; + output wire s_wren; + output wire u_wren; + output wire v_wren; + + output wire [ 32-1:0] r_dout; + output wire [ 32-1:0] s_dout; + output wire [ 32-1:0] u_dout; + output wire [ 32-1:0] v_dout; + + output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr; + output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr; + output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr; + + input wire [ 32-1:0] r_dbl_din; + input wire [ 32-1:0] s_dbl_din; + input wire [ 32-1:0] r_plus_s_din; + input wire [ 32-1:0] u_half_din; + input wire [ 32-1:0] v_half_din; + input wire [ 32-1:0] u_minus_v_half_din; + input wire [ 32-1:0] v_minus_u_half_din; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [BUFFER_ADDR_BITS-1:0] addr_in; + + wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ? + addr_in + 1'b1 : addr_in_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_out; + + wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ? + addr_out + 1'b1 : addr_out_zero; + + assign r_addr = addr_out; + assign s_addr = addr_out; + assign u_addr = addr_out; + assign v_addr = addr_out; + + assign r_dbl_addr = addr_in; + assign s_dbl_addr = addr_in; + assign r_plus_s_addr = addr_in; + assign u_half_addr = addr_in; + assign v_half_addr = addr_in; + assign u_minus_v_half_addr = addr_in; + assign v_minus_u_half_addr = addr_in; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment Logic + // + wire inc_addr_in; + wire inc_addr_out; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1; + + assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop); + assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop); + + always @(posedge clk) begin + // + if (inc_addr_in) addr_in <= addr_in_next; + else addr_in <= addr_in_zero; + // + if (inc_addr_out) addr_out <= addr_out_next; + else addr_out <= addr_out_zero; + // + end + + // + // Write Enable Logic + // + wire wren_out; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1; + + assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop); + + reg r_wren_allow; + reg s_wren_allow; + reg u_wren_allow; + reg v_wren_allow; + + assign r_wren = wren_out && r_wren_allow && !v_eq_1 && !rdy; + assign s_wren = wren_out && s_wren_allow && !v_eq_1 && !rdy; + assign u_wren = wren_out && u_wren_allow && !v_eq_1 && !rdy; + assign v_wren = wren_out && v_wren_allow && !v_eq_1 && !rdy; + + + // + // Data Logic + // + reg [31: 0] r_dout_mux; + reg [31: 0] s_dout_mux; + reg [31: 0] u_dout_mux; + reg [31: 0] v_dout_mux; + + assign r_dout = r_dout_mux; + assign s_dout = s_dout_mux; + assign u_dout = u_dout_mux; + assign v_dout = v_dout_mux; + + always @(*) begin + // + // r, s, u, v + // + if (u_is_even) begin + // + u_dout_mux = u_half_din; + v_dout_mux = {32{1'bX}}; + r_dout_mux = {32{1'bX}}; + s_dout_mux = s_dbl_din; + // + u_wren_allow = 1'b1; + v_wren_allow = 1'b0; + r_wren_allow = 1'b0; + s_wren_allow = 1'b1; + // + end else begin + // + if (v_is_even) begin + // + u_dout_mux = {32{1'bX}}; + v_dout_mux = v_half_din; + r_dout_mux = r_dbl_din; + s_dout_mux = {32{1'bX}}; + // + u_wren_allow = 1'b0; + v_wren_allow = 1'b1; + r_wren_allow = 1'b1; + s_wren_allow = 1'b0; + // + end else begin + // + u_dout_mux = u_gt_v ? u_minus_v_half_din : {32{1'bX}}; + v_dout_mux = u_gt_v ? {32{1'bX}} : v_minus_u_half_din; + r_dout_mux = u_gt_v ? r_plus_s_din : r_dbl_din; + s_dout_mux = u_gt_v ? s_dbl_din : r_plus_s_din; + // + u_wren_allow = u_gt_v; + v_wren_allow = !u_gt_v; + r_wren_allow = 1'b1; + s_wren_allow = 1'b1; + // + end + // + end + // + end + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v new file mode 100644 index 0000000..fb858a6 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v @@ -0,0 +1,328 @@ +`timescale 1ns / 1ps + +module modinv_helper_reduce_precalc + ( + clk, rst_n, + ena, rdy, + + k, + + s_is_odd, k_is_nul, + + r_addr, r_din, r_wren, r_dout, + s_addr, s_din, + u_addr, u_wren, u_dout, + v_addr, v_wren, v_dout, + q_addr, q_din + ); + + + // + // Parameters + // + parameter OPERAND_NUM_WORDS = 8; + parameter OPERAND_ADDR_BITS = 3; + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + parameter K_NUM_BITS = 10; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + input wire [ K_NUM_BITS-1:0] k; + + output wire s_is_odd; + output wire k_is_nul; + + output wire [ BUFFER_ADDR_BITS-1:0] r_addr; + output wire [ BUFFER_ADDR_BITS-1:0] s_addr; + output wire [ BUFFER_ADDR_BITS-1:0] u_addr; + output wire [ BUFFER_ADDR_BITS-1:0] v_addr; + output wire [OPERAND_ADDR_BITS-1:0] q_addr; + + input wire [ 32-1:0] r_din; + input wire [ 32-1:0] s_din; + input wire [ 32-1:0] q_din; + + output wire r_wren; + output wire u_wren; + output wire v_wren; + + output wire [ 32-1:0] r_dout; + output wire [ 32-1:0] u_dout; + output wire [ 32-1:0] v_dout; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [ BUFFER_ADDR_BITS-1:0] addr_in_buf; + reg [OPERAND_ADDR_BITS-1:0] addr_in_op; + reg [ BUFFER_ADDR_BITS-1:0] addr_out1; + reg [ BUFFER_ADDR_BITS-1:0] addr_out2; + reg [ BUFFER_ADDR_BITS-1:0] addr_out3; + + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_last = BUFFER_NUM_WORDS - 1; + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_next = (addr_in_buf < addr_in_buf_last) ? + addr_in_buf + 1'b1 : addr_in_buf_zero; + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_prev = (addr_in_buf > addr_in_buf_zero) ? + addr_in_buf - 1'b1 : addr_in_buf_zero; + + wire [OPERAND_ADDR_BITS-1:0] addr_in_op_last = OPERAND_NUM_WORDS - 1; + wire [OPERAND_ADDR_BITS-1:0] addr_in_op_zero = {OPERAND_ADDR_BITS{1'b0}}; + wire [OPERAND_ADDR_BITS-1:0] addr_in_op_next = (addr_in_op < addr_in_op_last) ? + addr_in_op + 1'b1 : addr_in_op_zero; + + wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ? + addr_out1 + 1'b1 : addr_out1_zero; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_prev = (addr_out1 > addr_out1_zero) ? + addr_out1 - 1'b1 : addr_out1_zero; + + wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ? + addr_out2 - 1'b1 : addr_out2_last; + + wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ? + addr_out3 - 1'b1 : addr_out3_last; + + + assign s_addr = addr_in_buf; + assign q_addr = addr_in_op; + assign r_addr = addr_out1; + assign u_addr = addr_out2; + assign v_addr = addr_out3; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment/Decrement Logic + // + wire inc_addr_buf_in; + wire dec_addr_buf_in; + wire inc_addr_op_in; + wire inc_addr_out1; + wire dec_addr_out1; + wire dec_addr_out2; + wire dec_addr_out3; + + wire [PROC_CNT_BITS-1:0] cnt_calc_flags = 0 * BUFFER_NUM_WORDS + 2; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_start = 0 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_stop = 1 * BUFFER_NUM_WORDS - 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_start = 1 * BUFFER_NUM_WORDS + 0; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_stop = 2 * BUFFER_NUM_WORDS - 2; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_start = 0 * OPERAND_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_stop = 1 * OPERAND_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_start = 1 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_stop = 2 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 4; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 3; + + assign inc_addr_buf_in = (proc_cnt >= cnt_inc_addr_buf_in_start) && (proc_cnt <= cnt_inc_addr_buf_in_stop); + assign dec_addr_buf_in = (proc_cnt >= cnt_dec_addr_buf_in_start) && (proc_cnt <= cnt_dec_addr_buf_in_stop); + assign inc_addr_op_in = (proc_cnt >= cnt_inc_addr_op_in_start) && (proc_cnt <= cnt_inc_addr_op_in_stop); + assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop); + assign dec_addr_out1 = (proc_cnt >= cnt_dec_addr_out1_start) && (proc_cnt <= cnt_dec_addr_out1_stop); + assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop); + assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop); + + always @(posedge clk) begin + // + if (rdy) begin + // + addr_in_buf <= addr_in_buf_zero; + addr_in_op <= addr_in_op_zero; + addr_out1 <= addr_out1_zero; + addr_out2 <= addr_out2_last; + addr_out3 <= addr_out3_last; + // + end else begin + // + if (inc_addr_buf_in) addr_in_buf <= addr_in_buf_next; + else if (dec_addr_buf_in) addr_in_buf <= addr_in_buf_prev; + // + if (inc_addr_op_in) addr_in_op <= addr_in_op_next; + else addr_in_op <= addr_in_op_zero; + // + if (inc_addr_out1) addr_out1 <= addr_out1_next; + else if (dec_addr_out1) addr_out1 <= addr_out1_prev; + // + if (dec_addr_out2) addr_out2 <= addr_out2_prev; + else addr_out2 <= addr_out2_last; + // + if (dec_addr_out3) addr_out3 <= addr_out3_prev; + else addr_out3 <= addr_out3_last; + // + end + // + end + + + // + // Write Enable Logic + // + wire wren_out1; + wire wren_out2; + wire wren_out3; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 2; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 1 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 2 * BUFFER_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 4; + wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 3; + + assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop); + assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop); + assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop); + + assign r_wren = wren_out1; + assign u_wren = wren_out2; + assign v_wren = wren_out3; + + // + // Adder (s + q) + // + wire [31: 0] q_din_masked; + wire [31: 0] add32_s_plus_q_sum_out; + wire add32_s_plus_q_carry_in; + wire add32_s_plus_q_carry_out; + + adder32_wrapper add32_r_plus_s + ( + .clk (clk), + .a (s_din), + .b (q_din_masked), + .s (add32_s_plus_q_sum_out), + .c_in (add32_s_plus_q_carry_in), + .c_out (add32_s_plus_q_carry_out) + ); + + + // + // Carry Masking Logic + // + wire mask_carry; + + assign mask_carry = ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? 1'b0 : 1'b1; + + + // + // Addend Masking Logic + // + reg q_din_mask; + + always @(posedge clk) + q_din_mask <= (addr_in_buf == addr_in_buf_last) ? 1'b1 : 1'b0; + + assign q_din_masked = q_din_mask ? {32{1'b0}} : q_din; + + assign add32_s_plus_q_carry_in = add32_s_plus_q_carry_out & ~mask_carry; + + + // + // Carry Bits + // + reg s_half_carry; + reg s_plus_q_half_carry; + + always @(posedge clk) begin + // + s_half_carry <= ((proc_cnt >= cnt_wren_out2_start) && (proc_cnt < cnt_wren_out2_stop)) ? + s_din[0] : 1'b0; + // + s_plus_q_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ? + r_din[0] : 1'b0; + // + end + + // + // Data Mapper + // + assign r_dout = add32_s_plus_q_sum_out; + assign u_dout = {s_half_carry, s_din[31:1]}; + assign v_dout = {s_plus_q_half_carry, r_din[31:1]}; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + + // + // Output Flags + // + reg s_is_odd_reg; + reg k_is_nul_reg; + + assign s_is_odd = s_is_odd_reg; + assign k_is_nul = k_is_nul_reg; + + always @(posedge clk) + // + if (proc_cnt == cnt_calc_flags) begin + s_is_odd_reg <= s_din[0]; + k_is_nul_reg <= (k == {K_NUM_BITS{1'b0}}) ? 1'b1 : 1'b0; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v new file mode 100644 index 0000000..ea5b854 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v @@ -0,0 +1,153 @@ +`timescale 1ns / 1ps + +module modinv_helper_reduce_update + ( + clk, rst_n, + ena, rdy, + + s_is_odd, k_is_nul, + + s_addr, s_wren, s_dout, + u_addr, u_din, + v_addr, v_din + ); + + + // + // Parameters + // + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + input wire s_is_odd; + input wire k_is_nul; + + output wire [BUFFER_ADDR_BITS-1:0] s_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_addr; + + output wire s_wren; + + output wire [ 32-1:0] s_dout; + + input wire [ 32-1:0] u_din; + input wire [ 32-1:0] v_din; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [BUFFER_ADDR_BITS-1:0] addr_in; + + wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ? + addr_in + 1'b1 : addr_in_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_out; + + wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ? + addr_out + 1'b1 : addr_out_zero; + + assign s_addr = addr_out; + assign u_addr = addr_in; + assign v_addr = addr_in; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment Logic + // + wire inc_addr_in; + wire inc_addr_out; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1; + + assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop); + assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop); + + always @(posedge clk) begin + // + if (inc_addr_in) addr_in <= addr_in_next; + else addr_in <= addr_in_zero; + // + if (inc_addr_out) addr_out <= addr_out_next; + else addr_out <= addr_out_zero; + // + end + + // + // Write Enable Logic + // + wire wren_out; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1; + + assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop); + + assign s_wren = wren_out && !k_is_nul; //s_wren_allow && !v_eq_1 && !rdy; + + + // + // Data Logic + // + assign s_dout = s_is_odd ? v_din : u_din; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/modinv_clog2.v b/rtl/modular/modular_invertor/modinv_clog2.v new file mode 100644 index 0000000..2f7b64d --- /dev/null +++ b/rtl/modular/modular_invertor/modinv_clog2.v @@ -0,0 +1,10 @@ +function integer clog2; + input integer value; + integer result; + begin + value = value - 1; + for (result = 0; value > 0; result = result + 1) + value = value >> 1; + clog2 = result; + end +endfunction diff --git a/rtl/modular/modular_invertor/modular_invertor.v b/rtl/modular/modular_invertor/modular_invertor.v new file mode 100644 index 0000000..e9f2460 --- /dev/null +++ b/rtl/modular/modular_invertor/modular_invertor.v @@ -0,0 +1,981 @@ +//------------------------------------------------------------------------------ +// +// modular_invertor.v +// ----------------------------------------------------------------------------- +// Modular invertor. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_invertor + ( + clk, rst_n, + ena, rdy, + a_addr, q_addr, a1_addr, a1_wren, + a_din, q_din, a1_dout + ); + + + // + // Parameters + // + parameter MAX_OPERAND_WIDTH = 256; + + + // + // clog2 + // +`include "modinv_clog2.v" + + + // + // More Parameters + // + localparam OPERAND_NUM_WORDS = MAX_OPERAND_WIDTH / 32; + localparam OPERAND_ADDR_BITS = clog2(OPERAND_NUM_WORDS); + + localparam BUFFER_NUM_WORDS = OPERAND_NUM_WORDS + 1; + localparam BUFFER_ADDR_BITS = clog2(BUFFER_NUM_WORDS); + + localparam LOOP_NUM_ROUNDS = 2 * MAX_OPERAND_WIDTH; + localparam ROUND_COUNTER_BITS = clog2(LOOP_NUM_ROUNDS); + + localparam K_NUM_BITS = clog2(LOOP_NUM_ROUNDS + 1); + + + // + // Ports + // + input wire clk; + input wire rst_n; + + input wire ena; + output wire rdy; + + output wire [OPERAND_ADDR_BITS-1:0] a_addr; + output reg [OPERAND_ADDR_BITS-1:0] q_addr; + output wire [OPERAND_ADDR_BITS-1:0] a1_addr; + output wire a1_wren; + + input wire [32-1:0] a_din; + input wire [32-1:0] q_din; + output wire [32-1:0] a1_dout; + + + // + // "Redundant" Power of 2 (K) + // + reg [K_NUM_BITS-1:0] k; + + + // + // Buffers + // + reg [BUFFER_ADDR_BITS-1:0] buf_r_wr_addr; + reg [BUFFER_ADDR_BITS-1:0] buf_r_rd_addr; + reg buf_r_wr_en; + reg [ 32-1:0] buf_r_wr_din; + wire [ 32-1:0] buf_r_wr_dout; + wire [ 32-1:0] buf_r_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_r + ( .clk(clk), + .a_addr(buf_r_wr_addr), .a_out(buf_r_wr_dout), .a_wr(buf_r_wr_en), .a_in(buf_r_wr_din), + .b_addr(buf_r_rd_addr), .b_out(buf_r_rd_dout) + ); + + reg [BUFFER_ADDR_BITS-1:0] buf_s_wr_addr; + reg [BUFFER_ADDR_BITS-1:0] buf_s_rd_addr; + reg buf_s_wr_en; + reg [ 32-1:0] buf_s_wr_din; + wire [ 32-1:0] buf_s_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_s + ( .clk(clk), + .a_addr(buf_s_wr_addr), .a_out(), .a_wr(buf_s_wr_en), .a_in(buf_s_wr_din), + .b_addr(buf_s_rd_addr), .b_out(buf_s_rd_dout) + ); + + reg [BUFFER_ADDR_BITS-1:0] buf_u_wr_addr; + reg [BUFFER_ADDR_BITS-1:0] buf_u_rd_addr; + reg buf_u_wr_en; + reg [ 32-1:0] buf_u_wr_din; + wire [ 32-1:0] buf_u_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_u + ( .clk(clk), + .a_addr(buf_u_wr_addr), .a_out(), .a_wr(buf_u_wr_en), .a_in(buf_u_wr_din), + .b_addr(buf_u_rd_addr), .b_out(buf_u_rd_dout) + ); + + reg [BUFFER_ADDR_BITS-1:0] buf_v_wr_addr; + reg [BUFFER_ADDR_BITS-1:0] buf_v_rd_addr; + reg buf_v_wr_en; + reg [ 32-1:0] buf_v_wr_din; + wire [ 32-1:0] buf_v_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_v + ( .clk(clk), + .a_addr(buf_v_wr_addr), .a_out(), .a_wr(buf_v_wr_en), .a_in(buf_v_wr_din), + .b_addr(buf_v_rd_addr), .b_out(buf_v_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_rd_addr; + wire buf_r_dbl_wr_en; + wire [ 32-1:0] buf_r_dbl_wr_din; + wire [ 32-1:0] buf_r_dbl_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_r_dbl + ( .clk(clk), + .a_addr(buf_r_dbl_wr_addr), .a_out(), .a_wr(buf_r_dbl_wr_en), .a_in(buf_r_dbl_wr_din), + .b_addr(buf_r_dbl_rd_addr), .b_out(buf_r_dbl_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_rd_addr; + wire buf_s_dbl_wr_en; + wire [ 32-1:0] buf_s_dbl_wr_din; + wire [ 32-1:0] buf_s_dbl_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_s_dbl + ( .clk(clk), + .a_addr(buf_s_dbl_wr_addr), .a_out(), .a_wr(buf_s_dbl_wr_en), .a_in(buf_s_dbl_wr_din), + .b_addr(buf_s_dbl_rd_addr), .b_out(buf_s_dbl_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_rd_addr; + wire buf_r_plus_s_wr_en; + wire [ 32-1:0] buf_r_plus_s_wr_din; + wire [ 32-1:0] buf_r_plus_s_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_r_plus_s + ( .clk(clk), + .a_addr(buf_r_plus_s_wr_addr), .a_out(), .a_wr(buf_r_plus_s_wr_en), .a_in(buf_r_plus_s_wr_din), + .b_addr(buf_r_plus_s_rd_addr), .b_out(buf_r_plus_s_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_rd_addr; + wire buf_u_minus_v_wr_en; + wire [ 32-1:0] buf_u_minus_v_wr_din; + wire [ 32-1:0] buf_u_minus_v_wr_dout; + + assign buf_u_minus_v_rd_addr = ~buf_u_minus_v_wr_addr; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_u_minus_v + ( .clk(clk), + .a_addr(buf_u_minus_v_wr_addr), .a_out(buf_u_minus_v_wr_dout), .a_wr(buf_u_minus_v_wr_en), .a_in(buf_u_minus_v_wr_din), + .b_addr(buf_u_minus_v_rd_addr), .b_out() + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_rd_addr; + wire buf_v_minus_u_wr_en; + wire [ 32-1:0] buf_v_minus_u_wr_din; + wire [ 32-1:0] buf_v_minus_u_wr_dout; + + assign buf_v_minus_u_rd_addr = ~buf_v_minus_u_wr_addr; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_v_minus_u + ( .clk(clk), + .a_addr(buf_v_minus_u_wr_addr), .a_out(buf_v_minus_u_wr_dout), .a_wr(buf_v_minus_u_wr_en), .a_in(buf_v_minus_u_wr_din), + .b_addr(buf_v_minus_u_rd_addr), .b_out() + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_u_half_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_u_half_rd_addr; + wire buf_u_half_wr_en; + wire [ 32-1:0] buf_u_half_wr_din; + wire [ 32-1:0] buf_u_half_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_u_half + ( .clk(clk), + .a_addr(buf_u_half_wr_addr), .a_out(), .a_wr(buf_u_half_wr_en), .a_in(buf_u_half_wr_din), + .b_addr(buf_u_half_rd_addr), .b_out(buf_u_half_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_v_half_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_v_half_rd_addr; + wire buf_v_half_wr_en; + wire [ 32-1:0] buf_v_half_wr_din; + wire [ 32-1:0] buf_v_half_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_v_half + ( .clk(clk), + .a_addr(buf_v_half_wr_addr), .a_out(), .a_wr(buf_v_half_wr_en), .a_in(buf_v_half_wr_din), + .b_addr(buf_v_half_rd_addr), .b_out(buf_v_half_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_rd_addr; + wire buf_u_minus_v_half_wr_en; + wire [ 32-1:0] buf_u_minus_v_half_wr_din; + wire [ 32-1:0] buf_u_minus_v_half_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_u_minus_v_half + ( .clk(clk), + .a_addr(buf_u_minus_v_half_wr_addr), .a_out(), .a_wr(buf_u_minus_v_half_wr_en), .a_in(buf_u_minus_v_half_wr_din), + .b_addr(buf_u_minus_v_half_rd_addr), .b_out(buf_u_minus_v_half_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_rd_addr; + wire buf_v_minus_u_half_wr_en; + wire [ 32-1:0] buf_v_minus_u_half_wr_din; + wire [ 32-1:0] buf_v_minus_u_half_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_v_minus_u_half + ( .clk(clk), + .a_addr(buf_v_minus_u_half_wr_addr), .a_out(), .a_wr(buf_v_minus_u_half_wr_en), .a_in(buf_v_minus_u_half_wr_din), + .b_addr(buf_v_minus_u_half_rd_addr), .b_out(buf_v_minus_u_half_rd_dout) + ); + + + // + // Helper Modules + // + wire helper_init_ena; + wire helper_invert_precalc_ena; + wire helper_invert_compare_ena; + wire helper_invert_update_ena; + wire helper_reduce_precalc_ena; + wire helper_reduce_update_ena; + wire helper_copy_ena; + + wire helper_init_rdy; + wire helper_invert_precalc_rdy; + wire helper_invert_compare_rdy; + wire helper_invert_update_rdy; + wire helper_reduce_precalc_rdy; + wire helper_reduce_update_rdy; + wire helper_copy_rdy; + + wire helper_init_done = helper_init_rdy && !helper_init_ena; + wire helper_invert_precalc_done = helper_invert_precalc_rdy && !helper_invert_precalc_ena; + wire helper_invert_compare_done = helper_invert_compare_rdy && !helper_invert_compare_ena; + wire helper_invert_update_done = helper_invert_update_rdy && !helper_invert_update_ena; + wire helper_reduce_precalc_done = helper_reduce_precalc_rdy && !helper_reduce_precalc_ena; + wire helper_reduce_update_done = helper_reduce_update_rdy && !helper_reduce_update_ena; + wire helper_copy_done = helper_copy_rdy && !helper_copy_ena; + + + // + // Helper Module - Initialization + // + wire [ BUFFER_ADDR_BITS-1:0] helper_init_r_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_init_s_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_init_u_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_init_v_addr; + wire [OPERAND_ADDR_BITS-1:0] helper_init_q_addr; + + wire helper_init_r_wren; + wire helper_init_s_wren; + wire helper_init_u_wren; + wire helper_init_v_wren; + + wire [ 32-1:0] helper_init_r_data; + wire [ 32-1:0] helper_init_s_data; + wire [ 32-1:0] helper_init_u_data; + wire [ 32-1:0] helper_init_v_data; + + modinv_helper_init # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS), + + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_init + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_init_ena), + .rdy (helper_init_rdy), + + .a_addr (a_addr), + .q_addr (helper_init_q_addr), + + .r_addr (helper_init_r_addr), + .s_addr (helper_init_s_addr), + .u_addr (helper_init_u_addr), + .v_addr (helper_init_v_addr), + + .q_din (q_din), + .a_din (a_din), + + .r_dout (helper_init_r_data), + .s_dout (helper_init_s_data), + .u_dout (helper_init_u_data), + .v_dout (helper_init_v_data), + + .r_wren (helper_init_r_wren), + .s_wren (helper_init_s_wren), + .u_wren (helper_init_u_wren), + .v_wren (helper_init_v_wren) + ); + + + // + // Helper Module - Inversion Pre-Calculation + // + wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_r_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_s_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_u_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_v_addr; + + modinv_helper_invert_precalc # + ( + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_invert_precalc + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_invert_precalc_ena), + .rdy (helper_invert_precalc_rdy), + + .r_addr (helper_invert_precalc_r_addr), + .s_addr (helper_invert_precalc_s_addr), + .u_addr (helper_invert_precalc_u_addr), + .v_addr (helper_invert_precalc_v_addr), + + .r_din (buf_r_rd_dout), + .s_din (buf_s_rd_dout), + .u_din (buf_u_rd_dout), + .v_din (buf_v_rd_dout), + + .r_dbl_addr (buf_r_dbl_wr_addr), + .s_dbl_addr (buf_s_dbl_wr_addr), + .r_plus_s_addr (buf_r_plus_s_wr_addr), + + .u_half_addr (buf_u_half_wr_addr), + .v_half_addr (buf_v_half_wr_addr), + .u_minus_v_addr (buf_u_minus_v_wr_addr), + .v_minus_u_addr (buf_v_minus_u_wr_addr), + .u_minus_v_half_addr (buf_u_minus_v_half_wr_addr), + .v_minus_u_half_addr (buf_v_minus_u_half_wr_addr), + + .r_dbl_dout (buf_r_dbl_wr_din), + .s_dbl_dout (buf_s_dbl_wr_din), + .r_plus_s_dout (buf_r_plus_s_wr_din), + + .u_half_dout (buf_u_half_wr_din), + .v_half_dout (buf_v_half_wr_din), + .u_minus_v_dout (buf_u_minus_v_wr_din), + .v_minus_u_dout (buf_v_minus_u_wr_din), + .u_minus_v_half_dout (buf_u_minus_v_half_wr_din), + .v_minus_u_half_dout (buf_v_minus_u_half_wr_din), + + .r_dbl_wren (buf_r_dbl_wr_en), + .s_dbl_wren (buf_s_dbl_wr_en), + .r_plus_s_wren (buf_r_plus_s_wr_en), + + .u_half_wren (buf_u_half_wr_en), + .v_half_wren (buf_v_half_wr_en), + .u_minus_v_wren (buf_u_minus_v_wr_en), + .v_minus_u_wren (buf_v_minus_u_wr_en), + .u_minus_v_half_wren (buf_u_minus_v_half_wr_en), + .v_minus_u_half_wren (buf_v_minus_u_half_wr_en), + + .u_minus_v_din (buf_u_minus_v_wr_dout), + .v_minus_u_din (buf_v_minus_u_wr_dout) + ); + + + // + // Helper Module - Inversion Comparison + // + wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_u_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_v_addr; + + wire flag_invert_u_gt_v; + wire flag_invert_v_eq_1; + wire flag_invert_u_is_even; + wire flag_invert_v_is_even; + + modinv_helper_invert_compare # + ( + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_invert_compare + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_invert_compare_ena), + .rdy (helper_invert_compare_rdy), + + .u_addr (helper_invert_compare_u_addr), + .v_addr (helper_invert_compare_v_addr), + + .u_din (buf_u_rd_dout), + .v_din (buf_v_rd_dout), + + .u_gt_v (flag_invert_u_gt_v), + .v_eq_1 (flag_invert_v_eq_1), + .u_is_even (flag_invert_u_is_even), + .v_is_even (flag_invert_v_is_even) + ); + + + // + // Helper Module - Inversion Update + // + wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_r_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_s_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_u_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_v_addr; + + wire helper_invert_update_r_wren; + wire helper_invert_update_s_wren; + wire helper_invert_update_u_wren; + wire helper_invert_update_v_wren; + + wire [ 32-1:0] helper_invert_update_r_data; + wire [ 32-1:0] helper_invert_update_s_data; + wire [ 32-1:0] helper_invert_update_u_data; + wire [ 32-1:0] helper_invert_update_v_data; + + modinv_helper_invert_update # + ( + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_invert_update + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_invert_update_ena), + .rdy (helper_invert_update_rdy), + + .u_gt_v (flag_invert_u_gt_v), + .v_eq_1 (flag_invert_v_eq_1), + .u_is_even (flag_invert_u_is_even), + .v_is_even (flag_invert_v_is_even), + + .r_addr (helper_invert_update_r_addr), + .s_addr (helper_invert_update_s_addr), + .u_addr (helper_invert_update_u_addr), + .v_addr (helper_invert_update_v_addr), + + .r_wren (helper_invert_update_r_wren), + .s_wren (helper_invert_update_s_wren), + .u_wren (helper_invert_update_u_wren), + .v_wren (helper_invert_update_v_wren), + + .r_dout (helper_invert_update_r_data), + .s_dout (helper_invert_update_s_data), + .u_dout (helper_invert_update_u_data), + .v_dout (helper_invert_update_v_data), + + .r_dbl_addr (buf_r_dbl_rd_addr), + .s_dbl_addr (buf_s_dbl_rd_addr), + .r_plus_s_addr (buf_r_plus_s_rd_addr), + .u_half_addr (buf_u_half_rd_addr), + .v_half_addr (buf_v_half_rd_addr), + .u_minus_v_half_addr (buf_u_minus_v_half_rd_addr), + .v_minus_u_half_addr (buf_v_minus_u_half_rd_addr), + + .r_dbl_din (buf_r_dbl_rd_dout), + .s_dbl_din (buf_s_dbl_rd_dout), + .r_plus_s_din (buf_r_plus_s_rd_dout), + .u_half_din (buf_u_half_rd_dout), + .v_half_din (buf_v_half_rd_dout), + .u_minus_v_half_din (buf_u_minus_v_half_rd_dout), + .v_minus_u_half_din (buf_v_minus_u_half_rd_dout) + ); + + + // + // Helper Module - Reduction Pre-Calculation + // + wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_r_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_s_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_u_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_v_addr; + wire [OPERAND_ADDR_BITS-1:0] helper_reduce_precalc_q_addr; + + wire helper_reduce_precalc_r_wren; + wire helper_reduce_precalc_u_wren; + wire helper_reduce_precalc_v_wren; + + wire [ 32-1:0] helper_reduce_precalc_r_data; + wire [ 32-1:0] helper_reduce_precalc_u_data; + wire [ 32-1:0] helper_reduce_precalc_v_data; + + wire flag_reduce_s_is_odd; + wire flag_invert_k_is_nul; + + modinv_helper_reduce_precalc # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS), + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS), + .K_NUM_BITS (K_NUM_BITS) + ) + helper_reduce_precalc + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_reduce_precalc_ena), + .rdy (helper_reduce_precalc_rdy), + + .r_addr (helper_reduce_precalc_r_addr), + .s_addr (helper_reduce_precalc_s_addr), + .u_addr (helper_reduce_precalc_u_addr), + .v_addr (helper_reduce_precalc_v_addr), + .q_addr (helper_reduce_precalc_q_addr), + + .k (k), + + .s_is_odd (flag_reduce_s_is_odd), + .k_is_nul (flag_reduce_k_is_nul), + + .r_din (buf_r_wr_dout), + .s_din (buf_s_rd_dout), + .q_din (q_din), + + .r_wren (helper_reduce_precalc_r_wren), + .u_wren (helper_reduce_precalc_u_wren), + .v_wren (helper_reduce_precalc_v_wren), + + .r_dout (helper_reduce_precalc_r_data), + .u_dout (helper_reduce_precalc_u_data), + .v_dout (helper_reduce_precalc_v_data) + ); + + // + // Helper Module - Reduction Update + // + wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_s_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_u_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_v_addr; + + wire helper_reduce_update_s_wren; + + wire [ 32-1:0] helper_reduce_update_s_data; + + modinv_helper_reduce_update # + ( + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_reduce_update + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_reduce_update_ena), + .rdy (helper_reduce_update_rdy), + + .s_is_odd (flag_reduce_s_is_odd), + .k_is_nul (flag_reduce_k_is_nul), + + .s_addr (helper_reduce_update_s_addr), + .u_addr (helper_reduce_update_u_addr), + .v_addr (helper_reduce_update_v_addr), + + .s_wren (helper_reduce_update_s_wren), + + .s_dout (helper_reduce_update_s_data), + + .u_din (buf_u_rd_dout), + .v_din (buf_v_rd_dout) + ); + + + // + // Helper Module - Copying + // + wire [BUFFER_ADDR_BITS-1:0] helper_copy_s_addr; + + modinv_helper_copy # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS), + + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_copy + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_copy_ena), + .rdy (helper_copy_rdy), + + .s_addr (helper_copy_s_addr), + .a1_addr (a1_addr), + + .s_din (buf_s_rd_dout), + + .a1_dout (a1_dout), + + .a1_wren (a1_wren) + ); + + + // + // Round Counter + // + reg [ROUND_COUNTER_BITS-1:0] round_counter; + wire [ROUND_COUNTER_BITS-1:0] round_counter_max = LOOP_NUM_ROUNDS - 1; + wire [ROUND_COUNTER_BITS-1:0] round_counter_zero = {ROUND_COUNTER_BITS{1'b0}}; + wire [ROUND_COUNTER_BITS-1:0] round_counter_next = + (round_counter < round_counter_max) ? round_counter + 1'b1 : round_counter_zero; + + + // + // FSM + // + localparam FSM_STATE_IDLE = 4'd0; + + localparam FSM_STATE_INIT = 4'd1; + + localparam FSM_STATE_INVERT_PRECALC = 4'd11; + localparam FSM_STATE_INVERT_COMPARE = 4'd12; + localparam FSM_STATE_INVERT_UPDATE = 4'd13; + + localparam FSM_STATE_REDUCE_PRECALC = 4'd14; + localparam FSM_STATE_REDUCE_UPDATE = 4'd15; + + localparam FSM_STATE_COPY = 4'd2; + + localparam FSM_STATE_DONE = 4'd3; + + reg [3:0] fsm_state = FSM_STATE_IDLE; + reg [3:0] fsm_state_dly = FSM_STATE_IDLE; + + wire fsm_state_new = (fsm_state != fsm_state_dly); + + wire [3:0] fsm_state_invert_next = (round_counter < round_counter_max) ? + FSM_STATE_INVERT_PRECALC : FSM_STATE_REDUCE_PRECALC; + + wire [3:0] fsm_state_reduce_next = (round_counter < round_counter_max) ? + FSM_STATE_REDUCE_PRECALC : FSM_STATE_COPY; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; + else case (fsm_state) + FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_INIT : FSM_STATE_IDLE; + FSM_STATE_INIT: fsm_state <= helper_init_done ? FSM_STATE_INVERT_PRECALC : FSM_STATE_INIT; + FSM_STATE_INVERT_PRECALC: fsm_state <= helper_invert_precalc_done ? FSM_STATE_INVERT_COMPARE : FSM_STATE_INVERT_PRECALC; + FSM_STATE_INVERT_COMPARE: fsm_state <= helper_invert_compare_done ? FSM_STATE_INVERT_UPDATE : FSM_STATE_INVERT_COMPARE; + FSM_STATE_INVERT_UPDATE: fsm_state <= helper_invert_update_done ? fsm_state_invert_next : FSM_STATE_INVERT_UPDATE; + FSM_STATE_REDUCE_PRECALC: fsm_state <= helper_reduce_precalc_done ? FSM_STATE_REDUCE_UPDATE : FSM_STATE_REDUCE_PRECALC; + FSM_STATE_REDUCE_UPDATE: fsm_state <= helper_reduce_update_done ? fsm_state_reduce_next : FSM_STATE_REDUCE_UPDATE; + FSM_STATE_COPY: fsm_state <= helper_copy_done ? FSM_STATE_DONE : FSM_STATE_COPY; + FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE; + default: fsm_state <= FSM_STATE_IDLE; + endcase + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state_dly <= FSM_STATE_IDLE; + else fsm_state_dly <= fsm_state; + + + assign helper_init_ena = (fsm_state == FSM_STATE_INIT) && fsm_state_new; + assign helper_invert_precalc_ena = (fsm_state == FSM_STATE_INVERT_PRECALC) && fsm_state_new; + assign helper_invert_compare_ena = (fsm_state == FSM_STATE_INVERT_COMPARE) && fsm_state_new; + assign helper_invert_update_ena = (fsm_state == FSM_STATE_INVERT_UPDATE) && fsm_state_new; + assign helper_reduce_precalc_ena = (fsm_state == FSM_STATE_REDUCE_PRECALC) && fsm_state_new; + assign helper_reduce_update_ena = (fsm_state == FSM_STATE_REDUCE_UPDATE) && fsm_state_new; + assign helper_copy_ena = (fsm_state == FSM_STATE_COPY) && fsm_state_new; + + + // + // Counter Increment + // + always @(posedge clk) begin + // + if ((fsm_state == FSM_STATE_INIT) && helper_init_done) + round_counter <= round_counter_zero; + // + if ((fsm_state == FSM_STATE_INVERT_UPDATE) && helper_invert_update_done) + round_counter <= round_counter_next; + // + if ((fsm_state == FSM_STATE_REDUCE_UPDATE) && helper_reduce_update_done) + round_counter <= round_counter_next; + // + end + + + // + // Q Address Selector + // + always @(*) begin + // + case (fsm_state) + FSM_STATE_INIT: q_addr = helper_init_q_addr; + FSM_STATE_REDUCE_PRECALC: q_addr = helper_reduce_precalc_q_addr; + default: q_addr = {OPERAND_ADDR_BITS{1'bX}}; + endcase + // + end + + + // + // Buffer Address Selector + // + always @(*) begin + // + // Write Ports + // + case (fsm_state) + FSM_STATE_INIT: buf_r_wr_addr = helper_init_r_addr; + FSM_STATE_INVERT_UPDATE: buf_r_wr_addr = helper_invert_update_r_addr; + FSM_STATE_REDUCE_PRECALC: buf_r_wr_addr = helper_reduce_precalc_r_addr; + default: buf_r_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_s_wr_addr = helper_init_s_addr; + FSM_STATE_INVERT_UPDATE: buf_s_wr_addr = helper_invert_update_s_addr; + FSM_STATE_REDUCE_UPDATE: buf_s_wr_addr = helper_reduce_update_s_addr; + default: buf_s_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_u_wr_addr = helper_init_u_addr; + FSM_STATE_INVERT_UPDATE: buf_u_wr_addr = helper_invert_update_u_addr; + FSM_STATE_REDUCE_PRECALC: buf_u_wr_addr = helper_reduce_precalc_u_addr; + default: buf_u_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_v_wr_addr = helper_init_v_addr; + FSM_STATE_INVERT_UPDATE: buf_v_wr_addr = helper_invert_update_v_addr; + FSM_STATE_REDUCE_PRECALC: buf_v_wr_addr = helper_reduce_precalc_v_addr; + default: buf_v_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + // Read Ports + // + case (fsm_state) + FSM_STATE_INVERT_PRECALC: buf_r_rd_addr = helper_invert_precalc_r_addr; + default: buf_r_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INVERT_PRECALC: buf_s_rd_addr = helper_invert_precalc_s_addr; + FSM_STATE_REDUCE_PRECALC: buf_s_rd_addr = helper_reduce_precalc_s_addr; + FSM_STATE_COPY: buf_s_rd_addr = helper_copy_s_addr; + default: buf_s_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INVERT_PRECALC: buf_u_rd_addr = helper_invert_precalc_u_addr; + FSM_STATE_INVERT_COMPARE: buf_u_rd_addr = helper_invert_compare_u_addr; + FSM_STATE_REDUCE_UPDATE: buf_u_rd_addr = helper_reduce_update_u_addr; + default: buf_u_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INVERT_PRECALC: buf_v_rd_addr = helper_invert_precalc_v_addr; + FSM_STATE_INVERT_COMPARE: buf_v_rd_addr = helper_invert_compare_v_addr; + FSM_STATE_REDUCE_UPDATE: buf_v_rd_addr = helper_reduce_update_v_addr; + default: buf_v_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + end + + + // + // Buffer Write Enable Logic + // + always @(*) begin + // + // Write Ports + // + case (fsm_state) + FSM_STATE_INIT: buf_r_wr_en = helper_init_r_wren; + FSM_STATE_INVERT_UPDATE: buf_r_wr_en = helper_invert_update_r_wren; + FSM_STATE_REDUCE_PRECALC: buf_r_wr_en = helper_reduce_precalc_r_wren; + default: buf_r_wr_en = 1'b0; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_s_wr_en = helper_init_s_wren; + FSM_STATE_INVERT_UPDATE: buf_s_wr_en = helper_invert_update_s_wren; + FSM_STATE_REDUCE_UPDATE: buf_s_wr_en = helper_reduce_update_s_wren; + default: buf_s_wr_en = 1'b0; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_u_wr_en = helper_init_u_wren; + FSM_STATE_INVERT_UPDATE: buf_u_wr_en = helper_invert_update_u_wren; + FSM_STATE_REDUCE_PRECALC: buf_u_wr_en = helper_reduce_precalc_u_wren; + default: buf_u_wr_en = 1'b0; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_v_wr_en = helper_init_v_wren; + FSM_STATE_INVERT_UPDATE: buf_v_wr_en = helper_invert_update_v_wren; + FSM_STATE_REDUCE_PRECALC: buf_v_wr_en = helper_reduce_precalc_v_wren; + default: buf_v_wr_en = 1'b0; + endcase + // + end + + + // + // Buffer Write Data Selector + // + always @(*) begin + // + case (fsm_state) + FSM_STATE_INIT: buf_r_wr_din = helper_init_r_data; + FSM_STATE_INVERT_UPDATE: buf_r_wr_din = helper_invert_update_r_data; + FSM_STATE_REDUCE_PRECALC: buf_r_wr_din = helper_reduce_precalc_r_data; + default: buf_r_wr_din = {32{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_s_wr_din = helper_init_s_data; + FSM_STATE_INVERT_UPDATE: buf_s_wr_din = helper_invert_update_s_data; + FSM_STATE_REDUCE_UPDATE: buf_s_wr_din = helper_reduce_update_s_data; + default: buf_s_wr_din = {32{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_u_wr_din = helper_init_u_data; + FSM_STATE_INVERT_UPDATE: buf_u_wr_din = helper_invert_update_u_data; + FSM_STATE_REDUCE_PRECALC: buf_u_wr_din = helper_reduce_precalc_u_data; + default: buf_u_wr_din = {32{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_v_wr_din = helper_init_v_data; + FSM_STATE_INVERT_UPDATE: buf_v_wr_din = helper_invert_update_v_data; + FSM_STATE_REDUCE_PRECALC: buf_v_wr_din = helper_reduce_precalc_v_data; + default: buf_v_wr_din = {32{1'bX}}; + endcase + // + end + + + // + // Ready Logic + // + reg rdy_reg = 1'b1; + + assign rdy = rdy_reg; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) rdy_reg <= 1'b1; + else begin + + /* clear */ + if (rdy && ena) rdy_reg <= 1'b0; + + /* set */ + if (!rdy && (fsm_state == FSM_STATE_DONE)) rdy_reg <= 1'b1; + + end + + + // + // Store Redundant Power of 2 (K) + // + always @(posedge clk) + // + if (helper_init_ena) + k <= {K_NUM_BITS{1'b0}}; + else begin + + if (helper_invert_update_ena && !flag_invert_v_eq_1) + k <= k + 1'b1; + + if (helper_reduce_update_ena && (k != {K_NUM_BITS{1'b0}})) + k <= k - 1'b1; + + end + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_multiplier_256.v b/rtl/modular/modular_multiplier_256.v new file mode 100644 index 0000000..8487aee --- /dev/null +++ b/rtl/modular/modular_multiplier_256.v @@ -0,0 +1,402 @@ +//------------------------------------------------------------------------------ +// +// modular_multiplier_256.v +// ----------------------------------------------------------------------------- +// Modular multiplier. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_multiplier_256 + ( + clk, rst_n, + ena, rdy, + a_addr, b_addr, n_addr, p_addr, p_wren, + a_din, b_din, n_din, p_dout + ); + + + // + // Constants + // + localparam OPERAND_NUM_WORDS = 8; + localparam WORD_COUNTER_WIDTH = 3; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction + + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? + WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word + output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word + output wire p_wren; // store current P word now + + input wire [ 31:0] a_din; // A + input wire [ 31:0] b_din; // B + input wire [ 31:0] n_din; // N (must be P-256!) + output wire [ 31:0] p_dout; // P = A * B mod N + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_a; + reg [WORD_COUNTER_WIDTH-1:0] index_b; + + /* map registers to output ports */ + assign a_addr = index_a; + assign b_addr = index_b; + + // + // FSM + // + localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; + wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; + wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)]; + wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)]; + wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)]; + + wire inc_index_a = |fsm_shreg_inc_index_a; + wire store_word_a = |fsm_shreg_store_word_a; + wire inc_index_b = |fsm_shreg_inc_index_b; + wire clear_mac_ab = |fsm_shreg_inc_index_b; + wire shift_wide_a = |fsm_shreg_inc_index_b; + wire enable_mac_ab = |fsm_shreg_inc_index_b; + wire store_si_msb = |fsm_shreg_store_si_msb; + wire store_si_lsb = fsm_shreg_store_si_lsb; + wire shift_si = |fsm_shreg_shift_si; + wire mask_cw1_sum = fsm_shreg_mask_cw1_sum; + wire store_c_word = |fsm_shreg_store_c_word; + wire reduce_start = fsm_shreg_reduce_start; + wire reduce_stop = fsm_shreg_reduce_stop; + + + // + // FSM Logic + // + wire reduce_done; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) + fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else if (!reduce_stop || reduce_done) + fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + reg index_b_ff; + + always @(posedge clk) + // + if (inc_index_b) index_b_ff <= ~index_b_ff; + else index_b_ff <= 1'b0; + + always @(posedge clk) + // + if (rdy) begin + // + index_a <= WORD_INDEX_ZERO; + index_b <= WORD_INDEX_LAST; + // + end else begin + // + if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); + if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); + // + end + + + // + // Wide Operand Buffer + // + reg [255:0] buf_a_wide; + + always @(posedge clk) + // + if (store_word_a) + buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]}; + else if (shift_wide_a) + buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]}; + + + // + // Multiplier Array + // + wire mac_inhibit; // control signal to pause all accumulators + + wire [46: 0] mac[0:15]; // outputs of all accumulators + reg [15: 0] mac_clear; // individual per-accumulator clear flag + + assign mac_inhibit = ~enable_mac_ab; + + always @(posedge clk) + // + if (!clear_mac_ab) + mac_clear <= {16{1'b1}}; + else begin + + if (mac_clear == {16{1'b1}}) + mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}}; + else + mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}}; + + + end + + // + // Array of parallel multipliers + // + genvar i; + generate for (i=0; i<16; i=i+1) + begin : gen_mac_array + // + mac16_wrapper mac16_inst + ( + .clk (clk), + .ce (~mac_inhibit), + + .clr (mac_clear[i]), + + .a (buf_a_wide[16*i+:16]), + .b (index_b_ff ? b_din[15:0] : b_din[31:16]), + .s (mac[i]) + ); + // + end + endgenerate + + // + // Intermediate Words + // + reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb; + reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb; + + + wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new; + wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new; + + generate for (i=0; i<16; i=i+1) + begin : gen_si_lsb_new + assign si_lsb_new[47*i+:47] = mac[15-i]; + end + endgenerate + + generate for (i=1; i<16; i=i+1) + begin : gen_si_msb_new + assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47]; + end + endgenerate + + always @(posedge clk) begin + // + if (shift_si) begin + si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]}; + si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]}; + end else begin + + if (store_si_msb) + si_msb <= si_msb_new; + + if (store_si_lsb) + si_lsb <= si_lsb_new; + end + + end + + + // + // Accumulators + // + wire [46: 0] add48_cw0_s; + wire [46: 0] add48_cw1_s; + + + // + // cw0, b, cw1, b + // + reg [30: 0] si_prev_dly; + reg [15: 0] si_next_dly; + + always @(posedge clk) + // + if (shift_si) + si_prev_dly <= si_lsb[93:63]; + else + si_prev_dly <= {31{1'b0}}; + + always @(posedge clk) + // + si_next_dly <= si_lsb[62:47]; + + wire [46: 0] add48_cw0_a = si_lsb[46:0]; + wire [46: 0] add48_cw0_b = {{16{1'b0}}, si_prev_dly}; + + wire [46: 0] add48_cw1_a = add48_cw0_s; + wire [46: 0] add48_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add48_cw1_s[46:32]}}; + + adder47_wrapper add48_cw0_inst + ( + .clk (clk), + .a (add48_cw0_a), + .b (add48_cw0_b), + .s (add48_cw0_s) + ); + + adder47_wrapper add48_cw1_inst + ( + .clk (clk), + .a (add48_cw1_a), + .b (add48_cw1_b), + .s (add48_cw1_s) + ); + + + + // + // Full-Size Product + // + reg [WORD_COUNTER_WIDTH:0] bram_c_addr; + + wire [WORD_COUNTER_WIDTH:0] reduce_c_addr; + wire [ 31:0] reduce_c_word; + + always @(posedge clk) + // + if (store_c_word) + bram_c_addr <= bram_c_addr + 1'b1; + else + bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}}; + + bram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1) + ) + bram_c_inst + ( + .clk (clk), + + .a_addr (bram_c_addr), + .a_wr (store_c_word), + .a_in (add48_cw1_s[31:0]), + .a_out (), + + .b_addr (reduce_c_addr), + .b_out (reduce_c_word) + ); + + + // + // Reduction Stage + // + modular_reductor_256 reduce_256_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_done), + + .x_addr (reduce_c_addr), + .n_addr (n_addr), + .p_addr (p_addr), + .p_wren (p_wren), + + .x_din (reduce_c_word), + .n_din (n_din), + .p_dout (p_dout) + ); + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_reductor_256.v b/rtl/modular/modular_reductor_256.v new file mode 100644 index 0000000..774f42e --- /dev/null +++ b/rtl/modular/modular_reductor_256.v @@ -0,0 +1,666 @@ +//------------------------------------------------------------------------------ +// +// modular_reductor_256.v +// ----------------------------------------------------------------------------- +// Modular reductor. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_reductor_256 + ( + clk, rst_n, + ena, rdy, + x_addr, n_addr, p_addr, p_wren, + x_din, n_din, p_dout + ); + + // + // Constants + // + localparam OPERAND_NUM_WORDS = 8; + localparam WORD_COUNTER_WIDTH = 3; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST; + input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? + WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word + output wire p_wren; // store current P word now + + input wire [ 31:0] x_din; // X + input wire [ 31:0] n_din; // N (must be P-256!) + output wire [ 31:0] p_dout; // P = X mod N + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH:0] index_x; + + + /* map registers to output ports */ + assign x_addr = index_x; + + + // + // FSM + // + localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS]; + wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS]; + wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5]; + + wire [5-1:0] fsm_shreg_reduce_stage_start; + wire [5-1:0] fsm_shreg_reduce_stage_stop; + + genvar s; + generate for (s=0; s<5; s=s+1) + begin : gen_fsm_shreg_reduce_stages + assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1]; + assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2]; + end + endgenerate + + wire inc_index_x = |fsm_shreg_inc_index_x; + wire store_word_z = |fsm_shreg_store_word_z; + wire reduce_start = |fsm_shreg_reduce_stage_start; + wire reduce_stop = |fsm_shreg_reduce_stage_stop; + wire store_p = fsm_shreg_reduce_stage_stop[0]; + + + wire reduce_adder0_done; + wire reduce_adder1_done; + wire reduce_subtractor_done; + + wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) + // + fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else if (!reduce_stop || reduce_done_all) + // + fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) + // + index_x <= WORD_INDEX_LAST; + // + else if (inc_index_x) + // + index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x); + + + // + // Look-up Table + // + + // TODO: Explain s5!!! + + reg [9*WORD_COUNTER_WIDTH-1:0] z_addr; // + reg [9 -1:0] z_wren; // + reg [9 -1:0] z_mask; // mask input to store zero word + reg [9 -1:0] z_save; // save previous word once again + + always @(posedge clk) + // + if (inc_index_x) + // + case (index_x) + // + // s9 s8 s7 s6 s5 s4 s3 s2 s1 + // || || || || || || || || || + 4'd00: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd00}; + 4'd01: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd01}; + 4'd02: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd02}; + 4'd03: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd03}; + 4'd04: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd04}; + 4'd05: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd05}; + 4'd06: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd06}; + 4'd07: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd07}; + 4'd08: z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dxx}; + 4'd09: z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dxx}; + 4'd10: z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dxx}; + 4'd11: z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dxx}; + 4'd12: z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dxx}; + 4'd13: z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx}; + 4'd14: z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx}; + 4'd15: z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx}; + // + default: z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}}; + // + endcase + + always @(posedge clk) + // + case (index_x) + // + // 9 8 7 6 5 4 3 2 1 + // | | | | | | | | | + 4'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 4'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 4'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 4'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 4'd04: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 4'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 4'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 4'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 4'd08: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 4'd09: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 4'd10: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 4'd11: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 4'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 4'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 4'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 4'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + // + default: z_wren <= {9{1'b0}}; + // + endcase + + always @(posedge clk) + // + if (inc_index_x) + // + case (index_x) + // + // 9 8 7 6 5 4 3 2 1 + // | | | | | | | | | + 4'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd08: z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0}; + 4'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0}; + 4'd10: z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0}; + 4'd11: z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0}; + 4'd12: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0}; + 4'd13: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0}; + 4'd14: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd15: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + // + default: z_mask <= {9{1'bX}}; + // + endcase + + always @(posedge clk) + // + if (inc_index_x) + // + case (index_x) + // + // 9 8 7 6 5 4 3 2 1 + // | | | | | | | | | + 4'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd01: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 4'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + // + default: z_save <= {9{1'bX}}; + // + endcase + + + // + // Intermediate Numbers + // + reg [WORD_COUNTER_WIDTH-1:0] reduce_z_addr[1:9]; + wire [ 32-1:0] reduce_z_dout[1:9]; + + reg [31: 0] x_din_dly; + always @(posedge clk) + // + x_din_dly <= x_din; + + + genvar z; + generate for (z=1; z<=9; z=z+1) + // + begin : gen_z_bram + // + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_c_inst + ( + .clk (clk), + + .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]), + .a_wr (z_wren[z-1] & store_word_z), + .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)), + .a_out (), + + .b_addr (reduce_z_addr[z]), + .b_out (reduce_z_dout[z]) + ); + // + end + // + endgenerate + + + + + wire [ 32-1:0] bram_sum0_wr_din; + wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr; + wire bram_sum0_wr_wren; + + wire [ 32-1:0] bram_sum1_wr_din; + wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr; + wire bram_sum1_wr_wren; + + wire [ 32-1:0] bram_diff_wr_din; + wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr; + wire bram_diff_wr_wren; + + wire [ 32-1:0] bram_sum0_rd_dout; + reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr; + + wire [ 32-1:0] bram_sum1_rd_dout; + reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr; + + wire [ 32-1:0] bram_diff_rd_dout; + reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr; + + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_sum0_inst + ( + .clk (clk), + + .a_addr (bram_sum0_wr_addr), + .a_wr (bram_sum0_wr_wren), + .a_in (bram_sum0_wr_din), + .a_out (), + + .b_addr (bram_sum0_rd_addr), + .b_out (bram_sum0_rd_dout) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_sum1_inst + ( + .clk (clk), + + .a_addr (bram_sum1_wr_addr), + .a_wr (bram_sum1_wr_wren), + .a_in (bram_sum1_wr_din), + .a_out (), + + .b_addr (bram_sum1_rd_addr), + .b_out (bram_sum1_rd_dout) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_diff_inst + ( + .clk (clk), + + .a_addr (bram_diff_wr_addr), + .a_wr (bram_diff_wr_wren), + .a_in (bram_diff_wr_din), + .a_out (), + + .b_addr (bram_diff_rd_addr), + .b_out (bram_diff_rd_dout) + ); + + + + wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr; + wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr; + wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr; + + reg [ 32-1:0] adder0_a_din; + reg [ 32-1:0] adder0_b_din; + + reg [ 32-1:0] adder1_a_din; + reg [ 32-1:0] adder1_b_din; + + reg [ 32-1:0] subtractor_a_din; + reg [ 32-1:0] subtractor_b_din; + + // n_addr - only 1 output, because all modules are in sync + + modular_adder adder_inst0 + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_adder0_done), + + .ab_addr (adder0_ab_addr), + .n_addr (), + .s_addr (bram_sum0_wr_addr), + .s_wren (bram_sum0_wr_wren), + + .a_din (adder0_a_din), + .b_din (adder0_b_din), + .n_din (n_din), + .s_dout (bram_sum0_wr_din) + ); + + modular_adder adder_inst1 + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_adder1_done), + + .ab_addr (adder1_ab_addr), + .n_addr (), + .s_addr (bram_sum1_wr_addr), + .s_wren (bram_sum1_wr_wren), + + .a_din (adder1_a_din), + .b_din (adder1_b_din), + .n_din (n_din), + .s_dout (bram_sum1_wr_din) + ); + + modular_subtractor subtractor_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_subtractor_done), + + .ab_addr (subtractor_ab_addr), + .n_addr (n_addr), + .d_addr (bram_diff_wr_addr), + .d_wren (bram_diff_wr_wren), + + .a_din (subtractor_a_din), + .b_din (subtractor_b_din), + .n_din (n_din), + .d_dout (bram_diff_wr_din) + ); + + + // + // address + // + always @(*) + // + case (fsm_shreg_reduce_stage_stop) + // + 5'b10000: begin + reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[2] = adder0_ab_addr; + reduce_z_addr[3] = adder1_ab_addr; + reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[6] = subtractor_ab_addr; + reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + 5'b01000: begin + reduce_z_addr[1] = adder0_ab_addr; + reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[4] = adder1_ab_addr; + reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[7] = subtractor_ab_addr; + reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = adder1_ab_addr; + bram_diff_rd_addr = subtractor_ab_addr; + end + // + 5'b00100: begin + reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[5] = adder0_ab_addr; + reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[8] = subtractor_ab_addr; + reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = adder1_ab_addr; + bram_diff_rd_addr = subtractor_ab_addr; + end + // + 5'b00010: begin + reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[9] = subtractor_ab_addr; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = adder0_ab_addr; + bram_diff_rd_addr = subtractor_ab_addr; + end + // + 5'b00001: begin + reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_diff_rd_addr = adder0_ab_addr; + end + // + default: begin + reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + endcase + + + + // + // adder 0 + // + always @(*) begin + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder0_a_din = reduce_z_dout[2]; + 5'b01000: adder0_a_din = bram_sum0_rd_dout; + 5'b00100: adder0_a_din = bram_sum0_rd_dout; + 5'b00010: adder0_a_din = bram_sum0_rd_dout; + 5'b00001: adder0_a_din = bram_sum0_rd_dout; + default: adder0_a_din = {32{1'bX}}; + endcase + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder0_b_din = reduce_z_dout[2]; + 5'b01000: adder0_b_din = reduce_z_dout[1]; + 5'b00100: adder0_b_din = reduce_z_dout[5]; + 5'b00010: adder0_b_din = bram_sum1_rd_dout; + 5'b00001: adder0_b_din = bram_diff_rd_dout; + default: adder0_b_din = {32{1'bX}}; + endcase + // + end + + // + // adder 1 + // + always @(*) begin + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder1_a_din = reduce_z_dout[3]; + 5'b01000: adder1_a_din = bram_sum1_rd_dout; + 5'b00100: adder1_a_din = bram_sum1_rd_dout; + 5'b00010: adder1_a_din = {32{1'bX}}; + 5'b00001: adder1_a_din = {32{1'bX}}; + default: adder1_a_din = {32{1'bX}}; + endcase + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder1_b_din = reduce_z_dout[3]; + 5'b01000: adder1_b_din = reduce_z_dout[4]; + 5'b00100: adder1_b_din = {32{1'b0}}; + 5'b00010: adder1_b_din = {32{1'bX}}; + 5'b00001: adder1_b_din = {32{1'bX}}; + default: adder1_b_din = {32{1'bX}}; + endcase + // + end + + + // + // subtractor + // + always @(*) begin + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: subtractor_a_din = {32{1'b0}}; + 5'b01000: subtractor_a_din = bram_diff_rd_dout; + 5'b00100: subtractor_a_din = bram_diff_rd_dout; + 5'b00010: subtractor_a_din = bram_diff_rd_dout; + 5'b00001: subtractor_a_din = {32{1'bX}}; + default: subtractor_a_din = {32{1'bX}}; + endcase + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: subtractor_b_din = reduce_z_dout[6]; + 5'b01000: subtractor_b_din = reduce_z_dout[7]; + 5'b00100: subtractor_b_din = reduce_z_dout[8]; + 5'b00010: subtractor_b_din = reduce_z_dout[9]; + 5'b00001: subtractor_b_din = {32{1'bX}}; + default: subtractor_b_din = {32{1'bX}}; + endcase + // + end + + + + + assign p_addr = bram_sum0_wr_addr; + assign p_wren = bram_sum0_wr_wren & store_p; + assign p_dout = bram_sum0_wr_din; + + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_subtractor.v b/rtl/modular/modular_subtractor.v new file mode 100644 index 0000000..322aec4 --- /dev/null +++ b/rtl/modular/modular_subtractor.v @@ -0,0 +1,292 @@ +//------------------------------------------------------------------------------ +// +// modular_subtractor.v +// ----------------------------------------------------------------------------- +// Modular subtractor. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_subtractor + ( + clk, rst_n, + ena, rdy, + ab_addr, n_addr, d_addr, d_wren, + a_din, b_din, n_din, d_dout + ); + + + // + // Parameters + // + parameter OPERAND_NUM_WORDS = 8; + parameter WORD_COUNTER_WIDTH = 3; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] d_addr; // index of current D word + output wire d_wren; // store current D word now + + input wire [ 31:0] a_din; // A + input wire [ 31:0] b_din; // B + input wire [ 31:0] n_din; // N + output wire [ 31:0] d_dout; // D = (A - B) mod N + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_ab; + reg [WORD_COUNTER_WIDTH-1:0] index_n; + reg [WORD_COUNTER_WIDTH-1:0] index_d; + + /* map registers to output ports */ + assign ab_addr = index_ab; + assign n_addr = index_n; + assign d_addr = index_d; + + + // + // Subtractor + // + wire [31: 0] sub32_d; + wire sub32_b_in; + wire sub32_b_out; + + subtractor32_wrapper subtractor32 + ( + .clk (clk), + .a (a_din), + .b (b_din), + .d (sub32_d), + .b_in (sub32_b_in), + .b_out (sub32_b_out) + ); + + + // + // Adder + // + wire [31: 0] add32_s; + wire add32_c_in; + wire add32_c_out; + + adder32_wrapper adder32 + ( + .clk (clk), + .a (sub32_d), + .b (n_din), + .s (add32_s), + .c_in (add32_c_in), + .c_out (add32_c_out) + ); + + + // + // FSM + // + + localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)]; + + wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; + + wire inc_index_ab = |fsm_shreg_inc_index_ab; + wire inc_index_n = |fsm_shreg_inc_index_n; + wire store_dif_ab = |fsm_shreg_store_dif_ab; + wire store_dif_ab_n = |fsm_shreg_store_dif_ab_n; + wire store_data_d = |fsm_shreg_store_data_d; + wire inc_index_d = |fsm_shreg_inc_index_d; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Borrow & Carry Masking Logic + // + reg sub32_b_mask; + reg add32_c_mask; + + + always @(posedge clk) begin + // + sub32_b_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0; + add32_c_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0; + // + end + + assign sub32_b_in = sub32_b_out & ~sub32_b_mask; + assign add32_c_in = add32_c_out & ~add32_c_mask; + + + + // + // Borrow & Carry Latch Logic + // + reg sub32_borrow_latch; + + always @(posedge clk) begin + // + if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out; + // + end + + + // + // Intermediate Results + // + reg [32*OPERAND_NUM_WORDS-1:0] d_ab; + reg [32*OPERAND_NUM_WORDS-1:0] d_ab_n; + + always @(posedge clk) + // + if (store_data_d) begin + // + d_ab <= {{32{1'bX}}, d_ab[32*OPERAND_NUM_WORDS-1:32]}; + d_ab_n <= {{32{1'bX}}, d_ab_n[32*OPERAND_NUM_WORDS-1:32]}; + // + end else begin + // + if (store_dif_ab) d_ab <= {sub32_d, d_ab[32*OPERAND_NUM_WORDS-1:32]}; + if (store_dif_ab_n) d_ab_n <= {add32_s, d_ab_n[32*OPERAND_NUM_WORDS-1:32]}; + // + end + + + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) begin + // + index_ab <= WORD_INDEX_ZERO; + index_n <= WORD_INDEX_ZERO; + index_d <= WORD_INDEX_ZERO; + // + end else begin + // + if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab); + if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n); + if (inc_index_d) index_d <= WORD_INDEX_NEXT_OR_ZERO(index_d); + // + end + + + // + // Output Sum Selector + // + wire mux_select_ab_n = sub32_borrow_latch; + + + // + // Output Data and Write Enable Logic + // + reg d_wren_reg; + reg [31: 0] d_dout_reg; + wire [31: 0] d_dout_mux = mux_select_ab_n ? d_ab_n[31:0] : d_ab[31:0]; + + assign d_wren = d_wren_reg; + assign d_dout = d_dout_reg; + + always @(posedge clk) + // + if (rdy) begin + // + d_wren_reg <= 1'b0; + d_dout_reg <= {32{1'bX}}; + // + end else begin + // + d_wren_reg <= store_data_d; + d_dout_reg <= store_data_d ? d_dout_mux : {32{1'bX}}; + // + end + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/multiword/mw_comparator.v b/rtl/multiword/mw_comparator.v new file mode 100644 index 0000000..b97a6cf --- /dev/null +++ b/rtl/multiword/mw_comparator.v @@ -0,0 +1,220 @@ +//------------------------------------------------------------------------------ +// +// mw_comparator.v +// ----------------------------------------------------------------------------- +// Multi-word comparator. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module mw_comparator + ( + clk, rst_n, + ena, rdy, + xy_addr, x_din, y_din, + cmp_l, cmp_e, cmp_g + ); + + + // + // Parameters + // + parameter WORD_COUNTER_WIDTH = 3; + parameter OPERAND_NUM_WORDS = 8; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREV_OR_LAST; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREV_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? + WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] xy_addr; // address of current X and Y words + input wire [ 32-1:0] x_din; // current X word + input wire [ 32-1:0] y_din; // current Y word + + output wire cmp_l; // X < Y ? + output wire cmp_e; // X = Y ? + output wire cmp_g; // X > Y ? + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_xy; + + reg reg_cmp_l; + reg reg_cmp_e; + reg reg_cmp_g; + + + // + // Output Mapping + // + assign xy_addr = index_xy; + + assign cmp_l = reg_cmp_l; + assign cmp_e = reg_cmp_e; + assign cmp_g = reg_cmp_g; + + + // + // FSM + // + localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 3; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_dec_index_xy = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_calc_leg = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; + wire fsm_shreg_calc_leg_last = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; + + wire dec_index_xy = |fsm_shreg_dec_index_xy; + wire calc_leg = |fsm_shreg_calc_leg; + wire calc_leg_last = fsm_shreg_calc_leg_last; + + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) index_xy <= WORD_INDEX_LAST; + else if (dec_index_xy) index_xy <= WORD_INDEX_PREV_OR_LAST(index_xy); + + + // + // 32-bit Subtractor + // + wire [31: 0] sub32_d_out; + wire sub32_b_in; + wire sub32_b_out; + + subtractor32_wrapper subtractor32_inst + ( + .clk (clk), + + .a (x_din), + .b (y_din), + + .d (sub32_d_out), + + .b_in (sub32_b_in), + .b_out (sub32_b_out) + ); + + + // + // Borrow Masking Logic + // + reg sub32_b_mask; + + always @(posedge clk) + // + sub32_b_mask <= (index_xy == WORD_INDEX_LAST) ? 1'b1 : 1'b0; + + assign sub32_b_in = sub32_b_out & ~sub32_b_mask; + + // + // Output Logic + // + wire cmp_unresolved = !(cmp_l || cmp_g); + + wire cmp_borrow_is_set = (sub32_b_out == 1'b1) ? 1'b1 : 1'b0; + wire cmp_difference_is_nonzero = (sub32_d_out != 32'd0) ? 1'b1 : 1'b0; + + always @(posedge clk) + // + if (rdy) begin + // + if (ena) begin + // + reg_cmp_l <= 1'b0; + reg_cmp_e <= 1'b0; + reg_cmp_g <= 1'b0; + // + end + // + end else if (cmp_unresolved && calc_leg) begin + // + if ( cmp_borrow_is_set) reg_cmp_l <= 1'b1; + if (!cmp_borrow_is_set && cmp_difference_is_nonzero) reg_cmp_g <= 1'b1; + if (!cmp_borrow_is_set && !cmp_difference_is_nonzero && calc_leg_last) reg_cmp_e <= 1'b1; + // + end + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/multiword/mw_mover.v b/rtl/multiword/mw_mover.v new file mode 100644 index 0000000..5db95a7 --- /dev/null +++ b/rtl/multiword/mw_mover.v @@ -0,0 +1,175 @@ +//------------------------------------------------------------------------------ +// +// mw_mover.v +// ----------------------------------------------------------------------------- +// Multi-word data mover. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module mw_mover + ( + clk, rst_n, + ena, rdy, + x_addr, y_addr, y_wren, + x_din, y_dout + ); + + + // + // Parameters + // + parameter WORD_COUNTER_WIDTH = 3; + parameter OPERAND_NUM_WORDS = 8; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] x_addr; // address of current X word + output wire [WORD_COUNTER_WIDTH-1:0] y_addr; // address of current Y word + output wire y_wren; // store current Y word + + input wire [ 32-1:0] x_din; // current X word + output wire [ 32-1:0] y_dout; // current Y word + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_x; + reg [WORD_COUNTER_WIDTH-1:0] index_y; + + + // + // Output Mapping + // + assign x_addr = index_x; + assign y_addr = index_y; + + + // + // FSM + // + localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 2; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_y = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; + + wire inc_index_x = |fsm_shreg_inc_index_x; + wire inc_index_y = |fsm_shreg_inc_index_y; + wire store_word_y = |fsm_shreg_inc_index_x; + + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) begin + index_x <= WORD_INDEX_ZERO; + index_y <= WORD_INDEX_ZERO; + end else begin + if (inc_index_x) index_x <= WORD_INDEX_NEXT_OR_ZERO(index_x); + if (inc_index_y) index_y <= WORD_INDEX_NEXT_OR_ZERO(index_y); + end + + + // + // Write Enable Logic + // + reg y_wren_reg; + + assign y_wren = y_wren_reg; + + always @(posedge clk) + // + if (rdy) y_wren_reg <= 1'b0; + else y_wren_reg <= store_word_y; + + + // + // Output Logic + // + assign y_dout = x_din; + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/util/bram_1rw_1ro_readfirst.v b/rtl/util/bram_1rw_1ro_readfirst.v new file mode 100644 index 0000000..28782c2 --- /dev/null +++ b/rtl/util/bram_1rw_1ro_readfirst.v @@ -0,0 +1,101 @@ +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module bram_1rw_1ro_readfirst + #(parameter MEM_WIDTH = 32, + parameter MEM_ADDR_BITS = 8) + ( + input wire clk, + + input wire [MEM_ADDR_BITS-1:0] a_addr, + input wire a_wr, + input wire [MEM_WIDTH-1:0] a_in, + output wire [MEM_WIDTH-1:0] a_out, + + input wire [MEM_ADDR_BITS-1:0] b_addr, + output wire [MEM_WIDTH-1:0] b_out + ); + + + // + // BRAM + // + (* RAM_STYLE="BLOCK" *) + reg [MEM_WIDTH-1:0] bram[0:(2**MEM_ADDR_BITS)-1]; + + + // + // Initialization + // + /** + integer c; + initial begin + for (c=0; c<(2**MEM_ADDR_BITS); c=c+1) + bram[c] = {MEM_WIDTH{1'b0}}; + end + **/ + + + + // + // Output Registers + // + reg [MEM_WIDTH-1:0] bram_reg_a; + reg [MEM_WIDTH-1:0] bram_reg_b; + + assign a_out = bram_reg_a; + assign b_out = bram_reg_b; + + + // + // Read-Write Port A + // + always @(posedge clk) begin + // + bram_reg_a <= bram[a_addr]; + // + if (a_wr) bram[a_addr] <= a_in; + // + end + + + // + // Read-Only Port B + // + always @(posedge clk) + // + bram_reg_b <= bram[b_addr]; + + +endmodule -- cgit v1.2.3