aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2016-10-31 00:14:51 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2016-10-31 00:14:51 +0300
commit25e338149fdb8e06c82d99600769a8498a85ef2c (patch)
treecf4b855bcf1083f3cffebf6a98b5aaad45baf362
Initial commit of base point multiplier core for ECDSA curve P-256.
-rw-r--r--bench/tb_curve_adder_256.v420
-rw-r--r--bench/tb_curve_doubler_256.v409
-rw-r--r--bench/tb_curve_multiplier_256.v281
-rw-r--r--bench/tb_lowlevel_adder32.v175
-rw-r--r--bench/tb_lowlevel_adder47.v151
-rw-r--r--bench/tb_lowlevel_subtractor32.v174
-rw-r--r--bench/tb_modular_adder.v357
-rw-r--r--bench/tb_modular_invertor.v226
-rw-r--r--bench/tb_modular_multiplier_256.v366
-rw-r--r--bench/tb_modular_subtractor.v356
-rw-r--r--bench/tb_mw_comparator.v322
-rw-r--r--bench/tb_mw_mover.v282
-rw-r--r--rtl/curve/curve_dbl_add_256.v868
-rw-r--r--rtl/curve/curve_mul_256.v720
-rw-r--r--rtl/curve/rom/brom_p256_delta.v68
-rw-r--r--rtl/curve/rom/brom_p256_g_x.v68
-rw-r--r--rtl/curve/rom/brom_p256_g_y.v68
-rw-r--r--rtl/curve/rom/brom_p256_h_x.v68
-rw-r--r--rtl/curve/rom/brom_p256_h_y.v68
-rw-r--r--rtl/curve/rom/brom_p256_one.v68
-rw-r--r--rtl/curve/rom/brom_p256_q.v68
-rw-r--r--rtl/curve/rom/brom_p256_zero.v70
-rw-r--r--rtl/curve/uop/uop_add_rom.v66
-rw-r--r--rtl/curve/uop/uop_conv_rom.v38
-rw-r--r--rtl/curve/uop/uop_dbl_rom.v58
-rw-r--r--rtl/curve/uop/uop_init_rom.v33
-rw-r--r--rtl/curve/uop_ecdsa.v50
-rw-r--r--rtl/ecdsa256.v160
-rw-r--r--rtl/ecdsa256_wrapper.v177
-rw-r--r--rtl/lowlevel/adder32_wrapper.v73
-rw-r--r--rtl/lowlevel/adder47_wrapper.v69
-rw-r--r--rtl/lowlevel/artix7/adder32_artix7.v96
-rw-r--r--rtl/lowlevel/artix7/adder47_artix7.v91
-rw-r--r--rtl/lowlevel/artix7/dsp48e1_wrapper.v159
-rw-r--r--rtl/lowlevel/artix7/mac16_artix7.v90
-rw-r--r--rtl/lowlevel/artix7/subtractor32_artix7.v94
-rw-r--r--rtl/lowlevel/ecdsa_lowlevel_settings.v17
-rw-r--r--rtl/lowlevel/mac16_wrapper.v75
-rw-r--r--rtl/lowlevel/subtractor32_wrapper.v72
-rw-r--r--rtl/modular/modular_adder.v298
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_copy.v148
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_init.v172
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v286
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v408
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v257
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v328
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v153
-rw-r--r--rtl/modular/modular_invertor/modinv_clog2.v10
-rw-r--r--rtl/modular/modular_invertor/modular_invertor.v981
-rw-r--r--rtl/modular/modular_multiplier_256.v402
-rw-r--r--rtl/modular/modular_reductor_256.v666
-rw-r--r--rtl/modular/modular_subtractor.v292
-rw-r--r--rtl/multiword/mw_comparator.v220
-rw-r--r--rtl/multiword/mw_mover.v175
-rw-r--r--rtl/util/bram_1rw_1ro_readfirst.v101
55 files changed, 11968 insertions, 0 deletions
diff --git a/bench/tb_curve_adder_256.v b/bench/tb_curve_adder_256.v
new file mode 100644
index 0000000..a20743a
--- /dev/null
+++ b/bench/tb_curve_adder_256.v
@@ -0,0 +1,420 @@
+//------------------------------------------------------------------------------
+//
+// tb_curve_adder_256.v
+// -----------------------------------------------------------------------------
+// Testbench for 256-bit curve point adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_curve_adder_256;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] PX_1 = 256'ha536512112e4bb911ae72744a914761ddc53700f889c88e583e0edd45c179b08;
+ localparam [255:0] PY_1 = 256'h239e73bf40f4831ab71ccea072291893ac8582982ea6fec6bd6aaf36ac32d22e;
+ localparam [255:0] PZ_1 = 256'h32258ae04c5498bb34b29c54a7f95afc10c009540c51731eae164750ca385029;
+
+ localparam [255:0] RX_1 = 256'he4fcdd1a151b405b2a567d20d7674031c6d5b207b0b5dcf277015d81784492d5;
+ localparam [255:0] RY_1 = 256'h4782c540b58988b07bb8e0c5ad3ff562dd45c075a39ee71896d5eb33702dd656;
+ localparam [255:0] RZ_1 = 256'hae637ff2fd5468780241afb3a8ebaeb8618e86b4a1a211b350546c9e6fea93d4;
+
+ localparam [255:0] PX_2 = 256'he58a6470e038f6b261d5a9a72fb2bd96b6bad433ff7baea6a40b5facf5085189;
+ localparam [255:0] PY_2 = 256'h03dd8785b592307811ee5512e2d713c5dc65f60f01883340fe0f56f858a39474;
+ localparam [255:0] PZ_2 = 256'h1b4657b1e79c9074fbf7f63f96ce2854db4808afc72841fac623dc68d9bff64d;
+
+ localparam [255:0] RX_2 = 256'hc354e99a827a3f1c30f29f6b1d72273eb0daaeb06bb373ed315e305b89d857ca;
+ localparam [255:0] RY_2 = 256'h0cb054f95589c1fcbe763df3b8d7badd568d5e93a667076dddfc70dcfab74948;
+ localparam [255:0] RZ_2 = 256'hd79d9170dd628aee82d149715a6ec6cc44426ccae236d2a146edbd15a564ea53;
+
+ localparam [255:0] PX_3 = 256'hbf5fe30c79025a0b638b0fd62bf1349aee0a9fc7fc2719291b0c23535c16eb52;
+ localparam [255:0] PY_3 = 256'h8a637c7c0b9459de664d40a717e1abc0f843f03169fae943e0835cbe767da06b;
+ localparam [255:0] PZ_3 = 256'h0871d93601d654216912866514a788a92e8a9b6047611bf185d459e204727377;
+
+ localparam [255:0] RX_3 = 256'h1ba6259b5b750e4d6e4f490f661646cd9491be16965f47044ac2688048e567c5;
+ localparam [255:0] RY_3 = 256'h80e55c16f403f8d7282bca628477771a45330567caa5aaab9a54919dbe05e3e4;
+ localparam [255:0] RZ_3 = 256'hb99663f045c9602b05f23aaaa508e6167d15740be900175dbeceb957a9dad951;
+
+ localparam [255:0] PX_4 = 256'hxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
+ localparam [255:0] PY_4 = 256'hxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
+ localparam [255:0] PZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+
+ localparam [255:0] RX_4 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x
+ localparam [255:0] RY_4 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y
+ localparam [255:0] RZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+
+ localparam [255:0] PX_5 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x
+ localparam [255:0] PY_5 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y
+ localparam [255:0] PZ_5 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+
+ localparam [255:0] RX_5 = 256'h29d05c193da77b710e86323538b77e1b11f904fea42998be16bd8d744ece7ad0; // H.x
+ localparam [255:0] RY_5 = 256'hb01cbd1c01e58065711814b583f061e9d431cca994cea1313449bf97c840ae07; // H.y
+ localparam [255:0] RZ_5 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+
+ localparam [255:0] PX_6 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x
+ localparam [255:0] PY_6 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y
+ localparam [255:0] PZ_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+
+ localparam [255:0] RX_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+ localparam [255:0] RY_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+ localparam [255:0] RZ_6 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+
+ localparam [255:0] Q = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (PX, PY, PZ, RX, RY, RZ, Q)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_px_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_py_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_pz_addr;
+
+ wire [WORD_COUNTER_WIDTH-1:0] core_rx_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_ry_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_rz_addr;
+
+ wire [WORD_COUNTER_WIDTH-1:0] core_q_addr;
+
+ wire core_rx_wren;
+ wire core_ry_wren;
+ wire core_rz_wren;
+
+ wire [ 32-1:0] core_px_data;
+ wire [ 32-1:0] core_py_data;
+ wire [ 32-1:0] core_pz_data;
+
+ wire [ 32-1:0] core_rx_data_wr;
+ wire [ 32-1:0] core_ry_data_wr;
+ wire [ 32-1:0] core_rz_data_wr;
+
+ wire [ 32-1:0] core_rx_data_rd;
+ wire [ 32-1:0] core_ry_data_rd;
+ wire [ 32-1:0] core_rz_data_rd;
+
+ wire [ 32-1:0] core_q_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyzq_addr;
+ reg tb_xyzq_wren;
+
+ reg [ 31:0] tb_px_data;
+ reg [ 31:0] tb_py_data;
+ reg [ 31:0] tb_pz_data;
+ wire [ 31:0] tb_rx_data;
+ wire [ 31:0] tb_ry_data;
+ wire [ 31:0] tb_rz_data;
+ reg [ 31:0] tb_q_data;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_px
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_px_data), .a_out(),
+ .b_addr(core_px_addr), .b_out(core_px_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_py
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_py_data), .a_out(),
+ .b_addr(core_py_addr), .b_out(core_py_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_pz
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_pz_data), .a_out(),
+ .b_addr(core_pz_addr), .b_out(core_pz_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_q
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_q_data), .a_out(),
+ .b_addr(core_q_addr), .b_out(core_q_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_rx
+ ( .clk(clk),
+ .a_addr(core_rx_addr), .a_wr(core_rx_wren), .a_in(core_rx_data_wr), .a_out(core_rx_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_rx_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_ry
+ ( .clk(clk),
+ .a_addr(core_ry_addr), .a_wr(core_ry_wren), .a_in(core_ry_data_wr), .a_out(core_ry_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_ry_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_rz
+ ( .clk(clk),
+ .a_addr(core_rz_addr), .a_wr(core_rz_wren), .a_in(core_rz_data_wr), .a_out(core_rz_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_rz_data)
+ );
+
+
+ //
+ // Opcode
+ //
+ wire [ 5: 0] add_uop_addr;
+ wire [19: 0] add_uop;
+
+ uop_add_rom add_rom
+ (
+ .clk (clk),
+ .addr (add_uop_addr),
+ .data (add_uop)
+ );
+
+ //
+ // UUT
+ //
+ curve_dbl_add_256 uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .uop_addr (add_uop_addr),
+ .uop (add_uop),
+
+ .px_addr (core_px_addr),
+ .py_addr (core_py_addr),
+ .pz_addr (core_pz_addr),
+ .rx_addr (core_rx_addr),
+ .ry_addr (core_ry_addr),
+ .rz_addr (core_rz_addr),
+ .q_addr (core_q_addr),
+
+ .rx_wren (core_rx_wren),
+ .ry_wren (core_ry_wren),
+ .rz_wren (core_rz_wren),
+
+ .px_din (core_px_data),
+ .py_din (core_py_data),
+ .pz_din (core_pz_data),
+ .rx_din (core_rx_data_rd),
+ .ry_din (core_ry_data_rd),
+ .rz_din (core_rz_data_rd),
+ .rx_dout (core_rx_data_wr),
+ .ry_dout (core_ry_data_wr),
+ .rz_dout (core_rz_data_wr),
+ .q_din (core_q_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_curve_adder(PX_1, PY_1, PZ_1, RX_1, RY_1, RZ_1);
+ test_curve_adder(PX_2, PY_2, PZ_2, RX_2, RY_2, RZ_2);
+ test_curve_adder(PX_3, PY_3, PZ_3, RX_3, RY_3, RZ_3);
+ test_curve_adder(PX_4, PY_4, PZ_4, RX_4, RY_4, RZ_4);
+ test_curve_adder(PX_5, PY_5, PZ_5, RX_5, RY_5, RZ_5);
+ test_curve_adder(PX_6, Q - PY_6, PZ_6, RX_6, RY_6, RZ_6);
+
+ /* print result */
+ if (ok) $display("tb_curve_adder_256: SUCCESS");
+ else $display("tb_curve_adder_256: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg t_ok;
+
+ integer w;
+
+ task test_curve_adder;
+
+ input [255:0] px;
+ input [255:0] py;
+ input [255:0] pz;
+
+ input [255:0] rx;
+ input [255:0] ry;
+ input [255:0] rz;
+
+ reg [255:0] px_shreg;
+ reg [255:0] py_shreg;
+ reg [255:0] pz_shreg;
+
+ reg [255:0] rx_shreg;
+ reg [255:0] ry_shreg;
+ reg [255:0] rz_shreg;
+
+ reg [255:0] q_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_xyzq_wren = 1;
+
+ /* initialize shift registers */
+ px_shreg = px;
+ py_shreg = py;
+ pz_shreg = pz;
+ q_shreg = Q;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_px_data = px_shreg[31:0];
+ tb_py_data = py_shreg[31:0];
+ tb_pz_data = pz_shreg[31:0];
+ tb_q_data = q_shreg[31:0];
+
+ /* shift inputs */
+ px_shreg = {{32{1'bX}}, px_shreg[255:32]};
+ py_shreg = {{32{1'bX}}, py_shreg[255:32]};
+ pz_shreg = {{32{1'bX}}, pz_shreg[255:32]};
+ q_shreg = {{32{1'bX}}, q_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyzq_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_px_data = {32{1'bX}};
+ tb_py_data = {32{1'bX}};
+ tb_pz_data = {32{1'bX}};
+ tb_q_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyzq_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ rx_shreg = {tb_rx_data, rx_shreg[255:32]};
+ ry_shreg = {tb_ry_data, ry_shreg[255:32]};
+ rz_shreg = {tb_rz_data, rz_shreg[255:32]};
+
+ end
+
+ /* compare */
+ t_ok = (rx_shreg == rx) &&
+ (ry_shreg == ry) &&
+ (rz_shreg == rz);
+
+ /* display results */
+ $display("test_curve_adder(): %s", t_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && t_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_curve_doubler_256.v b/bench/tb_curve_doubler_256.v
new file mode 100644
index 0000000..c7b7541
--- /dev/null
+++ b/bench/tb_curve_doubler_256.v
@@ -0,0 +1,409 @@
+//------------------------------------------------------------------------------
+//
+// tb_curve_doubler_256.v
+// -----------------------------------------------------------------------------
+// Testbench for 256-bit curve point doubler.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_curve_doubler_256;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] PX_1 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296;
+ localparam [255:0] PY_1 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5;
+ localparam [255:0] PZ_1 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+
+ localparam [255:0] RX_1 = 256'h9a978f59acd1b5ad570e7d52dcfcde43804b42274f61ddcf1e7d848391d6c70f;
+ localparam [255:0] RY_1 = 256'h4126885e7f786af905338238e5346d5fe77fc46388668bd0fd59be3190d2f5d1;
+ localparam [255:0] RZ_1 = 256'h9fc685c5fc34ff371dcfd694f81f3c2c579c66aed662bd9d976c80d06f7ea3ea;
+
+ localparam [255:0] PX_2 = 256'h0ec88440c8b00a9e572bf1bceb7d0c5906bd65990a9b7081130bd72e2c136ca0;
+ localparam [255:0] PY_2 = 256'hc0bc77e1339e899101f8e8eccf79c3f7f4bbdd1bf96f6446199bd423026a60d6;
+ localparam [255:0] PZ_2 = 256'hdd27cb52a31d1f6e041accf1103de05ba0a5edd74b738d51fe3397de0e3fc306;
+
+ localparam [255:0] RX_2 = 256'he6afae63e774df21244609cb4c35d17d28b36b8b9fb7c58929af247f34ac72f9;
+ localparam [255:0] RY_2 = 256'h061076db7a5745adc90b2e9eebe0ad6482309690f50b60835c265cf83a1b34eb;
+ localparam [255:0] RZ_2 = 256'h1b6bfd04f2a41d68e85423655db1142d97ebaec0c67c450408f427e35c4f054f;
+
+ localparam [255:0] PX_3 = 256'hb0f824c88ec62df89912ca9ffbcbbb4ffb4d80f8a7d7b4a992273261a2f7be7f;
+ localparam [255:0] PY_3 = 256'h403e34c78c2b816fce2b1f8d73cfeef28113b8de8bda4a447d17b619bef73705;
+ localparam [255:0] PZ_3 = 256'h0e3e81bb8e954f3164ae54a6cffa7fcc9631dfddee55fac61e46415f1f5fe5e2;
+
+ localparam [255:0] RX_3 = 256'hd4e725920c88cc2f57847a315f3b6c180abb278b8fa2a47da3d1a191a8c29e19;
+ localparam [255:0] RY_3 = 256'ha798ad8dbd66c98b53414ab1d04b0f871929a90fea996c88b96d9d68eb8eb0dc;
+ localparam [255:0] RZ_3 = 256'ha7ead72c01294eaf2899bb6b84f7d26417e6758e3db29f3b5c2ca8e9911067f5;
+
+ localparam [255:0] PX_4 = 256'hXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
+ localparam [255:0] PY_4 = 256'hXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
+ localparam [255:0] PZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+
+ localparam [255:0] RX_4 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+ localparam [255:0] RY_4 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+ localparam [255:0] RZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+
+ localparam [255:0] Q = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+
+
+ //
+ // TODO: Test special cases!
+ //
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (PX, PY, PZ, RX, RY, RZ, Q)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_px_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_py_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_pz_addr;
+
+ wire [WORD_COUNTER_WIDTH-1:0] core_rx_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_ry_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_rz_addr;
+
+ wire [WORD_COUNTER_WIDTH-1:0] core_q_addr;
+
+ wire core_rx_wren;
+ wire core_ry_wren;
+ wire core_rz_wren;
+
+ wire [ 32-1:0] core_px_data;
+ wire [ 32-1:0] core_py_data;
+ wire [ 32-1:0] core_pz_data;
+
+ wire [ 32-1:0] core_rx_data_wr;
+ wire [ 32-1:0] core_ry_data_wr;
+ wire [ 32-1:0] core_rz_data_wr;
+
+ wire [ 32-1:0] core_rx_data_rd;
+ wire [ 32-1:0] core_ry_data_rd;
+ wire [ 32-1:0] core_rz_data_rd;
+
+ wire [ 32-1:0] core_q_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyzq_addr;
+ reg tb_xyzq_wren;
+
+ reg [ 31:0] tb_px_data;
+ reg [ 31:0] tb_py_data;
+ reg [ 31:0] tb_pz_data;
+ wire [ 31:0] tb_rx_data;
+ wire [ 31:0] tb_ry_data;
+ wire [ 31:0] tb_rz_data;
+ reg [ 31:0] tb_q_data;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_px
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_px_data), .a_out(),
+ .b_addr(core_px_addr), .b_out(core_px_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_py
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_py_data), .a_out(),
+ .b_addr(core_py_addr), .b_out(core_py_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_pz
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_pz_data), .a_out(),
+ .b_addr(core_pz_addr), .b_out(core_pz_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_q
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_q_data), .a_out(),
+ .b_addr(core_q_addr), .b_out(core_q_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_rx
+ ( .clk(clk),
+ .a_addr(core_rx_addr), .a_wr(core_rx_wren), .a_in(core_rx_data_wr), .a_out(core_rx_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_rx_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_ry
+ ( .clk(clk),
+ .a_addr(core_ry_addr), .a_wr(core_ry_wren), .a_in(core_ry_data_wr), .a_out(core_ry_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_ry_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_rz
+ ( .clk(clk),
+ .a_addr(core_rz_addr), .a_wr(core_rz_wren), .a_in(core_rz_data_wr), .a_out(core_rz_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_rz_data)
+ );
+
+
+ //
+ // Opcode
+ //
+ wire [ 5: 0] dbl_uop_addr;
+ wire [19: 0] dbl_uop;
+
+ uop_dbl_rom dbl_rom
+ (
+ .clk (clk),
+ .addr (dbl_uop_addr),
+ .data (dbl_uop)
+ );
+
+
+ //
+ // UUT
+ //
+ curve_dbl_add_256 uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .uop_addr (dbl_uop_addr),
+ .uop (dbl_uop),
+
+ .px_addr (core_px_addr),
+ .py_addr (core_py_addr),
+ .pz_addr (core_pz_addr),
+ .rx_addr (core_rx_addr),
+ .ry_addr (core_ry_addr),
+ .rz_addr (core_rz_addr),
+ .q_addr (core_q_addr),
+
+ .rx_wren (core_rx_wren),
+ .ry_wren (core_ry_wren),
+ .rz_wren (core_rz_wren),
+
+ .px_din (core_px_data),
+ .py_din (core_py_data),
+ .pz_din (core_pz_data),
+ .rx_din (core_rx_data_rd),
+ .ry_din (core_ry_data_rd),
+ .rz_din (core_rz_data_rd),
+ .rx_dout (core_rx_data_wr),
+ .ry_dout (core_ry_data_wr),
+ .rz_dout (core_rz_data_wr),
+ .q_din (core_q_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_curve_doubler(PX_1, PY_1, PZ_1, RX_1, RY_1, RZ_1);
+ test_curve_doubler(PX_2, PY_2, PZ_2, RX_2, RY_2, RZ_2);
+ test_curve_doubler(PX_3, PY_3, PZ_3, RX_3, RY_3, RZ_3);
+ test_curve_doubler(PX_4, PY_4, PZ_4, RX_4, RY_4, RZ_4);
+
+ /* print result */
+ if (ok) $display("tb_curve_doubler_256: SUCCESS");
+ else $display("tb_curve_doubler_256: FAILURE");
+ //
+// $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg t_ok;
+
+ integer w;
+
+ task test_curve_doubler;
+
+ input [255:0] px;
+ input [255:0] py;
+ input [255:0] pz;
+
+ input [255:0] rx;
+ input [255:0] ry;
+ input [255:0] rz;
+
+ reg [255:0] px_shreg;
+ reg [255:0] py_shreg;
+ reg [255:0] pz_shreg;
+
+ reg [255:0] rx_shreg;
+ reg [255:0] ry_shreg;
+ reg [255:0] rz_shreg;
+
+ reg [255:0] q_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_xyzq_wren = 1;
+
+ /* initialize shift registers */
+ px_shreg = px;
+ py_shreg = py;
+ pz_shreg = pz;
+ q_shreg = Q;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_px_data = px_shreg[31:0];
+ tb_py_data = py_shreg[31:0];
+ tb_pz_data = pz_shreg[31:0];
+ tb_q_data = q_shreg[31:0];
+
+ /* shift inputs */
+ px_shreg = {{32{1'bX}}, px_shreg[255:32]};
+ py_shreg = {{32{1'bX}}, py_shreg[255:32]};
+ pz_shreg = {{32{1'bX}}, pz_shreg[255:32]};
+ q_shreg = {{32{1'bX}}, q_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyzq_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_px_data = {32{1'bX}};
+ tb_py_data = {32{1'bX}};
+ tb_pz_data = {32{1'bX}};
+ tb_q_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyzq_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ rx_shreg = {tb_rx_data, rx_shreg[255:32]};
+ ry_shreg = {tb_ry_data, ry_shreg[255:32]};
+ rz_shreg = {tb_rz_data, rz_shreg[255:32]};
+
+ end
+
+ /* compare */
+ t_ok = (rx_shreg == rx) &&
+ (ry_shreg == ry) &&
+ (rz_shreg == rz);
+
+ /* display results */
+ $display("test_curve_doubler(): %s", t_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && t_ok;
+
+ end
+
+ endtask
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_curve_multiplier_256.v b/bench/tb_curve_multiplier_256.v
new file mode 100644
index 0000000..bcca034
--- /dev/null
+++ b/bench/tb_curve_multiplier_256.v
@@ -0,0 +1,281 @@
+//------------------------------------------------------------------------------
+//
+// tb_curve_multiplier_256.v
+// -----------------------------------------------------------------------------
+// Testbench for 256-bit curve point scalar multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_curve_multiplier_256;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] K_1 = 256'h70a12c2db16845ed56ff68cfc21a472b3f04d7d6851bf6349f2d7d5b3452b38a;
+ localparam [255:0] PX_1 = 256'h8101ece47464a6ead70cf69a6e2bd3d88691a3262d22cba4f7635eaff26680a8;
+ localparam [255:0] PY_1 = 256'hd8a12ba61d599235f67d9cb4d58f1783d3ca43e78f0a5abaa624079936c0c3a9;
+
+ localparam [255:0] K_2 = 256'h580ec00d856434334cef3f71ecaed4965b12ae37fa47055b1965c7b134ee45d0;
+ localparam [255:0] PX_2 = 256'h7214bc9647160bbd39ff2f80533f5dc6ddd70ddf86bb815661e805d5d4e6f27c;
+ localparam [255:0] PY_2 = 256'h8b81e3e977597110c7cf2633435b2294b72642987defd3d4007e1cfc5df84541;
+
+ localparam [255:0] K_3 = 256'hffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551;
+ localparam [255:0] PX_3 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+ localparam [255:0] PY_3 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (K, PX, PY)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_k_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_px_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_py_addr;
+
+ wire core_px_wren;
+ wire core_py_wren;
+
+ wire [ 32-1:0] core_k_data;
+ wire [ 32-1:0] core_px_data;
+ wire [ 32-1:0] core_py_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_k_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_pxy_addr;
+
+ reg tb_k_wren;
+
+ reg [ 31:0] tb_k_data;
+ wire [ 31:0] tb_px_data;
+ wire [ 31:0] tb_py_data;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_k
+ ( .clk(clk),
+ .a_addr(tb_k_addr), .a_wr(tb_k_wren), .a_in(tb_k_data), .a_out(),
+ .b_addr(core_k_addr), .b_out(core_k_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_px
+ ( .clk(clk),
+ .a_addr(core_px_addr), .a_wr(core_px_wren), .a_in(core_px_data), .a_out(),
+ .b_addr(tb_pxy_addr), .b_out(tb_px_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_py
+ ( .clk(clk),
+ .a_addr(core_py_addr), .a_wr(core_py_wren), .a_in(core_py_data), .a_out(),
+ .b_addr(tb_pxy_addr), .b_out(tb_py_data)
+ );
+
+
+ //
+ // UUT
+ //
+ curve_mul_256 uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .k_addr (core_k_addr),
+ .rx_addr (core_px_addr),
+ .ry_addr (core_py_addr),
+
+ .rx_wren (core_px_wren),
+ .ry_wren (core_py_wren),
+
+ .k_din (core_k_data),
+
+ .rx_dout (core_px_data),
+ .ry_dout (core_py_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_curve_multiplier(K_1, PX_1, PY_1);
+ test_curve_multiplier(K_2, PX_2, PY_2);
+ test_curve_multiplier(K_3, PX_3, PY_3);
+
+ /* print result */
+ if (ok) $display("tb_curve_multiplier_256: SUCCESS");
+ else $display("tb_curve_multiplier_256: FAILURE");
+ //
+ //$finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg p_ok;
+
+ integer w;
+
+ task test_curve_multiplier;
+
+ input [255:0] k;
+ input [255:0] px;
+ input [255:0] py;
+
+ reg [255:0] k_shreg;
+ reg [255:0] px_shreg;
+ reg [255:0] py_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_k_wren = 1;
+
+ /* initialize shift registers */
+ k_shreg = k;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_k_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_k_data = k_shreg[31:0];
+
+ /* shift inputs */
+ k_shreg = {{32{1'bX}}, k_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_k_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_k_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_k_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_pxy_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ px_shreg = {tb_px_data, px_shreg[255:32]};
+ py_shreg = {tb_py_data, py_shreg[255:32]};
+
+ end
+
+ /* compare */
+ p_ok = (px_shreg == px) &&
+ (py_shreg == py);
+
+ /* display results */
+ $display("test_curve_multiplier(): %s", p_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && p_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_lowlevel_adder32.v b/bench/tb_lowlevel_adder32.v
new file mode 100644
index 0000000..2caffbd
--- /dev/null
+++ b/bench/tb_lowlevel_adder32.v
@@ -0,0 +1,175 @@
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_adder32.v
+// -----------------------------------------------------------------------------
+// Testbench for 32-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+
+module tb_lowlevel_adder32;
+
+ //
+ // Inputs
+ //
+ reg clk;
+ reg [31: 0] a;
+ reg [31: 0] b;
+ reg c_in;
+
+ //
+ // Outputs
+ //
+ wire [31: 0] s;
+ wire c_out;
+
+ //
+ // Test Vectors {a, b, c_in}
+ //
+ wire [64: 0] vec_0 = {32'h00000000, 32'h00000000, 1'b0}; // all zeroes, no carry
+ wire [64: 0] vec_1 = {32'h00000000, 32'h00000000, 1'b1}; // all zeroes with carry
+ wire [64: 0] vec_2 = {32'h00000000, 32'hFFFFFFFF, 1'b0}; // zeroes and ones, no carry
+ wire [64: 0] vec_3 = {32'h00000000, 32'hFFFFFFFF, 1'b1}; // zeroes and ones with carry
+ wire [64: 0] vec_4 = {32'hFFFFFFFF, 32'h00000000, 1'b0}; // ones and zeroes, no carry
+ wire [64: 0] vec_5 = {32'hFFFFFFFF, 32'h00000000, 1'b1}; // ones and zeroes with carry
+ wire [64: 0] vec_6 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b0}; // all ones, no carry
+ wire [64: 0] vec_7 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b1}; // all ones with carry
+
+ wire [64: 0] vec_8 = {32'hd898c296, 32'h37bf51f5, 1'b0}; // random values, no carry
+ wire [64: 0] vec_9 = {32'hf4a13945, 32'hcbb64068, 1'b0}; // random values, no carry
+ wire [64: 0] vec_10 = {32'h2deb33a0, 32'h6b315ece, 1'b0}; // random values, no carry
+ wire [64: 0] vec_11 = {32'h77037d81, 32'h2bce3357, 1'b0}; // random values, no carry
+ wire [64: 0] vec_12 = {32'h63a440f2, 32'h7c0f9e16, 1'b1}; // random values with carry
+ wire [64: 0] vec_13 = {32'hf8bce6e5, 32'h8ee7eb4a, 1'b1}; // random values with carry
+ wire [64: 0] vec_14 = {32'he12c4247, 32'hfe1a7f9b, 1'b1}; // random values with carry
+ wire [64: 0] vec_15 = {32'h6b17d1f2, 32'h4fe342e2, 1'b1}; // random values with carry
+
+
+ //
+ // UUT
+ //
+ adder32_wrapper uut
+ (
+ .clk (clk),
+ .a (a),
+ .b (b),
+ .s (s),
+ .c_in (c_in),
+ .c_out (c_out)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+ //
+ clk = 0;
+ //
+ #100;
+ //
+ test_adder32(vec_0);
+ test_adder32(vec_1);
+ test_adder32(vec_2);
+ test_adder32(vec_3);
+ test_adder32(vec_4);
+ test_adder32(vec_5);
+ test_adder32(vec_6);
+ test_adder32(vec_7);
+ //
+ test_adder32(vec_8);
+ test_adder32(vec_9);
+ test_adder32(vec_10);
+ test_adder32(vec_11);
+ test_adder32(vec_12);
+ test_adder32(vec_13);
+ test_adder32(vec_14);
+ test_adder32(vec_15);
+ //
+ if (ok) $display("tb_lowlevel_adder32: SUCCESS");
+ else $display("tb_lowlevel_adder32: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Routine
+ //
+ reg [31: 0] ss; // reference value of sum
+ reg cc; // reference value of carry
+ reg ss_ok; // result matches reference value
+
+ task test_adder32;
+
+ input [64: 0] vec;
+
+ begin
+
+ /* break down test vector */
+ a = vec[64:33];
+ b = vec[32: 1];
+ c_in = vec[ 0: 0];
+
+ /* calculate reference values */
+ {cc, ss} = {1'b0, a} + {1'b0, b} + {32'd0, c_in};
+
+ /* send one clock tick */
+ #10 clk = 1;
+ #10 clk = 0;
+
+ /* check outputs */
+ ss_ok = (s == ss) && (c_out == cc);
+
+ /* display results */
+ $display("test_adder32(): 0x%08X + 0x%08X + %01d = {%01d, 0x%08X} [%0s]", a, b, c_in, c_out, s, ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && ss_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_lowlevel_adder47.v b/bench/tb_lowlevel_adder47.v
new file mode 100644
index 0000000..2a575a8
--- /dev/null
+++ b/bench/tb_lowlevel_adder47.v
@@ -0,0 +1,151 @@
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_adder47.v
+// -----------------------------------------------------------------------------
+// Testbench for 47-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+
+module tb_lowlevel_adder47;
+
+ //
+ // Inputs
+ //
+ reg clk;
+ reg [46: 0] a;
+ reg [46: 0] b;
+
+ //
+ // Outputs
+ //
+ wire [46: 0] s;
+
+ //
+ // Test Vectors {a, b}
+ //
+ wire [93: 0] vec_0 = {47'h2a87ca22be8b, 47'h05378eb1c71e};
+ wire [93: 0] vec_1 = {47'h7320ad746e1d, 47'h3b628ba79b98};
+ wire [93: 0] vec_2 = {47'h59f741e08254, 47'h2a385502f25d};
+ wire [93: 0] vec_3 = {47'h3f55296c3a54, 47'h5e3872760ab7};
+ wire [93: 0] vec_4 = {47'h3617de4a9626, 47'h2c6f5d9e98bf};
+ wire [93: 0] vec_5 = {47'h1292dc29f8f4, 47'h1dbd289a147c};
+ wire [93: 0] vec_6 = {47'h69da3113b5f0, 47'h38c00a60b1ce};
+ wire [93: 0] vec_7 = {47'h1d7e819d7a43, 47'h1d7c90ea0e5f};
+
+ //
+ // UUT
+ //
+ adder47_wrapper uut
+ (
+ .clk (clk),
+ .a (a),
+ .b (b),
+ .s (s)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+ //
+ clk = 0;
+ //
+ #100;
+ //
+ test_adder47(vec_0);
+ test_adder47(vec_1);
+ test_adder47(vec_2);
+ test_adder47(vec_3);
+ test_adder47(vec_4);
+ test_adder47(vec_5);
+ test_adder47(vec_6);
+ test_adder47(vec_7);
+ //
+ if (ok) $display("tb_lowlevel_adder47: SUCCESS");
+ else $display("tb_lowlevel_adder47: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Routine
+ //
+ reg [46: 0] ss; // reference value of sum
+ reg cc; // reference value of carry
+ reg ss_ok; // result matches reference value
+
+ task test_adder47;
+
+ input [93: 0] vec;
+
+ begin
+
+ /* break down test vector */
+ a = vec[93:47];
+ b = vec[46: 0];
+
+ /* calculate reference values */
+ ss = a + b;
+
+ /* send one clock tick */
+ #10 clk = 1;
+ #10 clk = 0;
+
+ /* check outputs */
+ ss_ok = (s == ss);
+
+ /* display results */
+ $display("test_adder47(): %s", ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && ss_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_lowlevel_subtractor32.v b/bench/tb_lowlevel_subtractor32.v
new file mode 100644
index 0000000..e1129e2
--- /dev/null
+++ b/bench/tb_lowlevel_subtractor32.v
@@ -0,0 +1,174 @@
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_subtractor32.v
+// -----------------------------------------------------------------------------
+// Testbench for 32-bit subtractor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_lowlevel_subtractor32;
+
+ //
+ // Inputs
+ //
+ reg clk;
+ reg [31: 0] a;
+ reg [31: 0] b;
+ reg b_in;
+
+ //
+ // Outputs
+ //
+ wire [31: 0] d;
+ wire b_out;
+
+ //
+ // Test Vectors {a, b, b_in}
+ //
+ wire [64: 0] vec_0 = {32'h00000000, 32'h00000000, 1'b0}; // all zeroes, no borrow
+ wire [64: 0] vec_1 = {32'h00000000, 32'h00000000, 1'b1}; // all zeroes with borrow
+ wire [64: 0] vec_2 = {32'h00000000, 32'hFFFFFFFF, 1'b0}; // zeroes and ones, no borrow
+ wire [64: 0] vec_3 = {32'h00000000, 32'hFFFFFFFF, 1'b1}; // zeroes and ones with borrow
+ wire [64: 0] vec_4 = {32'hFFFFFFFF, 32'h00000000, 1'b0}; // ones and zeroes, no borrow
+ wire [64: 0] vec_5 = {32'hFFFFFFFF, 32'h00000000, 1'b1}; // ones and zeroes with borrow
+ wire [64: 0] vec_6 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b0}; // all ones, no borrow
+ wire [64: 0] vec_7 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b1}; // all ones with borrow
+
+ wire [64: 0] vec_8 = {32'hd898c296, 32'h37bf51f5, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_9 = {32'hf4a13945, 32'hcbb64068, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_10 = {32'h2deb33a0, 32'h6b315ece, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_11 = {32'h77037d81, 32'h2bce3357, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_12 = {32'h63a440f2, 32'h7c0f9e16, 1'b1}; // random values with borrow
+ wire [64: 0] vec_13 = {32'hf8bce6e5, 32'h8ee7eb4a, 1'b1}; // random values with borrow
+ wire [64: 0] vec_14 = {32'he12c4247, 32'hfe1a7f9b, 1'b1}; // random values with borrow
+ wire [64: 0] vec_15 = {32'h6b17d1f2, 32'h4fe342e2, 1'b1}; // random values with borrow
+
+
+ //
+ // UUT
+ //
+ subtractor32_wrapper uut
+ (
+ .clk (clk),
+ .a (a),
+ .b (b),
+ .d (d),
+ .b_in (b_in),
+ .b_out (b_out)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+ //
+ clk = 0;
+ //
+ #100;
+ //
+ test_subtractor32(vec_0);
+ test_subtractor32(vec_1);
+ test_subtractor32(vec_2);
+ test_subtractor32(vec_3);
+ test_subtractor32(vec_4);
+ test_subtractor32(vec_5);
+ test_subtractor32(vec_6);
+ test_subtractor32(vec_7);
+ //
+ test_subtractor32(vec_8);
+ test_subtractor32(vec_9);
+ test_subtractor32(vec_10);
+ test_subtractor32(vec_11);
+ test_subtractor32(vec_12);
+ test_subtractor32(vec_13);
+ test_subtractor32(vec_14);
+ test_subtractor32(vec_15);
+ //
+ if (ok) $display("tb_lowlevel_subtractor32: SUCCESS");
+ else $display("tb_lowlevel_subtractor32: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Routine
+ //
+ reg [31: 0] dd; // reference value of difference
+ reg bb; // reference value of borrow
+ reg dd_ok; // result matches reference value
+
+ task test_subtractor32;
+
+ input [64: 0] vec;
+
+ begin
+
+ /* break down test vector */
+ a = vec[64:33];
+ b = vec[32: 1];
+ b_in = vec[ 0: 0];
+
+ /* calculate reference values */
+ {bb, dd} = {1'b0, a} - {1'b0, b} - {32'd0, b_in};
+
+ /* send one clock tick */
+ #10 clk = 1;
+ #10 clk = 0;
+
+ /* check outputs */
+ dd_ok = (d == dd) && (b_out == bb);
+
+ /* display results */
+ $display("test_subtractor32(): 0x%08X - (0x%08X + %01d) = {%01d, 0x%08X} [%0s]", a, b, b_in, b_out, d, dd_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && dd_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_adder.v b/bench/tb_modular_adder.v
new file mode 100644
index 0000000..1015b77
--- /dev/null
+++ b/bench/tb_modular_adder.v
@@ -0,0 +1,357 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_adder_256.v
+// -----------------------------------------------------------------------------
+// Testbench for modular multi-word adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_modular_adder_256;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+ localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+ localparam [255:0] Y_1 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+
+ localparam [255:0] X_2 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+ localparam [255:0] Y_2 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y, N)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_s_addr;
+ wire core_s_wren;
+
+ wire [ 31:0] core_x_data;
+ wire [ 31:0] core_y_data;
+ wire [ 31:0] core_n_data;
+ wire [ 31:0] core_s_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_s_addr;
+ reg tb_xyn_wren;
+
+ reg [ 31:0] tb_x_data;
+ reg [ 31:0] tb_y_data;
+ reg [ 31:0] tb_n_data;
+ wire [ 31:0] tb_s_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_y_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_n
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_n_data),
+ .a_out (),
+
+ .b_addr (core_n_addr),
+ .b_out (core_n_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_s
+ (
+ .clk (clk),
+
+ .a_addr (core_s_addr),
+ .a_wr (core_s_wren),
+ .a_in (core_s_data),
+ .a_out (),
+
+ .b_addr (tb_s_addr),
+ .b_out (tb_s_data)
+ );
+
+
+ //
+ // UUT
+ //
+ modular_adder #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .ab_addr (core_xy_addr),
+ .n_addr (core_n_addr),
+ .s_addr (core_s_addr),
+ .s_wren (core_s_wren),
+
+ .a_din (core_x_data),
+ .b_din (core_y_data),
+ .n_din (core_n_data),
+ .s_dout (core_s_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xyn_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_adder(X_1, Y_1, N);
+ test_modular_adder(X_2, Y_2, N);
+ test_modular_adder(Y_1, X_1, N);
+ test_modular_adder(Y_2, X_2, N);
+
+ test_modular_adder(X_1, X_2, N);
+ test_modular_adder(X_2, X_1, N);
+ test_modular_adder(Y_1, Y_2, N);
+ test_modular_adder(Y_2, Y_1, N);
+
+ test_modular_adder(X_1, Y_2, N);
+ test_modular_adder(Y_2, X_1, N);
+ test_modular_adder(X_2, Y_1, N);
+ test_modular_adder(Y_1, X_2, N);
+
+ /* print result */
+ if (ok) $display("tb_modular_adder_256: SUCCESS");
+ else $display("tb_modular_adder_256: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [256:0] s;
+ wire [255:0] s_dummy = s[255:0];
+ reg s_ok;
+
+ integer w;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+ reg [255:0] n_shreg;
+ reg [255:0] s_shreg;
+
+ task test_modular_adder;
+
+ input [255:0] x;
+ input [255:0] y;
+ input [255:0] n;
+
+ begin
+
+ /* start filling memories */
+ tb_xyn_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+ n_shreg = n;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+ tb_n_data = n_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+ n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+ tb_n_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyn_wren = 0;
+
+ /* calculate reference value */
+ s = {1'b0, x} + {1'b0, y};
+ if (s >= {1'b0, n})
+ s = s - {1'b0, n};
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_s_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ s_shreg = {tb_s_data, s_shreg[255:32]};
+
+ end
+
+ /* compare */
+ s_ok = (s_shreg == s[255:0]);
+
+ /* display results */
+ $display("test_modular_adder(): %s", s_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && s_ok;
+
+ end
+
+ endtask
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_invertor.v b/bench/tb_modular_invertor.v
new file mode 100644
index 0000000..0ef7c88
--- /dev/null
+++ b/bench/tb_modular_invertor.v
@@ -0,0 +1,226 @@
+`timescale 1ns / 1ps
+
+module tb_modular_invertor;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] Q = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+ localparam [255:0] A_1 = 256'hd3e73ccd63a5b10da308c615bb9ebd3f76e2c5fccc256fd9f629dcc956bf2382;
+ localparam [255:0] A1_1 = 256'h93fb26d5d199bbb7232a4b7c98e97ba9bb7530d304b5f07736ea4027bbb57ecd;
+
+ localparam [255:0] A_2 = 256'h57b6c628a5c4e870740b2517975ace2216acbe094ac54568b53212ef45e69d22;
+ localparam [255:0] A1_2 = 256'hcd2af4766642d7d2f3f3f67d92c575c496772ef7d55c75eb46bd07e8d5f9a4aa;
+
+
+ //
+ // Clock
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (A, A1, Q)
+ //
+ wire [ 2: 0] core_a_addr;
+ wire [ 2: 0] core_q_addr;
+ wire [ 2: 0] core_a1_addr;
+ wire core_a1_wren;
+
+ wire [31: 0] core_a_data;
+ wire [31: 0] core_q_data;
+ wire [31: 0] core_a1_data;
+
+ reg [ 2: 0] tb_aq_addr;
+ reg tb_aq_wren;
+ reg [ 2: 0] tb_a1_addr;
+
+ reg [31: 0] tb_a_data;
+ reg [31: 0] tb_q_data;
+ wire [31: 0] tb_a1_data;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_a
+ ( .clk(clk),
+ .a_addr(tb_aq_addr), .a_wr(tb_aq_wren), .a_in(tb_a_data), .a_out(),
+ .b_addr(core_a_addr), .b_out(core_a_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_q
+ ( .clk(clk),
+ .a_addr(tb_aq_addr), .a_wr(tb_aq_wren), .a_in(tb_q_data), .a_out(),
+ .b_addr(core_q_addr), .b_out(core_q_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_a1
+ ( .clk(clk),
+ .a_addr(core_a1_addr), .a_wr(core_a1_wren), .a_in(core_a1_data), .a_out(),
+ .b_addr(tb_a1_addr), .b_out(tb_a1_data)
+ );
+
+
+ //
+ // UUT
+ //
+ modular_invertor #
+ (
+ .MAX_OPERAND_WIDTH (256)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .a_addr (core_a_addr),
+ .q_addr (core_q_addr),
+ .a1_addr (core_a1_addr),
+ .a1_wren (core_a1_wren),
+
+ .a_din (core_a_data),
+ .q_din (core_q_data),
+ .a1_dout (core_a1_data)
+ );
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_invertor(A_1, A1_1, Q);
+ test_modular_invertor(A_2, A1_2, Q);
+
+ /* print result */
+ if (ok) $display("tb_modular_invertor: SUCCESS");
+ else $display("tb_modular_invertor: FAILURE");
+ //
+ //$finish;
+ //
+
+ end
+
+
+ //
+ // Test Task
+ //
+ reg a1_ok;
+
+ integer w;
+
+ task test_modular_invertor;
+
+ input [255:0] a;
+ input [255:0] a1;
+ input [255:0] q;
+
+ reg [255:0] a_shreg;
+ reg [255:0] a1_shreg;
+ reg [255:0] q_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_aq_wren = 1;
+
+ /* initialize shift registers */
+ a_shreg = a;
+ q_shreg = q;
+
+ /* write all the words */
+ for (w=0; w<8; w=w+1) begin
+
+ /* set addresses */
+ tb_aq_addr = w[2:0];
+
+ /* set data words */
+ tb_a_data = a_shreg[31:0];
+ tb_q_data = q_shreg[31:0];
+
+ /* shift inputs */
+ a_shreg = {{32{1'bX}}, a_shreg[255:32]};
+ q_shreg = {{32{1'bX}}, q_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_aq_addr = {3{1'bX}};
+
+ /* wipe data words */
+ tb_a_data = {32{1'bX}};
+ tb_q_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_aq_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<8; w=w+1) begin
+
+ /* set address */
+ tb_a1_addr = w[2:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ a1_shreg = {tb_a1_data, a1_shreg[255:32]};
+
+ end
+
+ /* compare */
+ a1_ok = (a1_shreg == a1);
+
+ /* display results */
+ $display("test_modular_invertor(): %s", a1_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && a1_ok;
+
+ end
+
+ endtask
+
+
+endmodule
+
diff --git a/bench/tb_modular_multiplier_256.v b/bench/tb_modular_multiplier_256.v
new file mode 100644
index 0000000..3f62767
--- /dev/null
+++ b/bench/tb_modular_multiplier_256.v
@@ -0,0 +1,366 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_multiplier_256.v
+// -----------------------------------------------------------------------------
+// Testbench for modular multi-word multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_modular_multiplier_256;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+ localparam [255:0] X_1 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296;
+ localparam [255:0] Y_1 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5;
+ localparam [255:0] P_1 = 256'h823cd15f6dd3c71933565064513a6b2bd183e554c6a08622f713ebbbface98be;
+
+ localparam [255:0] X_2 = 256'h29d05c193da77b710e86323538b77e1b11f904fea42998be16bd8d744ece7ad0;
+ localparam [255:0] Y_2 = 256'hb01cbd1c01e58065711814b583f061e9d431cca994cea1313449bf97c840ae07;
+ localparam [255:0] P_2 = 256'h76b2571d1d009ab0e7d1cc086c7d3648f08755b2e2585e780d11f053b06fb6ec;
+
+ localparam [255:0] X_3 = 256'h8101ece47464a6ead70cf69a6e2bd3d88691a3262d22cba4f7635eaff26680a8;
+ localparam [255:0] Y_3 = 256'hd8a12ba61d599235f67d9cb4d58f1783d3ca43e78f0a5abaa624079936c0c3a9;
+ localparam [255:0] P_3 = 256'h944fea6a4fac7ae475a6bb211db4bbd394bd9b3ee9a038f6c17125a00b3a5375;
+
+ localparam [255:0] X_4 = 256'h7214bc9647160bbd39ff2f80533f5dc6ddd70ddf86bb815661e805d5d4e6f27c;
+ localparam [255:0] Y_4 = 256'h8b81e3e977597110c7cf2633435b2294b72642987defd3d4007e1cfc5df84541;
+ localparam [255:0] P_4 = 256'h78d3e33c81ab9c652679363c76df004ea6f9a9e3a242a0fb71a4e8fdf41ab519;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y, N, P)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_x_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_y_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_p_addr;
+
+ wire core_p_wren;
+
+ wire [ 31:0] core_x_data;
+ wire [ 31:0] core_y_data;
+ wire [ 31:0] core_n_data;
+ wire [ 31:0] core_p_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_p_addr;
+
+ reg tb_xyn_wren;
+
+ reg [ 31:0] tb_x_data;
+ reg [ 31:0] tb_y_data;
+ reg [ 31:0] tb_n_data;
+ wire [ 31:0] tb_p_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_x_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_y_addr),
+ .b_out (core_y_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_n
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_n_data),
+ .a_out (),
+
+ .b_addr (core_n_addr),
+ .b_out (core_n_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_s
+ (
+ .clk (clk),
+
+ .a_addr (core_p_addr),
+ .a_wr (core_p_wren),
+ .a_in (core_p_data),
+ .a_out (),
+
+ .b_addr (tb_p_addr),
+ .b_out (tb_p_data)
+ );
+
+
+ //
+ // UUT
+ //
+ modular_multiplier_256 uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .a_addr (core_x_addr),
+ .b_addr (core_y_addr),
+ .n_addr (core_n_addr),
+ .p_addr (core_p_addr),
+ .p_wren (core_p_wren),
+
+ .a_din (core_x_data),
+ .b_din (core_y_data),
+ .n_din (core_n_data),
+ .p_dout (core_p_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xyn_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_multiplier(X_1, Y_1, N, P_1);
+ test_modular_multiplier(X_2, Y_2, N, P_2);
+ test_modular_multiplier(X_3, Y_3, N, P_3);
+ test_modular_multiplier(X_4, Y_4, N, P_4);
+
+ /* print result */
+ if (ok) $display("tb_modular_multiplier_256: SUCCESS");
+ else $display("tb_modular_multiplier_256: FAILURE");
+ //
+ //$finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [255:0] p;
+ reg p_ok;
+
+ integer w;
+
+ reg [511:0] pp_full;
+ reg [255:0] pp_ref;
+
+ task test_modular_multiplier;
+
+ input [255:0] x;
+ input [255:0] y;
+ input [255:0] n;
+ input [255:0] pp;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+ reg [255:0] n_shreg;
+ reg [255:0] p_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_xyn_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+ n_shreg = n;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+ tb_n_data = n_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+ n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+ tb_n_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyn_wren = 0;
+
+ /* calculate reference value */
+ pp_full = {{256{1'b0}}, x} * {{256{1'b0}}, y};
+ pp_ref = pp_full % {{256{1'b0}}, n};
+
+ /* compare reference value against hard-coded one */
+ if (pp_ref != pp) begin
+ $display("ERROR: pp_ref != pp");
+ $finish;
+ end
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_p_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ p_shreg = {tb_p_data, p_shreg[255:32]};
+
+ end
+
+ /* compare */
+ p_ok = (p_shreg == pp);
+
+ /* display results */
+ $display("test_modular_multiplier(): %s", p_ok ? "OK" : "ERROR");
+
+ /* update flag */
+ ok = ok && p_ok;
+
+ end
+
+ endtask
+
+
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_subtractor.v b/bench/tb_modular_subtractor.v
new file mode 100644
index 0000000..f45a286
--- /dev/null
+++ b/bench/tb_modular_subtractor.v
@@ -0,0 +1,356 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_subtractor_256.v
+// -----------------------------------------------------------------------------
+// Testbench for modular multi-word subtractor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_modular_subtractor_256;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+ localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+ localparam [255:0] Y_1 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+
+ localparam [255:0] X_2 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+ localparam [255:0] Y_2 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y, N)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_d_addr;
+ wire core_d_wren;
+
+ wire [ 31:0] core_x_data;
+ wire [ 31:0] core_y_data;
+ wire [ 31:0] core_n_data;
+ wire [ 31:0] core_d_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_d_addr;
+ reg tb_xyn_wren;
+
+ reg [ 31:0] tb_x_data;
+ reg [ 31:0] tb_y_data;
+ reg [ 31:0] tb_n_data;
+ wire [ 31:0] tb_d_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_y_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_n
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_n_data),
+ .a_out (),
+
+ .b_addr (core_n_addr),
+ .b_out (core_n_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_s
+ (
+ .clk (clk),
+
+ .a_addr (core_d_addr),
+ .a_wr (core_d_wren),
+ .a_in (core_d_data),
+ .a_out (),
+
+ .b_addr (tb_d_addr),
+ .b_out (tb_d_data)
+ );
+
+
+ //
+ // UUT
+ //
+ modular_subtractor #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .ab_addr (core_xy_addr),
+ .n_addr (core_n_addr),
+ .d_addr (core_d_addr),
+ .d_wren (core_d_wren),
+
+ .a_din (core_x_data),
+ .b_din (core_y_data),
+ .n_din (core_n_data),
+ .d_dout (core_d_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xyn_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_subtractor(X_1, Y_1, N);
+ test_modular_subtractor(X_2, Y_2, N);
+ test_modular_subtractor(Y_1, X_1, N);
+ test_modular_subtractor(Y_2, X_2, N);
+
+ test_modular_subtractor(X_1, X_2, N);
+ test_modular_subtractor(X_2, X_1, N);
+ test_modular_subtractor(Y_1, Y_2, N);
+ test_modular_subtractor(Y_2, Y_1, N);
+
+ test_modular_subtractor(X_1, Y_2, N);
+ test_modular_subtractor(Y_2, X_1, N);
+ test_modular_subtractor(X_2, Y_1, N);
+ test_modular_subtractor(Y_1, X_2, N);
+
+ /* print result */
+ if (ok) $display("tb_modular_subtractor_256: SUCCESS");
+ else $display("tb_modular_subtractor_256: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [256:0] d;
+ wire [255:0] d_dummy = d[255:0];
+ reg d_ok;
+
+ integer w;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+ reg [255:0] n_shreg;
+ reg [255:0] d_shreg;
+
+ task test_modular_subtractor;
+
+ input [255:0] x;
+ input [255:0] y;
+ input [255:0] n;
+
+ begin
+
+ /* start filling memories */
+ tb_xyn_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+ n_shreg = n;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+ tb_n_data = n_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+ n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+ tb_n_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyn_wren = 0;
+
+ /* calculate reference value */
+ d = {1'b0, (x < y) ? n : {256{1'b0}}};
+ d = d + {1'b0, x} - {1'b0, y};
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_d_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ d_shreg = {tb_d_data, d_shreg[255:32]};
+
+ end
+
+ /* compare */
+ d_ok = (d_shreg == d[255:0]);
+
+ /* display results */
+ $display("test_modular_subtractor(): %s", d_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && d_ok;
+
+ end
+
+ endtask
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_mw_comparator.v b/bench/tb_mw_comparator.v
new file mode 100644
index 0000000..abab8bc
--- /dev/null
+++ b/bench/tb_mw_comparator.v
@@ -0,0 +1,322 @@
+//------------------------------------------------------------------------------
+//
+// tb_mw_comparator.v
+// -----------------------------------------------------------------------------
+// Testbench for multi-word comparator.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_mw_comparator;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [383:0] A_0 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] A_1 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F75;
+ localparam [383:0] A_2 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F77;
+
+ localparam [383:0] A_3 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_375D568B_79E730D4_18A9143C_18905F76;
+ localparam [383:0] A_4 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_575D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] A_5 = 384'hBDC7B53C_616B13B5_77622510_75BA95FB_475D568B_79E730D4_18A9143C_18905F76;
+ localparam [383:0] A_6 = 384'hBDC7B53C_616B13B5_77622510_75BA95FD_475D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] A_7 = 384'hADC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+ localparam [383:0] A_8 = 384'hCDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] B_0 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+ localparam [383:0] B_1 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E4;
+ localparam [383:0] B_2 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E6;
+
+ localparam [383:0] B_3 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_9D784F98_850046D4_10DDD64D_F6BB32E5;
+ localparam [383:0] B_4 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_BD784F98_850046D4_10DDD64D_F6BB32E5;
+
+ localparam [383:0] B_5 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3BF_AD784F98_850046D4_10DDD64D_F6BB32E5;
+ localparam [383:0] B_6 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C1_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+ localparam [383:0] B_7 = 384'h248A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+ localparam [383:0] B_8 = 384'h448A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ parameter OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+ wire core_cmp_l;
+ wire core_cmp_e;
+ wire core_cmp_g;
+
+
+ //
+ // Buffers (X, Y)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+
+ wire [ 32-1:0] core_x_data;
+ wire [ 32-1:0] core_y_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xy_addr;
+ reg tb_xy_wren;
+
+ reg [ 32-1:0] tb_x_data;
+ reg [ 32-1:0] tb_y_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xy_addr),
+ .a_wr (tb_xy_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xy_addr),
+ .a_wr (tb_xy_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_y_data)
+ );
+
+
+ //
+ // UUT
+ //
+ mw_comparator #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .xy_addr (core_xy_addr),
+ .x_din (core_x_data),
+ .y_din (core_y_data),
+
+ .cmp_l (core_cmp_l),
+ .cmp_e (core_cmp_e),
+ .cmp_g (core_cmp_g)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xy_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_mw_comparator(A_0, A_0);
+
+ test_mw_comparator(A_0, A_1);
+ test_mw_comparator(A_0, A_2);
+ test_mw_comparator(A_0, A_3);
+ test_mw_comparator(A_0, A_4);
+ test_mw_comparator(A_0, A_5);
+ test_mw_comparator(A_0, A_6);
+ test_mw_comparator(A_0, A_7);
+ test_mw_comparator(A_0, A_8);
+
+ test_mw_comparator(B_0, B_0);
+
+ test_mw_comparator(B_0, B_1);
+ test_mw_comparator(B_0, B_2);
+ test_mw_comparator(B_0, B_3);
+ test_mw_comparator(B_0, B_4);
+ test_mw_comparator(B_0, B_5);
+ test_mw_comparator(B_0, B_6);
+ test_mw_comparator(B_0, B_7);
+ test_mw_comparator(B_0, B_8);
+
+ /* print result */
+ if (ok) $display("tb_mw_comparator: SUCCESS");
+ else $display("tb_mw_comparator: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg cmp_l;
+ reg cmp_e;
+ reg cmp_g;
+ reg cmp_ok;
+
+ integer w;
+
+ task test_mw_comparator;
+
+ input [255:0] x;
+ input [255:0] y;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_xy_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xy_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xy_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xy_wren = 0;
+
+ /* calculate reference values */
+ cmp_l = (x < y) ? 1 : 0;
+ cmp_e = (x == y) ? 1 : 0;
+ cmp_g = (x > y) ? 1 : 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* compare */
+ cmp_ok = (cmp_l == core_cmp_l) && (cmp_e == core_cmp_e) && (cmp_g == core_cmp_g);
+
+ /* display results */
+ $display("test_mw_comparator(): %s", cmp_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && cmp_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_mw_mover.v b/bench/tb_mw_mover.v
new file mode 100644
index 0000000..be767fc
--- /dev/null
+++ b/bench/tb_mw_mover.v
@@ -0,0 +1,282 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_mover.v
+// -----------------------------------------------------------------------------
+// Testbench for multi-word data mover.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_mw_mover;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+ localparam [255:0] X_2 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+ localparam [255:0] X_3 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+ localparam [255:0] X_4 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_x_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_y_addr;
+ wire core_y_wren;
+
+ wire [ 32-1:0] core_x_data;
+ wire [ 32-1:0] core_y_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_x_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_y_addr;
+ reg tb_x_wren;
+
+ reg [ 32-1:0] tb_x_data;
+ wire [ 32-1:0] tb_y_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_x_addr),
+ .a_wr (tb_x_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_x_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_d
+ (
+ .clk (clk),
+
+ .a_addr (core_y_addr),
+ .a_wr (core_y_wren),
+ .a_in (core_y_data),
+ .a_out (),
+
+ .b_addr (tb_y_addr),
+ .b_out (tb_y_data)
+ );
+
+
+ //
+ // UUT
+ //
+ mw_mover #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .x_addr (core_x_addr),
+ .y_addr (core_y_addr),
+ .y_wren (core_y_wren),
+
+ .x_din (core_x_data),
+ .y_dout (core_y_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_x_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_mover(X_1);
+ test_modular_mover(X_2);
+ test_modular_mover(X_3);
+ test_modular_mover(X_4);
+
+ /* print result */
+ if (ok) $display("tb_modular_mover: SUCCESS");
+ else $display("tb_modular_mover: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [255:0] y;
+ reg y_ok;
+
+ integer w;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+
+ task test_modular_mover;
+
+ input [255:0] x;
+
+ begin
+
+ /* start filling memories */
+ tb_x_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_x_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_x_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_y_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ y_shreg = {tb_y_data, y_shreg[255:32]};
+
+ end
+
+ /* compare */
+ y_ok = (y_shreg == x);
+
+ /* display results */
+ $display("test_modular_mover(): %s", y_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && y_ok;
+
+ end
+
+ endtask
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/curve/curve_dbl_add_256.v b/rtl/curve/curve_dbl_add_256.v
new file mode 100644
index 0000000..08a9931
--- /dev/null
+++ b/rtl/curve/curve_dbl_add_256.v
@@ -0,0 +1,868 @@
+//------------------------------------------------------------------------------
+//
+// curve_adder_256.v
+// -----------------------------------------------------------------------------
+// Elliptic curve point adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module curve_dbl_add_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ uop_addr, uop,
+ px_addr, py_addr, pz_addr, rx_addr, ry_addr, rz_addr, q_addr, v_addr,
+ rx_wren, ry_wren, rz_wren,
+ px_din, py_din, pz_din,
+ rx_din, ry_din, rz_din,
+ rx_dout, ry_dout, rz_dout, q_din, v_din
+ );
+
+
+ //
+ // Microcode
+ //
+`include "uop_ecdsa.v"
+
+
+ //
+ // Constants
+ //
+ localparam WORD_COUNTER_WIDTH = 3; // 0 .. 7
+ localparam OPERAND_NUM_WORDS = 8; // 8 * 32 = 256
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output reg [ 6-1: 0] uop_addr;
+ input wire [20-1: 0] uop;
+
+ output reg [WORD_COUNTER_WIDTH-1:0] px_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] py_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] pz_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] rx_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] ry_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] rz_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] v_addr;
+ output wire [WORD_COUNTER_WIDTH-1:0] q_addr;
+
+ output wire rx_wren;
+ output wire ry_wren;
+ output wire rz_wren;
+
+ input wire [ 32-1:0] px_din;
+ input wire [ 32-1:0] py_din;
+ input wire [ 32-1:0] pz_din;
+ input wire [ 32-1:0] rx_din;
+ input wire [ 32-1:0] ry_din;
+ input wire [ 32-1:0] rz_din;
+ output wire [ 32-1:0] rx_dout;
+ output wire [ 32-1:0] ry_dout;
+ output wire [ 32-1:0] rz_dout;
+ input wire [ 32-1:0] q_din;
+ input wire [ 32-1:0] v_din;
+
+
+ //
+ // Microcode
+ //
+ wire [ 4: 0] uop_opcode = uop[19:15];
+ wire [ 4: 0] uop_src_a = uop[14:10];
+ wire [ 4: 0] uop_src_b = uop[ 9: 5];
+ wire [ 2: 0] uop_dst = uop[ 4: 2];
+ wire [ 1: 0] uop_exec = uop[ 1: 0];
+
+
+ //
+ // Multi-Word Comparator
+ //
+ wire mw_cmp_ena;
+ wire mw_cmp_rdy;
+
+ wire mw_cmp_out_l;
+ wire mw_cmp_out_e;
+ wire mw_cmp_out_g;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mw_cmp_addr_xy;
+
+ wire [ 32-1:0] mw_cmp_din_x;
+ wire [ 32-1:0] mw_cmp_din_y;
+
+ // flags
+ reg flag_pz_is_zero;
+ reg flag_t1_is_zero;
+ reg flag_t2_is_zero;
+
+ mw_comparator #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ mw_comparator_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mw_cmp_ena),
+ .rdy (mw_cmp_rdy),
+
+ .xy_addr (mw_cmp_addr_xy),
+ .x_din (mw_cmp_din_x),
+ .y_din (mw_cmp_din_y),
+
+ .cmp_l (mw_cmp_out_l),
+ .cmp_e (mw_cmp_out_e),
+ .cmp_g (mw_cmp_out_g)
+ );
+
+
+ //
+ // Modular Adder
+ //
+ wire mod_add_ena;
+ wire mod_add_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_ab;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_n;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_s;
+ wire mod_add_wren_s;
+
+ wire [ 32-1:0] mod_add_din_a;
+ wire [ 32-1:0] mod_add_din_b;
+ wire [ 32-1:0] mod_add_din_n;
+ wire [ 32-1:0] mod_add_dout_s;
+
+ assign mod_add_din_n = q_din;
+
+ modular_adder #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ modular_adder_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mod_add_ena),
+ .rdy (mod_add_rdy),
+
+ .ab_addr (mod_add_addr_ab),
+ .n_addr (mod_add_addr_n),
+ .s_addr (mod_add_addr_s),
+ .s_wren (mod_add_wren_s),
+
+ .a_din (mod_add_din_a),
+ .b_din (mod_add_din_b),
+ .n_din (mod_add_din_n),
+ .s_dout (mod_add_dout_s)
+ );
+
+
+ //
+ // Modular Subtractor
+ //
+ wire mod_sub_ena;
+ wire mod_sub_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_ab;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_n;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_d;
+ wire mod_sub_wren_d;
+
+ wire [ 32-1:0] mod_sub_din_a;
+ wire [ 32-1:0] mod_sub_din_b;
+ wire [ 32-1:0] mod_sub_din_n;
+ wire [ 32-1:0] mod_sub_dout_d;
+
+ assign mod_sub_din_n = q_din;
+
+ modular_subtractor #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ modular_subtractor_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mod_sub_ena),
+ .rdy (mod_sub_rdy),
+
+ .ab_addr (mod_sub_addr_ab),
+ .n_addr (mod_sub_addr_n),
+ .d_addr (mod_sub_addr_d),
+ .d_wren (mod_sub_wren_d),
+
+ .a_din (mod_sub_din_a),
+ .b_din (mod_sub_din_b),
+ .n_din (mod_sub_din_n),
+ .d_dout (mod_sub_dout_d)
+ );
+
+
+ //
+ // Modular Multiplier
+ //
+ wire mod_mul_ena;
+ wire mod_mul_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_a;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_b;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_n;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_p;
+ wire mod_mul_wren_p;
+
+ wire [ 32-1:0] mod_mul_din_a;
+ wire [ 32-1:0] mod_mul_din_b;
+ wire [ 32-1:0] mod_mul_din_n;
+ wire [ 32-1:0] mod_mul_dout_p;
+
+ assign mod_mul_din_n = q_din;
+
+ modular_multiplier_256 modular_multiplier_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mod_mul_ena),
+ .rdy (mod_mul_rdy),
+
+ .a_addr (mod_mul_addr_a),
+ .b_addr (mod_mul_addr_b),
+ .n_addr (mod_mul_addr_n),
+ .p_addr (mod_mul_addr_p),
+ .p_wren (mod_mul_wren_p),
+
+ .a_din (mod_mul_din_a),
+ .b_din (mod_mul_din_b),
+ .n_din (mod_mul_din_n),
+ .p_dout (mod_mul_dout_p)
+ );
+
+
+ //
+ // Multi-Word Data Mover
+ //
+ wire mw_mov_ena;
+ wire mw_mov_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_x;
+ wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_y;
+ wire mw_mov_wren_y;
+
+ wire [ 32-1:0] mw_mov_din_x;
+ wire [ 32-1:0] mw_mov_dout_y;
+
+ mw_mover mw_mover_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mw_mov_ena),
+ .rdy (mw_mov_rdy),
+
+ .x_addr (mw_mov_addr_x),
+ .y_addr (mw_mov_addr_y),
+ .y_wren (mw_mov_wren_y),
+
+ .x_din (mw_mov_din_x),
+ .y_dout (mw_mov_dout_y)
+ );
+
+
+ //
+ // ROMs
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] brom_one_addr;
+ //reg [WORD_COUNTER_WIDTH-1:0] brom_zero_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_delta_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_g_x_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_g_y_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_h_x_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_h_y_addr;
+
+ wire [ 32-1:0] brom_one_dout;
+ wire [ 32-1:0] brom_zero_dout;
+ wire [ 32-1:0] brom_delta_dout;
+ wire [ 32-1:0] brom_g_x_dout;
+ wire [ 32-1:0] brom_g_y_dout;
+ wire [ 32-1:0] brom_h_x_dout;
+ wire [ 32-1:0] brom_h_y_dout;
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_one brom_one_inst
+ (.clk(clk), .b_addr(brom_one_addr), .b_out(brom_one_dout));
+
+ brom_p256_zero brom_zero_inst
+ (.b_out(brom_zero_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_delta brom_delta_inst
+ (.clk(clk), .b_addr(brom_delta_addr), .b_out(brom_delta_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_g_x brom_g_x_inst
+ (.clk(clk), .b_addr(brom_g_x_addr), .b_out(brom_g_x_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_g_y brom_g_y_inst
+ (.clk(clk), .b_addr(brom_g_y_addr), .b_out(brom_g_y_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_h_x brom_h_x_inst
+ (.clk(clk), .b_addr(brom_h_x_addr), .b_out(brom_h_x_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_h_y brom_h_y_inst
+ (.clk(clk), .b_addr(brom_h_y_addr), .b_out(brom_h_y_dout));
+
+
+ //
+ // Temporary Variables
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t1_wr_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t2_wr_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t3_wr_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t4_wr_addr;
+
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t1_rd_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t2_rd_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t3_rd_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t4_rd_addr;
+
+ wire bram_t1_wr_en;
+ wire bram_t2_wr_en;
+ wire bram_t3_wr_en;
+ wire bram_t4_wr_en;
+
+ wire [ 32-1:0] bram_t1_wr_data;
+ wire [ 32-1:0] bram_t2_wr_data;
+ wire [ 32-1:0] bram_t3_wr_data;
+ wire [ 32-1:0] bram_t4_wr_data;
+
+ wire [ 32-1:0] bram_t1_rd_data;
+ wire [ 32-1:0] bram_t2_rd_data;
+ wire [ 32-1:0] bram_t3_rd_data;
+ wire [ 32-1:0] bram_t4_rd_data;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t1
+ ( .clk (clk),
+ .a_addr(bram_t1_wr_addr), .a_wr(bram_t1_wr_en), .a_in(bram_t1_wr_data), .a_out(),
+ .b_addr(bram_t1_rd_addr), .b_out(bram_t1_rd_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t2
+ ( .clk (clk),
+ .a_addr(bram_t2_wr_addr), .a_wr(bram_t2_wr_en), .a_in(bram_t2_wr_data), .a_out(),
+ .b_addr(bram_t2_rd_addr), .b_out(bram_t2_rd_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t3
+ ( .clk (clk),
+ .a_addr(bram_t3_wr_addr), .a_wr(bram_t3_wr_en), .a_in(bram_t3_wr_data), .a_out(),
+ .b_addr(bram_t3_rd_addr), .b_out(bram_t3_rd_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t4
+ ( .clk (clk),
+ .a_addr(bram_t4_wr_addr), .a_wr(bram_t4_wr_en), .a_in(bram_t4_wr_data), .a_out(),
+ .b_addr(bram_t4_rd_addr), .b_out(bram_t4_rd_data)
+ );
+
+
+ //
+ // uOP Trigger Logic
+ //
+ reg uop_trig;
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) uop_trig <= 1'b0;
+ else uop_trig <= (fsm_state == FSM_STATE_FETCH) ? 1'b1 : 1'b0;
+
+
+ //
+ // FSM
+ //
+ localparam [ 1: 0] FSM_STATE_STALL = 2'b00;
+ localparam [ 1: 0] FSM_STATE_FETCH = 2'b01;
+ localparam [ 1: 0] FSM_STATE_EXECUTE = 2'b10;
+
+ reg [ 1: 0] fsm_state = FSM_STATE_STALL;
+ wire [ 1: 0] fsm_state_next = (uop_opcode == OPCODE_RDY) ? FSM_STATE_STALL : FSM_STATE_FETCH;
+
+
+ //
+ // FSM Transition Logic
+ //
+ reg uop_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) fsm_state <= FSM_STATE_STALL;
+ else case (fsm_state)
+ FSM_STATE_STALL: fsm_state <= ena ? FSM_STATE_FETCH : FSM_STATE_STALL;
+ FSM_STATE_FETCH: fsm_state <= FSM_STATE_EXECUTE;
+ FSM_STATE_EXECUTE: fsm_state <= (!uop_trig && uop_done) ? fsm_state_next : FSM_STATE_EXECUTE;
+ default: fsm_state <= FSM_STATE_STALL;
+ endcase
+
+
+ //
+ // uOP Address Increment Logic
+ //
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_STALL)
+ uop_addr <= 5'd0;
+ else if (fsm_state == FSM_STATE_EXECUTE)
+ if (!uop_trig && uop_done)
+ uop_addr <= (uop_opcode == OPCODE_RDY) ? 5'd0 : uop_addr + 1'b1;
+
+
+ //
+ // uOP Completion Logic
+ //
+ always @(*)
+ //
+ case (uop_opcode)
+ OPCODE_CMP: uop_done = mw_cmp_rdy;
+ OPCODE_MOV: uop_done = mw_mov_rdy;
+ OPCODE_ADD: uop_done = mod_add_rdy;
+ OPCODE_SUB: uop_done = mod_sub_rdy;
+ OPCODE_MUL: uop_done = mod_mul_rdy;
+ OPCODE_RDY: uop_done = 1'b1;
+ default: uop_done = 1'b0;
+ endcase
+
+
+ //
+ // Helper Modules Enable Logic
+ //
+ assign mw_cmp_ena = uop_opcode[0] & uop_trig;
+ assign mw_mov_ena = uop_opcode[1] & uop_trig;
+ assign mod_add_ena = uop_opcode[2] & uop_trig;
+ assign mod_sub_ena = uop_opcode[3] & uop_trig;
+ assign mod_mul_ena = uop_opcode[4] & uop_trig;
+
+
+ //
+ // uOP Source Value Decoding Logic
+ //
+ reg [31: 0] uop_src_a_value;
+
+ always @(*)
+ //
+ case (uop_src_a)
+ UOP_SRC_PX: uop_src_a_value = px_din;
+ UOP_SRC_PY: uop_src_a_value = py_din;
+ UOP_SRC_PZ: uop_src_a_value = pz_din;
+
+ UOP_SRC_RX: uop_src_a_value = rx_din;
+ UOP_SRC_RY: uop_src_a_value = ry_din;
+ UOP_SRC_RZ: uop_src_a_value = rz_din;
+
+ UOP_SRC_T1: uop_src_a_value = bram_t1_rd_data;
+ UOP_SRC_T2: uop_src_a_value = bram_t2_rd_data;
+ UOP_SRC_T3: uop_src_a_value = bram_t3_rd_data;
+ UOP_SRC_T4: uop_src_a_value = bram_t4_rd_data;
+
+ UOP_SRC_ONE: uop_src_a_value = brom_one_dout;
+ UOP_SRC_ZERO: uop_src_a_value = brom_zero_dout;
+ UOP_SRC_DELTA: uop_src_a_value = brom_delta_dout;
+
+ UOP_SRC_G_X: uop_src_a_value = brom_g_x_dout;
+ UOP_SRC_G_Y: uop_src_a_value = brom_g_y_dout;
+
+ UOP_SRC_H_X: uop_src_a_value = brom_h_x_dout;
+ UOP_SRC_H_Y: uop_src_a_value = brom_h_y_dout;
+
+ UOP_SRC_V: uop_src_a_value = v_din;
+
+ default: uop_src_a_value = {32{1'bX}};
+ endcase
+
+
+ assign mw_cmp_din_x = uop_src_a_value;
+ assign mw_mov_din_x = uop_src_a_value;
+ assign mod_add_din_a = uop_src_a_value;
+ assign mod_sub_din_a = uop_src_a_value;
+ assign mod_mul_din_a = uop_src_a_value;
+
+ reg [31: 0] uop_src_b_value;
+
+ always @(*)
+ //
+ case (uop_src_b)
+ UOP_SRC_PX: uop_src_b_value = px_din;
+ UOP_SRC_PY: uop_src_b_value = py_din;
+ UOP_SRC_PZ: uop_src_b_value = pz_din;
+
+ UOP_SRC_RX: uop_src_b_value = rx_din;
+ UOP_SRC_RY: uop_src_b_value = ry_din;
+ UOP_SRC_RZ: uop_src_b_value = rz_din;
+
+ UOP_SRC_T1: uop_src_b_value = bram_t1_rd_data;
+ UOP_SRC_T2: uop_src_b_value = bram_t2_rd_data;
+ UOP_SRC_T3: uop_src_b_value = bram_t3_rd_data;
+ UOP_SRC_T4: uop_src_b_value = bram_t4_rd_data;
+
+ UOP_SRC_ONE: uop_src_b_value = brom_one_dout;
+ UOP_SRC_ZERO: uop_src_b_value = brom_zero_dout;
+ UOP_SRC_DELTA: uop_src_b_value = brom_delta_dout;
+
+ UOP_SRC_G_X: uop_src_b_value = brom_g_x_dout;
+ UOP_SRC_G_Y: uop_src_b_value = brom_g_y_dout;
+
+ UOP_SRC_H_X: uop_src_b_value = brom_h_x_dout;
+ UOP_SRC_H_Y: uop_src_b_value = brom_h_y_dout;
+
+ UOP_SRC_V: uop_src_b_value = v_din;
+
+ default: uop_src_b_value = {32{1'bX}};
+ endcase
+
+ assign mw_cmp_din_y = uop_src_b_value;
+ assign mod_add_din_b = uop_src_b_value;
+ assign mod_sub_din_b = uop_src_b_value;
+ assign mod_mul_din_b = uop_src_b_value;
+
+
+ //
+ // uOP Source & Destination Address Decoding Logic
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] uop_src_a_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] uop_src_b_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] uop_dst_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] uop_q_addr;
+
+ assign q_addr = uop_q_addr;
+
+ always @(*)
+ //
+ case (uop_opcode)
+ //
+ OPCODE_CMP: begin
+ uop_src_a_addr = mw_cmp_addr_xy;
+ uop_src_b_addr = mw_cmp_addr_xy;
+ uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ OPCODE_MOV: begin
+ uop_src_a_addr = mw_mov_addr_x;
+ uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_dst_addr = mw_mov_addr_y;
+ uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ OPCODE_ADD: begin
+ uop_src_a_addr = mod_add_addr_ab;
+ uop_src_b_addr = mod_add_addr_ab;
+ uop_dst_addr = mod_add_addr_s;
+ uop_q_addr = mod_add_addr_n;
+ end
+ //
+ OPCODE_SUB: begin
+ uop_src_a_addr = mod_sub_addr_ab;
+ uop_src_b_addr = mod_sub_addr_ab;
+ uop_dst_addr = mod_sub_addr_d;
+ uop_q_addr = mod_sub_addr_n;
+ end
+ //
+ OPCODE_MUL: begin
+ uop_src_a_addr = mod_mul_addr_a;
+ uop_src_b_addr = mod_mul_addr_b;
+ uop_dst_addr = mod_mul_addr_p;
+ uop_q_addr = mod_mul_addr_n;
+ end
+ //
+ default: begin
+ uop_src_a_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ endcase
+
+
+ //
+ // uOP Conditional Execution Logic
+ //
+ reg uop_exec_effective;
+
+ always @(*)
+ //
+ case (uop_exec)
+ UOP_EXEC_ALWAYS: uop_exec_effective = 1'b1;
+ UOP_EXEC_PZT1T2_0XX: uop_exec_effective = flag_pz_is_zero;
+ UOP_EXEC_PZT1T2_100: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && flag_t2_is_zero;
+ UOP_EXEC_PZT1T2_101: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && !flag_t2_is_zero;
+ endcase
+
+
+ //
+ // uOP Destination Store Logic
+ //
+ reg uop_dst_wren;
+
+ always @(*)
+ //
+ case (uop_opcode)
+ //
+ OPCODE_MOV: uop_dst_wren = mw_mov_wren_y & uop_exec_effective;
+ OPCODE_ADD: uop_dst_wren = mod_add_wren_s;
+ OPCODE_SUB: uop_dst_wren = mod_sub_wren_d;
+ OPCODE_MUL: uop_dst_wren = mod_mul_wren_p;
+ default: uop_dst_wren = 1'b0;
+ //
+ endcase
+
+
+ always @(*) begin
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_PX) px_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_PX) px_addr = uop_src_b_addr;
+ else px_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_PY) py_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_PY) py_addr = uop_src_b_addr;
+ else py_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_PZ) pz_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_PZ) pz_addr = uop_src_b_addr;
+ else pz_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_ONE) brom_one_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_ONE) brom_one_addr = uop_src_b_addr;
+ else brom_one_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //if (uop_src_a == UOP_SRC_ZERO) brom_zero_addr = uop_src_a_addr;
+ //else if (uop_src_b == UOP_SRC_ZERO) brom_zero_addr = uop_src_b_addr;
+ //else brom_zero_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_DELTA) brom_delta_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_DELTA) brom_delta_addr = uop_src_b_addr;
+ else brom_delta_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_G_X) brom_g_x_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_G_X) brom_g_x_addr = uop_src_b_addr;
+ else brom_g_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_G_Y) brom_g_y_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_G_Y) brom_g_y_addr = uop_src_b_addr;
+ else brom_g_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_H_X) brom_h_x_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_H_X) brom_h_x_addr = uop_src_b_addr;
+ else brom_h_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_H_Y) brom_h_y_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_H_Y) brom_h_y_addr = uop_src_b_addr;
+ else brom_h_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_V) v_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_V) v_addr = uop_src_b_addr;
+ else v_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_T1) bram_t1_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T1) bram_t1_rd_addr = uop_src_b_addr;
+ else bram_t1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_T2) bram_t2_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T2) bram_t2_rd_addr = uop_src_b_addr;
+ else bram_t2_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_T3) bram_t3_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T3) bram_t3_rd_addr = uop_src_b_addr;
+ else bram_t3_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_T4) bram_t4_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T4) bram_t4_rd_addr = uop_src_b_addr;
+ else bram_t4_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_dst == UOP_DST_T1) bram_t1_wr_addr = uop_dst_addr;
+ else bram_t1_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_dst == UOP_DST_T2) bram_t2_wr_addr = uop_dst_addr;
+ else bram_t2_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_dst == UOP_DST_T3) bram_t3_wr_addr = uop_dst_addr;
+ else bram_t3_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_dst == UOP_DST_T4) bram_t4_wr_addr = uop_dst_addr;
+ else bram_t4_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if ((uop_dst == UOP_DST_RX) && (uop_dst_wren)) rx_addr = uop_dst_addr;
+ else begin
+ if (uop_src_a == UOP_SRC_RX) rx_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_RX) rx_addr = uop_src_b_addr;
+ else rx_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ if ((uop_dst == UOP_DST_RY) && (uop_dst_wren)) ry_addr = uop_dst_addr;
+ else begin
+ if (uop_src_a == UOP_SRC_RY) ry_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_RY) ry_addr = uop_src_b_addr;
+ else ry_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ if ((uop_dst == UOP_DST_RZ) && (uop_dst_wren)) rz_addr = uop_dst_addr;
+ else begin
+ if (uop_src_a == UOP_SRC_RZ) rz_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_RZ) rz_addr = uop_src_b_addr;
+ else rz_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ end
+
+
+ assign rx_wren = uop_dst_wren && (uop_dst == UOP_DST_RX);
+ assign ry_wren = uop_dst_wren && (uop_dst == UOP_DST_RY);
+ assign rz_wren = uop_dst_wren && (uop_dst == UOP_DST_RZ);
+
+ assign bram_t1_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T1);
+ assign bram_t2_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T2);
+ assign bram_t3_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T3);
+ assign bram_t4_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T4);
+
+
+
+ //
+ // Destination Value Selector
+ //
+ reg [31: 0] uop_dst_value;
+
+ always @(*)
+ //
+ case (uop_opcode)
+
+ OPCODE_MOV: uop_dst_value = mw_mov_dout_y;
+ OPCODE_ADD: uop_dst_value = mod_add_dout_s;
+ OPCODE_SUB: uop_dst_value = mod_sub_dout_d;
+ OPCODE_MUL: uop_dst_value = mod_mul_dout_p;
+
+ default: uop_dst_value = {32{1'bX}};
+
+ endcase
+
+ assign rx_dout = uop_dst_value;
+ assign ry_dout = uop_dst_value;
+ assign rz_dout = uop_dst_value;
+
+ assign bram_t1_wr_data = uop_dst_value;
+ assign bram_t2_wr_data = uop_dst_value;
+ assign bram_t3_wr_data = uop_dst_value;
+ assign bram_t4_wr_data = uop_dst_value;
+
+
+ //
+ // Latch Comparison Flags
+ //
+ always @(posedge clk)
+ //
+ if ( (fsm_state == FSM_STATE_EXECUTE) &&
+ (uop_opcode == OPCODE_CMP) &&
+ (uop_done && !uop_trig) ) begin
+
+ if ( (uop_src_a == UOP_SRC_PZ) && (uop_src_b == UOP_SRC_ZERO) )
+ flag_pz_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+ if ( (uop_src_a == UOP_SRC_T1) && (uop_src_b == UOP_SRC_ZERO) )
+ flag_t1_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+ if ( (uop_src_a == UOP_SRC_T2) && (uop_src_b == UOP_SRC_ZERO) )
+ flag_t2_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+ end
+
+
+ //
+ // Ready Flag Logic
+ //
+ reg rdy_reg = 1'b1;
+ assign rdy = rdy_reg;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) rdy_reg <= 1'b1;
+ else begin
+
+ /* clear flag */
+ if (fsm_state == FSM_STATE_STALL)
+ if (ena) rdy_reg <= 1'b0;
+
+ /* set flag */
+ if ((fsm_state == FSM_STATE_EXECUTE) && !uop_trig && uop_done)
+ if (uop_opcode == OPCODE_RDY) rdy_reg <= 1'b1;
+
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/curve/curve_mul_256.v b/rtl/curve/curve_mul_256.v
new file mode 100644
index 0000000..0ac2be0
--- /dev/null
+++ b/rtl/curve/curve_mul_256.v
@@ -0,0 +1,720 @@
+//------------------------------------------------------------------------------
+//
+// curve_mul_256.v
+// -----------------------------------------------------------------------------
+// Elliptic curve point scalar multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module curve_mul_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ k_addr, rx_addr, ry_addr,
+ rx_wren, ry_wren,
+ k_din,
+ rx_dout, ry_dout
+ );
+
+
+ //
+ // Constants
+ //
+ localparam WORD_COUNTER_WIDTH = 3; // 0 .. 7
+ localparam OPERAND_NUM_WORDS = 8; // 8 * 32 = 256
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [ 2: 0] k_addr;
+ output wire [ 2: 0] rx_addr;
+ output wire [ 2: 0] ry_addr;
+
+ output wire rx_wren;
+ output wire ry_wren;
+
+ input wire [31: 0] k_din;
+
+ output wire [31: 0] rx_dout;
+ output wire [31: 0] ry_dout;
+
+
+ //
+ // Temporary Variables
+ //
+ reg [ 2: 0] bram_tx_wr_addr;
+ reg [ 2: 0] bram_ty_wr_addr;
+ reg [ 2: 0] bram_tz_wr_addr;
+
+ reg [ 2: 0] bram_rx_wr_addr;
+ reg [ 2: 0] bram_ry_wr_addr;
+ reg [ 2: 0] bram_rz_wr_addr;
+ wire [ 2: 0] bram_rz1_wr_addr;
+
+ reg [ 2: 0] bram_tx_rd_addr;
+ reg [ 2: 0] bram_ty_rd_addr;
+ reg [ 2: 0] bram_tz_rd_addr;
+
+ reg [ 2: 0] bram_rx_rd_addr;
+ reg [ 2: 0] bram_ry_rd_addr;
+ reg [ 2: 0] bram_rz_rd_addr;
+ wire [ 2: 0] bram_rz1_rd_addr;
+
+ reg bram_tx_wr_en;
+ reg bram_ty_wr_en;
+ reg bram_tz_wr_en;
+
+ reg bram_rx_wr_en;
+ reg bram_ry_wr_en;
+ reg bram_rz_wr_en;
+ wire bram_rz1_wr_en;
+
+ wire [31: 0] bram_tx_rd_data;
+ wire [31: 0] bram_ty_rd_data;
+ wire [31: 0] bram_tz_rd_data;
+
+ wire [31: 0] bram_rx_rd_data;
+ wire [31: 0] bram_ry_rd_data;
+ wire [31: 0] bram_rz_rd_data;
+ wire [31: 0] bram_rz1_rd_data;
+
+ reg [31: 0] bram_tx_wr_data_in;
+ reg [31: 0] bram_ty_wr_data_in;
+ reg [31: 0] bram_tz_wr_data_in;
+
+ reg [31: 0] bram_rx_wr_data_in;
+ reg [31: 0] bram_ry_wr_data_in;
+ reg [31: 0] bram_rz_wr_data_in;
+ wire [31: 0] bram_rz1_wr_data_in;
+
+ wire [31: 0] bram_tx_wr_data_out;
+ wire [31: 0] bram_ty_wr_data_out;
+ wire [31: 0] bram_tz_wr_data_out;
+
+ wire [31: 0] bram_rx_wr_data_out;
+ wire [31: 0] bram_ry_wr_data_out;
+ wire [31: 0] bram_rz_wr_data_out;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_tx (.clk(clk),
+ .a_addr(bram_tx_wr_addr), .a_wr(bram_tx_wr_en), .a_in(bram_tx_wr_data_in), .a_out(bram_tx_wr_data_out),
+ .b_addr(bram_tx_rd_addr), .b_out(bram_tx_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_ty (.clk(clk),
+ .a_addr(bram_ty_wr_addr), .a_wr(bram_ty_wr_en), .a_in(bram_ty_wr_data_in), .a_out(bram_ty_wr_data_out),
+ .b_addr(bram_ty_rd_addr), .b_out(bram_ty_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_tz (.clk(clk),
+ .a_addr(bram_tz_wr_addr), .a_wr(bram_tz_wr_en), .a_in(bram_tz_wr_data_in), .a_out(bram_tz_wr_data_out),
+ .b_addr(bram_tz_rd_addr), .b_out(bram_tz_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_rx (.clk(clk),
+ .a_addr(bram_rx_wr_addr), .a_wr(bram_rx_wr_en), .a_in(bram_rx_wr_data_in), .a_out(bram_rx_wr_data_out),
+ .b_addr(bram_rx_rd_addr), .b_out(bram_rx_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_ry (.clk(clk),
+ .a_addr(bram_ry_wr_addr), .a_wr(bram_ry_wr_en), .a_in(bram_ry_wr_data_in), .a_out(bram_ry_wr_data_out),
+ .b_addr(bram_ry_rd_addr), .b_out(bram_ry_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_rz (.clk(clk),
+ .a_addr(bram_rz_wr_addr), .a_wr(bram_rz_wr_en), .a_in(bram_rz_wr_data_in), .a_out(bram_rz_wr_data_out),
+ .b_addr(bram_rz_rd_addr), .b_out(bram_rz_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_rz1 (.clk(clk),
+ .a_addr(bram_rz1_wr_addr), .a_wr(bram_rz1_wr_en), .a_in(bram_rz1_wr_data_in), .a_out(),
+ .b_addr(bram_rz1_rd_addr), .b_out(bram_rz1_rd_data));
+
+
+ //
+ // FSM
+ //
+ localparam [ 3: 0] FSM_STATE_IDLE = 4'd00;
+ localparam [ 3: 0] FSM_STATE_PREPARE_TRIG = 4'd01;
+ localparam [ 3: 0] FSM_STATE_PREPARE_WAIT = 4'd02;
+ localparam [ 3: 0] FSM_STATE_DOUBLE_TRIG = 4'd03;
+ localparam [ 3: 0] FSM_STATE_DOUBLE_WAIT = 4'd04;
+ localparam [ 3: 0] FSM_STATE_ADD_TRIG = 4'd05;
+ localparam [ 3: 0] FSM_STATE_ADD_WAIT = 4'd06;
+ localparam [ 3: 0] FSM_STATE_COPY_TRIG = 4'd07;
+ localparam [ 3: 0] FSM_STATE_COPY_WAIT = 4'd08;
+ localparam [ 3: 0] FSM_STATE_INVERT_TRIG = 4'd09;
+ localparam [ 3: 0] FSM_STATE_INVERT_WAIT = 4'd10;
+ localparam [ 3: 0] FSM_STATE_CONVERT_TRIG = 4'd11;
+ localparam [ 3: 0] FSM_STATE_CONVERT_WAIT = 4'd12;
+ localparam [ 3: 0] FSM_STATE_DONE = 4'd13;
+
+ reg [3:0] fsm_state = FSM_STATE_IDLE;
+
+
+ //
+ // Round Counter
+ //
+ reg [ 7: 0] bit_counter;
+ wire [ 7: 0] bit_counter_max = 8'd255;
+ wire [ 7: 0] bit_counter_zero = 8'd0;
+ wire [ 7: 0] bit_counter_next =
+ (bit_counter < bit_counter_max) ? bit_counter + 1'b1 : bit_counter_zero;
+
+
+ //
+ // Round Completion
+ //
+ wire [ 3: 0] fsm_state_round_next = (bit_counter < bit_counter_max) ?
+ FSM_STATE_DOUBLE_TRIG : FSM_STATE_INVERT_TRIG;
+
+
+ //
+ // OP Trigger Logic
+ //
+ reg op_trig;
+ wire op_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) op_trig <= 1'b0;
+ else op_trig <= (fsm_state == FSM_STATE_PREPARE_TRIG) ||
+ (fsm_state == FSM_STATE_DOUBLE_TRIG) ||
+ (fsm_state == FSM_STATE_ADD_TRIG) ||
+ (fsm_state == FSM_STATE_CONVERT_TRIG);
+
+ //
+ // Microprograms
+ //
+ wire [ 5: 0] op_rom_addr;
+ wire [19: 0] op_rom_init_data;
+ wire [19: 0] op_rom_dbl_data;
+ wire [19: 0] op_rom_add_data;
+ wire [19: 0] op_rom_conv_data;
+ reg [19: 0] op_rom_mux_data;
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_init_rom op_rom_init
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_init_data)
+ );
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_dbl_rom op_rom_dbl
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_dbl_data)
+ );
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_add_rom op_rom_add
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_add_data)
+ );
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_conv_rom op_rom_conv
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_conv_data)
+ );
+
+ always @(*)
+ //
+ case (fsm_state)
+ FSM_STATE_PREPARE_WAIT: op_rom_mux_data = op_rom_init_data;
+ FSM_STATE_DOUBLE_WAIT: op_rom_mux_data = op_rom_dbl_data;
+ FSM_STATE_ADD_WAIT: op_rom_mux_data = op_rom_add_data;
+ FSM_STATE_CONVERT_WAIT: op_rom_mux_data = op_rom_conv_data;
+ default: op_rom_mux_data = {20{1'bX}};
+ endcase
+
+
+
+ //
+ // Modulus
+ //
+ reg [ 2: 0] rom_q_addr;
+ wire [31: 0] rom_q_data;
+
+ brom_p256_q rom_q
+ (
+ .clk (clk),
+ .b_addr (rom_q_addr),
+ .b_out (rom_q_data)
+ );
+
+
+ //
+ // Worker
+ //
+ wire [ 2: 0] worker_addr_px;
+ wire [ 2: 0] worker_addr_py;
+ wire [ 2: 0] worker_addr_pz;
+
+ wire [ 2: 0] worker_addr_rx;
+ wire [ 2: 0] worker_addr_ry;
+ wire [ 2: 0] worker_addr_rz;
+
+ wire [ 2: 0] worker_addr_q;
+
+ wire worker_wren_rx;
+ wire worker_wren_ry;
+ wire worker_wren_rz;
+
+ reg [31: 0] worker_din_px;
+ reg [31: 0] worker_din_py;
+ reg [31: 0] worker_din_pz;
+
+ reg [31: 0] worker_din_rx;
+ reg [31: 0] worker_din_ry;
+ reg [31: 0] worker_din_rz;
+
+ wire [31: 0] worker_dout_rx;
+ wire [31: 0] worker_dout_ry;
+ wire [31: 0] worker_dout_rz;
+
+ curve_dbl_add_256 worker
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (op_trig),
+ .rdy (op_done),
+
+ .uop_addr (op_rom_addr),
+ .uop (op_rom_mux_data),
+
+ .px_addr (worker_addr_px),
+ .py_addr (worker_addr_py),
+ .pz_addr (worker_addr_pz),
+
+ .rx_addr (worker_addr_rx),
+ .ry_addr (worker_addr_ry),
+ .rz_addr (worker_addr_rz),
+
+ .q_addr (worker_addr_q),
+
+ .v_addr (bram_rz1_rd_addr),
+
+ .rx_wren (worker_wren_rx),
+ .ry_wren (worker_wren_ry),
+ .rz_wren (worker_wren_rz),
+
+ .px_din (worker_din_px),
+ .py_din (worker_din_py),
+ .pz_din (worker_din_pz),
+
+ .rx_din (worker_din_rx),
+ .ry_din (worker_din_ry),
+ .rz_din (worker_din_rz),
+
+ .rx_dout (worker_dout_rx),
+ .ry_dout (worker_dout_ry),
+ .rz_dout (worker_dout_rz),
+
+ .q_din (rom_q_data),
+
+ .v_din (bram_rz1_rd_data)
+ );
+
+
+ //
+ // Mover
+ //
+ reg move_trig;
+ wire move_done;
+
+ wire [ 2: 0] mover_addr_x;
+ wire [ 2: 0] mover_addr_y;
+
+ wire mover_wren_y;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) move_trig <= 1'b0;
+ else move_trig <= (fsm_state == FSM_STATE_COPY_TRIG);
+
+ mw_mover #
+ (
+ .WORD_COUNTER_WIDTH (3),
+ .OPERAND_NUM_WORDS (8)
+ )
+ mover
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (move_trig),
+ .rdy (move_done),
+
+ .x_addr (mover_addr_x),
+ .y_addr (mover_addr_y),
+ .y_wren (mover_wren_y),
+
+ .x_din ({32{1'bX}}),
+ .y_dout ()
+ );
+
+
+ //
+ // Invertor
+ //
+ reg invert_trig;
+ wire invert_done;
+
+ wire [ 2: 0] invertor_addr_a;
+ wire [ 2: 0] invertor_addr_q;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) invert_trig <= 1'b0;
+ else invert_trig <= (fsm_state == FSM_STATE_INVERT_TRIG);
+
+ modular_invertor #
+ (
+ .MAX_OPERAND_WIDTH(256)
+ )
+ invertor
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (invert_trig),
+ .rdy (invert_done),
+
+ .a_addr (invertor_addr_a),
+ .q_addr (invertor_addr_q),
+ .a1_addr (bram_rz1_wr_addr),
+ .a1_wren (bram_rz1_wr_en),
+
+ .a_din (bram_rz_rd_data),
+ .q_din (rom_q_data),
+ .a1_dout (bram_rz1_wr_data_in)
+ );
+
+
+ //
+ // FSM Transition Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
+ else case (fsm_state)
+
+ FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_PREPARE_TRIG : FSM_STATE_IDLE;
+
+ FSM_STATE_PREPARE_TRIG: fsm_state <= FSM_STATE_PREPARE_WAIT;
+ FSM_STATE_PREPARE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DOUBLE_TRIG : FSM_STATE_PREPARE_WAIT;
+
+ FSM_STATE_DOUBLE_TRIG: fsm_state <= FSM_STATE_DOUBLE_WAIT;
+ FSM_STATE_DOUBLE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_ADD_TRIG : FSM_STATE_DOUBLE_WAIT;
+
+ FSM_STATE_ADD_TRIG: fsm_state <= FSM_STATE_ADD_WAIT;
+ FSM_STATE_ADD_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_COPY_TRIG : FSM_STATE_ADD_WAIT;
+
+ FSM_STATE_COPY_TRIG: fsm_state <= FSM_STATE_COPY_WAIT;
+ FSM_STATE_COPY_WAIT: fsm_state <= (!move_trig && move_done) ? fsm_state_round_next : FSM_STATE_COPY_WAIT;
+
+ FSM_STATE_INVERT_TRIG: fsm_state <= FSM_STATE_INVERT_WAIT;
+ FSM_STATE_INVERT_WAIT: fsm_state <= (!invert_trig && invert_done) ? FSM_STATE_CONVERT_TRIG : FSM_STATE_INVERT_WAIT;
+
+ FSM_STATE_CONVERT_TRIG: fsm_state <= FSM_STATE_CONVERT_WAIT;
+ FSM_STATE_CONVERT_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DONE : FSM_STATE_CONVERT_WAIT;
+
+ FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE;
+
+ default: fsm_state <= FSM_STATE_IDLE;
+
+ endcase
+
+
+ //
+ // Bit Counter Increment
+ //
+ always @(posedge clk) begin
+ //
+ if ((fsm_state == FSM_STATE_PREPARE_WAIT) && !op_trig && op_done)
+ bit_counter <= bit_counter_zero;
+ //
+ if ((fsm_state == FSM_STATE_COPY_WAIT) && !move_trig && move_done)
+ bit_counter <= bit_counter_next;
+ //
+ end
+
+
+ //
+ // K Latch Logic
+ //
+ reg [ 2: 0] k_addr_reg;
+ reg [31: 0] k_din_reg;
+
+ assign k_addr = k_addr_reg;
+
+ always @(posedge clk) begin
+ //
+ if (fsm_state == FSM_STATE_DOUBLE_TRIG)
+ k_addr_reg <= 3'd7 - bit_counter[7:5];
+ //
+ if (fsm_state == FSM_STATE_ADD_TRIG)
+ k_din_reg <= (bit_counter[4:0] == 5'd0) ? k_din : {k_din_reg[30:0], 1'bX};
+ //
+ end
+
+
+
+ //
+ // Copy Inhibit Logic
+ //
+ wire move_inhibit = k_din_reg[31];
+
+ wire copy_t2r_int = mover_wren_y & ~move_inhibit;
+
+
+ always @(*) begin
+ //
+ // Q
+ //
+ case (fsm_state)
+ FSM_STATE_DOUBLE_WAIT: rom_q_addr = worker_addr_q;
+ FSM_STATE_ADD_WAIT: rom_q_addr = worker_addr_q;
+ FSM_STATE_INVERT_WAIT: rom_q_addr = invertor_addr_q;
+ FSM_STATE_CONVERT_WAIT: rom_q_addr = worker_addr_q;
+ default: rom_q_addr = worker_addr_q;
+ endcase
+
+ //
+ // R(X,Y,Z)
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_PREPARE_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz;
+ bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz;
+ bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz;
+ //
+ end
+ //
+ FSM_STATE_DOUBLE_WAIT: begin
+ //
+ bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz;
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ FSM_STATE_ADD_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz;
+ bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz;
+ bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz;
+ //
+ end
+ //
+ FSM_STATE_COPY_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= mover_addr_y; bram_ry_wr_addr <= mover_addr_y; bram_rz_wr_addr <= mover_addr_y;
+ bram_rx_wr_en <= copy_t2r_int; bram_ry_wr_en <= copy_t2r_int; bram_rz_wr_en <= copy_t2r_int;
+ bram_rx_wr_data_in <= bram_tx_rd_data; bram_ry_wr_data_in <= bram_ty_rd_data; bram_rz_wr_data_in <= bram_tz_rd_data;
+ //
+ end
+ //
+ FSM_STATE_INVERT_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= invertor_addr_a;
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ FSM_STATE_CONVERT_WAIT: begin
+ //
+ bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz;
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+
+ //
+ default: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ endcase
+ //
+ // T(X,Y,Z)
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_DOUBLE_WAIT: begin
+ //
+ bram_tx_rd_addr <= {3{1'bX}}; bram_ty_rd_addr <= {3{1'bX}}; bram_tz_rd_addr <= {3{1'bX}};
+ bram_tx_wr_addr <= worker_addr_rx; bram_ty_wr_addr <= worker_addr_ry; bram_tz_wr_addr <= worker_addr_rz;
+ bram_tx_wr_en <= worker_wren_rx; bram_ty_wr_en <= worker_wren_ry; bram_tz_wr_en <= worker_wren_rz;
+ bram_tx_wr_data_in <= worker_dout_rx; bram_ty_wr_data_in <= worker_dout_ry; bram_tz_wr_data_in <= worker_dout_rz;
+ //
+ end
+ //
+ FSM_STATE_ADD_WAIT: begin
+ //
+ bram_tx_rd_addr <= worker_addr_px; bram_ty_rd_addr <= worker_addr_py; bram_tz_rd_addr <= worker_addr_pz;
+ bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
+ bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
+ bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ FSM_STATE_COPY_WAIT: begin
+ //
+ bram_tx_rd_addr <= mover_addr_x; bram_ty_rd_addr <= mover_addr_x; bram_tz_rd_addr <= mover_addr_x;
+ bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
+ bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
+ bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
+ //
+ end
+
+ //
+ default: begin
+ //
+ bram_tx_rd_addr <= {3{1'bX}}; bram_ty_rd_addr <= {3{1'bX}}; bram_tz_rd_addr <= {3{1'bX}};
+ bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
+ bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
+ bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ endcase
+ //
+ // Worker
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_DOUBLE_WAIT: begin
+ //
+ worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data;
+ worker_din_rx <= bram_tx_wr_data_out; worker_din_ry <= bram_ty_wr_data_out; worker_din_rz <= bram_tz_wr_data_out;
+ //
+ end
+ //
+ FSM_STATE_ADD_WAIT: begin
+ //
+ worker_din_px <= bram_tx_rd_data; worker_din_py <= bram_ty_rd_data; worker_din_pz <= bram_tz_rd_data;
+ worker_din_rx <= bram_rx_wr_data_out; worker_din_ry <= bram_ry_wr_data_out; worker_din_rz <= bram_rz_wr_data_out;
+ //
+ end
+ //
+ FSM_STATE_CONVERT_WAIT: begin
+ //
+ worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data;
+ worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}};
+ //
+ end
+ //
+ default: begin
+ //
+ worker_din_px <= {32{1'bX}}; worker_din_py <= {32{1'bX}}; worker_din_pz <= {32{1'bX}};
+ worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}};
+ //
+ end
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Output Mapping
+ //
+ assign rx_wren = worker_wren_rx && (fsm_state == FSM_STATE_CONVERT_WAIT);
+ assign ry_wren = worker_wren_ry && (fsm_state == FSM_STATE_CONVERT_WAIT);
+
+ assign rx_dout = worker_dout_rx;
+ assign ry_dout = worker_dout_ry;
+
+ assign rx_addr = worker_addr_rx;
+ assign ry_addr = worker_addr_ry;
+
+
+ //
+ // Ready Flag Logic
+ //
+ reg rdy_reg = 1'b1;
+ assign rdy = rdy_reg;
+
+ always @(posedge clk or negedge rst_n)
+
+ if (rst_n == 1'b0) rdy_reg <= 1'b1;
+ else begin
+
+ /* clear flag */
+ if ((fsm_state == FSM_STATE_IDLE) && ena)
+ rdy_reg <= 1'b0;
+
+ /* set flag */
+ if (fsm_state == FSM_STATE_DONE)
+ rdy_reg <= 1'b1;
+
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/curve/rom/brom_p256_delta.v b/rtl/curve/rom/brom_p256_delta.v
new file mode 100644
index 0000000..b9a345a
--- /dev/null
+++ b/rtl/curve/rom/brom_p256_delta.v
@@ -0,0 +1,68 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module brom_p256_delta
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
+
+
+ //
+ // Output Registers
+ //
+ reg [31:0] bram_reg_b;
+
+ assign b_out = bram_reg_b;
+
+
+ //
+ // Read-Only Port B
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h00000000;
+ 3'b001: bram_reg_b <= 32'h00000000;
+ 3'b010: bram_reg_b <= 32'h80000000;
+ 3'b011: bram_reg_b <= 32'h00000000;
+ 3'b100: bram_reg_b <= 32'h00000000;
+ 3'b101: bram_reg_b <= 32'h80000000;
+ 3'b110: bram_reg_b <= 32'h80000000;
+ 3'b111: bram_reg_b <= 32'h7fffffff;
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/rom/brom_p256_g_x.v b/rtl/curve/rom/brom_p256_g_x.v
new file mode 100644
index 0000000..0816ef6
--- /dev/null
+++ b/rtl/curve/rom/brom_p256_g_x.v
@@ -0,0 +1,68 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module brom_p256_g_x
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
+
+
+ //
+ // Output Registers
+ //
+ reg [31:0] bram_reg_b;
+
+ assign b_out = bram_reg_b;
+
+
+ //
+ // Read-Only Port B
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'hd898c296;
+ 3'b001: bram_reg_b <= 32'hf4a13945;
+ 3'b010: bram_reg_b <= 32'h2deb33a0;
+ 3'b011: bram_reg_b <= 32'h77037d81;
+ 3'b100: bram_reg_b <= 32'h63a440f2;
+ 3'b101: bram_reg_b <= 32'hf8bce6e5;
+ 3'b110: bram_reg_b <= 32'he12c4247;
+ 3'b111: bram_reg_b <= 32'h6b17d1f2;
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/rom/brom_p256_g_y.v b/rtl/curve/rom/brom_p256_g_y.v
new file mode 100644
index 0000000..4d9c61e
--- /dev/null
+++ b/rtl/curve/rom/brom_p256_g_y.v
@@ -0,0 +1,68 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module brom_p256_g_y
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
+
+
+ //
+ // Output Registers
+ //
+ reg [31:0] bram_reg_b;
+
+ assign b_out = bram_reg_b;
+
+
+ //
+ // Read-Only Port B
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h37bf51f5;
+ 3'b001: bram_reg_b <= 32'hcbb64068;
+ 3'b010: bram_reg_b <= 32'h6b315ece;
+ 3'b011: bram_reg_b <= 32'h2bce3357;
+ 3'b100: bram_reg_b <= 32'h7c0f9e16;
+ 3'b101: bram_reg_b <= 32'h8ee7eb4a;
+ 3'b110: bram_reg_b <= 32'hfe1a7f9b;
+ 3'b111: bram_reg_b <= 32'h4fe342e2;
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/rom/brom_p256_h_x.v b/rtl/curve/rom/brom_p256_h_x.v
new file mode 100644
index 0000000..0b69f77
--- /dev/null
+++ b/rtl/curve/rom/brom_p256_h_x.v
@@ -0,0 +1,68 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module brom_p256_h_x
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
+
+
+ //
+ // Output Registers
+ //
+ reg [31:0] bram_reg_b;
+
+ assign b_out = bram_reg_b;
+
+
+ //
+ // Read-Only Port B
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h4ece7ad0;
+ 3'b001: bram_reg_b <= 32'h16bd8d74;
+ 3'b010: bram_reg_b <= 32'ha42998be;
+ 3'b011: bram_reg_b <= 32'h11f904fe;
+ 3'b100: bram_reg_b <= 32'h38b77e1b;
+ 3'b101: bram_reg_b <= 32'h0e863235;
+ 3'b110: bram_reg_b <= 32'h3da77b71;
+ 3'b111: bram_reg_b <= 32'h29d05c19;
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/rom/brom_p256_h_y.v b/rtl/curve/rom/brom_p256_h_y.v
new file mode 100644
index 0000000..362fce6
--- /dev/null
+++ b/rtl/curve/rom/brom_p256_h_y.v
@@ -0,0 +1,68 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module brom_p256_h_y
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
+
+
+ //
+ // Output Registers
+ //
+ reg [31:0] bram_reg_b;
+
+ assign b_out = bram_reg_b;
+
+
+ //
+ // Read-Only Port B
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'hc840ae07;
+ 3'b001: bram_reg_b <= 32'h3449bf97;
+ 3'b010: bram_reg_b <= 32'h94cea131;
+ 3'b011: bram_reg_b <= 32'hd431cca9;
+ 3'b100: bram_reg_b <= 32'h83f061e9;
+ 3'b101: bram_reg_b <= 32'h711814b5;
+ 3'b110: bram_reg_b <= 32'h01e58065;
+ 3'b111: bram_reg_b <= 32'hb01cbd1c;
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/rom/brom_p256_one.v b/rtl/curve/rom/brom_p256_one.v
new file mode 100644
index 0000000..4097874
--- /dev/null
+++ b/rtl/curve/rom/brom_p256_one.v
@@ -0,0 +1,68 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module brom_p256_one
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
+
+
+ //
+ // Output Registers
+ //
+ reg [31:0] bram_reg_b;
+
+ assign b_out = bram_reg_b;
+
+
+ //
+ // Read-Only Port B
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h00000001;
+ 3'b001: bram_reg_b <= 32'h00000000;
+ 3'b010: bram_reg_b <= 32'h00000000;
+ 3'b011: bram_reg_b <= 32'h00000000;
+ 3'b100: bram_reg_b <= 32'h00000000;
+ 3'b101: bram_reg_b <= 32'h00000000;
+ 3'b110: bram_reg_b <= 32'h00000000;
+ 3'b111: bram_reg_b <= 32'h00000000;
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/rom/brom_p256_q.v b/rtl/curve/rom/brom_p256_q.v
new file mode 100644
index 0000000..fe94593
--- /dev/null
+++ b/rtl/curve/rom/brom_p256_q.v
@@ -0,0 +1,68 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module brom_p256_q
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
+
+
+ //
+ // Output Registers
+ //
+ reg [31:0] bram_reg_b;
+
+ assign b_out = bram_reg_b;
+
+
+ //
+ // Read-Only Port B
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'hffffffff;
+ 3'b001: bram_reg_b <= 32'hffffffff;
+ 3'b010: bram_reg_b <= 32'hffffffff;
+ 3'b011: bram_reg_b <= 32'h00000000;
+ 3'b100: bram_reg_b <= 32'h00000000;
+ 3'b101: bram_reg_b <= 32'h00000000;
+ 3'b110: bram_reg_b <= 32'h00000001;
+ 3'b111: bram_reg_b <= 32'hffffffff;
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/rom/brom_p256_zero.v b/rtl/curve/rom/brom_p256_zero.v
new file mode 100644
index 0000000..f6d19a1
--- /dev/null
+++ b/rtl/curve/rom/brom_p256_zero.v
@@ -0,0 +1,70 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module brom_p256_zero
+ (
+ //input wire clk,
+ //input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
+
+
+ assign b_out = {32{1'b0}};
+
+ //
+ // Output Registers
+ //
+ //reg [31:0] bram_reg_b;
+
+ //assign b_out = bram_reg_b;
+
+
+ //
+ // Read-Only Port B
+ //
+ //always @(posedge clk)
+ //
+ //case (b_addr)
+ //3'b000: bram_reg_b <= 32'h00000000;
+ //3'b001: bram_reg_b <= 32'h00000000;
+ //3'b010: bram_reg_b <= 32'h00000000;
+ //3'b011: bram_reg_b <= 32'h00000000;
+ //3'b100: bram_reg_b <= 32'h00000000;
+ //3'b101: bram_reg_b <= 32'h00000000;
+ //3'b110: bram_reg_b <= 32'h00000000;
+ //3'b111: bram_reg_b <= 32'h00000000;
+ //endcase
+
+
+endmodule
diff --git a/rtl/curve/uop/uop_add_rom.v b/rtl/curve/uop/uop_add_rom.v
new file mode 100644
index 0000000..c807736
--- /dev/null
+++ b/rtl/curve/uop/uop_add_rom.v
@@ -0,0 +1,66 @@
+`timescale 1ns / 1ps
+
+module uop_add_rom
+ (
+ input wire clk,
+ input wire [ 5: 0] addr,
+ output reg [19: 0] data
+ );
+
+
+ //
+ // Microcode
+ //
+`include "..\uop_ecdsa.v"
+
+
+ //
+ // Addition Microprogram
+ //
+ always @(posedge clk)
+
+ case (addr)
+
+/* 2. */6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
+/* 3. */6'd01: data <= {OPCODE_MOV, UOP_SRC_PZ, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
+ 6'd02: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
+/* 4. */6'd03: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS};
+/* 5. */6'd04: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_G_X, UOP_DST_T1, UOP_EXEC_ALWAYS};
+/* 6. */6'd05: data <= {OPCODE_MUL, UOP_SRC_T2, UOP_SRC_G_Y, UOP_DST_T2, UOP_EXEC_ALWAYS};
+/* 7. */6'd06: data <= {OPCODE_SUB, UOP_SRC_T1, UOP_SRC_PX, UOP_DST_T1, UOP_EXEC_ALWAYS};
+/* 8. */6'd07: data <= {OPCODE_SUB, UOP_SRC_T2, UOP_SRC_PY, UOP_DST_T2, UOP_EXEC_ALWAYS};
+/* 9. */6'd08: data <= {OPCODE_CMP, UOP_SRC_T1, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
+ 6'd09: data <= {OPCODE_CMP, UOP_SRC_T2, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
+/* 10. */6'd10: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_RZ, UOP_EXEC_ALWAYS};
+/* 11. */6'd11: data <= {OPCODE_MOV, UOP_SRC_T1, UOP_SRC_DUMMY, UOP_DST_T3, UOP_EXEC_ALWAYS};
+ 6'd12: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS};
+/* 12. */6'd13: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_T4, UOP_EXEC_ALWAYS};
+/* 13. */6'd14: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS};
+/* 14. */6'd15: data <= {OPCODE_ADD, UOP_SRC_T3, UOP_SRC_T3, UOP_DST_T1, UOP_EXEC_ALWAYS};
+/* 15. */6'd16: data <= {OPCODE_MOV, UOP_SRC_T2, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS};
+ 6'd17: data <= {OPCODE_MUL, UOP_SRC_RX, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS};
+/* 16. */6'd18: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T1, UOP_DST_RX, UOP_EXEC_ALWAYS};
+/* 17. */6'd19: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T4, UOP_DST_RX, UOP_EXEC_ALWAYS};
+/* 18. */6'd20: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_RX, UOP_DST_T3, UOP_EXEC_ALWAYS};
+/* 19. */6'd21: data <= {OPCODE_MUL, UOP_SRC_T2, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS};
+/* 20. */6'd22: data <= {OPCODE_MUL, UOP_SRC_PY, UOP_SRC_T4, UOP_DST_T4, UOP_EXEC_ALWAYS};
+/* 21. */6'd23: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_T4, UOP_DST_RY, UOP_EXEC_ALWAYS};
+
+ 6'd24: data <= {OPCODE_MOV, UOP_SRC_G_X, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX};
+ 6'd25: data <= {OPCODE_MOV, UOP_SRC_G_Y, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX};
+ 6'd26: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_0XX};
+
+ 6'd27: data <= {OPCODE_MOV, UOP_SRC_H_X, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_100};
+ 6'd28: data <= {OPCODE_MOV, UOP_SRC_H_Y, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_100};
+ 6'd29: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_100};
+
+ 6'd30: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_101};
+ 6'd31: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_101};
+ 6'd32: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_101};
+
+ default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
+
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/uop/uop_conv_rom.v b/rtl/curve/uop/uop_conv_rom.v
new file mode 100644
index 0000000..3097736
--- /dev/null
+++ b/rtl/curve/uop/uop_conv_rom.v
@@ -0,0 +1,38 @@
+`timescale 1ns / 1ps
+
+module uop_conv_rom
+ (
+ input wire clk,
+ input wire [ 5: 0] addr,
+ output reg [19: 0] data
+ );
+
+
+ //
+ // Microcode
+ //
+`include "..\uop_ecdsa.v"
+
+
+ //
+ // Doubling Microprogram
+ //
+ always @(posedge clk)
+
+ case (addr)
+
+ 6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
+ 6'd01: data <= {OPCODE_MOV, UOP_SRC_V, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
+ 6'd02: data <= {OPCODE_MUL, UOP_SRC_V, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS};
+ 6'd03: data <= {OPCODE_MUL, UOP_SRC_V, UOP_SRC_T2, UOP_DST_T3, UOP_EXEC_ALWAYS};
+ 6'd04: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS};
+ 6'd05: data <= {OPCODE_MUL, UOP_SRC_PY, UOP_SRC_T3, UOP_DST_RY, UOP_EXEC_ALWAYS};
+ 6'd06: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX};
+ 6'd07: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX};
+
+ default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
+
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/uop/uop_dbl_rom.v b/rtl/curve/uop/uop_dbl_rom.v
new file mode 100644
index 0000000..1939ca9
--- /dev/null
+++ b/rtl/curve/uop/uop_dbl_rom.v
@@ -0,0 +1,58 @@
+`timescale 1ns / 1ps
+
+module uop_dbl_rom
+ (
+ input wire clk,
+ input wire [ 5: 0] addr,
+ output reg [19: 0] data
+ );
+
+
+ //
+ // Microcode
+ //
+`include "..\uop_ecdsa.v"
+
+
+ //
+ // Doubling Microprogram
+ //
+ always @(posedge clk)
+
+ case (addr)
+
+/* 1. */6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
+/* 2. */6'd01: data <= {OPCODE_MOV, UOP_SRC_PZ, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
+ 5'd02: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
+/* 3. */6'd03: data <= {OPCODE_SUB, UOP_SRC_PX, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS};
+/* 4. */6'd04: data <= {OPCODE_ADD, UOP_SRC_PX, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
+/* 5. */6'd05: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T2, UOP_EXEC_ALWAYS};
+/* 6. */6'd06: data <= {OPCODE_ADD, UOP_SRC_T2, UOP_SRC_T2, UOP_DST_T1, UOP_EXEC_ALWAYS};
+ 6'd07: data <= {OPCODE_ADD, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T2, UOP_EXEC_ALWAYS};
+/* 7. */6'd08: data <= {OPCODE_ADD, UOP_SRC_PY, UOP_SRC_PY, UOP_DST_RY, UOP_EXEC_ALWAYS};
+/* 8. */6'd09: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_RY, UOP_DST_RZ, UOP_EXEC_ALWAYS};
+/* 9. */6'd10: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
+ 6'd11: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T3, UOP_EXEC_ALWAYS};
+ 6'd12: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_RY, UOP_EXEC_ALWAYS};
+/* 10. */6'd13: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_RY, UOP_DST_T3, UOP_EXEC_ALWAYS};
+/* 11. */6'd14: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
+ 6'd15: data <= {OPCODE_MUL, UOP_SRC_RY, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
+/* 12. */6'd16: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_DELTA, UOP_DST_RY, UOP_EXEC_ALWAYS};
+/* 13. */6'd17: data <= {OPCODE_MOV, UOP_SRC_T2, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
+ 6'd18: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS};
+/* 14. */6'd19: data <= {OPCODE_ADD, UOP_SRC_T3, UOP_SRC_T3, UOP_DST_T1, UOP_EXEC_ALWAYS};
+/* 15. */6'd20: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T1, UOP_DST_RX, UOP_EXEC_ALWAYS};
+/* 16. */6'd21: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_RX, UOP_DST_T1, UOP_EXEC_ALWAYS};
+/* 17. */6'd22: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T1, UOP_EXEC_ALWAYS};
+/* 18. */6'd23: data <= {OPCODE_SUB, UOP_SRC_T1, UOP_SRC_RY, UOP_DST_RY, UOP_EXEC_ALWAYS};
+
+ 6'd24: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX};
+ 6'd25: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX};
+ 6'd26: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_0XX};
+
+ default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
+
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/uop/uop_init_rom.v b/rtl/curve/uop/uop_init_rom.v
new file mode 100644
index 0000000..ac44b55
--- /dev/null
+++ b/rtl/curve/uop/uop_init_rom.v
@@ -0,0 +1,33 @@
+`timescale 1ns / 1ps
+
+module uop_init_rom
+ (
+ input wire clk,
+ input wire [ 5: 0] addr,
+ output reg [19: 0] data
+ );
+
+
+ //
+ // Microcode
+ //
+`include "..\uop_ecdsa.v"
+
+
+ //
+ // Doubling Microprogram
+ //
+ always @(posedge clk)
+
+ case (addr)
+
+ 6'd00: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS};
+ 6'd01: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_ALWAYS};
+ 6'd02: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_ALWAYS};
+
+ default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
+
+ endcase
+
+
+endmodule
diff --git a/rtl/curve/uop_ecdsa.v b/rtl/curve/uop_ecdsa.v
new file mode 100644
index 0000000..e64119d
--- /dev/null
+++ b/rtl/curve/uop_ecdsa.v
@@ -0,0 +1,50 @@
+localparam [ 4: 0] OPCODE_CMP = 5'b00001;
+localparam [ 4: 0] OPCODE_MOV = 5'b00010;
+localparam [ 4: 0] OPCODE_ADD = 5'b00100;
+localparam [ 4: 0] OPCODE_SUB = 5'b01000;
+localparam [ 4: 0] OPCODE_MUL = 5'b10000;
+localparam [ 4: 0] OPCODE_RDY = 5'b00000;
+
+localparam [ 4: 0] UOP_SRC_PX = 5'h0_0;
+localparam [ 4: 0] UOP_SRC_PY = 5'h0_1;
+localparam [ 4: 0] UOP_SRC_PZ = 5'h0_2;
+
+localparam [ 4: 0] UOP_SRC_RX = 5'h0_3;
+localparam [ 4: 0] UOP_SRC_RY = 5'h0_4;
+localparam [ 4: 0] UOP_SRC_RZ = 5'h0_5;
+
+localparam [ 4: 0] UOP_SRC_T1 = 5'h0_6;
+localparam [ 4: 0] UOP_SRC_T2 = 5'h0_7;
+localparam [ 4: 0] UOP_SRC_T3 = 5'h0_8;
+localparam [ 4: 0] UOP_SRC_T4 = 5'h0_9;
+
+localparam [ 4: 0] UOP_SRC_ONE = 5'h0_A;
+localparam [ 4: 0] UOP_SRC_ZERO = 5'h0_B;
+localparam [ 4: 0] UOP_SRC_DELTA = 5'h0_C;
+
+localparam [ 4: 0] UOP_SRC_V = 5'h0_F;
+
+localparam [ 4: 0] UOP_SRC_G_X = 5'h1_0;
+localparam [ 4: 0] UOP_SRC_G_Y = 5'h1_1;
+
+localparam [ 4: 0] UOP_SRC_H_X = 5'h1_2;
+localparam [ 4: 0] UOP_SRC_H_Y = 5'h1_3;
+
+localparam [ 4: 0] UOP_SRC_DUMMY = 5'hX_X;
+
+localparam [ 2: 0] UOP_DST_RX = 3'd0;
+localparam [ 2: 0] UOP_DST_RY = 3'd1;
+localparam [ 2: 0] UOP_DST_RZ = 3'd2;
+
+localparam [ 2: 0] UOP_DST_T1 = 3'd3;
+localparam [ 2: 0] UOP_DST_T2 = 3'd4;
+localparam [ 2: 0] UOP_DST_T3 = 3'd5;
+localparam [ 2: 0] UOP_DST_T4 = 3'd6;
+
+localparam [ 2: 0] UOP_DST_DUMMY = 3'dX;
+
+localparam UOP_EXEC_ALWAYS = 2'b11; // R
+localparam UOP_EXEC_PZT1T2_0XX = 2'b10; // G
+localparam UOP_EXEC_PZT1T2_100 = 2'b00; // H
+localparam UOP_EXEC_PZT1T2_101 = 2'b01; // O
+
diff --git a/rtl/ecdsa256.v b/rtl/ecdsa256.v
new file mode 100644
index 0000000..86e22e5
--- /dev/null
+++ b/rtl/ecdsa256.v
@@ -0,0 +1,160 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module ecdsa256
+ (
+ input wire clk,
+ input wire rst_n,
+
+ input wire next,
+ output wire valid,
+
+ input wire bus_cs,
+ input wire bus_we,
+ input wire [ 4:0] bus_addr,
+ input wire [31:0] bus_data_wr,
+ output wire [31:0] bus_data_rd
+ );
+
+
+ //
+ // Memory Banks
+ //
+ localparam [1:0] BUS_ADDR_BANK_K = 2'b00;
+ localparam [1:0] BUS_ADDR_BANK_X = 2'b01;
+ localparam [1:0] BUS_ADDR_BANK_Y = 2'b10;
+
+ wire [1:0] bus_addr_upper = bus_addr[4:3];
+ wire [2:0] bus_addr_lower = bus_addr[2:0];
+
+
+ //
+ // Memories
+ //
+
+ wire [31:0] user_rw_k_bram_out;
+ wire [31:0] user_ro_x_bram_out;
+ wire [31:0] user_ro_y_bram_out;
+
+ wire [ 2:0] core_ro_k_bram_addr;
+ wire [ 2:0] core_rw_x_bram_addr;
+ wire [ 2:0] core_rw_y_bram_addr;
+
+ wire core_rw_x_bram_wren;
+ wire core_rw_y_bram_wren;
+
+ wire [31:0] core_ro_k_bram_dout;
+ wire [31:0] core_rw_x_bram_din;
+ wire [31:0] core_rw_y_bram_din;
+
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
+ )
+ bram_k
+ ( .clk(clk),
+ .a_addr(bus_addr_lower), .a_out(user_rw_k_bram_out), .a_wr(bus_cs && bus_we && (bus_addr_upper == BUS_ADDR_BANK_K)), .a_in(bus_data_wr),
+ .b_addr(core_ro_k_bram_addr), .b_out(core_ro_k_bram_dout)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
+ )
+ bram_x
+ ( .clk(clk),
+ .a_addr(core_rw_x_bram_addr), .a_out(), .a_wr(core_rw_x_bram_wren), .a_in(core_rw_x_bram_din),
+ .b_addr(bus_addr_lower), .b_out(user_ro_x_bram_out)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
+ )
+ bram_y
+ ( .clk(clk),
+ .a_addr(core_rw_y_bram_addr), .a_out(), .a_wr(core_rw_y_bram_wren), .a_in(core_rw_y_bram_din),
+ .b_addr(bus_addr_lower), .b_out(user_ro_y_bram_out)
+ );
+
+
+ //
+ // Montgomery Coefficient Calculator
+ //
+ reg next_dly;
+
+ always @(posedge clk) next_dly <= next;
+
+ wire next_trig = next && !next_dly;
+
+ curve_mul_256 base_point_multiplier_p256
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (next_trig),
+ .rdy (valid),
+
+ .k_addr (core_ro_k_bram_addr),
+ .rx_addr (core_rw_x_bram_addr),
+ .ry_addr (core_rw_y_bram_addr),
+
+ .rx_wren (core_rw_x_bram_wren),
+ .ry_wren (core_rw_y_bram_wren),
+
+ .k_din (core_ro_k_bram_dout),
+ .rx_dout (core_rw_x_bram_din),
+ .ry_dout (core_rw_y_bram_din)
+ );
+
+ //
+ // Output Selector
+ //
+ reg [1:0] bus_addr_upper_prev;
+ always @(posedge clk) bus_addr_upper_prev = bus_addr_upper;
+
+ reg [31: 0] bus_data_rd_mux;
+ assign bus_data_rd = bus_data_rd_mux;
+
+ always @(*)
+ //
+ case (bus_addr_upper_prev)
+ //
+ BUS_ADDR_BANK_K: bus_data_rd_mux = user_rw_k_bram_out;
+ BUS_ADDR_BANK_X: bus_data_rd_mux = user_ro_x_bram_out;
+ BUS_ADDR_BANK_Y: bus_data_rd_mux = user_ro_y_bram_out;
+ //
+ default: bus_data_rd_mux = {32{1'b0}};
+ //
+ endcase
+
+endmodule
diff --git a/rtl/ecdsa256_wrapper.v b/rtl/ecdsa256_wrapper.v
new file mode 100644
index 0000000..74f2cbe
--- /dev/null
+++ b/rtl/ecdsa256_wrapper.v
@@ -0,0 +1,177 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module ecdsa256_wrapper
+ (
+ input wire clk,
+ input wire rst_n,
+
+ input wire cs,
+ input wire we,
+
+ input wire [5: 0] address,
+ input wire [31: 0] write_data,
+ output wire [31: 0] read_data
+ );
+
+
+ //
+ // Address Decoder
+ //
+ localparam ADDR_MSB_REGS = 1'b0;
+ localparam ADDR_MSB_CORE = 1'b1;
+
+ wire [0:0] addr_msb = address[5];
+ wire [4:0] addr_lsb = address[4:0];
+
+
+ //
+ // Output Mux
+ //
+ wire [31: 0] read_data_regs;
+ wire [31: 0] read_data_core;
+
+
+ //
+ // Registers
+ //
+ localparam ADDR_NAME0 = 5'h00;
+ localparam ADDR_NAME1 = 5'h01;
+ localparam ADDR_VERSION = 5'h02;
+
+ localparam ADDR_CONTROL = 5'h08; // {next, init}
+ localparam ADDR_STATUS = 5'h09; // {valid, ready}
+ localparam ADDR_DUMMY = 5'h0F; // don't care
+
+// localparam CONTROL_INIT_BIT = 0; -- not used
+ localparam CONTROL_NEXT_BIT = 1;
+
+ localparam STATUS_READY_BIT = 0;
+// localparam STATUS_VALID_BIT = 1; -- hardcoded to always read 1
+
+ localparam CORE_NAME0 = 32'h65636473; // "ecds"
+ localparam CORE_NAME1 = 32'h61323536; // "a256"
+ localparam CORE_VERSION = 32'h302E3130; // "0.10"
+
+
+ //
+ // Registers
+ //
+ reg reg_control;
+ reg [31:0] reg_dummy;
+
+
+ //
+ // Wires
+ //
+ wire reg_status;
+
+
+ //
+ // ECDSA256
+ //
+ ecdsa256 ecdsa256_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .next (reg_control),
+ .valid (reg_status),
+
+ .bus_cs (cs && (addr_msb == ADDR_MSB_CORE)),
+ .bus_we (we),
+ .bus_addr (addr_lsb),
+ .bus_data_wr (write_data),
+ .bus_data_rd (read_data_core)
+ );
+
+
+ //
+ // Read Latch
+ //
+ reg [31: 0] tmp_read_data;
+
+
+ //
+ // Read/Write Interface
+ //
+ always @(posedge clk)
+ //
+ if (!rst_n) begin
+ //
+ reg_control <= 1'b0;
+ //
+ end else if (cs && (addr_msb == ADDR_MSB_REGS)) begin
+ //
+ if (we) begin
+ //
+ // Write Handler
+ //
+ case (addr_lsb)
+ //
+ ADDR_CONTROL: reg_control <= write_data[1];
+ ADDR_DUMMY: reg_dummy <= write_data[31:0];
+ //
+ endcase
+ //
+ end else begin
+ //
+ // Read Handler
+ //
+ case (address)
+ //
+ ADDR_NAME0: tmp_read_data <= CORE_NAME0;
+ ADDR_NAME1: tmp_read_data <= CORE_NAME1;
+ ADDR_VERSION: tmp_read_data <= CORE_VERSION;
+ ADDR_CONTROL: tmp_read_data <= {{30{1'b0}}, reg_control, 1'b0};
+ ADDR_STATUS: tmp_read_data <= {{30{1'b0}}, reg_status, 1'b1};
+ ADDR_DUMMY: tmp_read_data <= reg_dummy;
+ //
+ default: tmp_read_data <= 32'h00000000;
+ //
+ endcase
+ //
+ end
+ //
+ end
+
+
+ //
+ // Register / Core Memory Selector
+ //
+ reg addr_msb_last;
+ always @(posedge clk) addr_msb_last = addr_msb;
+
+ assign read_data = (addr_msb_last == ADDR_MSB_REGS) ? tmp_read_data : read_data_core;
+
+
+endmodule
diff --git a/rtl/lowlevel/adder32_wrapper.v b/rtl/lowlevel/adder32_wrapper.v
new file mode 100644
index 0000000..ebfd8ce
--- /dev/null
+++ b/rtl/lowlevel/adder32_wrapper.v
@@ -0,0 +1,73 @@
+//------------------------------------------------------------------------------
+//
+// adder32_wrapper.v
+// -----------------------------------------------------------------------------
+// Wrapper for 32-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module adder32_wrapper
+ (
+ input clk, // clock
+ input [31: 0] a, // operand input
+ input [31: 0] b, // operand input
+ output [31: 0] s, // sum output
+ input c_in, // carry input
+ output c_out // carry output
+ );
+
+ //
+ // Include Primitive Selector
+ //
+`include "ecdsa_lowlevel_settings.v"
+
+
+ //
+ // Instantiate Vendor/Generic Primitive
+ //
+ `ADDER32_PRIMITIVE adder32_inst
+ (
+ .clk(clk),
+ .a(a),
+ .b(b),
+ .s(s),
+ .c_in(c_in),
+ .c_out(c_out)
+ );
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/adder47_wrapper.v b/rtl/lowlevel/adder47_wrapper.v
new file mode 100644
index 0000000..1a0a18e
--- /dev/null
+++ b/rtl/lowlevel/adder47_wrapper.v
@@ -0,0 +1,69 @@
+//------------------------------------------------------------------------------
+//
+// adder47_wrapper.v
+// -----------------------------------------------------------------------------
+// Wrapper for 47-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module adder47_wrapper
+ (
+ input clk, // clock
+ input [46: 0] a, // operand input
+ input [46: 0] b, // operand input
+ output [46: 0] s // sum output
+ );
+
+ //
+ // Include Primitive Selector
+ //
+`include "ecdsa_lowlevel_settings.v"
+
+
+ //
+ // Instantiate Vendor/Generic Primitive
+ //
+ `ADDER47_PRIMITIVE adder47_inst
+ (
+ .clk(clk),
+ .a(a),
+ .b(b),
+ .s(s)
+ );
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/adder32_artix7.v b/rtl/lowlevel/artix7/adder32_artix7.v
new file mode 100644
index 0000000..5f9ba79
--- /dev/null
+++ b/rtl/lowlevel/artix7/adder32_artix7.v
@@ -0,0 +1,96 @@
+//------------------------------------------------------------------------------
+//
+// adder32_artix7.v
+// -----------------------------------------------------------------------------
+// Hardware (Artix-7 DSP48E1) 32-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module adder32_artix7
+ (
+ input clk, // clock
+ input [31: 0] a, // operand input
+ input [31: 0] b, // operand input
+ output [31: 0] s, // sum output
+ input c_in, // carry input
+ output c_out // carry output
+ );
+
+ //
+ // Lower and higher parts of operand
+ //
+ wire [17: 0] bl = b[17: 0];
+ wire [13: 0] bh = b[31:18];
+
+
+ //
+ // DSP48E1 Slice
+ //
+
+ /* Operation Mode */
+ wire [ 3: 0] dsp48e1_alumode = 4'b0000;
+ wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
+
+ /* Internal Product */
+ wire [47: 0] p_int;
+
+ dsp48e1_wrapper dsp_adder
+ (
+ .clk (clk),
+
+ .ce (1'b1),
+
+ .carry (c_in),
+
+ .alumode (dsp48e1_alumode),
+ .opmode (dsp48e1_opmode),
+
+ .a ({{16{1'b0}}, bh}),
+ .b (bl),
+ .c ({{16{1'b0}}, a}),
+
+ .p (p_int)
+ );
+
+ //
+ // Output Mapping
+ //
+ assign s = p_int[31: 0];
+ assign c_out = p_int[32];
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/adder47_artix7.v b/rtl/lowlevel/artix7/adder47_artix7.v
new file mode 100644
index 0000000..00566e4
--- /dev/null
+++ b/rtl/lowlevel/artix7/adder47_artix7.v
@@ -0,0 +1,91 @@
+//------------------------------------------------------------------------------
+//
+// adder47_artix7.v
+// -----------------------------------------------------------------------------
+// Hardware (Artix-7 DSP48E1) 47-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module adder47_artix7
+ (
+ input clk, // clock
+ input [46: 0] a, // operand input
+ input [46: 0] b, // operand input
+ output [46: 0] s // sum output
+ );
+
+ //
+ // Lower and higher parts of operand
+ //
+ wire [17: 0] bl = b[17: 0];
+ wire [28: 0] bh = b[46:18];
+
+ //
+ // DSP48E1 Slice
+ //
+
+ /* Operation Mode */
+ wire [ 3: 0] dsp48e1_alumode = 4'b0000;
+ wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
+
+ /* Internal Product */
+ wire [47: 0] p_int;
+
+ dsp48e1_wrapper dsp_adder
+ (
+ .clk (clk),
+
+ .ce (1'b1),
+
+ .carry (1'b0),
+
+ .alumode (dsp48e1_alumode),
+ .opmode (dsp48e1_opmode),
+
+ .a ({1'b0, bh}),
+ .b (bl),
+ .c ({1'b0, a}),
+
+ .p (p_int)
+ );
+
+ //
+ // Output Mapping
+ //
+ assign s = p_int[46: 0];
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/dsp48e1_wrapper.v b/rtl/lowlevel/artix7/dsp48e1_wrapper.v
new file mode 100644
index 0000000..9f29ac1
--- /dev/null
+++ b/rtl/lowlevel/artix7/dsp48e1_wrapper.v
@@ -0,0 +1,159 @@
+//------------------------------------------------------------------------------
+//
+// dsp48e1_wrapper.v
+// -----------------------------------------------------------------------------
+// Hardware (Artix-7 DSP48E1) tile wrapper.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module dsp48e1_wrapper
+ (
+ input clk,
+
+ input ce,
+
+ input [ 6: 0] opmode,
+ input [ 3: 0] alumode,
+
+ input carry,
+
+ input [29: 0] a,
+ input [17: 0] b,
+ input [47: 0] c,
+
+ output [47: 0] p
+ );
+
+
+ //
+ // Tile instantiation
+ //
+ DSP48E1 #
+ (
+ .AREG (0),
+ .BREG (0),
+ .CREG (0),
+ .DREG (1),
+ .MREG (0),
+ .PREG (1),
+ .ADREG (0),
+
+ .ACASCREG (0),
+ .BCASCREG (0),
+ .ALUMODEREG (0),
+ .INMODEREG (0),
+ .OPMODEREG (0),
+ .CARRYINREG (0),
+ .CARRYINSELREG (0),
+
+ .A_INPUT ("DIRECT"),
+ .B_INPUT ("DIRECT"),
+
+ .USE_DPORT ("FALSE"),
+ .USE_MULT ("DYNAMIC"),
+ .USE_SIMD ("ONE48"),
+
+ .USE_PATTERN_DETECT ("NO_PATDET"),
+ .SEL_PATTERN ("PATTERN"),
+ .SEL_MASK ("MASK"),
+ .PATTERN (48'h000000000000),
+ .MASK (48'h3fffffffffff),
+ .AUTORESET_PATDET ("NO_RESET")
+ )
+ DSP48E1_inst
+ (
+ .CLK (clk),
+
+ .RSTA (1'b0),
+ .RSTB (1'b0),
+ .RSTC (1'b0),
+ .RSTD (1'b0),
+ .RSTM (1'b0),
+ .RSTP (1'b0),
+
+ .RSTCTRL (1'b0),
+ .RSTINMODE (1'b0),
+ .RSTALUMODE (1'b0),
+ .RSTALLCARRYIN (1'b0),
+
+ .CEA1 (1'b0),
+ .CEA2 (1'b0),
+ .CEB1 (1'b0),
+ .CEB2 (1'b0),
+ .CEC (1'b0),
+ .CED (1'b0),
+ .CEM (1'b0),
+ .CEP (ce),
+ .CEAD (1'b0),
+ .CEALUMODE (1'b0),
+ .CEINMODE (1'b0),
+
+ .CECTRL (1'b0),
+ .CECARRYIN (1'b0),
+
+ .A (a),
+ .B (b),
+ .C (c),
+ .D ({25{1'b1}}),
+ .P (p),
+
+ .CARRYIN (carry),
+ .CARRYOUT (),
+ .CARRYINSEL (3'b000),
+
+ .CARRYCASCIN (1'b0),
+ .CARRYCASCOUT (),
+
+ .PATTERNDETECT (),
+ .PATTERNBDETECT (),
+
+ .OPMODE (opmode),
+ .ALUMODE (alumode),
+ .INMODE (5'b00000),
+
+ .MULTSIGNIN (1'b0),
+ .MULTSIGNOUT (),
+
+ .UNDERFLOW (),
+ .OVERFLOW (),
+
+ .ACIN (30'd0),
+ .BCIN (18'd0),
+ .PCIN (48'd0),
+
+ .ACOUT (),
+ .BCOUT (),
+ .PCOUT ()
+ );
+
+endmodule
diff --git a/rtl/lowlevel/artix7/mac16_artix7.v b/rtl/lowlevel/artix7/mac16_artix7.v
new file mode 100644
index 0000000..09a2413
--- /dev/null
+++ b/rtl/lowlevel/artix7/mac16_artix7.v
@@ -0,0 +1,90 @@
+//------------------------------------------------------------------------------
+//
+// mac16_artix7.v
+// -----------------------------------------------------------------------------
+// Hardware (Artix-7 DSP48E1) 16-bit multiplier and 48-bit accumulator.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module mac16_artix7
+ (
+ input clk, // clock
+ input clr, // clear accumulator (active-high)
+ input ce, // enable clock (active-high)
+ input [15: 0] a, // operand input
+ input [15: 0] b, // operand input
+ output [46: 0] s // sum output
+ );
+
+
+ //
+ // DSP48E1 Slice
+ //
+
+ /* Operation Mode */
+ wire [ 3: 0] dsp48e1_alumode = 4'b0000;
+ wire [ 6: 0] dsp48e1_opmode = {2'b01, clr, 4'b0101};
+
+ /* Internal Product */
+ wire [47: 0] p_int;
+
+ dsp48e1_wrapper dsp_adder
+ (
+ .clk (clk),
+
+ .ce (ce),
+
+ .carry (1'b0),
+
+ .alumode (dsp48e1_alumode),
+ .opmode (dsp48e1_opmode),
+
+ .a ({{14{1'b0}}, a}),
+ .b ({{ 2{1'b0}}, b}),
+ .c ({48{1'b0}}),
+
+ .p (p_int)
+ );
+
+ //
+ // Output Mapping
+ //
+ assign s = p_int[46:0];
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/subtractor32_artix7.v b/rtl/lowlevel/artix7/subtractor32_artix7.v
new file mode 100644
index 0000000..b46ac5c
--- /dev/null
+++ b/rtl/lowlevel/artix7/subtractor32_artix7.v
@@ -0,0 +1,94 @@
+//------------------------------------------------------------------------------
+//
+// subtractor32_artix7.v
+// -----------------------------------------------------------------------------
+// Hardware (Artix-7 DSP48E1) 32-bit subtractor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module subtractor32_artix7
+ (
+ input clk,
+ input [31: 0] a,
+ input [31: 0] b,
+ output [31: 0] d,
+ input b_in,
+ output b_out
+ );
+
+ //
+ // Lower and higher parts of operand
+ //
+ wire [17: 0] bl = b[17: 0];
+ wire [13: 0] bh = b[31:18];
+
+ //
+ // DSP48E1 Slice
+ //
+
+ /* Operation Mode */
+ wire [ 3: 0] dsp48e1_alumode = 4'b0011;
+ wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
+
+ /* Internal Product */
+ wire [47: 0] p_int;
+
+ dsp48e1_wrapper dsp_subtractor
+ (
+ .clk (clk),
+
+ .ce (1'b1),
+
+ .carry (b_in),
+
+ .alumode (dsp48e1_alumode),
+ .opmode (dsp48e1_opmode),
+
+ .a ({{16{1'b0}}, bh}),
+ .b (bl),
+ .c ({{16{1'b0}}, a}),
+
+ .p (p_int)
+ );
+
+ //
+ // Output Mapping
+ //
+ assign d = p_int[31: 0];
+ assign b_out = p_int[32];
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/ecdsa_lowlevel_settings.v b/rtl/lowlevel/ecdsa_lowlevel_settings.v
new file mode 100644
index 0000000..8f95e2f
--- /dev/null
+++ b/rtl/lowlevel/ecdsa_lowlevel_settings.v
@@ -0,0 +1,17 @@
+`define USE_VENDOR_PRIMITIVES
+
+`ifdef USE_VENDOR_PRIMITIVES
+
+`define MAC16_PRIMITIVE mac16_artix7
+`define ADDER32_PRIMITIVE adder32_artix7
+`define ADDER47_PRIMITIVE adder47_artix7
+`define SUBTRACTOR32_PRIMITIVE subtractor32_artix7
+
+`else
+
+`define MAC16_PRIMITIVE mac16_generic
+`define ADDER32_PRIMITIVE adder32_generic
+`define ADDER47_PRIMITIVE adder47_generic
+`define SUBTRACTOR32_PRIMITIVE subtractor32_generic
+
+`endif
diff --git a/rtl/lowlevel/mac16_wrapper.v b/rtl/lowlevel/mac16_wrapper.v
new file mode 100644
index 0000000..b91e518
--- /dev/null
+++ b/rtl/lowlevel/mac16_wrapper.v
@@ -0,0 +1,75 @@
+//------------------------------------------------------------------------------
+//
+// mac16_wrapper.v
+// -----------------------------------------------------------------------------
+// Wrapper for 16-bit multiplier and 48-bit accumulator.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module mac16_wrapper
+ (
+ input clk, // clock
+ input clr, // clear accumulator (active-high)
+ input ce, // enable clock (active-high)
+ input [15: 0] a, // operand input
+ input [15: 0] b, // operand input
+ output [46: 0] s // sum output
+ );
+
+
+ //
+ // Include Primitive Selector
+ //
+`include "ecdsa_lowlevel_settings.v"
+
+
+ //
+ // Instantiate Vendor/Generic Primitive
+ //
+ `MAC16_PRIMITIVE mac16_inst
+ (
+ .clk(clk),
+ .clr(clr),
+ .ce(ce),
+ .a(a),
+ .b(b),
+ .s(s)
+ );
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/subtractor32_wrapper.v b/rtl/lowlevel/subtractor32_wrapper.v
new file mode 100644
index 0000000..3c7e5e9
--- /dev/null
+++ b/rtl/lowlevel/subtractor32_wrapper.v
@@ -0,0 +1,72 @@
+//------------------------------------------------------------------------------
+//
+// subtractor32_wrapper.v
+// -----------------------------------------------------------------------------
+// Wrapper for 32-bit subtractor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module subtractor32_wrapper
+ (
+ input clk,
+ input [31: 0] a,
+ input [31: 0] b,
+ output [31: 0] d,
+ input b_in,
+ output b_out
+ );
+
+ //
+ // Include Primitive Selector
+ //
+`include "ecdsa_lowlevel_settings.v"
+
+
+ //
+ // Instantiate Vendor/Generic Primitive
+ //
+ `SUBTRACTOR32_PRIMITIVE subtractor32_inst
+ (
+ .clk(clk),
+ .a(a),
+ .b(b),
+ .d(d),
+ .b_in(b_in),
+ .b_out(b_out)
+ );
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_adder.v b/rtl/modular/modular_adder.v
new file mode 100644
index 0000000..5641feb
--- /dev/null
+++ b/rtl/modular/modular_adder.v
@@ -0,0 +1,298 @@
+//------------------------------------------------------------------------------
+//
+// modular_adder.v
+// -----------------------------------------------------------------------------
+// Modular adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_adder
+ (
+ clk, rst_n,
+ ena, rdy,
+ ab_addr, n_addr, s_addr, s_wren,
+ a_din, b_din, n_din, s_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter OPERAND_NUM_WORDS = 8;
+ parameter WORD_COUNTER_WIDTH = 3;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
+ WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] s_addr; // index of current S word
+ output wire s_wren; // store current S word now
+
+ input wire [ 31:0] a_din; // A
+ input wire [ 31:0] b_din; // B
+ input wire [ 31:0] n_din; // N
+ output wire [ 31:0] s_dout; // S = (A + B) mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] index_ab;
+ reg [WORD_COUNTER_WIDTH-1:0] index_n;
+ reg [WORD_COUNTER_WIDTH-1:0] index_s;
+
+ /* map registers to output ports */
+ assign ab_addr = index_ab;
+ assign n_addr = index_n;
+ assign s_addr = index_s;
+
+
+ //
+ // Adder
+ //
+ wire [31: 0] add32_s;
+ wire add32_c_in;
+ wire add32_c_out;
+
+ adder32_wrapper adder32
+ (
+ .clk (clk),
+ .a (a_din),
+ .b (b_din),
+ .s (add32_s),
+ .c_in (add32_c_in),
+ .c_out (add32_c_out)
+ );
+
+
+ //
+ // Subtractor
+ //
+ wire [31: 0] sub32_d;
+ wire sub32_b_in;
+ wire sub32_b_out;
+
+ subtractor32_wrapper subtractor32
+ (
+ .clk (clk),
+ .a (add32_s),
+ .b (n_din),
+ .d (sub32_d),
+ .b_in (sub32_b_in),
+ .b_out (sub32_b_out)
+ );
+
+
+ //
+ // FSM
+ //
+
+ localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)];
+
+ wire fsm_latch_msb_carry = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
+ wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
+
+ wire inc_index_ab = |fsm_shreg_inc_index_ab;
+ wire inc_index_n = |fsm_shreg_inc_index_n;
+ wire store_sum_ab = |fsm_shreg_store_sum_ab;
+ wire store_sum_ab_n = |fsm_shreg_store_sum_ab_n;
+ wire store_data_s = |fsm_shreg_store_data_s;
+ wire inc_index_s = |fsm_shreg_inc_index_s;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+
+
+
+
+
+ //
+ // Carry & Borrow Masking Logic
+ //
+ reg add32_c_mask;
+ reg sub32_b_mask;
+
+ always @(posedge clk) begin
+ //
+ add32_c_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
+ sub32_b_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
+ //
+ end
+
+ assign add32_c_in = add32_c_out & ~add32_c_mask;
+ assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
+
+
+ //
+ // Carry & Borrow Latch Logic
+ //
+ reg add32_carry_latch;
+ reg sub32_borrow_latch;
+
+ always @(posedge clk) begin
+ //
+ if (fsm_latch_msb_carry) add32_carry_latch <= add32_c_out;
+ if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out;
+ //
+ end
+
+
+ //
+ // Intermediate Results
+ //
+ reg [32*OPERAND_NUM_WORDS-1:0] s_ab;
+ reg [32*OPERAND_NUM_WORDS-1:0] s_ab_n;
+
+ always @(posedge clk)
+ //
+ if (store_data_s) begin
+ //
+ s_ab <= {{32{1'bX}}, s_ab[32*OPERAND_NUM_WORDS-1:32]};
+ s_ab_n <= {{32{1'bX}}, s_ab_n[32*OPERAND_NUM_WORDS-1:32]};
+ //
+ end else begin
+ //
+ if (store_sum_ab) s_ab <= {add32_s, s_ab[32*OPERAND_NUM_WORDS-1:32]};
+ if (store_sum_ab_n) s_ab_n <= {sub32_d, s_ab_n[32*OPERAND_NUM_WORDS-1:32]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ index_ab <= WORD_INDEX_ZERO;
+ index_n <= WORD_INDEX_ZERO;
+ index_s <= WORD_INDEX_ZERO;
+ //
+ end else begin
+ //
+ if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab);
+ if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n);
+ if (inc_index_s) index_s <= WORD_INDEX_NEXT_OR_ZERO(index_s);
+ //
+ end
+
+
+ //
+ // Output Sum Selector
+ //
+ wire mux_select_ab = sub32_borrow_latch && !add32_carry_latch;
+
+
+ //
+ // Output Data and Write Enable Logic
+ //
+ reg s_wren_reg;
+ reg [31: 0] s_dout_reg;
+ wire [31: 0] s_dout_mux = mux_select_ab ? s_ab[31:0] : s_ab_n[31:0];
+
+ assign s_wren = s_wren_reg;
+ assign s_dout = s_dout_reg;
+
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ s_wren_reg <= 1'b0;
+ s_dout_reg <= {32{1'bX}};
+ //
+ end else begin
+ //
+ s_wren_reg <= store_data_s;
+ s_dout_reg <= store_data_s ? s_dout_mux : {32{1'bX}};
+ //
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v b/rtl/modular/modular_invertor/helper/modinv_helper_copy.v
new file mode 100644
index 0000000..07c1b4f
--- /dev/null
+++ b/rtl/modular/modular_invertor/helper/modinv_helper_copy.v
@@ -0,0 +1,148 @@
+`timescale 1ns / 1ps
+
+module modinv_helper_copy
+ (
+ clk, rst_n,
+ ena, rdy,
+ s_addr, s_din,
+ a1_addr, a1_wren, a1_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter OPERAND_NUM_WORDS = 8;
+ parameter OPERAND_ADDR_BITS = 3;
+
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 2;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+
+ input wire ena;
+ output wire rdy;
+
+ output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [OPERAND_ADDR_BITS-1:0] a1_addr;
+
+ output wire a1_wren;
+
+ input wire [ 31:0] s_din;
+
+ output wire [ 31:0] a1_dout;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [OPERAND_ADDR_BITS-1:0] addr_s;
+
+ wire [OPERAND_ADDR_BITS-1:0] addr_s_max = OPERAND_NUM_WORDS - 1;
+ wire [OPERAND_ADDR_BITS-1:0] addr_s_zero = {OPERAND_ADDR_BITS{1'b0}};
+ wire [OPERAND_ADDR_BITS-1:0] addr_s_next = (addr_s < addr_s_max) ?
+ addr_s + 1'b1 : addr_s_zero;
+
+ reg [OPERAND_ADDR_BITS-1:0] addr_a1;
+
+ wire [OPERAND_ADDR_BITS-1:0] addr_a1_max = OPERAND_NUM_WORDS - 1;
+ wire [OPERAND_ADDR_BITS-1:0] addr_a1_zero = {OPERAND_ADDR_BITS{1'b0}};
+ wire [OPERAND_ADDR_BITS-1:0] addr_a1_next = (addr_a1 < addr_a1_max) ?
+ addr_a1 + 1'b1 : addr_a1_zero;
+
+ assign s_addr = {{(BUFFER_ADDR_BITS - OPERAND_ADDR_BITS){1'b0}}, addr_s};
+ assign a1_addr = addr_a1;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment Logic
+ //
+ wire inc_addr_s;
+ wire inc_addr_a1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_start = 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_stop = OPERAND_NUM_WORDS + 0;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_stop = OPERAND_NUM_WORDS + 1;
+
+ assign inc_addr_s = (proc_cnt >= cnt_inc_addr_s_start) && (proc_cnt <= cnt_inc_addr_s_stop);
+ assign inc_addr_a1 = (proc_cnt >= cnt_inc_addr_a1_start) && (proc_cnt <= cnt_inc_addr_a1_stop);
+
+ always @(posedge clk) begin
+ //
+ if (inc_addr_s) addr_s <= addr_s_next;
+ else addr_s <= addr_s_zero;
+ //
+ if (inc_addr_a1) addr_a1 <= addr_a1_next;
+ else addr_a1 <= addr_a1_zero;
+ //
+ end
+
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_a1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_a1_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_a1_stop = OPERAND_NUM_WORDS + 1;
+
+ assign wren_a1 = (proc_cnt >= cnt_wren_a1_start) && (proc_cnt <= cnt_wren_a1_stop);
+
+ assign a1_wren = wren_a1;
+
+
+ //
+ // Data Logic
+ //
+ assign a1_dout = s_din;
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_init.v b/rtl/modular/modular_invertor/helper/modinv_helper_init.v
new file mode 100644
index 0000000..0468134
--- /dev/null
+++ b/rtl/modular/modular_invertor/helper/modinv_helper_init.v
@@ -0,0 +1,172 @@
+`timescale 1ns / 1ps
+
+module modinv_helper_init
+ (
+ clk, rst_n,
+ ena, rdy,
+ a_addr, a_din,
+ q_addr, q_din,
+ r_addr, r_wren, r_dout,
+ s_addr, s_wren, s_dout,
+ u_addr, u_wren, u_dout,
+ v_addr, v_wren, v_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter OPERAND_NUM_WORDS = 8;
+ parameter OPERAND_ADDR_BITS = 3;
+
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 3;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ output wire [OPERAND_ADDR_BITS-1:0] a_addr;
+ output wire [OPERAND_ADDR_BITS-1:0] q_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] r_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] v_addr;
+
+ output wire r_wren;
+ output wire s_wren;
+ output wire u_wren;
+ output wire v_wren;
+
+ input wire [ 31:0] a_din;
+ input wire [ 31:0] q_din;
+ output wire [ 31:0] r_dout;
+ output wire [ 31:0] s_dout;
+ output wire [ 31:0] u_dout;
+ output wire [ 31:0] v_dout;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [OPERAND_ADDR_BITS-1:0] addr_aq;
+
+ wire [OPERAND_ADDR_BITS-1:0] addr_aq_max = OPERAND_NUM_WORDS - 1;
+ wire [OPERAND_ADDR_BITS-1:0] addr_aq_zero = {OPERAND_ADDR_BITS{1'b0}};
+ wire [OPERAND_ADDR_BITS-1:0] addr_aq_next = (addr_aq < addr_aq_max) ?
+ addr_aq + 1'b1 : addr_aq_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_rsuv;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_max = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_next = (addr_rsuv < addr_rsuv_max) ?
+ addr_rsuv + 1'b1 : addr_rsuv_zero;
+
+ assign a_addr = addr_aq;
+ assign q_addr = addr_aq;
+
+ assign r_addr = addr_rsuv;
+ assign s_addr = addr_rsuv;
+ assign u_addr = addr_rsuv;
+ assign v_addr = addr_rsuv;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment Logic
+ //
+ wire inc_addr_aq;
+ wire inc_addr_rsuv;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_start = 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_stop = OPERAND_NUM_WORDS;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_stop = BUFFER_NUM_WORDS + 1;
+
+ assign inc_addr_aq = (proc_cnt >= cnt_inc_addr_aq_start) && (proc_cnt <= cnt_inc_addr_aq_stop);
+ assign inc_addr_rsuv = (proc_cnt >= cnt_inc_addr_rsuv_start) && (proc_cnt <= cnt_inc_addr_rsuv_stop);
+
+ always @(posedge clk) begin
+ //
+ if (inc_addr_aq) addr_aq <= addr_aq_next;
+ else addr_aq <= addr_aq_zero;
+ //
+ if (inc_addr_rsuv) addr_rsuv <= addr_rsuv_next;
+ else addr_rsuv <= addr_rsuv_zero;
+ //
+ end
+
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_rsuv;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_stop = BUFFER_NUM_WORDS + 1;
+
+ assign wren_rsuv = (proc_cnt >= cnt_wren_rsuv_start) && (proc_cnt <= cnt_wren_rsuv_stop);
+
+ assign r_wren = wren_rsuv;
+ assign s_wren = wren_rsuv;
+ assign u_wren = wren_rsuv;
+ assign v_wren = wren_rsuv;
+
+
+ //
+ // Data Logic
+ //
+ assign r_dout = 32'd0;
+ assign s_dout = (proc_cnt == cnt_wren_rsuv_start) ? 32'd1 : 32'd0;
+ assign u_dout = (proc_cnt != cnt_wren_rsuv_stop) ? q_din : 32'd0;
+ assign v_dout = (proc_cnt != cnt_wren_rsuv_stop) ? a_din : 32'd0;
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v
new file mode 100644
index 0000000..6b65eb1
--- /dev/null
+++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v
@@ -0,0 +1,286 @@
+`timescale 1ns / 1ps
+
+module modinv_helper_invert_compare
+ (
+ clk, rst_n,
+ ena, rdy,
+
+ u_addr, u_din,
+ v_addr, v_din,
+
+ u_gt_v, v_eq_1,
+ u_is_even, v_is_even
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = 1 * BUFFER_NUM_WORDS + 10;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ output wire [BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_addr;
+
+ input wire [ 32-1:0] u_din;
+ input wire [ 32-1:0] v_din;
+
+ output wire u_gt_v;
+ output wire v_eq_1;
+ output wire u_is_even;
+ output wire v_is_even;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [BUFFER_ADDR_BITS-1:0] addr_in;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ?
+ addr_in - 1'b1 : addr_in_last;
+
+ assign u_addr = addr_in;
+ assign v_addr = addr_in;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Decrement Logic
+ //
+ wire dec_addr_in;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 0 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 1 * BUFFER_NUM_WORDS + 0;
+
+ assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop);
+
+ always @(posedge clk)
+ //
+ if (rdy) addr_in <= addr_in_last;
+ else if (dec_addr_in) addr_in <= addr_in_prev;
+
+
+ //
+ // Comparison Stage Flags
+ //
+ wire calc_leg;
+ wire calc_leg_final;
+ wire calc_parity;
+
+ wire [PROC_CNT_BITS-1:0] cnt_calc_leg_start = 0 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_calc_leg_stop = 1 * BUFFER_NUM_WORDS + 2;
+ wire [PROC_CNT_BITS-1:0] cnt_calc_parity = 1 * BUFFER_NUM_WORDS + 1;
+
+ assign calc_leg = (proc_cnt >= cnt_calc_leg_start) && (proc_cnt <= cnt_calc_leg_stop);
+ assign calc_leg_final = (proc_cnt == cnt_calc_leg_stop);
+ assign calc_parity = (proc_cnt == cnt_calc_parity);
+
+
+ //
+ // Dummy Input
+ //
+ reg sub32_din_1_lsb;
+ wire [31: 0] sub32_din_1 = {{31{1'b0}}, sub32_din_1_lsb};
+
+ always @(posedge clk)
+ //
+ sub32_din_1_lsb <= (addr_in == addr_in_zero) ? 1'b1 : 1'b0;
+
+
+ //
+ // Subtractor (u - v)
+ //
+ wire [31: 0] sub32_u_minus_v_difference_out;
+ wire sub32_u_minus_v_borrow_in;
+ wire sub32_u_minus_v_borrow_out;
+
+ subtractor32_wrapper sub32_u_minus_v
+ (
+ .clk (clk),
+ .a (u_din),
+ .b (v_din),
+ .d (sub32_u_minus_v_difference_out),
+ .b_in (sub32_u_minus_v_borrow_in),
+ .b_out (sub32_u_minus_v_borrow_out)
+ );
+
+
+ //
+ // Subtractor (v - 1)
+ //
+ wire [31: 0] sub32_v_minus_1_difference_out;
+ wire sub32_v_minus_1_borrow_in;
+ wire sub32_v_minus_1_borrow_out;
+
+ subtractor32_wrapper sub32_v_minus_1
+ (
+ .clk (clk),
+ .a (v_din),
+ .b (sub32_din_1),
+ .d (sub32_v_minus_1_difference_out),
+ .b_in (sub32_v_minus_1_borrow_in),
+ .b_out (sub32_v_minus_1_borrow_out)
+ );
+
+
+
+ //
+ // Borrow Masking Logic
+ //
+ reg mask_borrow;
+
+ always @(posedge clk)
+ //
+ mask_borrow <= ((proc_cnt > cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop)) ?
+ 1'b0 : 1'b1;
+
+ assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_borrow;
+ assign sub32_v_minus_1_borrow_in = sub32_v_minus_1_borrow_out & ~mask_borrow;
+
+
+ //
+ // Comparison Logic
+ //
+ reg cmp_u_v_l;
+ reg cmp_u_v_e;
+ reg cmp_u_v_g;
+
+ reg cmp_v_1_l;
+ reg cmp_v_1_e;
+ reg cmp_v_1_g;
+
+ wire cmp_unresolved_u_v = !(cmp_u_v_l || cmp_u_v_g);
+ wire cmp_unresolved_v_1 = !(cmp_v_1_l || cmp_v_1_g);
+
+ wire cmp_u_v_borrow_is_set = (sub32_u_minus_v_borrow_out == 1'b1) ? 1'b1 : 1'b0;
+ wire cmp_u_v_difference_is_nonzero = (sub32_u_minus_v_difference_out != 32'd0) ? 1'b1 : 1'b0;
+
+ wire cmp_v_1_borrow_is_set = (sub32_v_minus_1_borrow_out == 1'b1) ? 1'b1 : 1'b0;
+ wire cmp_v_1_difference_is_nonzero = (sub32_v_minus_1_difference_out != 32'd0) ? 1'b1 : 1'b0;
+
+ reg u_is_even_reg;
+ reg v_is_even_reg;
+
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ if (ena) begin
+ //
+ cmp_u_v_l <= 1'b0;
+ cmp_u_v_e <= 1'b0;
+ cmp_u_v_g <= 1'b0;
+ //
+ cmp_v_1_l <= 1'b0;
+ cmp_v_1_e <= 1'b0;
+ cmp_v_1_g <= 1'b0;
+ //
+ u_is_even_reg <= 1'bX;
+ v_is_even_reg <= 1'bX;
+ //
+ end
+ //
+ end else begin
+ //
+ // parity
+ //
+ if (calc_parity) begin
+ u_is_even_reg <= ~u_din[0];
+ v_is_even_reg <= ~v_din[0];
+ end
+ //
+ // u <> v
+ //
+ if (cmp_unresolved_u_v && calc_leg) begin
+ //
+ if (cmp_u_v_borrow_is_set)
+ cmp_u_v_l <= 1'b1;
+ //
+ if (!cmp_u_v_borrow_is_set && cmp_u_v_difference_is_nonzero)
+ cmp_u_v_g <= 1'b1;
+ //
+ if (!cmp_u_v_borrow_is_set && !cmp_u_v_difference_is_nonzero && calc_leg_final)
+ cmp_u_v_e <= 1'b1;
+ //
+ end
+ //
+ // v <> 1
+ //
+ if (cmp_unresolved_v_1 && calc_leg) begin
+ //
+ if (cmp_v_1_borrow_is_set)
+ cmp_v_1_l <= 1'b1;
+ //
+ if (!cmp_v_1_borrow_is_set && cmp_v_1_difference_is_nonzero)
+ cmp_v_1_g <= 1'b1;
+ //
+ if (!cmp_v_1_borrow_is_set && !cmp_v_1_difference_is_nonzero && calc_leg_final)
+ cmp_v_1_e <= 1'b1;
+ //
+ end
+ //
+ end
+
+
+ //
+ // Output Flags
+ //
+ assign u_gt_v = !cmp_u_v_l && !cmp_u_v_e && cmp_u_v_g;
+ assign v_eq_1 = !cmp_v_1_l && cmp_v_1_e && !cmp_v_1_g;
+
+ assign u_is_even = u_is_even_reg;
+ assign v_is_even = v_is_even_reg;
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v
new file mode 100644
index 0000000..ab15563
--- /dev/null
+++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v
@@ -0,0 +1,408 @@
+`timescale 1ns / 1ps
+
+module modinv_helper_invert_precalc
+ (
+ clk, rst_n,
+ ena, rdy,
+
+ r_addr, r_din,
+ s_addr, s_din,
+ u_addr, u_din,
+ v_addr, v_din,
+
+ r_dbl_addr, r_dbl_wren, r_dbl_dout,
+ s_dbl_addr, s_dbl_wren, s_dbl_dout,
+ r_plus_s_addr, r_plus_s_wren, r_plus_s_dout,
+ u_half_addr, u_half_wren, u_half_dout,
+ v_half_addr, v_half_wren, v_half_dout,
+ u_minus_v_addr, u_minus_v_wren, u_minus_v_dout, u_minus_v_din,
+ v_minus_u_addr, v_minus_u_wren, v_minus_u_dout, v_minus_u_din,
+ u_minus_v_half_addr, u_minus_v_half_wren, u_minus_v_half_dout,
+ v_minus_u_half_addr, v_minus_u_half_wren, v_minus_u_half_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ output wire [BUFFER_ADDR_BITS-1:0] r_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_addr;
+
+ input wire [ 32-1:0] r_din;
+ input wire [ 32-1:0] s_din;
+ input wire [ 32-1:0] u_din;
+ input wire [ 32-1:0] v_din;
+
+ output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr;
+
+ output wire [ 32-1:0] r_dbl_dout;
+ output wire [ 32-1:0] s_dbl_dout;
+ output wire [ 32-1:0] r_plus_s_dout;
+ output wire [ 32-1:0] u_half_dout;
+ output wire [ 32-1:0] v_half_dout;
+ output wire [ 32-1:0] u_minus_v_dout;
+ output wire [ 32-1:0] v_minus_u_dout;
+ output wire [ 32-1:0] u_minus_v_half_dout;
+ output wire [ 32-1:0] v_minus_u_half_dout;
+
+ output wire r_dbl_wren;
+ output wire s_dbl_wren;
+ output wire r_plus_s_wren;
+ output wire u_half_wren;
+ output wire v_half_wren;
+ output wire u_minus_v_wren;
+ output wire v_minus_u_wren;
+ output wire u_minus_v_half_wren;
+ output wire v_minus_u_half_wren;
+
+ input wire [ 32-1:0] u_minus_v_din;
+ input wire [ 32-1:0] v_minus_u_din;
+
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [BUFFER_ADDR_BITS-1:0] addr_in;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_last) ?
+ addr_in + 1'b1 : addr_in_zero;
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ?
+ addr_in - 1'b1 : addr_in_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out1;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ?
+ addr_out1 + 1'b1 : addr_out1_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out2;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_next = (addr_out2 < addr_out2_last) ?
+ addr_out2 + 1'b1 : addr_out2_zero;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ?
+ addr_out2 - 1'b1 : addr_out2_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out3;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ?
+ addr_out3 - 1'b1 : addr_out3_last;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out4;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out4_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out4_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out4_prev = (addr_out4 > addr_out4_zero) ?
+ addr_out4 - 1'b1 : addr_out4_last;
+
+
+ assign r_addr = addr_in;
+ assign s_addr = addr_in;
+ assign u_addr = addr_in;
+ assign v_addr = addr_in;
+
+ assign r_dbl_addr = addr_out1;
+ assign s_dbl_addr = addr_out1;
+ assign r_plus_s_addr = addr_out2;
+ assign u_half_addr = addr_out3;
+ assign v_half_addr = addr_out3;
+ assign u_minus_v_addr = addr_out2;
+ assign v_minus_u_addr = addr_out2;
+ assign u_minus_v_half_addr = addr_out4;
+ assign v_minus_u_half_addr = addr_out4;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment/Decrement Logic
+ //
+ wire inc_addr_in;
+ wire dec_addr_in;
+ wire inc_addr_out1;
+ wire inc_addr_out2;
+ wire dec_addr_out2;
+ wire dec_addr_out3;
+ wire dec_addr_out4;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 0 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = 1 * BUFFER_NUM_WORDS - 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 2;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_start = 0 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_stop = 1 * BUFFER_NUM_WORDS + 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 1 * BUFFER_NUM_WORDS + 0;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 2 * BUFFER_NUM_WORDS - 2;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 0;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_start = 1 * BUFFER_NUM_WORDS + 4;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_stop = 2 * BUFFER_NUM_WORDS + 3;
+
+ assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
+ assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop);
+ assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
+ assign inc_addr_out2 = (proc_cnt >= cnt_inc_addr_out2_start) && (proc_cnt <= cnt_inc_addr_out2_stop);
+ assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
+ assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
+ assign dec_addr_out4 = (proc_cnt >= cnt_dec_addr_out4_start) && (proc_cnt <= cnt_dec_addr_out4_stop);
+
+
+ always @(posedge clk) begin
+ //
+ if (rdy) begin
+ //
+ addr_in <= addr_in_zero;
+ addr_out1 <= addr_out1_zero;
+ addr_out2 <= addr_out2_zero;
+ addr_out3 <= addr_out3_last;
+ addr_out4 <= addr_out4_last;
+ //
+ end else begin
+ //
+ if (inc_addr_in) addr_in <= addr_in_next;
+ else if (dec_addr_in) addr_in <= addr_in_prev;
+ //
+ if (inc_addr_out1) addr_out1 <= addr_out1_next;
+ else addr_out1 <= addr_out1_zero;
+ //
+ if (inc_addr_out2) addr_out2 <= addr_out2_next;
+ else if (dec_addr_out2) addr_out2 <= addr_out2_prev;
+ //
+ if (dec_addr_out3) addr_out3 <= addr_out3_prev;
+ else addr_out3 <= addr_out3_last;
+ //
+ if (dec_addr_out4) addr_out4 <= addr_out4_prev;
+ else addr_out4 <= addr_out4_last;
+ //
+ end
+ //
+ end
+
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_out1;
+ wire wren_out2;
+ wire wren_out3;
+ wire wren_out4;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 2;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 0 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 1 * BUFFER_NUM_WORDS + 2;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 0;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out4_start = 1 * BUFFER_NUM_WORDS + 4;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out4_stop = 2 * BUFFER_NUM_WORDS + 3;
+
+ assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
+ assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
+ assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
+ assign wren_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt <= cnt_wren_out4_stop);
+
+ assign r_dbl_wren = wren_out1;
+ assign s_dbl_wren = wren_out1;
+ assign r_plus_s_wren = wren_out2;
+ assign u_half_wren = wren_out3;
+ assign v_half_wren = wren_out3;
+ assign u_minus_v_wren = wren_out2;
+ assign v_minus_u_wren = wren_out2;
+ assign u_minus_v_half_wren = wren_out4;
+ assign v_minus_u_half_wren = wren_out4;
+
+
+ //
+ // Adder (r + s)
+ //
+ wire [31: 0] add32_r_plus_s_sum_out;
+ wire add32_r_plus_s_carry_in;
+ wire add32_r_plus_s_carry_out;
+
+ adder32_wrapper add32_r_plus_s
+ (
+ .clk (clk),
+ .a (r_din),
+ .b (s_din),
+ .s (add32_r_plus_s_sum_out),
+ .c_in (add32_r_plus_s_carry_in),
+ .c_out (add32_r_plus_s_carry_out)
+ );
+
+ //
+ // Subtractor (u - v)
+ //
+ wire [31: 0] sub32_u_minus_v_difference_out;
+ wire sub32_u_minus_v_borrow_in;
+ wire sub32_u_minus_v_borrow_out;
+
+ subtractor32_wrapper sub32_u_minus_v
+ (
+ .clk (clk),
+ .a (u_din),
+ .b (v_din),
+ .d (sub32_u_minus_v_difference_out),
+ .b_in (sub32_u_minus_v_borrow_in),
+ .b_out (sub32_u_minus_v_borrow_out)
+ );
+
+ //
+ // Subtractor (v - u)
+ //
+ wire [31: 0] sub32_v_minus_u_difference_out;
+ wire sub32_v_minus_u_borrow_in;
+ wire sub32_v_minus_u_borrow_out;
+
+ subtractor32_wrapper sub32_v_minus_u
+ (
+ .clk (clk),
+ .a (v_din),
+ .b (u_din),
+ .d (sub32_v_minus_u_difference_out),
+ .b_in (sub32_v_minus_u_borrow_in),
+ .b_out (sub32_v_minus_u_borrow_out)
+ );
+
+
+ //
+ // Carry & Borrow Masking Logic
+ //
+ reg mask_carry_borrow;
+
+ always @(posedge clk)
+ //
+ mask_carry_borrow <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
+ 1'b0 : 1'b1;
+
+ assign add32_r_plus_s_carry_in = add32_r_plus_s_carry_out & ~mask_carry_borrow;
+ assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_carry_borrow;
+ assign sub32_v_minus_u_borrow_in = sub32_v_minus_u_borrow_out & ~mask_carry_borrow;
+
+
+ //
+ // Carry Bits
+ //
+ reg r_dbl_carry;
+ reg s_dbl_carry;
+ reg u_half_carry;
+ reg v_half_carry;
+ reg u_minus_v_half_carry;
+ reg v_minus_u_half_carry;
+
+ always @(posedge clk) begin
+
+ r_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
+ r_din[31] : 1'b0;
+
+ s_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
+ s_din[31] : 1'b0;
+
+ u_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
+ u_din[0] : 1'b0;
+
+ v_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
+ v_din[0] : 1'b0;
+
+ u_minus_v_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
+ u_minus_v_din[0] : 1'b0;
+
+ v_minus_u_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
+ v_minus_u_din[0] : 1'b0;
+
+ end
+
+
+ //
+ // Data Mapper
+ //
+ assign r_dbl_dout = {r_din[30:0], r_dbl_carry};
+ assign s_dbl_dout = {s_din[30:0], s_dbl_carry};
+ assign r_plus_s_dout = add32_r_plus_s_sum_out;
+ assign u_half_dout = {u_half_carry, u_din[31:1]};
+ assign v_half_dout = {v_half_carry, v_din[31:1]};
+ assign u_minus_v_dout = sub32_u_minus_v_difference_out;
+ assign v_minus_u_dout = sub32_v_minus_u_difference_out;
+ assign u_minus_v_half_dout = {u_minus_v_half_carry, u_minus_v_din[31:1]};
+ assign v_minus_u_half_dout = {v_minus_u_half_carry, v_minus_u_din[31:1]};
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v
new file mode 100644
index 0000000..0cd6ac5
--- /dev/null
+++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v
@@ -0,0 +1,257 @@
+`timescale 1ns / 1ps
+
+module modinv_helper_invert_update
+ (
+ clk, rst_n,
+ ena, rdy,
+
+ u_gt_v, v_eq_1,
+ u_is_even, v_is_even,
+
+ r_addr, r_wren, r_dout,
+ s_addr, s_wren, s_dout,
+ u_addr, u_wren, u_dout,
+ v_addr, v_wren, v_dout,
+
+ r_dbl_addr, r_dbl_din,
+ s_dbl_addr, s_dbl_din,
+ r_plus_s_addr, r_plus_s_din,
+ u_half_addr, u_half_din,
+ v_half_addr, v_half_din,
+ u_minus_v_half_addr, u_minus_v_half_din,
+ v_minus_u_half_addr, v_minus_u_half_din
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ input wire u_gt_v;
+ input wire v_eq_1;
+ input wire u_is_even;
+ input wire v_is_even;
+
+ output wire [BUFFER_ADDR_BITS-1:0] r_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_addr;
+
+ output wire r_wren;
+ output wire s_wren;
+ output wire u_wren;
+ output wire v_wren;
+
+ output wire [ 32-1:0] r_dout;
+ output wire [ 32-1:0] s_dout;
+ output wire [ 32-1:0] u_dout;
+ output wire [ 32-1:0] v_dout;
+
+ output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr;
+
+ input wire [ 32-1:0] r_dbl_din;
+ input wire [ 32-1:0] s_dbl_din;
+ input wire [ 32-1:0] r_plus_s_din;
+ input wire [ 32-1:0] u_half_din;
+ input wire [ 32-1:0] v_half_din;
+ input wire [ 32-1:0] u_minus_v_half_din;
+ input wire [ 32-1:0] v_minus_u_half_din;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [BUFFER_ADDR_BITS-1:0] addr_in;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ?
+ addr_in + 1'b1 : addr_in_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ?
+ addr_out + 1'b1 : addr_out_zero;
+
+ assign r_addr = addr_out;
+ assign s_addr = addr_out;
+ assign u_addr = addr_out;
+ assign v_addr = addr_out;
+
+ assign r_dbl_addr = addr_in;
+ assign s_dbl_addr = addr_in;
+ assign r_plus_s_addr = addr_in;
+ assign u_half_addr = addr_in;
+ assign v_half_addr = addr_in;
+ assign u_minus_v_half_addr = addr_in;
+ assign v_minus_u_half_addr = addr_in;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment Logic
+ //
+ wire inc_addr_in;
+ wire inc_addr_out;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1;
+
+ assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
+ assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop);
+
+ always @(posedge clk) begin
+ //
+ if (inc_addr_in) addr_in <= addr_in_next;
+ else addr_in <= addr_in_zero;
+ //
+ if (inc_addr_out) addr_out <= addr_out_next;
+ else addr_out <= addr_out_zero;
+ //
+ end
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_out;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1;
+
+ assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop);
+
+ reg r_wren_allow;
+ reg s_wren_allow;
+ reg u_wren_allow;
+ reg v_wren_allow;
+
+ assign r_wren = wren_out && r_wren_allow && !v_eq_1 && !rdy;
+ assign s_wren = wren_out && s_wren_allow && !v_eq_1 && !rdy;
+ assign u_wren = wren_out && u_wren_allow && !v_eq_1 && !rdy;
+ assign v_wren = wren_out && v_wren_allow && !v_eq_1 && !rdy;
+
+
+ //
+ // Data Logic
+ //
+ reg [31: 0] r_dout_mux;
+ reg [31: 0] s_dout_mux;
+ reg [31: 0] u_dout_mux;
+ reg [31: 0] v_dout_mux;
+
+ assign r_dout = r_dout_mux;
+ assign s_dout = s_dout_mux;
+ assign u_dout = u_dout_mux;
+ assign v_dout = v_dout_mux;
+
+ always @(*) begin
+ //
+ // r, s, u, v
+ //
+ if (u_is_even) begin
+ //
+ u_dout_mux = u_half_din;
+ v_dout_mux = {32{1'bX}};
+ r_dout_mux = {32{1'bX}};
+ s_dout_mux = s_dbl_din;
+ //
+ u_wren_allow = 1'b1;
+ v_wren_allow = 1'b0;
+ r_wren_allow = 1'b0;
+ s_wren_allow = 1'b1;
+ //
+ end else begin
+ //
+ if (v_is_even) begin
+ //
+ u_dout_mux = {32{1'bX}};
+ v_dout_mux = v_half_din;
+ r_dout_mux = r_dbl_din;
+ s_dout_mux = {32{1'bX}};
+ //
+ u_wren_allow = 1'b0;
+ v_wren_allow = 1'b1;
+ r_wren_allow = 1'b1;
+ s_wren_allow = 1'b0;
+ //
+ end else begin
+ //
+ u_dout_mux = u_gt_v ? u_minus_v_half_din : {32{1'bX}};
+ v_dout_mux = u_gt_v ? {32{1'bX}} : v_minus_u_half_din;
+ r_dout_mux = u_gt_v ? r_plus_s_din : r_dbl_din;
+ s_dout_mux = u_gt_v ? s_dbl_din : r_plus_s_din;
+ //
+ u_wren_allow = u_gt_v;
+ v_wren_allow = !u_gt_v;
+ r_wren_allow = 1'b1;
+ s_wren_allow = 1'b1;
+ //
+ end
+ //
+ end
+ //
+ end
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v
new file mode 100644
index 0000000..fb858a6
--- /dev/null
+++ b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v
@@ -0,0 +1,328 @@
+`timescale 1ns / 1ps
+
+module modinv_helper_reduce_precalc
+ (
+ clk, rst_n,
+ ena, rdy,
+
+ k,
+
+ s_is_odd, k_is_nul,
+
+ r_addr, r_din, r_wren, r_dout,
+ s_addr, s_din,
+ u_addr, u_wren, u_dout,
+ v_addr, v_wren, v_dout,
+ q_addr, q_din
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter OPERAND_NUM_WORDS = 8;
+ parameter OPERAND_ADDR_BITS = 3;
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+ parameter K_NUM_BITS = 10;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ input wire [ K_NUM_BITS-1:0] k;
+
+ output wire s_is_odd;
+ output wire k_is_nul;
+
+ output wire [ BUFFER_ADDR_BITS-1:0] r_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [ BUFFER_ADDR_BITS-1:0] v_addr;
+ output wire [OPERAND_ADDR_BITS-1:0] q_addr;
+
+ input wire [ 32-1:0] r_din;
+ input wire [ 32-1:0] s_din;
+ input wire [ 32-1:0] q_din;
+
+ output wire r_wren;
+ output wire u_wren;
+ output wire v_wren;
+
+ output wire [ 32-1:0] r_dout;
+ output wire [ 32-1:0] u_dout;
+ output wire [ 32-1:0] v_dout;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [ BUFFER_ADDR_BITS-1:0] addr_in_buf;
+ reg [OPERAND_ADDR_BITS-1:0] addr_in_op;
+ reg [ BUFFER_ADDR_BITS-1:0] addr_out1;
+ reg [ BUFFER_ADDR_BITS-1:0] addr_out2;
+ reg [ BUFFER_ADDR_BITS-1:0] addr_out3;
+
+ wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_last = BUFFER_NUM_WORDS - 1;
+ wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_next = (addr_in_buf < addr_in_buf_last) ?
+ addr_in_buf + 1'b1 : addr_in_buf_zero;
+ wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_prev = (addr_in_buf > addr_in_buf_zero) ?
+ addr_in_buf - 1'b1 : addr_in_buf_zero;
+
+ wire [OPERAND_ADDR_BITS-1:0] addr_in_op_last = OPERAND_NUM_WORDS - 1;
+ wire [OPERAND_ADDR_BITS-1:0] addr_in_op_zero = {OPERAND_ADDR_BITS{1'b0}};
+ wire [OPERAND_ADDR_BITS-1:0] addr_in_op_next = (addr_in_op < addr_in_op_last) ?
+ addr_in_op + 1'b1 : addr_in_op_zero;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ?
+ addr_out1 + 1'b1 : addr_out1_zero;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out1_prev = (addr_out1 > addr_out1_zero) ?
+ addr_out1 - 1'b1 : addr_out1_zero;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ?
+ addr_out2 - 1'b1 : addr_out2_last;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ?
+ addr_out3 - 1'b1 : addr_out3_last;
+
+
+ assign s_addr = addr_in_buf;
+ assign q_addr = addr_in_op;
+ assign r_addr = addr_out1;
+ assign u_addr = addr_out2;
+ assign v_addr = addr_out3;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment/Decrement Logic
+ //
+ wire inc_addr_buf_in;
+ wire dec_addr_buf_in;
+ wire inc_addr_op_in;
+ wire inc_addr_out1;
+ wire dec_addr_out1;
+ wire dec_addr_out2;
+ wire dec_addr_out3;
+
+ wire [PROC_CNT_BITS-1:0] cnt_calc_flags = 0 * BUFFER_NUM_WORDS + 2;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_start = 0 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_stop = 1 * BUFFER_NUM_WORDS - 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_start = 1 * BUFFER_NUM_WORDS + 0;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_stop = 2 * BUFFER_NUM_WORDS - 2;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_start = 0 * OPERAND_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_stop = 1 * OPERAND_NUM_WORDS + 0;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_start = 1 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_stop = 2 * BUFFER_NUM_WORDS + 1;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 0;
+
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 4;
+ wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 3;
+
+ assign inc_addr_buf_in = (proc_cnt >= cnt_inc_addr_buf_in_start) && (proc_cnt <= cnt_inc_addr_buf_in_stop);
+ assign dec_addr_buf_in = (proc_cnt >= cnt_dec_addr_buf_in_start) && (proc_cnt <= cnt_dec_addr_buf_in_stop);
+ assign inc_addr_op_in = (proc_cnt >= cnt_inc_addr_op_in_start) && (proc_cnt <= cnt_inc_addr_op_in_stop);
+ assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
+ assign dec_addr_out1 = (proc_cnt >= cnt_dec_addr_out1_start) && (proc_cnt <= cnt_dec_addr_out1_stop);
+ assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
+ assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
+
+ always @(posedge clk) begin
+ //
+ if (rdy) begin
+ //
+ addr_in_buf <= addr_in_buf_zero;
+ addr_in_op <= addr_in_op_zero;
+ addr_out1 <= addr_out1_zero;
+ addr_out2 <= addr_out2_last;
+ addr_out3 <= addr_out3_last;
+ //
+ end else begin
+ //
+ if (inc_addr_buf_in) addr_in_buf <= addr_in_buf_next;
+ else if (dec_addr_buf_in) addr_in_buf <= addr_in_buf_prev;
+ //
+ if (inc_addr_op_in) addr_in_op <= addr_in_op_next;
+ else addr_in_op <= addr_in_op_zero;
+ //
+ if (inc_addr_out1) addr_out1 <= addr_out1_next;
+ else if (dec_addr_out1) addr_out1 <= addr_out1_prev;
+ //
+ if (dec_addr_out2) addr_out2 <= addr_out2_prev;
+ else addr_out2 <= addr_out2_last;
+ //
+ if (dec_addr_out3) addr_out3 <= addr_out3_prev;
+ else addr_out3 <= addr_out3_last;
+ //
+ end
+ //
+ end
+
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_out1;
+ wire wren_out2;
+ wire wren_out3;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 3;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 2;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 1 * BUFFER_NUM_WORDS + 1;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 2 * BUFFER_NUM_WORDS + 0;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 4;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 3;
+
+ assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
+ assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
+ assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
+
+ assign r_wren = wren_out1;
+ assign u_wren = wren_out2;
+ assign v_wren = wren_out3;
+
+ //
+ // Adder (s + q)
+ //
+ wire [31: 0] q_din_masked;
+ wire [31: 0] add32_s_plus_q_sum_out;
+ wire add32_s_plus_q_carry_in;
+ wire add32_s_plus_q_carry_out;
+
+ adder32_wrapper add32_r_plus_s
+ (
+ .clk (clk),
+ .a (s_din),
+ .b (q_din_masked),
+ .s (add32_s_plus_q_sum_out),
+ .c_in (add32_s_plus_q_carry_in),
+ .c_out (add32_s_plus_q_carry_out)
+ );
+
+
+ //
+ // Carry Masking Logic
+ //
+ wire mask_carry;
+
+ assign mask_carry = ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? 1'b0 : 1'b1;
+
+
+ //
+ // Addend Masking Logic
+ //
+ reg q_din_mask;
+
+ always @(posedge clk)
+ q_din_mask <= (addr_in_buf == addr_in_buf_last) ? 1'b1 : 1'b0;
+
+ assign q_din_masked = q_din_mask ? {32{1'b0}} : q_din;
+
+ assign add32_s_plus_q_carry_in = add32_s_plus_q_carry_out & ~mask_carry;
+
+
+ //
+ // Carry Bits
+ //
+ reg s_half_carry;
+ reg s_plus_q_half_carry;
+
+ always @(posedge clk) begin
+ //
+ s_half_carry <= ((proc_cnt >= cnt_wren_out2_start) && (proc_cnt < cnt_wren_out2_stop)) ?
+ s_din[0] : 1'b0;
+ //
+ s_plus_q_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
+ r_din[0] : 1'b0;
+ //
+ end
+
+ //
+ // Data Mapper
+ //
+ assign r_dout = add32_s_plus_q_sum_out;
+ assign u_dout = {s_half_carry, s_din[31:1]};
+ assign v_dout = {s_plus_q_half_carry, r_din[31:1]};
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+ //
+ // Output Flags
+ //
+ reg s_is_odd_reg;
+ reg k_is_nul_reg;
+
+ assign s_is_odd = s_is_odd_reg;
+ assign k_is_nul = k_is_nul_reg;
+
+ always @(posedge clk)
+ //
+ if (proc_cnt == cnt_calc_flags) begin
+ s_is_odd_reg <= s_din[0];
+ k_is_nul_reg <= (k == {K_NUM_BITS{1'b0}}) ? 1'b1 : 1'b0;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v
new file mode 100644
index 0000000..ea5b854
--- /dev/null
+++ b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v
@@ -0,0 +1,153 @@
+`timescale 1ns / 1ps
+
+module modinv_helper_reduce_update
+ (
+ clk, rst_n,
+ ena, rdy,
+
+ s_is_odd, k_is_nul,
+
+ s_addr, s_wren, s_dout,
+ u_addr, u_din,
+ v_addr, v_din
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter BUFFER_NUM_WORDS = 9;
+ parameter BUFFER_ADDR_BITS = 4;
+
+
+ //
+ // clog2
+ //
+`include "..\modinv_clog2.v"
+
+
+ //
+ // Constants
+ //
+ localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3;
+ localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+ input wire ena;
+ output wire rdy;
+
+ input wire s_is_odd;
+ input wire k_is_nul;
+
+ output wire [BUFFER_ADDR_BITS-1:0] s_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] u_addr;
+ output wire [BUFFER_ADDR_BITS-1:0] v_addr;
+
+ output wire s_wren;
+
+ output wire [ 32-1:0] s_dout;
+
+ input wire [ 32-1:0] u_din;
+ input wire [ 32-1:0] v_din;
+
+
+ //
+ // Counter
+ //
+ reg [PROC_CNT_BITS-1:0] proc_cnt;
+
+ wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
+ wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
+ wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
+ proc_cnt + 1'b1 : proc_cnt_zero;
+
+ //
+ // Addresses
+ //
+ reg [BUFFER_ADDR_BITS-1:0] addr_in;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ?
+ addr_in + 1'b1 : addr_in_zero;
+
+ reg [BUFFER_ADDR_BITS-1:0] addr_out;
+
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1;
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}};
+ wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ?
+ addr_out + 1'b1 : addr_out_zero;
+
+ assign s_addr = addr_out;
+ assign u_addr = addr_in;
+ assign v_addr = addr_in;
+
+
+ //
+ // Ready Flag
+ //
+ assign rdy = (proc_cnt == proc_cnt_zero);
+
+
+ //
+ // Address Increment Logic
+ //
+ wire inc_addr_in;
+ wire inc_addr_out;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS;
+
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1;
+
+ assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
+ assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop);
+
+ always @(posedge clk) begin
+ //
+ if (inc_addr_in) addr_in <= addr_in_next;
+ else addr_in <= addr_in_zero;
+ //
+ if (inc_addr_out) addr_out <= addr_out_next;
+ else addr_out <= addr_out_zero;
+ //
+ end
+
+ //
+ // Write Enable Logic
+ //
+ wire wren_out;
+
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2;
+ wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1;
+
+ assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop);
+
+ assign s_wren = wren_out && !k_is_nul; //s_wren_allow && !v_eq_1 && !rdy;
+
+
+ //
+ // Data Logic
+ //
+ assign s_dout = s_is_odd ? v_din : u_din;
+
+
+ //
+ // Primary Counter Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
+ else begin
+ if (!rdy) proc_cnt <= proc_cnt_next;
+ else if (ena) proc_cnt <= proc_cnt_next;
+ end
+
+
+endmodule
diff --git a/rtl/modular/modular_invertor/modinv_clog2.v b/rtl/modular/modular_invertor/modinv_clog2.v
new file mode 100644
index 0000000..2f7b64d
--- /dev/null
+++ b/rtl/modular/modular_invertor/modinv_clog2.v
@@ -0,0 +1,10 @@
+function integer clog2;
+ input integer value;
+ integer result;
+ begin
+ value = value - 1;
+ for (result = 0; value > 0; result = result + 1)
+ value = value >> 1;
+ clog2 = result;
+ end
+endfunction
diff --git a/rtl/modular/modular_invertor/modular_invertor.v b/rtl/modular/modular_invertor/modular_invertor.v
new file mode 100644
index 0000000..e9f2460
--- /dev/null
+++ b/rtl/modular/modular_invertor/modular_invertor.v
@@ -0,0 +1,981 @@
+//------------------------------------------------------------------------------
+//
+// modular_invertor.v
+// -----------------------------------------------------------------------------
+// Modular invertor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_invertor
+ (
+ clk, rst_n,
+ ena, rdy,
+ a_addr, q_addr, a1_addr, a1_wren,
+ a_din, q_din, a1_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter MAX_OPERAND_WIDTH = 256;
+
+
+ //
+ // clog2
+ //
+`include "modinv_clog2.v"
+
+
+ //
+ // More Parameters
+ //
+ localparam OPERAND_NUM_WORDS = MAX_OPERAND_WIDTH / 32;
+ localparam OPERAND_ADDR_BITS = clog2(OPERAND_NUM_WORDS);
+
+ localparam BUFFER_NUM_WORDS = OPERAND_NUM_WORDS + 1;
+ localparam BUFFER_ADDR_BITS = clog2(BUFFER_NUM_WORDS);
+
+ localparam LOOP_NUM_ROUNDS = 2 * MAX_OPERAND_WIDTH;
+ localparam ROUND_COUNTER_BITS = clog2(LOOP_NUM_ROUNDS);
+
+ localparam K_NUM_BITS = clog2(LOOP_NUM_ROUNDS + 1);
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire rst_n;
+
+ input wire ena;
+ output wire rdy;
+
+ output wire [OPERAND_ADDR_BITS-1:0] a_addr;
+ output reg [OPERAND_ADDR_BITS-1:0] q_addr;
+ output wire [OPERAND_ADDR_BITS-1:0] a1_addr;
+ output wire a1_wren;
+
+ input wire [32-1:0] a_din;
+ input wire [32-1:0] q_din;
+ output wire [32-1:0] a1_dout;
+
+
+ //
+ // "Redundant" Power of 2 (K)
+ //
+ reg [K_NUM_BITS-1:0] k;
+
+
+ //
+ // Buffers
+ //
+ reg [BUFFER_ADDR_BITS-1:0] buf_r_wr_addr;
+ reg [BUFFER_ADDR_BITS-1:0] buf_r_rd_addr;
+ reg buf_r_wr_en;
+ reg [ 32-1:0] buf_r_wr_din;
+ wire [ 32-1:0] buf_r_wr_dout;
+ wire [ 32-1:0] buf_r_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_r
+ ( .clk(clk),
+ .a_addr(buf_r_wr_addr), .a_out(buf_r_wr_dout), .a_wr(buf_r_wr_en), .a_in(buf_r_wr_din),
+ .b_addr(buf_r_rd_addr), .b_out(buf_r_rd_dout)
+ );
+
+ reg [BUFFER_ADDR_BITS-1:0] buf_s_wr_addr;
+ reg [BUFFER_ADDR_BITS-1:0] buf_s_rd_addr;
+ reg buf_s_wr_en;
+ reg [ 32-1:0] buf_s_wr_din;
+ wire [ 32-1:0] buf_s_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_s
+ ( .clk(clk),
+ .a_addr(buf_s_wr_addr), .a_out(), .a_wr(buf_s_wr_en), .a_in(buf_s_wr_din),
+ .b_addr(buf_s_rd_addr), .b_out(buf_s_rd_dout)
+ );
+
+ reg [BUFFER_ADDR_BITS-1:0] buf_u_wr_addr;
+ reg [BUFFER_ADDR_BITS-1:0] buf_u_rd_addr;
+ reg buf_u_wr_en;
+ reg [ 32-1:0] buf_u_wr_din;
+ wire [ 32-1:0] buf_u_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_u
+ ( .clk(clk),
+ .a_addr(buf_u_wr_addr), .a_out(), .a_wr(buf_u_wr_en), .a_in(buf_u_wr_din),
+ .b_addr(buf_u_rd_addr), .b_out(buf_u_rd_dout)
+ );
+
+ reg [BUFFER_ADDR_BITS-1:0] buf_v_wr_addr;
+ reg [BUFFER_ADDR_BITS-1:0] buf_v_rd_addr;
+ reg buf_v_wr_en;
+ reg [ 32-1:0] buf_v_wr_din;
+ wire [ 32-1:0] buf_v_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_v
+ ( .clk(clk),
+ .a_addr(buf_v_wr_addr), .a_out(), .a_wr(buf_v_wr_en), .a_in(buf_v_wr_din),
+ .b_addr(buf_v_rd_addr), .b_out(buf_v_rd_dout)
+ );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_rd_addr;
+ wire buf_r_dbl_wr_en;
+ wire [ 32-1:0] buf_r_dbl_wr_din;
+ wire [ 32-1:0] buf_r_dbl_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_r_dbl
+ ( .clk(clk),
+ .a_addr(buf_r_dbl_wr_addr), .a_out(), .a_wr(buf_r_dbl_wr_en), .a_in(buf_r_dbl_wr_din),
+ .b_addr(buf_r_dbl_rd_addr), .b_out(buf_r_dbl_rd_dout)
+ );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_rd_addr;
+ wire buf_s_dbl_wr_en;
+ wire [ 32-1:0] buf_s_dbl_wr_din;
+ wire [ 32-1:0] buf_s_dbl_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_s_dbl
+ ( .clk(clk),
+ .a_addr(buf_s_dbl_wr_addr), .a_out(), .a_wr(buf_s_dbl_wr_en), .a_in(buf_s_dbl_wr_din),
+ .b_addr(buf_s_dbl_rd_addr), .b_out(buf_s_dbl_rd_dout)
+ );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_rd_addr;
+ wire buf_r_plus_s_wr_en;
+ wire [ 32-1:0] buf_r_plus_s_wr_din;
+ wire [ 32-1:0] buf_r_plus_s_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_r_plus_s
+ ( .clk(clk),
+ .a_addr(buf_r_plus_s_wr_addr), .a_out(), .a_wr(buf_r_plus_s_wr_en), .a_in(buf_r_plus_s_wr_din),
+ .b_addr(buf_r_plus_s_rd_addr), .b_out(buf_r_plus_s_rd_dout)
+ );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_rd_addr;
+ wire buf_u_minus_v_wr_en;
+ wire [ 32-1:0] buf_u_minus_v_wr_din;
+ wire [ 32-1:0] buf_u_minus_v_wr_dout;
+
+ assign buf_u_minus_v_rd_addr = ~buf_u_minus_v_wr_addr;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_u_minus_v
+ ( .clk(clk),
+ .a_addr(buf_u_minus_v_wr_addr), .a_out(buf_u_minus_v_wr_dout), .a_wr(buf_u_minus_v_wr_en), .a_in(buf_u_minus_v_wr_din),
+ .b_addr(buf_u_minus_v_rd_addr), .b_out()
+ );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_rd_addr;
+ wire buf_v_minus_u_wr_en;
+ wire [ 32-1:0] buf_v_minus_u_wr_din;
+ wire [ 32-1:0] buf_v_minus_u_wr_dout;
+
+ assign buf_v_minus_u_rd_addr = ~buf_v_minus_u_wr_addr;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_v_minus_u
+ ( .clk(clk),
+ .a_addr(buf_v_minus_u_wr_addr), .a_out(buf_v_minus_u_wr_dout), .a_wr(buf_v_minus_u_wr_en), .a_in(buf_v_minus_u_wr_din),
+ .b_addr(buf_v_minus_u_rd_addr), .b_out()
+ );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_half_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_half_rd_addr;
+ wire buf_u_half_wr_en;
+ wire [ 32-1:0] buf_u_half_wr_din;
+ wire [ 32-1:0] buf_u_half_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_u_half
+ ( .clk(clk),
+ .a_addr(buf_u_half_wr_addr), .a_out(), .a_wr(buf_u_half_wr_en), .a_in(buf_u_half_wr_din),
+ .b_addr(buf_u_half_rd_addr), .b_out(buf_u_half_rd_dout)
+ );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_half_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_half_rd_addr;
+ wire buf_v_half_wr_en;
+ wire [ 32-1:0] buf_v_half_wr_din;
+ wire [ 32-1:0] buf_v_half_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_v_half
+ ( .clk(clk),
+ .a_addr(buf_v_half_wr_addr), .a_out(), .a_wr(buf_v_half_wr_en), .a_in(buf_v_half_wr_din),
+ .b_addr(buf_v_half_rd_addr), .b_out(buf_v_half_rd_dout)
+ );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_rd_addr;
+ wire buf_u_minus_v_half_wr_en;
+ wire [ 32-1:0] buf_u_minus_v_half_wr_din;
+ wire [ 32-1:0] buf_u_minus_v_half_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_u_minus_v_half
+ ( .clk(clk),
+ .a_addr(buf_u_minus_v_half_wr_addr), .a_out(), .a_wr(buf_u_minus_v_half_wr_en), .a_in(buf_u_minus_v_half_wr_din),
+ .b_addr(buf_u_minus_v_half_rd_addr), .b_out(buf_u_minus_v_half_rd_dout)
+ );
+
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_wr_addr;
+ wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_rd_addr;
+ wire buf_v_minus_u_half_wr_en;
+ wire [ 32-1:0] buf_v_minus_u_half_wr_din;
+ wire [ 32-1:0] buf_v_minus_u_half_rd_dout;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
+ )
+ buf_v_minus_u_half
+ ( .clk(clk),
+ .a_addr(buf_v_minus_u_half_wr_addr), .a_out(), .a_wr(buf_v_minus_u_half_wr_en), .a_in(buf_v_minus_u_half_wr_din),
+ .b_addr(buf_v_minus_u_half_rd_addr), .b_out(buf_v_minus_u_half_rd_dout)
+ );
+
+
+ //
+ // Helper Modules
+ //
+ wire helper_init_ena;
+ wire helper_invert_precalc_ena;
+ wire helper_invert_compare_ena;
+ wire helper_invert_update_ena;
+ wire helper_reduce_precalc_ena;
+ wire helper_reduce_update_ena;
+ wire helper_copy_ena;
+
+ wire helper_init_rdy;
+ wire helper_invert_precalc_rdy;
+ wire helper_invert_compare_rdy;
+ wire helper_invert_update_rdy;
+ wire helper_reduce_precalc_rdy;
+ wire helper_reduce_update_rdy;
+ wire helper_copy_rdy;
+
+ wire helper_init_done = helper_init_rdy && !helper_init_ena;
+ wire helper_invert_precalc_done = helper_invert_precalc_rdy && !helper_invert_precalc_ena;
+ wire helper_invert_compare_done = helper_invert_compare_rdy && !helper_invert_compare_ena;
+ wire helper_invert_update_done = helper_invert_update_rdy && !helper_invert_update_ena;
+ wire helper_reduce_precalc_done = helper_reduce_precalc_rdy && !helper_reduce_precalc_ena;
+ wire helper_reduce_update_done = helper_reduce_update_rdy && !helper_reduce_update_ena;
+ wire helper_copy_done = helper_copy_rdy && !helper_copy_ena;
+
+
+ //
+ // Helper Module - Initialization
+ //
+ wire [ BUFFER_ADDR_BITS-1:0] helper_init_r_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_init_s_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_init_u_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_init_v_addr;
+ wire [OPERAND_ADDR_BITS-1:0] helper_init_q_addr;
+
+ wire helper_init_r_wren;
+ wire helper_init_s_wren;
+ wire helper_init_u_wren;
+ wire helper_init_v_wren;
+
+ wire [ 32-1:0] helper_init_r_data;
+ wire [ 32-1:0] helper_init_s_data;
+ wire [ 32-1:0] helper_init_u_data;
+ wire [ 32-1:0] helper_init_v_data;
+
+ modinv_helper_init #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
+
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_init
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_init_ena),
+ .rdy (helper_init_rdy),
+
+ .a_addr (a_addr),
+ .q_addr (helper_init_q_addr),
+
+ .r_addr (helper_init_r_addr),
+ .s_addr (helper_init_s_addr),
+ .u_addr (helper_init_u_addr),
+ .v_addr (helper_init_v_addr),
+
+ .q_din (q_din),
+ .a_din (a_din),
+
+ .r_dout (helper_init_r_data),
+ .s_dout (helper_init_s_data),
+ .u_dout (helper_init_u_data),
+ .v_dout (helper_init_v_data),
+
+ .r_wren (helper_init_r_wren),
+ .s_wren (helper_init_s_wren),
+ .u_wren (helper_init_u_wren),
+ .v_wren (helper_init_v_wren)
+ );
+
+
+ //
+ // Helper Module - Inversion Pre-Calculation
+ //
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_r_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_s_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_u_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_v_addr;
+
+ modinv_helper_invert_precalc #
+ (
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_invert_precalc
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_invert_precalc_ena),
+ .rdy (helper_invert_precalc_rdy),
+
+ .r_addr (helper_invert_precalc_r_addr),
+ .s_addr (helper_invert_precalc_s_addr),
+ .u_addr (helper_invert_precalc_u_addr),
+ .v_addr (helper_invert_precalc_v_addr),
+
+ .r_din (buf_r_rd_dout),
+ .s_din (buf_s_rd_dout),
+ .u_din (buf_u_rd_dout),
+ .v_din (buf_v_rd_dout),
+
+ .r_dbl_addr (buf_r_dbl_wr_addr),
+ .s_dbl_addr (buf_s_dbl_wr_addr),
+ .r_plus_s_addr (buf_r_plus_s_wr_addr),
+
+ .u_half_addr (buf_u_half_wr_addr),
+ .v_half_addr (buf_v_half_wr_addr),
+ .u_minus_v_addr (buf_u_minus_v_wr_addr),
+ .v_minus_u_addr (buf_v_minus_u_wr_addr),
+ .u_minus_v_half_addr (buf_u_minus_v_half_wr_addr),
+ .v_minus_u_half_addr (buf_v_minus_u_half_wr_addr),
+
+ .r_dbl_dout (buf_r_dbl_wr_din),
+ .s_dbl_dout (buf_s_dbl_wr_din),
+ .r_plus_s_dout (buf_r_plus_s_wr_din),
+
+ .u_half_dout (buf_u_half_wr_din),
+ .v_half_dout (buf_v_half_wr_din),
+ .u_minus_v_dout (buf_u_minus_v_wr_din),
+ .v_minus_u_dout (buf_v_minus_u_wr_din),
+ .u_minus_v_half_dout (buf_u_minus_v_half_wr_din),
+ .v_minus_u_half_dout (buf_v_minus_u_half_wr_din),
+
+ .r_dbl_wren (buf_r_dbl_wr_en),
+ .s_dbl_wren (buf_s_dbl_wr_en),
+ .r_plus_s_wren (buf_r_plus_s_wr_en),
+
+ .u_half_wren (buf_u_half_wr_en),
+ .v_half_wren (buf_v_half_wr_en),
+ .u_minus_v_wren (buf_u_minus_v_wr_en),
+ .v_minus_u_wren (buf_v_minus_u_wr_en),
+ .u_minus_v_half_wren (buf_u_minus_v_half_wr_en),
+ .v_minus_u_half_wren (buf_v_minus_u_half_wr_en),
+
+ .u_minus_v_din (buf_u_minus_v_wr_dout),
+ .v_minus_u_din (buf_v_minus_u_wr_dout)
+ );
+
+
+ //
+ // Helper Module - Inversion Comparison
+ //
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_u_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_v_addr;
+
+ wire flag_invert_u_gt_v;
+ wire flag_invert_v_eq_1;
+ wire flag_invert_u_is_even;
+ wire flag_invert_v_is_even;
+
+ modinv_helper_invert_compare #
+ (
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_invert_compare
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_invert_compare_ena),
+ .rdy (helper_invert_compare_rdy),
+
+ .u_addr (helper_invert_compare_u_addr),
+ .v_addr (helper_invert_compare_v_addr),
+
+ .u_din (buf_u_rd_dout),
+ .v_din (buf_v_rd_dout),
+
+ .u_gt_v (flag_invert_u_gt_v),
+ .v_eq_1 (flag_invert_v_eq_1),
+ .u_is_even (flag_invert_u_is_even),
+ .v_is_even (flag_invert_v_is_even)
+ );
+
+
+ //
+ // Helper Module - Inversion Update
+ //
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_r_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_s_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_u_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_v_addr;
+
+ wire helper_invert_update_r_wren;
+ wire helper_invert_update_s_wren;
+ wire helper_invert_update_u_wren;
+ wire helper_invert_update_v_wren;
+
+ wire [ 32-1:0] helper_invert_update_r_data;
+ wire [ 32-1:0] helper_invert_update_s_data;
+ wire [ 32-1:0] helper_invert_update_u_data;
+ wire [ 32-1:0] helper_invert_update_v_data;
+
+ modinv_helper_invert_update #
+ (
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_invert_update
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_invert_update_ena),
+ .rdy (helper_invert_update_rdy),
+
+ .u_gt_v (flag_invert_u_gt_v),
+ .v_eq_1 (flag_invert_v_eq_1),
+ .u_is_even (flag_invert_u_is_even),
+ .v_is_even (flag_invert_v_is_even),
+
+ .r_addr (helper_invert_update_r_addr),
+ .s_addr (helper_invert_update_s_addr),
+ .u_addr (helper_invert_update_u_addr),
+ .v_addr (helper_invert_update_v_addr),
+
+ .r_wren (helper_invert_update_r_wren),
+ .s_wren (helper_invert_update_s_wren),
+ .u_wren (helper_invert_update_u_wren),
+ .v_wren (helper_invert_update_v_wren),
+
+ .r_dout (helper_invert_update_r_data),
+ .s_dout (helper_invert_update_s_data),
+ .u_dout (helper_invert_update_u_data),
+ .v_dout (helper_invert_update_v_data),
+
+ .r_dbl_addr (buf_r_dbl_rd_addr),
+ .s_dbl_addr (buf_s_dbl_rd_addr),
+ .r_plus_s_addr (buf_r_plus_s_rd_addr),
+ .u_half_addr (buf_u_half_rd_addr),
+ .v_half_addr (buf_v_half_rd_addr),
+ .u_minus_v_half_addr (buf_u_minus_v_half_rd_addr),
+ .v_minus_u_half_addr (buf_v_minus_u_half_rd_addr),
+
+ .r_dbl_din (buf_r_dbl_rd_dout),
+ .s_dbl_din (buf_s_dbl_rd_dout),
+ .r_plus_s_din (buf_r_plus_s_rd_dout),
+ .u_half_din (buf_u_half_rd_dout),
+ .v_half_din (buf_v_half_rd_dout),
+ .u_minus_v_half_din (buf_u_minus_v_half_rd_dout),
+ .v_minus_u_half_din (buf_v_minus_u_half_rd_dout)
+ );
+
+
+ //
+ // Helper Module - Reduction Pre-Calculation
+ //
+ wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_r_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_s_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_u_addr;
+ wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_v_addr;
+ wire [OPERAND_ADDR_BITS-1:0] helper_reduce_precalc_q_addr;
+
+ wire helper_reduce_precalc_r_wren;
+ wire helper_reduce_precalc_u_wren;
+ wire helper_reduce_precalc_v_wren;
+
+ wire [ 32-1:0] helper_reduce_precalc_r_data;
+ wire [ 32-1:0] helper_reduce_precalc_u_data;
+ wire [ 32-1:0] helper_reduce_precalc_v_data;
+
+ wire flag_reduce_s_is_odd;
+ wire flag_invert_k_is_nul;
+
+ modinv_helper_reduce_precalc #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS),
+ .K_NUM_BITS (K_NUM_BITS)
+ )
+ helper_reduce_precalc
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_reduce_precalc_ena),
+ .rdy (helper_reduce_precalc_rdy),
+
+ .r_addr (helper_reduce_precalc_r_addr),
+ .s_addr (helper_reduce_precalc_s_addr),
+ .u_addr (helper_reduce_precalc_u_addr),
+ .v_addr (helper_reduce_precalc_v_addr),
+ .q_addr (helper_reduce_precalc_q_addr),
+
+ .k (k),
+
+ .s_is_odd (flag_reduce_s_is_odd),
+ .k_is_nul (flag_reduce_k_is_nul),
+
+ .r_din (buf_r_wr_dout),
+ .s_din (buf_s_rd_dout),
+ .q_din (q_din),
+
+ .r_wren (helper_reduce_precalc_r_wren),
+ .u_wren (helper_reduce_precalc_u_wren),
+ .v_wren (helper_reduce_precalc_v_wren),
+
+ .r_dout (helper_reduce_precalc_r_data),
+ .u_dout (helper_reduce_precalc_u_data),
+ .v_dout (helper_reduce_precalc_v_data)
+ );
+
+ //
+ // Helper Module - Reduction Update
+ //
+ wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_s_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_u_addr;
+ wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_v_addr;
+
+ wire helper_reduce_update_s_wren;
+
+ wire [ 32-1:0] helper_reduce_update_s_data;
+
+ modinv_helper_reduce_update #
+ (
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_reduce_update
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_reduce_update_ena),
+ .rdy (helper_reduce_update_rdy),
+
+ .s_is_odd (flag_reduce_s_is_odd),
+ .k_is_nul (flag_reduce_k_is_nul),
+
+ .s_addr (helper_reduce_update_s_addr),
+ .u_addr (helper_reduce_update_u_addr),
+ .v_addr (helper_reduce_update_v_addr),
+
+ .s_wren (helper_reduce_update_s_wren),
+
+ .s_dout (helper_reduce_update_s_data),
+
+ .u_din (buf_u_rd_dout),
+ .v_din (buf_v_rd_dout)
+ );
+
+
+ //
+ // Helper Module - Copying
+ //
+ wire [BUFFER_ADDR_BITS-1:0] helper_copy_s_addr;
+
+ modinv_helper_copy #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
+
+ .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
+ .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
+ )
+ helper_copy
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (helper_copy_ena),
+ .rdy (helper_copy_rdy),
+
+ .s_addr (helper_copy_s_addr),
+ .a1_addr (a1_addr),
+
+ .s_din (buf_s_rd_dout),
+
+ .a1_dout (a1_dout),
+
+ .a1_wren (a1_wren)
+ );
+
+
+ //
+ // Round Counter
+ //
+ reg [ROUND_COUNTER_BITS-1:0] round_counter;
+ wire [ROUND_COUNTER_BITS-1:0] round_counter_max = LOOP_NUM_ROUNDS - 1;
+ wire [ROUND_COUNTER_BITS-1:0] round_counter_zero = {ROUND_COUNTER_BITS{1'b0}};
+ wire [ROUND_COUNTER_BITS-1:0] round_counter_next =
+ (round_counter < round_counter_max) ? round_counter + 1'b1 : round_counter_zero;
+
+
+ //
+ // FSM
+ //
+ localparam FSM_STATE_IDLE = 4'd0;
+
+ localparam FSM_STATE_INIT = 4'd1;
+
+ localparam FSM_STATE_INVERT_PRECALC = 4'd11;
+ localparam FSM_STATE_INVERT_COMPARE = 4'd12;
+ localparam FSM_STATE_INVERT_UPDATE = 4'd13;
+
+ localparam FSM_STATE_REDUCE_PRECALC = 4'd14;
+ localparam FSM_STATE_REDUCE_UPDATE = 4'd15;
+
+ localparam FSM_STATE_COPY = 4'd2;
+
+ localparam FSM_STATE_DONE = 4'd3;
+
+ reg [3:0] fsm_state = FSM_STATE_IDLE;
+ reg [3:0] fsm_state_dly = FSM_STATE_IDLE;
+
+ wire fsm_state_new = (fsm_state != fsm_state_dly);
+
+ wire [3:0] fsm_state_invert_next = (round_counter < round_counter_max) ?
+ FSM_STATE_INVERT_PRECALC : FSM_STATE_REDUCE_PRECALC;
+
+ wire [3:0] fsm_state_reduce_next = (round_counter < round_counter_max) ?
+ FSM_STATE_REDUCE_PRECALC : FSM_STATE_COPY;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
+ else case (fsm_state)
+ FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_INIT : FSM_STATE_IDLE;
+ FSM_STATE_INIT: fsm_state <= helper_init_done ? FSM_STATE_INVERT_PRECALC : FSM_STATE_INIT;
+ FSM_STATE_INVERT_PRECALC: fsm_state <= helper_invert_precalc_done ? FSM_STATE_INVERT_COMPARE : FSM_STATE_INVERT_PRECALC;
+ FSM_STATE_INVERT_COMPARE: fsm_state <= helper_invert_compare_done ? FSM_STATE_INVERT_UPDATE : FSM_STATE_INVERT_COMPARE;
+ FSM_STATE_INVERT_UPDATE: fsm_state <= helper_invert_update_done ? fsm_state_invert_next : FSM_STATE_INVERT_UPDATE;
+ FSM_STATE_REDUCE_PRECALC: fsm_state <= helper_reduce_precalc_done ? FSM_STATE_REDUCE_UPDATE : FSM_STATE_REDUCE_PRECALC;
+ FSM_STATE_REDUCE_UPDATE: fsm_state <= helper_reduce_update_done ? fsm_state_reduce_next : FSM_STATE_REDUCE_UPDATE;
+ FSM_STATE_COPY: fsm_state <= helper_copy_done ? FSM_STATE_DONE : FSM_STATE_COPY;
+ FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE;
+ default: fsm_state <= FSM_STATE_IDLE;
+ endcase
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) fsm_state_dly <= FSM_STATE_IDLE;
+ else fsm_state_dly <= fsm_state;
+
+
+ assign helper_init_ena = (fsm_state == FSM_STATE_INIT) && fsm_state_new;
+ assign helper_invert_precalc_ena = (fsm_state == FSM_STATE_INVERT_PRECALC) && fsm_state_new;
+ assign helper_invert_compare_ena = (fsm_state == FSM_STATE_INVERT_COMPARE) && fsm_state_new;
+ assign helper_invert_update_ena = (fsm_state == FSM_STATE_INVERT_UPDATE) && fsm_state_new;
+ assign helper_reduce_precalc_ena = (fsm_state == FSM_STATE_REDUCE_PRECALC) && fsm_state_new;
+ assign helper_reduce_update_ena = (fsm_state == FSM_STATE_REDUCE_UPDATE) && fsm_state_new;
+ assign helper_copy_ena = (fsm_state == FSM_STATE_COPY) && fsm_state_new;
+
+
+ //
+ // Counter Increment
+ //
+ always @(posedge clk) begin
+ //
+ if ((fsm_state == FSM_STATE_INIT) && helper_init_done)
+ round_counter <= round_counter_zero;
+ //
+ if ((fsm_state == FSM_STATE_INVERT_UPDATE) && helper_invert_update_done)
+ round_counter <= round_counter_next;
+ //
+ if ((fsm_state == FSM_STATE_REDUCE_UPDATE) && helper_reduce_update_done)
+ round_counter <= round_counter_next;
+ //
+ end
+
+
+ //
+ // Q Address Selector
+ //
+ always @(*) begin
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: q_addr = helper_init_q_addr;
+ FSM_STATE_REDUCE_PRECALC: q_addr = helper_reduce_precalc_q_addr;
+ default: q_addr = {OPERAND_ADDR_BITS{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // Buffer Address Selector
+ //
+ always @(*) begin
+ //
+ // Write Ports
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_r_wr_addr = helper_init_r_addr;
+ FSM_STATE_INVERT_UPDATE: buf_r_wr_addr = helper_invert_update_r_addr;
+ FSM_STATE_REDUCE_PRECALC: buf_r_wr_addr = helper_reduce_precalc_r_addr;
+ default: buf_r_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_s_wr_addr = helper_init_s_addr;
+ FSM_STATE_INVERT_UPDATE: buf_s_wr_addr = helper_invert_update_s_addr;
+ FSM_STATE_REDUCE_UPDATE: buf_s_wr_addr = helper_reduce_update_s_addr;
+ default: buf_s_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_u_wr_addr = helper_init_u_addr;
+ FSM_STATE_INVERT_UPDATE: buf_u_wr_addr = helper_invert_update_u_addr;
+ FSM_STATE_REDUCE_PRECALC: buf_u_wr_addr = helper_reduce_precalc_u_addr;
+ default: buf_u_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_v_wr_addr = helper_init_v_addr;
+ FSM_STATE_INVERT_UPDATE: buf_v_wr_addr = helper_invert_update_v_addr;
+ FSM_STATE_REDUCE_PRECALC: buf_v_wr_addr = helper_reduce_precalc_v_addr;
+ default: buf_v_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ // Read Ports
+ //
+ case (fsm_state)
+ FSM_STATE_INVERT_PRECALC: buf_r_rd_addr = helper_invert_precalc_r_addr;
+ default: buf_r_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INVERT_PRECALC: buf_s_rd_addr = helper_invert_precalc_s_addr;
+ FSM_STATE_REDUCE_PRECALC: buf_s_rd_addr = helper_reduce_precalc_s_addr;
+ FSM_STATE_COPY: buf_s_rd_addr = helper_copy_s_addr;
+ default: buf_s_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INVERT_PRECALC: buf_u_rd_addr = helper_invert_precalc_u_addr;
+ FSM_STATE_INVERT_COMPARE: buf_u_rd_addr = helper_invert_compare_u_addr;
+ FSM_STATE_REDUCE_UPDATE: buf_u_rd_addr = helper_reduce_update_u_addr;
+ default: buf_u_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INVERT_PRECALC: buf_v_rd_addr = helper_invert_precalc_v_addr;
+ FSM_STATE_INVERT_COMPARE: buf_v_rd_addr = helper_invert_compare_v_addr;
+ FSM_STATE_REDUCE_UPDATE: buf_v_rd_addr = helper_reduce_update_v_addr;
+ default: buf_v_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // Buffer Write Enable Logic
+ //
+ always @(*) begin
+ //
+ // Write Ports
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_r_wr_en = helper_init_r_wren;
+ FSM_STATE_INVERT_UPDATE: buf_r_wr_en = helper_invert_update_r_wren;
+ FSM_STATE_REDUCE_PRECALC: buf_r_wr_en = helper_reduce_precalc_r_wren;
+ default: buf_r_wr_en = 1'b0;
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_s_wr_en = helper_init_s_wren;
+ FSM_STATE_INVERT_UPDATE: buf_s_wr_en = helper_invert_update_s_wren;
+ FSM_STATE_REDUCE_UPDATE: buf_s_wr_en = helper_reduce_update_s_wren;
+ default: buf_s_wr_en = 1'b0;
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_u_wr_en = helper_init_u_wren;
+ FSM_STATE_INVERT_UPDATE: buf_u_wr_en = helper_invert_update_u_wren;
+ FSM_STATE_REDUCE_PRECALC: buf_u_wr_en = helper_reduce_precalc_u_wren;
+ default: buf_u_wr_en = 1'b0;
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_v_wr_en = helper_init_v_wren;
+ FSM_STATE_INVERT_UPDATE: buf_v_wr_en = helper_invert_update_v_wren;
+ FSM_STATE_REDUCE_PRECALC: buf_v_wr_en = helper_reduce_precalc_v_wren;
+ default: buf_v_wr_en = 1'b0;
+ endcase
+ //
+ end
+
+
+ //
+ // Buffer Write Data Selector
+ //
+ always @(*) begin
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_r_wr_din = helper_init_r_data;
+ FSM_STATE_INVERT_UPDATE: buf_r_wr_din = helper_invert_update_r_data;
+ FSM_STATE_REDUCE_PRECALC: buf_r_wr_din = helper_reduce_precalc_r_data;
+ default: buf_r_wr_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_s_wr_din = helper_init_s_data;
+ FSM_STATE_INVERT_UPDATE: buf_s_wr_din = helper_invert_update_s_data;
+ FSM_STATE_REDUCE_UPDATE: buf_s_wr_din = helper_reduce_update_s_data;
+ default: buf_s_wr_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_u_wr_din = helper_init_u_data;
+ FSM_STATE_INVERT_UPDATE: buf_u_wr_din = helper_invert_update_u_data;
+ FSM_STATE_REDUCE_PRECALC: buf_u_wr_din = helper_reduce_precalc_u_data;
+ default: buf_u_wr_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_INIT: buf_v_wr_din = helper_init_v_data;
+ FSM_STATE_INVERT_UPDATE: buf_v_wr_din = helper_invert_update_v_data;
+ FSM_STATE_REDUCE_PRECALC: buf_v_wr_din = helper_reduce_precalc_v_data;
+ default: buf_v_wr_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // Ready Logic
+ //
+ reg rdy_reg = 1'b1;
+
+ assign rdy = rdy_reg;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) rdy_reg <= 1'b1;
+ else begin
+
+ /* clear */
+ if (rdy && ena) rdy_reg <= 1'b0;
+
+ /* set */
+ if (!rdy && (fsm_state == FSM_STATE_DONE)) rdy_reg <= 1'b1;
+
+ end
+
+
+ //
+ // Store Redundant Power of 2 (K)
+ //
+ always @(posedge clk)
+ //
+ if (helper_init_ena)
+ k <= {K_NUM_BITS{1'b0}};
+ else begin
+
+ if (helper_invert_update_ena && !flag_invert_v_eq_1)
+ k <= k + 1'b1;
+
+ if (helper_reduce_update_ena && (k != {K_NUM_BITS{1'b0}}))
+ k <= k - 1'b1;
+
+ end
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_multiplier_256.v b/rtl/modular/modular_multiplier_256.v
new file mode 100644
index 0000000..8487aee
--- /dev/null
+++ b/rtl/modular/modular_multiplier_256.v
@@ -0,0 +1,402 @@
+//------------------------------------------------------------------------------
+//
+// modular_multiplier_256.v
+// -----------------------------------------------------------------------------
+// Modular multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_multiplier_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ a_addr, b_addr, n_addr, p_addr, p_wren,
+ a_din, b_din, n_din, p_dout
+ );
+
+
+ //
+ // Constants
+ //
+ localparam OPERAND_NUM_WORDS = 8;
+ localparam WORD_COUNTER_WIDTH = 3;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
+ WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
+ end
+ endfunction
+
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+ WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word
+ output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
+ output wire p_wren; // store current P word now
+
+ input wire [ 31:0] a_din; // A
+ input wire [ 31:0] b_din; // B
+ input wire [ 31:0] n_din; // N (must be P-256!)
+ output wire [ 31:0] p_dout; // P = A * B mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] index_a;
+ reg [WORD_COUNTER_WIDTH-1:0] index_b;
+
+ /* map registers to output ports */
+ assign a_addr = index_a;
+ assign b_addr = index_b;
+
+ //
+ // FSM
+ //
+ localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+ wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+ wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+ wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
+ wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
+ wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];
+
+ wire inc_index_a = |fsm_shreg_inc_index_a;
+ wire store_word_a = |fsm_shreg_store_word_a;
+ wire inc_index_b = |fsm_shreg_inc_index_b;
+ wire clear_mac_ab = |fsm_shreg_inc_index_b;
+ wire shift_wide_a = |fsm_shreg_inc_index_b;
+ wire enable_mac_ab = |fsm_shreg_inc_index_b;
+ wire store_si_msb = |fsm_shreg_store_si_msb;
+ wire store_si_lsb = fsm_shreg_store_si_lsb;
+ wire shift_si = |fsm_shreg_shift_si;
+ wire mask_cw1_sum = fsm_shreg_mask_cw1_sum;
+ wire store_c_word = |fsm_shreg_store_c_word;
+ wire reduce_start = fsm_shreg_reduce_start;
+ wire reduce_stop = fsm_shreg_reduce_stop;
+
+
+ //
+ // FSM Logic
+ //
+ wire reduce_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy)
+ fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else if (!reduce_stop || reduce_done)
+ fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ reg index_b_ff;
+
+ always @(posedge clk)
+ //
+ if (inc_index_b) index_b_ff <= ~index_b_ff;
+ else index_b_ff <= 1'b0;
+
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ index_a <= WORD_INDEX_ZERO;
+ index_b <= WORD_INDEX_LAST;
+ //
+ end else begin
+ //
+ if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
+ if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
+ //
+ end
+
+
+ //
+ // Wide Operand Buffer
+ //
+ reg [255:0] buf_a_wide;
+
+ always @(posedge clk)
+ //
+ if (store_word_a)
+ buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]};
+ else if (shift_wide_a)
+ buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
+
+
+ //
+ // Multiplier Array
+ //
+ wire mac_inhibit; // control signal to pause all accumulators
+
+ wire [46: 0] mac[0:15]; // outputs of all accumulators
+ reg [15: 0] mac_clear; // individual per-accumulator clear flag
+
+ assign mac_inhibit = ~enable_mac_ab;
+
+ always @(posedge clk)
+ //
+ if (!clear_mac_ab)
+ mac_clear <= {16{1'b1}};
+ else begin
+
+ if (mac_clear == {16{1'b1}})
+ mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}};
+ else
+ mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}};
+
+
+ end
+
+ //
+ // Array of parallel multipliers
+ //
+ genvar i;
+ generate for (i=0; i<16; i=i+1)
+ begin : gen_mac_array
+ //
+ mac16_wrapper mac16_inst
+ (
+ .clk (clk),
+ .ce (~mac_inhibit),
+
+ .clr (mac_clear[i]),
+
+ .a (buf_a_wide[16*i+:16]),
+ .b (index_b_ff ? b_din[15:0] : b_din[31:16]),
+ .s (mac[i])
+ );
+ //
+ end
+ endgenerate
+
+ //
+ // Intermediate Words
+ //
+ reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
+ reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;
+
+
+ wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
+ wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;
+
+ generate for (i=0; i<16; i=i+1)
+ begin : gen_si_lsb_new
+ assign si_lsb_new[47*i+:47] = mac[15-i];
+ end
+ endgenerate
+
+ generate for (i=1; i<16; i=i+1)
+ begin : gen_si_msb_new
+ assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47];
+ end
+ endgenerate
+
+ always @(posedge clk) begin
+ //
+ if (shift_si) begin
+ si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
+ si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
+ end else begin
+
+ if (store_si_msb)
+ si_msb <= si_msb_new;
+
+ if (store_si_lsb)
+ si_lsb <= si_lsb_new;
+ end
+
+ end
+
+
+ //
+ // Accumulators
+ //
+ wire [46: 0] add48_cw0_s;
+ wire [46: 0] add48_cw1_s;
+
+
+ //
+ // cw0, b, cw1, b
+ //
+ reg [30: 0] si_prev_dly;
+ reg [15: 0] si_next_dly;
+
+ always @(posedge clk)
+ //
+ if (shift_si)
+ si_prev_dly <= si_lsb[93:63];
+ else
+ si_prev_dly <= {31{1'b0}};
+
+ always @(posedge clk)
+ //
+ si_next_dly <= si_lsb[62:47];
+
+ wire [46: 0] add48_cw0_a = si_lsb[46:0];
+ wire [46: 0] add48_cw0_b = {{16{1'b0}}, si_prev_dly};
+
+ wire [46: 0] add48_cw1_a = add48_cw0_s;
+ wire [46: 0] add48_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add48_cw1_s[46:32]}};
+
+ adder47_wrapper add48_cw0_inst
+ (
+ .clk (clk),
+ .a (add48_cw0_a),
+ .b (add48_cw0_b),
+ .s (add48_cw0_s)
+ );
+
+ adder47_wrapper add48_cw1_inst
+ (
+ .clk (clk),
+ .a (add48_cw1_a),
+ .b (add48_cw1_b),
+ .s (add48_cw1_s)
+ );
+
+
+
+ //
+ // Full-Size Product
+ //
+ reg [WORD_COUNTER_WIDTH:0] bram_c_addr;
+
+ wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
+ wire [ 31:0] reduce_c_word;
+
+ always @(posedge clk)
+ //
+ if (store_c_word)
+ bram_c_addr <= bram_c_addr + 1'b1;
+ else
+ bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}};
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1)
+ )
+ bram_c_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_c_addr),
+ .a_wr (store_c_word),
+ .a_in (add48_cw1_s[31:0]),
+ .a_out (),
+
+ .b_addr (reduce_c_addr),
+ .b_out (reduce_c_word)
+ );
+
+
+ //
+ // Reduction Stage
+ //
+ modular_reductor_256 reduce_256_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_done),
+
+ .x_addr (reduce_c_addr),
+ .n_addr (n_addr),
+ .p_addr (p_addr),
+ .p_wren (p_wren),
+
+ .x_din (reduce_c_word),
+ .n_din (n_din),
+ .p_dout (p_dout)
+ );
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_reductor_256.v b/rtl/modular/modular_reductor_256.v
new file mode 100644
index 0000000..774f42e
--- /dev/null
+++ b/rtl/modular/modular_reductor_256.v
@@ -0,0 +1,666 @@
+//------------------------------------------------------------------------------
+//
+// modular_reductor_256.v
+// -----------------------------------------------------------------------------
+// Modular reductor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_reductor_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ x_addr, n_addr, p_addr, p_wren,
+ x_din, n_din, p_dout
+ );
+
+ //
+ // Constants
+ //
+ localparam OPERAND_NUM_WORDS = 8;
+ localparam WORD_COUNTER_WIDTH = 3;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST;
+ input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+ WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
+ output wire p_wren; // store current P word now
+
+ input wire [ 31:0] x_din; // X
+ input wire [ 31:0] n_din; // N (must be P-256!)
+ output wire [ 31:0] p_dout; // P = X mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH:0] index_x;
+
+
+ /* map registers to output ports */
+ assign x_addr = index_x;
+
+
+ //
+ // FSM
+ //
+ localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
+ wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
+ wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5];
+
+ wire [5-1:0] fsm_shreg_reduce_stage_start;
+ wire [5-1:0] fsm_shreg_reduce_stage_stop;
+
+ genvar s;
+ generate for (s=0; s<5; s=s+1)
+ begin : gen_fsm_shreg_reduce_stages
+ assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1];
+ assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2];
+ end
+ endgenerate
+
+ wire inc_index_x = |fsm_shreg_inc_index_x;
+ wire store_word_z = |fsm_shreg_store_word_z;
+ wire reduce_start = |fsm_shreg_reduce_stage_start;
+ wire reduce_stop = |fsm_shreg_reduce_stage_stop;
+ wire store_p = fsm_shreg_reduce_stage_stop[0];
+
+
+ wire reduce_adder0_done;
+ wire reduce_adder1_done;
+ wire reduce_subtractor_done;
+
+ wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy)
+ //
+ fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else if (!reduce_stop || reduce_done_all)
+ //
+ fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ always @(posedge clk)
+ //
+ if (rdy)
+ //
+ index_x <= WORD_INDEX_LAST;
+ //
+ else if (inc_index_x)
+ //
+ index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
+
+
+ //
+ // Look-up Table
+ //
+
+ // TODO: Explain s5!!!
+
+ reg [9*WORD_COUNTER_WIDTH-1:0] z_addr; //
+ reg [9 -1:0] z_wren; //
+ reg [9 -1:0] z_mask; // mask input to store zero word
+ reg [9 -1:0] z_save; // save previous word once again
+
+ always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // s9 s8 s7 s6 s5 s4 s3 s2 s1
+ // || || || || || || || || ||
+ 4'd00: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd00};
+ 4'd01: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd01};
+ 4'd02: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd02};
+ 4'd03: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd03};
+ 4'd04: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd04};
+ 4'd05: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd05};
+ 4'd06: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd06};
+ 4'd07: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd07};
+ 4'd08: z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dxx};
+ 4'd09: z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dxx};
+ 4'd10: z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dxx};
+ 4'd11: z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dxx};
+ 4'd12: z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dxx};
+ 4'd13: z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx};
+ 4'd14: z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx};
+ 4'd15: z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx};
+ //
+ default: z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}};
+ //
+ endcase
+
+ always @(posedge clk)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd04: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd08: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd09: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd10: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd11: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ //
+ default: z_wren <= {9{1'b0}};
+ //
+ endcase
+
+ always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd08: z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd10: z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd11: z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0};
+ 4'd12: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+ 4'd13: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+ 4'd14: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd15: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ //
+ default: z_mask <= {9{1'bX}};
+ //
+ endcase
+
+ always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd01: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ //
+ default: z_save <= {9{1'bX}};
+ //
+ endcase
+
+
+ //
+ // Intermediate Numbers
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] reduce_z_addr[1:9];
+ wire [ 32-1:0] reduce_z_dout[1:9];
+
+ reg [31: 0] x_din_dly;
+ always @(posedge clk)
+ //
+ x_din_dly <= x_din;
+
+
+ genvar z;
+ generate for (z=1; z<=9; z=z+1)
+ //
+ begin : gen_z_bram
+ //
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_c_inst
+ (
+ .clk (clk),
+
+ .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]),
+ .a_wr (z_wren[z-1] & store_word_z),
+ .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)),
+ .a_out (),
+
+ .b_addr (reduce_z_addr[z]),
+ .b_out (reduce_z_dout[z])
+ );
+ //
+ end
+ //
+ endgenerate
+
+
+
+
+ wire [ 32-1:0] bram_sum0_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr;
+ wire bram_sum0_wr_wren;
+
+ wire [ 32-1:0] bram_sum1_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr;
+ wire bram_sum1_wr_wren;
+
+ wire [ 32-1:0] bram_diff_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr;
+ wire bram_diff_wr_wren;
+
+ wire [ 32-1:0] bram_sum0_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr;
+
+ wire [ 32-1:0] bram_sum1_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr;
+
+ wire [ 32-1:0] bram_diff_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr;
+
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_sum0_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_sum0_wr_addr),
+ .a_wr (bram_sum0_wr_wren),
+ .a_in (bram_sum0_wr_din),
+ .a_out (),
+
+ .b_addr (bram_sum0_rd_addr),
+ .b_out (bram_sum0_rd_dout)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_sum1_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_sum1_wr_addr),
+ .a_wr (bram_sum1_wr_wren),
+ .a_in (bram_sum1_wr_din),
+ .a_out (),
+
+ .b_addr (bram_sum1_rd_addr),
+ .b_out (bram_sum1_rd_dout)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_diff_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_diff_wr_addr),
+ .a_wr (bram_diff_wr_wren),
+ .a_in (bram_diff_wr_din),
+ .a_out (),
+
+ .b_addr (bram_diff_rd_addr),
+ .b_out (bram_diff_rd_dout)
+ );
+
+
+
+ wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr;
+
+ reg [ 32-1:0] adder0_a_din;
+ reg [ 32-1:0] adder0_b_din;
+
+ reg [ 32-1:0] adder1_a_din;
+ reg [ 32-1:0] adder1_b_din;
+
+ reg [ 32-1:0] subtractor_a_din;
+ reg [ 32-1:0] subtractor_b_din;
+
+ // n_addr - only 1 output, because all modules are in sync
+
+ modular_adder adder_inst0
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_adder0_done),
+
+ .ab_addr (adder0_ab_addr),
+ .n_addr (),
+ .s_addr (bram_sum0_wr_addr),
+ .s_wren (bram_sum0_wr_wren),
+
+ .a_din (adder0_a_din),
+ .b_din (adder0_b_din),
+ .n_din (n_din),
+ .s_dout (bram_sum0_wr_din)
+ );
+
+ modular_adder adder_inst1
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_adder1_done),
+
+ .ab_addr (adder1_ab_addr),
+ .n_addr (),
+ .s_addr (bram_sum1_wr_addr),
+ .s_wren (bram_sum1_wr_wren),
+
+ .a_din (adder1_a_din),
+ .b_din (adder1_b_din),
+ .n_din (n_din),
+ .s_dout (bram_sum1_wr_din)
+ );
+
+ modular_subtractor subtractor_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_subtractor_done),
+
+ .ab_addr (subtractor_ab_addr),
+ .n_addr (n_addr),
+ .d_addr (bram_diff_wr_addr),
+ .d_wren (bram_diff_wr_wren),
+
+ .a_din (subtractor_a_din),
+ .b_din (subtractor_b_din),
+ .n_din (n_din),
+ .d_dout (bram_diff_wr_din)
+ );
+
+
+ //
+ // address
+ //
+ always @(*)
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ //
+ 5'b10000: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = adder0_ab_addr;
+ reduce_z_addr[3] = adder1_ab_addr;
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = subtractor_ab_addr;
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ 5'b01000: begin
+ reduce_z_addr[1] = adder0_ab_addr;
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = adder1_ab_addr;
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = subtractor_ab_addr;
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder1_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00100: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = adder0_ab_addr;
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = subtractor_ab_addr;
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder1_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00010: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = subtractor_ab_addr;
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder0_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00001: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = adder0_ab_addr;
+ end
+ //
+ default: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ endcase
+
+
+
+ //
+ // adder 0
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder0_a_din = reduce_z_dout[2];
+ 5'b01000: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00100: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00010: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00001: adder0_a_din = bram_sum0_rd_dout;
+ default: adder0_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder0_b_din = reduce_z_dout[2];
+ 5'b01000: adder0_b_din = reduce_z_dout[1];
+ 5'b00100: adder0_b_din = reduce_z_dout[5];
+ 5'b00010: adder0_b_din = bram_sum1_rd_dout;
+ 5'b00001: adder0_b_din = bram_diff_rd_dout;
+ default: adder0_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+ //
+ // adder 1
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder1_a_din = reduce_z_dout[3];
+ 5'b01000: adder1_a_din = bram_sum1_rd_dout;
+ 5'b00100: adder1_a_din = bram_sum1_rd_dout;
+ 5'b00010: adder1_a_din = {32{1'bX}};
+ 5'b00001: adder1_a_din = {32{1'bX}};
+ default: adder1_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder1_b_din = reduce_z_dout[3];
+ 5'b01000: adder1_b_din = reduce_z_dout[4];
+ 5'b00100: adder1_b_din = {32{1'b0}};
+ 5'b00010: adder1_b_din = {32{1'bX}};
+ 5'b00001: adder1_b_din = {32{1'bX}};
+ default: adder1_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // subtractor
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: subtractor_a_din = {32{1'b0}};
+ 5'b01000: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00100: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00010: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00001: subtractor_a_din = {32{1'bX}};
+ default: subtractor_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: subtractor_b_din = reduce_z_dout[6];
+ 5'b01000: subtractor_b_din = reduce_z_dout[7];
+ 5'b00100: subtractor_b_din = reduce_z_dout[8];
+ 5'b00010: subtractor_b_din = reduce_z_dout[9];
+ 5'b00001: subtractor_b_din = {32{1'bX}};
+ default: subtractor_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+
+
+
+ assign p_addr = bram_sum0_wr_addr;
+ assign p_wren = bram_sum0_wr_wren & store_p;
+ assign p_dout = bram_sum0_wr_din;
+
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_subtractor.v b/rtl/modular/modular_subtractor.v
new file mode 100644
index 0000000..322aec4
--- /dev/null
+++ b/rtl/modular/modular_subtractor.v
@@ -0,0 +1,292 @@
+//------------------------------------------------------------------------------
+//
+// modular_subtractor.v
+// -----------------------------------------------------------------------------
+// Modular subtractor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_subtractor
+ (
+ clk, rst_n,
+ ena, rdy,
+ ab_addr, n_addr, d_addr, d_wren,
+ a_din, b_din, n_din, d_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter OPERAND_NUM_WORDS = 8;
+ parameter WORD_COUNTER_WIDTH = 3;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
+ WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] d_addr; // index of current D word
+ output wire d_wren; // store current D word now
+
+ input wire [ 31:0] a_din; // A
+ input wire [ 31:0] b_din; // B
+ input wire [ 31:0] n_din; // N
+ output wire [ 31:0] d_dout; // D = (A - B) mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] index_ab;
+ reg [WORD_COUNTER_WIDTH-1:0] index_n;
+ reg [WORD_COUNTER_WIDTH-1:0] index_d;
+
+ /* map registers to output ports */
+ assign ab_addr = index_ab;
+ assign n_addr = index_n;
+ assign d_addr = index_d;
+
+
+ //
+ // Subtractor
+ //
+ wire [31: 0] sub32_d;
+ wire sub32_b_in;
+ wire sub32_b_out;
+
+ subtractor32_wrapper subtractor32
+ (
+ .clk (clk),
+ .a (a_din),
+ .b (b_din),
+ .d (sub32_d),
+ .b_in (sub32_b_in),
+ .b_out (sub32_b_out)
+ );
+
+
+ //
+ // Adder
+ //
+ wire [31: 0] add32_s;
+ wire add32_c_in;
+ wire add32_c_out;
+
+ adder32_wrapper adder32
+ (
+ .clk (clk),
+ .a (sub32_d),
+ .b (n_din),
+ .s (add32_s),
+ .c_in (add32_c_in),
+ .c_out (add32_c_out)
+ );
+
+
+ //
+ // FSM
+ //
+
+ localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)];
+
+ wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
+
+ wire inc_index_ab = |fsm_shreg_inc_index_ab;
+ wire inc_index_n = |fsm_shreg_inc_index_n;
+ wire store_dif_ab = |fsm_shreg_store_dif_ab;
+ wire store_dif_ab_n = |fsm_shreg_store_dif_ab_n;
+ wire store_data_d = |fsm_shreg_store_data_d;
+ wire inc_index_d = |fsm_shreg_inc_index_d;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Borrow & Carry Masking Logic
+ //
+ reg sub32_b_mask;
+ reg add32_c_mask;
+
+
+ always @(posedge clk) begin
+ //
+ sub32_b_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
+ add32_c_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
+ //
+ end
+
+ assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
+ assign add32_c_in = add32_c_out & ~add32_c_mask;
+
+
+
+ //
+ // Borrow & Carry Latch Logic
+ //
+ reg sub32_borrow_latch;
+
+ always @(posedge clk) begin
+ //
+ if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out;
+ //
+ end
+
+
+ //
+ // Intermediate Results
+ //
+ reg [32*OPERAND_NUM_WORDS-1:0] d_ab;
+ reg [32*OPERAND_NUM_WORDS-1:0] d_ab_n;
+
+ always @(posedge clk)
+ //
+ if (store_data_d) begin
+ //
+ d_ab <= {{32{1'bX}}, d_ab[32*OPERAND_NUM_WORDS-1:32]};
+ d_ab_n <= {{32{1'bX}}, d_ab_n[32*OPERAND_NUM_WORDS-1:32]};
+ //
+ end else begin
+ //
+ if (store_dif_ab) d_ab <= {sub32_d, d_ab[32*OPERAND_NUM_WORDS-1:32]};
+ if (store_dif_ab_n) d_ab_n <= {add32_s, d_ab_n[32*OPERAND_NUM_WORDS-1:32]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ index_ab <= WORD_INDEX_ZERO;
+ index_n <= WORD_INDEX_ZERO;
+ index_d <= WORD_INDEX_ZERO;
+ //
+ end else begin
+ //
+ if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab);
+ if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n);
+ if (inc_index_d) index_d <= WORD_INDEX_NEXT_OR_ZERO(index_d);
+ //
+ end
+
+
+ //
+ // Output Sum Selector
+ //
+ wire mux_select_ab_n = sub32_borrow_latch;
+
+
+ //
+ // Output Data and Write Enable Logic
+ //
+ reg d_wren_reg;
+ reg [31: 0] d_dout_reg;
+ wire [31: 0] d_dout_mux = mux_select_ab_n ? d_ab_n[31:0] : d_ab[31:0];
+
+ assign d_wren = d_wren_reg;
+ assign d_dout = d_dout_reg;
+
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ d_wren_reg <= 1'b0;
+ d_dout_reg <= {32{1'bX}};
+ //
+ end else begin
+ //
+ d_wren_reg <= store_data_d;
+ d_dout_reg <= store_data_d ? d_dout_mux : {32{1'bX}};
+ //
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/multiword/mw_comparator.v b/rtl/multiword/mw_comparator.v
new file mode 100644
index 0000000..b97a6cf
--- /dev/null
+++ b/rtl/multiword/mw_comparator.v
@@ -0,0 +1,220 @@
+//------------------------------------------------------------------------------
+//
+// mw_comparator.v
+// -----------------------------------------------------------------------------
+// Multi-word comparator.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module mw_comparator
+ (
+ clk, rst_n,
+ ena, rdy,
+ xy_addr, x_din, y_din,
+ cmp_l, cmp_e, cmp_g
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter WORD_COUNTER_WIDTH = 3;
+ parameter OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREV_OR_LAST;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_PREV_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+ WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-1:0] xy_addr; // address of current X and Y words
+ input wire [ 32-1:0] x_din; // current X word
+ input wire [ 32-1:0] y_din; // current Y word
+
+ output wire cmp_l; // X < Y ?
+ output wire cmp_e; // X = Y ?
+ output wire cmp_g; // X > Y ?
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] index_xy;
+
+ reg reg_cmp_l;
+ reg reg_cmp_e;
+ reg reg_cmp_g;
+
+
+ //
+ // Output Mapping
+ //
+ assign xy_addr = index_xy;
+
+ assign cmp_l = reg_cmp_l;
+ assign cmp_e = reg_cmp_e;
+ assign cmp_g = reg_cmp_g;
+
+
+ //
+ // FSM
+ //
+ localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 3;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_dec_index_xy = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_calc_leg = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
+ wire fsm_shreg_calc_leg_last = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
+
+ wire dec_index_xy = |fsm_shreg_dec_index_xy;
+ wire calc_leg = |fsm_shreg_calc_leg;
+ wire calc_leg_last = fsm_shreg_calc_leg_last;
+
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ always @(posedge clk)
+ //
+ if (rdy) index_xy <= WORD_INDEX_LAST;
+ else if (dec_index_xy) index_xy <= WORD_INDEX_PREV_OR_LAST(index_xy);
+
+
+ //
+ // 32-bit Subtractor
+ //
+ wire [31: 0] sub32_d_out;
+ wire sub32_b_in;
+ wire sub32_b_out;
+
+ subtractor32_wrapper subtractor32_inst
+ (
+ .clk (clk),
+
+ .a (x_din),
+ .b (y_din),
+
+ .d (sub32_d_out),
+
+ .b_in (sub32_b_in),
+ .b_out (sub32_b_out)
+ );
+
+
+ //
+ // Borrow Masking Logic
+ //
+ reg sub32_b_mask;
+
+ always @(posedge clk)
+ //
+ sub32_b_mask <= (index_xy == WORD_INDEX_LAST) ? 1'b1 : 1'b0;
+
+ assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
+
+ //
+ // Output Logic
+ //
+ wire cmp_unresolved = !(cmp_l || cmp_g);
+
+ wire cmp_borrow_is_set = (sub32_b_out == 1'b1) ? 1'b1 : 1'b0;
+ wire cmp_difference_is_nonzero = (sub32_d_out != 32'd0) ? 1'b1 : 1'b0;
+
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ if (ena) begin
+ //
+ reg_cmp_l <= 1'b0;
+ reg_cmp_e <= 1'b0;
+ reg_cmp_g <= 1'b0;
+ //
+ end
+ //
+ end else if (cmp_unresolved && calc_leg) begin
+ //
+ if ( cmp_borrow_is_set) reg_cmp_l <= 1'b1;
+ if (!cmp_borrow_is_set && cmp_difference_is_nonzero) reg_cmp_g <= 1'b1;
+ if (!cmp_borrow_is_set && !cmp_difference_is_nonzero && calc_leg_last) reg_cmp_e <= 1'b1;
+ //
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/multiword/mw_mover.v b/rtl/multiword/mw_mover.v
new file mode 100644
index 0000000..5db95a7
--- /dev/null
+++ b/rtl/multiword/mw_mover.v
@@ -0,0 +1,175 @@
+//------------------------------------------------------------------------------
+//
+// mw_mover.v
+// -----------------------------------------------------------------------------
+// Multi-word data mover.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module mw_mover
+ (
+ clk, rst_n,
+ ena, rdy,
+ x_addr, y_addr, y_wren,
+ x_din, y_dout
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter WORD_COUNTER_WIDTH = 3;
+ parameter OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
+ WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-1:0] x_addr; // address of current X word
+ output wire [WORD_COUNTER_WIDTH-1:0] y_addr; // address of current Y word
+ output wire y_wren; // store current Y word
+
+ input wire [ 32-1:0] x_din; // current X word
+ output wire [ 32-1:0] y_dout; // current Y word
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] index_x;
+ reg [WORD_COUNTER_WIDTH-1:0] index_y;
+
+
+ //
+ // Output Mapping
+ //
+ assign x_addr = index_x;
+ assign y_addr = index_y;
+
+
+ //
+ // FSM
+ //
+ localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 2;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+ wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_y = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+
+ wire inc_index_x = |fsm_shreg_inc_index_x;
+ wire inc_index_y = |fsm_shreg_inc_index_y;
+ wire store_word_y = |fsm_shreg_inc_index_x;
+
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ index_x <= WORD_INDEX_ZERO;
+ index_y <= WORD_INDEX_ZERO;
+ end else begin
+ if (inc_index_x) index_x <= WORD_INDEX_NEXT_OR_ZERO(index_x);
+ if (inc_index_y) index_y <= WORD_INDEX_NEXT_OR_ZERO(index_y);
+ end
+
+
+ //
+ // Write Enable Logic
+ //
+ reg y_wren_reg;
+
+ assign y_wren = y_wren_reg;
+
+ always @(posedge clk)
+ //
+ if (rdy) y_wren_reg <= 1'b0;
+ else y_wren_reg <= store_word_y;
+
+
+ //
+ // Output Logic
+ //
+ assign y_dout = x_din;
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/util/bram_1rw_1ro_readfirst.v b/rtl/util/bram_1rw_1ro_readfirst.v
new file mode 100644
index 0000000..28782c2
--- /dev/null
+++ b/rtl/util/bram_1rw_1ro_readfirst.v
@@ -0,0 +1,101 @@
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module bram_1rw_1ro_readfirst
+ #(parameter MEM_WIDTH = 32,
+ parameter MEM_ADDR_BITS = 8)
+ (
+ input wire clk,
+
+ input wire [MEM_ADDR_BITS-1:0] a_addr,
+ input wire a_wr,
+ input wire [MEM_WIDTH-1:0] a_in,
+ output wire [MEM_WIDTH-1:0] a_out,
+
+ input wire [MEM_ADDR_BITS-1:0] b_addr,
+ output wire [MEM_WIDTH-1:0] b_out
+ );
+
+
+ //
+ // BRAM
+ //
+ (* RAM_STYLE="BLOCK" *)
+ reg [MEM_WIDTH-1:0] bram[0:(2**MEM_ADDR_BITS)-1];
+
+
+ //
+ // Initialization
+ //
+ /**
+ integer c;
+ initial begin
+ for (c=0; c<(2**MEM_ADDR_BITS); c=c+1)
+ bram[c] = {MEM_WIDTH{1'b0}};
+ end
+ **/
+
+
+
+ //
+ // Output Registers
+ //
+ reg [MEM_WIDTH-1:0] bram_reg_a;
+ reg [MEM_WIDTH-1:0] bram_reg_b;
+
+ assign a_out = bram_reg_a;
+ assign b_out = bram_reg_b;
+
+
+ //
+ // Read-Write Port A
+ //
+ always @(posedge clk) begin
+ //
+ bram_reg_a <= bram[a_addr];
+ //
+ if (a_wr) bram[a_addr] <= a_in;
+ //
+ end
+
+
+ //
+ // Read-Only Port B
+ //
+ always @(posedge clk)
+ //
+ bram_reg_b <= bram[b_addr];
+
+
+endmodule