aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md166
-rw-r--r--bench/tb_curve_adder_256.v738
-rw-r--r--bench/tb_curve_doubler_256.v716
-rw-r--r--bench/tb_curve_multiplier_256.v460
-rw-r--r--bench/tb_lowlevel_adder32.v350
-rw-r--r--bench/tb_lowlevel_adder47.v302
-rw-r--r--bench/tb_lowlevel_subtractor32.v348
-rw-r--r--bench/tb_modular_adder.v614
-rw-r--r--bench/tb_modular_invertor.v449
-rw-r--r--bench/tb_modular_multiplier_256.v632
-rw-r--r--bench/tb_modular_subtractor.v612
-rw-r--r--bench/tb_mw_comparator.v560
-rw-r--r--bench/tb_mw_mover.v464
-rw-r--r--rtl/curve/curve_dbl_add_256.v1748
-rw-r--r--rtl/curve/curve_mul_256.v1440
-rw-r--r--rtl/curve/rom/brom_p256_delta.v38
-rw-r--r--rtl/curve/rom/brom_p256_g_x.v38
-rw-r--r--rtl/curve/rom/brom_p256_g_y.v40
-rw-r--r--rtl/curve/rom/brom_p256_h_x.v38
-rw-r--r--rtl/curve/rom/brom_p256_h_y.v40
-rw-r--r--rtl/curve/rom/brom_p256_one.v38
-rw-r--r--rtl/curve/rom/brom_p256_q.v40
-rw-r--r--rtl/curve/rom/brom_p256_zero.v40
-rw-r--r--rtl/curve/uop/uop_add_rom.v66
-rw-r--r--rtl/curve/uop/uop_conv_rom.v38
-rw-r--r--rtl/curve/uop/uop_dbl_rom.v58
-rw-r--r--rtl/curve/uop/uop_init_rom.v33
-rw-r--r--rtl/curve/uop_ecdsa.v50
-rw-r--r--rtl/ecdsa256.v180
-rw-r--r--rtl/ecdsa256_wrapper.v56
-rw-r--r--rtl/lowlevel/adder32_wrapper.v73
-rw-r--r--rtl/lowlevel/adder47_wrapper.v69
-rw-r--r--rtl/lowlevel/artix7/adder32_artix7.v96
-rw-r--r--rtl/lowlevel/artix7/adder47_artix7.v91
-rw-r--r--rtl/lowlevel/artix7/dsp48e1_wrapper.v159
-rw-r--r--rtl/lowlevel/artix7/mac16_artix7.v90
-rw-r--r--rtl/lowlevel/artix7/subtractor32_artix7.v94
-rw-r--r--rtl/lowlevel/ecdsa_lowlevel_settings.v17
-rw-r--r--rtl/lowlevel/generic/adder32_generic.v67
-rw-r--r--rtl/lowlevel/generic/adder47_generic.v64
-rw-r--r--rtl/lowlevel/generic/mac16_generic.v74
-rw-r--r--rtl/lowlevel/generic/subtractor32_generic.v67
-rw-r--r--rtl/lowlevel/mac16_wrapper.v75
-rw-r--r--rtl/lowlevel/subtractor32_wrapper.v72
-rw-r--r--rtl/modular/modular_adder.v298
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_copy.v148
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_init.v172
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v286
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v408
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v257
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v328
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v153
-rw-r--r--rtl/modular/modular_invertor/modinv_clog2.v10
-rw-r--r--rtl/modular/modular_invertor/modular_invertor.v981
-rw-r--r--rtl/modular/modular_multiplier_256.v804
-rw-r--r--rtl/modular/modular_reductor_256.v1384
-rw-r--r--rtl/modular/modular_subtractor.v292
-rw-r--r--rtl/multiword/mw_comparator.v220
-rw-r--r--rtl/multiword/mw_mover.v175
-rw-r--r--rtl/util/bram_1rw_1ro_readfirst.v101
-rw-r--r--stm32_driver/ecdsa256_driver_sample.c344
-rw-r--r--stm32_driver/ecdsa_model.h408
62 files changed, 6542 insertions, 11727 deletions
diff --git a/README.md b/README.md
index 2ff17ae..054855d 100644
--- a/README.md
+++ b/README.md
@@ -1,83 +1,83 @@
-# ecdsa256
-
-## Core Description
-
-This core implements the scalar base point multiplier for ECDSA curve P-256. It can be used during generation of public keys, the core can also be used as part of the signing operation.
-
-## API Specification
-
-The core interface is similar to other Cryptech cores. FMC memory map looks like the following:
-
-`0x0000 | NAME0`
-`0x0004 | NAME1`
-`0x0008 | VERSION`
-
-`0x0020 | CONTROL`
-`0x0024 | STATUS`
-
-`0x0080 | K0`
-`0x0084 | K1`
-`...`
-`0x009C | K7`
-`0x00A0 | X0`
-`0x00A4 | X1`
-`...`
-`0x00BC | X7`
-`0x00C0 | Y0`
-`0x00C4 | Y1`
-`...`
-`0x00DC | Y7`
-
-The core has the following registers:
-
- * **NAME0**, **NAME1**
-Read-only core name ("ecdsa256").
-
- * **VERSION**
-Read-only core version, currently "0.11".
-
- * **CONTROL**
-Control register bits:
-[31:2] Don't care, always read as 0
-[1] "next" control bit
-[0] Don't care, always read as 0
-The core starts multiplication when the "next" control bit changes from 0 to 1. This way when the bit is set, the core will only perform one multiplication and then stop. To start another operation, the bit must be cleared at first and then set to 1 again.
-
- * **STATUS**
-Read-only status register bits:
-[31:2] Don't care, always read as 0
-[1] "valid" control bit
-[0] "ready" control bit (always read as 1)
-The "valid" control bit is cleared as soon as the core starts operation, and gets set after the multiplication operations is complete. Note, that unlike some other Cryptech cores, this core doesn't need any special initialization, so the "ready" control bit is simply hardwired to always read as 1. This is to keep general core interface consistency.
-
- * **K0**-**K7**
-Buffer for the 256-bit multiplication factor (multiplier) K. The core will compute Q = K * G (the base point G is the multiplicand). K0 is the least significant 32-bit word of K, i.e. bits [31:0], while K7 is the most significant 32-bit word of K, i.e. bits [255:224].
-
- * **X0**-**X7**, **Y0**-**Y7**
-Buffers for the 256-bit coordinates X and Y of the product Q = K * G. Values are returned in affine coordinates. X0 and Y0 contain the least significant 32-bit words, i.e. bits [31:0], while X7 and Y7 contain the most significant 32-bit words, i.e. bits [255:224].
-
-## Implementation Details
-
-The top-level core module contains block memory buffers for input and output operands and the base point multiplier, that reads from the input buffer and writes to the output buffers.
-
-The base point multiplier itself consists of the following:
- * Buffers for storage of temporary values
- * Configurable "worker" unit
- * Microprograms for the worker unit
- * Multi-word mover unit
- * Modular inversion unit
-
-The "worker" unit can execute five basic operations:
- * comparison
- * copying
- * modular addition
- * modular subtraction
- * modular multiplications
-
-There are two primary microprograms, that the worker runs: curve point doubling and addition of curve point to the base point. Those microprograms use projective Jacobian coordinates, so one more microprogram is used to convert the product into affine coordinates with the help of modular inversion unit.
-
-Note, that the core is supplemented by a reference model written in C, that has extensive comments describing tricky corners of the underlying math.
-
-## Vendor-specific Primitives
-
-Cryptech Alpha platform is based on Xilinx Artix-7 200T FPGA, so this core takes advantage of Xilinx-specific DSP slices to carry out math-intensive operations. All vendor-specific math primitives are placed under /rtl/lowlevel/artix7, the core also offers generic replacements under /rtl/lowlevel/generic, they can be used for simulation with 3rd party tools, that are not aware of Xilinx-specific stuff. Selection of vendor/generic primitives is done in ecdsa_lowlevel_settings.v, when porting to other architectures, only those four low-level modules need to be ported.
+# ecdsa256
+
+## Core Description
+
+This core implements the scalar base point multiplier for ECDSA curve P-256. It can be used during generation of public keys, the core can also be used as part of the signing operation.
+
+## API Specification
+
+The core interface is similar to other Cryptech cores. FMC memory map looks like the following:
+
+`0x0000 | NAME0`
+`0x0004 | NAME1`
+`0x0008 | VERSION`
+
+`0x0020 | CONTROL`
+`0x0024 | STATUS`
+
+`0x0080 | K0`
+`0x0084 | K1`
+`...`
+`0x009C | K7`
+`0x00A0 | X0`
+`0x00A4 | X1`
+`...`
+`0x00BC | X7`
+`0x00C0 | Y0`
+`0x00C4 | Y1`
+`...`
+`0x00DC | Y7`
+
+The core has the following registers:
+
+ * **NAME0**, **NAME1**
+Read-only core name ("ecdsa256").
+
+ * **VERSION**
+Read-only core version, currently "0.11".
+
+ * **CONTROL**
+Control register bits:
+[31:2] Don't care, always read as 0
+[1] "next" control bit
+[0] Don't care, always read as 0
+The core starts multiplication when the "next" control bit changes from 0 to 1. This way when the bit is set, the core will only perform one multiplication and then stop. To start another operation, the bit must be cleared at first and then set to 1 again.
+
+ * **STATUS**
+Read-only status register bits:
+[31:2] Don't care, always read as 0
+[1] "valid" control bit
+[0] "ready" control bit (always read as 1)
+The "valid" control bit is cleared as soon as the core starts operation, and gets set after the multiplication operations is complete. Note, that unlike some other Cryptech cores, this core doesn't need any special initialization, so the "ready" control bit is simply hardwired to always read as 1. This is to keep general core interface consistency.
+
+ * **K0**-**K7**
+Buffer for the 256-bit multiplication factor (multiplier) K. The core will compute Q = K * G (the base point G is the multiplicand). K0 is the least significant 32-bit word of K, i.e. bits [31:0], while K7 is the most significant 32-bit word of K, i.e. bits [255:224].
+
+ * **X0**-**X7**, **Y0**-**Y7**
+Buffers for the 256-bit coordinates X and Y of the product Q = K * G. Values are returned in affine coordinates. X0 and Y0 contain the least significant 32-bit words, i.e. bits [31:0], while X7 and Y7 contain the most significant 32-bit words, i.e. bits [255:224].
+
+## Implementation Details
+
+The top-level core module contains block memory buffers for input and output operands and the base point multiplier, that reads from the input buffer and writes to the output buffers.
+
+The base point multiplier itself consists of the following:
+ * Buffers for storage of temporary values
+ * Configurable "worker" unit
+ * Microprograms for the worker unit
+ * Multi-word mover unit
+ * Modular inversion unit
+
+The "worker" unit can execute five basic operations:
+ * comparison
+ * copying
+ * modular addition
+ * modular subtraction
+ * modular multiplications
+
+There are two primary microprograms, that the worker runs: curve point doubling and addition of curve point to the base point. Those microprograms use projective Jacobian coordinates, so one more microprogram is used to convert the product into affine coordinates with the help of modular inversion unit.
+
+Note, that the core is supplemented by a reference model written in C, that has extensive comments describing tricky corners of the underlying math.
+
+## Vendor-specific Primitives
+
+Cryptech Alpha platform is based on Xilinx Artix-7 200T FPGA, so this core takes advantage of Xilinx-specific DSP slices to carry out math-intensive operations. All vendor-specific math primitives are placed under /rtl/lowlevel/artix7, the core also offers generic replacements under /rtl/lowlevel/generic, they can be used for simulation with 3rd party tools, that are not aware of Xilinx-specific stuff. Selection of vendor/generic primitives is done in ecdsa_lowlevel_settings.v, when porting to other architectures, only those four low-level modules need to be ported.
diff --git a/bench/tb_curve_adder_256.v b/bench/tb_curve_adder_256.v
index a20743a..c9b50ce 100644
--- a/bench/tb_curve_adder_256.v
+++ b/bench/tb_curve_adder_256.v
@@ -43,377 +43,377 @@
module tb_curve_adder_256;
- //
- // Test Vectors
- //
- localparam [255:0] PX_1 = 256'ha536512112e4bb911ae72744a914761ddc53700f889c88e583e0edd45c179b08;
- localparam [255:0] PY_1 = 256'h239e73bf40f4831ab71ccea072291893ac8582982ea6fec6bd6aaf36ac32d22e;
- localparam [255:0] PZ_1 = 256'h32258ae04c5498bb34b29c54a7f95afc10c009540c51731eae164750ca385029;
-
- localparam [255:0] RX_1 = 256'he4fcdd1a151b405b2a567d20d7674031c6d5b207b0b5dcf277015d81784492d5;
- localparam [255:0] RY_1 = 256'h4782c540b58988b07bb8e0c5ad3ff562dd45c075a39ee71896d5eb33702dd656;
- localparam [255:0] RZ_1 = 256'hae637ff2fd5468780241afb3a8ebaeb8618e86b4a1a211b350546c9e6fea93d4;
-
- localparam [255:0] PX_2 = 256'he58a6470e038f6b261d5a9a72fb2bd96b6bad433ff7baea6a40b5facf5085189;
- localparam [255:0] PY_2 = 256'h03dd8785b592307811ee5512e2d713c5dc65f60f01883340fe0f56f858a39474;
- localparam [255:0] PZ_2 = 256'h1b4657b1e79c9074fbf7f63f96ce2854db4808afc72841fac623dc68d9bff64d;
-
- localparam [255:0] RX_2 = 256'hc354e99a827a3f1c30f29f6b1d72273eb0daaeb06bb373ed315e305b89d857ca;
- localparam [255:0] RY_2 = 256'h0cb054f95589c1fcbe763df3b8d7badd568d5e93a667076dddfc70dcfab74948;
- localparam [255:0] RZ_2 = 256'hd79d9170dd628aee82d149715a6ec6cc44426ccae236d2a146edbd15a564ea53;
-
- localparam [255:0] PX_3 = 256'hbf5fe30c79025a0b638b0fd62bf1349aee0a9fc7fc2719291b0c23535c16eb52;
- localparam [255:0] PY_3 = 256'h8a637c7c0b9459de664d40a717e1abc0f843f03169fae943e0835cbe767da06b;
- localparam [255:0] PZ_3 = 256'h0871d93601d654216912866514a788a92e8a9b6047611bf185d459e204727377;
-
- localparam [255:0] RX_3 = 256'h1ba6259b5b750e4d6e4f490f661646cd9491be16965f47044ac2688048e567c5;
- localparam [255:0] RY_3 = 256'h80e55c16f403f8d7282bca628477771a45330567caa5aaab9a54919dbe05e3e4;
- localparam [255:0] RZ_3 = 256'hb99663f045c9602b05f23aaaa508e6167d15740be900175dbeceb957a9dad951;
-
- localparam [255:0] PX_4 = 256'hxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
- localparam [255:0] PY_4 = 256'hxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
- localparam [255:0] PZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
-
- localparam [255:0] RX_4 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x
- localparam [255:0] RY_4 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y
- localparam [255:0] RZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
-
- localparam [255:0] PX_5 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x
- localparam [255:0] PY_5 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y
- localparam [255:0] PZ_5 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
-
- localparam [255:0] RX_5 = 256'h29d05c193da77b710e86323538b77e1b11f904fea42998be16bd8d744ece7ad0; // H.x
- localparam [255:0] RY_5 = 256'hb01cbd1c01e58065711814b583f061e9d431cca994cea1313449bf97c840ae07; // H.y
- localparam [255:0] RZ_5 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
-
- localparam [255:0] PX_6 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x
- localparam [255:0] PY_6 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y
- localparam [255:0] PZ_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
-
- localparam [255:0] RX_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
- localparam [255:0] RY_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
- localparam [255:0] RZ_6 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
-
- localparam [255:0] Q = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
-
-
- //
- // Core Parameters
- //
- localparam WORD_COUNTER_WIDTH = 3;
- localparam OPERAND_NUM_WORDS = 8;
-
-
- //
- // Clock (100 MHz)
- //
- reg clk = 1'b0;
- always #5 clk = ~clk;
-
-
- //
- // Inputs, Outputs
- //
- reg rst_n;
- reg ena;
- wire rdy;
-
-
- //
- // Buffers (PX, PY, PZ, RX, RY, RZ, Q)
- //
- wire [WORD_COUNTER_WIDTH-1:0] core_px_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_py_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_pz_addr;
-
- wire [WORD_COUNTER_WIDTH-1:0] core_rx_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_ry_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_rz_addr;
-
- wire [WORD_COUNTER_WIDTH-1:0] core_q_addr;
-
- wire core_rx_wren;
- wire core_ry_wren;
- wire core_rz_wren;
-
- wire [ 32-1:0] core_px_data;
- wire [ 32-1:0] core_py_data;
- wire [ 32-1:0] core_pz_data;
-
- wire [ 32-1:0] core_rx_data_wr;
- wire [ 32-1:0] core_ry_data_wr;
- wire [ 32-1:0] core_rz_data_wr;
-
- wire [ 32-1:0] core_rx_data_rd;
- wire [ 32-1:0] core_ry_data_rd;
- wire [ 32-1:0] core_rz_data_rd;
-
- wire [ 32-1:0] core_q_data;
-
- reg [WORD_COUNTER_WIDTH-1:0] tb_xyzq_addr;
- reg tb_xyzq_wren;
-
- reg [ 31:0] tb_px_data;
- reg [ 31:0] tb_py_data;
- reg [ 31:0] tb_pz_data;
- wire [ 31:0] tb_rx_data;
- wire [ 31:0] tb_ry_data;
- wire [ 31:0] tb_rz_data;
- reg [ 31:0] tb_q_data;
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_px
- ( .clk(clk),
- .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_px_data), .a_out(),
- .b_addr(core_px_addr), .b_out(core_px_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_py
- ( .clk(clk),
- .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_py_data), .a_out(),
- .b_addr(core_py_addr), .b_out(core_py_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_pz
- ( .clk(clk),
- .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_pz_data), .a_out(),
- .b_addr(core_pz_addr), .b_out(core_pz_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_q
- ( .clk(clk),
- .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_q_data), .a_out(),
- .b_addr(core_q_addr), .b_out(core_q_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_rx
- ( .clk(clk),
- .a_addr(core_rx_addr), .a_wr(core_rx_wren), .a_in(core_rx_data_wr), .a_out(core_rx_data_rd),
- .b_addr(tb_xyzq_addr), .b_out(tb_rx_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_ry
- ( .clk(clk),
- .a_addr(core_ry_addr), .a_wr(core_ry_wren), .a_in(core_ry_data_wr), .a_out(core_ry_data_rd),
- .b_addr(tb_xyzq_addr), .b_out(tb_ry_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_rz
- ( .clk(clk),
- .a_addr(core_rz_addr), .a_wr(core_rz_wren), .a_in(core_rz_data_wr), .a_out(core_rz_data_rd),
- .b_addr(tb_xyzq_addr), .b_out(tb_rz_data)
- );
-
-
- //
- // Opcode
- //
- wire [ 5: 0] add_uop_addr;
- wire [19: 0] add_uop;
-
- uop_add_rom add_rom
- (
- .clk (clk),
- .addr (add_uop_addr),
- .data (add_uop)
- );
-
- //
- // UUT
- //
- curve_dbl_add_256 uut
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (ena),
- .rdy (rdy),
-
- .uop_addr (add_uop_addr),
- .uop (add_uop),
-
- .px_addr (core_px_addr),
- .py_addr (core_py_addr),
- .pz_addr (core_pz_addr),
- .rx_addr (core_rx_addr),
- .ry_addr (core_ry_addr),
- .rz_addr (core_rz_addr),
- .q_addr (core_q_addr),
-
- .rx_wren (core_rx_wren),
- .ry_wren (core_ry_wren),
- .rz_wren (core_rz_wren),
-
- .px_din (core_px_data),
- .py_din (core_py_data),
- .pz_din (core_pz_data),
- .rx_din (core_rx_data_rd),
- .ry_din (core_ry_data_rd),
- .rz_din (core_rz_data_rd),
- .rx_dout (core_rx_data_wr),
- .ry_dout (core_ry_data_wr),
- .rz_dout (core_rz_data_wr),
- .q_din (core_q_data)
+ //
+ // Test Vectors
+ //
+ localparam [255:0] PX_1 = 256'ha536512112e4bb911ae72744a914761ddc53700f889c88e583e0edd45c179b08;
+ localparam [255:0] PY_1 = 256'h239e73bf40f4831ab71ccea072291893ac8582982ea6fec6bd6aaf36ac32d22e;
+ localparam [255:0] PZ_1 = 256'h32258ae04c5498bb34b29c54a7f95afc10c009540c51731eae164750ca385029;
+
+ localparam [255:0] RX_1 = 256'he4fcdd1a151b405b2a567d20d7674031c6d5b207b0b5dcf277015d81784492d5;
+ localparam [255:0] RY_1 = 256'h4782c540b58988b07bb8e0c5ad3ff562dd45c075a39ee71896d5eb33702dd656;
+ localparam [255:0] RZ_1 = 256'hae637ff2fd5468780241afb3a8ebaeb8618e86b4a1a211b350546c9e6fea93d4;
+
+ localparam [255:0] PX_2 = 256'he58a6470e038f6b261d5a9a72fb2bd96b6bad433ff7baea6a40b5facf5085189;
+ localparam [255:0] PY_2 = 256'h03dd8785b592307811ee5512e2d713c5dc65f60f01883340fe0f56f858a39474;
+ localparam [255:0] PZ_2 = 256'h1b4657b1e79c9074fbf7f63f96ce2854db4808afc72841fac623dc68d9bff64d;
+
+ localparam [255:0] RX_2 = 256'hc354e99a827a3f1c30f29f6b1d72273eb0daaeb06bb373ed315e305b89d857ca;
+ localparam [255:0] RY_2 = 256'h0cb054f95589c1fcbe763df3b8d7badd568d5e93a667076dddfc70dcfab74948;
+ localparam [255:0] RZ_2 = 256'hd79d9170dd628aee82d149715a6ec6cc44426ccae236d2a146edbd15a564ea53;
+
+ localparam [255:0] PX_3 = 256'hbf5fe30c79025a0b638b0fd62bf1349aee0a9fc7fc2719291b0c23535c16eb52;
+ localparam [255:0] PY_3 = 256'h8a637c7c0b9459de664d40a717e1abc0f843f03169fae943e0835cbe767da06b;
+ localparam [255:0] PZ_3 = 256'h0871d93601d654216912866514a788a92e8a9b6047611bf185d459e204727377;
+
+ localparam [255:0] RX_3 = 256'h1ba6259b5b750e4d6e4f490f661646cd9491be16965f47044ac2688048e567c5;
+ localparam [255:0] RY_3 = 256'h80e55c16f403f8d7282bca628477771a45330567caa5aaab9a54919dbe05e3e4;
+ localparam [255:0] RZ_3 = 256'hb99663f045c9602b05f23aaaa508e6167d15740be900175dbeceb957a9dad951;
+
+ localparam [255:0] PX_4 = 256'hxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
+ localparam [255:0] PY_4 = 256'hxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;
+ localparam [255:0] PZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+
+ localparam [255:0] RX_4 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x
+ localparam [255:0] RY_4 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y
+ localparam [255:0] RZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+
+ localparam [255:0] PX_5 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x
+ localparam [255:0] PY_5 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y
+ localparam [255:0] PZ_5 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+
+ localparam [255:0] RX_5 = 256'h29d05c193da77b710e86323538b77e1b11f904fea42998be16bd8d744ece7ad0; // H.x
+ localparam [255:0] RY_5 = 256'hb01cbd1c01e58065711814b583f061e9d431cca994cea1313449bf97c840ae07; // H.y
+ localparam [255:0] RZ_5 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+
+ localparam [255:0] PX_6 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296; // G.x
+ localparam [255:0] PY_6 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5; // G.y
+ localparam [255:0] PZ_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+
+ localparam [255:0] RX_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+ localparam [255:0] RY_6 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+ localparam [255:0] RZ_6 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+
+ localparam [255:0] Q = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (PX, PY, PZ, RX, RY, RZ, Q)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_px_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_py_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_pz_addr;
+
+ wire [WORD_COUNTER_WIDTH-1:0] core_rx_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_ry_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_rz_addr;
+
+ wire [WORD_COUNTER_WIDTH-1:0] core_q_addr;
+
+ wire core_rx_wren;
+ wire core_ry_wren;
+ wire core_rz_wren;
+
+ wire [ 32-1:0] core_px_data;
+ wire [ 32-1:0] core_py_data;
+ wire [ 32-1:0] core_pz_data;
+
+ wire [ 32-1:0] core_rx_data_wr;
+ wire [ 32-1:0] core_ry_data_wr;
+ wire [ 32-1:0] core_rz_data_wr;
+
+ wire [ 32-1:0] core_rx_data_rd;
+ wire [ 32-1:0] core_ry_data_rd;
+ wire [ 32-1:0] core_rz_data_rd;
+
+ wire [ 32-1:0] core_q_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyzq_addr;
+ reg tb_xyzq_wren;
+
+ reg [ 31:0] tb_px_data;
+ reg [ 31:0] tb_py_data;
+ reg [ 31:0] tb_pz_data;
+ wire [ 31:0] tb_rx_data;
+ wire [ 31:0] tb_ry_data;
+ wire [ 31:0] tb_rz_data;
+ reg [ 31:0] tb_q_data;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_px
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_px_data), .a_out(),
+ .b_addr(core_px_addr), .b_out(core_px_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_py
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_py_data), .a_out(),
+ .b_addr(core_py_addr), .b_out(core_py_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_pz
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_pz_data), .a_out(),
+ .b_addr(core_pz_addr), .b_out(core_pz_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_q
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_q_data), .a_out(),
+ .b_addr(core_q_addr), .b_out(core_q_data)
);
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
-
- /* initialize control inputs */
- rst_n = 0;
- ena = 0;
-
- /* wait for some time */
- #200;
-
- /* de-assert reset */
- rst_n = 1;
-
- /* wait for some time */
- #100;
-
- /* run tests */
- test_curve_adder(PX_1, PY_1, PZ_1, RX_1, RY_1, RZ_1);
- test_curve_adder(PX_2, PY_2, PZ_2, RX_2, RY_2, RZ_2);
- test_curve_adder(PX_3, PY_3, PZ_3, RX_3, RY_3, RZ_3);
- test_curve_adder(PX_4, PY_4, PZ_4, RX_4, RY_4, RZ_4);
- test_curve_adder(PX_5, PY_5, PZ_5, RX_5, RY_5, RZ_5);
- test_curve_adder(PX_6, Q - PY_6, PZ_6, RX_6, RY_6, RZ_6);
-
- /* print result */
- if (ok) $display("tb_curve_adder_256: SUCCESS");
- else $display("tb_curve_adder_256: FAILURE");
- //
- $finish;
- //
- end
-
-
- //
- // Test Task
- //
- reg t_ok;
-
- integer w;
-
- task test_curve_adder;
-
- input [255:0] px;
- input [255:0] py;
- input [255:0] pz;
-
- input [255:0] rx;
- input [255:0] ry;
- input [255:0] rz;
-
- reg [255:0] px_shreg;
- reg [255:0] py_shreg;
- reg [255:0] pz_shreg;
-
- reg [255:0] rx_shreg;
- reg [255:0] ry_shreg;
- reg [255:0] rz_shreg;
-
- reg [255:0] q_shreg;
-
- begin
-
- /* start filling memories */
- tb_xyzq_wren = 1;
-
- /* initialize shift registers */
- px_shreg = px;
- py_shreg = py;
- pz_shreg = pz;
- q_shreg = Q;
-
- /* write all the words */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set addresses */
- tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* set data words */
- tb_px_data = px_shreg[31:0];
- tb_py_data = py_shreg[31:0];
- tb_pz_data = pz_shreg[31:0];
- tb_q_data = q_shreg[31:0];
-
- /* shift inputs */
- px_shreg = {{32{1'bX}}, px_shreg[255:32]};
- py_shreg = {{32{1'bX}}, py_shreg[255:32]};
- pz_shreg = {{32{1'bX}}, pz_shreg[255:32]};
- q_shreg = {{32{1'bX}}, q_shreg[255:32]};
-
- /* wait for 1 clock tick */
- #10;
-
- end
-
- /* wipe addresses */
- tb_xyzq_addr = {WORD_COUNTER_WIDTH{1'bX}};
-
- /* wipe data words */
- tb_px_data = {32{1'bX}};
- tb_py_data = {32{1'bX}};
- tb_pz_data = {32{1'bX}};
- tb_q_data = {32{1'bX}};
-
- /* stop filling memories */
- tb_xyzq_wren = 0;
-
- /* start operation */
- ena = 1;
-
- /* clear flag */
- #10 ena = 0;
-
- /* wait for operation to complete */
- while (!rdy) #10;
-
- /* read result */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set address */
- tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* wait for 1 clock tick */
- #10;
-
- /* store data word */
- rx_shreg = {tb_rx_data, rx_shreg[255:32]};
- ry_shreg = {tb_ry_data, ry_shreg[255:32]};
- rz_shreg = {tb_rz_data, rz_shreg[255:32]};
-
- end
-
- /* compare */
- t_ok = (rx_shreg == rx) &&
- (ry_shreg == ry) &&
- (rz_shreg == rz);
-
- /* display results */
- $display("test_curve_adder(): %s", t_ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && t_ok;
-
- end
-
- endtask
-
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_rx
+ ( .clk(clk),
+ .a_addr(core_rx_addr), .a_wr(core_rx_wren), .a_in(core_rx_data_wr), .a_out(core_rx_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_rx_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_ry
+ ( .clk(clk),
+ .a_addr(core_ry_addr), .a_wr(core_ry_wren), .a_in(core_ry_data_wr), .a_out(core_ry_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_ry_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_rz
+ ( .clk(clk),
+ .a_addr(core_rz_addr), .a_wr(core_rz_wren), .a_in(core_rz_data_wr), .a_out(core_rz_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_rz_data)
+ );
+
+
+ //
+ // Opcode
+ //
+ wire [ 5: 0] add_uop_addr;
+ wire [19: 0] add_uop;
+
+ uop_add_rom add_rom
+ (
+ .clk (clk),
+ .addr (add_uop_addr),
+ .data (add_uop)
+ );
+
+ //
+ // UUT
+ //
+ curve_dbl_add_256 uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .uop_addr (add_uop_addr),
+ .uop (add_uop),
+
+ .px_addr (core_px_addr),
+ .py_addr (core_py_addr),
+ .pz_addr (core_pz_addr),
+ .rx_addr (core_rx_addr),
+ .ry_addr (core_ry_addr),
+ .rz_addr (core_rz_addr),
+ .q_addr (core_q_addr),
+
+ .rx_wren (core_rx_wren),
+ .ry_wren (core_ry_wren),
+ .rz_wren (core_rz_wren),
+
+ .px_din (core_px_data),
+ .py_din (core_py_data),
+ .pz_din (core_pz_data),
+ .rx_din (core_rx_data_rd),
+ .ry_din (core_ry_data_rd),
+ .rz_din (core_rz_data_rd),
+ .rx_dout (core_rx_data_wr),
+ .ry_dout (core_ry_data_wr),
+ .rz_dout (core_rz_data_wr),
+ .q_din (core_q_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_curve_adder(PX_1, PY_1, PZ_1, RX_1, RY_1, RZ_1);
+ test_curve_adder(PX_2, PY_2, PZ_2, RX_2, RY_2, RZ_2);
+ test_curve_adder(PX_3, PY_3, PZ_3, RX_3, RY_3, RZ_3);
+ test_curve_adder(PX_4, PY_4, PZ_4, RX_4, RY_4, RZ_4);
+ test_curve_adder(PX_5, PY_5, PZ_5, RX_5, RY_5, RZ_5);
+ test_curve_adder(PX_6, Q - PY_6, PZ_6, RX_6, RY_6, RZ_6);
+
+ /* print result */
+ if (ok) $display("tb_curve_adder_256: SUCCESS");
+ else $display("tb_curve_adder_256: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg t_ok;
+
+ integer w;
+
+ task test_curve_adder;
+
+ input [255:0] px;
+ input [255:0] py;
+ input [255:0] pz;
+
+ input [255:0] rx;
+ input [255:0] ry;
+ input [255:0] rz;
+
+ reg [255:0] px_shreg;
+ reg [255:0] py_shreg;
+ reg [255:0] pz_shreg;
+
+ reg [255:0] rx_shreg;
+ reg [255:0] ry_shreg;
+ reg [255:0] rz_shreg;
+
+ reg [255:0] q_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_xyzq_wren = 1;
+
+ /* initialize shift registers */
+ px_shreg = px;
+ py_shreg = py;
+ pz_shreg = pz;
+ q_shreg = Q;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_px_data = px_shreg[31:0];
+ tb_py_data = py_shreg[31:0];
+ tb_pz_data = pz_shreg[31:0];
+ tb_q_data = q_shreg[31:0];
+
+ /* shift inputs */
+ px_shreg = {{32{1'bX}}, px_shreg[255:32]};
+ py_shreg = {{32{1'bX}}, py_shreg[255:32]};
+ pz_shreg = {{32{1'bX}}, pz_shreg[255:32]};
+ q_shreg = {{32{1'bX}}, q_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyzq_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_px_data = {32{1'bX}};
+ tb_py_data = {32{1'bX}};
+ tb_pz_data = {32{1'bX}};
+ tb_q_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyzq_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ rx_shreg = {tb_rx_data, rx_shreg[255:32]};
+ ry_shreg = {tb_ry_data, ry_shreg[255:32]};
+ rz_shreg = {tb_rz_data, rz_shreg[255:32]};
+
+ end
+
+ /* compare */
+ t_ok = (rx_shreg == rx) &&
+ (ry_shreg == ry) &&
+ (rz_shreg == rz);
+
+ /* display results */
+ $display("test_curve_adder(): %s", t_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && t_ok;
+
+ end
+
+ endtask
+
endmodule
-
+
//------------------------------------------------------------------------------
// End-of-File
diff --git a/bench/tb_curve_doubler_256.v b/bench/tb_curve_doubler_256.v
index c7b7541..c7a1c0f 100644
--- a/bench/tb_curve_doubler_256.v
+++ b/bench/tb_curve_doubler_256.v
@@ -43,365 +43,365 @@
module tb_curve_doubler_256;
- //
- // Test Vectors
- //
- localparam [255:0] PX_1 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296;
- localparam [255:0] PY_1 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5;
- localparam [255:0] PZ_1 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
-
- localparam [255:0] RX_1 = 256'h9a978f59acd1b5ad570e7d52dcfcde43804b42274f61ddcf1e7d848391d6c70f;
- localparam [255:0] RY_1 = 256'h4126885e7f786af905338238e5346d5fe77fc46388668bd0fd59be3190d2f5d1;
- localparam [255:0] RZ_1 = 256'h9fc685c5fc34ff371dcfd694f81f3c2c579c66aed662bd9d976c80d06f7ea3ea;
-
- localparam [255:0] PX_2 = 256'h0ec88440c8b00a9e572bf1bceb7d0c5906bd65990a9b7081130bd72e2c136ca0;
- localparam [255:0] PY_2 = 256'hc0bc77e1339e899101f8e8eccf79c3f7f4bbdd1bf96f6446199bd423026a60d6;
- localparam [255:0] PZ_2 = 256'hdd27cb52a31d1f6e041accf1103de05ba0a5edd74b738d51fe3397de0e3fc306;
-
- localparam [255:0] RX_2 = 256'he6afae63e774df21244609cb4c35d17d28b36b8b9fb7c58929af247f34ac72f9;
- localparam [255:0] RY_2 = 256'h061076db7a5745adc90b2e9eebe0ad6482309690f50b60835c265cf83a1b34eb;
- localparam [255:0] RZ_2 = 256'h1b6bfd04f2a41d68e85423655db1142d97ebaec0c67c450408f427e35c4f054f;
-
- localparam [255:0] PX_3 = 256'hb0f824c88ec62df89912ca9ffbcbbb4ffb4d80f8a7d7b4a992273261a2f7be7f;
- localparam [255:0] PY_3 = 256'h403e34c78c2b816fce2b1f8d73cfeef28113b8de8bda4a447d17b619bef73705;
- localparam [255:0] PZ_3 = 256'h0e3e81bb8e954f3164ae54a6cffa7fcc9631dfddee55fac61e46415f1f5fe5e2;
-
- localparam [255:0] RX_3 = 256'hd4e725920c88cc2f57847a315f3b6c180abb278b8fa2a47da3d1a191a8c29e19;
- localparam [255:0] RY_3 = 256'ha798ad8dbd66c98b53414ab1d04b0f871929a90fea996c88b96d9d68eb8eb0dc;
- localparam [255:0] RZ_3 = 256'ha7ead72c01294eaf2899bb6b84f7d26417e6758e3db29f3b5c2ca8e9911067f5;
-
- localparam [255:0] PX_4 = 256'hXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
- localparam [255:0] PY_4 = 256'hXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
- localparam [255:0] PZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
-
- localparam [255:0] RX_4 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
- localparam [255:0] RY_4 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
- localparam [255:0] RZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
-
- localparam [255:0] Q = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
-
-
-
- //
- // TODO: Test special cases!
- //
-
-
- //
- // Core Parameters
- //
- localparam WORD_COUNTER_WIDTH = 3;
- localparam OPERAND_NUM_WORDS = 8;
-
-
- //
- // Clock (100 MHz)
- //
- reg clk = 1'b0;
- always #5 clk = ~clk;
-
-
- //
- // Inputs, Outputs
- //
- reg rst_n;
- reg ena;
- wire rdy;
-
-
- //
- // Buffers (PX, PY, PZ, RX, RY, RZ, Q)
- //
- wire [WORD_COUNTER_WIDTH-1:0] core_px_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_py_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_pz_addr;
-
- wire [WORD_COUNTER_WIDTH-1:0] core_rx_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_ry_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_rz_addr;
-
- wire [WORD_COUNTER_WIDTH-1:0] core_q_addr;
-
- wire core_rx_wren;
- wire core_ry_wren;
- wire core_rz_wren;
-
- wire [ 32-1:0] core_px_data;
- wire [ 32-1:0] core_py_data;
- wire [ 32-1:0] core_pz_data;
-
- wire [ 32-1:0] core_rx_data_wr;
- wire [ 32-1:0] core_ry_data_wr;
- wire [ 32-1:0] core_rz_data_wr;
-
- wire [ 32-1:0] core_rx_data_rd;
- wire [ 32-1:0] core_ry_data_rd;
- wire [ 32-1:0] core_rz_data_rd;
-
- wire [ 32-1:0] core_q_data;
-
- reg [WORD_COUNTER_WIDTH-1:0] tb_xyzq_addr;
- reg tb_xyzq_wren;
-
- reg [ 31:0] tb_px_data;
- reg [ 31:0] tb_py_data;
- reg [ 31:0] tb_pz_data;
- wire [ 31:0] tb_rx_data;
- wire [ 31:0] tb_ry_data;
- wire [ 31:0] tb_rz_data;
- reg [ 31:0] tb_q_data;
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_px
- ( .clk(clk),
- .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_px_data), .a_out(),
- .b_addr(core_px_addr), .b_out(core_px_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_py
- ( .clk(clk),
- .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_py_data), .a_out(),
- .b_addr(core_py_addr), .b_out(core_py_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_pz
- ( .clk(clk),
- .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_pz_data), .a_out(),
- .b_addr(core_pz_addr), .b_out(core_pz_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_q
- ( .clk(clk),
- .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_q_data), .a_out(),
- .b_addr(core_q_addr), .b_out(core_q_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_rx
- ( .clk(clk),
- .a_addr(core_rx_addr), .a_wr(core_rx_wren), .a_in(core_rx_data_wr), .a_out(core_rx_data_rd),
- .b_addr(tb_xyzq_addr), .b_out(tb_rx_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_ry
- ( .clk(clk),
- .a_addr(core_ry_addr), .a_wr(core_ry_wren), .a_in(core_ry_data_wr), .a_out(core_ry_data_rd),
- .b_addr(tb_xyzq_addr), .b_out(tb_ry_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_rz
- ( .clk(clk),
- .a_addr(core_rz_addr), .a_wr(core_rz_wren), .a_in(core_rz_data_wr), .a_out(core_rz_data_rd),
- .b_addr(tb_xyzq_addr), .b_out(tb_rz_data)
- );
-
-
- //
- // Opcode
- //
- wire [ 5: 0] dbl_uop_addr;
- wire [19: 0] dbl_uop;
-
- uop_dbl_rom dbl_rom
- (
- .clk (clk),
- .addr (dbl_uop_addr),
- .data (dbl_uop)
- );
-
-
- //
- // UUT
- //
- curve_dbl_add_256 uut
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (ena),
- .rdy (rdy),
-
- .uop_addr (dbl_uop_addr),
- .uop (dbl_uop),
-
- .px_addr (core_px_addr),
- .py_addr (core_py_addr),
- .pz_addr (core_pz_addr),
- .rx_addr (core_rx_addr),
- .ry_addr (core_ry_addr),
- .rz_addr (core_rz_addr),
- .q_addr (core_q_addr),
-
- .rx_wren (core_rx_wren),
- .ry_wren (core_ry_wren),
- .rz_wren (core_rz_wren),
-
- .px_din (core_px_data),
- .py_din (core_py_data),
- .pz_din (core_pz_data),
- .rx_din (core_rx_data_rd),
- .ry_din (core_ry_data_rd),
- .rz_din (core_rz_data_rd),
- .rx_dout (core_rx_data_wr),
- .ry_dout (core_ry_data_wr),
- .rz_dout (core_rz_data_wr),
- .q_din (core_q_data)
+ //
+ // Test Vectors
+ //
+ localparam [255:0] PX_1 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296;
+ localparam [255:0] PY_1 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5;
+ localparam [255:0] PZ_1 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+
+ localparam [255:0] RX_1 = 256'h9a978f59acd1b5ad570e7d52dcfcde43804b42274f61ddcf1e7d848391d6c70f;
+ localparam [255:0] RY_1 = 256'h4126885e7f786af905338238e5346d5fe77fc46388668bd0fd59be3190d2f5d1;
+ localparam [255:0] RZ_1 = 256'h9fc685c5fc34ff371dcfd694f81f3c2c579c66aed662bd9d976c80d06f7ea3ea;
+
+ localparam [255:0] PX_2 = 256'h0ec88440c8b00a9e572bf1bceb7d0c5906bd65990a9b7081130bd72e2c136ca0;
+ localparam [255:0] PY_2 = 256'hc0bc77e1339e899101f8e8eccf79c3f7f4bbdd1bf96f6446199bd423026a60d6;
+ localparam [255:0] PZ_2 = 256'hdd27cb52a31d1f6e041accf1103de05ba0a5edd74b738d51fe3397de0e3fc306;
+
+ localparam [255:0] RX_2 = 256'he6afae63e774df21244609cb4c35d17d28b36b8b9fb7c58929af247f34ac72f9;
+ localparam [255:0] RY_2 = 256'h061076db7a5745adc90b2e9eebe0ad6482309690f50b60835c265cf83a1b34eb;
+ localparam [255:0] RZ_2 = 256'h1b6bfd04f2a41d68e85423655db1142d97ebaec0c67c450408f427e35c4f054f;
+
+ localparam [255:0] PX_3 = 256'hb0f824c88ec62df89912ca9ffbcbbb4ffb4d80f8a7d7b4a992273261a2f7be7f;
+ localparam [255:0] PY_3 = 256'h403e34c78c2b816fce2b1f8d73cfeef28113b8de8bda4a447d17b619bef73705;
+ localparam [255:0] PZ_3 = 256'h0e3e81bb8e954f3164ae54a6cffa7fcc9631dfddee55fac61e46415f1f5fe5e2;
+
+ localparam [255:0] RX_3 = 256'hd4e725920c88cc2f57847a315f3b6c180abb278b8fa2a47da3d1a191a8c29e19;
+ localparam [255:0] RY_3 = 256'ha798ad8dbd66c98b53414ab1d04b0f871929a90fea996c88b96d9d68eb8eb0dc;
+ localparam [255:0] RZ_3 = 256'ha7ead72c01294eaf2899bb6b84f7d26417e6758e3db29f3b5c2ca8e9911067f5;
+
+ localparam [255:0] PX_4 = 256'hXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
+ localparam [255:0] PY_4 = 256'hXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
+ localparam [255:0] PZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+
+ localparam [255:0] RX_4 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+ localparam [255:0] RY_4 = 256'h0000000000000000000000000000000000000000000000000000000000000001;
+ localparam [255:0] RZ_4 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+
+ localparam [255:0] Q = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+
+
+ //
+ // TODO: Test special cases!
+ //
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (PX, PY, PZ, RX, RY, RZ, Q)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_px_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_py_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_pz_addr;
+
+ wire [WORD_COUNTER_WIDTH-1:0] core_rx_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_ry_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_rz_addr;
+
+ wire [WORD_COUNTER_WIDTH-1:0] core_q_addr;
+
+ wire core_rx_wren;
+ wire core_ry_wren;
+ wire core_rz_wren;
+
+ wire [ 32-1:0] core_px_data;
+ wire [ 32-1:0] core_py_data;
+ wire [ 32-1:0] core_pz_data;
+
+ wire [ 32-1:0] core_rx_data_wr;
+ wire [ 32-1:0] core_ry_data_wr;
+ wire [ 32-1:0] core_rz_data_wr;
+
+ wire [ 32-1:0] core_rx_data_rd;
+ wire [ 32-1:0] core_ry_data_rd;
+ wire [ 32-1:0] core_rz_data_rd;
+
+ wire [ 32-1:0] core_q_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyzq_addr;
+ reg tb_xyzq_wren;
+
+ reg [ 31:0] tb_px_data;
+ reg [ 31:0] tb_py_data;
+ reg [ 31:0] tb_pz_data;
+ wire [ 31:0] tb_rx_data;
+ wire [ 31:0] tb_ry_data;
+ wire [ 31:0] tb_rz_data;
+ reg [ 31:0] tb_q_data;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_px
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_px_data), .a_out(),
+ .b_addr(core_px_addr), .b_out(core_px_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_py
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_py_data), .a_out(),
+ .b_addr(core_py_addr), .b_out(core_py_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_pz
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_pz_data), .a_out(),
+ .b_addr(core_pz_addr), .b_out(core_pz_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_q
+ ( .clk(clk),
+ .a_addr(tb_xyzq_addr), .a_wr(tb_xyzq_wren), .a_in(tb_q_data), .a_out(),
+ .b_addr(core_q_addr), .b_out(core_q_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_rx
+ ( .clk(clk),
+ .a_addr(core_rx_addr), .a_wr(core_rx_wren), .a_in(core_rx_data_wr), .a_out(core_rx_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_rx_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_ry
+ ( .clk(clk),
+ .a_addr(core_ry_addr), .a_wr(core_ry_wren), .a_in(core_ry_data_wr), .a_out(core_ry_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_ry_data)
);
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
-
- /* initialize control inputs */
- rst_n = 0;
- ena = 0;
-
- /* wait for some time */
- #200;
-
- /* de-assert reset */
- rst_n = 1;
-
- /* wait for some time */
- #100;
-
- /* run tests */
- test_curve_doubler(PX_1, PY_1, PZ_1, RX_1, RY_1, RZ_1);
- test_curve_doubler(PX_2, PY_2, PZ_2, RX_2, RY_2, RZ_2);
- test_curve_doubler(PX_3, PY_3, PZ_3, RX_3, RY_3, RZ_3);
- test_curve_doubler(PX_4, PY_4, PZ_4, RX_4, RY_4, RZ_4);
-
- /* print result */
- if (ok) $display("tb_curve_doubler_256: SUCCESS");
- else $display("tb_curve_doubler_256: FAILURE");
- //
-// $finish;
- //
- end
-
-
- //
- // Test Task
- //
- reg t_ok;
-
- integer w;
-
- task test_curve_doubler;
-
- input [255:0] px;
- input [255:0] py;
- input [255:0] pz;
-
- input [255:0] rx;
- input [255:0] ry;
- input [255:0] rz;
-
- reg [255:0] px_shreg;
- reg [255:0] py_shreg;
- reg [255:0] pz_shreg;
-
- reg [255:0] rx_shreg;
- reg [255:0] ry_shreg;
- reg [255:0] rz_shreg;
-
- reg [255:0] q_shreg;
-
- begin
-
- /* start filling memories */
- tb_xyzq_wren = 1;
-
- /* initialize shift registers */
- px_shreg = px;
- py_shreg = py;
- pz_shreg = pz;
- q_shreg = Q;
-
- /* write all the words */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set addresses */
- tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* set data words */
- tb_px_data = px_shreg[31:0];
- tb_py_data = py_shreg[31:0];
- tb_pz_data = pz_shreg[31:0];
- tb_q_data = q_shreg[31:0];
-
- /* shift inputs */
- px_shreg = {{32{1'bX}}, px_shreg[255:32]};
- py_shreg = {{32{1'bX}}, py_shreg[255:32]};
- pz_shreg = {{32{1'bX}}, pz_shreg[255:32]};
- q_shreg = {{32{1'bX}}, q_shreg[255:32]};
-
- /* wait for 1 clock tick */
- #10;
-
- end
-
- /* wipe addresses */
- tb_xyzq_addr = {WORD_COUNTER_WIDTH{1'bX}};
-
- /* wipe data words */
- tb_px_data = {32{1'bX}};
- tb_py_data = {32{1'bX}};
- tb_pz_data = {32{1'bX}};
- tb_q_data = {32{1'bX}};
-
- /* stop filling memories */
- tb_xyzq_wren = 0;
-
- /* start operation */
- ena = 1;
-
- /* clear flag */
- #10 ena = 0;
-
- /* wait for operation to complete */
- while (!rdy) #10;
-
- /* read result */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set address */
- tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* wait for 1 clock tick */
- #10;
-
- /* store data word */
- rx_shreg = {tb_rx_data, rx_shreg[255:32]};
- ry_shreg = {tb_ry_data, ry_shreg[255:32]};
- rz_shreg = {tb_rz_data, rz_shreg[255:32]};
-
- end
-
- /* compare */
- t_ok = (rx_shreg == rx) &&
- (ry_shreg == ry) &&
- (rz_shreg == rz);
-
- /* display results */
- $display("test_curve_doubler(): %s", t_ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && t_ok;
-
- end
-
- endtask
-
-
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_rz
+ ( .clk(clk),
+ .a_addr(core_rz_addr), .a_wr(core_rz_wren), .a_in(core_rz_data_wr), .a_out(core_rz_data_rd),
+ .b_addr(tb_xyzq_addr), .b_out(tb_rz_data)
+ );
+
+
+ //
+ // Opcode
+ //
+ wire [ 5: 0] dbl_uop_addr;
+ wire [19: 0] dbl_uop;
+
+ uop_dbl_rom dbl_rom
+ (
+ .clk (clk),
+ .addr (dbl_uop_addr),
+ .data (dbl_uop)
+ );
+
+
+ //
+ // UUT
+ //
+ curve_dbl_add_256 uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .uop_addr (dbl_uop_addr),
+ .uop (dbl_uop),
+
+ .px_addr (core_px_addr),
+ .py_addr (core_py_addr),
+ .pz_addr (core_pz_addr),
+ .rx_addr (core_rx_addr),
+ .ry_addr (core_ry_addr),
+ .rz_addr (core_rz_addr),
+ .q_addr (core_q_addr),
+
+ .rx_wren (core_rx_wren),
+ .ry_wren (core_ry_wren),
+ .rz_wren (core_rz_wren),
+
+ .px_din (core_px_data),
+ .py_din (core_py_data),
+ .pz_din (core_pz_data),
+ .rx_din (core_rx_data_rd),
+ .ry_din (core_ry_data_rd),
+ .rz_din (core_rz_data_rd),
+ .rx_dout (core_rx_data_wr),
+ .ry_dout (core_ry_data_wr),
+ .rz_dout (core_rz_data_wr),
+ .q_din (core_q_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_curve_doubler(PX_1, PY_1, PZ_1, RX_1, RY_1, RZ_1);
+ test_curve_doubler(PX_2, PY_2, PZ_2, RX_2, RY_2, RZ_2);
+ test_curve_doubler(PX_3, PY_3, PZ_3, RX_3, RY_3, RZ_3);
+ test_curve_doubler(PX_4, PY_4, PZ_4, RX_4, RY_4, RZ_4);
+
+ /* print result */
+ if (ok) $display("tb_curve_doubler_256: SUCCESS");
+ else $display("tb_curve_doubler_256: FAILURE");
+ //
+ // $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg t_ok;
+
+ integer w;
+
+ task test_curve_doubler;
+
+ input [255:0] px;
+ input [255:0] py;
+ input [255:0] pz;
+
+ input [255:0] rx;
+ input [255:0] ry;
+ input [255:0] rz;
+
+ reg [255:0] px_shreg;
+ reg [255:0] py_shreg;
+ reg [255:0] pz_shreg;
+
+ reg [255:0] rx_shreg;
+ reg [255:0] ry_shreg;
+ reg [255:0] rz_shreg;
+
+ reg [255:0] q_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_xyzq_wren = 1;
+
+ /* initialize shift registers */
+ px_shreg = px;
+ py_shreg = py;
+ pz_shreg = pz;
+ q_shreg = Q;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_px_data = px_shreg[31:0];
+ tb_py_data = py_shreg[31:0];
+ tb_pz_data = pz_shreg[31:0];
+ tb_q_data = q_shreg[31:0];
+
+ /* shift inputs */
+ px_shreg = {{32{1'bX}}, px_shreg[255:32]};
+ py_shreg = {{32{1'bX}}, py_shreg[255:32]};
+ pz_shreg = {{32{1'bX}}, pz_shreg[255:32]};
+ q_shreg = {{32{1'bX}}, q_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyzq_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_px_data = {32{1'bX}};
+ tb_py_data = {32{1'bX}};
+ tb_pz_data = {32{1'bX}};
+ tb_q_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyzq_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_xyzq_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ rx_shreg = {tb_rx_data, rx_shreg[255:32]};
+ ry_shreg = {tb_ry_data, ry_shreg[255:32]};
+ rz_shreg = {tb_rz_data, rz_shreg[255:32]};
+
+ end
+
+ /* compare */
+ t_ok = (rx_shreg == rx) &&
+ (ry_shreg == ry) &&
+ (rz_shreg == rz);
+
+ /* display results */
+ $display("test_curve_doubler(): %s", t_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && t_ok;
+
+ end
+
+ endtask
+
+
endmodule
//------------------------------------------------------------------------------
diff --git a/bench/tb_curve_multiplier_256.v b/bench/tb_curve_multiplier_256.v
index bcca034..20498a2 100644
--- a/bench/tb_curve_multiplier_256.v
+++ b/bench/tb_curve_multiplier_256.v
@@ -43,238 +43,238 @@
module tb_curve_multiplier_256;
- //
- // Test Vectors
- //
- localparam [255:0] K_1 = 256'h70a12c2db16845ed56ff68cfc21a472b3f04d7d6851bf6349f2d7d5b3452b38a;
- localparam [255:0] PX_1 = 256'h8101ece47464a6ead70cf69a6e2bd3d88691a3262d22cba4f7635eaff26680a8;
- localparam [255:0] PY_1 = 256'hd8a12ba61d599235f67d9cb4d58f1783d3ca43e78f0a5abaa624079936c0c3a9;
-
- localparam [255:0] K_2 = 256'h580ec00d856434334cef3f71ecaed4965b12ae37fa47055b1965c7b134ee45d0;
- localparam [255:0] PX_2 = 256'h7214bc9647160bbd39ff2f80533f5dc6ddd70ddf86bb815661e805d5d4e6f27c;
- localparam [255:0] PY_2 = 256'h8b81e3e977597110c7cf2633435b2294b72642987defd3d4007e1cfc5df84541;
-
- localparam [255:0] K_3 = 256'hffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551;
- localparam [255:0] PX_3 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
- localparam [255:0] PY_3 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
-
-
- //
- // Core Parameters
- //
- localparam WORD_COUNTER_WIDTH = 3;
- localparam OPERAND_NUM_WORDS = 8;
-
-
- //
- // Clock (100 MHz)
- //
- reg clk = 1'b0;
- always #5 clk = ~clk;
-
-
- //
- // Inputs, Outputs
- //
- reg rst_n;
- reg ena;
- wire rdy;
-
-
- //
- // Buffers (K, PX, PY)
- //
- wire [WORD_COUNTER_WIDTH-1:0] core_k_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_px_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_py_addr;
-
- wire core_px_wren;
- wire core_py_wren;
-
- wire [ 32-1:0] core_k_data;
- wire [ 32-1:0] core_px_data;
- wire [ 32-1:0] core_py_data;
-
- reg [WORD_COUNTER_WIDTH-1:0] tb_k_addr;
- reg [WORD_COUNTER_WIDTH-1:0] tb_pxy_addr;
-
- reg tb_k_wren;
-
- reg [ 31:0] tb_k_data;
- wire [ 31:0] tb_px_data;
- wire [ 31:0] tb_py_data;
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_k
- ( .clk(clk),
- .a_addr(tb_k_addr), .a_wr(tb_k_wren), .a_in(tb_k_data), .a_out(),
- .b_addr(core_k_addr), .b_out(core_k_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_px
- ( .clk(clk),
- .a_addr(core_px_addr), .a_wr(core_px_wren), .a_in(core_px_data), .a_out(),
- .b_addr(tb_pxy_addr), .b_out(tb_px_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_py
- ( .clk(clk),
- .a_addr(core_py_addr), .a_wr(core_py_wren), .a_in(core_py_data), .a_out(),
- .b_addr(tb_pxy_addr), .b_out(tb_py_data)
- );
-
-
- //
- // UUT
- //
- curve_mul_256 uut
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (ena),
- .rdy (rdy),
-
- .k_addr (core_k_addr),
- .rx_addr (core_px_addr),
- .ry_addr (core_py_addr),
-
- .rx_wren (core_px_wren),
- .ry_wren (core_py_wren),
-
- .k_din (core_k_data),
-
- .rx_dout (core_px_data),
- .ry_dout (core_py_data)
+ //
+ // Test Vectors
+ //
+ localparam [255:0] K_1 = 256'h70a12c2db16845ed56ff68cfc21a472b3f04d7d6851bf6349f2d7d5b3452b38a;
+ localparam [255:0] PX_1 = 256'h8101ece47464a6ead70cf69a6e2bd3d88691a3262d22cba4f7635eaff26680a8;
+ localparam [255:0] PY_1 = 256'hd8a12ba61d599235f67d9cb4d58f1783d3ca43e78f0a5abaa624079936c0c3a9;
+
+ localparam [255:0] K_2 = 256'h580ec00d856434334cef3f71ecaed4965b12ae37fa47055b1965c7b134ee45d0;
+ localparam [255:0] PX_2 = 256'h7214bc9647160bbd39ff2f80533f5dc6ddd70ddf86bb815661e805d5d4e6f27c;
+ localparam [255:0] PY_2 = 256'h8b81e3e977597110c7cf2633435b2294b72642987defd3d4007e1cfc5df84541;
+
+ localparam [255:0] K_3 = 256'hffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551;
+ localparam [255:0] PX_3 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+ localparam [255:0] PY_3 = 256'h0000000000000000000000000000000000000000000000000000000000000000;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (K, PX, PY)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_k_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_px_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_py_addr;
+
+ wire core_px_wren;
+ wire core_py_wren;
+
+ wire [ 32-1:0] core_k_data;
+ wire [ 32-1:0] core_px_data;
+ wire [ 32-1:0] core_py_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_k_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_pxy_addr;
+
+ reg tb_k_wren;
+
+ reg [ 31:0] tb_k_data;
+ wire [ 31:0] tb_px_data;
+ wire [ 31:0] tb_py_data;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_k
+ ( .clk(clk),
+ .a_addr(tb_k_addr), .a_wr(tb_k_wren), .a_in(tb_k_data), .a_out(),
+ .b_addr(core_k_addr), .b_out(core_k_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_px
+ ( .clk(clk),
+ .a_addr(core_px_addr), .a_wr(core_px_wren), .a_in(core_px_data), .a_out(),
+ .b_addr(tb_pxy_addr), .b_out(tb_px_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_py
+ ( .clk(clk),
+ .a_addr(core_py_addr), .a_wr(core_py_wren), .a_in(core_py_data), .a_out(),
+ .b_addr(tb_pxy_addr), .b_out(tb_py_data)
);
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
-
- /* initialize control inputs */
- rst_n = 0;
- ena = 0;
-
- /* wait for some time */
- #200;
-
- /* de-assert reset */
- rst_n = 1;
-
- /* wait for some time */
- #100;
-
- /* run tests */
- test_curve_multiplier(K_1, PX_1, PY_1);
- test_curve_multiplier(K_2, PX_2, PY_2);
- test_curve_multiplier(K_3, PX_3, PY_3);
-
- /* print result */
- if (ok) $display("tb_curve_multiplier_256: SUCCESS");
- else $display("tb_curve_multiplier_256: FAILURE");
- //
- //$finish;
- //
- end
-
-
- //
- // Test Task
- //
- reg p_ok;
-
- integer w;
-
- task test_curve_multiplier;
-
- input [255:0] k;
- input [255:0] px;
- input [255:0] py;
-
- reg [255:0] k_shreg;
- reg [255:0] px_shreg;
- reg [255:0] py_shreg;
-
- begin
-
- /* start filling memories */
- tb_k_wren = 1;
-
- /* initialize shift registers */
- k_shreg = k;
-
- /* write all the words */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set addresses */
- tb_k_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* set data words */
- tb_k_data = k_shreg[31:0];
-
- /* shift inputs */
- k_shreg = {{32{1'bX}}, k_shreg[255:32]};
-
- /* wait for 1 clock tick */
- #10;
-
- end
-
- /* wipe addresses */
- tb_k_addr = {WORD_COUNTER_WIDTH{1'bX}};
-
- /* wipe data words */
- tb_k_data = {32{1'bX}};
-
- /* stop filling memories */
- tb_k_wren = 0;
-
- /* start operation */
- ena = 1;
-
- /* clear flag */
- #10 ena = 0;
-
- /* wait for operation to complete */
- while (!rdy) #10;
-
- /* read result */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set address */
- tb_pxy_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* wait for 1 clock tick */
- #10;
-
- /* store data word */
- px_shreg = {tb_px_data, px_shreg[255:32]};
- py_shreg = {tb_py_data, py_shreg[255:32]};
-
- end
-
- /* compare */
- p_ok = (px_shreg == px) &&
- (py_shreg == py);
-
- /* display results */
- $display("test_curve_multiplier(): %s", p_ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && p_ok;
-
- end
-
- endtask
-
+
+
+ //
+ // UUT
+ //
+ curve_mul_256 uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .k_addr (core_k_addr),
+ .rx_addr (core_px_addr),
+ .ry_addr (core_py_addr),
+
+ .rx_wren (core_px_wren),
+ .ry_wren (core_py_wren),
+
+ .k_din (core_k_data),
+
+ .rx_dout (core_px_data),
+ .ry_dout (core_py_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_curve_multiplier(K_1, PX_1, PY_1);
+ test_curve_multiplier(K_2, PX_2, PY_2);
+ test_curve_multiplier(K_3, PX_3, PY_3);
+
+ /* print result */
+ if (ok) $display("tb_curve_multiplier_256: SUCCESS");
+ else $display("tb_curve_multiplier_256: FAILURE");
+ //
+ //$finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg p_ok;
+
+ integer w;
+
+ task test_curve_multiplier;
+
+ input [255:0] k;
+ input [255:0] px;
+ input [255:0] py;
+
+ reg [255:0] k_shreg;
+ reg [255:0] px_shreg;
+ reg [255:0] py_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_k_wren = 1;
+
+ /* initialize shift registers */
+ k_shreg = k;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_k_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_k_data = k_shreg[31:0];
+
+ /* shift inputs */
+ k_shreg = {{32{1'bX}}, k_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_k_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_k_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_k_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_pxy_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ px_shreg = {tb_px_data, px_shreg[255:32]};
+ py_shreg = {tb_py_data, py_shreg[255:32]};
+
+ end
+
+ /* compare */
+ p_ok = (px_shreg == px) &&
+ (py_shreg == py);
+
+ /* display results */
+ $display("test_curve_multiplier(): %s", p_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && p_ok;
+
+ end
+
+ endtask
+
endmodule
-
+
//------------------------------------------------------------------------------
// End-of-File
diff --git a/bench/tb_lowlevel_adder32.v b/bench/tb_lowlevel_adder32.v
index 2caffbd..70fbb15 100644
--- a/bench/tb_lowlevel_adder32.v
+++ b/bench/tb_lowlevel_adder32.v
@@ -1,175 +1,175 @@
-//------------------------------------------------------------------------------
-//
-// tb_lowlevel_adder32.v
-// -----------------------------------------------------------------------------
-// Testbench for 32-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-`timescale 1ns / 1ps
-//------------------------------------------------------------------------------
-
-
-module tb_lowlevel_adder32;
-
- //
- // Inputs
- //
- reg clk;
- reg [31: 0] a;
- reg [31: 0] b;
- reg c_in;
-
- //
- // Outputs
- //
- wire [31: 0] s;
- wire c_out;
-
- //
- // Test Vectors {a, b, c_in}
- //
- wire [64: 0] vec_0 = {32'h00000000, 32'h00000000, 1'b0}; // all zeroes, no carry
- wire [64: 0] vec_1 = {32'h00000000, 32'h00000000, 1'b1}; // all zeroes with carry
- wire [64: 0] vec_2 = {32'h00000000, 32'hFFFFFFFF, 1'b0}; // zeroes and ones, no carry
- wire [64: 0] vec_3 = {32'h00000000, 32'hFFFFFFFF, 1'b1}; // zeroes and ones with carry
- wire [64: 0] vec_4 = {32'hFFFFFFFF, 32'h00000000, 1'b0}; // ones and zeroes, no carry
- wire [64: 0] vec_5 = {32'hFFFFFFFF, 32'h00000000, 1'b1}; // ones and zeroes with carry
- wire [64: 0] vec_6 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b0}; // all ones, no carry
- wire [64: 0] vec_7 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b1}; // all ones with carry
-
- wire [64: 0] vec_8 = {32'hd898c296, 32'h37bf51f5, 1'b0}; // random values, no carry
- wire [64: 0] vec_9 = {32'hf4a13945, 32'hcbb64068, 1'b0}; // random values, no carry
- wire [64: 0] vec_10 = {32'h2deb33a0, 32'h6b315ece, 1'b0}; // random values, no carry
- wire [64: 0] vec_11 = {32'h77037d81, 32'h2bce3357, 1'b0}; // random values, no carry
- wire [64: 0] vec_12 = {32'h63a440f2, 32'h7c0f9e16, 1'b1}; // random values with carry
- wire [64: 0] vec_13 = {32'hf8bce6e5, 32'h8ee7eb4a, 1'b1}; // random values with carry
- wire [64: 0] vec_14 = {32'he12c4247, 32'hfe1a7f9b, 1'b1}; // random values with carry
- wire [64: 0] vec_15 = {32'h6b17d1f2, 32'h4fe342e2, 1'b1}; // random values with carry
-
-
- //
- // UUT
- //
- adder32_wrapper uut
- (
- .clk (clk),
- .a (a),
- .b (b),
- .s (s),
- .c_in (c_in),
- .c_out (c_out)
- );
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
- //
- clk = 0;
- //
- #100;
- //
- test_adder32(vec_0);
- test_adder32(vec_1);
- test_adder32(vec_2);
- test_adder32(vec_3);
- test_adder32(vec_4);
- test_adder32(vec_5);
- test_adder32(vec_6);
- test_adder32(vec_7);
- //
- test_adder32(vec_8);
- test_adder32(vec_9);
- test_adder32(vec_10);
- test_adder32(vec_11);
- test_adder32(vec_12);
- test_adder32(vec_13);
- test_adder32(vec_14);
- test_adder32(vec_15);
- //
- if (ok) $display("tb_lowlevel_adder32: SUCCESS");
- else $display("tb_lowlevel_adder32: FAILURE");
- //
- $finish;
- //
- end
-
-
- //
- // Test Routine
- //
- reg [31: 0] ss; // reference value of sum
- reg cc; // reference value of carry
- reg ss_ok; // result matches reference value
-
- task test_adder32;
-
- input [64: 0] vec;
-
- begin
-
- /* break down test vector */
- a = vec[64:33];
- b = vec[32: 1];
- c_in = vec[ 0: 0];
-
- /* calculate reference values */
- {cc, ss} = {1'b0, a} + {1'b0, b} + {32'd0, c_in};
-
- /* send one clock tick */
- #10 clk = 1;
- #10 clk = 0;
-
- /* check outputs */
- ss_ok = (s == ss) && (c_out == cc);
-
- /* display results */
- $display("test_adder32(): 0x%08X + 0x%08X + %01d = {%01d, 0x%08X} [%0s]", a, b, c_in, c_out, s, ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && ss_ok;
-
- end
-
- endtask
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_adder32.v
+// -----------------------------------------------------------------------------
+// Testbench for 32-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+
+module tb_lowlevel_adder32;
+
+ //
+ // Inputs
+ //
+ reg clk;
+ reg [31: 0] a;
+ reg [31: 0] b;
+ reg c_in;
+
+ //
+ // Outputs
+ //
+ wire [31: 0] s;
+ wire c_out;
+
+ //
+ // Test Vectors {a, b, c_in}
+ //
+ wire [64: 0] vec_0 = {32'h00000000, 32'h00000000, 1'b0}; // all zeroes, no carry
+ wire [64: 0] vec_1 = {32'h00000000, 32'h00000000, 1'b1}; // all zeroes with carry
+ wire [64: 0] vec_2 = {32'h00000000, 32'hFFFFFFFF, 1'b0}; // zeroes and ones, no carry
+ wire [64: 0] vec_3 = {32'h00000000, 32'hFFFFFFFF, 1'b1}; // zeroes and ones with carry
+ wire [64: 0] vec_4 = {32'hFFFFFFFF, 32'h00000000, 1'b0}; // ones and zeroes, no carry
+ wire [64: 0] vec_5 = {32'hFFFFFFFF, 32'h00000000, 1'b1}; // ones and zeroes with carry
+ wire [64: 0] vec_6 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b0}; // all ones, no carry
+ wire [64: 0] vec_7 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b1}; // all ones with carry
+
+ wire [64: 0] vec_8 = {32'hd898c296, 32'h37bf51f5, 1'b0}; // random values, no carry
+ wire [64: 0] vec_9 = {32'hf4a13945, 32'hcbb64068, 1'b0}; // random values, no carry
+ wire [64: 0] vec_10 = {32'h2deb33a0, 32'h6b315ece, 1'b0}; // random values, no carry
+ wire [64: 0] vec_11 = {32'h77037d81, 32'h2bce3357, 1'b0}; // random values, no carry
+ wire [64: 0] vec_12 = {32'h63a440f2, 32'h7c0f9e16, 1'b1}; // random values with carry
+ wire [64: 0] vec_13 = {32'hf8bce6e5, 32'h8ee7eb4a, 1'b1}; // random values with carry
+ wire [64: 0] vec_14 = {32'he12c4247, 32'hfe1a7f9b, 1'b1}; // random values with carry
+ wire [64: 0] vec_15 = {32'h6b17d1f2, 32'h4fe342e2, 1'b1}; // random values with carry
+
+
+ //
+ // UUT
+ //
+ adder32_wrapper uut
+ (
+ .clk (clk),
+ .a (a),
+ .b (b),
+ .s (s),
+ .c_in (c_in),
+ .c_out (c_out)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+ //
+ clk = 0;
+ //
+ #100;
+ //
+ test_adder32(vec_0);
+ test_adder32(vec_1);
+ test_adder32(vec_2);
+ test_adder32(vec_3);
+ test_adder32(vec_4);
+ test_adder32(vec_5);
+ test_adder32(vec_6);
+ test_adder32(vec_7);
+ //
+ test_adder32(vec_8);
+ test_adder32(vec_9);
+ test_adder32(vec_10);
+ test_adder32(vec_11);
+ test_adder32(vec_12);
+ test_adder32(vec_13);
+ test_adder32(vec_14);
+ test_adder32(vec_15);
+ //
+ if (ok) $display("tb_lowlevel_adder32: SUCCESS");
+ else $display("tb_lowlevel_adder32: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Routine
+ //
+ reg [31: 0] ss; // reference value of sum
+ reg cc; // reference value of carry
+ reg ss_ok; // result matches reference value
+
+ task test_adder32;
+
+ input [64: 0] vec;
+
+ begin
+
+ /* break down test vector */
+ a = vec[64:33];
+ b = vec[32: 1];
+ c_in = vec[ 0: 0];
+
+ /* calculate reference values */
+ {cc, ss} = {1'b0, a} + {1'b0, b} + {32'd0, c_in};
+
+ /* send one clock tick */
+ #10 clk = 1;
+ #10 clk = 0;
+
+ /* check outputs */
+ ss_ok = (s == ss) && (c_out == cc);
+
+ /* display results */
+ $display("test_adder32(): 0x%08X + 0x%08X + %01d = {%01d, 0x%08X} [%0s]", a, b, c_in, c_out, s, ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && ss_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_lowlevel_adder47.v b/bench/tb_lowlevel_adder47.v
index 2a575a8..663e8f9 100644
--- a/bench/tb_lowlevel_adder47.v
+++ b/bench/tb_lowlevel_adder47.v
@@ -1,151 +1,151 @@
-//------------------------------------------------------------------------------
-//
-// tb_lowlevel_adder47.v
-// -----------------------------------------------------------------------------
-// Testbench for 47-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-`timescale 1ns / 1ps
-//------------------------------------------------------------------------------
-
-
-module tb_lowlevel_adder47;
-
- //
- // Inputs
- //
- reg clk;
- reg [46: 0] a;
- reg [46: 0] b;
-
- //
- // Outputs
- //
- wire [46: 0] s;
-
- //
- // Test Vectors {a, b}
- //
- wire [93: 0] vec_0 = {47'h2a87ca22be8b, 47'h05378eb1c71e};
- wire [93: 0] vec_1 = {47'h7320ad746e1d, 47'h3b628ba79b98};
- wire [93: 0] vec_2 = {47'h59f741e08254, 47'h2a385502f25d};
- wire [93: 0] vec_3 = {47'h3f55296c3a54, 47'h5e3872760ab7};
- wire [93: 0] vec_4 = {47'h3617de4a9626, 47'h2c6f5d9e98bf};
- wire [93: 0] vec_5 = {47'h1292dc29f8f4, 47'h1dbd289a147c};
- wire [93: 0] vec_6 = {47'h69da3113b5f0, 47'h38c00a60b1ce};
- wire [93: 0] vec_7 = {47'h1d7e819d7a43, 47'h1d7c90ea0e5f};
-
- //
- // UUT
- //
- adder47_wrapper uut
- (
- .clk (clk),
- .a (a),
- .b (b),
- .s (s)
- );
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
- //
- clk = 0;
- //
- #100;
- //
- test_adder47(vec_0);
- test_adder47(vec_1);
- test_adder47(vec_2);
- test_adder47(vec_3);
- test_adder47(vec_4);
- test_adder47(vec_5);
- test_adder47(vec_6);
- test_adder47(vec_7);
- //
- if (ok) $display("tb_lowlevel_adder47: SUCCESS");
- else $display("tb_lowlevel_adder47: FAILURE");
- //
- $finish;
- //
- end
-
-
- //
- // Test Routine
- //
- reg [46: 0] ss; // reference value of sum
- reg cc; // reference value of carry
- reg ss_ok; // result matches reference value
-
- task test_adder47;
-
- input [93: 0] vec;
-
- begin
-
- /* break down test vector */
- a = vec[93:47];
- b = vec[46: 0];
-
- /* calculate reference values */
- ss = a + b;
-
- /* send one clock tick */
- #10 clk = 1;
- #10 clk = 0;
-
- /* check outputs */
- ss_ok = (s == ss);
-
- /* display results */
- $display("test_adder47(): %s", ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && ss_ok;
-
- end
-
- endtask
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_adder47.v
+// -----------------------------------------------------------------------------
+// Testbench for 47-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+
+module tb_lowlevel_adder47;
+
+ //
+ // Inputs
+ //
+ reg clk;
+ reg [46: 0] a;
+ reg [46: 0] b;
+
+ //
+ // Outputs
+ //
+ wire [46: 0] s;
+
+ //
+ // Test Vectors {a, b}
+ //
+ wire [93: 0] vec_0 = {47'h2a87ca22be8b, 47'h05378eb1c71e};
+ wire [93: 0] vec_1 = {47'h7320ad746e1d, 47'h3b628ba79b98};
+ wire [93: 0] vec_2 = {47'h59f741e08254, 47'h2a385502f25d};
+ wire [93: 0] vec_3 = {47'h3f55296c3a54, 47'h5e3872760ab7};
+ wire [93: 0] vec_4 = {47'h3617de4a9626, 47'h2c6f5d9e98bf};
+ wire [93: 0] vec_5 = {47'h1292dc29f8f4, 47'h1dbd289a147c};
+ wire [93: 0] vec_6 = {47'h69da3113b5f0, 47'h38c00a60b1ce};
+ wire [93: 0] vec_7 = {47'h1d7e819d7a43, 47'h1d7c90ea0e5f};
+
+ //
+ // UUT
+ //
+ adder47_wrapper uut
+ (
+ .clk (clk),
+ .a (a),
+ .b (b),
+ .s (s)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+ //
+ clk = 0;
+ //
+ #100;
+ //
+ test_adder47(vec_0);
+ test_adder47(vec_1);
+ test_adder47(vec_2);
+ test_adder47(vec_3);
+ test_adder47(vec_4);
+ test_adder47(vec_5);
+ test_adder47(vec_6);
+ test_adder47(vec_7);
+ //
+ if (ok) $display("tb_lowlevel_adder47: SUCCESS");
+ else $display("tb_lowlevel_adder47: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Routine
+ //
+ reg [46: 0] ss; // reference value of sum
+ reg cc; // reference value of carry
+ reg ss_ok; // result matches reference value
+
+ task test_adder47;
+
+ input [93: 0] vec;
+
+ begin
+
+ /* break down test vector */
+ a = vec[93:47];
+ b = vec[46: 0];
+
+ /* calculate reference values */
+ ss = a + b;
+
+ /* send one clock tick */
+ #10 clk = 1;
+ #10 clk = 0;
+
+ /* check outputs */
+ ss_ok = (s == ss);
+
+ /* display results */
+ $display("test_adder47(): %s", ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && ss_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_lowlevel_subtractor32.v b/bench/tb_lowlevel_subtractor32.v
index e1129e2..fd96000 100644
--- a/bench/tb_lowlevel_subtractor32.v
+++ b/bench/tb_lowlevel_subtractor32.v
@@ -1,174 +1,174 @@
-//------------------------------------------------------------------------------
-//
-// tb_lowlevel_subtractor32.v
-// -----------------------------------------------------------------------------
-// Testbench for 32-bit subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-`timescale 1ns / 1ps
-//------------------------------------------------------------------------------
-
-module tb_lowlevel_subtractor32;
-
- //
- // Inputs
- //
- reg clk;
- reg [31: 0] a;
- reg [31: 0] b;
- reg b_in;
-
- //
- // Outputs
- //
- wire [31: 0] d;
- wire b_out;
-
- //
- // Test Vectors {a, b, b_in}
- //
- wire [64: 0] vec_0 = {32'h00000000, 32'h00000000, 1'b0}; // all zeroes, no borrow
- wire [64: 0] vec_1 = {32'h00000000, 32'h00000000, 1'b1}; // all zeroes with borrow
- wire [64: 0] vec_2 = {32'h00000000, 32'hFFFFFFFF, 1'b0}; // zeroes and ones, no borrow
- wire [64: 0] vec_3 = {32'h00000000, 32'hFFFFFFFF, 1'b1}; // zeroes and ones with borrow
- wire [64: 0] vec_4 = {32'hFFFFFFFF, 32'h00000000, 1'b0}; // ones and zeroes, no borrow
- wire [64: 0] vec_5 = {32'hFFFFFFFF, 32'h00000000, 1'b1}; // ones and zeroes with borrow
- wire [64: 0] vec_6 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b0}; // all ones, no borrow
- wire [64: 0] vec_7 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b1}; // all ones with borrow
-
- wire [64: 0] vec_8 = {32'hd898c296, 32'h37bf51f5, 1'b0}; // random values, no borrow
- wire [64: 0] vec_9 = {32'hf4a13945, 32'hcbb64068, 1'b0}; // random values, no borrow
- wire [64: 0] vec_10 = {32'h2deb33a0, 32'h6b315ece, 1'b0}; // random values, no borrow
- wire [64: 0] vec_11 = {32'h77037d81, 32'h2bce3357, 1'b0}; // random values, no borrow
- wire [64: 0] vec_12 = {32'h63a440f2, 32'h7c0f9e16, 1'b1}; // random values with borrow
- wire [64: 0] vec_13 = {32'hf8bce6e5, 32'h8ee7eb4a, 1'b1}; // random values with borrow
- wire [64: 0] vec_14 = {32'he12c4247, 32'hfe1a7f9b, 1'b1}; // random values with borrow
- wire [64: 0] vec_15 = {32'h6b17d1f2, 32'h4fe342e2, 1'b1}; // random values with borrow
-
-
- //
- // UUT
- //
- subtractor32_wrapper uut
- (
- .clk (clk),
- .a (a),
- .b (b),
- .d (d),
- .b_in (b_in),
- .b_out (b_out)
- );
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
- //
- clk = 0;
- //
- #100;
- //
- test_subtractor32(vec_0);
- test_subtractor32(vec_1);
- test_subtractor32(vec_2);
- test_subtractor32(vec_3);
- test_subtractor32(vec_4);
- test_subtractor32(vec_5);
- test_subtractor32(vec_6);
- test_subtractor32(vec_7);
- //
- test_subtractor32(vec_8);
- test_subtractor32(vec_9);
- test_subtractor32(vec_10);
- test_subtractor32(vec_11);
- test_subtractor32(vec_12);
- test_subtractor32(vec_13);
- test_subtractor32(vec_14);
- test_subtractor32(vec_15);
- //
- if (ok) $display("tb_lowlevel_subtractor32: SUCCESS");
- else $display("tb_lowlevel_subtractor32: FAILURE");
- //
- $finish;
- //
- end
-
-
- //
- // Test Routine
- //
- reg [31: 0] dd; // reference value of difference
- reg bb; // reference value of borrow
- reg dd_ok; // result matches reference value
-
- task test_subtractor32;
-
- input [64: 0] vec;
-
- begin
-
- /* break down test vector */
- a = vec[64:33];
- b = vec[32: 1];
- b_in = vec[ 0: 0];
-
- /* calculate reference values */
- {bb, dd} = {1'b0, a} - {1'b0, b} - {32'd0, b_in};
-
- /* send one clock tick */
- #10 clk = 1;
- #10 clk = 0;
-
- /* check outputs */
- dd_ok = (d == dd) && (b_out == bb);
-
- /* display results */
- $display("test_subtractor32(): 0x%08X - (0x%08X + %01d) = {%01d, 0x%08X} [%0s]", a, b, b_in, b_out, d, dd_ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && dd_ok;
-
- end
-
- endtask
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_subtractor32.v
+// -----------------------------------------------------------------------------
+// Testbench for 32-bit subtractor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_lowlevel_subtractor32;
+
+ //
+ // Inputs
+ //
+ reg clk;
+ reg [31: 0] a;
+ reg [31: 0] b;
+ reg b_in;
+
+ //
+ // Outputs
+ //
+ wire [31: 0] d;
+ wire b_out;
+
+ //
+ // Test Vectors {a, b, b_in}
+ //
+ wire [64: 0] vec_0 = {32'h00000000, 32'h00000000, 1'b0}; // all zeroes, no borrow
+ wire [64: 0] vec_1 = {32'h00000000, 32'h00000000, 1'b1}; // all zeroes with borrow
+ wire [64: 0] vec_2 = {32'h00000000, 32'hFFFFFFFF, 1'b0}; // zeroes and ones, no borrow
+ wire [64: 0] vec_3 = {32'h00000000, 32'hFFFFFFFF, 1'b1}; // zeroes and ones with borrow
+ wire [64: 0] vec_4 = {32'hFFFFFFFF, 32'h00000000, 1'b0}; // ones and zeroes, no borrow
+ wire [64: 0] vec_5 = {32'hFFFFFFFF, 32'h00000000, 1'b1}; // ones and zeroes with borrow
+ wire [64: 0] vec_6 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b0}; // all ones, no borrow
+ wire [64: 0] vec_7 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b1}; // all ones with borrow
+
+ wire [64: 0] vec_8 = {32'hd898c296, 32'h37bf51f5, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_9 = {32'hf4a13945, 32'hcbb64068, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_10 = {32'h2deb33a0, 32'h6b315ece, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_11 = {32'h77037d81, 32'h2bce3357, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_12 = {32'h63a440f2, 32'h7c0f9e16, 1'b1}; // random values with borrow
+ wire [64: 0] vec_13 = {32'hf8bce6e5, 32'h8ee7eb4a, 1'b1}; // random values with borrow
+ wire [64: 0] vec_14 = {32'he12c4247, 32'hfe1a7f9b, 1'b1}; // random values with borrow
+ wire [64: 0] vec_15 = {32'h6b17d1f2, 32'h4fe342e2, 1'b1}; // random values with borrow
+
+
+ //
+ // UUT
+ //
+ subtractor32_wrapper uut
+ (
+ .clk (clk),
+ .a (a),
+ .b (b),
+ .d (d),
+ .b_in (b_in),
+ .b_out (b_out)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+ //
+ clk = 0;
+ //
+ #100;
+ //
+ test_subtractor32(vec_0);
+ test_subtractor32(vec_1);
+ test_subtractor32(vec_2);
+ test_subtractor32(vec_3);
+ test_subtractor32(vec_4);
+ test_subtractor32(vec_5);
+ test_subtractor32(vec_6);
+ test_subtractor32(vec_7);
+ //
+ test_subtractor32(vec_8);
+ test_subtractor32(vec_9);
+ test_subtractor32(vec_10);
+ test_subtractor32(vec_11);
+ test_subtractor32(vec_12);
+ test_subtractor32(vec_13);
+ test_subtractor32(vec_14);
+ test_subtractor32(vec_15);
+ //
+ if (ok) $display("tb_lowlevel_subtractor32: SUCCESS");
+ else $display("tb_lowlevel_subtractor32: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Routine
+ //
+ reg [31: 0] dd; // reference value of difference
+ reg bb; // reference value of borrow
+ reg dd_ok; // result matches reference value
+
+ task test_subtractor32;
+
+ input [64: 0] vec;
+
+ begin
+
+ /* break down test vector */
+ a = vec[64:33];
+ b = vec[32: 1];
+ b_in = vec[ 0: 0];
+
+ /* calculate reference values */
+ {bb, dd} = {1'b0, a} - {1'b0, b} - {32'd0, b_in};
+
+ /* send one clock tick */
+ #10 clk = 1;
+ #10 clk = 0;
+
+ /* check outputs */
+ dd_ok = (d == dd) && (b_out == bb);
+
+ /* display results */
+ $display("test_subtractor32(): 0x%08X - (0x%08X + %01d) = {%01d, 0x%08X} [%0s]", a, b, b_in, b_out, d, dd_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && dd_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_adder.v b/bench/tb_modular_adder.v
index 1015b77..713ff22 100644
--- a/bench/tb_modular_adder.v
+++ b/bench/tb_modular_adder.v
@@ -43,313 +43,313 @@
module tb_modular_adder_256;
- //
- // Test Vectors
- //
- localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
-
- localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
- localparam [255:0] Y_1 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
-
- localparam [255:0] X_2 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
- localparam [255:0] Y_2 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
-
-
- //
- // Core Parameters
- //
- localparam WORD_COUNTER_WIDTH = 3;
- localparam OPERAND_NUM_WORDS = 8;
-
-
- //
- // Clock (100 MHz)
- //
- reg clk = 1'b0;
- always #5 clk = ~clk;
-
-
- //
- // Inputs, Outputs
- //
- reg rst_n;
- reg ena;
- wire rdy;
-
-
- //
- // Buffers (X, Y, N)
- //
- wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_s_addr;
- wire core_s_wren;
-
- wire [ 31:0] core_x_data;
- wire [ 31:0] core_y_data;
- wire [ 31:0] core_n_data;
- wire [ 31:0] core_s_data;
-
- reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
- reg [WORD_COUNTER_WIDTH-1:0] tb_s_addr;
- reg tb_xyn_wren;
-
- reg [ 31:0] tb_x_data;
- reg [ 31:0] tb_y_data;
- reg [ 31:0] tb_n_data;
- wire [ 31:0] tb_s_data;
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_x
- (
- .clk (clk),
-
- .a_addr (tb_xyn_addr),
- .a_wr (tb_xyn_wren),
- .a_in (tb_x_data),
- .a_out (),
-
- .b_addr (core_xy_addr),
- .b_out (core_x_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_y
- (
- .clk (clk),
-
- .a_addr (tb_xyn_addr),
- .a_wr (tb_xyn_wren),
- .a_in (tb_y_data),
- .a_out (),
-
- .b_addr (core_xy_addr),
- .b_out (core_y_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_n
- (
- .clk (clk),
-
- .a_addr (tb_xyn_addr),
- .a_wr (tb_xyn_wren),
- .a_in (tb_n_data),
- .a_out (),
-
- .b_addr (core_n_addr),
- .b_out (core_n_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_s
- (
- .clk (clk),
-
- .a_addr (core_s_addr),
- .a_wr (core_s_wren),
- .a_in (core_s_data),
- .a_out (),
-
- .b_addr (tb_s_addr),
- .b_out (tb_s_data)
- );
-
-
- //
- // UUT
- //
- modular_adder #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
- )
- uut
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (ena),
- .rdy (rdy),
-
- .ab_addr (core_xy_addr),
- .n_addr (core_n_addr),
- .s_addr (core_s_addr),
- .s_wren (core_s_wren),
-
- .a_din (core_x_data),
- .b_din (core_y_data),
- .n_din (core_n_data),
- .s_dout (core_s_data)
- );
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
-
- /* initialize control inputs */
- rst_n = 0;
- ena = 0;
-
- tb_xyn_wren = 0;
-
- /* wait for some time */
- #200;
-
- /* de-assert reset */
- rst_n = 1;
-
- /* wait for some time */
- #100;
-
- /* run tests */
- test_modular_adder(X_1, Y_1, N);
- test_modular_adder(X_2, Y_2, N);
- test_modular_adder(Y_1, X_1, N);
- test_modular_adder(Y_2, X_2, N);
-
- test_modular_adder(X_1, X_2, N);
- test_modular_adder(X_2, X_1, N);
- test_modular_adder(Y_1, Y_2, N);
- test_modular_adder(Y_2, Y_1, N);
-
- test_modular_adder(X_1, Y_2, N);
- test_modular_adder(Y_2, X_1, N);
- test_modular_adder(X_2, Y_1, N);
- test_modular_adder(Y_1, X_2, N);
-
- /* print result */
- if (ok) $display("tb_modular_adder_256: SUCCESS");
- else $display("tb_modular_adder_256: FAILURE");
- //
- $finish;
- //
- end
-
-
- //
- // Test Task
- //
- reg [256:0] s;
- wire [255:0] s_dummy = s[255:0];
- reg s_ok;
-
- integer w;
-
- reg [255:0] x_shreg;
- reg [255:0] y_shreg;
- reg [255:0] n_shreg;
- reg [255:0] s_shreg;
-
- task test_modular_adder;
-
- input [255:0] x;
- input [255:0] y;
- input [255:0] n;
-
- begin
-
- /* start filling memories */
- tb_xyn_wren = 1;
-
- /* initialize shift registers */
- x_shreg = x;
- y_shreg = y;
- n_shreg = n;
-
- /* write all the words */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set addresses */
- tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* set data words */
- tb_x_data = x_shreg[31:0];
- tb_y_data = y_shreg[31:0];
- tb_n_data = n_shreg[31:0];
-
- /* shift inputs */
- x_shreg = {{32{1'bX}}, x_shreg[255:32]};
- y_shreg = {{32{1'bX}}, y_shreg[255:32]};
- n_shreg = {{32{1'bX}}, n_shreg[255:32]};
-
- /* wait for 1 clock tick */
- #10;
-
- end
-
- /* wipe addresses */
- tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
-
- /* wipe data words */
- tb_x_data = {32{1'bX}};
- tb_y_data = {32{1'bX}};
- tb_n_data = {32{1'bX}};
-
- /* stop filling memories */
- tb_xyn_wren = 0;
-
- /* calculate reference value */
- s = {1'b0, x} + {1'b0, y};
- if (s >= {1'b0, n})
- s = s - {1'b0, n};
-
- /* start operation */
- ena = 1;
-
- /* clear flag */
- #10 ena = 0;
-
- /* wait for operation to complete */
- while (!rdy) #10;
-
- /* read result */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set address */
- tb_s_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* wait for 1 clock tick */
- #10;
-
- /* store data word */
- s_shreg = {tb_s_data, s_shreg[255:32]};
-
- end
-
- /* compare */
- s_ok = (s_shreg == s[255:0]);
-
- /* display results */
- $display("test_modular_adder(): %s", s_ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && s_ok;
-
- end
-
- endtask
-
-
+ //
+ // Test Vectors
+ //
+ localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+ localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+ localparam [255:0] Y_1 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+
+ localparam [255:0] X_2 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+ localparam [255:0] Y_2 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y, N)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_s_addr;
+ wire core_s_wren;
+
+ wire [ 31:0] core_x_data;
+ wire [ 31:0] core_y_data;
+ wire [ 31:0] core_n_data;
+ wire [ 31:0] core_s_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_s_addr;
+ reg tb_xyn_wren;
+
+ reg [ 31:0] tb_x_data;
+ reg [ 31:0] tb_y_data;
+ reg [ 31:0] tb_n_data;
+ wire [ 31:0] tb_s_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_y_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_n
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_n_data),
+ .a_out (),
+
+ .b_addr (core_n_addr),
+ .b_out (core_n_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_s
+ (
+ .clk (clk),
+
+ .a_addr (core_s_addr),
+ .a_wr (core_s_wren),
+ .a_in (core_s_data),
+ .a_out (),
+
+ .b_addr (tb_s_addr),
+ .b_out (tb_s_data)
+ );
+
+
+ //
+ // UUT
+ //
+ modular_adder #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .ab_addr (core_xy_addr),
+ .n_addr (core_n_addr),
+ .s_addr (core_s_addr),
+ .s_wren (core_s_wren),
+
+ .a_din (core_x_data),
+ .b_din (core_y_data),
+ .n_din (core_n_data),
+ .s_dout (core_s_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xyn_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_adder(X_1, Y_1, N);
+ test_modular_adder(X_2, Y_2, N);
+ test_modular_adder(Y_1, X_1, N);
+ test_modular_adder(Y_2, X_2, N);
+
+ test_modular_adder(X_1, X_2, N);
+ test_modular_adder(X_2, X_1, N);
+ test_modular_adder(Y_1, Y_2, N);
+ test_modular_adder(Y_2, Y_1, N);
+
+ test_modular_adder(X_1, Y_2, N);
+ test_modular_adder(Y_2, X_1, N);
+ test_modular_adder(X_2, Y_1, N);
+ test_modular_adder(Y_1, X_2, N);
+
+ /* print result */
+ if (ok) $display("tb_modular_adder_256: SUCCESS");
+ else $display("tb_modular_adder_256: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [256:0] s;
+ wire [255:0] s_dummy = s[255:0];
+ reg s_ok;
+
+ integer w;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+ reg [255:0] n_shreg;
+ reg [255:0] s_shreg;
+
+ task test_modular_adder;
+
+ input [255:0] x;
+ input [255:0] y;
+ input [255:0] n;
+
+ begin
+
+ /* start filling memories */
+ tb_xyn_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+ n_shreg = n;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+ tb_n_data = n_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+ n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+ tb_n_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyn_wren = 0;
+
+ /* calculate reference value */
+ s = {1'b0, x} + {1'b0, y};
+ if (s >= {1'b0, n})
+ s = s - {1'b0, n};
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_s_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ s_shreg = {tb_s_data, s_shreg[255:32]};
+
+ end
+
+ /* compare */
+ s_ok = (s_shreg == s[255:0]);
+
+ /* display results */
+ $display("test_modular_adder(): %s", s_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && s_ok;
+
+ end
+
+ endtask
+
+
endmodule
//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_invertor.v b/bench/tb_modular_invertor.v
index 0ef7c88..24f6db1 100644
--- a/bench/tb_modular_invertor.v
+++ b/bench/tb_modular_invertor.v
@@ -1,226 +1,225 @@
-`timescale 1ns / 1ps
-
-module tb_modular_invertor;
-
-
- //
- // Test Vectors
- //
- localparam [255:0] Q = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
-
- localparam [255:0] A_1 = 256'hd3e73ccd63a5b10da308c615bb9ebd3f76e2c5fccc256fd9f629dcc956bf2382;
- localparam [255:0] A1_1 = 256'h93fb26d5d199bbb7232a4b7c98e97ba9bb7530d304b5f07736ea4027bbb57ecd;
-
- localparam [255:0] A_2 = 256'h57b6c628a5c4e870740b2517975ace2216acbe094ac54568b53212ef45e69d22;
- localparam [255:0] A1_2 = 256'hcd2af4766642d7d2f3f3f67d92c575c496772ef7d55c75eb46bd07e8d5f9a4aa;
-
-
- //
- // Clock
- //
- reg clk = 1'b0;
- always #5 clk = ~clk;
-
-
- //
- // Inputs, Outputs
- //
- reg rst_n;
- reg ena;
- wire rdy;
-
-
- //
- // Buffers (A, A1, Q)
- //
- wire [ 2: 0] core_a_addr;
- wire [ 2: 0] core_q_addr;
- wire [ 2: 0] core_a1_addr;
- wire core_a1_wren;
-
- wire [31: 0] core_a_data;
- wire [31: 0] core_q_data;
- wire [31: 0] core_a1_data;
-
- reg [ 2: 0] tb_aq_addr;
- reg tb_aq_wren;
- reg [ 2: 0] tb_a1_addr;
-
- reg [31: 0] tb_a_data;
- reg [31: 0] tb_q_data;
- wire [31: 0] tb_a1_data;
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_a
- ( .clk(clk),
- .a_addr(tb_aq_addr), .a_wr(tb_aq_wren), .a_in(tb_a_data), .a_out(),
- .b_addr(core_a_addr), .b_out(core_a_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_q
- ( .clk(clk),
- .a_addr(tb_aq_addr), .a_wr(tb_aq_wren), .a_in(tb_q_data), .a_out(),
- .b_addr(core_q_addr), .b_out(core_q_data)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_a1
- ( .clk(clk),
- .a_addr(core_a1_addr), .a_wr(core_a1_wren), .a_in(core_a1_data), .a_out(),
- .b_addr(tb_a1_addr), .b_out(tb_a1_data)
- );
-
-
- //
- // UUT
- //
- modular_invertor #
- (
- .MAX_OPERAND_WIDTH (256)
- )
- uut
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (ena),
- .rdy (rdy),
-
- .a_addr (core_a_addr),
- .q_addr (core_q_addr),
- .a1_addr (core_a1_addr),
- .a1_wren (core_a1_wren),
-
- .a_din (core_a_data),
- .q_din (core_q_data),
- .a1_dout (core_a1_data)
- );
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
-
- /* initialize control inputs */
- rst_n = 0;
- ena = 0;
-
- /* wait for some time */
- #200;
-
- /* de-assert reset */
- rst_n = 1;
-
- /* wait for some time */
- #100;
-
- /* run tests */
- test_modular_invertor(A_1, A1_1, Q);
- test_modular_invertor(A_2, A1_2, Q);
-
- /* print result */
- if (ok) $display("tb_modular_invertor: SUCCESS");
- else $display("tb_modular_invertor: FAILURE");
- //
- //$finish;
- //
-
- end
-
-
+`timescale 1ns / 1ps
+
+module tb_modular_invertor;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] Q = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+ localparam [255:0] A_1 = 256'hd3e73ccd63a5b10da308c615bb9ebd3f76e2c5fccc256fd9f629dcc956bf2382;
+ localparam [255:0] A1_1 = 256'h93fb26d5d199bbb7232a4b7c98e97ba9bb7530d304b5f07736ea4027bbb57ecd;
+
+ localparam [255:0] A_2 = 256'h57b6c628a5c4e870740b2517975ace2216acbe094ac54568b53212ef45e69d22;
+ localparam [255:0] A1_2 = 256'hcd2af4766642d7d2f3f3f67d92c575c496772ef7d55c75eb46bd07e8d5f9a4aa;
+
+
+ //
+ // Clock
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (A, A1, Q)
+ //
+ wire [ 2: 0] core_a_addr;
+ wire [ 2: 0] core_q_addr;
+ wire [ 2: 0] core_a1_addr;
+ wire core_a1_wren;
+
+ wire [31: 0] core_a_data;
+ wire [31: 0] core_q_data;
+ wire [31: 0] core_a1_data;
+
+ reg [ 2: 0] tb_aq_addr;
+ reg tb_aq_wren;
+ reg [ 2: 0] tb_a1_addr;
+
+ reg [31: 0] tb_a_data;
+ reg [31: 0] tb_q_data;
+ wire [31: 0] tb_a1_data;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_a
+ ( .clk(clk),
+ .a_addr(tb_aq_addr), .a_wr(tb_aq_wren), .a_in(tb_a_data), .a_out(),
+ .b_addr(core_a_addr), .b_out(core_a_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_q
+ ( .clk(clk),
+ .a_addr(tb_aq_addr), .a_wr(tb_aq_wren), .a_in(tb_q_data), .a_out(),
+ .b_addr(core_q_addr), .b_out(core_q_data)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_a1
+ ( .clk(clk),
+ .a_addr(core_a1_addr), .a_wr(core_a1_wren), .a_in(core_a1_data), .a_out(),
+ .b_addr(tb_a1_addr), .b_out(tb_a1_data)
+ );
+
+
+ //
+ // UUT
+ //
+ modular_invertor #
+ (
+ .MAX_OPERAND_WIDTH (256)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .a_addr (core_a_addr),
+ .q_addr (core_q_addr),
+ .a1_addr (core_a1_addr),
+ .a1_wren (core_a1_wren),
+
+ .a_din (core_a_data),
+ .q_din (core_q_data),
+ .a1_dout (core_a1_data)
+ );
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_invertor(A_1, A1_1, Q);
+ test_modular_invertor(A_2, A1_2, Q);
+
+ /* print result */
+ if (ok) $display("tb_modular_invertor: SUCCESS");
+ else $display("tb_modular_invertor: FAILURE");
+ //
+ //$finish;
//
- // Test Task
- //
- reg a1_ok;
-
- integer w;
-
- task test_modular_invertor;
-
- input [255:0] a;
- input [255:0] a1;
- input [255:0] q;
-
- reg [255:0] a_shreg;
- reg [255:0] a1_shreg;
- reg [255:0] q_shreg;
-
- begin
-
- /* start filling memories */
- tb_aq_wren = 1;
-
- /* initialize shift registers */
- a_shreg = a;
- q_shreg = q;
-
- /* write all the words */
- for (w=0; w<8; w=w+1) begin
-
- /* set addresses */
- tb_aq_addr = w[2:0];
-
- /* set data words */
- tb_a_data = a_shreg[31:0];
- tb_q_data = q_shreg[31:0];
-
- /* shift inputs */
- a_shreg = {{32{1'bX}}, a_shreg[255:32]};
- q_shreg = {{32{1'bX}}, q_shreg[255:32]};
-
- /* wait for 1 clock tick */
- #10;
-
- end
-
- /* wipe addresses */
- tb_aq_addr = {3{1'bX}};
-
- /* wipe data words */
- tb_a_data = {32{1'bX}};
- tb_q_data = {32{1'bX}};
-
- /* stop filling memories */
- tb_aq_wren = 0;
-
- /* start operation */
- ena = 1;
-
- /* clear flag */
- #10 ena = 0;
-
- /* wait for operation to complete */
- while (!rdy) #10;
-
- /* read result */
- for (w=0; w<8; w=w+1) begin
-
- /* set address */
- tb_a1_addr = w[2:0];
-
- /* wait for 1 clock tick */
- #10;
-
- /* store data word */
- a1_shreg = {tb_a1_data, a1_shreg[255:32]};
-
- end
-
- /* compare */
- a1_ok = (a1_shreg == a1);
-
- /* display results */
- $display("test_modular_invertor(): %s", a1_ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && a1_ok;
-
- end
-
- endtask
-
-
-endmodule
-
+
+ end
+
+
+ //
+ // Test Task
+ //
+ reg a1_ok;
+
+ integer w;
+
+ task test_modular_invertor;
+
+ input [255:0] a;
+ input [255:0] a1;
+ input [255:0] q;
+
+ reg [255:0] a_shreg;
+ reg [255:0] a1_shreg;
+ reg [255:0] q_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_aq_wren = 1;
+
+ /* initialize shift registers */
+ a_shreg = a;
+ q_shreg = q;
+
+ /* write all the words */
+ for (w=0; w<8; w=w+1) begin
+
+ /* set addresses */
+ tb_aq_addr = w[2:0];
+
+ /* set data words */
+ tb_a_data = a_shreg[31:0];
+ tb_q_data = q_shreg[31:0];
+
+ /* shift inputs */
+ a_shreg = {{32{1'bX}}, a_shreg[255:32]};
+ q_shreg = {{32{1'bX}}, q_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_aq_addr = {3{1'bX}};
+
+ /* wipe data words */
+ tb_a_data = {32{1'bX}};
+ tb_q_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_aq_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<8; w=w+1) begin
+
+ /* set address */
+ tb_a1_addr = w[2:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ a1_shreg = {tb_a1_data, a1_shreg[255:32]};
+
+ end
+
+ /* compare */
+ a1_ok = (a1_shreg == a1);
+
+ /* display results */
+ $display("test_modular_invertor(): %s", a1_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && a1_ok;
+
+ end
+
+ endtask
+
+
+endmodule
diff --git a/bench/tb_modular_multiplier_256.v b/bench/tb_modular_multiplier_256.v
index 5277c20..676a183 100644
--- a/bench/tb_modular_multiplier_256.v
+++ b/bench/tb_modular_multiplier_256.v
@@ -43,322 +43,322 @@
module tb_modular_multiplier_256;
- //
- // Test Vectors
- //
- localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
-
- localparam [255:0] X_1 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296;
- localparam [255:0] Y_1 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5;
- localparam [255:0] P_1 = 256'h823cd15f6dd3c71933565064513a6b2bd183e554c6a08622f713ebbbface98be;
-
- localparam [255:0] X_2 = 256'h29d05c193da77b710e86323538b77e1b11f904fea42998be16bd8d744ece7ad0;
- localparam [255:0] Y_2 = 256'hb01cbd1c01e58065711814b583f061e9d431cca994cea1313449bf97c840ae07;
- localparam [255:0] P_2 = 256'h76b2571d1d009ab0e7d1cc086c7d3648f08755b2e2585e780d11f053b06fb6ec;
-
- localparam [255:0] X_3 = 256'h8101ece47464a6ead70cf69a6e2bd3d88691a3262d22cba4f7635eaff26680a8;
- localparam [255:0] Y_3 = 256'hd8a12ba61d599235f67d9cb4d58f1783d3ca43e78f0a5abaa624079936c0c3a9;
- localparam [255:0] P_3 = 256'h944fea6a4fac7ae475a6bb211db4bbd394bd9b3ee9a038f6c17125a00b3a5375;
-
- localparam [255:0] X_4 = 256'h7214bc9647160bbd39ff2f80533f5dc6ddd70ddf86bb815661e805d5d4e6f27c;
- localparam [255:0] Y_4 = 256'h8b81e3e977597110c7cf2633435b2294b72642987defd3d4007e1cfc5df84541;
- localparam [255:0] P_4 = 256'h78d3e33c81ab9c652679363c76df004ea6f9a9e3a242a0fb71a4e8fdf41ab519;
-
-
- //
- // Core Parameters
- //
- localparam WORD_COUNTER_WIDTH = 3;
- localparam OPERAND_NUM_WORDS = 8;
-
-
- //
- // Clock (100 MHz)
- //
- reg clk = 1'b0;
- always #5 clk = ~clk;
-
-
- //
- // Inputs, Outputs
- //
- reg rst_n;
- reg ena;
- wire rdy;
-
-
- //
- // Buffers (X, Y, N, P)
- //
- wire [WORD_COUNTER_WIDTH-1:0] core_x_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_y_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_p_addr;
-
- wire core_p_wren;
-
- wire [ 31:0] core_x_data;
- wire [ 31:0] core_y_data;
- wire [ 31:0] core_n_data;
- wire [ 31:0] core_p_data;
-
- reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
- reg [WORD_COUNTER_WIDTH-1:0] tb_p_addr;
-
- reg tb_xyn_wren;
-
- reg [ 31:0] tb_x_data;
- reg [ 31:0] tb_y_data;
- reg [ 31:0] tb_n_data;
- wire [ 31:0] tb_p_data;
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_x
- (
- .clk (clk),
-
- .a_addr (tb_xyn_addr),
- .a_wr (tb_xyn_wren),
- .a_in (tb_x_data),
- .a_out (),
-
- .b_addr (core_x_addr),
- .b_out (core_x_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_y
- (
- .clk (clk),
-
- .a_addr (tb_xyn_addr),
- .a_wr (tb_xyn_wren),
- .a_in (tb_y_data),
- .a_out (),
-
- .b_addr (core_y_addr),
- .b_out (core_y_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_n
- (
- .clk (clk),
-
- .a_addr (tb_xyn_addr),
- .a_wr (tb_xyn_wren),
- .a_in (tb_n_data),
- .a_out (),
-
- .b_addr (core_n_addr),
- .b_out (core_n_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_s
- (
- .clk (clk),
-
- .a_addr (core_p_addr),
- .a_wr (core_p_wren),
- .a_in (core_p_data),
- .a_out (),
-
- .b_addr (tb_p_addr),
- .b_out (tb_p_data)
- );
-
-
- //
- // UUT
- //
- modular_multiplier_256 uut
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (ena),
- .rdy (rdy),
-
- .a_addr (core_x_addr),
- .b_addr (core_y_addr),
- .n_addr (core_n_addr),
- .p_addr (core_p_addr),
- .p_wren (core_p_wren),
-
- .a_din (core_x_data),
- .b_din (core_y_data),
- .n_din (core_n_data),
- .p_dout (core_p_data)
- );
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
-
- /* initialize control inputs */
- rst_n = 0;
- ena = 0;
-
- tb_xyn_wren = 0;
-
- /* wait for some time */
- #200;
-
- /* de-assert reset */
- rst_n = 1;
-
- /* wait for some time */
- #100;
-
- /* run tests */
- test_modular_multiplier(X_1, Y_1, N, P_1);
- test_modular_multiplier(X_2, Y_2, N, P_2);
- test_modular_multiplier(X_3, Y_3, N, P_3);
- test_modular_multiplier(X_4, Y_4, N, P_4);
-
- /* print result */
- if (ok) $display("tb_modular_multiplier_256: SUCCESS");
- else $display("tb_modular_multiplier_256: FAILURE");
- //
- //$finish;
- //
- end
-
-
- //
- // Test Task
- //
- reg [255:0] p;
- reg p_ok;
-
- integer w;
-
- reg [511:0] pp_full;
- reg [255:0] pp_ref;
-
- task test_modular_multiplier;
-
- input [255:0] x;
- input [255:0] y;
- input [255:0] n;
- input [255:0] pp;
-
- reg [255:0] x_shreg;
- reg [255:0] y_shreg;
- reg [255:0] n_shreg;
- reg [255:0] p_shreg;
-
- begin
-
- /* start filling memories */
- tb_xyn_wren = 1;
-
- /* initialize shift registers */
- x_shreg = x;
- y_shreg = y;
- n_shreg = n;
-
- /* write all the words */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set addresses */
- tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* set data words */
- tb_x_data = x_shreg[31:0];
- tb_y_data = y_shreg[31:0];
- tb_n_data = n_shreg[31:0];
-
- /* shift inputs */
- x_shreg = {{32{1'bX}}, x_shreg[255:32]};
- y_shreg = {{32{1'bX}}, y_shreg[255:32]};
- n_shreg = {{32{1'bX}}, n_shreg[255:32]};
-
- /* wait for 1 clock tick */
- #10;
-
- end
-
- /* wipe addresses */
- tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
-
- /* wipe data words */
- tb_x_data = {32{1'bX}};
- tb_y_data = {32{1'bX}};
- tb_n_data = {32{1'bX}};
-
- /* stop filling memories */
- tb_xyn_wren = 0;
-
- /* calculate reference value */
- pp_full = {{256{1'b0}}, x} * {{256{1'b0}}, y};
- pp_ref = pp_full % {{256{1'b0}}, n};
-
- /* compare reference value against hard-coded one */
- if (pp_ref != pp) begin
- $display("ERROR: pp_ref != pp");
- $finish;
- end
-
- /* start operation */
- ena = 1;
-
- /* clear flag */
- #10 ena = 0;
-
- /* wait for operation to complete */
- while (!rdy) #10;
-
- /* read result */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set address */
- tb_p_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* wait for 1 clock tick */
- #10;
-
- /* store data word */
- p_shreg = {tb_p_data, p_shreg[255:32]};
-
- end
-
- /* compare */
- p_ok = (p_shreg == pp);
-
- /* display results */
- $display("test_modular_multiplier_256(): %s", p_ok ? "OK" : "ERROR");
-
- /* update flag */
- ok = ok && p_ok;
-
- end
-
- endtask
-
-
-
-
+ //
+ // Test Vectors
+ //
+ localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+ localparam [255:0] X_1 = 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296;
+ localparam [255:0] Y_1 = 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5;
+ localparam [255:0] P_1 = 256'h823cd15f6dd3c71933565064513a6b2bd183e554c6a08622f713ebbbface98be;
+
+ localparam [255:0] X_2 = 256'h29d05c193da77b710e86323538b77e1b11f904fea42998be16bd8d744ece7ad0;
+ localparam [255:0] Y_2 = 256'hb01cbd1c01e58065711814b583f061e9d431cca994cea1313449bf97c840ae07;
+ localparam [255:0] P_2 = 256'h76b2571d1d009ab0e7d1cc086c7d3648f08755b2e2585e780d11f053b06fb6ec;
+
+ localparam [255:0] X_3 = 256'h8101ece47464a6ead70cf69a6e2bd3d88691a3262d22cba4f7635eaff26680a8;
+ localparam [255:0] Y_3 = 256'hd8a12ba61d599235f67d9cb4d58f1783d3ca43e78f0a5abaa624079936c0c3a9;
+ localparam [255:0] P_3 = 256'h944fea6a4fac7ae475a6bb211db4bbd394bd9b3ee9a038f6c17125a00b3a5375;
+
+ localparam [255:0] X_4 = 256'h7214bc9647160bbd39ff2f80533f5dc6ddd70ddf86bb815661e805d5d4e6f27c;
+ localparam [255:0] Y_4 = 256'h8b81e3e977597110c7cf2633435b2294b72642987defd3d4007e1cfc5df84541;
+ localparam [255:0] P_4 = 256'h78d3e33c81ab9c652679363c76df004ea6f9a9e3a242a0fb71a4e8fdf41ab519;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y, N, P)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_x_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_y_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_p_addr;
+
+ wire core_p_wren;
+
+ wire [ 31:0] core_x_data;
+ wire [ 31:0] core_y_data;
+ wire [ 31:0] core_n_data;
+ wire [ 31:0] core_p_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_p_addr;
+
+ reg tb_xyn_wren;
+
+ reg [ 31:0] tb_x_data;
+ reg [ 31:0] tb_y_data;
+ reg [ 31:0] tb_n_data;
+ wire [ 31:0] tb_p_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_x_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_y_addr),
+ .b_out (core_y_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_n
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_n_data),
+ .a_out (),
+
+ .b_addr (core_n_addr),
+ .b_out (core_n_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_s
+ (
+ .clk (clk),
+
+ .a_addr (core_p_addr),
+ .a_wr (core_p_wren),
+ .a_in (core_p_data),
+ .a_out (),
+
+ .b_addr (tb_p_addr),
+ .b_out (tb_p_data)
+ );
+
+
+ //
+ // UUT
+ //
+ modular_multiplier_256 uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .a_addr (core_x_addr),
+ .b_addr (core_y_addr),
+ .n_addr (core_n_addr),
+ .p_addr (core_p_addr),
+ .p_wren (core_p_wren),
+
+ .a_din (core_x_data),
+ .b_din (core_y_data),
+ .n_din (core_n_data),
+ .p_dout (core_p_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xyn_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_multiplier(X_1, Y_1, N, P_1);
+ test_modular_multiplier(X_2, Y_2, N, P_2);
+ test_modular_multiplier(X_3, Y_3, N, P_3);
+ test_modular_multiplier(X_4, Y_4, N, P_4);
+
+ /* print result */
+ if (ok) $display("tb_modular_multiplier_256: SUCCESS");
+ else $display("tb_modular_multiplier_256: FAILURE");
+ //
+ //$finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [255:0] p;
+ reg p_ok;
+
+ integer w;
+
+ reg [511:0] pp_full;
+ reg [255:0] pp_ref;
+
+ task test_modular_multiplier;
+
+ input [255:0] x;
+ input [255:0] y;
+ input [255:0] n;
+ input [255:0] pp;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+ reg [255:0] n_shreg;
+ reg [255:0] p_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_xyn_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+ n_shreg = n;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+ tb_n_data = n_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+ n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+ tb_n_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyn_wren = 0;
+
+ /* calculate reference value */
+ pp_full = {{256{1'b0}}, x} * {{256{1'b0}}, y};
+ pp_ref = pp_full % {{256{1'b0}}, n};
+
+ /* compare reference value against hard-coded one */
+ if (pp_ref != pp) begin
+ $display("ERROR: pp_ref != pp");
+ $finish;
+ end
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_p_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ p_shreg = {tb_p_data, p_shreg[255:32]};
+
+ end
+
+ /* compare */
+ p_ok = (p_shreg == pp);
+
+ /* display results */
+ $display("test_modular_multiplier_256(): %s", p_ok ? "OK" : "ERROR");
+
+ /* update flag */
+ ok = ok && p_ok;
+
+ end
+
+ endtask
+
+
+
+
endmodule
//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_subtractor.v b/bench/tb_modular_subtractor.v
index f45a286..6cf0e01 100644
--- a/bench/tb_modular_subtractor.v
+++ b/bench/tb_modular_subtractor.v
@@ -43,312 +43,312 @@
module tb_modular_subtractor_256;
- //
- // Test Vectors
- //
- localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
-
- localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
- localparam [255:0] Y_1 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
-
- localparam [255:0] X_2 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
- localparam [255:0] Y_2 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
-
-
- //
- // Core Parameters
- //
- localparam WORD_COUNTER_WIDTH = 3;
- localparam OPERAND_NUM_WORDS = 8;
-
-
- //
- // Clock (100 MHz)
- //
- reg clk = 1'b0;
- always #5 clk = ~clk;
-
-
- //
- // Inputs, Outputs
- //
- reg rst_n;
- reg ena;
- wire rdy;
-
-
- //
- // Buffers (X, Y, N)
- //
- wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_d_addr;
- wire core_d_wren;
-
- wire [ 31:0] core_x_data;
- wire [ 31:0] core_y_data;
- wire [ 31:0] core_n_data;
- wire [ 31:0] core_d_data;
-
- reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
- reg [WORD_COUNTER_WIDTH-1:0] tb_d_addr;
- reg tb_xyn_wren;
-
- reg [ 31:0] tb_x_data;
- reg [ 31:0] tb_y_data;
- reg [ 31:0] tb_n_data;
- wire [ 31:0] tb_d_data;
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_x
- (
- .clk (clk),
-
- .a_addr (tb_xyn_addr),
- .a_wr (tb_xyn_wren),
- .a_in (tb_x_data),
- .a_out (),
-
- .b_addr (core_xy_addr),
- .b_out (core_x_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_y
- (
- .clk (clk),
-
- .a_addr (tb_xyn_addr),
- .a_wr (tb_xyn_wren),
- .a_in (tb_y_data),
- .a_out (),
-
- .b_addr (core_xy_addr),
- .b_out (core_y_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_n
- (
- .clk (clk),
-
- .a_addr (tb_xyn_addr),
- .a_wr (tb_xyn_wren),
- .a_in (tb_n_data),
- .a_out (),
-
- .b_addr (core_n_addr),
- .b_out (core_n_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_s
- (
- .clk (clk),
-
- .a_addr (core_d_addr),
- .a_wr (core_d_wren),
- .a_in (core_d_data),
- .a_out (),
-
- .b_addr (tb_d_addr),
- .b_out (tb_d_data)
- );
-
-
- //
- // UUT
- //
- modular_subtractor #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
- )
- uut
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (ena),
- .rdy (rdy),
-
- .ab_addr (core_xy_addr),
- .n_addr (core_n_addr),
- .d_addr (core_d_addr),
- .d_wren (core_d_wren),
-
- .a_din (core_x_data),
- .b_din (core_y_data),
- .n_din (core_n_data),
- .d_dout (core_d_data)
- );
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
-
- /* initialize control inputs */
- rst_n = 0;
- ena = 0;
-
- tb_xyn_wren = 0;
-
- /* wait for some time */
- #200;
-
- /* de-assert reset */
- rst_n = 1;
-
- /* wait for some time */
- #100;
-
- /* run tests */
- test_modular_subtractor(X_1, Y_1, N);
- test_modular_subtractor(X_2, Y_2, N);
- test_modular_subtractor(Y_1, X_1, N);
- test_modular_subtractor(Y_2, X_2, N);
-
- test_modular_subtractor(X_1, X_2, N);
- test_modular_subtractor(X_2, X_1, N);
- test_modular_subtractor(Y_1, Y_2, N);
- test_modular_subtractor(Y_2, Y_1, N);
-
- test_modular_subtractor(X_1, Y_2, N);
- test_modular_subtractor(Y_2, X_1, N);
- test_modular_subtractor(X_2, Y_1, N);
- test_modular_subtractor(Y_1, X_2, N);
-
- /* print result */
- if (ok) $display("tb_modular_subtractor_256: SUCCESS");
- else $display("tb_modular_subtractor_256: FAILURE");
- //
- $finish;
- //
- end
-
-
- //
- // Test Task
- //
- reg [256:0] d;
- wire [255:0] d_dummy = d[255:0];
- reg d_ok;
-
- integer w;
-
- reg [255:0] x_shreg;
- reg [255:0] y_shreg;
- reg [255:0] n_shreg;
- reg [255:0] d_shreg;
-
- task test_modular_subtractor;
-
- input [255:0] x;
- input [255:0] y;
- input [255:0] n;
-
- begin
-
- /* start filling memories */
- tb_xyn_wren = 1;
-
- /* initialize shift registers */
- x_shreg = x;
- y_shreg = y;
- n_shreg = n;
-
- /* write all the words */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set addresses */
- tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* set data words */
- tb_x_data = x_shreg[31:0];
- tb_y_data = y_shreg[31:0];
- tb_n_data = n_shreg[31:0];
-
- /* shift inputs */
- x_shreg = {{32{1'bX}}, x_shreg[255:32]};
- y_shreg = {{32{1'bX}}, y_shreg[255:32]};
- n_shreg = {{32{1'bX}}, n_shreg[255:32]};
-
- /* wait for 1 clock tick */
- #10;
-
- end
-
- /* wipe addresses */
- tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
-
- /* wipe data words */
- tb_x_data = {32{1'bX}};
- tb_y_data = {32{1'bX}};
- tb_n_data = {32{1'bX}};
-
- /* stop filling memories */
- tb_xyn_wren = 0;
-
- /* calculate reference value */
- d = {1'b0, (x < y) ? n : {256{1'b0}}};
- d = d + {1'b0, x} - {1'b0, y};
-
- /* start operation */
- ena = 1;
-
- /* clear flag */
- #10 ena = 0;
-
- /* wait for operation to complete */
- while (!rdy) #10;
-
- /* read result */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set address */
- tb_d_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* wait for 1 clock tick */
- #10;
-
- /* store data word */
- d_shreg = {tb_d_data, d_shreg[255:32]};
-
- end
-
- /* compare */
- d_ok = (d_shreg == d[255:0]);
-
- /* display results */
- $display("test_modular_subtractor(): %s", d_ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && d_ok;
-
- end
-
- endtask
-
-
+ //
+ // Test Vectors
+ //
+ localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+ localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+ localparam [255:0] Y_1 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+
+ localparam [255:0] X_2 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+ localparam [255:0] Y_2 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y, N)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_d_addr;
+ wire core_d_wren;
+
+ wire [ 31:0] core_x_data;
+ wire [ 31:0] core_y_data;
+ wire [ 31:0] core_n_data;
+ wire [ 31:0] core_d_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_d_addr;
+ reg tb_xyn_wren;
+
+ reg [ 31:0] tb_x_data;
+ reg [ 31:0] tb_y_data;
+ reg [ 31:0] tb_n_data;
+ wire [ 31:0] tb_d_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_y_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_n
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_n_data),
+ .a_out (),
+
+ .b_addr (core_n_addr),
+ .b_out (core_n_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_s
+ (
+ .clk (clk),
+
+ .a_addr (core_d_addr),
+ .a_wr (core_d_wren),
+ .a_in (core_d_data),
+ .a_out (),
+
+ .b_addr (tb_d_addr),
+ .b_out (tb_d_data)
+ );
+
+
+ //
+ // UUT
+ //
+ modular_subtractor #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .ab_addr (core_xy_addr),
+ .n_addr (core_n_addr),
+ .d_addr (core_d_addr),
+ .d_wren (core_d_wren),
+
+ .a_din (core_x_data),
+ .b_din (core_y_data),
+ .n_din (core_n_data),
+ .d_dout (core_d_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xyn_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_subtractor(X_1, Y_1, N);
+ test_modular_subtractor(X_2, Y_2, N);
+ test_modular_subtractor(Y_1, X_1, N);
+ test_modular_subtractor(Y_2, X_2, N);
+
+ test_modular_subtractor(X_1, X_2, N);
+ test_modular_subtractor(X_2, X_1, N);
+ test_modular_subtractor(Y_1, Y_2, N);
+ test_modular_subtractor(Y_2, Y_1, N);
+
+ test_modular_subtractor(X_1, Y_2, N);
+ test_modular_subtractor(Y_2, X_1, N);
+ test_modular_subtractor(X_2, Y_1, N);
+ test_modular_subtractor(Y_1, X_2, N);
+
+ /* print result */
+ if (ok) $display("tb_modular_subtractor_256: SUCCESS");
+ else $display("tb_modular_subtractor_256: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [256:0] d;
+ wire [255:0] d_dummy = d[255:0];
+ reg d_ok;
+
+ integer w;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+ reg [255:0] n_shreg;
+ reg [255:0] d_shreg;
+
+ task test_modular_subtractor;
+
+ input [255:0] x;
+ input [255:0] y;
+ input [255:0] n;
+
+ begin
+
+ /* start filling memories */
+ tb_xyn_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+ n_shreg = n;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+ tb_n_data = n_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+ n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+ tb_n_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyn_wren = 0;
+
+ /* calculate reference value */
+ d = {1'b0, (x < y) ? n : {256{1'b0}}};
+ d = d + {1'b0, x} - {1'b0, y};
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_d_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ d_shreg = {tb_d_data, d_shreg[255:32]};
+
+ end
+
+ /* compare */
+ d_ok = (d_shreg == d[255:0]);
+
+ /* display results */
+ $display("test_modular_subtractor(): %s", d_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && d_ok;
+
+ end
+
+ endtask
+
+
endmodule
//------------------------------------------------------------------------------
diff --git a/bench/tb_mw_comparator.v b/bench/tb_mw_comparator.v
index abab8bc..ed830a8 100644
--- a/bench/tb_mw_comparator.v
+++ b/bench/tb_mw_comparator.v
@@ -38,285 +38,285 @@
//------------------------------------------------------------------------------
`timescale 1ns / 1ps
-//------------------------------------------------------------------------------
-
-module tb_mw_comparator;
-
-
- //
- // Test Vectors
- //
- localparam [383:0] A_0 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
-
- localparam [383:0] A_1 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F75;
- localparam [383:0] A_2 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F77;
-
- localparam [383:0] A_3 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_375D568B_79E730D4_18A9143C_18905F76;
- localparam [383:0] A_4 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_575D568B_79E730D4_18A9143C_18905F76;
-
- localparam [383:0] A_5 = 384'hBDC7B53C_616B13B5_77622510_75BA95FB_475D568B_79E730D4_18A9143C_18905F76;
- localparam [383:0] A_6 = 384'hBDC7B53C_616B13B5_77622510_75BA95FD_475D568B_79E730D4_18A9143C_18905F76;
-
- localparam [383:0] A_7 = 384'hADC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
- localparam [383:0] A_8 = 384'hCDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
-
- localparam [383:0] B_0 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
-
- localparam [383:0] B_1 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E4;
- localparam [383:0] B_2 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E6;
-
- localparam [383:0] B_3 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_9D784F98_850046D4_10DDD64D_F6BB32E5;
- localparam [383:0] B_4 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_BD784F98_850046D4_10DDD64D_F6BB32E5;
-
- localparam [383:0] B_5 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3BF_AD784F98_850046D4_10DDD64D_F6BB32E5;
- localparam [383:0] B_6 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C1_AD784F98_850046D4_10DDD64D_F6BB32E5;
-
- localparam [383:0] B_7 = 384'h248A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
- localparam [383:0] B_8 = 384'h448A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
-
-
- //
- // Core Parameters
- //
- localparam WORD_COUNTER_WIDTH = 3;
- parameter OPERAND_NUM_WORDS = 8;
-
-
- //
- // Clock (100 MHz)
- //
- reg clk = 1'b0;
- always #5 clk = ~clk;
-
-
- //
- // Inputs, Outputs
- //
- reg rst_n;
- reg ena;
- wire rdy;
-
- wire core_cmp_l;
- wire core_cmp_e;
- wire core_cmp_g;
-
-
- //
- // Buffers (X, Y)
- //
- wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
-
- wire [ 32-1:0] core_x_data;
- wire [ 32-1:0] core_y_data;
-
- reg [WORD_COUNTER_WIDTH-1:0] tb_xy_addr;
- reg tb_xy_wren;
-
- reg [ 32-1:0] tb_x_data;
- reg [ 32-1:0] tb_y_data;
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_x
- (
- .clk (clk),
-
- .a_addr (tb_xy_addr),
- .a_wr (tb_xy_wren),
- .a_in (tb_x_data),
- .a_out (),
-
- .b_addr (core_xy_addr),
- .b_out (core_x_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_y
- (
- .clk (clk),
-
- .a_addr (tb_xy_addr),
- .a_wr (tb_xy_wren),
- .a_in (tb_y_data),
- .a_out (),
-
- .b_addr (core_xy_addr),
- .b_out (core_y_data)
- );
-
-
- //
- // UUT
- //
- mw_comparator #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
- )
- uut
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (ena),
- .rdy (rdy),
-
- .xy_addr (core_xy_addr),
- .x_din (core_x_data),
- .y_din (core_y_data),
-
- .cmp_l (core_cmp_l),
- .cmp_e (core_cmp_e),
- .cmp_g (core_cmp_g)
- );
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
-
- /* initialize control inputs */
- rst_n = 0;
- ena = 0;
-
- tb_xy_wren = 0;
-
- /* wait for some time */
- #200;
-
- /* de-assert reset */
- rst_n = 1;
-
- /* wait for some time */
- #100;
-
- /* run tests */
- test_mw_comparator(A_0, A_0);
-
- test_mw_comparator(A_0, A_1);
- test_mw_comparator(A_0, A_2);
- test_mw_comparator(A_0, A_3);
- test_mw_comparator(A_0, A_4);
- test_mw_comparator(A_0, A_5);
- test_mw_comparator(A_0, A_6);
- test_mw_comparator(A_0, A_7);
- test_mw_comparator(A_0, A_8);
-
- test_mw_comparator(B_0, B_0);
-
- test_mw_comparator(B_0, B_1);
- test_mw_comparator(B_0, B_2);
- test_mw_comparator(B_0, B_3);
- test_mw_comparator(B_0, B_4);
- test_mw_comparator(B_0, B_5);
- test_mw_comparator(B_0, B_6);
- test_mw_comparator(B_0, B_7);
- test_mw_comparator(B_0, B_8);
-
- /* print result */
- if (ok) $display("tb_mw_comparator: SUCCESS");
- else $display("tb_mw_comparator: FAILURE");
- //
- $finish;
- //
- end
-
-
- //
- // Test Task
- //
- reg cmp_l;
- reg cmp_e;
- reg cmp_g;
- reg cmp_ok;
-
- integer w;
-
- task test_mw_comparator;
-
- input [255:0] x;
- input [255:0] y;
-
- reg [255:0] x_shreg;
- reg [255:0] y_shreg;
-
- begin
-
- /* start filling memories */
- tb_xy_wren = 1;
-
- /* initialize shift registers */
- x_shreg = x;
- y_shreg = y;
-
- /* write all the words */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set addresses */
- tb_xy_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* set data words */
- tb_x_data = x_shreg[31:0];
- tb_y_data = y_shreg[31:0];
-
- /* shift inputs */
- x_shreg = {{32{1'bX}}, x_shreg[255:32]};
- y_shreg = {{32{1'bX}}, y_shreg[255:32]};
-
- /* wait for 1 clock tick */
- #10;
-
- end
-
- /* wipe addresses */
- tb_xy_addr = {WORD_COUNTER_WIDTH{1'bX}};
-
- /* wipe data words */
- tb_x_data = {32{1'bX}};
- tb_y_data = {32{1'bX}};
-
- /* stop filling memories */
- tb_xy_wren = 0;
-
- /* calculate reference values */
- cmp_l = (x < y) ? 1 : 0;
- cmp_e = (x == y) ? 1 : 0;
- cmp_g = (x > y) ? 1 : 0;
-
- /* start operation */
- ena = 1;
-
- /* clear flag */
- #10 ena = 0;
-
- /* wait for operation to complete */
- while (!rdy) #10;
-
- /* compare */
- cmp_ok = (cmp_l == core_cmp_l) && (cmp_e == core_cmp_e) && (cmp_g == core_cmp_g);
-
- /* display results */
- $display("test_mw_comparator(): %s", cmp_ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && cmp_ok;
-
- end
-
- endtask
-
-endmodule
-
-
+//------------------------------------------------------------------------------
+
+module tb_mw_comparator;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [383:0] A_0 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] A_1 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F75;
+ localparam [383:0] A_2 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F77;
+
+ localparam [383:0] A_3 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_375D568B_79E730D4_18A9143C_18905F76;
+ localparam [383:0] A_4 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_575D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] A_5 = 384'hBDC7B53C_616B13B5_77622510_75BA95FB_475D568B_79E730D4_18A9143C_18905F76;
+ localparam [383:0] A_6 = 384'hBDC7B53C_616B13B5_77622510_75BA95FD_475D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] A_7 = 384'hADC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+ localparam [383:0] A_8 = 384'hCDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] B_0 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+ localparam [383:0] B_1 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E4;
+ localparam [383:0] B_2 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E6;
+
+ localparam [383:0] B_3 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_9D784F98_850046D4_10DDD64D_F6BB32E5;
+ localparam [383:0] B_4 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_BD784F98_850046D4_10DDD64D_F6BB32E5;
+
+ localparam [383:0] B_5 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3BF_AD784F98_850046D4_10DDD64D_F6BB32E5;
+ localparam [383:0] B_6 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C1_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+ localparam [383:0] B_7 = 384'h248A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+ localparam [383:0] B_8 = 384'h448A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ parameter OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+ wire core_cmp_l;
+ wire core_cmp_e;
+ wire core_cmp_g;
+
+
+ //
+ // Buffers (X, Y)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+
+ wire [ 32-1:0] core_x_data;
+ wire [ 32-1:0] core_y_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xy_addr;
+ reg tb_xy_wren;
+
+ reg [ 32-1:0] tb_x_data;
+ reg [ 32-1:0] tb_y_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xy_addr),
+ .a_wr (tb_xy_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xy_addr),
+ .a_wr (tb_xy_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_y_data)
+ );
+
+
+ //
+ // UUT
+ //
+ mw_comparator #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .xy_addr (core_xy_addr),
+ .x_din (core_x_data),
+ .y_din (core_y_data),
+
+ .cmp_l (core_cmp_l),
+ .cmp_e (core_cmp_e),
+ .cmp_g (core_cmp_g)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xy_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_mw_comparator(A_0, A_0);
+
+ test_mw_comparator(A_0, A_1);
+ test_mw_comparator(A_0, A_2);
+ test_mw_comparator(A_0, A_3);
+ test_mw_comparator(A_0, A_4);
+ test_mw_comparator(A_0, A_5);
+ test_mw_comparator(A_0, A_6);
+ test_mw_comparator(A_0, A_7);
+ test_mw_comparator(A_0, A_8);
+
+ test_mw_comparator(B_0, B_0);
+
+ test_mw_comparator(B_0, B_1);
+ test_mw_comparator(B_0, B_2);
+ test_mw_comparator(B_0, B_3);
+ test_mw_comparator(B_0, B_4);
+ test_mw_comparator(B_0, B_5);
+ test_mw_comparator(B_0, B_6);
+ test_mw_comparator(B_0, B_7);
+ test_mw_comparator(B_0, B_8);
+
+ /* print result */
+ if (ok) $display("tb_mw_comparator: SUCCESS");
+ else $display("tb_mw_comparator: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg cmp_l;
+ reg cmp_e;
+ reg cmp_g;
+ reg cmp_ok;
+
+ integer w;
+
+ task test_mw_comparator;
+
+ input [255:0] x;
+ input [255:0] y;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_xy_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xy_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xy_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xy_wren = 0;
+
+ /* calculate reference values */
+ cmp_l = (x < y) ? 1 : 0;
+ cmp_e = (x == y) ? 1 : 0;
+ cmp_g = (x > y) ? 1 : 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* compare */
+ cmp_ok = (cmp_l == core_cmp_l) && (cmp_e == core_cmp_e) && (cmp_g == core_cmp_g);
+
+ /* display results */
+ $display("test_mw_comparator(): %s", cmp_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && cmp_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+
//------------------------------------------------------------------------------
// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
diff --git a/bench/tb_mw_mover.v b/bench/tb_mw_mover.v
index be767fc..08bdb9e 100644
--- a/bench/tb_mw_mover.v
+++ b/bench/tb_mw_mover.v
@@ -43,238 +43,238 @@
module tb_mw_mover;
- //
- // Test Vectors
- //
- localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
- localparam [255:0] X_2 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
- localparam [255:0] X_3 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
- localparam [255:0] X_4 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
-
-
- //
- // Core Parameters
- //
- localparam WORD_COUNTER_WIDTH = 3;
- localparam OPERAND_NUM_WORDS = 8;
-
-
- //
- // Clock (100 MHz)
- //
- reg clk = 1'b0;
- always #5 clk = ~clk;
-
-
- //
- // Inputs, Outputs
- //
- reg rst_n;
- reg ena;
- wire rdy;
-
-
- //
- // Buffers (X, Y)
- //
- wire [WORD_COUNTER_WIDTH-1:0] core_x_addr;
- wire [WORD_COUNTER_WIDTH-1:0] core_y_addr;
- wire core_y_wren;
-
- wire [ 32-1:0] core_x_data;
- wire [ 32-1:0] core_y_data;
-
- reg [WORD_COUNTER_WIDTH-1:0] tb_x_addr;
- reg [WORD_COUNTER_WIDTH-1:0] tb_y_addr;
- reg tb_x_wren;
-
- reg [ 32-1:0] tb_x_data;
- wire [ 32-1:0] tb_y_data;
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_x
- (
- .clk (clk),
-
- .a_addr (tb_x_addr),
- .a_wr (tb_x_wren),
- .a_in (tb_x_data),
- .a_out (),
-
- .b_addr (core_x_addr),
- .b_out (core_x_data)
- );
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
- )
- bram_d
- (
- .clk (clk),
-
- .a_addr (core_y_addr),
- .a_wr (core_y_wren),
- .a_in (core_y_data),
- .a_out (),
-
- .b_addr (tb_y_addr),
- .b_out (tb_y_data)
- );
-
-
- //
- // UUT
- //
- mw_mover #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
- )
- uut
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (ena),
- .rdy (rdy),
-
- .x_addr (core_x_addr),
- .y_addr (core_y_addr),
- .y_wren (core_y_wren),
-
- .x_din (core_x_data),
- .y_dout (core_y_data)
- );
-
-
- //
- // Testbench Routine
- //
- reg ok = 1;
- initial begin
-
- /* initialize control inputs */
- rst_n = 0;
- ena = 0;
-
- tb_x_wren = 0;
-
- /* wait for some time */
- #200;
-
- /* de-assert reset */
- rst_n = 1;
-
- /* wait for some time */
- #100;
-
- /* run tests */
- test_modular_mover(X_1);
- test_modular_mover(X_2);
- test_modular_mover(X_3);
- test_modular_mover(X_4);
-
- /* print result */
- if (ok) $display("tb_modular_mover: SUCCESS");
- else $display("tb_modular_mover: FAILURE");
- //
- $finish;
- //
- end
-
-
- //
- // Test Task
- //
- reg [255:0] y;
- reg y_ok;
-
- integer w;
-
- reg [255:0] x_shreg;
- reg [255:0] y_shreg;
-
- task test_modular_mover;
-
- input [255:0] x;
-
- begin
-
- /* start filling memories */
- tb_x_wren = 1;
-
- /* initialize shift registers */
- x_shreg = x;
-
- /* write all the words */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set addresses */
- tb_x_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* set data words */
- tb_x_data = x_shreg[31:0];
-
- /* shift inputs */
- x_shreg = {{32{1'bX}}, x_shreg[255:32]};
-
- /* wait for 1 clock tick */
- #10;
-
- end
-
- /* wipe addresses */
- tb_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
-
- /* wipe data words */
- tb_x_data = {32{1'bX}};
-
- /* stop filling memories */
- tb_x_wren = 0;
-
- /* start operation */
- ena = 1;
-
- /* clear flag */
- #10 ena = 0;
-
- /* wait for operation to complete */
- while (!rdy) #10;
-
- /* read result */
- for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-
- /* set address */
- tb_y_addr = w[WORD_COUNTER_WIDTH-1:0];
-
- /* wait for 1 clock tick */
- #10;
-
- /* store data word */
- y_shreg = {tb_y_data, y_shreg[255:32]};
-
- end
-
- /* compare */
- y_ok = (y_shreg == x);
-
- /* display results */
- $display("test_modular_mover(): %s", y_ok ? "OK" : "ERROR");
-
- /* update global flag */
- ok = ok && y_ok;
-
- end
-
- endtask
-
-
+ //
+ // Test Vectors
+ //
+ localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+ localparam [255:0] X_2 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+ localparam [255:0] X_3 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+ localparam [255:0] X_4 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_x_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_y_addr;
+ wire core_y_wren;
+
+ wire [ 32-1:0] core_x_data;
+ wire [ 32-1:0] core_y_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_x_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_y_addr;
+ reg tb_x_wren;
+
+ reg [ 32-1:0] tb_x_data;
+ wire [ 32-1:0] tb_y_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_x_addr),
+ .a_wr (tb_x_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_x_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_d
+ (
+ .clk (clk),
+
+ .a_addr (core_y_addr),
+ .a_wr (core_y_wren),
+ .a_in (core_y_data),
+ .a_out (),
+
+ .b_addr (tb_y_addr),
+ .b_out (tb_y_data)
+ );
+
+
+ //
+ // UUT
+ //
+ mw_mover #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .x_addr (core_x_addr),
+ .y_addr (core_y_addr),
+ .y_wren (core_y_wren),
+
+ .x_din (core_x_data),
+ .y_dout (core_y_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_x_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_mover(X_1);
+ test_modular_mover(X_2);
+ test_modular_mover(X_3);
+ test_modular_mover(X_4);
+
+ /* print result */
+ if (ok) $display("tb_modular_mover: SUCCESS");
+ else $display("tb_modular_mover: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [255:0] y;
+ reg y_ok;
+
+ integer w;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+
+ task test_modular_mover;
+
+ input [255:0] x;
+
+ begin
+
+ /* start filling memories */
+ tb_x_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_x_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_x_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_y_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ y_shreg = {tb_y_data, y_shreg[255:32]};
+
+ end
+
+ /* compare */
+ y_ok = (y_shreg == x);
+
+ /* display results */
+ $display("test_modular_mover(): %s", y_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && y_ok;
+
+ end
+
+ endtask
+
+
endmodule
//------------------------------------------------------------------------------
diff --git a/rtl/curve/curve_dbl_add_256.v b/rtl/curve/curve_dbl_add_256.v
index 8ef505d..6a93823 100644
--- a/rtl/curve/curve_dbl_add_256.v
+++ b/rtl/curve/curve_dbl_add_256.v
@@ -1,874 +1,874 @@
-//------------------------------------------------------------------------------
-//
-// curve_dbl_add_256.v
-// -----------------------------------------------------------------------------
-// Elliptic curve point adder and doubler.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module curve_dbl_add_256
- (
- clk, rst_n,
- ena, rdy,
- uop_addr, uop,
- px_addr, py_addr, pz_addr, rx_addr, ry_addr, rz_addr, q_addr, v_addr,
- rx_wren, ry_wren, rz_wren,
- px_din, py_din, pz_din,
- rx_din, ry_din, rz_din,
- rx_dout, ry_dout, rz_dout, q_din, v_din
- );
-
-
- //
- // Microcode
- //
-`include "uop_ecdsa.v"
-
-
- //
- // Constants
- //
- localparam WORD_COUNTER_WIDTH = 3; // 0 .. 7
- localparam OPERAND_NUM_WORDS = 8; // 8 * 32 = 256
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output reg [ 6-1: 0] uop_addr;
- input wire [20-1: 0] uop;
-
- output reg [WORD_COUNTER_WIDTH-1:0] px_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] py_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] pz_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] rx_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] ry_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] rz_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] v_addr;
- output wire [WORD_COUNTER_WIDTH-1:0] q_addr;
-
- output wire rx_wren;
- output wire ry_wren;
- output wire rz_wren;
-
- input wire [ 32-1:0] px_din;
- input wire [ 32-1:0] py_din;
- input wire [ 32-1:0] pz_din;
- input wire [ 32-1:0] rx_din;
- input wire [ 32-1:0] ry_din;
- input wire [ 32-1:0] rz_din;
- output wire [ 32-1:0] rx_dout;
- output wire [ 32-1:0] ry_dout;
- output wire [ 32-1:0] rz_dout;
- input wire [ 32-1:0] q_din;
- input wire [ 32-1:0] v_din;
-
-
- //
- // Microcode
- //
- wire [ 4: 0] uop_opcode = uop[19:15];
- wire [ 4: 0] uop_src_a = uop[14:10];
- wire [ 4: 0] uop_src_b = uop[ 9: 5];
- wire [ 2: 0] uop_dst = uop[ 4: 2];
- wire [ 1: 0] uop_exec = uop[ 1: 0];
-
-
- //
- // Multi-Word Comparator
- //
- wire mw_cmp_ena;
- wire mw_cmp_rdy;
-
- wire mw_cmp_out_l;
- wire mw_cmp_out_e;
- wire mw_cmp_out_g;
-
- wire [WORD_COUNTER_WIDTH-1:0] mw_cmp_addr_xy;
-
- wire [ 32-1:0] mw_cmp_din_x;
- wire [ 32-1:0] mw_cmp_din_y;
-
- // flags
- reg flag_pz_is_zero;
- reg flag_t1_is_zero;
- reg flag_t2_is_zero;
-
- mw_comparator #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
- )
- mw_comparator_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (mw_cmp_ena),
- .rdy (mw_cmp_rdy),
-
- .xy_addr (mw_cmp_addr_xy),
- .x_din (mw_cmp_din_x),
- .y_din (mw_cmp_din_y),
-
- .cmp_l (mw_cmp_out_l),
- .cmp_e (mw_cmp_out_e),
- .cmp_g (mw_cmp_out_g)
- );
-
-
- //
- // Modular Adder
- //
- wire mod_add_ena;
- wire mod_add_rdy;
-
- wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_ab;
- wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_n;
- wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_s;
- wire mod_add_wren_s;
-
- wire [ 32-1:0] mod_add_din_a;
- wire [ 32-1:0] mod_add_din_b;
- wire [ 32-1:0] mod_add_din_n;
- wire [ 32-1:0] mod_add_dout_s;
-
- assign mod_add_din_n = q_din;
-
- modular_adder #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
- )
- modular_adder_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (mod_add_ena),
- .rdy (mod_add_rdy),
-
- .ab_addr (mod_add_addr_ab),
- .n_addr (mod_add_addr_n),
- .s_addr (mod_add_addr_s),
- .s_wren (mod_add_wren_s),
-
- .a_din (mod_add_din_a),
- .b_din (mod_add_din_b),
- .n_din (mod_add_din_n),
- .s_dout (mod_add_dout_s)
- );
-
-
- //
- // Modular Subtractor
- //
- wire mod_sub_ena;
- wire mod_sub_rdy;
-
- wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_ab;
- wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_n;
- wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_d;
- wire mod_sub_wren_d;
-
- wire [ 32-1:0] mod_sub_din_a;
- wire [ 32-1:0] mod_sub_din_b;
- wire [ 32-1:0] mod_sub_din_n;
- wire [ 32-1:0] mod_sub_dout_d;
-
- assign mod_sub_din_n = q_din;
-
- modular_subtractor #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
- )
- modular_subtractor_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (mod_sub_ena),
- .rdy (mod_sub_rdy),
-
- .ab_addr (mod_sub_addr_ab),
- .n_addr (mod_sub_addr_n),
- .d_addr (mod_sub_addr_d),
- .d_wren (mod_sub_wren_d),
-
- .a_din (mod_sub_din_a),
- .b_din (mod_sub_din_b),
- .n_din (mod_sub_din_n),
- .d_dout (mod_sub_dout_d)
- );
-
-
- //
- // Modular Multiplier
- //
- wire mod_mul_ena;
- wire mod_mul_rdy;
-
- wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_a;
- wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_b;
- wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_n;
- wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_p;
- wire mod_mul_wren_p;
-
- wire [ 32-1:0] mod_mul_din_a;
- wire [ 32-1:0] mod_mul_din_b;
- wire [ 32-1:0] mod_mul_din_n;
- wire [ 32-1:0] mod_mul_dout_p;
-
- assign mod_mul_din_n = q_din;
-
- modular_multiplier_256 modular_multiplier_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (mod_mul_ena),
- .rdy (mod_mul_rdy),
-
- .a_addr (mod_mul_addr_a),
- .b_addr (mod_mul_addr_b),
- .n_addr (mod_mul_addr_n),
- .p_addr (mod_mul_addr_p),
- .p_wren (mod_mul_wren_p),
-
- .a_din (mod_mul_din_a),
- .b_din (mod_mul_din_b),
- .n_din (mod_mul_din_n),
- .p_dout (mod_mul_dout_p)
- );
-
-
- //
- // Multi-Word Data Mover
- //
- wire mw_mov_ena;
- wire mw_mov_rdy;
-
- wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_x;
- wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_y;
- wire mw_mov_wren_y;
-
- wire [ 32-1:0] mw_mov_din_x;
- wire [ 32-1:0] mw_mov_dout_y;
-
- mw_mover #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
-
- )
- mw_mover_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (mw_mov_ena),
- .rdy (mw_mov_rdy),
-
- .x_addr (mw_mov_addr_x),
- .y_addr (mw_mov_addr_y),
- .y_wren (mw_mov_wren_y),
-
- .x_din (mw_mov_din_x),
- .y_dout (mw_mov_dout_y)
- );
-
-
- //
- // ROMs
- //
- reg [WORD_COUNTER_WIDTH-1:0] brom_one_addr;
- //reg [WORD_COUNTER_WIDTH-1:0] brom_zero_addr;
- reg [WORD_COUNTER_WIDTH-1:0] brom_delta_addr;
- reg [WORD_COUNTER_WIDTH-1:0] brom_g_x_addr;
- reg [WORD_COUNTER_WIDTH-1:0] brom_g_y_addr;
- reg [WORD_COUNTER_WIDTH-1:0] brom_h_x_addr;
- reg [WORD_COUNTER_WIDTH-1:0] brom_h_y_addr;
-
- wire [ 32-1:0] brom_one_dout;
- wire [ 32-1:0] brom_zero_dout;
- wire [ 32-1:0] brom_delta_dout;
- wire [ 32-1:0] brom_g_x_dout;
- wire [ 32-1:0] brom_g_y_dout;
- wire [ 32-1:0] brom_h_x_dout;
- wire [ 32-1:0] brom_h_y_dout;
-
- (* ROM_STYLE="BLOCK" *) brom_p256_one brom_one_inst
- (.clk(clk), .b_addr(brom_one_addr), .b_out(brom_one_dout));
-
- brom_p256_zero brom_zero_inst
- (.b_out(brom_zero_dout));
-
- (* ROM_STYLE="BLOCK" *) brom_p256_delta brom_delta_inst
- (.clk(clk), .b_addr(brom_delta_addr), .b_out(brom_delta_dout));
-
- (* ROM_STYLE="BLOCK" *) brom_p256_g_x brom_g_x_inst
- (.clk(clk), .b_addr(brom_g_x_addr), .b_out(brom_g_x_dout));
-
- (* ROM_STYLE="BLOCK" *) brom_p256_g_y brom_g_y_inst
- (.clk(clk), .b_addr(brom_g_y_addr), .b_out(brom_g_y_dout));
-
- (* ROM_STYLE="BLOCK" *) brom_p256_h_x brom_h_x_inst
- (.clk(clk), .b_addr(brom_h_x_addr), .b_out(brom_h_x_dout));
-
- (* ROM_STYLE="BLOCK" *) brom_p256_h_y brom_h_y_inst
- (.clk(clk), .b_addr(brom_h_y_addr), .b_out(brom_h_y_dout));
-
-
- //
- // Temporary Variables
- //
- reg [WORD_COUNTER_WIDTH-1:0] bram_t1_wr_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t2_wr_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t3_wr_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t4_wr_addr;
-
- reg [WORD_COUNTER_WIDTH-1:0] bram_t1_rd_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t2_rd_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t3_rd_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t4_rd_addr;
-
- wire bram_t1_wr_en;
- wire bram_t2_wr_en;
- wire bram_t3_wr_en;
- wire bram_t4_wr_en;
-
- wire [ 32-1:0] bram_t1_wr_data;
- wire [ 32-1:0] bram_t2_wr_data;
- wire [ 32-1:0] bram_t3_wr_data;
- wire [ 32-1:0] bram_t4_wr_data;
-
- wire [ 32-1:0] bram_t1_rd_data;
- wire [ 32-1:0] bram_t2_rd_data;
- wire [ 32-1:0] bram_t3_rd_data;
- wire [ 32-1:0] bram_t4_rd_data;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
- )
- bram_t1
- ( .clk (clk),
- .a_addr(bram_t1_wr_addr), .a_wr(bram_t1_wr_en), .a_in(bram_t1_wr_data), .a_out(),
- .b_addr(bram_t1_rd_addr), .b_out(bram_t1_rd_data)
- );
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
- )
- bram_t2
- ( .clk (clk),
- .a_addr(bram_t2_wr_addr), .a_wr(bram_t2_wr_en), .a_in(bram_t2_wr_data), .a_out(),
- .b_addr(bram_t2_rd_addr), .b_out(bram_t2_rd_data)
- );
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
- )
- bram_t3
- ( .clk (clk),
- .a_addr(bram_t3_wr_addr), .a_wr(bram_t3_wr_en), .a_in(bram_t3_wr_data), .a_out(),
- .b_addr(bram_t3_rd_addr), .b_out(bram_t3_rd_data)
- );
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
- )
- bram_t4
- ( .clk (clk),
- .a_addr(bram_t4_wr_addr), .a_wr(bram_t4_wr_en), .a_in(bram_t4_wr_data), .a_out(),
- .b_addr(bram_t4_rd_addr), .b_out(bram_t4_rd_data)
- );
-
-
- //
- // uOP Trigger Logic
- //
- reg uop_trig;
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) uop_trig <= 1'b0;
- else uop_trig <= (fsm_state == FSM_STATE_FETCH) ? 1'b1 : 1'b0;
-
-
- //
- // FSM
- //
- localparam [ 1: 0] FSM_STATE_STALL = 2'b00;
- localparam [ 1: 0] FSM_STATE_FETCH = 2'b01;
- localparam [ 1: 0] FSM_STATE_EXECUTE = 2'b10;
-
- reg [ 1: 0] fsm_state = FSM_STATE_STALL;
- wire [ 1: 0] fsm_state_next = (uop_opcode == OPCODE_RDY) ? FSM_STATE_STALL : FSM_STATE_FETCH;
-
-
- //
- // FSM Transition Logic
- //
- reg uop_done;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) fsm_state <= FSM_STATE_STALL;
- else case (fsm_state)
- FSM_STATE_STALL: fsm_state <= ena ? FSM_STATE_FETCH : FSM_STATE_STALL;
- FSM_STATE_FETCH: fsm_state <= FSM_STATE_EXECUTE;
- FSM_STATE_EXECUTE: fsm_state <= (!uop_trig && uop_done) ? fsm_state_next : FSM_STATE_EXECUTE;
- default: fsm_state <= FSM_STATE_STALL;
- endcase
-
-
- //
- // uOP Address Increment Logic
- //
- always @(posedge clk)
- //
- if (fsm_state == FSM_STATE_STALL)
- uop_addr <= 5'd0;
- else if (fsm_state == FSM_STATE_EXECUTE)
- if (!uop_trig && uop_done)
- uop_addr <= (uop_opcode == OPCODE_RDY) ? 5'd0 : uop_addr + 1'b1;
-
-
- //
- // uOP Completion Logic
- //
- always @(*)
- //
- case (uop_opcode)
- OPCODE_CMP: uop_done = mw_cmp_rdy;
- OPCODE_MOV: uop_done = mw_mov_rdy;
- OPCODE_ADD: uop_done = mod_add_rdy;
- OPCODE_SUB: uop_done = mod_sub_rdy;
- OPCODE_MUL: uop_done = mod_mul_rdy;
- OPCODE_RDY: uop_done = 1'b1;
- default: uop_done = 1'b0;
- endcase
-
-
- //
- // Helper Modules Enable Logic
- //
- assign mw_cmp_ena = uop_opcode[0] & uop_trig;
- assign mw_mov_ena = uop_opcode[1] & uop_trig;
- assign mod_add_ena = uop_opcode[2] & uop_trig;
- assign mod_sub_ena = uop_opcode[3] & uop_trig;
- assign mod_mul_ena = uop_opcode[4] & uop_trig;
-
-
- //
- // uOP Source Value Decoding Logic
- //
- reg [31: 0] uop_src_a_value;
-
- always @(*)
- //
- case (uop_src_a)
- UOP_SRC_PX: uop_src_a_value = px_din;
- UOP_SRC_PY: uop_src_a_value = py_din;
- UOP_SRC_PZ: uop_src_a_value = pz_din;
-
- UOP_SRC_RX: uop_src_a_value = rx_din;
- UOP_SRC_RY: uop_src_a_value = ry_din;
- UOP_SRC_RZ: uop_src_a_value = rz_din;
-
- UOP_SRC_T1: uop_src_a_value = bram_t1_rd_data;
- UOP_SRC_T2: uop_src_a_value = bram_t2_rd_data;
- UOP_SRC_T3: uop_src_a_value = bram_t3_rd_data;
- UOP_SRC_T4: uop_src_a_value = bram_t4_rd_data;
-
- UOP_SRC_ONE: uop_src_a_value = brom_one_dout;
- UOP_SRC_ZERO: uop_src_a_value = brom_zero_dout;
- UOP_SRC_DELTA: uop_src_a_value = brom_delta_dout;
-
- UOP_SRC_G_X: uop_src_a_value = brom_g_x_dout;
- UOP_SRC_G_Y: uop_src_a_value = brom_g_y_dout;
-
- UOP_SRC_H_X: uop_src_a_value = brom_h_x_dout;
- UOP_SRC_H_Y: uop_src_a_value = brom_h_y_dout;
-
- UOP_SRC_V: uop_src_a_value = v_din;
-
- default: uop_src_a_value = {32{1'bX}};
- endcase
-
-
- assign mw_cmp_din_x = uop_src_a_value;
- assign mw_mov_din_x = uop_src_a_value;
- assign mod_add_din_a = uop_src_a_value;
- assign mod_sub_din_a = uop_src_a_value;
- assign mod_mul_din_a = uop_src_a_value;
-
- reg [31: 0] uop_src_b_value;
-
- always @(*)
- //
- case (uop_src_b)
- UOP_SRC_PX: uop_src_b_value = px_din;
- UOP_SRC_PY: uop_src_b_value = py_din;
- UOP_SRC_PZ: uop_src_b_value = pz_din;
-
- UOP_SRC_RX: uop_src_b_value = rx_din;
- UOP_SRC_RY: uop_src_b_value = ry_din;
- UOP_SRC_RZ: uop_src_b_value = rz_din;
-
- UOP_SRC_T1: uop_src_b_value = bram_t1_rd_data;
- UOP_SRC_T2: uop_src_b_value = bram_t2_rd_data;
- UOP_SRC_T3: uop_src_b_value = bram_t3_rd_data;
- UOP_SRC_T4: uop_src_b_value = bram_t4_rd_data;
-
- UOP_SRC_ONE: uop_src_b_value = brom_one_dout;
- UOP_SRC_ZERO: uop_src_b_value = brom_zero_dout;
- UOP_SRC_DELTA: uop_src_b_value = brom_delta_dout;
-
- UOP_SRC_G_X: uop_src_b_value = brom_g_x_dout;
- UOP_SRC_G_Y: uop_src_b_value = brom_g_y_dout;
-
- UOP_SRC_H_X: uop_src_b_value = brom_h_x_dout;
- UOP_SRC_H_Y: uop_src_b_value = brom_h_y_dout;
-
- UOP_SRC_V: uop_src_b_value = v_din;
-
- default: uop_src_b_value = {32{1'bX}};
- endcase
-
- assign mw_cmp_din_y = uop_src_b_value;
- assign mod_add_din_b = uop_src_b_value;
- assign mod_sub_din_b = uop_src_b_value;
- assign mod_mul_din_b = uop_src_b_value;
-
-
- //
- // uOP Source & Destination Address Decoding Logic
- //
- reg [WORD_COUNTER_WIDTH-1:0] uop_src_a_addr;
- reg [WORD_COUNTER_WIDTH-1:0] uop_src_b_addr;
- reg [WORD_COUNTER_WIDTH-1:0] uop_dst_addr;
- reg [WORD_COUNTER_WIDTH-1:0] uop_q_addr;
-
- assign q_addr = uop_q_addr;
-
- always @(*)
- //
- case (uop_opcode)
- //
- OPCODE_CMP: begin
- uop_src_a_addr = mw_cmp_addr_xy;
- uop_src_b_addr = mw_cmp_addr_xy;
- uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}};
- uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- OPCODE_MOV: begin
- uop_src_a_addr = mw_mov_addr_x;
- uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
- uop_dst_addr = mw_mov_addr_y;
- uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- OPCODE_ADD: begin
- uop_src_a_addr = mod_add_addr_ab;
- uop_src_b_addr = mod_add_addr_ab;
- uop_dst_addr = mod_add_addr_s;
- uop_q_addr = mod_add_addr_n;
- end
- //
- OPCODE_SUB: begin
- uop_src_a_addr = mod_sub_addr_ab;
- uop_src_b_addr = mod_sub_addr_ab;
- uop_dst_addr = mod_sub_addr_d;
- uop_q_addr = mod_sub_addr_n;
- end
- //
- OPCODE_MUL: begin
- uop_src_a_addr = mod_mul_addr_a;
- uop_src_b_addr = mod_mul_addr_b;
- uop_dst_addr = mod_mul_addr_p;
- uop_q_addr = mod_mul_addr_n;
- end
- //
- default: begin
- uop_src_a_addr = {WORD_COUNTER_WIDTH{1'bX}};
- uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
- uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}};
- uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- endcase
-
-
- //
- // uOP Conditional Execution Logic
- //
- reg uop_exec_effective;
-
- always @(*)
- //
- case (uop_exec)
- UOP_EXEC_ALWAYS: uop_exec_effective = 1'b1;
- UOP_EXEC_PZT1T2_0XX: uop_exec_effective = flag_pz_is_zero;
- UOP_EXEC_PZT1T2_100: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && flag_t2_is_zero;
- UOP_EXEC_PZT1T2_101: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && !flag_t2_is_zero;
- endcase
-
-
- //
- // uOP Destination Store Logic
- //
- reg uop_dst_wren;
-
- always @(*)
- //
- case (uop_opcode)
- //
- OPCODE_MOV: uop_dst_wren = mw_mov_wren_y & uop_exec_effective;
- OPCODE_ADD: uop_dst_wren = mod_add_wren_s;
- OPCODE_SUB: uop_dst_wren = mod_sub_wren_d;
- OPCODE_MUL: uop_dst_wren = mod_mul_wren_p;
- default: uop_dst_wren = 1'b0;
- //
- endcase
-
-
- always @(*) begin
- //
- //
- //
- if (uop_src_a == UOP_SRC_PX) px_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_PX) px_addr = uop_src_b_addr;
- else px_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_PY) py_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_PY) py_addr = uop_src_b_addr;
- else py_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_PZ) pz_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_PZ) pz_addr = uop_src_b_addr;
- else pz_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_src_a == UOP_SRC_ONE) brom_one_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_ONE) brom_one_addr = uop_src_b_addr;
- else brom_one_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //if (uop_src_a == UOP_SRC_ZERO) brom_zero_addr = uop_src_a_addr;
- //else if (uop_src_b == UOP_SRC_ZERO) brom_zero_addr = uop_src_b_addr;
- //else brom_zero_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_DELTA) brom_delta_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_DELTA) brom_delta_addr = uop_src_b_addr;
- else brom_delta_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_src_a == UOP_SRC_G_X) brom_g_x_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_G_X) brom_g_x_addr = uop_src_b_addr;
- else brom_g_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_G_Y) brom_g_y_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_G_Y) brom_g_y_addr = uop_src_b_addr;
- else brom_g_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_src_a == UOP_SRC_H_X) brom_h_x_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_H_X) brom_h_x_addr = uop_src_b_addr;
- else brom_h_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_H_Y) brom_h_y_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_H_Y) brom_h_y_addr = uop_src_b_addr;
- else brom_h_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_src_a == UOP_SRC_V) v_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_V) v_addr = uop_src_b_addr;
- else v_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_src_a == UOP_SRC_T1) bram_t1_rd_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_T1) bram_t1_rd_addr = uop_src_b_addr;
- else bram_t1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_T2) bram_t2_rd_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_T2) bram_t2_rd_addr = uop_src_b_addr;
- else bram_t2_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_T3) bram_t3_rd_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_T3) bram_t3_rd_addr = uop_src_b_addr;
- else bram_t3_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_T4) bram_t4_rd_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_T4) bram_t4_rd_addr = uop_src_b_addr;
- else bram_t4_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_dst == UOP_DST_T1) bram_t1_wr_addr = uop_dst_addr;
- else bram_t1_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_dst == UOP_DST_T2) bram_t2_wr_addr = uop_dst_addr;
- else bram_t2_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_dst == UOP_DST_T3) bram_t3_wr_addr = uop_dst_addr;
- else bram_t3_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_dst == UOP_DST_T4) bram_t4_wr_addr = uop_dst_addr;
- else bram_t4_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if ((uop_dst == UOP_DST_RX) && (uop_dst_wren)) rx_addr = uop_dst_addr;
- else begin
- if (uop_src_a == UOP_SRC_RX) rx_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_RX) rx_addr = uop_src_b_addr;
- else rx_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- if ((uop_dst == UOP_DST_RY) && (uop_dst_wren)) ry_addr = uop_dst_addr;
- else begin
- if (uop_src_a == UOP_SRC_RY) ry_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_RY) ry_addr = uop_src_b_addr;
- else ry_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- if ((uop_dst == UOP_DST_RZ) && (uop_dst_wren)) rz_addr = uop_dst_addr;
- else begin
- if (uop_src_a == UOP_SRC_RZ) rz_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_RZ) rz_addr = uop_src_b_addr;
- else rz_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- end
-
-
- assign rx_wren = uop_dst_wren && (uop_dst == UOP_DST_RX);
- assign ry_wren = uop_dst_wren && (uop_dst == UOP_DST_RY);
- assign rz_wren = uop_dst_wren && (uop_dst == UOP_DST_RZ);
-
- assign bram_t1_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T1);
- assign bram_t2_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T2);
- assign bram_t3_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T3);
- assign bram_t4_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T4);
-
-
-
- //
- // Destination Value Selector
- //
- reg [31: 0] uop_dst_value;
-
- always @(*)
- //
- case (uop_opcode)
-
- OPCODE_MOV: uop_dst_value = mw_mov_dout_y;
- OPCODE_ADD: uop_dst_value = mod_add_dout_s;
- OPCODE_SUB: uop_dst_value = mod_sub_dout_d;
- OPCODE_MUL: uop_dst_value = mod_mul_dout_p;
-
- default: uop_dst_value = {32{1'bX}};
-
- endcase
-
- assign rx_dout = uop_dst_value;
- assign ry_dout = uop_dst_value;
- assign rz_dout = uop_dst_value;
-
- assign bram_t1_wr_data = uop_dst_value;
- assign bram_t2_wr_data = uop_dst_value;
- assign bram_t3_wr_data = uop_dst_value;
- assign bram_t4_wr_data = uop_dst_value;
-
-
- //
- // Latch Comparison Flags
- //
- always @(posedge clk)
- //
- if ( (fsm_state == FSM_STATE_EXECUTE) &&
- (uop_opcode == OPCODE_CMP) &&
- (uop_done && !uop_trig) ) begin
-
- if ( (uop_src_a == UOP_SRC_PZ) && (uop_src_b == UOP_SRC_ZERO) )
- flag_pz_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
-
- if ( (uop_src_a == UOP_SRC_T1) && (uop_src_b == UOP_SRC_ZERO) )
- flag_t1_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
-
- if ( (uop_src_a == UOP_SRC_T2) && (uop_src_b == UOP_SRC_ZERO) )
- flag_t2_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
-
- end
-
-
- //
- // Ready Flag Logic
- //
- reg rdy_reg = 1'b1;
- assign rdy = rdy_reg;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) rdy_reg <= 1'b1;
- else begin
-
- /* clear flag */
- if (fsm_state == FSM_STATE_STALL)
- if (ena) rdy_reg <= 1'b0;
-
- /* set flag */
- if ((fsm_state == FSM_STATE_EXECUTE) && !uop_trig && uop_done)
- if (uop_opcode == OPCODE_RDY) rdy_reg <= 1'b1;
-
- end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// curve_dbl_add_256.v
+// -----------------------------------------------------------------------------
+// Elliptic curve point adder and doubler.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module curve_dbl_add_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ uop_addr, uop,
+ px_addr, py_addr, pz_addr, rx_addr, ry_addr, rz_addr, q_addr, v_addr,
+ rx_wren, ry_wren, rz_wren,
+ px_din, py_din, pz_din,
+ rx_din, ry_din, rz_din,
+ rx_dout, ry_dout, rz_dout, q_din, v_din
+ );
+
+
+ //
+ // Microcode
+ //
+`include "../../../../math/ecdsalib/rtl/curve/uop_ecdsa.v"
+
+
+ //
+ // Constants
+ //
+ localparam WORD_COUNTER_WIDTH = 3; // 0 .. 7
+ localparam OPERAND_NUM_WORDS = 8; // 8 * 32 = 256
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output reg [ 6-1: 0] uop_addr;
+ input wire [20-1: 0] uop;
+
+ output reg [WORD_COUNTER_WIDTH-1:0] px_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] py_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] pz_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] rx_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] ry_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] rz_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] v_addr;
+ output wire [WORD_COUNTER_WIDTH-1:0] q_addr;
+
+ output wire rx_wren;
+ output wire ry_wren;
+ output wire rz_wren;
+
+ input wire [ 32-1:0] px_din;
+ input wire [ 32-1:0] py_din;
+ input wire [ 32-1:0] pz_din;
+ input wire [ 32-1:0] rx_din;
+ input wire [ 32-1:0] ry_din;
+ input wire [ 32-1:0] rz_din;
+ output wire [ 32-1:0] rx_dout;
+ output wire [ 32-1:0] ry_dout;
+ output wire [ 32-1:0] rz_dout;
+ input wire [ 32-1:0] q_din;
+ input wire [ 32-1:0] v_din;
+
+
+ //
+ // Microcode
+ //
+ wire [ 4: 0] uop_opcode = uop[19:15];
+ wire [ 4: 0] uop_src_a = uop[14:10];
+ wire [ 4: 0] uop_src_b = uop[ 9: 5];
+ wire [ 2: 0] uop_dst = uop[ 4: 2];
+ wire [ 1: 0] uop_exec = uop[ 1: 0];
+
+
+ //
+ // Multi-Word Comparator
+ //
+ wire mw_cmp_ena;
+ wire mw_cmp_rdy;
+
+ wire mw_cmp_out_l;
+ wire mw_cmp_out_e;
+ wire mw_cmp_out_g;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mw_cmp_addr_xy;
+
+ wire [ 32-1:0] mw_cmp_din_x;
+ wire [ 32-1:0] mw_cmp_din_y;
+
+ // flags
+ reg flag_pz_is_zero;
+ reg flag_t1_is_zero;
+ reg flag_t2_is_zero;
+
+ mw_comparator #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ mw_comparator_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mw_cmp_ena),
+ .rdy (mw_cmp_rdy),
+
+ .xy_addr (mw_cmp_addr_xy),
+ .x_din (mw_cmp_din_x),
+ .y_din (mw_cmp_din_y),
+
+ .cmp_l (mw_cmp_out_l),
+ .cmp_e (mw_cmp_out_e),
+ .cmp_g (mw_cmp_out_g)
+ );
+
+
+ //
+ // Modular Adder
+ //
+ wire mod_add_ena;
+ wire mod_add_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_ab;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_n;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_s;
+ wire mod_add_wren_s;
+
+ wire [ 32-1:0] mod_add_din_a;
+ wire [ 32-1:0] mod_add_din_b;
+ wire [ 32-1:0] mod_add_din_n;
+ wire [ 32-1:0] mod_add_dout_s;
+
+ assign mod_add_din_n = q_din;
+
+ modular_adder #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ modular_adder_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mod_add_ena),
+ .rdy (mod_add_rdy),
+
+ .ab_addr (mod_add_addr_ab),
+ .n_addr (mod_add_addr_n),
+ .s_addr (mod_add_addr_s),
+ .s_wren (mod_add_wren_s),
+
+ .a_din (mod_add_din_a),
+ .b_din (mod_add_din_b),
+ .n_din (mod_add_din_n),
+ .s_dout (mod_add_dout_s)
+ );
+
+
+ //
+ // Modular Subtractor
+ //
+ wire mod_sub_ena;
+ wire mod_sub_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_ab;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_n;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_d;
+ wire mod_sub_wren_d;
+
+ wire [ 32-1:0] mod_sub_din_a;
+ wire [ 32-1:0] mod_sub_din_b;
+ wire [ 32-1:0] mod_sub_din_n;
+ wire [ 32-1:0] mod_sub_dout_d;
+
+ assign mod_sub_din_n = q_din;
+
+ modular_subtractor #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ modular_subtractor_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mod_sub_ena),
+ .rdy (mod_sub_rdy),
+
+ .ab_addr (mod_sub_addr_ab),
+ .n_addr (mod_sub_addr_n),
+ .d_addr (mod_sub_addr_d),
+ .d_wren (mod_sub_wren_d),
+
+ .a_din (mod_sub_din_a),
+ .b_din (mod_sub_din_b),
+ .n_din (mod_sub_din_n),
+ .d_dout (mod_sub_dout_d)
+ );
+
+
+ //
+ // Modular Multiplier
+ //
+ wire mod_mul_ena;
+ wire mod_mul_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_a;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_b;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_n;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_p;
+ wire mod_mul_wren_p;
+
+ wire [ 32-1:0] mod_mul_din_a;
+ wire [ 32-1:0] mod_mul_din_b;
+ wire [ 32-1:0] mod_mul_din_n;
+ wire [ 32-1:0] mod_mul_dout_p;
+
+ assign mod_mul_din_n = q_din;
+
+ modular_multiplier_256 modular_multiplier_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mod_mul_ena),
+ .rdy (mod_mul_rdy),
+
+ .a_addr (mod_mul_addr_a),
+ .b_addr (mod_mul_addr_b),
+ .n_addr (mod_mul_addr_n),
+ .p_addr (mod_mul_addr_p),
+ .p_wren (mod_mul_wren_p),
+
+ .a_din (mod_mul_din_a),
+ .b_din (mod_mul_din_b),
+ .n_din (mod_mul_din_n),
+ .p_dout (mod_mul_dout_p)
+ );
+
+
+ //
+ // Multi-Word Data Mover
+ //
+ wire mw_mov_ena;
+ wire mw_mov_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_x;
+ wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_y;
+ wire mw_mov_wren_y;
+
+ wire [ 32-1:0] mw_mov_din_x;
+ wire [ 32-1:0] mw_mov_dout_y;
+
+ mw_mover #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+
+ )
+ mw_mover_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mw_mov_ena),
+ .rdy (mw_mov_rdy),
+
+ .x_addr (mw_mov_addr_x),
+ .y_addr (mw_mov_addr_y),
+ .y_wren (mw_mov_wren_y),
+
+ .x_din (mw_mov_din_x),
+ .y_dout (mw_mov_dout_y)
+ );
+
+
+ //
+ // ROMs
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] brom_one_addr;
+ //reg [WORD_COUNTER_WIDTH-1:0] brom_zero_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_delta_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_g_x_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_g_y_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_h_x_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_h_y_addr;
+
+ wire [ 32-1:0] brom_one_dout;
+ wire [ 32-1:0] brom_zero_dout;
+ wire [ 32-1:0] brom_delta_dout;
+ wire [ 32-1:0] brom_g_x_dout;
+ wire [ 32-1:0] brom_g_y_dout;
+ wire [ 32-1:0] brom_h_x_dout;
+ wire [ 32-1:0] brom_h_y_dout;
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_one brom_one_inst
+ (.clk(clk), .b_addr(brom_one_addr), .b_out(brom_one_dout));
+
+ brom_p256_zero brom_zero_inst
+ (.b_out(brom_zero_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_delta brom_delta_inst
+ (.clk(clk), .b_addr(brom_delta_addr), .b_out(brom_delta_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_g_x brom_g_x_inst
+ (.clk(clk), .b_addr(brom_g_x_addr), .b_out(brom_g_x_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_g_y brom_g_y_inst
+ (.clk(clk), .b_addr(brom_g_y_addr), .b_out(brom_g_y_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_h_x brom_h_x_inst
+ (.clk(clk), .b_addr(brom_h_x_addr), .b_out(brom_h_x_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_h_y brom_h_y_inst
+ (.clk(clk), .b_addr(brom_h_y_addr), .b_out(brom_h_y_dout));
+
+
+ //
+ // Temporary Variables
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t1_wr_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t2_wr_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t3_wr_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t4_wr_addr;
+
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t1_rd_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t2_rd_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t3_rd_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t4_rd_addr;
+
+ wire bram_t1_wr_en;
+ wire bram_t2_wr_en;
+ wire bram_t3_wr_en;
+ wire bram_t4_wr_en;
+
+ wire [ 32-1:0] bram_t1_wr_data;
+ wire [ 32-1:0] bram_t2_wr_data;
+ wire [ 32-1:0] bram_t3_wr_data;
+ wire [ 32-1:0] bram_t4_wr_data;
+
+ wire [ 32-1:0] bram_t1_rd_data;
+ wire [ 32-1:0] bram_t2_rd_data;
+ wire [ 32-1:0] bram_t3_rd_data;
+ wire [ 32-1:0] bram_t4_rd_data;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t1
+ ( .clk (clk),
+ .a_addr(bram_t1_wr_addr), .a_wr(bram_t1_wr_en), .a_in(bram_t1_wr_data), .a_out(),
+ .b_addr(bram_t1_rd_addr), .b_out(bram_t1_rd_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t2
+ ( .clk (clk),
+ .a_addr(bram_t2_wr_addr), .a_wr(bram_t2_wr_en), .a_in(bram_t2_wr_data), .a_out(),
+ .b_addr(bram_t2_rd_addr), .b_out(bram_t2_rd_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t3
+ ( .clk (clk),
+ .a_addr(bram_t3_wr_addr), .a_wr(bram_t3_wr_en), .a_in(bram_t3_wr_data), .a_out(),
+ .b_addr(bram_t3_rd_addr), .b_out(bram_t3_rd_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t4
+ ( .clk (clk),
+ .a_addr(bram_t4_wr_addr), .a_wr(bram_t4_wr_en), .a_in(bram_t4_wr_data), .a_out(),
+ .b_addr(bram_t4_rd_addr), .b_out(bram_t4_rd_data)
+ );
+
+
+ //
+ // uOP Trigger Logic
+ //
+ reg uop_trig;
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) uop_trig <= 1'b0;
+ else uop_trig <= (fsm_state == FSM_STATE_FETCH) ? 1'b1 : 1'b0;
+
+
+ //
+ // FSM
+ //
+ localparam [ 1: 0] FSM_STATE_STALL = 2'b00;
+ localparam [ 1: 0] FSM_STATE_FETCH = 2'b01;
+ localparam [ 1: 0] FSM_STATE_EXECUTE = 2'b10;
+
+ reg [ 1: 0] fsm_state = FSM_STATE_STALL;
+ wire [ 1: 0] fsm_state_next = (uop_opcode == OPCODE_RDY) ? FSM_STATE_STALL : FSM_STATE_FETCH;
+
+
+ //
+ // FSM Transition Logic
+ //
+ reg uop_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) fsm_state <= FSM_STATE_STALL;
+ else case (fsm_state)
+ FSM_STATE_STALL: fsm_state <= ena ? FSM_STATE_FETCH : FSM_STATE_STALL;
+ FSM_STATE_FETCH: fsm_state <= FSM_STATE_EXECUTE;
+ FSM_STATE_EXECUTE: fsm_state <= (!uop_trig && uop_done) ? fsm_state_next : FSM_STATE_EXECUTE;
+ default: fsm_state <= FSM_STATE_STALL;
+ endcase
+
+
+ //
+ // uOP Address Increment Logic
+ //
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_STALL)
+ uop_addr <= 5'd0;
+ else if (fsm_state == FSM_STATE_EXECUTE)
+ if (!uop_trig && uop_done)
+ uop_addr <= (uop_opcode == OPCODE_RDY) ? 5'd0 : uop_addr + 1'b1;
+
+
+ //
+ // uOP Completion Logic
+ //
+ always @(*)
+ //
+ case (uop_opcode)
+ OPCODE_CMP: uop_done = mw_cmp_rdy;
+ OPCODE_MOV: uop_done = mw_mov_rdy;
+ OPCODE_ADD: uop_done = mod_add_rdy;
+ OPCODE_SUB: uop_done = mod_sub_rdy;
+ OPCODE_MUL: uop_done = mod_mul_rdy;
+ OPCODE_RDY: uop_done = 1'b1;
+ default: uop_done = 1'b0;
+ endcase
+
+
+ //
+ // Helper Modules Enable Logic
+ //
+ assign mw_cmp_ena = uop_opcode[0] & uop_trig;
+ assign mw_mov_ena = uop_opcode[1] & uop_trig;
+ assign mod_add_ena = uop_opcode[2] & uop_trig;
+ assign mod_sub_ena = uop_opcode[3] & uop_trig;
+ assign mod_mul_ena = uop_opcode[4] & uop_trig;
+
+
+ //
+ // uOP Source Value Decoding Logic
+ //
+ reg [31: 0] uop_src_a_value;
+
+ always @(*)
+ //
+ case (uop_src_a)
+ UOP_SRC_PX: uop_src_a_value = px_din;
+ UOP_SRC_PY: uop_src_a_value = py_din;
+ UOP_SRC_PZ: uop_src_a_value = pz_din;
+
+ UOP_SRC_RX: uop_src_a_value = rx_din;
+ UOP_SRC_RY: uop_src_a_value = ry_din;
+ UOP_SRC_RZ: uop_src_a_value = rz_din;
+
+ UOP_SRC_T1: uop_src_a_value = bram_t1_rd_data;
+ UOP_SRC_T2: uop_src_a_value = bram_t2_rd_data;
+ UOP_SRC_T3: uop_src_a_value = bram_t3_rd_data;
+ UOP_SRC_T4: uop_src_a_value = bram_t4_rd_data;
+
+ UOP_SRC_ONE: uop_src_a_value = brom_one_dout;
+ UOP_SRC_ZERO: uop_src_a_value = brom_zero_dout;
+ UOP_SRC_DELTA: uop_src_a_value = brom_delta_dout;
+
+ UOP_SRC_G_X: uop_src_a_value = brom_g_x_dout;
+ UOP_SRC_G_Y: uop_src_a_value = brom_g_y_dout;
+
+ UOP_SRC_H_X: uop_src_a_value = brom_h_x_dout;
+ UOP_SRC_H_Y: uop_src_a_value = brom_h_y_dout;
+
+ UOP_SRC_V: uop_src_a_value = v_din;
+
+ default: uop_src_a_value = {32{1'bX}};
+ endcase
+
+
+ assign mw_cmp_din_x = uop_src_a_value;
+ assign mw_mov_din_x = uop_src_a_value;
+ assign mod_add_din_a = uop_src_a_value;
+ assign mod_sub_din_a = uop_src_a_value;
+ assign mod_mul_din_a = uop_src_a_value;
+
+ reg [31: 0] uop_src_b_value;
+
+ always @(*)
+ //
+ case (uop_src_b)
+ UOP_SRC_PX: uop_src_b_value = px_din;
+ UOP_SRC_PY: uop_src_b_value = py_din;
+ UOP_SRC_PZ: uop_src_b_value = pz_din;
+
+ UOP_SRC_RX: uop_src_b_value = rx_din;
+ UOP_SRC_RY: uop_src_b_value = ry_din;
+ UOP_SRC_RZ: uop_src_b_value = rz_din;
+
+ UOP_SRC_T1: uop_src_b_value = bram_t1_rd_data;
+ UOP_SRC_T2: uop_src_b_value = bram_t2_rd_data;
+ UOP_SRC_T3: uop_src_b_value = bram_t3_rd_data;
+ UOP_SRC_T4: uop_src_b_value = bram_t4_rd_data;
+
+ UOP_SRC_ONE: uop_src_b_value = brom_one_dout;
+ UOP_SRC_ZERO: uop_src_b_value = brom_zero_dout;
+ UOP_SRC_DELTA: uop_src_b_value = brom_delta_dout;
+
+ UOP_SRC_G_X: uop_src_b_value = brom_g_x_dout;
+ UOP_SRC_G_Y: uop_src_b_value = brom_g_y_dout;
+
+ UOP_SRC_H_X: uop_src_b_value = brom_h_x_dout;
+ UOP_SRC_H_Y: uop_src_b_value = brom_h_y_dout;
+
+ UOP_SRC_V: uop_src_b_value = v_din;
+
+ default: uop_src_b_value = {32{1'bX}};
+ endcase
+
+ assign mw_cmp_din_y = uop_src_b_value;
+ assign mod_add_din_b = uop_src_b_value;
+ assign mod_sub_din_b = uop_src_b_value;
+ assign mod_mul_din_b = uop_src_b_value;
+
+
+ //
+ // uOP Source & Destination Address Decoding Logic
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] uop_src_a_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] uop_src_b_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] uop_dst_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] uop_q_addr;
+
+ assign q_addr = uop_q_addr;
+
+ always @(*)
+ //
+ case (uop_opcode)
+ //
+ OPCODE_CMP: begin
+ uop_src_a_addr = mw_cmp_addr_xy;
+ uop_src_b_addr = mw_cmp_addr_xy;
+ uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ OPCODE_MOV: begin
+ uop_src_a_addr = mw_mov_addr_x;
+ uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_dst_addr = mw_mov_addr_y;
+ uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ OPCODE_ADD: begin
+ uop_src_a_addr = mod_add_addr_ab;
+ uop_src_b_addr = mod_add_addr_ab;
+ uop_dst_addr = mod_add_addr_s;
+ uop_q_addr = mod_add_addr_n;
+ end
+ //
+ OPCODE_SUB: begin
+ uop_src_a_addr = mod_sub_addr_ab;
+ uop_src_b_addr = mod_sub_addr_ab;
+ uop_dst_addr = mod_sub_addr_d;
+ uop_q_addr = mod_sub_addr_n;
+ end
+ //
+ OPCODE_MUL: begin
+ uop_src_a_addr = mod_mul_addr_a;
+ uop_src_b_addr = mod_mul_addr_b;
+ uop_dst_addr = mod_mul_addr_p;
+ uop_q_addr = mod_mul_addr_n;
+ end
+ //
+ default: begin
+ uop_src_a_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ endcase
+
+
+ //
+ // uOP Conditional Execution Logic
+ //
+ reg uop_exec_effective;
+
+ always @(*)
+ //
+ case (uop_exec)
+ UOP_EXEC_ALWAYS: uop_exec_effective = 1'b1;
+ UOP_EXEC_PZT1T2_0XX: uop_exec_effective = flag_pz_is_zero;
+ UOP_EXEC_PZT1T2_100: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && flag_t2_is_zero;
+ UOP_EXEC_PZT1T2_101: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && !flag_t2_is_zero;
+ endcase
+
+
+ //
+ // uOP Destination Store Logic
+ //
+ reg uop_dst_wren;
+
+ always @(*)
+ //
+ case (uop_opcode)
+ //
+ OPCODE_MOV: uop_dst_wren = mw_mov_wren_y & uop_exec_effective;
+ OPCODE_ADD: uop_dst_wren = mod_add_wren_s;
+ OPCODE_SUB: uop_dst_wren = mod_sub_wren_d;
+ OPCODE_MUL: uop_dst_wren = mod_mul_wren_p;
+ default: uop_dst_wren = 1'b0;
+ //
+ endcase
+
+
+ always @(*) begin
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_PX) px_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_PX) px_addr = uop_src_b_addr;
+ else px_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_PY) py_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_PY) py_addr = uop_src_b_addr;
+ else py_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_PZ) pz_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_PZ) pz_addr = uop_src_b_addr;
+ else pz_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_ONE) brom_one_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_ONE) brom_one_addr = uop_src_b_addr;
+ else brom_one_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //if (uop_src_a == UOP_SRC_ZERO) brom_zero_addr = uop_src_a_addr;
+ //else if (uop_src_b == UOP_SRC_ZERO) brom_zero_addr = uop_src_b_addr;
+ //else brom_zero_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_DELTA) brom_delta_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_DELTA) brom_delta_addr = uop_src_b_addr;
+ else brom_delta_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_G_X) brom_g_x_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_G_X) brom_g_x_addr = uop_src_b_addr;
+ else brom_g_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_G_Y) brom_g_y_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_G_Y) brom_g_y_addr = uop_src_b_addr;
+ else brom_g_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_H_X) brom_h_x_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_H_X) brom_h_x_addr = uop_src_b_addr;
+ else brom_h_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_H_Y) brom_h_y_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_H_Y) brom_h_y_addr = uop_src_b_addr;
+ else brom_h_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_V) v_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_V) v_addr = uop_src_b_addr;
+ else v_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_T1) bram_t1_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T1) bram_t1_rd_addr = uop_src_b_addr;
+ else bram_t1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_T2) bram_t2_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T2) bram_t2_rd_addr = uop_src_b_addr;
+ else bram_t2_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_T3) bram_t3_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T3) bram_t3_rd_addr = uop_src_b_addr;
+ else bram_t3_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_T4) bram_t4_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T4) bram_t4_rd_addr = uop_src_b_addr;
+ else bram_t4_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_dst == UOP_DST_T1) bram_t1_wr_addr = uop_dst_addr;
+ else bram_t1_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_dst == UOP_DST_T2) bram_t2_wr_addr = uop_dst_addr;
+ else bram_t2_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_dst == UOP_DST_T3) bram_t3_wr_addr = uop_dst_addr;
+ else bram_t3_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_dst == UOP_DST_T4) bram_t4_wr_addr = uop_dst_addr;
+ else bram_t4_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if ((uop_dst == UOP_DST_RX) && (uop_dst_wren)) rx_addr = uop_dst_addr;
+ else begin
+ if (uop_src_a == UOP_SRC_RX) rx_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_RX) rx_addr = uop_src_b_addr;
+ else rx_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ if ((uop_dst == UOP_DST_RY) && (uop_dst_wren)) ry_addr = uop_dst_addr;
+ else begin
+ if (uop_src_a == UOP_SRC_RY) ry_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_RY) ry_addr = uop_src_b_addr;
+ else ry_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ if ((uop_dst == UOP_DST_RZ) && (uop_dst_wren)) rz_addr = uop_dst_addr;
+ else begin
+ if (uop_src_a == UOP_SRC_RZ) rz_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_RZ) rz_addr = uop_src_b_addr;
+ else rz_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ end
+
+
+ assign rx_wren = uop_dst_wren && (uop_dst == UOP_DST_RX);
+ assign ry_wren = uop_dst_wren && (uop_dst == UOP_DST_RY);
+ assign rz_wren = uop_dst_wren && (uop_dst == UOP_DST_RZ);
+
+ assign bram_t1_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T1);
+ assign bram_t2_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T2);
+ assign bram_t3_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T3);
+ assign bram_t4_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T4);
+
+
+
+ //
+ // Destination Value Selector
+ //
+ reg [31: 0] uop_dst_value;
+
+ always @(*)
+ //
+ case (uop_opcode)
+
+ OPCODE_MOV: uop_dst_value = mw_mov_dout_y;
+ OPCODE_ADD: uop_dst_value = mod_add_dout_s;
+ OPCODE_SUB: uop_dst_value = mod_sub_dout_d;
+ OPCODE_MUL: uop_dst_value = mod_mul_dout_p;
+
+ default: uop_dst_value = {32{1'bX}};
+
+ endcase
+
+ assign rx_dout = uop_dst_value;
+ assign ry_dout = uop_dst_value;
+ assign rz_dout = uop_dst_value;
+
+ assign bram_t1_wr_data = uop_dst_value;
+ assign bram_t2_wr_data = uop_dst_value;
+ assign bram_t3_wr_data = uop_dst_value;
+ assign bram_t4_wr_data = uop_dst_value;
+
+
+ //
+ // Latch Comparison Flags
+ //
+ always @(posedge clk)
+ //
+ if ( (fsm_state == FSM_STATE_EXECUTE) &&
+ (uop_opcode == OPCODE_CMP) &&
+ (uop_done && !uop_trig) ) begin
+
+ if ( (uop_src_a == UOP_SRC_PZ) && (uop_src_b == UOP_SRC_ZERO) )
+ flag_pz_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+ if ( (uop_src_a == UOP_SRC_T1) && (uop_src_b == UOP_SRC_ZERO) )
+ flag_t1_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+ if ( (uop_src_a == UOP_SRC_T2) && (uop_src_b == UOP_SRC_ZERO) )
+ flag_t2_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+ end
+
+
+ //
+ // Ready Flag Logic
+ //
+ reg rdy_reg = 1'b1;
+ assign rdy = rdy_reg;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) rdy_reg <= 1'b1;
+ else begin
+
+ /* clear flag */
+ if (fsm_state == FSM_STATE_STALL)
+ if (ena) rdy_reg <= 1'b0;
+
+ /* set flag */
+ if ((fsm_state == FSM_STATE_EXECUTE) && !uop_trig && uop_done)
+ if (uop_opcode == OPCODE_RDY) rdy_reg <= 1'b1;
+
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/curve/curve_mul_256.v b/rtl/curve/curve_mul_256.v
index 0ac2be0..423c863 100644
--- a/rtl/curve/curve_mul_256.v
+++ b/rtl/curve/curve_mul_256.v
@@ -1,720 +1,720 @@
-//------------------------------------------------------------------------------
-//
-// curve_mul_256.v
-// -----------------------------------------------------------------------------
-// Elliptic curve point scalar multiplier.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module curve_mul_256
- (
- clk, rst_n,
- ena, rdy,
- k_addr, rx_addr, ry_addr,
- rx_wren, ry_wren,
- k_din,
- rx_dout, ry_dout
- );
-
-
- //
- // Constants
- //
- localparam WORD_COUNTER_WIDTH = 3; // 0 .. 7
- localparam OPERAND_NUM_WORDS = 8; // 8 * 32 = 256
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [ 2: 0] k_addr;
- output wire [ 2: 0] rx_addr;
- output wire [ 2: 0] ry_addr;
-
- output wire rx_wren;
- output wire ry_wren;
-
- input wire [31: 0] k_din;
-
- output wire [31: 0] rx_dout;
- output wire [31: 0] ry_dout;
-
-
- //
- // Temporary Variables
- //
- reg [ 2: 0] bram_tx_wr_addr;
- reg [ 2: 0] bram_ty_wr_addr;
- reg [ 2: 0] bram_tz_wr_addr;
-
- reg [ 2: 0] bram_rx_wr_addr;
- reg [ 2: 0] bram_ry_wr_addr;
- reg [ 2: 0] bram_rz_wr_addr;
- wire [ 2: 0] bram_rz1_wr_addr;
-
- reg [ 2: 0] bram_tx_rd_addr;
- reg [ 2: 0] bram_ty_rd_addr;
- reg [ 2: 0] bram_tz_rd_addr;
-
- reg [ 2: 0] bram_rx_rd_addr;
- reg [ 2: 0] bram_ry_rd_addr;
- reg [ 2: 0] bram_rz_rd_addr;
- wire [ 2: 0] bram_rz1_rd_addr;
-
- reg bram_tx_wr_en;
- reg bram_ty_wr_en;
- reg bram_tz_wr_en;
-
- reg bram_rx_wr_en;
- reg bram_ry_wr_en;
- reg bram_rz_wr_en;
- wire bram_rz1_wr_en;
-
- wire [31: 0] bram_tx_rd_data;
- wire [31: 0] bram_ty_rd_data;
- wire [31: 0] bram_tz_rd_data;
-
- wire [31: 0] bram_rx_rd_data;
- wire [31: 0] bram_ry_rd_data;
- wire [31: 0] bram_rz_rd_data;
- wire [31: 0] bram_rz1_rd_data;
-
- reg [31: 0] bram_tx_wr_data_in;
- reg [31: 0] bram_ty_wr_data_in;
- reg [31: 0] bram_tz_wr_data_in;
-
- reg [31: 0] bram_rx_wr_data_in;
- reg [31: 0] bram_ry_wr_data_in;
- reg [31: 0] bram_rz_wr_data_in;
- wire [31: 0] bram_rz1_wr_data_in;
-
- wire [31: 0] bram_tx_wr_data_out;
- wire [31: 0] bram_ty_wr_data_out;
- wire [31: 0] bram_tz_wr_data_out;
-
- wire [31: 0] bram_rx_wr_data_out;
- wire [31: 0] bram_ry_wr_data_out;
- wire [31: 0] bram_rz_wr_data_out;
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_tx (.clk(clk),
- .a_addr(bram_tx_wr_addr), .a_wr(bram_tx_wr_en), .a_in(bram_tx_wr_data_in), .a_out(bram_tx_wr_data_out),
- .b_addr(bram_tx_rd_addr), .b_out(bram_tx_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_ty (.clk(clk),
- .a_addr(bram_ty_wr_addr), .a_wr(bram_ty_wr_en), .a_in(bram_ty_wr_data_in), .a_out(bram_ty_wr_data_out),
- .b_addr(bram_ty_rd_addr), .b_out(bram_ty_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_tz (.clk(clk),
- .a_addr(bram_tz_wr_addr), .a_wr(bram_tz_wr_en), .a_in(bram_tz_wr_data_in), .a_out(bram_tz_wr_data_out),
- .b_addr(bram_tz_rd_addr), .b_out(bram_tz_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_rx (.clk(clk),
- .a_addr(bram_rx_wr_addr), .a_wr(bram_rx_wr_en), .a_in(bram_rx_wr_data_in), .a_out(bram_rx_wr_data_out),
- .b_addr(bram_rx_rd_addr), .b_out(bram_rx_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_ry (.clk(clk),
- .a_addr(bram_ry_wr_addr), .a_wr(bram_ry_wr_en), .a_in(bram_ry_wr_data_in), .a_out(bram_ry_wr_data_out),
- .b_addr(bram_ry_rd_addr), .b_out(bram_ry_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_rz (.clk(clk),
- .a_addr(bram_rz_wr_addr), .a_wr(bram_rz_wr_en), .a_in(bram_rz_wr_data_in), .a_out(bram_rz_wr_data_out),
- .b_addr(bram_rz_rd_addr), .b_out(bram_rz_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_rz1 (.clk(clk),
- .a_addr(bram_rz1_wr_addr), .a_wr(bram_rz1_wr_en), .a_in(bram_rz1_wr_data_in), .a_out(),
- .b_addr(bram_rz1_rd_addr), .b_out(bram_rz1_rd_data));
-
-
- //
- // FSM
- //
- localparam [ 3: 0] FSM_STATE_IDLE = 4'd00;
- localparam [ 3: 0] FSM_STATE_PREPARE_TRIG = 4'd01;
- localparam [ 3: 0] FSM_STATE_PREPARE_WAIT = 4'd02;
- localparam [ 3: 0] FSM_STATE_DOUBLE_TRIG = 4'd03;
- localparam [ 3: 0] FSM_STATE_DOUBLE_WAIT = 4'd04;
- localparam [ 3: 0] FSM_STATE_ADD_TRIG = 4'd05;
- localparam [ 3: 0] FSM_STATE_ADD_WAIT = 4'd06;
- localparam [ 3: 0] FSM_STATE_COPY_TRIG = 4'd07;
- localparam [ 3: 0] FSM_STATE_COPY_WAIT = 4'd08;
- localparam [ 3: 0] FSM_STATE_INVERT_TRIG = 4'd09;
- localparam [ 3: 0] FSM_STATE_INVERT_WAIT = 4'd10;
- localparam [ 3: 0] FSM_STATE_CONVERT_TRIG = 4'd11;
- localparam [ 3: 0] FSM_STATE_CONVERT_WAIT = 4'd12;
- localparam [ 3: 0] FSM_STATE_DONE = 4'd13;
-
- reg [3:0] fsm_state = FSM_STATE_IDLE;
-
-
- //
- // Round Counter
- //
- reg [ 7: 0] bit_counter;
- wire [ 7: 0] bit_counter_max = 8'd255;
- wire [ 7: 0] bit_counter_zero = 8'd0;
- wire [ 7: 0] bit_counter_next =
- (bit_counter < bit_counter_max) ? bit_counter + 1'b1 : bit_counter_zero;
-
-
- //
- // Round Completion
- //
- wire [ 3: 0] fsm_state_round_next = (bit_counter < bit_counter_max) ?
- FSM_STATE_DOUBLE_TRIG : FSM_STATE_INVERT_TRIG;
-
-
- //
- // OP Trigger Logic
- //
- reg op_trig;
- wire op_done;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) op_trig <= 1'b0;
- else op_trig <= (fsm_state == FSM_STATE_PREPARE_TRIG) ||
- (fsm_state == FSM_STATE_DOUBLE_TRIG) ||
- (fsm_state == FSM_STATE_ADD_TRIG) ||
- (fsm_state == FSM_STATE_CONVERT_TRIG);
-
- //
- // Microprograms
- //
- wire [ 5: 0] op_rom_addr;
- wire [19: 0] op_rom_init_data;
- wire [19: 0] op_rom_dbl_data;
- wire [19: 0] op_rom_add_data;
- wire [19: 0] op_rom_conv_data;
- reg [19: 0] op_rom_mux_data;
-
- (* RAM_STYLE="BLOCK" *)
- uop_init_rom op_rom_init
- (
- .clk (clk),
- .addr (op_rom_addr),
- .data (op_rom_init_data)
- );
-
- (* RAM_STYLE="BLOCK" *)
- uop_dbl_rom op_rom_dbl
- (
- .clk (clk),
- .addr (op_rom_addr),
- .data (op_rom_dbl_data)
- );
-
- (* RAM_STYLE="BLOCK" *)
- uop_add_rom op_rom_add
- (
- .clk (clk),
- .addr (op_rom_addr),
- .data (op_rom_add_data)
- );
-
- (* RAM_STYLE="BLOCK" *)
- uop_conv_rom op_rom_conv
- (
- .clk (clk),
- .addr (op_rom_addr),
- .data (op_rom_conv_data)
- );
-
- always @(*)
- //
- case (fsm_state)
- FSM_STATE_PREPARE_WAIT: op_rom_mux_data = op_rom_init_data;
- FSM_STATE_DOUBLE_WAIT: op_rom_mux_data = op_rom_dbl_data;
- FSM_STATE_ADD_WAIT: op_rom_mux_data = op_rom_add_data;
- FSM_STATE_CONVERT_WAIT: op_rom_mux_data = op_rom_conv_data;
- default: op_rom_mux_data = {20{1'bX}};
- endcase
-
-
-
- //
- // Modulus
- //
- reg [ 2: 0] rom_q_addr;
- wire [31: 0] rom_q_data;
-
- brom_p256_q rom_q
- (
- .clk (clk),
- .b_addr (rom_q_addr),
- .b_out (rom_q_data)
- );
-
-
- //
- // Worker
- //
- wire [ 2: 0] worker_addr_px;
- wire [ 2: 0] worker_addr_py;
- wire [ 2: 0] worker_addr_pz;
-
- wire [ 2: 0] worker_addr_rx;
- wire [ 2: 0] worker_addr_ry;
- wire [ 2: 0] worker_addr_rz;
-
- wire [ 2: 0] worker_addr_q;
-
- wire worker_wren_rx;
- wire worker_wren_ry;
- wire worker_wren_rz;
-
- reg [31: 0] worker_din_px;
- reg [31: 0] worker_din_py;
- reg [31: 0] worker_din_pz;
-
- reg [31: 0] worker_din_rx;
- reg [31: 0] worker_din_ry;
- reg [31: 0] worker_din_rz;
-
- wire [31: 0] worker_dout_rx;
- wire [31: 0] worker_dout_ry;
- wire [31: 0] worker_dout_rz;
-
- curve_dbl_add_256 worker
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (op_trig),
- .rdy (op_done),
-
- .uop_addr (op_rom_addr),
- .uop (op_rom_mux_data),
-
- .px_addr (worker_addr_px),
- .py_addr (worker_addr_py),
- .pz_addr (worker_addr_pz),
-
- .rx_addr (worker_addr_rx),
- .ry_addr (worker_addr_ry),
- .rz_addr (worker_addr_rz),
-
- .q_addr (worker_addr_q),
-
- .v_addr (bram_rz1_rd_addr),
-
- .rx_wren (worker_wren_rx),
- .ry_wren (worker_wren_ry),
- .rz_wren (worker_wren_rz),
-
- .px_din (worker_din_px),
- .py_din (worker_din_py),
- .pz_din (worker_din_pz),
-
- .rx_din (worker_din_rx),
- .ry_din (worker_din_ry),
- .rz_din (worker_din_rz),
-
- .rx_dout (worker_dout_rx),
- .ry_dout (worker_dout_ry),
- .rz_dout (worker_dout_rz),
-
- .q_din (rom_q_data),
-
- .v_din (bram_rz1_rd_data)
- );
-
-
- //
- // Mover
- //
- reg move_trig;
- wire move_done;
-
- wire [ 2: 0] mover_addr_x;
- wire [ 2: 0] mover_addr_y;
-
- wire mover_wren_y;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) move_trig <= 1'b0;
- else move_trig <= (fsm_state == FSM_STATE_COPY_TRIG);
-
- mw_mover #
- (
- .WORD_COUNTER_WIDTH (3),
- .OPERAND_NUM_WORDS (8)
- )
- mover
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (move_trig),
- .rdy (move_done),
-
- .x_addr (mover_addr_x),
- .y_addr (mover_addr_y),
- .y_wren (mover_wren_y),
-
- .x_din ({32{1'bX}}),
- .y_dout ()
- );
-
-
- //
- // Invertor
- //
- reg invert_trig;
- wire invert_done;
-
- wire [ 2: 0] invertor_addr_a;
- wire [ 2: 0] invertor_addr_q;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) invert_trig <= 1'b0;
- else invert_trig <= (fsm_state == FSM_STATE_INVERT_TRIG);
-
- modular_invertor #
- (
- .MAX_OPERAND_WIDTH(256)
- )
- invertor
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (invert_trig),
- .rdy (invert_done),
-
- .a_addr (invertor_addr_a),
- .q_addr (invertor_addr_q),
- .a1_addr (bram_rz1_wr_addr),
- .a1_wren (bram_rz1_wr_en),
-
- .a_din (bram_rz_rd_data),
- .q_din (rom_q_data),
- .a1_dout (bram_rz1_wr_data_in)
- );
-
-
- //
- // FSM Transition Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
- else case (fsm_state)
-
- FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_PREPARE_TRIG : FSM_STATE_IDLE;
-
- FSM_STATE_PREPARE_TRIG: fsm_state <= FSM_STATE_PREPARE_WAIT;
- FSM_STATE_PREPARE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DOUBLE_TRIG : FSM_STATE_PREPARE_WAIT;
-
- FSM_STATE_DOUBLE_TRIG: fsm_state <= FSM_STATE_DOUBLE_WAIT;
- FSM_STATE_DOUBLE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_ADD_TRIG : FSM_STATE_DOUBLE_WAIT;
-
- FSM_STATE_ADD_TRIG: fsm_state <= FSM_STATE_ADD_WAIT;
- FSM_STATE_ADD_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_COPY_TRIG : FSM_STATE_ADD_WAIT;
-
- FSM_STATE_COPY_TRIG: fsm_state <= FSM_STATE_COPY_WAIT;
- FSM_STATE_COPY_WAIT: fsm_state <= (!move_trig && move_done) ? fsm_state_round_next : FSM_STATE_COPY_WAIT;
-
- FSM_STATE_INVERT_TRIG: fsm_state <= FSM_STATE_INVERT_WAIT;
- FSM_STATE_INVERT_WAIT: fsm_state <= (!invert_trig && invert_done) ? FSM_STATE_CONVERT_TRIG : FSM_STATE_INVERT_WAIT;
-
- FSM_STATE_CONVERT_TRIG: fsm_state <= FSM_STATE_CONVERT_WAIT;
- FSM_STATE_CONVERT_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DONE : FSM_STATE_CONVERT_WAIT;
-
- FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE;
-
- default: fsm_state <= FSM_STATE_IDLE;
-
- endcase
-
-
- //
- // Bit Counter Increment
- //
- always @(posedge clk) begin
- //
- if ((fsm_state == FSM_STATE_PREPARE_WAIT) && !op_trig && op_done)
- bit_counter <= bit_counter_zero;
- //
- if ((fsm_state == FSM_STATE_COPY_WAIT) && !move_trig && move_done)
- bit_counter <= bit_counter_next;
- //
- end
-
-
- //
- // K Latch Logic
- //
- reg [ 2: 0] k_addr_reg;
- reg [31: 0] k_din_reg;
-
- assign k_addr = k_addr_reg;
-
- always @(posedge clk) begin
- //
- if (fsm_state == FSM_STATE_DOUBLE_TRIG)
- k_addr_reg <= 3'd7 - bit_counter[7:5];
- //
- if (fsm_state == FSM_STATE_ADD_TRIG)
- k_din_reg <= (bit_counter[4:0] == 5'd0) ? k_din : {k_din_reg[30:0], 1'bX};
- //
- end
-
-
-
- //
- // Copy Inhibit Logic
- //
- wire move_inhibit = k_din_reg[31];
-
- wire copy_t2r_int = mover_wren_y & ~move_inhibit;
-
-
- always @(*) begin
- //
- // Q
- //
- case (fsm_state)
- FSM_STATE_DOUBLE_WAIT: rom_q_addr = worker_addr_q;
- FSM_STATE_ADD_WAIT: rom_q_addr = worker_addr_q;
- FSM_STATE_INVERT_WAIT: rom_q_addr = invertor_addr_q;
- FSM_STATE_CONVERT_WAIT: rom_q_addr = worker_addr_q;
- default: rom_q_addr = worker_addr_q;
- endcase
-
- //
- // R(X,Y,Z)
- //
- case (fsm_state)
- //
- FSM_STATE_PREPARE_WAIT: begin
- //
- bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
- bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz;
- bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz;
- bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz;
- //
- end
- //
- FSM_STATE_DOUBLE_WAIT: begin
- //
- bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz;
- bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
- bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
- bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
- //
- end
- //
- FSM_STATE_ADD_WAIT: begin
- //
- bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
- bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz;
- bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz;
- bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz;
- //
- end
- //
- FSM_STATE_COPY_WAIT: begin
- //
- bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
- bram_rx_wr_addr <= mover_addr_y; bram_ry_wr_addr <= mover_addr_y; bram_rz_wr_addr <= mover_addr_y;
- bram_rx_wr_en <= copy_t2r_int; bram_ry_wr_en <= copy_t2r_int; bram_rz_wr_en <= copy_t2r_int;
- bram_rx_wr_data_in <= bram_tx_rd_data; bram_ry_wr_data_in <= bram_ty_rd_data; bram_rz_wr_data_in <= bram_tz_rd_data;
- //
- end
- //
- FSM_STATE_INVERT_WAIT: begin
- //
- bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= invertor_addr_a;
- bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
- bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
- bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
- //
- end
- //
- FSM_STATE_CONVERT_WAIT: begin
- //
- bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz;
- bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
- bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
- bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
- //
- end
-
- //
- default: begin
- //
- bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
- bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
- bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
- bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
- //
- end
- //
- endcase
- //
- // T(X,Y,Z)
- //
- case (fsm_state)
- //
- FSM_STATE_DOUBLE_WAIT: begin
- //
- bram_tx_rd_addr <= {3{1'bX}}; bram_ty_rd_addr <= {3{1'bX}}; bram_tz_rd_addr <= {3{1'bX}};
- bram_tx_wr_addr <= worker_addr_rx; bram_ty_wr_addr <= worker_addr_ry; bram_tz_wr_addr <= worker_addr_rz;
- bram_tx_wr_en <= worker_wren_rx; bram_ty_wr_en <= worker_wren_ry; bram_tz_wr_en <= worker_wren_rz;
- bram_tx_wr_data_in <= worker_dout_rx; bram_ty_wr_data_in <= worker_dout_ry; bram_tz_wr_data_in <= worker_dout_rz;
- //
- end
- //
- FSM_STATE_ADD_WAIT: begin
- //
- bram_tx_rd_addr <= worker_addr_px; bram_ty_rd_addr <= worker_addr_py; bram_tz_rd_addr <= worker_addr_pz;
- bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
- bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
- bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
- //
- end
- //
- FSM_STATE_COPY_WAIT: begin
- //
- bram_tx_rd_addr <= mover_addr_x; bram_ty_rd_addr <= mover_addr_x; bram_tz_rd_addr <= mover_addr_x;
- bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
- bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
- bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
- //
- end
-
- //
- default: begin
- //
- bram_tx_rd_addr <= {3{1'bX}}; bram_ty_rd_addr <= {3{1'bX}}; bram_tz_rd_addr <= {3{1'bX}};
- bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
- bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
- bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
- //
- end
- //
- endcase
- //
- // Worker
- //
- case (fsm_state)
- //
- FSM_STATE_DOUBLE_WAIT: begin
- //
- worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data;
- worker_din_rx <= bram_tx_wr_data_out; worker_din_ry <= bram_ty_wr_data_out; worker_din_rz <= bram_tz_wr_data_out;
- //
- end
- //
- FSM_STATE_ADD_WAIT: begin
- //
- worker_din_px <= bram_tx_rd_data; worker_din_py <= bram_ty_rd_data; worker_din_pz <= bram_tz_rd_data;
- worker_din_rx <= bram_rx_wr_data_out; worker_din_ry <= bram_ry_wr_data_out; worker_din_rz <= bram_rz_wr_data_out;
- //
- end
- //
- FSM_STATE_CONVERT_WAIT: begin
- //
- worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data;
- worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}};
- //
- end
- //
- default: begin
- //
- worker_din_px <= {32{1'bX}}; worker_din_py <= {32{1'bX}}; worker_din_pz <= {32{1'bX}};
- worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}};
- //
- end
- //
- endcase
- //
- end
-
-
- //
- // Output Mapping
- //
- assign rx_wren = worker_wren_rx && (fsm_state == FSM_STATE_CONVERT_WAIT);
- assign ry_wren = worker_wren_ry && (fsm_state == FSM_STATE_CONVERT_WAIT);
-
- assign rx_dout = worker_dout_rx;
- assign ry_dout = worker_dout_ry;
-
- assign rx_addr = worker_addr_rx;
- assign ry_addr = worker_addr_ry;
-
-
- //
- // Ready Flag Logic
- //
- reg rdy_reg = 1'b1;
- assign rdy = rdy_reg;
-
- always @(posedge clk or negedge rst_n)
-
- if (rst_n == 1'b0) rdy_reg <= 1'b1;
- else begin
-
- /* clear flag */
- if ((fsm_state == FSM_STATE_IDLE) && ena)
- rdy_reg <= 1'b0;
-
- /* set flag */
- if (fsm_state == FSM_STATE_DONE)
- rdy_reg <= 1'b1;
-
- end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// curve_mul_256.v
+// -----------------------------------------------------------------------------
+// Elliptic curve point scalar multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module curve_mul_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ k_addr, rx_addr, ry_addr,
+ rx_wren, ry_wren,
+ k_din,
+ rx_dout, ry_dout
+ );
+
+
+ //
+ // Constants
+ //
+ localparam WORD_COUNTER_WIDTH = 3; // 0 .. 7
+ localparam OPERAND_NUM_WORDS = 8; // 8 * 32 = 256
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [ 2: 0] k_addr;
+ output wire [ 2: 0] rx_addr;
+ output wire [ 2: 0] ry_addr;
+
+ output wire rx_wren;
+ output wire ry_wren;
+
+ input wire [31: 0] k_din;
+
+ output wire [31: 0] rx_dout;
+ output wire [31: 0] ry_dout;
+
+
+ //
+ // Temporary Variables
+ //
+ reg [ 2: 0] bram_tx_wr_addr;
+ reg [ 2: 0] bram_ty_wr_addr;
+ reg [ 2: 0] bram_tz_wr_addr;
+
+ reg [ 2: 0] bram_rx_wr_addr;
+ reg [ 2: 0] bram_ry_wr_addr;
+ reg [ 2: 0] bram_rz_wr_addr;
+ wire [ 2: 0] bram_rz1_wr_addr;
+
+ reg [ 2: 0] bram_tx_rd_addr;
+ reg [ 2: 0] bram_ty_rd_addr;
+ reg [ 2: 0] bram_tz_rd_addr;
+
+ reg [ 2: 0] bram_rx_rd_addr;
+ reg [ 2: 0] bram_ry_rd_addr;
+ reg [ 2: 0] bram_rz_rd_addr;
+ wire [ 2: 0] bram_rz1_rd_addr;
+
+ reg bram_tx_wr_en;
+ reg bram_ty_wr_en;
+ reg bram_tz_wr_en;
+
+ reg bram_rx_wr_en;
+ reg bram_ry_wr_en;
+ reg bram_rz_wr_en;
+ wire bram_rz1_wr_en;
+
+ wire [31: 0] bram_tx_rd_data;
+ wire [31: 0] bram_ty_rd_data;
+ wire [31: 0] bram_tz_rd_data;
+
+ wire [31: 0] bram_rx_rd_data;
+ wire [31: 0] bram_ry_rd_data;
+ wire [31: 0] bram_rz_rd_data;
+ wire [31: 0] bram_rz1_rd_data;
+
+ reg [31: 0] bram_tx_wr_data_in;
+ reg [31: 0] bram_ty_wr_data_in;
+ reg [31: 0] bram_tz_wr_data_in;
+
+ reg [31: 0] bram_rx_wr_data_in;
+ reg [31: 0] bram_ry_wr_data_in;
+ reg [31: 0] bram_rz_wr_data_in;
+ wire [31: 0] bram_rz1_wr_data_in;
+
+ wire [31: 0] bram_tx_wr_data_out;
+ wire [31: 0] bram_ty_wr_data_out;
+ wire [31: 0] bram_tz_wr_data_out;
+
+ wire [31: 0] bram_rx_wr_data_out;
+ wire [31: 0] bram_ry_wr_data_out;
+ wire [31: 0] bram_rz_wr_data_out;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_tx (.clk(clk),
+ .a_addr(bram_tx_wr_addr), .a_wr(bram_tx_wr_en), .a_in(bram_tx_wr_data_in), .a_out(bram_tx_wr_data_out),
+ .b_addr(bram_tx_rd_addr), .b_out(bram_tx_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_ty (.clk(clk),
+ .a_addr(bram_ty_wr_addr), .a_wr(bram_ty_wr_en), .a_in(bram_ty_wr_data_in), .a_out(bram_ty_wr_data_out),
+ .b_addr(bram_ty_rd_addr), .b_out(bram_ty_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_tz (.clk(clk),
+ .a_addr(bram_tz_wr_addr), .a_wr(bram_tz_wr_en), .a_in(bram_tz_wr_data_in), .a_out(bram_tz_wr_data_out),
+ .b_addr(bram_tz_rd_addr), .b_out(bram_tz_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_rx (.clk(clk),
+ .a_addr(bram_rx_wr_addr), .a_wr(bram_rx_wr_en), .a_in(bram_rx_wr_data_in), .a_out(bram_rx_wr_data_out),
+ .b_addr(bram_rx_rd_addr), .b_out(bram_rx_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_ry (.clk(clk),
+ .a_addr(bram_ry_wr_addr), .a_wr(bram_ry_wr_en), .a_in(bram_ry_wr_data_in), .a_out(bram_ry_wr_data_out),
+ .b_addr(bram_ry_rd_addr), .b_out(bram_ry_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_rz (.clk(clk),
+ .a_addr(bram_rz_wr_addr), .a_wr(bram_rz_wr_en), .a_in(bram_rz_wr_data_in), .a_out(bram_rz_wr_data_out),
+ .b_addr(bram_rz_rd_addr), .b_out(bram_rz_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_rz1 (.clk(clk),
+ .a_addr(bram_rz1_wr_addr), .a_wr(bram_rz1_wr_en), .a_in(bram_rz1_wr_data_in), .a_out(),
+ .b_addr(bram_rz1_rd_addr), .b_out(bram_rz1_rd_data));
+
+
+ //
+ // FSM
+ //
+ localparam [ 3: 0] FSM_STATE_IDLE = 4'd00;
+ localparam [ 3: 0] FSM_STATE_PREPARE_TRIG = 4'd01;
+ localparam [ 3: 0] FSM_STATE_PREPARE_WAIT = 4'd02;
+ localparam [ 3: 0] FSM_STATE_DOUBLE_TRIG = 4'd03;
+ localparam [ 3: 0] FSM_STATE_DOUBLE_WAIT = 4'd04;
+ localparam [ 3: 0] FSM_STATE_ADD_TRIG = 4'd05;
+ localparam [ 3: 0] FSM_STATE_ADD_WAIT = 4'd06;
+ localparam [ 3: 0] FSM_STATE_COPY_TRIG = 4'd07;
+ localparam [ 3: 0] FSM_STATE_COPY_WAIT = 4'd08;
+ localparam [ 3: 0] FSM_STATE_INVERT_TRIG = 4'd09;
+ localparam [ 3: 0] FSM_STATE_INVERT_WAIT = 4'd10;
+ localparam [ 3: 0] FSM_STATE_CONVERT_TRIG = 4'd11;
+ localparam [ 3: 0] FSM_STATE_CONVERT_WAIT = 4'd12;
+ localparam [ 3: 0] FSM_STATE_DONE = 4'd13;
+
+ reg [3:0] fsm_state = FSM_STATE_IDLE;
+
+
+ //
+ // Round Counter
+ //
+ reg [ 7: 0] bit_counter;
+ wire [ 7: 0] bit_counter_max = 8'd255;
+ wire [ 7: 0] bit_counter_zero = 8'd0;
+ wire [ 7: 0] bit_counter_next =
+ (bit_counter < bit_counter_max) ? bit_counter + 1'b1 : bit_counter_zero;
+
+
+ //
+ // Round Completion
+ //
+ wire [ 3: 0] fsm_state_round_next = (bit_counter < bit_counter_max) ?
+ FSM_STATE_DOUBLE_TRIG : FSM_STATE_INVERT_TRIG;
+
+
+ //
+ // OP Trigger Logic
+ //
+ reg op_trig;
+ wire op_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) op_trig <= 1'b0;
+ else op_trig <= (fsm_state == FSM_STATE_PREPARE_TRIG) ||
+ (fsm_state == FSM_STATE_DOUBLE_TRIG) ||
+ (fsm_state == FSM_STATE_ADD_TRIG) ||
+ (fsm_state == FSM_STATE_CONVERT_TRIG);
+
+ //
+ // Microprograms
+ //
+ wire [ 5: 0] op_rom_addr;
+ wire [19: 0] op_rom_init_data;
+ wire [19: 0] op_rom_dbl_data;
+ wire [19: 0] op_rom_add_data;
+ wire [19: 0] op_rom_conv_data;
+ reg [19: 0] op_rom_mux_data;
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_init_rom op_rom_init
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_init_data)
+ );
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_dbl_rom op_rom_dbl
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_dbl_data)
+ );
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_add_rom op_rom_add
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_add_data)
+ );
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_conv_rom op_rom_conv
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_conv_data)
+ );
+
+ always @(*)
+ //
+ case (fsm_state)
+ FSM_STATE_PREPARE_WAIT: op_rom_mux_data = op_rom_init_data;
+ FSM_STATE_DOUBLE_WAIT: op_rom_mux_data = op_rom_dbl_data;
+ FSM_STATE_ADD_WAIT: op_rom_mux_data = op_rom_add_data;
+ FSM_STATE_CONVERT_WAIT: op_rom_mux_data = op_rom_conv_data;
+ default: op_rom_mux_data = {20{1'bX}};
+ endcase
+
+
+
+ //
+ // Modulus
+ //
+ reg [ 2: 0] rom_q_addr;
+ wire [31: 0] rom_q_data;
+
+ brom_p256_q rom_q
+ (
+ .clk (clk),
+ .b_addr (rom_q_addr),
+ .b_out (rom_q_data)
+ );
+
+
+ //
+ // Worker
+ //
+ wire [ 2: 0] worker_addr_px;
+ wire [ 2: 0] worker_addr_py;
+ wire [ 2: 0] worker_addr_pz;
+
+ wire [ 2: 0] worker_addr_rx;
+ wire [ 2: 0] worker_addr_ry;
+ wire [ 2: 0] worker_addr_rz;
+
+ wire [ 2: 0] worker_addr_q;
+
+ wire worker_wren_rx;
+ wire worker_wren_ry;
+ wire worker_wren_rz;
+
+ reg [31: 0] worker_din_px;
+ reg [31: 0] worker_din_py;
+ reg [31: 0] worker_din_pz;
+
+ reg [31: 0] worker_din_rx;
+ reg [31: 0] worker_din_ry;
+ reg [31: 0] worker_din_rz;
+
+ wire [31: 0] worker_dout_rx;
+ wire [31: 0] worker_dout_ry;
+ wire [31: 0] worker_dout_rz;
+
+ curve_dbl_add_256 worker
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (op_trig),
+ .rdy (op_done),
+
+ .uop_addr (op_rom_addr),
+ .uop (op_rom_mux_data),
+
+ .px_addr (worker_addr_px),
+ .py_addr (worker_addr_py),
+ .pz_addr (worker_addr_pz),
+
+ .rx_addr (worker_addr_rx),
+ .ry_addr (worker_addr_ry),
+ .rz_addr (worker_addr_rz),
+
+ .q_addr (worker_addr_q),
+
+ .v_addr (bram_rz1_rd_addr),
+
+ .rx_wren (worker_wren_rx),
+ .ry_wren (worker_wren_ry),
+ .rz_wren (worker_wren_rz),
+
+ .px_din (worker_din_px),
+ .py_din (worker_din_py),
+ .pz_din (worker_din_pz),
+
+ .rx_din (worker_din_rx),
+ .ry_din (worker_din_ry),
+ .rz_din (worker_din_rz),
+
+ .rx_dout (worker_dout_rx),
+ .ry_dout (worker_dout_ry),
+ .rz_dout (worker_dout_rz),
+
+ .q_din (rom_q_data),
+
+ .v_din (bram_rz1_rd_data)
+ );
+
+
+ //
+ // Mover
+ //
+ reg move_trig;
+ wire move_done;
+
+ wire [ 2: 0] mover_addr_x;
+ wire [ 2: 0] mover_addr_y;
+
+ wire mover_wren_y;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) move_trig <= 1'b0;
+ else move_trig <= (fsm_state == FSM_STATE_COPY_TRIG);
+
+ mw_mover #
+ (
+ .WORD_COUNTER_WIDTH (3),
+ .OPERAND_NUM_WORDS (8)
+ )
+ mover
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (move_trig),
+ .rdy (move_done),
+
+ .x_addr (mover_addr_x),
+ .y_addr (mover_addr_y),
+ .y_wren (mover_wren_y),
+
+ .x_din ({32{1'bX}}),
+ .y_dout ()
+ );
+
+
+ //
+ // Invertor
+ //
+ reg invert_trig;
+ wire invert_done;
+
+ wire [ 2: 0] invertor_addr_a;
+ wire [ 2: 0] invertor_addr_q;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) invert_trig <= 1'b0;
+ else invert_trig <= (fsm_state == FSM_STATE_INVERT_TRIG);
+
+ modular_invertor #
+ (
+ .MAX_OPERAND_WIDTH(256)
+ )
+ invertor
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (invert_trig),
+ .rdy (invert_done),
+
+ .a_addr (invertor_addr_a),
+ .q_addr (invertor_addr_q),
+ .a1_addr (bram_rz1_wr_addr),
+ .a1_wren (bram_rz1_wr_en),
+
+ .a_din (bram_rz_rd_data),
+ .q_din (rom_q_data),
+ .a1_dout (bram_rz1_wr_data_in)
+ );
+
+
+ //
+ // FSM Transition Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
+ else case (fsm_state)
+
+ FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_PREPARE_TRIG : FSM_STATE_IDLE;
+
+ FSM_STATE_PREPARE_TRIG: fsm_state <= FSM_STATE_PREPARE_WAIT;
+ FSM_STATE_PREPARE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DOUBLE_TRIG : FSM_STATE_PREPARE_WAIT;
+
+ FSM_STATE_DOUBLE_TRIG: fsm_state <= FSM_STATE_DOUBLE_WAIT;
+ FSM_STATE_DOUBLE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_ADD_TRIG : FSM_STATE_DOUBLE_WAIT;
+
+ FSM_STATE_ADD_TRIG: fsm_state <= FSM_STATE_ADD_WAIT;
+ FSM_STATE_ADD_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_COPY_TRIG : FSM_STATE_ADD_WAIT;
+
+ FSM_STATE_COPY_TRIG: fsm_state <= FSM_STATE_COPY_WAIT;
+ FSM_STATE_COPY_WAIT: fsm_state <= (!move_trig && move_done) ? fsm_state_round_next : FSM_STATE_COPY_WAIT;
+
+ FSM_STATE_INVERT_TRIG: fsm_state <= FSM_STATE_INVERT_WAIT;
+ FSM_STATE_INVERT_WAIT: fsm_state <= (!invert_trig && invert_done) ? FSM_STATE_CONVERT_TRIG : FSM_STATE_INVERT_WAIT;
+
+ FSM_STATE_CONVERT_TRIG: fsm_state <= FSM_STATE_CONVERT_WAIT;
+ FSM_STATE_CONVERT_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DONE : FSM_STATE_CONVERT_WAIT;
+
+ FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE;
+
+ default: fsm_state <= FSM_STATE_IDLE;
+
+ endcase
+
+
+ //
+ // Bit Counter Increment
+ //
+ always @(posedge clk) begin
+ //
+ if ((fsm_state == FSM_STATE_PREPARE_WAIT) && !op_trig && op_done)
+ bit_counter <= bit_counter_zero;
+ //
+ if ((fsm_state == FSM_STATE_COPY_WAIT) && !move_trig && move_done)
+ bit_counter <= bit_counter_next;
+ //
+ end
+
+
+ //
+ // K Latch Logic
+ //
+ reg [ 2: 0] k_addr_reg;
+ reg [31: 0] k_din_reg;
+
+ assign k_addr = k_addr_reg;
+
+ always @(posedge clk) begin
+ //
+ if (fsm_state == FSM_STATE_DOUBLE_TRIG)
+ k_addr_reg <= 3'd7 - bit_counter[7:5];
+ //
+ if (fsm_state == FSM_STATE_ADD_TRIG)
+ k_din_reg <= (bit_counter[4:0] == 5'd0) ? k_din : {k_din_reg[30:0], 1'bX};
+ //
+ end
+
+
+
+ //
+ // Copy Inhibit Logic
+ //
+ wire move_inhibit = k_din_reg[31];
+
+ wire copy_t2r_int = mover_wren_y & ~move_inhibit;
+
+
+ always @(*) begin
+ //
+ // Q
+ //
+ case (fsm_state)
+ FSM_STATE_DOUBLE_WAIT: rom_q_addr = worker_addr_q;
+ FSM_STATE_ADD_WAIT: rom_q_addr = worker_addr_q;
+ FSM_STATE_INVERT_WAIT: rom_q_addr = invertor_addr_q;
+ FSM_STATE_CONVERT_WAIT: rom_q_addr = worker_addr_q;
+ default: rom_q_addr = worker_addr_q;
+ endcase
+
+ //
+ // R(X,Y,Z)
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_PREPARE_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz;
+ bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz;
+ bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz;
+ //
+ end
+ //
+ FSM_STATE_DOUBLE_WAIT: begin
+ //
+ bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz;
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ FSM_STATE_ADD_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz;
+ bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz;
+ bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz;
+ //
+ end
+ //
+ FSM_STATE_COPY_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= mover_addr_y; bram_ry_wr_addr <= mover_addr_y; bram_rz_wr_addr <= mover_addr_y;
+ bram_rx_wr_en <= copy_t2r_int; bram_ry_wr_en <= copy_t2r_int; bram_rz_wr_en <= copy_t2r_int;
+ bram_rx_wr_data_in <= bram_tx_rd_data; bram_ry_wr_data_in <= bram_ty_rd_data; bram_rz_wr_data_in <= bram_tz_rd_data;
+ //
+ end
+ //
+ FSM_STATE_INVERT_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= invertor_addr_a;
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ FSM_STATE_CONVERT_WAIT: begin
+ //
+ bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz;
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+
+ //
+ default: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ endcase
+ //
+ // T(X,Y,Z)
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_DOUBLE_WAIT: begin
+ //
+ bram_tx_rd_addr <= {3{1'bX}}; bram_ty_rd_addr <= {3{1'bX}}; bram_tz_rd_addr <= {3{1'bX}};
+ bram_tx_wr_addr <= worker_addr_rx; bram_ty_wr_addr <= worker_addr_ry; bram_tz_wr_addr <= worker_addr_rz;
+ bram_tx_wr_en <= worker_wren_rx; bram_ty_wr_en <= worker_wren_ry; bram_tz_wr_en <= worker_wren_rz;
+ bram_tx_wr_data_in <= worker_dout_rx; bram_ty_wr_data_in <= worker_dout_ry; bram_tz_wr_data_in <= worker_dout_rz;
+ //
+ end
+ //
+ FSM_STATE_ADD_WAIT: begin
+ //
+ bram_tx_rd_addr <= worker_addr_px; bram_ty_rd_addr <= worker_addr_py; bram_tz_rd_addr <= worker_addr_pz;
+ bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
+ bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
+ bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ FSM_STATE_COPY_WAIT: begin
+ //
+ bram_tx_rd_addr <= mover_addr_x; bram_ty_rd_addr <= mover_addr_x; bram_tz_rd_addr <= mover_addr_x;
+ bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
+ bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
+ bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
+ //
+ end
+
+ //
+ default: begin
+ //
+ bram_tx_rd_addr <= {3{1'bX}}; bram_ty_rd_addr <= {3{1'bX}}; bram_tz_rd_addr <= {3{1'bX}};
+ bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
+ bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
+ bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ endcase
+ //
+ // Worker
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_DOUBLE_WAIT: begin
+ //
+ worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data;
+ worker_din_rx <= bram_tx_wr_data_out; worker_din_ry <= bram_ty_wr_data_out; worker_din_rz <= bram_tz_wr_data_out;
+ //
+ end
+ //
+ FSM_STATE_ADD_WAIT: begin
+ //
+ worker_din_px <= bram_tx_rd_data; worker_din_py <= bram_ty_rd_data; worker_din_pz <= bram_tz_rd_data;
+ worker_din_rx <= bram_rx_wr_data_out; worker_din_ry <= bram_ry_wr_data_out; worker_din_rz <= bram_rz_wr_data_out;
+ //
+ end
+ //
+ FSM_STATE_CONVERT_WAIT: begin
+ //
+ worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data;
+ worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}};
+ //
+ end
+ //
+ default: begin
+ //
+ worker_din_px <= {32{1'bX}}; worker_din_py <= {32{1'bX}}; worker_din_pz <= {32{1'bX}};
+ worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}};
+ //
+ end
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Output Mapping
+ //
+ assign rx_wren = worker_wren_rx && (fsm_state == FSM_STATE_CONVERT_WAIT);
+ assign ry_wren = worker_wren_ry && (fsm_state == FSM_STATE_CONVERT_WAIT);
+
+ assign rx_dout = worker_dout_rx;
+ assign ry_dout = worker_dout_ry;
+
+ assign rx_addr = worker_addr_rx;
+ assign ry_addr = worker_addr_ry;
+
+
+ //
+ // Ready Flag Logic
+ //
+ reg rdy_reg = 1'b1;
+ assign rdy = rdy_reg;
+
+ always @(posedge clk or negedge rst_n)
+
+ if (rst_n == 1'b0) rdy_reg <= 1'b1;
+ else begin
+
+ /* clear flag */
+ if ((fsm_state == FSM_STATE_IDLE) && ena)
+ rdy_reg <= 1'b0;
+
+ /* set flag */
+ if (fsm_state == FSM_STATE_DONE)
+ rdy_reg <= 1'b1;
+
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/curve/rom/brom_p256_delta.v b/rtl/curve/rom/brom_p256_delta.v
index b9a345a..4637575 100644
--- a/rtl/curve/rom/brom_p256_delta.v
+++ b/rtl/curve/rom/brom_p256_delta.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_delta
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'h00000000;
- 3'b001: bram_reg_b <= 32'h00000000;
- 3'b010: bram_reg_b <= 32'h80000000;
- 3'b011: bram_reg_b <= 32'h00000000;
- 3'b100: bram_reg_b <= 32'h00000000;
- 3'b101: bram_reg_b <= 32'h80000000;
- 3'b110: bram_reg_b <= 32'h80000000;
- 3'b111: bram_reg_b <= 32'h7fffffff;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h00000000;
+ 3'b001: bram_reg_b <= 32'h00000000;
+ 3'b010: bram_reg_b <= 32'h80000000;
+ 3'b011: bram_reg_b <= 32'h00000000;
+ 3'b100: bram_reg_b <= 32'h00000000;
+ 3'b101: bram_reg_b <= 32'h80000000;
+ 3'b110: bram_reg_b <= 32'h80000000;
+ 3'b111: bram_reg_b <= 32'h7fffffff;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_g_x.v b/rtl/curve/rom/brom_p256_g_x.v
index 0816ef6..86aeafd 100644
--- a/rtl/curve/rom/brom_p256_g_x.v
+++ b/rtl/curve/rom/brom_p256_g_x.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_g_x
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'hd898c296;
- 3'b001: bram_reg_b <= 32'hf4a13945;
- 3'b010: bram_reg_b <= 32'h2deb33a0;
- 3'b011: bram_reg_b <= 32'h77037d81;
- 3'b100: bram_reg_b <= 32'h63a440f2;
- 3'b101: bram_reg_b <= 32'hf8bce6e5;
- 3'b110: bram_reg_b <= 32'he12c4247;
- 3'b111: bram_reg_b <= 32'h6b17d1f2;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'hd898c296;
+ 3'b001: bram_reg_b <= 32'hf4a13945;
+ 3'b010: bram_reg_b <= 32'h2deb33a0;
+ 3'b011: bram_reg_b <= 32'h77037d81;
+ 3'b100: bram_reg_b <= 32'h63a440f2;
+ 3'b101: bram_reg_b <= 32'hf8bce6e5;
+ 3'b110: bram_reg_b <= 32'he12c4247;
+ 3'b111: bram_reg_b <= 32'h6b17d1f2;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_g_y.v b/rtl/curve/rom/brom_p256_g_y.v
index 4d9c61e..39f9116 100644
--- a/rtl/curve/rom/brom_p256_g_y.v
+++ b/rtl/curve/rom/brom_p256_g_y.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_g_y
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
-
+
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'h37bf51f5;
- 3'b001: bram_reg_b <= 32'hcbb64068;
- 3'b010: bram_reg_b <= 32'h6b315ece;
- 3'b011: bram_reg_b <= 32'h2bce3357;
- 3'b100: bram_reg_b <= 32'h7c0f9e16;
- 3'b101: bram_reg_b <= 32'h8ee7eb4a;
- 3'b110: bram_reg_b <= 32'hfe1a7f9b;
- 3'b111: bram_reg_b <= 32'h4fe342e2;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h37bf51f5;
+ 3'b001: bram_reg_b <= 32'hcbb64068;
+ 3'b010: bram_reg_b <= 32'h6b315ece;
+ 3'b011: bram_reg_b <= 32'h2bce3357;
+ 3'b100: bram_reg_b <= 32'h7c0f9e16;
+ 3'b101: bram_reg_b <= 32'h8ee7eb4a;
+ 3'b110: bram_reg_b <= 32'hfe1a7f9b;
+ 3'b111: bram_reg_b <= 32'h4fe342e2;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_h_x.v b/rtl/curve/rom/brom_p256_h_x.v
index 0b69f77..554d346 100644
--- a/rtl/curve/rom/brom_p256_h_x.v
+++ b/rtl/curve/rom/brom_p256_h_x.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_h_x
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'h4ece7ad0;
- 3'b001: bram_reg_b <= 32'h16bd8d74;
- 3'b010: bram_reg_b <= 32'ha42998be;
- 3'b011: bram_reg_b <= 32'h11f904fe;
- 3'b100: bram_reg_b <= 32'h38b77e1b;
- 3'b101: bram_reg_b <= 32'h0e863235;
- 3'b110: bram_reg_b <= 32'h3da77b71;
- 3'b111: bram_reg_b <= 32'h29d05c19;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h4ece7ad0;
+ 3'b001: bram_reg_b <= 32'h16bd8d74;
+ 3'b010: bram_reg_b <= 32'ha42998be;
+ 3'b011: bram_reg_b <= 32'h11f904fe;
+ 3'b100: bram_reg_b <= 32'h38b77e1b;
+ 3'b101: bram_reg_b <= 32'h0e863235;
+ 3'b110: bram_reg_b <= 32'h3da77b71;
+ 3'b111: bram_reg_b <= 32'h29d05c19;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_h_y.v b/rtl/curve/rom/brom_p256_h_y.v
index 362fce6..6b5b8d9 100644
--- a/rtl/curve/rom/brom_p256_h_y.v
+++ b/rtl/curve/rom/brom_p256_h_y.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_h_y
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
-
+
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'hc840ae07;
- 3'b001: bram_reg_b <= 32'h3449bf97;
- 3'b010: bram_reg_b <= 32'h94cea131;
- 3'b011: bram_reg_b <= 32'hd431cca9;
- 3'b100: bram_reg_b <= 32'h83f061e9;
- 3'b101: bram_reg_b <= 32'h711814b5;
- 3'b110: bram_reg_b <= 32'h01e58065;
- 3'b111: bram_reg_b <= 32'hb01cbd1c;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'hc840ae07;
+ 3'b001: bram_reg_b <= 32'h3449bf97;
+ 3'b010: bram_reg_b <= 32'h94cea131;
+ 3'b011: bram_reg_b <= 32'hd431cca9;
+ 3'b100: bram_reg_b <= 32'h83f061e9;
+ 3'b101: bram_reg_b <= 32'h711814b5;
+ 3'b110: bram_reg_b <= 32'h01e58065;
+ 3'b111: bram_reg_b <= 32'hb01cbd1c;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_one.v b/rtl/curve/rom/brom_p256_one.v
index 4097874..15e3746 100644
--- a/rtl/curve/rom/brom_p256_one.v
+++ b/rtl/curve/rom/brom_p256_one.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_one
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'h00000001;
- 3'b001: bram_reg_b <= 32'h00000000;
- 3'b010: bram_reg_b <= 32'h00000000;
- 3'b011: bram_reg_b <= 32'h00000000;
- 3'b100: bram_reg_b <= 32'h00000000;
- 3'b101: bram_reg_b <= 32'h00000000;
- 3'b110: bram_reg_b <= 32'h00000000;
- 3'b111: bram_reg_b <= 32'h00000000;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h00000001;
+ 3'b001: bram_reg_b <= 32'h00000000;
+ 3'b010: bram_reg_b <= 32'h00000000;
+ 3'b011: bram_reg_b <= 32'h00000000;
+ 3'b100: bram_reg_b <= 32'h00000000;
+ 3'b101: bram_reg_b <= 32'h00000000;
+ 3'b110: bram_reg_b <= 32'h00000000;
+ 3'b111: bram_reg_b <= 32'h00000000;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_q.v b/rtl/curve/rom/brom_p256_q.v
index fe94593..101a524 100644
--- a/rtl/curve/rom/brom_p256_q.v
+++ b/rtl/curve/rom/brom_p256_q.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_q
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
-
+
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'hffffffff;
- 3'b001: bram_reg_b <= 32'hffffffff;
- 3'b010: bram_reg_b <= 32'hffffffff;
- 3'b011: bram_reg_b <= 32'h00000000;
- 3'b100: bram_reg_b <= 32'h00000000;
- 3'b101: bram_reg_b <= 32'h00000000;
- 3'b110: bram_reg_b <= 32'h00000001;
- 3'b111: bram_reg_b <= 32'hffffffff;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'hffffffff;
+ 3'b001: bram_reg_b <= 32'hffffffff;
+ 3'b010: bram_reg_b <= 32'hffffffff;
+ 3'b011: bram_reg_b <= 32'h00000000;
+ 3'b100: bram_reg_b <= 32'h00000000;
+ 3'b101: bram_reg_b <= 32'h00000000;
+ 3'b110: bram_reg_b <= 32'h00000001;
+ 3'b111: bram_reg_b <= 32'hffffffff;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_zero.v b/rtl/curve/rom/brom_p256_zero.v
index f6d19a1..2672cf2 100644
--- a/rtl/curve/rom/brom_p256_zero.v
+++ b/rtl/curve/rom/brom_p256_zero.v
@@ -33,14 +33,14 @@
`timescale 1ns / 1ps
module brom_p256_zero
- (
- //input wire clk,
- //input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ //input wire clk,
+ //input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
-
- assign b_out = {32{1'b0}};
+
+ assign b_out = {32{1'b0}};
//
// Output Registers
@@ -52,19 +52,19 @@ module brom_p256_zero
//
// Read-Only Port B
- //
- //always @(posedge clk)
- //
- //case (b_addr)
- //3'b000: bram_reg_b <= 32'h00000000;
- //3'b001: bram_reg_b <= 32'h00000000;
- //3'b010: bram_reg_b <= 32'h00000000;
- //3'b011: bram_reg_b <= 32'h00000000;
- //3'b100: bram_reg_b <= 32'h00000000;
- //3'b101: bram_reg_b <= 32'h00000000;
- //3'b110: bram_reg_b <= 32'h00000000;
- //3'b111: bram_reg_b <= 32'h00000000;
- //endcase
+ //
+ //always @(posedge clk)
+ //
+ //case (b_addr)
+ //3'b000: bram_reg_b <= 32'h00000000;
+ //3'b001: bram_reg_b <= 32'h00000000;
+ //3'b010: bram_reg_b <= 32'h00000000;
+ //3'b011: bram_reg_b <= 32'h00000000;
+ //3'b100: bram_reg_b <= 32'h00000000;
+ //3'b101: bram_reg_b <= 32'h00000000;
+ //3'b110: bram_reg_b <= 32'h00000000;
+ //3'b111: bram_reg_b <= 32'h00000000;
+ //endcase
endmodule
diff --git a/rtl/curve/uop/uop_add_rom.v b/rtl/curve/uop/uop_add_rom.v
deleted file mode 100644
index c807736..0000000
--- a/rtl/curve/uop/uop_add_rom.v
+++ /dev/null
@@ -1,66 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_add_rom
- (
- input wire clk,
- input wire [ 5: 0] addr,
- output reg [19: 0] data
- );
-
-
- //
- // Microcode
- //
-`include "..\uop_ecdsa.v"
-
-
- //
- // Addition Microprogram
- //
- always @(posedge clk)
-
- case (addr)
-
-/* 2. */6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-/* 3. */6'd01: data <= {OPCODE_MOV, UOP_SRC_PZ, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd02: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 4. */6'd03: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 5. */6'd04: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_G_X, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 6. */6'd05: data <= {OPCODE_MUL, UOP_SRC_T2, UOP_SRC_G_Y, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 7. */6'd06: data <= {OPCODE_SUB, UOP_SRC_T1, UOP_SRC_PX, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 8. */6'd07: data <= {OPCODE_SUB, UOP_SRC_T2, UOP_SRC_PY, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 9. */6'd08: data <= {OPCODE_CMP, UOP_SRC_T1, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
- 6'd09: data <= {OPCODE_CMP, UOP_SRC_T2, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-/* 10. */6'd10: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_RZ, UOP_EXEC_ALWAYS};
-/* 11. */6'd11: data <= {OPCODE_MOV, UOP_SRC_T1, UOP_SRC_DUMMY, UOP_DST_T3, UOP_EXEC_ALWAYS};
- 6'd12: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS};
-/* 12. */6'd13: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_T4, UOP_EXEC_ALWAYS};
-/* 13. */6'd14: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS};
-/* 14. */6'd15: data <= {OPCODE_ADD, UOP_SRC_T3, UOP_SRC_T3, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 15. */6'd16: data <= {OPCODE_MOV, UOP_SRC_T2, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS};
- 6'd17: data <= {OPCODE_MUL, UOP_SRC_RX, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS};
-/* 16. */6'd18: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T1, UOP_DST_RX, UOP_EXEC_ALWAYS};
-/* 17. */6'd19: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T4, UOP_DST_RX, UOP_EXEC_ALWAYS};
-/* 18. */6'd20: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_RX, UOP_DST_T3, UOP_EXEC_ALWAYS};
-/* 19. */6'd21: data <= {OPCODE_MUL, UOP_SRC_T2, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS};
-/* 20. */6'd22: data <= {OPCODE_MUL, UOP_SRC_PY, UOP_SRC_T4, UOP_DST_T4, UOP_EXEC_ALWAYS};
-/* 21. */6'd23: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_T4, UOP_DST_RY, UOP_EXEC_ALWAYS};
-
- 6'd24: data <= {OPCODE_MOV, UOP_SRC_G_X, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX};
- 6'd25: data <= {OPCODE_MOV, UOP_SRC_G_Y, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX};
- 6'd26: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_0XX};
-
- 6'd27: data <= {OPCODE_MOV, UOP_SRC_H_X, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_100};
- 6'd28: data <= {OPCODE_MOV, UOP_SRC_H_Y, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_100};
- 6'd29: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_100};
-
- 6'd30: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_101};
- 6'd31: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_101};
- 6'd32: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_101};
-
- default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
-
- endcase
-
-
-endmodule
diff --git a/rtl/curve/uop/uop_conv_rom.v b/rtl/curve/uop/uop_conv_rom.v
deleted file mode 100644
index 3097736..0000000
--- a/rtl/curve/uop/uop_conv_rom.v
+++ /dev/null
@@ -1,38 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_conv_rom
- (
- input wire clk,
- input wire [ 5: 0] addr,
- output reg [19: 0] data
- );
-
-
- //
- // Microcode
- //
-`include "..\uop_ecdsa.v"
-
-
- //
- // Doubling Microprogram
- //
- always @(posedge clk)
-
- case (addr)
-
- 6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
- 6'd01: data <= {OPCODE_MOV, UOP_SRC_V, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd02: data <= {OPCODE_MUL, UOP_SRC_V, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS};
- 6'd03: data <= {OPCODE_MUL, UOP_SRC_V, UOP_SRC_T2, UOP_DST_T3, UOP_EXEC_ALWAYS};
- 6'd04: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS};
- 6'd05: data <= {OPCODE_MUL, UOP_SRC_PY, UOP_SRC_T3, UOP_DST_RY, UOP_EXEC_ALWAYS};
- 6'd06: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX};
- 6'd07: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX};
-
- default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
-
- endcase
-
-
-endmodule
diff --git a/rtl/curve/uop/uop_dbl_rom.v b/rtl/curve/uop/uop_dbl_rom.v
deleted file mode 100644
index 1939ca9..0000000
--- a/rtl/curve/uop/uop_dbl_rom.v
+++ /dev/null
@@ -1,58 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_dbl_rom
- (
- input wire clk,
- input wire [ 5: 0] addr,
- output reg [19: 0] data
- );
-
-
- //
- // Microcode
- //
-`include "..\uop_ecdsa.v"
-
-
- //
- // Doubling Microprogram
- //
- always @(posedge clk)
-
- case (addr)
-
-/* 1. */6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-/* 2. */6'd01: data <= {OPCODE_MOV, UOP_SRC_PZ, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 5'd02: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 3. */6'd03: data <= {OPCODE_SUB, UOP_SRC_PX, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 4. */6'd04: data <= {OPCODE_ADD, UOP_SRC_PX, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 5. */6'd05: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 6. */6'd06: data <= {OPCODE_ADD, UOP_SRC_T2, UOP_SRC_T2, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd07: data <= {OPCODE_ADD, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 7. */6'd08: data <= {OPCODE_ADD, UOP_SRC_PY, UOP_SRC_PY, UOP_DST_RY, UOP_EXEC_ALWAYS};
-/* 8. */6'd09: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_RY, UOP_DST_RZ, UOP_EXEC_ALWAYS};
-/* 9. */6'd10: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd11: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T3, UOP_EXEC_ALWAYS};
- 6'd12: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_RY, UOP_EXEC_ALWAYS};
-/* 10. */6'd13: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_RY, UOP_DST_T3, UOP_EXEC_ALWAYS};
-/* 11. */6'd14: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd15: data <= {OPCODE_MUL, UOP_SRC_RY, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 12. */6'd16: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_DELTA, UOP_DST_RY, UOP_EXEC_ALWAYS};
-/* 13. */6'd17: data <= {OPCODE_MOV, UOP_SRC_T2, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd18: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS};
-/* 14. */6'd19: data <= {OPCODE_ADD, UOP_SRC_T3, UOP_SRC_T3, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 15. */6'd20: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T1, UOP_DST_RX, UOP_EXEC_ALWAYS};
-/* 16. */6'd21: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_RX, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 17. */6'd22: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 18. */6'd23: data <= {OPCODE_SUB, UOP_SRC_T1, UOP_SRC_RY, UOP_DST_RY, UOP_EXEC_ALWAYS};
-
- 6'd24: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX};
- 6'd25: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX};
- 6'd26: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_0XX};
-
- default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
-
- endcase
-
-
-endmodule
diff --git a/rtl/curve/uop/uop_init_rom.v b/rtl/curve/uop/uop_init_rom.v
deleted file mode 100644
index ac44b55..0000000
--- a/rtl/curve/uop/uop_init_rom.v
+++ /dev/null
@@ -1,33 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_init_rom
- (
- input wire clk,
- input wire [ 5: 0] addr,
- output reg [19: 0] data
- );
-
-
- //
- // Microcode
- //
-`include "..\uop_ecdsa.v"
-
-
- //
- // Doubling Microprogram
- //
- always @(posedge clk)
-
- case (addr)
-
- 6'd00: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS};
- 6'd01: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_ALWAYS};
- 6'd02: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_ALWAYS};
-
- default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
-
- endcase
-
-
-endmodule
diff --git a/rtl/curve/uop_ecdsa.v b/rtl/curve/uop_ecdsa.v
deleted file mode 100644
index e64119d..0000000
--- a/rtl/curve/uop_ecdsa.v
+++ /dev/null
@@ -1,50 +0,0 @@
-localparam [ 4: 0] OPCODE_CMP = 5'b00001;
-localparam [ 4: 0] OPCODE_MOV = 5'b00010;
-localparam [ 4: 0] OPCODE_ADD = 5'b00100;
-localparam [ 4: 0] OPCODE_SUB = 5'b01000;
-localparam [ 4: 0] OPCODE_MUL = 5'b10000;
-localparam [ 4: 0] OPCODE_RDY = 5'b00000;
-
-localparam [ 4: 0] UOP_SRC_PX = 5'h0_0;
-localparam [ 4: 0] UOP_SRC_PY = 5'h0_1;
-localparam [ 4: 0] UOP_SRC_PZ = 5'h0_2;
-
-localparam [ 4: 0] UOP_SRC_RX = 5'h0_3;
-localparam [ 4: 0] UOP_SRC_RY = 5'h0_4;
-localparam [ 4: 0] UOP_SRC_RZ = 5'h0_5;
-
-localparam [ 4: 0] UOP_SRC_T1 = 5'h0_6;
-localparam [ 4: 0] UOP_SRC_T2 = 5'h0_7;
-localparam [ 4: 0] UOP_SRC_T3 = 5'h0_8;
-localparam [ 4: 0] UOP_SRC_T4 = 5'h0_9;
-
-localparam [ 4: 0] UOP_SRC_ONE = 5'h0_A;
-localparam [ 4: 0] UOP_SRC_ZERO = 5'h0_B;
-localparam [ 4: 0] UOP_SRC_DELTA = 5'h0_C;
-
-localparam [ 4: 0] UOP_SRC_V = 5'h0_F;
-
-localparam [ 4: 0] UOP_SRC_G_X = 5'h1_0;
-localparam [ 4: 0] UOP_SRC_G_Y = 5'h1_1;
-
-localparam [ 4: 0] UOP_SRC_H_X = 5'h1_2;
-localparam [ 4: 0] UOP_SRC_H_Y = 5'h1_3;
-
-localparam [ 4: 0] UOP_SRC_DUMMY = 5'hX_X;
-
-localparam [ 2: 0] UOP_DST_RX = 3'd0;
-localparam [ 2: 0] UOP_DST_RY = 3'd1;
-localparam [ 2: 0] UOP_DST_RZ = 3'd2;
-
-localparam [ 2: 0] UOP_DST_T1 = 3'd3;
-localparam [ 2: 0] UOP_DST_T2 = 3'd4;
-localparam [ 2: 0] UOP_DST_T3 = 3'd5;
-localparam [ 2: 0] UOP_DST_T4 = 3'd6;
-
-localparam [ 2: 0] UOP_DST_DUMMY = 3'dX;
-
-localparam UOP_EXEC_ALWAYS = 2'b11; // R
-localparam UOP_EXEC_PZT1T2_0XX = 2'b10; // G
-localparam UOP_EXEC_PZT1T2_100 = 2'b00; // H
-localparam UOP_EXEC_PZT1T2_101 = 2'b01; // O
-
diff --git a/rtl/ecdsa256.v b/rtl/ecdsa256.v
index 1e712bf..11276a2 100644
--- a/rtl/ecdsa256.v
+++ b/rtl/ecdsa256.v
@@ -34,115 +34,115 @@
module ecdsa256
(
- input wire clk,
- input wire rst_n,
+ input wire clk,
+ input wire rst_n,
- input wire next,
- output wire valid,
+ input wire next,
+ output wire valid,
- input wire bus_cs,
- input wire bus_we,
- input wire [ 4:0] bus_addr,
- input wire [31:0] bus_data_wr,
+ input wire bus_cs,
+ input wire bus_we,
+ input wire [ 4:0] bus_addr,
+ input wire [31:0] bus_data_wr,
output wire [31:0] bus_data_rd
);
-
- //
- // Memory Banks
- //
- localparam [1:0] BUS_ADDR_BANK_K = 2'b00;
- localparam [1:0] BUS_ADDR_BANK_X = 2'b01;
- localparam [1:0] BUS_ADDR_BANK_Y = 2'b10;
-
- wire [1:0] bus_addr_upper = bus_addr[4:3];
- wire [2:0] bus_addr_lower = bus_addr[2:0];
-
-
+
+ //
+ // Memory Banks
+ //
+ localparam [1:0] BUS_ADDR_BANK_K = 2'b00;
+ localparam [1:0] BUS_ADDR_BANK_X = 2'b01;
+ localparam [1:0] BUS_ADDR_BANK_Y = 2'b10;
+
+ wire [1:0] bus_addr_upper = bus_addr[4:3];
+ wire [2:0] bus_addr_lower = bus_addr[2:0];
+
+
//
// Memories
- //
-
- wire [31:0] user_rw_k_bram_out;
- wire [31:0] user_ro_x_bram_out;
- wire [31:0] user_ro_y_bram_out;
-
- wire [ 2:0] core_ro_k_bram_addr;
- wire [ 2:0] core_rw_x_bram_addr;
- wire [ 2:0] core_rw_y_bram_addr;
-
- wire core_rw_x_bram_wren;
- wire core_rw_y_bram_wren;
-
- wire [31:0] core_ro_k_bram_dout;
- wire [31:0] core_rw_x_bram_din;
- wire [31:0] core_rw_y_bram_din;
-
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
+ //
+
+ wire [31:0] user_rw_k_bram_out;
+ wire [31:0] user_ro_x_bram_out;
+ wire [31:0] user_ro_y_bram_out;
+
+ wire [ 2:0] core_ro_k_bram_addr;
+ wire [ 2:0] core_rw_x_bram_addr;
+ wire [ 2:0] core_rw_y_bram_addr;
+
+ wire core_rw_x_bram_wren;
+ wire core_rw_y_bram_wren;
+
+ wire [31:0] core_ro_k_bram_dout;
+ wire [31:0] core_rw_x_bram_din;
+ wire [31:0] core_rw_y_bram_din;
+
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
)
- bram_k
- ( .clk(clk),
- .a_addr(bus_addr_lower), .a_out(user_rw_k_bram_out), .a_wr(bus_cs && bus_we && (bus_addr_upper == BUS_ADDR_BANK_K)), .a_in(bus_data_wr),
- .b_addr(core_ro_k_bram_addr), .b_out(core_ro_k_bram_dout)
- );
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
+ bram_k
+ ( .clk(clk),
+ .a_addr(bus_addr_lower), .a_out(user_rw_k_bram_out), .a_wr(bus_cs && bus_we && (bus_addr_upper == BUS_ADDR_BANK_K)), .a_in(bus_data_wr),
+ .b_addr(core_ro_k_bram_addr), .b_out(core_ro_k_bram_dout)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
)
- bram_x
- ( .clk(clk),
- .a_addr(core_rw_x_bram_addr), .a_out(), .a_wr(core_rw_x_bram_wren), .a_in(core_rw_x_bram_din),
- .b_addr(bus_addr_lower), .b_out(user_ro_x_bram_out)
- );
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
+ bram_x
+ ( .clk(clk),
+ .a_addr(core_rw_x_bram_addr), .a_out(), .a_wr(core_rw_x_bram_wren), .a_in(core_rw_x_bram_din),
+ .b_addr(bus_addr_lower), .b_out(user_ro_x_bram_out)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
)
- bram_y
- ( .clk(clk),
- .a_addr(core_rw_y_bram_addr), .a_out(), .a_wr(core_rw_y_bram_wren), .a_in(core_rw_y_bram_din),
- .b_addr(bus_addr_lower), .b_out(user_ro_y_bram_out)
- );
+ bram_y
+ ( .clk(clk),
+ .a_addr(core_rw_y_bram_addr), .a_out(), .a_wr(core_rw_y_bram_wren), .a_in(core_rw_y_bram_din),
+ .b_addr(bus_addr_lower), .b_out(user_ro_y_bram_out)
+ );
//
// Curve Base Point Multiplier
- //
- reg next_dly;
-
- always @(posedge clk) next_dly <= next;
-
- wire next_trig = next && !next_dly;
-
- curve_mul_256 base_point_multiplier_p256
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (next_trig),
- .rdy (valid),
-
- .k_addr (core_ro_k_bram_addr),
- .rx_addr (core_rw_x_bram_addr),
- .ry_addr (core_rw_y_bram_addr),
-
- .rx_wren (core_rw_x_bram_wren),
- .ry_wren (core_rw_y_bram_wren),
-
- .k_din (core_ro_k_bram_dout),
- .rx_dout (core_rw_x_bram_din),
- .ry_dout (core_rw_y_bram_din)
- );
+ //
+ reg next_dly;
+
+ always @(posedge clk) next_dly <= next;
- //
+ wire next_trig = next && !next_dly;
+
+ curve_mul_256 base_point_multiplier_p256
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (next_trig),
+ .rdy (valid),
+
+ .k_addr (core_ro_k_bram_addr),
+ .rx_addr (core_rw_x_bram_addr),
+ .ry_addr (core_rw_y_bram_addr),
+
+ .rx_wren (core_rw_x_bram_wren),
+ .ry_wren (core_rw_y_bram_wren),
+
+ .k_din (core_ro_k_bram_dout),
+ .rx_dout (core_rw_x_bram_din),
+ .ry_dout (core_rw_y_bram_din)
+ );
+
+ //
// Output Selector
//
- reg [1:0] bus_addr_upper_prev;
+ reg [1:0] bus_addr_upper_prev;
always @(posedge clk) bus_addr_upper_prev = bus_addr_upper;
- reg [31: 0] bus_data_rd_mux;
+ reg [31: 0] bus_data_rd_mux;
assign bus_data_rd = bus_data_rd_mux;
always @(*)
diff --git a/rtl/ecdsa256_wrapper.v b/rtl/ecdsa256_wrapper.v
index c6e93ea..db043d0 100644
--- a/rtl/ecdsa256_wrapper.v
+++ b/rtl/ecdsa256_wrapper.v
@@ -32,11 +32,11 @@
module ecdsa256_wrapper
(
- input wire clk,
- input wire rst_n,
+ input wire clk,
+ input wire reset_n,
- input wire cs,
- input wire we,
+ input wire cs,
+ input wire we,
input wire [5: 0] address,
input wire [31: 0] write_data,
@@ -47,18 +47,18 @@ module ecdsa256_wrapper
//
// Address Decoder
//
- localparam ADDR_MSB_REGS = 1'b0;
- localparam ADDR_MSB_CORE = 1'b1;
-
- wire [0:0] addr_msb = address[5];
- wire [4:0] addr_lsb = address[4:0];
+ localparam ADDR_MSB_REGS = 1'b0;
+ localparam ADDR_MSB_CORE = 1'b1;
+
+ wire [0:0] addr_msb = address[5];
+ wire [4:0] addr_lsb = address[4:0];
//
// Output Mux
//
- wire [31: 0] read_data_regs;
- wire [31: 0] read_data_core;
+ wire [31: 0] read_data_regs;
+ wire [31: 0] read_data_core;
//
@@ -69,13 +69,13 @@ module ecdsa256_wrapper
localparam ADDR_VERSION = 5'h02;
localparam ADDR_CONTROL = 5'h08; // {next, init}
- localparam ADDR_STATUS = 5'h09; // {valid, ready}
- localparam ADDR_DUMMY = 5'h0F; // don't care
+ localparam ADDR_STATUS = 5'h09; // {valid, ready}
+ localparam ADDR_DUMMY = 5'h0F; // don't care
-// localparam CONTROL_INIT_BIT = 0; -- not used
+ // localparam CONTROL_INIT_BIT = 0; -- not used
localparam CONTROL_NEXT_BIT = 1;
-// localparam STATUS_READY_BIT = 0; -- hardcoded to always read 1
+ // localparam STATUS_READY_BIT = 0; -- hardcoded to always read 1
localparam STATUS_VALID_BIT = 1;
localparam CORE_NAME0 = 32'h65636473; // "ecds"
@@ -86,23 +86,23 @@ module ecdsa256_wrapper
//
// Registers
//
- reg reg_control;
- reg [31:0] reg_dummy;
+ reg reg_control;
+ reg [31:0] reg_dummy;
//
// Wires
//
- wire reg_status;
+ wire reg_status;
//
// ECDSA256
//
ecdsa256 ecdsa256_inst
- (
- .clk (clk),
- .rst_n (rst_n),
+ (
+ .clk (clk),
+ .rst_n (reset_n),
.next (reg_control),
.valid (reg_status),
@@ -112,13 +112,13 @@ module ecdsa256_wrapper
.bus_addr (addr_lsb),
.bus_data_wr (write_data),
.bus_data_rd (read_data_core)
- );
+ );
//
// Read Latch
//
- reg [31: 0] tmp_read_data;
+ reg [31: 0] tmp_read_data;
//
@@ -126,7 +126,7 @@ module ecdsa256_wrapper
//
always @(posedge clk)
//
- if (!rst_n) begin
+ if (!reset_n) begin
//
reg_control <= 1'b0;
//
@@ -138,8 +138,8 @@ module ecdsa256_wrapper
//
case (addr_lsb)
//
- ADDR_CONTROL: reg_control <= write_data[1];
- ADDR_DUMMY: reg_dummy <= write_data[31:0];
+ ADDR_CONTROL: reg_control <= write_data[1];
+ ADDR_DUMMY: reg_dummy <= write_data[31:0];
//
endcase
//
@@ -153,8 +153,8 @@ module ecdsa256_wrapper
ADDR_NAME1: tmp_read_data <= CORE_NAME1;
ADDR_VERSION: tmp_read_data <= CORE_VERSION;
ADDR_CONTROL: tmp_read_data <= {{30{1'b0}}, reg_control, 1'b0};
- ADDR_STATUS: tmp_read_data <= {{30{1'b0}}, reg_status, 1'b1};
- ADDR_DUMMY: tmp_read_data <= reg_dummy;
+ ADDR_STATUS: tmp_read_data <= {{30{1'b0}}, reg_status, 1'b1};
+ ADDR_DUMMY: tmp_read_data <= reg_dummy;
//
default: tmp_read_data <= 32'h00000000;
//
diff --git a/rtl/lowlevel/adder32_wrapper.v b/rtl/lowlevel/adder32_wrapper.v
deleted file mode 100644
index ebfd8ce..0000000
--- a/rtl/lowlevel/adder32_wrapper.v
+++ /dev/null
@@ -1,73 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder32_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 32-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder32_wrapper
- (
- input clk, // clock
- input [31: 0] a, // operand input
- input [31: 0] b, // operand input
- output [31: 0] s, // sum output
- input c_in, // carry input
- output c_out // carry output
- );
-
- //
- // Include Primitive Selector
- //
-`include "ecdsa_lowlevel_settings.v"
-
-
- //
- // Instantiate Vendor/Generic Primitive
- //
- `ADDER32_PRIMITIVE adder32_inst
- (
- .clk(clk),
- .a(a),
- .b(b),
- .s(s),
- .c_in(c_in),
- .c_out(c_out)
- );
-
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/adder47_wrapper.v b/rtl/lowlevel/adder47_wrapper.v
deleted file mode 100644
index 1a0a18e..0000000
--- a/rtl/lowlevel/adder47_wrapper.v
+++ /dev/null
@@ -1,69 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder47_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 47-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder47_wrapper
- (
- input clk, // clock
- input [46: 0] a, // operand input
- input [46: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
- //
- // Include Primitive Selector
- //
-`include "ecdsa_lowlevel_settings.v"
-
-
- //
- // Instantiate Vendor/Generic Primitive
- //
- `ADDER47_PRIMITIVE adder47_inst
- (
- .clk(clk),
- .a(a),
- .b(b),
- .s(s)
- );
-
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/adder32_artix7.v b/rtl/lowlevel/artix7/adder32_artix7.v
deleted file mode 100644
index 5f9ba79..0000000
--- a/rtl/lowlevel/artix7/adder32_artix7.v
+++ /dev/null
@@ -1,96 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder32_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 32-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder32_artix7
- (
- input clk, // clock
- input [31: 0] a, // operand input
- input [31: 0] b, // operand input
- output [31: 0] s, // sum output
- input c_in, // carry input
- output c_out // carry output
- );
-
- //
- // Lower and higher parts of operand
- //
- wire [17: 0] bl = b[17: 0];
- wire [13: 0] bh = b[31:18];
-
-
- //
- // DSP48E1 Slice
- //
-
- /* Operation Mode */
- wire [ 3: 0] dsp48e1_alumode = 4'b0000;
- wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
-
- /* Internal Product */
- wire [47: 0] p_int;
-
- dsp48e1_wrapper dsp_adder
- (
- .clk (clk),
-
- .ce (1'b1),
-
- .carry (c_in),
-
- .alumode (dsp48e1_alumode),
- .opmode (dsp48e1_opmode),
-
- .a ({{16{1'b0}}, bh}),
- .b (bl),
- .c ({{16{1'b0}}, a}),
-
- .p (p_int)
- );
-
- //
- // Output Mapping
- //
- assign s = p_int[31: 0];
- assign c_out = p_int[32];
-
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/adder47_artix7.v b/rtl/lowlevel/artix7/adder47_artix7.v
deleted file mode 100644
index 00566e4..0000000
--- a/rtl/lowlevel/artix7/adder47_artix7.v
+++ /dev/null
@@ -1,91 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder47_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 47-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder47_artix7
- (
- input clk, // clock
- input [46: 0] a, // operand input
- input [46: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
- //
- // Lower and higher parts of operand
- //
- wire [17: 0] bl = b[17: 0];
- wire [28: 0] bh = b[46:18];
-
- //
- // DSP48E1 Slice
- //
-
- /* Operation Mode */
- wire [ 3: 0] dsp48e1_alumode = 4'b0000;
- wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
-
- /* Internal Product */
- wire [47: 0] p_int;
-
- dsp48e1_wrapper dsp_adder
- (
- .clk (clk),
-
- .ce (1'b1),
-
- .carry (1'b0),
-
- .alumode (dsp48e1_alumode),
- .opmode (dsp48e1_opmode),
-
- .a ({1'b0, bh}),
- .b (bl),
- .c ({1'b0, a}),
-
- .p (p_int)
- );
-
- //
- // Output Mapping
- //
- assign s = p_int[46: 0];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/dsp48e1_wrapper.v b/rtl/lowlevel/artix7/dsp48e1_wrapper.v
deleted file mode 100644
index 11a21bc..0000000
--- a/rtl/lowlevel/artix7/dsp48e1_wrapper.v
+++ /dev/null
@@ -1,159 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// dsp48e1_wrapper.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) tile wrapper.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module dsp48e1_wrapper
- (
- input clk,
-
- input ce,
-
- input [ 6: 0] opmode,
- input [ 3: 0] alumode,
-
- input carry,
-
- input [29: 0] a,
- input [17: 0] b,
- input [47: 0] c,
-
- output [47: 0] p
- );
-
-
- //
- // Tile instantiation
- //
- DSP48E1 #
- (
- .AREG (0),
- .BREG (0),
- .CREG (0),
- .DREG (0),
- .MREG (0),
- .PREG (1),
- .ADREG (0),
-
- .ACASCREG (0),
- .BCASCREG (0),
- .ALUMODEREG (0),
- .INMODEREG (0),
- .OPMODEREG (0),
- .CARRYINREG (0),
- .CARRYINSELREG (0),
-
- .A_INPUT ("DIRECT"),
- .B_INPUT ("DIRECT"),
-
- .USE_DPORT ("FALSE"),
- .USE_MULT ("DYNAMIC"),
- .USE_SIMD ("ONE48"),
-
- .USE_PATTERN_DETECT ("NO_PATDET"),
- .SEL_PATTERN ("PATTERN"),
- .SEL_MASK ("MASK"),
- .PATTERN (48'h000000000000),
- .MASK (48'h3fffffffffff),
- .AUTORESET_PATDET ("NO_RESET")
- )
- DSP48E1_inst
- (
- .CLK (clk),
-
- .RSTA (1'b0),
- .RSTB (1'b0),
- .RSTC (1'b0),
- .RSTD (1'b0),
- .RSTM (1'b0),
- .RSTP (1'b0),
-
- .RSTCTRL (1'b0),
- .RSTINMODE (1'b0),
- .RSTALUMODE (1'b0),
- .RSTALLCARRYIN (1'b0),
-
- .CEA1 (1'b0),
- .CEA2 (1'b0),
- .CEB1 (1'b0),
- .CEB2 (1'b0),
- .CEC (1'b0),
- .CED (1'b0),
- .CEM (1'b0),
- .CEP (ce),
- .CEAD (1'b0),
- .CEALUMODE (1'b0),
- .CEINMODE (1'b0),
-
- .CECTRL (1'b0),
- .CECARRYIN (1'b0),
-
- .A (a),
- .B (b),
- .C (c),
- .D ({25{1'b1}}),
- .P (p),
-
- .CARRYIN (carry),
- .CARRYOUT (),
- .CARRYINSEL (3'b000),
-
- .CARRYCASCIN (1'b0),
- .CARRYCASCOUT (),
-
- .PATTERNDETECT (),
- .PATTERNBDETECT (),
-
- .OPMODE (opmode),
- .ALUMODE (alumode),
- .INMODE (5'b00000),
-
- .MULTSIGNIN (1'b0),
- .MULTSIGNOUT (),
-
- .UNDERFLOW (),
- .OVERFLOW (),
-
- .ACIN (30'd0),
- .BCIN (18'd0),
- .PCIN (48'd0),
-
- .ACOUT (),
- .BCOUT (),
- .PCOUT ()
- );
-
-endmodule
diff --git a/rtl/lowlevel/artix7/mac16_artix7.v b/rtl/lowlevel/artix7/mac16_artix7.v
deleted file mode 100644
index 63b74ab..0000000
--- a/rtl/lowlevel/artix7/mac16_artix7.v
+++ /dev/null
@@ -1,90 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mac16_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 16-bit multiplier and 47-bit accumulator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mac16_artix7
- (
- input clk, // clock
- input clr, // clear accumulator (active-high)
- input ce, // enable clock (active-high)
- input [15: 0] a, // operand input
- input [15: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
-
- //
- // DSP48E1 Slice
- //
-
- /* Operation Mode */
- wire [ 3: 0] dsp48e1_alumode = 4'b0000;
- wire [ 6: 0] dsp48e1_opmode = {2'b01, clr, 4'b0101};
-
- /* Internal Product */
- wire [47: 0] p_int;
-
- dsp48e1_wrapper dsp_adder
- (
- .clk (clk),
-
- .ce (ce),
-
- .carry (1'b0),
-
- .alumode (dsp48e1_alumode),
- .opmode (dsp48e1_opmode),
-
- .a ({{14{1'b0}}, a}),
- .b ({{ 2{1'b0}}, b}),
- .c ({48{1'b0}}),
-
- .p (p_int)
- );
-
- //
- // Output Mapping
- //
- assign s = p_int[46:0];
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/subtractor32_artix7.v b/rtl/lowlevel/artix7/subtractor32_artix7.v
deleted file mode 100644
index b46ac5c..0000000
--- a/rtl/lowlevel/artix7/subtractor32_artix7.v
+++ /dev/null
@@ -1,94 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// subtractor32_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 32-bit subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module subtractor32_artix7
- (
- input clk,
- input [31: 0] a,
- input [31: 0] b,
- output [31: 0] d,
- input b_in,
- output b_out
- );
-
- //
- // Lower and higher parts of operand
- //
- wire [17: 0] bl = b[17: 0];
- wire [13: 0] bh = b[31:18];
-
- //
- // DSP48E1 Slice
- //
-
- /* Operation Mode */
- wire [ 3: 0] dsp48e1_alumode = 4'b0011;
- wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
-
- /* Internal Product */
- wire [47: 0] p_int;
-
- dsp48e1_wrapper dsp_subtractor
- (
- .clk (clk),
-
- .ce (1'b1),
-
- .carry (b_in),
-
- .alumode (dsp48e1_alumode),
- .opmode (dsp48e1_opmode),
-
- .a ({{16{1'b0}}, bh}),
- .b (bl),
- .c ({{16{1'b0}}, a}),
-
- .p (p_int)
- );
-
- //
- // Output Mapping
- //
- assign d = p_int[31: 0];
- assign b_out = p_int[32];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/ecdsa_lowlevel_settings.v b/rtl/lowlevel/ecdsa_lowlevel_settings.v
deleted file mode 100644
index 8f95e2f..0000000
--- a/rtl/lowlevel/ecdsa_lowlevel_settings.v
+++ /dev/null
@@ -1,17 +0,0 @@
-`define USE_VENDOR_PRIMITIVES
-
-`ifdef USE_VENDOR_PRIMITIVES
-
-`define MAC16_PRIMITIVE mac16_artix7
-`define ADDER32_PRIMITIVE adder32_artix7
-`define ADDER47_PRIMITIVE adder47_artix7
-`define SUBTRACTOR32_PRIMITIVE subtractor32_artix7
-
-`else
-
-`define MAC16_PRIMITIVE mac16_generic
-`define ADDER32_PRIMITIVE adder32_generic
-`define ADDER47_PRIMITIVE adder47_generic
-`define SUBTRACTOR32_PRIMITIVE subtractor32_generic
-
-`endif
diff --git a/rtl/lowlevel/generic/adder32_generic.v b/rtl/lowlevel/generic/adder32_generic.v
deleted file mode 100644
index b9c94aa..0000000
--- a/rtl/lowlevel/generic/adder32_generic.v
+++ /dev/null
@@ -1,67 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder32_generic.v
-// -----------------------------------------------------------------------------
-// Generic 32-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder32_generic
- (
- input clk, // clock
- input [31: 0] a, // operand input
- input [31: 0] b, // operand input
- output [31: 0] s, // sum output
- input c_in, // carry input
- output c_out // carry output
- );
-
- //
- // Sum
- //
- reg [32: 0] s_int;
-
- always @(posedge clk)
- s_int <= {1'b0, a} + {1'b0, b} + {{32{1'b0}}, c_in};
-
- //
- // Output
- //
- assign s = s_int[31:0];
- assign c_out = s_int[32];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/generic/adder47_generic.v b/rtl/lowlevel/generic/adder47_generic.v
deleted file mode 100644
index f472061..0000000
--- a/rtl/lowlevel/generic/adder47_generic.v
+++ /dev/null
@@ -1,64 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder47_generic.v
-// -----------------------------------------------------------------------------
-// Generic 47-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder47_generic
- (
- input clk, // clock
- input [46: 0] a, // operand input
- input [46: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
- //
- // Sum
- //
- reg [46: 0] s_int;
-
- always @(posedge clk)
- s_int <= a + b;
-
- //
- // Output
- //
- assign s = s_int;
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/generic/mac16_generic.v b/rtl/lowlevel/generic/mac16_generic.v
deleted file mode 100644
index dc95645..0000000
--- a/rtl/lowlevel/generic/mac16_generic.v
+++ /dev/null
@@ -1,74 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mac16_generic.v
-// -----------------------------------------------------------------------------
-// Generic 16-bit multiplier and 47-bit accumulator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mac16_generic
- (
- input clk, // clock
- input clr, // clear accumulator (active-high)
- input ce, // enable clock (active-high)
- input [15: 0] a, // operand input
- input [15: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
- //
- // Multiplier
- //
- wire [31: 0] p = {{16{1'b0}}, a} * {{16{1'b0}}, b};
- wire [46: 0] p_ext = {{15{1'b0}}, p};
-
- //
- // Accumulator
- //
- reg [46: 0] s_int;
-
- always @(posedge clk)
- //
- if (ce) s_int <= clr ? p_ext : p_ext + s_int;
-
- //
- // Output
- //
- assign s = s_int;
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/generic/subtractor32_generic.v b/rtl/lowlevel/generic/subtractor32_generic.v
deleted file mode 100644
index 46aefe8..0000000
--- a/rtl/lowlevel/generic/subtractor32_generic.v
+++ /dev/null
@@ -1,67 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// subtractor32_generic.v
-// -----------------------------------------------------------------------------
-// Generic 32-bit subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module subtractor32_generic
- (
- input clk,
- input [31: 0] a,
- input [31: 0] b,
- output [31: 0] d,
- input b_in,
- output b_out
- );
-
- //
- // Difference
- //
- reg [32: 0] d_int;
-
- always @(posedge clk)
- d_int <= {1'b0, a} - {1'b0, b} - {{32{1'b0}}, b_in};
-
- //
- // Output
- //
- assign d = d_int[31:0];
- assign b_out = d_int[32];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/mac16_wrapper.v b/rtl/lowlevel/mac16_wrapper.v
deleted file mode 100644
index b91e518..0000000
--- a/rtl/lowlevel/mac16_wrapper.v
+++ /dev/null
@@ -1,75 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mac16_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 16-bit multiplier and 48-bit accumulator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mac16_wrapper
- (
- input clk, // clock
- input clr, // clear accumulator (active-high)
- input ce, // enable clock (active-high)
- input [15: 0] a, // operand input
- input [15: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
-
- //
- // Include Primitive Selector
- //
-`include "ecdsa_lowlevel_settings.v"
-
-
- //
- // Instantiate Vendor/Generic Primitive
- //
- `MAC16_PRIMITIVE mac16_inst
- (
- .clk(clk),
- .clr(clr),
- .ce(ce),
- .a(a),
- .b(b),
- .s(s)
- );
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/subtractor32_wrapper.v b/rtl/lowlevel/subtractor32_wrapper.v
deleted file mode 100644
index 3c7e5e9..0000000
--- a/rtl/lowlevel/subtractor32_wrapper.v
+++ /dev/null
@@ -1,72 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// subtractor32_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 32-bit subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module subtractor32_wrapper
- (
- input clk,
- input [31: 0] a,
- input [31: 0] b,
- output [31: 0] d,
- input b_in,
- output b_out
- );
-
- //
- // Include Primitive Selector
- //
-`include "ecdsa_lowlevel_settings.v"
-
-
- //
- // Instantiate Vendor/Generic Primitive
- //
- `SUBTRACTOR32_PRIMITIVE subtractor32_inst
- (
- .clk(clk),
- .a(a),
- .b(b),
- .d(d),
- .b_in(b_in),
- .b_out(b_out)
- );
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_adder.v b/rtl/modular/modular_adder.v
deleted file mode 100644
index 5641feb..0000000
--- a/rtl/modular/modular_adder.v
+++ /dev/null
@@ -1,298 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// modular_adder.v
-// -----------------------------------------------------------------------------
-// Modular adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_adder
- (
- clk, rst_n,
- ena, rdy,
- ab_addr, n_addr, s_addr, s_wren,
- a_din, b_din, n_din, s_dout
- );
-
-
- //
- // Parameters
- //
- parameter OPERAND_NUM_WORDS = 8;
- parameter WORD_COUNTER_WIDTH = 3;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words
- output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
- output wire [WORD_COUNTER_WIDTH-1:0] s_addr; // index of current S word
- output wire s_wren; // store current S word now
-
- input wire [ 31:0] a_din; // A
- input wire [ 31:0] b_din; // B
- input wire [ 31:0] n_din; // N
- output wire [ 31:0] s_dout; // S = (A + B) mod N
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH-1:0] index_ab;
- reg [WORD_COUNTER_WIDTH-1:0] index_n;
- reg [WORD_COUNTER_WIDTH-1:0] index_s;
-
- /* map registers to output ports */
- assign ab_addr = index_ab;
- assign n_addr = index_n;
- assign s_addr = index_s;
-
-
- //
- // Adder
- //
- wire [31: 0] add32_s;
- wire add32_c_in;
- wire add32_c_out;
-
- adder32_wrapper adder32
- (
- .clk (clk),
- .a (a_din),
- .b (b_din),
- .s (add32_s),
- .c_in (add32_c_in),
- .c_out (add32_c_out)
- );
-
-
- //
- // Subtractor
- //
- wire [31: 0] sub32_d;
- wire sub32_b_in;
- wire sub32_b_out;
-
- subtractor32_wrapper subtractor32
- (
- .clk (clk),
- .a (add32_s),
- .b (n_din),
- .d (sub32_d),
- .b_in (sub32_b_in),
- .b_out (sub32_b_out)
- );
-
-
- //
- // FSM
- //
-
- localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)];
-
- wire fsm_latch_msb_carry = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
- wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
-
- wire inc_index_ab = |fsm_shreg_inc_index_ab;
- wire inc_index_n = |fsm_shreg_inc_index_n;
- wire store_sum_ab = |fsm_shreg_store_sum_ab;
- wire store_sum_ab_n = |fsm_shreg_store_sum_ab_n;
- wire store_data_s = |fsm_shreg_store_data_s;
- wire inc_index_s = |fsm_shreg_inc_index_s;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
-
-
-
-
-
- //
- // Carry & Borrow Masking Logic
- //
- reg add32_c_mask;
- reg sub32_b_mask;
-
- always @(posedge clk) begin
- //
- add32_c_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
- sub32_b_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
- //
- end
-
- assign add32_c_in = add32_c_out & ~add32_c_mask;
- assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
-
-
- //
- // Carry & Borrow Latch Logic
- //
- reg add32_carry_latch;
- reg sub32_borrow_latch;
-
- always @(posedge clk) begin
- //
- if (fsm_latch_msb_carry) add32_carry_latch <= add32_c_out;
- if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out;
- //
- end
-
-
- //
- // Intermediate Results
- //
- reg [32*OPERAND_NUM_WORDS-1:0] s_ab;
- reg [32*OPERAND_NUM_WORDS-1:0] s_ab_n;
-
- always @(posedge clk)
- //
- if (store_data_s) begin
- //
- s_ab <= {{32{1'bX}}, s_ab[32*OPERAND_NUM_WORDS-1:32]};
- s_ab_n <= {{32{1'bX}}, s_ab_n[32*OPERAND_NUM_WORDS-1:32]};
- //
- end else begin
- //
- if (store_sum_ab) s_ab <= {add32_s, s_ab[32*OPERAND_NUM_WORDS-1:32]};
- if (store_sum_ab_n) s_ab_n <= {sub32_d, s_ab_n[32*OPERAND_NUM_WORDS-1:32]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy) begin
- //
- index_ab <= WORD_INDEX_ZERO;
- index_n <= WORD_INDEX_ZERO;
- index_s <= WORD_INDEX_ZERO;
- //
- end else begin
- //
- if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab);
- if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n);
- if (inc_index_s) index_s <= WORD_INDEX_NEXT_OR_ZERO(index_s);
- //
- end
-
-
- //
- // Output Sum Selector
- //
- wire mux_select_ab = sub32_borrow_latch && !add32_carry_latch;
-
-
- //
- // Output Data and Write Enable Logic
- //
- reg s_wren_reg;
- reg [31: 0] s_dout_reg;
- wire [31: 0] s_dout_mux = mux_select_ab ? s_ab[31:0] : s_ab_n[31:0];
-
- assign s_wren = s_wren_reg;
- assign s_dout = s_dout_reg;
-
- always @(posedge clk)
- //
- if (rdy) begin
- //
- s_wren_reg <= 1'b0;
- s_dout_reg <= {32{1'bX}};
- //
- end else begin
- //
- s_wren_reg <= store_data_s;
- s_dout_reg <= store_data_s ? s_dout_mux : {32{1'bX}};
- //
- end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v b/rtl/modular/modular_invertor/helper/modinv_helper_copy.v
deleted file mode 100644
index 07c1b4f..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v
+++ /dev/null
@@ -1,148 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_copy
- (
- clk, rst_n,
- ena, rdy,
- s_addr, s_din,
- a1_addr, a1_wren, a1_dout
- );
-
-
- //
- // Parameters
- //
- parameter OPERAND_NUM_WORDS = 8;
- parameter OPERAND_ADDR_BITS = 3;
-
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 2;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
-
- input wire ena;
- output wire rdy;
-
- output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [OPERAND_ADDR_BITS-1:0] a1_addr;
-
- output wire a1_wren;
-
- input wire [ 31:0] s_din;
-
- output wire [ 31:0] a1_dout;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [OPERAND_ADDR_BITS-1:0] addr_s;
-
- wire [OPERAND_ADDR_BITS-1:0] addr_s_max = OPERAND_NUM_WORDS - 1;
- wire [OPERAND_ADDR_BITS-1:0] addr_s_zero = {OPERAND_ADDR_BITS{1'b0}};
- wire [OPERAND_ADDR_BITS-1:0] addr_s_next = (addr_s < addr_s_max) ?
- addr_s + 1'b1 : addr_s_zero;
-
- reg [OPERAND_ADDR_BITS-1:0] addr_a1;
-
- wire [OPERAND_ADDR_BITS-1:0] addr_a1_max = OPERAND_NUM_WORDS - 1;
- wire [OPERAND_ADDR_BITS-1:0] addr_a1_zero = {OPERAND_ADDR_BITS{1'b0}};
- wire [OPERAND_ADDR_BITS-1:0] addr_a1_next = (addr_a1 < addr_a1_max) ?
- addr_a1 + 1'b1 : addr_a1_zero;
-
- assign s_addr = {{(BUFFER_ADDR_BITS - OPERAND_ADDR_BITS){1'b0}}, addr_s};
- assign a1_addr = addr_a1;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment Logic
- //
- wire inc_addr_s;
- wire inc_addr_a1;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_start = 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_stop = OPERAND_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_stop = OPERAND_NUM_WORDS + 1;
-
- assign inc_addr_s = (proc_cnt >= cnt_inc_addr_s_start) && (proc_cnt <= cnt_inc_addr_s_stop);
- assign inc_addr_a1 = (proc_cnt >= cnt_inc_addr_a1_start) && (proc_cnt <= cnt_inc_addr_a1_stop);
-
- always @(posedge clk) begin
- //
- if (inc_addr_s) addr_s <= addr_s_next;
- else addr_s <= addr_s_zero;
- //
- if (inc_addr_a1) addr_a1 <= addr_a1_next;
- else addr_a1 <= addr_a1_zero;
- //
- end
-
-
- //
- // Write Enable Logic
- //
- wire wren_a1;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_a1_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_wren_a1_stop = OPERAND_NUM_WORDS + 1;
-
- assign wren_a1 = (proc_cnt >= cnt_wren_a1_start) && (proc_cnt <= cnt_wren_a1_stop);
-
- assign a1_wren = wren_a1;
-
-
- //
- // Data Logic
- //
- assign a1_dout = s_din;
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_init.v b/rtl/modular/modular_invertor/helper/modinv_helper_init.v
deleted file mode 100644
index 0468134..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_init.v
+++ /dev/null
@@ -1,172 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_init
- (
- clk, rst_n,
- ena, rdy,
- a_addr, a_din,
- q_addr, q_din,
- r_addr, r_wren, r_dout,
- s_addr, s_wren, s_dout,
- u_addr, u_wren, u_dout,
- v_addr, v_wren, v_dout
- );
-
-
- //
- // Parameters
- //
- parameter OPERAND_NUM_WORDS = 8;
- parameter OPERAND_ADDR_BITS = 3;
-
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 3;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- output wire [OPERAND_ADDR_BITS-1:0] a_addr;
- output wire [OPERAND_ADDR_BITS-1:0] q_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] r_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] v_addr;
-
- output wire r_wren;
- output wire s_wren;
- output wire u_wren;
- output wire v_wren;
-
- input wire [ 31:0] a_din;
- input wire [ 31:0] q_din;
- output wire [ 31:0] r_dout;
- output wire [ 31:0] s_dout;
- output wire [ 31:0] u_dout;
- output wire [ 31:0] v_dout;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [OPERAND_ADDR_BITS-1:0] addr_aq;
-
- wire [OPERAND_ADDR_BITS-1:0] addr_aq_max = OPERAND_NUM_WORDS - 1;
- wire [OPERAND_ADDR_BITS-1:0] addr_aq_zero = {OPERAND_ADDR_BITS{1'b0}};
- wire [OPERAND_ADDR_BITS-1:0] addr_aq_next = (addr_aq < addr_aq_max) ?
- addr_aq + 1'b1 : addr_aq_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_rsuv;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_max = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_next = (addr_rsuv < addr_rsuv_max) ?
- addr_rsuv + 1'b1 : addr_rsuv_zero;
-
- assign a_addr = addr_aq;
- assign q_addr = addr_aq;
-
- assign r_addr = addr_rsuv;
- assign s_addr = addr_rsuv;
- assign u_addr = addr_rsuv;
- assign v_addr = addr_rsuv;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment Logic
- //
- wire inc_addr_aq;
- wire inc_addr_rsuv;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_start = 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_stop = OPERAND_NUM_WORDS;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_stop = BUFFER_NUM_WORDS + 1;
-
- assign inc_addr_aq = (proc_cnt >= cnt_inc_addr_aq_start) && (proc_cnt <= cnt_inc_addr_aq_stop);
- assign inc_addr_rsuv = (proc_cnt >= cnt_inc_addr_rsuv_start) && (proc_cnt <= cnt_inc_addr_rsuv_stop);
-
- always @(posedge clk) begin
- //
- if (inc_addr_aq) addr_aq <= addr_aq_next;
- else addr_aq <= addr_aq_zero;
- //
- if (inc_addr_rsuv) addr_rsuv <= addr_rsuv_next;
- else addr_rsuv <= addr_rsuv_zero;
- //
- end
-
-
- //
- // Write Enable Logic
- //
- wire wren_rsuv;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_stop = BUFFER_NUM_WORDS + 1;
-
- assign wren_rsuv = (proc_cnt >= cnt_wren_rsuv_start) && (proc_cnt <= cnt_wren_rsuv_stop);
-
- assign r_wren = wren_rsuv;
- assign s_wren = wren_rsuv;
- assign u_wren = wren_rsuv;
- assign v_wren = wren_rsuv;
-
-
- //
- // Data Logic
- //
- assign r_dout = 32'd0;
- assign s_dout = (proc_cnt == cnt_wren_rsuv_start) ? 32'd1 : 32'd0;
- assign u_dout = (proc_cnt != cnt_wren_rsuv_stop) ? q_din : 32'd0;
- assign v_dout = (proc_cnt != cnt_wren_rsuv_stop) ? a_din : 32'd0;
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v
deleted file mode 100644
index 6b65eb1..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v
+++ /dev/null
@@ -1,286 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_invert_compare
- (
- clk, rst_n,
- ena, rdy,
-
- u_addr, u_din,
- v_addr, v_din,
-
- u_gt_v, v_eq_1,
- u_is_even, v_is_even
- );
-
-
- //
- // Parameters
- //
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = 1 * BUFFER_NUM_WORDS + 10;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- output wire [BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_addr;
-
- input wire [ 32-1:0] u_din;
- input wire [ 32-1:0] v_din;
-
- output wire u_gt_v;
- output wire v_eq_1;
- output wire u_is_even;
- output wire v_is_even;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [BUFFER_ADDR_BITS-1:0] addr_in;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ?
- addr_in - 1'b1 : addr_in_last;
-
- assign u_addr = addr_in;
- assign v_addr = addr_in;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Decrement Logic
- //
- wire dec_addr_in;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 0 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 1 * BUFFER_NUM_WORDS + 0;
-
- assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop);
-
- always @(posedge clk)
- //
- if (rdy) addr_in <= addr_in_last;
- else if (dec_addr_in) addr_in <= addr_in_prev;
-
-
- //
- // Comparison Stage Flags
- //
- wire calc_leg;
- wire calc_leg_final;
- wire calc_parity;
-
- wire [PROC_CNT_BITS-1:0] cnt_calc_leg_start = 0 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_calc_leg_stop = 1 * BUFFER_NUM_WORDS + 2;
- wire [PROC_CNT_BITS-1:0] cnt_calc_parity = 1 * BUFFER_NUM_WORDS + 1;
-
- assign calc_leg = (proc_cnt >= cnt_calc_leg_start) && (proc_cnt <= cnt_calc_leg_stop);
- assign calc_leg_final = (proc_cnt == cnt_calc_leg_stop);
- assign calc_parity = (proc_cnt == cnt_calc_parity);
-
-
- //
- // Dummy Input
- //
- reg sub32_din_1_lsb;
- wire [31: 0] sub32_din_1 = {{31{1'b0}}, sub32_din_1_lsb};
-
- always @(posedge clk)
- //
- sub32_din_1_lsb <= (addr_in == addr_in_zero) ? 1'b1 : 1'b0;
-
-
- //
- // Subtractor (u - v)
- //
- wire [31: 0] sub32_u_minus_v_difference_out;
- wire sub32_u_minus_v_borrow_in;
- wire sub32_u_minus_v_borrow_out;
-
- subtractor32_wrapper sub32_u_minus_v
- (
- .clk (clk),
- .a (u_din),
- .b (v_din),
- .d (sub32_u_minus_v_difference_out),
- .b_in (sub32_u_minus_v_borrow_in),
- .b_out (sub32_u_minus_v_borrow_out)
- );
-
-
- //
- // Subtractor (v - 1)
- //
- wire [31: 0] sub32_v_minus_1_difference_out;
- wire sub32_v_minus_1_borrow_in;
- wire sub32_v_minus_1_borrow_out;
-
- subtractor32_wrapper sub32_v_minus_1
- (
- .clk (clk),
- .a (v_din),
- .b (sub32_din_1),
- .d (sub32_v_minus_1_difference_out),
- .b_in (sub32_v_minus_1_borrow_in),
- .b_out (sub32_v_minus_1_borrow_out)
- );
-
-
-
- //
- // Borrow Masking Logic
- //
- reg mask_borrow;
-
- always @(posedge clk)
- //
- mask_borrow <= ((proc_cnt > cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop)) ?
- 1'b0 : 1'b1;
-
- assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_borrow;
- assign sub32_v_minus_1_borrow_in = sub32_v_minus_1_borrow_out & ~mask_borrow;
-
-
- //
- // Comparison Logic
- //
- reg cmp_u_v_l;
- reg cmp_u_v_e;
- reg cmp_u_v_g;
-
- reg cmp_v_1_l;
- reg cmp_v_1_e;
- reg cmp_v_1_g;
-
- wire cmp_unresolved_u_v = !(cmp_u_v_l || cmp_u_v_g);
- wire cmp_unresolved_v_1 = !(cmp_v_1_l || cmp_v_1_g);
-
- wire cmp_u_v_borrow_is_set = (sub32_u_minus_v_borrow_out == 1'b1) ? 1'b1 : 1'b0;
- wire cmp_u_v_difference_is_nonzero = (sub32_u_minus_v_difference_out != 32'd0) ? 1'b1 : 1'b0;
-
- wire cmp_v_1_borrow_is_set = (sub32_v_minus_1_borrow_out == 1'b1) ? 1'b1 : 1'b0;
- wire cmp_v_1_difference_is_nonzero = (sub32_v_minus_1_difference_out != 32'd0) ? 1'b1 : 1'b0;
-
- reg u_is_even_reg;
- reg v_is_even_reg;
-
- always @(posedge clk)
- //
- if (rdy) begin
- //
- if (ena) begin
- //
- cmp_u_v_l <= 1'b0;
- cmp_u_v_e <= 1'b0;
- cmp_u_v_g <= 1'b0;
- //
- cmp_v_1_l <= 1'b0;
- cmp_v_1_e <= 1'b0;
- cmp_v_1_g <= 1'b0;
- //
- u_is_even_reg <= 1'bX;
- v_is_even_reg <= 1'bX;
- //
- end
- //
- end else begin
- //
- // parity
- //
- if (calc_parity) begin
- u_is_even_reg <= ~u_din[0];
- v_is_even_reg <= ~v_din[0];
- end
- //
- // u <> v
- //
- if (cmp_unresolved_u_v && calc_leg) begin
- //
- if (cmp_u_v_borrow_is_set)
- cmp_u_v_l <= 1'b1;
- //
- if (!cmp_u_v_borrow_is_set && cmp_u_v_difference_is_nonzero)
- cmp_u_v_g <= 1'b1;
- //
- if (!cmp_u_v_borrow_is_set && !cmp_u_v_difference_is_nonzero && calc_leg_final)
- cmp_u_v_e <= 1'b1;
- //
- end
- //
- // v <> 1
- //
- if (cmp_unresolved_v_1 && calc_leg) begin
- //
- if (cmp_v_1_borrow_is_set)
- cmp_v_1_l <= 1'b1;
- //
- if (!cmp_v_1_borrow_is_set && cmp_v_1_difference_is_nonzero)
- cmp_v_1_g <= 1'b1;
- //
- if (!cmp_v_1_borrow_is_set && !cmp_v_1_difference_is_nonzero && calc_leg_final)
- cmp_v_1_e <= 1'b1;
- //
- end
- //
- end
-
-
- //
- // Output Flags
- //
- assign u_gt_v = !cmp_u_v_l && !cmp_u_v_e && cmp_u_v_g;
- assign v_eq_1 = !cmp_v_1_l && cmp_v_1_e && !cmp_v_1_g;
-
- assign u_is_even = u_is_even_reg;
- assign v_is_even = v_is_even_reg;
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v
deleted file mode 100644
index ab15563..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v
+++ /dev/null
@@ -1,408 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_invert_precalc
- (
- clk, rst_n,
- ena, rdy,
-
- r_addr, r_din,
- s_addr, s_din,
- u_addr, u_din,
- v_addr, v_din,
-
- r_dbl_addr, r_dbl_wren, r_dbl_dout,
- s_dbl_addr, s_dbl_wren, s_dbl_dout,
- r_plus_s_addr, r_plus_s_wren, r_plus_s_dout,
- u_half_addr, u_half_wren, u_half_dout,
- v_half_addr, v_half_wren, v_half_dout,
- u_minus_v_addr, u_minus_v_wren, u_minus_v_dout, u_minus_v_din,
- v_minus_u_addr, v_minus_u_wren, v_minus_u_dout, v_minus_u_din,
- u_minus_v_half_addr, u_minus_v_half_wren, u_minus_v_half_dout,
- v_minus_u_half_addr, v_minus_u_half_wren, v_minus_u_half_dout
- );
-
-
- //
- // Parameters
- //
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- output wire [BUFFER_ADDR_BITS-1:0] r_addr;
- output wire [BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_addr;
-
- input wire [ 32-1:0] r_din;
- input wire [ 32-1:0] s_din;
- input wire [ 32-1:0] u_din;
- input wire [ 32-1:0] v_din;
-
- output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr;
- output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr;
- output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr;
-
- output wire [ 32-1:0] r_dbl_dout;
- output wire [ 32-1:0] s_dbl_dout;
- output wire [ 32-1:0] r_plus_s_dout;
- output wire [ 32-1:0] u_half_dout;
- output wire [ 32-1:0] v_half_dout;
- output wire [ 32-1:0] u_minus_v_dout;
- output wire [ 32-1:0] v_minus_u_dout;
- output wire [ 32-1:0] u_minus_v_half_dout;
- output wire [ 32-1:0] v_minus_u_half_dout;
-
- output wire r_dbl_wren;
- output wire s_dbl_wren;
- output wire r_plus_s_wren;
- output wire u_half_wren;
- output wire v_half_wren;
- output wire u_minus_v_wren;
- output wire v_minus_u_wren;
- output wire u_minus_v_half_wren;
- output wire v_minus_u_half_wren;
-
- input wire [ 32-1:0] u_minus_v_din;
- input wire [ 32-1:0] v_minus_u_din;
-
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [BUFFER_ADDR_BITS-1:0] addr_in;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_last) ?
- addr_in + 1'b1 : addr_in_zero;
- wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ?
- addr_in - 1'b1 : addr_in_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out1;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ?
- addr_out1 + 1'b1 : addr_out1_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out2;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_next = (addr_out2 < addr_out2_last) ?
- addr_out2 + 1'b1 : addr_out2_zero;
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ?
- addr_out2 - 1'b1 : addr_out2_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out3;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ?
- addr_out3 - 1'b1 : addr_out3_last;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out4;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out4_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out4_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out4_prev = (addr_out4 > addr_out4_zero) ?
- addr_out4 - 1'b1 : addr_out4_last;
-
-
- assign r_addr = addr_in;
- assign s_addr = addr_in;
- assign u_addr = addr_in;
- assign v_addr = addr_in;
-
- assign r_dbl_addr = addr_out1;
- assign s_dbl_addr = addr_out1;
- assign r_plus_s_addr = addr_out2;
- assign u_half_addr = addr_out3;
- assign v_half_addr = addr_out3;
- assign u_minus_v_addr = addr_out2;
- assign v_minus_u_addr = addr_out2;
- assign u_minus_v_half_addr = addr_out4;
- assign v_minus_u_half_addr = addr_out4;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment/Decrement Logic
- //
- wire inc_addr_in;
- wire dec_addr_in;
- wire inc_addr_out1;
- wire inc_addr_out2;
- wire dec_addr_out2;
- wire dec_addr_out3;
- wire dec_addr_out4;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 0 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = 1 * BUFFER_NUM_WORDS - 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 2;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_start = 0 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_stop = 1 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 1 * BUFFER_NUM_WORDS + 0;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 2 * BUFFER_NUM_WORDS - 2;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_start = 1 * BUFFER_NUM_WORDS + 4;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_stop = 2 * BUFFER_NUM_WORDS + 3;
-
- assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
- assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop);
- assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
- assign inc_addr_out2 = (proc_cnt >= cnt_inc_addr_out2_start) && (proc_cnt <= cnt_inc_addr_out2_stop);
- assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
- assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
- assign dec_addr_out4 = (proc_cnt >= cnt_dec_addr_out4_start) && (proc_cnt <= cnt_dec_addr_out4_stop);
-
-
- always @(posedge clk) begin
- //
- if (rdy) begin
- //
- addr_in <= addr_in_zero;
- addr_out1 <= addr_out1_zero;
- addr_out2 <= addr_out2_zero;
- addr_out3 <= addr_out3_last;
- addr_out4 <= addr_out4_last;
- //
- end else begin
- //
- if (inc_addr_in) addr_in <= addr_in_next;
- else if (dec_addr_in) addr_in <= addr_in_prev;
- //
- if (inc_addr_out1) addr_out1 <= addr_out1_next;
- else addr_out1 <= addr_out1_zero;
- //
- if (inc_addr_out2) addr_out2 <= addr_out2_next;
- else if (dec_addr_out2) addr_out2 <= addr_out2_prev;
- //
- if (dec_addr_out3) addr_out3 <= addr_out3_prev;
- else addr_out3 <= addr_out3_last;
- //
- if (dec_addr_out4) addr_out4 <= addr_out4_prev;
- else addr_out4 <= addr_out4_last;
- //
- end
- //
- end
-
-
- //
- // Write Enable Logic
- //
- wire wren_out1;
- wire wren_out2;
- wire wren_out3;
- wire wren_out4;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 2;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 0 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 1 * BUFFER_NUM_WORDS + 2;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out4_start = 1 * BUFFER_NUM_WORDS + 4;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out4_stop = 2 * BUFFER_NUM_WORDS + 3;
-
- assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
- assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
- assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
- assign wren_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt <= cnt_wren_out4_stop);
-
- assign r_dbl_wren = wren_out1;
- assign s_dbl_wren = wren_out1;
- assign r_plus_s_wren = wren_out2;
- assign u_half_wren = wren_out3;
- assign v_half_wren = wren_out3;
- assign u_minus_v_wren = wren_out2;
- assign v_minus_u_wren = wren_out2;
- assign u_minus_v_half_wren = wren_out4;
- assign v_minus_u_half_wren = wren_out4;
-
-
- //
- // Adder (r + s)
- //
- wire [31: 0] add32_r_plus_s_sum_out;
- wire add32_r_plus_s_carry_in;
- wire add32_r_plus_s_carry_out;
-
- adder32_wrapper add32_r_plus_s
- (
- .clk (clk),
- .a (r_din),
- .b (s_din),
- .s (add32_r_plus_s_sum_out),
- .c_in (add32_r_plus_s_carry_in),
- .c_out (add32_r_plus_s_carry_out)
- );
-
- //
- // Subtractor (u - v)
- //
- wire [31: 0] sub32_u_minus_v_difference_out;
- wire sub32_u_minus_v_borrow_in;
- wire sub32_u_minus_v_borrow_out;
-
- subtractor32_wrapper sub32_u_minus_v
- (
- .clk (clk),
- .a (u_din),
- .b (v_din),
- .d (sub32_u_minus_v_difference_out),
- .b_in (sub32_u_minus_v_borrow_in),
- .b_out (sub32_u_minus_v_borrow_out)
- );
-
- //
- // Subtractor (v - u)
- //
- wire [31: 0] sub32_v_minus_u_difference_out;
- wire sub32_v_minus_u_borrow_in;
- wire sub32_v_minus_u_borrow_out;
-
- subtractor32_wrapper sub32_v_minus_u
- (
- .clk (clk),
- .a (v_din),
- .b (u_din),
- .d (sub32_v_minus_u_difference_out),
- .b_in (sub32_v_minus_u_borrow_in),
- .b_out (sub32_v_minus_u_borrow_out)
- );
-
-
- //
- // Carry & Borrow Masking Logic
- //
- reg mask_carry_borrow;
-
- always @(posedge clk)
- //
- mask_carry_borrow <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
- 1'b0 : 1'b1;
-
- assign add32_r_plus_s_carry_in = add32_r_plus_s_carry_out & ~mask_carry_borrow;
- assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_carry_borrow;
- assign sub32_v_minus_u_borrow_in = sub32_v_minus_u_borrow_out & ~mask_carry_borrow;
-
-
- //
- // Carry Bits
- //
- reg r_dbl_carry;
- reg s_dbl_carry;
- reg u_half_carry;
- reg v_half_carry;
- reg u_minus_v_half_carry;
- reg v_minus_u_half_carry;
-
- always @(posedge clk) begin
-
- r_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
- r_din[31] : 1'b0;
-
- s_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
- s_din[31] : 1'b0;
-
- u_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
- u_din[0] : 1'b0;
-
- v_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
- v_din[0] : 1'b0;
-
- u_minus_v_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
- u_minus_v_din[0] : 1'b0;
-
- v_minus_u_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
- v_minus_u_din[0] : 1'b0;
-
- end
-
-
- //
- // Data Mapper
- //
- assign r_dbl_dout = {r_din[30:0], r_dbl_carry};
- assign s_dbl_dout = {s_din[30:0], s_dbl_carry};
- assign r_plus_s_dout = add32_r_plus_s_sum_out;
- assign u_half_dout = {u_half_carry, u_din[31:1]};
- assign v_half_dout = {v_half_carry, v_din[31:1]};
- assign u_minus_v_dout = sub32_u_minus_v_difference_out;
- assign v_minus_u_dout = sub32_v_minus_u_difference_out;
- assign u_minus_v_half_dout = {u_minus_v_half_carry, u_minus_v_din[31:1]};
- assign v_minus_u_half_dout = {v_minus_u_half_carry, v_minus_u_din[31:1]};
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v
deleted file mode 100644
index 0cd6ac5..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v
+++ /dev/null
@@ -1,257 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_invert_update
- (
- clk, rst_n,
- ena, rdy,
-
- u_gt_v, v_eq_1,
- u_is_even, v_is_even,
-
- r_addr, r_wren, r_dout,
- s_addr, s_wren, s_dout,
- u_addr, u_wren, u_dout,
- v_addr, v_wren, v_dout,
-
- r_dbl_addr, r_dbl_din,
- s_dbl_addr, s_dbl_din,
- r_plus_s_addr, r_plus_s_din,
- u_half_addr, u_half_din,
- v_half_addr, v_half_din,
- u_minus_v_half_addr, u_minus_v_half_din,
- v_minus_u_half_addr, v_minus_u_half_din
- );
-
-
- //
- // Parameters
- //
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- input wire u_gt_v;
- input wire v_eq_1;
- input wire u_is_even;
- input wire v_is_even;
-
- output wire [BUFFER_ADDR_BITS-1:0] r_addr;
- output wire [BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_addr;
-
- output wire r_wren;
- output wire s_wren;
- output wire u_wren;
- output wire v_wren;
-
- output wire [ 32-1:0] r_dout;
- output wire [ 32-1:0] s_dout;
- output wire [ 32-1:0] u_dout;
- output wire [ 32-1:0] v_dout;
-
- output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr;
- output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr;
- output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr;
-
- input wire [ 32-1:0] r_dbl_din;
- input wire [ 32-1:0] s_dbl_din;
- input wire [ 32-1:0] r_plus_s_din;
- input wire [ 32-1:0] u_half_din;
- input wire [ 32-1:0] v_half_din;
- input wire [ 32-1:0] u_minus_v_half_din;
- input wire [ 32-1:0] v_minus_u_half_din;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [BUFFER_ADDR_BITS-1:0] addr_in;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ?
- addr_in + 1'b1 : addr_in_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ?
- addr_out + 1'b1 : addr_out_zero;
-
- assign r_addr = addr_out;
- assign s_addr = addr_out;
- assign u_addr = addr_out;
- assign v_addr = addr_out;
-
- assign r_dbl_addr = addr_in;
- assign s_dbl_addr = addr_in;
- assign r_plus_s_addr = addr_in;
- assign u_half_addr = addr_in;
- assign v_half_addr = addr_in;
- assign u_minus_v_half_addr = addr_in;
- assign v_minus_u_half_addr = addr_in;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment Logic
- //
- wire inc_addr_in;
- wire inc_addr_out;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1;
-
- assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
- assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop);
-
- always @(posedge clk) begin
- //
- if (inc_addr_in) addr_in <= addr_in_next;
- else addr_in <= addr_in_zero;
- //
- if (inc_addr_out) addr_out <= addr_out_next;
- else addr_out <= addr_out_zero;
- //
- end
-
- //
- // Write Enable Logic
- //
- wire wren_out;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1;
-
- assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop);
-
- reg r_wren_allow;
- reg s_wren_allow;
- reg u_wren_allow;
- reg v_wren_allow;
-
- assign r_wren = wren_out && r_wren_allow && !v_eq_1 && !rdy;
- assign s_wren = wren_out && s_wren_allow && !v_eq_1 && !rdy;
- assign u_wren = wren_out && u_wren_allow && !v_eq_1 && !rdy;
- assign v_wren = wren_out && v_wren_allow && !v_eq_1 && !rdy;
-
-
- //
- // Data Logic
- //
- reg [31: 0] r_dout_mux;
- reg [31: 0] s_dout_mux;
- reg [31: 0] u_dout_mux;
- reg [31: 0] v_dout_mux;
-
- assign r_dout = r_dout_mux;
- assign s_dout = s_dout_mux;
- assign u_dout = u_dout_mux;
- assign v_dout = v_dout_mux;
-
- always @(*) begin
- //
- // r, s, u, v
- //
- if (u_is_even) begin
- //
- u_dout_mux = u_half_din;
- v_dout_mux = {32{1'bX}};
- r_dout_mux = {32{1'bX}};
- s_dout_mux = s_dbl_din;
- //
- u_wren_allow = 1'b1;
- v_wren_allow = 1'b0;
- r_wren_allow = 1'b0;
- s_wren_allow = 1'b1;
- //
- end else begin
- //
- if (v_is_even) begin
- //
- u_dout_mux = {32{1'bX}};
- v_dout_mux = v_half_din;
- r_dout_mux = r_dbl_din;
- s_dout_mux = {32{1'bX}};
- //
- u_wren_allow = 1'b0;
- v_wren_allow = 1'b1;
- r_wren_allow = 1'b1;
- s_wren_allow = 1'b0;
- //
- end else begin
- //
- u_dout_mux = u_gt_v ? u_minus_v_half_din : {32{1'bX}};
- v_dout_mux = u_gt_v ? {32{1'bX}} : v_minus_u_half_din;
- r_dout_mux = u_gt_v ? r_plus_s_din : r_dbl_din;
- s_dout_mux = u_gt_v ? s_dbl_din : r_plus_s_din;
- //
- u_wren_allow = u_gt_v;
- v_wren_allow = !u_gt_v;
- r_wren_allow = 1'b1;
- s_wren_allow = 1'b1;
- //
- end
- //
- end
- //
- end
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v
deleted file mode 100644
index fb858a6..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v
+++ /dev/null
@@ -1,328 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_reduce_precalc
- (
- clk, rst_n,
- ena, rdy,
-
- k,
-
- s_is_odd, k_is_nul,
-
- r_addr, r_din, r_wren, r_dout,
- s_addr, s_din,
- u_addr, u_wren, u_dout,
- v_addr, v_wren, v_dout,
- q_addr, q_din
- );
-
-
- //
- // Parameters
- //
- parameter OPERAND_NUM_WORDS = 8;
- parameter OPERAND_ADDR_BITS = 3;
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
- parameter K_NUM_BITS = 10;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- input wire [ K_NUM_BITS-1:0] k;
-
- output wire s_is_odd;
- output wire k_is_nul;
-
- output wire [ BUFFER_ADDR_BITS-1:0] r_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] v_addr;
- output wire [OPERAND_ADDR_BITS-1:0] q_addr;
-
- input wire [ 32-1:0] r_din;
- input wire [ 32-1:0] s_din;
- input wire [ 32-1:0] q_din;
-
- output wire r_wren;
- output wire u_wren;
- output wire v_wren;
-
- output wire [ 32-1:0] r_dout;
- output wire [ 32-1:0] u_dout;
- output wire [ 32-1:0] v_dout;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [ BUFFER_ADDR_BITS-1:0] addr_in_buf;
- reg [OPERAND_ADDR_BITS-1:0] addr_in_op;
- reg [ BUFFER_ADDR_BITS-1:0] addr_out1;
- reg [ BUFFER_ADDR_BITS-1:0] addr_out2;
- reg [ BUFFER_ADDR_BITS-1:0] addr_out3;
-
- wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_last = BUFFER_NUM_WORDS - 1;
- wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_next = (addr_in_buf < addr_in_buf_last) ?
- addr_in_buf + 1'b1 : addr_in_buf_zero;
- wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_prev = (addr_in_buf > addr_in_buf_zero) ?
- addr_in_buf - 1'b1 : addr_in_buf_zero;
-
- wire [OPERAND_ADDR_BITS-1:0] addr_in_op_last = OPERAND_NUM_WORDS - 1;
- wire [OPERAND_ADDR_BITS-1:0] addr_in_op_zero = {OPERAND_ADDR_BITS{1'b0}};
- wire [OPERAND_ADDR_BITS-1:0] addr_in_op_next = (addr_in_op < addr_in_op_last) ?
- addr_in_op + 1'b1 : addr_in_op_zero;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ?
- addr_out1 + 1'b1 : addr_out1_zero;
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_prev = (addr_out1 > addr_out1_zero) ?
- addr_out1 - 1'b1 : addr_out1_zero;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ?
- addr_out2 - 1'b1 : addr_out2_last;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ?
- addr_out3 - 1'b1 : addr_out3_last;
-
-
- assign s_addr = addr_in_buf;
- assign q_addr = addr_in_op;
- assign r_addr = addr_out1;
- assign u_addr = addr_out2;
- assign v_addr = addr_out3;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment/Decrement Logic
- //
- wire inc_addr_buf_in;
- wire dec_addr_buf_in;
- wire inc_addr_op_in;
- wire inc_addr_out1;
- wire dec_addr_out1;
- wire dec_addr_out2;
- wire dec_addr_out3;
-
- wire [PROC_CNT_BITS-1:0] cnt_calc_flags = 0 * BUFFER_NUM_WORDS + 2;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_start = 0 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_stop = 1 * BUFFER_NUM_WORDS - 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_start = 1 * BUFFER_NUM_WORDS + 0;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_stop = 2 * BUFFER_NUM_WORDS - 2;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_start = 0 * OPERAND_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_stop = 1 * OPERAND_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_start = 1 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_stop = 2 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 4;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 3;
-
- assign inc_addr_buf_in = (proc_cnt >= cnt_inc_addr_buf_in_start) && (proc_cnt <= cnt_inc_addr_buf_in_stop);
- assign dec_addr_buf_in = (proc_cnt >= cnt_dec_addr_buf_in_start) && (proc_cnt <= cnt_dec_addr_buf_in_stop);
- assign inc_addr_op_in = (proc_cnt >= cnt_inc_addr_op_in_start) && (proc_cnt <= cnt_inc_addr_op_in_stop);
- assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
- assign dec_addr_out1 = (proc_cnt >= cnt_dec_addr_out1_start) && (proc_cnt <= cnt_dec_addr_out1_stop);
- assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
- assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
-
- always @(posedge clk) begin
- //
- if (rdy) begin
- //
- addr_in_buf <= addr_in_buf_zero;
- addr_in_op <= addr_in_op_zero;
- addr_out1 <= addr_out1_zero;
- addr_out2 <= addr_out2_last;
- addr_out3 <= addr_out3_last;
- //
- end else begin
- //
- if (inc_addr_buf_in) addr_in_buf <= addr_in_buf_next;
- else if (dec_addr_buf_in) addr_in_buf <= addr_in_buf_prev;
- //
- if (inc_addr_op_in) addr_in_op <= addr_in_op_next;
- else addr_in_op <= addr_in_op_zero;
- //
- if (inc_addr_out1) addr_out1 <= addr_out1_next;
- else if (dec_addr_out1) addr_out1 <= addr_out1_prev;
- //
- if (dec_addr_out2) addr_out2 <= addr_out2_prev;
- else addr_out2 <= addr_out2_last;
- //
- if (dec_addr_out3) addr_out3 <= addr_out3_prev;
- else addr_out3 <= addr_out3_last;
- //
- end
- //
- end
-
-
- //
- // Write Enable Logic
- //
- wire wren_out1;
- wire wren_out2;
- wire wren_out3;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 2;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 1 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 2 * BUFFER_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 4;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 3;
-
- assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
- assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
- assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
-
- assign r_wren = wren_out1;
- assign u_wren = wren_out2;
- assign v_wren = wren_out3;
-
- //
- // Adder (s + q)
- //
- wire [31: 0] q_din_masked;
- wire [31: 0] add32_s_plus_q_sum_out;
- wire add32_s_plus_q_carry_in;
- wire add32_s_plus_q_carry_out;
-
- adder32_wrapper add32_r_plus_s
- (
- .clk (clk),
- .a (s_din),
- .b (q_din_masked),
- .s (add32_s_plus_q_sum_out),
- .c_in (add32_s_plus_q_carry_in),
- .c_out (add32_s_plus_q_carry_out)
- );
-
-
- //
- // Carry Masking Logic
- //
- wire mask_carry;
-
- assign mask_carry = ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? 1'b0 : 1'b1;
-
-
- //
- // Addend Masking Logic
- //
- reg q_din_mask;
-
- always @(posedge clk)
- q_din_mask <= (addr_in_buf == addr_in_buf_last) ? 1'b1 : 1'b0;
-
- assign q_din_masked = q_din_mask ? {32{1'b0}} : q_din;
-
- assign add32_s_plus_q_carry_in = add32_s_plus_q_carry_out & ~mask_carry;
-
-
- //
- // Carry Bits
- //
- reg s_half_carry;
- reg s_plus_q_half_carry;
-
- always @(posedge clk) begin
- //
- s_half_carry <= ((proc_cnt >= cnt_wren_out2_start) && (proc_cnt < cnt_wren_out2_stop)) ?
- s_din[0] : 1'b0;
- //
- s_plus_q_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
- r_din[0] : 1'b0;
- //
- end
-
- //
- // Data Mapper
- //
- assign r_dout = add32_s_plus_q_sum_out;
- assign u_dout = {s_half_carry, s_din[31:1]};
- assign v_dout = {s_plus_q_half_carry, r_din[31:1]};
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
- //
- // Output Flags
- //
- reg s_is_odd_reg;
- reg k_is_nul_reg;
-
- assign s_is_odd = s_is_odd_reg;
- assign k_is_nul = k_is_nul_reg;
-
- always @(posedge clk)
- //
- if (proc_cnt == cnt_calc_flags) begin
- s_is_odd_reg <= s_din[0];
- k_is_nul_reg <= (k == {K_NUM_BITS{1'b0}}) ? 1'b1 : 1'b0;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v
deleted file mode 100644
index ea5b854..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v
+++ /dev/null
@@ -1,153 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_reduce_update
- (
- clk, rst_n,
- ena, rdy,
-
- s_is_odd, k_is_nul,
-
- s_addr, s_wren, s_dout,
- u_addr, u_din,
- v_addr, v_din
- );
-
-
- //
- // Parameters
- //
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- input wire s_is_odd;
- input wire k_is_nul;
-
- output wire [BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_addr;
-
- output wire s_wren;
-
- output wire [ 32-1:0] s_dout;
-
- input wire [ 32-1:0] u_din;
- input wire [ 32-1:0] v_din;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [BUFFER_ADDR_BITS-1:0] addr_in;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ?
- addr_in + 1'b1 : addr_in_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ?
- addr_out + 1'b1 : addr_out_zero;
-
- assign s_addr = addr_out;
- assign u_addr = addr_in;
- assign v_addr = addr_in;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment Logic
- //
- wire inc_addr_in;
- wire inc_addr_out;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1;
-
- assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
- assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop);
-
- always @(posedge clk) begin
- //
- if (inc_addr_in) addr_in <= addr_in_next;
- else addr_in <= addr_in_zero;
- //
- if (inc_addr_out) addr_out <= addr_out_next;
- else addr_out <= addr_out_zero;
- //
- end
-
- //
- // Write Enable Logic
- //
- wire wren_out;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1;
-
- assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop);
-
- assign s_wren = wren_out && !k_is_nul; //s_wren_allow && !v_eq_1 && !rdy;
-
-
- //
- // Data Logic
- //
- assign s_dout = s_is_odd ? v_din : u_din;
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/modinv_clog2.v b/rtl/modular/modular_invertor/modinv_clog2.v
deleted file mode 100644
index 2f7b64d..0000000
--- a/rtl/modular/modular_invertor/modinv_clog2.v
+++ /dev/null
@@ -1,10 +0,0 @@
-function integer clog2;
- input integer value;
- integer result;
- begin
- value = value - 1;
- for (result = 0; value > 0; result = result + 1)
- value = value >> 1;
- clog2 = result;
- end
-endfunction
diff --git a/rtl/modular/modular_invertor/modular_invertor.v b/rtl/modular/modular_invertor/modular_invertor.v
deleted file mode 100644
index e9f2460..0000000
--- a/rtl/modular/modular_invertor/modular_invertor.v
+++ /dev/null
@@ -1,981 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// modular_invertor.v
-// -----------------------------------------------------------------------------
-// Modular invertor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_invertor
- (
- clk, rst_n,
- ena, rdy,
- a_addr, q_addr, a1_addr, a1_wren,
- a_din, q_din, a1_dout
- );
-
-
- //
- // Parameters
- //
- parameter MAX_OPERAND_WIDTH = 256;
-
-
- //
- // clog2
- //
-`include "modinv_clog2.v"
-
-
- //
- // More Parameters
- //
- localparam OPERAND_NUM_WORDS = MAX_OPERAND_WIDTH / 32;
- localparam OPERAND_ADDR_BITS = clog2(OPERAND_NUM_WORDS);
-
- localparam BUFFER_NUM_WORDS = OPERAND_NUM_WORDS + 1;
- localparam BUFFER_ADDR_BITS = clog2(BUFFER_NUM_WORDS);
-
- localparam LOOP_NUM_ROUNDS = 2 * MAX_OPERAND_WIDTH;
- localparam ROUND_COUNTER_BITS = clog2(LOOP_NUM_ROUNDS);
-
- localparam K_NUM_BITS = clog2(LOOP_NUM_ROUNDS + 1);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
-
- input wire ena;
- output wire rdy;
-
- output wire [OPERAND_ADDR_BITS-1:0] a_addr;
- output reg [OPERAND_ADDR_BITS-1:0] q_addr;
- output wire [OPERAND_ADDR_BITS-1:0] a1_addr;
- output wire a1_wren;
-
- input wire [32-1:0] a_din;
- input wire [32-1:0] q_din;
- output wire [32-1:0] a1_dout;
-
-
- //
- // "Redundant" Power of 2 (K)
- //
- reg [K_NUM_BITS-1:0] k;
-
-
- //
- // Buffers
- //
- reg [BUFFER_ADDR_BITS-1:0] buf_r_wr_addr;
- reg [BUFFER_ADDR_BITS-1:0] buf_r_rd_addr;
- reg buf_r_wr_en;
- reg [ 32-1:0] buf_r_wr_din;
- wire [ 32-1:0] buf_r_wr_dout;
- wire [ 32-1:0] buf_r_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_r
- ( .clk(clk),
- .a_addr(buf_r_wr_addr), .a_out(buf_r_wr_dout), .a_wr(buf_r_wr_en), .a_in(buf_r_wr_din),
- .b_addr(buf_r_rd_addr), .b_out(buf_r_rd_dout)
- );
-
- reg [BUFFER_ADDR_BITS-1:0] buf_s_wr_addr;
- reg [BUFFER_ADDR_BITS-1:0] buf_s_rd_addr;
- reg buf_s_wr_en;
- reg [ 32-1:0] buf_s_wr_din;
- wire [ 32-1:0] buf_s_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_s
- ( .clk(clk),
- .a_addr(buf_s_wr_addr), .a_out(), .a_wr(buf_s_wr_en), .a_in(buf_s_wr_din),
- .b_addr(buf_s_rd_addr), .b_out(buf_s_rd_dout)
- );
-
- reg [BUFFER_ADDR_BITS-1:0] buf_u_wr_addr;
- reg [BUFFER_ADDR_BITS-1:0] buf_u_rd_addr;
- reg buf_u_wr_en;
- reg [ 32-1:0] buf_u_wr_din;
- wire [ 32-1:0] buf_u_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_u
- ( .clk(clk),
- .a_addr(buf_u_wr_addr), .a_out(), .a_wr(buf_u_wr_en), .a_in(buf_u_wr_din),
- .b_addr(buf_u_rd_addr), .b_out(buf_u_rd_dout)
- );
-
- reg [BUFFER_ADDR_BITS-1:0] buf_v_wr_addr;
- reg [BUFFER_ADDR_BITS-1:0] buf_v_rd_addr;
- reg buf_v_wr_en;
- reg [ 32-1:0] buf_v_wr_din;
- wire [ 32-1:0] buf_v_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_v
- ( .clk(clk),
- .a_addr(buf_v_wr_addr), .a_out(), .a_wr(buf_v_wr_en), .a_in(buf_v_wr_din),
- .b_addr(buf_v_rd_addr), .b_out(buf_v_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_rd_addr;
- wire buf_r_dbl_wr_en;
- wire [ 32-1:0] buf_r_dbl_wr_din;
- wire [ 32-1:0] buf_r_dbl_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_r_dbl
- ( .clk(clk),
- .a_addr(buf_r_dbl_wr_addr), .a_out(), .a_wr(buf_r_dbl_wr_en), .a_in(buf_r_dbl_wr_din),
- .b_addr(buf_r_dbl_rd_addr), .b_out(buf_r_dbl_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_rd_addr;
- wire buf_s_dbl_wr_en;
- wire [ 32-1:0] buf_s_dbl_wr_din;
- wire [ 32-1:0] buf_s_dbl_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_s_dbl
- ( .clk(clk),
- .a_addr(buf_s_dbl_wr_addr), .a_out(), .a_wr(buf_s_dbl_wr_en), .a_in(buf_s_dbl_wr_din),
- .b_addr(buf_s_dbl_rd_addr), .b_out(buf_s_dbl_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_rd_addr;
- wire buf_r_plus_s_wr_en;
- wire [ 32-1:0] buf_r_plus_s_wr_din;
- wire [ 32-1:0] buf_r_plus_s_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_r_plus_s
- ( .clk(clk),
- .a_addr(buf_r_plus_s_wr_addr), .a_out(), .a_wr(buf_r_plus_s_wr_en), .a_in(buf_r_plus_s_wr_din),
- .b_addr(buf_r_plus_s_rd_addr), .b_out(buf_r_plus_s_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_rd_addr;
- wire buf_u_minus_v_wr_en;
- wire [ 32-1:0] buf_u_minus_v_wr_din;
- wire [ 32-1:0] buf_u_minus_v_wr_dout;
-
- assign buf_u_minus_v_rd_addr = ~buf_u_minus_v_wr_addr;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_u_minus_v
- ( .clk(clk),
- .a_addr(buf_u_minus_v_wr_addr), .a_out(buf_u_minus_v_wr_dout), .a_wr(buf_u_minus_v_wr_en), .a_in(buf_u_minus_v_wr_din),
- .b_addr(buf_u_minus_v_rd_addr), .b_out()
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_rd_addr;
- wire buf_v_minus_u_wr_en;
- wire [ 32-1:0] buf_v_minus_u_wr_din;
- wire [ 32-1:0] buf_v_minus_u_wr_dout;
-
- assign buf_v_minus_u_rd_addr = ~buf_v_minus_u_wr_addr;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_v_minus_u
- ( .clk(clk),
- .a_addr(buf_v_minus_u_wr_addr), .a_out(buf_v_minus_u_wr_dout), .a_wr(buf_v_minus_u_wr_en), .a_in(buf_v_minus_u_wr_din),
- .b_addr(buf_v_minus_u_rd_addr), .b_out()
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_u_half_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_u_half_rd_addr;
- wire buf_u_half_wr_en;
- wire [ 32-1:0] buf_u_half_wr_din;
- wire [ 32-1:0] buf_u_half_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_u_half
- ( .clk(clk),
- .a_addr(buf_u_half_wr_addr), .a_out(), .a_wr(buf_u_half_wr_en), .a_in(buf_u_half_wr_din),
- .b_addr(buf_u_half_rd_addr), .b_out(buf_u_half_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_v_half_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_v_half_rd_addr;
- wire buf_v_half_wr_en;
- wire [ 32-1:0] buf_v_half_wr_din;
- wire [ 32-1:0] buf_v_half_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_v_half
- ( .clk(clk),
- .a_addr(buf_v_half_wr_addr), .a_out(), .a_wr(buf_v_half_wr_en), .a_in(buf_v_half_wr_din),
- .b_addr(buf_v_half_rd_addr), .b_out(buf_v_half_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_rd_addr;
- wire buf_u_minus_v_half_wr_en;
- wire [ 32-1:0] buf_u_minus_v_half_wr_din;
- wire [ 32-1:0] buf_u_minus_v_half_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_u_minus_v_half
- ( .clk(clk),
- .a_addr(buf_u_minus_v_half_wr_addr), .a_out(), .a_wr(buf_u_minus_v_half_wr_en), .a_in(buf_u_minus_v_half_wr_din),
- .b_addr(buf_u_minus_v_half_rd_addr), .b_out(buf_u_minus_v_half_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_rd_addr;
- wire buf_v_minus_u_half_wr_en;
- wire [ 32-1:0] buf_v_minus_u_half_wr_din;
- wire [ 32-1:0] buf_v_minus_u_half_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_v_minus_u_half
- ( .clk(clk),
- .a_addr(buf_v_minus_u_half_wr_addr), .a_out(), .a_wr(buf_v_minus_u_half_wr_en), .a_in(buf_v_minus_u_half_wr_din),
- .b_addr(buf_v_minus_u_half_rd_addr), .b_out(buf_v_minus_u_half_rd_dout)
- );
-
-
- //
- // Helper Modules
- //
- wire helper_init_ena;
- wire helper_invert_precalc_ena;
- wire helper_invert_compare_ena;
- wire helper_invert_update_ena;
- wire helper_reduce_precalc_ena;
- wire helper_reduce_update_ena;
- wire helper_copy_ena;
-
- wire helper_init_rdy;
- wire helper_invert_precalc_rdy;
- wire helper_invert_compare_rdy;
- wire helper_invert_update_rdy;
- wire helper_reduce_precalc_rdy;
- wire helper_reduce_update_rdy;
- wire helper_copy_rdy;
-
- wire helper_init_done = helper_init_rdy && !helper_init_ena;
- wire helper_invert_precalc_done = helper_invert_precalc_rdy && !helper_invert_precalc_ena;
- wire helper_invert_compare_done = helper_invert_compare_rdy && !helper_invert_compare_ena;
- wire helper_invert_update_done = helper_invert_update_rdy && !helper_invert_update_ena;
- wire helper_reduce_precalc_done = helper_reduce_precalc_rdy && !helper_reduce_precalc_ena;
- wire helper_reduce_update_done = helper_reduce_update_rdy && !helper_reduce_update_ena;
- wire helper_copy_done = helper_copy_rdy && !helper_copy_ena;
-
-
- //
- // Helper Module - Initialization
- //
- wire [ BUFFER_ADDR_BITS-1:0] helper_init_r_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_init_s_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_init_u_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_init_v_addr;
- wire [OPERAND_ADDR_BITS-1:0] helper_init_q_addr;
-
- wire helper_init_r_wren;
- wire helper_init_s_wren;
- wire helper_init_u_wren;
- wire helper_init_v_wren;
-
- wire [ 32-1:0] helper_init_r_data;
- wire [ 32-1:0] helper_init_s_data;
- wire [ 32-1:0] helper_init_u_data;
- wire [ 32-1:0] helper_init_v_data;
-
- modinv_helper_init #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
-
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_init
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_init_ena),
- .rdy (helper_init_rdy),
-
- .a_addr (a_addr),
- .q_addr (helper_init_q_addr),
-
- .r_addr (helper_init_r_addr),
- .s_addr (helper_init_s_addr),
- .u_addr (helper_init_u_addr),
- .v_addr (helper_init_v_addr),
-
- .q_din (q_din),
- .a_din (a_din),
-
- .r_dout (helper_init_r_data),
- .s_dout (helper_init_s_data),
- .u_dout (helper_init_u_data),
- .v_dout (helper_init_v_data),
-
- .r_wren (helper_init_r_wren),
- .s_wren (helper_init_s_wren),
- .u_wren (helper_init_u_wren),
- .v_wren (helper_init_v_wren)
- );
-
-
- //
- // Helper Module - Inversion Pre-Calculation
- //
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_r_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_s_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_u_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_v_addr;
-
- modinv_helper_invert_precalc #
- (
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_invert_precalc
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_invert_precalc_ena),
- .rdy (helper_invert_precalc_rdy),
-
- .r_addr (helper_invert_precalc_r_addr),
- .s_addr (helper_invert_precalc_s_addr),
- .u_addr (helper_invert_precalc_u_addr),
- .v_addr (helper_invert_precalc_v_addr),
-
- .r_din (buf_r_rd_dout),
- .s_din (buf_s_rd_dout),
- .u_din (buf_u_rd_dout),
- .v_din (buf_v_rd_dout),
-
- .r_dbl_addr (buf_r_dbl_wr_addr),
- .s_dbl_addr (buf_s_dbl_wr_addr),
- .r_plus_s_addr (buf_r_plus_s_wr_addr),
-
- .u_half_addr (buf_u_half_wr_addr),
- .v_half_addr (buf_v_half_wr_addr),
- .u_minus_v_addr (buf_u_minus_v_wr_addr),
- .v_minus_u_addr (buf_v_minus_u_wr_addr),
- .u_minus_v_half_addr (buf_u_minus_v_half_wr_addr),
- .v_minus_u_half_addr (buf_v_minus_u_half_wr_addr),
-
- .r_dbl_dout (buf_r_dbl_wr_din),
- .s_dbl_dout (buf_s_dbl_wr_din),
- .r_plus_s_dout (buf_r_plus_s_wr_din),
-
- .u_half_dout (buf_u_half_wr_din),
- .v_half_dout (buf_v_half_wr_din),
- .u_minus_v_dout (buf_u_minus_v_wr_din),
- .v_minus_u_dout (buf_v_minus_u_wr_din),
- .u_minus_v_half_dout (buf_u_minus_v_half_wr_din),
- .v_minus_u_half_dout (buf_v_minus_u_half_wr_din),
-
- .r_dbl_wren (buf_r_dbl_wr_en),
- .s_dbl_wren (buf_s_dbl_wr_en),
- .r_plus_s_wren (buf_r_plus_s_wr_en),
-
- .u_half_wren (buf_u_half_wr_en),
- .v_half_wren (buf_v_half_wr_en),
- .u_minus_v_wren (buf_u_minus_v_wr_en),
- .v_minus_u_wren (buf_v_minus_u_wr_en),
- .u_minus_v_half_wren (buf_u_minus_v_half_wr_en),
- .v_minus_u_half_wren (buf_v_minus_u_half_wr_en),
-
- .u_minus_v_din (buf_u_minus_v_wr_dout),
- .v_minus_u_din (buf_v_minus_u_wr_dout)
- );
-
-
- //
- // Helper Module - Inversion Comparison
- //
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_u_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_v_addr;
-
- wire flag_invert_u_gt_v;
- wire flag_invert_v_eq_1;
- wire flag_invert_u_is_even;
- wire flag_invert_v_is_even;
-
- modinv_helper_invert_compare #
- (
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_invert_compare
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_invert_compare_ena),
- .rdy (helper_invert_compare_rdy),
-
- .u_addr (helper_invert_compare_u_addr),
- .v_addr (helper_invert_compare_v_addr),
-
- .u_din (buf_u_rd_dout),
- .v_din (buf_v_rd_dout),
-
- .u_gt_v (flag_invert_u_gt_v),
- .v_eq_1 (flag_invert_v_eq_1),
- .u_is_even (flag_invert_u_is_even),
- .v_is_even (flag_invert_v_is_even)
- );
-
-
- //
- // Helper Module - Inversion Update
- //
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_r_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_s_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_u_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_v_addr;
-
- wire helper_invert_update_r_wren;
- wire helper_invert_update_s_wren;
- wire helper_invert_update_u_wren;
- wire helper_invert_update_v_wren;
-
- wire [ 32-1:0] helper_invert_update_r_data;
- wire [ 32-1:0] helper_invert_update_s_data;
- wire [ 32-1:0] helper_invert_update_u_data;
- wire [ 32-1:0] helper_invert_update_v_data;
-
- modinv_helper_invert_update #
- (
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_invert_update
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_invert_update_ena),
- .rdy (helper_invert_update_rdy),
-
- .u_gt_v (flag_invert_u_gt_v),
- .v_eq_1 (flag_invert_v_eq_1),
- .u_is_even (flag_invert_u_is_even),
- .v_is_even (flag_invert_v_is_even),
-
- .r_addr (helper_invert_update_r_addr),
- .s_addr (helper_invert_update_s_addr),
- .u_addr (helper_invert_update_u_addr),
- .v_addr (helper_invert_update_v_addr),
-
- .r_wren (helper_invert_update_r_wren),
- .s_wren (helper_invert_update_s_wren),
- .u_wren (helper_invert_update_u_wren),
- .v_wren (helper_invert_update_v_wren),
-
- .r_dout (helper_invert_update_r_data),
- .s_dout (helper_invert_update_s_data),
- .u_dout (helper_invert_update_u_data),
- .v_dout (helper_invert_update_v_data),
-
- .r_dbl_addr (buf_r_dbl_rd_addr),
- .s_dbl_addr (buf_s_dbl_rd_addr),
- .r_plus_s_addr (buf_r_plus_s_rd_addr),
- .u_half_addr (buf_u_half_rd_addr),
- .v_half_addr (buf_v_half_rd_addr),
- .u_minus_v_half_addr (buf_u_minus_v_half_rd_addr),
- .v_minus_u_half_addr (buf_v_minus_u_half_rd_addr),
-
- .r_dbl_din (buf_r_dbl_rd_dout),
- .s_dbl_din (buf_s_dbl_rd_dout),
- .r_plus_s_din (buf_r_plus_s_rd_dout),
- .u_half_din (buf_u_half_rd_dout),
- .v_half_din (buf_v_half_rd_dout),
- .u_minus_v_half_din (buf_u_minus_v_half_rd_dout),
- .v_minus_u_half_din (buf_v_minus_u_half_rd_dout)
- );
-
-
- //
- // Helper Module - Reduction Pre-Calculation
- //
- wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_r_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_s_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_u_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_v_addr;
- wire [OPERAND_ADDR_BITS-1:0] helper_reduce_precalc_q_addr;
-
- wire helper_reduce_precalc_r_wren;
- wire helper_reduce_precalc_u_wren;
- wire helper_reduce_precalc_v_wren;
-
- wire [ 32-1:0] helper_reduce_precalc_r_data;
- wire [ 32-1:0] helper_reduce_precalc_u_data;
- wire [ 32-1:0] helper_reduce_precalc_v_data;
-
- wire flag_reduce_s_is_odd;
- wire flag_invert_k_is_nul;
-
- modinv_helper_reduce_precalc #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS),
- .K_NUM_BITS (K_NUM_BITS)
- )
- helper_reduce_precalc
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_reduce_precalc_ena),
- .rdy (helper_reduce_precalc_rdy),
-
- .r_addr (helper_reduce_precalc_r_addr),
- .s_addr (helper_reduce_precalc_s_addr),
- .u_addr (helper_reduce_precalc_u_addr),
- .v_addr (helper_reduce_precalc_v_addr),
- .q_addr (helper_reduce_precalc_q_addr),
-
- .k (k),
-
- .s_is_odd (flag_reduce_s_is_odd),
- .k_is_nul (flag_reduce_k_is_nul),
-
- .r_din (buf_r_wr_dout),
- .s_din (buf_s_rd_dout),
- .q_din (q_din),
-
- .r_wren (helper_reduce_precalc_r_wren),
- .u_wren (helper_reduce_precalc_u_wren),
- .v_wren (helper_reduce_precalc_v_wren),
-
- .r_dout (helper_reduce_precalc_r_data),
- .u_dout (helper_reduce_precalc_u_data),
- .v_dout (helper_reduce_precalc_v_data)
- );
-
- //
- // Helper Module - Reduction Update
- //
- wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_s_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_u_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_v_addr;
-
- wire helper_reduce_update_s_wren;
-
- wire [ 32-1:0] helper_reduce_update_s_data;
-
- modinv_helper_reduce_update #
- (
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_reduce_update
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_reduce_update_ena),
- .rdy (helper_reduce_update_rdy),
-
- .s_is_odd (flag_reduce_s_is_odd),
- .k_is_nul (flag_reduce_k_is_nul),
-
- .s_addr (helper_reduce_update_s_addr),
- .u_addr (helper_reduce_update_u_addr),
- .v_addr (helper_reduce_update_v_addr),
-
- .s_wren (helper_reduce_update_s_wren),
-
- .s_dout (helper_reduce_update_s_data),
-
- .u_din (buf_u_rd_dout),
- .v_din (buf_v_rd_dout)
- );
-
-
- //
- // Helper Module - Copying
- //
- wire [BUFFER_ADDR_BITS-1:0] helper_copy_s_addr;
-
- modinv_helper_copy #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
-
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_copy
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_copy_ena),
- .rdy (helper_copy_rdy),
-
- .s_addr (helper_copy_s_addr),
- .a1_addr (a1_addr),
-
- .s_din (buf_s_rd_dout),
-
- .a1_dout (a1_dout),
-
- .a1_wren (a1_wren)
- );
-
-
- //
- // Round Counter
- //
- reg [ROUND_COUNTER_BITS-1:0] round_counter;
- wire [ROUND_COUNTER_BITS-1:0] round_counter_max = LOOP_NUM_ROUNDS - 1;
- wire [ROUND_COUNTER_BITS-1:0] round_counter_zero = {ROUND_COUNTER_BITS{1'b0}};
- wire [ROUND_COUNTER_BITS-1:0] round_counter_next =
- (round_counter < round_counter_max) ? round_counter + 1'b1 : round_counter_zero;
-
-
- //
- // FSM
- //
- localparam FSM_STATE_IDLE = 4'd0;
-
- localparam FSM_STATE_INIT = 4'd1;
-
- localparam FSM_STATE_INVERT_PRECALC = 4'd11;
- localparam FSM_STATE_INVERT_COMPARE = 4'd12;
- localparam FSM_STATE_INVERT_UPDATE = 4'd13;
-
- localparam FSM_STATE_REDUCE_PRECALC = 4'd14;
- localparam FSM_STATE_REDUCE_UPDATE = 4'd15;
-
- localparam FSM_STATE_COPY = 4'd2;
-
- localparam FSM_STATE_DONE = 4'd3;
-
- reg [3:0] fsm_state = FSM_STATE_IDLE;
- reg [3:0] fsm_state_dly = FSM_STATE_IDLE;
-
- wire fsm_state_new = (fsm_state != fsm_state_dly);
-
- wire [3:0] fsm_state_invert_next = (round_counter < round_counter_max) ?
- FSM_STATE_INVERT_PRECALC : FSM_STATE_REDUCE_PRECALC;
-
- wire [3:0] fsm_state_reduce_next = (round_counter < round_counter_max) ?
- FSM_STATE_REDUCE_PRECALC : FSM_STATE_COPY;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
- else case (fsm_state)
- FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_INIT : FSM_STATE_IDLE;
- FSM_STATE_INIT: fsm_state <= helper_init_done ? FSM_STATE_INVERT_PRECALC : FSM_STATE_INIT;
- FSM_STATE_INVERT_PRECALC: fsm_state <= helper_invert_precalc_done ? FSM_STATE_INVERT_COMPARE : FSM_STATE_INVERT_PRECALC;
- FSM_STATE_INVERT_COMPARE: fsm_state <= helper_invert_compare_done ? FSM_STATE_INVERT_UPDATE : FSM_STATE_INVERT_COMPARE;
- FSM_STATE_INVERT_UPDATE: fsm_state <= helper_invert_update_done ? fsm_state_invert_next : FSM_STATE_INVERT_UPDATE;
- FSM_STATE_REDUCE_PRECALC: fsm_state <= helper_reduce_precalc_done ? FSM_STATE_REDUCE_UPDATE : FSM_STATE_REDUCE_PRECALC;
- FSM_STATE_REDUCE_UPDATE: fsm_state <= helper_reduce_update_done ? fsm_state_reduce_next : FSM_STATE_REDUCE_UPDATE;
- FSM_STATE_COPY: fsm_state <= helper_copy_done ? FSM_STATE_DONE : FSM_STATE_COPY;
- FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE;
- default: fsm_state <= FSM_STATE_IDLE;
- endcase
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) fsm_state_dly <= FSM_STATE_IDLE;
- else fsm_state_dly <= fsm_state;
-
-
- assign helper_init_ena = (fsm_state == FSM_STATE_INIT) && fsm_state_new;
- assign helper_invert_precalc_ena = (fsm_state == FSM_STATE_INVERT_PRECALC) && fsm_state_new;
- assign helper_invert_compare_ena = (fsm_state == FSM_STATE_INVERT_COMPARE) && fsm_state_new;
- assign helper_invert_update_ena = (fsm_state == FSM_STATE_INVERT_UPDATE) && fsm_state_new;
- assign helper_reduce_precalc_ena = (fsm_state == FSM_STATE_REDUCE_PRECALC) && fsm_state_new;
- assign helper_reduce_update_ena = (fsm_state == FSM_STATE_REDUCE_UPDATE) && fsm_state_new;
- assign helper_copy_ena = (fsm_state == FSM_STATE_COPY) && fsm_state_new;
-
-
- //
- // Counter Increment
- //
- always @(posedge clk) begin
- //
- if ((fsm_state == FSM_STATE_INIT) && helper_init_done)
- round_counter <= round_counter_zero;
- //
- if ((fsm_state == FSM_STATE_INVERT_UPDATE) && helper_invert_update_done)
- round_counter <= round_counter_next;
- //
- if ((fsm_state == FSM_STATE_REDUCE_UPDATE) && helper_reduce_update_done)
- round_counter <= round_counter_next;
- //
- end
-
-
- //
- // Q Address Selector
- //
- always @(*) begin
- //
- case (fsm_state)
- FSM_STATE_INIT: q_addr = helper_init_q_addr;
- FSM_STATE_REDUCE_PRECALC: q_addr = helper_reduce_precalc_q_addr;
- default: q_addr = {OPERAND_ADDR_BITS{1'bX}};
- endcase
- //
- end
-
-
- //
- // Buffer Address Selector
- //
- always @(*) begin
- //
- // Write Ports
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_r_wr_addr = helper_init_r_addr;
- FSM_STATE_INVERT_UPDATE: buf_r_wr_addr = helper_invert_update_r_addr;
- FSM_STATE_REDUCE_PRECALC: buf_r_wr_addr = helper_reduce_precalc_r_addr;
- default: buf_r_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_s_wr_addr = helper_init_s_addr;
- FSM_STATE_INVERT_UPDATE: buf_s_wr_addr = helper_invert_update_s_addr;
- FSM_STATE_REDUCE_UPDATE: buf_s_wr_addr = helper_reduce_update_s_addr;
- default: buf_s_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_u_wr_addr = helper_init_u_addr;
- FSM_STATE_INVERT_UPDATE: buf_u_wr_addr = helper_invert_update_u_addr;
- FSM_STATE_REDUCE_PRECALC: buf_u_wr_addr = helper_reduce_precalc_u_addr;
- default: buf_u_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_v_wr_addr = helper_init_v_addr;
- FSM_STATE_INVERT_UPDATE: buf_v_wr_addr = helper_invert_update_v_addr;
- FSM_STATE_REDUCE_PRECALC: buf_v_wr_addr = helper_reduce_precalc_v_addr;
- default: buf_v_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- // Read Ports
- //
- case (fsm_state)
- FSM_STATE_INVERT_PRECALC: buf_r_rd_addr = helper_invert_precalc_r_addr;
- default: buf_r_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INVERT_PRECALC: buf_s_rd_addr = helper_invert_precalc_s_addr;
- FSM_STATE_REDUCE_PRECALC: buf_s_rd_addr = helper_reduce_precalc_s_addr;
- FSM_STATE_COPY: buf_s_rd_addr = helper_copy_s_addr;
- default: buf_s_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INVERT_PRECALC: buf_u_rd_addr = helper_invert_precalc_u_addr;
- FSM_STATE_INVERT_COMPARE: buf_u_rd_addr = helper_invert_compare_u_addr;
- FSM_STATE_REDUCE_UPDATE: buf_u_rd_addr = helper_reduce_update_u_addr;
- default: buf_u_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INVERT_PRECALC: buf_v_rd_addr = helper_invert_precalc_v_addr;
- FSM_STATE_INVERT_COMPARE: buf_v_rd_addr = helper_invert_compare_v_addr;
- FSM_STATE_REDUCE_UPDATE: buf_v_rd_addr = helper_reduce_update_v_addr;
- default: buf_v_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- end
-
-
- //
- // Buffer Write Enable Logic
- //
- always @(*) begin
- //
- // Write Ports
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_r_wr_en = helper_init_r_wren;
- FSM_STATE_INVERT_UPDATE: buf_r_wr_en = helper_invert_update_r_wren;
- FSM_STATE_REDUCE_PRECALC: buf_r_wr_en = helper_reduce_precalc_r_wren;
- default: buf_r_wr_en = 1'b0;
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_s_wr_en = helper_init_s_wren;
- FSM_STATE_INVERT_UPDATE: buf_s_wr_en = helper_invert_update_s_wren;
- FSM_STATE_REDUCE_UPDATE: buf_s_wr_en = helper_reduce_update_s_wren;
- default: buf_s_wr_en = 1'b0;
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_u_wr_en = helper_init_u_wren;
- FSM_STATE_INVERT_UPDATE: buf_u_wr_en = helper_invert_update_u_wren;
- FSM_STATE_REDUCE_PRECALC: buf_u_wr_en = helper_reduce_precalc_u_wren;
- default: buf_u_wr_en = 1'b0;
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_v_wr_en = helper_init_v_wren;
- FSM_STATE_INVERT_UPDATE: buf_v_wr_en = helper_invert_update_v_wren;
- FSM_STATE_REDUCE_PRECALC: buf_v_wr_en = helper_reduce_precalc_v_wren;
- default: buf_v_wr_en = 1'b0;
- endcase
- //
- end
-
-
- //
- // Buffer Write Data Selector
- //
- always @(*) begin
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_r_wr_din = helper_init_r_data;
- FSM_STATE_INVERT_UPDATE: buf_r_wr_din = helper_invert_update_r_data;
- FSM_STATE_REDUCE_PRECALC: buf_r_wr_din = helper_reduce_precalc_r_data;
- default: buf_r_wr_din = {32{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_s_wr_din = helper_init_s_data;
- FSM_STATE_INVERT_UPDATE: buf_s_wr_din = helper_invert_update_s_data;
- FSM_STATE_REDUCE_UPDATE: buf_s_wr_din = helper_reduce_update_s_data;
- default: buf_s_wr_din = {32{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_u_wr_din = helper_init_u_data;
- FSM_STATE_INVERT_UPDATE: buf_u_wr_din = helper_invert_update_u_data;
- FSM_STATE_REDUCE_PRECALC: buf_u_wr_din = helper_reduce_precalc_u_data;
- default: buf_u_wr_din = {32{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_v_wr_din = helper_init_v_data;
- FSM_STATE_INVERT_UPDATE: buf_v_wr_din = helper_invert_update_v_data;
- FSM_STATE_REDUCE_PRECALC: buf_v_wr_din = helper_reduce_precalc_v_data;
- default: buf_v_wr_din = {32{1'bX}};
- endcase
- //
- end
-
-
- //
- // Ready Logic
- //
- reg rdy_reg = 1'b1;
-
- assign rdy = rdy_reg;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) rdy_reg <= 1'b1;
- else begin
-
- /* clear */
- if (rdy && ena) rdy_reg <= 1'b0;
-
- /* set */
- if (!rdy && (fsm_state == FSM_STATE_DONE)) rdy_reg <= 1'b1;
-
- end
-
-
- //
- // Store Redundant Power of 2 (K)
- //
- always @(posedge clk)
- //
- if (helper_init_ena)
- k <= {K_NUM_BITS{1'b0}};
- else begin
-
- if (helper_invert_update_ena && !flag_invert_v_eq_1)
- k <= k + 1'b1;
-
- if (helper_reduce_update_ena && (k != {K_NUM_BITS{1'b0}}))
- k <= k - 1'b1;
-
- end
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_multiplier_256.v b/rtl/modular/modular_multiplier_256.v
index c2f2661..2b35233 100644
--- a/rtl/modular/modular_multiplier_256.v
+++ b/rtl/modular/modular_multiplier_256.v
@@ -1,402 +1,402 @@
-//------------------------------------------------------------------------------
-//
-// modular_multiplier_256.v
-// -----------------------------------------------------------------------------
-// Modular multiplier.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_multiplier_256
- (
- clk, rst_n,
- ena, rdy,
- a_addr, b_addr, n_addr, p_addr, p_wren,
- a_din, b_din, n_din, p_dout
- );
-
-
- //
- // Constants
- //
- localparam OPERAND_NUM_WORDS = 8;
- localparam WORD_COUNTER_WIDTH = 3;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
- end
- endfunction
-
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
- WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word
- output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word
- output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
- output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
- output wire p_wren; // store current P word now
-
- input wire [ 31:0] a_din; // A
- input wire [ 31:0] b_din; // B
- input wire [ 31:0] n_din; // N (must be P-256!)
- output wire [ 31:0] p_dout; // P = A * B mod N
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH-1:0] index_a;
- reg [WORD_COUNTER_WIDTH-1:0] index_b;
-
- /* map registers to output ports */
- assign a_addr = index_a;
- assign b_addr = index_b;
-
- //
- // FSM
- //
- localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
- wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
- wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
- wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
- wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
- wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
- wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
- wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
- wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
- wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];
-
- wire inc_index_a = |fsm_shreg_inc_index_a;
- wire store_word_a = |fsm_shreg_store_word_a;
- wire inc_index_b = |fsm_shreg_inc_index_b;
- wire clear_mac_ab = |fsm_shreg_inc_index_b;
- wire shift_wide_a = |fsm_shreg_inc_index_b;
- wire enable_mac_ab = |fsm_shreg_inc_index_b;
- wire store_si_msb = |fsm_shreg_store_si_msb;
- wire store_si_lsb = fsm_shreg_store_si_lsb;
- wire shift_si = |fsm_shreg_shift_si;
- wire mask_cw1_sum = fsm_shreg_mask_cw1_sum;
- wire store_c_word = |fsm_shreg_store_c_word;
- wire reduce_start = fsm_shreg_reduce_start;
- wire reduce_stop = fsm_shreg_reduce_stop;
-
-
- //
- // FSM Logic
- //
- wire reduce_done;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy)
- fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else if (!reduce_stop || reduce_done)
- fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- reg index_b_ff;
-
- always @(posedge clk)
- //
- if (inc_index_b) index_b_ff <= ~index_b_ff;
- else index_b_ff <= 1'b0;
-
- always @(posedge clk)
- //
- if (rdy) begin
- //
- index_a <= WORD_INDEX_ZERO;
- index_b <= WORD_INDEX_LAST;
- //
- end else begin
- //
- if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
- if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
- //
- end
-
-
- //
- // Wide Operand Buffer
- //
- reg [255:0] buf_a_wide;
-
- always @(posedge clk)
- //
- if (store_word_a)
- buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]};
- else if (shift_wide_a)
- buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
-
-
- //
- // Multiplier Array
- //
- wire mac_inhibit; // control signal to pause all accumulators
-
- wire [46: 0] mac[0:15]; // outputs of all accumulators
- reg [15: 0] mac_clear; // individual per-accumulator clear flag
-
- assign mac_inhibit = ~enable_mac_ab;
-
- always @(posedge clk)
- //
- if (!clear_mac_ab)
- mac_clear <= {16{1'b1}};
- else begin
-
- if (mac_clear == {16{1'b1}})
- mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}};
- else
- mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}};
-
-
- end
-
- //
- // Array of parallel multipliers
- //
- genvar i;
- generate for (i=0; i<16; i=i+1)
- begin : gen_mac_array
- //
- mac16_wrapper mac16_inst
- (
- .clk (clk),
- .ce (~mac_inhibit),
-
- .clr (mac_clear[i]),
-
- .a (buf_a_wide[16*i+:16]),
- .b (index_b_ff ? b_din[15:0] : b_din[31:16]),
- .s (mac[i])
- );
- //
- end
- endgenerate
-
- //
- // Intermediate Words
- //
- reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
- reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;
-
-
- wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
- wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;
-
- generate for (i=0; i<16; i=i+1)
- begin : gen_si_lsb_new
- assign si_lsb_new[47*i+:47] = mac[15-i];
- end
- endgenerate
-
- generate for (i=1; i<16; i=i+1)
- begin : gen_si_msb_new
- assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47];
- end
- endgenerate
-
- always @(posedge clk) begin
- //
- if (shift_si) begin
- si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
- si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
- end else begin
-
- if (store_si_msb)
- si_msb <= si_msb_new;
-
- if (store_si_lsb)
- si_lsb <= si_lsb_new;
- end
-
- end
-
-
- //
- // Accumulators
- //
- wire [46: 0] add47_cw0_s;
- wire [46: 0] add47_cw1_s;
-
-
- //
- // cw0, b, cw1, b
- //
- reg [30: 0] si_prev_dly;
- reg [15: 0] si_next_dly;
-
- always @(posedge clk)
- //
- if (shift_si)
- si_prev_dly <= si_lsb[93:63];
- else
- si_prev_dly <= {31{1'b0}};
-
- always @(posedge clk)
- //
- si_next_dly <= si_lsb[62:47];
-
- wire [46: 0] add47_cw0_a = si_lsb[46:0];
- wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly};
-
- wire [46: 0] add47_cw1_a = add47_cw0_s;
- wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}};
-
- adder47_wrapper add47_cw0_inst
- (
- .clk (clk),
- .a (add47_cw0_a),
- .b (add47_cw0_b),
- .s (add47_cw0_s)
- );
-
- adder47_wrapper add47_cw1_inst
- (
- .clk (clk),
- .a (add47_cw1_a),
- .b (add47_cw1_b),
- .s (add47_cw1_s)
- );
-
-
-
- //
- // Full-Size Product
- //
- reg [WORD_COUNTER_WIDTH:0] bram_c_addr;
-
- wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
- wire [ 31:0] reduce_c_word;
-
- always @(posedge clk)
- //
- if (store_c_word)
- bram_c_addr <= bram_c_addr + 1'b1;
- else
- bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}};
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1)
- )
- bram_c_inst
- (
- .clk (clk),
-
- .a_addr (bram_c_addr),
- .a_wr (store_c_word),
- .a_in (add47_cw1_s[31:0]),
- .a_out (),
-
- .b_addr (reduce_c_addr),
- .b_out (reduce_c_word)
- );
-
-
- //
- // Reduction Stage
- //
- modular_reductor_256 reduce_256_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_done),
-
- .x_addr (reduce_c_addr),
- .n_addr (n_addr),
- .p_addr (p_addr),
- .p_wren (p_wren),
-
- .x_din (reduce_c_word),
- .n_din (n_din),
- .p_dout (p_dout)
- );
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// modular_multiplier_256.v
+// -----------------------------------------------------------------------------
+// Modular multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_multiplier_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ a_addr, b_addr, n_addr, p_addr, p_wren,
+ a_din, b_din, n_din, p_dout
+ );
+
+
+ //
+ // Constants
+ //
+ localparam OPERAND_NUM_WORDS = 8;
+ localparam WORD_COUNTER_WIDTH = 3;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
+ WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
+ end
+ endfunction
+
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+ WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word
+ output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
+ output wire p_wren; // store current P word now
+
+ input wire [ 31:0] a_din; // A
+ input wire [ 31:0] b_din; // B
+ input wire [ 31:0] n_din; // N (must be P-256!)
+ output wire [ 31:0] p_dout; // P = A * B mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] index_a;
+ reg [WORD_COUNTER_WIDTH-1:0] index_b;
+
+ /* map registers to output ports */
+ assign a_addr = index_a;
+ assign b_addr = index_b;
+
+ //
+ // FSM
+ //
+ localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+ wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+ wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+ wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
+ wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
+ wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];
+
+ wire inc_index_a = |fsm_shreg_inc_index_a;
+ wire store_word_a = |fsm_shreg_store_word_a;
+ wire inc_index_b = |fsm_shreg_inc_index_b;
+ wire clear_mac_ab = |fsm_shreg_inc_index_b;
+ wire shift_wide_a = |fsm_shreg_inc_index_b;
+ wire enable_mac_ab = |fsm_shreg_inc_index_b;
+ wire store_si_msb = |fsm_shreg_store_si_msb;
+ wire store_si_lsb = fsm_shreg_store_si_lsb;
+ wire shift_si = |fsm_shreg_shift_si;
+ wire mask_cw1_sum = fsm_shreg_mask_cw1_sum;
+ wire store_c_word = |fsm_shreg_store_c_word;
+ wire reduce_start = fsm_shreg_reduce_start;
+ wire reduce_stop = fsm_shreg_reduce_stop;
+
+
+ //
+ // FSM Logic
+ //
+ wire reduce_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy)
+ fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else if (!reduce_stop || reduce_done)
+ fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ reg index_b_ff;
+
+ always @(posedge clk)
+ //
+ if (inc_index_b) index_b_ff <= ~index_b_ff;
+ else index_b_ff <= 1'b0;
+
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ index_a <= WORD_INDEX_ZERO;
+ index_b <= WORD_INDEX_LAST;
+ //
+ end else begin
+ //
+ if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
+ if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
+ //
+ end
+
+
+ //
+ // Wide Operand Buffer
+ //
+ reg [255:0] buf_a_wide;
+
+ always @(posedge clk)
+ //
+ if (store_word_a)
+ buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]};
+ else if (shift_wide_a)
+ buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
+
+
+ //
+ // Multiplier Array
+ //
+ wire mac_inhibit; // control signal to pause all accumulators
+
+ wire [46: 0] mac[0:15]; // outputs of all accumulators
+ reg [15: 0] mac_clear; // individual per-accumulator clear flag
+
+ assign mac_inhibit = ~enable_mac_ab;
+
+ always @(posedge clk)
+ //
+ if (!clear_mac_ab)
+ mac_clear <= {16{1'b1}};
+ else begin
+
+ if (mac_clear == {16{1'b1}})
+ mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}};
+ else
+ mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}};
+
+
+ end
+
+ //
+ // Array of parallel multipliers
+ //
+ genvar i;
+ generate for (i=0; i<16; i=i+1)
+ begin : gen_mac_array
+ //
+ mac16_wrapper mac16_inst
+ (
+ .clk (clk),
+ .ce (~mac_inhibit),
+
+ .clr (mac_clear[i]),
+
+ .a (buf_a_wide[16*i+:16]),
+ .b (index_b_ff ? b_din[15:0] : b_din[31:16]),
+ .s (mac[i])
+ );
+ //
+ end
+ endgenerate
+
+ //
+ // Intermediate Words
+ //
+ reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
+ reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;
+
+
+ wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
+ wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;
+
+ generate for (i=0; i<16; i=i+1)
+ begin : gen_si_lsb_new
+ assign si_lsb_new[47*i+:47] = mac[15-i];
+ end
+ endgenerate
+
+ generate for (i=1; i<16; i=i+1)
+ begin : gen_si_msb_new
+ assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47];
+ end
+ endgenerate
+
+ always @(posedge clk) begin
+ //
+ if (shift_si) begin
+ si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
+ si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
+ end else begin
+
+ if (store_si_msb)
+ si_msb <= si_msb_new;
+
+ if (store_si_lsb)
+ si_lsb <= si_lsb_new;
+ end
+
+ end
+
+
+ //
+ // Accumulators
+ //
+ wire [46: 0] add47_cw0_s;
+ wire [46: 0] add47_cw1_s;
+
+
+ //
+ // cw0, b, cw1, b
+ //
+ reg [30: 0] si_prev_dly;
+ reg [15: 0] si_next_dly;
+
+ always @(posedge clk)
+ //
+ if (shift_si)
+ si_prev_dly <= si_lsb[93:63];
+ else
+ si_prev_dly <= {31{1'b0}};
+
+ always @(posedge clk)
+ //
+ si_next_dly <= si_lsb[62:47];
+
+ wire [46: 0] add47_cw0_a = si_lsb[46:0];
+ wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly};
+
+ wire [46: 0] add47_cw1_a = add47_cw0_s;
+ wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}};
+
+ adder47_wrapper add47_cw0_inst
+ (
+ .clk (clk),
+ .a (add47_cw0_a),
+ .b (add47_cw0_b),
+ .s (add47_cw0_s)
+ );
+
+ adder47_wrapper add47_cw1_inst
+ (
+ .clk (clk),
+ .a (add47_cw1_a),
+ .b (add47_cw1_b),
+ .s (add47_cw1_s)
+ );
+
+
+
+ //
+ // Full-Size Product
+ //
+ reg [WORD_COUNTER_WIDTH:0] bram_c_addr;
+
+ wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
+ wire [ 31:0] reduce_c_word;
+
+ always @(posedge clk)
+ //
+ if (store_c_word)
+ bram_c_addr <= bram_c_addr + 1'b1;
+ else
+ bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}};
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1)
+ )
+ bram_c_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_c_addr),
+ .a_wr (store_c_word),
+ .a_in (add47_cw1_s[31:0]),
+ .a_out (),
+
+ .b_addr (reduce_c_addr),
+ .b_out (reduce_c_word)
+ );
+
+
+ //
+ // Reduction Stage
+ //
+ modular_reductor_256 reduce_256_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_done),
+
+ .x_addr (reduce_c_addr),
+ .n_addr (n_addr),
+ .p_addr (p_addr),
+ .p_wren (p_wren),
+
+ .x_din (reduce_c_word),
+ .n_din (n_din),
+ .p_dout (p_dout)
+ );
+
+
+ endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_reductor_256.v b/rtl/modular/modular_reductor_256.v
index e4b346a..6f31570 100644
--- a/rtl/modular/modular_reductor_256.v
+++ b/rtl/modular/modular_reductor_256.v
@@ -1,692 +1,692 @@
-//------------------------------------------------------------------------------
-//
-// modular_reductor_256.v
-// -----------------------------------------------------------------------------
-// Modular reductor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_reductor_256
- (
- clk, rst_n,
- ena, rdy,
- x_addr, n_addr, p_addr, p_wren,
- x_din, n_din, p_dout
- );
-
- //
- // Constants
- //
- localparam OPERAND_NUM_WORDS = 8;
- localparam WORD_COUNTER_WIDTH = 3;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST;
- input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
- WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word
- output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
- output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
- output wire p_wren; // store current P word now
-
- input wire [ 31:0] x_din; // X
- input wire [ 31:0] n_din; // N (must be P-256!)
- output wire [ 31:0] p_dout; // P = X mod N
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH:0] index_x;
-
-
- /* map registers to output ports */
- assign x_addr = index_x;
-
-
- //
- // FSM
- //
- localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
- wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
- wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5];
-
- wire [5-1:0] fsm_shreg_reduce_stage_start;
- wire [5-1:0] fsm_shreg_reduce_stage_stop;
-
- genvar s;
- generate for (s=0; s<5; s=s+1)
- begin : gen_fsm_shreg_reduce_stages
- assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1];
- assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2];
- end
- endgenerate
-
- wire inc_index_x = |fsm_shreg_inc_index_x;
- wire store_word_z = |fsm_shreg_store_word_z;
- wire reduce_start = |fsm_shreg_reduce_stage_start;
- wire reduce_stop = |fsm_shreg_reduce_stage_stop;
- wire store_p = fsm_shreg_reduce_stage_stop[0];
-
-
- wire reduce_adder0_done;
- wire reduce_adder1_done;
- wire reduce_subtractor_done;
-
- wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy)
- //
- fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else if (!reduce_stop || reduce_done_all)
- //
- fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy)
- //
- index_x <= WORD_INDEX_LAST;
- //
- else if (inc_index_x)
- //
- index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
-
-
- //
- // Look-up Table
- //
-
- //
- // Take a look at the corresponding C model for more information
- // on how exactly the math behind reduction works. The first step
- // is to assemble nine 256-bit values ("z-words") from 32-bit parts
- // of the full 512-bit product ("c-word"). The problem with z5 is
- // that it contains c13 two times. This implementation scans from
- // c15 to c0 and writes current part of c-word into corresponding
- // parts of z-words. Since those 32-bit parts are stored in block
- // memories, one source word can only be written to one location in
- // every z-word at a time. The trick is to delay c13 and then write
- // the delayed value at the corresponding location in z5 instead of
- // the next c12. "z_save" flag is used to indicate that the current
- // word should be delayed and written once again during the next cycle.
- //
-
- reg [9*WORD_COUNTER_WIDTH-1:0] z_addr; //
- reg [9 -1:0] z_wren; //
- reg [9 -1:0] z_mask; // mask input to store zero word
- reg [9 -1:0] z_save; // save previous word once again
-
- always @(posedge clk)
- //
- if (inc_index_x)
- //
- case (index_x)
- //
- // s9 s8 s7 s6 s5 s4 s3 s2 s1
- // || || || || || || || || ||
- 4'd00: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd00};
- 4'd01: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd01};
- 4'd02: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd02};
- 4'd03: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd03};
- 4'd04: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd04};
- 4'd05: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd05};
- 4'd06: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd06};
- 4'd07: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd07};
- 4'd08: z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dxx};
- 4'd09: z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dxx};
- 4'd10: z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dxx};
- 4'd11: z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dxx};
- 4'd12: z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dxx};
- 4'd13: z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx};
- 4'd14: z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx};
- 4'd15: z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx};
- //
- default: z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}};
- //
- endcase
-
- always @(posedge clk)
- //
- case (index_x)
- //
- // 9 8 7 6 5 4 3 2 1
- // | | | | | | | | |
- 4'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd04: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd08: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd09: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd10: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd11: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- //
- default: z_wren <= {9{1'b0}};
- //
- endcase
-
- always @(posedge clk)
- //
- if (inc_index_x)
- //
- case (index_x)
- //
- // 9 8 7 6 5 4 3 2 1
- // | | | | | | | | |
- 4'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd08: z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
- 4'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
- 4'd10: z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
- 4'd11: z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0};
- 4'd12: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
- 4'd13: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
- 4'd14: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd15: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- //
- default: z_mask <= {9{1'bX}};
- //
- endcase
-
- always @(posedge clk)
- //
- if (inc_index_x)
- //
- case (index_x)
- //
- // 9 8 7 6 5 4 3 2 1
- // | | | | | | | | |
- 4'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd01: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- //
- default: z_save <= {9{1'bX}};
- //
- endcase
-
-
- //
- // Intermediate Numbers
- //
- reg [WORD_COUNTER_WIDTH-1:0] reduce_z_addr[1:9];
- wire [ 32-1:0] reduce_z_dout[1:9];
-
- reg [31: 0] x_din_dly;
- always @(posedge clk)
- //
- x_din_dly <= x_din;
-
-
- genvar z;
- generate for (z=1; z<=9; z=z+1)
- //
- begin : gen_z_bram
- //
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_c_inst
- (
- .clk (clk),
-
- .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]),
- .a_wr (z_wren[z-1] & store_word_z),
- .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)),
- .a_out (),
-
- .b_addr (reduce_z_addr[z]),
- .b_out (reduce_z_dout[z])
- );
- //
- end
- //
- endgenerate
-
-
-
-
- wire [ 32-1:0] bram_sum0_wr_din;
- wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr;
- wire bram_sum0_wr_wren;
-
- wire [ 32-1:0] bram_sum1_wr_din;
- wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr;
- wire bram_sum1_wr_wren;
-
- wire [ 32-1:0] bram_diff_wr_din;
- wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr;
- wire bram_diff_wr_wren;
-
- wire [ 32-1:0] bram_sum0_rd_dout;
- reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr;
-
- wire [ 32-1:0] bram_sum1_rd_dout;
- reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr;
-
- wire [ 32-1:0] bram_diff_rd_dout;
- reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr;
-
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_sum0_inst
- (
- .clk (clk),
-
- .a_addr (bram_sum0_wr_addr),
- .a_wr (bram_sum0_wr_wren),
- .a_in (bram_sum0_wr_din),
- .a_out (),
-
- .b_addr (bram_sum0_rd_addr),
- .b_out (bram_sum0_rd_dout)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_sum1_inst
- (
- .clk (clk),
-
- .a_addr (bram_sum1_wr_addr),
- .a_wr (bram_sum1_wr_wren),
- .a_in (bram_sum1_wr_din),
- .a_out (),
-
- .b_addr (bram_sum1_rd_addr),
- .b_out (bram_sum1_rd_dout)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_diff_inst
- (
- .clk (clk),
-
- .a_addr (bram_diff_wr_addr),
- .a_wr (bram_diff_wr_wren),
- .a_in (bram_diff_wr_din),
- .a_out (),
-
- .b_addr (bram_diff_rd_addr),
- .b_out (bram_diff_rd_dout)
- );
-
-
- wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr;
- wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr;
- wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr;
-
- reg [ 32-1:0] adder0_a_din;
- reg [ 32-1:0] adder0_b_din;
-
- reg [ 32-1:0] adder1_a_din;
- reg [ 32-1:0] adder1_b_din;
-
- reg [ 32-1:0] subtractor_a_din;
- reg [ 32-1:0] subtractor_b_din;
-
- // n_addr - only 1 output, because all modules are in sync
-
- modular_adder #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
- )
- adder_inst0
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_adder0_done),
-
- .ab_addr (adder0_ab_addr),
- .n_addr (),
- .s_addr (bram_sum0_wr_addr),
- .s_wren (bram_sum0_wr_wren),
-
- .a_din (adder0_a_din),
- .b_din (adder0_b_din),
- .n_din (n_din),
- .s_dout (bram_sum0_wr_din)
- );
-
- modular_adder #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
- )
- adder_inst1
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_adder1_done),
-
- .ab_addr (adder1_ab_addr),
- .n_addr (),
- .s_addr (bram_sum1_wr_addr),
- .s_wren (bram_sum1_wr_wren),
-
- .a_din (adder1_a_din),
- .b_din (adder1_b_din),
- .n_din (n_din),
- .s_dout (bram_sum1_wr_din)
- );
-
- modular_subtractor #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
- )
- subtractor_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_subtractor_done),
-
- .ab_addr (subtractor_ab_addr),
- .n_addr (n_addr),
- .d_addr (bram_diff_wr_addr),
- .d_wren (bram_diff_wr_wren),
-
- .a_din (subtractor_a_din),
- .b_din (subtractor_b_din),
- .n_din (n_din),
- .d_dout (bram_diff_wr_din)
- );
-
-
- //
- // Address (Operand) Selector
- //
- always @(*)
- //
- case (fsm_shreg_reduce_stage_stop)
- //
- 5'b10000: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = adder0_ab_addr;
- reduce_z_addr[3] = adder1_ab_addr;
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = subtractor_ab_addr;
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- 5'b01000: begin
- reduce_z_addr[1] = adder0_ab_addr;
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = adder1_ab_addr;
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = subtractor_ab_addr;
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = adder1_ab_addr;
- bram_diff_rd_addr = subtractor_ab_addr;
- end
- //
- 5'b00100: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = adder0_ab_addr;
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = subtractor_ab_addr;
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = adder1_ab_addr;
- bram_diff_rd_addr = subtractor_ab_addr;
- end
- //
- 5'b00010: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = subtractor_ab_addr;
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = adder0_ab_addr;
- bram_diff_rd_addr = subtractor_ab_addr;
- end
- //
- 5'b00001: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_diff_rd_addr = adder0_ab_addr;
- end
- //
- default: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- endcase
-
-
- //
- // adder 0
- //
- always @(*) begin
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder0_a_din = reduce_z_dout[2];
- 5'b01000: adder0_a_din = bram_sum0_rd_dout;
- 5'b00100: adder0_a_din = bram_sum0_rd_dout;
- 5'b00010: adder0_a_din = bram_sum0_rd_dout;
- 5'b00001: adder0_a_din = bram_sum0_rd_dout;
- default: adder0_a_din = {32{1'bX}};
- endcase
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder0_b_din = reduce_z_dout[2];
- 5'b01000: adder0_b_din = reduce_z_dout[1];
- 5'b00100: adder0_b_din = reduce_z_dout[5];
- 5'b00010: adder0_b_din = bram_sum1_rd_dout;
- 5'b00001: adder0_b_din = bram_diff_rd_dout;
- default: adder0_b_din = {32{1'bX}};
- endcase
- //
- end
-
- //
- // adder 1
- //
- always @(*) begin
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder1_a_din = reduce_z_dout[3];
- 5'b01000: adder1_a_din = bram_sum1_rd_dout;
- 5'b00100: adder1_a_din = bram_sum1_rd_dout;
- 5'b00010: adder1_a_din = {32{1'bX}};
- 5'b00001: adder1_a_din = {32{1'bX}};
- default: adder1_a_din = {32{1'bX}};
- endcase
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder1_b_din = reduce_z_dout[3];
- 5'b01000: adder1_b_din = reduce_z_dout[4];
- 5'b00100: adder1_b_din = {32{1'b0}};
- 5'b00010: adder1_b_din = {32{1'bX}};
- 5'b00001: adder1_b_din = {32{1'bX}};
- default: adder1_b_din = {32{1'bX}};
- endcase
- //
- end
-
-
- //
- // subtractor
- //
- always @(*) begin
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: subtractor_a_din = {32{1'b0}};
- 5'b01000: subtractor_a_din = bram_diff_rd_dout;
- 5'b00100: subtractor_a_din = bram_diff_rd_dout;
- 5'b00010: subtractor_a_din = bram_diff_rd_dout;
- 5'b00001: subtractor_a_din = {32{1'bX}};
- default: subtractor_a_din = {32{1'bX}};
- endcase
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: subtractor_b_din = reduce_z_dout[6];
- 5'b01000: subtractor_b_din = reduce_z_dout[7];
- 5'b00100: subtractor_b_din = reduce_z_dout[8];
- 5'b00010: subtractor_b_din = reduce_z_dout[9];
- 5'b00001: subtractor_b_din = {32{1'bX}};
- default: subtractor_b_din = {32{1'bX}};
- endcase
- //
- end
-
-
- //
- // Address Mapping
- //
- assign p_addr = bram_sum0_wr_addr;
- assign p_wren = bram_sum0_wr_wren & store_p;
- assign p_dout = bram_sum0_wr_din;
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// modular_reductor_256.v
+// -----------------------------------------------------------------------------
+// Modular reductor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_reductor_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ x_addr, n_addr, p_addr, p_wren,
+ x_din, n_din, p_dout
+ );
+
+ //
+ // Constants
+ //
+ localparam OPERAND_NUM_WORDS = 8;
+ localparam WORD_COUNTER_WIDTH = 3;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST;
+ input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+ WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
+ output wire p_wren; // store current P word now
+
+ input wire [ 31:0] x_din; // X
+ input wire [ 31:0] n_din; // N (must be P-256!)
+ output wire [ 31:0] p_dout; // P = X mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH:0] index_x;
+
+
+ /* map registers to output ports */
+ assign x_addr = index_x;
+
+
+ //
+ // FSM
+ //
+ localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
+ wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
+ wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5];
+
+ wire [5-1:0] fsm_shreg_reduce_stage_start;
+ wire [5-1:0] fsm_shreg_reduce_stage_stop;
+
+ genvar s;
+ generate for (s=0; s<5; s=s+1)
+ begin : gen_fsm_shreg_reduce_stages
+ assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1];
+ assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2];
+ end
+ endgenerate
+
+ wire inc_index_x = |fsm_shreg_inc_index_x;
+ wire store_word_z = |fsm_shreg_store_word_z;
+ wire reduce_start = |fsm_shreg_reduce_stage_start;
+ wire reduce_stop = |fsm_shreg_reduce_stage_stop;
+ wire store_p = fsm_shreg_reduce_stage_stop[0];
+
+
+ wire reduce_adder0_done;
+ wire reduce_adder1_done;
+ wire reduce_subtractor_done;
+
+ wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy)
+ //
+ fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else if (!reduce_stop || reduce_done_all)
+ //
+ fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ always @(posedge clk)
+ //
+ if (rdy)
+ //
+ index_x <= WORD_INDEX_LAST;
+ //
+ else if (inc_index_x)
+ //
+ index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
+
+
+ //
+ // Look-up Table
+ //
+
+ //
+ // Take a look at the corresponding C model for more information
+ // on how exactly the math behind reduction works. The first step
+ // is to assemble nine 256-bit values ("z-words") from 32-bit parts
+ // of the full 512-bit product ("c-word"). The problem with z5 is
+ // that it contains c13 two times. This implementation scans from
+ // c15 to c0 and writes current part of c-word into corresponding
+ // parts of z-words. Since those 32-bit parts are stored in block
+ // memories, one source word can only be written to one location in
+ // every z-word at a time. The trick is to delay c13 and then write
+ // the delayed value at the corresponding location in z5 instead of
+ // the next c12. "z_save" flag is used to indicate that the current
+ // word should be delayed and written once again during the next cycle.
+ //
+
+ reg [9*WORD_COUNTER_WIDTH-1:0] z_addr; //
+ reg [9 -1:0] z_wren; //
+ reg [9 -1:0] z_mask; // mask input to store zero word
+ reg [9 -1:0] z_save; // save previous word once again
+
+ always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // s9 s8 s7 s6 s5 s4 s3 s2 s1
+ // || || || || || || || || ||
+ 4'd00: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd00};
+ 4'd01: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd01};
+ 4'd02: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd02};
+ 4'd03: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd03};
+ 4'd04: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd04};
+ 4'd05: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd05};
+ 4'd06: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd06};
+ 4'd07: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd07};
+ 4'd08: z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dxx};
+ 4'd09: z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dxx};
+ 4'd10: z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dxx};
+ 4'd11: z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dxx};
+ 4'd12: z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dxx};
+ 4'd13: z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx};
+ 4'd14: z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx};
+ 4'd15: z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx};
+ //
+ default: z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}};
+ //
+ endcase
+
+ always @(posedge clk)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd04: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd08: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd09: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd10: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd11: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ //
+ default: z_wren <= {9{1'b0}};
+ //
+ endcase
+
+ always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd08: z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd10: z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd11: z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0};
+ 4'd12: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+ 4'd13: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+ 4'd14: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd15: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ //
+ default: z_mask <= {9{1'bX}};
+ //
+ endcase
+
+ always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd01: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ //
+ default: z_save <= {9{1'bX}};
+ //
+ endcase
+
+
+ //
+ // Intermediate Numbers
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] reduce_z_addr[1:9];
+ wire [ 32-1:0] reduce_z_dout[1:9];
+
+ reg [31: 0] x_din_dly;
+ always @(posedge clk)
+ //
+ x_din_dly <= x_din;
+
+
+ genvar z;
+ generate for (z=1; z<=9; z=z+1)
+ //
+ begin : gen_z_bram
+ //
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_c_inst
+ (
+ .clk (clk),
+
+ .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]),
+ .a_wr (z_wren[z-1] & store_word_z),
+ .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)),
+ .a_out (),
+
+ .b_addr (reduce_z_addr[z]),
+ .b_out (reduce_z_dout[z])
+ );
+ //
+ end
+ //
+ endgenerate
+
+
+
+
+ wire [ 32-1:0] bram_sum0_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr;
+ wire bram_sum0_wr_wren;
+
+ wire [ 32-1:0] bram_sum1_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr;
+ wire bram_sum1_wr_wren;
+
+ wire [ 32-1:0] bram_diff_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr;
+ wire bram_diff_wr_wren;
+
+ wire [ 32-1:0] bram_sum0_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr;
+
+ wire [ 32-1:0] bram_sum1_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr;
+
+ wire [ 32-1:0] bram_diff_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr;
+
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_sum0_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_sum0_wr_addr),
+ .a_wr (bram_sum0_wr_wren),
+ .a_in (bram_sum0_wr_din),
+ .a_out (),
+
+ .b_addr (bram_sum0_rd_addr),
+ .b_out (bram_sum0_rd_dout)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_sum1_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_sum1_wr_addr),
+ .a_wr (bram_sum1_wr_wren),
+ .a_in (bram_sum1_wr_din),
+ .a_out (),
+
+ .b_addr (bram_sum1_rd_addr),
+ .b_out (bram_sum1_rd_dout)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_diff_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_diff_wr_addr),
+ .a_wr (bram_diff_wr_wren),
+ .a_in (bram_diff_wr_din),
+ .a_out (),
+
+ .b_addr (bram_diff_rd_addr),
+ .b_out (bram_diff_rd_dout)
+ );
+
+
+ wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr;
+
+ reg [ 32-1:0] adder0_a_din;
+ reg [ 32-1:0] adder0_b_din;
+
+ reg [ 32-1:0] adder1_a_din;
+ reg [ 32-1:0] adder1_b_din;
+
+ reg [ 32-1:0] subtractor_a_din;
+ reg [ 32-1:0] subtractor_b_din;
+
+ // n_addr - only 1 output, because all modules are in sync
+
+ modular_adder #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+ )
+ adder_inst0
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_adder0_done),
+
+ .ab_addr (adder0_ab_addr),
+ .n_addr (),
+ .s_addr (bram_sum0_wr_addr),
+ .s_wren (bram_sum0_wr_wren),
+
+ .a_din (adder0_a_din),
+ .b_din (adder0_b_din),
+ .n_din (n_din),
+ .s_dout (bram_sum0_wr_din)
+ );
+
+ modular_adder #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+ )
+ adder_inst1
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_adder1_done),
+
+ .ab_addr (adder1_ab_addr),
+ .n_addr (),
+ .s_addr (bram_sum1_wr_addr),
+ .s_wren (bram_sum1_wr_wren),
+
+ .a_din (adder1_a_din),
+ .b_din (adder1_b_din),
+ .n_din (n_din),
+ .s_dout (bram_sum1_wr_din)
+ );
+
+ modular_subtractor #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+ )
+ subtractor_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_subtractor_done),
+
+ .ab_addr (subtractor_ab_addr),
+ .n_addr (n_addr),
+ .d_addr (bram_diff_wr_addr),
+ .d_wren (bram_diff_wr_wren),
+
+ .a_din (subtractor_a_din),
+ .b_din (subtractor_b_din),
+ .n_din (n_din),
+ .d_dout (bram_diff_wr_din)
+ );
+
+
+ //
+ // Address (Operand) Selector
+ //
+ always @(*)
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ //
+ 5'b10000: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = adder0_ab_addr;
+ reduce_z_addr[3] = adder1_ab_addr;
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = subtractor_ab_addr;
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ 5'b01000: begin
+ reduce_z_addr[1] = adder0_ab_addr;
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = adder1_ab_addr;
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = subtractor_ab_addr;
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder1_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00100: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = adder0_ab_addr;
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = subtractor_ab_addr;
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder1_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00010: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = subtractor_ab_addr;
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder0_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00001: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = adder0_ab_addr;
+ end
+ //
+ default: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ endcase
+
+
+ //
+ // adder 0
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder0_a_din = reduce_z_dout[2];
+ 5'b01000: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00100: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00010: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00001: adder0_a_din = bram_sum0_rd_dout;
+ default: adder0_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder0_b_din = reduce_z_dout[2];
+ 5'b01000: adder0_b_din = reduce_z_dout[1];
+ 5'b00100: adder0_b_din = reduce_z_dout[5];
+ 5'b00010: adder0_b_din = bram_sum1_rd_dout;
+ 5'b00001: adder0_b_din = bram_diff_rd_dout;
+ default: adder0_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+ //
+ // adder 1
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder1_a_din = reduce_z_dout[3];
+ 5'b01000: adder1_a_din = bram_sum1_rd_dout;
+ 5'b00100: adder1_a_din = bram_sum1_rd_dout;
+ 5'b00010: adder1_a_din = {32{1'bX}};
+ 5'b00001: adder1_a_din = {32{1'bX}};
+ default: adder1_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder1_b_din = reduce_z_dout[3];
+ 5'b01000: adder1_b_din = reduce_z_dout[4];
+ 5'b00100: adder1_b_din = {32{1'b0}};
+ 5'b00010: adder1_b_din = {32{1'bX}};
+ 5'b00001: adder1_b_din = {32{1'bX}};
+ default: adder1_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // subtractor
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: subtractor_a_din = {32{1'b0}};
+ 5'b01000: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00100: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00010: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00001: subtractor_a_din = {32{1'bX}};
+ default: subtractor_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: subtractor_b_din = reduce_z_dout[6];
+ 5'b01000: subtractor_b_din = reduce_z_dout[7];
+ 5'b00100: subtractor_b_din = reduce_z_dout[8];
+ 5'b00010: subtractor_b_din = reduce_z_dout[9];
+ 5'b00001: subtractor_b_din = {32{1'bX}};
+ default: subtractor_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // Address Mapping
+ //
+ assign p_addr = bram_sum0_wr_addr;
+ assign p_wren = bram_sum0_wr_wren & store_p;
+ assign p_dout = bram_sum0_wr_din;
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_subtractor.v b/rtl/modular/modular_subtractor.v
deleted file mode 100644
index 322aec4..0000000
--- a/rtl/modular/modular_subtractor.v
+++ /dev/null
@@ -1,292 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// modular_subtractor.v
-// -----------------------------------------------------------------------------
-// Modular subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_subtractor
- (
- clk, rst_n,
- ena, rdy,
- ab_addr, n_addr, d_addr, d_wren,
- a_din, b_din, n_din, d_dout
- );
-
-
- //
- // Parameters
- //
- parameter OPERAND_NUM_WORDS = 8;
- parameter WORD_COUNTER_WIDTH = 3;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words
- output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
- output wire [WORD_COUNTER_WIDTH-1:0] d_addr; // index of current D word
- output wire d_wren; // store current D word now
-
- input wire [ 31:0] a_din; // A
- input wire [ 31:0] b_din; // B
- input wire [ 31:0] n_din; // N
- output wire [ 31:0] d_dout; // D = (A - B) mod N
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH-1:0] index_ab;
- reg [WORD_COUNTER_WIDTH-1:0] index_n;
- reg [WORD_COUNTER_WIDTH-1:0] index_d;
-
- /* map registers to output ports */
- assign ab_addr = index_ab;
- assign n_addr = index_n;
- assign d_addr = index_d;
-
-
- //
- // Subtractor
- //
- wire [31: 0] sub32_d;
- wire sub32_b_in;
- wire sub32_b_out;
-
- subtractor32_wrapper subtractor32
- (
- .clk (clk),
- .a (a_din),
- .b (b_din),
- .d (sub32_d),
- .b_in (sub32_b_in),
- .b_out (sub32_b_out)
- );
-
-
- //
- // Adder
- //
- wire [31: 0] add32_s;
- wire add32_c_in;
- wire add32_c_out;
-
- adder32_wrapper adder32
- (
- .clk (clk),
- .a (sub32_d),
- .b (n_din),
- .s (add32_s),
- .c_in (add32_c_in),
- .c_out (add32_c_out)
- );
-
-
- //
- // FSM
- //
-
- localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)];
-
- wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
-
- wire inc_index_ab = |fsm_shreg_inc_index_ab;
- wire inc_index_n = |fsm_shreg_inc_index_n;
- wire store_dif_ab = |fsm_shreg_store_dif_ab;
- wire store_dif_ab_n = |fsm_shreg_store_dif_ab_n;
- wire store_data_d = |fsm_shreg_store_data_d;
- wire inc_index_d = |fsm_shreg_inc_index_d;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Borrow & Carry Masking Logic
- //
- reg sub32_b_mask;
- reg add32_c_mask;
-
-
- always @(posedge clk) begin
- //
- sub32_b_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
- add32_c_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
- //
- end
-
- assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
- assign add32_c_in = add32_c_out & ~add32_c_mask;
-
-
-
- //
- // Borrow & Carry Latch Logic
- //
- reg sub32_borrow_latch;
-
- always @(posedge clk) begin
- //
- if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out;
- //
- end
-
-
- //
- // Intermediate Results
- //
- reg [32*OPERAND_NUM_WORDS-1:0] d_ab;
- reg [32*OPERAND_NUM_WORDS-1:0] d_ab_n;
-
- always @(posedge clk)
- //
- if (store_data_d) begin
- //
- d_ab <= {{32{1'bX}}, d_ab[32*OPERAND_NUM_WORDS-1:32]};
- d_ab_n <= {{32{1'bX}}, d_ab_n[32*OPERAND_NUM_WORDS-1:32]};
- //
- end else begin
- //
- if (store_dif_ab) d_ab <= {sub32_d, d_ab[32*OPERAND_NUM_WORDS-1:32]};
- if (store_dif_ab_n) d_ab_n <= {add32_s, d_ab_n[32*OPERAND_NUM_WORDS-1:32]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy) begin
- //
- index_ab <= WORD_INDEX_ZERO;
- index_n <= WORD_INDEX_ZERO;
- index_d <= WORD_INDEX_ZERO;
- //
- end else begin
- //
- if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab);
- if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n);
- if (inc_index_d) index_d <= WORD_INDEX_NEXT_OR_ZERO(index_d);
- //
- end
-
-
- //
- // Output Sum Selector
- //
- wire mux_select_ab_n = sub32_borrow_latch;
-
-
- //
- // Output Data and Write Enable Logic
- //
- reg d_wren_reg;
- reg [31: 0] d_dout_reg;
- wire [31: 0] d_dout_mux = mux_select_ab_n ? d_ab_n[31:0] : d_ab[31:0];
-
- assign d_wren = d_wren_reg;
- assign d_dout = d_dout_reg;
-
- always @(posedge clk)
- //
- if (rdy) begin
- //
- d_wren_reg <= 1'b0;
- d_dout_reg <= {32{1'bX}};
- //
- end else begin
- //
- d_wren_reg <= store_data_d;
- d_dout_reg <= store_data_d ? d_dout_mux : {32{1'bX}};
- //
- end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/multiword/mw_comparator.v b/rtl/multiword/mw_comparator.v
deleted file mode 100644
index b97a6cf..0000000
--- a/rtl/multiword/mw_comparator.v
+++ /dev/null
@@ -1,220 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mw_comparator.v
-// -----------------------------------------------------------------------------
-// Multi-word comparator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mw_comparator
- (
- clk, rst_n,
- ena, rdy,
- xy_addr, x_din, y_din,
- cmp_l, cmp_e, cmp_g
- );
-
-
- //
- // Parameters
- //
- parameter WORD_COUNTER_WIDTH = 3;
- parameter OPERAND_NUM_WORDS = 8;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREV_OR_LAST;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_PREV_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
- WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-1:0] xy_addr; // address of current X and Y words
- input wire [ 32-1:0] x_din; // current X word
- input wire [ 32-1:0] y_din; // current Y word
-
- output wire cmp_l; // X < Y ?
- output wire cmp_e; // X = Y ?
- output wire cmp_g; // X > Y ?
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH-1:0] index_xy;
-
- reg reg_cmp_l;
- reg reg_cmp_e;
- reg reg_cmp_g;
-
-
- //
- // Output Mapping
- //
- assign xy_addr = index_xy;
-
- assign cmp_l = reg_cmp_l;
- assign cmp_e = reg_cmp_e;
- assign cmp_g = reg_cmp_g;
-
-
- //
- // FSM
- //
- localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 3;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_dec_index_xy = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_calc_leg = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
- wire fsm_shreg_calc_leg_last = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
-
- wire dec_index_xy = |fsm_shreg_dec_index_xy;
- wire calc_leg = |fsm_shreg_calc_leg;
- wire calc_leg_last = fsm_shreg_calc_leg_last;
-
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy) index_xy <= WORD_INDEX_LAST;
- else if (dec_index_xy) index_xy <= WORD_INDEX_PREV_OR_LAST(index_xy);
-
-
- //
- // 32-bit Subtractor
- //
- wire [31: 0] sub32_d_out;
- wire sub32_b_in;
- wire sub32_b_out;
-
- subtractor32_wrapper subtractor32_inst
- (
- .clk (clk),
-
- .a (x_din),
- .b (y_din),
-
- .d (sub32_d_out),
-
- .b_in (sub32_b_in),
- .b_out (sub32_b_out)
- );
-
-
- //
- // Borrow Masking Logic
- //
- reg sub32_b_mask;
-
- always @(posedge clk)
- //
- sub32_b_mask <= (index_xy == WORD_INDEX_LAST) ? 1'b1 : 1'b0;
-
- assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
-
- //
- // Output Logic
- //
- wire cmp_unresolved = !(cmp_l || cmp_g);
-
- wire cmp_borrow_is_set = (sub32_b_out == 1'b1) ? 1'b1 : 1'b0;
- wire cmp_difference_is_nonzero = (sub32_d_out != 32'd0) ? 1'b1 : 1'b0;
-
- always @(posedge clk)
- //
- if (rdy) begin
- //
- if (ena) begin
- //
- reg_cmp_l <= 1'b0;
- reg_cmp_e <= 1'b0;
- reg_cmp_g <= 1'b0;
- //
- end
- //
- end else if (cmp_unresolved && calc_leg) begin
- //
- if ( cmp_borrow_is_set) reg_cmp_l <= 1'b1;
- if (!cmp_borrow_is_set && cmp_difference_is_nonzero) reg_cmp_g <= 1'b1;
- if (!cmp_borrow_is_set && !cmp_difference_is_nonzero && calc_leg_last) reg_cmp_e <= 1'b1;
- //
- end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/multiword/mw_mover.v b/rtl/multiword/mw_mover.v
deleted file mode 100644
index 5db95a7..0000000
--- a/rtl/multiword/mw_mover.v
+++ /dev/null
@@ -1,175 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mw_mover.v
-// -----------------------------------------------------------------------------
-// Multi-word data mover.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mw_mover
- (
- clk, rst_n,
- ena, rdy,
- x_addr, y_addr, y_wren,
- x_din, y_dout
- );
-
-
- //
- // Parameters
- //
- parameter WORD_COUNTER_WIDTH = 3;
- parameter OPERAND_NUM_WORDS = 8;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-1:0] x_addr; // address of current X word
- output wire [WORD_COUNTER_WIDTH-1:0] y_addr; // address of current Y word
- output wire y_wren; // store current Y word
-
- input wire [ 32-1:0] x_din; // current X word
- output wire [ 32-1:0] y_dout; // current Y word
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH-1:0] index_x;
- reg [WORD_COUNTER_WIDTH-1:0] index_y;
-
-
- //
- // Output Mapping
- //
- assign x_addr = index_x;
- assign y_addr = index_y;
-
-
- //
- // FSM
- //
- localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 2;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_y = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
-
- wire inc_index_x = |fsm_shreg_inc_index_x;
- wire inc_index_y = |fsm_shreg_inc_index_y;
- wire store_word_y = |fsm_shreg_inc_index_x;
-
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy) begin
- index_x <= WORD_INDEX_ZERO;
- index_y <= WORD_INDEX_ZERO;
- end else begin
- if (inc_index_x) index_x <= WORD_INDEX_NEXT_OR_ZERO(index_x);
- if (inc_index_y) index_y <= WORD_INDEX_NEXT_OR_ZERO(index_y);
- end
-
-
- //
- // Write Enable Logic
- //
- reg y_wren_reg;
-
- assign y_wren = y_wren_reg;
-
- always @(posedge clk)
- //
- if (rdy) y_wren_reg <= 1'b0;
- else y_wren_reg <= store_word_y;
-
-
- //
- // Output Logic
- //
- assign y_dout = x_din;
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/util/bram_1rw_1ro_readfirst.v b/rtl/util/bram_1rw_1ro_readfirst.v
deleted file mode 100644
index 28782c2..0000000
--- a/rtl/util/bram_1rw_1ro_readfirst.v
+++ /dev/null
@@ -1,101 +0,0 @@
-//======================================================================
-//
-// Copyright (c) 2015, NORDUnet A/S All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-// - Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may
-// be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-//======================================================================
-
-`timescale 1ns / 1ps
-
-module bram_1rw_1ro_readfirst
- #(parameter MEM_WIDTH = 32,
- parameter MEM_ADDR_BITS = 8)
- (
- input wire clk,
-
- input wire [MEM_ADDR_BITS-1:0] a_addr,
- input wire a_wr,
- input wire [MEM_WIDTH-1:0] a_in,
- output wire [MEM_WIDTH-1:0] a_out,
-
- input wire [MEM_ADDR_BITS-1:0] b_addr,
- output wire [MEM_WIDTH-1:0] b_out
- );
-
-
- //
- // BRAM
- //
- (* RAM_STYLE="BLOCK" *)
- reg [MEM_WIDTH-1:0] bram[0:(2**MEM_ADDR_BITS)-1];
-
-
- //
- // Initialization
- //
- /**
- integer c;
- initial begin
- for (c=0; c<(2**MEM_ADDR_BITS); c=c+1)
- bram[c] = {MEM_WIDTH{1'b0}};
- end
- **/
-
-
-
- //
- // Output Registers
- //
- reg [MEM_WIDTH-1:0] bram_reg_a;
- reg [MEM_WIDTH-1:0] bram_reg_b;
-
- assign a_out = bram_reg_a;
- assign b_out = bram_reg_b;
-
-
- //
- // Read-Write Port A
- //
- always @(posedge clk) begin
- //
- bram_reg_a <= bram[a_addr];
- //
- if (a_wr) bram[a_addr] <= a_in;
- //
- end
-
-
- //
- // Read-Only Port B
- //
- always @(posedge clk)
- //
- bram_reg_b <= bram[b_addr];
-
-
-endmodule
diff --git a/stm32_driver/ecdsa256_driver_sample.c b/stm32_driver/ecdsa256_driver_sample.c
index cef4af0..8047e98 100644
--- a/stm32_driver/ecdsa256_driver_sample.c
+++ b/stm32_driver/ecdsa256_driver_sample.c
@@ -1,173 +1,171 @@
- //
- // simple driver to test "ecdsa384" core in hardware
- //
-
- //
- // note, that the test program needs a custom bitstream where
- // the core is located at offset 0 (without the core selector)
- //
-
- // stm32 headers
-#include "stm-init.h"
-#include "stm-led.h"
-#include "stm-fmc.h"
-
- // locations of core registers
-#define CORE_ADDR_NAME0 (0x00 << 2)
-#define CORE_ADDR_NAME1 (0x01 << 2)
-#define CORE_ADDR_VERSION (0x02 << 2)
-#define CORE_ADDR_CONTROL (0x08 << 2)
-#define CORE_ADDR_STATUS (0x09 << 2)
-
- // locations of data buffers
-#define CORE_ADDR_BUF_K (0x20 << 2)
-#define CORE_ADDR_BUF_X (0x28 << 2)
-#define CORE_ADDR_BUF_Y (0x30 << 2)
-
- // bit maps
-#define CORE_CONTROL_BIT_NEXT 0x00000002
-#define CORE_STATUS_BIT_READY 0x00000002
-
- // curve selection
-#define USE_CURVE 1
-
-#include "ecdsa_model.h"
-
-#define BUF_NUM_WORDS (OPERAND_WIDTH / (sizeof(uint32_t) << 3)) // 8
-
- //
- // test vectors
- //
-static const uint32_t p256_d[BUF_NUM_WORDS] = ECDSA_D;
-static const uint32_t p256_qx[BUF_NUM_WORDS] = ECDSA_Q_X;
-static const uint32_t p256_qy[BUF_NUM_WORDS] = ECDSA_Q_Y;
-
-static const uint32_t p256_k[BUF_NUM_WORDS] = ECDSA_K;
-static const uint32_t p256_rx[BUF_NUM_WORDS] = ECDSA_R_X;
-static const uint32_t p256_ry[BUF_NUM_WORDS] = ECDSA_R_Y;
-
-static const uint32_t p256_i[BUF_NUM_WORDS] = ECDSA_ONE;
-static const uint32_t p256_gx[BUF_NUM_WORDS] = ECDSA_G_X;
-static const uint32_t p256_gy[BUF_NUM_WORDS] = ECDSA_G_Y;
-
-static const uint32_t p256_z[BUF_NUM_WORDS] = ECDSA_ZERO;
-static const uint32_t p256_n[BUF_NUM_WORDS] = ECDSA_N;
-
- //
- // prototypes
- //
-void toggle_yellow_led(void);
-int test_p256_multiplier(const uint32_t *k, const uint32_t *px, const uint32_t *py);
-
- //
- // test routine
- //
-int main()
-{
- int ok;
-
- stm_init();
- fmc_init();
-
- led_on(LED_GREEN);
- led_off(LED_RED);
-
- led_off(LED_YELLOW);
- led_off(LED_BLUE);
-
- uint32_t core_name0;
- uint32_t core_name1;
-
- fmc_read_32(CORE_ADDR_NAME0, &core_name0);
- fmc_read_32(CORE_ADDR_NAME1, &core_name1);
-
- // "ecds", "a256"
- if ((core_name0 != 0x65636473) || (core_name1 != 0x61323536))
- {
- led_off(LED_GREEN);
- led_on(LED_RED);
- while (1);
- }
-
- // repeat forever
- while (1)
- {
- ok = 1;
- ok = ok && test_p256_multiplier(p256_d, p256_qx, p256_qy);
- ok = ok && test_p256_multiplier(p256_k, p256_rx, p256_ry);
- ok = ok && test_p256_multiplier(p256_z, p256_z, p256_z);
- ok = ok && test_p256_multiplier(p256_i, p256_gx, p256_gy);
- ok = ok && test_p256_multiplier(p256_n, p256_z, p256_z);
-
- if (!ok)
- { led_off(LED_GREEN);
- led_on(LED_RED);
- }
-
- toggle_yellow_led();
- }
-}
-
-
- //
- // this routine uses the hardware multiplier to obtain Q(qx,qy), which is the
- // scalar multiple of the base point, qx and qy are then compared to the values
- // px and py (correct result known in advance)
- //
-int test_p256_multiplier(const uint32_t *k, const uint32_t *px, const uint32_t *py)
-{
- int i, num_cyc;
- uint32_t reg_control, reg_status;
- uint32_t k_word, qx_word, qy_word;
-
- // fill k
- for (i=0; i<BUF_NUM_WORDS; i++)
- { k_word = k[i];
- fmc_write_32(CORE_ADDR_BUF_K + ((BUF_NUM_WORDS - (i + 1)) * sizeof(uint32_t)), &k_word);
- }
-
- // clear 'next' control bit, then set 'next' control bit again to trigger new operation
- reg_control = 0;
- fmc_write_32(CORE_ADDR_CONTROL, &reg_control);
- reg_control = CORE_CONTROL_BIT_NEXT;
- fmc_write_32(CORE_ADDR_CONTROL, &reg_control);
-
- // wait for 'ready' status bit to be set
- num_cyc = 0;
- do
- { num_cyc++;
- fmc_read_32(CORE_ADDR_STATUS, &reg_status);
- }
- while (!(reg_status & CORE_STATUS_BIT_READY));
-
- // read back x and y word-by-word, then compare to the reference values
- for (i=0; i<BUF_NUM_WORDS; i++)
- {
- fmc_read_32(CORE_ADDR_BUF_X + (i * sizeof(uint32_t)), &qx_word);
- fmc_read_32(CORE_ADDR_BUF_Y + (i * sizeof(uint32_t)), &qy_word);
-
- if ((qx_word != px[BUF_NUM_WORDS - (i + 1)])) return 0;
- if ((qy_word != py[BUF_NUM_WORDS - (i + 1)])) return 0;
- }
-
- // everything went just fine
- return 1;
-}
-
- //
- // toggle the yellow led to indicate that we're not stuck somewhere
- //
-void toggle_yellow_led(void)
-{
- static int led_state = 0;
-
- led_state = !led_state;
-
- if (led_state) led_on(LED_YELLOW);
- else led_off(LED_YELLOW);
-}
-
-
- //
- // end of file
- //
+//
+// simple driver to test "ecdsa384" core in hardware
+//
+
+//
+// note, that the test program needs a custom bitstream where
+// the core is located at offset 0 (without the core selector)
+//
+
+// stm32 headers
+#include "stm-init.h"
+#include "stm-led.h"
+#include "stm-fmc.h"
+
+// locations of core registers
+#define CORE_ADDR_NAME0 (0x00 << 2)
+#define CORE_ADDR_NAME1 (0x01 << 2)
+#define CORE_ADDR_VERSION (0x02 << 2)
+#define CORE_ADDR_CONTROL (0x08 << 2)
+#define CORE_ADDR_STATUS (0x09 << 2)
+
+// locations of data buffers
+#define CORE_ADDR_BUF_K (0x20 << 2)
+#define CORE_ADDR_BUF_X (0x28 << 2)
+#define CORE_ADDR_BUF_Y (0x30 << 2)
+
+// bit maps
+#define CORE_CONTROL_BIT_NEXT 0x00000002
+#define CORE_STATUS_BIT_READY 0x00000002
+
+// curve selection
+#define USE_CURVE 1
+
+#include "ecdsa_model.h"
+
+#define BUF_NUM_WORDS (OPERAND_WIDTH / (sizeof(uint32_t) << 3)) // 8
+
+//
+// test vectors
+//
+static const uint32_t p256_d[BUF_NUM_WORDS] = ECDSA_D;
+static const uint32_t p256_qx[BUF_NUM_WORDS] = ECDSA_Q_X;
+static const uint32_t p256_qy[BUF_NUM_WORDS] = ECDSA_Q_Y;
+
+static const uint32_t p256_k[BUF_NUM_WORDS] = ECDSA_K;
+static const uint32_t p256_rx[BUF_NUM_WORDS] = ECDSA_R_X;
+static const uint32_t p256_ry[BUF_NUM_WORDS] = ECDSA_R_Y;
+
+static const uint32_t p256_i[BUF_NUM_WORDS] = ECDSA_ONE;
+static const uint32_t p256_gx[BUF_NUM_WORDS] = ECDSA_G_X;
+static const uint32_t p256_gy[BUF_NUM_WORDS] = ECDSA_G_Y;
+
+static const uint32_t p256_z[BUF_NUM_WORDS] = ECDSA_ZERO;
+static const uint32_t p256_n[BUF_NUM_WORDS] = ECDSA_N;
+
+//
+// prototypes
+//
+void toggle_yellow_led(void);
+int test_p256_multiplier(const uint32_t *k, const uint32_t *px, const uint32_t *py);
+
+//
+// test routine
+//
+int main()
+{
+ int ok;
+
+ stm_init();
+ fmc_init();
+
+ led_on(LED_GREEN);
+ led_off(LED_RED);
+
+ led_off(LED_YELLOW);
+ led_off(LED_BLUE);
+
+ uint32_t core_name0;
+ uint32_t core_name1;
+
+ fmc_read_32(CORE_ADDR_NAME0, &core_name0);
+ fmc_read_32(CORE_ADDR_NAME1, &core_name1);
+
+ // "ecds", "a256"
+ if ((core_name0 != 0x65636473) || (core_name1 != 0x61323536)) {
+ led_off(LED_GREEN);
+ led_on(LED_RED);
+ while (1);
+ }
+
+ // repeat forever
+ while (1)
+ {
+ ok = 1;
+ ok = ok && test_p256_multiplier(p256_d, p256_qx, p256_qy);
+ ok = ok && test_p256_multiplier(p256_k, p256_rx, p256_ry);
+ ok = ok && test_p256_multiplier(p256_z, p256_z, p256_z);
+ ok = ok && test_p256_multiplier(p256_i, p256_gx, p256_gy);
+ ok = ok && test_p256_multiplier(p256_n, p256_z, p256_z);
+
+ if (!ok) {
+ led_off(LED_GREEN);
+ led_on(LED_RED);
+ }
+
+ toggle_yellow_led();
+ }
+}
+
+
+//
+// this routine uses the hardware multiplier to obtain Q(qx,qy), which is the
+// scalar multiple of the base point, qx and qy are then compared to the values
+// px and py (correct result known in advance)
+//
+int test_p256_multiplier(const uint32_t *k, const uint32_t *px, const uint32_t *py)
+{
+ int i, num_cyc;
+ uint32_t reg_control, reg_status;
+ uint32_t k_word, qx_word, qy_word;
+
+ // fill k
+ for (i=0; i<BUF_NUM_WORDS; i++) {
+ k_word = k[i];
+ fmc_write_32(CORE_ADDR_BUF_K + ((BUF_NUM_WORDS - (i + 1)) * sizeof(uint32_t)), &k_word);
+ }
+
+ // clear 'next' control bit, then set 'next' control bit again to trigger new operation
+ reg_control = 0;
+ fmc_write_32(CORE_ADDR_CONTROL, &reg_control);
+ reg_control = CORE_CONTROL_BIT_NEXT;
+ fmc_write_32(CORE_ADDR_CONTROL, &reg_control);
+
+ // wait for 'ready' status bit to be set
+ num_cyc = 0;
+ do {
+ num_cyc++;
+ fmc_read_32(CORE_ADDR_STATUS, &reg_status);
+ }
+ while (!(reg_status & CORE_STATUS_BIT_READY));
+
+ // read back x and y word-by-word, then compare to the reference values
+ for (i=0; i<BUF_NUM_WORDS; i++) {
+ fmc_read_32(CORE_ADDR_BUF_X + (i * sizeof(uint32_t)), &qx_word);
+ fmc_read_32(CORE_ADDR_BUF_Y + (i * sizeof(uint32_t)), &qy_word);
+
+ if ((qx_word != px[BUF_NUM_WORDS - (i + 1)])) return 0;
+ if ((qy_word != py[BUF_NUM_WORDS - (i + 1)])) return 0;
+ }
+
+ // everything went just fine
+ return 1;
+}
+
+//
+// toggle the yellow led to indicate that we're not stuck somewhere
+//
+void toggle_yellow_led(void)
+{
+ static int led_state = 0;
+
+ led_state = !led_state;
+
+ if (led_state) led_on(LED_YELLOW);
+ else led_off(LED_YELLOW);
+}
+
+
+//
+// end of file
+//
diff --git a/stm32_driver/ecdsa_model.h b/stm32_driver/ecdsa_model.h
index 44ab59f..620c56e 100644
--- a/stm32_driver/ecdsa_model.h
+++ b/stm32_driver/ecdsa_model.h
@@ -1,204 +1,204 @@
-//------------------------------------------------------------------------------
-//
-// ecdsa_model.h
-// --------------------------------------------
-// Base point scalar multiplier model for ECDSA
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-//
-// Curve Selection
-//
-// USE_CURVE == 1 -> P-256
-// USE_CURVE == 2 -> P-384
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Model Parameters
-//------------------------------------------------------------------------------
-#if USE_CURVE == 1
-#define OPERAND_WIDTH (256) // largest supported operand width in bits
-#elif USE_CURVE == 2
-#define OPERAND_WIDTH (384) // largest supported operand width in bits
-#else
-#error USE_CURVE must be either 1 or 2!
-#endif
-
-
-//------------------------------------------------------------------------------
-// P-256 Parameters and Test Vectors
-//------------------------------------------------------------------------------
-
-/* Field Size */
-#define P_256_Q {0xffffffff, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}
-
-/* Generic Numbers */
-#define P_256_ZERO {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}
-#define P_256_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001}
-
-/* Division Factor */
-#define P_256_DELTA {0x7fffffff, 0x80000000, 0x80000000, 0x00000000, 0x00000000, 0x80000000, 0x00000000, 0x00000000}
-
-/* Base Point */
-#define P_256_G_X {0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296}
-#define P_256_G_Y {0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5}
-
-/* Doubled Base Point */
-#define P_256_H_X {0x29d05c19, 0x3da77b71, 0x0e863235, 0x38b77e1b, 0x11f904fe, 0xa42998be, 0x16bd8d74, 0x4ece7ad0}
-#define P_256_H_Y {0xb01cbd1c, 0x01e58065, 0x711814b5, 0x83f061e9, 0xd431cca9, 0x94cea131, 0x3449bf97, 0xc840ae07}
-
-/* Base Point Order */
-#define P_256_N {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, 0xbce6faad, 0xa7179e84, 0xf3b9cac2, 0xfc632551}
-
-/* Private Key */
-#define P_256_D {0x70a12c2d, 0xb16845ed, 0x56ff68cf, 0xc21a472b, 0x3f04d7d6, 0x851bf634, 0x9f2d7d5b, 0x3452b38a}
-
-/* Per-message Random Number */
-#define P_256_K {0x580ec00d, 0x85643433, 0x4cef3f71, 0xecaed496, 0x5b12ae37, 0xfa47055b, 0x1965c7b1, 0x34ee45d0}
-
-/* Public Key */
-#define P_256_Q_X {0x8101ece4, 0x7464a6ea, 0xd70cf69a, 0x6e2bd3d8, 0x8691a326, 0x2d22cba4, 0xf7635eaf, 0xf26680a8}
-#define P_256_Q_Y {0xd8a12ba6, 0x1d599235, 0xf67d9cb4, 0xd58f1783, 0xd3ca43e7, 0x8f0a5aba, 0xa6240799, 0x36c0c3a9}
-
-/* Part of Signature */
-#define P_256_R_X {0x7214bc96, 0x47160bbd, 0x39ff2f80, 0x533f5dc6, 0xddd70ddf, 0x86bb8156, 0x61e805d5, 0xd4e6f27c}
-#define P_256_R_Y {0x8b81e3e9, 0x77597110, 0xc7cf2633, 0x435b2294, 0xb7264298, 0x7defd3d4, 0x007e1cfc, 0x5df84541}
-
-
-//------------------------------------------------------------------------------
-// P-384 Parameters and Test Vectors
-//------------------------------------------------------------------------------
-
-/* Field Size */
-#define P_384_Q {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0xffffffff, 0x00000000, 0x00000000, 0xffffffff}
-
-/* Generic Numbers */
-#define P_384_ZERO {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}
-#define P_384_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001}
-
-/* Division Factor */
-#define P_384_DELTA {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff, 0x80000000, 0x0000000, 0x080000000}
-
-/* Base Point */
-#define P_384_G_X {0xaa87ca22, 0xbe8b0537, 0x8eb1c71e, 0xf320ad74, 0x6e1d3b62, 0x8ba79b98, 0x59f741e0, 0x82542a38, 0x5502f25d, 0xbf55296c, 0x3a545e38, 0x72760ab7}
-#define P_384_G_Y {0x3617de4a, 0x96262c6f, 0x5d9e98bf, 0x9292dc29, 0xf8f41dbd, 0x289a147c, 0xe9da3113, 0xb5f0b8c0, 0x0a60b1ce, 0x1d7e819d, 0x7a431d7c, 0x90ea0e5f}
-
-/* Doubled Base Point */
-#define P_384_H_X {0xaaf06bba, 0x82e9f590, 0xe29c71c2, 0x19bea517, 0x23c5893a, 0xe8b0c8cf, 0x4c117c3e, 0xfb57ab8d, 0x55fa1b42, 0x8155ad27, 0x8b574391, 0x1b13ea8a}
-#define P_384_H_Y {0xc9e821b5, 0x69d9d390, 0xa2616740, 0x6d6d23d6, 0x070be242, 0xd765eb83, 0x1625ceec, 0x4a0f473e, 0xf59f4e30, 0xe2817e62, 0x85bce284, 0x6f15f19d}
-
-/* Base Point Order */
-#define P_384_N {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xc7634d81, 0xf4372ddf, 0x581a0db2, 0x48b0a77a, 0xecec196a, 0xccc52973}
-
-/* Private Key */
-#define P_384_D {0xc838b852, 0x53ef8dc7, 0x394fa580, 0x8a518398, 0x1c7deef5, 0xa69ba8f4, 0xf2117ffe, 0xa39cfcd9, 0x0e95f6cb, 0xc854abac, 0xab701d50, 0xc1f3cf24}
-
-/* Per-message Random Number */
-#define P_384_K {0xdc6b4403, 0x6989a196, 0xe39d1cda, 0xc000812f, 0x4bdd8b2d, 0xb41bb33a, 0xf5137258, 0x5ebd1db6, 0x3f0ce827, 0x5aa1fd45, 0xe2d2a735, 0xf8749359}
-
-/* Public Key */
-#define P_384_Q_X {0x1fbac8ee, 0xbd0cbf35, 0x640b39ef, 0xe0808dd7, 0x74debff2, 0x0a2a329e, 0x91713baf, 0x7d7f3c3e, 0x81546d88, 0x3730bee7, 0xe48678f8, 0x57b02ca0}
-#define P_384_Q_Y {0xeb213103, 0xbd68ce34, 0x3365a8a4, 0xc3d4555f, 0xa385f533, 0x0203bdd7, 0x6ffad1f3, 0xaffb9575, 0x1c132007, 0xe1b24035, 0x3cb0a4cf, 0x1693bdf9}
-
-/* Part of Signature */
-#define P_384_R_X {0xa0c27ec8, 0x93092dea, 0x1e1bd2cc, 0xfed3cf94, 0x5c8134ed, 0x0c9f8131, 0x1a0f4a05, 0x942db8db, 0xed8dd59f, 0x267471d5, 0x462aa14f, 0xe72de856}
-#define P_384_R_Y {0x85564940, 0x9815bb91, 0x424eaca5, 0xfd76c973, 0x75d575d1, 0x422ec53d, 0x343bd33b, 0x847fdf0c, 0x11569685, 0xb528ab25, 0x49301542, 0x8d7cf72b}
-
-
-//------------------------------------------------------------------------------
-// Parameter and Test Vector Selection
-//------------------------------------------------------------------------------
-#if USE_CURVE == 1
-
-#define ECDSA_Q P_256_Q
-
-#define ECDSA_ZERO P_256_ZERO
-#define ECDSA_ONE P_256_ONE
-
-#define ECDSA_DELTA P_256_DELTA
-
-#define ECDSA_G_X P_256_G_X
-#define ECDSA_G_Y P_256_G_Y
-
-#define ECDSA_H_X P_256_H_X
-#define ECDSA_H_Y P_256_H_Y
-
-#define ECDSA_N P_256_N
-#define ECDSA_D P_256_D
-#define ECDSA_K P_256_K
-
-#define ECDSA_Q_X P_256_Q_X
-#define ECDSA_Q_Y P_256_Q_Y
-
-#define ECDSA_R_X P_256_R_X
-#define ECDSA_R_Y P_256_R_Y
-
-#elif USE_CURVE == 2
-
-#define ECDSA_Q P_384_Q
-
-#define ECDSA_ZERO P_384_ZERO
-#define ECDSA_ONE P_384_ONE
-
-#define ECDSA_DELTA P_384_DELTA
-
-#define ECDSA_G_X P_384_G_X
-#define ECDSA_G_Y P_384_G_Y
-
-#define ECDSA_H_X P_384_H_X
-#define ECDSA_H_Y P_384_H_Y
-
-#define ECDSA_N P_384_N
-#define ECDSA_D P_384_D
-#define ECDSA_K P_384_K
-
-#define ECDSA_Q_X P_384_Q_X
-#define ECDSA_Q_Y P_384_Q_Y
-
-#define ECDSA_R_X P_384_R_X
-#define ECDSA_R_Y P_384_R_Y
-
-#else
-
-#error USE_CURVE must be either 1 or 2!
-
-#endif
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// ecdsa_model.h
+// --------------------------------------------
+// Base point scalar multiplier model for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+//
+// Curve Selection
+//
+// USE_CURVE == 1 -> P-256
+// USE_CURVE == 2 -> P-384
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Model Parameters
+//------------------------------------------------------------------------------
+#if USE_CURVE == 1
+#define OPERAND_WIDTH (256) // largest supported operand width in bits
+#elif USE_CURVE == 2
+#define OPERAND_WIDTH (384) // largest supported operand width in bits
+#else
+#error USE_CURVE must be either 1 or 2!
+#endif
+
+
+//------------------------------------------------------------------------------
+// P-256 Parameters and Test Vectors
+//------------------------------------------------------------------------------
+
+/* Field Size */
+#define P_256_Q {0xffffffff, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}
+
+/* Generic Numbers */
+#define P_256_ZERO {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}
+#define P_256_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001}
+
+/* Division Factor */
+#define P_256_DELTA {0x7fffffff, 0x80000000, 0x80000000, 0x00000000, 0x00000000, 0x80000000, 0x00000000, 0x00000000}
+
+/* Base Point */
+#define P_256_G_X {0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296}
+#define P_256_G_Y {0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5}
+
+/* Doubled Base Point */
+#define P_256_H_X {0x29d05c19, 0x3da77b71, 0x0e863235, 0x38b77e1b, 0x11f904fe, 0xa42998be, 0x16bd8d74, 0x4ece7ad0}
+#define P_256_H_Y {0xb01cbd1c, 0x01e58065, 0x711814b5, 0x83f061e9, 0xd431cca9, 0x94cea131, 0x3449bf97, 0xc840ae07}
+
+/* Base Point Order */
+#define P_256_N {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, 0xbce6faad, 0xa7179e84, 0xf3b9cac2, 0xfc632551}
+
+/* Private Key */
+#define P_256_D {0x70a12c2d, 0xb16845ed, 0x56ff68cf, 0xc21a472b, 0x3f04d7d6, 0x851bf634, 0x9f2d7d5b, 0x3452b38a}
+
+/* Per-message Random Number */
+#define P_256_K {0x580ec00d, 0x85643433, 0x4cef3f71, 0xecaed496, 0x5b12ae37, 0xfa47055b, 0x1965c7b1, 0x34ee45d0}
+
+/* Public Key */
+#define P_256_Q_X {0x8101ece4, 0x7464a6ea, 0xd70cf69a, 0x6e2bd3d8, 0x8691a326, 0x2d22cba4, 0xf7635eaf, 0xf26680a8}
+#define P_256_Q_Y {0xd8a12ba6, 0x1d599235, 0xf67d9cb4, 0xd58f1783, 0xd3ca43e7, 0x8f0a5aba, 0xa6240799, 0x36c0c3a9}
+
+/* Part of Signature */
+#define P_256_R_X {0x7214bc96, 0x47160bbd, 0x39ff2f80, 0x533f5dc6, 0xddd70ddf, 0x86bb8156, 0x61e805d5, 0xd4e6f27c}
+#define P_256_R_Y {0x8b81e3e9, 0x77597110, 0xc7cf2633, 0x435b2294, 0xb7264298, 0x7defd3d4, 0x007e1cfc, 0x5df84541}
+
+
+//------------------------------------------------------------------------------
+// P-384 Parameters and Test Vectors
+//------------------------------------------------------------------------------
+
+/* Field Size */
+#define P_384_Q {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0xffffffff, 0x00000000, 0x00000000, 0xffffffff}
+
+/* Generic Numbers */
+#define P_384_ZERO {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}
+#define P_384_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001}
+
+/* Division Factor */
+#define P_384_DELTA {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff, 0x80000000, 0x0000000, 0x080000000}
+
+/* Base Point */
+#define P_384_G_X {0xaa87ca22, 0xbe8b0537, 0x8eb1c71e, 0xf320ad74, 0x6e1d3b62, 0x8ba79b98, 0x59f741e0, 0x82542a38, 0x5502f25d, 0xbf55296c, 0x3a545e38, 0x72760ab7}
+#define P_384_G_Y {0x3617de4a, 0x96262c6f, 0x5d9e98bf, 0x9292dc29, 0xf8f41dbd, 0x289a147c, 0xe9da3113, 0xb5f0b8c0, 0x0a60b1ce, 0x1d7e819d, 0x7a431d7c, 0x90ea0e5f}
+
+/* Doubled Base Point */
+#define P_384_H_X {0xaaf06bba, 0x82e9f590, 0xe29c71c2, 0x19bea517, 0x23c5893a, 0xe8b0c8cf, 0x4c117c3e, 0xfb57ab8d, 0x55fa1b42, 0x8155ad27, 0x8b574391, 0x1b13ea8a}
+#define P_384_H_Y {0xc9e821b5, 0x69d9d390, 0xa2616740, 0x6d6d23d6, 0x070be242, 0xd765eb83, 0x1625ceec, 0x4a0f473e, 0xf59f4e30, 0xe2817e62, 0x85bce284, 0x6f15f19d}
+
+/* Base Point Order */
+#define P_384_N {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xc7634d81, 0xf4372ddf, 0x581a0db2, 0x48b0a77a, 0xecec196a, 0xccc52973}
+
+/* Private Key */
+#define P_384_D {0xc838b852, 0x53ef8dc7, 0x394fa580, 0x8a518398, 0x1c7deef5, 0xa69ba8f4, 0xf2117ffe, 0xa39cfcd9, 0x0e95f6cb, 0xc854abac, 0xab701d50, 0xc1f3cf24}
+
+/* Per-message Random Number */
+#define P_384_K {0xdc6b4403, 0x6989a196, 0xe39d1cda, 0xc000812f, 0x4bdd8b2d, 0xb41bb33a, 0xf5137258, 0x5ebd1db6, 0x3f0ce827, 0x5aa1fd45, 0xe2d2a735, 0xf8749359}
+
+/* Public Key */
+#define P_384_Q_X {0x1fbac8ee, 0xbd0cbf35, 0x640b39ef, 0xe0808dd7, 0x74debff2, 0x0a2a329e, 0x91713baf, 0x7d7f3c3e, 0x81546d88, 0x3730bee7, 0xe48678f8, 0x57b02ca0}
+#define P_384_Q_Y {0xeb213103, 0xbd68ce34, 0x3365a8a4, 0xc3d4555f, 0xa385f533, 0x0203bdd7, 0x6ffad1f3, 0xaffb9575, 0x1c132007, 0xe1b24035, 0x3cb0a4cf, 0x1693bdf9}
+
+/* Part of Signature */
+#define P_384_R_X {0xa0c27ec8, 0x93092dea, 0x1e1bd2cc, 0xfed3cf94, 0x5c8134ed, 0x0c9f8131, 0x1a0f4a05, 0x942db8db, 0xed8dd59f, 0x267471d5, 0x462aa14f, 0xe72de856}
+#define P_384_R_Y {0x85564940, 0x9815bb91, 0x424eaca5, 0xfd76c973, 0x75d575d1, 0x422ec53d, 0x343bd33b, 0x847fdf0c, 0x11569685, 0xb528ab25, 0x49301542, 0x8d7cf72b}
+
+
+//------------------------------------------------------------------------------
+// Parameter and Test Vector Selection
+//------------------------------------------------------------------------------
+#if USE_CURVE == 1
+
+#define ECDSA_Q P_256_Q
+
+#define ECDSA_ZERO P_256_ZERO
+#define ECDSA_ONE P_256_ONE
+
+#define ECDSA_DELTA P_256_DELTA
+
+#define ECDSA_G_X P_256_G_X
+#define ECDSA_G_Y P_256_G_Y
+
+#define ECDSA_H_X P_256_H_X
+#define ECDSA_H_Y P_256_H_Y
+
+#define ECDSA_N P_256_N
+#define ECDSA_D P_256_D
+#define ECDSA_K P_256_K
+
+#define ECDSA_Q_X P_256_Q_X
+#define ECDSA_Q_Y P_256_Q_Y
+
+#define ECDSA_R_X P_256_R_X
+#define ECDSA_R_Y P_256_R_Y
+
+#elif USE_CURVE == 2
+
+#define ECDSA_Q P_384_Q
+
+#define ECDSA_ZERO P_384_ZERO
+#define ECDSA_ONE P_384_ONE
+
+#define ECDSA_DELTA P_384_DELTA
+
+#define ECDSA_G_X P_384_G_X
+#define ECDSA_G_Y P_384_G_Y
+
+#define ECDSA_H_X P_384_H_X
+#define ECDSA_H_Y P_384_H_Y
+
+#define ECDSA_N P_384_N
+#define ECDSA_D P_384_D
+#define ECDSA_K P_384_K
+
+#define ECDSA_Q_X P_384_Q_X
+#define ECDSA_Q_Y P_384_Q_Y
+
+#define ECDSA_R_X P_384_R_X
+#define ECDSA_R_Y P_384_R_Y
+
+#else
+
+#error USE_CURVE must be either 1 or 2!
+
+#endif
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------