summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2016-06-01 10:44:54 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2016-06-01 10:44:54 +0300
commit2c92500715a13e017ae7e792ba8283c91a716b7d (patch)
tree13e739468f791a28d1b06f86826e2f304afca920
Ported ModExpS6 core to the new Alpha platform, hence the core now becomes ModExpA7. (HEAD, master)
Note that the core takes advantage of the built-in DSP slices available in 7-Series FPGAs. This considerably speeds up computations, because the core can operate in 32-bit-word-serial mode instead of just bit-serial mode. The core instantiates the DSP slices directly instead of using the IP wizard, to avoid depending on CoreGen during console bitstream builds.
-rw-r--r--bench/tb_modexpa7.v534
-rw-r--r--rtl/dsp_multiplier_a7.v522
-rw-r--r--rtl/dsp_subtractor_a7.v142
-rw-r--r--rtl/modexpa7_adder64_carry32.v81
-rw-r--r--rtl/modexpa7_buffer_core.v218
-rw-r--r--rtl/modexpa7_buffer_user.v197
-rw-r--r--rtl/modexpa7_modinv32.v141
-rw-r--r--rtl/modexpa7_montgomery_coeff.v425
-rw-r--r--rtl/modexpa7_montgomery_multiplier.v408
-rw-r--r--rtl/modexpa7_top.v706
-rw-r--r--rtl/modexpa7_wrapper.v211
-rw-r--r--rtl/ram_1rw_1ro_readfirst.v88
12 files changed, 3673 insertions, 0 deletions
diff --git a/bench/tb_modexpa7.v b/bench/tb_modexpa7.v
new file mode 100644
index 0000000..27b8928
--- /dev/null
+++ b/bench/tb_modexpa7.v
@@ -0,0 +1,534 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module tb_modexpa7_top;
+
+
+ //
+ // Settings
+ //
+ // Forwarded to the UUT as its MAX_MODULUS_WIDTH parameter. The widest
+ // vectors used by this testbench are 256 bits.
+ localparam MAX_MODULUS_WIDTH = 256;
+
+
+ //
+ // Control Registers
+ //
+ // These drive the UUT's fast_public_mode, modulus_width and
+ // exponent_width inputs. The set_modulus_*() and test_vector_*() tasks
+ // load the widths with 128 or 256 before each operation.
+ reg use_public_mode;
+ reg [ 8: 0] modulus_width;
+ reg [ 8: 0] exponent_width;
+
+
+ //
+ // Test Vectors (128-bit)
+ //
+ // Naming, as used by run_tests_128():
+ // M_<name> - message passed to test_vector_128()
+ // N_<k> - modulus #k passed to set_modulus_128()
+ // D_<k> - exponent used together with modulus #k
+ // S_<name>_<k> - expected result for message <name> with N_<k>/D_<k>
+ //
+ // The trailing bytes of the messages are the ASCII strings "TEST", "ABC"
+ // and "XYZ" repeated.
+ // NOTE(review): the 0001FF..00 prefix looks like signature padding --
+ // confirm where these vectors were generated.
+ //
+ localparam [127:0] M_TEST_128 = 128'h0001FF00544553545445535454455354;
+ localparam [127:0] M_ABC_128 = 128'h0001FFFFFFFF00414243414243414243;
+ localparam [127:0] M_XYZ_128 = 128'h0001FFFFFFFF0058595A58595A58595A;
+
+ // Moduli and the exponents paired with them
+ localparam [127:0] N_1_128 = 128'h56247F8A1582CD1C96ED0ECD3E60FCB1;
+ localparam [127:0] N_2_128 = 128'h708CC1BEA087DCA3D0999E3AC033A50B;
+ localparam [127:0] D_1_128 = 128'h40C9E4AEEB6CDC6D12E4526089BCB8FD;
+ localparam [127:0] D_2_128 = 128'h1ADFF16F74639CB28976EEC528C866C1;
+
+ // Expected results compared against the BANK_RESULT readback
+ localparam [127:0] S_TEST_1_128 = 128'h310AB3124D4CB1DEE1CFA4694DC8BCA2;
+ localparam [127:0] S_TEST_2_128 = 128'h5D621FC2642AEB705FDB7B90693FE3BF;
+ localparam [127:0] S_ABC_1_128 = 128'h235EA2712140F90344DFE2BF74B13075;
+ localparam [127:0] S_XYZ_1_128 = 128'h1A501D80B1F8648DE08C1AE673E13770;
+ localparam [127:0] S_ABC_2_128 = 128'h2258423C6CDB0E1AA73F70CA41BC6633;
+ localparam [127:0] S_XYZ_2_128 = 128'h5B278DEB75C055C8DA8E2993A791F3C2;
+
+
+ //
+ // Test Vectors (256-bit)
+ //
+ // Naming, as used by run_tests_256():
+ // M_<name> - message passed to test_vector_256()
+ // N_<k> - modulus #k passed to set_modulus_256()
+ // D_<k> - exponent used together with modulus #k
+ // S_<name>_<k> - expected result for message <name> with N_<k>/D_<k>
+ //
+ localparam [255:0] M_TEST_256 = 256'h0001FF0054455354544553545445535454455354544553545445535454455354;
+ localparam [255:0] M_ABC_256 = 256'h0001FFFFFFFFFF00414243414243414243414243414243414243414243414243;
+ localparam [255:0] M_XYZ_256 = 256'h0001FFFFFFFFFF0058595A58595A58595A58595A58595A58595A58595A58595A;
+
+ // Moduli and the exponents paired with them
+ localparam [255:0] N_1_256 = 256'h624341A8670845B4AA39CC2C08437E2568E11441CB5461CB49162EAC2E751FC5;
+ localparam [255:0] N_2_256 = 256'h772F7C4A4D8C92CE59D8688897795F7208D3B1D3B5C7D83BF0E1A2A59D879A13;
+ localparam [255:0] D_1_256 = 256'h41D555B401F3A4DFF9FC673249070F08247EF8A52A270F283FFAA7EA65B25F01;
+ localparam [255:0] D_2_256 = 256'h1606E0A19AD3700E52EC7BE542995BDA26AA386DD677A74B7B2846AC44BCAD81;
+
+ // Expected results compared against the BANK_RESULT readback
+ localparam [255:0] S_TEST_1_256 = 256'h07EFB60CF6E6F2350E8D2D5C5261F36CF0B2B71833386AC6E7958A8F39DAD331;
+ localparam [255:0] S_TEST_2_256 = 256'h5C18F857BF6E4D6A013F554A680329BDC14C31FF38D0EEAD1DE02CFD85337A53;
+ localparam [255:0] S_ABC_1_256 = 256'h2AA0A5D3B2AE1FFD23640714F652DD147B6C202905B4D273E837313FB392487C;
+ localparam [255:0] S_XYZ_1_256 = 256'h27850A998B867CB67146EEFF15147668D7CF6B7BD5B048D9735D5767EB6D68CB;
+ localparam [255:0] S_ABC_2_256 = 256'h56A0DA5F9987DF4E921A771CCC8A1D68F8DD7D9244A0DD37A4095D61F455489D;
+ localparam [255:0] S_XYZ_2_256 = 256'h08A7AF13E058754278EE6582626A6675D09A08C0C898E218FCDF7FFA37C9876C;
+
+
+ //
+ // Locals
+ //
+ // Bank selectors: bus_write_reg()/bus_read_reg() place these in the two
+ // upper bits of the 5-bit bus address, above a 3-bit word offset.
+ localparam [ 1: 0] BANK_MODULUS = 2'b00;
+ localparam [ 1: 0] BANK_MESSAGE = 2'b01;
+ localparam [ 1: 0] BANK_EXPONENT = 2'b10;
+ localparam [ 1: 0] BANK_RESULT = 2'b11;
+
+
+ //
+ // Integers
+ //
+ // Word index shared by the loops in all bank_read_*/bank_write_* tasks.
+ integer addr;
+
+
+ //
+ // Inputs, Outputs
+ //
+ // init is held high by set_modulus_*() until ready is seen; next is held
+ // high by test_vector_*() until valid is seen.
+ reg init;
+ reg next;
+ wire ready;
+ wire valid;
+
+
+ //
+ // Bus
+ //
+ reg bus_cs;
+ reg bus_we;
+ reg [ 4: 0] bus_addr;
+ reg [31: 0] bus_data_wr;
+ wire [31: 0] bus_data_rd;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ // Starts low and toggles every 5 ns, so rising edges fall at 5, 15, 25...
+ // ns. Every delay in the stimulus is a multiple of 10 ns, which means the
+ // testbench changes its outputs on falling clock edges.
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // UUT
+ //
+ // The core under test. All ports are wired by name to the testbench
+ // signals declared above; use_public_mode feeds the fast_public_mode
+ // input.
+ modexpa7_top #
+ (
+ .MAX_MODULUS_WIDTH (MAX_MODULUS_WIDTH)
+ )
+ uut
+ (
+ .clk (clk),
+
+ .init (init),
+ .ready (ready),
+
+ .next (next),
+ .valid (valid),
+
+ .modulus_width (modulus_width),
+ .exponent_width (exponent_width),
+
+ .fast_public_mode (use_public_mode),
+
+ .bus_cs (bus_cs),
+ .bus_we (bus_we),
+ .bus_addr (bus_addr),
+ .bus_data_wr (bus_data_wr),
+ .bus_data_rd (bus_data_rd)
+ );
+
+
+ //
+ // Script
+ //
+ // Drives the whole test: puts the handshake, control and bus signals into
+ // a known idle state, then runs the 128-bit and 256-bit suites twice --
+ // first with fast_public_mode asserted, then deasserted. A failing vector
+ // ends the simulation from inside test_vector_*(), so "All tests passed."
+ // is only printed when every comparison succeeded.
+ initial begin
+   //
+   $display("Testbench started.");
+   //
+   init = 0;
+   next = 0;
+   //
+   // Give the UUT's control inputs defined values from time zero. Left
+   // unassigned, these regs would drive X into the core until the first
+   // test gets around to setting them.
+   use_public_mode = 0;
+   modulus_width   = 9'd0;
+   exponent_width  = 9'd0;
+   //
+   bus_cs      = 0;
+   bus_we      = 0;
+   bus_addr    = 5'd0;
+   bus_data_wr = 32'h00000000;
+   //
+   // Let the design settle before the first bus access.
+   #200;
+   //
+   $display("Running in public (unsafe, faster) mode...");
+   use_public_mode = 1;
+   //
+   run_tests_128();
+   run_tests_256();
+   //
+   $display("Running in private (safe, slower) mode...");
+   use_public_mode = 0;
+   //
+   run_tests_128();
+   run_tests_256();
+   //
+   $display("All tests passed.");
+   $display("Testbench finished.");
+   //
+   $finish;
+   //
+ end
+
+
+ //
+ // run_tests_128()
+ //
+ // Runs the complete 128-bit sequence. Each modulus is loaded with
+ // set_modulus_128() and then one or more messages are exponentiated and
+ // checked with test_vector_128(). Both moduli are loaded twice, so
+ // switching back and forth between moduli is exercised as well.
+ //
+ task run_tests_128;
+ begin
+ //
+ $display("Testing 128-bit mode...");
+ //
+ $display("Setting modulus #1...");
+ set_modulus_128(N_1_128);
+ $display("Signing TEST message...");
+ test_vector_128(M_TEST_128, D_1_128, S_TEST_1_128);
+ //
+ $display("Setting modulus #2...");
+ set_modulus_128(N_2_128);
+ $display("Signing TEST message...");
+ test_vector_128(M_TEST_128, D_2_128, S_TEST_2_128);
+ //
+ // Back to modulus #1: two messages in a row without reloading it
+ $display("Setting modulus #1...");
+ set_modulus_128(N_1_128);
+ $display("Signing ABC message...");
+ test_vector_128(M_ABC_128, D_1_128, S_ABC_1_128);
+ $display("Signing XYZ message...");
+ test_vector_128(M_XYZ_128, D_1_128, S_XYZ_1_128);
+ //
+ // Same two messages with modulus #2
+ $display("Setting modulus #2...");
+ set_modulus_128(N_2_128);
+ $display("Signing ABC message...");
+ test_vector_128(M_ABC_128, D_2_128, S_ABC_2_128);
+ $display("Signing XYZ message...");
+ test_vector_128(M_XYZ_128, D_2_128, S_XYZ_2_128);
+ //
+ end
+ endtask
+
+
+ //
+ // run_tests_256()
+ //
+ // Runs the complete 256-bit sequence. Each modulus is loaded with
+ // set_modulus_256() and then one or more messages are exponentiated and
+ // checked with test_vector_256(). Both moduli are loaded twice, so
+ // switching back and forth between moduli is exercised as well.
+ //
+ task run_tests_256;
+ begin
+ //
+ $display("Testing 256-bit mode...");
+ //
+ $display("Setting modulus #1...");
+ set_modulus_256(N_1_256);
+ $display("Signing TEST message...");
+ test_vector_256(M_TEST_256, D_1_256, S_TEST_1_256);
+ //
+ $display("Setting modulus #2...");
+ set_modulus_256(N_2_256);
+ $display("Signing TEST message...");
+ test_vector_256(M_TEST_256, D_2_256, S_TEST_2_256);
+ //
+ // Back to modulus #1: two messages in a row without reloading it
+ $display("Setting modulus #1...");
+ set_modulus_256(N_1_256);
+ $display("Signing ABC message...");
+ test_vector_256(M_ABC_256, D_1_256, S_ABC_1_256);
+ $display("Signing XYZ message...");
+ test_vector_256(M_XYZ_256, D_1_256, S_XYZ_1_256);
+ //
+ // Same two messages with modulus #2
+ $display("Setting modulus #2...");
+ set_modulus_256(N_2_256);
+ $display("Signing ABC message...");
+ test_vector_256(M_ABC_256, D_2_256, S_ABC_2_256);
+ $display("Signing XYZ message...");
+ test_vector_256(M_XYZ_256, D_2_256, S_XYZ_2_256);
+ //
+ end
+ endtask
+
+
+
+ //
+ // set_modulus_128()
+ //
+ // Selects a 128-bit modulus width, writes the modulus into BANK_MODULUS
+ // and then holds init high until wait_ready() returns.
+ // NOTE(review): presumably init makes the core precompute its
+ // modulus-dependent values -- confirm against modexpa7_top.
+ //
+ task set_modulus_128;
+ input [127:0] modulus;
+ begin
+ //
+ modulus_width = 9'd128;
+ //
+ bank_write_128(BANK_MODULUS, modulus);
+ #100;
+ init = 1;
+ wait_ready();
+ init = 0;
+ #100;
+ //
+ end
+ endtask
+
+
+ //
+ // set_modulus_256()
+ //
+ // Selects a 256-bit modulus width, writes the modulus into BANK_MODULUS
+ // and then holds init high until wait_ready() returns.
+ // NOTE(review): presumably init makes the core precompute its
+ // modulus-dependent values -- confirm against modexpa7_top.
+ //
+ task set_modulus_256;
+ input [255:0] modulus;
+ begin
+ //
+ modulus_width = 9'd256;
+ //
+ bank_write_256(BANK_MODULUS, modulus);
+ #100;
+ init = 1;
+ wait_ready();
+ init = 0;
+ #100;
+ //
+ end
+ endtask
+
+
+ //
+ // test_vector_128()
+ //
+ // Runs one 128-bit exponentiation and checks the result.
+ //
+ // message - value written to BANK_MESSAGE
+ // exponent - value written to BANK_EXPONENT
+ // signature - expected contents of BANK_RESULT
+ //
+ // next is held high until wait_valid() returns, then the result bank is
+ // read back and compared. A mismatch prints an error and ends the
+ // simulation with $finish.
+ //
+ task test_vector_128;
+ input [127:0] message;
+ input [127:0] exponent;
+ input [127:0] signature;
+ reg [127:0] readback;
+ begin
+ //
+ exponent_width = 9'd128;
+ //
+ bank_write_128(BANK_MESSAGE, message);
+ #100;
+ bank_write_128(BANK_EXPONENT, exponent);
+ #100;
+ //
+ // Start the operation and wait for it to complete
+ next = 1;
+ wait_valid();
+ next = 0;
+ #100;
+ //
+ bank_read_128(BANK_RESULT, readback);
+ //
+ $display(" signature: %032x", signature);
+ $display(" readback: %032x", readback);
+ //
+ if (readback == signature) $display("Got correct S = M ** E mod N [OK]");
+ else begin
+ $display("Got wrong S = M ** E mod N [ERROR]");
+ $finish;
+ end
+ //
+ end
+ endtask
+
+
+ //
+ // test_vector_256()
+ //
+ // Runs one 256-bit exponentiation and checks the result.
+ //
+ // message - value written to BANK_MESSAGE
+ // exponent - value written to BANK_EXPONENT
+ // signature - expected contents of BANK_RESULT
+ //
+ // next is held high until wait_valid() returns, then the result bank is
+ // read back and compared. A mismatch prints an error and ends the
+ // simulation with $finish.
+ //
+ task test_vector_256;
+ input [255:0] message;
+ input [255:0] exponent;
+ input [255:0] signature;
+ reg [255:0] readback;
+ begin
+ //
+ exponent_width = 9'd256;
+ //
+ bank_write_256(BANK_MESSAGE, message);
+ #100;
+ bank_write_256(BANK_EXPONENT, exponent);
+ #100;
+ //
+ // Start the operation and wait for it to complete
+ next = 1;
+ wait_valid();
+ next = 0;
+ #100;
+ //
+ bank_read_256(BANK_RESULT, readback);
+ //
+ $display(" signature: %064x", signature);
+ $display(" readback: %064x", readback);
+ //
+ if (readback == signature) $display("Got correct S = M ** E mod N [OK]");
+ else begin
+ $display("Got wrong S = M ** E mod N [ERROR]");
+ $finish;
+ end
+ //
+ end
+ endtask
+
+
+ //
+ // bus_write_reg()
+ //
+ // Performs a single 32-bit bus write.
+ //
+ // bank - bank selector, upper two bits of bus_addr
+ // offset - word index within the bank, lower three bits of bus_addr
+ // data - word driven on bus_data_wr
+ //
+ // bus_cs and bus_we are asserted for 10 ns (one clock period), followed
+ // by 10 ns of idle time. bus_addr and bus_data_wr keep their last values
+ // after the access.
+ //
+ task bus_write_reg;
+ input [ 1: 0] bank;
+ input [ 2: 0] offset;
+ input [31: 0] data;
+ begin
+ //
+ bus_cs = 1;
+ bus_we = 1;
+ bus_addr = {bank, offset};
+ bus_data_wr = data;
+ //
+ #10;
+ //
+ bus_cs = 0;
+ bus_we = 0;
+ //
+ #10;
+ //
+ end
+ endtask
+
+
+ //
+ // bus_read_reg()
+ //
+ // Performs a single 32-bit bus read.
+ //
+ // bank - bank selector, upper two bits of bus_addr
+ // offset - word index within the bank, lower three bits of bus_addr
+ // data - receives the value of bus_data_rd
+ //
+ // bus_cs is asserted with bus_we low for 10 ns (one clock period).
+ // bus_data_rd is sampled at the end of that period, in the same time step
+ // in which bus_cs is released, followed by 10 ns of idle time.
+ //
+ task bus_read_reg;
+ input [ 1: 0] bank;
+ input [ 2: 0] offset;
+ output [31: 0] data;
+ begin
+ //
+ bus_cs = 1;
+ bus_we = 0;
+ bus_addr = {bank, offset};
+ //
+ #10;
+ //
+ bus_cs = 0;
+ bus_we = 0;
+ data = bus_data_rd;
+ //
+ #10;
+ //
+ end
+ endtask
+
+
+ //
+ // bank_write_128()
+ //
+ // Writes a 128-bit value into a bank as four 32-bit words, least
+ // significant word first (bits 31:0 go to offset 0).
+ //
+ // bank - bank selector passed on to bus_write_reg()
+ // value - operand to store
+ //
+ task bank_write_128;
+   input [  1:0] bank;
+   input [127:0] value;
+   reg   [ 31:0] word;
+   begin
+     // Slice each word straight out of the operand; addr is the
+     // module-level word index shared by all bank tasks.
+     for (addr=0; addr<4; addr=addr+1) begin
+       word = value[32*addr +: 32];
+       bus_write_reg(bank, addr[2:0], word);
+     end
+   end
+ endtask
+
+
+ //
+ // bank_write_256()
+ //
+ // Writes a 256-bit value into a bank as eight 32-bit words, least
+ // significant word first (bits 31:0 go to offset 0).
+ //
+ // bank - bank selector passed on to bus_write_reg()
+ // value - operand to store
+ //
+ task bank_write_256;
+   input [  1:0] bank;
+   input [255:0] value;
+   reg   [ 31:0] word;
+   begin
+     // Slice each word straight out of the operand; addr is the
+     // module-level word index shared by all bank tasks.
+     for (addr=0; addr<8; addr=addr+1) begin
+       word = value[32*addr +: 32];
+       bus_write_reg(bank, addr[2:0], word);
+     end
+   end
+ endtask
+
+
+ //
+ // bank_read_128()
+ //
+ // Reads four 32-bit words from a bank and assembles them into a 128-bit
+ // value, with offset 0 supplying bits 31:0.
+ //
+ // bank - bank selector passed on to bus_read_reg()
+ // value - receives the assembled operand
+ //
+ task bank_read_128;
+   input  [  1:0] bank;
+   output [127:0] value;
+   reg    [ 31:0] word;
+   begin
+     // Drop each word into its final position; all 128 bits have been
+     // written by the time the loop ends.
+     for (addr=0; addr<4; addr=addr+1) begin
+       bus_read_reg(bank, addr[2:0], word);
+       value[32*addr +: 32] = word;
+     end
+   end
+ endtask
+
+
+ //
+ // bank_read_256()
+ //
+ // Reads eight 32-bit words from a bank and assembles them into a 256-bit
+ // value, with offset 0 supplying bits 31:0.
+ //
+ // bank - bank selector passed on to bus_read_reg()
+ // value - receives the assembled operand
+ //
+ task bank_read_256;
+   input  [  1:0] bank;
+   output [255:0] value;
+   reg    [ 31:0] word;
+   begin
+     // Drop each word into its final position; all 256 bits have been
+     // written by the time the loop ends.
+     for (addr=0; addr<8; addr=addr+1) begin
+       bus_read_reg(bank, addr[2:0], word);
+       value[32*addr +: 32] = word;
+     end
+   end
+ endtask
+
+
+ //
+ // wait_ready()
+ //
+ // Polls the UUT's ready output every 10 ns and returns once it is high.
+ // NOTE(review): there is no timeout, so a core that never raises ready
+ // hangs the simulation; an X on ready also ends the loop because the
+ // condition is then not true -- confirm this is acceptable.
+ //
+ task wait_ready;
+ begin
+ while (!ready) #10;
+ end
+ endtask
+
+
+ //
+ // wait_valid()
+ //
+ // Polls the UUT's valid output every 10 ns and returns once it is high.
+ // NOTE(review): there is no timeout, so a core that never raises valid
+ // hangs the simulation; an X on valid also ends the loop because the
+ // condition is then not true -- confirm this is acceptable.
+ //
+ task wait_valid;
+ begin
+ while (!valid) #10;
+ end
+ endtask
+
+
+endmodule
+
diff --git a/rtl/dsp_multiplier_a7.v b/rtl/dsp_multiplier_a7.v
new file mode 100644
index 0000000..bb6a139
--- /dev/null
+++ b/rtl/dsp_multiplier_a7.v
@@ -0,0 +1,522 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+// 32 x 32 -> 64-bit multiplier built from four directly instantiated
+// DSP48E1 slices plus 34 FD flip-flops.
+//
+// Each operand is split into a 17-bit low part and a 15-bit high part. The
+// four partial products are combined through the slices' P cascade
+// (inst1 -> inst2 -> inst3 -> inst4), and the A operands are shared
+// between neighbouring slices through the A cascade.
+// NOTE(review): p appears to lag a/b by two clock cycles (operand
+// registers inside the slices, then the FD / PREG output stage) -- confirm
+// in simulation.
+module dsp_multiplier_a7
+ (
+ input clk,
+ input [31: 0] a,
+ input [31: 0] b,
+ output [63: 0] p
+ );
+
+ //
+ // Split a, b into parts
+ //
+ // 17 + 15 bits; every part is zero-padded where it is connected to a
+ // slice's A or B port.
+ wire [16: 0] a_lo = a[16: 0];
+ wire [16: 0] b_lo = b[16: 0];
+
+ wire [14: 0] a_hi = a[31:17];
+ wire [14: 0] b_hi = b[31:17];
+
+ //
+ // Products
+ //
+ // P outputs of slices 1, 3 and 4. The P output of slice 2 is left
+ // unconnected; its result is only forwarded on the cascade.
+ wire [47: 0] p_dsp1;
+ wire [47: 0] p_dsp3;
+ wire [47: 0] p_dsp4;
+
+ //
+ // Cascade p
+ //
+ // PCOUT of slice n feeds PCIN of slice n+1.
+ wire [47: 0] p_dsp1_chain;
+ wire [47: 0] p_dsp2_chain;
+ wire [47: 0] p_dsp3_chain;
+
+ //
+ // Cascade a
+ //
+ // ACOUT of slice 1 feeds ACIN of slice 2 (a_lo); ACOUT of slice 3 feeds
+ // ACIN of slice 4 (a_hi).
+ wire [29: 0] a_lo_chain;
+ wire [29: 0] a_hi_chain;
+
+ //
+ // Register parts of p
+ //
+ // p[16:0] is the registered low 17 bits of slice 1's result and p[33:17]
+ // the registered low 17 bits of slice 3's result.
+ genvar i;
+ generate for (i=0; i<17; i=i+1)
+ begin : FD_gen
+ //
+ FD #(.INIT (1'b0))
+ FD_inst_1
+ (
+ .C (clk),
+ .D (p_dsp1[i]),
+ .Q (p[i])
+ );
+ //
+ FD #(.INIT(1'b0))
+ FD_inst_2
+ (
+ .C (clk),
+ .D (p_dsp3[i]),
+ .Q (p[17+i])
+ );
+ //
+ end
+ endgenerate
+
+ //
+ // Mapping
+ //
+ // The upper 30 bits come straight from slice 4, whose P output is
+ // registered inside the slice (PREG = 1), so no FD is needed here.
+ assign p[63:34] = p_dsp4[29:0];
+
+ //
+ // a_lo * b_lo
+ //
+ // First slice of the chain. A and B each pass through one input register
+ // (AREG = BREG = 1, clocked via CEA2/CEB2); the multiplier and P output
+ // are unregistered (MREG = PREG = 0). The low 17 bits of P go to the FD
+ // output registers, the result is forwarded on PCOUT, and the A operand
+ // is forwarded on ACOUT for the next slice.
+ // NOTE(review): OPMODE 7'b0110101 should select P = C + A*B per the
+ // DSP48E1 user guide (C is tied to zero here) -- confirm against UG479.
+ //
+ DSP48E1 #
+ (
+ .AREG (1),
+ .BREG (1),
+ .CREG (0),
+ .DREG (0),
+ .MREG (0),
+ .PREG (0),
+ .ADREG (0),
+
+ .INMODEREG (0),
+ .OPMODEREG (0),
+ .ALUMODEREG (0),
+ .CARRYINREG (0),
+ .CARRYINSELREG (0),
+
+ .ACASCREG (1),
+ .BCASCREG (1),
+
+ .A_INPUT ("DIRECT"),
+ .B_INPUT ("DIRECT"),
+
+ .USE_SIMD ("ONE48"),
+ .USE_DPORT ("FALSE"),
+ .USE_MULT ("MULTIPLY"),
+ .USE_PATTERN_DETECT ("NO_PATDET"),
+
+ .SEL_MASK ("MASK"),
+ .SEL_PATTERN ("PATTERN"),
+
+ .MASK (48'h000000000000),
+ .PATTERN (48'h000000000000),
+
+ .AUTORESET_PATDET ("NO_RESET")
+ )
+ DSP48E1_inst1
+ (
+ .CLK (clk),
+
+ .RSTA (1'b0),
+ .RSTB (1'b0),
+ .RSTC (1'b0),
+ .RSTD (1'b0),
+ .RSTM (1'b0),
+ .RSTP (1'b0),
+
+ .RSTCTRL (1'b0),
+ .RSTINMODE (1'b0),
+ .RSTALUMODE (1'b0),
+ .RSTALLCARRYIN (1'b0),
+
+ .CEA1 (1'b0),
+ .CEA2 (1'b1),
+ .CEB1 (1'b0),
+ .CEB2 (1'b1),
+ .CEC (1'b0),
+ .CED (1'b0),
+ .CEM (1'b0),
+ .CEP (1'b0),
+ .CEAD (1'b0),
+
+ .CECTRL (1'b0),
+ .CEINMODE (1'b0),
+ .CEALUMODE (1'b0),
+ .CECARRYIN (1'b0),
+
+ .CARRYINSEL (3'b000),
+ .ALUMODE (4'b0000),
+ .INMODE (5'b00000),
+ .OPMODE (7'b0110101),
+
+ .A ({{13{1'b0}}, a_lo}),
+ .B ({{ 1{1'b0}}, b_lo}),
+ .C ({48{1'b0}}),
+ .D ({25{1'b0}}),
+ .P (p_dsp1),
+
+ .CARRYIN (1'b0),
+ .CARRYOUT (),
+
+ .CARRYCASCIN (1'b0),
+ .CARRYCASCOUT (),
+
+ .ACIN ({30{1'b0}}),
+ .BCIN ({18{1'b0}}),
+
+ .ACOUT (a_lo_chain),
+ .BCOUT (),
+
+ .PCIN ({48{1'b0}}),
+ .PCOUT (p_dsp1_chain),
+
+ .MULTSIGNIN (1'b0),
+ .MULTSIGNOUT (),
+
+ .PATTERNDETECT (),
+ .PATTERNBDETECT (),
+
+ .UNDERFLOW (),
+ .OVERFLOW ()
+ );
+
+ //
+ // a_lo * b_hi
+ //
+ // Second slice. A is taken from the cascade input (A_INPUT = "CASCADE",
+ // ACIN = a_lo_chain) with no further register (AREG = 0); the direct A
+ // port is tied to zero. B passes through one input register. P is left
+ // unconnected: the result only travels on to slice 3 through PCOUT.
+ // NOTE(review): OPMODE 7'b1010101 should select
+ // P = (PCIN >> 17) + A*B per the DSP48E1 user guide, i.e. the upper part
+ // of a_lo*b_lo is added at this product's weight -- confirm against
+ // UG479.
+ //
+ DSP48E1 #
+ (
+ .AREG (0),
+ .BREG (1),
+ .CREG (0),
+ .DREG (0),
+ .MREG (0),
+ .PREG (0),
+ .ADREG (0),
+
+ .INMODEREG (0),
+ .OPMODEREG (0),
+ .ALUMODEREG (0),
+ .CARRYINREG (0),
+ .CARRYINSELREG (0),
+
+ .ACASCREG (0),
+ .BCASCREG (1),
+
+ .A_INPUT ("CASCADE"),
+ .B_INPUT ("DIRECT"),
+
+ .USE_SIMD ("ONE48"),
+ .USE_DPORT ("FALSE"),
+ .USE_MULT ("MULTIPLY"),
+ .USE_PATTERN_DETECT ("NO_PATDET"),
+
+ .SEL_MASK ("MASK"),
+ .SEL_PATTERN ("PATTERN"),
+
+ .MASK (48'h000000000000),
+ .PATTERN (48'h000000000000),
+
+ .AUTORESET_PATDET ("NO_RESET")
+ )
+ DSP48E1_inst2
+ (
+ .CLK (clk),
+
+ .RSTA (1'b0),
+ .RSTB (1'b0),
+ .RSTC (1'b0),
+ .RSTD (1'b0),
+ .RSTM (1'b0),
+ .RSTP (1'b0),
+
+ .RSTCTRL (1'b0),
+ .RSTINMODE (1'b0),
+ .RSTALUMODE (1'b0),
+ .RSTALLCARRYIN (1'b0),
+
+ .CEA1 (1'b0),
+ .CEA2 (1'b0),
+ .CEB1 (1'b0),
+ .CEB2 (1'b1),
+ .CEC (1'b0),
+ .CED (1'b0),
+ .CEP (1'b0),
+ .CEM (1'b0),
+ .CEAD (1'b0),
+
+ .CECTRL (1'b0),
+ .CEINMODE (1'b0),
+ .CEALUMODE (1'b0),
+ .CECARRYIN (1'b0),
+
+ .CARRYINSEL (3'b000),
+ .ALUMODE (4'b0000),
+ .INMODE (5'b00000),
+ .OPMODE (7'b1010101),
+
+ .A ({30{1'b0}}),
+ .B ({{3{1'b0}}, b_hi}),
+ .C ({48{1'b0}}),
+ .D ({25{1'b0}}),
+ .P (),
+
+ .CARRYIN (1'b0),
+ .CARRYOUT (),
+
+ .CARRYCASCIN (1'b0),
+ .CARRYCASCOUT (),
+
+ .ACIN (a_lo_chain),
+ .BCIN ({18{1'b0}}),
+
+ .ACOUT (),
+ .BCOUT (),
+
+ .PCIN (p_dsp1_chain),
+ .PCOUT (p_dsp2_chain),
+
+ .MULTSIGNIN (1'b0),
+ .MULTSIGNOUT (),
+
+ .PATTERNDETECT (),
+ .PATTERNBDETECT (),
+
+ .UNDERFLOW (),
+ .OVERFLOW ()
+ );
+
+ //
+ // a_hi * b_lo
+ //
+ // Third slice. A and B each pass through one input register; the
+ // multiplier and P output are unregistered. The low 17 bits of P go to
+ // the FD registers that drive p[33:17], the result is forwarded on PCOUT,
+ // and the A operand is forwarded on ACOUT for slice 4.
+ // NOTE(review): OPMODE 7'b0010101 should select P = PCIN + A*B without a
+ // shift per the DSP48E1 user guide, so this product is added at the same
+ // weight as a_lo*b_hi -- confirm against UG479.
+ //
+ DSP48E1 #
+ (
+ .AREG (1),
+ .BREG (1),
+ .CREG (0),
+ .DREG (0),
+ .MREG (0),
+ .PREG (0),
+ .ADREG (0),
+
+ .INMODEREG (0),
+ .OPMODEREG (0),
+ .ALUMODEREG (0),
+ .CARRYINREG (0),
+ .CARRYINSELREG (0),
+
+ .ACASCREG (1),
+ .BCASCREG (1),
+
+ .A_INPUT ("DIRECT"),
+ .B_INPUT ("DIRECT"),
+
+ .USE_SIMD ("ONE48"),
+ .USE_DPORT ("FALSE"),
+ .USE_MULT ("MULTIPLY" ),
+ .USE_PATTERN_DETECT ("NO_PATDET"),
+
+ .SEL_MASK ("MASK"),
+ .SEL_PATTERN ("PATTERN"),
+
+ .MASK (48'h000000000000),
+ .PATTERN (48'h000000000000),
+
+ .AUTORESET_PATDET ("NO_RESET")
+ )
+ DSP48E1_inst3
+ (
+ .CLK (clk),
+
+ .RSTA (1'b0),
+ .RSTB (1'b0),
+ .RSTC (1'b0),
+ .RSTD (1'b0),
+ .RSTM (1'b0),
+ .RSTP (1'b0),
+
+ .RSTCTRL (1'b0),
+ .RSTINMODE (1'b0),
+ .RSTALUMODE (1'b0),
+ .RSTALLCARRYIN (1'b0),
+
+ .CEA1 (1'b0),
+ .CEA2 (1'b1),
+ .CEB1 (1'b0),
+ .CEB2 (1'b1),
+ .CEC (1'b0),
+ .CED (1'b0),
+ .CEM (1'b0),
+ .CEP (1'b0),
+ .CEAD (1'b0),
+
+ .CECTRL (1'b0),
+ .CEINMODE (1'b0),
+ .CEALUMODE (1'b0),
+ .CECARRYIN (1'b0),
+
+ .CARRYINSEL (3'b000),
+ .ALUMODE (4'b0000),
+ .INMODE (5'b00000),
+ .OPMODE (7'b0010101),
+
+ .A ({{15{1'b0}}, a_hi}),
+ .B ({{ 1{1'b0}}, b_lo}),
+ .C ({48{1'b0}}),
+ .D ({25{1'b0}}),
+ .P (p_dsp3),
+
+ .CARRYIN (1'b0),
+ .CARRYOUT (),
+
+ .CARRYCASCIN (1'b0),
+ .CARRYCASCOUT (),
+
+ .ACIN ({30{1'b0}}),
+ .BCIN ({18{1'b0}}),
+
+ .ACOUT (a_hi_chain),
+ .BCOUT (),
+
+ .PCIN (p_dsp2_chain),
+ .PCOUT (p_dsp3_chain),
+
+ .MULTSIGNIN (1'b0),
+ .MULTSIGNOUT (),
+
+ .PATTERNDETECT (),
+ .PATTERNBDETECT (),
+
+ .UNDERFLOW (),
+ .OVERFLOW ()
+ );
+
+ //
+ // a_hi * b_hi
+ //
+ // Last slice. A is taken from the cascade input (ACIN = a_hi_chain) with
+ // no further register; B passes through one input register. This is the
+ // only slice with its P register enabled (PREG = 1, CEP = 1), so its
+ // output is registered inside the slice; bits 29:0 of P drive p[63:34].
+ // NOTE(review): OPMODE 7'b1010101 should select
+ // P = (PCIN >> 17) + A*B per the DSP48E1 user guide -- confirm against
+ // UG479.
+ //
+ DSP48E1 #
+ (
+ .AREG (0),
+ .BREG (1),
+ .CREG (0),
+ .DREG (0),
+ .MREG (0),
+ .PREG (1),
+ .ADREG (0),
+
+ .INMODEREG (0),
+ .OPMODEREG (0),
+ .ALUMODEREG (0),
+ .CARRYINREG (0),
+ .CARRYINSELREG (0),
+
+ .ACASCREG (0),
+ .BCASCREG (1),
+
+ .A_INPUT ("CASCADE"),
+ .B_INPUT ("DIRECT"),
+
+ .USE_SIMD ("ONE48"),
+ .USE_DPORT ("FALSE"),
+ .USE_MULT ("MULTIPLY"),
+ .USE_PATTERN_DETECT ("NO_PATDET"),
+
+ .SEL_MASK ("MASK"),
+ .SEL_PATTERN ("PATTERN"),
+
+ .MASK (48'h000000000000),
+ .PATTERN (48'h000000000000),
+
+ .AUTORESET_PATDET ("NO_RESET")
+ )
+ DSP48E1_inst4
+ (
+ .CLK (clk),
+
+ .RSTA (1'b0),
+ .RSTB (1'b0),
+ .RSTC (1'b0),
+ .RSTD (1'b0),
+ .RSTM (1'b0),
+ .RSTP (1'b0),
+
+ .RSTCTRL (1'b0),
+ .RSTINMODE (1'b0),
+ .RSTALUMODE (1'b0),
+ .RSTALLCARRYIN (1'b0),
+
+ .CEA1 (1'b0),
+ .CEA2 (1'b0),
+ .CEB1 (1'b0),
+ .CEB2 (1'b1),
+ .CEC (1'b0),
+ .CED (1'b0),
+ .CEM (1'b0),
+ .CEP (1'b1),
+ .CEAD (1'b0),
+
+ .CECTRL (1'b0),
+ .CEINMODE (1'b0),
+ .CEALUMODE (1'b0),
+ .CECARRYIN (1'b0),
+
+ .CARRYINSEL (3'b000),
+ .ALUMODE (4'b0000),
+ .INMODE (5'b00000),
+ .OPMODE (7'b1010101),
+
+ .A ({30{1'b0}}),
+ .B ({{3{1'b0}}, b_hi}),
+ .C ({48{1'b0}}),
+ .D ({25{1'b0}}),
+ .P (p_dsp4),
+
+ .CARRYIN (1'b0),
+ .CARRYOUT (),
+
+ .CARRYCASCIN (1'b0),
+ .CARRYCASCOUT (),
+
+ .ACIN (a_hi_chain),
+ .BCIN ({18{1'b0}}),
+
+ .ACOUT (),
+ .BCOUT (),
+
+ .PCIN (p_dsp3_chain),
+ .PCOUT (),
+
+ .MULTSIGNIN (1'b0),
+ .MULTSIGNOUT (),
+
+ .PATTERNDETECT (),
+ .PATTERNBDETECT (),
+
+ .UNDERFLOW (),
+ .OVERFLOW ()
+ );
+
+
+endmodule
diff --git a/rtl/dsp_subtractor_a7.v b/rtl/dsp_subtractor_a7.v
new file mode 100644
index 0000000..00c2f95
--- /dev/null
+++ b/rtl/dsp_subtractor_a7.v
@@ -0,0 +1,142 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+// 32-bit subtractor with carry/borrow in and out, implemented in a single
+// DSP48E1 slice used purely combinationally: every pipeline register
+// parameter is 0, the multiplier is disabled (USE_MULT = "NONE") and CLK
+// is tied low.
+//
+// b is presented on the concatenated A:B input (b[31:18] on A, b[17:0] on
+// B), a on the C input and c_in on CARRYIN. s is P[31:0] and c_out is
+// P[32].
+// NOTE(review): with OPMODE 7'b0110011 and ALUMODE 4'b0011 the slice
+// should compute P = C - (A:B + CARRYIN), i.e. s = a - b - c_in with c_out
+// set when the subtraction borrows -- confirm against UG479.
+module dsp_subtractor_a7
+ (
+ input [31: 0] a,
+ input [31: 0] b,
+ output [31: 0] s,
+ input c_in,
+ output c_out
+ );
+
+ // Full 48-bit ALU result; only bits 32:0 are used.
+ wire [47: 0] p;
+ assign s = p[31:0];
+ assign c_out = p[32];
+
+ DSP48E1 #
+ (
+ .AREG (0),
+ .BREG (0),
+ .CREG (0),
+ .DREG (0),
+ .MREG (0),
+ .PREG (0),
+ .ADREG (0),
+ .ACASCREG (0),
+ .BCASCREG (0),
+ .ALUMODEREG (0),
+ .INMODEREG (0),
+ .OPMODEREG (0),
+ .CARRYINREG (0),
+ .CARRYINSELREG (0),
+
+ .A_INPUT ("DIRECT"),
+ .B_INPUT ("DIRECT"),
+ .USE_DPORT ("FALSE"),
+ .USE_MULT ("NONE"),
+ .USE_SIMD ("ONE48"),
+
+ .USE_PATTERN_DETECT ("NO_PATDET"),
+ .SEL_PATTERN ("PATTERN"),
+ .SEL_MASK ("MASK"),
+ .PATTERN (48'h000000000000),
+ .MASK (48'h3fffffffffff),
+ .AUTORESET_PATDET ("NO_RESET")
+ )
+ DSP48E1_inst
+ (
+ .CLK (1'b0),
+
+ .RSTA (1'b0),
+ .RSTB (1'b0),
+ .RSTC (1'b0),
+ .RSTD (1'b0),
+ .RSTM (1'b0),
+ .RSTP (1'b0),
+
+ .RSTCTRL (1'b0),
+ .RSTINMODE (1'b0),
+ .RSTALUMODE (1'b0),
+ .RSTALLCARRYIN (1'b0),
+
+ .CEA1 (1'b0),
+ .CEA2 (1'b0),
+ .CEB1 (1'b0),
+ .CEB2 (1'b0),
+ .CEC (1'b0),
+ .CED (1'b0),
+ .CEM (1'b0),
+ .CEP (1'b0),
+ .CEAD (1'b0),
+ .CEALUMODE (1'b0),
+ .CEINMODE (1'b0),
+
+ .CECTRL (1'b0),
+ .CECARRYIN (1'b0),
+
+ // b split across A (upper 14 bits) and B (lower 18 bits), a on C
+ .A ({{16{1'b0}}, b[31:18]}),
+ .B (b[17: 0]),
+ .C ({{16{1'b0}}, a[31:0]}),
+ .D (25'd0),
+ .P (p),
+
+ .CARRYIN (c_in),
+ .CARRYOUT (),
+ .CARRYINSEL (3'b000),
+
+ .CARRYCASCIN (1'b0),
+ .CARRYCASCOUT (),
+
+ .PATTERNDETECT (),
+ .PATTERNBDETECT (),
+
+ .OPMODE (7'b0110011),
+ .ALUMODE (4'b0011),
+ .INMODE (5'b00000),
+
+ .MULTSIGNIN (1'b0),
+ .MULTSIGNOUT (),
+
+ .UNDERFLOW (),
+ .OVERFLOW (),
+
+ .ACIN (30'd0),
+ .BCIN (18'd0),
+ .PCIN (48'd0),
+
+ .ACOUT (),
+ .BCOUT (),
+ .PCOUT ()
+ );
+
+endmodule
diff --git a/rtl/modexpa7_adder64_carry32.v b/rtl/modexpa7_adder64_carry32.v
new file mode 100644
index 0000000..093f660
--- /dev/null
+++ b/rtl/modexpa7_adder64_carry32.v
@@ -0,0 +1,81 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+// Multiply-accumulate step with a 32-bit carry word:
+//
+//   {c_out, s} = x * y + c_in + t
+//
+// x * y comes from dsp_multiplier_a7 (clocked by clk); t and c_in are added
+// to its output combinationally. The 64-bit sum cannot overflow, because
+// (2^32 - 1)^2 + 2 * (2^32 - 1) = 2^64 - 1.
+// NOTE(review): t and c_in are not delayed here, so the caller presumably
+// has to present them aligned with the multiplier's output -- confirm
+// against the instantiating module.
+module modexpa7_adder64_carry32
+  (
+   input  wire         clk,
+   input  wire [31: 0] t,
+   input  wire [31: 0] x,
+   input  wire [31: 0] y,
+   output wire [31: 0] s,
+   input  wire [31: 0] c_in,
+   output wire [31: 0] c_out
+  );
+
+
+  //
+  // Multiplier
+  //
+  wire [63: 0] multiplier_out;
+
+  dsp_multiplier_a7 dsp_multiplier
+  (
+   .clk (clk),
+   .a   (x),
+   .b   (y),
+   .p   (multiplier_out)
+  );
+
+
+  //
+  // Carry and T
+  //
+  // Zero-extended to the width of the product so that the addition below
+  // is carried out on 64-bit operands explicitly.
+  wire [63: 0] t_ext = {{32{1'b0}}, t};
+  wire [63: 0] c_ext = {{32{1'b0}}, c_in};
+
+
+  //
+  // Sum
+  //
+  // Uses the extended operands; previously they were declared but unused.
+  wire [63: 0] sum = multiplier_out + c_ext + t_ext;
+
+
+  //
+  // Output
+  //
+  assign s     = sum[31: 0];
+  assign c_out = sum[63:32];
+
+
+endmodule
diff --git a/rtl/modexpa7_buffer_core.v b/rtl/modexpa7_buffer_core.v
new file mode 100644
index 0000000..a48686e
--- /dev/null
+++ b/rtl/modexpa7_buffer_core.v
@@ -0,0 +1,218 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexpa7_buffer_core
+  #(parameter OPERAND_ADDR_WIDTH = 5)   // 1024 / 32 = 32 -> 5 bits
+  (
+    input  wire                        clk,
+    // Montgomery coefficient buffer, port A (read/write)
+    input  wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr,
+    input  wire                        rw_coeff_bram_wr,
+    input  wire [31:0]                 rw_coeff_bram_in,
+    output wire [31:0]                 rw_coeff_bram_out,
+    // message powers buffer, port A (read/write)
+    input  wire [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr,
+    input  wire                        rw_mm_bram_wr,
+    input  wire [31:0]                 rw_mm_bram_in,
+    output wire [31:0]                 rw_mm_bram_out,
+    // extended modulus buffer, port A (no read-back output is exposed)
+    input  wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr,
+    input  wire                        rw_nn_bram_wr,
+    input  wire [31:0]                 rw_nn_bram_in,
+    // output buffer, port A (read/write)
+    input  wire [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr,
+    input  wire                        rw_y_bram_wr,
+    input  wire [31:0]                 rw_y_bram_in,
+    output wire [31:0]                 rw_y_bram_out,
+    // multiplication result buffer, port A (read/write)
+    input  wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr,
+    input  wire                        rw_r_bram_wr,
+    input  wire [31:0]                 rw_r_bram_in,
+    output wire [31:0]                 rw_r_bram_out,
+    // temporary buffer, port A (read/write)
+    input  wire [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr,
+    input  wire                        rw_t_bram_wr,
+    input  wire [31:0]                 rw_t_bram_in,
+    output wire [31:0]                 rw_t_bram_out,
+    // Montgomery coefficient buffer, port B (read-only)
+    input  wire [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr,
+    output wire [31:0]                 ro_coeff_bram_out,
+    // message powers buffer, port B (read-only)
+    input  wire [OPERAND_ADDR_WIDTH:0] ro_mm_bram_addr,
+    output wire [31:0]                 ro_mm_bram_out,
+    // extended modulus buffer, port B (read-only)
+    input  wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr,
+    output wire [31:0]                 ro_nn_bram_out,
+    // multiplication result buffer, port B (read-only)
+    input  wire [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr,
+    output wire [31:0]                 ro_r_bram_out,
+    // temporary buffer, port B (read-only)
+    input  wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr,
+    output wire [31:0]                 ro_t_bram_out
+  );
+  // All six 32-bit buffers share this address width: an operand word index plus one extra bit.
+  localparam BRAM_ADDR_BITS = OPERAND_ADDR_WIDTH + 1;
+  // ------------------------------------------------------------------
+  // mem_coeff -- Montgomery coefficient
+  // ------------------------------------------------------------------
+  ram_1rw_1ro_readfirst
+    #(
+      .MEM_ADDR_BITS (BRAM_ADDR_BITS),
+      .MEM_WIDTH     (32)
+    )
+  mem_coeff
+    (
+      .clk    (clk),
+      // port A
+      .a_addr (rw_coeff_bram_addr),
+      .a_in   (rw_coeff_bram_in),
+      .a_wr   (rw_coeff_bram_wr),
+      .a_out  (rw_coeff_bram_out),
+      // port B
+      .b_addr (ro_coeff_bram_addr),
+      .b_out  (ro_coeff_bram_out)
+    );
+
+
+  // ------------------------------------------------------------------
+  // mem_mm -- powers of the message
+  // ------------------------------------------------------------------
+  ram_1rw_1ro_readfirst
+    #(
+      .MEM_ADDR_BITS (BRAM_ADDR_BITS),
+      .MEM_WIDTH     (32)
+    )
+  mem_mm
+    (
+      .clk    (clk),
+      // port A
+      .a_addr (rw_mm_bram_addr),
+      .a_in   (rw_mm_bram_in),
+      .a_wr   (rw_mm_bram_wr),
+      .a_out  (rw_mm_bram_out),
+      // port B
+      .b_addr (ro_mm_bram_addr),
+      .b_out  (ro_mm_bram_out)
+    );
+
+
+  // ------------------------------------------------------------------
+  // mem_nn -- extended modulus (port A data output left unconnected)
+  // ------------------------------------------------------------------
+  ram_1rw_1ro_readfirst
+    #(
+      .MEM_ADDR_BITS (BRAM_ADDR_BITS),
+      .MEM_WIDTH     (32)
+    )
+  mem_nn
+    (
+      .clk    (clk),
+      // port A
+      .a_addr (rw_nn_bram_addr),
+      .a_in   (rw_nn_bram_in),
+      .a_wr   (rw_nn_bram_wr),
+      .a_out  (),
+      // port B
+      .b_addr (ro_nn_bram_addr),
+      .b_out  (ro_nn_bram_out)
+    );
+
+
+  // ------------------------------------------------------------------
+  // mem_y -- output (port B unused: address tied to zero, output open)
+  // ------------------------------------------------------------------
+  ram_1rw_1ro_readfirst
+    #(
+      .MEM_ADDR_BITS (BRAM_ADDR_BITS),
+      .MEM_WIDTH     (32)
+    )
+  mem_y
+    (
+      .clk    (clk),
+      // port A
+      .a_addr (rw_y_bram_addr),
+      .a_in   (rw_y_bram_in),
+      .a_wr   (rw_y_bram_wr),
+      .a_out  (rw_y_bram_out),
+      // port B
+      .b_addr ({(OPERAND_ADDR_WIDTH+1){1'b0}}),
+      .b_out  ()
+    );
+
+
+  // ------------------------------------------------------------------
+  // mem_r -- result of multiplication
+  // ------------------------------------------------------------------
+  ram_1rw_1ro_readfirst
+    #(
+      .MEM_ADDR_BITS (BRAM_ADDR_BITS),
+      .MEM_WIDTH     (32)
+    )
+  mem_r
+    (
+      .clk    (clk),
+      // port A
+      .a_addr (rw_r_bram_addr),
+      .a_in   (rw_r_bram_in),
+      .a_wr   (rw_r_bram_wr),
+      .a_out  (rw_r_bram_out),
+      // port B
+      .b_addr (ro_r_bram_addr),
+      .b_out  (ro_r_bram_out)
+    );
+
+
+  // ------------------------------------------------------------------
+  // mem_t -- temporary buffer
+  // ------------------------------------------------------------------
+  ram_1rw_1ro_readfirst
+    #(
+      .MEM_ADDR_BITS (BRAM_ADDR_BITS),
+      .MEM_WIDTH     (32)
+    )
+  mem_t
+    (
+      .clk    (clk),
+      // port A
+      .a_addr (rw_t_bram_addr),
+      .a_in   (rw_t_bram_in),
+      .a_wr   (rw_t_bram_wr),
+      .a_out  (rw_t_bram_out),
+      // port B
+      .b_addr (ro_t_bram_addr),
+      .b_out  (ro_t_bram_out)
+    );
+
+
+endmodule
diff --git a/rtl/modexpa7_buffer_user.v b/rtl/modexpa7_buffer_user.v
new file mode 100644
index 0000000..abb772b
--- /dev/null
+++ b/rtl/modexpa7_buffer_user.v
@@ -0,0 +1,197 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexpa7_buffer_user
+  #(parameter OPERAND_ADDR_WIDTH = 5)   // 1024 / 32 = 32 -> 5 bits
+  (
+    input  wire                          clk,
+    // Bus port: the two top address bits select the bank, the remaining bits select the word.
+    input  wire                          bus_cs,
+    input  wire                          bus_we,
+    input  wire [OPERAND_ADDR_WIDTH+1:0] bus_addr,      // ADDR_WIDTH_TOTAL bits wide
+    input  wire [31:0]                   bus_data_wr,
+    output wire [31:0]                   bus_data_rd,
+    // Read-only port into the modulus memory.
+    input  wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr,
+    output wire [31:0]                   ro_modulus_bram_out,
+    // Read-only port into the message memory.
+    input  wire [OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr,
+    output wire [31:0]                   ro_message_bram_out,
+    // Read-only port into the exponent memory.
+    input  wire [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr,
+    output wire [31:0]                   ro_exponent_bram_out,
+    // Write port into the result memory.
+    input  wire [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr,
+    input  wire                          rw_result_bram_wr,
+    input  wire [31:0]                   rw_result_bram_in
+  );
+  // Four operand memories behind one bus port. The bus can write modulus, message and
+  // exponent and read all four banks; bus writes to the result bank are not decoded.
+  //
+  // Locals
+  //
+  localparam ADDR_WIDTH_TOTAL = OPERAND_ADDR_WIDTH + 2;   // must equal the declared width of bus_addr
+
+  localparam [1: 0] BUS_ADDR_BANK_MODULUS  = 2'b00;
+  localparam [1: 0] BUS_ADDR_BANK_MESSAGE  = 2'b01;
+  localparam [1: 0] BUS_ADDR_BANK_EXPONENT = 2'b10;
+  localparam [1: 0] BUS_ADDR_BANK_RESULT   = 2'b11;
+
+  //
+  // Address Decoder
+  //
+  wire [OPERAND_ADDR_WIDTH-1:0] bus_addr_operand_word = bus_addr[OPERAND_ADDR_WIDTH-1:0];
+  wire [                 1:0] bus_addr_operand_bank = bus_addr[ADDR_WIDTH_TOTAL-1:ADDR_WIDTH_TOTAL-2];
+
+
+  //
+  // Modulus Memory
+  //
+  wire [31: 0] bus_data_rd_modulus;
+
+  ram_1rw_1ro_readfirst #
+    (
+      .MEM_WIDTH     (32),
+      .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH)
+    )
+  mem_modulus
+    (
+      .clk    (clk),
+      // port A: bus side (written only when this bank is selected)
+      .a_addr (bus_addr_operand_word),
+      .a_wr   (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MODULUS)),
+      .a_in   (bus_data_wr),
+      .a_out  (bus_data_rd_modulus),
+      // port B: core side
+      .b_addr (ro_modulus_bram_addr),
+      .b_out  (ro_modulus_bram_out)
+    );
+
+
+  //
+  // Message Memory
+  //
+  wire [31: 0] bus_data_rd_message;
+
+  ram_1rw_1ro_readfirst #
+    (
+      .MEM_WIDTH     (32),
+      .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH)
+    )
+  mem_message
+    (
+      .clk    (clk),
+      // port A: bus side (written only when this bank is selected)
+      .a_addr (bus_addr_operand_word),
+      .a_wr   (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MESSAGE)),
+      .a_in   (bus_data_wr),
+      .a_out  (bus_data_rd_message),
+      // port B: core side
+      .b_addr (ro_message_bram_addr),
+      .b_out  (ro_message_bram_out)
+    );
+
+
+  //
+  // Exponent Memory
+  //
+  wire [31: 0] bus_data_rd_exponent;
+
+  ram_1rw_1ro_readfirst #
+    (
+      .MEM_WIDTH     (32),
+      .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH)
+    )
+  mem_exponent
+    (
+      .clk    (clk),
+      // port A: bus side (written only when this bank is selected)
+      .a_addr (bus_addr_operand_word),
+      .a_wr   (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_EXPONENT)),
+      .a_in   (bus_data_wr),
+      .a_out  (bus_data_rd_exponent),
+      // port B: core side
+      .b_addr (ro_exponent_bram_addr),
+      .b_out  (ro_exponent_bram_out)
+    );
+
+
+  //
+  // Result Memory (ports swapped: the core writes through port A, the bus reads through port B)
+  //
+  wire [31: 0] bus_data_rd_result;
+
+  ram_1rw_1ro_readfirst #
+    (
+      .MEM_WIDTH     (32),
+      .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH)
+    )
+  mem_result
+    (
+      .clk    (clk),
+      // port A: core side
+      .a_addr (rw_result_bram_addr),
+      .a_wr   (rw_result_bram_wr),
+      .a_in   (rw_result_bram_in),
+      .a_out  (),
+      // port B: bus side
+      .b_addr (bus_addr_operand_word),
+      .b_out  (bus_data_rd_result)
+    );
+
+
+  //
+  // Output Selector (bank select delayed by one clock; presumably matches the RAM read latency -- confirm)
+  //
+  reg [1: 0] bus_addr_operand_bank_prev;
+  always @(posedge clk) bus_addr_operand_bank_prev <= bus_addr_operand_bank;   // non-blocking: clocked register
+
+  reg [31: 0] bus_data_rd_mux;
+  assign bus_data_rd = bus_data_rd_mux;
+
+  always @(*)
+    //
+    case (bus_addr_operand_bank_prev)
+      //
+      BUS_ADDR_BANK_MODULUS:  bus_data_rd_mux = bus_data_rd_modulus;
+      BUS_ADDR_BANK_MESSAGE:  bus_data_rd_mux = bus_data_rd_message;
+      BUS_ADDR_BANK_EXPONENT: bus_data_rd_mux = bus_data_rd_exponent;
+      BUS_ADDR_BANK_RESULT:   bus_data_rd_mux = bus_data_rd_result;
+      // only reachable when the select contains X/Z bits
+      default:                bus_data_rd_mux = {32{1'bX}};
+      //
+    endcase
+
+
+endmodule
diff --git a/rtl/modexpa7_modinv32.v b/rtl/modexpa7_modinv32.v
new file mode 100644
index 0000000..cb47746
--- /dev/null
+++ b/rtl/modexpa7_modinv32.v
@@ -0,0 +1,141 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexpa7_modinv32
+  (
+    input  wire          clk,
+    // Handshake: a rising edge on ena starts a run; rdy goes high when it ends, low again once ena is low.
+    input  wire          ena,
+    output wire          rdy,
+    // Operand word and the result register.
+    input  wire [31: 0]  n0,
+    output wire [31: 0]  n0_modinv
+  );
+  // Builds n0_modinv step by step so that the low bits of n0 * n0_modinv equal 1.
+  // NOTE(review): presumably the inverse of n0 modulo 2^32, which needs n0 to be odd -- confirm.
+  //
+  // Trigger (one-clock pulse on the rising edge of ena)
+  //
+  reg  ena_dly = 1'b0;
+  wire ena_trig = ena && !ena_dly;
+  always @(posedge clk) ena_dly <= ena;
+
+
+  //
+  // Ready Register
+  //
+  reg rdy_reg = 1'b0;
+  assign rdy = rdy_reg;
+
+
+  //
+  // Counter (counts 1..132 during a run; bits [1:0] are the phase, bits [7:2] the step number)
+  //
+  reg  [7: 0] cnt = 8'd0;
+  wire [7: 0] cnt_zero = 8'd0;
+  wire [7: 0] cnt_last = 8'd132;
+  wire [7: 0] cnt_next = cnt + 1'b1;
+  wire [1: 0] cnt_phase = cnt[1:0];
+  wire [5: 0] cnt_cycle = cnt[7:2];
+
+  always @(posedge clk)
+    // Idle at zero until triggered (and not already ready); wrap to zero after cnt_last.
+    if (cnt == cnt_zero) cnt <= (!rdy_reg && ena_trig) ? cnt_next : cnt_zero;
+    else                 cnt <= (cnt == cnt_last) ? cnt_zero : cnt_next;
+
+
+  //
+  // Enable / Ready Logic
+  //
+  always @(posedge clk)
+    //
+    if (cnt == cnt_last) rdy_reg <= 1'b1;
+    else if ((cnt == cnt_zero) && (rdy_reg && !ena)) rdy_reg <= 1'b0;
+
+
+  //
+  // Output Register
+  //
+  reg [31: 0] n0_modinv_reg;
+  assign n0_modinv = n0_modinv_reg;
+
+
+  //
+  // Mask and Power (declared ahead of the multiplier section, which reads mask_reg)
+  //
+  reg [30: 0] mask_reg;
+  reg [31: 0] power_reg;
+
+  // Multiplier: n0 times the current estimate. Only the low bits selected by
+  // {mask_reg, 1'b1} take part in the check below.
+  (* KEEP="TRUE" *)
+  wire [63: 0] multiplier_out;
+  wire [31: 0] multiplier_out_masked = multiplier_out[31: 0] & {mask_reg, 1'b1};
+
+  dsp_multiplier_a7 dsp_multiplier
+    (
+      .clk (clk),
+      .a   (n0),
+      .b   (n0_modinv_reg),
+      .p   (multiplier_out)
+    );
+
+  // Update, once every four clocks (phase 1). NOTE(review): this presumably leaves the
+  // multiplier enough clocks to settle between steps -- confirm against dsp_multiplier_a7.
+  always @(posedge clk)
+    //
+    if (cnt_phase == 2'd1) begin
+      //
+      if (cnt_cycle == 6'd0) begin
+        // Step 0: start from a zero estimate, a one-bit mask and weight 1.
+        mask_reg  <= 31'd0;
+        power_reg <= 32'd1;
+        //
+        n0_modinv_reg <= 32'd0;
+        //
+      end else begin
+        // Steps 1..32: widen the mask by one bit and double the weight.
+        mask_reg  <= { mask_reg[29:0], 1'b1};
+        power_reg <= {power_reg[30:0], 1'b0};
+        // If the masked product is not 1, add the current weight to the estimate.
+        if (multiplier_out_masked != 32'd1)
+          //
+          n0_modinv_reg <= n0_modinv_reg + power_reg;
+        //
+      end
+      //
+    end
+
+
+endmodule
diff --git a/rtl/modexpa7_montgomery_coeff.v b/rtl/modexpa7_montgomery_coeff.v
new file mode 100644
index 0000000..009fd99
--- /dev/null
+++ b/rtl/modexpa7_montgomery_coeff.v
@@ -0,0 +1,425 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexpa7_montgomery_coeff
+ #(parameter MODULUS_NUM_BITS = 11, // 1024 -> 11 bits
+ parameter OPERAND_ADDR_WIDTH = 5) // 1024 / 32 = 32 -> 5 bits
+ (
+ input wire clk,
+ // Handshake: a rising edge on ena starts a run; rdy is set at the end and cleared once ena is low.
+ input wire ena,
+ output wire rdy,
+ // Modulus width (presumably in bits -- confirm with the caller); sets the word and round loop bounds.
+ input wire [MODULUS_NUM_BITS-1:0] modulus_width,
+ // Coefficient buffer, read/write port (address is one bit wider than the modulus address).
+ output wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr,
+ output wire coeff_bram_wr,
+ output wire [31:0] coeff_bram_in,
+ input wire [31:0] coeff_bram_out,
+ // NN buffer, write side only; its write enable is driven in FSM_STATE_INIT.
+ output wire [OPERAND_ADDR_WIDTH :0] nn_bram_addr,
+ output wire nn_bram_wr,
+ output wire [31:0] nn_bram_in,
+ // Modulus buffer, read side only.
+ output wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr,
+ input wire [31:0] modulus_bram_out,
+ // Control of the external modinv block (operand word, enable, ready).
+ output wire [31:0] modinv_n0,
+ output wire modinv_ena,
+ input wire modinv_rdy
+ );
+ // Builds a value in the coefficient buffer: starts from 1, then for 2*modulus_width+64 rounds
+ // shifts it left by one bit and subtracts the modulus whenever the comparison says "not less".
+ // NOTE(review): presumably this yields a power of two reduced modulo the modulus -- confirm.
+ // Locals
+ //
+ localparam [ MODULUS_NUM_BITS :0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}};
+ localparam [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+ localparam [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}};
+
+
+ //
+ // FSM state codes
+ //
+ localparam FSM_STATE_IDLE = 6'd0;
+ // INIT: write the initial coefficient (1) and copy the modulus into the NN buffer.
+ localparam FSM_STATE_INIT = 6'd10;
+ // SHIFT: shift the coefficient left by one bit, one word per READ/WRITE pair.
+ localparam FSM_STATE_SHIFT_READ = 6'd21;
+ localparam FSM_STATE_SHIFT_WRITE = 6'd22;
+ // COMPARE: compare coefficient and modulus word by word, from the top word down.
+ localparam FSM_STATE_COMPARE_READ = 6'd31;
+ localparam FSM_STATE_COMPARE_COMPARE = 6'd32;
+ // SUBTRACT: subtract the modulus from the coefficient, one word per READ/WRITE pair.
+ localparam FSM_STATE_SUBTRACT_READ = 6'd41;
+ localparam FSM_STATE_SUBTRACT_WRITE = 6'd42;
+ // ROUND: advance the round counter.
+ localparam FSM_STATE_ROUND = 6'd50;
+ // FINAL: wait for modinv_rdy.
+ localparam FSM_STATE_FINAL = 6'd60;
+
+ reg [5: 0] fsm_state = FSM_STATE_IDLE;
+
+
+ //
+ // Trigger (one-clock pulse on the rising edge of ena)
+ //
+ reg ena_dly = 1'b0;
+
+ wire ena_trig = ena && !ena_dly;
+
+ always @(posedge clk) ena_dly <= ena;
+
+
+ //
+ // Ready Register
+ //
+ reg rdy_reg = 1'b0;
+
+ assign rdy = rdy_reg;
+
+
+ //
+ // ModInv Control (enable and operand word handed to the external modinv block)
+ //
+ reg modinv_ena_reg = 1'b0;
+ reg [31: 0] modinv_n0_reg;
+
+ assign modinv_ena = modinv_ena_reg;
+ assign modinv_n0 = modinv_n0_reg;
+
+
+ //
+ // Enable / Ready Logic
+ //
+ always @(posedge clk)
+ // rdy is raised in FINAL once modinv_rdy is seen, and dropped in IDLE after ena goes low.
+ if (fsm_state == FSM_STATE_FINAL) begin
+ //
+ if (modinv_rdy) rdy_reg <= 1'b1;
+ //
+ end else if (fsm_state == FSM_STATE_IDLE) begin
+ //
+ if (rdy_reg && !ena) rdy_reg <= 1'b0;
+ //
+ end
+
+
+ //
+ // Flags (bit carried between words of the shift, borrow carried between words of the subtraction)
+ //
+ reg reg_shift_carry = 1'b0;
+ reg reg_subtractor_borrow = 1'b0;
+
+
+ //
+ // Round Counter (last round index is twice modulus_width plus 63; the counter wraps to zero after it)
+ //
+ reg [MODULUS_NUM_BITS:0] round_count = round_count_zero;
+ wire [MODULUS_NUM_BITS:0] round_count_last = {modulus_width, 1'b0} + 6'd63;
+ wire [MODULUS_NUM_BITS:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
+
+
+ //
+ // Modulus BRAM Interface
+ //
+ reg [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_reg = modulus_bram_addr_zero;
+
+ assign modulus_bram_addr = modulus_bram_addr_reg;
+
+
+ //
+ // Coeff BRAM Interface
+ //
+ reg [OPERAND_ADDR_WIDTH:0] coeff_bram_addr_reg = coeff_bram_addr_zero;
+ reg coeff_bram_wr_reg = 1'b0;
+
+ assign coeff_bram_addr = coeff_bram_addr_reg;
+ assign coeff_bram_wr = coeff_bram_wr_reg;
+
+
+ //
+ // NN BRAM Interface (address and write enable mirror the coefficient ones during INIT)
+ //
+ reg [OPERAND_ADDR_WIDTH:0] nn_bram_addr_reg = coeff_bram_addr_zero;
+ reg nn_bram_wr_reg = 1'b0;
+
+ assign nn_bram_addr = nn_bram_addr_reg;
+ assign nn_bram_wr = nn_bram_wr_reg;
+
+
+ //
+ // Hardware Subtractor (presumably s = a - b - c_in with the borrow on c_out -- confirm in dsp_subtractor_a7)
+ //
+ wire [31: 0] subtractor_out;
+ wire subtractor_out_nonzero = |subtractor_out;
+ wire subtractor_borrow_out;
+ wire subtractor_borrow_in;
+ // No borrow-in while comparing (each word is compared on its own); otherwise use the saved borrow.
+ assign subtractor_borrow_in = (fsm_state == FSM_STATE_COMPARE_COMPARE) ? 1'b0 : reg_subtractor_borrow;
+
+ dsp_subtractor_a7 dsp_subtractor
+ (
+ .a (coeff_bram_out),
+ .b (modulus_bram_out),
+ .s (subtractor_out),
+ .c_in (subtractor_borrow_in),
+ .c_out (subtractor_borrow_out)
+ );
+
+
+ //
+ // Handy Wires (address bounds and saturating/wrapping next/previous addresses)
+ //
+ wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH];
+ // Top coefficient address: the upper bits of modulus_width with a zero bit appended.
+ wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_last = {modulus_width_msb, 1'b0};
+ wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_next_or_zero = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_zero;
+ wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_next_or_last = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_last;
+ wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_prev_or_zero = (coeff_bram_addr_reg > coeff_bram_addr_zero) ? coeff_bram_addr_reg - 1'b1 : coeff_bram_addr_zero;
+ // Top modulus address: one below the top coefficient address (truncated to the modulus address width).
+ wire [OPERAND_ADDR_WIDTH :0] modulus_bram_addr_last_ext = coeff_bram_addr_last - 1'b1;
+
+ wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_last = modulus_bram_addr_last_ext[OPERAND_ADDR_WIDTH-1:0];
+ wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_next_or_zero = (modulus_bram_addr_reg < modulus_bram_addr_last) ? modulus_bram_addr_reg + 1'b1 : modulus_bram_addr_zero;
+ wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_prev_or_zero = (modulus_bram_addr_reg > modulus_bram_addr_zero) ? modulus_bram_addr_reg - 1'b1 : modulus_bram_addr_zero;
+
+
+ //
+ // Coeff BRAM Input Logic
+ //
+ reg [31: 0] coeff_bram_in_mux;
+
+ assign coeff_bram_in = coeff_bram_in_mux;
+
+ always @(*)
+ //
+ case (fsm_state)
+ // INIT: word zero gets 1, every other word gets 0.
+ FSM_STATE_INIT:
+ //
+ if (coeff_bram_addr_reg == coeff_bram_addr_zero) coeff_bram_in_mux = 32'h00000001;
+ else coeff_bram_in_mux = 32'h00000000;
+ // SHIFT_WRITE: current word shifted left by one, with the saved carry bit shifted in.
+ FSM_STATE_SHIFT_WRITE:
+ //
+ coeff_bram_in_mux = {coeff_bram_out[30:0], reg_shift_carry};
+ // SUBTRACT_WRITE: difference word; the top word is cleared instead.
+ FSM_STATE_SUBTRACT_WRITE:
+ //
+ if (coeff_bram_addr_reg == coeff_bram_addr_last) coeff_bram_in_mux = 32'h00000000;
+ else coeff_bram_in_mux = subtractor_out;
+ // Any other state: don't-care.
+ default:
+ //
+ coeff_bram_in_mux = {32{1'bX}};
+
+ endcase
+
+
+ //
+ // NN BRAM Input Logic
+ //
+ reg [31: 0] nn_bram_in_mux;
+
+ assign nn_bram_in = nn_bram_in_mux;
+
+ always @(*)
+ //
+ case (fsm_state)
+ // INIT: the modulus word being read; the top word is written as zero.
+ FSM_STATE_INIT:
+ //
+ if (coeff_bram_addr_reg == coeff_bram_addr_last) nn_bram_in_mux = {32{1'b0}};
+ else nn_bram_in_mux = modulus_bram_out;
+
+ default:
+ //
+ nn_bram_in_mux = {32{1'bX}};
+
+ endcase
+
+
+ //
+ // Comparison Functions (combinational verdict for the word currently addressed)
+ //
+ reg compare_greater_or_equal;
+ reg compare_less_than;
+
+ wire compare_done = compare_greater_or_equal | compare_less_than;
+
+ always @(*)
+ // Top word: only bit 0 of the coefficient word is examined.
+ if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_greater_or_equal = coeff_bram_out[0];
+ // Word zero: no borrow is enough, so equal words count as "greater or equal".
+ else if (coeff_bram_addr_reg == coeff_bram_addr_zero) compare_greater_or_equal = !subtractor_borrow_out;
+ // Other words: decided only when there is no borrow and the difference is nonzero.
+ else compare_greater_or_equal = !subtractor_borrow_out && subtractor_out_nonzero;
+
+ always @(*)
+ // "Less than" is never reported on the top word; elsewhere it is the subtractor borrow.
+ if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_less_than = 1'b0;
+ //
+ else compare_less_than = subtractor_borrow_out;
+
+
+
+ //
+ // Main Logic (address, write-enable, flag and counter updates per state)
+ //
+ always @(posedge clk)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT: begin
+ // Keep writing while below the top address; the address only advances once the write enable is high.
+ coeff_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0;
+ coeff_bram_addr_reg <= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero;
+ //
+ nn_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0;
+ nn_bram_addr_reg <= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero;
+ // While the write enable is still low: start modinv on the modulus word currently read.
+ if (!coeff_bram_wr_reg) begin
+ //
+ modinv_ena_reg <= 1'b1;
+ modinv_n0_reg <= modulus_bram_out;
+ //
+ end
+ // Advance the modulus address; at address zero it only moves while the write enable is still low.
+ if (modulus_bram_addr_reg == modulus_bram_addr_zero) begin
+ //
+ if (!coeff_bram_wr_reg)
+ //
+ modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero;
+ //
+ end else begin
+ //
+ modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero;
+ //
+ end
+ //
+ end
+
+ FSM_STATE_SHIFT_READ: begin
+ // Raise the write enable for the following clock; clear the carry when at word zero.
+ coeff_bram_wr_reg <= 1'b1;
+ //
+ if (coeff_bram_addr_reg == coeff_bram_addr_zero)
+ //
+ reg_shift_carry <= 1'b0;
+ //
+ end
+
+ FSM_STATE_SHIFT_WRITE: begin
+ // Step to the next word (stopping at the top one) and keep the bit shifted out of this word.
+ coeff_bram_wr_reg <= 1'b0;
+ coeff_bram_addr_reg <= coeff_bram_addr_next_or_last;
+ //
+ reg_shift_carry <= coeff_bram_out[31];
+ //
+ end
+
+ FSM_STATE_COMPARE_COMPARE: begin
+ // Once decided, rewind both addresses to zero; otherwise step one word down.
+ coeff_bram_addr_reg <= compare_done ? coeff_bram_addr_zero : coeff_bram_addr_prev_or_zero;
+ // Leaving the top coefficient word, the modulus address starts from its own top word.
+ modulus_bram_addr_reg <= compare_done ? modulus_bram_addr_zero : ((coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_last : modulus_bram_addr_prev_or_zero);
+ //
+ end
+
+ FSM_STATE_SUBTRACT_READ: begin
+ // Raise the write enable for the following clock; clear the borrow when at word zero.
+ coeff_bram_wr_reg <= 1'b1;
+ //
+ if (coeff_bram_addr_reg == coeff_bram_addr_zero)
+ //
+ reg_subtractor_borrow <= 1'b0;
+ //
+ end
+
+ FSM_STATE_SUBTRACT_WRITE: begin
+ // Step both addresses to the next word (wrapping to zero) and keep the borrow for it.
+ coeff_bram_wr_reg <= 1'b0;
+ coeff_bram_addr_reg <= coeff_bram_addr_next_or_zero;
+ //
+ modulus_bram_addr_reg <= (coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_zero : modulus_bram_addr_next_or_zero;
+ //
+ reg_subtractor_borrow <= subtractor_borrow_out;
+ //
+ end
+
+ FSM_STATE_ROUND: begin
+ //
+ round_count <= round_count_next;
+ //
+ end
+
+ FSM_STATE_FINAL: begin
+ // Drop the modinv enable once modinv reports ready.
+ if (modinv_rdy) modinv_ena_reg <= 1'b0;
+ //
+ end
+
+ endcase
+
+
+ //
+ // FSM Transition Logic
+ //
+ always @(posedge clk)
+ //
+ case (fsm_state)
+ // Start only on an ena rising edge while neither this block nor modinv reports ready.
+ FSM_STATE_IDLE: fsm_state <= (!rdy_reg && !modinv_rdy && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE;
+ // READ states always take exactly one clock.
+ FSM_STATE_SHIFT_READ: fsm_state <= FSM_STATE_SHIFT_WRITE;
+ FSM_STATE_COMPARE_READ: fsm_state <= FSM_STATE_COMPARE_COMPARE;
+ FSM_STATE_SUBTRACT_READ: fsm_state <= FSM_STATE_SUBTRACT_WRITE;
+ // Word loops: repeat until the coefficient address has reached the top word.
+ FSM_STATE_INIT: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_INIT : FSM_STATE_SHIFT_READ;
+ FSM_STATE_SHIFT_WRITE: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_COMPARE_READ;
+ FSM_STATE_SUBTRACT_WRITE: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND;
+ // Round loop: another shift/compare pass until the last round has been counted.
+ FSM_STATE_ROUND: fsm_state <= (round_count < round_count_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_FINAL;
+ // Subtract only on "greater or equal"; "less than" goes straight to the round counter.
+ FSM_STATE_COMPARE_COMPARE: fsm_state <= compare_done ? (compare_greater_or_equal ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND) : FSM_STATE_COMPARE_READ;
+ // Hold in FINAL until modinv is ready.
+ FSM_STATE_FINAL: fsm_state <= modinv_rdy ? FSM_STATE_IDLE : FSM_STATE_FINAL;
+
+ default: fsm_state <= FSM_STATE_IDLE;
+
+ endcase
+
+
+endmodule
diff --git a/rtl/modexpa7_montgomery_multiplier.v b/rtl/modexpa7_montgomery_multiplier.v
new file mode 100644
index 0000000..6637231
--- /dev/null
+++ b/rtl/modexpa7_montgomery_multiplier.v
@@ -0,0 +1,408 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexpa7_montgomery_multiplier // word-serial multiplier: the FSM below builds Z from X, Y, N and n0_modinv, 32 bits at a time
+ #(parameter OPERAND_NUM_BITS = 11, // 1024 -> 11 bits
+ parameter OPERAND_ADDR_WIDTH = 5) // 1024 / 32 = 32 -> 5 bits
+ (
+ input wire clk,
+
+ input wire ena, // a rising edge (ena_trig) starts an operation while rdy is low
+ output wire rdy, // set after FSM_STATE_FINAL, cleared in FSM_STATE_IDLE once ena is low
+
+ input wire [OPERAND_NUM_BITS-1:0] operand_width, // only the top OPERAND_ADDR_WIDTH bits are used (operand_width_msb)
+
+ output wire [OPERAND_ADDR_WIDTH :0] x_bram_addr, // X operand, read only
+ input wire [31:0] x_bram_out,
+
+ output wire [OPERAND_ADDR_WIDTH :0] y_bram_addr, // Y operand, read only
+ input wire [31:0] y_bram_out,
+
+ output wire [OPERAND_ADDR_WIDTH :0] n_bram_addr, // N operand, read only
+ input wire [31:0] n_bram_out,
+
+ output wire [OPERAND_ADDR_WIDTH :0] z_bram_addr, // Z accumulator, read and written through one address
+ output wire z_bram_wr,
+ output wire [31:0] z_bram_in,
+ input wire [31:0] z_bram_out,
+
+ input wire [31:0] n0_modinv // multiplied with a Z word to form magic_value_reg
+ );
+
+
+ //
+ // Locals (all-zero reset values for the round counter and the BRAM addresses)
+ //
+ localparam [OPERAND_ADDR_WIDTH:0] round_count_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+ localparam [OPERAND_ADDR_WIDTH:0] bram_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+
+
+ //
+ // FSM state encodings (6-bit); the order of the states is defined by the transition logic
+ //
+ localparam FSM_STATE_IDLE = 6'd0; // waiting for ena_trig
+
+ localparam FSM_STATE_INIT = 6'd10; // Z is written with zeroes
+
+ localparam FSM_STATE_MUL_XY_CALC = 6'd21; // carry is cleared at word 0
+ localparam FSM_STATE_MUL_XY_PIPELINE = 6'd22; // wait state, no register updates
+ localparam FSM_STATE_MUL_XY_REGISTER = 6'd23; // Z write enable is raised
+ localparam FSM_STATE_MUL_XY_WRITE = 6'd24; // addresses advance, carry is latched
+
+ localparam FSM_STATE_MAGIC_CALC = 6'd31; // three states without register updates;
+ localparam FSM_STATE_MAGIC_PIPELINE = 6'd32; // multiplier_magic_out is sampled afterwards,
+ localparam FSM_STATE_MAGIC_REGISTER = 6'd33; // in FSM_STATE_MUL_MN_CALC
+
+ localparam FSM_STATE_MUL_MN_CALC = 6'd41; // carry is cleared and magic_value_reg latched at word 0
+ localparam FSM_STATE_MUL_MN_PIPELINE = 6'd42; // wait state, no register updates
+ localparam FSM_STATE_MUL_MN_REGISTER = 6'd43; // Z write enable is raised
+ localparam FSM_STATE_MUL_MN_WRITE = 6'd44; // addresses advance, carry is latched
+
+ localparam FSM_STATE_SHIFT = 6'd50; // Z is rewritten from its own output while the address counts down
+
+ localparam FSM_STATE_ROUND = 6'd55; // round counter and Y address advance
+
+ localparam FSM_STATE_FINAL = 6'd60; // rdy_reg is set
+
+ reg [5: 0] fsm_state = FSM_STATE_IDLE;
+
+
+ //
+ // Trigger: ena_trig is high for the one cycle where ena is 1 and its registered copy is still 0
+ //
+ reg ena_dly = 1'b0;
+ always @(posedge clk) ena_dly <= ena;
+ wire ena_trig = (ena == 1'b1) && (ena_dly == 1'b0);
+
+
+ //
+ // Ready Register (drives the rdy output)
+ //
+ reg rdy_reg = 1'b0;
+ assign rdy = rdy_reg;
+
+
+ //
+ // Enable / Ready Logic
+ //
+ always @(posedge clk)
+ // rdy_reg is set when the FSM passes through FSM_STATE_FINAL
+ if (fsm_state == FSM_STATE_FINAL) begin
+ //
+ rdy_reg <= 1'b1;
+ //
+ end else if (fsm_state == FSM_STATE_IDLE) begin
+ // rdy_reg stays high until ena is deasserted; the IDLE transition requires !rdy_reg to start again
+ if (rdy_reg && !ena) rdy_reg <= 1'b0;
+ //
+ end
+
+
+ //
+ // X, Y, N BRAM Interface (address registers only, these operands are never written here)
+ //
+ reg [OPERAND_ADDR_WIDTH:0] x_bram_addr_reg = bram_addr_zero;
+ reg [OPERAND_ADDR_WIDTH:0] y_bram_addr_reg = bram_addr_zero;
+ reg [OPERAND_ADDR_WIDTH:0] n_bram_addr_reg = bram_addr_zero;
+
+ assign x_bram_addr = x_bram_addr_reg;
+ assign y_bram_addr = y_bram_addr_reg;
+ assign n_bram_addr = n_bram_addr_reg;
+
+
+ //
+ // Z BRAM Interface (z_bram_in_mux is combinational, see the Z BRAM Input Selector)
+ //
+ reg [OPERAND_ADDR_WIDTH:0] z_bram_addr_reg = bram_addr_zero;
+ reg z_bram_wr_reg = 1'b0;
+ reg [ 31:0] z_bram_in_mux;
+
+ assign z_bram_addr = z_bram_addr_reg;
+ assign z_bram_wr = z_bram_wr_reg;
+ assign z_bram_in = z_bram_in_mux;
+
+
+ //
+ // Handy Wires: operand_width_msb is the top OPERAND_ADDR_WIDTH bits of operand_width
+ //
+ wire [OPERAND_ADDR_WIDTH-1:0] operand_width_msb = operand_width[OPERAND_NUM_BITS-1:OPERAND_NUM_BITS-OPERAND_ADDR_WIDTH];
+
+ wire [OPERAND_ADDR_WIDTH :0] bram_addr_last = {operand_width_msb, 1'b1}; // +1, i.e. 2 * operand_width_msb + 1 (round_count_last is 2 * operand_width_msb)
+
+
+ // Hardware Multiplier (X * Y)
+ // inputs: t = Z word, x = X word, y = Y word, c_in = carry kept from the previous word
+ // NOTE(review): presumably {c_out, s} = t + x * y + c_in -- confirm in modexpa7_adder64_carry32
+ reg [31: 0] multiplier_xy_carry_in;
+ wire [31: 0] multiplier_xy_out;
+ wire [31: 0] multiplier_xy_carry_out;
+
+ modexpa7_adder64_carry32 dsp_multiplier_xy
+ (
+ .clk (clk),
+ .t (/*(z_bram_addr_reg < bram_addr_last) ? */z_bram_out/* : {32{1'b0}}*/),
+ .x (/*(z_bram_addr_reg < bram_addr_last) ? */x_bram_out/* : {32{1'b0}}*/),
+ .y (/*(z_bram_addr_reg < bram_addr_last) ? */y_bram_out/* : {32{1'b0}}*/),
+ .s (multiplier_xy_out),
+ .c_in (multiplier_xy_carry_in),
+ .c_out (multiplier_xy_carry_out)
+ );
+
+
+ //
+ // Hardware Multiplier (Magic): Z word times n0_modinv
+ // only the low 32 bits of the product are used (latched into magic_value_reg in FSM_STATE_MUL_MN_CALC)
+ (* KEEP="TRUE" *)
+ wire [63: 0] multiplier_magic_out;
+ reg [31: 0] magic_value_reg;
+
+ dsp_multiplier_a7 dsp_multiplier_magic
+ (
+ .clk (clk),
+ .a (z_bram_out),
+ .b (n0_modinv),
+ .p (multiplier_magic_out)
+ );
+
+
+ //
+ // Hardware Multiplier (M * N): same block as X * Y, with x = magic_value_reg and y = N word
+ //
+ reg [31: 0] multiplier_mn_carry_in;
+ wire [31: 0] multiplier_mn_out;
+ wire [31: 0] multiplier_mn_carry_out;
+
+ modexpa7_adder64_carry32 dsp_multiplier_mn
+ (
+ .clk (clk),
+ .t (z_bram_out),
+ .x (magic_value_reg),
+ .y (/*(z_bram_addr_reg < bram_addr_last) ? */n_bram_out/* : {32{1'b0}}*/),
+ .s (multiplier_mn_out),
+ .c_in (multiplier_mn_carry_in),
+ .c_out (multiplier_mn_carry_out)
+ );
+
+
+ //
+ // Z BRAM Input Selector (combinational, keyed on the current FSM state)
+ //
+ always @(*)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT:
+ // Z is cleared
+ z_bram_in_mux = {32{1'b0}};
+
+ FSM_STATE_MUL_XY_WRITE:
+ // below the last address the sum word is stored, at the last address the pending carry is stored
+ if (z_bram_addr_reg < bram_addr_last) z_bram_in_mux = multiplier_xy_out;
+ else z_bram_in_mux = multiplier_xy_carry_in;
+
+ FSM_STATE_MUL_MN_WRITE:
+ // same split as above, but at the last address the pending carry is added to the word already in Z
+ if (z_bram_addr_reg < bram_addr_last) z_bram_in_mux = multiplier_mn_out;
+ else z_bram_in_mux = multiplier_mn_carry_in + z_bram_out;
+
+ FSM_STATE_SHIFT:
+ // Z is rewritten from its own read data while the address counts down (see Main Logic)
+ z_bram_in_mux = z_bram_out;
+
+ default:
+ // every other state drives X on the write data
+ z_bram_in_mux = {32{1'bX}};
+
+ endcase
+
+
+ //
+ // Handy Functions (address stepping; all of them read the module-level bram_addr_last / bram_addr_zero)
+ //
+ function [OPERAND_ADDR_WIDTH:0] bram_addr_next_or_zero; // increment, wrapping to zero once bram_addr_last is reached
+ input [OPERAND_ADDR_WIDTH:0] bram_addr;
+ begin
+ bram_addr_next_or_zero = (bram_addr < bram_addr_last) ? bram_addr + 1'b1 : bram_addr_zero;
+ end
+ endfunction
+
+ function [OPERAND_ADDR_WIDTH:0] bram_addr_next_or_last; // increment, saturating at bram_addr_last
+ input [OPERAND_ADDR_WIDTH:0] bram_addr;
+ begin
+ bram_addr_next_or_last = (bram_addr < bram_addr_last) ? bram_addr + 1'b1 : bram_addr_last;
+ end
+ endfunction
+
+ function [OPERAND_ADDR_WIDTH:0] bram_addr_prev_or_zero; // decrement, saturating at zero
+ input [OPERAND_ADDR_WIDTH:0] bram_addr;
+ begin
+ bram_addr_prev_or_zero = (bram_addr > bram_addr_zero) ? bram_addr - 1'b1 : bram_addr_zero;
+ end
+ endfunction
+
+
+ //
+ // Round Counter: counts 0 .. round_count_last (2 * operand_width_msb), then wraps to zero
+ //
+ reg [OPERAND_ADDR_WIDTH:0] round_count = round_count_zero;
+ wire [OPERAND_ADDR_WIDTH:0] round_count_last = {operand_width_msb, 1'b0};
+ wire [OPERAND_ADDR_WIDTH:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
+
+
+ //
+ // Main Logic (address, write-enable, carry and round registers; states not listed leave them unchanged)
+ //
+ always @(posedge clk)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT: begin
+ // write enable is high while the address is below the last one; the address starts advancing only after write enable is set
+ z_bram_wr_reg <= (z_bram_addr_reg < bram_addr_last) ? 1'b1 : 1'b0;
+ z_bram_addr_reg <= z_bram_wr_reg ? bram_addr_next_or_zero(z_bram_addr_reg) : bram_addr_zero;
+ //
+ end
+
+ FSM_STATE_MUL_XY_CALC: begin
+ // the carry chain restarts at the first word of every X * Y pass
+ if (z_bram_addr_reg == bram_addr_zero) begin
+ //
+ multiplier_xy_carry_in <= {32{1'b0}};
+ //
+ end
+ //
+ end
+
+ FSM_STATE_MUL_XY_REGISTER: begin
+ // write enable is asserted for the following FSM_STATE_MUL_XY_WRITE state
+ z_bram_wr_reg <= 1'b1;
+ //
+ end
+
+ FSM_STATE_MUL_XY_WRITE: begin
+ // Z and X addresses step together and wrap to zero after the last word
+ z_bram_wr_reg <= 1'b0;
+ z_bram_addr_reg <= bram_addr_next_or_zero(z_bram_addr_reg);
+ //
+ x_bram_addr_reg <= bram_addr_next_or_zero(x_bram_addr_reg);
+ // carry out of this word becomes carry in of the next one
+ multiplier_xy_carry_in <= multiplier_xy_carry_out;
+ //
+ end
+
+ FSM_STATE_MUL_MN_CALC: begin
+ // at the first word of the M * N pass: restart the carry chain and capture the magic value
+ if (z_bram_addr_reg == bram_addr_zero) begin
+ //
+ multiplier_mn_carry_in <= {32{1'b0}};
+ // low 32 bits of (Z word * n0_modinv)
+ magic_value_reg <= multiplier_magic_out[31:0];
+ //
+ end
+ //
+ end
+
+ FSM_STATE_MUL_MN_REGISTER: begin
+ // write enable is asserted for the following FSM_STATE_MUL_MN_WRITE state
+ z_bram_wr_reg <= 1'b1;
+ //
+ end
+
+ FSM_STATE_MUL_MN_WRITE: begin
+ // unlike the X * Y pass the Z address saturates, so it is still bram_addr_last when FSM_STATE_SHIFT is entered
+ z_bram_wr_reg <= 1'b0;
+ z_bram_addr_reg <= bram_addr_next_or_last(z_bram_addr_reg);
+ // the N address wraps to zero after the last word
+ n_bram_addr_reg <= bram_addr_next_or_zero(n_bram_addr_reg);
+ //
+ multiplier_mn_carry_in <= multiplier_mn_carry_out;
+ //
+ end
+
+ FSM_STATE_SHIFT: begin
+ // NOTE(review): presumably moves every Z word one address down, relying on BRAM read latency -- confirm against the Z memory
+ if (z_bram_wr_reg == 1'b0) z_bram_wr_reg <= 1'b1;
+ else if (z_bram_addr_reg == bram_addr_zero) z_bram_wr_reg <= 1'b0;
+
+ z_bram_addr_reg <= bram_addr_prev_or_zero(z_bram_addr_reg);
+ //
+ end
+
+ FSM_STATE_ROUND: begin
+ // the Y address advances once per round and returns to zero after the last round
+ y_bram_addr_reg <= (round_count < round_count_last) ? bram_addr_next_or_zero(y_bram_addr_reg) : bram_addr_zero;
+ //
+ round_count <= round_count_next;
+ //
+ end
+
+ endcase
+
+
+ //
+ // FSM Transition Logic (z_bram_addr is the same signal as z_bram_addr_reg)
+ //
+ always @(posedge clk)
+ //
+ case (fsm_state)
+ // a new operation starts only on a rising edge of ena while rdy_reg is low
+ FSM_STATE_IDLE: fsm_state <= (!rdy_reg && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE;
+
+ FSM_STATE_INIT: fsm_state <= (z_bram_addr < bram_addr_last ) ? FSM_STATE_INIT : FSM_STATE_MUL_XY_CALC;
+ FSM_STATE_ROUND: fsm_state <= (round_count < round_count_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_FINAL;
+
+ FSM_STATE_MUL_XY_CALC: fsm_state <= FSM_STATE_MUL_XY_PIPELINE;
+ FSM_STATE_MAGIC_CALC: fsm_state <= FSM_STATE_MAGIC_PIPELINE;
+ FSM_STATE_MUL_MN_CALC: fsm_state <= FSM_STATE_MUL_MN_PIPELINE;
+
+ FSM_STATE_MUL_XY_PIPELINE: fsm_state <= FSM_STATE_MUL_XY_REGISTER;
+ FSM_STATE_MAGIC_PIPELINE: fsm_state <= FSM_STATE_MAGIC_REGISTER;
+ FSM_STATE_MUL_MN_PIPELINE: fsm_state <= FSM_STATE_MUL_MN_REGISTER;
+
+ FSM_STATE_MUL_XY_REGISTER: fsm_state <= FSM_STATE_MUL_XY_WRITE;
+ FSM_STATE_MAGIC_REGISTER: fsm_state <= FSM_STATE_MUL_MN_CALC;
+ FSM_STATE_MUL_MN_REGISTER: fsm_state <= FSM_STATE_MUL_MN_WRITE;
+
+ FSM_STATE_MUL_XY_WRITE: fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_MAGIC_CALC;
+ FSM_STATE_MUL_MN_WRITE: fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_MN_CALC : FSM_STATE_SHIFT;
+ FSM_STATE_SHIFT: fsm_state <= (z_bram_addr > bram_addr_zero) ? FSM_STATE_SHIFT : FSM_STATE_ROUND;
+
+ FSM_STATE_FINAL: fsm_state <= FSM_STATE_IDLE;
+
+ default: fsm_state <= FSM_STATE_IDLE;
+
+ endcase
+
+
+endmodule
diff --git a/rtl/modexpa7_top.v b/rtl/modexpa7_top.v
new file mode 100644
index 0000000..6c5a922
--- /dev/null
+++ b/rtl/modexpa7_top.v
@@ -0,0 +1,706 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexpa7_top // top level: user/core memories, modinv32, coefficient calculator, multiplier and the exponentiation FSM
+ #(parameter MAX_MODULUS_WIDTH = 1024)
+ (
+ input wire clk,
+
+ input wire init, // wired to ena of core_montgomery_coeff
+ output wire ready, // wired to rdy of core_montgomery_coeff
+
+ input wire next, // a rising edge (next_trig) starts the exponentiation FSM while valid is low
+ output wire valid, // set after FSM_STATE_FINAL, cleared in FSM_STATE_IDLE once next is low
+
+ input wire [MODULUS_NUM_BITS-1:0] modulus_width, // NOTE(review): MODULUS_NUM_BITS is a localparam declared further down in the module body
+ input wire [MODULUS_NUM_BITS-1:0] exponent_width, // round_count_last is exponent_width - 1
+
+ input wire fast_public_mode, // when set, rounds with a zero exponent bit skip the multiply step (except the last round)
+
+ input wire bus_cs, // bus_* signals are passed straight to mem_user
+ input wire bus_we,
+ input wire [ADDR_WIDTH_TOTAL-1:0] bus_addr, // NOTE(review): ADDR_WIDTH_TOTAL is likewise declared after this port list
+ input wire [31:0] bus_data_wr,
+ output wire [31:0] bus_data_rd
+ );
+
+
+ //
+ // modexpa7_clog2(): returns the number of right shifts needed to bring (value - 1) down to zero
+ //
+ function integer modexpa7_clog2;
+ input integer value;
+ integer ret;
+ begin
+ value = value - 1; // e.g. 32 -> 31 -> 5 shifts, 1025 -> 1024 -> 11 shifts
+ for (ret = 0; value > 0; ret = ret + 1)
+ value = value >> 1;
+ modexpa7_clog2 = ret;
+ end
+ endfunction
+
+
+ //
+ // Locals (derived widths; with MAX_MODULUS_WIDTH = 1024 these evaluate to 5, 11 and 7)
+ //
+ localparam OPERAND_ADDR_WIDTH = modexpa7_clog2(MAX_MODULUS_WIDTH / 32);
+ localparam MODULUS_NUM_BITS = modexpa7_clog2(MAX_MODULUS_WIDTH + 1);
+ localparam ADDR_WIDTH_TOTAL = OPERAND_ADDR_WIDTH + 2;
+
+ localparam [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}}; // user addresses are one bit narrower than core addresses
+ localparam [OPERAND_ADDR_WIDTH :0] bram_core_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+
+ localparam [ MODULUS_NUM_BITS:0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}};
+
+
+ //
+ // User Memory (bus side is driven by bus_*; modulus, message and exponent are read here, result is written here)
+ //
+ wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr; // driven by core_montgomery_coeff
+ wire [ 31:0] ro_modulus_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr = bram_user_addr_zero; // driven by the FSM below
+ wire [ 31:0] ro_message_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr = bram_user_addr_zero; // driven by the FSM below
+ wire [ 31:0] ro_exponent_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr = bram_user_addr_zero;
+ wire [ 31:0] rw_result_bram_out; // NOTE(review): declared but neither connected to mem_user nor read anywhere in this module
+ reg rw_result_bram_wr = 1'b0;
+ wire [ 31:0] rw_result_bram_in;
+
+ modexpa7_buffer_user #
+ (
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH)
+ )
+ mem_user
+ (
+ .clk (clk),
+
+ .bus_cs (bus_cs),
+ .bus_we (bus_we),
+ .bus_addr (bus_addr),
+ .bus_data_wr (bus_data_wr),
+ .bus_data_rd (bus_data_rd),
+
+ .ro_modulus_bram_addr (ro_modulus_bram_addr),
+ .ro_modulus_bram_out (ro_modulus_bram_out),
+
+ .ro_message_bram_addr (ro_message_bram_addr),
+ .ro_message_bram_out (ro_message_bram_out),
+
+ .ro_exponent_bram_addr (ro_exponent_bram_addr),
+ .ro_exponent_bram_out (ro_exponent_bram_out),
+
+ .rw_result_bram_addr (rw_result_bram_addr),
+ .rw_result_bram_wr (rw_result_bram_wr),
+ .rw_result_bram_in (rw_result_bram_in)
+ );
+
+
+ //
+ // Core (Internal) Memory: wires are driven by sub-cores, regs are driven by the FSM in this module
+ //
+ wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr; // rw_coeff_*: connected to core_montgomery_coeff
+ wire rw_coeff_bram_wr;
+ wire [ 31:0] rw_coeff_bram_in;
+ wire [ 31:0] rw_coeff_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr = bram_core_addr_zero; // rw_mm_*: written in INIT_LOAD / INIT_UNLOAD / SQUARE_UNLOAD
+ reg rw_mm_bram_wr = 1'b0;
+ reg [ 31:0] rw_mm_bram_in;
+ wire [ 31:0] rw_mm_bram_out;
+
+ wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr; // rw_nn_*: connected to core_montgomery_coeff
+ wire rw_nn_bram_wr;
+ wire [ 31:0] rw_nn_bram_in;
+
+ reg [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr = bram_core_addr_zero; // rw_y_*: written in INIT_LOAD / MULTIPLY_UNLOAD
+ reg rw_y_bram_wr = 1'b0;
+ reg [ 31:0] rw_y_bram_in;
+ wire [ 31:0] rw_y_bram_out;
+
+ wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr; // rw_r_*: Z port of core_montgomery_multiplier
+ wire rw_r_bram_wr;
+ wire [ 31:0] rw_r_bram_in;
+ wire [ 31:0] rw_r_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr = bram_core_addr_zero; // rw_t_*: written in INIT_LOAD / MULTIPLY_LOAD / SQUARE_LOAD
+ reg rw_t_bram_wr = 1'b0;
+ reg [ 31:0] rw_t_bram_in;
+ wire [ 31:0] rw_t_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr = bram_core_addr_zero; // read in INIT_LOAD
+ wire [ 31:0] ro_coeff_bram_out;
+
+ wire [OPERAND_ADDR_WIDTH:0] ro_mm_bram_addr; // Y port of core_montgomery_multiplier
+ wire [ 31:0] ro_mm_bram_out;
+
+ wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr; // N port of core_montgomery_multiplier
+ wire [ 31:0] ro_nn_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr = bram_core_addr_zero; // read in the *_UNLOAD states
+ wire [ 31:0] ro_r_bram_out;
+
+ wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr; // X port of core_montgomery_multiplier
+ wire [ 31:0] ro_t_bram_out;
+
+ modexpa7_buffer_core #
+ (
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH)
+ )
+ mem_core
+ (
+ .clk (clk),
+
+ .rw_coeff_bram_addr (rw_coeff_bram_addr),
+ .rw_coeff_bram_wr (rw_coeff_bram_wr),
+ .rw_coeff_bram_in (rw_coeff_bram_in),
+ .rw_coeff_bram_out (rw_coeff_bram_out),
+
+ .rw_mm_bram_addr (rw_mm_bram_addr),
+ .rw_mm_bram_wr (rw_mm_bram_wr),
+ .rw_mm_bram_in (rw_mm_bram_in),
+ .rw_mm_bram_out (rw_mm_bram_out),
+
+ .rw_nn_bram_addr (rw_nn_bram_addr),
+ .rw_nn_bram_wr (rw_nn_bram_wr),
+ .rw_nn_bram_in (rw_nn_bram_in),
+
+ .rw_y_bram_addr (rw_y_bram_addr),
+ .rw_y_bram_wr (rw_y_bram_wr),
+ .rw_y_bram_in (rw_y_bram_in),
+ .rw_y_bram_out (rw_y_bram_out),
+
+ .rw_r_bram_addr (rw_r_bram_addr),
+ .rw_r_bram_wr (rw_r_bram_wr),
+ .rw_r_bram_in (rw_r_bram_in),
+ .rw_r_bram_out (rw_r_bram_out),
+
+ .rw_t_bram_addr (rw_t_bram_addr),
+ .rw_t_bram_wr (rw_t_bram_wr),
+ .rw_t_bram_in (rw_t_bram_in),
+ .rw_t_bram_out (rw_t_bram_out),
+
+ .ro_coeff_bram_addr (ro_coeff_bram_addr),
+ .ro_coeff_bram_out (ro_coeff_bram_out),
+
+ .ro_mm_bram_addr (ro_mm_bram_addr),
+ .ro_mm_bram_out (ro_mm_bram_out),
+
+ .ro_nn_bram_addr (ro_nn_bram_addr),
+ .ro_nn_bram_out (ro_nn_bram_out),
+
+ .ro_r_bram_addr (ro_r_bram_addr),
+ .ro_r_bram_out (ro_r_bram_out),
+
+ .ro_t_bram_addr (ro_t_bram_addr),
+ .ro_t_bram_out (ro_t_bram_out)
+ );
+
+
+ //
+ // Small 32-bit ModInv Core (enable/ready handshake and modinv_n0 come from core_montgomery_coeff)
+ //
+ wire modinv_ena;
+ wire modinv_rdy;
+
+ wire [31: 0] modinv_n0;
+ wire [31: 0] modinv_n0_negative = ~modinv_n0 + 1'b1; // two's complement negation; this value, not modinv_n0, is what the core inverts
+ wire [31: 0] modinv_n0_modinv; // result, consumed by core_montgomery_multiplier as n0_modinv
+
+ modexpa7_modinv32 core_modinv32
+ (
+ .clk (clk),
+
+ .ena (modinv_ena),
+ .rdy (modinv_rdy),
+
+ .n0 (modinv_n0_negative),
+ .n0_modinv (modinv_n0_modinv)
+ );
+
+
+ //
+ // Montgomery Coefficient Calculator (started by init, reports completion on ready)
+ //
+ modexpa7_montgomery_coeff #
+ (
+ .MODULUS_NUM_BITS (MODULUS_NUM_BITS),
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH)
+ )
+ core_montgomery_coeff
+ (
+ .clk (clk),
+
+ .ena (init),
+ .rdy (ready),
+
+ .modulus_width (modulus_width),
+
+ .coeff_bram_addr (rw_coeff_bram_addr), // coefficient memory, read/write
+ .coeff_bram_wr (rw_coeff_bram_wr),
+ .coeff_bram_in (rw_coeff_bram_in),
+ .coeff_bram_out (rw_coeff_bram_out),
+
+ .nn_bram_addr (rw_nn_bram_addr), // NN memory, write only from this core
+ .nn_bram_wr (rw_nn_bram_wr),
+ .nn_bram_in (rw_nn_bram_in),
+
+ .modulus_bram_addr (ro_modulus_bram_addr), // user-supplied modulus, read only
+ .modulus_bram_out (ro_modulus_bram_out),
+
+ .modinv_n0 (modinv_n0), // handshake with core_modinv32
+ .modinv_ena (modinv_ena),
+ .modinv_rdy (modinv_rdy)
+ );
+
+
+ //
+ // Montgomery Multiplier: operands X = T memory, Y = MM memory, N = NN memory; result Z = R memory
+ //
+ reg mul_ena = 1'b0; // raised and dropped by the *_WAIT states of the FSM below
+ wire mul_rdy;
+
+ modexpa7_montgomery_multiplier #
+ (
+ .OPERAND_NUM_BITS (MODULUS_NUM_BITS),
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH)
+ )
+ core_montgomery_multiplier
+ (
+ .clk (clk),
+
+ .ena (mul_ena),
+ .rdy (mul_rdy),
+
+ .operand_width (modulus_width),
+
+ .x_bram_addr (ro_t_bram_addr),
+ .x_bram_out (ro_t_bram_out),
+
+ .y_bram_addr (ro_mm_bram_addr),
+ .y_bram_out (ro_mm_bram_out),
+
+ .n_bram_addr (ro_nn_bram_addr),
+ .n_bram_out (ro_nn_bram_out),
+
+ .z_bram_addr (rw_r_bram_addr),
+ .z_bram_wr (rw_r_bram_wr),
+ .z_bram_in (rw_r_bram_in),
+ .z_bram_out (rw_r_bram_out),
+
+ .n0_modinv (modinv_n0_modinv)
+ );
+
+
+ //
+ // FSM state encodings (6-bit); the order of the states is defined by the transition logic
+ //
+ localparam FSM_STATE_IDLE = 6'd0; // waiting for next_trig
+
+ localparam FSM_STATE_INIT_LOAD = 6'd11; // MM <- message, Y <- 1, T <- coefficient
+ localparam FSM_STATE_INIT_WAIT = 6'd12; // multiplier runs
+ localparam FSM_STATE_INIT_UNLOAD = 6'd13; // MM <- R
+
+ localparam FSM_STATE_READ_EI = 6'd20; // one state with no register updates before ei_bit is used
+
+ localparam FSM_STATE_ROUND_BEGIN = 6'd25; // decides between multiply and square
+
+ localparam FSM_STATE_MULTIPLY_LOAD = 6'd31; // T <- Y
+ localparam FSM_STATE_MULTIPLY_WAIT = 6'd32; // multiplier runs
+ localparam FSM_STATE_MULTIPLY_UNLOAD = 6'd33; // Y <- R or T, depending on ei_bit
+
+ localparam FSM_STATE_SQUARE_LOAD = 6'd41; // T <- MM
+ localparam FSM_STATE_SQUARE_WAIT = 6'd42; // multiplier runs
+ localparam FSM_STATE_SQUARE_UNLOAD = 6'd43; // MM <- R
+
+ localparam FSM_STATE_ROUND_END = 6'd50; // round and exponent bit counters advance
+
+ localparam FSM_STATE_FINAL = 6'd60; // valid_reg is set
+
+ reg [5: 0] fsm_state = FSM_STATE_IDLE;
+
+
+ //
+ // Trigger: next_trig is high for the one cycle where next is 1 and its registered copy is still 0
+ //
+ reg next_dly = 1'b0;
+ always @(posedge clk) next_dly <= next;
+ wire next_trig = (next == 1'b1) && (next_dly == 1'b0);
+
+
+ //
+ // Valid Register (drives the valid output)
+ //
+ reg valid_reg = 1'b0;
+ assign valid = valid_reg;
+
+
+ //
+ // Next/ Valid Logic
+ //
+ always @(posedge clk)
+ // valid_reg is set when the FSM passes through FSM_STATE_FINAL
+ if (fsm_state == FSM_STATE_FINAL) begin
+ //
+ valid_reg <= 1'b1;
+ //
+ end else if (fsm_state == FSM_STATE_IDLE) begin
+ // valid_reg stays high until next is deasserted; the IDLE transition requires !valid_reg to start again
+ if (valid_reg && !next) valid_reg <= 1'b0;
+ //
+ end
+
+
+ //
+ // Exponent Bit Counter: selects one bit of the current exponent word, starting at bit 0
+ //
+ reg [4: 0] ei_bit_count = 5'd0;
+ wire ei_bit = ro_exponent_bram_out[ei_bit_count];
+
+
+ //
+ // Round Counter: one round per exponent bit, 0 .. exponent_width - 1
+ // NOTE(review): exponent_width == 0 makes round_count_last wrap to all ones -- confirm callers never pass 0
+ reg [MODULUS_NUM_BITS:0] round_count = round_count_zero;
+ wire [MODULUS_NUM_BITS:0] round_count_last = exponent_width - 1'b1;
+ wire [MODULUS_NUM_BITS:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
+
+
+ //
+ // Handy Wires: modulus_width_msb is the top OPERAND_ADDR_WIDTH bits of modulus_width
+ //
+ wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH];
+
+ wire [OPERAND_ADDR_WIDTH :0] bram_core_addr_last = {modulus_width_msb, 1'b0}; // 2 * modulus_width_msb
+
+ wire [OPERAND_ADDR_WIDTH :0] bram_user_addr_last_ext = bram_core_addr_last - 1'b1; // one below the core limit ...
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_last = bram_user_addr_last_ext[OPERAND_ADDR_WIDTH-1:0]; // ... truncated to the user address width
+
+
+ //
+ // Handy Functions (address stepping; both read the module-level *_addr_last wires)
+ //
+ function [OPERAND_ADDR_WIDTH:0] bram_core_addr_next_or_zero; // increment, wrapping to zero once bram_core_addr_last is reached
+ input [OPERAND_ADDR_WIDTH:0] bram_core_addr;
+ begin
+ bram_core_addr_next_or_zero = (bram_core_addr < bram_core_addr_last) ? bram_core_addr + 1'b1 : bram_core_addr_zero;
+ end
+ endfunction
+
+ function [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_next_or_zero; // increment, wrapping to zero once bram_user_addr_last is reached
+ input [OPERAND_ADDR_WIDTH-1:0] bram_user_addr;
+ begin
+ bram_user_addr_next_or_zero = (bram_user_addr < bram_user_addr_last) ? bram_user_addr + 1'b1 : bram_user_addr_zero;
+ end
+ endfunction
+
+
+ //
+ // Result BRAM Input: same selection as the Y input in FSM_STATE_MULTIPLY_UNLOAD (R when ei_bit is set, T otherwise)
+ //
+ assign rw_result_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out;
+
+
+ //
+ // MM BRAM Input Selector (combinational, keyed on the current FSM state)
+ //
+ always @(*)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT_LOAD:
+ // message words below the last core address, zero at the last address
+ rw_mm_bram_in = (rw_mm_bram_addr < bram_core_addr_last) ? ro_message_bram_out : {32{1'b0}};
+
+ FSM_STATE_INIT_UNLOAD:
+ // multiplier result is copied back
+ rw_mm_bram_in = ro_r_bram_out;
+
+ FSM_STATE_SQUARE_UNLOAD:
+ // multiplier result is copied back
+ rw_mm_bram_in = ro_r_bram_out;
+
+ default:
+ // every other state drives X on the write data
+ rw_mm_bram_in = {32{1'bX}};
+
+ endcase
+
+
+ //
+ // Y BRAM Input Selector (combinational, keyed on the current FSM state)
+ //
+ always @(*)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT_LOAD:
+ // Y starts as the multi-word constant 1 (word 0 is 1, all other words are 0)
+ rw_y_bram_in = (rw_mm_bram_addr == bram_core_addr_zero) ? 32'h00000001 : 32'h00000000;
+
+ FSM_STATE_MULTIPLY_UNLOAD:
+ // exponent bit set: take the product from R; clear: write back the copy of Y held in T
+ rw_y_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; // RW!
+
+ default:
+ // every other state drives X on the write data
+ rw_y_bram_in = {32{1'bX}};
+
+ endcase
+
+
+ //
+ // T BRAM Input Selector (combinational; T is the X operand of the multiplier)
+ //
+ always @(*)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT_LOAD:
+ // first multiplication: T = coefficient words
+ rw_t_bram_in = ro_coeff_bram_out;
+
+ FSM_STATE_MULTIPLY_LOAD:
+ // multiply step: T = Y
+ rw_t_bram_in = rw_y_bram_out;
+
+ FSM_STATE_SQUARE_LOAD:
+ // square step: T = MM (the multiplier's Y operand is MM as well)
+ rw_t_bram_in = rw_mm_bram_out;
+
+ default:
+ // every other state drives X on the write data
+ rw_t_bram_in = {32{1'bX}};
+
+ endcase
+
+
+ //
+ // Main Logic (address, write-enable, mul_ena and counter registers; states not listed leave them unchanged)
+ //
+ always @(posedge clk)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT_LOAD: begin
+ // MM, Y and T are written in lock step; all three follow the MM write enable and address
+ rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ rw_y_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ rw_t_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ // write addresses start advancing only after write enable is set
+ rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+ rw_y_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+ rw_t_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+ // read addresses start advancing before write enable is set; once wrapped back to zero during writes they are held at zero
+ if (ro_coeff_bram_addr > bram_core_addr_zero) ro_coeff_bram_addr <= bram_core_addr_next_or_zero(ro_coeff_bram_addr);
+ else ro_coeff_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_coeff_bram_addr);
+ //
+ if (ro_message_bram_addr > bram_user_addr_zero) ro_message_bram_addr <= bram_user_addr_next_or_zero(ro_message_bram_addr);
+ else ro_message_bram_addr <= rw_mm_bram_wr ? bram_user_addr_zero : bram_user_addr_next_or_zero(ro_message_bram_addr);
+ //
+ end
+
+ FSM_STATE_INIT_WAIT: begin
+ // mul_ena is raised on entry and dropped again as soon as the multiplier reports mul_rdy
+ if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+ else mul_ena <= 1'b1;
+ //
+ end
+
+ FSM_STATE_INIT_UNLOAD: begin
+ // R is copied into MM; the R read address runs ahead of the MM write address as in INIT_LOAD
+ rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+ //
+ if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+ else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+ //
+ end
+
+ FSM_STATE_MULTIPLY_LOAD: begin
+ // Y is copied into T; Y is read through its read/write port address here
+ rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero;
+ //
+ if (rw_y_bram_addr > bram_core_addr_zero) rw_y_bram_addr <= bram_core_addr_next_or_zero(rw_y_bram_addr);
+ else rw_y_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_y_bram_addr);
+ //
+ end
+
+ FSM_STATE_MULTIPLY_WAIT: begin
+ // same enable/ready handshake as FSM_STATE_INIT_WAIT
+ if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+ else mul_ena <= 1'b1;
+ //
+ end
+
+ FSM_STATE_MULTIPLY_UNLOAD: begin
+ // Y is rewritten from R or from T (see the Y BRAM Input Selector)
+ rw_y_bram_wr <= (rw_y_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_y_bram_addr <= rw_y_bram_wr ? bram_core_addr_next_or_zero(rw_y_bram_addr) : bram_core_addr_zero;
+ // only the source that ei_bit selects has its read address stepped
+ if (ei_bit) begin
+ //
+ if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+ else ro_r_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+ //
+ end else begin
+ //
+ if (rw_t_bram_addr > bram_core_addr_zero) rw_t_bram_addr <= bram_core_addr_next_or_zero(rw_t_bram_addr);
+ else rw_t_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_t_bram_addr);
+ //
+ end
+ // in the last round the same data stream is also written to the user-visible result memory
+ if (round_count == round_count_last) begin
+ //
+ if (rw_result_bram_addr == bram_user_addr_zero) begin
+ // result address is zero: either the copy has not started yet or it has wrapped around
+ if (rw_y_bram_wr) begin
+ // Y address still zero: keep writing and step the result address; Y address past zero: stop and hold at zero
+ rw_result_bram_wr <= (rw_y_bram_addr > bram_core_addr_zero) ? 1'b0 : 1'b1;
+ rw_result_bram_addr <= (rw_y_bram_addr > bram_core_addr_zero) ? bram_user_addr_zero : bram_user_addr_next_or_zero(rw_result_bram_addr);
+ //
+ end else begin
+ // Y write enable not yet set: arm the result write enable at address zero
+ rw_result_bram_wr <= 1'b1;
+ rw_result_bram_addr <= bram_user_addr_zero;
+ //
+ end
+ //
+ end else begin
+ // copy in progress: step the result address, drop write enable at the last user address
+ rw_result_bram_wr <= (rw_result_bram_addr < bram_user_addr_last) ? 1'b1 : 1'b0;
+ rw_result_bram_addr <= bram_user_addr_next_or_zero(rw_result_bram_addr);
+ //
+ end
+ //
+ end
+ //
+ end
+
+ FSM_STATE_SQUARE_LOAD: begin
+ // MM is copied into T; MM is read through its read/write port address here
+ rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero;
+ //
+ if (rw_mm_bram_addr > bram_core_addr_zero) rw_mm_bram_addr <= bram_core_addr_next_or_zero(rw_mm_bram_addr);
+ else rw_mm_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_mm_bram_addr);
+ //
+ end
+
+ FSM_STATE_SQUARE_WAIT: begin
+ // same enable/ready handshake as FSM_STATE_INIT_WAIT
+ if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+ else mul_ena <= 1'b1;
+ //
+ end
+
+ FSM_STATE_SQUARE_UNLOAD: begin
+ // R is copied into MM, identical sequencing to FSM_STATE_INIT_UNLOAD
+ rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+ //
+ if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+ else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+ //
+ end
+
+ FSM_STATE_ROUND_END: begin
+ // round counter advances, wrapping to zero after the last round
+ round_count <= round_count_next;
+ //
+ if (round_count < round_count_last) begin
+ // next exponent bit (the 5-bit counter wraps from 31 to 0)
+ ei_bit_count <= ei_bit_count + 1'b1;
+ // after bit 31 the next exponent word is selected
+ if (ei_bit_count == 5'd31)
+ //
+ ro_exponent_bram_addr <= bram_user_addr_next_or_zero(ro_exponent_bram_addr);
+ //
+ end else begin
+ // last round: rewind to bit 0 of exponent word 0 for the next operation
+ ei_bit_count <= 5'd0;
+ //
+ ro_exponent_bram_addr <= bram_user_addr_zero;
+ //
+ end
+ //
+ end
+
+ endcase
+
+
+ //
+ // FSM Transition Logic (LOAD/UNLOAD states repeat until their write address reaches bram_core_addr_last)
+ //
+ always @(posedge clk)
+ //
+ case (fsm_state)
+
+ FSM_STATE_IDLE: fsm_state <= (!valid_reg && next_trig) ? FSM_STATE_INIT_LOAD : FSM_STATE_IDLE;
+
+ FSM_STATE_INIT_LOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_LOAD : FSM_STATE_INIT_WAIT;
+ FSM_STATE_INIT_WAIT: fsm_state <= mul_rdy ? FSM_STATE_INIT_UNLOAD : FSM_STATE_INIT_WAIT;
+ FSM_STATE_INIT_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_UNLOAD : FSM_STATE_READ_EI;
+
+ FSM_STATE_READ_EI: fsm_state <= FSM_STATE_ROUND_BEGIN;
+
+ FSM_STATE_ROUND_BEGIN: fsm_state <= (!ei_bit && fast_public_mode && (round_count < round_count_last)) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_MULTIPLY_LOAD; // multiply is skipped only for a zero bit, in fast mode, before the last round
+
+ FSM_STATE_MULTIPLY_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_LOAD : FSM_STATE_MULTIPLY_WAIT;
+ FSM_STATE_MULTIPLY_WAIT: fsm_state <= mul_rdy ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_MULTIPLY_WAIT;
+ FSM_STATE_MULTIPLY_UNLOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_SQUARE_LOAD;
+
+ FSM_STATE_SQUARE_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_SQUARE_WAIT;
+ FSM_STATE_SQUARE_WAIT: fsm_state <= mul_rdy ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_SQUARE_WAIT;
+ FSM_STATE_SQUARE_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_ROUND_END;
+
+ FSM_STATE_ROUND_END: fsm_state <= (round_count < round_count_last) ? FSM_STATE_READ_EI : FSM_STATE_FINAL;
+
+ FSM_STATE_FINAL: fsm_state <= FSM_STATE_IDLE;
+
+ default: fsm_state <= FSM_STATE_IDLE;
+
+ endcase
+
+
+endmodule
diff --git a/rtl/modexpa7_wrapper.v b/rtl/modexpa7_wrapper.v
new file mode 100644
index 0000000..271cb20
--- /dev/null
+++ b/rtl/modexpa7_wrapper.v
@@ -0,0 +1,211 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module modexpa7_wrapper    // 32-bit bus front-end for modexpa7_top: local registers plus a window into the core
+    (
+        input   wire            clk,        // every register in this module is updated on posedge clk
+        input   wire            reset_n,    // synchronous, active-low reset of the local registers
+
+        input   wire            cs,         // chip select, qualifies every bus access
+        input   wire            we,         // 1 = write, 0 = read
+
+        input   wire    [ 9: 0] address,    // bit 9: 0 = registers, 1 = core; bits 8:0: offset
+        input   wire    [31: 0] write_data,
+        output  wire    [31: 0] read_data   // register reads show up one clock after the access
+    );
+
+
+    //
+    // Address Decoder
+    //
+    localparam          ADDR_MSB_REGS   = 1'b0;
+    localparam          ADDR_MSB_CORE   = 1'b1;
+    wire                address_msb     = address[9];
+    wire    [ 8: 0]     address_lsb     = address[8:0];
+
+
+    //
+    // Output Mux input coming from the core. Register reads are latched in
+    // tmp_read_data further down, so they need no wire of their own.
+    //
+    wire    [31: 0]     read_data_core;
+
+
+    //
+    // Register Map (offsets inside the register half of the address space)
+    //
+    localparam  ADDR_NAME0          = 9'h000;
+    localparam  ADDR_NAME1          = 9'h001;
+    localparam  ADDR_VERSION        = 9'h002;
+
+    localparam  ADDR_CONTROL        = 9'h008;   // {next, init}
+    localparam  ADDR_STATUS         = 9'h009;   // {valid, ready}
+    localparam  ADDR_MODE           = 9'h010;   // 0 = slow secure, 1 = fast unsafe (public)
+    localparam  ADDR_MODULUS_BITS   = 9'h011;   // feeds modulus_width of the core
+    localparam  ADDR_EXPONENT_BITS  = 9'h012;   // feeds exponent_width of the core
+    localparam  ADDR_GPIO_REG       = 9'h020;   // plain read/write register, not wired to anything else here
+
+    localparam  CONTROL_INIT_BIT    = 0;
+    localparam  CONTROL_NEXT_BIT    = 1;
+
+    localparam  STATUS_READY_BIT    = 0;
+    localparam  STATUS_VALID_BIT    = 1;
+
+    localparam  CORE_NAME0          = 32'h6D6F6465; // "mode"
+    localparam  CORE_NAME1          = 32'h78706137; // "xpa7"
+    localparam  CORE_VERSION        = 32'h302E3130; // "0.10"
+
+
+    //
+    // Writable Registers
+    //
+    reg     [ 1: 0]     reg_control;
+    reg                 reg_mode;
+    reg     [12: 0]     reg_modulus_width;
+    reg     [12: 0]     reg_exponent_width;
+    reg     [31: 0]     reg_gpio;
+
+
+    //
+    // Status Bits (driven by the core)
+    //
+    wire    [ 1: 0]     reg_status;
+
+
+    //
+    // ModExpA7
+    //
+    modexpa7_top #
+    (
+        .MAX_MODULUS_WIDTH  (4096)
+    )
+    modexpa7_core
+    (
+        .clk                (clk),
+
+        .init               (reg_control[CONTROL_INIT_BIT]),
+        .ready              (reg_status[STATUS_READY_BIT]),
+        .next               (reg_control[CONTROL_NEXT_BIT]),
+        .valid              (reg_status[STATUS_VALID_BIT]),
+
+        .modulus_width      (reg_modulus_width),
+        .exponent_width     (reg_exponent_width),
+
+        .fast_public_mode   (reg_mode),
+
+        .bus_cs             (cs && (address_msb == ADDR_MSB_CORE)),
+        .bus_we             (we),
+        .bus_addr           (address_lsb),
+        .bus_data_wr        (write_data),
+        .bus_data_rd        (read_data_core)
+    );
+
+
+    //
+    // Read Latch (result of the most recent register read)
+    //
+    reg     [31: 0]     tmp_read_data;
+
+
+    //
+    // Read/Write Interface (register half of the address space)
+    //
+    always @(posedge clk)
+        //
+        if (!reset_n) begin
+            //
+            reg_control         <= 2'b00;
+            reg_mode            <= 1'b0;
+            reg_modulus_width   <= 13'd1024;
+            reg_exponent_width  <= 13'd1024;
+            reg_gpio            <= 32'h00000000;    // reset too, so it never reads back as X
+        end else if (cs && (address_msb == ADDR_MSB_REGS)) begin
+            //
+            if (we) begin
+                //
+                // Write Handler (offsets without a case item are ignored)
+                //
+                case (address_lsb)
+                    //
+                    ADDR_CONTROL:       reg_control         <= write_data[ 1: 0];
+                    ADDR_MODE:          reg_mode            <= write_data[0];
+                    ADDR_MODULUS_BITS:  reg_modulus_width   <= write_data[12: 0];
+                    ADDR_EXPONENT_BITS: reg_exponent_width  <= write_data[12: 0];
+                    ADDR_GPIO_REG:      reg_gpio            <= write_data;
+                    //
+                endcase
+                //
+            end else begin
+                //
+                // Read Handler (offsets without a case item read as zero)
+                //
+                case (address_lsb)  // 9-bit offset, same width as the ADDR_* items and the write handler
+                    //
+                    ADDR_NAME0:         tmp_read_data <= CORE_NAME0;
+                    ADDR_NAME1:         tmp_read_data <= CORE_NAME1;
+                    ADDR_VERSION:       tmp_read_data <= CORE_VERSION;
+                    ADDR_CONTROL:       tmp_read_data <= {{30{1'b0}}, reg_control};
+                    ADDR_STATUS:        tmp_read_data <= {{30{1'b0}}, reg_status};
+                    ADDR_MODE:          tmp_read_data <= {{31{1'b0}}, reg_mode};
+                    ADDR_MODULUS_BITS:  tmp_read_data <= {{19{1'b0}}, reg_modulus_width};
+                    ADDR_EXPONENT_BITS: tmp_read_data <= {{19{1'b0}}, reg_exponent_width};
+                    ADDR_GPIO_REG:      tmp_read_data <= reg_gpio;
+                    //
+                    default:            tmp_read_data <= 32'h00000000;
+                    //
+                endcase
+                //
+            end
+            //
+        end
+
+
+    //
+    // Register / Core Memory Selector (the half addressed one clock ago drives read_data)
+    //
+    reg                 address_msb_last;
+    always @(posedge clk) address_msb_last <= address_msb;  // flip-flop, hence non-blocking
+
+    reg     [31: 0]     read_data_mux;
+    assign              read_data = read_data_mux;
+
+    always @(*)
+        //
+        case (address_msb_last)
+            //
+            ADDR_MSB_REGS:  read_data_mux = tmp_read_data;
+            ADDR_MSB_CORE:  read_data_mux = read_data_core;
+            //
+        endcase
+
+
+endmodule
diff --git a/rtl/ram_1rw_1ro_readfirst.v b/rtl/ram_1rw_1ro_readfirst.v
new file mode 100644
index 0000000..25b708f
--- /dev/null
+++ b/rtl/ram_1rw_1ro_readfirst.v
@@ -0,0 +1,88 @@
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module ram_1rw_1ro_readfirst
+    #(
+        parameter MEM_WIDTH     = 32,   // bits per memory word
+        parameter MEM_ADDR_BITS = 8     // address bits, the array holds 2**MEM_ADDR_BITS words
+    )
+    (
+        input   wire                        clk,
+
+        input   wire    [MEM_ADDR_BITS-1:0] a_addr,     // port A: address
+        input   wire                        a_wr,       // port A: write enable
+        input   wire    [MEM_WIDTH-1:0]     a_in,       // port A: write data
+        output  wire    [MEM_WIDTH-1:0]     a_out,      // port A: registered read data
+
+        input   wire    [MEM_ADDR_BITS-1:0] b_addr,     // port B: address
+        output  wire    [MEM_WIDTH-1:0]     b_out       // port B: registered read data
+    );
+
+
+    //
+    // Storage Array (the attribute asks the tools for block RAM)
+    //
+    localparam MEM_DEPTH = 2**MEM_ADDR_BITS;
+    (* RAM_STYLE="BLOCK" *)
+    reg [MEM_WIDTH-1:0] bram[0:MEM_DEPTH-1];
+
+
+    //
+    // Read Data Registers, one per port
+    //
+    reg [MEM_WIDTH-1:0] a_out_reg;
+    reg [MEM_WIDTH-1:0] b_out_reg;
+
+    assign a_out = a_out_reg;
+    assign b_out = b_out_reg;
+
+
+    //
+    // Port A (read/write): both assignments are non-blocking, so on a write
+    // a_out still receives the word that was stored before this clock edge
+    //
+    always @(posedge clk) begin
+        if (a_wr) bram[a_addr] <= a_in;
+        a_out_reg <= bram[a_addr];
+    end
+
+
+    //
+    // Port B (read-only)
+    //
+    always @(posedge clk) begin
+        b_out_reg <= bram[b_addr];
+    end
+
+endmodule