diff options
Diffstat (limited to 'src/rtl')
-rw-r--r-- | src/rtl/dsp_multiplier_a7.v | 522 | ||||
-rw-r--r-- | src/rtl/dsp_subtractor_a7.v | 142 | ||||
-rw-r--r-- | src/rtl/modexpa7_adder64_carry32.v | 81 | ||||
-rw-r--r-- | src/rtl/modexpa7_buffer_core.v | 218 | ||||
-rw-r--r-- | src/rtl/modexpa7_buffer_user.v | 197 | ||||
-rw-r--r-- | src/rtl/modexpa7_modinv32.v | 141 | ||||
-rw-r--r-- | src/rtl/modexpa7_montgomery_coeff.v | 425 | ||||
-rw-r--r-- | src/rtl/modexpa7_montgomery_multiplier.v | 408 | ||||
-rw-r--r-- | src/rtl/modexpa7_top.v | 706 | ||||
-rw-r--r-- | src/rtl/modexpa7_wrapper.v | 211 | ||||
-rw-r--r-- | src/rtl/ram_1rw_1ro_readfirst.v | 88 |
11 files changed, 3139 insertions, 0 deletions
diff --git a/src/rtl/dsp_multiplier_a7.v b/src/rtl/dsp_multiplier_a7.v new file mode 100644 index 0000000..bb6a139 --- /dev/null +++ b/src/rtl/dsp_multiplier_a7.v @@ -0,0 +1,522 @@ +//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module dsp_multiplier_a7 + ( + input clk,
+ input [31: 0] a,
+ input [31: 0] b,
+ output [63: 0] p + );
+
+ //
+ // Split a, b into parts
+ //
+ wire [16: 0] a_lo = a[16: 0];
+ wire [16: 0] b_lo = b[16: 0];
+
+ wire [14: 0] a_hi = a[31:17];
+ wire [14: 0] b_hi = b[31:17];
+
+ //
+ // Products
+ //
+ wire [47: 0] p_dsp1;
+ wire [47: 0] p_dsp3;
+ wire [47: 0] p_dsp4;
+
+ //
+ // Cascade p
+ //
+ wire [47: 0] p_dsp1_chain;
+ wire [47: 0] p_dsp2_chain;
+ wire [47: 0] p_dsp3_chain; +
+ //
+ // Cascade a
+ //
+ wire [29: 0] a_lo_chain;
+ wire [29: 0] a_hi_chain;
+
+ //
+ // Register parts of p
+ // + genvar i;
+ generate for (i=0; i<17; i=i+1)
+ begin : FD_gen
+ //
+ FD #(.INIT (1'b0)) + FD_inst_1
+ ( + .C (clk), + .D (p_dsp1[i]), + .Q (p[i]) + );
+ //
+ FD #(.INIT(1'b0)) + FD_inst_2
+ ( + .C (clk), + .D (p_dsp3[i]), + .Q (p[17+i]) + );
+ //
+ end
+ endgenerate
+
+ //
+ // Mapping
+ //
+ assign p[63:34] = p_dsp4[29:0];
+
+ //
+ // a_lo * b_lo
+ // + DSP48E1 #
+ ( + .AREG (1), + .BREG (1), + .CREG (0), + .DREG (0), + .MREG (0), + .PREG (0), + .ADREG (0),
+
+ .INMODEREG (0),
+ .OPMODEREG (0),
+ .ALUMODEREG (0),
+ .CARRYINREG (0), + .CARRYINSELREG (0),
+ + .ACASCREG (1), + .BCASCREG (1), +
+ .A_INPUT ("DIRECT"), + .B_INPUT ("DIRECT"), +
+ .USE_SIMD ("ONE48"), + .USE_DPORT ("FALSE"), + .USE_MULT ("MULTIPLY"), + .USE_PATTERN_DETECT ("NO_PATDET"),
+
+ .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), +
+ .MASK (48'h000000000000), + .PATTERN (48'h000000000000),
+
+ .AUTORESET_PATDET ("NO_RESET")
+ ) + DSP48E1_inst1
+ ( + .CLK (clk),
+ + .RSTA (1'b0), + .RSTB (1'b0), + .RSTC (1'b0), + .RSTD (1'b0), + .RSTM (1'b0), + .RSTP (1'b0), +
+ .RSTCTRL (1'b0),
+ .RSTINMODE (1'b0), + .RSTALUMODE (1'b0), + .RSTALLCARRYIN (1'b0), +
+ .CEA1 (1'b0),
+ .CEA2 (1'b1), + .CEB1 (1'b0), + .CEB2 (1'b1), + .CEC (1'b0),
+ .CED (1'b0),
+ .CEM (1'b0),
+ .CEP (1'b0),
+ .CEAD (1'b0),
+
+ .CECTRL (1'b0),
+ .CEINMODE (1'b0), + .CEALUMODE (1'b0), + .CECARRYIN (1'b0),
+
+ .CARRYINSEL (3'b000),
+ .ALUMODE (4'b0000),
+ .INMODE (5'b00000),
+ .OPMODE (7'b0110101),
+
+ .A ({{13{1'b0}}, a_lo}), + .B ({{ 1{1'b0}}, b_lo}),
+ .C ({48{1'b0}}),
+ .D ({25{1'b0}}),
+ .P (p_dsp1),
+
+ .CARRYIN (1'b0),
+ .CARRYOUT (),
+
+ .CARRYCASCIN (1'b0), + .CARRYCASCOUT (), + + .ACIN ({30{1'b0}}), + .BCIN ({18{1'b0}}), + + .ACOUT (a_lo_chain), + .BCOUT (), + + .PCIN ({48{1'b0}}), + .PCOUT (p_dsp1_chain),
+
+ .MULTSIGNIN (1'b0), + .MULTSIGNOUT (), +
+ .PATTERNDETECT (),
+ .PATTERNBDETECT (), +
+ .UNDERFLOW (), + .OVERFLOW () + );
+
+ //
+ // a_lo * b_hi
+ // + DSP48E1 #
+ ( + .AREG (0), + .BREG (1), + .CREG (0), + .DREG (0),
+ .MREG (0), + .PREG (0), + .ADREG (0), +
+ .INMODEREG (0), + .OPMODEREG (0), + .ALUMODEREG (0), + .CARRYINREG (0), + .CARRYINSELREG (0),
+ + .ACASCREG (0),
+ .BCASCREG (1), + + .A_INPUT ("CASCADE"), + .B_INPUT ("DIRECT"), + + .USE_SIMD ("ONE48"),
+ .USE_DPORT ("FALSE"), + .USE_MULT ("MULTIPLY"), + .USE_PATTERN_DETECT ("NO_PATDET"), +
+ .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), +
+ .MASK (48'h000000000000), + .PATTERN (48'h000000000000), +
+ .AUTORESET_PATDET ("NO_RESET")
+ ) + DSP48E1_inst2
+ ( + .CLK (clk), +
+ .RSTA (1'b0),
+ .RSTB (1'b0), + .RSTC (1'b0), + .RSTD (1'b0), + .RSTM (1'b0), + .RSTP (1'b0),
+
+ .RSTCTRL (1'b0),
+ .RSTINMODE (1'b0), + .RSTALUMODE (1'b0), + .RSTALLCARRYIN (1'b0), +
+ .CEA1 (1'b0), + .CEA2 (1'b0),
+ .CEB1 (1'b0),
+ .CEB2 (1'b1),
+ .CEC (1'b0),
+ .CED (1'b0),
+ .CEP (1'b0),
+ .CEM (1'b0),
+ .CEAD (1'b0),
+
+ .CECTRL (1'b0),
+ .CEINMODE (1'b0),
+ .CEALUMODE (1'b0),
+ .CECARRYIN (1'b0),
+
+ .CARRYINSEL (3'b000),
+ .ALUMODE (4'b0000),
+ .INMODE (5'b00000),
+ .OPMODE (7'b1010101),
+
+ .A ({30{1'b0}}),
+ .B ({{3{1'b0}}, b_hi}), + .C ({48{1'b0}}),
+ .D ({25{1'b0}}), + .P (), +
+ .CARRYIN (1'b0),
+ .CARRYOUT (),
+ + .CARRYCASCIN (1'b0), + .CARRYCASCOUT (), + + .ACIN (a_lo_chain), + .BCIN ({18{1'b0}}),
+ + .ACOUT (),
+ .BCOUT (),
+ + .PCIN (p_dsp1_chain), + .PCOUT (p_dsp2_chain), +
+ .MULTSIGNIN (1'b0), + .MULTSIGNOUT (), +
+ .PATTERNDETECT (),
+ .PATTERNBDETECT (), +
+ .UNDERFLOW (), + .OVERFLOW () + );
+
+ //
+ // a_hi * b_lo
+ // + DSP48E1 #
+ ( + .AREG (1), + .BREG (1), + .CREG (0), + .DREG (0), + .MREG (0), + .PREG (0),
+ .ADREG (0),
+
+ .INMODEREG (0), + .OPMODEREG (0),
+ .ALUMODEREG (0), + .CARRYINREG (0), + .CARRYINSELREG (0), +
+ .ACASCREG (1), + .BCASCREG (1), +
+ .A_INPUT ("DIRECT"), + .B_INPUT ("DIRECT"), +
+ .USE_SIMD ("ONE48"), + .USE_DPORT ("FALSE"), + .USE_MULT ("MULTIPLY" ), + .USE_PATTERN_DETECT ("NO_PATDET"), +
+ .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), +
+ .MASK (48'h000000000000), + .PATTERN (48'h000000000000), +
+ .AUTORESET_PATDET ("NO_RESET")
+ ) + DSP48E1_inst3
+ (
+ .CLK (clk),
+
+ .RSTA (1'b0),
+ .RSTB (1'b0), + .RSTC (1'b0),
+ .RSTD (1'b0),
+ .RSTM (1'b0),
+ .RSTP (1'b0), +
+ .RSTCTRL (1'b0), + .RSTINMODE (1'b0),
+ .RSTALUMODE (1'b0), + .RSTALLCARRYIN (1'b0), +
+ .CEA1 (1'b0), + .CEA2 (1'b1), + .CEB1 (1'b0), + .CEB2 (1'b1), + .CEC (1'b0), + .CED (1'b0), + .CEM (1'b0), + .CEP (1'b0), + .CEAD (1'b0), +
+ .CECTRL (1'b0),
+ .CEINMODE (1'b0), + .CEALUMODE (1'b0), + .CECARRYIN (1'b0), +
+ .CARRYINSEL (3'b000),
+ .ALUMODE (4'b0000), + .INMODE (5'b00000),
+ .OPMODE (7'b0010101),
+
+ .A ({{15{1'b0}}, a_hi}),
+ .B ({{ 1{1'b0}}, b_lo}), + .C ({48{1'b0}}),
+ .D ({25{1'b0}}), + .P (p_dsp3),
+
+ .CARRYIN (1'b0),
+ .CARRYOUT (),
+ + .CARRYCASCIN (1'b0), + .CARRYCASCOUT (), +
+ .ACIN ({30{1'b0}}), + .BCIN ({18{1'b0}}),
+ + .ACOUT (a_hi_chain),
+ .BCOUT (),
+ + .PCIN (p_dsp2_chain), + .PCOUT (p_dsp3_chain),
+
+ .MULTSIGNIN (1'b0), + .MULTSIGNOUT (), +
+ .PATTERNDETECT (),
+ .PATTERNBDETECT (),
+
+ .UNDERFLOW (), + .OVERFLOW () + );
+
+ //
+ // a_hi * b_hi
+ // + DSP48E1 #
+ (
+ .AREG (0),
+ .BREG (1),
+ .CREG (0), + .DREG (0), + .MREG (0),
+ .PREG (1),
+ .ADREG (0), +
+ .INMODEREG (0), + .OPMODEREG (0), + .ALUMODEREG (0), + .CARRYINREG (0), + .CARRYINSELREG (0), +
+ .ACASCREG (0), + .BCASCREG (1), +
+ .A_INPUT ("CASCADE"), + .B_INPUT ("DIRECT"), +
+ .USE_SIMD ("ONE48"), + .USE_DPORT ("FALSE"), + .USE_MULT ("MULTIPLY"), + .USE_PATTERN_DETECT ("NO_PATDET"), +
+ .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), +
+ .MASK (48'h000000000000), + .PATTERN (48'h000000000000), +
+ .AUTORESET_PATDET ("NO_RESET")
+ ) + DSP48E1_inst4
+ (
+ .CLK (clk),
+
+ .RSTA (1'b0),
+ .RSTB (1'b0),
+ .RSTC (1'b0),
+ .RSTD (1'b0),
+ .RSTM (1'b0),
+ .RSTP (1'b0),
+
+ .RSTCTRL (1'b0),
+ .RSTINMODE (1'b0), + .RSTALUMODE (1'b0), + .RSTALLCARRYIN (1'b0), +
+ .CEA1 (1'b0), + .CEA2 (1'b0), + .CEB1 (1'b0), + .CEB2 (1'b1), + .CEC (1'b0), + .CED (1'b0), + .CEM (1'b0), + .CEP (1'b1), + .CEAD (1'b0),
+
+ .CECTRL (1'b0), + .CEINMODE (1'b0), + .CEALUMODE (1'b0), + .CECARRYIN (1'b0),
+ + .CARRYINSEL (3'b000),
+ .ALUMODE (4'b0000),
+ .INMODE (5'b00000),
+ .OPMODE (7'b1010101),
+
+ .A ({30{1'b0}}), + .B ({{3{1'b0}}, b_hi}), + .C ({48{1'b0}}), + .D ({25{1'b0}}),
+ .P (p_dsp4),
+
+ .CARRYIN (1'b0),
+ .CARRYOUT (),
+ + .CARRYCASCIN (1'b0),
+ .CARRYCASCOUT (),
+
+ .ACIN (a_hi_chain), + .BCIN ({18{1'b0}}),
+ + .ACOUT (),
+ .BCOUT (),
+ + .PCIN (p_dsp3_chain), + .PCOUT (),
+
+ .MULTSIGNIN (1'b0), + .MULTSIGNOUT (), +
+ .PATTERNDETECT (), + .PATTERNBDETECT (),
+
+ .UNDERFLOW (), + .OVERFLOW () + );
+
+
+endmodule
diff --git a/src/rtl/dsp_subtractor_a7.v b/src/rtl/dsp_subtractor_a7.v new file mode 100644 index 0000000..00c2f95 --- /dev/null +++ b/src/rtl/dsp_subtractor_a7.v @@ -0,0 +1,142 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +module dsp_subtractor_a7 + ( + input [31: 0] a, + input [31: 0] b, + output [31: 0] s, + input c_in, + output c_out + ); + + wire [47: 0] p; + assign s = p[31:0]; + assign c_out = p[32]; + + DSP48E1 # + ( + .AREG (0), + .BREG (0), + .CREG (0), + .DREG (0), + .MREG (0), + .PREG (0), + .ADREG (0), + .ACASCREG (0), + .BCASCREG (0), + .ALUMODEREG (0), + .INMODEREG (0), + .OPMODEREG (0), + .CARRYINREG (0), + .CARRYINSELREG (0), + + .A_INPUT ("DIRECT"), + .B_INPUT ("DIRECT"), + .USE_DPORT ("FALSE"), + .USE_MULT ("NONE"), + .USE_SIMD ("ONE48"), + + .USE_PATTERN_DETECT ("NO_PATDET"), + .SEL_PATTERN ("PATTERN"), + .SEL_MASK ("MASK"), + .PATTERN (48'h000000000000), + .MASK (48'h3fffffffffff), + .AUTORESET_PATDET ("NO_RESET") + ) + DSP48E1_inst + ( + .CLK (1'b0), + + .RSTA (1'b0), + .RSTB (1'b0), + .RSTC (1'b0), + .RSTD (1'b0), + .RSTM (1'b0), + .RSTP (1'b0), + + .RSTCTRL (1'b0), + .RSTINMODE (1'b0), + .RSTALUMODE (1'b0), + .RSTALLCARRYIN (1'b0), + + .CEA1 (1'b0), + .CEA2 (1'b0), + .CEB1 (1'b0), + .CEB2 (1'b0), + .CEC (1'b0), + .CED (1'b0), + .CEM (1'b0), + .CEP (1'b0), + .CEAD (1'b0), + .CEALUMODE (1'b0), + .CEINMODE (1'b0), + + .CECTRL (1'b0), + .CECARRYIN (1'b0), + + .A ({{16{1'b0}}, b[31:18]}), + .B (b[17: 0]), + .C ({{16{1'b0}}, a[31:0]}), + .D (25'd0), + .P (p), + + .CARRYIN (c_in), + .CARRYOUT (), + .CARRYINSEL (3'b000), + + .CARRYCASCIN (1'b0), + .CARRYCASCOUT (), + + .PATTERNDETECT (), + .PATTERNBDETECT (), + + .OPMODE (7'b0110011), + .ALUMODE (4'b0011), + .INMODE (5'b00000), + + .MULTSIGNIN (1'b0), + .MULTSIGNOUT (), + + .UNDERFLOW (), + .OVERFLOW (), + + .ACIN (30'd0), + .BCIN (18'd0), + .PCIN (48'd0), + + .ACOUT (), + .BCOUT (), + .PCOUT () + ); + +endmodule diff --git a/src/rtl/modexpa7_adder64_carry32.v b/src/rtl/modexpa7_adder64_carry32.v new file mode 100644 index 0000000..093f660 --- /dev/null +++ b/src/rtl/modexpa7_adder64_carry32.v @@ -0,0 +1,81 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module modexpa7_adder64_carry32 + ( + input wire clk, + input wire [31: 0] t, + input wire [31: 0] x, + input wire [31: 0] y, + output wire [31: 0] s, + input wire [31: 0] c_in, + output wire [31: 0] c_out + ); + + + // + // Multiplier + // + wire [63: 0] multiplier_out; + + dsp_multiplier_a7 dsp_multiplier + ( + .clk (clk), + .a (x), + .b (y), + .p (multiplier_out) + ); + + + // + // Carry and T + // + wire [63: 0] t_ext = {{32{1'b0}}, t}; + wire [63: 0] c_ext = {{32{1'b0}}, c_in}; + + + // + // Sum + // + wire [63: 0] sum = multiplier_out + c_in + t; + + + // + // Output + // + assign s = sum[31: 0]; + assign c_out = sum[63:32]; + + +endmodule diff --git a/src/rtl/modexpa7_buffer_core.v b/src/rtl/modexpa7_buffer_core.v new file mode 100644 index 0000000..a48686e --- /dev/null +++ b/src/rtl/modexpa7_buffer_core.v @@ -0,0 +1,218 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module modexpa7_buffer_core + #(parameter OPERAND_ADDR_WIDTH = 5) // 1024 / 32 = 32 -> 5 bits + ( + input wire clk, + + input wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr, + input wire rw_coeff_bram_wr, + input wire [31:0] rw_coeff_bram_in, + output wire [31:0] rw_coeff_bram_out, + + input wire [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr, + input wire rw_mm_bram_wr, + input wire [31:0] rw_mm_bram_in, + output wire [31:0] rw_mm_bram_out, + + input wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr, + input wire rw_nn_bram_wr, + input wire [31:0] rw_nn_bram_in, + + input wire [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr, + input wire rw_y_bram_wr, + input wire [31:0] rw_y_bram_in, + output wire [31:0] rw_y_bram_out, + + input wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr, + input wire rw_r_bram_wr, + input wire [31:0] rw_r_bram_in, + output wire [31:0] rw_r_bram_out, + + input wire [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr, + input wire rw_t_bram_wr, + input wire [31:0] rw_t_bram_in, + output wire [31:0] rw_t_bram_out, + + input wire [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr, + output wire [31:0] ro_coeff_bram_out, + + input wire [OPERAND_ADDR_WIDTH:0] ro_mm_bram_addr, + output wire [31:0] ro_mm_bram_out, + + input wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr, + output wire [31:0] ro_nn_bram_out, + + input wire [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr, + output wire [31:0] ro_r_bram_out, + + input wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr, + output wire [31:0] ro_t_bram_out + ); + + + // + // Montgomery Coefficient + // + ram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) + ) + mem_coeff + ( + .clk (clk), + + .a_addr (rw_coeff_bram_addr), + .a_wr (rw_coeff_bram_wr), + .a_in (rw_coeff_bram_in), + .a_out (rw_coeff_bram_out), + + .b_addr (ro_coeff_bram_addr), + .b_out (ro_coeff_bram_out) + ); + + + // + // Powers of Message + // + ram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) + ) + mem_mm + ( + .clk (clk), + + .a_addr (rw_mm_bram_addr), + .a_wr (rw_mm_bram_wr), + .a_in (rw_mm_bram_in), + .a_out (rw_mm_bram_out), + + .b_addr (ro_mm_bram_addr), + .b_out (ro_mm_bram_out) + ); + + + // + // Extended Modulus + // + ram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) + ) + mem_nn + ( + .clk (clk), + + .a_addr (rw_nn_bram_addr), + .a_wr (rw_nn_bram_wr), + .a_in (rw_nn_bram_in), + .a_out (), + + .b_addr (ro_nn_bram_addr), + .b_out (ro_nn_bram_out) + ); + + + // + // Output + // + ram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) + ) + mem_y + ( + .clk (clk), + + .a_addr (rw_y_bram_addr), + .a_wr (rw_y_bram_wr), + .a_in (rw_y_bram_in), + .a_out (rw_y_bram_out), + + .b_addr ({(OPERAND_ADDR_WIDTH+1){1'b0}}), + .b_out () + ); + + + // + // Result of Multiplication + // + ram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) + ) + mem_r + ( + .clk (clk), + + .a_addr (rw_r_bram_addr), + .a_wr (rw_r_bram_wr), + .a_in (rw_r_bram_in), + .a_out (rw_r_bram_out), + + .b_addr (ro_r_bram_addr), + .b_out (ro_r_bram_out) + ); + + + // + // Temporary Buffer + // + ram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) + ) + mem_t + ( + .clk (clk), + + .a_addr (rw_t_bram_addr), + .a_wr (rw_t_bram_wr), + .a_in (rw_t_bram_in), + .a_out (rw_t_bram_out), + + .b_addr (ro_t_bram_addr), + .b_out (ro_t_bram_out) + ); + + +endmodule diff --git a/src/rtl/modexpa7_buffer_user.v b/src/rtl/modexpa7_buffer_user.v new file mode 100644 index 0000000..abb772b --- /dev/null +++ b/src/rtl/modexpa7_buffer_user.v @@ -0,0 +1,197 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module modexpa7_buffer_user + #(parameter OPERAND_ADDR_WIDTH = 5) // 1024 / 32 = 32 -> 5 bits + ( + input wire clk, + + input wire bus_cs, + input wire bus_we, + input wire [ADDR_WIDTH_TOTAL-1:0] bus_addr, + input wire [31:0] bus_data_wr, + output wire [31:0] bus_data_rd, + + input wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr, + output wire [31:0] ro_modulus_bram_out, + + input wire [OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr, + output wire [31:0] ro_message_bram_out, + + input wire [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr, + output wire [31:0] ro_exponent_bram_out, + + input wire [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr, + input wire rw_result_bram_wr, + input wire [31:0] rw_result_bram_in + ); + + + // + // Locals + // + localparam ADDR_WIDTH_TOTAL = OPERAND_ADDR_WIDTH + 2; + + localparam [1: 0] BUS_ADDR_BANK_MODULUS = 2'b00; + localparam [1: 0] BUS_ADDR_BANK_MESSAGE = 2'b01; + localparam [1: 0] BUS_ADDR_BANK_EXPONENT = 2'b10; + localparam [1: 0] BUS_ADDR_BANK_RESULT = 2'b11; + + // + // Address Decoder + // + wire [OPERAND_ADDR_WIDTH-1:0] bus_addr_operand_word = bus_addr[OPERAND_ADDR_WIDTH-1:0]; + wire [ 1:0] bus_addr_operand_bank = bus_addr[ADDR_WIDTH_TOTAL-1:ADDR_WIDTH_TOTAL-2]; + + + // + // Modulus Memory + // + wire [31: 0] bus_data_rd_modulus; + + ram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH) + ) + mem_modulus + ( + .clk (clk), + + .a_addr (bus_addr_operand_word), + .a_wr (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MODULUS)), + .a_in (bus_data_wr), + .a_out (bus_data_rd_modulus), + + .b_addr (ro_modulus_bram_addr), + .b_out (ro_modulus_bram_out) + ); + + + // + // Message Memory + // + wire [31: 0] bus_data_rd_message; + + ram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH) + ) + mem_message + ( + .clk (clk), + + .a_addr (bus_addr_operand_word), + .a_wr (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MESSAGE)), + .a_in (bus_data_wr), + .a_out (bus_data_rd_message), + + .b_addr (ro_message_bram_addr), + .b_out (ro_message_bram_out) + ); + + + // + // Exponent Memory + // + wire [31: 0] bus_data_rd_exponent; + + ram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH) + ) + mem_exponent + ( + .clk (clk), + + .a_addr (bus_addr_operand_word), + .a_wr (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_EXPONENT)), + .a_in (bus_data_wr), + .a_out (bus_data_rd_exponent), + + .b_addr (ro_exponent_bram_addr), + .b_out (ro_exponent_bram_out) + ); + + + // + // Result Memory + // + wire [31: 0] bus_data_rd_result; + + ram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH) + ) + mem_result + ( + .clk (clk), + + .a_addr (rw_result_bram_addr), + .a_wr (rw_result_bram_wr), + .a_in (rw_result_bram_in), + .a_out (), + + .b_addr (bus_addr_operand_word), + .b_out (bus_data_rd_result) + ); + + + // + // Output Selector + // + reg [1: 0] bus_addr_operand_bank_prev; + always @(posedge clk) bus_addr_operand_bank_prev = bus_addr_operand_bank; + + reg [31: 0] bus_data_rd_mux; + assign bus_data_rd = bus_data_rd_mux; + + always @(*) + // + case (bus_addr_operand_bank_prev) + // + BUS_ADDR_BANK_MODULUS: bus_data_rd_mux = bus_data_rd_modulus; + BUS_ADDR_BANK_MESSAGE: bus_data_rd_mux = bus_data_rd_message; + BUS_ADDR_BANK_EXPONENT: bus_data_rd_mux = bus_data_rd_exponent; + BUS_ADDR_BANK_RESULT: bus_data_rd_mux = bus_data_rd_result; + // + default: bus_data_rd_mux = {32{1'bX}}; + // + endcase + + +endmodule diff --git a/src/rtl/modexpa7_modinv32.v b/src/rtl/modexpa7_modinv32.v new file mode 100644 index 0000000..cb47746 --- /dev/null +++ b/src/rtl/modexpa7_modinv32.v @@ -0,0 +1,141 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module modexpa7_modinv32 + ( + input wire clk, + + input wire ena, + output wire rdy, + + input wire [31: 0] n0, + output wire [31: 0] n0_modinv + ); + + + // + // Trigger + // + reg ena_dly = 1'b0; + wire ena_trig = ena && !ena_dly; + always @(posedge clk) ena_dly <= ena; + + + // + // Ready Register + // + reg rdy_reg = 1'b0; + assign rdy = rdy_reg; + + + // + // Counter + // + reg [7: 0] cnt = 8'd0; + wire [7: 0] cnt_zero = 8'd0; + wire [7: 0] cnt_last = 8'd132; + wire [7: 0] cnt_next = cnt + 1'b1; + wire [1: 0] cnt_phase = cnt[1:0]; + wire [5: 0] cnt_cycle = cnt[7:2]; + + always @(posedge clk) + // + if (cnt == cnt_zero) cnt <= (!rdy_reg && ena_trig) ? cnt_next : cnt_zero; + else cnt <= (cnt == cnt_last) ? cnt_zero : cnt_next; + + + // + // Enable / Ready Logic + // + always @(posedge clk) + // + if (cnt == cnt_last) rdy_reg <= 1'b1; + else if ((cnt == cnt_zero) && (rdy_reg && !ena)) rdy_reg <= 1'b0; + + + // + // Output Register + // + reg [31: 0] n0_modinv_reg; + assign n0_modinv = n0_modinv_reg; + + + // + // Multiplier + //
+ (* KEEP="TRUE" *) + wire [63: 0] multiplier_out; + wire [31: 0] multiplier_out_masked = multiplier_out[31: 0] & {mask_reg, 1'b1}; + + dsp_multiplier_a7 dsp_multiplier + ( + .clk (clk), + .a (n0), + .b (n0_modinv_reg), + .p (multiplier_out) + ); + + + // + // Mask and Power + // + reg [30: 0] mask_reg; + reg [31: 0] power_reg; + + always @(posedge clk) + // + if (cnt_phase == 2'd1) begin + // + if (cnt_cycle == 6'd0) begin + // + mask_reg <= 31'd0; + power_reg <= 32'd1; + // + n0_modinv_reg <= 32'd0; + // + end else begin + // + mask_reg <= { mask_reg[29:0], 1'b1}; + power_reg <= {power_reg[30:0], 1'b0}; + // + if (multiplier_out_masked != 32'd1) + // + n0_modinv_reg <= n0_modinv_reg + power_reg; + // + end + // + end + + +endmodule diff --git a/src/rtl/modexpa7_montgomery_coeff.v b/src/rtl/modexpa7_montgomery_coeff.v new file mode 100644 index 0000000..009fd99 --- /dev/null +++ b/src/rtl/modexpa7_montgomery_coeff.v @@ -0,0 +1,425 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module modexpa7_montgomery_coeff + #(parameter MODULUS_NUM_BITS = 11, // 1024 -> 11 bits + parameter OPERAND_ADDR_WIDTH = 5) // 1024 / 32 = 32 -> 5 bits + ( + input wire clk, + + input wire ena, + output wire rdy, + + input wire [MODULUS_NUM_BITS-1:0] modulus_width, + + output wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr, + output wire coeff_bram_wr, + output wire [31:0] coeff_bram_in, + input wire [31:0] coeff_bram_out, + + output wire [OPERAND_ADDR_WIDTH :0] nn_bram_addr, + output wire nn_bram_wr, + output wire [31:0] nn_bram_in, + + output wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr, + input wire [31:0] modulus_bram_out, + + output wire [31:0] modinv_n0, + output wire modinv_ena, + input wire modinv_rdy + ); + + + // + // Locals + // + localparam [ MODULUS_NUM_BITS :0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}}; + localparam [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; + localparam [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}}; + + + // + // FSM + // + localparam FSM_STATE_IDLE = 6'd0; + + localparam FSM_STATE_INIT = 6'd10; + + localparam FSM_STATE_SHIFT_READ = 6'd21; + localparam FSM_STATE_SHIFT_WRITE = 6'd22; + + localparam FSM_STATE_COMPARE_READ = 6'd31; + localparam FSM_STATE_COMPARE_COMPARE = 6'd32; + + localparam FSM_STATE_SUBTRACT_READ = 6'd41; + localparam FSM_STATE_SUBTRACT_WRITE = 6'd42; + + localparam FSM_STATE_ROUND = 6'd50; + + localparam FSM_STATE_FINAL = 6'd60; + + reg [5: 0] fsm_state = FSM_STATE_IDLE; + + + // + // Trigger + // + reg ena_dly = 1'b0; + + wire ena_trig = ena && !ena_dly; + + always @(posedge clk) ena_dly <= ena; + + + // + // Ready Register + // + reg rdy_reg = 1'b0; + + assign rdy = rdy_reg; + + + // + // ModInv Control + // + reg modinv_ena_reg = 1'b0; + reg [31: 0] modinv_n0_reg; + + assign modinv_ena = modinv_ena_reg; + assign modinv_n0 = modinv_n0_reg; + + + // + // Enable / Ready Logic + // + always @(posedge clk) + // + if (fsm_state == FSM_STATE_FINAL) begin + // + if (modinv_rdy) rdy_reg <= 1'b1; + // + end else if (fsm_state == FSM_STATE_IDLE) begin + // + if (rdy_reg && !ena) rdy_reg <= 1'b0; + // + end + + + // + // Flags + // + reg reg_shift_carry = 1'b0; + reg reg_subtractor_borrow = 1'b0; + + + // + // Round Counter + // + reg [MODULUS_NUM_BITS:0] round_count = round_count_zero; + wire [MODULUS_NUM_BITS:0] round_count_last = {modulus_width, 1'b0} + 6'd63; + wire [MODULUS_NUM_BITS:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero; + + + // + // Modulus BRAM Interface + // + reg [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_reg = modulus_bram_addr_zero; + + assign modulus_bram_addr = modulus_bram_addr_reg; + + + // + // Coeff BRAM Interface + // + reg [OPERAND_ADDR_WIDTH:0] coeff_bram_addr_reg = coeff_bram_addr_zero; + reg coeff_bram_wr_reg = 1'b0; + + assign coeff_bram_addr = coeff_bram_addr_reg; + assign coeff_bram_wr = coeff_bram_wr_reg; + + + // + // NN BRAM Interface + // + reg [OPERAND_ADDR_WIDTH:0] nn_bram_addr_reg = coeff_bram_addr_zero; + reg nn_bram_wr_reg = 1'b0; + + assign nn_bram_addr = nn_bram_addr_reg; + assign nn_bram_wr = nn_bram_wr_reg; + + + // + // Hardware Subtractor + // + wire [31: 0] subtractor_out; + wire subtractor_out_nonzero = |subtractor_out; + wire subtractor_borrow_out; + wire subtractor_borrow_in; + + assign subtractor_borrow_in = (fsm_state == FSM_STATE_COMPARE_COMPARE) ? 1'b0 : reg_subtractor_borrow; + + dsp_subtractor_a7 dsp_subtractor + ( + .a (coeff_bram_out), + .b (modulus_bram_out), + .s (subtractor_out), + .c_in (subtractor_borrow_in), + .c_out (subtractor_borrow_out) + ); + + + // + // Handy Wires + // + wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH]; + + wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_last = {modulus_width_msb, 1'b0}; + wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_next_or_zero = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_zero; + wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_next_or_last = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_last; + wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_prev_or_zero = (coeff_bram_addr_reg > coeff_bram_addr_zero) ? coeff_bram_addr_reg - 1'b1 : coeff_bram_addr_zero; + + wire [OPERAND_ADDR_WIDTH :0] modulus_bram_addr_last_ext = coeff_bram_addr_last - 1'b1; + + wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_last = modulus_bram_addr_last_ext[OPERAND_ADDR_WIDTH-1:0]; + wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_next_or_zero = (modulus_bram_addr_reg < modulus_bram_addr_last) ? modulus_bram_addr_reg + 1'b1 : modulus_bram_addr_zero; + wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_prev_or_zero = (modulus_bram_addr_reg > modulus_bram_addr_zero) ? modulus_bram_addr_reg - 1'b1 : modulus_bram_addr_zero; + + + // + // Coeff BRAM Input Logic + // + reg [31: 0] coeff_bram_in_mux; + + assign coeff_bram_in = coeff_bram_in_mux; + + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT: + // + if (coeff_bram_addr_reg == coeff_bram_addr_zero) coeff_bram_in_mux = 32'h00000001; + else coeff_bram_in_mux = 32'h00000000; + + FSM_STATE_SHIFT_WRITE: + // + coeff_bram_in_mux = {coeff_bram_out[30:0], reg_shift_carry}; + + FSM_STATE_SUBTRACT_WRITE: + // + if (coeff_bram_addr_reg == coeff_bram_addr_last) coeff_bram_in_mux = 32'h00000000; + else coeff_bram_in_mux = subtractor_out; + + default: + // + coeff_bram_in_mux = {32{1'bX}}; + + endcase + + + // + // NN BRAM Input Logic + // + reg [31: 0] nn_bram_in_mux; + + assign nn_bram_in = nn_bram_in_mux; + + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT: + // + if (coeff_bram_addr_reg == coeff_bram_addr_last) nn_bram_in_mux = {32{1'b0}}; + else nn_bram_in_mux = modulus_bram_out; + + default: + // + nn_bram_in_mux = {32{1'bX}}; + + endcase + + + // + // Comparison Functions + // + reg compare_greater_or_equal; + reg compare_less_than; + + wire compare_done = compare_greater_or_equal | compare_less_than; + + always @(*) + // + if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_greater_or_equal = coeff_bram_out[0]; + // + else if (coeff_bram_addr_reg == coeff_bram_addr_zero) compare_greater_or_equal = !subtractor_borrow_out; + // + else compare_greater_or_equal = !subtractor_borrow_out && subtractor_out_nonzero; + + always @(*) + // + if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_less_than = 1'b0; + // + else compare_less_than = subtractor_borrow_out; + + + + // + // Main Logic + // + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_INIT: begin + // + coeff_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0; + coeff_bram_addr_reg <= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero; + // + nn_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0; + nn_bram_addr_reg <= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero; + // + if (!coeff_bram_wr_reg) begin + // + modinv_ena_reg <= 1'b1; + modinv_n0_reg <= modulus_bram_out; + // + end + // + if (modulus_bram_addr_reg == modulus_bram_addr_zero) begin + // + if (!coeff_bram_wr_reg) + // + modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero; + // + end else begin + // + modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero; + // + end + // + end + + FSM_STATE_SHIFT_READ: begin + // + coeff_bram_wr_reg <= 1'b1; + // + if (coeff_bram_addr_reg == coeff_bram_addr_zero) + // + reg_shift_carry <= 1'b0; + // + end + + FSM_STATE_SHIFT_WRITE: begin + // + coeff_bram_wr_reg <= 1'b0; + coeff_bram_addr_reg <= coeff_bram_addr_next_or_last; + // + reg_shift_carry <= coeff_bram_out[31]; + // + end + + FSM_STATE_COMPARE_COMPARE: begin + // + coeff_bram_addr_reg <= compare_done ? coeff_bram_addr_zero : coeff_bram_addr_prev_or_zero; + // + modulus_bram_addr_reg <= compare_done ? modulus_bram_addr_zero : ((coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_last : modulus_bram_addr_prev_or_zero); + // + end + + FSM_STATE_SUBTRACT_READ: begin + // + coeff_bram_wr_reg <= 1'b1; + // + if (coeff_bram_addr_reg == coeff_bram_addr_zero) + // + reg_subtractor_borrow <= 1'b0; + // + end + + FSM_STATE_SUBTRACT_WRITE: begin + // + coeff_bram_wr_reg <= 1'b0; + coeff_bram_addr_reg <= coeff_bram_addr_next_or_zero; + // + modulus_bram_addr_reg <= (coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_zero : modulus_bram_addr_next_or_zero; + // + reg_subtractor_borrow <= subtractor_borrow_out; + // + end + + FSM_STATE_ROUND: begin + // + round_count <= round_count_next; + // + end + + FSM_STATE_FINAL: begin + // + if (modinv_rdy) modinv_ena_reg <= 1'b0; + // + end + + endcase + + + // + // FSM Transition Logic + // + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_IDLE: fsm_state <= (!rdy_reg && !modinv_rdy && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE; + + FSM_STATE_SHIFT_READ: fsm_state <= FSM_STATE_SHIFT_WRITE; + FSM_STATE_COMPARE_READ: fsm_state <= FSM_STATE_COMPARE_COMPARE; + FSM_STATE_SUBTRACT_READ: fsm_state <= FSM_STATE_SUBTRACT_WRITE; + + FSM_STATE_INIT: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_INIT : FSM_STATE_SHIFT_READ; + FSM_STATE_SHIFT_WRITE: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_COMPARE_READ; + FSM_STATE_SUBTRACT_WRITE: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND; + + FSM_STATE_ROUND: fsm_state <= (round_count < round_count_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_FINAL; + + FSM_STATE_COMPARE_COMPARE: fsm_state <= compare_done ? (compare_greater_or_equal ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND) : FSM_STATE_COMPARE_READ; + + FSM_STATE_FINAL: fsm_state <= modinv_rdy ? FSM_STATE_IDLE : FSM_STATE_FINAL; + + default: fsm_state <= FSM_STATE_IDLE; + + endcase + + +endmodule diff --git a/src/rtl/modexpa7_montgomery_multiplier.v b/src/rtl/modexpa7_montgomery_multiplier.v new file mode 100644 index 0000000..6637231 --- /dev/null +++ b/src/rtl/modexpa7_montgomery_multiplier.v @@ -0,0 +1,408 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module modexpa7_montgomery_multiplier + #(parameter OPERAND_NUM_BITS = 11, // 1024 -> 11 bits + parameter OPERAND_ADDR_WIDTH = 5) // 1024 / 32 = 32 -> 5 bits + ( + input wire clk, + + input wire ena, + output wire rdy, + + input wire [OPERAND_NUM_BITS-1:0] operand_width, + + output wire [OPERAND_ADDR_WIDTH :0] x_bram_addr, + input wire [31:0] x_bram_out, + + output wire [OPERAND_ADDR_WIDTH :0] y_bram_addr, + input wire [31:0] y_bram_out, + + output wire [OPERAND_ADDR_WIDTH :0] n_bram_addr, + input wire [31:0] n_bram_out, + + output wire [OPERAND_ADDR_WIDTH :0] z_bram_addr, + output wire z_bram_wr, + output wire [31:0] z_bram_in, + input wire [31:0] z_bram_out, + + input wire [31:0] n0_modinv + ); + + + // + // Locals + // + localparam [OPERAND_ADDR_WIDTH:0] round_count_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; + localparam [OPERAND_ADDR_WIDTH:0] bram_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; + + + // + // FSM + // + localparam FSM_STATE_IDLE = 6'd0; + + localparam FSM_STATE_INIT = 6'd10; + + localparam FSM_STATE_MUL_XY_CALC = 6'd21; + localparam FSM_STATE_MUL_XY_PIPELINE = 6'd22; + localparam FSM_STATE_MUL_XY_REGISTER = 6'd23; + localparam FSM_STATE_MUL_XY_WRITE = 6'd24; + + localparam FSM_STATE_MAGIC_CALC = 6'd31; + localparam FSM_STATE_MAGIC_PIPELINE = 6'd32; + localparam FSM_STATE_MAGIC_REGISTER = 6'd33; + + localparam FSM_STATE_MUL_MN_CALC = 6'd41; + localparam FSM_STATE_MUL_MN_PIPELINE = 6'd42; + localparam FSM_STATE_MUL_MN_REGISTER = 6'd43; + localparam FSM_STATE_MUL_MN_WRITE = 6'd44; + + localparam FSM_STATE_SHIFT = 6'd50; + + localparam FSM_STATE_ROUND = 6'd55; + + localparam FSM_STATE_FINAL = 6'd60; + + reg [5: 0] fsm_state = FSM_STATE_IDLE; + + + // + // Trigger + // + reg ena_dly = 1'b0; + always @(posedge clk) ena_dly <= ena; + wire ena_trig = (ena == 1'b1) && (ena_dly == 1'b0); + + + // + // Ready Register + // + reg rdy_reg = 1'b0; + assign rdy = rdy_reg; + + + // + // Enable / Ready Logic + // + always @(posedge clk) + // + if (fsm_state == FSM_STATE_FINAL) begin + // + rdy_reg <= 1'b1; + // + end else if (fsm_state == FSM_STATE_IDLE) begin + // + if (rdy_reg && !ena) rdy_reg <= 1'b0; + // + end + + + // + // X, Y, N BRAM Interface + // + reg [OPERAND_ADDR_WIDTH:0] x_bram_addr_reg = bram_addr_zero; + reg [OPERAND_ADDR_WIDTH:0] y_bram_addr_reg = bram_addr_zero; + reg [OPERAND_ADDR_WIDTH:0] n_bram_addr_reg = bram_addr_zero; + + assign x_bram_addr = x_bram_addr_reg; + assign y_bram_addr = y_bram_addr_reg; + assign n_bram_addr = n_bram_addr_reg; + + + // + // Z BRAM Interface + // + reg [OPERAND_ADDR_WIDTH:0] z_bram_addr_reg = bram_addr_zero; + reg z_bram_wr_reg = 1'b0; + reg [ 31:0] z_bram_in_mux; + + assign z_bram_addr = z_bram_addr_reg; + assign z_bram_wr = z_bram_wr_reg; + assign z_bram_in = z_bram_in_mux; + + + // + // Handy Wires + // + wire [OPERAND_ADDR_WIDTH-1:0] operand_width_msb = operand_width[OPERAND_NUM_BITS-1:OPERAND_NUM_BITS-OPERAND_ADDR_WIDTH]; + + wire [OPERAND_ADDR_WIDTH :0] bram_addr_last = {operand_width_msb, 1'b1}; // +1 + + + // + // Hardware Multiplier (X * Y) + // + reg [31: 0] multiplier_xy_carry_in; + wire [31: 0] multiplier_xy_out; + wire [31: 0] multiplier_xy_carry_out; + + modexpa7_adder64_carry32 dsp_multiplier_xy + ( + .clk (clk), + .t (/*(z_bram_addr_reg < bram_addr_last) ? */z_bram_out/* : {32{1'b0}}*/), + .x (/*(z_bram_addr_reg < bram_addr_last) ? */x_bram_out/* : {32{1'b0}}*/), + .y (/*(z_bram_addr_reg < bram_addr_last) ? */y_bram_out/* : {32{1'b0}}*/), + .s (multiplier_xy_out), + .c_in (multiplier_xy_carry_in), + .c_out (multiplier_xy_carry_out) + ); + + + // + // Hardware Multiplier (Magic) + //
+ (* KEEP="TRUE" *) + wire [63: 0] multiplier_magic_out; + reg [31: 0] magic_value_reg; + + dsp_multiplier_a7 dsp_multiplier_magic + ( + .clk (clk), + .a (z_bram_out), + .b (n0_modinv), + .p (multiplier_magic_out) + ); + + + // + // Hardware Multiplier (M * N) + // + reg [31: 0] multiplier_mn_carry_in; + wire [31: 0] multiplier_mn_out; + wire [31: 0] multiplier_mn_carry_out; + + modexpa7_adder64_carry32 dsp_multiplier_mn + ( + .clk (clk), + .t (z_bram_out), + .x (magic_value_reg), + .y (/*(z_bram_addr_reg < bram_addr_last) ? */n_bram_out/* : {32{1'b0}}*/), + .s (multiplier_mn_out), + .c_in (multiplier_mn_carry_in), + .c_out (multiplier_mn_carry_out) + ); + + + // + // Z BRAM Input Selector + // + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT: + // + z_bram_in_mux = {32{1'b0}}; + + FSM_STATE_MUL_XY_WRITE: + // + if (z_bram_addr_reg < bram_addr_last) z_bram_in_mux = multiplier_xy_out; + else z_bram_in_mux = multiplier_xy_carry_in; + + FSM_STATE_MUL_MN_WRITE: + // + if (z_bram_addr_reg < bram_addr_last) z_bram_in_mux = multiplier_mn_out; + else z_bram_in_mux = multiplier_mn_carry_in + z_bram_out; + + FSM_STATE_SHIFT: + // + z_bram_in_mux = z_bram_out; + + default: + // + z_bram_in_mux = {32{1'bX}}; + + endcase + + + // + // Handy Functions + // + function [OPERAND_ADDR_WIDTH:0] bram_addr_next_or_zero; + input [OPERAND_ADDR_WIDTH:0] bram_addr; + begin + bram_addr_next_or_zero = (bram_addr < bram_addr_last) ? bram_addr + 1'b1 : bram_addr_zero; + end + endfunction + + function [OPERAND_ADDR_WIDTH:0] bram_addr_next_or_last; + input [OPERAND_ADDR_WIDTH:0] bram_addr; + begin + bram_addr_next_or_last = (bram_addr < bram_addr_last) ? bram_addr + 1'b1 : bram_addr_last; + end + endfunction + + function [OPERAND_ADDR_WIDTH:0] bram_addr_prev_or_zero; + input [OPERAND_ADDR_WIDTH:0] bram_addr; + begin + bram_addr_prev_or_zero = (bram_addr > bram_addr_zero) ? bram_addr - 1'b1 : bram_addr_zero; + end + endfunction + + + // + // Round Counter + // + reg [OPERAND_ADDR_WIDTH:0] round_count = round_count_zero; + wire [OPERAND_ADDR_WIDTH:0] round_count_last = {operand_width_msb, 1'b0}; + wire [OPERAND_ADDR_WIDTH:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero; + + + // + // Main Logic + // + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_INIT: begin + // + z_bram_wr_reg <= (z_bram_addr_reg < bram_addr_last) ? 1'b1 : 1'b0; + z_bram_addr_reg <= z_bram_wr_reg ? bram_addr_next_or_zero(z_bram_addr_reg) : bram_addr_zero; + // + end + + FSM_STATE_MUL_XY_CALC: begin + // + if (z_bram_addr_reg == bram_addr_zero) begin + // + multiplier_xy_carry_in <= {32{1'b0}}; + // + end + // + end + + FSM_STATE_MUL_XY_REGISTER: begin + // + z_bram_wr_reg <= 1'b1; + // + end + + FSM_STATE_MUL_XY_WRITE: begin + // + z_bram_wr_reg <= 1'b0; + z_bram_addr_reg <= bram_addr_next_or_zero(z_bram_addr_reg); + // + x_bram_addr_reg <= bram_addr_next_or_zero(x_bram_addr_reg); + // + multiplier_xy_carry_in <= multiplier_xy_carry_out; + // + end + + FSM_STATE_MUL_MN_CALC: begin + // + if (z_bram_addr_reg == bram_addr_zero) begin + // + multiplier_mn_carry_in <= {32{1'b0}}; + // + magic_value_reg <= multiplier_magic_out[31:0]; + // + end + // + end + + FSM_STATE_MUL_MN_REGISTER: begin + // + z_bram_wr_reg <= 1'b1; + // + end + + FSM_STATE_MUL_MN_WRITE: begin + // + z_bram_wr_reg <= 1'b0; + z_bram_addr_reg <= bram_addr_next_or_last(z_bram_addr_reg); + // + n_bram_addr_reg <= bram_addr_next_or_zero(n_bram_addr_reg); + // + multiplier_mn_carry_in <= multiplier_mn_carry_out; + // + end + + FSM_STATE_SHIFT: begin + // + if (z_bram_wr_reg == 1'b0) z_bram_wr_reg <= 1'b1; + else if (z_bram_addr_reg == bram_addr_zero) z_bram_wr_reg <= 1'b0; + + z_bram_addr_reg <= bram_addr_prev_or_zero(z_bram_addr_reg); + // + end + + FSM_STATE_ROUND: begin + // + y_bram_addr_reg <= (round_count < round_count_last) ? bram_addr_next_or_zero(y_bram_addr_reg) : bram_addr_zero; + // + round_count <= round_count_next; + // + end + + endcase + + + // + // FSM Transition Logic + // + always @(posedge clk) + // + case (fsm_state) + // + FSM_STATE_IDLE: fsm_state <= (!rdy_reg && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE; + + FSM_STATE_INIT: fsm_state <= (z_bram_addr < bram_addr_last ) ? FSM_STATE_INIT : FSM_STATE_MUL_XY_CALC; + FSM_STATE_ROUND: fsm_state <= (round_count < round_count_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_FINAL; + + FSM_STATE_MUL_XY_CALC: fsm_state <= FSM_STATE_MUL_XY_PIPELINE; + FSM_STATE_MAGIC_CALC: fsm_state <= FSM_STATE_MAGIC_PIPELINE; + FSM_STATE_MUL_MN_CALC: fsm_state <= FSM_STATE_MUL_MN_PIPELINE; + + FSM_STATE_MUL_XY_PIPELINE: fsm_state <= FSM_STATE_MUL_XY_REGISTER; + FSM_STATE_MAGIC_PIPELINE: fsm_state <= FSM_STATE_MAGIC_REGISTER; + FSM_STATE_MUL_MN_PIPELINE: fsm_state <= FSM_STATE_MUL_MN_REGISTER; + + FSM_STATE_MUL_XY_REGISTER: fsm_state <= FSM_STATE_MUL_XY_WRITE; + FSM_STATE_MAGIC_REGISTER: fsm_state <= FSM_STATE_MUL_MN_CALC; + FSM_STATE_MUL_MN_REGISTER: fsm_state <= FSM_STATE_MUL_MN_WRITE; + + FSM_STATE_MUL_XY_WRITE: fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_MAGIC_CALC; + FSM_STATE_MUL_MN_WRITE: fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_MN_CALC : FSM_STATE_SHIFT; + FSM_STATE_SHIFT: fsm_state <= (z_bram_addr > bram_addr_zero) ? FSM_STATE_SHIFT : FSM_STATE_ROUND; + + FSM_STATE_FINAL: fsm_state <= FSM_STATE_IDLE; + + default: fsm_state <= FSM_STATE_IDLE; + + endcase + + +endmodule diff --git a/src/rtl/modexpa7_top.v b/src/rtl/modexpa7_top.v new file mode 100644 index 0000000..6c5a922 --- /dev/null +++ b/src/rtl/modexpa7_top.v @@ -0,0 +1,706 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module modexpa7_top + #(parameter MAX_MODULUS_WIDTH = 1024) + ( + input wire clk, + + input wire init, + output wire ready, + + input wire next, + output wire valid, + + input wire [MODULUS_NUM_BITS-1:0] modulus_width, + input wire [MODULUS_NUM_BITS-1:0] exponent_width, + + input wire fast_public_mode, + + input wire bus_cs, + input wire bus_we, + input wire [ADDR_WIDTH_TOTAL-1:0] bus_addr, + input wire [31:0] bus_data_wr, + output wire [31:0] bus_data_rd + ); + + + // + // modexpa7_clog2() + // + function integer modexpa7_clog2; + input integer value; + integer ret; + begin + value = value - 1; + for (ret = 0; value > 0; ret = ret + 1) + value = value >> 1; + modexpa7_clog2 = ret; + end + endfunction + + + // + // Locals + // + localparam OPERAND_ADDR_WIDTH = modexpa7_clog2(MAX_MODULUS_WIDTH / 32); + localparam MODULUS_NUM_BITS = modexpa7_clog2(MAX_MODULUS_WIDTH + 1); + localparam ADDR_WIDTH_TOTAL = OPERAND_ADDR_WIDTH + 2; + + localparam [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}}; + localparam [OPERAND_ADDR_WIDTH :0] bram_core_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; + + localparam [ MODULUS_NUM_BITS:0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}}; + + + // + // User Memory + // + wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr; + wire [ 31:0] ro_modulus_bram_out; + + reg [OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr = bram_user_addr_zero; + wire [ 31:0] ro_message_bram_out; + + reg [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr = bram_user_addr_zero; + wire [ 31:0] ro_exponent_bram_out; + + reg [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr = bram_user_addr_zero; + wire [ 31:0] rw_result_bram_out; + reg rw_result_bram_wr = 1'b0; + wire [ 31:0] rw_result_bram_in; + + modexpa7_buffer_user # + ( + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) + ) + mem_user + ( + .clk (clk), + + .bus_cs (bus_cs), + .bus_we (bus_we), + .bus_addr (bus_addr), + .bus_data_wr (bus_data_wr), + .bus_data_rd (bus_data_rd), + + .ro_modulus_bram_addr (ro_modulus_bram_addr), + .ro_modulus_bram_out (ro_modulus_bram_out), + + .ro_message_bram_addr (ro_message_bram_addr), + .ro_message_bram_out (ro_message_bram_out), + + .ro_exponent_bram_addr (ro_exponent_bram_addr), + .ro_exponent_bram_out (ro_exponent_bram_out), + + .rw_result_bram_addr (rw_result_bram_addr), + .rw_result_bram_wr (rw_result_bram_wr), + .rw_result_bram_in (rw_result_bram_in) + ); + + + // + // Core (Internal) Memory + // + wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr; + wire rw_coeff_bram_wr; + wire [ 31:0] rw_coeff_bram_in; + wire [ 31:0] rw_coeff_bram_out; + + reg [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr = bram_core_addr_zero; + reg rw_mm_bram_wr = 1'b0; + reg [ 31:0] rw_mm_bram_in; + wire [ 31:0] rw_mm_bram_out; + + wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr; + wire rw_nn_bram_wr; + wire [ 31:0] rw_nn_bram_in; + + reg [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr = bram_core_addr_zero; + reg rw_y_bram_wr = 1'b0; + reg [ 31:0] rw_y_bram_in; + wire [ 31:0] rw_y_bram_out; + + wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr; + wire rw_r_bram_wr; + wire [ 31:0] rw_r_bram_in; + wire [ 31:0] rw_r_bram_out; + + reg [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr = bram_core_addr_zero; + reg rw_t_bram_wr = 1'b0; + reg [ 31:0] rw_t_bram_in; + wire [ 31:0] rw_t_bram_out; + + reg [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr = bram_core_addr_zero; + wire [ 31:0] ro_coeff_bram_out; + + wire [OPERAND_ADDR_WIDTH:0] ro_mm_bram_addr; + wire [ 31:0] ro_mm_bram_out; + + wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr; + wire [ 31:0] ro_nn_bram_out; + + reg [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr = bram_core_addr_zero; + wire [ 31:0] ro_r_bram_out; + + wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr; + wire [ 31:0] ro_t_bram_out; + + modexpa7_buffer_core # + ( + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) + ) + mem_core + ( + .clk (clk), + + .rw_coeff_bram_addr (rw_coeff_bram_addr), + .rw_coeff_bram_wr (rw_coeff_bram_wr), + .rw_coeff_bram_in (rw_coeff_bram_in), + .rw_coeff_bram_out (rw_coeff_bram_out), + + .rw_mm_bram_addr (rw_mm_bram_addr), + .rw_mm_bram_wr (rw_mm_bram_wr), + .rw_mm_bram_in (rw_mm_bram_in), + .rw_mm_bram_out (rw_mm_bram_out), + + .rw_nn_bram_addr (rw_nn_bram_addr), + .rw_nn_bram_wr (rw_nn_bram_wr), + .rw_nn_bram_in (rw_nn_bram_in), + + .rw_y_bram_addr (rw_y_bram_addr), + .rw_y_bram_wr (rw_y_bram_wr), + .rw_y_bram_in (rw_y_bram_in), + .rw_y_bram_out (rw_y_bram_out), + + .rw_r_bram_addr (rw_r_bram_addr), + .rw_r_bram_wr (rw_r_bram_wr), + .rw_r_bram_in (rw_r_bram_in), + .rw_r_bram_out (rw_r_bram_out), + + .rw_t_bram_addr (rw_t_bram_addr), + .rw_t_bram_wr (rw_t_bram_wr), + .rw_t_bram_in (rw_t_bram_in), + .rw_t_bram_out (rw_t_bram_out), + + .ro_coeff_bram_addr (ro_coeff_bram_addr), + .ro_coeff_bram_out (ro_coeff_bram_out), + + .ro_mm_bram_addr (ro_mm_bram_addr), + .ro_mm_bram_out (ro_mm_bram_out), + + .ro_nn_bram_addr (ro_nn_bram_addr), + .ro_nn_bram_out (ro_nn_bram_out), + + .ro_r_bram_addr (ro_r_bram_addr), + .ro_r_bram_out (ro_r_bram_out), + + .ro_t_bram_addr (ro_t_bram_addr), + .ro_t_bram_out (ro_t_bram_out) + ); + + + // + // Small 32-bit ModInv Core + // + wire modinv_ena; + wire modinv_rdy; + + wire [31: 0] modinv_n0; + wire [31: 0] modinv_n0_negative = ~modinv_n0 + 1'b1; + wire [31: 0] modinv_n0_modinv; + + modexpa7_modinv32 core_modinv32 + ( + .clk (clk), + + .ena (modinv_ena), + .rdy (modinv_rdy), + + .n0 (modinv_n0_negative), + .n0_modinv (modinv_n0_modinv) + ); + + + // + // Montgomery Coefficient Calculator + // + modexpa7_montgomery_coeff # + ( + .MODULUS_NUM_BITS (MODULUS_NUM_BITS), + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) + ) + core_montgomery_coeff + ( + .clk (clk), + + .ena (init), + .rdy (ready), + + .modulus_width (modulus_width), + + .coeff_bram_addr (rw_coeff_bram_addr), + .coeff_bram_wr (rw_coeff_bram_wr), + .coeff_bram_in (rw_coeff_bram_in), + .coeff_bram_out (rw_coeff_bram_out), + + .nn_bram_addr (rw_nn_bram_addr), + .nn_bram_wr (rw_nn_bram_wr), + .nn_bram_in (rw_nn_bram_in), + + .modulus_bram_addr (ro_modulus_bram_addr), + .modulus_bram_out (ro_modulus_bram_out), + + .modinv_n0 (modinv_n0), + .modinv_ena (modinv_ena), + .modinv_rdy (modinv_rdy) + ); + + + // + // Montgomery Multiplier + // + reg mul_ena = 1'b0; + wire mul_rdy; + + modexpa7_montgomery_multiplier # + ( + .OPERAND_NUM_BITS (MODULUS_NUM_BITS), + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) + ) + core_montgomery_multiplier + ( + .clk (clk), + + .ena (mul_ena), + .rdy (mul_rdy), + + .operand_width (modulus_width), + + .x_bram_addr (ro_t_bram_addr), + .x_bram_out (ro_t_bram_out), + + .y_bram_addr (ro_mm_bram_addr), + .y_bram_out (ro_mm_bram_out), + + .n_bram_addr (ro_nn_bram_addr), + .n_bram_out (ro_nn_bram_out), + + .z_bram_addr (rw_r_bram_addr), + .z_bram_wr (rw_r_bram_wr), + .z_bram_in (rw_r_bram_in), + .z_bram_out (rw_r_bram_out), + + .n0_modinv (modinv_n0_modinv) + ); + + + // + // FSM + // + localparam FSM_STATE_IDLE = 6'd0; + + localparam FSM_STATE_INIT_LOAD = 6'd11; + localparam FSM_STATE_INIT_WAIT = 6'd12; + localparam FSM_STATE_INIT_UNLOAD = 6'd13; + + localparam FSM_STATE_READ_EI = 6'd20; + + localparam FSM_STATE_ROUND_BEGIN = 6'd25; + + localparam FSM_STATE_MULTIPLY_LOAD = 6'd31; + localparam FSM_STATE_MULTIPLY_WAIT = 6'd32; + localparam FSM_STATE_MULTIPLY_UNLOAD = 6'd33; + + localparam FSM_STATE_SQUARE_LOAD = 6'd41; + localparam FSM_STATE_SQUARE_WAIT = 6'd42; + localparam FSM_STATE_SQUARE_UNLOAD = 6'd43; + + localparam FSM_STATE_ROUND_END = 6'd50; + + localparam FSM_STATE_FINAL = 6'd60; + + reg [5: 0] fsm_state = FSM_STATE_IDLE; + + + // + // Trigger + // + reg next_dly = 1'b0; + always @(posedge clk) next_dly <= next; + wire next_trig = (next == 1'b1) && (next_dly == 1'b0); + + + // + // Valid Register + // + reg valid_reg = 1'b0; + assign valid = valid_reg; + + + // + // Next/ Valid Logic + // + always @(posedge clk) + // + if (fsm_state == FSM_STATE_FINAL) begin + // + valid_reg <= 1'b1; + // + end else if (fsm_state == FSM_STATE_IDLE) begin + // + if (valid_reg && !next) valid_reg <= 1'b0; + // + end + + + // + // Exponent Bit Counter + // + reg [4: 0] ei_bit_count = 5'd0; + wire ei_bit = ro_exponent_bram_out[ei_bit_count]; + + + // + // Round Counter + // + reg [MODULUS_NUM_BITS:0] round_count = round_count_zero; + wire [MODULUS_NUM_BITS:0] round_count_last = exponent_width - 1'b1; + wire [MODULUS_NUM_BITS:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero; + + + // + // Handy Wires + // + wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH]; + + wire [OPERAND_ADDR_WIDTH :0] bram_core_addr_last = {modulus_width_msb, 1'b0}; + + wire [OPERAND_ADDR_WIDTH :0] bram_user_addr_last_ext = bram_core_addr_last - 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_last = bram_user_addr_last_ext[OPERAND_ADDR_WIDTH-1:0]; + + + // + // Handy Functions + // + function [OPERAND_ADDR_WIDTH:0] bram_core_addr_next_or_zero; + input [OPERAND_ADDR_WIDTH:0] bram_core_addr; + begin + bram_core_addr_next_or_zero = (bram_core_addr < bram_core_addr_last) ? bram_core_addr + 1'b1 : bram_core_addr_zero; + end + endfunction + + function [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_next_or_zero; + input [OPERAND_ADDR_WIDTH-1:0] bram_user_addr; + begin + bram_user_addr_next_or_zero = (bram_user_addr < bram_user_addr_last) ? bram_user_addr + 1'b1 : bram_user_addr_zero; + end + endfunction + + + // + // Result BRAM Input + // + assign rw_result_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; + + + // + // MM BRAM Input Selector + // + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT_LOAD: + // + rw_mm_bram_in = (rw_mm_bram_addr < bram_core_addr_last) ? ro_message_bram_out : {32{1'b0}}; + + FSM_STATE_INIT_UNLOAD: + // + rw_mm_bram_in = ro_r_bram_out; + + FSM_STATE_SQUARE_UNLOAD: + // + rw_mm_bram_in = ro_r_bram_out; + + default: + // + rw_mm_bram_in = {32{1'bX}}; + + endcase + + + // + // Y BRAM Input Selector + // + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT_LOAD: + // + rw_y_bram_in = (rw_mm_bram_addr == bram_core_addr_zero) ? 32'h00000001 : 32'h00000000; + + FSM_STATE_MULTIPLY_UNLOAD: + // + rw_y_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; // RW! + + default: + // + rw_y_bram_in = {32{1'bX}}; + + endcase + + + // + // T BRAM Input Selector + // + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT_LOAD: + // + rw_t_bram_in = ro_coeff_bram_out; + + FSM_STATE_MULTIPLY_LOAD: + // + rw_t_bram_in = rw_y_bram_out; + + FSM_STATE_SQUARE_LOAD: + // + rw_t_bram_in = rw_mm_bram_out; + + default: + // + rw_t_bram_in = {32{1'bX}}; + + endcase + + + // + // Main Logic + // + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_INIT_LOAD: begin + // + rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + rw_y_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + rw_t_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; + rw_y_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; + rw_t_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; + // + if (ro_coeff_bram_addr > bram_core_addr_zero) ro_coeff_bram_addr <= bram_core_addr_next_or_zero(ro_coeff_bram_addr); + else ro_coeff_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_coeff_bram_addr); + // + if (ro_message_bram_addr > bram_user_addr_zero) ro_message_bram_addr <= bram_user_addr_next_or_zero(ro_message_bram_addr); + else ro_message_bram_addr <= rw_mm_bram_wr ? bram_user_addr_zero : bram_user_addr_next_or_zero(ro_message_bram_addr); + // + end + + FSM_STATE_INIT_WAIT: begin + // + if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1; + else mul_ena <= 1'b1; + // + end + + FSM_STATE_INIT_UNLOAD: begin + // + rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; + // + if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr); + else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr); + // + end + + FSM_STATE_MULTIPLY_LOAD: begin + // + rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero; + // + if (rw_y_bram_addr > bram_core_addr_zero) rw_y_bram_addr <= bram_core_addr_next_or_zero(rw_y_bram_addr); + else rw_y_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_y_bram_addr); + // + end + + FSM_STATE_MULTIPLY_WAIT: begin + // + if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1; + else mul_ena <= 1'b1; + // + end + + FSM_STATE_MULTIPLY_UNLOAD: begin + // + rw_y_bram_wr <= (rw_y_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_y_bram_addr <= rw_y_bram_wr ? bram_core_addr_next_or_zero(rw_y_bram_addr) : bram_core_addr_zero; + // + if (ei_bit) begin + // + if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr); + else ro_r_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr); + // + end else begin + // + if (rw_t_bram_addr > bram_core_addr_zero) rw_t_bram_addr <= bram_core_addr_next_or_zero(rw_t_bram_addr); + else rw_t_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_t_bram_addr); + // + end + // + if (round_count == round_count_last) begin + // + if (rw_result_bram_addr == bram_user_addr_zero) begin + // + if (rw_y_bram_wr) begin + // + rw_result_bram_wr <= (rw_y_bram_addr > bram_core_addr_zero) ? 1'b0 : 1'b1; + rw_result_bram_addr <= (rw_y_bram_addr > bram_core_addr_zero) ? bram_user_addr_zero : bram_user_addr_next_or_zero(rw_result_bram_addr); + // + end else begin + // + rw_result_bram_wr <= 1'b1; + rw_result_bram_addr <= bram_user_addr_zero; + // + end + // + end else begin + // + rw_result_bram_wr <= (rw_result_bram_addr < bram_user_addr_last) ? 1'b1 : 1'b0; + rw_result_bram_addr <= bram_user_addr_next_or_zero(rw_result_bram_addr); + // + end + // + end + // + end + + FSM_STATE_SQUARE_LOAD: begin + // + rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero; + // + if (rw_mm_bram_addr > bram_core_addr_zero) rw_mm_bram_addr <= bram_core_addr_next_or_zero(rw_mm_bram_addr); + else rw_mm_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_mm_bram_addr); + // + end + + FSM_STATE_SQUARE_WAIT: begin + // + if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1; + else mul_ena <= 1'b1; + // + end + + FSM_STATE_SQUARE_UNLOAD: begin + // + rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; + // + if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr); + else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr); + // + end + + FSM_STATE_ROUND_END: begin + // + round_count <= round_count_next; + // + if (round_count < round_count_last) begin + // + ei_bit_count <= ei_bit_count + 1'b1; + // + if (ei_bit_count == 5'd31) + // + ro_exponent_bram_addr <= bram_user_addr_next_or_zero(ro_exponent_bram_addr); + // + end else begin + // + ei_bit_count <= 5'd0; + // + ro_exponent_bram_addr <= bram_user_addr_zero; + // + end + // + end + + endcase + + + // + // FSM Transition Logic + // + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_IDLE: fsm_state <= (!valid_reg && next_trig) ? FSM_STATE_INIT_LOAD : FSM_STATE_IDLE; + + FSM_STATE_INIT_LOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_LOAD : FSM_STATE_INIT_WAIT; + FSM_STATE_INIT_WAIT: fsm_state <= mul_rdy ? FSM_STATE_INIT_UNLOAD : FSM_STATE_INIT_WAIT; + FSM_STATE_INIT_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_UNLOAD : FSM_STATE_READ_EI; + + FSM_STATE_READ_EI: fsm_state <= FSM_STATE_ROUND_BEGIN; + + FSM_STATE_ROUND_BEGIN: fsm_state <= (!ei_bit && fast_public_mode && (round_count < round_count_last)) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_MULTIPLY_LOAD; + + FSM_STATE_MULTIPLY_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_LOAD : FSM_STATE_MULTIPLY_WAIT; + FSM_STATE_MULTIPLY_WAIT: fsm_state <= mul_rdy ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_MULTIPLY_WAIT; + FSM_STATE_MULTIPLY_UNLOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_SQUARE_LOAD; + + FSM_STATE_SQUARE_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_SQUARE_WAIT; + FSM_STATE_SQUARE_WAIT: fsm_state <= mul_rdy ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_SQUARE_WAIT; + FSM_STATE_SQUARE_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_ROUND_END; + + FSM_STATE_ROUND_END: fsm_state <= (round_count < round_count_last) ? FSM_STATE_READ_EI : FSM_STATE_FINAL; + + FSM_STATE_FINAL: fsm_state <= FSM_STATE_IDLE; + + default: fsm_state <= FSM_STATE_IDLE; + + endcase + + +endmodule diff --git a/src/rtl/modexpa7_wrapper.v b/src/rtl/modexpa7_wrapper.v new file mode 100644 index 0000000..271cb20 --- /dev/null +++ b/src/rtl/modexpa7_wrapper.v @@ -0,0 +1,211 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +module modexpa7_wrapper + ( + input wire clk, + input wire reset_n, + + input wire cs, + input wire we, + + input wire [9: 0] address, + input wire [31: 0] write_data, + output wire [31: 0] read_data + ); + + + // + // Address Decoder + // + localparam ADDR_MSB_REGS = 1'b0; + localparam ADDR_MSB_CORE = 1'b1; + wire address_msb = address[9]; + wire [8: 0] address_lsb = address[8:0]; + + + // + // Output Mux + // + wire [31: 0] read_data_regs; + wire [31: 0] read_data_core; + + + // + // Registers + // + localparam ADDR_NAME0 = 9'h000; + localparam ADDR_NAME1 = 9'h001; + localparam ADDR_VERSION = 9'h002; + + localparam ADDR_CONTROL = 9'h008; // {next, init} + localparam ADDR_STATUS = 9'h009; // {valid, ready} + localparam ADDR_MODE = 9'h010; // 0 = slow secure, 1 = fast unsafe (public) + localparam ADDR_MODULUS_BITS = 9'h011; // + localparam ADDR_EXPONENT_BITS = 9'h012; // + localparam ADDR_GPIO_REG = 9'h020; // + + localparam CONTROL_INIT_BIT = 0; + localparam CONTROL_NEXT_BIT = 1; + + localparam STATUS_READY_BIT = 0; + localparam STATUS_VALID_BIT = 1; + + localparam CORE_NAME0 = 32'h6D6F6465; // "mode" + localparam CORE_NAME1 = 32'h78706137; // "xpa7" + localparam CORE_VERSION = 32'h302E3130; // "0.10" + + + // + // Registers + // + reg [1: 0] reg_control; + reg reg_mode; + reg [12: 0] reg_modulus_width; + reg [12: 0] reg_exponent_width; + reg [31: 0] reg_gpio; + + + // + // Wires + // + wire [1: 0] reg_status; + + + // + // ModExpA7 + // + modexpa7_top # + ( + .MAX_MODULUS_WIDTH (4096) + ) + modexpa7_core + ( + .clk (clk), + + .init (reg_control[CONTROL_INIT_BIT]), + .ready (reg_status[STATUS_READY_BIT]), + .next (reg_control[CONTROL_NEXT_BIT]), + .valid (reg_status[STATUS_VALID_BIT]), + + .modulus_width (reg_modulus_width), + .exponent_width (reg_exponent_width), + + .fast_public_mode (reg_mode), + + .bus_cs (cs && (address_msb == ADDR_MSB_CORE)), + .bus_we (we), + .bus_addr (address_lsb), + .bus_data_wr (write_data), + .bus_data_rd (read_data_core) + ); + + + // + // Read Latch + // + reg [31: 0] tmp_read_data; + + + // + // Read/Write Interface + // + always @(posedge clk) + // + if (!reset_n) begin + // + reg_control <= 2'b00; + reg_mode <= 1'b0; + reg_modulus_width <= 13'd1024; + reg_exponent_width <= 13'd1024; + // + end else if (cs && (address_msb == ADDR_MSB_REGS)) begin + // + if (we) begin + // + // Write Handler + // + case (address_lsb) + // + ADDR_CONTROL: reg_control <= write_data[1: 0]; + ADDR_MODE: reg_mode <= write_data[0]; + ADDR_MODULUS_BITS: reg_modulus_width <= write_data[12: 0]; + ADDR_EXPONENT_BITS: reg_exponent_width <= write_data[12: 0]; + ADDR_GPIO_REG: reg_gpio <= write_data; + // + endcase + // + end else begin + // + // Read Handler + // + case (address) + // + ADDR_NAME0: tmp_read_data <= CORE_NAME0; + ADDR_NAME1: tmp_read_data <= CORE_NAME1; + ADDR_VERSION: tmp_read_data <= CORE_VERSION; + ADDR_CONTROL: tmp_read_data <= {{30{1'b0}}, reg_control}; + ADDR_STATUS: tmp_read_data <= {{30{1'b0}}, reg_status}; + ADDR_MODE: tmp_read_data <= {{31{1'b0}}, reg_mode}; + ADDR_MODULUS_BITS: tmp_read_data <= {{19{1'b0}}, reg_modulus_width}; + ADDR_EXPONENT_BITS: tmp_read_data <= {{19{1'b0}}, reg_exponent_width}; + ADDR_GPIO_REG: tmp_read_data <= reg_gpio; + // + default: tmp_read_data <= 32'h00000000; + // + endcase + // + end + // + end + + + // + // Register / Core Memory Selector + // + reg address_msb_last; + always @(posedge clk) address_msb_last = address_msb; + + reg [31: 0] read_data_mux; + assign read_data = read_data_mux; + + always @(*) + // + case (address_msb_last) + // + ADDR_MSB_REGS: read_data_mux = tmp_read_data; + ADDR_MSB_CORE: read_data_mux = read_data_core; + // + endcase + + +endmodule diff --git a/src/rtl/ram_1rw_1ro_readfirst.v b/src/rtl/ram_1rw_1ro_readfirst.v new file mode 100644 index 0000000..25b708f --- /dev/null +++ b/src/rtl/ram_1rw_1ro_readfirst.v @@ -0,0 +1,88 @@ +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module ram_1rw_1ro_readfirst + #(parameter MEM_WIDTH = 32, + parameter MEM_ADDR_BITS = 8) + ( + input wire clk, + + input wire [MEM_ADDR_BITS-1:0] a_addr, + input wire a_wr, + input wire [MEM_WIDTH-1:0] a_in, + output wire [MEM_WIDTH-1:0] a_out, + + input wire [MEM_ADDR_BITS-1:0] b_addr, + output wire [MEM_WIDTH-1:0] b_out + ); + + + // + // BRAM + // + (* RAM_STYLE="BLOCK" *) + reg [MEM_WIDTH-1:0] bram[0:(2**MEM_ADDR_BITS)-1]; + + + // + // Output Registers + // + reg [MEM_WIDTH-1:0] bram_reg_a; + reg [MEM_WIDTH-1:0] bram_reg_b; + + assign a_out = bram_reg_a; + assign b_out = bram_reg_b; + + + // + // Read-Write Port A + // + always @(posedge clk) begin + // + bram_reg_a <= bram[a_addr]; + // + if (a_wr) bram[a_addr] <= a_in; + // + end + + + // + // Read-Only Port B + // + always @(posedge clk) + // + bram_reg_b <= bram[b_addr]; + + +endmodule |