From 1f8d13bf8d2e813f0c5da653c4abffb7a817db9a Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Wed, 19 Dec 2018 16:03:08 +0300 Subject: * New hardware architecture * Randomized test vector --- ecdsa_fpga_curve.h | 203 +++++++ ecdsa_fpga_curve_abstract.cpp | 313 ++++++++++ ecdsa_fpga_curve_microcode.cpp | 494 +++++++++++++++ ecdsa_fpga_lowlevel.cpp | 135 +++++ ecdsa_fpga_lowlevel.h | 79 +++ ecdsa_fpga_microcode.cpp | 432 +++++++++++++ ecdsa_fpga_microcode.h | 164 +++++ ecdsa_fpga_model.cpp | 539 +++++++++++++++++ ecdsa_fpga_model.h | 141 +++++ ecdsa_fpga_modular.cpp | 723 ++++++++++++++++++++++ ecdsa_fpga_modular.h | 144 +++++ ecdsa_fpga_multiword.cpp | 107 ++++ ecdsa_fpga_multiword.h | 91 +++ ecdsa_microcode_parser.py | 694 +++++++++++++++++++++ ecdsa_model.h | 205 ------- ecdsa_model_fpga.cpp | 414 ------------- fpga_curve.cpp | 340 ----------- fpga_curve.h | 61 -- fpga_lowlevel.cpp | 137 ----- fpga_lowlevel.h | 80 --- fpga_modular.cpp | 838 -------------------------- fpga_modular.h | 87 --- fpga_util.cpp | 86 --- fpga_util.h | 49 -- test_vectors/charlie_p256.key | 8 + test_vectors/charlie_p384.key | 9 + test_vectors/ecdsa_test_vector_nsa.h | 56 ++ test_vectors/ecdsa_test_vector_randomized.h | 29 + test_vectors/ecdsa_test_vector_randomized.vh | 29 + test_vectors/format_random_test_vector.py | 327 ++++++++++ test_vectors/regenerate_random_test_vector.py | 91 +++ 31 files changed, 4808 insertions(+), 2297 deletions(-) create mode 100644 ecdsa_fpga_curve.h create mode 100644 ecdsa_fpga_curve_abstract.cpp create mode 100644 ecdsa_fpga_curve_microcode.cpp create mode 100644 ecdsa_fpga_lowlevel.cpp create mode 100644 ecdsa_fpga_lowlevel.h create mode 100644 ecdsa_fpga_microcode.cpp create mode 100644 ecdsa_fpga_microcode.h create mode 100644 ecdsa_fpga_model.cpp create mode 100644 ecdsa_fpga_model.h create mode 100644 ecdsa_fpga_modular.cpp create mode 100644 ecdsa_fpga_modular.h create mode 100644 ecdsa_fpga_multiword.cpp create mode 100644 ecdsa_fpga_multiword.h create mode 100644 ecdsa_microcode_parser.py delete mode 100644 ecdsa_model.h delete mode 100644 ecdsa_model_fpga.cpp delete mode 100644 fpga_curve.cpp delete mode 100644 fpga_curve.h delete mode 100644 fpga_lowlevel.cpp delete mode 100644 fpga_lowlevel.h delete mode 100644 fpga_modular.cpp delete mode 100644 fpga_modular.h delete mode 100644 fpga_util.cpp delete mode 100644 fpga_util.h create mode 100644 test_vectors/charlie_p256.key create mode 100644 test_vectors/charlie_p384.key create mode 100644 test_vectors/ecdsa_test_vector_nsa.h create mode 100644 test_vectors/ecdsa_test_vector_randomized.h create mode 100644 test_vectors/ecdsa_test_vector_randomized.vh create mode 100644 test_vectors/format_random_test_vector.py create mode 100644 test_vectors/regenerate_random_test_vector.py diff --git a/ecdsa_fpga_curve.h b/ecdsa_fpga_curve.h new file mode 100644 index 0000000..00448eb --- /dev/null +++ b/ecdsa_fpga_curve.h @@ -0,0 +1,203 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_curve.h +// ---------------------------------------------- +// Elliptic curve arithmetic procedures for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// ECDSA Parameters (P-256) +//------------------------------------------------------------------------------ + +/* Base Point */ +#define ECDSA_P256_GX_INIT \ + {0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, \ + 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296} + +#define ECDSA_P256_GY_INIT \ + {0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, \ + 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5} + +/* Doubled Base Point */ +#define ECDSA_P256_HX_INIT \ + {0x7cf27b18, 0x8d034f7e, 0x8a523803, 0x04b51ac3, \ + 0xc08969e2, 0x77f21b35, 0xa60b48fc, 0x47669978} + +#define ECDSA_P256_HY_INIT \ + {0x07775510, 0xdb8ed040, 0x293d9ac6, 0x9f7430db, \ + 0xba7dade6, 0x3ce98229, 0x9e04b79d, 0x227873d1} + +/* Order of the Base Point */ +#define ECDSA_P256_N_INIT \ + {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, \ + 0xbce6faad, 0xa7179e84, 0xf3b9cac2, 0xfc632551} + + +//------------------------------------------------------------------------------ +// ECDSA Parameters (P-384) +//------------------------------------------------------------------------------ + +/* Base Point */ +#define ECDSA_P384_GX_INIT \ + {0xaa87ca22, 0xbe8b0537, 0x8eb1c71e, 0xf320ad74, \ + 0x6e1d3b62, 0x8ba79b98, 0x59f741e0, 0x82542a38, \ + 0x5502f25d, 0xbf55296c, 0x3a545e38, 0x72760ab7} + +#define ECDSA_P384_GY_INIT \ + {0x3617de4a, 0x96262c6f, 0x5d9e98bf, 0x9292dc29, \ + 0xf8f41dbd, 0x289a147c, 0xe9da3113, 0xb5f0b8c0, \ + 0x0a60b1ce, 0x1d7e819d, 0x7a431d7c, 0x90ea0e5f} + +/* Doubled Base Point */ +#define ECDSA_P384_HX_INIT \ + {0x08d99905, 0x7ba3d2d9, 0x69260045, 0xc55b97f0, \ + 0x89025959, 0xa6f434d6, 0x51d207d1, 0x9fb96e9e, \ + 0x4fe0e86e, 0xbe0e64f8, 0x5b96a9c7, 0x5295df61} + +#define ECDSA_P384_HY_INIT \ + {0x8e80f1fa, 0x5b1b3ced, 0xb7bfe8df, 0xfd6dba74, \ + 0xb275d875, 0xbc6cc43e, 0x904e505f, 0x256ab425, \ + 0x5ffd43e9, 0x4d39e22d, 0x61501e70, 0x0a940e80} + +/* Order of the Base Point */ +#define ECDSA_P384_N_INIT \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff, 0xffffffff, 0xc7634d81, 0xf4372ddf, \ + 0x581a0db2, 0x48b0a77a, 0xecec196a, 0xccc52973} + +//------------------------------------------------------------------------------ +// ECDSA Parameters Switch +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 + +#define ECDSA_GX_INIT ECDSA_P256_GX_INIT +#define ECDSA_GY_INIT ECDSA_P256_GY_INIT +#define ECDSA_HX_INIT ECDSA_P256_HX_INIT +#define ECDSA_HY_INIT ECDSA_P256_HY_INIT + +#define ECDSA_N_INIT ECDSA_P256_N_INIT + +#elif USE_CURVE == 2 + +#define ECDSA_GX_INIT ECDSA_P384_GX_INIT +#define ECDSA_GY_INIT ECDSA_P384_GY_INIT +#define ECDSA_HX_INIT ECDSA_P384_HX_INIT +#define ECDSA_HY_INIT ECDSA_P384_HY_INIT + +#define ECDSA_N_INIT ECDSA_P384_N_INIT + +#else + +BAD_CURVE + +#endif + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +extern FPGA_BUFFER ECDSA_GX, ECDSA_GY; +extern FPGA_BUFFER ECDSA_HX, ECDSA_HY; +extern FPGA_BUFFER ECDSA_N; + + +//------------------------------------------------------------------------------ +// Switch +//------------------------------------------------------------------------------ +#ifdef USE_MICROCODE + +#define fpga_curve_base_scalar_multiply fpga_curve_base_scalar_multiply_microcode +#define fpga_curve_add_jacobian fpga_curve_add_jacobian_microcode_wrapper +#define fpga_curve_double_jacobian fpga_curve_double_jacobian_microcode_wrapper + +#else + +#define fpga_curve_base_scalar_multiply fpga_curve_base_scalar_multiply_abstract +#define fpga_curve_add_jacobian fpga_curve_add_jacobian_abstract +#define fpga_curve_double_jacobian fpga_curve_double_jacobian_abstract + +#endif + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void fpga_curve_init (); + +void fpga_curve_base_scalar_multiply_abstract (const FPGA_BUFFER *k, + FPGA_BUFFER *qx, + FPGA_BUFFER *qy); + +void fpga_curve_base_scalar_multiply_microcode (const FPGA_BUFFER *k, + FPGA_BUFFER *qx, + FPGA_BUFFER *qy); + +void fpga_curve_add_jacobian_abstract (const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz); + +void fpga_curve_double_jacobian_abstract (const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz); + +void fpga_curve_add_jacobian_microcode (); + +void fpga_curve_double_jacobian_microcode (); + +void fpga_curve_add_jacobian_microcode_wrapper (const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz); + + +void fpga_curve_double_jacobian_microcode_wrapper (const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz); + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_curve_abstract.cpp b/ecdsa_fpga_curve_abstract.cpp new file mode 100644 index 0000000..5510ac1 --- /dev/null +++ b/ecdsa_fpga_curve_abstract.cpp @@ -0,0 +1,313 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_curve_abstract.cpp +// ---------------------------------------------- +// Elliptic curve arithmetic procedures for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +FPGA_BUFFER ECDSA_GX, ECDSA_GY; +FPGA_BUFFER ECDSA_HX, ECDSA_HY; +FPGA_BUFFER ECDSA_N; + + +//------------------------------------------------------------------------------ +void fpga_curve_init() +//------------------------------------------------------------------------------ +{ + int w; // word counter + + FPGA_BUFFER tmp_gx = ECDSA_GX_INIT, tmp_gy = ECDSA_GY_INIT; + FPGA_BUFFER tmp_hx = ECDSA_HX_INIT, tmp_hy = ECDSA_HY_INIT; + FPGA_BUFFER tmp_n = ECDSA_N_INIT; + + /* fill buffers for large multi-word integers */ + for (w=0; w0; word_count--) + for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--) + { + /* calculate T = 2 * R */ + fpga_curve_double_jacobian_abstract(&rx, &ry, &rz, &tx, &ty, &tz); + + /* always calculate R = T + P for constant-time */ + fpga_curve_add_jacobian_abstract(&tx, &ty, &tz, &rx, &ry, &rz); + + /* revert to the value of T before addition if the current bit of k is not set */ + if (!((k->words[word_count-1] >> (bit_count-1)) & 1)) + { fpga_multiword_copy(&tx, &rx); + fpga_multiword_copy(&ty, &ry); + fpga_multiword_copy(&tz, &rz); + } + + } + + FPGA_BUFFER a2, a3; + + fpga_modular_inv23(&rz, &a2, &a3); + + fpga_modular_mul(&rx, &a2, qx); // qx = px * (pz^-1)^2 (mod q) + fpga_modular_mul(&ry, &a3, qy); // qy = py * (pz^-1)^3 (mod q) + + // check, that rz is non-zero (not point at infinity) + bool rz_is_zero = fpga_multiword_is_zero(&rz); + + // handle special case (result is point at infinity) + if (rz_is_zero) + { fpga_multiword_copy(&ECDSA_ZERO, qx); + fpga_multiword_copy(&ECDSA_ZERO, qy); + } +} + + +//------------------------------------------------------------------------------ +// +// Elliptic curve point doubling routine. +// +// R(rx,ry,rz) = 2 * P(px,py,pz) +// +// Note, that P(px,py,pz) is supposed to be in projective Jacobian coordinates, +// R will be in projective Jacobian coordinates. +// +// This routine implements algorithm 3.21 from "Guide to Elliptic Curve +// Cryptography", the only difference is that step 6. does T1 = T2 + T2 and +// then T2 = T2 + T1 instead of T2 = 3 * T2, because our addition is much +// faster, than multiplication. +// +// Note, that this routine also handles one special case, namely when P is at +// infinity. +// +// Instead of actual modular division, multiplication by pre-computed constant +// (2^-1 mod q) is done. +// +// Note, that FPGA modular multiplier can't multiply a given buffer by itself, +// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome +// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and +// then fpga_modular_mul(pz, &t1, &t1) instead. +// +// WARNING: Though this procedure always does doubling steps, it does not take +// any active measures to keep run-time constant. The main purpose of this +// model is to help debug Verilog code for FPGA, so *DO NOT* use is anywhere +// near production! +// +//------------------------------------------------------------------------------ +void fpga_curve_double_jacobian_abstract(const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER t1, t2, t3; // temporary variables + + // check, whether P is at infinity + bool pz_is_zero = fpga_multiword_is_zero(pz); + + /* 2. */ fpga_multiword_copy(pz, &t1); + fpga_modular_mul(pz, &t1, &t1); + /* 3. */ fpga_modular_sub(px, &t1, &t2); + /* 4. */ fpga_modular_add(px, &t1, &t1); + /* 5. */ fpga_modular_mul(&t1, &t2, &t2); + /* 6. */ fpga_modular_add(&t2, &t2, &t1); + /* */ fpga_modular_add(&t1, &t2, &t2); + /* 7. */ fpga_modular_add(py, py, ry); + /* 8. */ fpga_modular_mul(pz, ry, rz); + /* 9. */ fpga_multiword_copy(ry, &t1); + fpga_multiword_copy(ry, &t3); + fpga_modular_mul(&t1, &t3, ry); + /* 10. */ fpga_modular_mul(px, ry, &t3); + /* 11. */ fpga_multiword_copy(ry, &t1); + fpga_modular_mul(ry, &t1, &t1); + /* 12. */ fpga_modular_mul(&t1, &ECDSA_DELTA, ry); + /* 13. */ fpga_multiword_copy(&t2, &t1); + fpga_modular_mul(&t1, &t2, rx); + /* 14. */ fpga_modular_add(&t3, &t3, &t1); + /* 15. */ fpga_modular_sub(rx, &t1, rx); + /* 16. */ fpga_modular_sub(&t3, rx, &t1); + /* 17. */ fpga_modular_mul(&t1, &t2, &t1); + /* 18. */ fpga_modular_sub(&t1, ry, ry); + + // handle special case (input point is at infinity) + if (pz_is_zero) + { fpga_multiword_copy(&ECDSA_ONE, rx); + fpga_multiword_copy(&ECDSA_ONE, ry); + fpga_multiword_copy(&ECDSA_ZERO, rz); + } +} + + +//------------------------------------------------------------------------------ +// +// Elliptic curve point addition routine. +// +// R(rx,ry,rz) = P(px,py,pz) + Q(qx,qy) +// +// Note, that P(px, py, pz) is supposed to be in projective Jacobian +// coordinates, while Q(qx,qy) is supposed to be in affine coordinates, +// R(rx, ry, rz) will be in projective Jacobian coordinates. Moreover, in this +// particular implementation Q is always the base point G. +// +// This routine implements algorithm 3.22 from "Guide to Elliptic Curve +// Cryptography". Differences from the original algorithm: +// +// 1) Step 1. is omitted, because point Q is always the base point, which is +// not at infinity by definition. +// +// 2) Step 9.1 just returns the pre-computed double of the base point instead +// of actually doubling it. +// +// Note, that this routine also handles three special cases: +// +// 1) P is at infinity +// 2) P == Q +// 3) P == -Q +// +// Note, that FPGA modular multiplier can't multiply a given buffer by itself, +// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome +// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and +// then fpga_modular_mul(pz, &t1, &t1) instead. +// +// WARNING: This procedure does not take any active measures to keep run-time +// constant. The main purpose of this model is to help debug Verilog code for +// FPGA, so *DO NOT* use is anywhere near production! +// +//------------------------------------------------------------------------------ +void fpga_curve_add_jacobian_abstract(const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER t1, t2, t3, t4; // temporary variables + + bool pz_is_zero = fpga_multiword_is_zero(pz); // Step 2. + + /* 3. */ fpga_multiword_copy(pz, &t1); + fpga_modular_mul(pz, &t1, &t1); + /* 4. */ fpga_modular_mul(pz, &t1, &t2); + /* 5. */ fpga_modular_mul(&t1, &ECDSA_GX, &t1); + /* 6. */ fpga_modular_mul(&t2, &ECDSA_GY, &t2); + /* 7. */ fpga_modular_sub(&t1, px, &t1); + /* 8. */ fpga_modular_sub(&t2, py, &t2); + + bool t1_is_zero = fpga_multiword_is_zero(&t1); // | Step 9. + bool t2_is_zero = fpga_multiword_is_zero(&t2); // | + + /* 10. */ fpga_modular_mul(pz, &t1, rz); + /* 11. */ fpga_multiword_copy(&t1, &t3); + fpga_modular_mul(&t1, &t3, &t3); + /* 12. */ fpga_modular_mul(&t1, &t3, &t4); + /* 13. */ fpga_modular_mul(px, &t3, &t3); + /* 14. */ fpga_modular_add(&t3, &t3, &t1); + /* 15. */ fpga_multiword_copy(&t2, rx); + fpga_modular_mul(rx, &t2, rx); + /* 16. */ fpga_modular_sub(rx, &t1, rx); + /* 17. */ fpga_modular_sub(rx, &t4, rx); + /* 18. */ fpga_modular_sub(&t3, rx, &t3); + /* 19. */ fpga_modular_mul(&t2, &t3, &t3); + /* 20. */ fpga_modular_mul(py, &t4, &t4); + /* 21. */ fpga_modular_sub(&t3, &t4, ry); + + // + // final selection + // + if (pz_is_zero) // P at infinity ? + { fpga_multiword_copy(&ECDSA_GX, rx); + fpga_multiword_copy(&ECDSA_GY, ry); + fpga_multiword_copy(&ECDSA_ONE, rz); + } + else if (t1_is_zero) // same x for P and Q ? + { + fpga_multiword_copy(t2_is_zero ? &ECDSA_HX : &ECDSA_ONE, rx); // | same y ? (P==Q => R=2*G) : (P==-Q => R=O) + fpga_multiword_copy(t2_is_zero ? &ECDSA_HY : &ECDSA_ONE, ry); // | + fpga_multiword_copy(t2_is_zero ? &ECDSA_ONE : &ECDSA_ZERO, rz); // | + } +} + + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_curve_microcode.cpp b/ecdsa_fpga_curve_microcode.cpp new file mode 100644 index 0000000..553498c --- /dev/null +++ b/ecdsa_fpga_curve_microcode.cpp @@ -0,0 +1,494 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_curve_microcode.cpp +// ---------------------------------------------- +// Elliptic curve arithmetic procedures for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Required for Microcode Routines +//------------------------------------------------------------------------------ +#define USE_MICROCODE + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// +// Doubles the point stored in CYCLE_R* and stores the result in CYCLE_S*. +// +//------------------------------------------------------------------------------ +void fpga_curve_double_jacobian_microcode() +//------------------------------------------------------------------------------ +{ + // fpga_modular_mul(RZ, RZ, RZ2 ); // 2. RZ2 = RZ * RZ + // fpga_modular_sub(RX, RZ2, T1 ); // 3. T1 = RX - RZ2 + // fpga_modular_add(RX, RZ2, T2 ); // 4. T2 = RX + RZ2 + // fpga_modular_mul(T1, T2, T3 ); // 5. T3 = T1 * T2 + // fpga_modular_add(T3, T3, T4 ); // 6a. T4 = T3 + T3 + // fpga_modular_add(T3, T4, A ); // 6b. A = T3 + T4 + // fpga_modular_add(RY, RY, B ); // 7. B = RY + RY + // fpga_modular_mul(B, RZ, SZ ); // 8. SZ = B * RZ [output] + // fpga_modular_mul(B, B, C ); // 9. C = B * B + // fpga_modular_mul(C, RX, D ); // 10. D = C * RX + // fpga_modular_mul(C, C, C2 ); // 11. C2 = C * C + // fpga_modular_mul(C2, DELTA, C2_2); // 12. C2_2 = C / 2 + // fpga_modular_mul(A, A, A2 ); // 13. A2 = A * A + // fpga_modular_add(D, D, T1 ); // 14. T1 = D + D + // fpga_modular_sub(A2, T1, SX ); // 15. SX = A2 - T1 [output] + // fpga_modular_sub(D, SX, T1 ); // 16. T1 = D - SX + // fpga_modular_mul(A , T1, T2 ); // 17. T2 = A * T1 + // fpga_modular_sub(T2, C2_2, SY ); // 18. SY = T2 - C2_2 [output] + + /* BEGIN_MICROCODE: CYCLE_DOUBLE */ + + FPGA_BUFFER TEMP; + + uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, CYCLE_Z2); + uop_stor(BANK_HI, CYCLE_Z2, &TEMP); print_fpga_buffer("CYCLE_Z2 = ", &TEMP); + + uop_calc(SUB, BANK_HI, CYCLE_RX, CYCLE_Z2, BANK_LO, CYCLE_T1); + uop_stor(BANK_LO, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP); + + uop_calc(ADD, BANK_HI, CYCLE_RX, CYCLE_Z2, BANK_LO, CYCLE_T2); + uop_stor(BANK_LO, CYCLE_T2, &TEMP); print_fpga_buffer("CYCLE_T2 = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_T1, CYCLE_T2, BANK_HI, CYCLE_T3); + uop_stor(BANK_HI, CYCLE_T3, &TEMP); print_fpga_buffer("CYCLE_T3 = ", &TEMP); + + uop_calc(ADD, BANK_HI, CYCLE_T3, CYCLE_T3, BANK_LO, CYCLE_T4); + uop_stor(BANK_LO, CYCLE_T4, &TEMP); print_fpga_buffer("CYCLE_T4 = ", &TEMP); + + uop_move( BANK_LO, CYCLE_T4, BANK_HI, CYCLE_T4); + + uop_calc(ADD, BANK_HI, CYCLE_T3, CYCLE_T4, BANK_LO, CYCLE_A); + uop_stor(BANK_LO, CYCLE_A, &TEMP); print_fpga_buffer("CYCLE_A = ", &TEMP); + + uop_calc(ADD, BANK_HI, CYCLE_RY, CYCLE_RY, BANK_LO, CYCLE_B); + uop_stor(BANK_LO, CYCLE_B, &TEMP); print_fpga_buffer("CYCLE_B = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_B, CYCLE_RZ, BANK_HI, CYCLE_SZ); + uop_stor(BANK_HI, CYCLE_SZ, &TEMP); print_fpga_buffer("CYCLE_SZ = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_B, CYCLE_B, BANK_HI, CYCLE_C); + uop_stor(BANK_HI, CYCLE_C, &TEMP); print_fpga_buffer("CYCLE_C = ", &TEMP); + + uop_calc(MUL, BANK_HI, CYCLE_C, CYCLE_RX, BANK_LO, CYCLE_D); + uop_stor(BANK_LO, CYCLE_D, &TEMP); print_fpga_buffer("CYCLE_D = ", &TEMP); + + uop_calc(MUL, BANK_HI, CYCLE_C, CYCLE_C, BANK_LO, CYCLE_C2); + uop_stor(BANK_LO, CYCLE_C2, &TEMP); print_fpga_buffer("CYCLE_C2 = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_C2, CONST_DELTA, BANK_HI, CYCLE_C2_2); + uop_stor(BANK_HI, CYCLE_C2_2, &TEMP); print_fpga_buffer("CYCLE_C2_2 = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_A, CYCLE_A, BANK_HI, CYCLE_A2); + uop_stor(BANK_HI, CYCLE_A2, &TEMP); print_fpga_buffer("CYCLE_A2 = ", &TEMP); + + uop_calc(ADD, BANK_LO, CYCLE_D, CYCLE_D, BANK_HI, CYCLE_T1); + uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP); + + uop_calc(SUB, BANK_HI, CYCLE_A2, CYCLE_T1, BANK_LO, CYCLE_SX); + uop_stor(BANK_LO, CYCLE_SX, &TEMP); print_fpga_buffer("CYCLE_SX = ", &TEMP); + + uop_calc(SUB, BANK_LO, CYCLE_D, CYCLE_SX, BANK_HI, CYCLE_T1); + uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP); + + uop_move( BANK_HI, CYCLE_T1, BANK_LO, CYCLE_T1); + + uop_calc(MUL, BANK_LO, CYCLE_A, CYCLE_T1, BANK_HI, CYCLE_T2); + uop_stor(BANK_HI, CYCLE_T2, &TEMP); print_fpga_buffer("CYCLE_T2 = ", &TEMP); + + uop_calc(SUB, BANK_HI, CYCLE_T2, CYCLE_C2_2, BANK_LO, CYCLE_SY); + uop_stor(BANK_LO, CYCLE_SY, &TEMP); print_fpga_buffer("CYCLE_SY = ", &TEMP); + + /* END_MICROCODE */ +} + + +//------------------------------------------------------------------------------ +// +// Adds the base point G to the point stored in CYCLE_S* and stores the result +// again in CYCLE_R*. +// +//------------------------------------------------------------------------------ +void fpga_curve_add_jacobian_microcode() +{ + //fpga_modular_mul(SZ, SZ, A) ; // 3. A = SZ * SZ + //fpga_modular_mul(A, SZ, B ); // 4. B = A * SZ + //fpga_modular_mul(A, &ECDSA_GX, C ); // 5. C = A * GX + //fpga_modular_mul(B, &ECDSA_GY, D ); // 6. D = B * GY + //fpga_modular_sub(C, SX, E ); // 7. E = C - SX + //fpga_modular_sub(D, SY, F ); // 8. F = D - SY + //fpga_modular_mul(E, SZ, RZ); // 10. RZ = E * SZ [output] + //fpga_modular_mul(E, E, G ); // 11. G = E * E + //fpga_modular_mul(E, G, H ); // 12. H = E * G + //fpga_modular_mul(G, SX, J ); // 13. J = G * SX + //fpga_modular_add(J, J, T1); // 14. T1 = J + J + //fpga_modular_mul(F, F, T2); // 15. T2 = F * F + //fpga_modular_sub(T2, T1, T3); // 16. T3 = T2 - T1 + //fpga_modular_sub(T3, H, RX); // 17. RX = T3 - H [output] + //fpga_modular_sub(J, RX, T1); // 18. T1 = J - RX + //fpga_modular_mul(F, T1, T2); // 19. T2 = F * T1 + //fpga_modular_mul(H, SY, T3); // 20. T3 = H * SY + //fpga_modular_sub(T2, T3, RY); // 21. RY = T2 - T3 [output] + + /* BEGIN_MICROCODE: CYCLE_ADD */ + + uop_cmpz( BANK_HI, CYCLE_SZ); + uop_move( BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_SZ); + uop_calc(MUL, BANK_LO, CYCLE_SZ, CYCLE_SZ, BANK_HI, CYCLE_A); + uop_calc(MUL, BANK_HI, CYCLE_A, CYCLE_SZ, BANK_LO, CYCLE_B); + uop_move( BANK_LO, CYCLE_B, BANK_HI, CYCLE_B); + uop_calc(MUL, BANK_HI, CYCLE_A, CONST_GX, BANK_LO, CYCLE_C); + uop_calc(MUL, BANK_HI, CYCLE_B, CONST_GY, BANK_LO, CYCLE_D); + uop_calc(SUB, BANK_LO, CYCLE_C, CYCLE_SX, BANK_HI, CYCLE_E); + uop_calc(SUB, BANK_LO, CYCLE_D, CYCLE_SY, BANK_HI, CYCLE_F); + uop_cmpz( BANK_HI, CYCLE_E); + uop_cmpz( BANK_HI, CYCLE_F); + uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_SZ, BANK_LO, CYCLE_RZ); + uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_E, BANK_LO, CYCLE_G); + uop_move( BANK_LO, CYCLE_G, BANK_HI, CYCLE_G); + uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_G, BANK_LO, CYCLE_H); + uop_calc(MUL, BANK_LO, CYCLE_G, CYCLE_SX, BANK_HI, CYCLE_J); + uop_calc(ADD, BANK_HI, CYCLE_J, CYCLE_J, BANK_LO, CYCLE_T1); + uop_calc(MUL, BANK_HI, CYCLE_F, CYCLE_F, BANK_LO, CYCLE_T2); + uop_calc(SUB, BANK_LO, CYCLE_T2, CYCLE_T1, BANK_HI, CYCLE_T3); + uop_move( BANK_HI, CYCLE_T3, BANK_LO, CYCLE_T3); + uop_calc(SUB, BANK_LO, CYCLE_T3, CYCLE_H, BANK_HI, CYCLE_RX); + uop_calc(SUB, BANK_HI, CYCLE_J, CYCLE_RX, BANK_LO, CYCLE_T1); + uop_move( BANK_HI, CYCLE_F, BANK_LO, CYCLE_F); + uop_calc(MUL, BANK_LO, CYCLE_F, CYCLE_T1, BANK_HI, CYCLE_T2); + uop_calc(MUL, BANK_LO, CYCLE_H, CYCLE_SY, BANK_HI, CYCLE_T3); + uop_calc(SUB, BANK_HI, CYCLE_T2, CYCLE_T3, BANK_LO, CYCLE_RY); + uop_move( BANK_LO, CYCLE_RY, BANK_HI, CYCLE_RY); + + /* END_MICROCODE */ + + // + // handle special corner cases + // + if (uop_flagz_sz) + { + /* BEGIN_MICROCODE: CYCLE_ADD_AT_INFINITY */ + + uop_move(BANK_LO, CONST_GX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_GY, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + else + { + if (uop_flagz_e) + { + if (uop_flagz_f) + { + /* BEGIN_MICROCODE: CYCLE_ADD_SAME_X_SAME_Y */ + + uop_move(BANK_LO, CONST_HX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_HY, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + else + { + /* BEGIN_MICROCODE: CYCLE_ADD_SAME_X */ + + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + } + else + { + /* BEGIN_MICROCODE: CYCLE_ADD_REGULAR */ + + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_T1); + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_T2); + uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_T3); + + /* END_MICROCODE */ + } + } +} + + +#ifdef USE_MICROCODE +//------------------------------------------------------------------------------ +void fpga_curve_base_scalar_multiply_microcode(const FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy) +//------------------------------------------------------------------------------ +{ + int word_count, bit_count; // counters + FPGA_WORD k_word; + bool k_bit; + + // initialize internal banks + fpga_multiword_copy(&ECDSA_ZERO, &BUF_LO[CONST_ZERO]); + fpga_multiword_copy(&ECDSA_ZERO, &BUF_HI[CONST_ZERO]); + + fpga_multiword_copy(&ECDSA_ONE, &BUF_LO[CONST_ONE]); + fpga_multiword_copy(&ECDSA_ONE, &BUF_HI[CONST_ONE]); + + fpga_multiword_copy(&ECDSA_DELTA, &BUF_LO[CONST_DELTA]); + fpga_multiword_copy(&ECDSA_DELTA, &BUF_HI[CONST_DELTA]); + + fpga_multiword_copy(&ECDSA_GX, &BUF_LO[CONST_GX]); + fpga_multiword_copy(&ECDSA_GX, &BUF_HI[CONST_GX]); + + fpga_multiword_copy(&ECDSA_GY, &BUF_LO[CONST_GY]); + fpga_multiword_copy(&ECDSA_GY, &BUF_HI[CONST_GY]); + + fpga_multiword_copy(&ECDSA_HX, &BUF_LO[CONST_HX]); + fpga_multiword_copy(&ECDSA_HX, &BUF_HI[CONST_HX]); + + fpga_multiword_copy(&ECDSA_HY, &BUF_LO[CONST_HY]); + fpga_multiword_copy(&ECDSA_HY, &BUF_HI[CONST_HY]); + + /* BEGIN_MICROCODE: PREPARE */ + + // set initial value of R to point at infinity + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + + /* process bits of k left-to-right */ + for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--) + for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--) + { + k_word = k->words[word_count-1]; + k_bit = (k_word & (FPGA_WORD)(1 << (bit_count-1))) > 0; + + // Banks of working cycle operands + // ------------------------------- + // RX: HI + // RY: HI + // RZ: LO + + // calculate S = 2 * R + fpga_curve_double_jacobian_microcode(); + + // Banks of working cycle operands + // ------------------------------- + // SX: LO + // SY: LO + // SZ: HI + + // always calculate R = S * G for constant-time operation + fpga_curve_add_jacobian_microcode(); + + // Banks of working cycle operands + // ------------------------------- + // RX: HI + // RY: HI + // RZ: LO + + + if (!k_bit) + { + /* BEGIN_MICROCODE: CYCLE_K0 */ + + // revert to the value of S before addition if the current bit of k is not set + uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + else + { + /* BEGIN_MICROCODE: CYCLE_K1 */ + + // do dummy overwrite for constant-time operation + uop_move(BANK_HI, CYCLE_RX, BANK_LO, CYCLE_SX); + uop_move(BANK_HI, CYCLE_RY, BANK_LO, CYCLE_SY); + uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_SZ); + + /* END_MICROCODE */ + } + + FPGA_BUFFER TEMP; + + //printf("wc = %d, bc = %d\n", word_count-1, bit_count-1); + + uop_stor(BANK_LO, CYCLE_RX, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RX = ", &TEMP); + uop_stor(BANK_LO, CYCLE_RY, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RY = ", &TEMP); + uop_stor(BANK_LO, CYCLE_RZ, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RZ = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_SX, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SX = ", &TEMP); + uop_stor(BANK_LO, CYCLE_SY, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SY = ", &TEMP); + uop_stor(BANK_LO, CYCLE_SZ, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SZ = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_A, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_A = ", &TEMP); + uop_stor(BANK_LO, CYCLE_A2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_A2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_B, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_B = ", &TEMP); + uop_stor(BANK_LO, CYCLE_C, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C = ", &TEMP); + uop_stor(BANK_LO, CYCLE_C2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_C2_2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C2_2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_D, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_D = ", &TEMP); + uop_stor(BANK_LO, CYCLE_E, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_E = ", &TEMP); + uop_stor(BANK_LO, CYCLE_F, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_F = ", &TEMP); + uop_stor(BANK_LO, CYCLE_G, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_G = ", &TEMP); + uop_stor(BANK_LO, CYCLE_H, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_H = ", &TEMP); + uop_stor(BANK_LO, CYCLE_J, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_J = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_Z2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_Z2 = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_T1, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T1 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_T2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_T3, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T3 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_T4, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T4 = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_RX, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RX = ", &TEMP); + uop_stor(BANK_HI, CYCLE_RY, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RY = ", &TEMP); + uop_stor(BANK_HI, CYCLE_RZ, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RZ = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_SX, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SX = ", &TEMP); + uop_stor(BANK_HI, CYCLE_SY, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SY = ", &TEMP); + uop_stor(BANK_HI, CYCLE_SZ, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SZ = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_A, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_A = ", &TEMP); + uop_stor(BANK_HI, CYCLE_A2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_A2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_B, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_B = ", &TEMP); + uop_stor(BANK_HI, CYCLE_C, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C = ", &TEMP); + uop_stor(BANK_HI, CYCLE_C2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_C2_2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C2_2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_D, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_D = ", &TEMP); + uop_stor(BANK_HI, CYCLE_E, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_E = ", &TEMP); + uop_stor(BANK_HI, CYCLE_F, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_F = ", &TEMP); + uop_stor(BANK_HI, CYCLE_G, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_G = ", &TEMP); + uop_stor(BANK_HI, CYCLE_H, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_H = ", &TEMP); + uop_stor(BANK_HI, CYCLE_J, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_J = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_Z2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_Z2 = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T1 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_T2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_T3, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T3 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_T4, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T4 = ", &TEMP); + + } + + // now convert to affine coordinates + fpga_modular_inv23_microcode(); + + /* BEGIN_MICROCODE: CONVERT */ + + uop_calc(MUL, BANK_HI, INVERT_A2, CYCLE_RX, BANK_LO, CYCLE_SX); + uop_calc(MUL, BANK_HI, INVERT_A3, CYCLE_RY, BANK_LO, CYCLE_SY); + uop_cmpz(BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + + if (uop_flagz_rz) + { + /* BEGIN_MICROCODE: CONVERT_AT_INFINITY */ + + uop_move(BANK_LO, CONST_ZERO, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_ZERO, BANK_HI, CYCLE_RY); + + /* END_MICROCODE */ + } + else + { + /* BEGIN_MICROCODE: CONVERT_REGULAR */ + + uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_RY); + + /* END_MICROCODE */ + } + + // return + uop_stor(BANK_HI, CYCLE_RX, qx); + uop_stor(BANK_HI, CYCLE_RY, qy); +} +#endif USE_MICROCODE + + +//------------------------------------------------------------------------------ +void fpga_curve_double_jacobian_microcode_wrapper(const FPGA_BUFFER *rx, + const FPGA_BUFFER *ry, + const FPGA_BUFFER *rz, + FPGA_BUFFER *sx, + FPGA_BUFFER *sy, + FPGA_BUFFER *sz) +//------------------------------------------------------------------------------ +{ + uop_load(rx, BANK_HI, CYCLE_RX); + uop_load(ry, BANK_HI, CYCLE_RY); + uop_load(rz, BANK_LO, CYCLE_RZ); + + fpga_curve_double_jacobian_microcode(); + + uop_stor(BANK_LO, CYCLE_SX, sx); + uop_stor(BANK_LO, CYCLE_SY, sy); + uop_stor(BANK_HI, CYCLE_SZ, sz); +} + + +//------------------------------------------------------------------------------ +void fpga_curve_add_jacobian_microcode_wrapper(const FPGA_BUFFER *sx, + const FPGA_BUFFER *sy, + const FPGA_BUFFER *sz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz) +//------------------------------------------------------------------------------ +{ + uop_load(sx, BANK_LO, CYCLE_SX); + uop_load(sy, BANK_LO, CYCLE_SY); + uop_load(sz, BANK_HI, CYCLE_SZ); + + fpga_curve_add_jacobian_microcode(); + + uop_stor(BANK_HI, CYCLE_RX, rx); + uop_stor(BANK_HI, CYCLE_RY, ry); + uop_stor(BANK_LO, CYCLE_RZ, rz); +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_lowlevel.cpp b/ecdsa_fpga_lowlevel.cpp new file mode 100644 index 0000000..c7ab322 --- /dev/null +++ b/ecdsa_fpga_lowlevel.cpp @@ -0,0 +1,135 @@ +//------------------------------------------------------------------------------ +// +// fpga_ecdsa_lowlevel.cpp +// ----------------------------------- +// Models of low-level FPGA primitives +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// +// Low-level 32-bit adder with carry input and carry output. +// +// Carries are 1 bit wide. +// +// {c_out, s} = x + y + c_in +// +//------------------------------------------------------------------------------ +void fpga_lowlevel_add32(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out) +//------------------------------------------------------------------------------ +{ + // calculate sum + FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x + (FPGA_WORD_EXTENDED)y; + + // process carry input + if (c_in) r++; + + // store sum + *s = (FPGA_WORD)r; + + // shift sum to obtain carry flag + r >>= FPGA_WORD_WIDTH; + + // store carry + *c_out = (r != 0); +} + + +//------------------------------------------------------------------------------ +// +// Low-level 32-bit subtractor with borrow input and borrow output. +// +// Borrows are 1 bit wide. +// +// {b_out, d} = x - y - b_in +// +void fpga_lowlevel_sub32(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out) +//------------------------------------------------------------------------------ +{ + // calculate difference + FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x - (FPGA_WORD_EXTENDED)y; + + // process borrow input + if (b_in) r--; + + // store difference + *d = (FPGA_WORD)r; + + // shift difference to obtain borrow flag + r >>= FPGA_WORD_WIDTH; + + // store borrow + *b_out = (r != 0); +} + + +//------------------------------------------------------------------------------ +// +// Low-level 16x16-bit multiplier. +// +// Inputs are 16-bit wide, output is 32-bit wide. +// +// p = x * y +// +void fpga_lowlevel_mul16(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p) +//------------------------------------------------------------------------------ +{ + // multiply and store result + *p = (FPGA_WORD)x * (FPGA_WORD)y; +} + + +//------------------------------------------------------------------------------ +// +// Low-level 47-bit adder without carries. +// +// s = (x + y)[46:0] +// +void fpga_lowlevel_add47(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s) +//------------------------------------------------------------------------------ +{ + // add and store result truncated to 47 bits + *s = (x + y) & FPGA_MASK_ADDER47; +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_lowlevel.h b/ecdsa_fpga_lowlevel.h new file mode 100644 index 0000000..8d0dccd --- /dev/null +++ b/ecdsa_fpga_lowlevel.h @@ -0,0 +1,79 @@ +//------------------------------------------------------------------------------ +// +// fpga_ecdsa_lowlevel.h +// ----------------------------------- +// Models of low-level FPGA primitives +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include + + +//------------------------------------------------------------------------------ +// FPGA Pipeline Settings +//------------------------------------------------------------------------------ +#define FPGA_WORD_WIDTH (32) + + +//------------------------------------------------------------------------------ +// Word Types (Normal 32-bit, Extended 47-bit, Reduced 16-bit) +//------------------------------------------------------------------------------ +typedef uint32_t FPGA_WORD; +typedef uint64_t FPGA_WORD_EXTENDED; +typedef uint16_t FPGA_WORD_REDUCED; + + +//------------------------------------------------------------------------------ +// Wide 47-bit Adder Mask +//------------------------------------------------------------------------------ +#define FPGA_MASK_ADDER47 ((FPGA_WORD_EXTENDED)0x00007FFFFFFFFFFF) + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void fpga_lowlevel_add32 (FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out); +void fpga_lowlevel_sub32 (FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out); + +void fpga_lowlevel_mul16 (FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p); + +void fpga_lowlevel_add47 (FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s); + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_microcode.cpp b/ecdsa_fpga_microcode.cpp new file mode 100644 index 0000000..f02dc8a --- /dev/null +++ b/ecdsa_fpga_microcode.cpp @@ -0,0 +1,432 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_microcode.cpp +// -------------------------------- +// Microcode Architecture for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Required for Microcode Routines +//------------------------------------------------------------------------------ +#define USE_MICROCODE + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Global Buffers +//------------------------------------------------------------------------------ +FPGA_BUFFER BUF_LO[ECDSA_UOP_OPERAND_COUNT]; +FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT]; + + +//------------------------------------------------------------------------------ +// Global Flags +//------------------------------------------------------------------------------ +bool uop_flagz_sz; +bool uop_flagz_rz; +bool uop_flagz_e; +bool uop_flagz_f; + + +//------------------------------------------------------------------------------ +void uop_move(UOP_BANK src, int s_op, UOP_BANK dst, int d_op) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *s_ptr = NULL; + FPGA_BUFFER *d_ptr = NULL; + + if (src == BANK_LO) s_ptr = &BUF_LO[s_op]; + if (src == BANK_HI) s_ptr = &BUF_HI[s_op]; + if (dst == BANK_LO) d_ptr = &BUF_LO[d_op]; + if (dst == BANK_HI) d_ptr = &BUF_HI[d_op]; + + fpga_multiword_copy(s_ptr, d_ptr); +} + + +//------------------------------------------------------------------------------ +void uop_cmpz(UOP_BANK src, int s_op) +//------------------------------------------------------------------------------ +{ + bool flagz; + + FPGA_BUFFER *s_ptr = NULL; + + if (src == BANK_LO) s_ptr = &BUF_LO[s_op]; + if (src == BANK_HI) s_ptr = &BUF_HI[s_op]; + + flagz = fpga_multiword_is_zero(s_ptr); + + switch (s_op) + { + case CYCLE_SZ: + uop_flagz_sz = flagz; + break; + case CYCLE_RZ: + uop_flagz_rz = flagz; + break; + case CYCLE_E: + uop_flagz_e = flagz; + break; + case CYCLE_F: + uop_flagz_f = flagz; + break; + } +} + + +//------------------------------------------------------------------------------ +void uop_calc(UOP_MATH math, + UOP_BANK src, int s_op1, int s_op2, + UOP_BANK dst, int d_op) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *s_ptr1 = NULL; + FPGA_BUFFER *s_ptr2 = NULL; + FPGA_BUFFER *d_ptr = NULL; + FPGA_BUFFER *n_ptr = NULL; + + if (src == BANK_LO) + { s_ptr1 = &BUF_LO[s_op1]; + s_ptr2 = &BUF_LO[s_op2]; + } + if (src == BANK_HI) + { s_ptr1 = &BUF_HI[s_op1]; + s_ptr2 = &BUF_HI[s_op2]; + } + if (dst == BANK_LO) + { d_ptr = &BUF_LO[d_op]; + } + if (dst == BANK_HI) + { d_ptr = &BUF_HI[d_op]; + } + + if (math == ADD) fpga_modular_add(s_ptr1, s_ptr2, d_ptr); + if (math == SUB) fpga_modular_sub(s_ptr1, s_ptr2, d_ptr); + if (math == MUL) fpga_modular_mul(s_ptr1, s_ptr2, d_ptr); +} + + +//------------------------------------------------------------------------------ +void uop_load(const FPGA_BUFFER *mem, UOP_BANK dst, int d_op) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *d_ptr = NULL; + if (dst == BANK_LO) d_ptr = &BUF_LO[d_op]; + if (dst == BANK_HI) d_ptr = &BUF_HI[d_op]; + + fpga_multiword_copy(mem, d_ptr); +} + + +//------------------------------------------------------------------------------ +void uop_stor(UOP_BANK src, int s_op, FPGA_BUFFER *mem) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *s_ptr = NULL; + if (src == BANK_LO) + { s_ptr = &BUF_LO[s_op]; + } + if (src == BANK_HI) + { s_ptr = &BUF_HI[s_op]; + } + + fpga_multiword_copy(s_ptr, mem); +} + + +//------------------------------------------------------------------------------ +void fpga_modular_inv23_p256_microcode() +//------------------------------------------------------------------------------ +// +// This computes A2 = RZ^-2 and A3 = RZ^-3. +// +// RZ is read from the lower bank, A2 and A3 are written to the upper bank. +// +//------------------------------------------------------------------------------ +{ + uop_loop; + + // + // operand placement map: + // + // X1 - LO,HI (RZ) + // X2 - LO,HI + // X3 - LO,HI + // X6 - LO + // X12 - HI + // X15 - LO,HI + // X30 - HI + // X32 - LO,HI + + /* BEGIN_MICROCODE: INVERT_P256 */ + + // first obtain intermediate helper quantities (X#) + + // mirror X1 to HI bank (don't waste time copying to X1, just use RZ) + uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ); + + // compute X2 and mirror to the other bank + uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2); + uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2); + + // compute X3 and mirror to the other bank + uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3); + uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3); + + // compute X6 (stored in the lower bank) + uop_calc(MUL, BANK_LO, INVERT_X3, INVERT_X3, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X3, BANK_LO, INVERT_X6); + + // compute X12 (stored in the upper bank) + uop_calc(MUL, BANK_LO, INVERT_X6, INVERT_X6, BANK_HI, INVERT_R1); + uop_cycle(5); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X6, BANK_HI, INVERT_X12); + + // compute X15 and mirror to the other bank + uop_calc(MUL, BANK_HI, INVERT_X12, INVERT_X12, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X3, BANK_HI, INVERT_X15); + uop_move(BANK_HI, INVERT_X15, BANK_LO, INVERT_X15); + + // compute X30 (stored in the upper bank) + uop_calc(MUL, BANK_HI, INVERT_X15, INVERT_X15, BANK_LO, INVERT_R1); + uop_cycle(14); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X15, BANK_HI, INVERT_X30); + + // compute X32 and mirror to the other bank + uop_calc(MUL, BANK_HI, INVERT_X30, INVERT_X30, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X2, BANK_LO, INVERT_X32); + uop_move(BANK_LO, INVERT_X32, BANK_HI, INVERT_X32); + + // now compute the final results + + uop_calc(MUL, BANK_LO, INVERT_X32, INVERT_X32, BANK_HI, INVERT_R1); + + uop_cycle(31); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_LO, INVERT_R2, CYCLE_RZ, BANK_HI, INVERT_R1); + + uop_cycle(128); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X32, BANK_LO, INVERT_R2); + + uop_cycle(32); + uop_calc_if_even(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_repeat(); + + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X32, BANK_HI, INVERT_R1); + + uop_cycle(30); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X30, BANK_LO, INVERT_R2); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + + // move A2 into the upper bank + uop_move(BANK_LO, INVERT_R2, BANK_HI, INVERT_A2); + + // A3 ends up in the upper bank by itself + uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3); + + /* END_MICROCODE */ +} + + +//------------------------------------------------------------------------------ +void fpga_modular_inv23_p384_microcode() +//------------------------------------------------------------------------------ +// +// This computes A2 = RZ^-2 and A3 = RZ^-3. +// +// RZ is read from the lower bank, A2 and A3 are written to the upper bank. +// +//------------------------------------------------------------------------------ +{ + uop_loop; + + // + // operand placement map: + // + // X1 - LO,HI (RZ) + // X2 - LO,HI + // X3 - LO,HI + // X6 - LO + // X12 - HI + // X15 - LO,HI + // X30 - HI + // X32 - LO,HI + + /* BEGIN_MICROCODE: INVERT_P384 */ + + // first obtain intermediate helper quantities (X#) + + // mirror X1 to HI bank (don't waste time copying to X1, just use RZ) + uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ); + + // compute X2 and mirror to the other bank + uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2); + uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2); + + // compute X3 and mirror to the other bank + uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3); + uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3); + + // compute X6 (stored in the lower bank) + uop_calc(MUL, BANK_LO, INVERT_X3, INVERT_X3, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X3, BANK_LO, INVERT_X6); + + // compute X12 (stored in the upper bank) + uop_calc(MUL, BANK_LO, INVERT_X6, INVERT_X6, BANK_HI, INVERT_R1); + uop_cycle(5); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X6, BANK_HI, INVERT_X12); + + // compute X15 and mirror to the other bank + uop_calc(MUL, BANK_HI, INVERT_X12, INVERT_X12, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X3, BANK_HI, INVERT_X15); + uop_move(BANK_HI, INVERT_X15, BANK_LO, INVERT_X15); + + // compute X30 (stored in the upper bank) + uop_calc(MUL, BANK_HI, INVERT_X15, INVERT_X15, BANK_LO, INVERT_R1); + uop_cycle(14); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X15, BANK_HI, INVERT_X30); + + // compute X60 (stored in the lower bank) + uop_calc(MUL, BANK_HI, INVERT_X30, INVERT_X30, BANK_LO, INVERT_R1); + uop_cycle(29); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_X60); + + // compute X120 (stored in the upper bank) + uop_calc(MUL, BANK_LO, INVERT_X60, INVERT_X60, BANK_HI, INVERT_R1); + uop_cycle(59); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X60, BANK_HI, INVERT_X120); + + // now compute the final results + + uop_calc(MUL, BANK_HI, INVERT_X120, INVERT_X120, BANK_LO, INVERT_R1); + + uop_cycle(119); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X120, BANK_LO, INVERT_R1); + + uop_cycle(15); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X15, BANK_LO, INVERT_R1); + + uop_cycle(31); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X2, BANK_HI, INVERT_R2); + + uop_cycle(94); + uop_calc_if_even(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + + // move A2 into the upper bank + uop_move(BANK_LO, INVERT_R1, BANK_HI, INVERT_A2); + + // A3 ends up in the upper bank by itself + uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3); + + /* END_MICROCODE */ +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_microcode.h b/ecdsa_fpga_microcode.h new file mode 100644 index 0000000..f551d96 --- /dev/null +++ b/ecdsa_fpga_microcode.h @@ -0,0 +1,164 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_microcode.h +// -------------------------------- +// Microcode Architecture for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include // NULL + + +//------------------------------------------------------------------------------ +enum UOP_BANK +//------------------------------------------------------------------------------ +{ + BANK_LO, BANK_HI +}; + +//-------------------------- +enum UOP_OPERAND +//-------------------------- +{ + CONST_ZERO, // 0 + CONST_ONE, // 1 + CONST_DELTA, // 2 + + CONST_GX, // 3 + CONST_GY, // 4 + + CONST_HX, // 5 + CONST_HY, // 6 + + CYCLE_RX, // 7 + CYCLE_RY, // 8 + CYCLE_RZ, // 9 + + CYCLE_SX, // 10 + CYCLE_SY, // 11 + CYCLE_SZ, // 12 + + CYCLE_A, // 13 + CYCLE_A2, // 14 + CYCLE_B, // 15 + CYCLE_C, // 16 + CYCLE_C2, // 17 + CYCLE_C2_2, // 18 + CYCLE_D, // 19 + CYCLE_E, // 20 + CYCLE_F, // 21 + CYCLE_G, // 22 + CYCLE_H, // 23 + CYCLE_J, // 24 + + CYCLE_Z2, // 25 + + CYCLE_T1, // 26 + CYCLE_T2, // 27 + CYCLE_T3, // 28 + CYCLE_T4, // 29 + + INVERT_R1, // 30 + INVERT_R2, // 31 + + INVERT_X2, // 32 + INVERT_X3, // 33 + INVERT_X6, // 34 + INVERT_X12, // 35 + INVERT_X15, // 36 + INVERT_X30, // 37 + INVERT_X32, // 38 + INVERT_X60, // 39 + INVERT_X120, // 40 + + INVERT_A2, // 41 + INVERT_A3, // 42 + + ECDSA_UOP_OPERAND_COUNT +}; + +//------------------------------------------------------------------------------ +enum UOP_MATH +//------------------------------------------------------------------------------ +{ + ADD, SUB, MUL +}; + + +//------------------------------------------------------------------------------ +// Global Storage Buffers +//------------------------------------------------------------------------------ +extern FPGA_BUFFER BUF_LO[ECDSA_UOP_OPERAND_COUNT]; +extern FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT]; + + +//------------------------------------------------------------------------------ +// Global Flags +//------------------------------------------------------------------------------ +extern bool uop_flagz_sz; +extern bool uop_flagz_rz; +extern bool uop_flagz_e; +extern bool uop_flagz_f; + + +//------------------------------------------------------------------------------ +// Loop Macros +//------------------------------------------------------------------------------ +#define uop_loop int uop_cnt +#define uop_cycle(iters); for (uop_cnt=0; uop_cnt +#include + + +//------------------------------------------------------------------------------ +// Microcode Switch +//------------------------------------------------------------------------------ +#define USE_MICROCODE + + +//------------------------------------------------------------------------------ +// More Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void fpga_model_init (); + +bool test_base_point_multiplier (const FPGA_BUFFER *k, + const FPGA_BUFFER *qx, + const FPGA_BUFFER *qy); + +bool abuse_internal_point_adder (); +bool abuse_internal_point_doubler (); + + +//------------------------------------------------------------------------------ +// Locals +//------------------------------------------------------------------------------ +static FPGA_BUFFER ecdsa_d_nsa; +static FPGA_BUFFER ecdsa_qx_nsa; +static FPGA_BUFFER ecdsa_qy_nsa; + +static FPGA_BUFFER ecdsa_k_nsa; +static FPGA_BUFFER ecdsa_rx_nsa; +static FPGA_BUFFER ecdsa_ry_nsa; + +static FPGA_BUFFER ecdsa_d_random; +static FPGA_BUFFER ecdsa_qx_random; +static FPGA_BUFFER ecdsa_qy_random; + + +//------------------------------------------------------------------------------ +int main() +//------------------------------------------------------------------------------ +{ + bool ok; // flag + + // initialize buffers + fpga_multiword_init(); + fpga_modular_init(); + fpga_curve_init(); + fpga_model_init(); + + printf("ECDSA_UOP_OPERAND_COUNT == %d\n\n", ECDSA_UOP_OPERAND_COUNT); + + // test base point multiplier: Q = d * G + printf("Trying to derive public key from private key (NSA test vector)...\n\n"); + ok = test_base_point_multiplier(&ecdsa_d_nsa, &ecdsa_qx_nsa, &ecdsa_qy_nsa); + if (!ok) return EXIT_FAILURE; + + // test base point multiplier: R = k * G + printf("Trying to sign something (NSA test vector)...\n\n"); + ok = test_base_point_multiplier(&ecdsa_k_nsa, &ecdsa_rx_nsa, &ecdsa_ry_nsa); + if (!ok) return EXIT_FAILURE; + + // test base point multiplier: Q = d * G + printf("Trying to derive public key from private key (random test vector)...\n\n"); + ok = test_base_point_multiplier(&ecdsa_d_random, &ecdsa_qx_random, &ecdsa_qy_random); + if (!ok) return EXIT_FAILURE; + + // test base point multiplier: O = n * G + printf("Trying to multiply the base point by its order...\n\n"); + ok = test_base_point_multiplier(&ECDSA_N, &ECDSA_ZERO, &ECDSA_ZERO); + if (!ok) return EXIT_FAILURE; + + // now run some intricate tests... + + /* Excerpt from the commit message e718fdfae6443466e566ed6ce1515cdecc215ac0: + + The model does multiplication using the double-and-add algorithm. When adding + two points P and Q on curves P-256 and P-384, four special cases must be + considered. One of them is P = Q, in that situation the explicit addition + formulae don't work and either 2*P or 2*Q must be returned from the addition + routine. In this model Q is always the base point G, so when P = G, then 2*G + must be returned. Since G is fixed, this model stores precomputed point H = 2*G + and returns it when adding G + G for true constant-time operation. + + During multiplication the bits of k are scanned left-to-right, so doubling is + done before addition. This way the only situation when both inputs to the + addition routine are equal to G is when after doubling the result is G. This in + its turn is only possible when k = n + 2 (where n is the order of the base + point G). ECDSA requires integer k to be [1, n-1], one of the side effects + is that the model has a code path that will never be used under normal + operation. This code path can be verified by first multiplying by k = 2 + (special handling for P = G not triggered), then multiplying by k = n + 2 + (special handling for P = G triggered). Both multiplications should produce + the same output. In the former case the output will be calculated on-the-fly, + in the latter case the pre-computed coordinates of H will be used. + */ + + + // test base point multiplier: H = 2 * G + FPGA_BUFFER two; + fpga_modular_add(&ECDSA_ONE, &ECDSA_ONE, &two); + + printf("Trying to double the base point...\n\n"); + ok = test_base_point_multiplier(&two, &ECDSA_HX, &ECDSA_HY); + if (!ok) return EXIT_FAILURE; + + // test base point multiplier: G = (n + 1) * G + FPGA_BUFFER n1; + fpga_modular_add(&ECDSA_N, &ECDSA_ONE, &n1); // n1 = n + 1 + + printf("Trying to multiply the base point by its order plus one...\n\n"); + ok = test_base_point_multiplier(&n1, &ECDSA_GX, &ECDSA_GY); + if (!ok) return EXIT_FAILURE; + + // test base point multiplier: H = (n + 2) * G + FPGA_BUFFER n2; + fpga_modular_add(&ECDSA_N, &two, &n2); // n2 = n + two + + printf("Trying to multiply the base point by its order plus two...\n\n"); + ok = test_base_point_multiplier(&n2, &ECDSA_HX, &ECDSA_HY); + if (!ok) return EXIT_FAILURE; + + // ..and some more tests + + // try to abuse internal point doubler + ok = abuse_internal_point_doubler(); + if (!ok) return EXIT_FAILURE; + + // try to abuse internal point adder + ok = abuse_internal_point_adder(); + if (!ok) return EXIT_FAILURE; + + // everything went just fine + return EXIT_SUCCESS; +} + + +//------------------------------------------------------------------------------ +void fpga_model_init() +//------------------------------------------------------------------------------ +{ + int w; // word counter + + FPGA_BUFFER tmp_d_nsa = ECDSA_D_NSA_INIT, tmp_k_nsa = ECDSA_K_NSA_INIT; + FPGA_BUFFER tmp_qx_nsa = ECDSA_QX_NSA_INIT, tmp_rx_nsa = ECDSA_RX_NSA_INIT; + FPGA_BUFFER tmp_qy_nsa = ECDSA_QY_NSA_INIT, tmp_ry_nsa = ECDSA_RY_NSA_INIT; + + FPGA_BUFFER tmp_d_random = ECDSA_D_RANDOM_INIT; + FPGA_BUFFER tmp_qx_random = ECDSA_QX_RANDOM_INIT; + FPGA_BUFFER tmp_qy_random = ECDSA_QY_RANDOM_INIT; + + // fill buffers for large multi-word integers + for (w=0; w0; w--) + { + printf("%08x", buf->words[w-1]); + + // insert space after every group of 4 bytes + if (w > 1) printf(" "); + } + + // print footer + printf("\n"); +} + + +//------------------------------------------------------------------------------ +void print_fpga_buffer_nodelim(const char *s, const FPGA_BUFFER *buf) +//------------------------------------------------------------------------------ +// +// Pretty print large multi-word integer. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + + // print header + printf("%s", s); + + // print all bytes + for (w=FPGA_OPERAND_NUM_WORDS; w>0; w--) + printf("%08x", buf->words[w-1]); + + // print footer + printf("\n"); +} + + +//------------------------------------------------------------------------------ +bool abuse_internal_point_doubler() +//------------------------------------------------------------------------------ +// +// This routine tries to abuse the internal curve point doubler by forcing it +// to double point at infinity. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + bool ok; // flag + + FPGA_BUFFER px, py, pz; // input + FPGA_BUFFER qx, qy, qz; // output + + // set P.X and P.Y to some "random" garbage and P.Z to zero + for (w=0; w P-256 +// USE_CURVE == 2 -> P-384 +// +//------------------------------------------------------------------------------ +#ifndef USE_CURVE +#define USE_CURVE 2 +#endif +//------------------------------------------------------------------------------ +#define BAD_CURVE #error USE_CURVE must be either 1 or 2! +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_lowlevel.h" +#include "ecdsa_fpga_multiword.h" +#include "ecdsa_fpga_modular.h" +#include "ecdsa_fpga_curve.h" +#ifdef USE_MICROCODE +#include "ecdsa_fpga_microcode.h" +#endif + + +//------------------------------------------------------------------------------ +// Test Vectors +//------------------------------------------------------------------------------ +#include "ecdsa_test_vector_nsa.h" +#include "ecdsa_test_vector_randomized.h" + + +//------------------------------------------------------------------------------ +// Test Vectors Switch +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 + +#define ECDSA_D_NSA_INIT ECDSA_P256_D_NSA_INIT +#define ECDSA_QX_NSA_INIT ECDSA_P256_QX_NSA_INIT +#define ECDSA_QY_NSA_INIT ECDSA_P256_QY_NSA_INIT + +#define ECDSA_K_NSA_INIT ECDSA_P256_K_NSA_INIT +#define ECDSA_RX_NSA_INIT ECDSA_P256_RX_NSA_INIT +#define ECDSA_RY_NSA_INIT ECDSA_P256_RY_NSA_INIT + +#define ECDSA_D_RANDOM_INIT ECDSA_P256_D_RANDOM_INIT +#define ECDSA_QX_RANDOM_INIT ECDSA_P256_QX_RANDOM_INIT +#define ECDSA_QY_RANDOM_INIT ECDSA_P256_QY_RANDOM_INIT + +#elif USE_CURVE == 2 + +#define ECDSA_D_NSA_INIT ECDSA_P384_D_NSA_INIT +#define ECDSA_QX_NSA_INIT ECDSA_P384_QX_NSA_INIT +#define ECDSA_QY_NSA_INIT ECDSA_P384_QY_NSA_INIT + +#define ECDSA_K_NSA_INIT ECDSA_P384_K_NSA_INIT +#define ECDSA_RX_NSA_INIT ECDSA_P384_RX_NSA_INIT +#define ECDSA_RY_NSA_INIT ECDSA_P384_RY_NSA_INIT + +#define ECDSA_D_RANDOM_INIT ECDSA_P384_D_RANDOM_INIT +#define ECDSA_QX_RANDOM_INIT ECDSA_P384_QX_RANDOM_INIT +#define ECDSA_QY_RANDOM_INIT ECDSA_P384_QY_RANDOM_INIT + +#else + +BAD_CURVE + +#endif + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void print_fpga_buffer (const char *s, + const FPGA_BUFFER *v); + +void print_fpga_buffer_nodelim (const char *s, + const FPGA_BUFFER *v); + +bool compare_fpga_buffers (const FPGA_BUFFER *az, + const FPGA_BUFFER *bz); + +bool compare_fpga_buffers (const FPGA_BUFFER *ax, + const FPGA_BUFFER *ay, + const FPGA_BUFFER *bx, + const FPGA_BUFFER *by); + +bool compare_fpga_buffers (const FPGA_BUFFER *ax, + const FPGA_BUFFER *ay, + const FPGA_BUFFER *az, + const FPGA_BUFFER *bx, + const FPGA_BUFFER *by, + const FPGA_BUFFER *bz); + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_modular.cpp b/ecdsa_fpga_modular.cpp new file mode 100644 index 0000000..9d22c05 --- /dev/null +++ b/ecdsa_fpga_modular.cpp @@ -0,0 +1,723 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_modular.cpp +// ------------------------------------- +// Modular arithmetic routines for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +FPGA_BUFFER ECDSA_Q; +FPGA_BUFFER ECDSA_DELTA; + + +//------------------------------------------------------------------------------ +void fpga_modular_init() +//------------------------------------------------------------------------------ +{ + int w_src, w_dst; // word counters + + // temporary things + FPGA_WORD TMP_Q [FPGA_OPERAND_NUM_WORDS] = ECDSA_Q_INIT; + FPGA_WORD TMP_DELTA[FPGA_OPERAND_NUM_WORDS] = ECDSA_DELTA_INIT; + + /* fill buffers for large multi-word integers, we need to fill them in + * reverse order because of the way C arrays are initialized + */ + for ( w_src = 0, w_dst = FPGA_OPERAND_NUM_WORDS - 1; + w_src < FPGA_OPERAND_NUM_WORDS; + w_src++, w_dst--) + { + ECDSA_Q.words[w_dst] = TMP_Q[w_src]; + ECDSA_DELTA.words[w_dst] = TMP_DELTA[w_src]; + } +} + + +//------------------------------------------------------------------------------ +// +// Modular addition. +// +// This routine implements algorithm 3. from "Ultra High Performance ECC over +// NIST Primes on Commercial FPGAs". +// +// s = (a + b) mod q +// +// The naive approach is like the following: +// +// 1. s = a + b +// 2. if (s >= q) s -= q +// +// The speed-up trick is to simultaneously calculate (a + b) and (a + b - q) +// and then select the right variant. +// +//------------------------------------------------------------------------------ +void fpga_modular_add(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_BUFFER *s) +//------------------------------------------------------------------------------ +{ + int w; // word counter + FPGA_BUFFER ab, ab_n; // intermediate buffers + bool c_in, c_out; // carries + bool b_in, b_out; // borrows + + c_in = false; // first word has no carry + b_in = false; // first word has no borrow + + // run parallel addition and subtraction + for (w=0; wwords[w], b->words[w], c_in, &ab.words[w], &c_out); + fpga_lowlevel_sub32(ab.words[w], ECDSA_Q.words[w], b_in, &ab_n.words[w], &b_out); + + c_in = c_out; // propagate carry + b_in = b_out; // propagate borrow + } + + // now select the right buffer + + /* + * We select the right variant based on borrow and carry flags after + * addition and subtraction of the very last pair of words. Note, that + * we only need to select the first variant (a + b) when (a + b) < q. + * This way if we get negative number after subtraction, we discard it + * and use the output of the adder instead. The subtractor output is + * negative when borrow flag is set *and* carry flag is not set. When + * both borrow and carry are set, the number is non-negative, because + * borrow and carry cancel each other out. + */ + for (w=0; wwords[w] = (b_out && !c_out) ? ab.words[w] : ab_n.words[w]; +} + + +//------------------------------------------------------------------------------ +// +// Modular subtraction. +// +// This routine implements algorithm 3. from "Ultra High Performance ECC over +// NIST Primes on Commercial FPGAs". +// +// d = (a - b) mod q +// +// The naive approach is like the following: +// +// 1. d = a - b +// 2. if (a < b) d += q +// +// The speed-up trick is to simultaneously calculate (a - b) and (a - b + q) +// and then select the right variant. +// +//------------------------------------------------------------------------------ +void fpga_modular_sub(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_BUFFER *d) +//------------------------------------------------------------------------------ +{ + int w; // word counter + FPGA_BUFFER ab, ab_n; // intermediate buffers + bool c_in, c_out; // carries + bool b_in, b_out; // borrows + + c_in = false; // first word has no carry + b_in = false; // first word has no borrow + + // run parallel subtraction and addition + for (w=0; wwords[w], b->words[w], b_in, &ab.words[w], &b_out); + fpga_lowlevel_add32(ab.words[w], ECDSA_Q.words[w], c_in, &ab_n.words[w], &c_out); + + b_in = b_out; // propagate borrow + c_in = c_out; // propagate carry + } + + // now select the right buffer + + /* + * We select the right variant based on borrow flag after subtraction + * and addition of the very last pair of words. Note, that we only + * need to select the second variant (a - b + q) when a < b. This way + * if we get negative number after subtraction, we discard it + * and use the output of the adder instead. The Subtractor output is + * negative when borrow flag is set. + */ + for (w=0; wwords[w] = b_out ? ab_n.words[w] : ab.words[w]; +} + + +//------------------------------------------------------------------------------ +// +// Modular multiplication. +// +// This routine implements modular multiplication algorithm from "Ultra High +// Performance ECC over NIST Primes on Commercial FPGAs". +// +// p = (a * b) mod q +// +// The complex algorithm is split into three parts: +// +// 1. Calculation of partial words +// 2. Acccumulation of partial words into full-size product +// 3. Modular reduction of the full-size product +// +// See comments for corresponding helper routines for more information. +// +//------------------------------------------------------------------------------ +void fpga_modular_mul(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_BUFFER *p) +//------------------------------------------------------------------------------ +{ + FPGA_WORD_EXTENDED si[4*FPGA_OPERAND_NUM_WORDS-1]; // parts of intermediate product + FPGA_WORD c[2*FPGA_OPERAND_NUM_WORDS]; // full-size intermediate product + + /* multiply to get partial words */ + fpga_modular_mul_helper_multiply(a, b, si); + + /* accumulate partial words into full-size product */ + fpga_modular_mul_helper_accumulate(si, c); + + /* reduce full-size product using special routine */ + fpga_modular_mul_helper_reduce(c, p); +} + + +//------------------------------------------------------------------------------ +// +// Parallelized multiplication. +// +// This routine implements the algorithm in Fig. 3. from "Ultra High +// Performance ECC over NIST Primes on Commercial FPGAs". +// +// Inputs a and b are split into 2*OPERAND_NUM_WORDS words of FPGA_WORD_WIDTH/2 +// bits each, because FPGA multipliers can't handle full FPGA_WORD_WIDTH-wide +// inputs. These smaller words are multiplied by an array of 2*OPERAND_NUM_WORDS +// multiplers and accumulated into an array of 4*OPERAND_NUM_WORDS-1 partial +// output words si[]. +// +// The order of loading a and b into the multipliers is a bit complicated, +// during the first 2*OPERAND_NUM_WORDS-1 cycles one si word per cycle is +// obtained, during the very last 2*OPERAND_NUM_WORDS'th cycle all the +// remaining 2*OPERAND_NUM_WORDS partial words are obtained simultaneously. +// +//------------------------------------------------------------------------------ +void fpga_modular_mul_helper_multiply(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si) +//------------------------------------------------------------------------------ +{ + int w; // counter + int t, x; // more counters + int j, i; // word indices + FPGA_WORD p; // product + + // buffers for smaller words that multipliers can handle + FPGA_WORD_REDUCED ai[2*FPGA_OPERAND_NUM_WORDS]; + FPGA_WORD_REDUCED bj[2*FPGA_OPERAND_NUM_WORDS]; + + // split a and b into smaller words + for (w=0; wwords[w], ai[2*w + 1] = (FPGA_WORD_REDUCED)(a->words[w] >> (FPGA_WORD_WIDTH / 2)), + bj[2*w] = (FPGA_WORD_REDUCED)b->words[w], bj[2*w + 1] = (FPGA_WORD_REDUCED)(b->words[w] >> (FPGA_WORD_WIDTH / 2)); + + // accumulators + FPGA_WORD_EXTENDED mac[2*FPGA_OPERAND_NUM_WORDS]; + + // clear accumulators + for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++) mac[w] = 0; + + // run the crazy algorithm :) + for (t=0; t<(2*FPGA_OPERAND_NUM_WORDS); t++) + { + // save upper half of si[] (one word per cycle) + if (t > 0) + { si[4*FPGA_OPERAND_NUM_WORDS - (t+1)] = mac[t]; + mac[t] = 0; + } + + // update index + j = 2*FPGA_OPERAND_NUM_WORDS - (t+1); + + // parallel multiplication + for (x=0; x<(2*FPGA_OPERAND_NUM_WORDS); x++) + { + // update index + i = t - x; + if (i < 0) i += 2*FPGA_OPERAND_NUM_WORDS; + + // multiply... + fpga_lowlevel_mul16(ai[i], bj[j], &p); + + // ...accumulate + mac[x] += p; + } + + } + + // now finally save lower half of si[] (2*OPERAND_NUM_WORDS words at once) + for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++) + si[w] = mac[2*FPGA_OPERAND_NUM_WORDS - (w+1)]; +} + + +//------------------------------------------------------------------------------ +// +// Accumulation of partial words into full-size product. +// +// This routine implements the Algorithm 4. from "Ultra High Performance ECC +// over NIST Primes on Commercial FPGAs". +// +// Input words si[] are accumulated into full-size product c[]. +// +// The algorithm is a bit tricky, there are 4*OPERAND_NUM_WORDS-1 words in +// si[]. Complete operation takes 2*OPERAND_NUM_WORDS cycles, even words are +// summed in full, odd words are split into two parts. During every cycle the +// upper part of the previous word and the lower part of the following word are +// summed too. +// +//------------------------------------------------------------------------------ +void fpga_modular_mul_helper_accumulate(const FPGA_WORD_EXTENDED *si, FPGA_WORD *c) +//------------------------------------------------------------------------------ +{ + int w; // word counter + FPGA_WORD_EXTENDED cw0, cw1; // intermediate sums + FPGA_WORD_REDUCED cw_carry; // wide carry + + // clear carry + cw_carry = 0; + + // execute the algorithm + for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++) + { + // handy flags + bool w_is_first = (w == 0); + bool w_is_last = (w == (2*FPGA_OPERAND_NUM_WORDS-1)); + + // accumulate full current even word... + // ...and also the upper part of the previous odd word (if not the first word) + fpga_lowlevel_add47(si[2*w], w_is_first ? 0 : si[2*w-1] >> (FPGA_WORD_WIDTH / 2), &cw0); + + // generate another word from "carry" part of the previous even word... + // ...and also the lower part of the following odd word (if not the last word) + cw1 = w_is_last ? 0 : (FPGA_WORD)(si[2*w+1] << (FPGA_WORD_WIDTH / 2)); + cw1 |= (FPGA_WORD_EXTENDED)cw_carry; + + // accumulate once again + fpga_lowlevel_add47(cw0, cw1, &cw1); + + // store current word... + c[w] = (FPGA_WORD)cw1; + + // ...and carry + cw_carry = (FPGA_WORD_REDUCED) (cw1 >> FPGA_WORD_WIDTH); + } +} + + +//------------------------------------------------------------------------------ +// +// Fast modular reduction for NIST prime P-256. +// +// p = c mod p256 +// +// This routine implements the algorithm 2.29 from "Guide to Elliptic Curve +// Cryptography". +// +// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c +// on the other hand is the output of the parallelized Comba multiplier, so it +// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS). +// +// To save FPGA resources, the calculation is done using only two adders and +// one subtractor. The algorithm is split into five steps. +// +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 +void fpga_modular_mul_helper_reduce_p256(const FPGA_WORD *c, FPGA_BUFFER *p) +{ + // "funny" words + FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9; + + // compose "funny" words out of input words + s1.words[7] = c[ 7], s1.words[6] = c[ 6], s1.words[5] = c[ 5], s1.words[4] = c[ 4], s1.words[3] = c[ 3], s1.words[2] = c[ 2], s1.words[1] = c[ 1], s1.words[0] = c[ 0]; + s2.words[7] = c[15], s2.words[6] = c[14], s2.words[5] = c[13], s2.words[4] = c[12], s2.words[3] = c[11], s2.words[2] = 0, s2.words[1] = 0, s2.words[0] = 0; + s3.words[7] = 0, s3.words[6] = c[15], s3.words[5] = c[14], s3.words[4] = c[13], s3.words[3] = c[12], s3.words[2] = 0, s3.words[1] = 0, s3.words[0] = 0; + s4.words[7] = c[15], s4.words[6] = c[14], s4.words[5] = 0, s4.words[4] = 0, s4.words[3] = 0, s4.words[2] = c[10], s4.words[1] = c[ 9], s4.words[0] = c[ 8]; + s5.words[7] = c[ 8], s5.words[6] = c[13], s5.words[5] = c[15], s5.words[4] = c[14], s5.words[3] = c[13], s5.words[2] = c[11], s5.words[1] = c[10], s5.words[0] = c[ 9]; + s6.words[7] = c[10], s6.words[6] = c[ 8], s6.words[5] = 0, s6.words[4] = 0, s6.words[3] = 0, s6.words[2] = c[13], s6.words[1] = c[12], s6.words[0] = c[11]; + s7.words[7] = c[11], s7.words[6] = c[ 9], s7.words[5] = 0, s7.words[4] = 0, s7.words[3] = c[15], s7.words[2] = c[14], s7.words[1] = c[13], s7.words[0] = c[12]; + s8.words[7] = c[12], s8.words[6] = 0, s8.words[5] = c[10], s8.words[4] = c[ 9], s8.words[3] = c[ 8], s8.words[2] = c[15], s8.words[1] = c[14], s8.words[0] = c[13]; + s9.words[7] = c[13], s9.words[6] = 0, s9.words[5] = c[11], s9.words[4] = c[10], s9.words[3] = c[ 9], s9.words[2] = 0, s9.words[1] = c[15], s9.words[0] = c[14]; + + // intermediate results + FPGA_BUFFER sum0, sum1, difference; + + /* Step 1. */ + fpga_modular_add(&s2, &s2, &sum0); // sum0 = 2*s2 + fpga_modular_add(&s3, &s3, &sum1); // sum1 = 2*s3 + fpga_modular_sub(&ECDSA_ZERO, &s6, &difference); // difference = -s6 + + /* Step 2. */ + fpga_modular_add(&sum0, &s1, &sum0); // sum0 = s1 + 2*s2 + fpga_modular_add(&sum1, &s4, &sum1); // sum1 = s4 + 2*s3 + fpga_modular_sub(&difference, &s7, &difference); // difference = -(s6 + s7) + + /* Step 3. */ + fpga_modular_add(&sum0, &s5, &sum0); // sum0 = s1 + 2*s2 + s5 + fpga_modular_add(&sum1, &ECDSA_ZERO, &sum1); // compulsory cycle to keep sum1 constant for next stage + fpga_modular_sub(&difference, &s8, &difference); // difference = -(s6 + s7 + s8) + + /* Step 4. */ + fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5 +// fpga_modular_add(, , &sum1); // dummy cycle, result ignored + fpga_modular_sub(&difference, &s9, &difference); // difference = -(s6 + s7 + s8 + s9) + + /* Step 5. */ + fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + 2*s3 + s4 + s5 - s6 - s7 - s8 - s9 +// fpga_modular_add(, , &sum1); // dummy cycle, result ignored +// fpga_modular_add(, , &difference); // dummy cycle, result ignored +} +#endif + + +//------------------------------------------------------------------------------ +// +// Fast modular reduction for NIST prime P-384. +// +// p = c mod p384 +// +// This routine implements the algorithm 2.30 from "Guide to Elliptic Curve +// Cryptography". +// +// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c +// on the other hand is the output of the parallelized Comba multiplier, so it +// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS). +// +// To save FPGA resources, the calculation is done using only two adders and +// one subtractor. The algorithm is split into five steps. +// +//------------------------------------------------------------------------------ +#if USE_CURVE == 2 +void fpga_modular_mul_helper_reduce_p384(const FPGA_WORD *c, FPGA_BUFFER *p) +{ + // "funny" words + FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; + + // compose "funny" words + s1.words[11] = c[11], s1.words[10] = c[10], s1.words[ 9] = c[ 9], s1.words[ 8] = c[ 8], s1.words[ 7] = c[ 7], s1.words[ 6] = c[ 6], s1.words[ 5] = c[ 5], s1.words[ 4] = c[ 4], s1.words[ 3] = c[ 3], s1.words[ 2] = c[ 2], s1.words[ 1] = c[ 1], s1.words[ 0] = c[ 0]; + s2.words[11] = 0, s2.words[10] = 0, s2.words[ 9] = 0, s2.words[ 8] = 0, s2.words[ 7] = 0, s2.words[ 6] = c[23], s2.words[ 5] = c[22], s2.words[ 4] = c[21], s2.words[ 3] = 0, s2.words[ 2] = 0, s2.words[ 1] = 0, s2.words[ 0] = 0; + s3.words[11] = c[23], s3.words[10] = c[22], s3.words[ 9] = c[21], s3.words[ 8] = c[20], s3.words[ 7] = c[19], s3.words[ 6] = c[18], s3.words[ 5] = c[17], s3.words[ 4] = c[16], s3.words[ 3] = c[15], s3.words[ 2] = c[14], s3.words[ 1] = c[13], s3.words[ 0] = c[12]; + s4.words[11] = c[20], s4.words[10] = c[19], s4.words[ 9] = c[18], s4.words[ 8] = c[17], s4.words[ 7] = c[16], s4.words[ 6] = c[15], s4.words[ 5] = c[14], s4.words[ 4] = c[13], s4.words[ 3] = c[12], s4.words[ 2] = c[23], s4.words[ 1] = c[22], s4.words[ 0] = c[21]; + s5.words[11] = c[19], s5.words[10] = c[18], s5.words[ 9] = c[17], s5.words[ 8] = c[16], s5.words[ 7] = c[15], s5.words[ 6] = c[14], s5.words[ 5] = c[13], s5.words[ 4] = c[12], s5.words[ 3] = c[20], s5.words[ 2] = 0, s5.words[ 1] = c[23], s5.words[ 0] = 0; + s6.words[11] = 0, s6.words[10] = 0, s6.words[ 9] = 0, s6.words[ 8] = 0, s6.words[ 7] = c[23], s6.words[ 6] = c[22], s6.words[ 5] = c[21], s6.words[ 4] = c[20], s6.words[ 3] = 0, s6.words[ 2] = 0, s6.words[ 1] = 0, s6.words[ 0] = 0; + s7.words[11] = 0, s7.words[10] = 0, s7.words[ 9] = 0, s7.words[ 8] = 0, s7.words[ 7] = 0, s7.words[ 6] = 0, s7.words[ 5] = c[23], s7.words[ 4] = c[22], s7.words[ 3] = c[21], s7.words[ 2] = 0, s7.words[ 1] = 0, s7.words[ 0] = c[20]; + s8.words[11] = c[22], s8.words[10] = c[21], s8.words[ 9] = c[20], s8.words[ 8] = c[19], s8.words[ 7] = c[18], s8.words[ 6] = c[17], s8.words[ 5] = c[16], s8.words[ 4] = c[15], s8.words[ 3] = c[14], s8.words[ 2] = c[13], s8.words[ 1] = c[12], s8.words[ 0] = c[23]; + s9.words[11] = 0, s9.words[10] = 0, s9.words[ 9] = 0, s9.words[ 8] = 0, s9.words[ 7] = 0, s9.words[ 6] = 0, s9.words[ 5] = 0, s9.words[ 4] = c[23], s9.words[ 3] = c[22], s9.words[ 2] = c[21], s9.words[ 1] = c[20], s9.words[ 0] = 0; + s10.words[11] = 0, s10.words[10] = 0, s10.words[ 9] = 0, s10.words[ 8] = 0, s10.words[ 7] = 0, s10.words[ 6] = 0, s10.words[ 5] = 0, s10.words[ 4] = c[23], s10.words[ 3] = c[23], s10.words[ 2] = 0, s10.words[ 1] = 0, s10.words[ 0] = 0; + + // intermediate results + FPGA_BUFFER sum0, sum1, difference; + + /* Step 1. */ + fpga_modular_add(&s1, &s3, &sum0); // sum0 = s1 + s3 + fpga_modular_add(&s2, &s2, &sum1); // sum1 = 2*s2 + fpga_modular_sub(&ECDSA_ZERO, &s8, &difference); // difference = -s8 + + /* Step 2. */ + fpga_modular_add(&sum0, &s4, &sum0); // sum0 = s1 + s3 + s4 + fpga_modular_add(&sum1, &s5, &sum1); // sum1 = 2*s2 + s5 + fpga_modular_sub(&difference, &s9, &difference); // difference = -(s8 + s9) + + /* Step 3. */ + fpga_modular_add(&sum0, &s6, &sum0); // sum0 = s1 + s3 + s4 + s6 + fpga_modular_add(&sum1, &s7, &sum1); // sum1 = 2*s2 + s5 + s7 + fpga_modular_sub(&difference, &s10, &difference); // difference = -(s8 + s9 + s10) + + /* Step 4. */ + fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5 +// fpga_modular_add(, , &sum1); // dummy cycle, result ignored + fpga_modular_sub(&difference, &ECDSA_ZERO, &difference); // compulsory cycle to keep difference constant for next stage + + /* Step 5. */ + fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10 +// fpga_modular_add(, , &sum1); // dummy cycle, result ignored +// fpga_modular_add(, , &difference); // dummy cycle, result ignored +} +#endif + + +#if USE_CURVE == 1 +//------------------------------------------------------------------------------ +void fpga_modular_inv23_p256(const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3) +//------------------------------------------------------------------------------ +// +// This uses the addition chain from +// +// < https://briansmith.org/ecc-inversion-addition-chains-01 > +// +// to calculate A2 = A^-2 and A3 = A^-3. +// +//------------------------------------------------------------------------------ +{ + // counter + int cyc_cnt; + + // working variables + FPGA_BUFFER R1, R2, X1, X2, X3, X6, X12, X15, X30, X32; + + // first obtain intermediate helper quantities (X1..X32) + + // X1 + fpga_multiword_copy(A, &X1); + + // X2 + fpga_modular_mul(&X1, &X1, &R1); + fpga_modular_mul(&R1, &X1, &X2); + + // X3 + fpga_modular_mul(&X2, &X2, &R1); + fpga_modular_mul(&R1, &X1, &X3); + + // X6 + fpga_multiword_copy(&X3, &R1); + for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X3, &X6); + + // X12 + fpga_multiword_copy(&X6, &R1); + for (cyc_cnt=0; cyc_cnt<6; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X6, &X12); + + // X15 + fpga_multiword_copy(&X12, &R1); + for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X3, &X15); + + // X30 + fpga_multiword_copy(&X15, &R1); + for (cyc_cnt=0; cyc_cnt<15; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X15, &X30); + + // X32 + fpga_multiword_copy(&X30, &R1); + for (cyc_cnt=0; cyc_cnt<2; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X2, &X32); + + // now compute the final results + + fpga_multiword_copy(&X32, &R1); + for (cyc_cnt=0; cyc_cnt<32; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X1, &R2); + + for (cyc_cnt=0; cyc_cnt<128; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1); + else fpga_modular_mul(&R1, &R1, &R2); + } + fpga_modular_mul(&R2, &X32, &R1); + + for (cyc_cnt=0; cyc_cnt<32; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X32, &R2); + + for (cyc_cnt=0; cyc_cnt<30; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1); + else fpga_modular_mul(&R1, &R1, &R2); + } + fpga_modular_mul(&R2, &X30, &R1); + + fpga_modular_mul(&R1, &R1, &R2); + fpga_modular_mul(&R2, &R2, &R1); + + // A2 obtained + fpga_multiword_copy(&R1, A2); + + // now calculate compute inverse cubed from inverse squared + fpga_modular_mul(&R1, &R1, &R2); + fpga_modular_mul(&R2, A, &R1); + + // A3 obtained + fpga_multiword_copy(&R1, A3); +} +#endif + + +#if USE_CURVE == 2 +//------------------------------------------------------------------------------ +void fpga_modular_inv23_p384(const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3) +//------------------------------------------------------------------------------ +// +// This uses the addition chain from +// +// < https://briansmith.org/ecc-inversion-addition-chains-01 > +// +// to calculate A2 = A^-2 and A3 = A^-3. +// +//------------------------------------------------------------------------------ +{ + // counter + int cyc_cnt; + + // working variables + FPGA_BUFFER R1, R2, X1, X2, X3, X6, X12, X15, X30, X60, X120; + + // first obtain intermediate helper quantities (X1..X120) + + // X1 + fpga_multiword_copy(A, &X1); + + // X2 + fpga_modular_mul(&X1, &X1, &R1); + fpga_modular_mul(&R1, &X1, &X2); + + // X3 + fpga_modular_mul(&X2, &X2, &R1); + fpga_modular_mul(&R1, &X1, &X3); + + // X6 + fpga_multiword_copy(&X3, &R1); + for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X3, &X6); + + // X12 + fpga_multiword_copy(&X6, &R1); + for (cyc_cnt=0; cyc_cnt<6; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X6, &X12); + + // X15 + fpga_multiword_copy(&X12, &R1); + for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X3, &X15); + + // X30 + fpga_multiword_copy(&X15, &R1); + for (cyc_cnt=0; cyc_cnt<15; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X15, &X30); + + // X60 + fpga_multiword_copy(&X30, &R1); + for (cyc_cnt=0; cyc_cnt<30; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X30, &X60); + + // X120 + fpga_multiword_copy(&X60, &R1); + for (cyc_cnt=0; cyc_cnt<60; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X60, &X120); + + // now compute the final results + + fpga_multiword_copy(&X120, &R1); + for (cyc_cnt=0; cyc_cnt<120; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X120, &R2); + + for (cyc_cnt=0; cyc_cnt<15; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1); + else fpga_modular_mul(&R1, &R1, &R2); + } + fpga_modular_mul(&R1, &X15, &R2); + + for (cyc_cnt=0; cyc_cnt<31; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1); + else fpga_modular_mul(&R1, &R1, &R2); + } + fpga_modular_mul(&R1, &X30, &R2); + fpga_modular_mul(&R2, &R2, &R1); + fpga_modular_mul(&R1, &R1, &R2); + fpga_modular_mul(&R2, &X2, &R1); + + for (cyc_cnt=0; cyc_cnt<94; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X30, &R2); + fpga_modular_mul(&R2, &R2, &R1); + fpga_modular_mul(&R1, &R1, &R2); + + // A2 obtained + fpga_multiword_copy(&R2, A2); + + // now calculate compute inverse cubed from inverse squared + fpga_modular_mul(&R2, &R2, &R1); + fpga_modular_mul(&R1, A, &R2); + + // A3 obtained + fpga_multiword_copy(&R2, A3); +} +#endif + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_modular.h b/ecdsa_fpga_modular.h new file mode 100644 index 0000000..3b75779 --- /dev/null +++ b/ecdsa_fpga_modular.h @@ -0,0 +1,144 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_modular.h +// ------------------------------------- +// Modular arithmetic routines for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// ECDSA Parameters (P-256) +//------------------------------------------------------------------------------ + +/* Field Size */ +#define ECDSA_P256_Q_INIT \ + {0xffffffff, 0x00000001, 0x00000000, 0x00000000, \ + 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff} + +/* Division Factor */ +#define ECDSA_P256_DELTA_INIT \ + {0x7fffffff, 0x80000000, 0x80000000, 0x00000000, \ + 0x00000000, 0x80000000, 0x00000000, 0x00000000} + + +//------------------------------------------------------------------------------ +// ECDSA Parameters (P-384) +//------------------------------------------------------------------------------ + +/* Field Size */ +#define ECDSA_P384_Q_INIT \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, \ + 0xffffffff, 0x00000000, 0x00000000, 0xffffffff} + +/* Division Factor */ +#define ECDSA_P384_DELTA_INIT \ + {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0x7fffffff, 0x80000000, 0x00000000, 0x80000000} + + +//------------------------------------------------------------------------------ +// ECDSA Parameters Switch +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 + +#define ECDSA_Q_INIT ECDSA_P256_Q_INIT +#define ECDSA_DELTA_INIT ECDSA_P256_DELTA_INIT + +#elif USE_CURVE == 2 + +#define ECDSA_Q_INIT ECDSA_P384_Q_INIT +#define ECDSA_DELTA_INIT ECDSA_P384_DELTA_INIT + +#else + +BAD_CURVE + +#endif + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +extern FPGA_BUFFER ECDSA_Q; +extern FPGA_BUFFER ECDSA_DELTA; + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void fpga_modular_init (); + +void fpga_modular_add (const FPGA_BUFFER *A, const FPGA_BUFFER *B, FPGA_BUFFER *S); +void fpga_modular_sub (const FPGA_BUFFER *A, const FPGA_BUFFER *B, FPGA_BUFFER *D); +void fpga_modular_mul (const FPGA_BUFFER *A, const FPGA_BUFFER *B, FPGA_BUFFER *P); + +void fpga_modular_mul_helper_multiply (const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si); +void fpga_modular_mul_helper_accumulate (const FPGA_WORD_EXTENDED *si, FPGA_WORD *c); +void fpga_modular_mul_helper_reduce_p256 (const FPGA_WORD *c, FPGA_BUFFER *p); +void fpga_modular_mul_helper_reduce_p384 (const FPGA_WORD *c, FPGA_BUFFER *p); + +void fpga_modular_inv23_p256 (const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3); +void fpga_modular_inv23_p384 (const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3); + +void fpga_modular_inv23_p256_microcode (); +void fpga_modular_inv23_p384_microcode (); + + +//------------------------------------------------------------------------------ +// ECDSA Prototype Switch +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 + +#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p256 +#define fpga_modular_inv23 fpga_modular_inv23_p256 +#define fpga_modular_inv23_microcode fpga_modular_inv23_p256_microcode + +#elif USE_CURVE == 2 + +#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p384 +#define fpga_modular_inv23 fpga_modular_inv23_p384 +#define fpga_modular_inv23_microcode fpga_modular_inv23_p384_microcode + +#else + +BAD_CURVE + +#endif + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_multiword.cpp b/ecdsa_fpga_multiword.cpp new file mode 100644 index 0000000..ab65e7a --- /dev/null +++ b/ecdsa_fpga_multiword.cpp @@ -0,0 +1,107 @@ +//------------------------------------------------------------------------------ +// +// fpga_ecdsa_multiword.cpp +// ----------------------------------------- +// Models of multi-precision FPGA primitives +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +FPGA_BUFFER ECDSA_ZERO; +FPGA_BUFFER ECDSA_ONE; + + +//------------------------------------------------------------------------------ +void fpga_multiword_init() +//------------------------------------------------------------------------------ +{ + int w; // word counter + + /* fill buffers for large multi-word integers */ + for ( w = FPGA_OPERAND_NUM_WORDS - 1; + w >= 0; + w -= 1) + { + ECDSA_ZERO.words[w] = 0; // all words are zero + ECDSA_ONE.words[w] = w ? 0 : 1; // only the lowest word is 1 + } +} + + +//------------------------------------------------------------------------------ +void fpga_multiword_copy(const FPGA_BUFFER *src, FPGA_BUFFER *dst) +//------------------------------------------------------------------------------ +// +// Copies large multi-word integer from src into dst. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + + // copy all the words from src into dst + for (w=0; wwords[w] = src->words[w]; +} + + +//------------------------------------------------------------------------------ +bool fpga_multiword_is_zero(const FPGA_BUFFER *x) +//------------------------------------------------------------------------------ +// +// Returns true when large multi-word integer x is zero. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + + // try to find non-zero words + for (w=0; wwords[w]) return false; + + // no non-zero words found + return true; +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_multiword.h b/ecdsa_fpga_multiword.h new file mode 100644 index 0000000..9190fd5 --- /dev/null +++ b/ecdsa_fpga_multiword.h @@ -0,0 +1,91 @@ +//------------------------------------------------------------------------------ +// +// fpga_ecdsa_multiword.h +// ----------------------------------------- +// Models of multi-precision FPGA primitives +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Model Parameters +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 + +#define FPGA_OPERAND_WIDTH (256) // largest supported operand width in bits + +#elif USE_CURVE == 2 + +#define FPGA_OPERAND_WIDTH (384) // largest supported operand width in bits + +#else + +BAD_CURVE + +#endif + + +//------------------------------------------------------------------------------ +// FPGA Pipeline Settings +//------------------------------------------------------------------------------ +#define FPGA_OPERAND_NUM_WORDS (FPGA_OPERAND_WIDTH / FPGA_WORD_WIDTH) + + +//------------------------------------------------------------------------------ +// Operand Data Type +//------------------------------------------------------------------------------ +typedef struct FPGA_BUFFER +{ + FPGA_WORD words[FPGA_OPERAND_NUM_WORDS]; +} +FPGA_BUFFER; + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +extern FPGA_BUFFER ECDSA_ZERO; +extern FPGA_BUFFER ECDSA_ONE; + + +//------------------------------------------------------------------------------ +// Prototpes +//------------------------------------------------------------------------------ +void fpga_multiword_init (); +void fpga_multiword_copy (const FPGA_BUFFER *src, FPGA_BUFFER *dst); +bool fpga_multiword_is_zero (const FPGA_BUFFER *x); + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_microcode_parser.py b/ecdsa_microcode_parser.py new file mode 100644 index 0000000..56aa94c --- /dev/null +++ b/ecdsa_microcode_parser.py @@ -0,0 +1,694 @@ +# +# ecdsa_microcode_parser.py +# +# Author: Pavel Shatov +# +# Copyright (c) 2018, NORDUnet A/S +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of the NORDUnet A/S nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Imports +import re +import sys +from enum import Enum + +# Source File +C_FILES = [ "ecdsa_fpga_curve_microcode.cpp", + "ecdsa_fpga_microcode.cpp"] + +class MICROCODE_PARSER: + + # enumerate microcode pieces + class MICROCODE_PIECE_ENUM(Enum): + + NONE = -1 + + PREPARE = 0 + + CYCLE_DOUBLE = 1 + CYCLE_ADD = 2 + + CYCLE_ADD_AT_INFINITY = 3 + CYCLE_ADD_SAME_X_SAME_Y = 4 + CYCLE_ADD_SAME_X = 5 + CYCLE_ADD_REGULAR = 6 + + CYCLE_K0 = 7 + CYCLE_K1 = 8 + + CONVERT = 9 + + CONVERT_AT_INFINITY = 10 + CONVERT_REGULAR = 11 + + INVERT_P256 = 12 + INVERT_P384 = 13 + + + # magic pair of begin/end markers + MARKER_BEGIN_MICROCODE = "BEGIN_MICROCODE:" + MARKER_END_MICROCODE = "END_MICROCODE" + + # names of banks + MICROCODE_C_NAME_BANK_LO = "BANK_LO" + MICROCODE_C_NAME_BANK_HI = "BANK_HI" + + # micro-operation names + MICROCODE_C_NAME_UOP_CMPZ = "uop_cmpz" + MICROCODE_C_NAME_UOP_MOVE = "uop_move" + MICROCODE_C_NAME_UOP_CALC = "uop_calc" + MICROCODE_C_NAME_UOP_CYCLE = "uop_cycle" + MICROCODE_C_NAME_UOP_REPEAT = "uop_repeat" + MICROCODE_C_NAME_UOP_CALC_EVEN = "uop_calc_if_even" + MICROCODE_C_NAME_UOP_CALC_ODD = "uop_calc_if_odd" + + # calculate micro-operations + MICROCODE_C_NAME_UOP_MATH_MUL = "MUL" + MICROCODE_C_NAME_UOP_MATH_ADD = "ADD" + MICROCODE_C_NAME_UOP_MATH_SUB = "SUB" + + # names of banks in C source + MICROCODE_C_NAMES_BANKS = [MICROCODE_C_NAME_BANK_LO, MICROCODE_C_NAME_BANK_HI] + + # names of operands in C source + MICROCODE_C_NAMES_OPERANDS = [ "CONST_ZERO", "CONST_ONE", + "CONST_DELTA", + "CONST_GX", "CONST_GY", + "CONST_HX", "CONST_HY", + "INVERT_R1", "INVERT_R2", + "INVERT_X2", "INVERT_X3", "INVERT_X6", + "INVERT_X12", "INVERT_X15", "INVERT_X30", + "INVERT_X32", "INVERT_X60", "INVERT_X120", + "INVERT_A2", "INVERT_A3", + "CYCLE_RX", "CYCLE_RY", "CYCLE_RZ", + "CYCLE_SX", "CYCLE_SY", "CYCLE_SZ", + "CYCLE_A", "CYCLE_A2", "CYCLE_B", + "CYCLE_C", "CYCLE_C2", "CYCLE_C2_2", + "CYCLE_D", "CYCLE_E", "CYCLE_F", + "CYCLE_G", "CYCLE_H", "CYCLE_J", + "CYCLE_Z2", + "CYCLE_T1", "CYCLE_T2", + "CYCLE_T3", "CYCLE_T4"] + + # names of banks in Verilog source + MICROCODE_V_NAME_BANKS_DUMMY = "UOP_BANKS_DUMMY" + + MICROCODE_V_NAMES_BANKS = ["UOP_BANKS_LO2HI", "UOP_BANKS_HI2LO"] + + # names of operands in Verilog source + MICROCODE_V_NAMES_OPERANDS = ["UOP_OPERAND_" + op for op in MICROCODE_C_NAMES_OPERANDS] + + # names of opcodes in Verilog source + MICROCODE_V_NAME_OPCODE_CMPZ = "UOP_OPCODE_CMPZ" + MICROCODE_V_NAME_OPCODE_MOVE = "UOP_OPCODE_COPY" + MICROCODE_V_NAME_OPCODE_MUL = "UOP_OPCODE_MUL" + MICROCODE_V_NAME_OPCODE_ADD = "UOP_OPCODE_ADD" + MICROCODE_V_NAME_OPCODE_SUB = "UOP_OPCODE_SUB" + MICROCODE_V_NAME_OPCODE_STOP = "UOP_OPCODE_STOP" + + MICROCODE_V_NAME_OPERAND_DONTCARE = "UOP_OPERAND_DONTCARE" + + # match group to catch operand names + MATCH_GROUP_09AZ = "([0-9A-Z_]+)" + + # match group to catch bank names + MATCH_GROUP_BANK = "(" + MICROCODE_C_NAME_BANK_LO + "|" + MICROCODE_C_NAME_BANK_HI + ")" + + # match group to catch number of loop iterations + MATCH_GROUP_NUMBER = "(\d+)" + + # match group to catch math instruction + MATCH_GROUP_MATH = "(" + MICROCODE_C_NAME_UOP_MATH_MUL + "|" + MICROCODE_C_NAME_UOP_MATH_ADD + "|" + MICROCODE_C_NAME_UOP_MATH_SUB + ")" + + # match group to catch calculation micro-operation + MATCH_GROUP_CALC = "(" + MICROCODE_C_NAME_UOP_CALC + "|" + MICROCODE_C_NAME_UOP_CALC_EVEN + "|" + MICROCODE_C_NAME_UOP_CALC_ODD + ")" + + # map string microcode piece names to enum values + MICROCODE_PIECE_DICT = { "PREPARE": MICROCODE_PIECE_ENUM.PREPARE, + + "CYCLE_DOUBLE": MICROCODE_PIECE_ENUM.CYCLE_DOUBLE, + "CYCLE_ADD": MICROCODE_PIECE_ENUM.CYCLE_ADD, + + "CYCLE_ADD_AT_INFINITY": MICROCODE_PIECE_ENUM.CYCLE_ADD_AT_INFINITY, + "CYCLE_ADD_SAME_X_SAME_Y": MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X_SAME_Y, + "CYCLE_ADD_SAME_X": MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X, + "CYCLE_ADD_REGULAR": MICROCODE_PIECE_ENUM.CYCLE_ADD_REGULAR, + + "CYCLE_K0": MICROCODE_PIECE_ENUM.CYCLE_K0, + "CYCLE_K1": MICROCODE_PIECE_ENUM.CYCLE_K1, + + "INVERT_P256": MICROCODE_PIECE_ENUM.INVERT_P256, + "INVERT_P384": MICROCODE_PIECE_ENUM.INVERT_P384, + + "CONVERT": MICROCODE_PIECE_ENUM.CONVERT, + + "CONVERT_AT_INFINITY": MICROCODE_PIECE_ENUM.CONVERT_AT_INFINITY, + "CONVERT_REGULAR": MICROCODE_PIECE_ENUM.CONVERT_REGULAR} + + + # map C bank names to Verilog bank names + MICROCODE_BANK_DICT = dict(zip(MICROCODE_C_NAMES_BANKS, MICROCODE_V_NAMES_BANKS)) + + # map C operand names to Verilog operand names + MICROCODE_OPERAND_DICT = dict(zip(MICROCODE_C_NAMES_OPERANDS, MICROCODE_V_NAMES_OPERANDS)) + + # map C calculation names to Verilog opcode names + MICROCODE_MATH_DICT = { MICROCODE_C_NAME_UOP_MATH_MUL: MICROCODE_V_NAME_OPCODE_MUL, + MICROCODE_C_NAME_UOP_MATH_ADD: MICROCODE_V_NAME_OPCODE_ADD, + MICROCODE_C_NAME_UOP_MATH_SUB: MICROCODE_V_NAME_OPCODE_SUB} + + # microcode format + MICROCODE_FORMAT_ADDR = "9'd%03d" + MICROCODE_FORMAT_LINE = MICROCODE_FORMAT_ADDR + ": data <= %s;" + MICROCODE_FORMAT_OFFSET = "localparam [UOP_ADDR_WIDTH-1:0] %s = " + MICROCODE_FORMAT_ADDR + ";" + + # pieces of microcode + MICROCODE_LINES_PREPARE = [] + + MICROCODE_LINES_CYCLE_DOUBLE = [] + MICROCODE_LINES_CYCLE_ADD = [] + + MICROCODE_LINES_CYCLE_ADD_AT_INFINITY = [] + MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y = [] + MICROCODE_LINES_CYCLE_ADD_SAME_X = [] + MICROCODE_LINES_CYCLE_ADD_REGULAR = [] + + MICROCODE_LINES_CYCLE_K0 = [] + MICROCODE_LINES_CYCLE_K1 = [] + + MICROCODE_LINES_INVERT_P256 = [] + MICROCODE_LINES_INVERT_P384 = [] + + MICROCODE_LINES_CONVERT = [] + + MICROCODE_LINES_CONVERT_AT_INFINITY = [] + MICROCODE_LINES_CONVERT_REGULAR = [] + + MICROCODE_LINE_STOP = "{%s, %s, %s, %s, %s}" % ( MICROCODE_V_NAME_OPCODE_STOP, + MICROCODE_V_NAME_BANKS_DUMMY, + MICROCODE_V_NAME_OPERAND_DONTCARE, + MICROCODE_V_NAME_OPERAND_DONTCARE, + MICROCODE_V_NAME_OPERAND_DONTCARE) + + def __init__(self, filenames): + self.__filenames = filenames + + def parse(self): + for next_filename in self.__filenames: + print("Parsing '%s'..." % (next_filename)) + parsing_now = False + line_num = 0 + with open(next_filename, "r") as c_file: + c_lines = c_file.readlines() + for next_c_line in c_lines: + line_num += 1 + self.__line_num = line_num + self.__next_c_line = next_c_line.strip() + if len(self.__next_c_line) == 0: continue + if self.__next_c_line.startswith("//"): continue + if not parsing_now: + self.__current_piece = self.__try_start_parsing() + if self.__current_piece != self.MICROCODE_PIECE_ENUM.NONE: + print(" Found piece of microcode: %s" % (str(self.__current_piece))) + parsing_now = True + else: + parsing_now = self.__continue_parsing() + + + def format(self): + + mode = 0 + + if len(sys.argv) == 2: + if sys.argv[1] == "P256": mode = 1 + if sys.argv[1] == "P384": mode = 2 + + if mode == 0: + sys.exit("sys.exit(): Usage: ecdsa_microcode_parser.py !") + + if len(self.MICROCODE_LINES_PREPARE) == 0: sys.exit("sys.exit(): Empty PREPARE piece!") + + if len(self.MICROCODE_LINES_CYCLE_DOUBLE) == 0: sys.exit("sys.exit(): Empty CYCLE_DOUBLE piece!") + if len(self.MICROCODE_LINES_CYCLE_ADD) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD piece!") + + if len(self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_AT_INFINITY piece!") + if len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_SAME_X_SAME_Y piece!") + if len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_SAME_X piece!") + if len(self.MICROCODE_LINES_CYCLE_ADD_REGULAR) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_REGULAR piece!") + + if len(self.MICROCODE_LINES_CYCLE_K0) == 0: sys.exit("sys.exit(): Empty CYCLE_K0 piece!") + if len(self.MICROCODE_LINES_CYCLE_K1) == 0: sys.exit("sys.exit(): Empty CYCLE_K1 piece!") + + if len(self.MICROCODE_LINES_INVERT_P256) == 0: sys.exit("sys.exit(): Empty INVERT_P256 piece!") + if len(self.MICROCODE_LINES_INVERT_P384) == 0: sys.exit("sys.exit(): Empty INVERT_P384 piece!") + + if len(self.MICROCODE_LINES_CONVERT) == 0: sys.exit("sys.exit(): Empty CONVERT piece!") + + if len(self.MICROCODE_LINES_CONVERT_AT_INFINITY) == 0: sys.exit("sys.exit(): Empty CONVERT_AT_INFINITY piece!") + if len(self.MICROCODE_LINES_CONVERT_REGULAR) == 0: sys.exit("sys.exit(): Empty CONVERT_REGULAR piece!") + + length = 0 + length += len(self.MICROCODE_LINES_PREPARE) + + length += len(self.MICROCODE_LINES_CYCLE_DOUBLE) + length += len(self.MICROCODE_LINES_CYCLE_ADD) + + length += len(self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY) + length += len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y) + length += len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X) + length += len(self.MICROCODE_LINES_CYCLE_ADD_REGULAR) + + length += len(self.MICROCODE_LINES_CYCLE_K0) + length += len(self.MICROCODE_LINES_CYCLE_K1) + + length += len(self.MICROCODE_LINES_CONVERT) + + length += len(self.MICROCODE_LINES_CONVERT_AT_INFINITY) + length += len(self.MICROCODE_LINES_CONVERT_REGULAR) + + if mode == 1: length += len(self.MICROCODE_LINES_INVERT_P256) + if mode == 2: length += len(self.MICROCODE_LINES_INVERT_P384) + + print("Total number of micro-operations (w/o stops): %s" % (length)) + + + self.__addr = 0 + + print("\n -=-=-=-=-=- CUT AND PASTE BELOW -=-=-=-=-=-\n") + + num_mul_cycle = 0 + num_mul_invert_p256 = 0 + num_mul_invert_p384 = 0 + + offset_prepare = self.__addr; + print("// PREPARE"); + for line in self.MICROCODE_LINES_PREPARE: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_double = self.__addr; + print("// CYCLE_DOUBLE"); + for line in self.MICROCODE_LINES_CYCLE_DOUBLE: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_add = self.__addr; + print("// CYCLE_ADD"); + for line in self.MICROCODE_LINES_CYCLE_ADD: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_add_at_infinity = self.__addr; + print("// CYCLE_ADD_AT_INFINITY"); + for line in self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_add_same_x_same_y = self.__addr; + print("// CYCLE_ADD_SAME_X_SAME_Y"); + for line in self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_add_same_x = self.__addr; + print("// CYCLE_ADD_SAME_X"); + for line in self.MICROCODE_LINES_CYCLE_ADD_SAME_X: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_add_regular = self.__addr; + print("// CYCLE_ADD_REGULAR"); + for line in self.MICROCODE_LINES_CYCLE_ADD_REGULAR: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_k0 = self.__addr; + print("// CYCLE_K0"); + for line in self.MICROCODE_LINES_CYCLE_K0: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_k1 = self.__addr; + print("// CYCLE_K1"); + for line in self.MICROCODE_LINES_CYCLE_K1: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_convert = self.__addr; + print("// CONVERT"); + for line in self.MICROCODE_LINES_CONVERT: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_convert_at_infinity = self.__addr; + print("// CONVERT_AT_INFINITY"); + for line in self.MICROCODE_LINES_CONVERT_AT_INFINITY: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_convert_regular = self.__addr; + print("// CONVERT_REGULAR"); + for line in self.MICROCODE_LINES_CONVERT_REGULAR: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + if mode == 1: + offset_invert_p256 = self.__addr; + print("// INVERT_P256"); + for line in self.MICROCODE_LINES_INVERT_P256: + num_mul_invert_p256 += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + if mode == 2: + offset_invert_p384 = self.__addr; + print("// INVERT_P384"); + for line in self.MICROCODE_LINES_INVERT_P384: + num_mul_invert_p384 += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + print("\n") + self.__format_offset("UOP_OFFSET_PREPARE ", offset_prepare) + self.__format_offset("UOP_OFFSET_CYCLE_DOUBLE ", offset_cycle_double) + self.__format_offset("UOP_OFFSET_CYCLE_ADD ", offset_cycle_add) + self.__format_offset("UOP_OFFSET_CYCLE_ADD_AT_INFINITY ", offset_cycle_add_at_infinity) + self.__format_offset("UOP_OFFSET_CYCLE_ADD_SAME_X_SAME_Y", offset_cycle_add_same_x_same_y) + self.__format_offset("UOP_OFFSET_CYCLE_ADD_SAME_X ", offset_cycle_add_same_x) + self.__format_offset("UOP_OFFSET_CYCLE_ADD_REGULAR ", offset_cycle_add_regular) + self.__format_offset("UOP_OFFSET_CYCLE_K0 ", offset_cycle_k0) + self.__format_offset("UOP_OFFSET_CYCLE_K1 ", offset_cycle_k1) + self.__format_offset("UOP_OFFSET_CONVERT ", offset_convert) + self.__format_offset("UOP_OFFSET_CONVERT_AT_INFINITY ", offset_convert_at_infinity) + self.__format_offset("UOP_OFFSET_CONVERT_REGULAR ", offset_convert_regular) + if mode == 1: self.__format_offset("UOP_OFFSET_INVERT_P256 ", offset_invert_p256) + if mode == 2: self.__format_offset("UOP_OFFSET_INVERT_P384 ", offset_invert_p384) + + print("") + print("num_mul_cycle = %d" % num_mul_cycle) + print("num_mul_invert_p256 = %d" % num_mul_invert_p256) + print("num_mul_invert_p384 = %d" % num_mul_invert_p384) + + def __format_line(self, line): + line = self.MICROCODE_FORMAT_LINE % (self.__addr, line) + print(line) + self.__addr += 1 + is_mul = line.find(self.MICROCODE_V_NAME_OPCODE_MUL) + if is_mul >= 0: return 1 + return 0 + + def __format_offset(self, name, offset): + print(self.MICROCODE_FORMAT_OFFSET % (name, offset)) + + def __try_start_parsing(self): + piece = self.MICROCODE_PIECE_ENUM.NONE + begin_marker = re.match("^/\*\s*" + self.MARKER_BEGIN_MICROCODE + "\s*" + + self.MATCH_GROUP_09AZ + "\s*\*/$", + self.__next_c_line); + if begin_marker: + piece = self.MICROCODE_PIECE_DICT[begin_marker.group(1)] + + return piece + + def __encode_uop(self, opcode, banks, src1, src2, dst): + return "{%s, %s, %s, %s, %s}" % (opcode, banks, src1, src2, dst) + + def __continue_parsing(self): + + end_marker = re.match("^/\*\s*" + self.MARKER_END_MICROCODE + "\s*\*/$", + self.__next_c_line); + if end_marker: return False + + # cmpz? + uop_cmpz = re.match("^" + self.MICROCODE_C_NAME_UOP_CMPZ + "\(\s*" + + self.MATCH_GROUP_BANK + "\,\s*" + + self.MATCH_GROUP_09AZ + "\)\;$", + self.__next_c_line, re.IGNORECASE) + if uop_cmpz: + ok = self.__parse_uop_cmpz(uop_cmpz) + if ok: return True + else: + self.__print_parse_error("parse_uop_cmpz() failed!") + self.__abort() + + # move? + uop_move = re.match("^" + self.MICROCODE_C_NAME_UOP_MOVE + "\(\s*" + + self.MATCH_GROUP_BANK + "\,\s*" + + self.MATCH_GROUP_09AZ + "\,\s*" + + self.MATCH_GROUP_BANK + "\,\s*" + + self.MATCH_GROUP_09AZ + "\)\;$", + self.__next_c_line, re.IGNORECASE) + if uop_move: + ok = self.__parse_uop_move(uop_move) + if ok: return True + else: + self.__print_parse_error("parse_uop_move() failed!") + self.__abort() + + # calc? + uop_calc = re.match("^" + self.MATCH_GROUP_CALC + "\s*\(" + + self.MATCH_GROUP_MATH + "\,\s*" + + self.MATCH_GROUP_BANK + "\,\s*" + + self.MATCH_GROUP_09AZ + "\,\s*" + + self.MATCH_GROUP_09AZ + "\,\s*" + + self.MATCH_GROUP_BANK + "\,\s*" + + self.MATCH_GROUP_09AZ + "\)\;$", + self.__next_c_line, re.IGNORECASE) + if uop_calc: + ok = self.__parse_uop_calc(uop_calc) + if ok: return True + else: + self.__print_parse_error("parse_uop_calc() failed!") + self.__abort() + + # cycle? + uop_cycle = re.match("^" + self.MICROCODE_C_NAME_UOP_CYCLE + "\(" + + self.MATCH_GROUP_NUMBER + "\)\;$", + self.__next_c_line, re.IGNORECASE) + if uop_cycle: + ok = self.__parse_uop_cycle(uop_cycle) + if ok: return True + else: + self.__print_parse_error("parse_uop_cycle() failed!") + self.__abort() + + # repeat? + uop_repeat = re.match("^" + self.MICROCODE_C_NAME_UOP_REPEAT + "\(" + "\)\;$", + self.__next_c_line, re.IGNORECASE) + if uop_repeat: + ok = self.__parse_uop_repeat() + if ok: return True + else: + self.__print_parse_error("__parse_uop_repeat() failed!") + self.__abort() + + self.__print_parse_error("unknown micro-operation!") + self.__abort() + + def __check_math(self, math): + if not math in self.MICROCODE_MATH_DICT: + print_parse_error("bad math!") + return False + + return True + + def __check_banks(self, src, dst): + if not src in self.MICROCODE_BANK_DICT: + print_parse_error("bad src bank!") + return False + + if not dst in self.MICROCODE_BANK_DICT: + print_parse_error("bad dst bank!") + return False + + if src == dst: + print_parse_error("src bank == dst bank!") + return False + + return True + + def __check_bank(self, src): + if not src in self.MICROCODE_BANK_DICT: + print_parse_error("bad src bank!") + return False + + return True + + def __check_op3(self, src1, src2, dst): + + if not src1 in self.MICROCODE_OPERAND_DICT: + self.__print_parse_error("bad src1 operand!") + return False + + if src2 != "" and not src2 in self.MICROCODE_OPERAND_DICT: + self.__print_parse_error("bad src2 operand!") + return False + + if dst != "" and not dst in self.MICROCODE_OPERAND_DICT: + self.__print_parse_error("bad dst operand!") + return False + + return True + + def __check_op2(self, src, dst): + return self.__check_op3(src, "", dst) + + def __check_op1(self, src): + return self.__check_op2(src, "") + + def __parse_uop_move(self, params): + bank_src = params.group(1) + bank_dst = params.group(3) + op_src = params.group(2) + op_dst = params.group(4) + + if not self.__check_banks(bank_src, bank_dst): + self.__print_parse_error("check_banks() failed!") + return False + + if not self.__check_op2(op_src, op_dst): + self.__print_parse_error("check_op2() failed!") + return False + + opcode = self.MICROCODE_V_NAME_OPCODE_MOVE + bank = self.MICROCODE_BANK_DICT[bank_src] + src1 = self.MICROCODE_OPERAND_DICT[op_src] + src2 = self.MICROCODE_V_NAME_OPERAND_DONTCARE + dst = self.MICROCODE_OPERAND_DICT[op_dst] + + data = self.__encode_uop(opcode, bank, src1, src2, dst) + self.__store_uop(data) + return True + + def __parse_uop_cmpz(self, params): + bank_src = params.group(1) + op_src = params.group(2) + + if not self.__check_bank(bank_src): + self.__print_parse_error("check_bank() failed!") + return False + + if not self.__check_op1(op_src): + self.__print_parse_error("check_op1() failed!") + return False + + opcode = self.MICROCODE_V_NAME_OPCODE_CMPZ + bank = self.MICROCODE_BANK_DICT[bank_src] + src1 = self.MICROCODE_OPERAND_DICT[op_src] + src2 = self.MICROCODE_V_NAME_OPERAND_DONTCARE + dst = self.MICROCODE_V_NAME_OPERAND_DONTCARE + + data = self.__encode_uop(opcode, bank, src1, src2, dst) + self.__store_uop(data) + return True + + def __parse_uop_calc(self, params): + calc = params.group(1) + math = params.group(2) + bank_src = params.group(3) + bank_dst = params.group(6) + op_src1 = params.group(4) + op_src2 = params.group(5) + op_dst = params.group(7) + + if not self.__check_math(math): + self.__print_parse_error("check_calc() failed!") + return False + + if not self.__check_banks(bank_src, bank_dst): + self.__print_parse_error("check_banks() failed!") + return False + + if not self.__check_op3(op_src1, op_src2, op_dst): + self.__print_parse_error("check_op3() failed!") + return False + + opcode = self.MICROCODE_MATH_DICT[math] + banks = self.MICROCODE_BANK_DICT[bank_src] + src1 = self.MICROCODE_OPERAND_DICT[op_src1] + src2 = self.MICROCODE_OPERAND_DICT[op_src2] + dst = self.MICROCODE_OPERAND_DICT[op_dst] + + data = self.__encode_uop(opcode, banks, src1, src2, dst) + if calc == self.MICROCODE_C_NAME_UOP_CALC: self.__store_uop(data) + elif calc == self.MICROCODE_C_NAME_UOP_CALC_EVEN: self.__loop_even = data + elif calc == self.MICROCODE_C_NAME_UOP_CALC_ODD: self.__loop_odd = data + else: return False + + return True + + def __parse_uop_cycle(self, params): + self.__loop_iters = int(params.group(1)) + return True + + def __parse_uop_repeat(self): + print(" Unrolling loop (%d iters)..." % (self.__loop_iters)) + + for i in range(0, self.__loop_iters): + if i % 2 == 0: self.__store_uop(self.__loop_even) + else: self.__store_uop(self.__loop_odd) + + return True + + def __store_uop(self, data): + #print("\t" + data) + if self.__current_piece == self.MICROCODE_PIECE_ENUM.PREPARE: self.MICROCODE_LINES_PREPARE.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_DOUBLE: self.MICROCODE_LINES_CYCLE_DOUBLE.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD: self.MICROCODE_LINES_CYCLE_ADD.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_AT_INFINITY: self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X_SAME_Y: self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X: self.MICROCODE_LINES_CYCLE_ADD_SAME_X.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_REGULAR: self.MICROCODE_LINES_CYCLE_ADD_REGULAR.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_K0: self.MICROCODE_LINES_CYCLE_K0.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_K1: self.MICROCODE_LINES_CYCLE_K1.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.INVERT_P256: self.MICROCODE_LINES_INVERT_P256.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.INVERT_P384: self.MICROCODE_LINES_INVERT_P384.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CONVERT: self.MICROCODE_LINES_CONVERT.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CONVERT_AT_INFINITY: self.MICROCODE_LINES_CONVERT_AT_INFINITY.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CONVERT_REGULAR: self.MICROCODE_LINES_CONVERT_REGULAR.append(data) + + def __print_parse_error(self, msg): + print("PARSE ERROR: %s" % (msg)) + + def __abort(self): + sys.exit("Stopped at line #%d:\n%s" % (self.__line_num, self.__next_c_line)) + + +# ----------------------------------------------------------------------------- +def main(filenames): +# ----------------------------------------------------------------------------- + parser = MICROCODE_PARSER(filenames) + parser.parse() + parser.format() + +# ----------------------------------------------------------------------------- +if __name__ == "__main__": +# ----------------------------------------------------------------------------- + main(C_FILES) + +# +# End-of-File +# diff --git a/ecdsa_model.h b/ecdsa_model.h deleted file mode 100644 index fc7e571..0000000 --- a/ecdsa_model.h +++ /dev/null @@ -1,205 +0,0 @@ -//------------------------------------------------------------------------------ -// -// ecdsa_model.h -// -------------------------------------------- -// Base point scalar multiplier model for ECDSA -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - - -//------------------------------------------------------------------------------ -// -// Curve Selection -// -// USE_CURVE == 1 -> P-256 -// USE_CURVE == 2 -> P-384 -// -//------------------------------------------------------------------------------ -#define USE_CURVE 2 - - -//------------------------------------------------------------------------------ -// Model Parameters -//------------------------------------------------------------------------------ -#if USE_CURVE == 1 -#define OPERAND_WIDTH (256) // largest supported operand width in bits -#elif USE_CURVE == 2 -#define OPERAND_WIDTH (384) // largest supported operand width in bits -#else -#error USE_CURVE must be either 1 or 2! -#endif - - -//------------------------------------------------------------------------------ -// P-256 Parameters and Test Vectors -//------------------------------------------------------------------------------ - -/* Field Size */ -#define P_256_Q {0xffffffff, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff} - -/* Generic Numbers */ -#define P_256_ZERO {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000} -#define P_256_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001} - -/* Division Factor */ -#define P_256_DELTA {0x7fffffff, 0x80000000, 0x80000000, 0x00000000, 0x00000000, 0x80000000, 0x00000000, 0x00000000} - -/* Base Point */ -#define P_256_G_X {0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296} -#define P_256_G_Y {0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5} - -/* Doubled Base Point */ -#define P_256_H_X {0x29d05c19, 0x3da77b71, 0x0e863235, 0x38b77e1b, 0x11f904fe, 0xa42998be, 0x16bd8d74, 0x4ece7ad0} -#define P_256_H_Y {0xb01cbd1c, 0x01e58065, 0x711814b5, 0x83f061e9, 0xd431cca9, 0x94cea131, 0x3449bf97, 0xc840ae07} - -/* Base Point Order */ -#define P_256_N {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, 0xbce6faad, 0xa7179e84, 0xf3b9cac2, 0xfc632551} - -/* Private Key */ -#define P_256_D {0x70a12c2d, 0xb16845ed, 0x56ff68cf, 0xc21a472b, 0x3f04d7d6, 0x851bf634, 0x9f2d7d5b, 0x3452b38a} - -/* Per-message Random Number */ -#define P_256_K {0x580ec00d, 0x85643433, 0x4cef3f71, 0xecaed496, 0x5b12ae37, 0xfa47055b, 0x1965c7b1, 0x34ee45d0} - -/* Public Key */ -#define P_256_Q_X {0x8101ece4, 0x7464a6ea, 0xd70cf69a, 0x6e2bd3d8, 0x8691a326, 0x2d22cba4, 0xf7635eaf, 0xf26680a8} -#define P_256_Q_Y {0xd8a12ba6, 0x1d599235, 0xf67d9cb4, 0xd58f1783, 0xd3ca43e7, 0x8f0a5aba, 0xa6240799, 0x36c0c3a9} - -/* Part of Signature */ -#define P_256_R_X {0x7214bc96, 0x47160bbd, 0x39ff2f80, 0x533f5dc6, 0xddd70ddf, 0x86bb8156, 0x61e805d5, 0xd4e6f27c} -#define P_256_R_Y {0x8b81e3e9, 0x77597110, 0xc7cf2633, 0x435b2294, 0xb7264298, 0x7defd3d4, 0x007e1cfc, 0x5df84541} - - -//------------------------------------------------------------------------------ -// P-384 Parameters and Test Vectors -//------------------------------------------------------------------------------ - -/* Field Size */ -#define P_384_Q {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0xffffffff, 0x00000000, 0x00000000, 0xffffffff} - -/* Generic Numbers */ -#define P_384_ZERO {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000} -#define P_384_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001} - -/* Division Factor */ -#define P_384_DELTA {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff, 0x80000000, 0x00000000, 0x80000000} - -/* Base Point */ -#define P_384_G_X {0xaa87ca22, 0xbe8b0537, 0x8eb1c71e, 0xf320ad74, 0x6e1d3b62, 0x8ba79b98, 0x59f741e0, 0x82542a38, 0x5502f25d, 0xbf55296c, 0x3a545e38, 0x72760ab7} -#define P_384_G_Y {0x3617de4a, 0x96262c6f, 0x5d9e98bf, 0x9292dc29, 0xf8f41dbd, 0x289a147c, 0xe9da3113, 0xb5f0b8c0, 0x0a60b1ce, 0x1d7e819d, 0x7a431d7c, 0x90ea0e5f} - -/* Doubled Base Point */ -#define P_384_H_X {0xaaf06bba, 0x82e9f590, 0xe29c71c2, 0x19bea517, 0x23c5893a, 0xe8b0c8cf, 0x4c117c3e, 0xfb57ab8d, 0x55fa1b42, 0x8155ad27, 0x8b574391, 0x1b13ea8a} -#define P_384_H_Y {0xc9e821b5, 0x69d9d390, 0xa2616740, 0x6d6d23d6, 0x070be242, 0xd765eb83, 0x1625ceec, 0x4a0f473e, 0xf59f4e30, 0xe2817e62, 0x85bce284, 0x6f15f19d} - -/* Base Point Order */ -#define P_384_N {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xc7634d81, 0xf4372ddf, 0x581a0db2, 0x48b0a77a, 0xecec196a, 0xccc52973} - -/* Private Key */ -#define P_384_D {0xc838b852, 0x53ef8dc7, 0x394fa580, 0x8a518398, 0x1c7deef5, 0xa69ba8f4, 0xf2117ffe, 0xa39cfcd9, 0x0e95f6cb, 0xc854abac, 0xab701d50, 0xc1f3cf24} - -/* Per-message Random Number */ -#define P_384_K {0xdc6b4403, 0x6989a196, 0xe39d1cda, 0xc000812f, 0x4bdd8b2d, 0xb41bb33a, 0xf5137258, 0x5ebd1db6, 0x3f0ce827, 0x5aa1fd45, 0xe2d2a735, 0xf8749359} - -/* Public Key */ -#define P_384_Q_X {0x1fbac8ee, 0xbd0cbf35, 0x640b39ef, 0xe0808dd7, 0x74debff2, 0x0a2a329e, 0x91713baf, 0x7d7f3c3e, 0x81546d88, 0x3730bee7, 0xe48678f8, 0x57b02ca0} -#define P_384_Q_Y {0xeb213103, 0xbd68ce34, 0x3365a8a4, 0xc3d4555f, 0xa385f533, 0x0203bdd7, 0x6ffad1f3, 0xaffb9575, 0x1c132007, 0xe1b24035, 0x3cb0a4cf, 0x1693bdf9} - -/* Part of Signature */ -#define P_384_R_X {0xa0c27ec8, 0x93092dea, 0x1e1bd2cc, 0xfed3cf94, 0x5c8134ed, 0x0c9f8131, 0x1a0f4a05, 0x942db8db, 0xed8dd59f, 0x267471d5, 0x462aa14f, 0xe72de856} -#define P_384_R_Y {0x85564940, 0x9815bb91, 0x424eaca5, 0xfd76c973, 0x75d575d1, 0x422ec53d, 0x343bd33b, 0x847fdf0c, 0x11569685, 0xb528ab25, 0x49301542, 0x8d7cf72b} - - -//------------------------------------------------------------------------------ -// Parameter and Test Vector Selection -//------------------------------------------------------------------------------ -#if USE_CURVE == 1 - -#define ECDSA_Q P_256_Q - -#define ECDSA_ZERO P_256_ZERO -#define ECDSA_ONE P_256_ONE - -#define ECDSA_DELTA P_256_DELTA - -#define ECDSA_G_X P_256_G_X -#define ECDSA_G_Y P_256_G_Y - -#define ECDSA_H_X P_256_H_X -#define ECDSA_H_Y P_256_H_Y - -#define ECDSA_N P_256_N -#define ECDSA_D P_256_D -#define ECDSA_K P_256_K - -#define ECDSA_Q_X P_256_Q_X -#define ECDSA_Q_Y P_256_Q_Y - -#define ECDSA_R_X P_256_R_X -#define ECDSA_R_Y P_256_R_Y - -#elif USE_CURVE == 2 - -#define ECDSA_Q P_384_Q - -#define ECDSA_ZERO P_384_ZERO -#define ECDSA_ONE P_384_ONE - -#define ECDSA_DELTA P_384_DELTA - -#define ECDSA_G_X P_384_G_X -#define ECDSA_G_Y P_384_G_Y - -#define ECDSA_H_X P_384_H_X -#define ECDSA_H_Y P_384_H_Y - -#define ECDSA_N P_384_N -#define ECDSA_D P_384_D -#define ECDSA_K P_384_K - -#define ECDSA_Q_X P_384_Q_X -#define ECDSA_Q_Y P_384_Q_Y - -#define ECDSA_R_X P_384_R_X -#define ECDSA_R_Y P_384_R_Y - -#else - -#error USE_CURVE must be either 1 or 2! - -#endif - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/ecdsa_model_fpga.cpp b/ecdsa_model_fpga.cpp deleted file mode 100644 index 8ad0962..0000000 --- a/ecdsa_model_fpga.cpp +++ /dev/null @@ -1,414 +0,0 @@ -//------------------------------------------------------------------------------ -// -// ecdsa_model_fpga.cpp -// -------------------------------------------- -// Base point scalar multiplier model for ECDSA -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - - -//------------------------------------------------------------------------------ -// Headers -//------------------------------------------------------------------------------ -#include -#include -#include -#include "ecdsa_model.h" -#include "fpga_lowlevel.h" -#include "fpga_modular.h" -#include "fpga_curve.h" -#include "fpga_util.h" - - -//------------------------------------------------------------------------------ -// Prototypes -//------------------------------------------------------------------------------ -void fpga_model_init (); -bool test_base_point_multiplier (FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy); - -bool abuse_internal_point_adder (); -bool abuse_internal_point_doubler (); - -void print_fpga_buffer (const char *s, FPGA_BUFFER *v); - -bool compare_fpga_buffers (FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *bx, FPGA_BUFFER *by); -bool compare_fpga_buffers (FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *az, FPGA_BUFFER *bx, FPGA_BUFFER *by, FPGA_BUFFER *bz); - - -//------------------------------------------------------------------------------ -// Locals -//------------------------------------------------------------------------------ -static FPGA_BUFFER ecdsa_d; -static FPGA_BUFFER ecdsa_k; -static FPGA_BUFFER ecdsa_n; - - -//------------------------------------------------------------------------------ -int main() -//------------------------------------------------------------------------------ -{ - bool ok; // flag - - // - // initialize buffers - // - fpga_model_init(); - fpga_modular_init(); - fpga_curve_init(); - - - // - // test base point multiplier: Q = d * G - // - printf("Trying to derive public key from private key...\n\n"); - ok = test_base_point_multiplier(&ecdsa_d, &ecdsa_q_x, &ecdsa_q_y); - if (!ok) return EXIT_FAILURE; - - - // - // test base point multiplier: R = k * G - // - printf("Trying to sign something...\n\n"); - ok = test_base_point_multiplier(&ecdsa_k, &ecdsa_r_x, &ecdsa_r_y); - if (!ok) return EXIT_FAILURE; - - - // - // test base point multiplier: O = n * G - // - printf("Trying to multiply the base point by its order...\n\n"); - ok = test_base_point_multiplier(&ecdsa_n, &ecdsa_zero, &ecdsa_zero); - if (!ok) return EXIT_FAILURE; - - - // - // try to abuse internal point doubler - // - ok = abuse_internal_point_doubler(); - if (!ok) return EXIT_FAILURE; - - - // - // try to abuse internal point adder - // - ok = abuse_internal_point_adder(); - if (!ok) return EXIT_FAILURE; - - - // - // everything went just fine - // - return EXIT_SUCCESS; -} - - -//------------------------------------------------------------------------------ -void fpga_model_init() -//------------------------------------------------------------------------------ -{ - int w; // word counter - - FPGA_BUFFER tmp_d = ECDSA_D; - FPGA_BUFFER tmp_k = ECDSA_K; - FPGA_BUFFER tmp_n = ECDSA_N; - - /* fill buffers for large multi-word integers */ - for (w=0; w0; w--) - { - printf("%08x", buf->words[w-1]); - - // insert space after every group of 4 bytes - if (w > 1) printf(" "); - } - - // print footer - printf("\n"); -} - - -//------------------------------------------------------------------------------ -bool compare_fpga_buffers(FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *bx, FPGA_BUFFER *by) -//------------------------------------------------------------------------------ -// -// Compare affine coordinates of two points and return true when they match. -// -//------------------------------------------------------------------------------ -{ - int w; // word counter - - // print all the values - print_fpga_buffer(" Expected: X = ", ax); - print_fpga_buffer(" Calculated: X = ", bx); - printf("\n"); - print_fpga_buffer(" Expected: Y = ", ay); - print_fpga_buffer(" Calculated: Y = ", by); - - // compare values - for (w=0; wwords[w] != bx->words[w]) return false; - - // compare y - if (ay->words[w] != by->words[w]) return false; - } - - // values are the same - return true; -} - - -//------------------------------------------------------------------------------ -bool compare_fpga_buffers(FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *az, FPGA_BUFFER *bx, FPGA_BUFFER *by, FPGA_BUFFER *bz) -//------------------------------------------------------------------------------ -// -// Compare projective coordinates of two points and return true when they match. -// -//------------------------------------------------------------------------------ -{ - int w; // word counter - - // print all the values - print_fpga_buffer(" Expected: X = ", ax); - print_fpga_buffer(" Calculated: X = ", bx); - printf("\n"); - print_fpga_buffer(" Expected: Y = ", ay); - print_fpga_buffer(" Calculated: Y = ", by); - printf("\n"); - print_fpga_buffer(" Expected: Z = ", az); - print_fpga_buffer(" Calculated: Z = ", bz); - - // compare values - for (w=0; wwords[w] != bx->words[w]) return false; - - // compare y - if (ay->words[w] != by->words[w]) return false; - - // compare z - if (az->words[w] != bz->words[w]) return false; - } - - // values are the same - return true; -} - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/fpga_curve.cpp b/fpga_curve.cpp deleted file mode 100644 index 9cc8ec0..0000000 --- a/fpga_curve.cpp +++ /dev/null @@ -1,340 +0,0 @@ -//------------------------------------------------------------------------------ -// -// fpga_curve.cpp -// ------------------------------------ -// Elliptic curve arithmetic procedures -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - - -//------------------------------------------------------------------------------ -// Headers -//------------------------------------------------------------------------------ -#include -#include "ecdsa_model.h" -#include "fpga_lowlevel.h" -#include "fpga_modular.h" -#include "fpga_curve.h" -#include "fpga_util.h" - - -//------------------------------------------------------------------------------ -// Globals -//------------------------------------------------------------------------------ -FPGA_BUFFER ecdsa_g_x, ecdsa_g_y; -FPGA_BUFFER ecdsa_h_x, ecdsa_h_y; -FPGA_BUFFER ecdsa_q_x, ecdsa_q_y; -FPGA_BUFFER ecdsa_r_x, ecdsa_r_y; - - -//------------------------------------------------------------------------------ -void fpga_curve_init() -//------------------------------------------------------------------------------ -{ - int w; // word counter - - FPGA_BUFFER tmp_g_x = ECDSA_G_X, tmp_g_y = ECDSA_G_Y; - FPGA_BUFFER tmp_h_x = ECDSA_H_X, tmp_h_y = ECDSA_H_Y; - FPGA_BUFFER tmp_q_x = ECDSA_Q_X, tmp_q_y = ECDSA_Q_Y; - FPGA_BUFFER tmp_r_x = ECDSA_R_X, tmp_r_y = ECDSA_R_Y; - - /* fill buffers for large multi-word integers */ - for (w=0; w R=2*G) : (P==-Q => R=O) - fpga_buffer_copy(t2_is_zero ? &ecdsa_h_y : &ecdsa_one, ry); // | - fpga_buffer_copy(t2_is_zero ? &ecdsa_one : &ecdsa_zero, rz); // | - } -} - - -//------------------------------------------------------------------------------ -// -// Conversion from projective Jacobian to affine coordinates. -// -// P(px,py,pz) -> Q(qx,qy) -// -// Note, that qx = px / Z^2 and qy = py / Z^3. Division in modular arithmetic -// is equivalent to multiplication by the inverse value of divisor, so -// qx = px * (pz^-1)^2 and qy = py * (pz^-1)^3. -// -// Note, that this procedure does *NOT* handle points at infinity correctly. It -// can only be called from the base point multiplication routine, that -// specifically makes sure that P is not at infinity, so pz will always be -// non-zero value. -// -//------------------------------------------------------------------------------ -void fpga_curve_point_to_affine(FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *qx, FPGA_BUFFER *qy) -//------------------------------------------------------------------------------ -{ - FPGA_BUFFER pz1; // inverse value of pz - FPGA_BUFFER t2, t3; // intermediate values - - fpga_modular_inv(pz, &pz1); // pz1 = pz^-1 (mod q) - - fpga_modular_mul(&pz1, &pz1, &t2); // t2 = pz1 ^ 2 (mod q) - fpga_modular_mul(&pz1, &t2, &t3); // t3 = tz1 ^ 3 (mod q) - - fpga_modular_mul(px, &t2, qx); // qx = px * (pz^-1)^2 (mod q) - fpga_modular_mul(py, &t3, qy); // qy = py * (pz^-1)^3 (mod q) -} - - -//------------------------------------------------------------------------------ -// -// Elliptic curve base point scalar multiplication routine. -// -// Q(qx,qy) = k * G(px,py) -// -// Note, that Q is supposed to be in affine coordinates. Multiplication is done -// using the double-and-add algorithm 3.27 from "Guide to Elliptic Curve -// Cryptography". -// -// WARNING: Though this procedure always does the addition step, it only -// updates the result when current bit of k is set. It does not take any -// active measures to keep run-time constant. The main purpose of this model -// is to help debug Verilog code for FPGA, so *DO NOT* use it anywhere near -// production! -// -//------------------------------------------------------------------------------ -void fpga_curve_scalar_multiply(FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy) -//------------------------------------------------------------------------------ -{ - int word_count, bit_count; // counters - - FPGA_BUFFER rx, ry, rz; // intermediate result - FPGA_BUFFER tx, ty, tz; // temporary variable - - /* set initial value of R to point at infinity */ - fpga_buffer_copy(&ecdsa_one, &rx); - fpga_buffer_copy(&ecdsa_one, &ry); - fpga_buffer_copy(&ecdsa_zero, &rz); - - /* process bits of k left-to-right */ - for (word_count=OPERAND_NUM_WORDS; word_count>0; word_count--) - for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--) - { - /* calculate T = 2 * R */ - fpga_curve_double_jacobian(&rx, &ry, &rz, &tx, &ty, &tz); - - /* always calculate R = T + P for constant-time */ - fpga_curve_add_jacobian(&tx, &ty, &tz, &rx, &ry, &rz); - - /* revert to the value of T before addition if the current bit of k is not set */ - if (!((k->words[word_count-1] >> (bit_count-1)) & 1)) - { fpga_buffer_copy(&tx, &rx); - fpga_buffer_copy(&ty, &ry); - fpga_buffer_copy(&tz, &rz); - } - - } - - // convert result to affine coordinates anyway - fpga_curve_point_to_affine(&rx, &ry, &rz, qx, qy); - - // check, that rz is non-zero (not point at infinity) - bool rz_is_zero = fpga_buffer_is_zero(&rz); - - // handle special case (result is point at infinity) - if (rz_is_zero) - { fpga_buffer_copy(&ecdsa_zero, qx); - fpga_buffer_copy(&ecdsa_zero, qy); - } -} - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/fpga_curve.h b/fpga_curve.h deleted file mode 100644 index c90cc16..0000000 --- a/fpga_curve.h +++ /dev/null @@ -1,61 +0,0 @@ -//------------------------------------------------------------------------------ -// -// fpga_curve.h -// ------------------------------------ -// Elliptic curve arithmetic procedures -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - - -//------------------------------------------------------------------------------ -// Globals -//------------------------------------------------------------------------------ -extern FPGA_BUFFER ecdsa_g_x, ecdsa_g_y; -extern FPGA_BUFFER ecdsa_h_x, ecdsa_h_y; -extern FPGA_BUFFER ecdsa_q_x, ecdsa_q_y; -extern FPGA_BUFFER ecdsa_r_x, ecdsa_r_y; - - -//------------------------------------------------------------------------------ -// Prototypes -//------------------------------------------------------------------------------ -void fpga_curve_init (); -void fpga_curve_scalar_multiply (FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy); -void fpga_curve_add_jacobian (FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz); -void fpga_curve_double_jacobian (FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz); -void fpga_curve_point_to_affine (FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *qx, FPGA_BUFFER *qy); - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/fpga_lowlevel.cpp b/fpga_lowlevel.cpp deleted file mode 100644 index 511856a..0000000 --- a/fpga_lowlevel.cpp +++ /dev/null @@ -1,137 +0,0 @@ -//------------------------------------------------------------------------------ -// -// fpga_lowlevel.cpp -// ----------------------------------- -// Models of Low-level FPGA primitives -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - - -//------------------------------------------------------------------------------ -// Headers -//------------------------------------------------------------------------------ -#include -#include "ecdsa_model.h" -#include "fpga_lowlevel.h" - - -//------------------------------------------------------------------------------ -// -// Low-level 32-bit adder with carry input and carry output. -// -// Carries are 1 bit wide. -// -// {c_out, s} = x + y + c_in -// -//------------------------------------------------------------------------------ -void fpga_lowlevel_add32(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out) -//------------------------------------------------------------------------------ -{ - // calculate sum - FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x + (FPGA_WORD_EXTENDED)y; - - // process carry input - if (c_in) r++; - - // store sum - *s = (FPGA_WORD)r; - - // shift sum to obtain carry flag - r >>= FPGA_WORD_WIDTH; - - // store carry - *c_out = (r != 0); -} - - -//------------------------------------------------------------------------------ -// -// Low-level 32-bit subtractor with borrow input and borrow output. -// -// Borrows are 1 bit wide. -// -// {b_out, d} = x - y - b_in -// -void fpga_lowlevel_sub32(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out) -//------------------------------------------------------------------------------ -{ - // calculate difference - FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x - (FPGA_WORD_EXTENDED)y; - - // process borrow input - if (b_in) r--; - - // store difference - *d = (FPGA_WORD)r; - - // shift difference to obtain borrow flag - r >>= FPGA_WORD_WIDTH; - - // store borrow - *b_out = (r != 0); -} - - -//------------------------------------------------------------------------------ -// -// Low-level 16x16-bit multiplier. -// -// Inputs are 16-bit wide, outputs are 32-bit wide. -// -// p = x * y -// -void fpga_lowlevel_mul16(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p) -//------------------------------------------------------------------------------ -{ - // multiply and store result - *p = (FPGA_WORD)x * (FPGA_WORD)y; -} - - -//------------------------------------------------------------------------------ -// -// Low-level 48-bit adder without carries. -// -// s = (x + y)[47:0] -// -void fpga_lowlevel_add48(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s) -//------------------------------------------------------------------------------ -{ - // add and store result truncated to 48 bits - *s = (x + y) & FPGA_MASK_ADDER48; -} - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/fpga_lowlevel.h b/fpga_lowlevel.h deleted file mode 100644 index eeb5c4f..0000000 --- a/fpga_lowlevel.h +++ /dev/null @@ -1,80 +0,0 @@ -//------------------------------------------------------------------------------ -// -// fpga_lowlevel.h -// ----------------------------------- -// Models of Low-level FPGA primitives -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - - -//------------------------------------------------------------------------------ -// FPGA Pipeline Settings -//------------------------------------------------------------------------------ -#define FPGA_WORD_WIDTH (32) -#define OPERAND_NUM_WORDS (OPERAND_WIDTH / FPGA_WORD_WIDTH) - - -//------------------------------------------------------------------------------ -// Word Types (Normal, Extended, Reduced) -//------------------------------------------------------------------------------ -typedef uint32_t FPGA_WORD; -typedef uint64_t FPGA_WORD_EXTENDED; -typedef uint16_t FPGA_WORD_REDUCED; - - -//------------------------------------------------------------------------------ -// Extended Adder Mask -//------------------------------------------------------------------------------ -#define FPGA_MASK_ADDER48 ((FPGA_WORD_EXTENDED)0x0000FFFFFFFFFFFF) - - -//------------------------------------------------------------------------------ -struct FPGA_BUFFER -//------------------------------------------------------------------------------ -{ - FPGA_WORD words[OPERAND_NUM_WORDS]; -}; - - -//------------------------------------------------------------------------------ -// Prototypes -//------------------------------------------------------------------------------ -void fpga_lowlevel_add32 (FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out); -void fpga_lowlevel_sub32 (FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out); -void fpga_lowlevel_mul16 (FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p); -void fpga_lowlevel_add48 (FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s); - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/fpga_modular.cpp b/fpga_modular.cpp deleted file mode 100644 index 9b01df0..0000000 --- a/fpga_modular.cpp +++ /dev/null @@ -1,838 +0,0 @@ -//------------------------------------------------------------------------------ -// -// fpga_modular.cpp -// --------------------------- -// Modular arithmetic routines -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - - -//------------------------------------------------------------------------------ -// Headers -//------------------------------------------------------------------------------ -#include -#include "ecdsa_model.h" -#include "fpga_lowlevel.h" -#include "fpga_modular.h" - - -//------------------------------------------------------------------------------ -// Globals -//------------------------------------------------------------------------------ -FPGA_BUFFER ecdsa_q; -FPGA_BUFFER ecdsa_zero; -FPGA_BUFFER ecdsa_one; -FPGA_BUFFER ecdsa_delta; - - -//------------------------------------------------------------------------------ -void fpga_modular_init() -//------------------------------------------------------------------------------ -{ - int w; // word counter - - FPGA_BUFFER tmp_q = ECDSA_Q; - FPGA_BUFFER tmp_zero = ECDSA_ZERO; - FPGA_BUFFER tmp_one = ECDSA_ONE; - FPGA_BUFFER tmp_delta = ECDSA_DELTA; - - /* fill buffers for large multi-word integers */ - for (w=0; w= q) s -= q -// -// The speed-up trick is to simultaneously calculate (a + b) and (a + b - q) -// and then select the right variant. -// -//------------------------------------------------------------------------------ -void fpga_modular_add(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *s) -//------------------------------------------------------------------------------ -{ - int w; // word counter - FPGA_BUFFER ab, ab_n; // intermediate buffers - bool c_in, c_out; // carries - bool b_in, b_out; // borrows - - c_in = false; // first word has no carry - b_in = false; // first word has no borrow - - // run parallel addition and subtraction - for (w=0; wwords[w], b->words[w], c_in, &ab.words[w], &c_out); - fpga_lowlevel_sub32(ab.words[w], ecdsa_q.words[w], b_in, &ab_n.words[w], &b_out); - - c_in = c_out; // propagate carry - b_in = b_out; // propagate borrow - } - - // now select the right buffer - - /* - * We select the right variant based on borrow and carry flags after - * addition and subtraction of the very last pair of words. Note, that - * we only need to select the first variant (a + b) when (a + b) < q. - * This way if we get negative number after subtraction, we discard it - * and use the output of the adder instead. The subtractor output is - * negative when borrow flag is set *and* carry flag is not set. When - * both borrow and carry are set, the number is non-negative, because - * borrow and carry cancel each other out. - */ - for (w=0; wwords[w] = (b_out && !c_out) ? ab.words[w] : ab_n.words[w]; -} - - -//------------------------------------------------------------------------------ -// -// Modular subtraction. -// -// This routine implements algorithm 3. from "Ultra High Performance ECC over -// NIST Primes on Commercial FPGAs". -// -// d = (a - b) mod q -// -// The naive approach is like the following: -// -// 1. d = a - b -// 2. if (a < b) d += q -// -// The speed-up trick is to simultaneously calculate (a - b) and (a - b + q) -// and then select the right variant. -// -//------------------------------------------------------------------------------ -void fpga_modular_sub(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *d) -//------------------------------------------------------------------------------ -{ - int w; // word counter - FPGA_BUFFER ab, ab_n; // intermediate buffers - bool c_in, c_out; // carries - bool b_in, b_out; // borrows - - c_in = false; // first word has no carry - b_in = false; // first word has no borrow - - // run parallel subtraction and addition - for (w=0; wwords[w], b->words[w], b_in, &ab.words[w], &b_out); - fpga_lowlevel_add32(ab.words[w], ecdsa_q.words[w], c_in, &ab_n.words[w], &c_out); - - b_in = b_out; // propagate borrow - c_in = c_out; // propagate carry - } - - // now select the right buffer - - /* - * We select the right variant based on borrow flag after subtraction - * and addition of the very last pair of words. Note, that we only - * need to select the second variant (a - b + q) when a < b. This way - * if we get negative number after subtraction, we discard it - * and use the output of the adder instead. The Subtractor output is - * negative when borrow flag is set. - */ - for (w=0; wwords[w] = b_out ? ab_n.words[w] : ab.words[w]; -} - - -//------------------------------------------------------------------------------ -// -// Modular multiplication. -// -// This routine implements modular multiplication algorithm from "Ultra High -// Performance ECC over NIST Primes on Commercial FPGAs". -// -// p = (a * b) mod q -// -// The complex algorithm is split into three parts: -// -// 1. Calculation of partial words -// 2. Acccumulation of partial words into full-size product -// 3. Modular reduction of the full-size product -// -// See comments for corresponding helper routines for more information. -// -//------------------------------------------------------------------------------ -void fpga_modular_mul(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *p) -//------------------------------------------------------------------------------ -{ - FPGA_WORD_EXTENDED si[4*OPERAND_NUM_WORDS-1]; // parts of intermediate product - FPGA_WORD c[2*OPERAND_NUM_WORDS]; // full-size intermediate product - - /* multiply to get partial words */ - fpga_modular_mul_helper_multiply(a, b, si); - - /* accumulate partial words into full-size product */ - fpga_modular_mul_helper_accumulate(si, c); - - /* reduce full-size product using special routine */ - fpga_modular_mul_helper_reduce(c, p); -} - - -//------------------------------------------------------------------------------ -// -// Modular multiplicative inversion procedure. -// -// a1 = a^-1 (mod n) -// -// This routine implements the algorithm from "Constant Time Modular -// Inversion" by Joppe W. Bos (http://www.joppebos.com/files/CTInversion.pdf) -// -// The algorithm has two phases: 1) calculation of "almost" modular inverse, -// which is a^-1*2^k and 2) removal of redundant factor 2^k. -// -// The first stage has four temporary variables: r, s, u, v; they are updated -// at every iteration. Depending on flags there can be four branches, FPGA -// will pre-calculate all possible values in parallel and then use a multiplexor -// to select the next value. This implementation also calculates all possible -// outcomes. This is done for debugging purposes, *NOT* for constant run-time! -// -// The second part only works with s and k. -// -// Note, that k is a simple counter, and it can't exceed 2*OPERAND_WIDTH. -// -// The complex inversion algorithm uses helper routines. Note, that width of the -// intermediate results can temporarily exceed OPERAND_WIDTH, so all the helper -// routines process OPERAND_NUM_WORDS+1 words. -// -//------------------------------------------------------------------------------ -void fpga_modular_inv(FPGA_BUFFER *a, FPGA_BUFFER *a1) -{ - int i; // round counter - int w; // word counter - int k; // redundant power of 2 - - /* q, 1 */ - FPGA_WORD buf_q[OPERAND_NUM_WORDS+1]; - FPGA_WORD buf_1[OPERAND_NUM_WORDS+1]; - - /* r, s */ - FPGA_WORD buf_r[OPERAND_NUM_WORDS+1], buf_s[OPERAND_NUM_WORDS+1]; - FPGA_WORD buf_r_double[OPERAND_NUM_WORDS+1], buf_s_double[OPERAND_NUM_WORDS+1]; - FPGA_WORD buf_r_new[OPERAND_NUM_WORDS+1], buf_s_new[OPERAND_NUM_WORDS+1]; - FPGA_WORD buf_r_plus_s[OPERAND_NUM_WORDS+1], buf_s_plus_r[OPERAND_NUM_WORDS+1]; - - /* u, v */ - FPGA_WORD buf_u[OPERAND_NUM_WORDS+1], buf_v[OPERAND_NUM_WORDS+1]; - FPGA_WORD buf_u_half[OPERAND_NUM_WORDS+1], buf_v_half[OPERAND_NUM_WORDS+1]; - FPGA_WORD buf_u_minus_v[OPERAND_NUM_WORDS+1], buf_v_minus_u[OPERAND_NUM_WORDS+1]; - FPGA_WORD buf_u_minus_v_half[OPERAND_NUM_WORDS+1], buf_v_minus_u_half[OPERAND_NUM_WORDS+1]; - FPGA_WORD buf_u_new[OPERAND_NUM_WORDS+1], buf_v_new[OPERAND_NUM_WORDS+1]; - - /* comparison */ - int cmp_v_1, cmp_u_v; - - /* clear buffers */ - for (w=0; w<=OPERAND_NUM_WORDS; w++) - buf_r[w] = 0, buf_s[w] = 0, - buf_u[w] = 0, buf_v[w] = 0, - buf_q[w] = 0, buf_1[w] = 0; - - /* initialize q, 1 */ - for (w=0; wwords[w]; - - /* initialize k */ - k = 0; - - /* flags for the first stage */ - bool v_is_1, u_is_greater_than_v, u_is_even, v_is_even; - - /* first stage */ - for (i=0; i<(2*OPERAND_WIDTH); i++) - { - /* pre-calculate all possible values for r and s */ - fpga_modular_inv_helper_shl(buf_r, buf_r_double); // r_double = 2 * r - fpga_modular_inv_helper_shl(buf_s, buf_s_double); // s_double = 2 * s - fpga_modular_inv_helper_add(buf_r, buf_s, buf_r_plus_s); // r_plus_s = r + s - fpga_modular_inv_helper_add(buf_s, buf_r, buf_s_plus_r); // s_plus_r = s + r - - /* pre-calculate all possible values for u and v */ - fpga_modular_inv_helper_shr(buf_u, buf_u_half); // u_half = u / 2 - fpga_modular_inv_helper_shr(buf_v, buf_v_half); // v_half = v / 2 - fpga_modular_inv_helper_sub(buf_u, buf_v, buf_u_minus_v); // u_minus_v = u - v - fpga_modular_inv_helper_sub(buf_v, buf_u, buf_v_minus_u); // v_minus_u = v - u - fpga_modular_inv_helper_shr(buf_u_minus_v, buf_u_minus_v_half); // u_minus_v_half = u_minus_v / 2 - fpga_modular_inv_helper_shr(buf_v_minus_u, buf_v_minus_u_half); // v_minus_u_half = v_minus_u / 2 - - /* compare */ - fpga_modular_inv_helper_cmp(buf_v, buf_1, &cmp_v_1); - fpga_modular_inv_helper_cmp(buf_u, buf_v, &cmp_u_v); - - /* assign flags */ - v_is_1 = (cmp_v_1 == 0); - u_is_greater_than_v = (cmp_u_v > 0); - u_is_even = !(buf_u[0] & 1); - v_is_even = !(buf_v[0] & 1); - - /* select u */ - if ( u_is_even) fpga_modular_inv_helper_cpy(buf_u_new, buf_u_half); - if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_u_new, buf_u); - if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_u_new, u_is_greater_than_v ? buf_u_minus_v_half : buf_u); - - /* select v */ - if ( u_is_even) fpga_modular_inv_helper_cpy(buf_v_new, buf_v); - if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_v_new, buf_v_half); - if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_v_new, u_is_greater_than_v ? buf_v : buf_v_minus_u_half); - - /* select r */ - if ( u_is_even) fpga_modular_inv_helper_cpy(buf_r_new, buf_r); - if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_r_new, buf_r_double); - if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_r_new, u_is_greater_than_v ? buf_r_plus_s : buf_r_double); - - /* select s */ - if ( u_is_even) fpga_modular_inv_helper_cpy(buf_s_new, buf_s_double); - if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_s_new, buf_s); - if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_s_new, u_is_greater_than_v ? buf_s_double : buf_s_plus_r); - - /* update values */ - if (!v_is_1) - { fpga_modular_inv_helper_cpy(buf_u, buf_u_new); - fpga_modular_inv_helper_cpy(buf_v, buf_v_new); - fpga_modular_inv_helper_cpy(buf_r, buf_r_new); - fpga_modular_inv_helper_cpy(buf_s, buf_s_new); - } - - /* update k */ - if (!v_is_1) k++; - } - - // - // Note, that to save FPGA resources, the second stage re-uses buffers - // used in the first stage. - // - - /* flags for the second stage */ - bool k_is_0, s_is_odd; - - /* second stage */ - for (i=0; i<(2*OPERAND_WIDTH); i++) - { - /* pre-calculate all possible values */ - fpga_modular_inv_helper_shr(buf_s, buf_u); - fpga_modular_inv_helper_add(buf_s, buf_q, buf_r); - fpga_modular_inv_helper_shr(buf_r, buf_v); - - // - // assign flags - // - s_is_odd = buf_s[0] & 1; - k_is_0 = (k == 0); - - // - // select new values based on flags - // - fpga_modular_inv_helper_cpy(buf_s_new, s_is_odd ? buf_v : buf_u); - - /* update s */ - if (! k_is_0) - fpga_modular_inv_helper_cpy(buf_s, buf_s_new); - - /* update k */ - if (! k_is_0) k--; - } - - /* done, copy s into output buffer */ - for (w=0; wwords[w] = buf_s[w]; -} - - -//------------------------------------------------------------------------------ -// -// Parallelized multiplication. -// -// This routine implements the algorithm in Fig. 3. from "Ultra High -// Performance ECC over NIST Primes on Commercial FPGAs". -// -// Inputs a and b are split into 2*OPERAND_NUM_WORDS words of FPGA_WORD_WIDTH/2 -// bits each, because FPGA multipliers can't handle full FPGA_WORD_WIDTH-wide -// inputs. These smaller words are multiplied by an array of 2*OPERAND_NUM_WORDS -// multiplers and accumulated into an array of 4*OPERAND_NUM_WORDS-1 partial -// output words si[]. -// -// The order of loading a and b into the multipliers is a bit complicated, -// during the first 2*OPERAND_NUM_WORDS-1 cycles one si word per cycle is -// obtained, during the very last 2*OPERAND_NUM_WORDS'th cycle all the -// remaining 2*OPERAND_NUM_WORDS partial words are obtained simultaneously. -// -//------------------------------------------------------------------------------ -void fpga_modular_mul_helper_multiply(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si) -//------------------------------------------------------------------------------ -{ - int w; // counter - int t, x; // more counters - int j, i; // word indices - FPGA_WORD p; // product - - // buffers for smaller words that multipliers can handle - FPGA_WORD_REDUCED ai[2*OPERAND_NUM_WORDS]; - FPGA_WORD_REDUCED bj[2*OPERAND_NUM_WORDS]; - - // split a and b into smaller words - for (w=0; wwords[w], ai[2*w + 1] = (FPGA_WORD_REDUCED)(a->words[w] >> (FPGA_WORD_WIDTH / 2)), - bj[2*w] = (FPGA_WORD_REDUCED)b->words[w], bj[2*w + 1] = (FPGA_WORD_REDUCED)(b->words[w] >> (FPGA_WORD_WIDTH / 2)); - - // accumulators - FPGA_WORD_EXTENDED mac[2*OPERAND_NUM_WORDS]; - - // clear accumulators - for (w=0; w<(2*OPERAND_NUM_WORDS); w++) mac[w] = 0; - - // run the crazy algorithm :) - for (t=0; t<(2*OPERAND_NUM_WORDS); t++) - { - // save upper half of si[] (one word per cycle) - if (t > 0) - { si[4*OPERAND_NUM_WORDS - (t+1)] = mac[t]; - mac[t] = 0; - } - - // update index - j = 2*OPERAND_NUM_WORDS - (t+1); - - // parallel multiplication - for (x=0; x<(2*OPERAND_NUM_WORDS); x++) - { - // update index - i = t - x; - if (i < 0) i += 2*OPERAND_NUM_WORDS; - - // multiply... - fpga_lowlevel_mul16(ai[i], bj[j], &p); - - // ...accumulate - mac[x] += p; - } - - } - - // now finally save lower half of si[] (2*OPERAND_NUM_WORDS words at once) - for (w=0; w<(2*OPERAND_NUM_WORDS); w++) - si[w] = mac[2*OPERAND_NUM_WORDS - (w+1)]; -} - - -//------------------------------------------------------------------------------ -// -// Accumulation of partial words into full-size product. -// -// This routine implements the Algorithm 4. from "Ultra High Performance ECC -// over NIST Primes on Commercial FPGAs". -// -// Input words si[] are accumulated into full-size product c[]. -// -// The algorithm is a bit tricky, there are 4*OPERAND_NUM_WORDS-1 words in -// si[]. Complete operation takes 2*OPERAND_NUM_WORDS cycles, even words are -// summed in full, odd words are split into two parts. During every cycle the -// upper part of the previous word and the lower part of the following word are -// summed too. -// -//------------------------------------------------------------------------------ -void fpga_modular_mul_helper_accumulate(FPGA_WORD_EXTENDED *si, FPGA_WORD *c) -//------------------------------------------------------------------------------ -{ - int w; // word counter - FPGA_WORD_EXTENDED cw0, cw1; // intermediate sums - FPGA_WORD_REDUCED cw_carry; // wide carry - - // clear carry - cw_carry = 0; - - // execute the algorithm - for (w=0; w<(2*OPERAND_NUM_WORDS); w++) - { - // handy flags - bool w_is_first = (w == 0); - bool w_is_last = (w == (2*OPERAND_NUM_WORDS-1)); - - // accumulate full current even word... - // ...and also the upper part of the previous odd word (if not the first word) - fpga_lowlevel_add48(si[2*w], w_is_first ? 0 : si[2*w-1] >> (FPGA_WORD_WIDTH / 2), &cw0); - - // generate another word from "carry" part of the previous even word... - // ...and also the lower part of the following odd word (if not the last word) - cw1 = w_is_last ? 0 : (FPGA_WORD)(si[2*w+1] << (FPGA_WORD_WIDTH / 2)); - cw1 |= (FPGA_WORD_EXTENDED)cw_carry; - - // accumulate once again - fpga_lowlevel_add48(cw0, cw1, &cw1); - - // store current word... - c[w] = (FPGA_WORD)cw1; - - // ...and carry - cw_carry = (FPGA_WORD_REDUCED) (cw1 >> FPGA_WORD_WIDTH); - } -} - - -//------------------------------------------------------------------------------ -// -// Fast modular reduction for NIST prime P-256. -// -// p = c mod p256 -// -// This routine implements the algorithm 2.29 from "Guide to Elliptic Curve -// Cryptography". -// -// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c -// on the other hand is the output of the parallelized Comba multiplier, so it -// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS). -// -// To save FPGA resources, the calculation is done using only two adders and -// one subtractor. The algorithm is split into five steps. -// -//------------------------------------------------------------------------------ -#if USE_CURVE == 1 -void fpga_modular_mul_helper_reduce_p256(FPGA_WORD *c, FPGA_BUFFER *p) -{ - // "funny" words - FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9; - - // compose "funny" words out of input words - s1.words[7] = c[ 7], s1.words[6] = c[ 6], s1.words[5] = c[ 5], s1.words[4] = c[ 4], s1.words[3] = c[ 3], s1.words[2] = c[ 2], s1.words[1] = c[ 1], s1.words[0] = c[ 0]; - s2.words[7] = c[15], s2.words[6] = c[14], s2.words[5] = c[13], s2.words[4] = c[12], s2.words[3] = c[11], s2.words[2] = 0, s2.words[1] = 0, s2.words[0] = 0; - s3.words[7] = 0, s3.words[6] = c[15], s3.words[5] = c[14], s3.words[4] = c[13], s3.words[3] = c[12], s3.words[2] = 0, s3.words[1] = 0, s3.words[0] = 0; - s4.words[7] = c[15], s4.words[6] = c[14], s4.words[5] = 0, s4.words[4] = 0, s4.words[3] = 0, s4.words[2] = c[10], s4.words[1] = c[ 9], s4.words[0] = c[ 8]; - s5.words[7] = c[ 8], s5.words[6] = c[13], s5.words[5] = c[15], s5.words[4] = c[14], s5.words[3] = c[13], s5.words[2] = c[11], s5.words[1] = c[10], s5.words[0] = c[ 9]; - s6.words[7] = c[10], s6.words[6] = c[ 8], s6.words[5] = 0, s6.words[4] = 0, s6.words[3] = 0, s6.words[2] = c[13], s6.words[1] = c[12], s6.words[0] = c[11]; - s7.words[7] = c[11], s7.words[6] = c[ 9], s7.words[5] = 0, s7.words[4] = 0, s7.words[3] = c[15], s7.words[2] = c[14], s7.words[1] = c[13], s7.words[0] = c[12]; - s8.words[7] = c[12], s8.words[6] = 0, s8.words[5] = c[10], s8.words[4] = c[ 9], s8.words[3] = c[ 8], s8.words[2] = c[15], s8.words[1] = c[14], s8.words[0] = c[13]; - s9.words[7] = c[13], s9.words[6] = 0, s9.words[5] = c[11], s9.words[4] = c[10], s9.words[3] = c[ 9], s9.words[2] = 0, s9.words[1] = c[15], s9.words[0] = c[14]; - - // intermediate results - FPGA_BUFFER sum0, sum1, difference; - - /* Step 1. */ - fpga_modular_add(&s2, &s2, &sum0); // sum0 = 2*s2 - fpga_modular_add(&s3, &s3, &sum1); // sum1 = 2*s3 - fpga_modular_sub(&ecdsa_zero, &s6, &difference); // difference = -s6 - - /* Step 2. */ - fpga_modular_add(&sum0, &s1, &sum0); // sum0 = s1 + 2*s2 - fpga_modular_add(&sum1, &s4, &sum1); // sum1 = s4 + 2*s3 - fpga_modular_sub(&difference, &s7, &difference); // difference = -(s6 + s7) - - /* Step 3. */ - fpga_modular_add(&sum0, &s5, &sum0); // sum0 = s1 + 2*s2 + s5 - fpga_modular_add(&sum1, &ecdsa_zero, &sum1); // compulsory cycle to keep sum1 constant for next stage - fpga_modular_sub(&difference, &s8, &difference); // difference = -(s6 + s7 + s8) - - /* Step 4. */ - fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5 -// fpga_modular_add(, , &sum1); // dummy cycle, result ignored - fpga_modular_sub(&difference, &s9, &difference); // difference = -(s6 + s7 + s8 + s9) - - /* Step 5. */ - fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + 2*s3 + s4 + s5 - s6 - s7 - s8 - s9 -// fpga_modular_add(, , &sum1); // dummy cycle, result ignored -// fpga_modular_add(, , &difference); // dummy cycle, result ignored -} -#endif - - -//------------------------------------------------------------------------------ -// -// Fast modular reduction for NIST prime P-384. -// -// p = c mod p384 -// -// This routine implements the algorithm 2.30 from "Guide to Elliptic Curve -// Cryptography". -// -// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c -// on the other hand is the output of the parallelized Comba multiplier, so it -// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS). -// -// To save FPGA resources, the calculation is done using only two adders and -// one subtractor. The algorithm is split into five steps. -// -//------------------------------------------------------------------------------ -#if USE_CURVE == 2 -void fpga_modular_mul_helper_reduce_p384(FPGA_WORD *c, FPGA_BUFFER *p) -{ - // "funny" words - FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; - - // compose "funny" words - s1.words[11] = c[11], s1.words[10] = c[10], s1.words[ 9] = c[ 9], s1.words[ 8] = c[ 8], s1.words[ 7] = c[ 7], s1.words[ 6] = c[ 6], s1.words[ 5] = c[ 5], s1.words[ 4] = c[ 4], s1.words[ 3] = c[ 3], s1.words[ 2] = c[ 2], s1.words[ 1] = c[ 1], s1.words[ 0] = c[ 0]; - s2.words[11] = 0, s2.words[10] = 0, s2.words[ 9] = 0, s2.words[ 8] = 0, s2.words[ 7] = 0, s2.words[ 6] = c[23], s2.words[ 5] = c[22], s2.words[ 4] = c[21], s2.words[ 3] = 0, s2.words[ 2] = 0, s2.words[ 1] = 0, s2.words[ 0] = 0; - s3.words[11] = c[23], s3.words[10] = c[22], s3.words[ 9] = c[21], s3.words[ 8] = c[20], s3.words[ 7] = c[19], s3.words[ 6] = c[18], s3.words[ 5] = c[17], s3.words[ 4] = c[16], s3.words[ 3] = c[15], s3.words[ 2] = c[14], s3.words[ 1] = c[13], s3.words[ 0] = c[12]; - s4.words[11] = c[20], s4.words[10] = c[19], s4.words[ 9] = c[18], s4.words[ 8] = c[17], s4.words[ 7] = c[16], s4.words[ 6] = c[15], s4.words[ 5] = c[14], s4.words[ 4] = c[13], s4.words[ 3] = c[12], s4.words[ 2] = c[23], s4.words[ 1] = c[22], s4.words[ 0] = c[21]; - s5.words[11] = c[19], s5.words[10] = c[18], s5.words[ 9] = c[17], s5.words[ 8] = c[16], s5.words[ 7] = c[15], s5.words[ 6] = c[14], s5.words[ 5] = c[13], s5.words[ 4] = c[12], s5.words[ 3] = c[20], s5.words[ 2] = 0, s5.words[ 1] = c[23], s5.words[ 0] = 0; - s6.words[11] = 0, s6.words[10] = 0, s6.words[ 9] = 0, s6.words[ 8] = 0, s6.words[ 7] = c[23], s6.words[ 6] = c[22], s6.words[ 5] = c[21], s6.words[ 4] = c[20], s6.words[ 3] = 0, s6.words[ 2] = 0, s6.words[ 1] = 0, s6.words[ 0] = 0; - s7.words[11] = 0, s7.words[10] = 0, s7.words[ 9] = 0, s7.words[ 8] = 0, s7.words[ 7] = 0, s7.words[ 6] = 0, s7.words[ 5] = c[23], s7.words[ 4] = c[22], s7.words[ 3] = c[21], s7.words[ 2] = 0, s7.words[ 1] = 0, s7.words[ 0] = c[20]; - s8.words[11] = c[22], s8.words[10] = c[21], s8.words[ 9] = c[20], s8.words[ 8] = c[19], s8.words[ 7] = c[18], s8.words[ 6] = c[17], s8.words[ 5] = c[16], s8.words[ 4] = c[15], s8.words[ 3] = c[14], s8.words[ 2] = c[13], s8.words[ 1] = c[12], s8.words[ 0] = c[23]; - s9.words[11] = 0, s9.words[10] = 0, s9.words[ 9] = 0, s9.words[ 8] = 0, s9.words[ 7] = 0, s9.words[ 6] = 0, s9.words[ 5] = 0, s9.words[ 4] = c[23], s9.words[ 3] = c[22], s9.words[ 2] = c[21], s9.words[ 1] = c[20], s9.words[ 0] = 0; - s10.words[11] = 0, s10.words[10] = 0, s10.words[ 9] = 0, s10.words[ 8] = 0, s10.words[ 7] = 0, s10.words[ 6] = 0, s10.words[ 5] = 0, s10.words[ 4] = c[23], s10.words[ 3] = c[23], s10.words[ 2] = 0, s10.words[ 1] = 0, s10.words[ 0] = 0; - - // intermediate results - FPGA_BUFFER sum0, sum1, difference; - - /* Step 1. */ - fpga_modular_add(&s1, &s3, &sum0); // sum0 = s1 + s3 - fpga_modular_add(&s2, &s2, &sum1); // sum1 = 2*s2 - fpga_modular_sub(&ecdsa_zero, &s8, &difference); // difference = -s8 - - /* Step 2. */ - fpga_modular_add(&sum0, &s4, &sum0); // sum0 = s1 + s3 + s4 - fpga_modular_add(&sum1, &s5, &sum1); // sum1 = 2*s2 + s5 - fpga_modular_sub(&difference, &s9, &difference); // difference = -(s8 + s9) - - /* Step 3. */ - fpga_modular_add(&sum0, &s6, &sum0); // sum0 = s1 + s3 + s4 + s6 - fpga_modular_add(&sum1, &s7, &sum1); // sum1 = 2*s2 + s5 + s7 - fpga_modular_sub(&difference, &s10, &difference); // difference = -(s8 + s9 + s10) - - /* Step 4. */ - fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5 -// fpga_modular_add(, , &sum1); // dummy cycle, result ignored - fpga_modular_sub(&difference, &ecdsa_zero, &difference); // compulsory cycle to keep difference constant for next stage - - /* Step 5. */ - fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10 -// fpga_modular_add(, , &sum1); // dummy cycle, result ignored -// fpga_modular_add(, , &difference); // dummy cycle, result ignored -} -#endif - - -//------------------------------------------------------------------------------ -// -// Multi-word shift to the left by 1 bit. -// -// y = x << 1 -// -//------------------------------------------------------------------------------ -void fpga_modular_inv_helper_shl(FPGA_WORD *x, FPGA_WORD *y) -//------------------------------------------------------------------------------ -{ - int w; // word counter - FPGA_WORD carry_in, carry_out; // carries - - carry_in = 0; // first word has no carry - - // shift word-by-word - for (w=0; w<=OPERAND_NUM_WORDS; w++) - carry_out = x[w] >> (FPGA_WORD_WIDTH - 1), // store next carry - y[w] = x[w] << 1, // shift - y[w] |= carry_in, // process carry - carry_in = carry_out; // propagate carry -} - - -//------------------------------------------------------------------------------ -// -// Multi-word shift to the right by 1 bit. -// -// y = x >> 1 -// -//------------------------------------------------------------------------------ -void fpga_modular_inv_helper_shr(FPGA_WORD *x, FPGA_WORD *y) -//------------------------------------------------------------------------------ -{ - int w; // word counter - FPGA_WORD carry_in, carry_out; // carries - - carry_in = 0; // first word has no carry - - // shift word-by-word - for (w=OPERAND_NUM_WORDS; w>=0; w--) - carry_out = x[w], // store next carry - y[w] = x[w] >> 1, // shift - y[w] |= carry_in << (FPGA_WORD_WIDTH - 1), // process carry - carry_in = carry_out; // propagate carry -} - - -//------------------------------------------------------------------------------ -// -// Multi-word addition. -// -// s = x + y -// -//------------------------------------------------------------------------------ -void fpga_modular_inv_helper_add(FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *s) -//------------------------------------------------------------------------------ -{ - int w; // word counter - bool carry_in, carry_out; // carries - - // lowest word has no carry - carry_in = false; - - // sum a and b word-by-word - for (w=0; w<=OPERAND_NUM_WORDS; w++) - { - // low-level addition - fpga_lowlevel_add32(x[w], y[w], carry_in, &s[w], &carry_out); - - // propagate carry bit - carry_in = carry_out; - } -} - - -//------------------------------------------------------------------------------ -// -// Multi-word subtraction . -// -// d = x - y -// -//------------------------------------------------------------------------------ -void fpga_modular_inv_helper_sub(FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *d) -//------------------------------------------------------------------------------ -{ - int w; // word counter - bool borrow_in, borrow_out; // borrows - - // lowest word has no borrow - borrow_in = false; - - // subtract b from a word-by-word - for (w=0; w<=OPERAND_NUM_WORDS; w++) - { - // low-level subtraction - fpga_lowlevel_sub32(x[w], y[w], borrow_in, &d[w], &borrow_out); - - // propagate borrow bit - borrow_in = borrow_out; - } - -} - - -//------------------------------------------------------------------------------ -// -// Multi-word copy. -// -// dst = src -// -//------------------------------------------------------------------------------ -void fpga_modular_inv_helper_cpy(FPGA_WORD *dst, FPGA_WORD *src) -//------------------------------------------------------------------------------ -{ - int w; // word counter - - // copy all the words from src into dst - for (w=0; w<=OPERAND_NUM_WORDS; w++) - dst[w] = src[w]; -} - - -//------------------------------------------------------------------------------ -// -// Multi-word comparison. -// -// The return value is -1 when ab. -// -//------------------------------------------------------------------------------ -void fpga_modular_inv_helper_cmp(FPGA_WORD *a, FPGA_WORD *b, int *c) -//------------------------------------------------------------------------------ -{ - int w; // word counter - int r, r0, ra, rb; // result - bool borrow; // borrow - FPGA_WORD d; // partial difference - - // result is unknown so far - r = 0; - - // clear borrow for the very first word - borrow = false; - - // compare a and b word-by-word - for (w=OPERAND_NUM_WORDS; w>=0; w--) - { - // save result - r0 = r; - - // subtract current words - fpga_lowlevel_sub32(a[w], b[w], false, &d, &borrow); - - // analyze flags - rb = borrow ? -1 : 0; // a[w] < b[w] - ra = (!borrow && (d != 0)) ? 1 : 0; // a[w] > b[w] - - // - // Note, that ra is either 0 or 1, rb is either 0 or -1 and they - // can never be non-zero at the same time. - // - // Note, that r can only be updated if comparison result has not - // been resolved yet. Even if we already know comparison result, - // we continue doing dummy subtractions to keep run-time constant. - // - - // update result - r = (r == 0) ? ra + rb : r0; - } - - // done - *c = r; -} - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/fpga_modular.h b/fpga_modular.h deleted file mode 100644 index 4d31725..0000000 --- a/fpga_modular.h +++ /dev/null @@ -1,87 +0,0 @@ -//------------------------------------------------------------------------------ -// -// fpga_modular.h -// --------------------------- -// Modular arithmetic routines -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - - -//------------------------------------------------------------------------------ -// Globals -//------------------------------------------------------------------------------ -extern FPGA_BUFFER ecdsa_q; -extern FPGA_BUFFER ecdsa_zero; -extern FPGA_BUFFER ecdsa_one; -extern FPGA_BUFFER ecdsa_delta; - - -//------------------------------------------------------------------------------ -// Prototypes -//------------------------------------------------------------------------------ -void fpga_modular_init (); - -void fpga_modular_add (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *s); -void fpga_modular_sub (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *d); -void fpga_modular_mul (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *p); -void fpga_modular_inv (FPGA_BUFFER *a, FPGA_BUFFER *a1); - -void fpga_modular_mul_helper_multiply (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si); -void fpga_modular_mul_helper_accumulate (FPGA_WORD_EXTENDED *si, FPGA_WORD *c); -void fpga_modular_mul_helper_reduce_p256 (FPGA_WORD *c, FPGA_BUFFER *p); -void fpga_modular_mul_helper_reduce_p384 (FPGA_WORD *c, FPGA_BUFFER *p); - -void fpga_modular_inv_helper_shl (FPGA_WORD *x, FPGA_WORD *y); -void fpga_modular_inv_helper_shr (FPGA_WORD *x, FPGA_WORD *y); -void fpga_modular_inv_helper_add (FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *s); -void fpga_modular_inv_helper_sub (FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *d); -void fpga_modular_inv_helper_cpy (FPGA_WORD *dst, FPGA_WORD *src); -void fpga_modular_inv_helper_cmp (FPGA_WORD *a, FPGA_WORD *b, int *c); - - -//------------------------------------------------------------------------------ -// Reduction Routine Selection -//------------------------------------------------------------------------------ - -#if USE_CURVE == 1 -#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p256 -#elif USE_CURVE == 2 -#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p384 -#else -#error USE_CURVE must be either 1 or 2! -#endif - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/fpga_util.cpp b/fpga_util.cpp deleted file mode 100644 index 6f7a834..0000000 --- a/fpga_util.cpp +++ /dev/null @@ -1,86 +0,0 @@ -//------------------------------------------------------------------------------ -// -// fpga_util.cpp -// --------------------- -// Utility FPGA routines -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - - -//------------------------------------------------------------------------------ -// Headers -//------------------------------------------------------------------------------ -#include -#include "ecdsa_model.h" -#include "fpga_lowlevel.h" -#include "fpga_util.h" - - -//------------------------------------------------------------------------------ -bool fpga_buffer_is_zero(FPGA_BUFFER *x) -//------------------------------------------------------------------------------ -// -// Returns true when large multi-word integer x is zero. -// -//------------------------------------------------------------------------------ -{ - int w; // word counter - - // try to find non-zero words - for (w=0; wwords[w]) return false; - - // no non-zero words found - return true; -} - - -//------------------------------------------------------------------------------ -void fpga_buffer_copy(FPGA_BUFFER *src, FPGA_BUFFER *dst) -//------------------------------------------------------------------------------ -// -// Copies large multi-word integer from src into dst. -// -//------------------------------------------------------------------------------ -{ - int w; // word counter - - // copy all the words from src into dst - for (w=0; wwords[w] = src->words[w]; -} - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/fpga_util.h b/fpga_util.h deleted file mode 100644 index 24b2aa2..0000000 --- a/fpga_util.h +++ /dev/null @@ -1,49 +0,0 @@ -//------------------------------------------------------------------------------ -// -// fpga_util.h -// --------------------- -// Utility FPGA routines -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - - -//------------------------------------------------------------------------------ -// Prototypes -//------------------------------------------------------------------------------ -bool fpga_buffer_is_zero (FPGA_BUFFER *x); -void fpga_buffer_copy (FPGA_BUFFER *src, FPGA_BUFFER *dst); - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/test_vectors/charlie_p256.key b/test_vectors/charlie_p256.key new file mode 100644 index 0000000..34b5c20 --- /dev/null +++ b/test_vectors/charlie_p256.key @@ -0,0 +1,8 @@ +-----BEGIN EC PARAMETERS----- +BggqhkjOPQMBBw== +-----END EC PARAMETERS----- +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIFA+WK/POvMze/U0CURQErgftFSMTSsTAqYX+bHQDX3goAoGCCqGSM49 +AwEHoUQDQgAE2TSbSLDujDYTmx147cGRGyUId/t61Erhi7L4pvTcyuXgoFbotQkq +bHKPS8iQQ/vifCYnRN+9rxeD/C4BsGB3Gw== +-----END EC PRIVATE KEY----- diff --git a/test_vectors/charlie_p384.key b/test_vectors/charlie_p384.key new file mode 100644 index 0000000..8894220 --- /dev/null +++ b/test_vectors/charlie_p384.key @@ -0,0 +1,9 @@ +-----BEGIN EC PARAMETERS----- +BgUrgQQAIg== +-----END EC PARAMETERS----- +-----BEGIN EC PRIVATE KEY----- +MIGkAgEBBDC7SgnY5SfwYmZetNCzmh3OlNqixZNbMWOwkPB5PuNqWmKnBV1dhQ0b +FdxlbWXYs6KgBwYFK4EEACKhZANiAAS4Aj5grkLqFGMw8sOIMJbKlhsR9d/qSh1l +6Y5kszUn+1cibbSKUUMlHvBr3veOtXrTxmRpYlqqraNH4QM8FHS2NDqTaP8pRQG7 +1TscxJ/ZctpDnJ2oJ+IwJyDit43RT54= +-----END EC PRIVATE KEY----- diff --git a/test_vectors/ecdsa_test_vector_nsa.h b/test_vectors/ecdsa_test_vector_nsa.h new file mode 100644 index 0000000..762f284 --- /dev/null +++ b/test_vectors/ecdsa_test_vector_nsa.h @@ -0,0 +1,56 @@ +/* Values from "Suite B Implementer's Guide to FIPS 186-3 (ECDSA)" */ + +#define ECDSA_P256_D_NSA_INIT \ + {0x70a12c2d, 0xb16845ed, 0x56ff68cf, 0xc21a472b, \ + 0x3f04d7d6, 0x851bf634, 0x9f2d7d5b, 0x3452b38a} + +#define ECDSA_P256_QX_NSA_INIT \ + {0x8101ece4, 0x7464a6ea, 0xd70cf69a, 0x6e2bd3d8, \ + 0x8691a326, 0x2d22cba4, 0xf7635eaf, 0xf26680a8} + +#define ECDSA_P256_QY_NSA_INIT \ + {0xd8a12ba6, 0x1d599235, 0xf67d9cb4, 0xd58f1783, \ + 0xd3ca43e7, 0x8f0a5aba, 0xa6240799, 0x36c0c3a9} + +#define ECDSA_P256_K_NSA_INIT \ + {0x580ec00d, 0x85643433, 0x4cef3f71, 0xecaed496, \ + 0x5b12ae37, 0xfa47055b, 0x1965c7b1, 0x34ee45d0} + +#define ECDSA_P256_RX_NSA_INIT \ + {0x7214bc96, 0x47160bbd, 0x39ff2f80, 0x533f5dc6, \ + 0xddd70ddf, 0x86bb8156, 0x61e805d5, 0xd4e6f27c} + +#define ECDSA_P256_RY_NSA_INIT \ + {0x8b81e3e9, 0x77597110, 0xc7cf2633, 0x435b2294, \ + 0xb7264298, 0x7defd3d4, 0x007e1cfc, 0x5df84541} + + +#define ECDSA_P384_D_NSA_INIT \ + {0xc838b852, 0x53ef8dc7, 0x394fa580, 0x8a518398, \ + 0x1c7deef5, 0xa69ba8f4, 0xf2117ffe, 0xa39cfcd9, \ + 0x0e95f6cb, 0xc854abac, 0xab701d50, 0xc1f3cf24} + +#define ECDSA_P384_QX_NSA_INIT \ + {0x1fbac8ee, 0xbd0cbf35, 0x640b39ef, 0xe0808dd7, \ + 0x74debff2, 0x0a2a329e, 0x91713baf, 0x7d7f3c3e, \ + 0x81546d88, 0x3730bee7, 0xe48678f8, 0x57b02ca0} + +#define ECDSA_P384_QY_NSA_INIT \ + {0xeb213103, 0xbd68ce34, 0x3365a8a4, 0xc3d4555f, \ + 0xa385f533, 0x0203bdd7, 0x6ffad1f3, 0xaffb9575, \ + 0x1c132007, 0xe1b24035, 0x3cb0a4cf, 0x1693bdf9} + +#define ECDSA_P384_K_NSA_INIT \ + {0xdc6b4403, 0x6989a196, 0xe39d1cda, 0xc000812f, \ + 0x4bdd8b2d, 0xb41bb33a, 0xf5137258, 0x5ebd1db6, \ + 0x3f0ce827, 0x5aa1fd45, 0xe2d2a735, 0xf8749359} + +#define ECDSA_P384_RX_NSA_INIT \ + {0xa0c27ec8, 0x93092dea, 0x1e1bd2cc, 0xfed3cf94, \ + 0x5c8134ed, 0x0c9f8131, 0x1a0f4a05, 0x942db8db, \ + 0xed8dd59f, 0x267471d5, 0x462aa14f, 0xe72de856} + +#define ECDSA_P384_RY_NSA_INIT \ + {0x85564940, 0x9815bb91, 0x424eaca5, 0xfd76c973, \ + 0x75d575d1, 0x422ec53d, 0x343bd33b, 0x847fdf0c, \ + 0x11569685, 0xb528ab25, 0x49301542, 0x8d7cf72b} diff --git a/test_vectors/ecdsa_test_vector_randomized.h b/test_vectors/ecdsa_test_vector_randomized.h new file mode 100644 index 0000000..1f334c2 --- /dev/null +++ b/test_vectors/ecdsa_test_vector_randomized.h @@ -0,0 +1,29 @@ +/* Generated automatically, do not edit. */ + +#define ECDSA_P256_D_RANDOM_INIT \ + {0x503e58af, 0xcf3af333, 0x7bf53409, 0x445012b8, \ + 0x1fb4548c, 0x4d2b1302, 0xa617f9b1, 0xd00d7de0} + +#define ECDSA_P256_QX_RANDOM_INIT \ + {0xd9349b48, 0xb0ee8c36, 0x139b1d78, 0xedc1911b, \ + 0x250877fb, 0x7ad44ae1, 0x8bb2f8a6, 0xf4dccae5} + +#define ECDSA_P256_QY_RANDOM_INIT \ + {0xe0a056e8, 0xb5092a6c, 0x728f4bc8, 0x9043fbe2, \ + 0x7c262744, 0xdfbdaf17, 0x83fc2e01, 0xb060771b} + +#define ECDSA_P384_D_RANDOM_INIT \ + {0xbb4a09d8, 0xe527f062, 0x665eb4d0, 0xb39a1dce, \ + 0x94daa2c5, 0x935b3163, 0xb090f079, 0x3ee36a5a, \ + 0x62a7055d, 0x5d850d1b, 0x15dc656d, 0x65d8b3a2} + +#define ECDSA_P384_QX_RANDOM_INIT \ + {0xb8023e60, 0xae42ea14, 0x6330f2c3, 0x883096ca, \ + 0x961b11f5, 0xdfea4a1d, 0x65e98e64, 0xb33527fb, \ + 0x57226db4, 0x8a514325, 0x1ef06bde, 0xf78eb57a} + +#define ECDSA_P384_QY_RANDOM_INIT \ + {0xd3c66469, 0x625aaaad, 0xa347e103, 0x3c1474b6, \ + 0x343a9368, 0xff294501, 0xbbd53b1c, 0xc49fd972, \ + 0xda439c9d, 0xa827e230, 0x2720e2b7, 0x8dd14f9e} + diff --git a/test_vectors/ecdsa_test_vector_randomized.vh b/test_vectors/ecdsa_test_vector_randomized.vh new file mode 100644 index 0000000..6c5cf80 --- /dev/null +++ b/test_vectors/ecdsa_test_vector_randomized.vh @@ -0,0 +1,29 @@ +/* Generated automatically, do not edit. */ + +localparam [255:0] ECDSA_P256_D_RANDOM = + {32'h503e58af, 32'hcf3af333, 32'h7bf53409, 32'h445012b8, + 32'h1fb4548c, 32'h4d2b1302, 32'ha617f9b1, 32'hd00d7de0}; + +localparam [255:0] ECDSA_P256_QX_RANDOM = + {32'hd9349b48, 32'hb0ee8c36, 32'h139b1d78, 32'hedc1911b, + 32'h250877fb, 32'h7ad44ae1, 32'h8bb2f8a6, 32'hf4dccae5}; + +localparam [255:0] ECDSA_P256_QY_RANDOM = + {32'he0a056e8, 32'hb5092a6c, 32'h728f4bc8, 32'h9043fbe2, + 32'h7c262744, 32'hdfbdaf17, 32'h83fc2e01, 32'hb060771b}; + +localparam [383:0] ECDSA_P384_D_RANDOM = + {32'hbb4a09d8, 32'he527f062, 32'h665eb4d0, 32'hb39a1dce, + 32'h94daa2c5, 32'h935b3163, 32'hb090f079, 32'h3ee36a5a, + 32'h62a7055d, 32'h5d850d1b, 32'h15dc656d, 32'h65d8b3a2}; + +localparam [383:0] ECDSA_P384_QX_RANDOM = + {32'hb8023e60, 32'hae42ea14, 32'h6330f2c3, 32'h883096ca, + 32'h961b11f5, 32'hdfea4a1d, 32'h65e98e64, 32'hb33527fb, + 32'h57226db4, 32'h8a514325, 32'h1ef06bde, 32'hf78eb57a}; + +localparam [383:0] ECDSA_P384_QY_RANDOM = + {32'hd3c66469, 32'h625aaaad, 32'ha347e103, 32'h3c1474b6, + 32'h343a9368, 32'hff294501, 32'hbbd53b1c, 32'hc49fd972, + 32'hda439c9d, 32'ha827e230, 32'h2720e2b7, 32'h8dd14f9e}; + diff --git a/test_vectors/format_random_test_vector.py b/test_vectors/format_random_test_vector.py new file mode 100644 index 0000000..861bab5 --- /dev/null +++ b/test_vectors/format_random_test_vector.py @@ -0,0 +1,327 @@ +# +# format_random_test_vector.py +# ------------------------------------------ +# Formats test vector for ecdsa_fpga_model +# +# Author: Pavel Shatov +# Copyright (c) 2017-2018, NORDUnet A/S +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the NORDUnet nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +# +USAGE = "USAGE: format_random_test_vector.py [openssl_binary]" +# + + +# +# This script reads the test vector generated by the script +# regenerate_random_test_vector.py and writes nicely formatted C header file +# and Verilog include file. +# + + +# +# imports +# +import sys +import subprocess + + +# +# list of curve names of interest +# +CURVE_P256 = "p256" +CURVE_P384 = "p384" + + +# +# variables +# +OPENSSL = "" + + +# +# format one test vector +# +def format_c_header(f, curve, da, qax, qay): + + if curve == CURVE_P256: curve_str = "ECDSA_P256" + if curve == CURVE_P384: curve_str = "ECDSA_P384" + + # write all numbers in vector + format_c_array(f, da, "#define " + curve_str + "_D_RANDOM_INIT" + " \\\n") + format_c_array(f, qax, "#define " + curve_str + "_QX_RANDOM_INIT" + " \\\n") + format_c_array(f, qay, "#define " + curve_str + "_QY_RANDOM_INIT" + " \\\n") + + +# +# format one test vector +# +def format_verilog_include(f, curve, da, qax, qay): + + if curve == CURVE_P256: curve_str, msb_index = "ECDSA_P256", "255" + if curve == CURVE_P384: curve_str, msb_index = "ECDSA_P384", "383" + + # write all numbers in vector + format_verilog_concatenation(f, da, "localparam [" + msb_index + ":0] " + curve_str + "_D_RANDOM " + " =\n") + format_verilog_concatenation(f, qax, "localparam [" + msb_index + ":0] " + curve_str + "_QX_RANDOM" + " =\n") + format_verilog_concatenation(f, qay, "localparam [" + msb_index + ":0] " + curve_str + "_QY_RANDOM" + " =\n") + + +# +# nicely format multi-word integer into C array initializer +# +def format_c_array(f, n, s): + + # print '#define XYZ \' + f.write(s) + + # convert number to hex string and prepend it with zeroes if necessary + n_hex = hex(n).lstrip("0x").rstrip("L") + while (len(n_hex) % 8) > 0: + n_hex = "0" + n_hex + + # get number of 32-bit words + num_words = len(n_hex) // 8 + + # print all words in n + w = 0 + while w < num_words: + + n_part = "" + + # add tab for every new line + if w == 0: + n_part += "\t{" + elif (w % 4) == 0: + n_part += "\t " + + # add current word + n_part += "0x" + n_hex[8 * w : 8 * (w + 1)] + + # add separator or newline + if (w + 1) == num_words: + n_part += "}\n" + else: + n_part += ", " + if (w % 4) == 3: + n_part += "\\\n" + + w += 1 + + # write current part + f.write(n_part) + + # write final newline + f.write("\n") + + +def format_verilog_concatenation(f, n, s): + + # print 'localparam ZZZ =' + f.write(s) + + # convert number to hex string and prepend it with zeroes if necessary + n_hex = hex(n).split("0x")[1] + while (len(n_hex) % 8) > 0: + n_hex = "0" + n_hex + + # get number of 32-bit words + num_words = len(n_hex) // 8 + + # print all words in n + w = 0 + while w < num_words: + + n_part = "" + + if w == 0: + n_part += "\t{" + elif (w % 4) == 0: + n_part += "\t " + + n_part += "32'h" + n_hex[8 * w : 8 * (w + 1)] + + if (w + 1) == num_words: + n_part += "};\n" + else: + n_part += ", " + if (w % 4) == 3: + n_part += "\n" + w += 1 + + f.write(n_part) + + f.write("\n") + + + # + # returns d, qx, qy, where + # d is private key and qx, qy is the corresponding public key + # +def get_key(openssl, party, curve): + + # generate private key filename + key_file = party + "_" + curve + ".key" + + # retrieve key components using openssl + openssl_command = [openssl, "ec", "-in", key_file, "-noout", "-text"] + openssl_stdout = subprocess.check_output(openssl_command).decode("utf-8") + stdout_lines = openssl_stdout.splitlines() + + found_priv = False + found_pub = False + + key_priv = "" + key_pub = "" + + # process lines looking for "priv:" and "pub:" markers + for line in stdout_lines: + + # found private key marker? + if line.strip() == "priv:": + found_priv = True + found_pub = False + continue + + # found public key marker? + if line.strip() == "pub:": # openssl 1.0.2g prints 'pub: ' (extra space before newline), + found_pub = True # so we need to compare against line.strip(), not just line + found_priv = False + continue + + # found part of private key? + if found_priv: + if not line.startswith(" "): + found_priv = False + continue + else: + key_priv += line.strip() + + # found part of public key? + if found_pub: + if not line.startswith(" "): + found_pub = False + continue + else: + key_pub += line.strip() + + # do some cleanup and sanity checking on private key + # * remove extra leading zero byte if present + # * remove colons + # * check length (256 bits or 384 bits) + while key_priv.startswith("00"): + key_priv = key_priv[2:] + + key_priv = key_priv.replace(":", "") + + if curve == CURVE_P256 and len(key_priv) != 256 / 4: sys.exit() + if curve == CURVE_P384 and len(key_priv) != 384 / 4: sys.exit() + + # do some cleanup and sanity checking on public key + # * make sure, that uncompressed form marker (0x04) is present and + # then remove it + # * remove colons + # * check length (2x256 or 2x384 bits) + if not key_pub.startswith("04"): sys.exit() + + key_pub = key_pub[2:] + key_pub = key_pub.replace(":", "") + + if curve == CURVE_P256 and len(key_pub) != 2 * 256 / 4: sys.exit() + if curve == CURVE_P384 and len(key_pub) != 2 * 384 / 4: sys.exit() + + # split public key into parts + if curve == CURVE_P256: + key_pub_x = key_pub[ 0: 64] + key_pub_y = key_pub[ 64:128] + + if curve == CURVE_P384: + key_pub_x = key_pub[ 0: 96] + key_pub_y = key_pub[ 96:192] + + # convert from strings to integers + key_priv = int(key_priv, 16) + key_pub_x = int(key_pub_x, 16) + key_pub_y = int(key_pub_y, 16) + + # done + return key_priv, key_pub_x, key_pub_y + + +if __name__ == "__main__": + + # detect whether user requested some specific binary + if len(sys.argv) == 1: + OPENSSL = "openssl" + print("Using system OpenSSL library.") + elif len(sys.argv) == 2: + OPENSSL = sys.argv[1] + print("Using OpenSSL binary '" + OPENSSL + "'...") + else: + print(USAGE) + + if len(OPENSSL) > 0: + + # list of curves to process + curves = [CURVE_P256, CURVE_P384] + + # open output files + file_h = open('ecdsa_test_vector_randomized.h', 'w') + file_v = open('ecdsa_test_vector_randomized.vh', 'w') + + # write headers + file_h.write("/* Generated automatically, do not edit. */\n\n") + file_v.write("/* Generated automatically, do not edit. */\n\n") + + # process all the keys + for next_curve in curves: + + # load keys + da, qax, qay = get_key(OPENSSL, "charlie", next_curve) + + # format numbers and write to files + + format_c_header(file_h, next_curve, + da, qax, qay); + + format_verilog_include(file_v, next_curve, + da, qax, qay); + + # done + file_h.close() + file_v.close() + + # everything went just fine + print("Test vector formatted.") + +# +# End of file +# diff --git a/test_vectors/regenerate_random_test_vector.py b/test_vectors/regenerate_random_test_vector.py new file mode 100644 index 0000000..75cb761 --- /dev/null +++ b/test_vectors/regenerate_random_test_vector.py @@ -0,0 +1,91 @@ +# +# regenerate_test_vector.py +# ----------------------------------------------------------- +# Generates a new randomized test vector for ecdsa_fpga_model +# +# Author: Pavel Shatov +# Copyright (c) 2017-2018, NORDUnet A/S +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the NORDUnet nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +USAGE = "USAGE: regenerate_test_vector.py [openssl_binary]" +# + +# This script generates a test vector. The test vector contains two +# private keys. One is for P-256, the other one is for P-384. +# + +# +# imports +# +import sys +import subprocess + +OPENSSL = "" + +CURVE_P256 = "prime256v1" +CURVE_P384 = "secp384r1" + +SECRET_CHARLIE_256 = "charlie_p256.key" +SECRET_CHARLIE_384 = "charlie_p384.key" + +def openssl_ecparam_genkey(openssl, curve, file): + subprocess.call([openssl, "ecparam", "-genkey", "-name", curve, "-out", file]) + +# +# __main__ +# +if __name__ == "__main__": + + # detect whether user requested some specific binary + if len(sys.argv) == 1: + OPENSSL = "openssl" + print("Using system OpenSSL library.") + elif len(sys.argv) == 2: + OPENSSL = sys.argv[1] + print("Using OpenSSL binary '" + OPENSSL + "'...") + else: + print(USAGE) + + if len(OPENSSL) > 0: + + # generate a new private key for P-256 curve + openssl_ecparam_genkey(OPENSSL, CURVE_P256, SECRET_CHARLIE_256) + + # generate a new private key for P-384 curve + openssl_ecparam_genkey(OPENSSL, CURVE_P384, SECRET_CHARLIE_384) + + # done + print("New randomized test vector generated.") + + +# +# End of file +# -- cgit v1.2.3