diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2018-12-19 16:03:08 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2018-12-19 16:03:08 +0300 |
commit | 1f8d13bf8d2e813f0c5da653c4abffb7a817db9a (patch) | |
tree | 7b6290a838f460a9d104f28a32de08be8bcf8605 | |
parent | cae8718217846cfaefcbfecd55f9a117731a8d99 (diff) |
* New hardware architecture
* Randomized test vector
27 files changed, 4808 insertions, 2297 deletions
diff --git a/ecdsa_fpga_curve.h b/ecdsa_fpga_curve.h new file mode 100644 index 0000000..00448eb --- /dev/null +++ b/ecdsa_fpga_curve.h @@ -0,0 +1,203 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_curve.h +// ---------------------------------------------- +// Elliptic curve arithmetic procedures for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// ECDSA Parameters (P-256) +//------------------------------------------------------------------------------ + +/* Base Point */ +#define ECDSA_P256_GX_INIT \ + {0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, \ + 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296} + +#define ECDSA_P256_GY_INIT \ + {0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, \ + 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5} + +/* Doubled Base Point */ +#define ECDSA_P256_HX_INIT \ + {0x7cf27b18, 0x8d034f7e, 0x8a523803, 0x04b51ac3, \ + 0xc08969e2, 0x77f21b35, 0xa60b48fc, 0x47669978} + +#define ECDSA_P256_HY_INIT \ + {0x07775510, 0xdb8ed040, 0x293d9ac6, 0x9f7430db, \ + 0xba7dade6, 0x3ce98229, 0x9e04b79d, 0x227873d1} + +/* Order of the Base Point */ +#define ECDSA_P256_N_INIT \ + {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, \ + 0xbce6faad, 0xa7179e84, 0xf3b9cac2, 0xfc632551} + + +//------------------------------------------------------------------------------ +// ECDSA Parameters (P-384) +//------------------------------------------------------------------------------ + +/* Base Point */ +#define ECDSA_P384_GX_INIT \ + {0xaa87ca22, 0xbe8b0537, 0x8eb1c71e, 0xf320ad74, \ + 0x6e1d3b62, 0x8ba79b98, 0x59f741e0, 0x82542a38, \ + 0x5502f25d, 0xbf55296c, 0x3a545e38, 0x72760ab7} + +#define ECDSA_P384_GY_INIT \ + {0x3617de4a, 0x96262c6f, 0x5d9e98bf, 0x9292dc29, \ + 0xf8f41dbd, 0x289a147c, 0xe9da3113, 0xb5f0b8c0, \ + 0x0a60b1ce, 0x1d7e819d, 0x7a431d7c, 0x90ea0e5f} + +/* Doubled Base Point */ +#define ECDSA_P384_HX_INIT \ + {0x08d99905, 0x7ba3d2d9, 0x69260045, 0xc55b97f0, \ + 0x89025959, 0xa6f434d6, 0x51d207d1, 0x9fb96e9e, \ + 0x4fe0e86e, 0xbe0e64f8, 0x5b96a9c7, 0x5295df61} + +#define ECDSA_P384_HY_INIT \ + {0x8e80f1fa, 0x5b1b3ced, 0xb7bfe8df, 0xfd6dba74, \ + 0xb275d875, 0xbc6cc43e, 0x904e505f, 0x256ab425, \ + 0x5ffd43e9, 0x4d39e22d, 0x61501e70, 0x0a940e80} + +/* Order of the Base Point */ +#define ECDSA_P384_N_INIT \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff, 0xffffffff, 0xc7634d81, 0xf4372ddf, \ + 0x581a0db2, 0x48b0a77a, 0xecec196a, 0xccc52973} + +//------------------------------------------------------------------------------ +// ECDSA Parameters Switch +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 + +#define ECDSA_GX_INIT ECDSA_P256_GX_INIT +#define ECDSA_GY_INIT ECDSA_P256_GY_INIT +#define ECDSA_HX_INIT ECDSA_P256_HX_INIT +#define ECDSA_HY_INIT ECDSA_P256_HY_INIT + +#define ECDSA_N_INIT ECDSA_P256_N_INIT + +#elif USE_CURVE == 2 + +#define ECDSA_GX_INIT ECDSA_P384_GX_INIT +#define ECDSA_GY_INIT ECDSA_P384_GY_INIT +#define ECDSA_HX_INIT ECDSA_P384_HX_INIT +#define ECDSA_HY_INIT ECDSA_P384_HY_INIT + +#define ECDSA_N_INIT ECDSA_P384_N_INIT + +#else + +BAD_CURVE + +#endif + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +extern FPGA_BUFFER ECDSA_GX, ECDSA_GY; +extern FPGA_BUFFER ECDSA_HX, ECDSA_HY; +extern FPGA_BUFFER ECDSA_N; + + +//------------------------------------------------------------------------------ +// Switch +//------------------------------------------------------------------------------ +#ifdef USE_MICROCODE + +#define fpga_curve_base_scalar_multiply fpga_curve_base_scalar_multiply_microcode +#define fpga_curve_add_jacobian fpga_curve_add_jacobian_microcode_wrapper +#define fpga_curve_double_jacobian fpga_curve_double_jacobian_microcode_wrapper + +#else + +#define fpga_curve_base_scalar_multiply fpga_curve_base_scalar_multiply_abstract +#define fpga_curve_add_jacobian fpga_curve_add_jacobian_abstract +#define fpga_curve_double_jacobian fpga_curve_double_jacobian_abstract + +#endif + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void fpga_curve_init (); + +void fpga_curve_base_scalar_multiply_abstract (const FPGA_BUFFER *k, + FPGA_BUFFER *qx, + FPGA_BUFFER *qy); + +void fpga_curve_base_scalar_multiply_microcode (const FPGA_BUFFER *k, + FPGA_BUFFER *qx, + FPGA_BUFFER *qy); + +void fpga_curve_add_jacobian_abstract (const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz); + +void fpga_curve_double_jacobian_abstract (const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz); + +void fpga_curve_add_jacobian_microcode (); + +void fpga_curve_double_jacobian_microcode (); + +void fpga_curve_add_jacobian_microcode_wrapper (const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz); + + +void fpga_curve_double_jacobian_microcode_wrapper (const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz); + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_curve_abstract.cpp b/ecdsa_fpga_curve_abstract.cpp new file mode 100644 index 0000000..5510ac1 --- /dev/null +++ b/ecdsa_fpga_curve_abstract.cpp @@ -0,0 +1,313 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_curve_abstract.cpp +// ---------------------------------------------- +// Elliptic curve arithmetic procedures for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +FPGA_BUFFER ECDSA_GX, ECDSA_GY; +FPGA_BUFFER ECDSA_HX, ECDSA_HY; +FPGA_BUFFER ECDSA_N; + + +//------------------------------------------------------------------------------ +void fpga_curve_init() +//------------------------------------------------------------------------------ +{ + int w; // word counter + + FPGA_BUFFER tmp_gx = ECDSA_GX_INIT, tmp_gy = ECDSA_GY_INIT; + FPGA_BUFFER tmp_hx = ECDSA_HX_INIT, tmp_hy = ECDSA_HY_INIT; + FPGA_BUFFER tmp_n = ECDSA_N_INIT; + + /* fill buffers for large multi-word integers */ + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + { ECDSA_GX.words[w] = tmp_gx.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + ECDSA_GY.words[w] = tmp_gy.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + ECDSA_HX.words[w] = tmp_hx.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + ECDSA_HY.words[w] = tmp_hy.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + ECDSA_N.words[w] = tmp_n.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + } +} + + +//------------------------------------------------------------------------------ +// +// Elliptic curve base point scalar multiplication routine. +// +// Q(qx,qy) = k * G(px,py) +// +// Note, that Q is supposed to be in affine coordinates. Multiplication is done +// using the double-and-add algorithm 3.27 from "Guide to Elliptic Curve +// Cryptography". +// +// WARNING: Though this procedure always does the addition step, it only +// updates the result when current bit of k is set. It does not take any +// active measures to keep run-time constant. The main purpose of this model +// is to help debug Verilog code for FPGA, so *DO NOT* use it anywhere near +// production! +// +//------------------------------------------------------------------------------ +void fpga_curve_base_scalar_multiply_abstract(const FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy) +//------------------------------------------------------------------------------ +{ + int word_count, bit_count; // counters + + FPGA_BUFFER rx, ry, rz; // intermediate result + FPGA_BUFFER tx, ty, tz; // temporary variable + + /* set initial value of R to point at infinity */ + fpga_multiword_copy(&ECDSA_ONE, &rx); + fpga_multiword_copy(&ECDSA_ONE, &ry); + fpga_multiword_copy(&ECDSA_ZERO, &rz); + + /* process bits of k left-to-right */ + for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--) + for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--) + { + /* calculate T = 2 * R */ + fpga_curve_double_jacobian_abstract(&rx, &ry, &rz, &tx, &ty, &tz); + + /* always calculate R = T + P for constant-time */ + fpga_curve_add_jacobian_abstract(&tx, &ty, &tz, &rx, &ry, &rz); + + /* revert to the value of T before addition if the current bit of k is not set */ + if (!((k->words[word_count-1] >> (bit_count-1)) & 1)) + { fpga_multiword_copy(&tx, &rx); + fpga_multiword_copy(&ty, &ry); + fpga_multiword_copy(&tz, &rz); + } + + } + + FPGA_BUFFER a2, a3; + + fpga_modular_inv23(&rz, &a2, &a3); + + fpga_modular_mul(&rx, &a2, qx); // qx = px * (pz^-1)^2 (mod q) + fpga_modular_mul(&ry, &a3, qy); // qy = py * (pz^-1)^3 (mod q) + + // check, that rz is non-zero (not point at infinity) + bool rz_is_zero = fpga_multiword_is_zero(&rz); + + // handle special case (result is point at infinity) + if (rz_is_zero) + { fpga_multiword_copy(&ECDSA_ZERO, qx); + fpga_multiword_copy(&ECDSA_ZERO, qy); + } +} + + +//------------------------------------------------------------------------------ +// +// Elliptic curve point doubling routine. +// +// R(rx,ry,rz) = 2 * P(px,py,pz) +// +// Note, that P(px,py,pz) is supposed to be in projective Jacobian coordinates, +// R will be in projective Jacobian coordinates. +// +// This routine implements algorithm 3.21 from "Guide to Elliptic Curve +// Cryptography", the only difference is that step 6. does T1 = T2 + T2 and +// then T2 = T2 + T1 instead of T2 = 3 * T2, because our addition is much +// faster, than multiplication. +// +// Note, that this routine also handles one special case, namely when P is at +// infinity. +// +// Instead of actual modular division, multiplication by pre-computed constant +// (2^-1 mod q) is done. +// +// Note, that FPGA modular multiplier can't multiply a given buffer by itself, +// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome +// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and +// then fpga_modular_mul(pz, &t1, &t1) instead. +// +// WARNING: Though this procedure always does doubling steps, it does not take +// any active measures to keep run-time constant. The main purpose of this +// model is to help debug Verilog code for FPGA, so *DO NOT* use is anywhere +// near production! +// +//------------------------------------------------------------------------------ +void fpga_curve_double_jacobian_abstract(const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER t1, t2, t3; // temporary variables + + // check, whether P is at infinity + bool pz_is_zero = fpga_multiword_is_zero(pz); + + /* 2. */ fpga_multiword_copy(pz, &t1); + fpga_modular_mul(pz, &t1, &t1); + /* 3. */ fpga_modular_sub(px, &t1, &t2); + /* 4. */ fpga_modular_add(px, &t1, &t1); + /* 5. */ fpga_modular_mul(&t1, &t2, &t2); + /* 6. */ fpga_modular_add(&t2, &t2, &t1); + /* */ fpga_modular_add(&t1, &t2, &t2); + /* 7. */ fpga_modular_add(py, py, ry); + /* 8. */ fpga_modular_mul(pz, ry, rz); + /* 9. */ fpga_multiword_copy(ry, &t1); + fpga_multiword_copy(ry, &t3); + fpga_modular_mul(&t1, &t3, ry); + /* 10. */ fpga_modular_mul(px, ry, &t3); + /* 11. */ fpga_multiword_copy(ry, &t1); + fpga_modular_mul(ry, &t1, &t1); + /* 12. */ fpga_modular_mul(&t1, &ECDSA_DELTA, ry); + /* 13. */ fpga_multiword_copy(&t2, &t1); + fpga_modular_mul(&t1, &t2, rx); + /* 14. */ fpga_modular_add(&t3, &t3, &t1); + /* 15. */ fpga_modular_sub(rx, &t1, rx); + /* 16. */ fpga_modular_sub(&t3, rx, &t1); + /* 17. */ fpga_modular_mul(&t1, &t2, &t1); + /* 18. */ fpga_modular_sub(&t1, ry, ry); + + // handle special case (input point is at infinity) + if (pz_is_zero) + { fpga_multiword_copy(&ECDSA_ONE, rx); + fpga_multiword_copy(&ECDSA_ONE, ry); + fpga_multiword_copy(&ECDSA_ZERO, rz); + } +} + + +//------------------------------------------------------------------------------ +// +// Elliptic curve point addition routine. +// +// R(rx,ry,rz) = P(px,py,pz) + Q(qx,qy) +// +// Note, that P(px, py, pz) is supposed to be in projective Jacobian +// coordinates, while Q(qx,qy) is supposed to be in affine coordinates, +// R(rx, ry, rz) will be in projective Jacobian coordinates. Moreover, in this +// particular implementation Q is always the base point G. +// +// This routine implements algorithm 3.22 from "Guide to Elliptic Curve +// Cryptography". Differences from the original algorithm: +// +// 1) Step 1. is omitted, because point Q is always the base point, which is +// not at infinity by definition. +// +// 2) Step 9.1 just returns the pre-computed double of the base point instead +// of actually doubling it. +// +// Note, that this routine also handles three special cases: +// +// 1) P is at infinity +// 2) P == Q +// 3) P == -Q +// +// Note, that FPGA modular multiplier can't multiply a given buffer by itself, +// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome +// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and +// then fpga_modular_mul(pz, &t1, &t1) instead. +// +// WARNING: This procedure does not take any active measures to keep run-time +// constant. The main purpose of this model is to help debug Verilog code for +// FPGA, so *DO NOT* use is anywhere near production! +// +//------------------------------------------------------------------------------ +void fpga_curve_add_jacobian_abstract(const FPGA_BUFFER *px, + const FPGA_BUFFER *py, + const FPGA_BUFFER *pz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER t1, t2, t3, t4; // temporary variables + + bool pz_is_zero = fpga_multiword_is_zero(pz); // Step 2. + + /* 3. */ fpga_multiword_copy(pz, &t1); + fpga_modular_mul(pz, &t1, &t1); + /* 4. */ fpga_modular_mul(pz, &t1, &t2); + /* 5. */ fpga_modular_mul(&t1, &ECDSA_GX, &t1); + /* 6. */ fpga_modular_mul(&t2, &ECDSA_GY, &t2); + /* 7. */ fpga_modular_sub(&t1, px, &t1); + /* 8. */ fpga_modular_sub(&t2, py, &t2); + + bool t1_is_zero = fpga_multiword_is_zero(&t1); // | Step 9. + bool t2_is_zero = fpga_multiword_is_zero(&t2); // | + + /* 10. */ fpga_modular_mul(pz, &t1, rz); + /* 11. */ fpga_multiword_copy(&t1, &t3); + fpga_modular_mul(&t1, &t3, &t3); + /* 12. */ fpga_modular_mul(&t1, &t3, &t4); + /* 13. */ fpga_modular_mul(px, &t3, &t3); + /* 14. */ fpga_modular_add(&t3, &t3, &t1); + /* 15. */ fpga_multiword_copy(&t2, rx); + fpga_modular_mul(rx, &t2, rx); + /* 16. */ fpga_modular_sub(rx, &t1, rx); + /* 17. */ fpga_modular_sub(rx, &t4, rx); + /* 18. */ fpga_modular_sub(&t3, rx, &t3); + /* 19. */ fpga_modular_mul(&t2, &t3, &t3); + /* 20. */ fpga_modular_mul(py, &t4, &t4); + /* 21. */ fpga_modular_sub(&t3, &t4, ry); + + // + // final selection + // + if (pz_is_zero) // P at infinity ? + { fpga_multiword_copy(&ECDSA_GX, rx); + fpga_multiword_copy(&ECDSA_GY, ry); + fpga_multiword_copy(&ECDSA_ONE, rz); + } + else if (t1_is_zero) // same x for P and Q ? + { + fpga_multiword_copy(t2_is_zero ? &ECDSA_HX : &ECDSA_ONE, rx); // | same y ? (P==Q => R=2*G) : (P==-Q => R=O) + fpga_multiword_copy(t2_is_zero ? &ECDSA_HY : &ECDSA_ONE, ry); // | + fpga_multiword_copy(t2_is_zero ? &ECDSA_ONE : &ECDSA_ZERO, rz); // | + } +} + + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_curve_microcode.cpp b/ecdsa_fpga_curve_microcode.cpp new file mode 100644 index 0000000..553498c --- /dev/null +++ b/ecdsa_fpga_curve_microcode.cpp @@ -0,0 +1,494 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_curve_microcode.cpp +// ---------------------------------------------- +// Elliptic curve arithmetic procedures for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Required for Microcode Routines +//------------------------------------------------------------------------------ +#define USE_MICROCODE + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// +// Doubles the point stored in CYCLE_R* and stores the result in CYCLE_S*. +// +//------------------------------------------------------------------------------ +void fpga_curve_double_jacobian_microcode() +//------------------------------------------------------------------------------ +{ + // fpga_modular_mul(RZ, RZ, RZ2 ); // 2. RZ2 = RZ * RZ + // fpga_modular_sub(RX, RZ2, T1 ); // 3. T1 = RX - RZ2 + // fpga_modular_add(RX, RZ2, T2 ); // 4. T2 = RX + RZ2 + // fpga_modular_mul(T1, T2, T3 ); // 5. T3 = T1 * T2 + // fpga_modular_add(T3, T3, T4 ); // 6a. T4 = T3 + T3 + // fpga_modular_add(T3, T4, A ); // 6b. A = T3 + T4 + // fpga_modular_add(RY, RY, B ); // 7. B = RY + RY + // fpga_modular_mul(B, RZ, SZ ); // 8. SZ = B * RZ [output] + // fpga_modular_mul(B, B, C ); // 9. C = B * B + // fpga_modular_mul(C, RX, D ); // 10. D = C * RX + // fpga_modular_mul(C, C, C2 ); // 11. C2 = C * C + // fpga_modular_mul(C2, DELTA, C2_2); // 12. C2_2 = C / 2 + // fpga_modular_mul(A, A, A2 ); // 13. A2 = A * A + // fpga_modular_add(D, D, T1 ); // 14. T1 = D + D + // fpga_modular_sub(A2, T1, SX ); // 15. SX = A2 - T1 [output] + // fpga_modular_sub(D, SX, T1 ); // 16. T1 = D - SX + // fpga_modular_mul(A , T1, T2 ); // 17. T2 = A * T1 + // fpga_modular_sub(T2, C2_2, SY ); // 18. SY = T2 - C2_2 [output] + + /* BEGIN_MICROCODE: CYCLE_DOUBLE */ + + FPGA_BUFFER TEMP; + + uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, CYCLE_Z2); + uop_stor(BANK_HI, CYCLE_Z2, &TEMP); print_fpga_buffer("CYCLE_Z2 = ", &TEMP); + + uop_calc(SUB, BANK_HI, CYCLE_RX, CYCLE_Z2, BANK_LO, CYCLE_T1); + uop_stor(BANK_LO, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP); + + uop_calc(ADD, BANK_HI, CYCLE_RX, CYCLE_Z2, BANK_LO, CYCLE_T2); + uop_stor(BANK_LO, CYCLE_T2, &TEMP); print_fpga_buffer("CYCLE_T2 = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_T1, CYCLE_T2, BANK_HI, CYCLE_T3); + uop_stor(BANK_HI, CYCLE_T3, &TEMP); print_fpga_buffer("CYCLE_T3 = ", &TEMP); + + uop_calc(ADD, BANK_HI, CYCLE_T3, CYCLE_T3, BANK_LO, CYCLE_T4); + uop_stor(BANK_LO, CYCLE_T4, &TEMP); print_fpga_buffer("CYCLE_T4 = ", &TEMP); + + uop_move( BANK_LO, CYCLE_T4, BANK_HI, CYCLE_T4); + + uop_calc(ADD, BANK_HI, CYCLE_T3, CYCLE_T4, BANK_LO, CYCLE_A); + uop_stor(BANK_LO, CYCLE_A, &TEMP); print_fpga_buffer("CYCLE_A = ", &TEMP); + + uop_calc(ADD, BANK_HI, CYCLE_RY, CYCLE_RY, BANK_LO, CYCLE_B); + uop_stor(BANK_LO, CYCLE_B, &TEMP); print_fpga_buffer("CYCLE_B = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_B, CYCLE_RZ, BANK_HI, CYCLE_SZ); + uop_stor(BANK_HI, CYCLE_SZ, &TEMP); print_fpga_buffer("CYCLE_SZ = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_B, CYCLE_B, BANK_HI, CYCLE_C); + uop_stor(BANK_HI, CYCLE_C, &TEMP); print_fpga_buffer("CYCLE_C = ", &TEMP); + + uop_calc(MUL, BANK_HI, CYCLE_C, CYCLE_RX, BANK_LO, CYCLE_D); + uop_stor(BANK_LO, CYCLE_D, &TEMP); print_fpga_buffer("CYCLE_D = ", &TEMP); + + uop_calc(MUL, BANK_HI, CYCLE_C, CYCLE_C, BANK_LO, CYCLE_C2); + uop_stor(BANK_LO, CYCLE_C2, &TEMP); print_fpga_buffer("CYCLE_C2 = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_C2, CONST_DELTA, BANK_HI, CYCLE_C2_2); + uop_stor(BANK_HI, CYCLE_C2_2, &TEMP); print_fpga_buffer("CYCLE_C2_2 = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_A, CYCLE_A, BANK_HI, CYCLE_A2); + uop_stor(BANK_HI, CYCLE_A2, &TEMP); print_fpga_buffer("CYCLE_A2 = ", &TEMP); + + uop_calc(ADD, BANK_LO, CYCLE_D, CYCLE_D, BANK_HI, CYCLE_T1); + uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP); + + uop_calc(SUB, BANK_HI, CYCLE_A2, CYCLE_T1, BANK_LO, CYCLE_SX); + uop_stor(BANK_LO, CYCLE_SX, &TEMP); print_fpga_buffer("CYCLE_SX = ", &TEMP); + + uop_calc(SUB, BANK_LO, CYCLE_D, CYCLE_SX, BANK_HI, CYCLE_T1); + uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP); + + uop_move( BANK_HI, CYCLE_T1, BANK_LO, CYCLE_T1); + + uop_calc(MUL, BANK_LO, CYCLE_A, CYCLE_T1, BANK_HI, CYCLE_T2); + uop_stor(BANK_HI, CYCLE_T2, &TEMP); print_fpga_buffer("CYCLE_T2 = ", &TEMP); + + uop_calc(SUB, BANK_HI, CYCLE_T2, CYCLE_C2_2, BANK_LO, CYCLE_SY); + uop_stor(BANK_LO, CYCLE_SY, &TEMP); print_fpga_buffer("CYCLE_SY = ", &TEMP); + + /* END_MICROCODE */ +} + + +//------------------------------------------------------------------------------ +// +// Adds the base point G to the point stored in CYCLE_S* and stores the result +// again in CYCLE_R*. +// +//------------------------------------------------------------------------------ +void fpga_curve_add_jacobian_microcode() +{ + //fpga_modular_mul(SZ, SZ, A) ; // 3. A = SZ * SZ + //fpga_modular_mul(A, SZ, B ); // 4. B = A * SZ + //fpga_modular_mul(A, &ECDSA_GX, C ); // 5. C = A * GX + //fpga_modular_mul(B, &ECDSA_GY, D ); // 6. D = B * GY + //fpga_modular_sub(C, SX, E ); // 7. E = C - SX + //fpga_modular_sub(D, SY, F ); // 8. F = D - SY + //fpga_modular_mul(E, SZ, RZ); // 10. RZ = E * SZ [output] + //fpga_modular_mul(E, E, G ); // 11. G = E * E + //fpga_modular_mul(E, G, H ); // 12. H = E * G + //fpga_modular_mul(G, SX, J ); // 13. J = G * SX + //fpga_modular_add(J, J, T1); // 14. T1 = J + J + //fpga_modular_mul(F, F, T2); // 15. T2 = F * F + //fpga_modular_sub(T2, T1, T3); // 16. T3 = T2 - T1 + //fpga_modular_sub(T3, H, RX); // 17. RX = T3 - H [output] + //fpga_modular_sub(J, RX, T1); // 18. T1 = J - RX + //fpga_modular_mul(F, T1, T2); // 19. T2 = F * T1 + //fpga_modular_mul(H, SY, T3); // 20. T3 = H * SY + //fpga_modular_sub(T2, T3, RY); // 21. RY = T2 - T3 [output] + + /* BEGIN_MICROCODE: CYCLE_ADD */ + + uop_cmpz( BANK_HI, CYCLE_SZ); + uop_move( BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_SZ); + uop_calc(MUL, BANK_LO, CYCLE_SZ, CYCLE_SZ, BANK_HI, CYCLE_A); + uop_calc(MUL, BANK_HI, CYCLE_A, CYCLE_SZ, BANK_LO, CYCLE_B); + uop_move( BANK_LO, CYCLE_B, BANK_HI, CYCLE_B); + uop_calc(MUL, BANK_HI, CYCLE_A, CONST_GX, BANK_LO, CYCLE_C); + uop_calc(MUL, BANK_HI, CYCLE_B, CONST_GY, BANK_LO, CYCLE_D); + uop_calc(SUB, BANK_LO, CYCLE_C, CYCLE_SX, BANK_HI, CYCLE_E); + uop_calc(SUB, BANK_LO, CYCLE_D, CYCLE_SY, BANK_HI, CYCLE_F); + uop_cmpz( BANK_HI, CYCLE_E); + uop_cmpz( BANK_HI, CYCLE_F); + uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_SZ, BANK_LO, CYCLE_RZ); + uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_E, BANK_LO, CYCLE_G); + uop_move( BANK_LO, CYCLE_G, BANK_HI, CYCLE_G); + uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_G, BANK_LO, CYCLE_H); + uop_calc(MUL, BANK_LO, CYCLE_G, CYCLE_SX, BANK_HI, CYCLE_J); + uop_calc(ADD, BANK_HI, CYCLE_J, CYCLE_J, BANK_LO, CYCLE_T1); + uop_calc(MUL, BANK_HI, CYCLE_F, CYCLE_F, BANK_LO, CYCLE_T2); + uop_calc(SUB, BANK_LO, CYCLE_T2, CYCLE_T1, BANK_HI, CYCLE_T3); + uop_move( BANK_HI, CYCLE_T3, BANK_LO, CYCLE_T3); + uop_calc(SUB, BANK_LO, CYCLE_T3, CYCLE_H, BANK_HI, CYCLE_RX); + uop_calc(SUB, BANK_HI, CYCLE_J, CYCLE_RX, BANK_LO, CYCLE_T1); + uop_move( BANK_HI, CYCLE_F, BANK_LO, CYCLE_F); + uop_calc(MUL, BANK_LO, CYCLE_F, CYCLE_T1, BANK_HI, CYCLE_T2); + uop_calc(MUL, BANK_LO, CYCLE_H, CYCLE_SY, BANK_HI, CYCLE_T3); + uop_calc(SUB, BANK_HI, CYCLE_T2, CYCLE_T3, BANK_LO, CYCLE_RY); + uop_move( BANK_LO, CYCLE_RY, BANK_HI, CYCLE_RY); + + /* END_MICROCODE */ + + // + // handle special corner cases + // + if (uop_flagz_sz) + { + /* BEGIN_MICROCODE: CYCLE_ADD_AT_INFINITY */ + + uop_move(BANK_LO, CONST_GX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_GY, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + else + { + if (uop_flagz_e) + { + if (uop_flagz_f) + { + /* BEGIN_MICROCODE: CYCLE_ADD_SAME_X_SAME_Y */ + + uop_move(BANK_LO, CONST_HX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_HY, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + else + { + /* BEGIN_MICROCODE: CYCLE_ADD_SAME_X */ + + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + } + else + { + /* BEGIN_MICROCODE: CYCLE_ADD_REGULAR */ + + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_T1); + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_T2); + uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_T3); + + /* END_MICROCODE */ + } + } +} + + +#ifdef USE_MICROCODE +//------------------------------------------------------------------------------ +void fpga_curve_base_scalar_multiply_microcode(const FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy) +//------------------------------------------------------------------------------ +{ + int word_count, bit_count; // counters + FPGA_WORD k_word; + bool k_bit; + + // initialize internal banks + fpga_multiword_copy(&ECDSA_ZERO, &BUF_LO[CONST_ZERO]); + fpga_multiword_copy(&ECDSA_ZERO, &BUF_HI[CONST_ZERO]); + + fpga_multiword_copy(&ECDSA_ONE, &BUF_LO[CONST_ONE]); + fpga_multiword_copy(&ECDSA_ONE, &BUF_HI[CONST_ONE]); + + fpga_multiword_copy(&ECDSA_DELTA, &BUF_LO[CONST_DELTA]); + fpga_multiword_copy(&ECDSA_DELTA, &BUF_HI[CONST_DELTA]); + + fpga_multiword_copy(&ECDSA_GX, &BUF_LO[CONST_GX]); + fpga_multiword_copy(&ECDSA_GX, &BUF_HI[CONST_GX]); + + fpga_multiword_copy(&ECDSA_GY, &BUF_LO[CONST_GY]); + fpga_multiword_copy(&ECDSA_GY, &BUF_HI[CONST_GY]); + + fpga_multiword_copy(&ECDSA_HX, &BUF_LO[CONST_HX]); + fpga_multiword_copy(&ECDSA_HX, &BUF_HI[CONST_HX]); + + fpga_multiword_copy(&ECDSA_HY, &BUF_LO[CONST_HY]); + fpga_multiword_copy(&ECDSA_HY, &BUF_HI[CONST_HY]); + + /* BEGIN_MICROCODE: PREPARE */ + + // set initial value of R to point at infinity + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + + /* process bits of k left-to-right */ + for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--) + for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--) + { + k_word = k->words[word_count-1]; + k_bit = (k_word & (FPGA_WORD)(1 << (bit_count-1))) > 0; + + // Banks of working cycle operands + // ------------------------------- + // RX: HI + // RY: HI + // RZ: LO + + // calculate S = 2 * R + fpga_curve_double_jacobian_microcode(); + + // Banks of working cycle operands + // ------------------------------- + // SX: LO + // SY: LO + // SZ: HI + + // always calculate R = S * G for constant-time operation + fpga_curve_add_jacobian_microcode(); + + // Banks of working cycle operands + // ------------------------------- + // RX: HI + // RY: HI + // RZ: LO + + + if (!k_bit) + { + /* BEGIN_MICROCODE: CYCLE_K0 */ + + // revert to the value of S before addition if the current bit of k is not set + uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + else + { + /* BEGIN_MICROCODE: CYCLE_K1 */ + + // do dummy overwrite for constant-time operation + uop_move(BANK_HI, CYCLE_RX, BANK_LO, CYCLE_SX); + uop_move(BANK_HI, CYCLE_RY, BANK_LO, CYCLE_SY); + uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_SZ); + + /* END_MICROCODE */ + } + + FPGA_BUFFER TEMP; + + //printf("wc = %d, bc = %d\n", word_count-1, bit_count-1); + + uop_stor(BANK_LO, CYCLE_RX, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RX = ", &TEMP); + uop_stor(BANK_LO, CYCLE_RY, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RY = ", &TEMP); + uop_stor(BANK_LO, CYCLE_RZ, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RZ = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_SX, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SX = ", &TEMP); + uop_stor(BANK_LO, CYCLE_SY, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SY = ", &TEMP); + uop_stor(BANK_LO, CYCLE_SZ, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SZ = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_A, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_A = ", &TEMP); + uop_stor(BANK_LO, CYCLE_A2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_A2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_B, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_B = ", &TEMP); + uop_stor(BANK_LO, CYCLE_C, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C = ", &TEMP); + uop_stor(BANK_LO, CYCLE_C2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_C2_2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C2_2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_D, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_D = ", &TEMP); + uop_stor(BANK_LO, CYCLE_E, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_E = ", &TEMP); + uop_stor(BANK_LO, CYCLE_F, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_F = ", &TEMP); + uop_stor(BANK_LO, CYCLE_G, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_G = ", &TEMP); + uop_stor(BANK_LO, CYCLE_H, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_H = ", &TEMP); + uop_stor(BANK_LO, CYCLE_J, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_J = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_Z2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_Z2 = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_T1, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T1 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_T2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_T3, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T3 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_T4, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T4 = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_RX, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RX = ", &TEMP); + uop_stor(BANK_HI, CYCLE_RY, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RY = ", &TEMP); + uop_stor(BANK_HI, CYCLE_RZ, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RZ = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_SX, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SX = ", &TEMP); + uop_stor(BANK_HI, CYCLE_SY, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SY = ", &TEMP); + uop_stor(BANK_HI, CYCLE_SZ, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SZ = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_A, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_A = ", &TEMP); + uop_stor(BANK_HI, CYCLE_A2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_A2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_B, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_B = ", &TEMP); + uop_stor(BANK_HI, CYCLE_C, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C = ", &TEMP); + uop_stor(BANK_HI, CYCLE_C2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_C2_2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C2_2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_D, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_D = ", &TEMP); + uop_stor(BANK_HI, CYCLE_E, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_E = ", &TEMP); + uop_stor(BANK_HI, CYCLE_F, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_F = ", &TEMP); + uop_stor(BANK_HI, CYCLE_G, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_G = ", &TEMP); + uop_stor(BANK_HI, CYCLE_H, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_H = ", &TEMP); + uop_stor(BANK_HI, CYCLE_J, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_J = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_Z2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_Z2 = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T1 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_T2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_T3, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T3 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_T4, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T4 = ", &TEMP); + + } + + // now convert to affine coordinates + fpga_modular_inv23_microcode(); + + /* BEGIN_MICROCODE: CONVERT */ + + uop_calc(MUL, BANK_HI, INVERT_A2, CYCLE_RX, BANK_LO, CYCLE_SX); + uop_calc(MUL, BANK_HI, INVERT_A3, CYCLE_RY, BANK_LO, CYCLE_SY); + uop_cmpz(BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + + if (uop_flagz_rz) + { + /* BEGIN_MICROCODE: CONVERT_AT_INFINITY */ + + uop_move(BANK_LO, CONST_ZERO, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_ZERO, BANK_HI, CYCLE_RY); + + /* END_MICROCODE */ + } + else + { + /* BEGIN_MICROCODE: CONVERT_REGULAR */ + + uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_RY); + + /* END_MICROCODE */ + } + + // return + uop_stor(BANK_HI, CYCLE_RX, qx); + uop_stor(BANK_HI, CYCLE_RY, qy); +} +#endif USE_MICROCODE + + +//------------------------------------------------------------------------------ +void fpga_curve_double_jacobian_microcode_wrapper(const FPGA_BUFFER *rx, + const FPGA_BUFFER *ry, + const FPGA_BUFFER *rz, + FPGA_BUFFER *sx, + FPGA_BUFFER *sy, + FPGA_BUFFER *sz) +//------------------------------------------------------------------------------ +{ + uop_load(rx, BANK_HI, CYCLE_RX); + uop_load(ry, BANK_HI, CYCLE_RY); + uop_load(rz, BANK_LO, CYCLE_RZ); + + fpga_curve_double_jacobian_microcode(); + + uop_stor(BANK_LO, CYCLE_SX, sx); + uop_stor(BANK_LO, CYCLE_SY, sy); + uop_stor(BANK_HI, CYCLE_SZ, sz); +} + + +//------------------------------------------------------------------------------ +void fpga_curve_add_jacobian_microcode_wrapper(const FPGA_BUFFER *sx, + const FPGA_BUFFER *sy, + const FPGA_BUFFER *sz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz) +//------------------------------------------------------------------------------ +{ + uop_load(sx, BANK_LO, CYCLE_SX); + uop_load(sy, BANK_LO, CYCLE_SY); + uop_load(sz, BANK_HI, CYCLE_SZ); + + fpga_curve_add_jacobian_microcode(); + + uop_stor(BANK_HI, CYCLE_RX, rx); + uop_stor(BANK_HI, CYCLE_RY, ry); + uop_stor(BANK_LO, CYCLE_RZ, rz); +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/fpga_lowlevel.cpp b/ecdsa_fpga_lowlevel.cpp index 511856a..c7ab322 100644 --- a/fpga_lowlevel.cpp +++ b/ecdsa_fpga_lowlevel.cpp @@ -1,137 +1,135 @@ -//------------------------------------------------------------------------------
-//
-// fpga_lowlevel.cpp
-// -----------------------------------
-// Models of Low-level FPGA primitives
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 32-bit adder with carry input and carry output.
-//
-// Carries are 1 bit wide.
-//
-// {c_out, s} = x + y + c_in
-//
-//------------------------------------------------------------------------------
-void fpga_lowlevel_add32(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out)
-//------------------------------------------------------------------------------
-{
- // calculate sum
- FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x + (FPGA_WORD_EXTENDED)y;
-
- // process carry input
- if (c_in) r++;
-
- // store sum
- *s = (FPGA_WORD)r;
-
- // shift sum to obtain carry flag
- r >>= FPGA_WORD_WIDTH;
-
- // store carry
- *c_out = (r != 0);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 32-bit subtractor with borrow input and borrow output.
-//
-// Borrows are 1 bit wide.
-//
-// {b_out, d} = x - y - b_in
-//
-void fpga_lowlevel_sub32(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out)
-//------------------------------------------------------------------------------
-{
- // calculate difference
- FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x - (FPGA_WORD_EXTENDED)y;
-
- // process borrow input
- if (b_in) r--;
-
- // store difference
- *d = (FPGA_WORD)r;
-
- // shift difference to obtain borrow flag
- r >>= FPGA_WORD_WIDTH;
-
- // store borrow
- *b_out = (r != 0);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 16x16-bit multiplier.
-//
-// Inputs are 16-bit wide, outputs are 32-bit wide.
-//
-// p = x * y
-//
-void fpga_lowlevel_mul16(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p)
-//------------------------------------------------------------------------------
-{
- // multiply and store result
- *p = (FPGA_WORD)x * (FPGA_WORD)y;
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 48-bit adder without carries.
-//
-// s = (x + y)[47:0]
-//
-void fpga_lowlevel_add48(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s)
-//------------------------------------------------------------------------------
-{
- // add and store result truncated to 48 bits
- *s = (x + y) & FPGA_MASK_ADDER48;
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------ +// +// fpga_ecdsa_lowlevel.cpp +// ----------------------------------- +// Models of low-level FPGA primitives +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// +// Low-level 32-bit adder with carry input and carry output. +// +// Carries are 1 bit wide. +// +// {c_out, s} = x + y + c_in +// +//------------------------------------------------------------------------------ +void fpga_lowlevel_add32(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out) +//------------------------------------------------------------------------------ +{ + // calculate sum + FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x + (FPGA_WORD_EXTENDED)y; + + // process carry input + if (c_in) r++; + + // store sum + *s = (FPGA_WORD)r; + + // shift sum to obtain carry flag + r >>= FPGA_WORD_WIDTH; + + // store carry + *c_out = (r != 0); +} + + +//------------------------------------------------------------------------------ +// +// Low-level 32-bit subtractor with borrow input and borrow output. +// +// Borrows are 1 bit wide. +// +// {b_out, d} = x - y - b_in +// +void fpga_lowlevel_sub32(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out) +//------------------------------------------------------------------------------ +{ + // calculate difference + FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x - (FPGA_WORD_EXTENDED)y; + + // process borrow input + if (b_in) r--; + + // store difference + *d = (FPGA_WORD)r; + + // shift difference to obtain borrow flag + r >>= FPGA_WORD_WIDTH; + + // store borrow + *b_out = (r != 0); +} + + +//------------------------------------------------------------------------------ +// +// Low-level 16x16-bit multiplier. +// +// Inputs are 16-bit wide, output is 32-bit wide. +// +// p = x * y +// +void fpga_lowlevel_mul16(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p) +//------------------------------------------------------------------------------ +{ + // multiply and store result + *p = (FPGA_WORD)x * (FPGA_WORD)y; +} + + +//------------------------------------------------------------------------------ +// +// Low-level 47-bit adder without carries. +// +// s = (x + y)[46:0] +// +void fpga_lowlevel_add47(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s) +//------------------------------------------------------------------------------ +{ + // add and store result truncated to 47 bits + *s = (x + y) & FPGA_MASK_ADDER47; +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/fpga_lowlevel.h b/ecdsa_fpga_lowlevel.h index eeb5c4f..8d0dccd 100644 --- a/fpga_lowlevel.h +++ b/ecdsa_fpga_lowlevel.h @@ -1,80 +1,79 @@ -//------------------------------------------------------------------------------
-//
-// fpga_lowlevel.h
-// -----------------------------------
-// Models of Low-level FPGA primitives
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// FPGA Pipeline Settings
-//------------------------------------------------------------------------------
-#define FPGA_WORD_WIDTH (32)
-#define OPERAND_NUM_WORDS (OPERAND_WIDTH / FPGA_WORD_WIDTH)
-
-
-//------------------------------------------------------------------------------
-// Word Types (Normal, Extended, Reduced)
-//------------------------------------------------------------------------------
-typedef uint32_t FPGA_WORD;
-typedef uint64_t FPGA_WORD_EXTENDED;
-typedef uint16_t FPGA_WORD_REDUCED;
-
-
-//------------------------------------------------------------------------------
-// Extended Adder Mask
-//------------------------------------------------------------------------------
-#define FPGA_MASK_ADDER48 ((FPGA_WORD_EXTENDED)0x0000FFFFFFFFFFFF)
-
-
-//------------------------------------------------------------------------------
-struct FPGA_BUFFER
-//------------------------------------------------------------------------------
-{
- FPGA_WORD words[OPERAND_NUM_WORDS];
-};
-
-
-//------------------------------------------------------------------------------
-// Prototypes
-//------------------------------------------------------------------------------
-void fpga_lowlevel_add32 (FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out);
-void fpga_lowlevel_sub32 (FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out);
-void fpga_lowlevel_mul16 (FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p);
-void fpga_lowlevel_add48 (FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s);
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------ +// +// fpga_ecdsa_lowlevel.h +// ----------------------------------- +// Models of low-level FPGA primitives +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include <stdint.h> + + +//------------------------------------------------------------------------------ +// FPGA Pipeline Settings +//------------------------------------------------------------------------------ +#define FPGA_WORD_WIDTH (32) + + +//------------------------------------------------------------------------------ +// Word Types (Normal 32-bit, Extended 47-bit, Reduced 16-bit) +//------------------------------------------------------------------------------ +typedef uint32_t FPGA_WORD; +typedef uint64_t FPGA_WORD_EXTENDED; +typedef uint16_t FPGA_WORD_REDUCED; + + +//------------------------------------------------------------------------------ +// Wide 47-bit Adder Mask +//------------------------------------------------------------------------------ +#define FPGA_MASK_ADDER47 ((FPGA_WORD_EXTENDED)0x00007FFFFFFFFFFF) + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void fpga_lowlevel_add32 (FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out); +void fpga_lowlevel_sub32 (FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out); + +void fpga_lowlevel_mul16 (FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p); + +void fpga_lowlevel_add47 (FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s); + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_microcode.cpp b/ecdsa_fpga_microcode.cpp new file mode 100644 index 0000000..f02dc8a --- /dev/null +++ b/ecdsa_fpga_microcode.cpp @@ -0,0 +1,432 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_microcode.cpp +// -------------------------------- +// Microcode Architecture for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Required for Microcode Routines +//------------------------------------------------------------------------------ +#define USE_MICROCODE + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Global Buffers +//------------------------------------------------------------------------------ +FPGA_BUFFER BUF_LO[ECDSA_UOP_OPERAND_COUNT]; +FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT]; + + +//------------------------------------------------------------------------------ +// Global Flags +//------------------------------------------------------------------------------ +bool uop_flagz_sz; +bool uop_flagz_rz; +bool uop_flagz_e; +bool uop_flagz_f; + + +//------------------------------------------------------------------------------ +void uop_move(UOP_BANK src, int s_op, UOP_BANK dst, int d_op) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *s_ptr = NULL; + FPGA_BUFFER *d_ptr = NULL; + + if (src == BANK_LO) s_ptr = &BUF_LO[s_op]; + if (src == BANK_HI) s_ptr = &BUF_HI[s_op]; + if (dst == BANK_LO) d_ptr = &BUF_LO[d_op]; + if (dst == BANK_HI) d_ptr = &BUF_HI[d_op]; + + fpga_multiword_copy(s_ptr, d_ptr); +} + + +//------------------------------------------------------------------------------ +void uop_cmpz(UOP_BANK src, int s_op) +//------------------------------------------------------------------------------ +{ + bool flagz; + + FPGA_BUFFER *s_ptr = NULL; + + if (src == BANK_LO) s_ptr = &BUF_LO[s_op]; + if (src == BANK_HI) s_ptr = &BUF_HI[s_op]; + + flagz = fpga_multiword_is_zero(s_ptr); + + switch (s_op) + { + case CYCLE_SZ: + uop_flagz_sz = flagz; + break; + case CYCLE_RZ: + uop_flagz_rz = flagz; + break; + case CYCLE_E: + uop_flagz_e = flagz; + break; + case CYCLE_F: + uop_flagz_f = flagz; + break; + } +} + + +//------------------------------------------------------------------------------ +void uop_calc(UOP_MATH math, + UOP_BANK src, int s_op1, int s_op2, + UOP_BANK dst, int d_op) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *s_ptr1 = NULL; + FPGA_BUFFER *s_ptr2 = NULL; + FPGA_BUFFER *d_ptr = NULL; + FPGA_BUFFER *n_ptr = NULL; + + if (src == BANK_LO) + { s_ptr1 = &BUF_LO[s_op1]; + s_ptr2 = &BUF_LO[s_op2]; + } + if (src == BANK_HI) + { s_ptr1 = &BUF_HI[s_op1]; + s_ptr2 = &BUF_HI[s_op2]; + } + if (dst == BANK_LO) + { d_ptr = &BUF_LO[d_op]; + } + if (dst == BANK_HI) + { d_ptr = &BUF_HI[d_op]; + } + + if (math == ADD) fpga_modular_add(s_ptr1, s_ptr2, d_ptr); + if (math == SUB) fpga_modular_sub(s_ptr1, s_ptr2, d_ptr); + if (math == MUL) fpga_modular_mul(s_ptr1, s_ptr2, d_ptr); +} + + +//------------------------------------------------------------------------------ +void uop_load(const FPGA_BUFFER *mem, UOP_BANK dst, int d_op) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *d_ptr = NULL; + if (dst == BANK_LO) d_ptr = &BUF_LO[d_op]; + if (dst == BANK_HI) d_ptr = &BUF_HI[d_op]; + + fpga_multiword_copy(mem, d_ptr); +} + + +//------------------------------------------------------------------------------ +void uop_stor(UOP_BANK src, int s_op, FPGA_BUFFER *mem) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *s_ptr = NULL; + if (src == BANK_LO) + { s_ptr = &BUF_LO[s_op]; + } + if (src == BANK_HI) + { s_ptr = &BUF_HI[s_op]; + } + + fpga_multiword_copy(s_ptr, mem); +} + + +//------------------------------------------------------------------------------ +void fpga_modular_inv23_p256_microcode() +//------------------------------------------------------------------------------ +// +// This computes A2 = RZ^-2 and A3 = RZ^-3. +// +// RZ is read from the lower bank, A2 and A3 are written to the upper bank. +// +//------------------------------------------------------------------------------ +{ + uop_loop; + + // + // operand placement map: + // + // X1 - LO,HI (RZ) + // X2 - LO,HI + // X3 - LO,HI + // X6 - LO + // X12 - HI + // X15 - LO,HI + // X30 - HI + // X32 - LO,HI + + /* BEGIN_MICROCODE: INVERT_P256 */ + + // first obtain intermediate helper quantities (X#) + + // mirror X1 to HI bank (don't waste time copying to X1, just use RZ) + uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ); + + // compute X2 and mirror to the other bank + uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2); + uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2); + + // compute X3 and mirror to the other bank + uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3); + uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3); + + // compute X6 (stored in the lower bank) + uop_calc(MUL, BANK_LO, INVERT_X3, INVERT_X3, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X3, BANK_LO, INVERT_X6); + + // compute X12 (stored in the upper bank) + uop_calc(MUL, BANK_LO, INVERT_X6, INVERT_X6, BANK_HI, INVERT_R1); + uop_cycle(5); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X6, BANK_HI, INVERT_X12); + + // compute X15 and mirror to the other bank + uop_calc(MUL, BANK_HI, INVERT_X12, INVERT_X12, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X3, BANK_HI, INVERT_X15); + uop_move(BANK_HI, INVERT_X15, BANK_LO, INVERT_X15); + + // compute X30 (stored in the upper bank) + uop_calc(MUL, BANK_HI, INVERT_X15, INVERT_X15, BANK_LO, INVERT_R1); + uop_cycle(14); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X15, BANK_HI, INVERT_X30); + + // compute X32 and mirror to the other bank + uop_calc(MUL, BANK_HI, INVERT_X30, INVERT_X30, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X2, BANK_LO, INVERT_X32); + uop_move(BANK_LO, INVERT_X32, BANK_HI, INVERT_X32); + + // now compute the final results + + uop_calc(MUL, BANK_LO, INVERT_X32, INVERT_X32, BANK_HI, INVERT_R1); + + uop_cycle(31); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_LO, INVERT_R2, CYCLE_RZ, BANK_HI, INVERT_R1); + + uop_cycle(128); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X32, BANK_LO, INVERT_R2); + + uop_cycle(32); + uop_calc_if_even(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_repeat(); + + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X32, BANK_HI, INVERT_R1); + + uop_cycle(30); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X30, BANK_LO, INVERT_R2); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + + // move A2 into the upper bank + uop_move(BANK_LO, INVERT_R2, BANK_HI, INVERT_A2); + + // A3 ends up in the upper bank by itself + uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3); + + /* END_MICROCODE */ +} + + +//------------------------------------------------------------------------------ +void fpga_modular_inv23_p384_microcode() +//------------------------------------------------------------------------------ +// +// This computes A2 = RZ^-2 and A3 = RZ^-3. +// +// RZ is read from the lower bank, A2 and A3 are written to the upper bank. +// +//------------------------------------------------------------------------------ +{ + uop_loop; + + // + // operand placement map: + // + // X1 - LO,HI (RZ) + // X2 - LO,HI + // X3 - LO,HI + // X6 - LO + // X12 - HI + // X15 - LO,HI + // X30 - HI + // X32 - LO,HI + + /* BEGIN_MICROCODE: INVERT_P384 */ + + // first obtain intermediate helper quantities (X#) + + // mirror X1 to HI bank (don't waste time copying to X1, just use RZ) + uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ); + + // compute X2 and mirror to the other bank + uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2); + uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2); + + // compute X3 and mirror to the other bank + uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3); + uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3); + + // compute X6 (stored in the lower bank) + uop_calc(MUL, BANK_LO, INVERT_X3, INVERT_X3, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X3, BANK_LO, INVERT_X6); + + // compute X12 (stored in the upper bank) + uop_calc(MUL, BANK_LO, INVERT_X6, INVERT_X6, BANK_HI, INVERT_R1); + uop_cycle(5); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X6, BANK_HI, INVERT_X12); + + // compute X15 and mirror to the other bank + uop_calc(MUL, BANK_HI, INVERT_X12, INVERT_X12, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X3, BANK_HI, INVERT_X15); + uop_move(BANK_HI, INVERT_X15, BANK_LO, INVERT_X15); + + // compute X30 (stored in the upper bank) + uop_calc(MUL, BANK_HI, INVERT_X15, INVERT_X15, BANK_LO, INVERT_R1); + uop_cycle(14); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X15, BANK_HI, INVERT_X30); + + // compute X60 (stored in the lower bank) + uop_calc(MUL, BANK_HI, INVERT_X30, INVERT_X30, BANK_LO, INVERT_R1); + uop_cycle(29); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_X60); + + // compute X120 (stored in the upper bank) + uop_calc(MUL, BANK_LO, INVERT_X60, INVERT_X60, BANK_HI, INVERT_R1); + uop_cycle(59); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X60, BANK_HI, INVERT_X120); + + // now compute the final results + + uop_calc(MUL, BANK_HI, INVERT_X120, INVERT_X120, BANK_LO, INVERT_R1); + + uop_cycle(119); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X120, BANK_LO, INVERT_R1); + + uop_cycle(15); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X15, BANK_LO, INVERT_R1); + + uop_cycle(31); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X2, BANK_HI, INVERT_R2); + + uop_cycle(94); + uop_calc_if_even(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + + // move A2 into the upper bank + uop_move(BANK_LO, INVERT_R1, BANK_HI, INVERT_A2); + + // A3 ends up in the upper bank by itself + uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3); + + /* END_MICROCODE */ +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_microcode.h b/ecdsa_fpga_microcode.h new file mode 100644 index 0000000..f551d96 --- /dev/null +++ b/ecdsa_fpga_microcode.h @@ -0,0 +1,164 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_microcode.h +// -------------------------------- +// Microcode Architecture for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include <stdlib.h> // NULL + + +//------------------------------------------------------------------------------ +enum UOP_BANK +//------------------------------------------------------------------------------ +{ + BANK_LO, BANK_HI +}; + +//-------------------------- +enum UOP_OPERAND +//-------------------------- +{ + CONST_ZERO, // 0 + CONST_ONE, // 1 + CONST_DELTA, // 2 + + CONST_GX, // 3 + CONST_GY, // 4 + + CONST_HX, // 5 + CONST_HY, // 6 + + CYCLE_RX, // 7 + CYCLE_RY, // 8 + CYCLE_RZ, // 9 + + CYCLE_SX, // 10 + CYCLE_SY, // 11 + CYCLE_SZ, // 12 + + CYCLE_A, // 13 + CYCLE_A2, // 14 + CYCLE_B, // 15 + CYCLE_C, // 16 + CYCLE_C2, // 17 + CYCLE_C2_2, // 18 + CYCLE_D, // 19 + CYCLE_E, // 20 + CYCLE_F, // 21 + CYCLE_G, // 22 + CYCLE_H, // 23 + CYCLE_J, // 24 + + CYCLE_Z2, // 25 + + CYCLE_T1, // 26 + CYCLE_T2, // 27 + CYCLE_T3, // 28 + CYCLE_T4, // 29 + + INVERT_R1, // 30 + INVERT_R2, // 31 + + INVERT_X2, // 32 + INVERT_X3, // 33 + INVERT_X6, // 34 + INVERT_X12, // 35 + INVERT_X15, // 36 + INVERT_X30, // 37 + INVERT_X32, // 38 + INVERT_X60, // 39 + INVERT_X120, // 40 + + INVERT_A2, // 41 + INVERT_A3, // 42 + + ECDSA_UOP_OPERAND_COUNT +}; + +//------------------------------------------------------------------------------ +enum UOP_MATH +//------------------------------------------------------------------------------ +{ + ADD, SUB, MUL +}; + + +//------------------------------------------------------------------------------ +// Global Storage Buffers +//------------------------------------------------------------------------------ +extern FPGA_BUFFER BUF_LO[ECDSA_UOP_OPERAND_COUNT]; +extern FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT]; + + +//------------------------------------------------------------------------------ +// Global Flags +//------------------------------------------------------------------------------ +extern bool uop_flagz_sz; +extern bool uop_flagz_rz; +extern bool uop_flagz_e; +extern bool uop_flagz_f; + + +//------------------------------------------------------------------------------ +// Loop Macros +//------------------------------------------------------------------------------ +#define uop_loop int uop_cnt +#define uop_cycle(iters); for (uop_cnt=0; uop_cnt<iters; uop_cnt++) { +#define uop_repeat(); } +#define uop_calc_if_even(...) if (!(uop_cnt % 2)) uop_calc(__VA_ARGS__) +#define uop_calc_if_odd(...) else uop_calc(__VA_ARGS__) + + +//------------------------------------------------------------------------------ +// Prototypes (Micro-Operations) +//------------------------------------------------------------------------------ +void uop_move(enum UOP_BANK src, int s_op1, UOP_BANK dst, int d_op1); +void uop_cmpz(UOP_BANK src, int s_op); + +void uop_calc (UOP_MATH math, + UOP_BANK src, int s_op1, int s_op2, + UOP_BANK dst, int d_op); + +void uop_load(const FPGA_BUFFER *mem, UOP_BANK dst, int d_op); +void uop_stor(UOP_BANK src, int s_op, FPGA_BUFFER *mem); + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_model.cpp b/ecdsa_fpga_model.cpp new file mode 100644 index 0000000..182c2ab --- /dev/null +++ b/ecdsa_fpga_model.cpp @@ -0,0 +1,539 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_model.cpp +// -------------------------------------------- +// Base point scalar multiplier model for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include <stdio.h> +#include <stdlib.h> + + +//------------------------------------------------------------------------------ +// Microcode Switch +//------------------------------------------------------------------------------ +#define USE_MICROCODE + + +//------------------------------------------------------------------------------ +// More Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void fpga_model_init (); + +bool test_base_point_multiplier (const FPGA_BUFFER *k, + const FPGA_BUFFER *qx, + const FPGA_BUFFER *qy); + +bool abuse_internal_point_adder (); +bool abuse_internal_point_doubler (); + + +//------------------------------------------------------------------------------ +// Locals +//------------------------------------------------------------------------------ +static FPGA_BUFFER ecdsa_d_nsa; +static FPGA_BUFFER ecdsa_qx_nsa; +static FPGA_BUFFER ecdsa_qy_nsa; + +static FPGA_BUFFER ecdsa_k_nsa; +static FPGA_BUFFER ecdsa_rx_nsa; +static FPGA_BUFFER ecdsa_ry_nsa; + +static FPGA_BUFFER ecdsa_d_random; +static FPGA_BUFFER ecdsa_qx_random; +static FPGA_BUFFER ecdsa_qy_random; + + +//------------------------------------------------------------------------------ +int main() +//------------------------------------------------------------------------------ +{ + bool ok; // flag + + // initialize buffers + fpga_multiword_init(); + fpga_modular_init(); + fpga_curve_init(); + fpga_model_init(); + + printf("ECDSA_UOP_OPERAND_COUNT == %d\n\n", ECDSA_UOP_OPERAND_COUNT); + + // test base point multiplier: Q = d * G + printf("Trying to derive public key from private key (NSA test vector)...\n\n"); + ok = test_base_point_multiplier(&ecdsa_d_nsa, &ecdsa_qx_nsa, &ecdsa_qy_nsa); + if (!ok) return EXIT_FAILURE; + + // test base point multiplier: R = k * G + printf("Trying to sign something (NSA test vector)...\n\n"); + ok = test_base_point_multiplier(&ecdsa_k_nsa, &ecdsa_rx_nsa, &ecdsa_ry_nsa); + if (!ok) return EXIT_FAILURE; + + // test base point multiplier: Q = d * G + printf("Trying to derive public key from private key (random test vector)...\n\n"); + ok = test_base_point_multiplier(&ecdsa_d_random, &ecdsa_qx_random, &ecdsa_qy_random); + if (!ok) return EXIT_FAILURE; + + // test base point multiplier: O = n * G + printf("Trying to multiply the base point by its order...\n\n"); + ok = test_base_point_multiplier(&ECDSA_N, &ECDSA_ZERO, &ECDSA_ZERO); + if (!ok) return EXIT_FAILURE; + + // now run some intricate tests... + + /* Excerpt from the commit message e718fdfae6443466e566ed6ce1515cdecc215ac0: + + The model does multiplication using the double-and-add algorithm. When adding + two points P and Q on curves P-256 and P-384, four special cases must be + considered. One of them is P = Q, in that situation the explicit addition + formulae don't work and either 2*P or 2*Q must be returned from the addition + routine. In this model Q is always the base point G, so when P = G, then 2*G + must be returned. Since G is fixed, this model stores precomputed point H = 2*G + and returns it when adding G + G for true constant-time operation. + + During multiplication the bits of k are scanned left-to-right, so doubling is + done before addition. This way the only situation when both inputs to the + addition routine are equal to G is when after doubling the result is G. This in + its turn is only possible when k = n + 2 (where n is the order of the base + point G). ECDSA requires integer k to be [1, n-1], one of the side effects + is that the model has a code path that will never be used under normal + operation. This code path can be verified by first multiplying by k = 2 + (special handling for P = G not triggered), then multiplying by k = n + 2 + (special handling for P = G triggered). Both multiplications should produce + the same output. In the former case the output will be calculated on-the-fly, + in the latter case the pre-computed coordinates of H will be used. + */ + + + // test base point multiplier: H = 2 * G + FPGA_BUFFER two; + fpga_modular_add(&ECDSA_ONE, &ECDSA_ONE, &two); + + printf("Trying to double the base point...\n\n"); + ok = test_base_point_multiplier(&two, &ECDSA_HX, &ECDSA_HY); + if (!ok) return EXIT_FAILURE; + + // test base point multiplier: G = (n + 1) * G + FPGA_BUFFER n1; + fpga_modular_add(&ECDSA_N, &ECDSA_ONE, &n1); // n1 = n + 1 + + printf("Trying to multiply the base point by its order plus one...\n\n"); + ok = test_base_point_multiplier(&n1, &ECDSA_GX, &ECDSA_GY); + if (!ok) return EXIT_FAILURE; + + // test base point multiplier: H = (n + 2) * G + FPGA_BUFFER n2; + fpga_modular_add(&ECDSA_N, &two, &n2); // n2 = n + two + + printf("Trying to multiply the base point by its order plus two...\n\n"); + ok = test_base_point_multiplier(&n2, &ECDSA_HX, &ECDSA_HY); + if (!ok) return EXIT_FAILURE; + + // ..and some more tests + + // try to abuse internal point doubler + ok = abuse_internal_point_doubler(); + if (!ok) return EXIT_FAILURE; + + // try to abuse internal point adder + ok = abuse_internal_point_adder(); + if (!ok) return EXIT_FAILURE; + + // everything went just fine + return EXIT_SUCCESS; +} + + +//------------------------------------------------------------------------------ +void fpga_model_init() +//------------------------------------------------------------------------------ +{ + int w; // word counter + + FPGA_BUFFER tmp_d_nsa = ECDSA_D_NSA_INIT, tmp_k_nsa = ECDSA_K_NSA_INIT; + FPGA_BUFFER tmp_qx_nsa = ECDSA_QX_NSA_INIT, tmp_rx_nsa = ECDSA_RX_NSA_INIT; + FPGA_BUFFER tmp_qy_nsa = ECDSA_QY_NSA_INIT, tmp_ry_nsa = ECDSA_RY_NSA_INIT; + + FPGA_BUFFER tmp_d_random = ECDSA_D_RANDOM_INIT; + FPGA_BUFFER tmp_qx_random = ECDSA_QX_RANDOM_INIT; + FPGA_BUFFER tmp_qy_random = ECDSA_QY_RANDOM_INIT; + + // fill buffers for large multi-word integers + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + { + ecdsa_d_nsa.words[w] = tmp_d_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + ecdsa_qx_nsa.words[w] = tmp_qx_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + ecdsa_qy_nsa.words[w] = tmp_qy_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + + ecdsa_k_nsa.words[w] = tmp_k_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + ecdsa_rx_nsa.words[w] = tmp_rx_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + ecdsa_ry_nsa.words[w] = tmp_ry_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + + ecdsa_d_random.words[w] = tmp_d_random.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + ecdsa_qx_random.words[w] = tmp_qx_random.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + ecdsa_qy_random.words[w] = tmp_qy_random.words[FPGA_OPERAND_NUM_WORDS - (w+1)]; + } +} + + +//------------------------------------------------------------------------------ +bool test_base_point_multiplier (const FPGA_BUFFER *k, + const FPGA_BUFFER *qx, + const FPGA_BUFFER *qy) +//------------------------------------------------------------------------------ +// +// k - multiplier +// +// qx, qy - expected coordinates of product +// +// Returns true when point (rx,ry) = k * G matches the point (qx,qy). +// +//------------------------------------------------------------------------------ +{ + bool ok; // flag + FPGA_BUFFER rx, ry; // result + + // run the model + fpga_curve_base_scalar_multiply(k, &rx, &ry); + + // handle result + ok = compare_fpga_buffers(qx, qy, &rx, &ry); + + if (!ok) + { printf("\n ERROR\n\n"); + return false; + } + else printf("\n OK\n\n"); + + // everything went just fine + return true; +} + + +//------------------------------------------------------------------------------ +bool compare_fpga_buffers( const FPGA_BUFFER *az, + const FPGA_BUFFER *bz) +//------------------------------------------------------------------------------ +// +// Compare z-coordinates of two points and return true when they match. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + + // print all the values + print_fpga_buffer(" Expected: Z = ", az); + print_fpga_buffer(" Calculated: Z = ", bz); + + // compare values + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + { + // compare x + if (az->words[w] != bz->words[w]) return false; + } + + // values are the same + return true; +} + + +//------------------------------------------------------------------------------ +bool compare_fpga_buffers( const FPGA_BUFFER *ax, + const FPGA_BUFFER *ay, + const FPGA_BUFFER *bx, + const FPGA_BUFFER *by) +//------------------------------------------------------------------------------ +// +// Compare affine coordinates of two points and return true when they match. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + + // print all the values + print_fpga_buffer(" Expected: X = ", ax); + print_fpga_buffer(" Calculated: X = ", bx); + printf("\n"); + print_fpga_buffer(" Expected: Y = ", ay); + print_fpga_buffer(" Calculated: Y = ", by); + + // compare values + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + { + // compare x + if (ax->words[w] != bx->words[w]) return false; + + // compare y + if (ay->words[w] != by->words[w]) return false; + } + + // values are the same + return true; +} + + +//------------------------------------------------------------------------------ +bool compare_fpga_buffers( const FPGA_BUFFER *ax, + const FPGA_BUFFER *ay, + const FPGA_BUFFER *az, + const FPGA_BUFFER *bx, + const FPGA_BUFFER *by, + const FPGA_BUFFER *bz) +//------------------------------------------------------------------------------ +// +// Compare projective coordinates of two points and return true when they match. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + + // print all the values + print_fpga_buffer(" Expected: X = ", ax); + print_fpga_buffer(" Calculated: X = ", bx); + printf("\n"); + print_fpga_buffer(" Expected: Y = ", ay); + print_fpga_buffer(" Calculated: Y = ", by); + printf("\n"); + print_fpga_buffer(" Expected: Z = ", az); + print_fpga_buffer(" Calculated: Z = ", bz); + + // compare values + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + { + // compare x + if (ax->words[w] != bx->words[w]) return false; + + // compare y + if (ay->words[w] != by->words[w]) return false; + + // compare z + if (az->words[w] != bz->words[w]) return false; + } + + // values are the same + return true; +} + + +//------------------------------------------------------------------------------ +void print_fpga_buffer(const char *s, const FPGA_BUFFER *buf) +//------------------------------------------------------------------------------ +// +// Pretty print large multi-word integer. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + + // print header + printf("%s", s); + + // print all bytes + for (w=FPGA_OPERAND_NUM_WORDS; w>0; w--) + { + printf("%08x", buf->words[w-1]); + + // insert space after every group of 4 bytes + if (w > 1) printf(" "); + } + + // print footer + printf("\n"); +} + + +//------------------------------------------------------------------------------ +void print_fpga_buffer_nodelim(const char *s, const FPGA_BUFFER *buf) +//------------------------------------------------------------------------------ +// +// Pretty print large multi-word integer. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + + // print header + printf("%s", s); + + // print all bytes + for (w=FPGA_OPERAND_NUM_WORDS; w>0; w--) + printf("%08x", buf->words[w-1]); + + // print footer + printf("\n"); +} + + +//------------------------------------------------------------------------------ +bool abuse_internal_point_doubler() +//------------------------------------------------------------------------------ +// +// This routine tries to abuse the internal curve point doubler by forcing it +// to double point at infinity. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + bool ok; // flag + + FPGA_BUFFER px, py, pz; // input + FPGA_BUFFER qx, qy, qz; // output + + // set P.X and P.Y to some "random" garbage and P.Z to zero + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + { px.words[w] = ECDSA_GX.words[w] ^ ECDSA_HX.words[w]; + py.words[w] = ECDSA_GY.words[w] ^ ECDSA_HY.words[w]; + } + fpga_multiword_copy(&ECDSA_ZERO, &pz); + + // try to double point at infinity (should produce point at infinity) + printf("Trying to double something at infinity...\n\n"); + fpga_curve_double_jacobian(&px, &py, &pz, &qx, &qy, &qz); + + // handle result + ok = compare_fpga_buffers(&ECDSA_ZERO, &qz); + if (! ok) + { printf("\n ERROR\n\n"); + return false; + } + else printf("\n OK\n\n"); + + // everything went just fine + return true; +} + + +//------------------------------------------------------------------------------ +bool abuse_internal_point_adder() +//------------------------------------------------------------------------------ +// +// This routine tries to abuse the internal curve point adder by forcing it to +// go throgh all the possible "corner cases". +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + bool ok; // flag + + FPGA_BUFFER px, py, pz; // input + FPGA_BUFFER rx, ry, rz; // output + + // + // try to add point at infinity to the base point + // + { + // set P.X and P.Y to some "random" garbage and P.Z to zero + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + { px.words[w] = ECDSA_GX.words[w] ^ ECDSA_HX.words[w]; + py.words[w] = ECDSA_GY.words[w] ^ ECDSA_HY.words[w]; + } + fpga_multiword_copy(&ECDSA_ZERO, &pz); + + // run addition proceduce + printf("Trying to add something at infinity to the base point...\n\n"); + fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz); + + // handle result + ok = compare_fpga_buffers(&ECDSA_GX, &ECDSA_GY, &ECDSA_ONE, &rx, &ry, &rz); + if (! ok) + { printf("\n ERROR\n\n"); + return false; + } + else printf("\n OK\n\n"); + } + + // + // try to add the base point to itself + // + { + // set P to G + fpga_multiword_copy(&ECDSA_GX, &px); + fpga_multiword_copy(&ECDSA_GY, &py); + fpga_multiword_copy(&ECDSA_ONE, &pz); + + // run addition proceduce + printf("Trying to add the base point to itself...\n\n"); + fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz); + + // handle result + ok = compare_fpga_buffers(&ECDSA_HX, &ECDSA_HY, &ECDSA_ONE, &rx, &ry, &rz); + if (! ok) + { printf("\n ERROR\n\n"); + return false; + } + else printf("\n OK\n\n"); + } + + // + // try to add the base point to its opposite + // + { + // set P to (G.X, -G.Y, 1) + fpga_multiword_copy(&ECDSA_ZERO, &px); + fpga_multiword_copy(&ECDSA_ZERO, &py); + fpga_multiword_copy(&ECDSA_ONE, &pz); + + fpga_modular_add(&ECDSA_ZERO, &ECDSA_GX, &px); + fpga_modular_sub(&ECDSA_ZERO, &ECDSA_GY, &py); + + // run addition proceduce + printf("Trying to add the base point to its opposite...\n\n"); + fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz); + + // handle result + ok = compare_fpga_buffers(&ECDSA_ONE, &ECDSA_ONE, &ECDSA_ZERO, &rx, &ry, &rz); + if (! ok) + { printf("\n ERROR\n\n"); + return false; + } + else printf("\n OK\n\n"); + } + + // everything went just fine + return true; +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_model.h b/ecdsa_fpga_model.h new file mode 100644 index 0000000..ea045b9 --- /dev/null +++ b/ecdsa_fpga_model.h @@ -0,0 +1,141 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_model.h +// -------------------------------------------- +// Base point scalar multiplier model for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// +// Curve Selection (don't override if already selected, handy for testing +// from the STM32 sample driver) +// +// USE_CURVE == 1 -> P-256 +// USE_CURVE == 2 -> P-384 +// +//------------------------------------------------------------------------------ +#ifndef USE_CURVE +#define USE_CURVE 2 +#endif +//------------------------------------------------------------------------------ +#define BAD_CURVE #error USE_CURVE must be either 1 or 2! +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_lowlevel.h" +#include "ecdsa_fpga_multiword.h" +#include "ecdsa_fpga_modular.h" +#include "ecdsa_fpga_curve.h" +#ifdef USE_MICROCODE +#include "ecdsa_fpga_microcode.h" +#endif + + +//------------------------------------------------------------------------------ +// Test Vectors +//------------------------------------------------------------------------------ +#include "ecdsa_test_vector_nsa.h" +#include "ecdsa_test_vector_randomized.h" + + +//------------------------------------------------------------------------------ +// Test Vectors Switch +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 + +#define ECDSA_D_NSA_INIT ECDSA_P256_D_NSA_INIT +#define ECDSA_QX_NSA_INIT ECDSA_P256_QX_NSA_INIT +#define ECDSA_QY_NSA_INIT ECDSA_P256_QY_NSA_INIT + +#define ECDSA_K_NSA_INIT ECDSA_P256_K_NSA_INIT +#define ECDSA_RX_NSA_INIT ECDSA_P256_RX_NSA_INIT +#define ECDSA_RY_NSA_INIT ECDSA_P256_RY_NSA_INIT + +#define ECDSA_D_RANDOM_INIT ECDSA_P256_D_RANDOM_INIT +#define ECDSA_QX_RANDOM_INIT ECDSA_P256_QX_RANDOM_INIT +#define ECDSA_QY_RANDOM_INIT ECDSA_P256_QY_RANDOM_INIT + +#elif USE_CURVE == 2 + +#define ECDSA_D_NSA_INIT ECDSA_P384_D_NSA_INIT +#define ECDSA_QX_NSA_INIT ECDSA_P384_QX_NSA_INIT +#define ECDSA_QY_NSA_INIT ECDSA_P384_QY_NSA_INIT + +#define ECDSA_K_NSA_INIT ECDSA_P384_K_NSA_INIT +#define ECDSA_RX_NSA_INIT ECDSA_P384_RX_NSA_INIT +#define ECDSA_RY_NSA_INIT ECDSA_P384_RY_NSA_INIT + +#define ECDSA_D_RANDOM_INIT ECDSA_P384_D_RANDOM_INIT +#define ECDSA_QX_RANDOM_INIT ECDSA_P384_QX_RANDOM_INIT +#define ECDSA_QY_RANDOM_INIT ECDSA_P384_QY_RANDOM_INIT + +#else + +BAD_CURVE + +#endif + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void print_fpga_buffer (const char *s, + const FPGA_BUFFER *v); + +void print_fpga_buffer_nodelim (const char *s, + const FPGA_BUFFER *v); + +bool compare_fpga_buffers (const FPGA_BUFFER *az, + const FPGA_BUFFER *bz); + +bool compare_fpga_buffers (const FPGA_BUFFER *ax, + const FPGA_BUFFER *ay, + const FPGA_BUFFER *bx, + const FPGA_BUFFER *by); + +bool compare_fpga_buffers (const FPGA_BUFFER *ax, + const FPGA_BUFFER *ay, + const FPGA_BUFFER *az, + const FPGA_BUFFER *bx, + const FPGA_BUFFER *by, + const FPGA_BUFFER *bz); + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_modular.cpp b/ecdsa_fpga_modular.cpp new file mode 100644 index 0000000..9d22c05 --- /dev/null +++ b/ecdsa_fpga_modular.cpp @@ -0,0 +1,723 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_modular.cpp +// ------------------------------------- +// Modular arithmetic routines for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +FPGA_BUFFER ECDSA_Q; +FPGA_BUFFER ECDSA_DELTA; + + +//------------------------------------------------------------------------------ +void fpga_modular_init() +//------------------------------------------------------------------------------ +{ + int w_src, w_dst; // word counters + + // temporary things + FPGA_WORD TMP_Q [FPGA_OPERAND_NUM_WORDS] = ECDSA_Q_INIT; + FPGA_WORD TMP_DELTA[FPGA_OPERAND_NUM_WORDS] = ECDSA_DELTA_INIT; + + /* fill buffers for large multi-word integers, we need to fill them in + * reverse order because of the way C arrays are initialized + */ + for ( w_src = 0, w_dst = FPGA_OPERAND_NUM_WORDS - 1; + w_src < FPGA_OPERAND_NUM_WORDS; + w_src++, w_dst--) + { + ECDSA_Q.words[w_dst] = TMP_Q[w_src]; + ECDSA_DELTA.words[w_dst] = TMP_DELTA[w_src]; + } +} + + +//------------------------------------------------------------------------------ +// +// Modular addition. +// +// This routine implements algorithm 3. from "Ultra High Performance ECC over +// NIST Primes on Commercial FPGAs". +// +// s = (a + b) mod q +// +// The naive approach is like the following: +// +// 1. s = a + b +// 2. if (s >= q) s -= q +// +// The speed-up trick is to simultaneously calculate (a + b) and (a + b - q) +// and then select the right variant. +// +//------------------------------------------------------------------------------ +void fpga_modular_add(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_BUFFER *s) +//------------------------------------------------------------------------------ +{ + int w; // word counter + FPGA_BUFFER ab, ab_n; // intermediate buffers + bool c_in, c_out; // carries + bool b_in, b_out; // borrows + + c_in = false; // first word has no carry + b_in = false; // first word has no borrow + + // run parallel addition and subtraction + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + { + fpga_lowlevel_add32(a->words[w], b->words[w], c_in, &ab.words[w], &c_out); + fpga_lowlevel_sub32(ab.words[w], ECDSA_Q.words[w], b_in, &ab_n.words[w], &b_out); + + c_in = c_out; // propagate carry + b_in = b_out; // propagate borrow + } + + // now select the right buffer + + /* + * We select the right variant based on borrow and carry flags after + * addition and subtraction of the very last pair of words. Note, that + * we only need to select the first variant (a + b) when (a + b) < q. + * This way if we get negative number after subtraction, we discard it + * and use the output of the adder instead. The subtractor output is + * negative when borrow flag is set *and* carry flag is not set. When + * both borrow and carry are set, the number is non-negative, because + * borrow and carry cancel each other out. + */ + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + s->words[w] = (b_out && !c_out) ? ab.words[w] : ab_n.words[w]; +} + + +//------------------------------------------------------------------------------ +// +// Modular subtraction. +// +// This routine implements algorithm 3. from "Ultra High Performance ECC over +// NIST Primes on Commercial FPGAs". +// +// d = (a - b) mod q +// +// The naive approach is like the following: +// +// 1. d = a - b +// 2. if (a < b) d += q +// +// The speed-up trick is to simultaneously calculate (a - b) and (a - b + q) +// and then select the right variant. +// +//------------------------------------------------------------------------------ +void fpga_modular_sub(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_BUFFER *d) +//------------------------------------------------------------------------------ +{ + int w; // word counter + FPGA_BUFFER ab, ab_n; // intermediate buffers + bool c_in, c_out; // carries + bool b_in, b_out; // borrows + + c_in = false; // first word has no carry + b_in = false; // first word has no borrow + + // run parallel subtraction and addition + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + { + fpga_lowlevel_sub32(a->words[w], b->words[w], b_in, &ab.words[w], &b_out); + fpga_lowlevel_add32(ab.words[w], ECDSA_Q.words[w], c_in, &ab_n.words[w], &c_out); + + b_in = b_out; // propagate borrow + c_in = c_out; // propagate carry + } + + // now select the right buffer + + /* + * We select the right variant based on borrow flag after subtraction + * and addition of the very last pair of words. Note, that we only + * need to select the second variant (a - b + q) when a < b. This way + * if we get negative number after subtraction, we discard it + * and use the output of the adder instead. The Subtractor output is + * negative when borrow flag is set. + */ + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + d->words[w] = b_out ? ab_n.words[w] : ab.words[w]; +} + + +//------------------------------------------------------------------------------ +// +// Modular multiplication. +// +// This routine implements modular multiplication algorithm from "Ultra High +// Performance ECC over NIST Primes on Commercial FPGAs". +// +// p = (a * b) mod q +// +// The complex algorithm is split into three parts: +// +// 1. Calculation of partial words +// 2. Acccumulation of partial words into full-size product +// 3. Modular reduction of the full-size product +// +// See comments for corresponding helper routines for more information. +// +//------------------------------------------------------------------------------ +void fpga_modular_mul(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_BUFFER *p) +//------------------------------------------------------------------------------ +{ + FPGA_WORD_EXTENDED si[4*FPGA_OPERAND_NUM_WORDS-1]; // parts of intermediate product + FPGA_WORD c[2*FPGA_OPERAND_NUM_WORDS]; // full-size intermediate product + + /* multiply to get partial words */ + fpga_modular_mul_helper_multiply(a, b, si); + + /* accumulate partial words into full-size product */ + fpga_modular_mul_helper_accumulate(si, c); + + /* reduce full-size product using special routine */ + fpga_modular_mul_helper_reduce(c, p); +} + + +//------------------------------------------------------------------------------ +// +// Parallelized multiplication. +// +// This routine implements the algorithm in Fig. 3. from "Ultra High +// Performance ECC over NIST Primes on Commercial FPGAs". +// +// Inputs a and b are split into 2*OPERAND_NUM_WORDS words of FPGA_WORD_WIDTH/2 +// bits each, because FPGA multipliers can't handle full FPGA_WORD_WIDTH-wide +// inputs. These smaller words are multiplied by an array of 2*OPERAND_NUM_WORDS +// multiplers and accumulated into an array of 4*OPERAND_NUM_WORDS-1 partial +// output words si[]. +// +// The order of loading a and b into the multipliers is a bit complicated, +// during the first 2*OPERAND_NUM_WORDS-1 cycles one si word per cycle is +// obtained, during the very last 2*OPERAND_NUM_WORDS'th cycle all the +// remaining 2*OPERAND_NUM_WORDS partial words are obtained simultaneously. +// +//------------------------------------------------------------------------------ +void fpga_modular_mul_helper_multiply(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si) +//------------------------------------------------------------------------------ +{ + int w; // counter + int t, x; // more counters + int j, i; // word indices + FPGA_WORD p; // product + + // buffers for smaller words that multipliers can handle + FPGA_WORD_REDUCED ai[2*FPGA_OPERAND_NUM_WORDS]; + FPGA_WORD_REDUCED bj[2*FPGA_OPERAND_NUM_WORDS]; + + // split a and b into smaller words + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + ai[2*w] = (FPGA_WORD_REDUCED)a->words[w], ai[2*w + 1] = (FPGA_WORD_REDUCED)(a->words[w] >> (FPGA_WORD_WIDTH / 2)), + bj[2*w] = (FPGA_WORD_REDUCED)b->words[w], bj[2*w + 1] = (FPGA_WORD_REDUCED)(b->words[w] >> (FPGA_WORD_WIDTH / 2)); + + // accumulators + FPGA_WORD_EXTENDED mac[2*FPGA_OPERAND_NUM_WORDS]; + + // clear accumulators + for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++) mac[w] = 0; + + // run the crazy algorithm :) + for (t=0; t<(2*FPGA_OPERAND_NUM_WORDS); t++) + { + // save upper half of si[] (one word per cycle) + if (t > 0) + { si[4*FPGA_OPERAND_NUM_WORDS - (t+1)] = mac[t]; + mac[t] = 0; + } + + // update index + j = 2*FPGA_OPERAND_NUM_WORDS - (t+1); + + // parallel multiplication + for (x=0; x<(2*FPGA_OPERAND_NUM_WORDS); x++) + { + // update index + i = t - x; + if (i < 0) i += 2*FPGA_OPERAND_NUM_WORDS; + + // multiply... + fpga_lowlevel_mul16(ai[i], bj[j], &p); + + // ...accumulate + mac[x] += p; + } + + } + + // now finally save lower half of si[] (2*OPERAND_NUM_WORDS words at once) + for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++) + si[w] = mac[2*FPGA_OPERAND_NUM_WORDS - (w+1)]; +} + + +//------------------------------------------------------------------------------ +// +// Accumulation of partial words into full-size product. +// +// This routine implements the Algorithm 4. from "Ultra High Performance ECC +// over NIST Primes on Commercial FPGAs". +// +// Input words si[] are accumulated into full-size product c[]. +// +// The algorithm is a bit tricky, there are 4*OPERAND_NUM_WORDS-1 words in +// si[]. Complete operation takes 2*OPERAND_NUM_WORDS cycles, even words are +// summed in full, odd words are split into two parts. During every cycle the +// upper part of the previous word and the lower part of the following word are +// summed too. +// +//------------------------------------------------------------------------------ +void fpga_modular_mul_helper_accumulate(const FPGA_WORD_EXTENDED *si, FPGA_WORD *c) +//------------------------------------------------------------------------------ +{ + int w; // word counter + FPGA_WORD_EXTENDED cw0, cw1; // intermediate sums + FPGA_WORD_REDUCED cw_carry; // wide carry + + // clear carry + cw_carry = 0; + + // execute the algorithm + for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++) + { + // handy flags + bool w_is_first = (w == 0); + bool w_is_last = (w == (2*FPGA_OPERAND_NUM_WORDS-1)); + + // accumulate full current even word... + // ...and also the upper part of the previous odd word (if not the first word) + fpga_lowlevel_add47(si[2*w], w_is_first ? 0 : si[2*w-1] >> (FPGA_WORD_WIDTH / 2), &cw0); + + // generate another word from "carry" part of the previous even word... + // ...and also the lower part of the following odd word (if not the last word) + cw1 = w_is_last ? 0 : (FPGA_WORD)(si[2*w+1] << (FPGA_WORD_WIDTH / 2)); + cw1 |= (FPGA_WORD_EXTENDED)cw_carry; + + // accumulate once again + fpga_lowlevel_add47(cw0, cw1, &cw1); + + // store current word... + c[w] = (FPGA_WORD)cw1; + + // ...and carry + cw_carry = (FPGA_WORD_REDUCED) (cw1 >> FPGA_WORD_WIDTH); + } +} + + +//------------------------------------------------------------------------------ +// +// Fast modular reduction for NIST prime P-256. +// +// p = c mod p256 +// +// This routine implements the algorithm 2.29 from "Guide to Elliptic Curve +// Cryptography". +// +// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c +// on the other hand is the output of the parallelized Comba multiplier, so it +// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS). +// +// To save FPGA resources, the calculation is done using only two adders and +// one subtractor. The algorithm is split into five steps. +// +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 +void fpga_modular_mul_helper_reduce_p256(const FPGA_WORD *c, FPGA_BUFFER *p) +{ + // "funny" words + FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9; + + // compose "funny" words out of input words + s1.words[7] = c[ 7], s1.words[6] = c[ 6], s1.words[5] = c[ 5], s1.words[4] = c[ 4], s1.words[3] = c[ 3], s1.words[2] = c[ 2], s1.words[1] = c[ 1], s1.words[0] = c[ 0]; + s2.words[7] = c[15], s2.words[6] = c[14], s2.words[5] = c[13], s2.words[4] = c[12], s2.words[3] = c[11], s2.words[2] = 0, s2.words[1] = 0, s2.words[0] = 0; + s3.words[7] = 0, s3.words[6] = c[15], s3.words[5] = c[14], s3.words[4] = c[13], s3.words[3] = c[12], s3.words[2] = 0, s3.words[1] = 0, s3.words[0] = 0; + s4.words[7] = c[15], s4.words[6] = c[14], s4.words[5] = 0, s4.words[4] = 0, s4.words[3] = 0, s4.words[2] = c[10], s4.words[1] = c[ 9], s4.words[0] = c[ 8]; + s5.words[7] = c[ 8], s5.words[6] = c[13], s5.words[5] = c[15], s5.words[4] = c[14], s5.words[3] = c[13], s5.words[2] = c[11], s5.words[1] = c[10], s5.words[0] = c[ 9]; + s6.words[7] = c[10], s6.words[6] = c[ 8], s6.words[5] = 0, s6.words[4] = 0, s6.words[3] = 0, s6.words[2] = c[13], s6.words[1] = c[12], s6.words[0] = c[11]; + s7.words[7] = c[11], s7.words[6] = c[ 9], s7.words[5] = 0, s7.words[4] = 0, s7.words[3] = c[15], s7.words[2] = c[14], s7.words[1] = c[13], s7.words[0] = c[12]; + s8.words[7] = c[12], s8.words[6] = 0, s8.words[5] = c[10], s8.words[4] = c[ 9], s8.words[3] = c[ 8], s8.words[2] = c[15], s8.words[1] = c[14], s8.words[0] = c[13]; + s9.words[7] = c[13], s9.words[6] = 0, s9.words[5] = c[11], s9.words[4] = c[10], s9.words[3] = c[ 9], s9.words[2] = 0, s9.words[1] = c[15], s9.words[0] = c[14]; + + // intermediate results + FPGA_BUFFER sum0, sum1, difference; + + /* Step 1. */ + fpga_modular_add(&s2, &s2, &sum0); // sum0 = 2*s2 + fpga_modular_add(&s3, &s3, &sum1); // sum1 = 2*s3 + fpga_modular_sub(&ECDSA_ZERO, &s6, &difference); // difference = -s6 + + /* Step 2. */ + fpga_modular_add(&sum0, &s1, &sum0); // sum0 = s1 + 2*s2 + fpga_modular_add(&sum1, &s4, &sum1); // sum1 = s4 + 2*s3 + fpga_modular_sub(&difference, &s7, &difference); // difference = -(s6 + s7) + + /* Step 3. */ + fpga_modular_add(&sum0, &s5, &sum0); // sum0 = s1 + 2*s2 + s5 + fpga_modular_add(&sum1, &ECDSA_ZERO, &sum1); // compulsory cycle to keep sum1 constant for next stage + fpga_modular_sub(&difference, &s8, &difference); // difference = -(s6 + s7 + s8) + + /* Step 4. */ + fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5 +// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored + fpga_modular_sub(&difference, &s9, &difference); // difference = -(s6 + s7 + s8 + s9) + + /* Step 5. */ + fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + 2*s3 + s4 + s5 - s6 - s7 - s8 - s9 +// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored +// fpga_modular_add(<dummy>, <dummy>, &difference); // dummy cycle, result ignored +} +#endif + + +//------------------------------------------------------------------------------ +// +// Fast modular reduction for NIST prime P-384. +// +// p = c mod p384 +// +// This routine implements the algorithm 2.30 from "Guide to Elliptic Curve +// Cryptography". +// +// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c +// on the other hand is the output of the parallelized Comba multiplier, so it +// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS). +// +// To save FPGA resources, the calculation is done using only two adders and +// one subtractor. The algorithm is split into five steps. +// +//------------------------------------------------------------------------------ +#if USE_CURVE == 2 +void fpga_modular_mul_helper_reduce_p384(const FPGA_WORD *c, FPGA_BUFFER *p) +{ + // "funny" words + FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; + + // compose "funny" words + s1.words[11] = c[11], s1.words[10] = c[10], s1.words[ 9] = c[ 9], s1.words[ 8] = c[ 8], s1.words[ 7] = c[ 7], s1.words[ 6] = c[ 6], s1.words[ 5] = c[ 5], s1.words[ 4] = c[ 4], s1.words[ 3] = c[ 3], s1.words[ 2] = c[ 2], s1.words[ 1] = c[ 1], s1.words[ 0] = c[ 0]; + s2.words[11] = 0, s2.words[10] = 0, s2.words[ 9] = 0, s2.words[ 8] = 0, s2.words[ 7] = 0, s2.words[ 6] = c[23], s2.words[ 5] = c[22], s2.words[ 4] = c[21], s2.words[ 3] = 0, s2.words[ 2] = 0, s2.words[ 1] = 0, s2.words[ 0] = 0; + s3.words[11] = c[23], s3.words[10] = c[22], s3.words[ 9] = c[21], s3.words[ 8] = c[20], s3.words[ 7] = c[19], s3.words[ 6] = c[18], s3.words[ 5] = c[17], s3.words[ 4] = c[16], s3.words[ 3] = c[15], s3.words[ 2] = c[14], s3.words[ 1] = c[13], s3.words[ 0] = c[12]; + s4.words[11] = c[20], s4.words[10] = c[19], s4.words[ 9] = c[18], s4.words[ 8] = c[17], s4.words[ 7] = c[16], s4.words[ 6] = c[15], s4.words[ 5] = c[14], s4.words[ 4] = c[13], s4.words[ 3] = c[12], s4.words[ 2] = c[23], s4.words[ 1] = c[22], s4.words[ 0] = c[21]; + s5.words[11] = c[19], s5.words[10] = c[18], s5.words[ 9] = c[17], s5.words[ 8] = c[16], s5.words[ 7] = c[15], s5.words[ 6] = c[14], s5.words[ 5] = c[13], s5.words[ 4] = c[12], s5.words[ 3] = c[20], s5.words[ 2] = 0, s5.words[ 1] = c[23], s5.words[ 0] = 0; + s6.words[11] = 0, s6.words[10] = 0, s6.words[ 9] = 0, s6.words[ 8] = 0, s6.words[ 7] = c[23], s6.words[ 6] = c[22], s6.words[ 5] = c[21], s6.words[ 4] = c[20], s6.words[ 3] = 0, s6.words[ 2] = 0, s6.words[ 1] = 0, s6.words[ 0] = 0; + s7.words[11] = 0, s7.words[10] = 0, s7.words[ 9] = 0, s7.words[ 8] = 0, s7.words[ 7] = 0, s7.words[ 6] = 0, s7.words[ 5] = c[23], s7.words[ 4] = c[22], s7.words[ 3] = c[21], s7.words[ 2] = 0, s7.words[ 1] = 0, s7.words[ 0] = c[20]; + s8.words[11] = c[22], s8.words[10] = c[21], s8.words[ 9] = c[20], s8.words[ 8] = c[19], s8.words[ 7] = c[18], s8.words[ 6] = c[17], s8.words[ 5] = c[16], s8.words[ 4] = c[15], s8.words[ 3] = c[14], s8.words[ 2] = c[13], s8.words[ 1] = c[12], s8.words[ 0] = c[23]; + s9.words[11] = 0, s9.words[10] = 0, s9.words[ 9] = 0, s9.words[ 8] = 0, s9.words[ 7] = 0, s9.words[ 6] = 0, s9.words[ 5] = 0, s9.words[ 4] = c[23], s9.words[ 3] = c[22], s9.words[ 2] = c[21], s9.words[ 1] = c[20], s9.words[ 0] = 0; + s10.words[11] = 0, s10.words[10] = 0, s10.words[ 9] = 0, s10.words[ 8] = 0, s10.words[ 7] = 0, s10.words[ 6] = 0, s10.words[ 5] = 0, s10.words[ 4] = c[23], s10.words[ 3] = c[23], s10.words[ 2] = 0, s10.words[ 1] = 0, s10.words[ 0] = 0; + + // intermediate results + FPGA_BUFFER sum0, sum1, difference; + + /* Step 1. */ + fpga_modular_add(&s1, &s3, &sum0); // sum0 = s1 + s3 + fpga_modular_add(&s2, &s2, &sum1); // sum1 = 2*s2 + fpga_modular_sub(&ECDSA_ZERO, &s8, &difference); // difference = -s8 + + /* Step 2. */ + fpga_modular_add(&sum0, &s4, &sum0); // sum0 = s1 + s3 + s4 + fpga_modular_add(&sum1, &s5, &sum1); // sum1 = 2*s2 + s5 + fpga_modular_sub(&difference, &s9, &difference); // difference = -(s8 + s9) + + /* Step 3. */ + fpga_modular_add(&sum0, &s6, &sum0); // sum0 = s1 + s3 + s4 + s6 + fpga_modular_add(&sum1, &s7, &sum1); // sum1 = 2*s2 + s5 + s7 + fpga_modular_sub(&difference, &s10, &difference); // difference = -(s8 + s9 + s10) + + /* Step 4. */ + fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5 +// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored + fpga_modular_sub(&difference, &ECDSA_ZERO, &difference); // compulsory cycle to keep difference constant for next stage + + /* Step 5. */ + fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10 +// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored +// fpga_modular_add(<dummy>, <dummy>, &difference); // dummy cycle, result ignored +} +#endif + + +#if USE_CURVE == 1 +//------------------------------------------------------------------------------ +void fpga_modular_inv23_p256(const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3) +//------------------------------------------------------------------------------ +// +// This uses the addition chain from +// +// < https://briansmith.org/ecc-inversion-addition-chains-01 > +// +// to calculate A2 = A^-2 and A3 = A^-3. +// +//------------------------------------------------------------------------------ +{ + // counter + int cyc_cnt; + + // working variables + FPGA_BUFFER R1, R2, X1, X2, X3, X6, X12, X15, X30, X32; + + // first obtain intermediate helper quantities (X1..X32) + + // X1 + fpga_multiword_copy(A, &X1); + + // X2 + fpga_modular_mul(&X1, &X1, &R1); + fpga_modular_mul(&R1, &X1, &X2); + + // X3 + fpga_modular_mul(&X2, &X2, &R1); + fpga_modular_mul(&R1, &X1, &X3); + + // X6 + fpga_multiword_copy(&X3, &R1); + for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X3, &X6); + + // X12 + fpga_multiword_copy(&X6, &R1); + for (cyc_cnt=0; cyc_cnt<6; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X6, &X12); + + // X15 + fpga_multiword_copy(&X12, &R1); + for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X3, &X15); + + // X30 + fpga_multiword_copy(&X15, &R1); + for (cyc_cnt=0; cyc_cnt<15; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X15, &X30); + + // X32 + fpga_multiword_copy(&X30, &R1); + for (cyc_cnt=0; cyc_cnt<2; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X2, &X32); + + // now compute the final results + + fpga_multiword_copy(&X32, &R1); + for (cyc_cnt=0; cyc_cnt<32; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X1, &R2); + + for (cyc_cnt=0; cyc_cnt<128; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1); + else fpga_modular_mul(&R1, &R1, &R2); + } + fpga_modular_mul(&R2, &X32, &R1); + + for (cyc_cnt=0; cyc_cnt<32; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X32, &R2); + + for (cyc_cnt=0; cyc_cnt<30; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1); + else fpga_modular_mul(&R1, &R1, &R2); + } + fpga_modular_mul(&R2, &X30, &R1); + + fpga_modular_mul(&R1, &R1, &R2); + fpga_modular_mul(&R2, &R2, &R1); + + // A2 obtained + fpga_multiword_copy(&R1, A2); + + // now calculate compute inverse cubed from inverse squared + fpga_modular_mul(&R1, &R1, &R2); + fpga_modular_mul(&R2, A, &R1); + + // A3 obtained + fpga_multiword_copy(&R1, A3); +} +#endif + + +#if USE_CURVE == 2 +//------------------------------------------------------------------------------ +void fpga_modular_inv23_p384(const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3) +//------------------------------------------------------------------------------ +// +// This uses the addition chain from +// +// < https://briansmith.org/ecc-inversion-addition-chains-01 > +// +// to calculate A2 = A^-2 and A3 = A^-3. +// +//------------------------------------------------------------------------------ +{ + // counter + int cyc_cnt; + + // working variables + FPGA_BUFFER R1, R2, X1, X2, X3, X6, X12, X15, X30, X60, X120; + + // first obtain intermediate helper quantities (X1..X120) + + // X1 + fpga_multiword_copy(A, &X1); + + // X2 + fpga_modular_mul(&X1, &X1, &R1); + fpga_modular_mul(&R1, &X1, &X2); + + // X3 + fpga_modular_mul(&X2, &X2, &R1); + fpga_modular_mul(&R1, &X1, &X3); + + // X6 + fpga_multiword_copy(&X3, &R1); + for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X3, &X6); + + // X12 + fpga_multiword_copy(&X6, &R1); + for (cyc_cnt=0; cyc_cnt<6; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X6, &X12); + + // X15 + fpga_multiword_copy(&X12, &R1); + for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X3, &X15); + + // X30 + fpga_multiword_copy(&X15, &R1); + for (cyc_cnt=0; cyc_cnt<15; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R2, &X15, &X30); + + // X60 + fpga_multiword_copy(&X30, &R1); + for (cyc_cnt=0; cyc_cnt<30; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X30, &X60); + + // X120 + fpga_multiword_copy(&X60, &R1); + for (cyc_cnt=0; cyc_cnt<60; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X60, &X120); + + // now compute the final results + + fpga_multiword_copy(&X120, &R1); + for (cyc_cnt=0; cyc_cnt<120; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X120, &R2); + + for (cyc_cnt=0; cyc_cnt<15; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1); + else fpga_modular_mul(&R1, &R1, &R2); + } + fpga_modular_mul(&R1, &X15, &R2); + + for (cyc_cnt=0; cyc_cnt<31; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1); + else fpga_modular_mul(&R1, &R1, &R2); + } + fpga_modular_mul(&R1, &X30, &R2); + fpga_modular_mul(&R2, &R2, &R1); + fpga_modular_mul(&R1, &R1, &R2); + fpga_modular_mul(&R2, &X2, &R1); + + for (cyc_cnt=0; cyc_cnt<94; cyc_cnt++) + { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2); + else fpga_modular_mul(&R2, &R2, &R1); + } + fpga_modular_mul(&R1, &X30, &R2); + fpga_modular_mul(&R2, &R2, &R1); + fpga_modular_mul(&R1, &R1, &R2); + + // A2 obtained + fpga_multiword_copy(&R2, A2); + + // now calculate compute inverse cubed from inverse squared + fpga_modular_mul(&R2, &R2, &R1); + fpga_modular_mul(&R1, A, &R2); + + // A3 obtained + fpga_multiword_copy(&R2, A3); +} +#endif + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_fpga_modular.h b/ecdsa_fpga_modular.h new file mode 100644 index 0000000..3b75779 --- /dev/null +++ b/ecdsa_fpga_modular.h @@ -0,0 +1,144 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_modular.h +// ------------------------------------- +// Modular arithmetic routines for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// ECDSA Parameters (P-256) +//------------------------------------------------------------------------------ + +/* Field Size */ +#define ECDSA_P256_Q_INIT \ + {0xffffffff, 0x00000001, 0x00000000, 0x00000000, \ + 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff} + +/* Division Factor */ +#define ECDSA_P256_DELTA_INIT \ + {0x7fffffff, 0x80000000, 0x80000000, 0x00000000, \ + 0x00000000, 0x80000000, 0x00000000, 0x00000000} + + +//------------------------------------------------------------------------------ +// ECDSA Parameters (P-384) +//------------------------------------------------------------------------------ + +/* Field Size */ +#define ECDSA_P384_Q_INIT \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, \ + 0xffffffff, 0x00000000, 0x00000000, 0xffffffff} + +/* Division Factor */ +#define ECDSA_P384_DELTA_INIT \ + {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \ + 0x7fffffff, 0x80000000, 0x00000000, 0x80000000} + + +//------------------------------------------------------------------------------ +// ECDSA Parameters Switch +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 + +#define ECDSA_Q_INIT ECDSA_P256_Q_INIT +#define ECDSA_DELTA_INIT ECDSA_P256_DELTA_INIT + +#elif USE_CURVE == 2 + +#define ECDSA_Q_INIT ECDSA_P384_Q_INIT +#define ECDSA_DELTA_INIT ECDSA_P384_DELTA_INIT + +#else + +BAD_CURVE + +#endif + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +extern FPGA_BUFFER ECDSA_Q; +extern FPGA_BUFFER ECDSA_DELTA; + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void fpga_modular_init (); + +void fpga_modular_add (const FPGA_BUFFER *A, const FPGA_BUFFER *B, FPGA_BUFFER *S); +void fpga_modular_sub (const FPGA_BUFFER *A, const FPGA_BUFFER *B, FPGA_BUFFER *D); +void fpga_modular_mul (const FPGA_BUFFER *A, const FPGA_BUFFER *B, FPGA_BUFFER *P); + +void fpga_modular_mul_helper_multiply (const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si); +void fpga_modular_mul_helper_accumulate (const FPGA_WORD_EXTENDED *si, FPGA_WORD *c); +void fpga_modular_mul_helper_reduce_p256 (const FPGA_WORD *c, FPGA_BUFFER *p); +void fpga_modular_mul_helper_reduce_p384 (const FPGA_WORD *c, FPGA_BUFFER *p); + +void fpga_modular_inv23_p256 (const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3); +void fpga_modular_inv23_p384 (const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3); + +void fpga_modular_inv23_p256_microcode (); +void fpga_modular_inv23_p384_microcode (); + + +//------------------------------------------------------------------------------ +// ECDSA Prototype Switch +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 + +#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p256 +#define fpga_modular_inv23 fpga_modular_inv23_p256 +#define fpga_modular_inv23_microcode fpga_modular_inv23_p256_microcode + +#elif USE_CURVE == 2 + +#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p384 +#define fpga_modular_inv23 fpga_modular_inv23_p384 +#define fpga_modular_inv23_microcode fpga_modular_inv23_p384_microcode + +#else + +BAD_CURVE + +#endif + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/fpga_modular.h b/ecdsa_fpga_multiword.cpp index 4d31725..ab65e7a 100644 --- a/fpga_modular.h +++ b/ecdsa_fpga_multiword.cpp @@ -1,87 +1,107 @@ -//------------------------------------------------------------------------------
-//
-// fpga_modular.h
-// ---------------------------
-// Modular arithmetic routines
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Globals
-//------------------------------------------------------------------------------
-extern FPGA_BUFFER ecdsa_q;
-extern FPGA_BUFFER ecdsa_zero;
-extern FPGA_BUFFER ecdsa_one;
-extern FPGA_BUFFER ecdsa_delta;
-
-
-//------------------------------------------------------------------------------
-// Prototypes
-//------------------------------------------------------------------------------
-void fpga_modular_init ();
-
-void fpga_modular_add (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *s);
-void fpga_modular_sub (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *d);
-void fpga_modular_mul (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *p);
-void fpga_modular_inv (FPGA_BUFFER *a, FPGA_BUFFER *a1);
-
-void fpga_modular_mul_helper_multiply (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si);
-void fpga_modular_mul_helper_accumulate (FPGA_WORD_EXTENDED *si, FPGA_WORD *c);
-void fpga_modular_mul_helper_reduce_p256 (FPGA_WORD *c, FPGA_BUFFER *p);
-void fpga_modular_mul_helper_reduce_p384 (FPGA_WORD *c, FPGA_BUFFER *p);
-
-void fpga_modular_inv_helper_shl (FPGA_WORD *x, FPGA_WORD *y);
-void fpga_modular_inv_helper_shr (FPGA_WORD *x, FPGA_WORD *y);
-void fpga_modular_inv_helper_add (FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *s);
-void fpga_modular_inv_helper_sub (FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *d);
-void fpga_modular_inv_helper_cpy (FPGA_WORD *dst, FPGA_WORD *src);
-void fpga_modular_inv_helper_cmp (FPGA_WORD *a, FPGA_WORD *b, int *c);
-
-
-//------------------------------------------------------------------------------
-// Reduction Routine Selection
-//------------------------------------------------------------------------------
-
-#if USE_CURVE == 1
-#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p256
-#elif USE_CURVE == 2
-#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p384
-#else
-#error USE_CURVE must be either 1 or 2!
-#endif
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------ +// +// fpga_ecdsa_multiword.cpp +// ----------------------------------------- +// Models of multi-precision FPGA primitives +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +FPGA_BUFFER ECDSA_ZERO; +FPGA_BUFFER ECDSA_ONE; + + +//------------------------------------------------------------------------------ +void fpga_multiword_init() +//------------------------------------------------------------------------------ +{ + int w; // word counter + + /* fill buffers for large multi-word integers */ + for ( w = FPGA_OPERAND_NUM_WORDS - 1; + w >= 0; + w -= 1) + { + ECDSA_ZERO.words[w] = 0; // all words are zero + ECDSA_ONE.words[w] = w ? 0 : 1; // only the lowest word is 1 + } +} + + +//------------------------------------------------------------------------------ +void fpga_multiword_copy(const FPGA_BUFFER *src, FPGA_BUFFER *dst) +//------------------------------------------------------------------------------ +// +// Copies large multi-word integer from src into dst. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + + // copy all the words from src into dst + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + dst->words[w] = src->words[w]; +} + + +//------------------------------------------------------------------------------ +bool fpga_multiword_is_zero(const FPGA_BUFFER *x) +//------------------------------------------------------------------------------ +// +// Returns true when large multi-word integer x is zero. +// +//------------------------------------------------------------------------------ +{ + int w; // word counter + + // try to find non-zero words + for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++) + if (x->words[w]) return false; + + // no non-zero words found + return true; +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/fpga_util.cpp b/ecdsa_fpga_multiword.h index 6f7a834..9190fd5 100644 --- a/fpga_util.cpp +++ b/ecdsa_fpga_multiword.h @@ -1,86 +1,91 @@ -//------------------------------------------------------------------------------
-//
-// fpga_util.cpp
-// ---------------------
-// Utility FPGA routines
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-#include "fpga_util.h"
-
-
-//------------------------------------------------------------------------------
-bool fpga_buffer_is_zero(FPGA_BUFFER *x)
-//------------------------------------------------------------------------------
-//
-// Returns true when large multi-word integer x is zero.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // try to find non-zero words
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- if (x->words[w]) return false;
-
- // no non-zero words found
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-void fpga_buffer_copy(FPGA_BUFFER *src, FPGA_BUFFER *dst)
-//------------------------------------------------------------------------------
-//
-// Copies large multi-word integer from src into dst.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // copy all the words from src into dst
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- dst->words[w] = src->words[w];
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------ +// +// fpga_ecdsa_multiword.h +// ----------------------------------------- +// Models of multi-precision FPGA primitives +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Model Parameters +//------------------------------------------------------------------------------ +#if USE_CURVE == 1 + +#define FPGA_OPERAND_WIDTH (256) // largest supported operand width in bits + +#elif USE_CURVE == 2 + +#define FPGA_OPERAND_WIDTH (384) // largest supported operand width in bits + +#else + +BAD_CURVE + +#endif + + +//------------------------------------------------------------------------------ +// FPGA Pipeline Settings +//------------------------------------------------------------------------------ +#define FPGA_OPERAND_NUM_WORDS (FPGA_OPERAND_WIDTH / FPGA_WORD_WIDTH) + + +//------------------------------------------------------------------------------ +// Operand Data Type +//------------------------------------------------------------------------------ +typedef struct FPGA_BUFFER +{ + FPGA_WORD words[FPGA_OPERAND_NUM_WORDS]; +} +FPGA_BUFFER; + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +extern FPGA_BUFFER ECDSA_ZERO; +extern FPGA_BUFFER ECDSA_ONE; + + +//------------------------------------------------------------------------------ +// Prototpes +//------------------------------------------------------------------------------ +void fpga_multiword_init (); +void fpga_multiword_copy (const FPGA_BUFFER *src, FPGA_BUFFER *dst); +bool fpga_multiword_is_zero (const FPGA_BUFFER *x); + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/ecdsa_microcode_parser.py b/ecdsa_microcode_parser.py new file mode 100644 index 0000000..56aa94c --- /dev/null +++ b/ecdsa_microcode_parser.py @@ -0,0 +1,694 @@ +# +# ecdsa_microcode_parser.py +# +# Author: Pavel Shatov +# +# Copyright (c) 2018, NORDUnet A/S +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of the NORDUnet A/S nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Imports +import re +import sys +from enum import Enum + +# Source File +C_FILES = [ "ecdsa_fpga_curve_microcode.cpp", + "ecdsa_fpga_microcode.cpp"] + +class MICROCODE_PARSER: + + # enumerate microcode pieces + class MICROCODE_PIECE_ENUM(Enum): + + NONE = -1 + + PREPARE = 0 + + CYCLE_DOUBLE = 1 + CYCLE_ADD = 2 + + CYCLE_ADD_AT_INFINITY = 3 + CYCLE_ADD_SAME_X_SAME_Y = 4 + CYCLE_ADD_SAME_X = 5 + CYCLE_ADD_REGULAR = 6 + + CYCLE_K0 = 7 + CYCLE_K1 = 8 + + CONVERT = 9 + + CONVERT_AT_INFINITY = 10 + CONVERT_REGULAR = 11 + + INVERT_P256 = 12 + INVERT_P384 = 13 + + + # magic pair of begin/end markers + MARKER_BEGIN_MICROCODE = "BEGIN_MICROCODE:" + MARKER_END_MICROCODE = "END_MICROCODE" + + # names of banks + MICROCODE_C_NAME_BANK_LO = "BANK_LO" + MICROCODE_C_NAME_BANK_HI = "BANK_HI" + + # micro-operation names + MICROCODE_C_NAME_UOP_CMPZ = "uop_cmpz" + MICROCODE_C_NAME_UOP_MOVE = "uop_move" + MICROCODE_C_NAME_UOP_CALC = "uop_calc" + MICROCODE_C_NAME_UOP_CYCLE = "uop_cycle" + MICROCODE_C_NAME_UOP_REPEAT = "uop_repeat" + MICROCODE_C_NAME_UOP_CALC_EVEN = "uop_calc_if_even" + MICROCODE_C_NAME_UOP_CALC_ODD = "uop_calc_if_odd" + + # calculate micro-operations + MICROCODE_C_NAME_UOP_MATH_MUL = "MUL" + MICROCODE_C_NAME_UOP_MATH_ADD = "ADD" + MICROCODE_C_NAME_UOP_MATH_SUB = "SUB" + + # names of banks in C source + MICROCODE_C_NAMES_BANKS = [MICROCODE_C_NAME_BANK_LO, MICROCODE_C_NAME_BANK_HI] + + # names of operands in C source + MICROCODE_C_NAMES_OPERANDS = [ "CONST_ZERO", "CONST_ONE", + "CONST_DELTA", + "CONST_GX", "CONST_GY", + "CONST_HX", "CONST_HY", + "INVERT_R1", "INVERT_R2", + "INVERT_X2", "INVERT_X3", "INVERT_X6", + "INVERT_X12", "INVERT_X15", "INVERT_X30", + "INVERT_X32", "INVERT_X60", "INVERT_X120", + "INVERT_A2", "INVERT_A3", + "CYCLE_RX", "CYCLE_RY", "CYCLE_RZ", + "CYCLE_SX", "CYCLE_SY", "CYCLE_SZ", + "CYCLE_A", "CYCLE_A2", "CYCLE_B", + "CYCLE_C", "CYCLE_C2", "CYCLE_C2_2", + "CYCLE_D", "CYCLE_E", "CYCLE_F", + "CYCLE_G", "CYCLE_H", "CYCLE_J", + "CYCLE_Z2", + "CYCLE_T1", "CYCLE_T2", + "CYCLE_T3", "CYCLE_T4"] + + # names of banks in Verilog source + MICROCODE_V_NAME_BANKS_DUMMY = "UOP_BANKS_DUMMY" + + MICROCODE_V_NAMES_BANKS = ["UOP_BANKS_LO2HI", "UOP_BANKS_HI2LO"] + + # names of operands in Verilog source + MICROCODE_V_NAMES_OPERANDS = ["UOP_OPERAND_" + op for op in MICROCODE_C_NAMES_OPERANDS] + + # names of opcodes in Verilog source + MICROCODE_V_NAME_OPCODE_CMPZ = "UOP_OPCODE_CMPZ" + MICROCODE_V_NAME_OPCODE_MOVE = "UOP_OPCODE_COPY" + MICROCODE_V_NAME_OPCODE_MUL = "UOP_OPCODE_MUL" + MICROCODE_V_NAME_OPCODE_ADD = "UOP_OPCODE_ADD" + MICROCODE_V_NAME_OPCODE_SUB = "UOP_OPCODE_SUB" + MICROCODE_V_NAME_OPCODE_STOP = "UOP_OPCODE_STOP" + + MICROCODE_V_NAME_OPERAND_DONTCARE = "UOP_OPERAND_DONTCARE" + + # match group to catch operand names + MATCH_GROUP_09AZ = "([0-9A-Z_]+)" + + # match group to catch bank names + MATCH_GROUP_BANK = "(" + MICROCODE_C_NAME_BANK_LO + "|" + MICROCODE_C_NAME_BANK_HI + ")" + + # match group to catch number of loop iterations + MATCH_GROUP_NUMBER = "(\d+)" + + # match group to catch math instruction + MATCH_GROUP_MATH = "(" + MICROCODE_C_NAME_UOP_MATH_MUL + "|" + MICROCODE_C_NAME_UOP_MATH_ADD + "|" + MICROCODE_C_NAME_UOP_MATH_SUB + ")" + + # match group to catch calculation micro-operation + MATCH_GROUP_CALC = "(" + MICROCODE_C_NAME_UOP_CALC + "|" + MICROCODE_C_NAME_UOP_CALC_EVEN + "|" + MICROCODE_C_NAME_UOP_CALC_ODD + ")" + + # map string microcode piece names to enum values + MICROCODE_PIECE_DICT = { "PREPARE": MICROCODE_PIECE_ENUM.PREPARE, + + "CYCLE_DOUBLE": MICROCODE_PIECE_ENUM.CYCLE_DOUBLE, + "CYCLE_ADD": MICROCODE_PIECE_ENUM.CYCLE_ADD, + + "CYCLE_ADD_AT_INFINITY": MICROCODE_PIECE_ENUM.CYCLE_ADD_AT_INFINITY, + "CYCLE_ADD_SAME_X_SAME_Y": MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X_SAME_Y, + "CYCLE_ADD_SAME_X": MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X, + "CYCLE_ADD_REGULAR": MICROCODE_PIECE_ENUM.CYCLE_ADD_REGULAR, + + "CYCLE_K0": MICROCODE_PIECE_ENUM.CYCLE_K0, + "CYCLE_K1": MICROCODE_PIECE_ENUM.CYCLE_K1, + + "INVERT_P256": MICROCODE_PIECE_ENUM.INVERT_P256, + "INVERT_P384": MICROCODE_PIECE_ENUM.INVERT_P384, + + "CONVERT": MICROCODE_PIECE_ENUM.CONVERT, + + "CONVERT_AT_INFINITY": MICROCODE_PIECE_ENUM.CONVERT_AT_INFINITY, + "CONVERT_REGULAR": MICROCODE_PIECE_ENUM.CONVERT_REGULAR} + + + # map C bank names to Verilog bank names + MICROCODE_BANK_DICT = dict(zip(MICROCODE_C_NAMES_BANKS, MICROCODE_V_NAMES_BANKS)) + + # map C operand names to Verilog operand names + MICROCODE_OPERAND_DICT = dict(zip(MICROCODE_C_NAMES_OPERANDS, MICROCODE_V_NAMES_OPERANDS)) + + # map C calculation names to Verilog opcode names + MICROCODE_MATH_DICT = { MICROCODE_C_NAME_UOP_MATH_MUL: MICROCODE_V_NAME_OPCODE_MUL, + MICROCODE_C_NAME_UOP_MATH_ADD: MICROCODE_V_NAME_OPCODE_ADD, + MICROCODE_C_NAME_UOP_MATH_SUB: MICROCODE_V_NAME_OPCODE_SUB} + + # microcode format + MICROCODE_FORMAT_ADDR = "9'd%03d" + MICROCODE_FORMAT_LINE = MICROCODE_FORMAT_ADDR + ": data <= %s;" + MICROCODE_FORMAT_OFFSET = "localparam [UOP_ADDR_WIDTH-1:0] %s = " + MICROCODE_FORMAT_ADDR + ";" + + # pieces of microcode + MICROCODE_LINES_PREPARE = [] + + MICROCODE_LINES_CYCLE_DOUBLE = [] + MICROCODE_LINES_CYCLE_ADD = [] + + MICROCODE_LINES_CYCLE_ADD_AT_INFINITY = [] + MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y = [] + MICROCODE_LINES_CYCLE_ADD_SAME_X = [] + MICROCODE_LINES_CYCLE_ADD_REGULAR = [] + + MICROCODE_LINES_CYCLE_K0 = [] + MICROCODE_LINES_CYCLE_K1 = [] + + MICROCODE_LINES_INVERT_P256 = [] + MICROCODE_LINES_INVERT_P384 = [] + + MICROCODE_LINES_CONVERT = [] + + MICROCODE_LINES_CONVERT_AT_INFINITY = [] + MICROCODE_LINES_CONVERT_REGULAR = [] + + MICROCODE_LINE_STOP = "{%s, %s, %s, %s, %s}" % ( MICROCODE_V_NAME_OPCODE_STOP, + MICROCODE_V_NAME_BANKS_DUMMY, + MICROCODE_V_NAME_OPERAND_DONTCARE, + MICROCODE_V_NAME_OPERAND_DONTCARE, + MICROCODE_V_NAME_OPERAND_DONTCARE) + + def __init__(self, filenames): + self.__filenames = filenames + + def parse(self): + for next_filename in self.__filenames: + print("Parsing '%s'..." % (next_filename)) + parsing_now = False + line_num = 0 + with open(next_filename, "r") as c_file: + c_lines = c_file.readlines() + for next_c_line in c_lines: + line_num += 1 + self.__line_num = line_num + self.__next_c_line = next_c_line.strip() + if len(self.__next_c_line) == 0: continue + if self.__next_c_line.startswith("//"): continue + if not parsing_now: + self.__current_piece = self.__try_start_parsing() + if self.__current_piece != self.MICROCODE_PIECE_ENUM.NONE: + print(" Found piece of microcode: %s" % (str(self.__current_piece))) + parsing_now = True + else: + parsing_now = self.__continue_parsing() + + + def format(self): + + mode = 0 + + if len(sys.argv) == 2: + if sys.argv[1] == "P256": mode = 1 + if sys.argv[1] == "P384": mode = 2 + + if mode == 0: + sys.exit("sys.exit(): Usage: ecdsa_microcode_parser.py <P256|P384>!") + + if len(self.MICROCODE_LINES_PREPARE) == 0: sys.exit("sys.exit(): Empty PREPARE piece!") + + if len(self.MICROCODE_LINES_CYCLE_DOUBLE) == 0: sys.exit("sys.exit(): Empty CYCLE_DOUBLE piece!") + if len(self.MICROCODE_LINES_CYCLE_ADD) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD piece!") + + if len(self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_AT_INFINITY piece!") + if len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_SAME_X_SAME_Y piece!") + if len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_SAME_X piece!") + if len(self.MICROCODE_LINES_CYCLE_ADD_REGULAR) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_REGULAR piece!") + + if len(self.MICROCODE_LINES_CYCLE_K0) == 0: sys.exit("sys.exit(): Empty CYCLE_K0 piece!") + if len(self.MICROCODE_LINES_CYCLE_K1) == 0: sys.exit("sys.exit(): Empty CYCLE_K1 piece!") + + if len(self.MICROCODE_LINES_INVERT_P256) == 0: sys.exit("sys.exit(): Empty INVERT_P256 piece!") + if len(self.MICROCODE_LINES_INVERT_P384) == 0: sys.exit("sys.exit(): Empty INVERT_P384 piece!") + + if len(self.MICROCODE_LINES_CONVERT) == 0: sys.exit("sys.exit(): Empty CONVERT piece!") + + if len(self.MICROCODE_LINES_CONVERT_AT_INFINITY) == 0: sys.exit("sys.exit(): Empty CONVERT_AT_INFINITY piece!") + if len(self.MICROCODE_LINES_CONVERT_REGULAR) == 0: sys.exit("sys.exit(): Empty CONVERT_REGULAR piece!") + + length = 0 + length += len(self.MICROCODE_LINES_PREPARE) + + length += len(self.MICROCODE_LINES_CYCLE_DOUBLE) + length += len(self.MICROCODE_LINES_CYCLE_ADD) + + length += len(self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY) + length += len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y) + length += len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X) + length += len(self.MICROCODE_LINES_CYCLE_ADD_REGULAR) + + length += len(self.MICROCODE_LINES_CYCLE_K0) + length += len(self.MICROCODE_LINES_CYCLE_K1) + + length += len(self.MICROCODE_LINES_CONVERT) + + length += len(self.MICROCODE_LINES_CONVERT_AT_INFINITY) + length += len(self.MICROCODE_LINES_CONVERT_REGULAR) + + if mode == 1: length += len(self.MICROCODE_LINES_INVERT_P256) + if mode == 2: length += len(self.MICROCODE_LINES_INVERT_P384) + + print("Total number of micro-operations (w/o stops): %s" % (length)) + + + self.__addr = 0 + + print("\n -=-=-=-=-=- CUT AND PASTE BELOW -=-=-=-=-=-\n") + + num_mul_cycle = 0 + num_mul_invert_p256 = 0 + num_mul_invert_p384 = 0 + + offset_prepare = self.__addr; + print("// PREPARE"); + for line in self.MICROCODE_LINES_PREPARE: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_double = self.__addr; + print("// CYCLE_DOUBLE"); + for line in self.MICROCODE_LINES_CYCLE_DOUBLE: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_add = self.__addr; + print("// CYCLE_ADD"); + for line in self.MICROCODE_LINES_CYCLE_ADD: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_add_at_infinity = self.__addr; + print("// CYCLE_ADD_AT_INFINITY"); + for line in self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_add_same_x_same_y = self.__addr; + print("// CYCLE_ADD_SAME_X_SAME_Y"); + for line in self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_add_same_x = self.__addr; + print("// CYCLE_ADD_SAME_X"); + for line in self.MICROCODE_LINES_CYCLE_ADD_SAME_X: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_add_regular = self.__addr; + print("// CYCLE_ADD_REGULAR"); + for line in self.MICROCODE_LINES_CYCLE_ADD_REGULAR: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_k0 = self.__addr; + print("// CYCLE_K0"); + for line in self.MICROCODE_LINES_CYCLE_K0: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_cycle_k1 = self.__addr; + print("// CYCLE_K1"); + for line in self.MICROCODE_LINES_CYCLE_K1: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_convert = self.__addr; + print("// CONVERT"); + for line in self.MICROCODE_LINES_CONVERT: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_convert_at_infinity = self.__addr; + print("// CONVERT_AT_INFINITY"); + for line in self.MICROCODE_LINES_CONVERT_AT_INFINITY: + self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + offset_convert_regular = self.__addr; + print("// CONVERT_REGULAR"); + for line in self.MICROCODE_LINES_CONVERT_REGULAR: + num_mul_cycle += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + if mode == 1: + offset_invert_p256 = self.__addr; + print("// INVERT_P256"); + for line in self.MICROCODE_LINES_INVERT_P256: + num_mul_invert_p256 += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + if mode == 2: + offset_invert_p384 = self.__addr; + print("// INVERT_P384"); + for line in self.MICROCODE_LINES_INVERT_P384: + num_mul_invert_p384 += self.__format_line(line) + self.__format_line(self.MICROCODE_LINE_STOP) + + print("\n") + self.__format_offset("UOP_OFFSET_PREPARE ", offset_prepare) + self.__format_offset("UOP_OFFSET_CYCLE_DOUBLE ", offset_cycle_double) + self.__format_offset("UOP_OFFSET_CYCLE_ADD ", offset_cycle_add) + self.__format_offset("UOP_OFFSET_CYCLE_ADD_AT_INFINITY ", offset_cycle_add_at_infinity) + self.__format_offset("UOP_OFFSET_CYCLE_ADD_SAME_X_SAME_Y", offset_cycle_add_same_x_same_y) + self.__format_offset("UOP_OFFSET_CYCLE_ADD_SAME_X ", offset_cycle_add_same_x) + self.__format_offset("UOP_OFFSET_CYCLE_ADD_REGULAR ", offset_cycle_add_regular) + self.__format_offset("UOP_OFFSET_CYCLE_K0 ", offset_cycle_k0) + self.__format_offset("UOP_OFFSET_CYCLE_K1 ", offset_cycle_k1) + self.__format_offset("UOP_OFFSET_CONVERT ", offset_convert) + self.__format_offset("UOP_OFFSET_CONVERT_AT_INFINITY ", offset_convert_at_infinity) + self.__format_offset("UOP_OFFSET_CONVERT_REGULAR ", offset_convert_regular) + if mode == 1: self.__format_offset("UOP_OFFSET_INVERT_P256 ", offset_invert_p256) + if mode == 2: self.__format_offset("UOP_OFFSET_INVERT_P384 ", offset_invert_p384) + + print("") + print("num_mul_cycle = %d" % num_mul_cycle) + print("num_mul_invert_p256 = %d" % num_mul_invert_p256) + print("num_mul_invert_p384 = %d" % num_mul_invert_p384) + + def __format_line(self, line): + line = self.MICROCODE_FORMAT_LINE % (self.__addr, line) + print(line) + self.__addr += 1 + is_mul = line.find(self.MICROCODE_V_NAME_OPCODE_MUL) + if is_mul >= 0: return 1 + return 0 + + def __format_offset(self, name, offset): + print(self.MICROCODE_FORMAT_OFFSET % (name, offset)) + + def __try_start_parsing(self): + piece = self.MICROCODE_PIECE_ENUM.NONE + begin_marker = re.match("^/\*\s*" + self.MARKER_BEGIN_MICROCODE + "\s*" + + self.MATCH_GROUP_09AZ + "\s*\*/$", + self.__next_c_line); + if begin_marker: + piece = self.MICROCODE_PIECE_DICT[begin_marker.group(1)] + + return piece + + def __encode_uop(self, opcode, banks, src1, src2, dst): + return "{%s, %s, %s, %s, %s}" % (opcode, banks, src1, src2, dst) + + def __continue_parsing(self): + + end_marker = re.match("^/\*\s*" + self.MARKER_END_MICROCODE + "\s*\*/$", + self.__next_c_line); + if end_marker: return False + + # cmpz? + uop_cmpz = re.match("^" + self.MICROCODE_C_NAME_UOP_CMPZ + "\(\s*" + + self.MATCH_GROUP_BANK + "\,\s*" + + self.MATCH_GROUP_09AZ + "\)\;$", + self.__next_c_line, re.IGNORECASE) + if uop_cmpz: + ok = self.__parse_uop_cmpz(uop_cmpz) + if ok: return True + else: + self.__print_parse_error("parse_uop_cmpz() failed!") + self.__abort() + + # move? + uop_move = re.match("^" + self.MICROCODE_C_NAME_UOP_MOVE + "\(\s*" + + self.MATCH_GROUP_BANK + "\,\s*" + + self.MATCH_GROUP_09AZ + "\,\s*" + + self.MATCH_GROUP_BANK + "\,\s*" + + self.MATCH_GROUP_09AZ + "\)\;$", + self.__next_c_line, re.IGNORECASE) + if uop_move: + ok = self.__parse_uop_move(uop_move) + if ok: return True + else: + self.__print_parse_error("parse_uop_move() failed!") + self.__abort() + + # calc? + uop_calc = re.match("^" + self.MATCH_GROUP_CALC + "\s*\(" + + self.MATCH_GROUP_MATH + "\,\s*" + + self.MATCH_GROUP_BANK + "\,\s*" + + self.MATCH_GROUP_09AZ + "\,\s*" + + self.MATCH_GROUP_09AZ + "\,\s*" + + self.MATCH_GROUP_BANK + "\,\s*" + + self.MATCH_GROUP_09AZ + "\)\;$", + self.__next_c_line, re.IGNORECASE) + if uop_calc: + ok = self.__parse_uop_calc(uop_calc) + if ok: return True + else: + self.__print_parse_error("parse_uop_calc() failed!") + self.__abort() + + # cycle? + uop_cycle = re.match("^" + self.MICROCODE_C_NAME_UOP_CYCLE + "\(" + + self.MATCH_GROUP_NUMBER + "\)\;$", + self.__next_c_line, re.IGNORECASE) + if uop_cycle: + ok = self.__parse_uop_cycle(uop_cycle) + if ok: return True + else: + self.__print_parse_error("parse_uop_cycle() failed!") + self.__abort() + + # repeat? + uop_repeat = re.match("^" + self.MICROCODE_C_NAME_UOP_REPEAT + "\(" + "\)\;$", + self.__next_c_line, re.IGNORECASE) + if uop_repeat: + ok = self.__parse_uop_repeat() + if ok: return True + else: + self.__print_parse_error("__parse_uop_repeat() failed!") + self.__abort() + + self.__print_parse_error("unknown micro-operation!") + self.__abort() + + def __check_math(self, math): + if not math in self.MICROCODE_MATH_DICT: + print_parse_error("bad math!") + return False + + return True + + def __check_banks(self, src, dst): + if not src in self.MICROCODE_BANK_DICT: + print_parse_error("bad src bank!") + return False + + if not dst in self.MICROCODE_BANK_DICT: + print_parse_error("bad dst bank!") + return False + + if src == dst: + print_parse_error("src bank == dst bank!") + return False + + return True + + def __check_bank(self, src): + if not src in self.MICROCODE_BANK_DICT: + print_parse_error("bad src bank!") + return False + + return True + + def __check_op3(self, src1, src2, dst): + + if not src1 in self.MICROCODE_OPERAND_DICT: + self.__print_parse_error("bad src1 operand!") + return False + + if src2 != "" and not src2 in self.MICROCODE_OPERAND_DICT: + self.__print_parse_error("bad src2 operand!") + return False + + if dst != "" and not dst in self.MICROCODE_OPERAND_DICT: + self.__print_parse_error("bad dst operand!") + return False + + return True + + def __check_op2(self, src, dst): + return self.__check_op3(src, "", dst) + + def __check_op1(self, src): + return self.__check_op2(src, "") + + def __parse_uop_move(self, params): + bank_src = params.group(1) + bank_dst = params.group(3) + op_src = params.group(2) + op_dst = params.group(4) + + if not self.__check_banks(bank_src, bank_dst): + self.__print_parse_error("check_banks() failed!") + return False + + if not self.__check_op2(op_src, op_dst): + self.__print_parse_error("check_op2() failed!") + return False + + opcode = self.MICROCODE_V_NAME_OPCODE_MOVE + bank = self.MICROCODE_BANK_DICT[bank_src] + src1 = self.MICROCODE_OPERAND_DICT[op_src] + src2 = self.MICROCODE_V_NAME_OPERAND_DONTCARE + dst = self.MICROCODE_OPERAND_DICT[op_dst] + + data = self.__encode_uop(opcode, bank, src1, src2, dst) + self.__store_uop(data) + return True + + def __parse_uop_cmpz(self, params): + bank_src = params.group(1) + op_src = params.group(2) + + if not self.__check_bank(bank_src): + self.__print_parse_error("check_bank() failed!") + return False + + if not self.__check_op1(op_src): + self.__print_parse_error("check_op1() failed!") + return False + + opcode = self.MICROCODE_V_NAME_OPCODE_CMPZ + bank = self.MICROCODE_BANK_DICT[bank_src] + src1 = self.MICROCODE_OPERAND_DICT[op_src] + src2 = self.MICROCODE_V_NAME_OPERAND_DONTCARE + dst = self.MICROCODE_V_NAME_OPERAND_DONTCARE + + data = self.__encode_uop(opcode, bank, src1, src2, dst) + self.__store_uop(data) + return True + + def __parse_uop_calc(self, params): + calc = params.group(1) + math = params.group(2) + bank_src = params.group(3) + bank_dst = params.group(6) + op_src1 = params.group(4) + op_src2 = params.group(5) + op_dst = params.group(7) + + if not self.__check_math(math): + self.__print_parse_error("check_calc() failed!") + return False + + if not self.__check_banks(bank_src, bank_dst): + self.__print_parse_error("check_banks() failed!") + return False + + if not self.__check_op3(op_src1, op_src2, op_dst): + self.__print_parse_error("check_op3() failed!") + return False + + opcode = self.MICROCODE_MATH_DICT[math] + banks = self.MICROCODE_BANK_DICT[bank_src] + src1 = self.MICROCODE_OPERAND_DICT[op_src1] + src2 = self.MICROCODE_OPERAND_DICT[op_src2] + dst = self.MICROCODE_OPERAND_DICT[op_dst] + + data = self.__encode_uop(opcode, banks, src1, src2, dst) + if calc == self.MICROCODE_C_NAME_UOP_CALC: self.__store_uop(data) + elif calc == self.MICROCODE_C_NAME_UOP_CALC_EVEN: self.__loop_even = data + elif calc == self.MICROCODE_C_NAME_UOP_CALC_ODD: self.__loop_odd = data + else: return False + + return True + + def __parse_uop_cycle(self, params): + self.__loop_iters = int(params.group(1)) + return True + + def __parse_uop_repeat(self): + print(" Unrolling loop (%d iters)..." % (self.__loop_iters)) + + for i in range(0, self.__loop_iters): + if i % 2 == 0: self.__store_uop(self.__loop_even) + else: self.__store_uop(self.__loop_odd) + + return True + + def __store_uop(self, data): + #print("\t" + data) + if self.__current_piece == self.MICROCODE_PIECE_ENUM.PREPARE: self.MICROCODE_LINES_PREPARE.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_DOUBLE: self.MICROCODE_LINES_CYCLE_DOUBLE.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD: self.MICROCODE_LINES_CYCLE_ADD.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_AT_INFINITY: self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X_SAME_Y: self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X: self.MICROCODE_LINES_CYCLE_ADD_SAME_X.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_REGULAR: self.MICROCODE_LINES_CYCLE_ADD_REGULAR.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_K0: self.MICROCODE_LINES_CYCLE_K0.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_K1: self.MICROCODE_LINES_CYCLE_K1.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.INVERT_P256: self.MICROCODE_LINES_INVERT_P256.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.INVERT_P384: self.MICROCODE_LINES_INVERT_P384.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CONVERT: self.MICROCODE_LINES_CONVERT.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CONVERT_AT_INFINITY: self.MICROCODE_LINES_CONVERT_AT_INFINITY.append(data) + elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CONVERT_REGULAR: self.MICROCODE_LINES_CONVERT_REGULAR.append(data) + + def __print_parse_error(self, msg): + print("PARSE ERROR: %s" % (msg)) + + def __abort(self): + sys.exit("Stopped at line #%d:\n%s" % (self.__line_num, self.__next_c_line)) + + +# ----------------------------------------------------------------------------- +def main(filenames): +# ----------------------------------------------------------------------------- + parser = MICROCODE_PARSER(filenames) + parser.parse() + parser.format() + +# ----------------------------------------------------------------------------- +if __name__ == "__main__": +# ----------------------------------------------------------------------------- + main(C_FILES) + +# +# End-of-File +# diff --git a/ecdsa_model.h b/ecdsa_model.h deleted file mode 100644 index fc7e571..0000000 --- a/ecdsa_model.h +++ /dev/null @@ -1,205 +0,0 @@ -//------------------------------------------------------------------------------
-//
-// ecdsa_model.h
-// --------------------------------------------
-// Base point scalar multiplier model for ECDSA
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-//
-// Curve Selection
-//
-// USE_CURVE == 1 -> P-256
-// USE_CURVE == 2 -> P-384
-//
-//------------------------------------------------------------------------------
-#define USE_CURVE 2
-
-
-//------------------------------------------------------------------------------
-// Model Parameters
-//------------------------------------------------------------------------------
-#if USE_CURVE == 1
-#define OPERAND_WIDTH (256) // largest supported operand width in bits
-#elif USE_CURVE == 2
-#define OPERAND_WIDTH (384) // largest supported operand width in bits
-#else
-#error USE_CURVE must be either 1 or 2!
-#endif
-
-
-//------------------------------------------------------------------------------
-// P-256 Parameters and Test Vectors
-//------------------------------------------------------------------------------
-
-/* Field Size */
-#define P_256_Q {0xffffffff, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}
-
-/* Generic Numbers */
-#define P_256_ZERO {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}
-#define P_256_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001}
-
-/* Division Factor */
-#define P_256_DELTA {0x7fffffff, 0x80000000, 0x80000000, 0x00000000, 0x00000000, 0x80000000, 0x00000000, 0x00000000}
-
-/* Base Point */
-#define P_256_G_X {0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296}
-#define P_256_G_Y {0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5}
-
-/* Doubled Base Point */
-#define P_256_H_X {0x29d05c19, 0x3da77b71, 0x0e863235, 0x38b77e1b, 0x11f904fe, 0xa42998be, 0x16bd8d74, 0x4ece7ad0}
-#define P_256_H_Y {0xb01cbd1c, 0x01e58065, 0x711814b5, 0x83f061e9, 0xd431cca9, 0x94cea131, 0x3449bf97, 0xc840ae07}
-
-/* Base Point Order */
-#define P_256_N {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, 0xbce6faad, 0xa7179e84, 0xf3b9cac2, 0xfc632551}
-
-/* Private Key */
-#define P_256_D {0x70a12c2d, 0xb16845ed, 0x56ff68cf, 0xc21a472b, 0x3f04d7d6, 0x851bf634, 0x9f2d7d5b, 0x3452b38a}
-
-/* Per-message Random Number */
-#define P_256_K {0x580ec00d, 0x85643433, 0x4cef3f71, 0xecaed496, 0x5b12ae37, 0xfa47055b, 0x1965c7b1, 0x34ee45d0}
-
-/* Public Key */
-#define P_256_Q_X {0x8101ece4, 0x7464a6ea, 0xd70cf69a, 0x6e2bd3d8, 0x8691a326, 0x2d22cba4, 0xf7635eaf, 0xf26680a8}
-#define P_256_Q_Y {0xd8a12ba6, 0x1d599235, 0xf67d9cb4, 0xd58f1783, 0xd3ca43e7, 0x8f0a5aba, 0xa6240799, 0x36c0c3a9}
-
-/* Part of Signature */
-#define P_256_R_X {0x7214bc96, 0x47160bbd, 0x39ff2f80, 0x533f5dc6, 0xddd70ddf, 0x86bb8156, 0x61e805d5, 0xd4e6f27c}
-#define P_256_R_Y {0x8b81e3e9, 0x77597110, 0xc7cf2633, 0x435b2294, 0xb7264298, 0x7defd3d4, 0x007e1cfc, 0x5df84541}
-
-
-//------------------------------------------------------------------------------
-// P-384 Parameters and Test Vectors
-//------------------------------------------------------------------------------
-
-/* Field Size */
-#define P_384_Q {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0xffffffff, 0x00000000, 0x00000000, 0xffffffff}
-
-/* Generic Numbers */
-#define P_384_ZERO {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}
-#define P_384_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001}
-
-/* Division Factor */
-#define P_384_DELTA {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff, 0x80000000, 0x00000000, 0x80000000}
-
-/* Base Point */
-#define P_384_G_X {0xaa87ca22, 0xbe8b0537, 0x8eb1c71e, 0xf320ad74, 0x6e1d3b62, 0x8ba79b98, 0x59f741e0, 0x82542a38, 0x5502f25d, 0xbf55296c, 0x3a545e38, 0x72760ab7}
-#define P_384_G_Y {0x3617de4a, 0x96262c6f, 0x5d9e98bf, 0x9292dc29, 0xf8f41dbd, 0x289a147c, 0xe9da3113, 0xb5f0b8c0, 0x0a60b1ce, 0x1d7e819d, 0x7a431d7c, 0x90ea0e5f}
-
-/* Doubled Base Point */
-#define P_384_H_X {0xaaf06bba, 0x82e9f590, 0xe29c71c2, 0x19bea517, 0x23c5893a, 0xe8b0c8cf, 0x4c117c3e, 0xfb57ab8d, 0x55fa1b42, 0x8155ad27, 0x8b574391, 0x1b13ea8a}
-#define P_384_H_Y {0xc9e821b5, 0x69d9d390, 0xa2616740, 0x6d6d23d6, 0x070be242, 0xd765eb83, 0x1625ceec, 0x4a0f473e, 0xf59f4e30, 0xe2817e62, 0x85bce284, 0x6f15f19d}
-
-/* Base Point Order */
-#define P_384_N {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xc7634d81, 0xf4372ddf, 0x581a0db2, 0x48b0a77a, 0xecec196a, 0xccc52973}
-
-/* Private Key */
-#define P_384_D {0xc838b852, 0x53ef8dc7, 0x394fa580, 0x8a518398, 0x1c7deef5, 0xa69ba8f4, 0xf2117ffe, 0xa39cfcd9, 0x0e95f6cb, 0xc854abac, 0xab701d50, 0xc1f3cf24}
-
-/* Per-message Random Number */
-#define P_384_K {0xdc6b4403, 0x6989a196, 0xe39d1cda, 0xc000812f, 0x4bdd8b2d, 0xb41bb33a, 0xf5137258, 0x5ebd1db6, 0x3f0ce827, 0x5aa1fd45, 0xe2d2a735, 0xf8749359}
-
-/* Public Key */
-#define P_384_Q_X {0x1fbac8ee, 0xbd0cbf35, 0x640b39ef, 0xe0808dd7, 0x74debff2, 0x0a2a329e, 0x91713baf, 0x7d7f3c3e, 0x81546d88, 0x3730bee7, 0xe48678f8, 0x57b02ca0}
-#define P_384_Q_Y {0xeb213103, 0xbd68ce34, 0x3365a8a4, 0xc3d4555f, 0xa385f533, 0x0203bdd7, 0x6ffad1f3, 0xaffb9575, 0x1c132007, 0xe1b24035, 0x3cb0a4cf, 0x1693bdf9}
-
-/* Part of Signature */
-#define P_384_R_X {0xa0c27ec8, 0x93092dea, 0x1e1bd2cc, 0xfed3cf94, 0x5c8134ed, 0x0c9f8131, 0x1a0f4a05, 0x942db8db, 0xed8dd59f, 0x267471d5, 0x462aa14f, 0xe72de856}
-#define P_384_R_Y {0x85564940, 0x9815bb91, 0x424eaca5, 0xfd76c973, 0x75d575d1, 0x422ec53d, 0x343bd33b, 0x847fdf0c, 0x11569685, 0xb528ab25, 0x49301542, 0x8d7cf72b}
-
-
-//------------------------------------------------------------------------------
-// Parameter and Test Vector Selection
-//------------------------------------------------------------------------------
-#if USE_CURVE == 1
-
-#define ECDSA_Q P_256_Q
-
-#define ECDSA_ZERO P_256_ZERO
-#define ECDSA_ONE P_256_ONE
-
-#define ECDSA_DELTA P_256_DELTA
-
-#define ECDSA_G_X P_256_G_X
-#define ECDSA_G_Y P_256_G_Y
-
-#define ECDSA_H_X P_256_H_X
-#define ECDSA_H_Y P_256_H_Y
-
-#define ECDSA_N P_256_N
-#define ECDSA_D P_256_D
-#define ECDSA_K P_256_K
-
-#define ECDSA_Q_X P_256_Q_X
-#define ECDSA_Q_Y P_256_Q_Y
-
-#define ECDSA_R_X P_256_R_X
-#define ECDSA_R_Y P_256_R_Y
-
-#elif USE_CURVE == 2
-
-#define ECDSA_Q P_384_Q
-
-#define ECDSA_ZERO P_384_ZERO
-#define ECDSA_ONE P_384_ONE
-
-#define ECDSA_DELTA P_384_DELTA
-
-#define ECDSA_G_X P_384_G_X
-#define ECDSA_G_Y P_384_G_Y
-
-#define ECDSA_H_X P_384_H_X
-#define ECDSA_H_Y P_384_H_Y
-
-#define ECDSA_N P_384_N
-#define ECDSA_D P_384_D
-#define ECDSA_K P_384_K
-
-#define ECDSA_Q_X P_384_Q_X
-#define ECDSA_Q_Y P_384_Q_Y
-
-#define ECDSA_R_X P_384_R_X
-#define ECDSA_R_Y P_384_R_Y
-
-#else
-
-#error USE_CURVE must be either 1 or 2!
-
-#endif
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/ecdsa_model_fpga.cpp b/ecdsa_model_fpga.cpp deleted file mode 100644 index 8ad0962..0000000 --- a/ecdsa_model_fpga.cpp +++ /dev/null @@ -1,414 +0,0 @@ -//------------------------------------------------------------------------------
-//
-// ecdsa_model_fpga.cpp
-// --------------------------------------------
-// Base point scalar multiplier model for ECDSA
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdio.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-#include "fpga_modular.h"
-#include "fpga_curve.h"
-#include "fpga_util.h"
-
-
-//------------------------------------------------------------------------------
-// Prototypes
-//------------------------------------------------------------------------------
-void fpga_model_init ();
-bool test_base_point_multiplier (FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy);
-
-bool abuse_internal_point_adder ();
-bool abuse_internal_point_doubler ();
-
-void print_fpga_buffer (const char *s, FPGA_BUFFER *v);
-
-bool compare_fpga_buffers (FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *bx, FPGA_BUFFER *by);
-bool compare_fpga_buffers (FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *az, FPGA_BUFFER *bx, FPGA_BUFFER *by, FPGA_BUFFER *bz);
-
-
-//------------------------------------------------------------------------------
-// Locals
-//------------------------------------------------------------------------------
-static FPGA_BUFFER ecdsa_d;
-static FPGA_BUFFER ecdsa_k;
-static FPGA_BUFFER ecdsa_n;
-
-
-//------------------------------------------------------------------------------
-int main()
-//------------------------------------------------------------------------------
-{
- bool ok; // flag
-
- //
- // initialize buffers
- //
- fpga_model_init();
- fpga_modular_init();
- fpga_curve_init();
-
-
- //
- // test base point multiplier: Q = d * G
- //
- printf("Trying to derive public key from private key...\n\n");
- ok = test_base_point_multiplier(&ecdsa_d, &ecdsa_q_x, &ecdsa_q_y);
- if (!ok) return EXIT_FAILURE;
-
-
- //
- // test base point multiplier: R = k * G
- //
- printf("Trying to sign something...\n\n");
- ok = test_base_point_multiplier(&ecdsa_k, &ecdsa_r_x, &ecdsa_r_y);
- if (!ok) return EXIT_FAILURE;
-
-
- //
- // test base point multiplier: O = n * G
- //
- printf("Trying to multiply the base point by its order...\n\n");
- ok = test_base_point_multiplier(&ecdsa_n, &ecdsa_zero, &ecdsa_zero);
- if (!ok) return EXIT_FAILURE;
-
-
- //
- // try to abuse internal point doubler
- //
- ok = abuse_internal_point_doubler();
- if (!ok) return EXIT_FAILURE;
-
-
- //
- // try to abuse internal point adder
- //
- ok = abuse_internal_point_adder();
- if (!ok) return EXIT_FAILURE;
-
-
- //
- // everything went just fine
- //
- return EXIT_SUCCESS;
-}
-
-
-//------------------------------------------------------------------------------
-void fpga_model_init()
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- FPGA_BUFFER tmp_d = ECDSA_D;
- FPGA_BUFFER tmp_k = ECDSA_K;
- FPGA_BUFFER tmp_n = ECDSA_N;
-
- /* fill buffers for large multi-word integers */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- { ecdsa_d.words[w] = tmp_d.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_k.words[w] = tmp_k.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_n.words[w] = tmp_n.words[OPERAND_NUM_WORDS - (w+1)];
- }
-}
-
-
-//------------------------------------------------------------------------------
-bool test_base_point_multiplier(FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
-//------------------------------------------------------------------------------
-//
-// k - multiplier
-//
-// qx, qy - expected coordinates of product
-//
-// Returns true when point (rx,ry) = k * G matches the point (qx,qy).
-//
-//------------------------------------------------------------------------------
-{
- bool ok; // flag
- FPGA_BUFFER rx, ry; // result
-
- /* run the model */
- fpga_curve_scalar_multiply(k, &rx, &ry);
-
- /* handle result */
- ok = compare_fpga_buffers(qx, qy, &rx, &ry);
- if (!ok)
- { printf("\n ERROR\n\n");
- return false;
- }
- else printf("\n OK\n\n");
-
- // everything went just fine
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-bool abuse_internal_point_doubler()
-//------------------------------------------------------------------------------
-//
-// This routine tries to abuse the internal curve point doubler by forcing it
-// to double point at infinity.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- bool ok; // flag
-
- FPGA_BUFFER px, py, pz; // input
- FPGA_BUFFER qx, qy, qz; // output
-
- // set P.X and P.Y to some "random" garbage and P.Z to zero
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- { px.words[w] = ecdsa_g_x.words[w] ^ ecdsa_h_x.words[w];
- py.words[w] = ecdsa_g_y.words[w] ^ ecdsa_h_y.words[w];
- }
- fpga_buffer_copy(&ecdsa_zero, &pz);
-
- // try to double point at infinity (should produce point at infinity)
- printf("Trying to double something at infinity...\n\n");
- fpga_curve_double_jacobian(&px, &py, &pz, &qx, &qy, &qz);
-
- // handle result
- ok = compare_fpga_buffers(&ecdsa_one, &ecdsa_one, &ecdsa_zero, &qx, &qy, &qz);
- if (! ok)
- { printf("\n ERROR\n\n");
- return false;
- }
- else printf("\n OK\n\n");
-
- // everything went just fine
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-bool abuse_internal_point_adder()
-//------------------------------------------------------------------------------
-//
-// This routine tries to abuse the internal curve point adder by forcing it to
-// go throgh all the possible "corner cases".
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- bool ok; // flag
-
- FPGA_BUFFER px, py, pz; // input
- FPGA_BUFFER rx, ry, rz; // output
-
- //
- // try to add point at infinity to the base point
- //
- {
- // set P.X and P.Y to some "random" garbage and P.Z to zero
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- { px.words[w] = ecdsa_g_x.words[w] ^ ecdsa_h_x.words[w];
- py.words[w] = ecdsa_g_y.words[w] ^ ecdsa_h_y.words[w];
- }
- fpga_buffer_copy(&ecdsa_zero, &pz);
-
- // run addition proceduce
- printf("Trying to add something at infinity to the base point...\n\n");
- fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz);
-
- // handle result
- ok = compare_fpga_buffers(&ecdsa_g_x, &ecdsa_g_y, &ecdsa_one, &rx, &ry, &rz);
- if (! ok)
- { printf("\n ERROR\n\n");
- return false;
- }
- else printf("\n OK\n\n");
- }
-
- //
- // try to add the base point to itself
- //
- {
- // set P to G
- fpga_buffer_copy(&ecdsa_g_x, &px);
- fpga_buffer_copy(&ecdsa_g_y, &py);
- fpga_buffer_copy(&ecdsa_one, &pz);
-
- // run addition proceduce
- printf("Trying to add the base point to itself...\n\n");
- fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz);
-
- // handle result
- ok = compare_fpga_buffers(&ecdsa_h_x, &ecdsa_h_y, &ecdsa_one, &rx, &ry, &rz);
- if (! ok)
- { printf("\n ERROR\n\n");
- return false;
- }
- else printf("\n OK\n\n");
- }
-
- //
- // try to add the base point to its opposite
- //
- {
- // set P to (G.X, -G.Y, 1)
- fpga_buffer_copy(&ecdsa_zero, &px);
- fpga_buffer_copy(&ecdsa_zero, &py);
- fpga_buffer_copy(&ecdsa_one, &pz);
-
- fpga_modular_add(&ecdsa_zero, &ecdsa_g_x, &px);
- fpga_modular_sub(&ecdsa_zero, &ecdsa_g_y, &py);
-
- // run addition proceduce
- printf("Trying to add the base point to its opposite...\n\n");
- fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz);
-
- // handle result
- ok = compare_fpga_buffers(&ecdsa_one, &ecdsa_one, &ecdsa_zero, &rx, &ry, &rz);
- if (! ok)
- { printf("\n ERROR\n\n");
- return false;
- }
- else printf("\n OK\n\n");
- }
-
- // everything went just fine
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-void print_fpga_buffer(const char *s, FPGA_BUFFER *buf)
-//------------------------------------------------------------------------------
-//
-// Pretty print large multi-word integer.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // print header
- printf("%s", s);
-
- // print all bytes
- for (w=OPERAND_NUM_WORDS; w>0; w--)
- {
- printf("%08x", buf->words[w-1]);
-
- // insert space after every group of 4 bytes
- if (w > 1) printf(" ");
- }
-
- // print footer
- printf("\n");
-}
-
-
-//------------------------------------------------------------------------------
-bool compare_fpga_buffers(FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *bx, FPGA_BUFFER *by)
-//------------------------------------------------------------------------------
-//
-// Compare affine coordinates of two points and return true when they match.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // print all the values
- print_fpga_buffer(" Expected: X = ", ax);
- print_fpga_buffer(" Calculated: X = ", bx);
- printf("\n");
- print_fpga_buffer(" Expected: Y = ", ay);
- print_fpga_buffer(" Calculated: Y = ", by);
-
- // compare values
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- {
- // compare x
- if (ax->words[w] != bx->words[w]) return false;
-
- // compare y
- if (ay->words[w] != by->words[w]) return false;
- }
-
- // values are the same
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-bool compare_fpga_buffers(FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *az, FPGA_BUFFER *bx, FPGA_BUFFER *by, FPGA_BUFFER *bz)
-//------------------------------------------------------------------------------
-//
-// Compare projective coordinates of two points and return true when they match.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // print all the values
- print_fpga_buffer(" Expected: X = ", ax);
- print_fpga_buffer(" Calculated: X = ", bx);
- printf("\n");
- print_fpga_buffer(" Expected: Y = ", ay);
- print_fpga_buffer(" Calculated: Y = ", by);
- printf("\n");
- print_fpga_buffer(" Expected: Z = ", az);
- print_fpga_buffer(" Calculated: Z = ", bz);
-
- // compare values
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- {
- // compare x
- if (ax->words[w] != bx->words[w]) return false;
-
- // compare y
- if (ay->words[w] != by->words[w]) return false;
-
- // compare z
- if (az->words[w] != bz->words[w]) return false;
- }
-
- // values are the same
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/fpga_curve.cpp b/fpga_curve.cpp deleted file mode 100644 index 9cc8ec0..0000000 --- a/fpga_curve.cpp +++ /dev/null @@ -1,340 +0,0 @@ -//------------------------------------------------------------------------------
-//
-// fpga_curve.cpp
-// ------------------------------------
-// Elliptic curve arithmetic procedures
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-#include "fpga_modular.h"
-#include "fpga_curve.h"
-#include "fpga_util.h"
-
-
-//------------------------------------------------------------------------------
-// Globals
-//------------------------------------------------------------------------------
-FPGA_BUFFER ecdsa_g_x, ecdsa_g_y;
-FPGA_BUFFER ecdsa_h_x, ecdsa_h_y;
-FPGA_BUFFER ecdsa_q_x, ecdsa_q_y;
-FPGA_BUFFER ecdsa_r_x, ecdsa_r_y;
-
-
-//------------------------------------------------------------------------------
-void fpga_curve_init()
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- FPGA_BUFFER tmp_g_x = ECDSA_G_X, tmp_g_y = ECDSA_G_Y;
- FPGA_BUFFER tmp_h_x = ECDSA_H_X, tmp_h_y = ECDSA_H_Y;
- FPGA_BUFFER tmp_q_x = ECDSA_Q_X, tmp_q_y = ECDSA_Q_Y;
- FPGA_BUFFER tmp_r_x = ECDSA_R_X, tmp_r_y = ECDSA_R_Y;
-
- /* fill buffers for large multi-word integers */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- { ecdsa_g_x.words[w] = tmp_g_x.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_g_y.words[w] = tmp_g_y.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_h_x.words[w] = tmp_h_x.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_h_y.words[w] = tmp_h_y.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_r_x.words[w] = tmp_r_x.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_r_y.words[w] = tmp_r_y.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_q_x.words[w] = tmp_q_x.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_q_y.words[w] = tmp_q_y.words[OPERAND_NUM_WORDS - (w+1)];
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Elliptic curve point doubling routine.
-//
-// R(rx,ry,rz) = 2 * P(px,py,pz)
-//
-// Note, that P(px,py,pz) is supposed to be in projective Jacobian coordinates,
-// R will be in projective Jacobian coordinates.
-//
-// This routine implements algorithm 3.21 from "Guide to Elliptic Curve
-// Cryptography", the only difference is that step 6. does T1 = T2 + T2 and
-// then T2 = T2 + T1 instead of T2 = 3 * T2, because our addition is much
-// faster, than multiplication.
-//
-// Note, that this routine also handles one special case, namely when P is at
-// infinity.
-//
-// Instead of actual modular division, multiplication by pre-computed constant
-// (2^-1 mod q) is done.
-//
-// Note, that FPGA modular multiplier can't multiply a given buffer by itself,
-// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome
-// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and
-// then fpga_modular_mul(pz, &t1, &t1) instead.
-//
-// WARNING: Though this procedure always does doubling steps, it does not take
-// any active measures to keep run-time constant. The main purpose of this
-// model is to help debug Verilog code for FPGA, so *DO NOT* use is anywhere
-// near production!
-//
-//------------------------------------------------------------------------------
-void fpga_curve_double_jacobian(FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz)
-//------------------------------------------------------------------------------
-{
- FPGA_BUFFER t1, t2, t3; // temporary variables
-
- // check, whether P is at infinity
- bool pz_is_zero = fpga_buffer_is_zero(pz);
-
- /* 2. */ fpga_buffer_copy(pz, &t1);
- fpga_modular_mul(pz, &t1, &t1);
- /* 3. */ fpga_modular_sub(px, &t1, &t2);
- /* 4. */ fpga_modular_add(px, &t1, &t1);
- /* 5. */ fpga_modular_mul(&t1, &t2, &t2);
- /* 6. */ fpga_modular_add(&t2, &t2, &t1);
- /* */ fpga_modular_add(&t1, &t2, &t2);
- /* 7. */ fpga_modular_add(py, py, ry);
- /* 8. */ fpga_modular_mul(pz, ry, rz);
- /* 9. */ fpga_buffer_copy(ry, &t1);
- fpga_buffer_copy(ry, &t3);
- fpga_modular_mul(&t1, &t3, ry);
- /* 10. */ fpga_modular_mul(px, ry, &t3);
- /* 11. */ fpga_buffer_copy(ry, &t1);
- fpga_modular_mul(ry, &t1, &t1);
- /* 12. */ fpga_modular_mul(&t1, &ecdsa_delta, ry);
- /* 13. */ fpga_buffer_copy(&t2, &t1);
- fpga_modular_mul(&t1, &t2, rx);
- /* 14. */ fpga_modular_add(&t3, &t3, &t1);
- /* 15. */ fpga_modular_sub(rx, &t1, rx);
- /* 16. */ fpga_modular_sub(&t3, rx, &t1);
- /* 17. */ fpga_modular_mul(&t1, &t2, &t1);
- /* 18. */ fpga_modular_sub(&t1, ry, ry);
-
- // handle special case (input point is at infinity)
- if (pz_is_zero)
- { fpga_buffer_copy(&ecdsa_one, rx);
- fpga_buffer_copy(&ecdsa_one, ry);
- fpga_buffer_copy(&ecdsa_zero, rz);
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Elliptic curve point addition routine.
-//
-// R(rx,ry,rz) = P(px,py,pz) + Q(qx,qy)
-//
-// Note, that P(px, py, pz) is supposed to be in projective Jacobian
-// coordinates, while Q(qx,qy) is supposed to be in affine coordinates,
-// R(rx, ry, rz) will be in projective Jacobian coordinates. Moreover, in this
-// particular implementation Q is always the base point G.
-//
-// This routine implements algorithm 3.22 from "Guide to Elliptic Curve
-// Cryptography". Differences from the original algorithm:
-//
-// 1) Step 1. is omitted, because point Q is always the base point, which is
-// not at infinity by definition.
-//
-// 2) Step 9.1 just returns the pre-computed double of the base point instead
-// of actually doubling it.
-//
-// Note, that this routine also handles three special cases:
-//
-// 1) P is at infinity
-// 2) P == Q
-// 3) P == -Q
-//
-// Note, that FPGA modular multiplier can't multiply a given buffer by itself,
-// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome
-// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and
-// then fpga_modular_mul(pz, &t1, &t1) instead.
-//
-// WARNING: This procedure does not take any active measures to keep run-time
-// constant. The main purpose of this model is to help debug Verilog code for
-// FPGA, so *DO NOT* use is anywhere near production!
-//
-//------------------------------------------------------------------------------
-void fpga_curve_add_jacobian(FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz)
-//------------------------------------------------------------------------------
-{
- FPGA_BUFFER t1, t2, t3, t4; // temporary variables
-
- bool pz_is_zero = fpga_buffer_is_zero(pz); // Step 2.
-
- /* 3. */ fpga_buffer_copy(pz, &t1);
- fpga_modular_mul(pz, &t1, &t1);
- /* 4. */ fpga_modular_mul(pz, &t1, &t2);
- /* 5. */ fpga_modular_mul(&t1, &ecdsa_g_x, &t1);
- /* 6. */ fpga_modular_mul(&t2, &ecdsa_g_y, &t2);
- /* 7. */ fpga_modular_sub(&t1, px, &t1);
- /* 8. */ fpga_modular_sub(&t2, py, &t2);
-
- bool t1_is_zero = fpga_buffer_is_zero(&t1); // | Step 9.
- bool t2_is_zero = fpga_buffer_is_zero(&t2); // |
-
- /* 10. */ fpga_modular_mul(pz, &t1, rz);
- /* 11. */ fpga_buffer_copy(&t1, &t3);
- fpga_modular_mul(&t1, &t3, &t3);
- /* 12. */ fpga_modular_mul(&t1, &t3, &t4);
- /* 13. */ fpga_modular_mul(px, &t3, &t3);
- /* 14. */ fpga_modular_add(&t3, &t3, &t1);
- /* 15. */ fpga_buffer_copy(&t2, rx);
- fpga_modular_mul(rx, &t2, rx);
- /* 16. */ fpga_modular_sub(rx, &t1, rx);
- /* 17. */ fpga_modular_sub(rx, &t4, rx);
- /* 18. */ fpga_modular_sub(&t3, rx, &t3);
- /* 19. */ fpga_modular_mul(&t2, &t3, &t3);
- /* 20. */ fpga_modular_mul(py, &t4, &t4);
- /* 21. */ fpga_modular_sub(&t3, &t4, ry);
-
- //
- // final selection
- //
- if (pz_is_zero) // P at infinity ?
- { fpga_buffer_copy(&ecdsa_g_x, rx);
- fpga_buffer_copy(&ecdsa_g_y, ry);
- fpga_buffer_copy(&ecdsa_one, rz);
- }
- else if (t1_is_zero) // same x for P and Q ?
- {
- fpga_buffer_copy(t2_is_zero ? &ecdsa_h_x : &ecdsa_one, rx); // | same y ? (P==Q => R=2*G) : (P==-Q => R=O)
- fpga_buffer_copy(t2_is_zero ? &ecdsa_h_y : &ecdsa_one, ry); // |
- fpga_buffer_copy(t2_is_zero ? &ecdsa_one : &ecdsa_zero, rz); // |
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Conversion from projective Jacobian to affine coordinates.
-//
-// P(px,py,pz) -> Q(qx,qy)
-//
-// Note, that qx = px / Z^2 and qy = py / Z^3. Division in modular arithmetic
-// is equivalent to multiplication by the inverse value of divisor, so
-// qx = px * (pz^-1)^2 and qy = py * (pz^-1)^3.
-//
-// Note, that this procedure does *NOT* handle points at infinity correctly. It
-// can only be called from the base point multiplication routine, that
-// specifically makes sure that P is not at infinity, so pz will always be
-// non-zero value.
-//
-//------------------------------------------------------------------------------
-void fpga_curve_point_to_affine(FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
-//------------------------------------------------------------------------------
-{
- FPGA_BUFFER pz1; // inverse value of pz
- FPGA_BUFFER t2, t3; // intermediate values
-
- fpga_modular_inv(pz, &pz1); // pz1 = pz^-1 (mod q)
-
- fpga_modular_mul(&pz1, &pz1, &t2); // t2 = pz1 ^ 2 (mod q)
- fpga_modular_mul(&pz1, &t2, &t3); // t3 = tz1 ^ 3 (mod q)
-
- fpga_modular_mul(px, &t2, qx); // qx = px * (pz^-1)^2 (mod q)
- fpga_modular_mul(py, &t3, qy); // qy = py * (pz^-1)^3 (mod q)
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Elliptic curve base point scalar multiplication routine.
-//
-// Q(qx,qy) = k * G(px,py)
-//
-// Note, that Q is supposed to be in affine coordinates. Multiplication is done
-// using the double-and-add algorithm 3.27 from "Guide to Elliptic Curve
-// Cryptography".
-//
-// WARNING: Though this procedure always does the addition step, it only
-// updates the result when current bit of k is set. It does not take any
-// active measures to keep run-time constant. The main purpose of this model
-// is to help debug Verilog code for FPGA, so *DO NOT* use it anywhere near
-// production!
-//
-//------------------------------------------------------------------------------
-void fpga_curve_scalar_multiply(FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
-//------------------------------------------------------------------------------
-{
- int word_count, bit_count; // counters
-
- FPGA_BUFFER rx, ry, rz; // intermediate result
- FPGA_BUFFER tx, ty, tz; // temporary variable
-
- /* set initial value of R to point at infinity */
- fpga_buffer_copy(&ecdsa_one, &rx);
- fpga_buffer_copy(&ecdsa_one, &ry);
- fpga_buffer_copy(&ecdsa_zero, &rz);
-
- /* process bits of k left-to-right */
- for (word_count=OPERAND_NUM_WORDS; word_count>0; word_count--)
- for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
- {
- /* calculate T = 2 * R */
- fpga_curve_double_jacobian(&rx, &ry, &rz, &tx, &ty, &tz);
-
- /* always calculate R = T + P for constant-time */
- fpga_curve_add_jacobian(&tx, &ty, &tz, &rx, &ry, &rz);
-
- /* revert to the value of T before addition if the current bit of k is not set */
- if (!((k->words[word_count-1] >> (bit_count-1)) & 1))
- { fpga_buffer_copy(&tx, &rx);
- fpga_buffer_copy(&ty, &ry);
- fpga_buffer_copy(&tz, &rz);
- }
-
- }
-
- // convert result to affine coordinates anyway
- fpga_curve_point_to_affine(&rx, &ry, &rz, qx, qy);
-
- // check, that rz is non-zero (not point at infinity)
- bool rz_is_zero = fpga_buffer_is_zero(&rz);
-
- // handle special case (result is point at infinity)
- if (rz_is_zero)
- { fpga_buffer_copy(&ecdsa_zero, qx);
- fpga_buffer_copy(&ecdsa_zero, qy);
- }
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/fpga_curve.h b/fpga_curve.h deleted file mode 100644 index c90cc16..0000000 --- a/fpga_curve.h +++ /dev/null @@ -1,61 +0,0 @@ -//------------------------------------------------------------------------------
-//
-// fpga_curve.h
-// ------------------------------------
-// Elliptic curve arithmetic procedures
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Globals
-//------------------------------------------------------------------------------
-extern FPGA_BUFFER ecdsa_g_x, ecdsa_g_y;
-extern FPGA_BUFFER ecdsa_h_x, ecdsa_h_y;
-extern FPGA_BUFFER ecdsa_q_x, ecdsa_q_y;
-extern FPGA_BUFFER ecdsa_r_x, ecdsa_r_y;
-
-
-//------------------------------------------------------------------------------
-// Prototypes
-//------------------------------------------------------------------------------
-void fpga_curve_init ();
-void fpga_curve_scalar_multiply (FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy);
-void fpga_curve_add_jacobian (FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz);
-void fpga_curve_double_jacobian (FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz);
-void fpga_curve_point_to_affine (FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *qx, FPGA_BUFFER *qy);
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/fpga_modular.cpp b/fpga_modular.cpp deleted file mode 100644 index 9b01df0..0000000 --- a/fpga_modular.cpp +++ /dev/null @@ -1,838 +0,0 @@ -//------------------------------------------------------------------------------
-//
-// fpga_modular.cpp
-// ---------------------------
-// Modular arithmetic routines
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-#include "fpga_modular.h"
-
-
-//------------------------------------------------------------------------------
-// Globals
-//------------------------------------------------------------------------------
-FPGA_BUFFER ecdsa_q;
-FPGA_BUFFER ecdsa_zero;
-FPGA_BUFFER ecdsa_one;
-FPGA_BUFFER ecdsa_delta;
-
-
-//------------------------------------------------------------------------------
-void fpga_modular_init()
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- FPGA_BUFFER tmp_q = ECDSA_Q;
- FPGA_BUFFER tmp_zero = ECDSA_ZERO;
- FPGA_BUFFER tmp_one = ECDSA_ONE;
- FPGA_BUFFER tmp_delta = ECDSA_DELTA;
-
- /* fill buffers for large multi-word integers */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- { ecdsa_q.words[w] = tmp_q.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_zero.words[w] = tmp_zero.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_one.words[w] = tmp_one.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_delta.words[w] = tmp_delta.words[OPERAND_NUM_WORDS - (w+1)];
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Modular addition.
-//
-// This routine implements algorithm 3. from "Ultra High Performance ECC over
-// NIST Primes on Commercial FPGAs".
-//
-// s = (a + b) mod q
-//
-// The naive approach is like the following:
-//
-// 1. s = a + b
-// 2. if (s >= q) s -= q
-//
-// The speed-up trick is to simultaneously calculate (a + b) and (a + b - q)
-// and then select the right variant.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_add(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *s)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- FPGA_BUFFER ab, ab_n; // intermediate buffers
- bool c_in, c_out; // carries
- bool b_in, b_out; // borrows
-
- c_in = false; // first word has no carry
- b_in = false; // first word has no borrow
-
- // run parallel addition and subtraction
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- {
- fpga_lowlevel_add32(a->words[w], b->words[w], c_in, &ab.words[w], &c_out);
- fpga_lowlevel_sub32(ab.words[w], ecdsa_q.words[w], b_in, &ab_n.words[w], &b_out);
-
- c_in = c_out; // propagate carry
- b_in = b_out; // propagate borrow
- }
-
- // now select the right buffer
-
- /*
- * We select the right variant based on borrow and carry flags after
- * addition and subtraction of the very last pair of words. Note, that
- * we only need to select the first variant (a + b) when (a + b) < q.
- * This way if we get negative number after subtraction, we discard it
- * and use the output of the adder instead. The subtractor output is
- * negative when borrow flag is set *and* carry flag is not set. When
- * both borrow and carry are set, the number is non-negative, because
- * borrow and carry cancel each other out.
- */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- s->words[w] = (b_out && !c_out) ? ab.words[w] : ab_n.words[w];
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Modular subtraction.
-//
-// This routine implements algorithm 3. from "Ultra High Performance ECC over
-// NIST Primes on Commercial FPGAs".
-//
-// d = (a - b) mod q
-//
-// The naive approach is like the following:
-//
-// 1. d = a - b
-// 2. if (a < b) d += q
-//
-// The speed-up trick is to simultaneously calculate (a - b) and (a - b + q)
-// and then select the right variant.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_sub(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *d)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- FPGA_BUFFER ab, ab_n; // intermediate buffers
- bool c_in, c_out; // carries
- bool b_in, b_out; // borrows
-
- c_in = false; // first word has no carry
- b_in = false; // first word has no borrow
-
- // run parallel subtraction and addition
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- {
- fpga_lowlevel_sub32(a->words[w], b->words[w], b_in, &ab.words[w], &b_out);
- fpga_lowlevel_add32(ab.words[w], ecdsa_q.words[w], c_in, &ab_n.words[w], &c_out);
-
- b_in = b_out; // propagate borrow
- c_in = c_out; // propagate carry
- }
-
- // now select the right buffer
-
- /*
- * We select the right variant based on borrow flag after subtraction
- * and addition of the very last pair of words. Note, that we only
- * need to select the second variant (a - b + q) when a < b. This way
- * if we get negative number after subtraction, we discard it
- * and use the output of the adder instead. The Subtractor output is
- * negative when borrow flag is set.
- */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- d->words[w] = b_out ? ab_n.words[w] : ab.words[w];
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Modular multiplication.
-//
-// This routine implements modular multiplication algorithm from "Ultra High
-// Performance ECC over NIST Primes on Commercial FPGAs".
-//
-// p = (a * b) mod q
-//
-// The complex algorithm is split into three parts:
-//
-// 1. Calculation of partial words
-// 2. Acccumulation of partial words into full-size product
-// 3. Modular reduction of the full-size product
-//
-// See comments for corresponding helper routines for more information.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_mul(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *p)
-//------------------------------------------------------------------------------
-{
- FPGA_WORD_EXTENDED si[4*OPERAND_NUM_WORDS-1]; // parts of intermediate product
- FPGA_WORD c[2*OPERAND_NUM_WORDS]; // full-size intermediate product
-
- /* multiply to get partial words */
- fpga_modular_mul_helper_multiply(a, b, si);
-
- /* accumulate partial words into full-size product */
- fpga_modular_mul_helper_accumulate(si, c);
-
- /* reduce full-size product using special routine */
- fpga_modular_mul_helper_reduce(c, p);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Modular multiplicative inversion procedure.
-//
-// a1 = a^-1 (mod n)
-//
-// This routine implements the algorithm from "Constant Time Modular
-// Inversion" by Joppe W. Bos (http://www.joppebos.com/files/CTInversion.pdf)
-//
-// The algorithm has two phases: 1) calculation of "almost" modular inverse,
-// which is a^-1*2^k and 2) removal of redundant factor 2^k.
-//
-// The first stage has four temporary variables: r, s, u, v; they are updated
-// at every iteration. Depending on flags there can be four branches, FPGA
-// will pre-calculate all possible values in parallel and then use a multiplexor
-// to select the next value. This implementation also calculates all possible
-// outcomes. This is done for debugging purposes, *NOT* for constant run-time!
-//
-// The second part only works with s and k.
-//
-// Note, that k is a simple counter, and it can't exceed 2*OPERAND_WIDTH.
-//
-// The complex inversion algorithm uses helper routines. Note, that width of the
-// intermediate results can temporarily exceed OPERAND_WIDTH, so all the helper
-// routines process OPERAND_NUM_WORDS+1 words.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv(FPGA_BUFFER *a, FPGA_BUFFER *a1)
-{
- int i; // round counter
- int w; // word counter
- int k; // redundant power of 2
-
- /* q, 1 */
- FPGA_WORD buf_q[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_1[OPERAND_NUM_WORDS+1];
-
- /* r, s */
- FPGA_WORD buf_r[OPERAND_NUM_WORDS+1], buf_s[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_r_double[OPERAND_NUM_WORDS+1], buf_s_double[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_r_new[OPERAND_NUM_WORDS+1], buf_s_new[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_r_plus_s[OPERAND_NUM_WORDS+1], buf_s_plus_r[OPERAND_NUM_WORDS+1];
-
- /* u, v */
- FPGA_WORD buf_u[OPERAND_NUM_WORDS+1], buf_v[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_u_half[OPERAND_NUM_WORDS+1], buf_v_half[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_u_minus_v[OPERAND_NUM_WORDS+1], buf_v_minus_u[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_u_minus_v_half[OPERAND_NUM_WORDS+1], buf_v_minus_u_half[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_u_new[OPERAND_NUM_WORDS+1], buf_v_new[OPERAND_NUM_WORDS+1];
-
- /* comparison */
- int cmp_v_1, cmp_u_v;
-
- /* clear buffers */
- for (w=0; w<=OPERAND_NUM_WORDS; w++)
- buf_r[w] = 0, buf_s[w] = 0,
- buf_u[w] = 0, buf_v[w] = 0,
- buf_q[w] = 0, buf_1[w] = 0;
-
- /* initialize q, 1 */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- buf_q[w] = ecdsa_q.words[w], buf_1[w] = ecdsa_one.words[w];
-
- /* initialize r, s */
- buf_r[0] = 0, buf_s[0] = 1;
-
- /* initialize u, v */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- buf_u[w] = ecdsa_q.words[w], buf_v[w] = a->words[w];
-
- /* initialize k */
- k = 0;
-
- /* flags for the first stage */
- bool v_is_1, u_is_greater_than_v, u_is_even, v_is_even;
-
- /* first stage */
- for (i=0; i<(2*OPERAND_WIDTH); i++)
- {
- /* pre-calculate all possible values for r and s */
- fpga_modular_inv_helper_shl(buf_r, buf_r_double); // r_double = 2 * r
- fpga_modular_inv_helper_shl(buf_s, buf_s_double); // s_double = 2 * s
- fpga_modular_inv_helper_add(buf_r, buf_s, buf_r_plus_s); // r_plus_s = r + s
- fpga_modular_inv_helper_add(buf_s, buf_r, buf_s_plus_r); // s_plus_r = s + r
-
- /* pre-calculate all possible values for u and v */
- fpga_modular_inv_helper_shr(buf_u, buf_u_half); // u_half = u / 2
- fpga_modular_inv_helper_shr(buf_v, buf_v_half); // v_half = v / 2
- fpga_modular_inv_helper_sub(buf_u, buf_v, buf_u_minus_v); // u_minus_v = u - v
- fpga_modular_inv_helper_sub(buf_v, buf_u, buf_v_minus_u); // v_minus_u = v - u
- fpga_modular_inv_helper_shr(buf_u_minus_v, buf_u_minus_v_half); // u_minus_v_half = u_minus_v / 2
- fpga_modular_inv_helper_shr(buf_v_minus_u, buf_v_minus_u_half); // v_minus_u_half = v_minus_u / 2
-
- /* compare */
- fpga_modular_inv_helper_cmp(buf_v, buf_1, &cmp_v_1);
- fpga_modular_inv_helper_cmp(buf_u, buf_v, &cmp_u_v);
-
- /* assign flags */
- v_is_1 = (cmp_v_1 == 0);
- u_is_greater_than_v = (cmp_u_v > 0);
- u_is_even = !(buf_u[0] & 1);
- v_is_even = !(buf_v[0] & 1);
-
- /* select u */
- if ( u_is_even) fpga_modular_inv_helper_cpy(buf_u_new, buf_u_half);
- if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_u_new, buf_u);
- if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_u_new, u_is_greater_than_v ? buf_u_minus_v_half : buf_u);
-
- /* select v */
- if ( u_is_even) fpga_modular_inv_helper_cpy(buf_v_new, buf_v);
- if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_v_new, buf_v_half);
- if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_v_new, u_is_greater_than_v ? buf_v : buf_v_minus_u_half);
-
- /* select r */
- if ( u_is_even) fpga_modular_inv_helper_cpy(buf_r_new, buf_r);
- if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_r_new, buf_r_double);
- if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_r_new, u_is_greater_than_v ? buf_r_plus_s : buf_r_double);
-
- /* select s */
- if ( u_is_even) fpga_modular_inv_helper_cpy(buf_s_new, buf_s_double);
- if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_s_new, buf_s);
- if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_s_new, u_is_greater_than_v ? buf_s_double : buf_s_plus_r);
-
- /* update values */
- if (!v_is_1)
- { fpga_modular_inv_helper_cpy(buf_u, buf_u_new);
- fpga_modular_inv_helper_cpy(buf_v, buf_v_new);
- fpga_modular_inv_helper_cpy(buf_r, buf_r_new);
- fpga_modular_inv_helper_cpy(buf_s, buf_s_new);
- }
-
- /* update k */
- if (!v_is_1) k++;
- }
-
- //
- // Note, that to save FPGA resources, the second stage re-uses buffers
- // used in the first stage.
- //
-
- /* flags for the second stage */
- bool k_is_0, s_is_odd;
-
- /* second stage */
- for (i=0; i<(2*OPERAND_WIDTH); i++)
- {
- /* pre-calculate all possible values */
- fpga_modular_inv_helper_shr(buf_s, buf_u);
- fpga_modular_inv_helper_add(buf_s, buf_q, buf_r);
- fpga_modular_inv_helper_shr(buf_r, buf_v);
-
- //
- // assign flags
- //
- s_is_odd = buf_s[0] & 1;
- k_is_0 = (k == 0);
-
- //
- // select new values based on flags
- //
- fpga_modular_inv_helper_cpy(buf_s_new, s_is_odd ? buf_v : buf_u);
-
- /* update s */
- if (! k_is_0)
- fpga_modular_inv_helper_cpy(buf_s, buf_s_new);
-
- /* update k */
- if (! k_is_0) k--;
- }
-
- /* done, copy s into output buffer */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- a1->words[w] = buf_s[w];
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Parallelized multiplication.
-//
-// This routine implements the algorithm in Fig. 3. from "Ultra High
-// Performance ECC over NIST Primes on Commercial FPGAs".
-//
-// Inputs a and b are split into 2*OPERAND_NUM_WORDS words of FPGA_WORD_WIDTH/2
-// bits each, because FPGA multipliers can't handle full FPGA_WORD_WIDTH-wide
-// inputs. These smaller words are multiplied by an array of 2*OPERAND_NUM_WORDS
-// multiplers and accumulated into an array of 4*OPERAND_NUM_WORDS-1 partial
-// output words si[].
-//
-// The order of loading a and b into the multipliers is a bit complicated,
-// during the first 2*OPERAND_NUM_WORDS-1 cycles one si word per cycle is
-// obtained, during the very last 2*OPERAND_NUM_WORDS'th cycle all the
-// remaining 2*OPERAND_NUM_WORDS partial words are obtained simultaneously.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_mul_helper_multiply(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si)
-//------------------------------------------------------------------------------
-{
- int w; // counter
- int t, x; // more counters
- int j, i; // word indices
- FPGA_WORD p; // product
-
- // buffers for smaller words that multipliers can handle
- FPGA_WORD_REDUCED ai[2*OPERAND_NUM_WORDS];
- FPGA_WORD_REDUCED bj[2*OPERAND_NUM_WORDS];
-
- // split a and b into smaller words
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- ai[2*w] = (FPGA_WORD_REDUCED)a->words[w], ai[2*w + 1] = (FPGA_WORD_REDUCED)(a->words[w] >> (FPGA_WORD_WIDTH / 2)),
- bj[2*w] = (FPGA_WORD_REDUCED)b->words[w], bj[2*w + 1] = (FPGA_WORD_REDUCED)(b->words[w] >> (FPGA_WORD_WIDTH / 2));
-
- // accumulators
- FPGA_WORD_EXTENDED mac[2*OPERAND_NUM_WORDS];
-
- // clear accumulators
- for (w=0; w<(2*OPERAND_NUM_WORDS); w++) mac[w] = 0;
-
- // run the crazy algorithm :)
- for (t=0; t<(2*OPERAND_NUM_WORDS); t++)
- {
- // save upper half of si[] (one word per cycle)
- if (t > 0)
- { si[4*OPERAND_NUM_WORDS - (t+1)] = mac[t];
- mac[t] = 0;
- }
-
- // update index
- j = 2*OPERAND_NUM_WORDS - (t+1);
-
- // parallel multiplication
- for (x=0; x<(2*OPERAND_NUM_WORDS); x++)
- {
- // update index
- i = t - x;
- if (i < 0) i += 2*OPERAND_NUM_WORDS;
-
- // multiply...
- fpga_lowlevel_mul16(ai[i], bj[j], &p);
-
- // ...accumulate
- mac[x] += p;
- }
-
- }
-
- // now finally save lower half of si[] (2*OPERAND_NUM_WORDS words at once)
- for (w=0; w<(2*OPERAND_NUM_WORDS); w++)
- si[w] = mac[2*OPERAND_NUM_WORDS - (w+1)];
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Accumulation of partial words into full-size product.
-//
-// This routine implements the Algorithm 4. from "Ultra High Performance ECC
-// over NIST Primes on Commercial FPGAs".
-//
-// Input words si[] are accumulated into full-size product c[].
-//
-// The algorithm is a bit tricky, there are 4*OPERAND_NUM_WORDS-1 words in
-// si[]. Complete operation takes 2*OPERAND_NUM_WORDS cycles, even words are
-// summed in full, odd words are split into two parts. During every cycle the
-// upper part of the previous word and the lower part of the following word are
-// summed too.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_mul_helper_accumulate(FPGA_WORD_EXTENDED *si, FPGA_WORD *c)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- FPGA_WORD_EXTENDED cw0, cw1; // intermediate sums
- FPGA_WORD_REDUCED cw_carry; // wide carry
-
- // clear carry
- cw_carry = 0;
-
- // execute the algorithm
- for (w=0; w<(2*OPERAND_NUM_WORDS); w++)
- {
- // handy flags
- bool w_is_first = (w == 0);
- bool w_is_last = (w == (2*OPERAND_NUM_WORDS-1));
-
- // accumulate full current even word...
- // ...and also the upper part of the previous odd word (if not the first word)
- fpga_lowlevel_add48(si[2*w], w_is_first ? 0 : si[2*w-1] >> (FPGA_WORD_WIDTH / 2), &cw0);
-
- // generate another word from "carry" part of the previous even word...
- // ...and also the lower part of the following odd word (if not the last word)
- cw1 = w_is_last ? 0 : (FPGA_WORD)(si[2*w+1] << (FPGA_WORD_WIDTH / 2));
- cw1 |= (FPGA_WORD_EXTENDED)cw_carry;
-
- // accumulate once again
- fpga_lowlevel_add48(cw0, cw1, &cw1);
-
- // store current word...
- c[w] = (FPGA_WORD)cw1;
-
- // ...and carry
- cw_carry = (FPGA_WORD_REDUCED) (cw1 >> FPGA_WORD_WIDTH);
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Fast modular reduction for NIST prime P-256.
-//
-// p = c mod p256
-//
-// This routine implements the algorithm 2.29 from "Guide to Elliptic Curve
-// Cryptography".
-//
-// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c
-// on the other hand is the output of the parallelized Comba multiplier, so it
-// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS).
-//
-// To save FPGA resources, the calculation is done using only two adders and
-// one subtractor. The algorithm is split into five steps.
-//
-//------------------------------------------------------------------------------
-#if USE_CURVE == 1
-void fpga_modular_mul_helper_reduce_p256(FPGA_WORD *c, FPGA_BUFFER *p)
-{
- // "funny" words
- FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9;
-
- // compose "funny" words out of input words
- s1.words[7] = c[ 7], s1.words[6] = c[ 6], s1.words[5] = c[ 5], s1.words[4] = c[ 4], s1.words[3] = c[ 3], s1.words[2] = c[ 2], s1.words[1] = c[ 1], s1.words[0] = c[ 0];
- s2.words[7] = c[15], s2.words[6] = c[14], s2.words[5] = c[13], s2.words[4] = c[12], s2.words[3] = c[11], s2.words[2] = 0, s2.words[1] = 0, s2.words[0] = 0;
- s3.words[7] = 0, s3.words[6] = c[15], s3.words[5] = c[14], s3.words[4] = c[13], s3.words[3] = c[12], s3.words[2] = 0, s3.words[1] = 0, s3.words[0] = 0;
- s4.words[7] = c[15], s4.words[6] = c[14], s4.words[5] = 0, s4.words[4] = 0, s4.words[3] = 0, s4.words[2] = c[10], s4.words[1] = c[ 9], s4.words[0] = c[ 8];
- s5.words[7] = c[ 8], s5.words[6] = c[13], s5.words[5] = c[15], s5.words[4] = c[14], s5.words[3] = c[13], s5.words[2] = c[11], s5.words[1] = c[10], s5.words[0] = c[ 9];
- s6.words[7] = c[10], s6.words[6] = c[ 8], s6.words[5] = 0, s6.words[4] = 0, s6.words[3] = 0, s6.words[2] = c[13], s6.words[1] = c[12], s6.words[0] = c[11];
- s7.words[7] = c[11], s7.words[6] = c[ 9], s7.words[5] = 0, s7.words[4] = 0, s7.words[3] = c[15], s7.words[2] = c[14], s7.words[1] = c[13], s7.words[0] = c[12];
- s8.words[7] = c[12], s8.words[6] = 0, s8.words[5] = c[10], s8.words[4] = c[ 9], s8.words[3] = c[ 8], s8.words[2] = c[15], s8.words[1] = c[14], s8.words[0] = c[13];
- s9.words[7] = c[13], s9.words[6] = 0, s9.words[5] = c[11], s9.words[4] = c[10], s9.words[3] = c[ 9], s9.words[2] = 0, s9.words[1] = c[15], s9.words[0] = c[14];
-
- // intermediate results
- FPGA_BUFFER sum0, sum1, difference;
-
- /* Step 1. */
- fpga_modular_add(&s2, &s2, &sum0); // sum0 = 2*s2
- fpga_modular_add(&s3, &s3, &sum1); // sum1 = 2*s3
- fpga_modular_sub(&ecdsa_zero, &s6, &difference); // difference = -s6
-
- /* Step 2. */
- fpga_modular_add(&sum0, &s1, &sum0); // sum0 = s1 + 2*s2
- fpga_modular_add(&sum1, &s4, &sum1); // sum1 = s4 + 2*s3
- fpga_modular_sub(&difference, &s7, &difference); // difference = -(s6 + s7)
-
- /* Step 3. */
- fpga_modular_add(&sum0, &s5, &sum0); // sum0 = s1 + 2*s2 + s5
- fpga_modular_add(&sum1, &ecdsa_zero, &sum1); // compulsory cycle to keep sum1 constant for next stage
- fpga_modular_sub(&difference, &s8, &difference); // difference = -(s6 + s7 + s8)
-
- /* Step 4. */
- fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5
-// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
- fpga_modular_sub(&difference, &s9, &difference); // difference = -(s6 + s7 + s8 + s9)
-
- /* Step 5. */
- fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + 2*s3 + s4 + s5 - s6 - s7 - s8 - s9
-// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
-// fpga_modular_add(<dummy>, <dummy>, &difference); // dummy cycle, result ignored
-}
-#endif
-
-
-//------------------------------------------------------------------------------
-//
-// Fast modular reduction for NIST prime P-384.
-//
-// p = c mod p384
-//
-// This routine implements the algorithm 2.30 from "Guide to Elliptic Curve
-// Cryptography".
-//
-// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c
-// on the other hand is the output of the parallelized Comba multiplier, so it
-// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS).
-//
-// To save FPGA resources, the calculation is done using only two adders and
-// one subtractor. The algorithm is split into five steps.
-//
-//------------------------------------------------------------------------------
-#if USE_CURVE == 2
-void fpga_modular_mul_helper_reduce_p384(FPGA_WORD *c, FPGA_BUFFER *p)
-{
- // "funny" words
- FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
-
- // compose "funny" words
- s1.words[11] = c[11], s1.words[10] = c[10], s1.words[ 9] = c[ 9], s1.words[ 8] = c[ 8], s1.words[ 7] = c[ 7], s1.words[ 6] = c[ 6], s1.words[ 5] = c[ 5], s1.words[ 4] = c[ 4], s1.words[ 3] = c[ 3], s1.words[ 2] = c[ 2], s1.words[ 1] = c[ 1], s1.words[ 0] = c[ 0];
- s2.words[11] = 0, s2.words[10] = 0, s2.words[ 9] = 0, s2.words[ 8] = 0, s2.words[ 7] = 0, s2.words[ 6] = c[23], s2.words[ 5] = c[22], s2.words[ 4] = c[21], s2.words[ 3] = 0, s2.words[ 2] = 0, s2.words[ 1] = 0, s2.words[ 0] = 0;
- s3.words[11] = c[23], s3.words[10] = c[22], s3.words[ 9] = c[21], s3.words[ 8] = c[20], s3.words[ 7] = c[19], s3.words[ 6] = c[18], s3.words[ 5] = c[17], s3.words[ 4] = c[16], s3.words[ 3] = c[15], s3.words[ 2] = c[14], s3.words[ 1] = c[13], s3.words[ 0] = c[12];
- s4.words[11] = c[20], s4.words[10] = c[19], s4.words[ 9] = c[18], s4.words[ 8] = c[17], s4.words[ 7] = c[16], s4.words[ 6] = c[15], s4.words[ 5] = c[14], s4.words[ 4] = c[13], s4.words[ 3] = c[12], s4.words[ 2] = c[23], s4.words[ 1] = c[22], s4.words[ 0] = c[21];
- s5.words[11] = c[19], s5.words[10] = c[18], s5.words[ 9] = c[17], s5.words[ 8] = c[16], s5.words[ 7] = c[15], s5.words[ 6] = c[14], s5.words[ 5] = c[13], s5.words[ 4] = c[12], s5.words[ 3] = c[20], s5.words[ 2] = 0, s5.words[ 1] = c[23], s5.words[ 0] = 0;
- s6.words[11] = 0, s6.words[10] = 0, s6.words[ 9] = 0, s6.words[ 8] = 0, s6.words[ 7] = c[23], s6.words[ 6] = c[22], s6.words[ 5] = c[21], s6.words[ 4] = c[20], s6.words[ 3] = 0, s6.words[ 2] = 0, s6.words[ 1] = 0, s6.words[ 0] = 0;
- s7.words[11] = 0, s7.words[10] = 0, s7.words[ 9] = 0, s7.words[ 8] = 0, s7.words[ 7] = 0, s7.words[ 6] = 0, s7.words[ 5] = c[23], s7.words[ 4] = c[22], s7.words[ 3] = c[21], s7.words[ 2] = 0, s7.words[ 1] = 0, s7.words[ 0] = c[20];
- s8.words[11] = c[22], s8.words[10] = c[21], s8.words[ 9] = c[20], s8.words[ 8] = c[19], s8.words[ 7] = c[18], s8.words[ 6] = c[17], s8.words[ 5] = c[16], s8.words[ 4] = c[15], s8.words[ 3] = c[14], s8.words[ 2] = c[13], s8.words[ 1] = c[12], s8.words[ 0] = c[23];
- s9.words[11] = 0, s9.words[10] = 0, s9.words[ 9] = 0, s9.words[ 8] = 0, s9.words[ 7] = 0, s9.words[ 6] = 0, s9.words[ 5] = 0, s9.words[ 4] = c[23], s9.words[ 3] = c[22], s9.words[ 2] = c[21], s9.words[ 1] = c[20], s9.words[ 0] = 0;
- s10.words[11] = 0, s10.words[10] = 0, s10.words[ 9] = 0, s10.words[ 8] = 0, s10.words[ 7] = 0, s10.words[ 6] = 0, s10.words[ 5] = 0, s10.words[ 4] = c[23], s10.words[ 3] = c[23], s10.words[ 2] = 0, s10.words[ 1] = 0, s10.words[ 0] = 0;
-
- // intermediate results
- FPGA_BUFFER sum0, sum1, difference;
-
- /* Step 1. */
- fpga_modular_add(&s1, &s3, &sum0); // sum0 = s1 + s3
- fpga_modular_add(&s2, &s2, &sum1); // sum1 = 2*s2
- fpga_modular_sub(&ecdsa_zero, &s8, &difference); // difference = -s8
-
- /* Step 2. */
- fpga_modular_add(&sum0, &s4, &sum0); // sum0 = s1 + s3 + s4
- fpga_modular_add(&sum1, &s5, &sum1); // sum1 = 2*s2 + s5
- fpga_modular_sub(&difference, &s9, &difference); // difference = -(s8 + s9)
-
- /* Step 3. */
- fpga_modular_add(&sum0, &s6, &sum0); // sum0 = s1 + s3 + s4 + s6
- fpga_modular_add(&sum1, &s7, &sum1); // sum1 = 2*s2 + s5 + s7
- fpga_modular_sub(&difference, &s10, &difference); // difference = -(s8 + s9 + s10)
-
- /* Step 4. */
- fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5
-// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
- fpga_modular_sub(&difference, &ecdsa_zero, &difference); // compulsory cycle to keep difference constant for next stage
-
- /* Step 5. */
- fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10
-// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
-// fpga_modular_add(<dummy>, <dummy>, &difference); // dummy cycle, result ignored
-}
-#endif
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word shift to the left by 1 bit.
-//
-// y = x << 1
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_shl(FPGA_WORD *x, FPGA_WORD *y)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- FPGA_WORD carry_in, carry_out; // carries
-
- carry_in = 0; // first word has no carry
-
- // shift word-by-word
- for (w=0; w<=OPERAND_NUM_WORDS; w++)
- carry_out = x[w] >> (FPGA_WORD_WIDTH - 1), // store next carry
- y[w] = x[w] << 1, // shift
- y[w] |= carry_in, // process carry
- carry_in = carry_out; // propagate carry
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word shift to the right by 1 bit.
-//
-// y = x >> 1
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_shr(FPGA_WORD *x, FPGA_WORD *y)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- FPGA_WORD carry_in, carry_out; // carries
-
- carry_in = 0; // first word has no carry
-
- // shift word-by-word
- for (w=OPERAND_NUM_WORDS; w>=0; w--)
- carry_out = x[w], // store next carry
- y[w] = x[w] >> 1, // shift
- y[w] |= carry_in << (FPGA_WORD_WIDTH - 1), // process carry
- carry_in = carry_out; // propagate carry
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word addition.
-//
-// s = x + y
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_add(FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *s)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- bool carry_in, carry_out; // carries
-
- // lowest word has no carry
- carry_in = false;
-
- // sum a and b word-by-word
- for (w=0; w<=OPERAND_NUM_WORDS; w++)
- {
- // low-level addition
- fpga_lowlevel_add32(x[w], y[w], carry_in, &s[w], &carry_out);
-
- // propagate carry bit
- carry_in = carry_out;
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word subtraction .
-//
-// d = x - y
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_sub(FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *d)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- bool borrow_in, borrow_out; // borrows
-
- // lowest word has no borrow
- borrow_in = false;
-
- // subtract b from a word-by-word
- for (w=0; w<=OPERAND_NUM_WORDS; w++)
- {
- // low-level subtraction
- fpga_lowlevel_sub32(x[w], y[w], borrow_in, &d[w], &borrow_out);
-
- // propagate borrow bit
- borrow_in = borrow_out;
- }
-
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word copy.
-//
-// dst = src
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_cpy(FPGA_WORD *dst, FPGA_WORD *src)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // copy all the words from src into dst
- for (w=0; w<=OPERAND_NUM_WORDS; w++)
- dst[w] = src[w];
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word comparison.
-//
-// The return value is -1 when a<b, 0 when a=b and 1 when a>b.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_cmp(FPGA_WORD *a, FPGA_WORD *b, int *c)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- int r, r0, ra, rb; // result
- bool borrow; // borrow
- FPGA_WORD d; // partial difference
-
- // result is unknown so far
- r = 0;
-
- // clear borrow for the very first word
- borrow = false;
-
- // compare a and b word-by-word
- for (w=OPERAND_NUM_WORDS; w>=0; w--)
- {
- // save result
- r0 = r;
-
- // subtract current words
- fpga_lowlevel_sub32(a[w], b[w], false, &d, &borrow);
-
- // analyze flags
- rb = borrow ? -1 : 0; // a[w] < b[w]
- ra = (!borrow && (d != 0)) ? 1 : 0; // a[w] > b[w]
-
- //
- // Note, that ra is either 0 or 1, rb is either 0 or -1 and they
- // can never be non-zero at the same time.
- //
- // Note, that r can only be updated if comparison result has not
- // been resolved yet. Even if we already know comparison result,
- // we continue doing dummy subtractions to keep run-time constant.
- //
-
- // update result
- r = (r == 0) ? ra + rb : r0;
- }
-
- // done
- *c = r;
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/fpga_util.h b/fpga_util.h deleted file mode 100644 index 24b2aa2..0000000 --- a/fpga_util.h +++ /dev/null @@ -1,49 +0,0 @@ -//------------------------------------------------------------------------------
-//
-// fpga_util.h
-// ---------------------
-// Utility FPGA routines
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Prototypes
-//------------------------------------------------------------------------------
-bool fpga_buffer_is_zero (FPGA_BUFFER *x);
-void fpga_buffer_copy (FPGA_BUFFER *src, FPGA_BUFFER *dst);
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/test_vectors/charlie_p256.key b/test_vectors/charlie_p256.key new file mode 100644 index 0000000..34b5c20 --- /dev/null +++ b/test_vectors/charlie_p256.key @@ -0,0 +1,8 @@ +-----BEGIN EC PARAMETERS----- +BggqhkjOPQMBBw== +-----END EC PARAMETERS----- +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIFA+WK/POvMze/U0CURQErgftFSMTSsTAqYX+bHQDX3goAoGCCqGSM49 +AwEHoUQDQgAE2TSbSLDujDYTmx147cGRGyUId/t61Erhi7L4pvTcyuXgoFbotQkq +bHKPS8iQQ/vifCYnRN+9rxeD/C4BsGB3Gw== +-----END EC PRIVATE KEY----- diff --git a/test_vectors/charlie_p384.key b/test_vectors/charlie_p384.key new file mode 100644 index 0000000..8894220 --- /dev/null +++ b/test_vectors/charlie_p384.key @@ -0,0 +1,9 @@ +-----BEGIN EC PARAMETERS----- +BgUrgQQAIg== +-----END EC PARAMETERS----- +-----BEGIN EC PRIVATE KEY----- +MIGkAgEBBDC7SgnY5SfwYmZetNCzmh3OlNqixZNbMWOwkPB5PuNqWmKnBV1dhQ0b +FdxlbWXYs6KgBwYFK4EEACKhZANiAAS4Aj5grkLqFGMw8sOIMJbKlhsR9d/qSh1l +6Y5kszUn+1cibbSKUUMlHvBr3veOtXrTxmRpYlqqraNH4QM8FHS2NDqTaP8pRQG7 +1TscxJ/ZctpDnJ2oJ+IwJyDit43RT54= +-----END EC PRIVATE KEY----- diff --git a/test_vectors/ecdsa_test_vector_nsa.h b/test_vectors/ecdsa_test_vector_nsa.h new file mode 100644 index 0000000..762f284 --- /dev/null +++ b/test_vectors/ecdsa_test_vector_nsa.h @@ -0,0 +1,56 @@ +/* Values from "Suite B Implementer's Guide to FIPS 186-3 (ECDSA)" */ + +#define ECDSA_P256_D_NSA_INIT \ + {0x70a12c2d, 0xb16845ed, 0x56ff68cf, 0xc21a472b, \ + 0x3f04d7d6, 0x851bf634, 0x9f2d7d5b, 0x3452b38a} + +#define ECDSA_P256_QX_NSA_INIT \ + {0x8101ece4, 0x7464a6ea, 0xd70cf69a, 0x6e2bd3d8, \ + 0x8691a326, 0x2d22cba4, 0xf7635eaf, 0xf26680a8} + +#define ECDSA_P256_QY_NSA_INIT \ + {0xd8a12ba6, 0x1d599235, 0xf67d9cb4, 0xd58f1783, \ + 0xd3ca43e7, 0x8f0a5aba, 0xa6240799, 0x36c0c3a9} + +#define ECDSA_P256_K_NSA_INIT \ + {0x580ec00d, 0x85643433, 0x4cef3f71, 0xecaed496, \ + 0x5b12ae37, 0xfa47055b, 0x1965c7b1, 0x34ee45d0} + +#define ECDSA_P256_RX_NSA_INIT \ + {0x7214bc96, 0x47160bbd, 0x39ff2f80, 0x533f5dc6, \ + 0xddd70ddf, 0x86bb8156, 0x61e805d5, 0xd4e6f27c} + +#define ECDSA_P256_RY_NSA_INIT \ + {0x8b81e3e9, 0x77597110, 0xc7cf2633, 0x435b2294, \ + 0xb7264298, 0x7defd3d4, 0x007e1cfc, 0x5df84541} + + +#define ECDSA_P384_D_NSA_INIT \ + {0xc838b852, 0x53ef8dc7, 0x394fa580, 0x8a518398, \ + 0x1c7deef5, 0xa69ba8f4, 0xf2117ffe, 0xa39cfcd9, \ + 0x0e95f6cb, 0xc854abac, 0xab701d50, 0xc1f3cf24} + +#define ECDSA_P384_QX_NSA_INIT \ + {0x1fbac8ee, 0xbd0cbf35, 0x640b39ef, 0xe0808dd7, \ + 0x74debff2, 0x0a2a329e, 0x91713baf, 0x7d7f3c3e, \ + 0x81546d88, 0x3730bee7, 0xe48678f8, 0x57b02ca0} + +#define ECDSA_P384_QY_NSA_INIT \ + {0xeb213103, 0xbd68ce34, 0x3365a8a4, 0xc3d4555f, \ + 0xa385f533, 0x0203bdd7, 0x6ffad1f3, 0xaffb9575, \ + 0x1c132007, 0xe1b24035, 0x3cb0a4cf, 0x1693bdf9} + +#define ECDSA_P384_K_NSA_INIT \ + {0xdc6b4403, 0x6989a196, 0xe39d1cda, 0xc000812f, \ + 0x4bdd8b2d, 0xb41bb33a, 0xf5137258, 0x5ebd1db6, \ + 0x3f0ce827, 0x5aa1fd45, 0xe2d2a735, 0xf8749359} + +#define ECDSA_P384_RX_NSA_INIT \ + {0xa0c27ec8, 0x93092dea, 0x1e1bd2cc, 0xfed3cf94, \ + 0x5c8134ed, 0x0c9f8131, 0x1a0f4a05, 0x942db8db, \ + 0xed8dd59f, 0x267471d5, 0x462aa14f, 0xe72de856} + +#define ECDSA_P384_RY_NSA_INIT \ + {0x85564940, 0x9815bb91, 0x424eaca5, 0xfd76c973, \ + 0x75d575d1, 0x422ec53d, 0x343bd33b, 0x847fdf0c, \ + 0x11569685, 0xb528ab25, 0x49301542, 0x8d7cf72b} diff --git a/test_vectors/ecdsa_test_vector_randomized.h b/test_vectors/ecdsa_test_vector_randomized.h new file mode 100644 index 0000000..1f334c2 --- /dev/null +++ b/test_vectors/ecdsa_test_vector_randomized.h @@ -0,0 +1,29 @@ +/* Generated automatically, do not edit. */ + +#define ECDSA_P256_D_RANDOM_INIT \ + {0x503e58af, 0xcf3af333, 0x7bf53409, 0x445012b8, \ + 0x1fb4548c, 0x4d2b1302, 0xa617f9b1, 0xd00d7de0} + +#define ECDSA_P256_QX_RANDOM_INIT \ + {0xd9349b48, 0xb0ee8c36, 0x139b1d78, 0xedc1911b, \ + 0x250877fb, 0x7ad44ae1, 0x8bb2f8a6, 0xf4dccae5} + +#define ECDSA_P256_QY_RANDOM_INIT \ + {0xe0a056e8, 0xb5092a6c, 0x728f4bc8, 0x9043fbe2, \ + 0x7c262744, 0xdfbdaf17, 0x83fc2e01, 0xb060771b} + +#define ECDSA_P384_D_RANDOM_INIT \ + {0xbb4a09d8, 0xe527f062, 0x665eb4d0, 0xb39a1dce, \ + 0x94daa2c5, 0x935b3163, 0xb090f079, 0x3ee36a5a, \ + 0x62a7055d, 0x5d850d1b, 0x15dc656d, 0x65d8b3a2} + +#define ECDSA_P384_QX_RANDOM_INIT \ + {0xb8023e60, 0xae42ea14, 0x6330f2c3, 0x883096ca, \ + 0x961b11f5, 0xdfea4a1d, 0x65e98e64, 0xb33527fb, \ + 0x57226db4, 0x8a514325, 0x1ef06bde, 0xf78eb57a} + +#define ECDSA_P384_QY_RANDOM_INIT \ + {0xd3c66469, 0x625aaaad, 0xa347e103, 0x3c1474b6, \ + 0x343a9368, 0xff294501, 0xbbd53b1c, 0xc49fd972, \ + 0xda439c9d, 0xa827e230, 0x2720e2b7, 0x8dd14f9e} + diff --git a/test_vectors/ecdsa_test_vector_randomized.vh b/test_vectors/ecdsa_test_vector_randomized.vh new file mode 100644 index 0000000..6c5cf80 --- /dev/null +++ b/test_vectors/ecdsa_test_vector_randomized.vh @@ -0,0 +1,29 @@ +/* Generated automatically, do not edit. */ + +localparam [255:0] ECDSA_P256_D_RANDOM = + {32'h503e58af, 32'hcf3af333, 32'h7bf53409, 32'h445012b8, + 32'h1fb4548c, 32'h4d2b1302, 32'ha617f9b1, 32'hd00d7de0}; + +localparam [255:0] ECDSA_P256_QX_RANDOM = + {32'hd9349b48, 32'hb0ee8c36, 32'h139b1d78, 32'hedc1911b, + 32'h250877fb, 32'h7ad44ae1, 32'h8bb2f8a6, 32'hf4dccae5}; + +localparam [255:0] ECDSA_P256_QY_RANDOM = + {32'he0a056e8, 32'hb5092a6c, 32'h728f4bc8, 32'h9043fbe2, + 32'h7c262744, 32'hdfbdaf17, 32'h83fc2e01, 32'hb060771b}; + +localparam [383:0] ECDSA_P384_D_RANDOM = + {32'hbb4a09d8, 32'he527f062, 32'h665eb4d0, 32'hb39a1dce, + 32'h94daa2c5, 32'h935b3163, 32'hb090f079, 32'h3ee36a5a, + 32'h62a7055d, 32'h5d850d1b, 32'h15dc656d, 32'h65d8b3a2}; + +localparam [383:0] ECDSA_P384_QX_RANDOM = + {32'hb8023e60, 32'hae42ea14, 32'h6330f2c3, 32'h883096ca, + 32'h961b11f5, 32'hdfea4a1d, 32'h65e98e64, 32'hb33527fb, + 32'h57226db4, 32'h8a514325, 32'h1ef06bde, 32'hf78eb57a}; + +localparam [383:0] ECDSA_P384_QY_RANDOM = + {32'hd3c66469, 32'h625aaaad, 32'ha347e103, 32'h3c1474b6, + 32'h343a9368, 32'hff294501, 32'hbbd53b1c, 32'hc49fd972, + 32'hda439c9d, 32'ha827e230, 32'h2720e2b7, 32'h8dd14f9e}; + diff --git a/test_vectors/format_random_test_vector.py b/test_vectors/format_random_test_vector.py new file mode 100644 index 0000000..861bab5 --- /dev/null +++ b/test_vectors/format_random_test_vector.py @@ -0,0 +1,327 @@ +# +# format_random_test_vector.py +# ------------------------------------------ +# Formats test vector for ecdsa_fpga_model +# +# Author: Pavel Shatov +# Copyright (c) 2017-2018, NORDUnet A/S +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the NORDUnet nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +# +USAGE = "USAGE: format_random_test_vector.py [openssl_binary]" +# + + +# +# This script reads the test vector generated by the script +# regenerate_random_test_vector.py and writes nicely formatted C header file +# and Verilog include file. +# + + +# +# imports +# +import sys +import subprocess + + +# +# list of curve names of interest +# +CURVE_P256 = "p256" +CURVE_P384 = "p384" + + +# +# variables +# +OPENSSL = "" + + +# +# format one test vector +# +def format_c_header(f, curve, da, qax, qay): + + if curve == CURVE_P256: curve_str = "ECDSA_P256" + if curve == CURVE_P384: curve_str = "ECDSA_P384" + + # write all numbers in vector + format_c_array(f, da, "#define " + curve_str + "_D_RANDOM_INIT" + " \\\n") + format_c_array(f, qax, "#define " + curve_str + "_QX_RANDOM_INIT" + " \\\n") + format_c_array(f, qay, "#define " + curve_str + "_QY_RANDOM_INIT" + " \\\n") + + +# +# format one test vector +# +def format_verilog_include(f, curve, da, qax, qay): + + if curve == CURVE_P256: curve_str, msb_index = "ECDSA_P256", "255" + if curve == CURVE_P384: curve_str, msb_index = "ECDSA_P384", "383" + + # write all numbers in vector + format_verilog_concatenation(f, da, "localparam [" + msb_index + ":0] " + curve_str + "_D_RANDOM " + " =\n") + format_verilog_concatenation(f, qax, "localparam [" + msb_index + ":0] " + curve_str + "_QX_RANDOM" + " =\n") + format_verilog_concatenation(f, qay, "localparam [" + msb_index + ":0] " + curve_str + "_QY_RANDOM" + " =\n") + + +# +# nicely format multi-word integer into C array initializer +# +def format_c_array(f, n, s): + + # print '#define XYZ \' + f.write(s) + + # convert number to hex string and prepend it with zeroes if necessary + n_hex = hex(n).lstrip("0x").rstrip("L") + while (len(n_hex) % 8) > 0: + n_hex = "0" + n_hex + + # get number of 32-bit words + num_words = len(n_hex) // 8 + + # print all words in n + w = 0 + while w < num_words: + + n_part = "" + + # add tab for every new line + if w == 0: + n_part += "\t{" + elif (w % 4) == 0: + n_part += "\t " + + # add current word + n_part += "0x" + n_hex[8 * w : 8 * (w + 1)] + + # add separator or newline + if (w + 1) == num_words: + n_part += "}\n" + else: + n_part += ", " + if (w % 4) == 3: + n_part += "\\\n" + + w += 1 + + # write current part + f.write(n_part) + + # write final newline + f.write("\n") + + +def format_verilog_concatenation(f, n, s): + + # print 'localparam ZZZ =' + f.write(s) + + # convert number to hex string and prepend it with zeroes if necessary + n_hex = hex(n).split("0x")[1] + while (len(n_hex) % 8) > 0: + n_hex = "0" + n_hex + + # get number of 32-bit words + num_words = len(n_hex) // 8 + + # print all words in n + w = 0 + while w < num_words: + + n_part = "" + + if w == 0: + n_part += "\t{" + elif (w % 4) == 0: + n_part += "\t " + + n_part += "32'h" + n_hex[8 * w : 8 * (w + 1)] + + if (w + 1) == num_words: + n_part += "};\n" + else: + n_part += ", " + if (w % 4) == 3: + n_part += "\n" + w += 1 + + f.write(n_part) + + f.write("\n") + + + # + # returns d, qx, qy, where + # d is private key and qx, qy is the corresponding public key + # +def get_key(openssl, party, curve): + + # generate private key filename + key_file = party + "_" + curve + ".key" + + # retrieve key components using openssl + openssl_command = [openssl, "ec", "-in", key_file, "-noout", "-text"] + openssl_stdout = subprocess.check_output(openssl_command).decode("utf-8") + stdout_lines = openssl_stdout.splitlines() + + found_priv = False + found_pub = False + + key_priv = "" + key_pub = "" + + # process lines looking for "priv:" and "pub:" markers + for line in stdout_lines: + + # found private key marker? + if line.strip() == "priv:": + found_priv = True + found_pub = False + continue + + # found public key marker? + if line.strip() == "pub:": # openssl 1.0.2g prints 'pub: ' (extra space before newline), + found_pub = True # so we need to compare against line.strip(), not just line + found_priv = False + continue + + # found part of private key? + if found_priv: + if not line.startswith(" "): + found_priv = False + continue + else: + key_priv += line.strip() + + # found part of public key? + if found_pub: + if not line.startswith(" "): + found_pub = False + continue + else: + key_pub += line.strip() + + # do some cleanup and sanity checking on private key + # * remove extra leading zero byte if present + # * remove colons + # * check length (256 bits or 384 bits) + while key_priv.startswith("00"): + key_priv = key_priv[2:] + + key_priv = key_priv.replace(":", "") + + if curve == CURVE_P256 and len(key_priv) != 256 / 4: sys.exit() + if curve == CURVE_P384 and len(key_priv) != 384 / 4: sys.exit() + + # do some cleanup and sanity checking on public key + # * make sure, that uncompressed form marker (0x04) is present and + # then remove it + # * remove colons + # * check length (2x256 or 2x384 bits) + if not key_pub.startswith("04"): sys.exit() + + key_pub = key_pub[2:] + key_pub = key_pub.replace(":", "") + + if curve == CURVE_P256 and len(key_pub) != 2 * 256 / 4: sys.exit() + if curve == CURVE_P384 and len(key_pub) != 2 * 384 / 4: sys.exit() + + # split public key into parts + if curve == CURVE_P256: + key_pub_x = key_pub[ 0: 64] + key_pub_y = key_pub[ 64:128] + + if curve == CURVE_P384: + key_pub_x = key_pub[ 0: 96] + key_pub_y = key_pub[ 96:192] + + # convert from strings to integers + key_priv = int(key_priv, 16) + key_pub_x = int(key_pub_x, 16) + key_pub_y = int(key_pub_y, 16) + + # done + return key_priv, key_pub_x, key_pub_y + + +if __name__ == "__main__": + + # detect whether user requested some specific binary + if len(sys.argv) == 1: + OPENSSL = "openssl" + print("Using system OpenSSL library.") + elif len(sys.argv) == 2: + OPENSSL = sys.argv[1] + print("Using OpenSSL binary '" + OPENSSL + "'...") + else: + print(USAGE) + + if len(OPENSSL) > 0: + + # list of curves to process + curves = [CURVE_P256, CURVE_P384] + + # open output files + file_h = open('ecdsa_test_vector_randomized.h', 'w') + file_v = open('ecdsa_test_vector_randomized.vh', 'w') + + # write headers + file_h.write("/* Generated automatically, do not edit. */\n\n") + file_v.write("/* Generated automatically, do not edit. */\n\n") + + # process all the keys + for next_curve in curves: + + # load keys + da, qax, qay = get_key(OPENSSL, "charlie", next_curve) + + # format numbers and write to files + + format_c_header(file_h, next_curve, + da, qax, qay); + + format_verilog_include(file_v, next_curve, + da, qax, qay); + + # done + file_h.close() + file_v.close() + + # everything went just fine + print("Test vector formatted.") + +# +# End of file +# diff --git a/test_vectors/regenerate_random_test_vector.py b/test_vectors/regenerate_random_test_vector.py new file mode 100644 index 0000000..75cb761 --- /dev/null +++ b/test_vectors/regenerate_random_test_vector.py @@ -0,0 +1,91 @@ +# +# regenerate_test_vector.py +# ----------------------------------------------------------- +# Generates a new randomized test vector for ecdsa_fpga_model +# +# Author: Pavel Shatov +# Copyright (c) 2017-2018, NORDUnet A/S +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the NORDUnet nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +USAGE = "USAGE: regenerate_test_vector.py [openssl_binary]" +# + +# This script generates a test vector. The test vector contains two +# private keys. One is for P-256, the other one is for P-384. +# + +# +# imports +# +import sys +import subprocess + +OPENSSL = "" + +CURVE_P256 = "prime256v1" +CURVE_P384 = "secp384r1" + +SECRET_CHARLIE_256 = "charlie_p256.key" +SECRET_CHARLIE_384 = "charlie_p384.key" + +def openssl_ecparam_genkey(openssl, curve, file): + subprocess.call([openssl, "ecparam", "-genkey", "-name", curve, "-out", file]) + +# +# __main__ +# +if __name__ == "__main__": + + # detect whether user requested some specific binary + if len(sys.argv) == 1: + OPENSSL = "openssl" + print("Using system OpenSSL library.") + elif len(sys.argv) == 2: + OPENSSL = sys.argv[1] + print("Using OpenSSL binary '" + OPENSSL + "'...") + else: + print(USAGE) + + if len(OPENSSL) > 0: + + # generate a new private key for P-256 curve + openssl_ecparam_genkey(OPENSSL, CURVE_P256, SECRET_CHARLIE_256) + + # generate a new private key for P-384 curve + openssl_ecparam_genkey(OPENSSL, CURVE_P384, SECRET_CHARLIE_384) + + # done + print("New randomized test vector generated.") + + +# +# End of file +# |