aboutsummaryrefslogtreecommitdiff
path: root/ecdsa_fpga_curve_abstract.cpp
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2021-04-11 17:42:52 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2021-04-11 17:42:52 +0300
commit1e16303d718986e0e991444a7cdcab3c5c89b1f4 (patch)
tree25f885054c06e870ad46e5a8d1f9756795111170 /ecdsa_fpga_curve_abstract.cpp
parentc5ee836423777703920593143f140bd0036caad4 (diff)
Updated curve math layer to do multiplication using the Montgomery ladder
method. Also added optional debugging output to help debug microcoded versions of double and add routines.
Diffstat (limited to 'ecdsa_fpga_curve_abstract.cpp')
-rw-r--r--ecdsa_fpga_curve_abstract.cpp326
1 files changed, 178 insertions, 148 deletions
diff --git a/ecdsa_fpga_curve_abstract.cpp b/ecdsa_fpga_curve_abstract.cpp
index 5510ac1..2d25cfc 100644
--- a/ecdsa_fpga_curve_abstract.cpp
+++ b/ecdsa_fpga_curve_abstract.cpp
@@ -6,7 +6,7 @@
//
// Authors: Pavel Shatov
//
-// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+// Copyright (c) 2015-2016, 2018, 2021 NORDUnet A/S
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
@@ -79,14 +79,7 @@ void fpga_curve_init()
// Q(qx,qy) = k * G(px,py)
//
// Note, that Q is supposed to be in affine coordinates. Multiplication is done
-// using the double-and-add algorithm 3.27 from "Guide to Elliptic Curve
-// Cryptography".
-//
-// WARNING: Though this procedure always does the addition step, it only
-// updates the result when current bit of k is set. It does not take any
-// active measures to keep run-time constant. The main purpose of this model
-// is to help debug Verilog code for FPGA, so *DO NOT* use it anywhere near
-// production!
+// using the Montgomery ladder method.
//
//------------------------------------------------------------------------------
void fpga_curve_base_scalar_multiply_abstract(const FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
@@ -94,42 +87,91 @@ void fpga_curve_base_scalar_multiply_abstract(const FPGA_BUFFER *k, FPGA_BUFFER
{
int word_count, bit_count; // counters
- FPGA_BUFFER rx, ry, rz; // intermediate result
- FPGA_BUFFER tx, ty, tz; // temporary variable
+ FPGA_BUFFER r0x, r0y, r0z; // intermediate result
+ FPGA_BUFFER r1x, r1y, r1z; // intermediate result
+ FPGA_BUFFER sx, sy, sz; // temporary variable
+ FPGA_BUFFER tx, ty, tz; // temporary variable
+
+ /* set initial value of R0 to point at infinity, R1 to the base point */
+ fpga_multiword_copy(&ECDSA_ONE, &r0x);
+ fpga_multiword_copy(&ECDSA_ONE, &r0y);
+ fpga_multiword_copy(&ECDSA_ZERO, &r0z);
- /* set initial value of R to point at infinity */
- fpga_multiword_copy(&ECDSA_ONE, &rx);
- fpga_multiword_copy(&ECDSA_ONE, &ry);
- fpga_multiword_copy(&ECDSA_ZERO, &rz);
+ fpga_multiword_copy(&ECDSA_GX, &r1x);
+ fpga_multiword_copy(&ECDSA_GY, &r1y);
+ fpga_multiword_copy(&ECDSA_ONE, &r1z);
+
+ /* handy vars */
+ FPGA_WORD k_word_shifted;
+ bool k_bit;
/* process bits of k left-to-right */
for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
{
- /* calculate T = 2 * R */
- fpga_curve_double_jacobian_abstract(&rx, &ry, &rz, &tx, &ty, &tz);
-
- /* always calculate R = T + P for constant-time */
- fpga_curve_add_jacobian_abstract(&tx, &ty, &tz, &rx, &ry, &rz);
-
- /* revert to the value of T before addition if the current bit of k is not set */
- if (!((k->words[word_count-1] >> (bit_count-1)) & 1))
- { fpga_multiword_copy(&tx, &rx);
- fpga_multiword_copy(&ty, &ry);
- fpga_multiword_copy(&tz, &rz);
- }
-
+ k_word_shifted = k->words[word_count-1] >> (bit_count-1);
+ k_bit = (k_word_shifted & 1) == 1;
+
+#ifdef DUMP_CYCLE_STATES
+ dump_cycle_header(word_count, bit_count, k_bit);
+#endif
+
+ /* calculate S = R0 + R */
+ fpga_curve_add_jacobian_abstract_2(&r0x, &r0y, &r0z, &r1x, &r1y, &r1z, &sx, &sy, &sz);
+
+ /* calculate T = 2 * (R0 | R1) */
+ if (!k_bit)
+ fpga_curve_double_jacobian_abstract(&r0x, &r0y, &r0z, &tx, &ty, &tz);
+ else
+ fpga_curve_double_jacobian_abstract(&r1x, &r1y, &r1z, &tx, &ty, &tz);
+
+ //
+ // dump cycle state
+ //
+#ifdef DUMP_CYCLE_STATES
+ dump_cycle_state(&r0x, &r0y, &r0z, &r1x, &r1y, &r1z,
+ &sx, &sy, &sz, &tx, &ty, &tz);
+#endif
+
+ /* now update working variables */
+ if (!k_bit)
+ { fpga_multiword_copy(&tx, &r0x);
+ fpga_multiword_copy(&ty, &r0y);
+ fpga_multiword_copy(&tz, &r0z);
+
+ fpga_multiword_copy(&sx, &r1x);
+ fpga_multiword_copy(&sy, &r1y);
+ fpga_multiword_copy(&sz, &r1z);
+ }
+ else
+ { fpga_multiword_copy(&tx, &r1x);
+ fpga_multiword_copy(&ty, &r1y);
+ fpga_multiword_copy(&tz, &r1z);
+
+ fpga_multiword_copy(&sx, &r0x);
+ fpga_multiword_copy(&sy, &r0y);
+ fpga_multiword_copy(&sz, &r0z);
+ }
}
+ //
+ // we now need to convert the point to affine coordinates
+ //
FPGA_BUFFER a2, a3;
- fpga_modular_inv23(&rz, &a2, &a3);
+#ifdef DUMP_UOP_OUTPUTS
+ _DUMP_MODULAR_RESULTS = true;
+#endif
+
+ fpga_modular_inv23(&r0z, &a2, &a3);
+
+ fpga_modular_mul(&r0x, &a2, qx); // qx = px * (pz^-1)^2 (mod q)
+ fpga_modular_mul(&r0y, &a3, qy); // qy = py * (pz^-1)^3 (mod q)
- fpga_modular_mul(&rx, &a2, qx); // qx = px * (pz^-1)^2 (mod q)
- fpga_modular_mul(&ry, &a3, qy); // qy = py * (pz^-1)^3 (mod q)
+ _DUMP_MODULAR_RESULTS = false;
// check, that rz is non-zero (not point at infinity)
- bool rz_is_zero = fpga_multiword_is_zero(&rz);
+ bool rz_is_zero = fpga_multiword_is_zero(&r0z);
// handle special case (result is point at infinity)
if (rz_is_zero)
@@ -154,21 +196,13 @@ void fpga_curve_base_scalar_multiply_abstract(const FPGA_BUFFER *k, FPGA_BUFFER
// faster, than multiplication.
//
// Note, that this routine also handles one special case, namely when P is at
-// infinity.
+// infinity. No actual extra "handling" is necessary, since when pz is zero,
+// rz will also be zero (and that's what the "at infinity" check takes into
+// account).
//
// Instead of actual modular division, multiplication by pre-computed constant
// (2^-1 mod q) is done.
//
-// Note, that FPGA modular multiplier can't multiply a given buffer by itself,
-// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome
-// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and
-// then fpga_modular_mul(pz, &t1, &t1) instead.
-//
-// WARNING: Though this procedure always does doubling steps, it does not take
-// any active measures to keep run-time constant. The main purpose of this
-// model is to help debug Verilog code for FPGA, so *DO NOT* use is anywhere
-// near production!
-//
//------------------------------------------------------------------------------
void fpga_curve_double_jacobian_abstract(const FPGA_BUFFER *px,
const FPGA_BUFFER *py,
@@ -178,41 +212,32 @@ void fpga_curve_double_jacobian_abstract(const FPGA_BUFFER *px,
FPGA_BUFFER *rz)
//------------------------------------------------------------------------------
{
- FPGA_BUFFER t1, t2, t3; // temporary variables
-
- // check, whether P is at infinity
- bool pz_is_zero = fpga_multiword_is_zero(pz);
-
- /* 2. */ fpga_multiword_copy(pz, &t1);
- fpga_modular_mul(pz, &t1, &t1);
- /* 3. */ fpga_modular_sub(px, &t1, &t2);
- /* 4. */ fpga_modular_add(px, &t1, &t1);
- /* 5. */ fpga_modular_mul(&t1, &t2, &t2);
- /* 6. */ fpga_modular_add(&t2, &t2, &t1);
- /* */ fpga_modular_add(&t1, &t2, &t2);
- /* 7. */ fpga_modular_add(py, py, ry);
- /* 8. */ fpga_modular_mul(pz, ry, rz);
- /* 9. */ fpga_multiword_copy(ry, &t1);
- fpga_multiword_copy(ry, &t3);
- fpga_modular_mul(&t1, &t3, ry);
- /* 10. */ fpga_modular_mul(px, ry, &t3);
- /* 11. */ fpga_multiword_copy(ry, &t1);
- fpga_modular_mul(ry, &t1, &t1);
- /* 12. */ fpga_modular_mul(&t1, &ECDSA_DELTA, ry);
- /* 13. */ fpga_multiword_copy(&t2, &t1);
- fpga_modular_mul(&t1, &t2, rx);
- /* 14. */ fpga_modular_add(&t3, &t3, &t1);
- /* 15. */ fpga_modular_sub(rx, &t1, rx);
- /* 16. */ fpga_modular_sub(&t3, rx, &t1);
- /* 17. */ fpga_modular_mul(&t1, &t2, &t1);
- /* 18. */ fpga_modular_sub(&t1, ry, ry);
-
- // handle special case (input point is at infinity)
- if (pz_is_zero)
- { fpga_multiword_copy(&ECDSA_ONE, rx);
- fpga_multiword_copy(&ECDSA_ONE, ry);
- fpga_multiword_copy(&ECDSA_ZERO, rz);
- }
+ FPGA_BUFFER t1, t2, t3, t4, t5; // temporary variables
+
+#ifdef DUMP_UOP_OUTPUTS
+ _DUMP_MODULAR_RESULTS = true;
+#endif
+
+ fpga_modular_mul(pz, pz, &t1);
+ fpga_modular_sub(px, &t1, &t2);
+ fpga_modular_add(px, &t1, &t3);
+ fpga_modular_mul(&t3, &t2, &t4);
+ fpga_modular_add(&t4, &t4, &t1);
+ fpga_modular_add(&t1, &t4, &t2);
+ fpga_modular_add(py, py, ry);
+ fpga_modular_mul(pz, ry, rz);
+ fpga_modular_mul(ry, ry, &t1);
+ fpga_modular_mul(px, &t1, &t3);
+ fpga_modular_mul(&t1, &t1, &t4);
+ fpga_modular_mul(&t4, &ECDSA_DELTA, &t5);
+ fpga_modular_mul(&t2, &t2, &t4);
+ fpga_modular_add(&t3, &t3, &t1);
+ fpga_modular_sub(&t4, &t1, rx);
+ fpga_modular_sub(&t3, rx, &t1);
+ fpga_modular_mul(&t1, &t2, &t3);
+ fpga_modular_sub(&t3, &t5, ry);
+
+ _DUMP_MODULAR_RESULTS = false;
}
@@ -220,89 +245,94 @@ void fpga_curve_double_jacobian_abstract(const FPGA_BUFFER *px,
//
// Elliptic curve point addition routine.
//
-// R(rx,ry,rz) = P(px,py,pz) + Q(qx,qy)
+// R(rx,ry,rz) = P(px,py,pz) + Q(qx,qy,qz)
//
-// Note, that P(px, py, pz) is supposed to be in projective Jacobian
-// coordinates, while Q(qx,qy) is supposed to be in affine coordinates,
-// R(rx, ry, rz) will be in projective Jacobian coordinates. Moreover, in this
-// particular implementation Q is always the base point G.
+// Note, that P(px, py, pz) and Q(qx, qy, qz) are supposed to be in projective
+// Jacobian coordinates, R(rx, ry, rz) will be in projective Jacobian
+// coordinates too.
//
-// This routine implements algorithm 3.22 from "Guide to Elliptic Curve
-// Cryptography". Differences from the original algorithm:
+// This routine implements the Point Addition algorithm from
+// https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates
//
-// 1) Step 1. is omitted, because point Q is always the base point, which is
-// not at infinity by definition.
-//
-// 2) Step 9.1 just returns the pre-computed double of the base point instead
-// of actually doubling it.
-//
-// Note, that this routine also handles three special cases:
-//
-// 1) P is at infinity
-// 2) P == Q
-// 3) P == -Q
-//
-// Note, that FPGA modular multiplier can't multiply a given buffer by itself,
-// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome
-// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and
-// then fpga_modular_mul(pz, &t1, &t1) instead.
+// Since the routine is means to be used with Montgomery ladder, the invariant
+// R1 - R0 = G means, that the two special cases P == Q and P == -Q can never
+// happen and the checks are redundant. The checks for P === O and Q == O are
+// necessary, however. Note, that P and Q can't be at infinity at the same time
+// though.
//
// WARNING: This procedure does not take any active measures to keep run-time
// constant. The main purpose of this model is to help debug Verilog code for
// FPGA, so *DO NOT* use is anywhere near production!
//
//------------------------------------------------------------------------------
-void fpga_curve_add_jacobian_abstract(const FPGA_BUFFER *px,
- const FPGA_BUFFER *py,
- const FPGA_BUFFER *pz,
- FPGA_BUFFER *rx,
- FPGA_BUFFER *ry,
- FPGA_BUFFER *rz)
+void fpga_curve_add_jacobian_abstract_2(const FPGA_BUFFER *px,
+ const FPGA_BUFFER *py,
+ const FPGA_BUFFER *pz,
+ const FPGA_BUFFER *qx,
+ const FPGA_BUFFER *qy,
+ const FPGA_BUFFER *qz,
+ FPGA_BUFFER *rx,
+ FPGA_BUFFER *ry,
+ FPGA_BUFFER *rz)
//------------------------------------------------------------------------------
{
- FPGA_BUFFER t1, t2, t3, t4; // temporary variables
+ bool pz_is_zero = fpga_multiword_is_zero(pz);
+ bool qz_is_zero = fpga_multiword_is_zero(qz);
+
+ FPGA_BUFFER t1, t2, t3, t4, t5, t6, t7, t8;
+
+#ifdef DUMP_UOP_OUTPUTS
+ _DUMP_MODULAR_RESULTS = true;
+#endif
+
+ fpga_modular_mul(pz, pz, &t1); // pz2 = pz * pz (pz squared)
+ fpga_modular_mul(qz, qz, &t2); // qz2 = qz * qz (qz squared)
+
+ fpga_modular_mul(pz, &t1, &t3); // pz3 = pz * pz2 (pz cubed)
+ fpga_modular_mul(qz, &t2, &t4); // qz3 = qz * qz2 (qz cubed)
+
+ fpga_modular_mul(px, &t2, &t5); // pxz = px * qz2 (px z-adjusted)
+ fpga_modular_mul(qx, &t1, &t2); // qxz = qx * pz2 (qx z-adjusted)
+
+ fpga_modular_mul(py, &t4, &t6); // pyz = py * qz3 (py z-adjusted)
+ fpga_modular_mul(qy, &t3, &t4); // qyz = qy * pz3 (qy z-adjusted)
+
+ fpga_modular_sub(&t2, &t5, &t7); // dqpx = qxz - pxz (x-coordinate delta)
+ fpga_modular_sub(&t4, &t6, &t8); // dqpy = qyz - pyz (y-coordinate delta)
+
+ fpga_modular_mul(pz, qz, &t1); // pqz = pz * qz
+ fpga_modular_mul(&t7, &t1, rz); // rz = pqz * qdpx
+
+ fpga_modular_mul(&t8, &t8, &t2); // dqpy2 = dqpy * dqpy
+ fpga_modular_mul(&t7, &t7, &t3); // dqpx2 = dqpx * dqpx
+ fpga_modular_mul(&t7, &t3, &t4); // dqpx3 = dqpx * dqpx2
+
+ fpga_modular_sub(&t2, &t4, &t1); // t1 = dqpy2 - dqpx3
+ fpga_modular_mul(&t5, &t3, &t2); // t2 = pxz * dqpx2
+ fpga_modular_add(&t2, &t2, &t3); // t3 = 2 * t2 (= t2 + t2, which is faster)
+ fpga_modular_sub(&t1, &t3, rx); // rx = t1 - t3
+
+ fpga_modular_sub(&t2, rx, &t1); // t1 = t2 - rx
+ fpga_modular_mul(&t1, &t8, &t2); // t2 = t1 * dqpy
+ fpga_modular_mul(&t6, &t4, &t3); // t3 = pyz * dqpx3
+ fpga_modular_sub(&t2, &t3, ry); // ry = t2 - t3
- bool pz_is_zero = fpga_multiword_is_zero(pz); // Step 2.
-
- /* 3. */ fpga_multiword_copy(pz, &t1);
- fpga_modular_mul(pz, &t1, &t1);
- /* 4. */ fpga_modular_mul(pz, &t1, &t2);
- /* 5. */ fpga_modular_mul(&t1, &ECDSA_GX, &t1);
- /* 6. */ fpga_modular_mul(&t2, &ECDSA_GY, &t2);
- /* 7. */ fpga_modular_sub(&t1, px, &t1);
- /* 8. */ fpga_modular_sub(&t2, py, &t2);
-
- bool t1_is_zero = fpga_multiword_is_zero(&t1); // | Step 9.
- bool t2_is_zero = fpga_multiword_is_zero(&t2); // |
-
- /* 10. */ fpga_modular_mul(pz, &t1, rz);
- /* 11. */ fpga_multiword_copy(&t1, &t3);
- fpga_modular_mul(&t1, &t3, &t3);
- /* 12. */ fpga_modular_mul(&t1, &t3, &t4);
- /* 13. */ fpga_modular_mul(px, &t3, &t3);
- /* 14. */ fpga_modular_add(&t3, &t3, &t1);
- /* 15. */ fpga_multiword_copy(&t2, rx);
- fpga_modular_mul(rx, &t2, rx);
- /* 16. */ fpga_modular_sub(rx, &t1, rx);
- /* 17. */ fpga_modular_sub(rx, &t4, rx);
- /* 18. */ fpga_modular_sub(&t3, rx, &t3);
- /* 19. */ fpga_modular_mul(&t2, &t3, &t3);
- /* 20. */ fpga_modular_mul(py, &t4, &t4);
- /* 21. */ fpga_modular_sub(&t3, &t4, ry);
-
- //
- // final selection
- //
- if (pz_is_zero) // P at infinity ?
- { fpga_multiword_copy(&ECDSA_GX, rx);
- fpga_multiword_copy(&ECDSA_GY, ry);
- fpga_multiword_copy(&ECDSA_ONE, rz);
+ _DUMP_MODULAR_RESULTS = false;
+
+ // P == O
+ if (pz_is_zero)
+ { fpga_multiword_copy(qx, rx);
+ fpga_multiword_copy(qy, ry);
+ fpga_multiword_copy(qz, rz);
+ return;
}
- else if (t1_is_zero) // same x for P and Q ?
- {
- fpga_multiword_copy(t2_is_zero ? &ECDSA_HX : &ECDSA_ONE, rx); // | same y ? (P==Q => R=2*G) : (P==-Q => R=O)
- fpga_multiword_copy(t2_is_zero ? &ECDSA_HY : &ECDSA_ONE, ry); // |
- fpga_multiword_copy(t2_is_zero ? &ECDSA_ONE : &ECDSA_ZERO, rz); // |
+
+ // Q == O
+ if (qz_is_zero)
+ { fpga_multiword_copy(px, rx);
+ fpga_multiword_copy(py, ry);
+ fpga_multiword_copy(pz, rz);
+ return;
}
}