//------------------------------------------------------------------------------
//
// ecdsa_fpga_curve_abstract.cpp
// ----------------------------------------------
// Elliptic curve arithmetic procedures for ECDSA
//
// Authors: Pavel Shatov
//
// Copyright (c) 2015-2016, 2018, 2021 NORDUnet A/S
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Headers
//------------------------------------------------------------------------------
#include "ecdsa_fpga_model.h"
//------------------------------------------------------------------------------
// Globals
//------------------------------------------------------------------------------
FPGA_BUFFER ECDSA_GX, ECDSA_GY;
FPGA_BUFFER ECDSA_HX, ECDSA_HY;
FPGA_BUFFER ECDSA_N;
//------------------------------------------------------------------------------
void fpga_curve_init()
//------------------------------------------------------------------------------
{
int w; // word counter
FPGA_BUFFER tmp_gx = ECDSA_GX_INIT, tmp_gy = ECDSA_GY_INIT;
FPGA_BUFFER tmp_hx = ECDSA_HX_INIT, tmp_hy = ECDSA_HY_INIT;
FPGA_BUFFER tmp_n = ECDSA_N_INIT;
/* fill buffers for large multi-word integers */
for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
{ ECDSA_GX.words[w] = tmp_gx.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
ECDSA_GY.words[w] = tmp_gy.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
ECDSA_HX.words[w] = tmp_hx.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
ECDSA_HY.words[w] = tmp_hy.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
ECDSA_N.words[w] = tmp_n.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
}
}
//------------------------------------------------------------------------------
//
// Elliptic curve base point scalar multiplication routine.
//
// Q(qx,qy) = k * G(px,py)
//
// Note, that Q is supposed to be in affine coordinates. Multiplication is done
// using the Montgomery ladder method.
//
//------------------------------------------------------------------------------
void fpga_curve_base_scalar_multiply_abstract(const FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
//------------------------------------------------------------------------------
{
int word_count, bit_count; // counters
FPGA_BUFFER r0x, r0y, r0z; // intermediate result
FPGA_BUFFER r1x, r1y, r1z; // intermediate result
FPGA_BUFFER sx, sy, sz; // temporary variable
FPGA_BUFFER tx, ty, tz; // temporary variable
/* set initial value of R0 to point at infinity, R1 to the base point */
fpga_multiword_copy(&ECDSA_ONE, &r0x);
fpga_multiword_copy(&ECDSA_ONE, &r0y);
fpga_multiword_copy(&ECDSA_ZERO, &r0z);
fpga_multiword_copy(&ECDSA_GX, &r1x);
fpga_multiword_copy(&ECDSA_GY, &r1y);
fpga_multiword_copy(&ECDSA_ONE, &r1z);
/* handy vars */
FPGA_WORD k_word_shifted;
bool k_bit;
/* process bits of k left-to-right */
for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
{
k_word_shifted = k->words[word_count-1] >> (bit_count-1);
k_bit = (k_word_shifted & 1) == 1;
#ifdef DUMP_CYCLE_STATES
dump_cycle_header(word_count, bit_count, k_bit);
#endif
/* calculate S = R0 + R */
fpga_curve_add_jacobian_abstract_2(&r0x, &r0y, &r0z, &r1x, &r1y, &r1z, &sx, &sy, &sz);
/* calculate T = 2 * (R0 | R1) */
if (!k_bit)
fpga_curve_double_jacobian_abstract(&r0x, &r0y, &r0z, &tx, &ty, &tz);
else
fpga_curve_double_jacobian_abstract(&r1x, &r1y, &r1z, &tx, &ty, &tz);
//
// dump cycle state
//
#ifdef DUMP_CYCLE_STATES
dump_cycle_state(&r0x, &r0y, &r0z, &r1x, &r1y, &r1z,
&sx, &sy, &sz, &tx, &ty, &tz);
#endif
/* now update working variables */
if (!k_bit)
{ fpga_multiword_copy(&tx, &r0x);
fpga_multiword_copy(&ty, &r0y);
fpga_multiword_copy(&tz, &r0z);
fpga_multiword_copy(&sx, &r1x);
fpga_multiword_copy(&sy, &r1y);
fpga_multiword_copy(&sz, &r1z);
}
else
{ fpga_multiword_copy(&tx, &r1x);
fpga_multiword_copy(&ty, &r1y);
fpga_multiword_copy(&tz, &r1z);
fpga_multiword_copy(&sx, &r0x);
fpga_multiword_copy(&sy, &r0y);
fpga_multiword_copy(&sz, &r0z);
}
}
//
// we now need to convert the point to affine coordinates
//
FPGA_BUFFER a2, a3;
#ifdef DUMP_UOP_OUTPUTS
_DUMP_MODULAR_RESULTS = true;
#endif
fpga_modular_inv23(&r0z, &a2, &a3);
fpga_modular_mul(&r0x, &a2, qx); // qx = px * (pz^-1)^2 (mod q)
fpga_modular_mul(&r0y, &a3, qy); // qy = py * (pz^-1)^3 (mod q)
_DUMP_MODULAR_RESULTS = false;
// check, that rz is non-zero (not point at infinity)
bool rz_is_zero = fpga_multiword_is_zero(&r0z);
// handle special case (result is point at infinity)
if (rz_is_zero)
{ fpga_multiword_copy(&ECDSA_ZERO, qx);
fpga_multiword_copy(&ECDSA_ZERO, qy);
}
}
//------------------------------------------------------------------------------
//
// Elliptic curve point doubling routine.
//
// R(rx,ry,rz) = 2 * P(px,py,pz)
//
// Note, that P(px,py,pz) is supposed to be in projective Jacobian coordinates,
// R will be in projective Jacobian coordinates.
//
// This routine implements algorithm 3.21 from "Guide to Elliptic Curve
// Cryptography", the only difference is that step 6. does T1 = T2 + T2 and
// then T2 = T2 + T1 instead of T2 = 3 * T2, because our addition is much
// faster, than multiplication.
//
// Note, that this routine also handles one special case, namely when P is at
// infinity. No actual extra "handling" is necessary, since when pz is zero,
// rz will also be zero (and that's what the "at infinity" check takes into
// account).
//
// Instead of actual modular division, multiplication by pre-computed constant
// (2^-1 mod q) is done.
//
//------------------------------------------------------------------------------
void fpga_curve_double_jacobian_abstract(const FPGA_BUFFER *px,
const FPGA_BUFFER *py,
const FPGA_BUFFER *pz,
FPGA_BUFFER *rx,
FPGA_BUFFER *ry,
FPGA_BUFFER *rz)
//------------------------------------------------------------------------------
{
FPGA_BUFFER t1, t2, t3, t4, t5; // temporary variables
#ifdef DUMP_UOP_OUTPUTS
_DUMP_MODULAR_RESULTS = true;
#endif
fpga_modular_mul(pz, pz, &t1);
fpga_modular_sub(px, &t1, &t2);
fpga_modular_add(px, &t1, &t3);
fpga_modular_mul(&t3, &t2, &t4);
fpga_modular_add(&t4, &t4, &t1);
fpga_modular_add(&t1, &t4, &t2);
fpga_modular_add(py, py, ry);
fpga_modular_mul(pz, ry, rz);
fpga_modular_mul(ry, ry, &t1);
fpga_modular_mul(px, &t1, &t3);
fpga_modular_mul(&t1, &t1, &t4);
fpga_modular_mul(&t4, &ECDSA_DELTA, &t5);
fpga_modular_mul(&t2, &t2, &t4);
fpga_modular_add(&t3, &t3, &t1);
fpga_modular_sub(&t4, &t1, rx);
fpga_modular_sub(&t3, rx, &t1);
fpga_modular_mul(&t1, &t2, &t3);
fpga_modular_sub(&t3, &t5, ry);
_DUMP_MODULAR_RESULTS = false;
}
//------------------------------------------------------------------------------
//
// Elliptic curve point addition routine.
//
// R(rx,ry,rz) = P(px,py,pz) + Q(qx,qy,qz)
//
// Note, that P(px, py, pz) and Q(qx, qy, qz) are supposed to be in projective
// Jacobian coordinates, R(rx, ry, rz) will be in projective Jacobian
// coordinates too.
//
// This routine implements the Point Addition algorithm from
// https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates
//
// Since the routine is means to be used with Montgomery ladder, the invariant
// R1 - R0 = G means, that the two special cases P == Q and P == -Q can never
// happen and the checks are redundant. The checks for P === O and Q == O are
// necessary, however. Note, that P and Q can't be at infinity at the same time
// though.
//
// WARNING: This procedure does not take any active measures to keep run-time
// constant. The main purpose of this model is to help debug Verilog code for
// FPGA, so *DO NOT* use is anywhere near production!
//
//------------------------------------------------------------------------------
void fpga_curve_add_jacobian_abstract_2(const FPGA_BUFFER *px,
const FPGA_BUFFER *py,
const FPGA_BUFFER *pz,
const FPGA_BUFFER *qx,
const FPGA_BUFFER *qy,
const FPGA_BUFFER *qz,
FPGA_BUFFER *rx,
FPGA_BUFFER *ry,
FPGA_BUFFER *rz)
//------------------------------------------------------------------------------
{
bool pz_is_zero = fpga_multiword_is_zero(pz);
bool qz_is_zero = fpga_multiword_is_zero(qz);
FPGA_BUFFER t1, t2, t3, t4, t5, t6, t7, t8;
#ifdef DUMP_UOP_OUTPUTS
_DUMP_MODULAR_RESULTS = true;
#endif
fpga_modular_mul(pz, pz, &t1); // pz2 = pz * pz (pz squared)
fpga_modular_mul(qz, qz, &t2); // qz2 = qz * qz (qz squared)
fpga_modular_mul(pz, &t1, &t3); // pz3 = pz * pz2 (pz cubed)
fpga_modular_mul(qz, &t2, &t4); // qz3 = qz * qz2 (qz cubed)
fpga_modular_mul(px, &t2, &t5); // pxz = px * qz2 (px z-adjusted)
fpga_modular_mul(qx, &t1, &t2); // qxz = qx * pz2 (qx z-adjusted)
fpga_modular_mul(py, &t4, &t6); // pyz = py * qz3 (py z-adjusted)
fpga_modular_mul(qy, &t3, &t4); // qyz = qy * pz3 (qy z-adjusted)
fpga_modular_sub(&t2, &t5, &t7); // dqpx = qxz - pxz (x-coordinate delta)
fpga_modular_sub(&t4, &t6, &t8); // dqpy = qyz - pyz (y-coordinate delta)
fpga_modular_mul(pz, qz, &t1); // pqz = pz * qz
fpga_modular_mul(&t7, &t1, rz); // rz = pqz * qdpx
fpga_modular_mul(&t8, &t8, &t2); // dqpy2 = dqpy * dqpy
fpga_modular_mul(&t7, &t7, &t3); // dqpx2 = dqpx * dqpx
fpga_modular_mul(&t7, &t3, &t4); // dqpx3 = dqpx * dqpx2
fpga_modular_sub(&t2, &t4, &t1); // t1 = dqpy2 - dqpx3
fpga_modular_mul(&t5, &t3, &t2); // t2 = pxz * dqpx2
fpga_modular_add(&t2, &t2, &t3); // t3 = 2 * t2 (= t2 + t2, which is faster)
fpga_modular_sub(&t1, &t3, rx); // rx = t1 - t3
fpga_modular_sub(&t2, rx, &t1); // t1 = t2 - rx
fpga_modular_mul(&t1, &t8, &t2); // t2 = t1 * dqpy
fpga_modular_mul(&t6, &t4, &t3); // t3 = pyz * dqpx3
fpga_modular_sub(&t2, &t3, ry); // ry = t2 - t3
_DUMP_MODULAR_RESULTS = false;
// P == O
if (pz_is_zero)
{ fpga_multiword_copy(qx, rx);
fpga_multiword_copy(qy, ry);
fpga_multiword_copy(qz, rz);
return;
}
// Q == O
if (qz_is_zero)
{ fpga_multiword_copy(px, rx);
fpga_multiword_copy(py, ry);
fpga_multiword_copy(pz, rz);
return;
}
}
//------------------------------------------------------------------------------
// End-of-File
//------------------------------------------------------------------------------