//------------------------------------------------------------------------------
//
// ecdsa_fpga_curve_microcode.cpp
// ----------------------------------------------
// Elliptic curve arithmetic procedures for ECDSA
//
// Authors: Pavel Shatov
//
// Copyright 2018 NORDUnet A/S
// Copyright 2021 The Commons Conservancy Cryptech Project
// SPDX-License-Identifier: BSD-3-Clause
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// - Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Required for Microcode Routines
//------------------------------------------------------------------------------
#define USE_MICROCODE
//------------------------------------------------------------------------------
// Headers
//------------------------------------------------------------------------------
#include "ecdsa_fpga_model.h"
//------------------------------------------------------------------------------
//
// Doubles the point stored in CYCLE_R0* and stores the result in CYCLE_T*.
//
// Input (X, Y, Z): CYCLE_R0X and CYCLE_R0Y are read from BANK_HI,
// CYCLE_R0Z is read from BANK_LO.
// Output: CYCLE_TX and CYCLE_TY are written to BANK_LO, CYCLE_TZ is written
// to BANK_HI.
// CYCLE_T1..CYCLE_T5 are used as scratch and are overwritten.
//
// The per-step formulas below assume that
// uop_calc(OP, src_bank, a, b, dst_bank, d) computes d = a OP b and that
// uop_move(src_bank, s, dst_bank, d) copies s to d; both are defined outside
// this file -- TODO confirm against the uop_* definitions.
//
//------------------------------------------------------------------------------
void fpga_curve_double_jacobian_microcode_r0()
//------------------------------------------------------------------------------
{
/* BEGIN_MICROCODE: CYCLE_DOUBLE_R0 */
// T1 = Z^2, T2 = X - T1, T3 = X + T1, T4 = T3 * T2
uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_R0Z, BANK_HI, CYCLE_T1);
uop_calc(SUB, BANK_HI, CYCLE_R0X, CYCLE_T1, BANK_LO, CYCLE_T2);
uop_calc(ADD, BANK_HI, CYCLE_R0X, CYCLE_T1, BANK_LO, CYCLE_T3);
uop_calc(MUL, BANK_LO, CYCLE_T3, CYCLE_T2, BANK_HI, CYCLE_T4);
// T2 = 3 * T4 (T1 = 2 * T4 lands in BANK_LO, so T4 is copied there too
// because both operands of one uop_calc come from the same bank)
uop_calc(ADD, BANK_HI, CYCLE_T4, CYCLE_T4, BANK_LO, CYCLE_T1);
uop_move( BANK_HI, CYCLE_T4, BANK_LO, CYCLE_T4);
uop_calc(ADD, BANK_LO, CYCLE_T1, CYCLE_T4, BANK_HI, CYCLE_T2);
// TY = 2 * Y (intermediate value, overwritten at the end), TZ = Z * TY
uop_calc(ADD, BANK_HI, CYCLE_R0Y, CYCLE_R0Y, BANK_LO, CYCLE_TY);
uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_TY, BANK_HI, CYCLE_TZ);
// T1 = TY^2, T3 = X * T1, T4 = T1^2, T5 = T4 * CONST_DELTA
// NOTE(review): the value of CONST_DELTA is not visible in this file
uop_calc(MUL, BANK_LO, CYCLE_TY, CYCLE_TY, BANK_HI, CYCLE_T1);
uop_calc(MUL, BANK_HI, CYCLE_R0X, CYCLE_T1, BANK_LO, CYCLE_T3);
uop_calc(MUL, BANK_HI, CYCLE_T1, CYCLE_T1, BANK_LO, CYCLE_T4);
uop_calc(MUL, BANK_LO, CYCLE_T4, CONST_DELTA, BANK_HI, CYCLE_T5);
// T4 = T2^2, T1 = 2 * T3, TX = T4 - T1
uop_calc(MUL, BANK_HI, CYCLE_T2, CYCLE_T2, BANK_LO, CYCLE_T4);
uop_calc(ADD, BANK_LO, CYCLE_T3, CYCLE_T3, BANK_HI, CYCLE_T1);
uop_move( BANK_LO, CYCLE_T4, BANK_HI, CYCLE_T4);
uop_calc(SUB, BANK_HI, CYCLE_T4, CYCLE_T1, BANK_LO, CYCLE_TX);
// T1 = T3 - TX, T3 = T1 * T2, TY = T3 - T5
uop_calc(SUB, BANK_LO, CYCLE_T3, CYCLE_TX, BANK_HI, CYCLE_T1);
uop_calc(MUL, BANK_HI, CYCLE_T1, CYCLE_T2, BANK_LO, CYCLE_T3);
uop_move( BANK_LO, CYCLE_T3, BANK_HI, CYCLE_T3);
uop_calc(SUB, BANK_HI, CYCLE_T3, CYCLE_T5, BANK_LO, CYCLE_TY);
/* END_MICROCODE */
}
//------------------------------------------------------------------------------
//
// Doubles the point stored in CYCLE_R1* and stores the result in CYCLE_T*.
//
// Input (X, Y, Z): CYCLE_R1X and CYCLE_R1Y are read from BANK_HI,
// CYCLE_R1Z is read from BANK_LO.
// Output: CYCLE_TX and CYCLE_TY are written to BANK_LO, CYCLE_TZ is written
// to BANK_HI.
// CYCLE_T1..CYCLE_T5 are used as scratch and are overwritten.
//
// The operation sequence is the same as in the CYCLE_DOUBLE_R0 microcode,
// with the CYCLE_R1* operands substituted for CYCLE_R0*.
//
// The per-step formulas below assume that
// uop_calc(OP, src_bank, a, b, dst_bank, d) computes d = a OP b and that
// uop_move(src_bank, s, dst_bank, d) copies s to d; both are defined outside
// this file -- TODO confirm against the uop_* definitions.
//
//------------------------------------------------------------------------------
void fpga_curve_double_jacobian_microcode_r1()
//------------------------------------------------------------------------------
{
/* BEGIN_MICROCODE: CYCLE_DOUBLE_R1 */
// T1 = Z^2, T2 = X - T1, T3 = X + T1, T4 = T3 * T2
uop_calc(MUL, BANK_LO, CYCLE_R1Z, CYCLE_R1Z, BANK_HI, CYCLE_T1);
uop_calc(SUB, BANK_HI, CYCLE_R1X, CYCLE_T1, BANK_LO, CYCLE_T2);
uop_calc(ADD, BANK_HI, CYCLE_R1X, CYCLE_T1, BANK_LO, CYCLE_T3);
uop_calc(MUL, BANK_LO, CYCLE_T3, CYCLE_T2, BANK_HI, CYCLE_T4);
// T2 = 3 * T4 (T1 = 2 * T4 lands in BANK_LO, so T4 is copied there too
// because both operands of one uop_calc come from the same bank)
uop_calc(ADD, BANK_HI, CYCLE_T4, CYCLE_T4, BANK_LO, CYCLE_T1);
uop_move( BANK_HI, CYCLE_T4, BANK_LO, CYCLE_T4);
uop_calc(ADD, BANK_LO, CYCLE_T1, CYCLE_T4, BANK_HI, CYCLE_T2);
// TY = 2 * Y (intermediate value, overwritten at the end), TZ = Z * TY
uop_calc(ADD, BANK_HI, CYCLE_R1Y, CYCLE_R1Y, BANK_LO, CYCLE_TY);
uop_calc(MUL, BANK_LO, CYCLE_R1Z, CYCLE_TY, BANK_HI, CYCLE_TZ);
// T1 = TY^2, T3 = X * T1, T4 = T1^2, T5 = T4 * CONST_DELTA
// NOTE(review): the value of CONST_DELTA is not visible in this file
uop_calc(MUL, BANK_LO, CYCLE_TY, CYCLE_TY, BANK_HI, CYCLE_T1);
uop_calc(MUL, BANK_HI, CYCLE_R1X, CYCLE_T1, BANK_LO, CYCLE_T3);
uop_calc(MUL, BANK_HI, CYCLE_T1, CYCLE_T1, BANK_LO, CYCLE_T4);
uop_calc(MUL, BANK_LO, CYCLE_T4, CONST_DELTA, BANK_HI, CYCLE_T5);
// T4 = T2^2, T1 = 2 * T3, TX = T4 - T1
uop_calc(MUL, BANK_HI, CYCLE_T2, CYCLE_T2, BANK_LO, CYCLE_T4);
uop_calc(ADD, BANK_LO, CYCLE_T3, CYCLE_T3, BANK_HI, CYCLE_T1);
uop_move( BANK_LO, CYCLE_T4, BANK_HI, CYCLE_T4);
uop_calc(SUB, BANK_HI, CYCLE_T4, CYCLE_T1, BANK_LO, CYCLE_TX);
// T1 = T3 - TX, T3 = T1 * T2, TY = T3 - T5
uop_calc(SUB, BANK_LO, CYCLE_T3, CYCLE_TX, BANK_HI, CYCLE_T1);
uop_calc(MUL, BANK_HI, CYCLE_T1, CYCLE_T2, BANK_LO, CYCLE_T3);
uop_move( BANK_LO, CYCLE_T3, BANK_HI, CYCLE_T3);
uop_calc(SUB, BANK_HI, CYCLE_T3, CYCLE_T5, BANK_LO, CYCLE_TY);
/* END_MICROCODE */
}
//------------------------------------------------------------------------------
//
// Adds the points stored in CYCLE_R0|1 and stores the result in CYCLE_S.
//
// Inputs (X0, Y0, Z0) = R0 and (X1, Y1, Z1) = R1: CYCLE_R0X, CYCLE_R0Y,
// CYCLE_R1X and CYCLE_R1Y are read from BANK_HI, CYCLE_R0Z and CYCLE_R1Z
// are read from BANK_LO.
// Output: CYCLE_SX and CYCLE_SY are written to BANK_LO, CYCLE_SZ is written
// to BANK_HI.
// CYCLE_T1..CYCLE_T8 are used as scratch and are overwritten.
//
// After the general formula has been evaluated, the result is replaced by
// the other operand when exactly one of the two inputs has a zero Z
// coordinate.
//
// The per-step formulas below assume that
// uop_calc(OP, src_bank, a, b, dst_bank, d) computes d = a OP b and that
// uop_move(src_bank, s, dst_bank, d) copies s to d; both are defined outside
// this file -- TODO confirm against the uop_* definitions.
//
//------------------------------------------------------------------------------
void fpga_curve_add_jacobian_microcode_2()
{
/* BEGIN_MICROCODE: CYCLE_ADD */
// T1 = Z0^2, T2 = Z1^2 (computed into BANK_HI, then copied to BANK_LO)
uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_R0Z, BANK_HI, CYCLE_T1);
uop_calc(MUL, BANK_LO, CYCLE_R1Z, CYCLE_R1Z, BANK_HI, CYCLE_T2);
uop_move( BANK_HI, CYCLE_T1, BANK_LO, CYCLE_T1);
uop_move( BANK_HI, CYCLE_T2, BANK_LO, CYCLE_T2);
// T3 = Z0 * T1, T4 = Z1 * T2
uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_T1, BANK_HI, CYCLE_T3);
uop_calc(MUL, BANK_LO, CYCLE_R1Z, CYCLE_T2, BANK_HI, CYCLE_T4);
// T5 = X0 * T2, T2 = X1 * T1, T6 = Y0 * T4, T4 = Y1 * T3
// (operands come from BANK_HI, results go to BANK_LO)
uop_calc(MUL, BANK_HI, CYCLE_R0X, CYCLE_T2, BANK_LO, CYCLE_T5);
uop_calc(MUL, BANK_HI, CYCLE_R1X, CYCLE_T1, BANK_LO, CYCLE_T2);
uop_calc(MUL, BANK_HI, CYCLE_R0Y, CYCLE_T4, BANK_LO, CYCLE_T6);
uop_calc(MUL, BANK_HI, CYCLE_R1Y, CYCLE_T3, BANK_LO, CYCLE_T4);
// T7 = T2 - T5, T8 = T4 - T6
uop_calc(SUB, BANK_LO, CYCLE_T2, CYCLE_T5, BANK_HI, CYCLE_T7);
uop_calc(SUB, BANK_LO, CYCLE_T4, CYCLE_T6, BANK_HI, CYCLE_T8);
// T1 = Z0 * Z1, SZ = T7 * T1
uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_R1Z, BANK_HI, CYCLE_T1);
uop_move( BANK_HI, CYCLE_T1, BANK_LO, CYCLE_T1);
uop_move( BANK_HI, CYCLE_T7, BANK_LO, CYCLE_T7);
uop_calc(MUL, BANK_LO, CYCLE_T7, CYCLE_T1, BANK_HI, CYCLE_SZ);
// T2 = T8^2, T3 = T7^2, T4 = T7 * T3, T1 = T2 - T4
uop_calc(MUL, BANK_HI, CYCLE_T8, CYCLE_T8, BANK_LO, CYCLE_T2);
uop_calc(MUL, BANK_LO, CYCLE_T7, CYCLE_T7, BANK_HI, CYCLE_T3);
uop_calc(MUL, BANK_HI, CYCLE_T7, CYCLE_T3, BANK_LO, CYCLE_T4);
uop_calc(SUB, BANK_LO, CYCLE_T2, CYCLE_T4, BANK_HI, CYCLE_T1);
// T2 = T5 * T3, T3 = 2 * T2, SX = T1 - T3
uop_move( BANK_LO, CYCLE_T5, BANK_HI, CYCLE_T5);
uop_calc(MUL, BANK_HI, CYCLE_T5, CYCLE_T3, BANK_LO, CYCLE_T2);
uop_calc(ADD, BANK_LO, CYCLE_T2, CYCLE_T2, BANK_HI, CYCLE_T3);
uop_calc(SUB, BANK_HI, CYCLE_T1, CYCLE_T3, BANK_LO, CYCLE_SX);
// T1 = T2 - SX, T2 = T1 * T8, T3 = T6 * T4, SY = T2 - T3
uop_calc(SUB, BANK_LO, CYCLE_T2, CYCLE_SX, BANK_HI, CYCLE_T1);
uop_move( BANK_HI, CYCLE_T8, BANK_LO, CYCLE_T8);
uop_move( BANK_HI, CYCLE_T1, BANK_LO, CYCLE_T1);
uop_calc(MUL, BANK_LO, CYCLE_T1, CYCLE_T8, BANK_HI, CYCLE_T2);
uop_calc(MUL, BANK_LO, CYCLE_T6, CYCLE_T4, BANK_HI, CYCLE_T3);
uop_calc(SUB, BANK_HI, CYCLE_T2, CYCLE_T3, BANK_LO, CYCLE_SY);
// test both input Z coordinates for zero
// NOTE(review): presumably these set uop_flagz_r0z / uop_flagz_r1z, which
// are tested below -- confirm against the uop_cmpz definition
uop_cmpz(BANK_LO, CYCLE_R0Z);
uop_cmpz(BANK_LO, CYCLE_R1Z);
/* END_MICROCODE */
//
// handle special corner cases
//
// only R0 has Z == 0: the result is a copy of R1
if (uop_flagz_r0z && !uop_flagz_r1z)
{
/* BEGIN_MICROCODE: CYCLE_ADD_R0_AT_INFINITY */
uop_move(BANK_HI, CYCLE_R1X, BANK_LO, CYCLE_SX);
uop_move(BANK_HI, CYCLE_R1Y, BANK_LO, CYCLE_SY);
uop_move(BANK_LO, CYCLE_R1Z, BANK_HI, CYCLE_SZ);
/* END_MICROCODE */
return;
}
// only R1 has Z == 0: the result is a copy of R0
if (!uop_flagz_r0z && uop_flagz_r1z)
{
/* BEGIN_MICROCODE: CYCLE_ADD_R1_AT_INFINITY */
uop_move(BANK_HI, CYCLE_R0X, BANK_LO, CYCLE_SX);
uop_move(BANK_HI, CYCLE_R0Y, BANK_LO, CYCLE_SY);
uop_move(BANK_LO, CYCLE_R0Z, BANK_HI, CYCLE_SZ);
/* END_MICROCODE */
return;
}
// neither or both Z coordinates are zero: the sum computed above stays in
// SX/SY (BANK_LO) and SZ (BANK_HI). The three moves below write to the
// opposite banks (SX/SY in BANK_HI, SZ in BANK_LO) and therefore do not
// touch that result.
// NOTE(review): presumably dummy moves that keep the micro-operation count
// equal to the two special cases above -- confirm the intent
/* BEGIN_MICROCODE: CYCLE_ADD_REGULAR */
uop_move(BANK_LO, CONST_GX, BANK_HI, CYCLE_SX);
uop_move(BANK_LO, CONST_GY, BANK_HI, CYCLE_SY);
uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_SZ);
/* END_MICROCODE */
}
#ifdef USE_MICROCODE
//------------------------------------------------------------------------------
//
// Multiplies the base point by the scalar k and stores the coordinates of
// the converted result in qx and qy.
//
// k  - multiplier; its words are scanned from index
//      FPGA_OPERAND_NUM_WORDS-1 down to 0, and within every word from bit
//      FPGA_WORD_WIDTH-1 down to bit 0
// qx - receives CYCLE_R1X (BANK_LO) after the final conversion
// qy - receives CYCLE_R1Y (BANK_LO) after the final conversion
//
// Both operand banks are initialised with the constants ZERO, ONE, DELTA,
// GX and GY. The working pair starts as R0 = (1, 1, 0) and R1 = (GX, GY, 1).
// For every bit of k both the sum S = R0 + R1 and one doubling T are
// computed; which of R0/R1 is doubled and how S and T are written back
// depends on the bit (Montgomery-ladder style, the same amount of work is
// done for either bit value).
//
//------------------------------------------------------------------------------
void fpga_curve_base_scalar_multiply_microcode(const FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
//------------------------------------------------------------------------------
{
    int word_count, bit_count;  // counters
    FPGA_WORD k_word;
    bool k_bit;

#ifdef DUMP_CYCLE_STATES
    FPGA_BUFFER r0x, r0y, r0z;
    FPGA_BUFFER r1x, r1y, r1z;
    FPGA_BUFFER sx, sy, sz;
    FPGA_BUFFER tx, ty, tz;
#endif

    // initialize internal banks (every constant is placed in both banks)
    fpga_multiword_copy(&ECDSA_ZERO, &BUF_LO[CONST_ZERO]);
    fpga_multiword_copy(&ECDSA_ZERO, &BUF_HI[CONST_ZERO]);
    fpga_multiword_copy(&ECDSA_ONE, &BUF_LO[CONST_ONE]);
    fpga_multiword_copy(&ECDSA_ONE, &BUF_HI[CONST_ONE]);
    fpga_multiword_copy(&ECDSA_DELTA, &BUF_LO[CONST_DELTA]);
    fpga_multiword_copy(&ECDSA_DELTA, &BUF_HI[CONST_DELTA]);
    fpga_multiword_copy(&ECDSA_GX, &BUF_LO[CONST_GX]);
    fpga_multiword_copy(&ECDSA_GX, &BUF_HI[CONST_GX]);
    fpga_multiword_copy(&ECDSA_GY, &BUF_LO[CONST_GY]);
    fpga_multiword_copy(&ECDSA_GY, &BUF_HI[CONST_GY]);

    /* BEGIN_MICROCODE: PREPARE */
    // set initial value of R0 to point at infinity
    // set initial value of R1 to the base point
    uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_R0X);
    uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_R0Y);
    uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_R0Z);
    uop_move(BANK_LO, CONST_GX, BANK_HI, CYCLE_R1X);
    uop_move(BANK_LO, CONST_GY, BANK_HI, CYCLE_R1Y);
    uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_R1Z);
    /* END_MICROCODE */

    /* process bits of k left-to-right */
    for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
    {
        for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
        {
            k_word = k->words[word_count-1];

            // Shift the word down and mask the lowest bit instead of
            // building the mask with (1 << n): shifting the signed
            // constant 1 into the sign position is not well defined.
            k_bit = ((k_word >> (bit_count-1)) & 1) != 0;

#ifdef DUMP_CYCLE_STATES
            dump_cycle_header(word_count, bit_count, k_bit);
#endif

            //
            // calculate S = R0 + R1
            //
            // Banks of working cycle operands
            // -------------------------------
            // R0|1X: HI
            // R0|1Y: HI
            // R0|1Z: LO
            // SX: LO
            // SY: LO
            // SZ: HI
            fpga_curve_add_jacobian_microcode_2();

            //
            // calculate T = 2 * R0 or T = 2 * R1
            //
            // Banks of working cycle operands
            // -------------------------------
            // R0|1X: HI
            // R0|1Y: HI
            // R0|1Z: LO
            // TX: LO
            // TY: LO
            // TZ: HI
            if (!k_bit)
                fpga_curve_double_jacobian_microcode_r0();
            else
                fpga_curve_double_jacobian_microcode_r1();

            //
            // dump cycle state
            //
#ifdef DUMP_CYCLE_STATES
            uop_stor(BANK_HI, CYCLE_R0X, &r0x);
            uop_stor(BANK_HI, CYCLE_R0Y, &r0y);
            uop_stor(BANK_LO, CYCLE_R0Z, &r0z);
            uop_stor(BANK_HI, CYCLE_R1X, &r1x);
            uop_stor(BANK_HI, CYCLE_R1Y, &r1y);
            uop_stor(BANK_LO, CYCLE_R1Z, &r1z);
            uop_stor(BANK_LO, CYCLE_SX, &sx);
            uop_stor(BANK_LO, CYCLE_SY, &sy);
            uop_stor(BANK_HI, CYCLE_SZ, &sz);
            uop_stor(BANK_LO, CYCLE_TX, &tx);
            uop_stor(BANK_LO, CYCLE_TY, &ty);
            uop_stor(BANK_HI, CYCLE_TZ, &tz);
            dump_cycle_state(&r0x, &r0y, &r0z, &r1x, &r1y, &r1z,
                             &sx, &sy, &sz, &tx, &ty, &tz);
#endif

            //
            // update working variables
            //
            if (!k_bit)
            {
                /* BEGIN_MICROCODE: CYCLE_K0 */
                // R0 = 2 * R0 (double)
                // R1 = R0 + R1 (add)
                uop_move(BANK_LO, CYCLE_TX, BANK_HI, CYCLE_R0X);
                uop_move(BANK_LO, CYCLE_TY, BANK_HI, CYCLE_R0Y);
                uop_move(BANK_HI, CYCLE_TZ, BANK_LO, CYCLE_R0Z);
                uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_R1X);
                uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_R1Y);
                uop_move(BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_R1Z);
                /* END_MICROCODE */
            }
            else
            {
                /* BEGIN_MICROCODE: CYCLE_K1 */
                // R0 = R0 + R1 (add)
                // R1 = 2 * R1 (double)
                uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_R0X);
                uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_R0Y);
                uop_move(BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_R0Z);
                uop_move(BANK_LO, CYCLE_TX, BANK_HI, CYCLE_R1X);
                uop_move(BANK_LO, CYCLE_TY, BANK_HI, CYCLE_R1Y);
                uop_move(BANK_HI, CYCLE_TZ, BANK_LO, CYCLE_R1Z);
                /* END_MICROCODE */
            }
        }
    }

    // now convert to affine coordinates
    // NOTE(review): fpga_modular_inv23_microcode() is defined elsewhere;
    // presumably it leaves the inverse square and inverse cube of R0's Z
    // coordinate in INVERT_A2 / INVERT_A3 (BANK_HI) -- confirm
    fpga_modular_inv23_microcode();

    /* BEGIN_MICROCODE: CONVERT */
    uop_calc(MUL, BANK_HI, INVERT_A2, CYCLE_R0X, BANK_LO, CYCLE_R1X);
    uop_calc(MUL, BANK_HI, INVERT_A3, CYCLE_R0Y, BANK_LO, CYCLE_R1Y);
    /* END_MICROCODE */

    // return (the converted coordinates were written over R1X/R1Y in BANK_LO)
    uop_stor(BANK_LO, CYCLE_R1X, qx);
    uop_stor(BANK_LO, CYCLE_R1Y, qy);
}
#endif // USE_MICROCODE
//------------------------------------------------------------------------------
//
// Convenience wrapper around the point doubling microcode: places a point
// supplied in caller buffers into the R0 working slots, runs the doubling
// routine and copies the result out of the T slots.
//
// rx, ry, rz - input point; written to CYCLE_R0X (BANK_HI),
//              CYCLE_R0Y (BANK_HI) and CYCLE_R0Z (BANK_LO)
// sx, sy, sz - output point; read from CYCLE_TX (BANK_LO),
//              CYCLE_TY (BANK_LO) and CYCLE_TZ (BANK_HI)
//
// NOTE(review): uop_load / uop_stor are defined outside this file; they
// are assumed to copy a buffer into / out of the given bank slot -- confirm.
//
//------------------------------------------------------------------------------
void fpga_curve_double_jacobian_microcode_wrapper(const FPGA_BUFFER *rx,
const FPGA_BUFFER *ry,
const FPGA_BUFFER *rz,
FPGA_BUFFER *sx,
FPGA_BUFFER *sy,
FPGA_BUFFER *sz)
//------------------------------------------------------------------------------
{
//
// There are two pieces of microcode for doubling, one for R0 and one for
// R1 (the scalar multiplication picks one depending on the current
// multiplier bit). The wrapper has a single input point, so it always
// uses the R0 variant.
//
// input point -> R0 slots
uop_load(rx, BANK_HI, CYCLE_R0X);
uop_load(ry, BANK_HI, CYCLE_R0Y);
uop_load(rz, BANK_LO, CYCLE_R0Z);
fpga_curve_double_jacobian_microcode_r0();
// T slots -> output point (TX/TY live in BANK_LO, TZ in BANK_HI)
uop_stor(BANK_LO, CYCLE_TX, sx);
uop_stor(BANK_LO, CYCLE_TY, sy);
uop_stor(BANK_HI, CYCLE_TZ, sz);
}
//------------------------------------------------------------------------------
//
// Convenience wrapper around the point addition microcode: places two
// points supplied in caller buffers into the R0 and R1 working slots, runs
// fpga_curve_add_jacobian_microcode_2() and copies the sum out of the S
// slots.
//
// px, py, pz - first operand; written to CYCLE_R0X (BANK_HI),
//              CYCLE_R0Y (BANK_HI) and CYCLE_R0Z (BANK_LO)
// qx, qy, qz - second operand; written to CYCLE_R1X (BANK_HI),
//              CYCLE_R1Y (BANK_HI) and CYCLE_R1Z (BANK_LO)
// rx, ry, rz - sum; read from CYCLE_SX (BANK_LO), CYCLE_SY (BANK_LO)
//              and CYCLE_SZ (BANK_HI)
//
// NOTE(review): uop_load / uop_stor are defined outside this file; they
// are assumed to copy a buffer into / out of the given bank slot -- confirm.
//
//------------------------------------------------------------------------------
void fpga_curve_add_jacobian_microcode_2_wrapper(const FPGA_BUFFER *px,
                                                 const FPGA_BUFFER *py,
                                                 const FPGA_BUFFER *pz,
                                                 const FPGA_BUFFER *qx,
                                                 const FPGA_BUFFER *qy,
                                                 const FPGA_BUFFER *qz,
                                                 FPGA_BUFFER *rx,
                                                 FPGA_BUFFER *ry,
                                                 FPGA_BUFFER *rz)
//------------------------------------------------------------------------------
{
    // operands -> R0 / R1 slots (X and Y in BANK_HI, Z in BANK_LO)
    uop_load(px, BANK_HI, CYCLE_R0X);
    uop_load(py, BANK_HI, CYCLE_R0Y);
    uop_load(pz, BANK_LO, CYCLE_R0Z);

    uop_load(qx, BANK_HI, CYCLE_R1X);
    uop_load(qy, BANK_HI, CYCLE_R1Y);
    uop_load(qz, BANK_LO, CYCLE_R1Z);

    fpga_curve_add_jacobian_microcode_2();

    // The addition microcode writes SX and SY to BANK_LO and SZ to BANK_HI
    // (in the general case as well as in both point-at-infinity cases), so
    // the sum has to be read back from exactly those banks. The opposite
    // banks only hold the values placed there by the CYCLE_ADD_REGULAR
    // moves, or stale data.
    uop_stor(BANK_LO, CYCLE_SX, rx);
    uop_stor(BANK_LO, CYCLE_SY, ry);
    uop_stor(BANK_HI, CYCLE_SZ, rz);
}
//------------------------------------------------------------------------------
// End-of-File
//------------------------------------------------------------------------------