//------------------------------------------------------------------------------ // // ecdsa_fpga_curve_microcode.cpp // ---------------------------------------------- // Elliptic curve arithmetic procedures for ECDSA // // Authors: Pavel Shatov // // Copyright 2018 NORDUnet A/S // Copyright 2021 The Commons Conservancy Cryptech Project // SPDX-License-Identifier: BSD-3-Clause // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // - Neither the name of the copyright holder nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. 
//
//------------------------------------------------------------------------------


//------------------------------------------------------------------------------
// Required for Microcode Routines
//------------------------------------------------------------------------------
#define USE_MICROCODE


//------------------------------------------------------------------------------
// Headers
//------------------------------------------------------------------------------
#include "ecdsa_fpga_model.h"


//------------------------------------------------------------------------------
//
// Doubles the point stored in CYCLE_R0* and stores the result in CYCLE_T*.
//
// Operand banks, as read and written by the micro-operations below:
//   inputs:  R0X, R0Y in BANK_HI; R0Z in BANK_LO
//   outputs: TX, TY in BANK_LO;   TZ in BANK_HI
//   scratch: CYCLE_T1..CYCLE_T5 are overwritten; TY also briefly holds 2*Y
//
// Sequence, writing X, Y, Z for the R0 coordinates:
//   T2 = 3 * (X - Z^2) * (X + Z^2)
//   TZ = 2 * Y * Z
//   T3 = X * (2*Y)^2
//   T5 = (2*Y)^4 * CONST_DELTA
//   TX = T2^2 - 2 * T3
//   TY = T2 * (T3 - TX) - T5
//
// NOTE(review): the formulas above assume MUL/ADD/SUB are the modular field
// operations; CONST_DELTA is defined elsewhere and is presumably 1/2, which
// would make T5 = 8*Y^4 -- confirm against ecdsa_fpga_model.h.
//
// NOTE(review): the BEGIN_MICROCODE/END_MICROCODE markers look like section
// delimiters for an external tool -- confirm before reformatting or
// reordering anything between them.
//
//------------------------------------------------------------------------------
void fpga_curve_double_jacobian_microcode_r0()
//------------------------------------------------------------------------------
{
    /* BEGIN_MICROCODE: CYCLE_DOUBLE_R0 */

    uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_R0Z, BANK_HI, CYCLE_T1);
    uop_calc(SUB, BANK_HI, CYCLE_R0X, CYCLE_T1, BANK_LO, CYCLE_T2);
    uop_calc(ADD, BANK_HI, CYCLE_R0X, CYCLE_T1, BANK_LO, CYCLE_T3);
    uop_calc(MUL, BANK_LO, CYCLE_T3, CYCLE_T2, BANK_HI, CYCLE_T4);
    uop_calc(ADD, BANK_HI, CYCLE_T4, CYCLE_T4, BANK_LO, CYCLE_T1);
    uop_move(     BANK_HI, CYCLE_T4, BANK_LO, CYCLE_T4);
    uop_calc(ADD, BANK_LO, CYCLE_T1, CYCLE_T4, BANK_HI, CYCLE_T2);
    uop_calc(ADD, BANK_HI, CYCLE_R0Y, CYCLE_R0Y, BANK_LO, CYCLE_TY);
    uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_TY, BANK_HI, CYCLE_TZ);
    uop_calc(MUL, BANK_LO, CYCLE_TY, CYCLE_TY, BANK_HI, CYCLE_T1);
    uop_calc(MUL, BANK_HI, CYCLE_R0X, CYCLE_T1, BANK_LO, CYCLE_T3);
    uop_calc(MUL, BANK_HI, CYCLE_T1, CYCLE_T1, BANK_LO, CYCLE_T4);
    uop_calc(MUL, BANK_LO, CYCLE_T4, CONST_DELTA, BANK_HI, CYCLE_T5);
    uop_calc(MUL, BANK_HI, CYCLE_T2, CYCLE_T2, BANK_LO, CYCLE_T4);
    uop_calc(ADD, BANK_LO, CYCLE_T3, CYCLE_T3, BANK_HI, CYCLE_T1);
    uop_move(     BANK_LO, CYCLE_T4, BANK_HI, CYCLE_T4);
    uop_calc(SUB, BANK_HI, CYCLE_T4, CYCLE_T1, BANK_LO, CYCLE_TX);
    uop_calc(SUB, BANK_LO, CYCLE_T3, CYCLE_TX, BANK_HI, CYCLE_T1);
    uop_calc(MUL, BANK_HI, CYCLE_T1, CYCLE_T2, BANK_LO, CYCLE_T3);
    uop_move(     BANK_LO, CYCLE_T3, BANK_HI, CYCLE_T3);
    uop_calc(SUB, BANK_HI, CYCLE_T3, CYCLE_T5, BANK_LO, CYCLE_TY);

    /* END_MICROCODE */
}


//------------------------------------------------------------------------------
//
// Doubles the point stored in CYCLE_R1* and stores the result in CYCLE_T*.
//
// This is the same micro-operation sequence as the CYCLE_DOUBLE_R0 section
// with every R0 operand replaced by the matching R1 operand:
//   inputs:  R1X, R1Y in BANK_HI; R1Z in BANK_LO
//   outputs: TX, TY in BANK_LO;   TZ in BANK_HI
//   scratch: CYCLE_T1..CYCLE_T5 are overwritten; TY also briefly holds 2*Y
//
// Sequence, writing X, Y, Z for the R1 coordinates:
//   T2 = 3 * (X - Z^2) * (X + Z^2)
//   TZ = 2 * Y * Z
//   T3 = X * (2*Y)^2
//   T5 = (2*Y)^4 * CONST_DELTA
//   TX = T2^2 - 2 * T3
//   TY = T2 * (T3 - TX) - T5
//
// NOTE(review): as for the R0 variant, this assumes MUL/ADD/SUB are modular
// field operations and that CONST_DELTA is 1/2 -- confirm against
// ecdsa_fpga_model.h.
//
//------------------------------------------------------------------------------
void fpga_curve_double_jacobian_microcode_r1()
//------------------------------------------------------------------------------
{
    /* BEGIN_MICROCODE: CYCLE_DOUBLE_R1 */

    uop_calc(MUL, BANK_LO, CYCLE_R1Z, CYCLE_R1Z, BANK_HI, CYCLE_T1);
    uop_calc(SUB, BANK_HI, CYCLE_R1X, CYCLE_T1, BANK_LO, CYCLE_T2);
    uop_calc(ADD, BANK_HI, CYCLE_R1X, CYCLE_T1, BANK_LO, CYCLE_T3);
    uop_calc(MUL, BANK_LO, CYCLE_T3, CYCLE_T2, BANK_HI, CYCLE_T4);
    uop_calc(ADD, BANK_HI, CYCLE_T4, CYCLE_T4, BANK_LO, CYCLE_T1);
    uop_move(     BANK_HI, CYCLE_T4, BANK_LO, CYCLE_T4);
    uop_calc(ADD, BANK_LO, CYCLE_T1, CYCLE_T4, BANK_HI, CYCLE_T2);
    uop_calc(ADD, BANK_HI, CYCLE_R1Y, CYCLE_R1Y, BANK_LO, CYCLE_TY);
    uop_calc(MUL, BANK_LO, CYCLE_R1Z, CYCLE_TY, BANK_HI, CYCLE_TZ);
    uop_calc(MUL, BANK_LO, CYCLE_TY, CYCLE_TY, BANK_HI, CYCLE_T1);
    uop_calc(MUL, BANK_HI, CYCLE_R1X, CYCLE_T1, BANK_LO, CYCLE_T3);
    uop_calc(MUL, BANK_HI, CYCLE_T1, CYCLE_T1, BANK_LO, CYCLE_T4);
    uop_calc(MUL, BANK_LO, CYCLE_T4, CONST_DELTA, BANK_HI, CYCLE_T5);
    uop_calc(MUL, BANK_HI, CYCLE_T2, CYCLE_T2, BANK_LO, CYCLE_T4);
    uop_calc(ADD, BANK_LO, CYCLE_T3, CYCLE_T3, BANK_HI, CYCLE_T1);
    uop_move(     BANK_LO, CYCLE_T4, BANK_HI, CYCLE_T4);
    uop_calc(SUB, BANK_HI, CYCLE_T4, CYCLE_T1, BANK_LO, CYCLE_TX);
    uop_calc(SUB, BANK_LO, CYCLE_T3, CYCLE_TX, BANK_HI, CYCLE_T1);
    uop_calc(MUL, BANK_HI, CYCLE_T1, CYCLE_T2, BANK_LO, CYCLE_T3);
    uop_move(     BANK_LO, CYCLE_T3, BANK_HI, CYCLE_T3);
    uop_calc(SUB, BANK_HI, CYCLE_T3, CYCLE_T5, BANK_LO, CYCLE_TY);

    /* END_MICROCODE */
}
//------------------------------------------------------------------------------ // // Adds the points stored in CYCLE_R0|1 and stores the result in CYCLE_S. // //------------------------------------------------------------------------------ void fpga_curve_add_jacobian_microcode_2() { /* BEGIN_MICROCODE: CYCLE_ADD */ uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_R0Z, BANK_HI, CYCLE_T1); uop_calc(MUL, BANK_LO, CYCLE_R1Z, CYCLE_R1Z, BANK_HI, CYCLE_T2); uop_move( BANK_HI, CYCLE_T1, BANK_LO, CYCLE_T1); uop_move( BANK_HI, CYCLE_T2, BANK_LO, CYCLE_T2); uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_T1, BANK_HI, CYCLE_T3); uop_calc(MUL, BANK_LO, CYCLE_R1Z, CYCLE_T2, BANK_HI, CYCLE_T4); uop_calc(MUL, BANK_HI, CYCLE_R0X, CYCLE_T2, BANK_LO, CYCLE_T5); uop_calc(MUL, BANK_HI, CYCLE_R1X, CYCLE_T1, BANK_LO, CYCLE_T2); uop_calc(MUL, BANK_HI, CYCLE_R0Y, CYCLE_T4, BANK_LO, CYCLE_T6); uop_calc(MUL, BANK_HI, CYCLE_R1Y, CYCLE_T3, BANK_LO, CYCLE_T4); uop_calc(SUB, BANK_LO, CYCLE_T2, CYCLE_T5, BANK_HI, CYCLE_T7); uop_calc(SUB, BANK_LO, CYCLE_T4, CYCLE_T6, BANK_HI, CYCLE_T8); uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_R1Z, BANK_HI, CYCLE_T1); uop_move( BANK_HI, CYCLE_T1, BANK_LO, CYCLE_T1); uop_move( BANK_HI, CYCLE_T7, BANK_LO, CYCLE_T7); uop_calc(MUL, BANK_LO, CYCLE_T7, CYCLE_T1, BANK_HI, CYCLE_SZ); uop_calc(MUL, BANK_HI, CYCLE_T8, CYCLE_T8, BANK_LO, CYCLE_T2); uop_calc(MUL, BANK_LO, CYCLE_T7, CYCLE_T7, BANK_HI, CYCLE_T3); uop_calc(MUL, BANK_HI, CYCLE_T7, CYCLE_T3, BANK_LO, CYCLE_T4); uop_calc(SUB, BANK_LO, CYCLE_T2, CYCLE_T4, BANK_HI, CYCLE_T1); uop_move( BANK_LO, CYCLE_T5, BANK_HI, CYCLE_T5); uop_calc(MUL, BANK_HI, CYCLE_T5, CYCLE_T3, BANK_LO, CYCLE_T2); uop_calc(ADD, BANK_LO, CYCLE_T2, CYCLE_T2, BANK_HI, CYCLE_T3); uop_calc(SUB, BANK_HI, CYCLE_T1, CYCLE_T3, BANK_LO, CYCLE_SX); uop_calc(SUB, BANK_LO, CYCLE_T2, CYCLE_SX, BANK_HI, CYCLE_T1); uop_move( BANK_HI, CYCLE_T8, BANK_LO, CYCLE_T8); uop_move( BANK_HI, CYCLE_T1, BANK_LO, CYCLE_T1); uop_calc(MUL, BANK_LO, CYCLE_T1, CYCLE_T8, BANK_HI, CYCLE_T2); 
uop_calc(MUL, BANK_LO, CYCLE_T6, CYCLE_T4, BANK_HI, CYCLE_T3); uop_calc(SUB, BANK_HI, CYCLE_T2, CYCLE_T3, BANK_LO, CYCLE_SY); uop_cmpz(BANK_LO, CYCLE_R0Z); uop_cmpz(BANK_LO, CYCLE_R1Z); /* END_MICROCODE */ // // handle special corner cases // if (uop_flagz_r0z && !uop_flagz_r1z) { /* BEGIN_MICROCODE: CYCLE_ADD_R0_AT_INFINITY */ uop_move(BANK_HI, CYCLE_R1X, BANK_LO, CYCLE_SX); uop_move(BANK_HI, CYCLE_R1Y, BANK_LO, CYCLE_SY); uop_move(BANK_LO, CYCLE_R1Z, BANK_HI, CYCLE_SZ); /* END_MICROCODE */ return; } if (!uop_flagz_r0z && uop_flagz_r1z) { /* BEGIN_MICROCODE: CYCLE_ADD_R1_AT_INFINITY */ uop_move(BANK_HI, CYCLE_R0X, BANK_LO, CYCLE_SX); uop_move(BANK_HI, CYCLE_R0Y, BANK_LO, CYCLE_SY); uop_move(BANK_LO, CYCLE_R0Z, BANK_HI, CYCLE_SZ); /* END_MICROCODE */ return; } /* BEGIN_MICROCODE: CYCLE_ADD_REGULAR */ uop_move(BANK_LO, CONST_GX, BANK_HI, CYCLE_SX); uop_move(BANK_LO, CONST_GY, BANK_HI, CYCLE_SY); uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_SZ); /* END_MICROCODE */ } #ifdef USE_MICROCODE //------------------------------------------------------------------------------ void fpga_curve_base_scalar_multiply_microcode(const FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy) //------------------------------------------------------------------------------ { int word_count, bit_count; // counters FPGA_WORD k_word; bool k_bit; #ifdef DUMP_CYCLE_STATES FPGA_BUFFER r0x, r0y, r0z; FPGA_BUFFER r1x, r1y, r1z; FPGA_BUFFER sx, sy, sz; FPGA_BUFFER tx, ty, tz; #endif // initialize internal banks fpga_multiword_copy(&ECDSA_ZERO, &BUF_LO[CONST_ZERO]); fpga_multiword_copy(&ECDSA_ZERO, &BUF_HI[CONST_ZERO]); fpga_multiword_copy(&ECDSA_ONE, &BUF_LO[CONST_ONE]); fpga_multiword_copy(&ECDSA_ONE, &BUF_HI[CONST_ONE]); fpga_multiword_copy(&ECDSA_DELTA, &BUF_LO[CONST_DELTA]); fpga_multiword_copy(&ECDSA_DELTA, &BUF_HI[CONST_DELTA]); fpga_multiword_copy(&ECDSA_GX, &BUF_LO[CONST_GX]); fpga_multiword_copy(&ECDSA_GX, &BUF_HI[CONST_GX]); fpga_multiword_copy(&ECDSA_GY, &BUF_LO[CONST_GY]); 
fpga_multiword_copy(&ECDSA_GY, &BUF_HI[CONST_GY]); /* BEGIN_MICROCODE: PREPARE */ // set initial value of R0 to point at infinity // set initial value of R1 to the base point uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_R0X); uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_R0Y); uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_R0Z); uop_move(BANK_LO, CONST_GX, BANK_HI, CYCLE_R1X); uop_move(BANK_LO, CONST_GY, BANK_HI, CYCLE_R1Y); uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_R1Z); /* END_MICROCODE */ /* process bits of k left-to-right */ for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--) for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--) { k_word = k->words[word_count-1]; k_bit = (k_word & (FPGA_WORD)(1 << (bit_count-1))) > 0; #ifdef DUMP_CYCLE_STATES dump_cycle_header(word_count, bit_count, k_bit); #endif // // calculate S = R0 + R1 // // Banks of working cycle operands // ------------------------------- // R0|1X: HI // R0|1Y: HI // R0|1Z: LO // SX: LO // SY: LO // SZ: HI fpga_curve_add_jacobian_microcode_2(); // // calculate T = 2 * R0 or T = 2 * R1 // // Banks of working cycle operands // ------------------------------- // R0|1X: HI // R0|1Y: HI // R0|1Z: LO // TX: LO // TY: LO // TZ: HI if (!k_bit) fpga_curve_double_jacobian_microcode_r0(); else fpga_curve_double_jacobian_microcode_r1(); // // dump cycle state // #ifdef DUMP_CYCLE_STATES uop_stor(BANK_HI, CYCLE_R0X, &r0x); uop_stor(BANK_HI, CYCLE_R0Y, &r0y); uop_stor(BANK_LO, CYCLE_R0Z, &r0z); uop_stor(BANK_HI, CYCLE_R1X, &r1x); uop_stor(BANK_HI, CYCLE_R1Y, &r1y); uop_stor(BANK_LO, CYCLE_R1Z, &r1z); uop_stor(BANK_LO, CYCLE_SX, &sx); uop_stor(BANK_LO, CYCLE_SY, &sy); uop_stor(BANK_HI, CYCLE_SZ, &sz); uop_stor(BANK_LO, CYCLE_TX, &tx); uop_stor(BANK_LO, CYCLE_TY, &ty); uop_stor(BANK_HI, CYCLE_TZ, &tz); dump_cycle_state(&r0x, &r0y, &r0z, &r1x, &r1y, &r1z, &sx, &sy, &sz, &tx, &ty, &tz); #endif // // update working variables // if (!k_bit) { /* BEGIN_MICROCODE: CYCLE_K0 */ // R0 = 2 * R0 (double) // R1 = 
R0 + R1 (add) uop_move(BANK_LO, CYCLE_TX, BANK_HI, CYCLE_R0X); uop_move(BANK_LO, CYCLE_TY, BANK_HI, CYCLE_R0Y); uop_move(BANK_HI, CYCLE_TZ, BANK_LO, CYCLE_R0Z); uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_R1X); uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_R1Y); uop_move(BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_R1Z); /* END_MICROCODE */ } else { /* BEGIN_MICROCODE: CYCLE_K1 */ // R0 = R0 + R1 (add) // R1 = 2 * R1 (double) uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_R0X); uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_R0Y); uop_move(BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_R0Z); uop_move(BANK_LO, CYCLE_TX, BANK_HI, CYCLE_R1X); uop_move(BANK_LO, CYCLE_TY, BANK_HI, CYCLE_R1Y); uop_move(BANK_HI, CYCLE_TZ, BANK_LO, CYCLE_R1Z); /* END_MICROCODE */ } } // now convert to affine coordinates fpga_modular_inv23_microcode(); /* BEGIN_MICROCODE: CONVERT */ uop_calc(MUL, BANK_HI, INVERT_A2, CYCLE_R0X, BANK_LO, CYCLE_R1X); uop_calc(MUL, BANK_HI, INVERT_A3, CYCLE_R0Y, BANK_LO, CYCLE_R1Y); /* END_MICROCODE */ // return uop_stor(BANK_LO, CYCLE_R1X, qx); uop_stor(BANK_LO, CYCLE_R1Y, qy); } #endif USE_MICROCODE //------------------------------------------------------------------------------ void fpga_curve_double_jacobian_microcode_wrapper(const FPGA_BUFFER *rx, const FPGA_BUFFER *ry, const FPGA_BUFFER *rz, FPGA_BUFFER *sx, FPGA_BUFFER *sy, FPGA_BUFFER *sz) //------------------------------------------------------------------------------ { // // we have two pieces of microcode to double either R0 or R1 (this // depends on the current multiplier bit), here we can just always // use the one meant for R0 uop_load(rx, BANK_HI, CYCLE_R0X); uop_load(ry, BANK_HI, CYCLE_R0Y); uop_load(rz, BANK_LO, CYCLE_R0Z); fpga_curve_double_jacobian_microcode_r0(); uop_stor(BANK_LO, CYCLE_TX, sx); uop_stor(BANK_LO, CYCLE_TY, sy); uop_stor(BANK_HI, CYCLE_TZ, sz); } //------------------------------------------------------------------------------ void fpga_curve_add_jacobian_microcode_2_wrapper(const FPGA_BUFFER *px, const 
FPGA_BUFFER *py, const FPGA_BUFFER *pz, const FPGA_BUFFER *qx, const FPGA_BUFFER *qy, const FPGA_BUFFER *qz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz) //------------------------------------------------------------------------------ { uop_load(px, BANK_HI, CYCLE_R0X); uop_load(py, BANK_HI, CYCLE_R0Y); uop_load(pz, BANK_LO, CYCLE_R0Z); uop_load(qx, BANK_HI, CYCLE_R1X); uop_load(qy, BANK_HI, CYCLE_R1Y); uop_load(qz, BANK_LO, CYCLE_R1Z); fpga_curve_add_jacobian_microcode_2(); uop_stor(BANK_HI, CYCLE_SX, rx); uop_stor(BANK_HI, CYCLE_SY, ry); uop_stor(BANK_LO, CYCLE_SZ, rz); } //------------------------------------------------------------------------------ // End-of-File //------------------------------------------------------------------------------