//------------------------------------------------------------------------------ // // ed25519_fpga_curve_microcode.cpp // ----------------------------------------------- // Elliptic curve arithmetic procedures for Ed25519 // // Authors: Pavel Shatov // // Copyright (c) 2018 NORDUnet A/S // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // - Neither the name of the NORDUnet nor the names of its contributors may be // used to endorse or promote products derived from this software without // specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ // Headers //------------------------------------------------------------------------------ #include "ed25519_fpga_model.h" //------------------------------------------------------------------------------ enum ED25519_UOP_OPERAND //------------------------------------------------------------------------------ { CONST_G_X = CURVE25519_UOP_OPERAND_COUNT + 1, CONST_G_Y, CONST_G_T, CYCLE_R0_X, CYCLE_R0_Y, CYCLE_R0_Z, CYCLE_R0_T, CYCLE_R1_X, CYCLE_R1_Y, CYCLE_R1_Z, CYCLE_R1_T, CYCLE_S_X, CYCLE_S_Y, CYCLE_S_Z, CYCLE_S_T, CYCLE_T_X, CYCLE_T_Y, CYCLE_T_Z, CYCLE_T_T, CYCLE_U_X, CYCLE_U_Y, CYCLE_U_Z, CYCLE_U_T, CYCLE_V_X, CYCLE_V_Y, CYCLE_V_Z, CYCLE_V_T, PROC_A, PROC_B, PROC_C, PROC_D, PROC_E, PROC_F, PROC_G, PROC_H, PROC_I, PROC_J, ED25519_UOP_OPERAND_COUNT }; //------------------------------------------------------------------------------ // Storage Buffers //------------------------------------------------------------------------------ static FPGA_BUFFER BUF_LO[ED25519_UOP_OPERAND_COUNT]; static FPGA_BUFFER BUF_HI[ED25519_UOP_OPERAND_COUNT]; //------------------------------------------------------------------------------ // // Elliptic curve point scalar multiplication routine. // //------------------------------------------------------------------------------ void fpga_curve_ed25519_base_scalar_multiply_microcode(const FPGA_BUFFER *K, FPGA_BUFFER *QY) //------------------------------------------------------------------------------ { bool k_bit; // 1-bit values FPGA_WORD k_word; // current word of multiplier int word_count, bit_count; // counters // initialize internal banks fpga_multiword_copy(&CURVE25519_ZERO, &BUF_LO[CONST_ZERO]); fpga_multiword_copy(&CURVE25519_ZERO, &BUF_HI[CONST_ZERO]); fpga_multiword_copy(&CURVE25519_ONE, &BUF_LO[CONST_ONE]); fpga_multiword_copy(&CURVE25519_ONE, &BUF_HI[CONST_ONE]); fpga_multiword_copy(&ED25519_G_X, &BUF_LO[CONST_G_X]); fpga_multiword_copy(&ED25519_G_X, &BUF_HI[CONST_G_X]); fpga_multiword_copy(&ED25519_G_Y, &BUF_LO[CONST_G_Y]); fpga_multiword_copy(&ED25519_G_Y, &BUF_HI[CONST_G_Y]); fpga_multiword_copy(&ED25519_G_T, &BUF_LO[CONST_G_T]); fpga_multiword_copy(&ED25519_G_T, &BUF_HI[CONST_G_T]); // force certain bit values FPGA_BUFFER K_INT; fpga_multiword_copy(K, &K_INT); K_INT.words[0] &= 0xFFFFFFF8; K_INT.words[FPGA_OPERAND_NUM_WORDS-1] &= 0x3FFFFFFF; K_INT.words[FPGA_OPERAND_NUM_WORDS-1] |= 0x40000000; /* BEGIN_MICROCODE: PREPARE */ // initialize uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_R0_X, BUF_LO, BUF_HI); uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_R0_Y, BUF_LO, BUF_HI); uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_R0_Z, BUF_LO, BUF_HI); uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_R0_T, BUF_LO, BUF_HI); uop_move(BANK_HI, CONST_G_X, BANK_LO, CYCLE_R1_X, BUF_LO, BUF_HI); uop_move(BANK_HI, CONST_G_Y, BANK_LO, CYCLE_R1_Y, BUF_LO, BUF_HI); uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_R1_Z, BUF_LO, BUF_HI); uop_move(BANK_HI, CONST_G_T, BANK_LO, CYCLE_R1_T, BUF_LO, BUF_HI); /* END_MICROCODE */ // multiply for (word_count=0; word_count> bit_count; k_bit = (k_word & (FPGA_WORD)1) == 1; if (k_bit) { // U = R0 // V = R1 /* BEGIN_MICROCODE: BEFORE_ROUND_K1 */ uop_move(BANK_LO, CYCLE_R0_X, BANK_HI, CYCLE_U_X, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R0_Y, BANK_HI, CYCLE_U_Y, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R0_Z, BANK_HI, CYCLE_U_Z, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R0_T, BANK_HI, CYCLE_U_T, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R1_X, BANK_HI, CYCLE_V_X, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R1_Y, BANK_HI, CYCLE_V_Y, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R1_Z, BANK_HI, CYCLE_V_Z, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R1_T, BANK_HI, CYCLE_V_T, BUF_LO, BUF_HI); /* END_MICROCODE */ } else { // U = R1 // V = R0 /* BEGIN_MICROCODE: BEFORE_ROUND_K0 */ uop_move(BANK_LO, CYCLE_R0_X, BANK_HI, CYCLE_V_X, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R0_Y, BANK_HI, CYCLE_V_Y, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R0_Z, BANK_HI, CYCLE_V_Z, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R0_T, BANK_HI, CYCLE_V_T, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R1_X, BANK_HI, CYCLE_U_X, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R1_Y, BANK_HI, CYCLE_U_Y, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R1_Z, BANK_HI, CYCLE_U_Z, BUF_LO, BUF_HI); uop_move(BANK_LO, CYCLE_R1_T, BANK_HI, CYCLE_U_T, BUF_LO, BUF_HI); /* END_MICROCODE */ } /* BEGIN_MICROCODE: DURING_ROUND */ // S = double(U) uop_calc(MUL, BANK_HI, CYCLE_U_X, CYCLE_U_X, BANK_LO, PROC_A, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_HI, CYCLE_U_Y, CYCLE_U_Y, BANK_LO, PROC_B, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_HI, CYCLE_U_Z, CYCLE_U_Z, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); uop_calc(ADD, BANK_LO, PROC_I, PROC_I, BANK_HI, PROC_C, BUF_LO, BUF_HI, MOD_2P); uop_calc(ADD, BANK_HI, CYCLE_U_X, CYCLE_U_Y, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, PROC_I, PROC_I, BANK_HI, PROC_D, BUF_LO, BUF_HI, MOD_2P); uop_calc(ADD, BANK_LO, PROC_A, PROC_B, BANK_HI, PROC_H, BUF_LO, BUF_HI, MOD_2P); uop_calc(SUB, BANK_HI, PROC_H, PROC_D, BANK_LO, PROC_E, BUF_LO, BUF_HI, MOD_2P); uop_calc(SUB, BANK_LO, PROC_A, PROC_B, BANK_HI, PROC_G, BUF_LO, BUF_HI, MOD_2P); uop_calc(ADD, BANK_HI, PROC_C, PROC_G, BANK_LO, PROC_F, BUF_LO, BUF_HI, MOD_2P); uop_move(BANK_HI, PROC_G, BANK_LO, PROC_G, BUF_LO, BUF_HI); uop_move(BANK_HI, PROC_H, BANK_LO, PROC_H, BUF_LO, BUF_HI); uop_calc(MUL, BANK_LO, PROC_E, PROC_F, BANK_HI, CYCLE_S_X, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, PROC_G, PROC_H, BANK_HI, CYCLE_S_Y, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, PROC_E, PROC_H, BANK_HI, CYCLE_S_T, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, PROC_F, PROC_G, BANK_HI, CYCLE_S_Z, BUF_LO, BUF_HI, MOD_2P); // T = add(S, V) uop_calc(SUB, BANK_HI, CYCLE_S_Y, CYCLE_S_X, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); uop_calc(ADD, BANK_HI, CYCLE_V_Y, CYCLE_V_X, BANK_LO, PROC_J, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, PROC_I, PROC_J, BANK_HI, PROC_A, BUF_LO, BUF_HI, MOD_2P); uop_calc(ADD, BANK_HI, CYCLE_S_Y, CYCLE_S_X, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); uop_calc(SUB, BANK_HI, CYCLE_V_Y, CYCLE_V_X, BANK_LO, PROC_J, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, PROC_I, PROC_J, BANK_HI, PROC_B, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_HI, CYCLE_S_Z, CYCLE_V_T, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); uop_calc(ADD, BANK_LO, PROC_I, PROC_I, BANK_HI, PROC_C, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_HI, CYCLE_S_T, CYCLE_V_Z, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); uop_calc(ADD, BANK_LO, PROC_I, PROC_I, BANK_HI, PROC_D, BUF_LO, BUF_HI, MOD_2P); uop_calc(ADD, BANK_HI, PROC_C, PROC_D, BANK_LO, PROC_E, BUF_LO, BUF_HI, MOD_2P); uop_calc(SUB, BANK_HI, PROC_B, PROC_A, BANK_LO, PROC_F, BUF_LO, BUF_HI, MOD_2P); uop_calc(ADD, BANK_HI, PROC_B, PROC_A, BANK_LO, PROC_G, BUF_LO, BUF_HI, MOD_2P); uop_calc(SUB, BANK_HI, PROC_D, PROC_C, BANK_LO, PROC_H, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, PROC_E, PROC_F, BANK_HI, CYCLE_T_X, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, PROC_G, PROC_H, BANK_HI, CYCLE_T_Y, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, PROC_E, PROC_H, BANK_HI, CYCLE_T_T, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, PROC_F, PROC_G, BANK_HI, CYCLE_T_Z, BUF_LO, BUF_HI, MOD_2P); /* END_MICROCODE */ if (k_bit) { // R0 = T /* BEGIN_MICROCODE: AFTER_ROUND_K1 */ uop_move(BANK_HI, CYCLE_T_X, BANK_LO, CYCLE_R0_X, BUF_LO, BUF_HI); uop_move(BANK_HI, CYCLE_T_Y, BANK_LO, CYCLE_R0_Y, BUF_LO, BUF_HI); uop_move(BANK_HI, CYCLE_T_Z, BANK_LO, CYCLE_R0_Z, BUF_LO, BUF_HI); uop_move(BANK_HI, CYCLE_T_T, BANK_LO, CYCLE_R0_T, BUF_LO, BUF_HI); /* END_MICROCODE */ } else { // R1 = T /* BEGIN_MICROCODE: AFTER_ROUND_K0 */ uop_move(BANK_HI, CYCLE_T_X, BANK_LO, CYCLE_R1_X, BUF_LO, BUF_HI); uop_move(BANK_HI, CYCLE_T_Y, BANK_LO, CYCLE_R1_Y, BUF_LO, BUF_HI); uop_move(BANK_HI, CYCLE_T_Z, BANK_LO, CYCLE_R1_Z, BUF_LO, BUF_HI); uop_move(BANK_HI, CYCLE_T_T, BANK_LO, CYCLE_R1_T, BUF_LO, BUF_HI); /* END_MICROCODE */ } } } /* BEGIN_MICROCODE: BEFORE_INVERSION */ // inversion expects result to be in LO: T1 uop_move(BANK_LO, CYCLE_R0_Z, BANK_HI, CYCLE_R0_Z, BUF_LO, BUF_HI); uop_move(BANK_HI, CYCLE_R0_Z, BANK_LO, INVERT_T_1, BUF_LO, BUF_HI); /* END_MICROCODE */ // just call piece of microcode fpga_modular_inv_microcode(BUF_LO, BUF_HI); /* BEGIN_MICROCODE: AFTER_INVERSION */ // inversion places result in HI: R1 // coordinates are in LO: R0_X, R0_Y uop_move(BANK_HI, INVERT_R1, BANK_LO, INVERT_R1, BUF_LO, BUF_HI); uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_R0_X, BANK_HI, CYCLE_R0_X, BUF_LO, BUF_HI, MOD_2P); uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_R0_Y, BANK_HI, CYCLE_R0_Y, BUF_LO, BUF_HI, MOD_2P); /* END_MICROCODE */ /* BEGIN_MICROCODE: FINAL_REDUCTION */ // finally reduce to just 1*P uop_calc(ADD, BANK_HI, CYCLE_R0_X, CONST_ZERO, BANK_LO, CYCLE_R0_X, BUF_LO, BUF_HI, MOD_1P); uop_calc(ADD, BANK_HI, CYCLE_R0_Y, CONST_ZERO, BANK_LO, CYCLE_R0_Y, BUF_LO, BUF_HI, MOD_1P); /* END_MICROCODE */ // poke sign bit BUF_LO[CYCLE_R0_Y].words[FPGA_OPERAND_NUM_WORDS-1] |= (FPGA_WORD)((BUF_LO[CYCLE_R0_X].words[0] & (FPGA_WORD)1) << 31); /* BEGIN_MICROCODE: HANDLE_SIGN */ uop_move(BANK_LO, CYCLE_R0_X, BANK_HI, CYCLE_R0_X, BUF_LO, BUF_HI); /* END_MICROCODE */ /* BEGIN_MICROCODE: OUTPUT */ uop_move(BANK_LO, CYCLE_R0_Y, BANK_HI, CYCLE_R0_Y, BUF_LO, BUF_HI); /* END_MICROCODE */ // store result uop_stor(BUF_LO, BUF_HI, BANK_LO, CYCLE_R0_Y, QY); } //------------------------------------------------------------------------------ // End-of-File //------------------------------------------------------------------------------