diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2018-09-24 21:38:06 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2018-09-24 21:38:06 +0300 |
commit | ed6437839977023ffe1eb95d87760d4f1b2c518b (patch) | |
tree | e8a55736a8b6451a483a4c46e85a7e2436246cea /x25519/x25519_fpga_curve_microcode.cpp | |
parent | 79b3be9be21e4f4bbbc8ea18760123ec72288131 (diff) |
X25519-specific code (curve point multiplication)
Diffstat (limited to 'x25519/x25519_fpga_curve_microcode.cpp')
-rw-r--r-- | x25519/x25519_fpga_curve_microcode.cpp | 208 |
1 files changed, 208 insertions, 0 deletions
diff --git a/x25519/x25519_fpga_curve_microcode.cpp b/x25519/x25519_fpga_curve_microcode.cpp new file mode 100644 index 0000000..d57cb63 --- /dev/null +++ b/x25519/x25519_fpga_curve_microcode.cpp @@ -0,0 +1,208 @@ +//------------------------------------------------------------------------------ +// +// x25519_fpga_curve_microcode.cpp +// ----------------------------------------------- +// Elliptic curve arithmetic procedures for X25519 +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "x25519_fpga_model.h" + + +//------------------------------------------------------------------------------ +enum X25519_UOP_OPERAND +//------------------------------------------------------------------------------ +{ + CONST_A24 = CURVE25519_UOP_OPERAND_COUNT + 1, + + LADDER_R0_X, + LADDER_R0_Z, + + LADDER_R1_X, + LADDER_R1_Z, + + LADDER_T0_X, + LADDER_T0_Z, + + LADDER_T1_X, + LADDER_T1_Z, + + LADDER_S0, + LADDER_S1, + + LADDER_D0, + LADDER_D1, + + LADDER_QS0, + LADDER_QD0, + + LADDER_S0D1, + LADDER_S1D0, + + LADDER_TS, + LADDER_TD, + + LADDER_QTD, + + LADDER_T0, + LADDER_TA, + LADDER_T1, + + LADDER_P_X, + + X25519_UOP_OPERAND_COUNT +}; + + +//------------------------------------------------------------------------------ +// Storage Buffers +//------------------------------------------------------------------------------ +static FPGA_BUFFER BUF_LO[X25519_UOP_OPERAND_COUNT]; +static FPGA_BUFFER BUF_HI[X25519_UOP_OPERAND_COUNT]; + + +//------------------------------------------------------------------------------ +// +// Elliptic curve point scalar multiplication routine. +// +// This uses the Montgomery ladder to do the multiplication and then +// converts the result to affine coordinates. +// +// The algorithm is based on Algorithm 3 from "How to (pre-)compute a ladder" +// https://eprint.iacr.org/2017/264.pdf +// +//------------------------------------------------------------------------------ +void fpga_curve_x25519_scalar_multiply_microcode(const FPGA_BUFFER *PX, const FPGA_BUFFER *K, FPGA_BUFFER *QX) +//------------------------------------------------------------------------------ +{ + bool k_bit, s; // 1-bit values + FPGA_WORD k_word; // current word of multiplier + int word_count, bit_count; // counters + + // initialize constant operands + fpga_multiword_copy(&CURVE25519_ZERO, &BUF_LO[CONST_ZERO]); + fpga_multiword_copy(&CURVE25519_ZERO, &BUF_HI[CONST_ZERO]); + + fpga_multiword_copy(&CURVE25519_ONE, &BUF_LO[CONST_ONE]); + fpga_multiword_copy(&CURVE25519_ONE, &BUF_HI[CONST_ONE]); + + fpga_multiword_copy(&X25519_A24, &BUF_LO[CONST_A24]); + fpga_multiword_copy(&X25519_A24, &BUF_HI[CONST_A24]); + + // + // BEGIN MICROCODE + // + + // initialization + uop_load(PX, BANK_HI, LADDER_P_X, BUF_LO, BUF_HI); + uop_move(BANK_HI, CONST_ONE, CONST_ZERO, BANK_LO, LADDER_R0_X, LADDER_R0_Z, BUF_LO, BUF_HI); + uop_move(BANK_HI, LADDER_P_X, CONST_ONE, BANK_LO, LADDER_R1_X, LADDER_R1_Z, BUF_LO, BUF_HI); + + // ladder + s = false; + for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--) + { + for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--) + { + k_word = K->words[word_count - 1] >> (bit_count - 1); // current word + k_bit = (k_word & (FPGA_WORD)1) == 1; // current bit + + // inputs are all in LO: R0_X, R0_Z, R1_X, R1_Z + + // swap if needed + if (s == k_bit) + { uop_move(BANK_LO, LADDER_R0_X, LADDER_R0_Z, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BUF_LO, BUF_HI); // HI: T0_X, T0_Z = LO: R0_X, R0_Z + uop_move(BANK_LO, LADDER_R1_X, LADDER_R1_Z, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BUF_LO, BUF_HI); // HI: T1_X, T1_Z = LO: R1_X, R1_Z + } + else + { uop_move(BANK_LO, LADDER_R1_X, LADDER_R1_Z, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BUF_LO, BUF_HI); // HI: T0_X, T0_Z = LO: R1_X, R1_Z + uop_move(BANK_LO, LADDER_R0_X, LADDER_R0_Z, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BUF_LO, BUF_HI); // HI: T1_X, T1_Z = LO: R0_X, R0_Z + } + + // remember whether we actually did the swap + s = k_bit; + + // run step + uop_calc(ADD, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BANK_LO, LADDER_S0, BUF_LO, BUF_HI, MOD_2P); // LO: S0 = HI: T0_X + T0_Z + uop_calc(ADD, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BANK_LO, LADDER_S1, BUF_LO, BUF_HI, MOD_2P); // LO: S1 = HI: T1_X + T1_Z + uop_calc(SUB, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BANK_LO, LADDER_D0, BUF_LO, BUF_HI, MOD_2P); // LO: D0 = HI: T0_X - T0_Z + uop_calc(SUB, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BANK_LO, LADDER_D1, BUF_LO, BUF_HI, MOD_2P); // LO: D1 = HI: T1_X - T1_Z + + uop_calc(MUL, BANK_LO, LADDER_S0, LADDER_S0, BANK_HI, LADDER_QS0, BUF_LO, BUF_HI, MOD_2P); // HI: QS0 = LO: S0 * S0 + uop_calc(MUL, BANK_LO, LADDER_D0, LADDER_D0, BANK_HI, LADDER_QD0, BUF_LO, BUF_HI, MOD_2P); // HI: QD0 = LO: D0 * D0 + uop_calc(MUL, BANK_LO, LADDER_S0, LADDER_D1, BANK_HI, LADDER_S0D1, BUF_LO, BUF_HI, MOD_2P); // HI: S0D1 = LO: S0 * D1 + uop_calc(MUL, BANK_LO, LADDER_S1, LADDER_D0, BANK_HI, LADDER_S1D0, BUF_LO, BUF_HI, MOD_2P); // HI: S1D0 = LO: S1 * D0 + + uop_calc(ADD, BANK_HI, LADDER_S1D0, LADDER_S0D1, BANK_LO, LADDER_TS, BUF_LO, BUF_HI, MOD_2P); // LO: TS = HI: S1D0 + S0D1 + uop_calc(SUB, BANK_HI, LADDER_S1D0, LADDER_S0D1, BANK_LO, LADDER_TD, BUF_LO, BUF_HI, MOD_2P); // LO: TD = HI: S1D0 - S0D1 + + uop_calc(MUL, BANK_LO, LADDER_TD, LADDER_TD, BANK_HI, LADDER_QTD, BUF_LO, BUF_HI, MOD_2P); // HI: QTD = LO: TD * TD + + uop_calc(SUB, BANK_HI, LADDER_QS0, LADDER_QD0, BANK_LO, LADDER_T0, BUF_LO, BUF_HI, MOD_2P); // LO: T0 = HI: QS0 - QD0 + uop_calc(MUL, BANK_LO, LADDER_T0, CONST_A24, BANK_HI, LADDER_TA, BUF_LO, BUF_HI, MOD_2P); // HI: TA = LO: T0 * A24 + uop_calc(ADD, BANK_HI, LADDER_TA, LADDER_QD0, BANK_LO, LADDER_T1, BUF_LO, BUF_HI, MOD_2P); // LO: T1 = HI: TA * QD0 + + uop_calc(MUL, BANK_HI, LADDER_QS0, LADDER_QD0, BANK_LO, LADDER_R0_X, BUF_LO, BUF_HI, MOD_2P); // LO: R0_X = HI: QS0 * QD0 + uop_calc(MUL, BANK_LO, LADDER_T0, LADDER_T1, BANK_HI, LADDER_R0_Z, BUF_LO, BUF_HI, MOD_2P); // HI: R0_Z = LO: T0 * T1 + uop_calc(MUL, BANK_LO, LADDER_TS, LADDER_TS, BANK_HI, LADDER_R1_X, BUF_LO, BUF_HI, MOD_2P); // HI: R1_X = LO: TS * TS + uop_calc(MUL, BANK_HI, LADDER_P_X, LADDER_QTD, BANK_LO, LADDER_R1_Z, BUF_LO, BUF_HI, MOD_2P); // LO: R1_Z = HI: PX * QTD + + uop_move(BANK_HI, LADDER_R0_Z, LADDER_R1_X, BANK_LO, LADDER_R0_Z, LADDER_R1_X, BUF_LO, BUF_HI); // LO: R0_Z, R1_X = HI: R0_Z, R1_X + } + } + + // inversion expects result to be in LO: T1 + uop_move(BANK_HI, LADDER_R0_Z, LADDER_R0_Z, BANK_LO, INVERT_T_1, INVERT_T_1, BUF_LO, BUF_HI); + + // just call piece of microcode + fpga_modular_inv_microcode(BUF_LO, BUF_HI); + + // inversion places result in HI: R1 + uop_move(BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R1, INVERT_R1, BUF_LO, BUF_HI); + uop_calc(MUL, BANK_LO, INVERT_R1, LADDER_R0_X, BANK_HI, INVERT_R2, BUF_LO, BUF_HI, MOD_2P); + + // finally reduce to just 1*P + uop_calc(ADD, BANK_HI, INVERT_R2, CONST_ZERO, BANK_LO, INVERT_R1, BUF_LO, BUF_HI, MOD_1P); // !!! + + // store result + uop_stor(BUF_LO, BUF_HI, BANK_LO, INVERT_R1, QX); +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ |