aboutsummaryrefslogtreecommitdiff
path: root/x25519/x25519_fpga_curve_microcode.cpp
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2018-09-24 21:38:06 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2018-09-24 21:38:06 +0300
commited6437839977023ffe1eb95d87760d4f1b2c518b (patch)
treee8a55736a8b6451a483a4c46e85a7e2436246cea /x25519/x25519_fpga_curve_microcode.cpp
parent79b3be9be21e4f4bbbc8ea18760123ec72288131 (diff)
X25519-specific code (curve point multiplication)
Diffstat (limited to 'x25519/x25519_fpga_curve_microcode.cpp')
-rw-r--r--x25519/x25519_fpga_curve_microcode.cpp208
1 files changed, 208 insertions, 0 deletions
diff --git a/x25519/x25519_fpga_curve_microcode.cpp b/x25519/x25519_fpga_curve_microcode.cpp
new file mode 100644
index 0000000..d57cb63
--- /dev/null
+++ b/x25519/x25519_fpga_curve_microcode.cpp
@@ -0,0 +1,208 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_curve_microcode.cpp
+// -----------------------------------------------
+// Elliptic curve arithmetic procedures for X25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "x25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+enum X25519_UOP_OPERAND
+//------------------------------------------------------------------------------
+{
+ CONST_A24 = CURVE25519_UOP_OPERAND_COUNT + 1,
+
+ LADDER_R0_X,
+ LADDER_R0_Z,
+
+ LADDER_R1_X,
+ LADDER_R1_Z,
+
+ LADDER_T0_X,
+ LADDER_T0_Z,
+
+ LADDER_T1_X,
+ LADDER_T1_Z,
+
+ LADDER_S0,
+ LADDER_S1,
+
+ LADDER_D0,
+ LADDER_D1,
+
+ LADDER_QS0,
+ LADDER_QD0,
+
+ LADDER_S0D1,
+ LADDER_S1D0,
+
+ LADDER_TS,
+ LADDER_TD,
+
+ LADDER_QTD,
+
+ LADDER_T0,
+ LADDER_TA,
+ LADDER_T1,
+
+ LADDER_P_X,
+
+ X25519_UOP_OPERAND_COUNT
+};
+
+
+//------------------------------------------------------------------------------
+// Storage Buffers
+//------------------------------------------------------------------------------
+static FPGA_BUFFER BUF_LO[X25519_UOP_OPERAND_COUNT];
+static FPGA_BUFFER BUF_HI[X25519_UOP_OPERAND_COUNT];
+
+
+//------------------------------------------------------------------------------
+//
+// Elliptic curve point scalar multiplication routine.
+//
+// This uses the Montgomery ladder to do the multiplication and then
+// converts the result to affine coordinates.
+//
+// The algorithm is based on Algorithm 3 from "How to (pre-)compute a ladder"
+// https://eprint.iacr.org/2017/264.pdf
+//
+//------------------------------------------------------------------------------
+void fpga_curve_x25519_scalar_multiply_microcode(const FPGA_BUFFER *PX, const FPGA_BUFFER *K, FPGA_BUFFER *QX)
+//------------------------------------------------------------------------------
+{
+ bool k_bit, s; // 1-bit values
+ FPGA_WORD k_word; // current word of multiplier
+ int word_count, bit_count; // counters
+
+ // initialize constant operands
+ fpga_multiword_copy(&CURVE25519_ZERO, &BUF_LO[CONST_ZERO]);
+ fpga_multiword_copy(&CURVE25519_ZERO, &BUF_HI[CONST_ZERO]);
+
+ fpga_multiword_copy(&CURVE25519_ONE, &BUF_LO[CONST_ONE]);
+ fpga_multiword_copy(&CURVE25519_ONE, &BUF_HI[CONST_ONE]);
+
+ fpga_multiword_copy(&X25519_A24, &BUF_LO[CONST_A24]);
+ fpga_multiword_copy(&X25519_A24, &BUF_HI[CONST_A24]);
+
+ //
+ // BEGIN MICROCODE
+ //
+
+ // initialization
+ uop_load(PX, BANK_HI, LADDER_P_X, BUF_LO, BUF_HI);
+ uop_move(BANK_HI, CONST_ONE, CONST_ZERO, BANK_LO, LADDER_R0_X, LADDER_R0_Z, BUF_LO, BUF_HI);
+ uop_move(BANK_HI, LADDER_P_X, CONST_ONE, BANK_LO, LADDER_R1_X, LADDER_R1_Z, BUF_LO, BUF_HI);
+
+ // ladder
+ s = false;
+ for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
+ {
+ for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
+ {
+ k_word = K->words[word_count - 1] >> (bit_count - 1); // current word
+ k_bit = (k_word & (FPGA_WORD)1) == 1; // current bit
+
+ // inputs are all in LO: R0_X, R0_Z, R1_X, R1_Z
+
+ // swap if needed
+ if (s == k_bit)
+ { uop_move(BANK_LO, LADDER_R0_X, LADDER_R0_Z, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BUF_LO, BUF_HI); // HI: T0_X, T0_Z = LO: R0_X, R0_Z
+ uop_move(BANK_LO, LADDER_R1_X, LADDER_R1_Z, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BUF_LO, BUF_HI); // HI: T1_X, T1_Z = LO: R1_X, R1_Z
+ }
+ else
+ { uop_move(BANK_LO, LADDER_R1_X, LADDER_R1_Z, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BUF_LO, BUF_HI); // HI: T0_X, T0_Z = LO: R1_X, R1_Z
+ uop_move(BANK_LO, LADDER_R0_X, LADDER_R0_Z, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BUF_LO, BUF_HI); // HI: T1_X, T1_Z = LO: R0_X, R0_Z
+ }
+
+ // remember whether we actually did the swap
+ s = k_bit;
+
+ // run step
+ uop_calc(ADD, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BANK_LO, LADDER_S0, BUF_LO, BUF_HI, MOD_2P); // LO: S0 = HI: T0_X + T0_Z
+ uop_calc(ADD, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BANK_LO, LADDER_S1, BUF_LO, BUF_HI, MOD_2P); // LO: S1 = HI: T1_X + T1_Z
+ uop_calc(SUB, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BANK_LO, LADDER_D0, BUF_LO, BUF_HI, MOD_2P); // LO: D0 = HI: T0_X - T0_Z
+ uop_calc(SUB, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BANK_LO, LADDER_D1, BUF_LO, BUF_HI, MOD_2P); // LO: D1 = HI: T1_X - T1_Z
+
+ uop_calc(MUL, BANK_LO, LADDER_S0, LADDER_S0, BANK_HI, LADDER_QS0, BUF_LO, BUF_HI, MOD_2P); // HI: QS0 = LO: S0 * S0
+ uop_calc(MUL, BANK_LO, LADDER_D0, LADDER_D0, BANK_HI, LADDER_QD0, BUF_LO, BUF_HI, MOD_2P); // HI: QD0 = LO: D0 * D0
+ uop_calc(MUL, BANK_LO, LADDER_S0, LADDER_D1, BANK_HI, LADDER_S0D1, BUF_LO, BUF_HI, MOD_2P); // HI: S0D1 = LO: S0 * D1
+ uop_calc(MUL, BANK_LO, LADDER_S1, LADDER_D0, BANK_HI, LADDER_S1D0, BUF_LO, BUF_HI, MOD_2P); // HI: S1D0 = LO: S1 * D0
+
+ uop_calc(ADD, BANK_HI, LADDER_S1D0, LADDER_S0D1, BANK_LO, LADDER_TS, BUF_LO, BUF_HI, MOD_2P); // LO: TS = HI: S1D0 + S0D1
+ uop_calc(SUB, BANK_HI, LADDER_S1D0, LADDER_S0D1, BANK_LO, LADDER_TD, BUF_LO, BUF_HI, MOD_2P); // LO: TD = HI: S1D0 - S0D1
+
+ uop_calc(MUL, BANK_LO, LADDER_TD, LADDER_TD, BANK_HI, LADDER_QTD, BUF_LO, BUF_HI, MOD_2P); // HI: QTD = LO: TD * TD
+
+ uop_calc(SUB, BANK_HI, LADDER_QS0, LADDER_QD0, BANK_LO, LADDER_T0, BUF_LO, BUF_HI, MOD_2P); // LO: T0 = HI: QS0 - QD0
+ uop_calc(MUL, BANK_LO, LADDER_T0, CONST_A24, BANK_HI, LADDER_TA, BUF_LO, BUF_HI, MOD_2P); // HI: TA = LO: T0 * A24
+ uop_calc(ADD, BANK_HI, LADDER_TA, LADDER_QD0, BANK_LO, LADDER_T1, BUF_LO, BUF_HI, MOD_2P); // LO: T1 = HI: TA * QD0
+
+ uop_calc(MUL, BANK_HI, LADDER_QS0, LADDER_QD0, BANK_LO, LADDER_R0_X, BUF_LO, BUF_HI, MOD_2P); // LO: R0_X = HI: QS0 * QD0
+ uop_calc(MUL, BANK_LO, LADDER_T0, LADDER_T1, BANK_HI, LADDER_R0_Z, BUF_LO, BUF_HI, MOD_2P); // HI: R0_Z = LO: T0 * T1
+ uop_calc(MUL, BANK_LO, LADDER_TS, LADDER_TS, BANK_HI, LADDER_R1_X, BUF_LO, BUF_HI, MOD_2P); // HI: R1_X = LO: TS * TS
+ uop_calc(MUL, BANK_HI, LADDER_P_X, LADDER_QTD, BANK_LO, LADDER_R1_Z, BUF_LO, BUF_HI, MOD_2P); // LO: R1_Z = HI: PX * QTD
+
+ uop_move(BANK_HI, LADDER_R0_Z, LADDER_R1_X, BANK_LO, LADDER_R0_Z, LADDER_R1_X, BUF_LO, BUF_HI); // LO: R0_Z, R1_X = HI: R0_Z, R1_X
+ }
+ }
+
+ // inversion expects result to be in LO: T1
+ uop_move(BANK_HI, LADDER_R0_Z, LADDER_R0_Z, BANK_LO, INVERT_T_1, INVERT_T_1, BUF_LO, BUF_HI);
+
+ // just call piece of microcode
+ fpga_modular_inv_microcode(BUF_LO, BUF_HI);
+
+ // inversion places result in HI: R1
+ uop_move(BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R1, INVERT_R1, BUF_LO, BUF_HI);
+ uop_calc(MUL, BANK_LO, INVERT_R1, LADDER_R0_X, BANK_HI, INVERT_R2, BUF_LO, BUF_HI, MOD_2P);
+
+ // finally reduce to just 1*P
+ uop_calc(ADD, BANK_HI, INVERT_R2, CONST_ZERO, BANK_LO, INVERT_R1, BUF_LO, BUF_HI, MOD_1P); // !!!
+
+ // store result
+ uop_stor(BUF_LO, BUF_HI, BANK_LO, INVERT_R1, QX);
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------