aboutsummaryrefslogtreecommitdiff
path: root/ed25519/ed25519_fpga_curve_microcode.cpp
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2018-09-24 21:38:37 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2018-09-24 21:38:37 +0300
commit250716dfed0da0bdcf8f975293fbf72c2b56c247 (patch)
treebba9012c8775317506f62a1d6b58d9196c11f35f /ed25519/ed25519_fpga_curve_microcode.cpp
parented6437839977023ffe1eb95d87760d4f1b2c518b (diff)
Ed25519-specific code (curve base point multiplication)
Diffstat (limited to 'ed25519/ed25519_fpga_curve_microcode.cpp')
-rw-r--r--ed25519/ed25519_fpga_curve_microcode.cpp299
1 files changed, 299 insertions, 0 deletions
diff --git a/ed25519/ed25519_fpga_curve_microcode.cpp b/ed25519/ed25519_fpga_curve_microcode.cpp
new file mode 100644
index 0000000..25826c5
--- /dev/null
+++ b/ed25519/ed25519_fpga_curve_microcode.cpp
@@ -0,0 +1,299 @@
+//------------------------------------------------------------------------------
+//
+// ed25519_fpga_curve_microcode.cpp
+// -----------------------------------------------
+// Elliptic curve arithmetic procedures for Ed25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "ed25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+enum ED25519_UOP_OPERAND
+//------------------------------------------------------------------------------
+{
+ CONST_G_X = CURVE25519_UOP_OPERAND_COUNT + 1,
+ CONST_G_Y,
+ CONST_G_T,
+
+ CYCLE_R0_X,
+ CYCLE_R0_Y,
+ CYCLE_R0_Z,
+ CYCLE_R0_T,
+
+ CYCLE_R1_X,
+ CYCLE_R1_Y,
+ CYCLE_R1_Z,
+ CYCLE_R1_T,
+
+ CYCLE_S_X,
+ CYCLE_S_Y,
+ CYCLE_S_Z,
+ CYCLE_S_T,
+
+ CYCLE_T_X,
+ CYCLE_T_Y,
+ CYCLE_T_Z,
+ CYCLE_T_T,
+
+ CYCLE_U_X,
+ CYCLE_U_Y,
+ CYCLE_U_Z,
+ CYCLE_U_T,
+
+ CYCLE_V_X,
+ CYCLE_V_Y,
+ CYCLE_V_Z,
+ CYCLE_V_T,
+
+ PROC_A,
+ PROC_B,
+ PROC_C,
+ PROC_D,
+ PROC_E,
+ PROC_F,
+ PROC_G,
+ PROC_H,
+ PROC_I,
+ PROC_J,
+
+ ED25519_UOP_OPERAND_COUNT
+};
+
+
+//------------------------------------------------------------------------------
+// Storage Buffers
+//------------------------------------------------------------------------------
+static FPGA_BUFFER BUF_LO[ED25519_UOP_OPERAND_COUNT];
+static FPGA_BUFFER BUF_HI[ED25519_UOP_OPERAND_COUNT];
+
+
+//------------------------------------------------------------------------------
+//
+// Elliptic curve point scalar multiplication routine.
+//
+//------------------------------------------------------------------------------
+void fpga_curve_ed25519_base_scalar_multiply_microcode(const FPGA_BUFFER *K, FPGA_BUFFER *QY)
+//------------------------------------------------------------------------------
+{
+ bool k_bit; // 1-bit values
+ FPGA_WORD k_word; // current word of multiplier
+ int word_count, bit_count; // counters
+
+ // initialize internal banks
+ fpga_multiword_copy(&CURVE25519_ZERO, &BUF_LO[CONST_ZERO]);
+ fpga_multiword_copy(&CURVE25519_ZERO, &BUF_HI[CONST_ZERO]);
+
+ fpga_multiword_copy(&CURVE25519_ONE, &BUF_LO[CONST_ONE]);
+ fpga_multiword_copy(&CURVE25519_ONE, &BUF_HI[CONST_ONE]);
+
+ fpga_multiword_copy(&ED25519_G_X, &BUF_LO[CONST_G_X]);
+ fpga_multiword_copy(&ED25519_G_X, &BUF_HI[CONST_G_X]);
+
+ fpga_multiword_copy(&ED25519_G_Y, &BUF_LO[CONST_G_Y]);
+ fpga_multiword_copy(&ED25519_G_Y, &BUF_HI[CONST_G_Y]);
+
+ fpga_multiword_copy(&ED25519_G_T, &BUF_LO[CONST_G_T]);
+ fpga_multiword_copy(&ED25519_G_T, &BUF_HI[CONST_G_T]);
+
+ // force certain bit values
+ FPGA_BUFFER K_INT;
+ fpga_multiword_copy(K, &K_INT);
+
+ K_INT.words[0] &= 0xFFFFFFF8;
+ K_INT.words[FPGA_OPERAND_NUM_WORDS-1] &= 0x3FFFFFFF;
+ K_INT.words[FPGA_OPERAND_NUM_WORDS-1] |= 0x40000000;
+
+ // initialization
+ uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_R0_X, BUF_LO, BUF_HI);
+ uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_R0_Y, BUF_LO, BUF_HI);
+ uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_R0_Z, BUF_LO, BUF_HI);
+ uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_R0_T, BUF_LO, BUF_HI);
+
+ uop_move(BANK_HI, CONST_G_X, BANK_LO, CYCLE_R1_X, BUF_LO, BUF_HI);
+ uop_move(BANK_HI, CONST_G_Y, BANK_LO, CYCLE_R1_Y, BUF_LO, BUF_HI);
+ uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_R1_Z, BUF_LO, BUF_HI);
+ uop_move(BANK_HI, CONST_G_T, BANK_LO, CYCLE_R1_T, BUF_LO, BUF_HI);
+
+ // multiply
+ for (word_count=0; word_count<FPGA_OPERAND_NUM_WORDS; word_count++)
+ {
+ for (bit_count=0; bit_count<FPGA_WORD_WIDTH; bit_count++)
+ {
+ // get current bit of K
+ k_word = K_INT.words[word_count] >> bit_count;
+ k_bit = (k_word & (FPGA_WORD)1) == 1;
+
+ if (k_bit)
+ {
+ // U = R0
+ // V = R1
+
+ uop_move(BANK_LO, CYCLE_R0_X, BANK_HI, CYCLE_U_X, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R0_Y, BANK_HI, CYCLE_U_Y, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R0_Z, BANK_HI, CYCLE_U_Z, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R0_T, BANK_HI, CYCLE_U_T, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R1_X, BANK_HI, CYCLE_V_X, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R1_Y, BANK_HI, CYCLE_V_Y, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R1_Z, BANK_HI, CYCLE_V_Z, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R1_T, BANK_HI, CYCLE_V_T, BUF_LO, BUF_HI);
+ }
+ else
+ {
+ // U = R1
+ // V = R0
+
+ uop_move(BANK_LO, CYCLE_R0_X, BANK_HI, CYCLE_V_X, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R0_Y, BANK_HI, CYCLE_V_Y, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R0_Z, BANK_HI, CYCLE_V_Z, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R0_T, BANK_HI, CYCLE_V_T, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R1_X, BANK_HI, CYCLE_U_X, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R1_Y, BANK_HI, CYCLE_U_Y, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R1_Z, BANK_HI, CYCLE_U_Z, BUF_LO, BUF_HI);
+ uop_move(BANK_LO, CYCLE_R1_T, BANK_HI, CYCLE_U_T, BUF_LO, BUF_HI);
+ }
+
+ // S = double(U)
+ uop_calc(MUL, BANK_HI, CYCLE_U_X, CYCLE_U_X, BANK_LO, PROC_A, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_mul(P_X, P_X, &A, &CURVE25519_2P);
+ uop_calc(MUL, BANK_HI, CYCLE_U_Y, CYCLE_U_Y, BANK_LO, PROC_B, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_mul(P_Y, P_Y, &B, &CURVE25519_2P);
+
+ uop_calc(MUL, BANK_HI, CYCLE_U_Z, CYCLE_U_Z, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_mul(P_Z, P_Z, &I, &CURVE25519_2P);
+ uop_calc(ADD, BANK_LO, PROC_I, PROC_I, BANK_HI, PROC_C, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_add( &I, &I, &C, &CURVE25519_2P);
+ uop_calc(ADD, BANK_HI, CYCLE_U_X, CYCLE_U_Y, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_add(P_X, P_Y, &I, &CURVE25519_2P);
+ uop_calc(MUL, BANK_LO, PROC_I, PROC_I, BANK_HI, PROC_D, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_mul( &I, &I, &D, &CURVE25519_2P);
+
+ uop_calc(ADD, BANK_LO, PROC_A, PROC_B, BANK_HI, PROC_H, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_add( &A, &B, &H, &CURVE25519_2P);
+ uop_calc(SUB, BANK_HI, PROC_H, PROC_D, BANK_LO, PROC_E, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_sub( &H, &D, &E, &CURVE25519_2P);
+ uop_calc(SUB, BANK_LO, PROC_A, PROC_B, BANK_HI, PROC_G, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_sub( &A, &B, &G, &CURVE25519_2P);
+ uop_calc(ADD, BANK_HI, PROC_C, PROC_G, BANK_LO, PROC_F, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_add( &C, &G, &F, &CURVE25519_2P);
+
+ uop_move2(BANK_HI, PROC_G, PROC_H, BANK_LO, PROC_G, PROC_H, BUF_LO, BUF_HI);
+
+ uop_calc(MUL, BANK_LO, PROC_E, PROC_F, BANK_HI, CYCLE_S_X, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_mul( &E, &F, Q_X, &CURVE25519_2P);
+ uop_calc(MUL, BANK_LO, PROC_G, PROC_H, BANK_HI, CYCLE_S_Y, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_mul( &G, &H, Q_Y, &CURVE25519_2P);
+
+ uop_calc(MUL, BANK_LO, PROC_E, PROC_H, BANK_HI, CYCLE_S_T, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_mul( &E, &H, Q_T, &CURVE25519_2P);
+ uop_calc(MUL, BANK_LO, PROC_F, PROC_G, BANK_HI, CYCLE_S_Z, BUF_LO, BUF_HI, MOD_2P); // fpga_modular_mul( &F, &G, Q_Z, &CURVE25519_2P);
+
+ // T = add(S, V)
+ uop_calc(SUB, BANK_HI, CYCLE_S_Y, CYCLE_S_X, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_sub(S_Y, S_X, &I, &CURVE25519_2P); // I = (qy - qx) % mod
+ uop_calc(ADD, BANK_HI, CYCLE_V_Y, CYCLE_V_X, BANK_LO, PROC_J, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_add(V_Y, V_X, &J, &CURVE25519_2P); // J = (py + px) % mod
+ uop_calc(MUL, BANK_LO, PROC_I, PROC_J, BANK_HI, PROC_A, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_mul( &I, &J, &A, &CURVE25519_2P); // A = ( I * J) % mod
+
+ uop_calc(ADD, BANK_HI, CYCLE_S_Y, CYCLE_S_X, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_add(S_Y, S_X, &I, &CURVE25519_2P); // I = (qy + qx) % mod
+ uop_calc(SUB, BANK_HI, CYCLE_V_Y, CYCLE_V_X, BANK_LO, PROC_J, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_sub(V_Y, V_X, &J, &CURVE25519_2P); // J = (py - px) % mod
+ uop_calc(MUL, BANK_LO, PROC_I, PROC_J, BANK_HI, PROC_B, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_mul( &I, &J, &B, &CURVE25519_2P); // B = ( I * J) % mod
+
+ uop_calc(MUL, BANK_HI, CYCLE_S_Z, CYCLE_V_T, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_mul(S_Z, V_T, &I, &CURVE25519_2P); // I = (qz * pt) % mod
+ uop_calc(ADD, BANK_LO, PROC_I, PROC_I, BANK_HI, PROC_C, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_add( &I, &I, &C, &CURVE25519_2P); // C = ( I + I) % mod
+ uop_calc(MUL, BANK_HI, CYCLE_S_T, CYCLE_V_Z, BANK_LO, PROC_I, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_mul(S_T, V_Z, &I, &CURVE25519_2P); // I = (qt * pz) % mod
+ uop_calc(ADD, BANK_LO, PROC_I, PROC_I, BANK_HI, PROC_D, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_add( &I, &I, &D, &CURVE25519_2P); // D = ( I + I) % mod
+
+ uop_calc(ADD, BANK_HI, PROC_C, PROC_D, BANK_LO, PROC_E, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_add( &D, &C, &E, &CURVE25519_2P); // E = (D + C) % mod
+ uop_calc(SUB, BANK_HI, PROC_B, PROC_A, BANK_LO, PROC_F, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_sub( &B, &A, &F, &CURVE25519_2P); // F = (B - A) % mod
+ uop_calc(ADD, BANK_HI, PROC_B, PROC_A, BANK_LO, PROC_G, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_add( &B, &A, &G, &CURVE25519_2P); // G = (B + A) % mod
+ uop_calc(SUB, BANK_HI, PROC_D, PROC_C, BANK_LO, PROC_H, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_sub( &D, &C, &H, &CURVE25519_2P); // H = (D - C) % mod
+
+ uop_calc(MUL, BANK_LO, PROC_E, PROC_F, BANK_HI, CYCLE_T_X, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_mul( &E, &F, R_X, &CURVE25519_2P); // rx = (E * F) % mod
+ uop_calc(MUL, BANK_LO, PROC_G, PROC_H, BANK_HI, CYCLE_T_Y, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_mul( &G, &H, R_Y, &CURVE25519_2P); // ry = (G * H) % mod
+ uop_calc(MUL, BANK_LO, PROC_E, PROC_H, BANK_HI, CYCLE_T_T, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_mul( &E, &H, R_T, &CURVE25519_2P); // rt = (E * H) % mod
+ uop_calc(MUL, BANK_LO, PROC_F, PROC_G, BANK_HI, CYCLE_T_Z, BUF_LO, BUF_HI, MOD_2P); //fpga_modular_mul( &F, &G, R_Z, &CURVE25519_2P); // rz = (F * G) % mod
+
+ if (k_bit)
+ {
+ // R0 = T
+
+ uop_move2(BANK_HI, CYCLE_T_X, CYCLE_T_Y, BANK_LO, CYCLE_R0_X, CYCLE_R0_Y, BUF_LO, BUF_HI);
+ uop_move2(BANK_HI, CYCLE_T_Z, CYCLE_T_T, BANK_LO, CYCLE_R0_Z, CYCLE_R0_T, BUF_LO, BUF_HI);
+ }
+ else
+ {
+ // R1 = T
+
+ uop_move2(BANK_HI, CYCLE_T_X, CYCLE_T_Y, BANK_LO, CYCLE_R1_X, CYCLE_R1_Y, BUF_LO, BUF_HI);
+ uop_move2(BANK_HI, CYCLE_T_Z, CYCLE_T_T, BANK_LO, CYCLE_R1_Z, CYCLE_R1_T, BUF_LO, BUF_HI);
+ }
+ }
+ }
+
+ // inversion expects result to be in LO: T1
+ uop_move2(BANK_LO, CYCLE_R0_Z, CYCLE_R0_Z, BANK_HI, CYCLE_R0_Z, CYCLE_R0_Z, BUF_LO, BUF_HI);
+ uop_move2(BANK_HI, CYCLE_R0_Z, CYCLE_R0_Z, BANK_LO, INVERT_T_1, INVERT_T_1, BUF_LO, BUF_HI);
+
+ // just call piece of microcode
+ fpga_modular_inv_microcode(BUF_LO, BUF_HI);
+
+ // inversion places result in HI: R1
+ // coordinates are in HI: R0_X, R0_Y
+
+ uop_move2(BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R1, INVERT_R1, BUF_LO, BUF_HI);
+ uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_R0_X, BANK_HI, CYCLE_R0_X, BUF_LO, BUF_HI, MOD_2P);
+ uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_R0_Y, BANK_HI, CYCLE_R0_Y, BUF_LO, BUF_HI, MOD_2P);
+
+ // finally reduce to just 1*P
+ uop_calc(ADD, BANK_HI, CYCLE_R0_X, CONST_ZERO, BANK_LO, CYCLE_R0_X, BUF_LO, BUF_HI, MOD_1P); // !!!
+ uop_calc(ADD, BANK_HI, CYCLE_R0_Y, CONST_ZERO, BANK_LO, CYCLE_R0_Y, BUF_LO, BUF_HI, MOD_1P); // !!!
+
+ // poke sign bit
+ BUF_LO[CYCLE_R0_Y].words[FPGA_OPERAND_NUM_WORDS-1] |=
+ (FPGA_WORD)((BUF_LO[CYCLE_R0_X].words[0] & (FPGA_WORD)1) << 31);
+
+ // store result
+ uop_stor(BUF_LO, BUF_HI, BANK_LO, CYCLE_R0_Y, QY);
+
+
+
+ /*
+ // process "sign" of x, see this Cryptography Stack Exchange
+ // answer for more details:
+ //
+ // https://crypto.stackexchange.com/questions/58921/decoding-a-ed25519-key-per-rfc8032
+ //
+ // the short story is that odd values of x are negative, so we
+ // just copy the lsb of x into msb of y
+ R0_Y.words[FPGA_OPERAND_NUM_WORDS-1] |= (R0_X.words[0] & (FPGA_WORD)1) << 31;
+
+ // store result
+ fpga_multiword_copy(&R0_Y, QY);
+ */
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------