From ed6437839977023ffe1eb95d87760d4f1b2c518b Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Mon, 24 Sep 2018 21:38:06 +0300 Subject: X25519-specific code (curve point multiplication) --- x25519/x25519_fpga_curve.h | 90 +++++++++++++ x25519/x25519_fpga_curve_abstract.cpp | 222 +++++++++++++++++++++++++++++++++ x25519/x25519_fpga_curve_microcode.cpp | 208 ++++++++++++++++++++++++++++++ 3 files changed, 520 insertions(+) create mode 100644 x25519/x25519_fpga_curve.h create mode 100644 x25519/x25519_fpga_curve_abstract.cpp create mode 100644 x25519/x25519_fpga_curve_microcode.cpp diff --git a/x25519/x25519_fpga_curve.h b/x25519/x25519_fpga_curve.h new file mode 100644 index 0000000..9f8bff9 --- /dev/null +++ b/x25519/x25519_fpga_curve.h @@ -0,0 +1,90 @@ +//------------------------------------------------------------------------------ +// +// x25519_fpga_curve.h +// ----------------------------------------------- +// Elliptic curve arithmetic procedures for X25519 +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Curve25519 Parameters +//------------------------------------------------------------------------------ + +/* x-coordinate of the base point */ +#define X25519_G_X_INIT {0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x00000009} + +/* coefficient (A + 2) / 4 */ +#define X25519_A24_INIT {0x00000000, 0x00000000, 0x00000000, 0x00000000, \ + 0x00000000, 0x00000000, 0x00000000, 0x0001DB42} + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +extern FPGA_BUFFER X25519_G_X; // the base point +extern FPGA_BUFFER X25519_A24; // coefficient (A + 2) / 4 + + +//------------------------------------------------------------------------------ +// Implementation switch +//------------------------------------------------------------------------------ +#ifdef USE_MICROCODE +#define fpga_curve_x25519_scalar_multiply fpga_curve_x25519_scalar_multiply_microcode +#else +#define fpga_curve_x25519_scalar_multiply fpga_curve_x25519_scalar_multiply_abstract +#endif + + +//------------------------------------------------------------------------------ +// Prototypes +//------------------------------------------------------------------------------ +void fpga_curve_x25519_init (); + +void fpga_curve_x25519_scalar_multiply_abstract (const FPGA_BUFFER *P_X, const FPGA_BUFFER *K, FPGA_BUFFER *Q_X); +void fpga_curve_x25519_scalar_multiply_microcode (const FPGA_BUFFER *P_X, const FPGA_BUFFER *K, FPGA_BUFFER *Q_X); + +void fpga_curve_x25519_ladder_step (const FPGA_BUFFER *P_X, + const FPGA_BUFFER *R0_X_in, const FPGA_BUFFER *R0_Z_in, + const FPGA_BUFFER *R1_X_in, const FPGA_BUFFER *R1_Z_in, + FPGA_BUFFER *R0_X_out, FPGA_BUFFER *R0_Z_out, + FPGA_BUFFER *R1_X_out, FPGA_BUFFER *R1_Z_out); + +void fpga_curve_x25519_to_affine (const FPGA_BUFFER *P_X, + const FPGA_BUFFER *P_Z, + FPGA_BUFFER *Q_X); + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/x25519/x25519_fpga_curve_abstract.cpp b/x25519/x25519_fpga_curve_abstract.cpp new file mode 100644 index 0000000..bb551df --- /dev/null +++ b/x25519/x25519_fpga_curve_abstract.cpp @@ -0,0 +1,222 @@ +//------------------------------------------------------------------------------ +// +// x25519_fpga_curve_abstract.cpp +// ----------------------------------------------- +// Elliptic curve arithmetic procedures for X25519 +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "x25519_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Globals +//------------------------------------------------------------------------------ +FPGA_BUFFER X25519_G_X; // x-coordinate of the base point +FPGA_BUFFER X25519_A24; // coefficient (A + 2) / 4 + + +//------------------------------------------------------------------------------ +void fpga_curve_x25519_init() +//------------------------------------------------------------------------------ +{ + int w_src, w_dst; // word counters + + FPGA_WORD TMP_G_X[FPGA_OPERAND_NUM_WORDS] = X25519_G_X_INIT; + FPGA_WORD TMP_A24[FPGA_OPERAND_NUM_WORDS] = X25519_A24_INIT; + + /* fill buffers for large multi-word integers */ + for ( w_src = 0, w_dst = FPGA_OPERAND_NUM_WORDS - 1; + w_src < FPGA_OPERAND_NUM_WORDS; + w_src++, w_dst--) + { + X25519_G_X.words[w_dst] = TMP_G_X[w_src]; + X25519_A24.words[w_dst] = TMP_A24[w_src]; + } +} + + +//------------------------------------------------------------------------------ +// +// Elliptic curve point scalar multiplication routine. +// +// This uses the Montgomery ladder to do the multiplication and then +// converts the result to affine coordinates. +// +// The algorithm is based on Algorithm 3 from "How to (pre-)compute a ladder" +// https://eprint.iacr.org/2017/264.pdf +// +//------------------------------------------------------------------------------ +void fpga_curve_x25519_scalar_multiply_abstract(const FPGA_BUFFER *PX, const FPGA_BUFFER *K, FPGA_BUFFER *QX) +//------------------------------------------------------------------------------ +{ + int word_count, bit_count; // counters + + // temporary buffers + FPGA_BUFFER R0_X; + FPGA_BUFFER R0_Z; + FPGA_BUFFER R1_X; + FPGA_BUFFER R1_Z; + + FPGA_BUFFER T0_X; + FPGA_BUFFER T0_Z; + FPGA_BUFFER T1_X; + FPGA_BUFFER T1_Z; + + // initialization + fpga_multiword_copy(&CURVE25519_ONE, &R0_X); + fpga_multiword_copy(&CURVE25519_ZERO, &R0_Z); + fpga_multiword_copy(PX, &R1_X); + fpga_multiword_copy(&CURVE25519_ONE, &R1_Z); + + // handy vars + FPGA_WORD k_word; + bool k_bit, r_swap = false; + + // multiply + for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--) + { + for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--) + { + // get current bit of K + k_word = K->words[word_count - 1] >> (bit_count - 1); + k_bit = (k_word & (FPGA_WORD)1) == 1; + + // we feed either R0, R1 or R1, R0 into the ladder + fpga_multiword_copy(r_swap == k_bit ? &R0_X : &R1_X, &T0_X); + fpga_multiword_copy(r_swap == k_bit ? &R0_Z : &R1_Z, &T0_Z); + fpga_multiword_copy(r_swap == k_bit ? &R1_X : &R0_X, &T1_X); + fpga_multiword_copy(r_swap == k_bit ? &R1_Z : &R0_Z, &T1_Z); + + // remember whether we did swapping + r_swap = k_bit; + + // montgomery ladder step + fpga_curve_x25519_ladder_step( PX, + &T0_X, &T0_Z, &T1_X, &T1_Z, + &R0_X, &R0_Z, &R1_X, &R1_Z); + } + } + + // since the lower three bits of the private key are always ...000, + // the result is in R0_X, R0_Z and + + // now conversion to affine coordinates + fpga_curve_x25519_to_affine(&R0_X, &R0_Z, &T0_X); + + // so far we've done everything modulo 2*P, we now need + // to do final reduction modulo P, this can be done using + // our modular adder this way: + fpga_modular_add(&T0_X, &CURVE25519_ZERO, QX, &CURVE25519_1P); +} + + +//------------------------------------------------------------------------------ +// +// Montgomery Ladder Step +// +// There are many papers describing Montgomery ladder, this particular +// implementation is based on Algorithm 2 from "Fast elliptic-curve +// cryptography on the Cell Broadband Engine" by Neil Costigan and Peter +// Schwabe +// https://cryptojedi.org/papers/celldh-20090107.pdf +// +//------------------------------------------------------------------------------ +void fpga_curve_x25519_ladder_step (const FPGA_BUFFER *PX, + const FPGA_BUFFER *R0X_in, const FPGA_BUFFER *R0Z_in, + const FPGA_BUFFER *R1X_in, const FPGA_BUFFER *R1Z_in, + FPGA_BUFFER *R0X_out, FPGA_BUFFER *R0Z_out, + FPGA_BUFFER *R1X_out, FPGA_BUFFER *R1Z_out) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER S0, S1; + FPGA_BUFFER D0, D1; + FPGA_BUFFER QS0, QD0; + FPGA_BUFFER S0D1, S1D0; + FPGA_BUFFER TS, TD; + FPGA_BUFFER QTD; + FPGA_BUFFER T0, TA, T1; + + fpga_modular_add(R0X_in, R0Z_in, &S0, &CURVE25519_2P); + fpga_modular_add(R1X_in, R1Z_in, &S1, &CURVE25519_2P); + fpga_modular_sub(R0X_in, R0Z_in, &D0, &CURVE25519_2P); + fpga_modular_sub(R1X_in, R1Z_in, &D1, &CURVE25519_2P); + // + fpga_modular_mul(&S0, &S0, &QS0, &CURVE25519_2P); + fpga_modular_mul(&D0, &D0, &QD0, &CURVE25519_2P); + fpga_modular_mul(&S0, &D1, &S0D1, &CURVE25519_2P); + fpga_modular_mul(&S1, &D0, &S1D0, &CURVE25519_2P); + // + fpga_modular_add(&S1D0, &S0D1, &TS, &CURVE25519_2P); + fpga_modular_sub(&S1D0, &S0D1, &TD, &CURVE25519_2P); + // + fpga_modular_mul(&TD, &TD, &QTD, &CURVE25519_2P); + // + fpga_modular_sub(&QS0, &QD0, &T0, &CURVE25519_2P); + fpga_modular_mul(&T0, &X25519_A24, &TA, &CURVE25519_2P); + fpga_modular_add(&TA, &QD0, &T1, &CURVE25519_2P); + // + fpga_modular_mul(&QS0, &QD0, R0X_out, &CURVE25519_2P); + fpga_modular_mul(&T0, &T1, R0Z_out, &CURVE25519_2P); + fpga_modular_mul(&TS, &TS, R1X_out, &CURVE25519_2P); + fpga_modular_mul(PX, &QTD, R1Z_out, &CURVE25519_2P); +} + + +//------------------------------------------------------------------------------ +// +// Conversion to affine coordinates. +// +// Q_X = P_X / P_Z = P_X * P_Z ^ -1 +// +//------------------------------------------------------------------------------ +void fpga_curve_x25519_to_affine (const FPGA_BUFFER *P_X, + const FPGA_BUFFER *P_Z, + FPGA_BUFFER *Q_X) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER P_Z_1; + + fpga_modular_inv_abstract(P_Z, &P_Z_1, &CURVE25519_2P); + + fpga_modular_mul(P_X, &P_Z_1, Q_X, &CURVE25519_2P); +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/x25519/x25519_fpga_curve_microcode.cpp b/x25519/x25519_fpga_curve_microcode.cpp new file mode 100644 index 0000000..d57cb63 --- /dev/null +++ b/x25519/x25519_fpga_curve_microcode.cpp @@ -0,0 +1,208 @@ +//------------------------------------------------------------------------------ +// +// x25519_fpga_curve_microcode.cpp +// ----------------------------------------------- +// Elliptic curve arithmetic procedures for X25519 +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "x25519_fpga_model.h" + + +//------------------------------------------------------------------------------ +enum X25519_UOP_OPERAND +//------------------------------------------------------------------------------ +{ + CONST_A24 = CURVE25519_UOP_OPERAND_COUNT + 1, + + LADDER_R0_X, + LADDER_R0_Z, + + LADDER_R1_X, + LADDER_R1_Z, + + LADDER_T0_X, + LADDER_T0_Z, + + LADDER_T1_X, + LADDER_T1_Z, + + LADDER_S0, + LADDER_S1, + + LADDER_D0, + LADDER_D1, + + LADDER_QS0, + LADDER_QD0, + + LADDER_S0D1, + LADDER_S1D0, + + LADDER_TS, + LADDER_TD, + + LADDER_QTD, + + LADDER_T0, + LADDER_TA, + LADDER_T1, + + LADDER_P_X, + + X25519_UOP_OPERAND_COUNT +}; + + +//------------------------------------------------------------------------------ +// Storage Buffers +//------------------------------------------------------------------------------ +static FPGA_BUFFER BUF_LO[X25519_UOP_OPERAND_COUNT]; +static FPGA_BUFFER BUF_HI[X25519_UOP_OPERAND_COUNT]; + + +//------------------------------------------------------------------------------ +// +// Elliptic curve point scalar multiplication routine. +// +// This uses the Montgomery ladder to do the multiplication and then +// converts the result to affine coordinates. +// +// The algorithm is based on Algorithm 3 from "How to (pre-)compute a ladder" +// https://eprint.iacr.org/2017/264.pdf +// +//------------------------------------------------------------------------------ +void fpga_curve_x25519_scalar_multiply_microcode(const FPGA_BUFFER *PX, const FPGA_BUFFER *K, FPGA_BUFFER *QX) +//------------------------------------------------------------------------------ +{ + bool k_bit, s; // 1-bit values + FPGA_WORD k_word; // current word of multiplier + int word_count, bit_count; // counters + + // initialize constant operands + fpga_multiword_copy(&CURVE25519_ZERO, &BUF_LO[CONST_ZERO]); + fpga_multiword_copy(&CURVE25519_ZERO, &BUF_HI[CONST_ZERO]); + + fpga_multiword_copy(&CURVE25519_ONE, &BUF_LO[CONST_ONE]); + fpga_multiword_copy(&CURVE25519_ONE, &BUF_HI[CONST_ONE]); + + fpga_multiword_copy(&X25519_A24, &BUF_LO[CONST_A24]); + fpga_multiword_copy(&X25519_A24, &BUF_HI[CONST_A24]); + + // + // BEGIN MICROCODE + // + + // initialization + uop_load(PX, BANK_HI, LADDER_P_X, BUF_LO, BUF_HI); + uop_move(BANK_HI, CONST_ONE, CONST_ZERO, BANK_LO, LADDER_R0_X, LADDER_R0_Z, BUF_LO, BUF_HI); + uop_move(BANK_HI, LADDER_P_X, CONST_ONE, BANK_LO, LADDER_R1_X, LADDER_R1_Z, BUF_LO, BUF_HI); + + // ladder + s = false; + for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--) + { + for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--) + { + k_word = K->words[word_count - 1] >> (bit_count - 1); // current word + k_bit = (k_word & (FPGA_WORD)1) == 1; // current bit + + // inputs are all in LO: R0_X, R0_Z, R1_X, R1_Z + + // swap if needed + if (s == k_bit) + { uop_move(BANK_LO, LADDER_R0_X, LADDER_R0_Z, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BUF_LO, BUF_HI); // HI: T0_X, T0_Z = LO: R0_X, R0_Z + uop_move(BANK_LO, LADDER_R1_X, LADDER_R1_Z, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BUF_LO, BUF_HI); // HI: T1_X, T1_Z = LO: R1_X, R1_Z + } + else + { uop_move(BANK_LO, LADDER_R1_X, LADDER_R1_Z, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BUF_LO, BUF_HI); // HI: T0_X, T0_Z = LO: R1_X, R1_Z + uop_move(BANK_LO, LADDER_R0_X, LADDER_R0_Z, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BUF_LO, BUF_HI); // HI: T1_X, T1_Z = LO: R0_X, R0_Z + } + + // remember whether we actually did the swap + s = k_bit; + + // run step + uop_calc(ADD, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BANK_LO, LADDER_S0, BUF_LO, BUF_HI, MOD_2P); // LO: S0 = HI: T0_X + T0_Z + uop_calc(ADD, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BANK_LO, LADDER_S1, BUF_LO, BUF_HI, MOD_2P); // LO: S1 = HI: T1_X + T1_Z + uop_calc(SUB, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BANK_LO, LADDER_D0, BUF_LO, BUF_HI, MOD_2P); // LO: D0 = HI: T0_X - T0_Z + uop_calc(SUB, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BANK_LO, LADDER_D1, BUF_LO, BUF_HI, MOD_2P); // LO: D1 = HI: T1_X - T1_Z + + uop_calc(MUL, BANK_LO, LADDER_S0, LADDER_S0, BANK_HI, LADDER_QS0, BUF_LO, BUF_HI, MOD_2P); // HI: QS0 = LO: S0 * S0 + uop_calc(MUL, BANK_LO, LADDER_D0, LADDER_D0, BANK_HI, LADDER_QD0, BUF_LO, BUF_HI, MOD_2P); // HI: QD0 = LO: D0 * D0 + uop_calc(MUL, BANK_LO, LADDER_S0, LADDER_D1, BANK_HI, LADDER_S0D1, BUF_LO, BUF_HI, MOD_2P); // HI: S0D1 = LO: S0 * D1 + uop_calc(MUL, BANK_LO, LADDER_S1, LADDER_D0, BANK_HI, LADDER_S1D0, BUF_LO, BUF_HI, MOD_2P); // HI: S1D0 = LO: S1 * D0 + + uop_calc(ADD, BANK_HI, LADDER_S1D0, LADDER_S0D1, BANK_LO, LADDER_TS, BUF_LO, BUF_HI, MOD_2P); // LO: TS = HI: S1D0 + S0D1 + uop_calc(SUB, BANK_HI, LADDER_S1D0, LADDER_S0D1, BANK_LO, LADDER_TD, BUF_LO, BUF_HI, MOD_2P); // LO: TD = HI: S1D0 - S0D1 + + uop_calc(MUL, BANK_LO, LADDER_TD, LADDER_TD, BANK_HI, LADDER_QTD, BUF_LO, BUF_HI, MOD_2P); // HI: QTD = LO: TD * TD + + uop_calc(SUB, BANK_HI, LADDER_QS0, LADDER_QD0, BANK_LO, LADDER_T0, BUF_LO, BUF_HI, MOD_2P); // LO: T0 = HI: QS0 - QD0 + uop_calc(MUL, BANK_LO, LADDER_T0, CONST_A24, BANK_HI, LADDER_TA, BUF_LO, BUF_HI, MOD_2P); // HI: TA = LO: T0 * A24 + uop_calc(ADD, BANK_HI, LADDER_TA, LADDER_QD0, BANK_LO, LADDER_T1, BUF_LO, BUF_HI, MOD_2P); // LO: T1 = HI: TA * QD0 + + uop_calc(MUL, BANK_HI, LADDER_QS0, LADDER_QD0, BANK_LO, LADDER_R0_X, BUF_LO, BUF_HI, MOD_2P); // LO: R0_X = HI: QS0 * QD0 + uop_calc(MUL, BANK_LO, LADDER_T0, LADDER_T1, BANK_HI, LADDER_R0_Z, BUF_LO, BUF_HI, MOD_2P); // HI: R0_Z = LO: T0 * T1 + uop_calc(MUL, BANK_LO, LADDER_TS, LADDER_TS, BANK_HI, LADDER_R1_X, BUF_LO, BUF_HI, MOD_2P); // HI: R1_X = LO: TS * TS + uop_calc(MUL, BANK_HI, LADDER_P_X, LADDER_QTD, BANK_LO, LADDER_R1_Z, BUF_LO, BUF_HI, MOD_2P); // LO: R1_Z = HI: PX * QTD + + uop_move(BANK_HI, LADDER_R0_Z, LADDER_R1_X, BANK_LO, LADDER_R0_Z, LADDER_R1_X, BUF_LO, BUF_HI); // LO: R0_Z, R1_X = HI: R0_Z, R1_X + } + } + + // inversion expects result to be in LO: T1 + uop_move(BANK_HI, LADDER_R0_Z, LADDER_R0_Z, BANK_LO, INVERT_T_1, INVERT_T_1, BUF_LO, BUF_HI); + + // just call piece of microcode + fpga_modular_inv_microcode(BUF_LO, BUF_HI); + + // inversion places result in HI: R1 + uop_move(BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R1, INVERT_R1, BUF_LO, BUF_HI); + uop_calc(MUL, BANK_LO, INVERT_R1, LADDER_R0_X, BANK_HI, INVERT_R2, BUF_LO, BUF_HI, MOD_2P); + + // finally reduce to just 1*P + uop_calc(ADD, BANK_HI, INVERT_R2, CONST_ZERO, BANK_LO, INVERT_R1, BUF_LO, BUF_HI, MOD_1P); // !!! + + // store result + uop_stor(BUF_LO, BUF_HI, BANK_LO, INVERT_R1, QX); +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ -- cgit v1.2.3