diff options
Diffstat (limited to 'ecdsa_fpga_microcode.cpp')
-rw-r--r-- | ecdsa_fpga_microcode.cpp | 432 |
1 files changed, 432 insertions, 0 deletions
diff --git a/ecdsa_fpga_microcode.cpp b/ecdsa_fpga_microcode.cpp new file mode 100644 index 0000000..f02dc8a --- /dev/null +++ b/ecdsa_fpga_microcode.cpp @@ -0,0 +1,432 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_microcode.cpp +// -------------------------------- +// Microcode Architecture for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Required for Microcode Routines +//------------------------------------------------------------------------------ +#define USE_MICROCODE + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// Global Buffers +//------------------------------------------------------------------------------ +FPGA_BUFFER BUF_LO[ECDSA_UOP_OPERAND_COUNT]; +FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT]; + + +//------------------------------------------------------------------------------ +// Global Flags +//------------------------------------------------------------------------------ +bool uop_flagz_sz; +bool uop_flagz_rz; +bool uop_flagz_e; +bool uop_flagz_f; + + +//------------------------------------------------------------------------------ +void uop_move(UOP_BANK src, int s_op, UOP_BANK dst, int d_op) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *s_ptr = NULL; + FPGA_BUFFER *d_ptr = NULL; + + if (src == BANK_LO) s_ptr = &BUF_LO[s_op]; + if (src == BANK_HI) s_ptr = &BUF_HI[s_op]; + if (dst == BANK_LO) d_ptr = &BUF_LO[d_op]; + if (dst == BANK_HI) d_ptr = &BUF_HI[d_op]; + + fpga_multiword_copy(s_ptr, d_ptr); +} + + +//------------------------------------------------------------------------------ +void uop_cmpz(UOP_BANK src, int s_op) +//------------------------------------------------------------------------------ +{ + bool flagz; + + FPGA_BUFFER *s_ptr = NULL; + + if (src == BANK_LO) s_ptr = &BUF_LO[s_op]; + if (src == BANK_HI) s_ptr = &BUF_HI[s_op]; + + flagz = fpga_multiword_is_zero(s_ptr); + + switch (s_op) + { + case CYCLE_SZ: + uop_flagz_sz = flagz; + break; + case CYCLE_RZ: + uop_flagz_rz = flagz; + break; + case CYCLE_E: + uop_flagz_e = flagz; + break; + case CYCLE_F: + uop_flagz_f = flagz; + break; + } +} + + +//------------------------------------------------------------------------------ +void uop_calc(UOP_MATH math, + UOP_BANK src, int s_op1, int s_op2, + UOP_BANK dst, int d_op) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *s_ptr1 = NULL; + FPGA_BUFFER *s_ptr2 = NULL; + FPGA_BUFFER *d_ptr = NULL; + FPGA_BUFFER *n_ptr = NULL; + + if (src == BANK_LO) + { s_ptr1 = &BUF_LO[s_op1]; + s_ptr2 = &BUF_LO[s_op2]; + } + if (src == BANK_HI) + { s_ptr1 = &BUF_HI[s_op1]; + s_ptr2 = &BUF_HI[s_op2]; + } + if (dst == BANK_LO) + { d_ptr = &BUF_LO[d_op]; + } + if (dst == BANK_HI) + { d_ptr = &BUF_HI[d_op]; + } + + if (math == ADD) fpga_modular_add(s_ptr1, s_ptr2, d_ptr); + if (math == SUB) fpga_modular_sub(s_ptr1, s_ptr2, d_ptr); + if (math == MUL) fpga_modular_mul(s_ptr1, s_ptr2, d_ptr); +} + + +//------------------------------------------------------------------------------ +void uop_load(const FPGA_BUFFER *mem, UOP_BANK dst, int d_op) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *d_ptr = NULL; + if (dst == BANK_LO) d_ptr = &BUF_LO[d_op]; + if (dst == BANK_HI) d_ptr = &BUF_HI[d_op]; + + fpga_multiword_copy(mem, d_ptr); +} + + +//------------------------------------------------------------------------------ +void uop_stor(UOP_BANK src, int s_op, FPGA_BUFFER *mem) +//------------------------------------------------------------------------------ +{ + FPGA_BUFFER *s_ptr = NULL; + if (src == BANK_LO) + { s_ptr = &BUF_LO[s_op]; + } + if (src == BANK_HI) + { s_ptr = &BUF_HI[s_op]; + } + + fpga_multiword_copy(s_ptr, mem); +} + + +//------------------------------------------------------------------------------ +void fpga_modular_inv23_p256_microcode() +//------------------------------------------------------------------------------ +// +// This computes A2 = RZ^-2 and A3 = RZ^-3. +// +// RZ is read from the lower bank, A2 and A3 are written to the upper bank. +// +//------------------------------------------------------------------------------ +{ + uop_loop; + + // + // operand placement map: + // + // X1 - LO,HI (RZ) + // X2 - LO,HI + // X3 - LO,HI + // X6 - LO + // X12 - HI + // X15 - LO,HI + // X30 - HI + // X32 - LO,HI + + /* BEGIN_MICROCODE: INVERT_P256 */ + + // first obtain intermediate helper quantities (X#) + + // mirror X1 to HI bank (don't waste time copying to X1, just use RZ) + uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ); + + // compute X2 and mirror to the other bank + uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2); + uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2); + + // compute X3 and mirror to the other bank + uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3); + uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3); + + // compute X6 (stored in the lower bank) + uop_calc(MUL, BANK_LO, INVERT_X3, INVERT_X3, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X3, BANK_LO, INVERT_X6); + + // compute X12 (stored in the upper bank) + uop_calc(MUL, BANK_LO, INVERT_X6, INVERT_X6, BANK_HI, INVERT_R1); + uop_cycle(5); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X6, BANK_HI, INVERT_X12); + + // compute X15 and mirror to the other bank + uop_calc(MUL, BANK_HI, INVERT_X12, INVERT_X12, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X3, BANK_HI, INVERT_X15); + uop_move(BANK_HI, INVERT_X15, BANK_LO, INVERT_X15); + + // compute X30 (stored in the upper bank) + uop_calc(MUL, BANK_HI, INVERT_X15, INVERT_X15, BANK_LO, INVERT_R1); + uop_cycle(14); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X15, BANK_HI, INVERT_X30); + + // compute X32 and mirror to the other bank + uop_calc(MUL, BANK_HI, INVERT_X30, INVERT_X30, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X2, BANK_LO, INVERT_X32); + uop_move(BANK_LO, INVERT_X32, BANK_HI, INVERT_X32); + + // now compute the final results + + uop_calc(MUL, BANK_LO, INVERT_X32, INVERT_X32, BANK_HI, INVERT_R1); + + uop_cycle(31); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_LO, INVERT_R2, CYCLE_RZ, BANK_HI, INVERT_R1); + + uop_cycle(128); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X32, BANK_LO, INVERT_R2); + + uop_cycle(32); + uop_calc_if_even(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_repeat(); + + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X32, BANK_HI, INVERT_R1); + + uop_cycle(30); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X30, BANK_LO, INVERT_R2); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + + // move A2 into the upper bank + uop_move(BANK_LO, INVERT_R2, BANK_HI, INVERT_A2); + + // A3 ends up in the upper bank by itself + uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3); + + /* END_MICROCODE */ +} + + +//------------------------------------------------------------------------------ +void fpga_modular_inv23_p384_microcode() +//------------------------------------------------------------------------------ +// +// This computes A2 = RZ^-2 and A3 = RZ^-3. +// +// RZ is read from the lower bank, A2 and A3 are written to the upper bank. +// +//------------------------------------------------------------------------------ +{ + uop_loop; + + // + // operand placement map: + // + // X1 - LO,HI (RZ) + // X2 - LO,HI + // X3 - LO,HI + // X6 - LO + // X12 - HI + // X15 - LO,HI + // X30 - HI + // X32 - LO,HI + + /* BEGIN_MICROCODE: INVERT_P384 */ + + // first obtain intermediate helper quantities (X#) + + // mirror X1 to HI bank (don't waste time copying to X1, just use RZ) + uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ); + + // compute X2 and mirror to the other bank + uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2); + uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2); + + // compute X3 and mirror to the other bank + uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3); + uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3); + + // compute X6 (stored in the lower bank) + uop_calc(MUL, BANK_LO, INVERT_X3, INVERT_X3, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X3, BANK_LO, INVERT_X6); + + // compute X12 (stored in the upper bank) + uop_calc(MUL, BANK_LO, INVERT_X6, INVERT_X6, BANK_HI, INVERT_R1); + uop_cycle(5); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X6, BANK_HI, INVERT_X12); + + // compute X15 and mirror to the other bank + uop_calc(MUL, BANK_HI, INVERT_X12, INVERT_X12, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X3, BANK_HI, INVERT_X15); + uop_move(BANK_HI, INVERT_X15, BANK_LO, INVERT_X15); + + // compute X30 (stored in the upper bank) + uop_calc(MUL, BANK_HI, INVERT_X15, INVERT_X15, BANK_LO, INVERT_R1); + uop_cycle(14); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X15, BANK_HI, INVERT_X30); + + // compute X60 (stored in the lower bank) + uop_calc(MUL, BANK_HI, INVERT_X30, INVERT_X30, BANK_LO, INVERT_R1); + uop_cycle(29); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_X60); + + // compute X120 (stored in the upper bank) + uop_calc(MUL, BANK_LO, INVERT_X60, INVERT_X60, BANK_HI, INVERT_R1); + uop_cycle(59); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1); + uop_repeat(); + uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X60, BANK_HI, INVERT_X120); + + // now compute the final results + + uop_calc(MUL, BANK_HI, INVERT_X120, INVERT_X120, BANK_LO, INVERT_R1); + + uop_cycle(119); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X120, BANK_LO, INVERT_R1); + + uop_cycle(15); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X15, BANK_LO, INVERT_R1); + + uop_cycle(31); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X2, BANK_HI, INVERT_R2); + + uop_cycle(94); + uop_calc_if_even(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_repeat(); + + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2); + uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1); + + // move A2 into the upper bank + uop_move(BANK_LO, INVERT_R1, BANK_HI, INVERT_A2); + + // A3 ends up in the upper bank by itself + uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1); + uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3); + + /* END_MICROCODE */ +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ |