about | summary | refs | log | tree | commit | diff
path: root/ecdsa_fpga_microcode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'ecdsa_fpga_microcode.cpp')
-rw-r--r-- ecdsa_fpga_microcode.cpp 432
1 files changed, 432 insertions, 0 deletions
diff --git a/ecdsa_fpga_microcode.cpp b/ecdsa_fpga_microcode.cpp
new file mode 100644
index 0000000..f02dc8a
--- /dev/null
+++ b/ecdsa_fpga_microcode.cpp
@@ -0,0 +1,432 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa_fpga_microcode.cpp
+// --------------------------------
+// Microcode Architecture for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Required for Microcode Routines
+//------------------------------------------------------------------------------
+#define USE_MICROCODE
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "ecdsa_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Global Buffers
+//------------------------------------------------------------------------------
+FPGA_BUFFER BUF_LO[ECDSA_UOP_OPERAND_COUNT]; // operands addressed through BANK_LO, indexed by operand number
+FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT]; // operands addressed through BANK_HI, indexed by operand number
+
+
+//------------------------------------------------------------------------------
+// Global Flags
+//------------------------------------------------------------------------------
+bool uop_flagz_sz; // result of the last uop_cmpz() zero test on operand CYCLE_SZ
+bool uop_flagz_rz; // result of the last uop_cmpz() zero test on operand CYCLE_RZ
+bool uop_flagz_e; // result of the last uop_cmpz() zero test on operand CYCLE_E
+bool uop_flagz_f; // result of the last uop_cmpz() zero test on operand CYCLE_F
+
+
+//------------------------------------------------------------------------------
+void uop_move(UOP_BANK src, int s_op, UOP_BANK dst, int d_op)
+//------------------------------------------------------------------------------
+//
+// Copies operand s_op of bank src into operand d_op of bank dst.
+//
+// A bank value other than BANK_LO / BANK_HI leaves the corresponding pointer
+// NULL; it is handed to fpga_multiword_copy() as is.
+//
+//------------------------------------------------------------------------------
+{
+    // translate each (bank, operand index) pair into a buffer address
+    FPGA_BUFFER *from = (src == BANK_HI) ? &BUF_HI[s_op]
+                      : (src == BANK_LO) ? &BUF_LO[s_op]
+                      : NULL;
+
+    FPGA_BUFFER *to   = (dst == BANK_HI) ? &BUF_HI[d_op]
+                      : (dst == BANK_LO) ? &BUF_LO[d_op]
+                      : NULL;
+
+    fpga_multiword_copy(from, to);
+}
+
+
+//------------------------------------------------------------------------------
+void uop_cmpz(UOP_BANK src, int s_op)
+//------------------------------------------------------------------------------
+//
+// Tests operand s_op of bank src for zero and stores the outcome in the global
+// flag that belongs to that operand index:
+//
+//   CYCLE_SZ -> uop_flagz_sz      CYCLE_RZ -> uop_flagz_rz
+//   CYCLE_E  -> uop_flagz_e       CYCLE_F  -> uop_flagz_f
+//
+// The flag is selected by s_op alone (the bank does not matter). For any other
+// operand index the test is still carried out, but no flag is updated.
+//
+//------------------------------------------------------------------------------
+{
+    FPGA_BUFFER *operand = (src == BANK_HI) ? &BUF_HI[s_op]
+                         : (src == BANK_LO) ? &BUF_LO[s_op]
+                         : NULL;
+
+    const bool is_zero = fpga_multiword_is_zero(operand);
+
+    // latch the result into the flag matching the operand index
+    if      (s_op == CYCLE_SZ) uop_flagz_sz = is_zero;
+    else if (s_op == CYCLE_RZ) uop_flagz_rz = is_zero;
+    else if (s_op == CYCLE_E)  uop_flagz_e  = is_zero;
+    else if (s_op == CYCLE_F)  uop_flagz_f  = is_zero;
+}
+
+
+//------------------------------------------------------------------------------
+void uop_calc(UOP_MATH math,
+ UOP_BANK src, int s_op1, int s_op2,
+ UOP_BANK dst, int d_op)
+//------------------------------------------------------------------------------
+//
+// Performs one modular arithmetic micro-operation on the operand banks.
+//
+//   math         - ADD, SUB or MUL; any other value performs no operation
+//   src          - bank that BOTH source operands are read from
+//   s_op1, s_op2 - indices of the source operands within src
+//   dst          - bank that receives the result
+//   d_op         - index of the result operand within dst
+//
+// The helpers are called as helper(operand s_op1, operand s_op2, operand d_op).
+// A bank value other than BANK_LO / BANK_HI leaves the matching pointer(s)
+// NULL; they are passed on to the helper as is.
+//
+//------------------------------------------------------------------------------
+{
+    FPGA_BUFFER *s_ptr1 = NULL;
+    FPGA_BUFFER *s_ptr2 = NULL;
+    FPGA_BUFFER *d_ptr  = NULL;
+
+    // both sources always come from the same bank
+    if (src == BANK_LO)
+    {
+        s_ptr1 = &BUF_LO[s_op1];
+        s_ptr2 = &BUF_LO[s_op2];
+    }
+    else if (src == BANK_HI)
+    {
+        s_ptr1 = &BUF_HI[s_op1];
+        s_ptr2 = &BUF_HI[s_op2];
+    }
+
+    if      (dst == BANK_LO) d_ptr = &BUF_LO[d_op];
+    else if (dst == BANK_HI) d_ptr = &BUF_HI[d_op];
+
+    // dispatch to the matching modular helper
+    if      (math == ADD) fpga_modular_add(s_ptr1, s_ptr2, d_ptr);
+    else if (math == SUB) fpga_modular_sub(s_ptr1, s_ptr2, d_ptr);
+    else if (math == MUL) fpga_modular_mul(s_ptr1, s_ptr2, d_ptr);
+}
+
+
+//------------------------------------------------------------------------------
+void uop_load(const FPGA_BUFFER *mem, UOP_BANK dst, int d_op)
+//------------------------------------------------------------------------------
+//
+// Copies the buffer pointed to by mem into operand d_op of bank dst.
+//
+// A bank value other than BANK_LO / BANK_HI leaves the destination pointer
+// NULL; it is handed to fpga_multiword_copy() as is.
+//
+//------------------------------------------------------------------------------
+{
+    FPGA_BUFFER *target = NULL;
+
+    if (dst == BANK_HI)
+    {
+        target = &BUF_HI[d_op];
+    }
+    else if (dst == BANK_LO)
+    {
+        target = &BUF_LO[d_op];
+    }
+
+    fpga_multiword_copy(mem, target);
+}
+
+
+//------------------------------------------------------------------------------
+void uop_stor(UOP_BANK src, int s_op, FPGA_BUFFER *mem)
+//------------------------------------------------------------------------------
+//
+// Copies operand s_op of bank src into the buffer pointed to by mem.
+//
+// A bank value other than BANK_LO / BANK_HI leaves the source pointer NULL;
+// it is handed to fpga_multiword_copy() as is.
+//
+//------------------------------------------------------------------------------
+{
+    FPGA_BUFFER *source = (src == BANK_HI) ? &BUF_HI[s_op]
+                        : (src == BANK_LO) ? &BUF_LO[s_op]
+                        : NULL;
+
+    fpga_multiword_copy(source, mem);
+}
+
+
+//------------------------------------------------------------------------------
+void fpga_modular_inv23_p256_microcode()
+//------------------------------------------------------------------------------
+// Microcode for the P-256 inversion step; takes no arguments, returns nothing.
+// This computes A2 = RZ^-2 and A3 = RZ^-3.
+// All arithmetic is done with MUL micro-ops (a squaring is a value times itself).
+// RZ is read from the lower bank, A2 and A3 are written to the upper bank.
+// INVERT_R1 and INVERT_R2 are used as scratch operands in both banks.
+//------------------------------------------------------------------------------
+{
+ uop_loop; // NOTE(review): uop_loop, uop_cycle(), uop_repeat(), uop_calc_if_even()/_if_odd() are not defined in this file (presumably in ecdsa_fpga_model.h)
+ // NOTE(review): comments below assume uop_cycle(n)..uop_repeat() makes n passes, one squaring per pass (_if_even op on passes 0,2,.., _if_odd op on passes 1,3,..) - confirm
+ // Xn denotes RZ^(2^n - 1), e.g. X2 = RZ^3 and X3 = RZ^7
+ // operand placement map (bank(s) holding each helper quantity once computed):
+ //
+ // X1 - LO,HI (RZ)
+ // X2 - LO,HI
+ // X3 - LO,HI
+ // X6 - LO
+ // X12 - HI
+ // X15 - LO,HI
+ // X30 - HI
+ // X32 - LO,HI
+
+ /* BEGIN_MICROCODE: INVERT_P256 */
+
+ // first obtain intermediate helper quantities (X#)
+
+ // mirror X1 to HI bank (don't waste time copying to X1, just use RZ)
+ uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ);
+
+ // compute X2 = RZ^2 * RZ and mirror to the other bank
+ uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2);
+ uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2);
+
+ // compute X3 = X2^2 * RZ and mirror to the other bank
+ uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3);
+ uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3);
+
+ // compute X6 = X3^(2^3) * X3: three squarings, one multiply (stored in the lower bank)
+ uop_calc(MUL, BANK_LO, INVERT_X3, INVERT_X3, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X3, BANK_LO, INVERT_X6);
+
+ // compute X12 = X6^(2^6) * X6: 1 + 5 squarings, one multiply (stored in the upper bank)
+ uop_calc(MUL, BANK_LO, INVERT_X6, INVERT_X6, BANK_HI, INVERT_R1);
+ uop_cycle(5);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X6, BANK_HI, INVERT_X12);
+
+ // compute X15 = X12^(2^3) * X3 and mirror to the other bank
+ uop_calc(MUL, BANK_HI, INVERT_X12, INVERT_X12, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X3, BANK_HI, INVERT_X15);
+ uop_move(BANK_HI, INVERT_X15, BANK_LO, INVERT_X15);
+
+ // compute X30 = X15^(2^15) * X15: 1 + 14 squarings, one multiply (stored in the upper bank)
+ uop_calc(MUL, BANK_HI, INVERT_X15, INVERT_X15, BANK_LO, INVERT_R1);
+ uop_cycle(14);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X15, BANK_HI, INVERT_X30);
+
+ // compute X32 = X30^(2^2) * X2 and mirror to the other bank
+ uop_calc(MUL, BANK_HI, INVERT_X30, INVERT_X30, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X2, BANK_LO, INVERT_X32);
+ uop_move(BANK_LO, INVERT_X32, BANK_HI, INVERT_X32);
+
+ // now compute the final results (running value ping-pongs between INVERT_R1 and INVERT_R2)
+ // start from X32: 1 + 31 squarings, then multiply by RZ
+ uop_calc(MUL, BANK_LO, INVERT_X32, INVERT_X32, BANK_HI, INVERT_R1);
+
+ uop_cycle(31);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_LO, INVERT_R2, CYCLE_RZ, BANK_HI, INVERT_R1);
+ // 128 squarings, then multiply by X32
+ uop_cycle(128);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X32, BANK_LO, INVERT_R2);
+ // 32 squarings, then multiply by X32
+ uop_cycle(32);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X32, BANK_HI, INVERT_R1);
+ // 30 squarings, then multiply by X30, then 2 final squarings
+ uop_cycle(30);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X30, BANK_LO, INVERT_R2);
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+
+ // the chain result (lower bank, INVERT_R2) is A2; move A2 into the upper bank
+ uop_move(BANK_LO, INVERT_R2, BANK_HI, INVERT_A2);
+
+ // A3 = A2^2 * RZ; it ends up in the upper bank by itself
+ uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3);
+
+ /* END_MICROCODE */
+}
+
+
+//------------------------------------------------------------------------------
+void fpga_modular_inv23_p384_microcode()
+//------------------------------------------------------------------------------
+// Microcode for the P-384 inversion step; takes no arguments, returns nothing.
+// This computes A2 = RZ^-2 and A3 = RZ^-3.
+// All arithmetic is done with MUL micro-ops (a squaring is a value times itself).
+// RZ is read from the lower bank, A2 and A3 are written to the upper bank.
+// INVERT_R1 and INVERT_R2 are used as scratch operands in both banks.
+//------------------------------------------------------------------------------
+{
+ uop_loop; // NOTE(review): uop_loop, uop_cycle(), uop_repeat(), uop_calc_if_even()/_if_odd() are not defined in this file (presumably in ecdsa_fpga_model.h)
+ // NOTE(review): comments below assume uop_cycle(n)..uop_repeat() makes n passes, one squaring per pass (_if_even op on passes 0,2,.., _if_odd op on passes 1,3,..) - confirm
+ // Xn denotes RZ^(2^n - 1), e.g. X2 = RZ^3 and X3 = RZ^7
+ // operand placement map (bank(s) holding each helper quantity once computed):
+ //
+ // X1 - LO,HI (RZ)
+ // X2 - LO,HI
+ // X3 - LO,HI
+ // X6 - LO
+ // X12 - HI
+ // X15 - LO,HI
+ // X30 - HI
+ // X60 - LO
+ // X120 - HI
+ /* BEGIN_MICROCODE: INVERT_P384 */
+
+ // first obtain intermediate helper quantities (X#)
+
+ // mirror X1 to HI bank (don't waste time copying to X1, just use RZ)
+ uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ);
+
+ // compute X2 = RZ^2 * RZ and mirror to the other bank
+ uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2);
+ uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2);
+
+ // compute X3 = X2^2 * RZ and mirror to the other bank
+ uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3);
+ uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3);
+
+ // compute X6 = X3^(2^3) * X3: three squarings, one multiply (stored in the lower bank)
+ uop_calc(MUL, BANK_LO, INVERT_X3, INVERT_X3, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X3, BANK_LO, INVERT_X6);
+
+ // compute X12 = X6^(2^6) * X6: 1 + 5 squarings, one multiply (stored in the upper bank)
+ uop_calc(MUL, BANK_LO, INVERT_X6, INVERT_X6, BANK_HI, INVERT_R1);
+ uop_cycle(5);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X6, BANK_HI, INVERT_X12);
+
+ // compute X15 = X12^(2^3) * X3 and mirror to the other bank
+ uop_calc(MUL, BANK_HI, INVERT_X12, INVERT_X12, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X3, BANK_HI, INVERT_X15);
+ uop_move(BANK_HI, INVERT_X15, BANK_LO, INVERT_X15);
+
+ // compute X30 = X15^(2^15) * X15: 1 + 14 squarings, one multiply (stored in the upper bank)
+ uop_calc(MUL, BANK_HI, INVERT_X15, INVERT_X15, BANK_LO, INVERT_R1);
+ uop_cycle(14);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X15, BANK_HI, INVERT_X30);
+
+ // compute X60 = X30^(2^30) * X30: 1 + 29 squarings, one multiply (stored in the lower bank)
+ uop_calc(MUL, BANK_HI, INVERT_X30, INVERT_X30, BANK_LO, INVERT_R1);
+ uop_cycle(29);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_X60);
+
+ // compute X120 = X60^(2^60) * X60: 1 + 59 squarings, one multiply (stored in the upper bank)
+ uop_calc(MUL, BANK_LO, INVERT_X60, INVERT_X60, BANK_HI, INVERT_R1);
+ uop_cycle(59);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X60, BANK_HI, INVERT_X120);
+
+ // now compute the final results (running value ping-pongs between INVERT_R1 and INVERT_R2)
+ // start from X120: 1 + 119 squarings, then multiply by X120
+ uop_calc(MUL, BANK_HI, INVERT_X120, INVERT_X120, BANK_LO, INVERT_R1);
+
+ uop_cycle(119);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X120, BANK_LO, INVERT_R1);
+ // 15 squarings, then multiply by X15
+ uop_cycle(15);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X15, BANK_LO, INVERT_R1);
+ // 31 squarings, multiply by X30, then 2 squarings and multiply by X2
+ uop_cycle(31);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X2, BANK_HI, INVERT_R2);
+ // 94 squarings, multiply by X30, then 2 final squarings
+ uop_cycle(94);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+
+ // the chain result (lower bank, INVERT_R1) is A2; move A2 into the upper bank
+ uop_move(BANK_LO, INVERT_R1, BANK_HI, INVERT_A2);
+
+ // A3 = A2^2 * RZ; it ends up in the upper bank by itself
+ uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3);
+
+ /* END_MICROCODE */
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------