diff options
Diffstat (limited to 'ecdsa_fpga_curve_microcode.cpp')
-rw-r--r-- | ecdsa_fpga_curve_microcode.cpp | 494 |
1 files changed, 494 insertions, 0 deletions
diff --git a/ecdsa_fpga_curve_microcode.cpp b/ecdsa_fpga_curve_microcode.cpp new file mode 100644 index 0000000..553498c --- /dev/null +++ b/ecdsa_fpga_curve_microcode.cpp @@ -0,0 +1,494 @@ +//------------------------------------------------------------------------------ +// +// ecdsa_fpga_curve_microcode.cpp +// ---------------------------------------------- +// Elliptic curve arithmetic procedures for ECDSA +// +// Authors: Pavel Shatov +// +// Copyright (c) 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + + +//------------------------------------------------------------------------------ +// Required for Microcode Routines +//------------------------------------------------------------------------------ +#define USE_MICROCODE + + +//------------------------------------------------------------------------------ +// Headers +//------------------------------------------------------------------------------ +#include "ecdsa_fpga_model.h" + + +//------------------------------------------------------------------------------ +// +// Doubles the point stored in CYCLE_R* and stores the result in CYCLE_S*. +// +//------------------------------------------------------------------------------ +void fpga_curve_double_jacobian_microcode() +//------------------------------------------------------------------------------ +{ + // fpga_modular_mul(RZ, RZ, RZ2 ); // 2. RZ2 = RZ * RZ + // fpga_modular_sub(RX, RZ2, T1 ); // 3. T1 = RX - RZ2 + // fpga_modular_add(RX, RZ2, T2 ); // 4. T2 = RX + RZ2 + // fpga_modular_mul(T1, T2, T3 ); // 5. T3 = T1 * T2 + // fpga_modular_add(T3, T3, T4 ); // 6a. T4 = T3 + T3 + // fpga_modular_add(T3, T4, A ); // 6b. A = T3 + T4 + // fpga_modular_add(RY, RY, B ); // 7. B = RY + RY + // fpga_modular_mul(B, RZ, SZ ); // 8. SZ = B * RZ [output] + // fpga_modular_mul(B, B, C ); // 9. C = B * B + // fpga_modular_mul(C, RX, D ); // 10. D = C * RX + // fpga_modular_mul(C, C, C2 ); // 11. C2 = C * C + // fpga_modular_mul(C2, DELTA, C2_2); // 12. C2_2 = C / 2 + // fpga_modular_mul(A, A, A2 ); // 13. A2 = A * A + // fpga_modular_add(D, D, T1 ); // 14. T1 = D + D + // fpga_modular_sub(A2, T1, SX ); // 15. SX = A2 - T1 [output] + // fpga_modular_sub(D, SX, T1 ); // 16. T1 = D - SX + // fpga_modular_mul(A , T1, T2 ); // 17. T2 = A * T1 + // fpga_modular_sub(T2, C2_2, SY ); // 18. SY = T2 - C2_2 [output] + + /* BEGIN_MICROCODE: CYCLE_DOUBLE */ + + FPGA_BUFFER TEMP; + + uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, CYCLE_Z2); + uop_stor(BANK_HI, CYCLE_Z2, &TEMP); print_fpga_buffer("CYCLE_Z2 = ", &TEMP); + + uop_calc(SUB, BANK_HI, CYCLE_RX, CYCLE_Z2, BANK_LO, CYCLE_T1); + uop_stor(BANK_LO, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP); + + uop_calc(ADD, BANK_HI, CYCLE_RX, CYCLE_Z2, BANK_LO, CYCLE_T2); + uop_stor(BANK_LO, CYCLE_T2, &TEMP); print_fpga_buffer("CYCLE_T2 = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_T1, CYCLE_T2, BANK_HI, CYCLE_T3); + uop_stor(BANK_HI, CYCLE_T3, &TEMP); print_fpga_buffer("CYCLE_T3 = ", &TEMP); + + uop_calc(ADD, BANK_HI, CYCLE_T3, CYCLE_T3, BANK_LO, CYCLE_T4); + uop_stor(BANK_LO, CYCLE_T4, &TEMP); print_fpga_buffer("CYCLE_T4 = ", &TEMP); + + uop_move( BANK_LO, CYCLE_T4, BANK_HI, CYCLE_T4); + + uop_calc(ADD, BANK_HI, CYCLE_T3, CYCLE_T4, BANK_LO, CYCLE_A); + uop_stor(BANK_LO, CYCLE_A, &TEMP); print_fpga_buffer("CYCLE_A = ", &TEMP); + + uop_calc(ADD, BANK_HI, CYCLE_RY, CYCLE_RY, BANK_LO, CYCLE_B); + uop_stor(BANK_LO, CYCLE_B, &TEMP); print_fpga_buffer("CYCLE_B = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_B, CYCLE_RZ, BANK_HI, CYCLE_SZ); + uop_stor(BANK_HI, CYCLE_SZ, &TEMP); print_fpga_buffer("CYCLE_SZ = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_B, CYCLE_B, BANK_HI, CYCLE_C); + uop_stor(BANK_HI, CYCLE_C, &TEMP); print_fpga_buffer("CYCLE_C = ", &TEMP); + + uop_calc(MUL, BANK_HI, CYCLE_C, CYCLE_RX, BANK_LO, CYCLE_D); + uop_stor(BANK_LO, CYCLE_D, &TEMP); print_fpga_buffer("CYCLE_D = ", &TEMP); + + uop_calc(MUL, BANK_HI, CYCLE_C, CYCLE_C, BANK_LO, CYCLE_C2); + uop_stor(BANK_LO, CYCLE_C2, &TEMP); print_fpga_buffer("CYCLE_C2 = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_C2, CONST_DELTA, BANK_HI, CYCLE_C2_2); + uop_stor(BANK_HI, CYCLE_C2_2, &TEMP); print_fpga_buffer("CYCLE_C2_2 = ", &TEMP); + + uop_calc(MUL, BANK_LO, CYCLE_A, CYCLE_A, BANK_HI, CYCLE_A2); + uop_stor(BANK_HI, CYCLE_A2, &TEMP); print_fpga_buffer("CYCLE_A2 = ", &TEMP); + + uop_calc(ADD, BANK_LO, CYCLE_D, CYCLE_D, BANK_HI, CYCLE_T1); + uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP); + + uop_calc(SUB, BANK_HI, CYCLE_A2, CYCLE_T1, BANK_LO, CYCLE_SX); + uop_stor(BANK_LO, CYCLE_SX, &TEMP); print_fpga_buffer("CYCLE_SX = ", &TEMP); + + uop_calc(SUB, BANK_LO, CYCLE_D, CYCLE_SX, BANK_HI, CYCLE_T1); + uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP); + + uop_move( BANK_HI, CYCLE_T1, BANK_LO, CYCLE_T1); + + uop_calc(MUL, BANK_LO, CYCLE_A, CYCLE_T1, BANK_HI, CYCLE_T2); + uop_stor(BANK_HI, CYCLE_T2, &TEMP); print_fpga_buffer("CYCLE_T2 = ", &TEMP); + + uop_calc(SUB, BANK_HI, CYCLE_T2, CYCLE_C2_2, BANK_LO, CYCLE_SY); + uop_stor(BANK_LO, CYCLE_SY, &TEMP); print_fpga_buffer("CYCLE_SY = ", &TEMP); + + /* END_MICROCODE */ +} + + +//------------------------------------------------------------------------------ +// +// Adds the base point G to the point stored in CYCLE_S* and stores the result +// again in CYCLE_R*. +// +//------------------------------------------------------------------------------ +void fpga_curve_add_jacobian_microcode() +{ + //fpga_modular_mul(SZ, SZ, A) ; // 3. A = SZ * SZ + //fpga_modular_mul(A, SZ, B ); // 4. B = A * SZ + //fpga_modular_mul(A, &ECDSA_GX, C ); // 5. C = A * GX + //fpga_modular_mul(B, &ECDSA_GY, D ); // 6. D = B * GY + //fpga_modular_sub(C, SX, E ); // 7. E = C - SX + //fpga_modular_sub(D, SY, F ); // 8. F = D - SY + //fpga_modular_mul(E, SZ, RZ); // 10. RZ = E * SZ [output] + //fpga_modular_mul(E, E, G ); // 11. G = E * E + //fpga_modular_mul(E, G, H ); // 12. H = E * G + //fpga_modular_mul(G, SX, J ); // 13. J = G * SX + //fpga_modular_add(J, J, T1); // 14. T1 = J + J + //fpga_modular_mul(F, F, T2); // 15. T2 = F * F + //fpga_modular_sub(T2, T1, T3); // 16. T3 = T2 - T1 + //fpga_modular_sub(T3, H, RX); // 17. RX = T3 - H [output] + //fpga_modular_sub(J, RX, T1); // 18. T1 = J - RX + //fpga_modular_mul(F, T1, T2); // 19. T2 = F * T1 + //fpga_modular_mul(H, SY, T3); // 20. T3 = H * SY + //fpga_modular_sub(T2, T3, RY); // 21. RY = T2 - T3 [output] + + /* BEGIN_MICROCODE: CYCLE_ADD */ + + uop_cmpz( BANK_HI, CYCLE_SZ); + uop_move( BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_SZ); + uop_calc(MUL, BANK_LO, CYCLE_SZ, CYCLE_SZ, BANK_HI, CYCLE_A); + uop_calc(MUL, BANK_HI, CYCLE_A, CYCLE_SZ, BANK_LO, CYCLE_B); + uop_move( BANK_LO, CYCLE_B, BANK_HI, CYCLE_B); + uop_calc(MUL, BANK_HI, CYCLE_A, CONST_GX, BANK_LO, CYCLE_C); + uop_calc(MUL, BANK_HI, CYCLE_B, CONST_GY, BANK_LO, CYCLE_D); + uop_calc(SUB, BANK_LO, CYCLE_C, CYCLE_SX, BANK_HI, CYCLE_E); + uop_calc(SUB, BANK_LO, CYCLE_D, CYCLE_SY, BANK_HI, CYCLE_F); + uop_cmpz( BANK_HI, CYCLE_E); + uop_cmpz( BANK_HI, CYCLE_F); + uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_SZ, BANK_LO, CYCLE_RZ); + uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_E, BANK_LO, CYCLE_G); + uop_move( BANK_LO, CYCLE_G, BANK_HI, CYCLE_G); + uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_G, BANK_LO, CYCLE_H); + uop_calc(MUL, BANK_LO, CYCLE_G, CYCLE_SX, BANK_HI, CYCLE_J); + uop_calc(ADD, BANK_HI, CYCLE_J, CYCLE_J, BANK_LO, CYCLE_T1); + uop_calc(MUL, BANK_HI, CYCLE_F, CYCLE_F, BANK_LO, CYCLE_T2); + uop_calc(SUB, BANK_LO, CYCLE_T2, CYCLE_T1, BANK_HI, CYCLE_T3); + uop_move( BANK_HI, CYCLE_T3, BANK_LO, CYCLE_T3); + uop_calc(SUB, BANK_LO, CYCLE_T3, CYCLE_H, BANK_HI, CYCLE_RX); + uop_calc(SUB, BANK_HI, CYCLE_J, CYCLE_RX, BANK_LO, CYCLE_T1); + uop_move( BANK_HI, CYCLE_F, BANK_LO, CYCLE_F); + uop_calc(MUL, BANK_LO, CYCLE_F, CYCLE_T1, BANK_HI, CYCLE_T2); + uop_calc(MUL, BANK_LO, CYCLE_H, CYCLE_SY, BANK_HI, CYCLE_T3); + uop_calc(SUB, BANK_HI, CYCLE_T2, CYCLE_T3, BANK_LO, CYCLE_RY); + uop_move( BANK_LO, CYCLE_RY, BANK_HI, CYCLE_RY); + + /* END_MICROCODE */ + + // + // handle special corner cases + // + if (uop_flagz_sz) + { + /* BEGIN_MICROCODE: CYCLE_ADD_AT_INFINITY */ + + uop_move(BANK_LO, CONST_GX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_GY, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + else + { + if (uop_flagz_e) + { + if (uop_flagz_f) + { + /* BEGIN_MICROCODE: CYCLE_ADD_SAME_X_SAME_Y */ + + uop_move(BANK_LO, CONST_HX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_HY, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + else + { + /* BEGIN_MICROCODE: CYCLE_ADD_SAME_X */ + + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + } + else + { + /* BEGIN_MICROCODE: CYCLE_ADD_REGULAR */ + + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_T1); + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_T2); + uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_T3); + + /* END_MICROCODE */ + } + } +} + + +#ifdef USE_MICROCODE +//------------------------------------------------------------------------------ +void fpga_curve_base_scalar_multiply_microcode(const FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy) +//------------------------------------------------------------------------------ +{ + int word_count, bit_count; // counters + FPGA_WORD k_word; + bool k_bit; + + // initialize internal banks + fpga_multiword_copy(&ECDSA_ZERO, &BUF_LO[CONST_ZERO]); + fpga_multiword_copy(&ECDSA_ZERO, &BUF_HI[CONST_ZERO]); + + fpga_multiword_copy(&ECDSA_ONE, &BUF_LO[CONST_ONE]); + fpga_multiword_copy(&ECDSA_ONE, &BUF_HI[CONST_ONE]); + + fpga_multiword_copy(&ECDSA_DELTA, &BUF_LO[CONST_DELTA]); + fpga_multiword_copy(&ECDSA_DELTA, &BUF_HI[CONST_DELTA]); + + fpga_multiword_copy(&ECDSA_GX, &BUF_LO[CONST_GX]); + fpga_multiword_copy(&ECDSA_GX, &BUF_HI[CONST_GX]); + + fpga_multiword_copy(&ECDSA_GY, &BUF_LO[CONST_GY]); + fpga_multiword_copy(&ECDSA_GY, &BUF_HI[CONST_GY]); + + fpga_multiword_copy(&ECDSA_HX, &BUF_LO[CONST_HX]); + fpga_multiword_copy(&ECDSA_HX, &BUF_HI[CONST_HX]); + + fpga_multiword_copy(&ECDSA_HY, &BUF_LO[CONST_HY]); + fpga_multiword_copy(&ECDSA_HY, &BUF_HI[CONST_HY]); + + /* BEGIN_MICROCODE: PREPARE */ + + // set initial value of R to point at infinity + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + + /* process bits of k left-to-right */ + for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--) + for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--) + { + k_word = k->words[word_count-1]; + k_bit = (k_word & (FPGA_WORD)(1 << (bit_count-1))) > 0; + + // Banks of working cycle operands + // ------------------------------- + // RX: HI + // RY: HI + // RZ: LO + + // calculate S = 2 * R + fpga_curve_double_jacobian_microcode(); + + // Banks of working cycle operands + // ------------------------------- + // SX: LO + // SY: LO + // SZ: HI + + // always calculate R = S * G for constant-time operation + fpga_curve_add_jacobian_microcode(); + + // Banks of working cycle operands + // ------------------------------- + // RX: HI + // RY: HI + // RZ: LO + + + if (!k_bit) + { + /* BEGIN_MICROCODE: CYCLE_K0 */ + + // revert to the value of S before addition if the current bit of k is not set + uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_RY); + uop_move(BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + } + else + { + /* BEGIN_MICROCODE: CYCLE_K1 */ + + // do dummy overwrite for constant-time operation + uop_move(BANK_HI, CYCLE_RX, BANK_LO, CYCLE_SX); + uop_move(BANK_HI, CYCLE_RY, BANK_LO, CYCLE_SY); + uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_SZ); + + /* END_MICROCODE */ + } + + FPGA_BUFFER TEMP; + + //printf("wc = %d, bc = %d\n", word_count-1, bit_count-1); + + uop_stor(BANK_LO, CYCLE_RX, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RX = ", &TEMP); + uop_stor(BANK_LO, CYCLE_RY, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RY = ", &TEMP); + uop_stor(BANK_LO, CYCLE_RZ, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RZ = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_SX, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SX = ", &TEMP); + uop_stor(BANK_LO, CYCLE_SY, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SY = ", &TEMP); + uop_stor(BANK_LO, CYCLE_SZ, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SZ = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_A, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_A = ", &TEMP); + uop_stor(BANK_LO, CYCLE_A2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_A2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_B, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_B = ", &TEMP); + uop_stor(BANK_LO, CYCLE_C, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C = ", &TEMP); + uop_stor(BANK_LO, CYCLE_C2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_C2_2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C2_2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_D, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_D = ", &TEMP); + uop_stor(BANK_LO, CYCLE_E, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_E = ", &TEMP); + uop_stor(BANK_LO, CYCLE_F, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_F = ", &TEMP); + uop_stor(BANK_LO, CYCLE_G, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_G = ", &TEMP); + uop_stor(BANK_LO, CYCLE_H, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_H = ", &TEMP); + uop_stor(BANK_LO, CYCLE_J, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_J = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_Z2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_Z2 = ", &TEMP); + + uop_stor(BANK_LO, CYCLE_T1, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T1 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_T2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T2 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_T3, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T3 = ", &TEMP); + uop_stor(BANK_LO, CYCLE_T4, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T4 = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_RX, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RX = ", &TEMP); + uop_stor(BANK_HI, CYCLE_RY, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RY = ", &TEMP); + uop_stor(BANK_HI, CYCLE_RZ, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RZ = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_SX, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SX = ", &TEMP); + uop_stor(BANK_HI, CYCLE_SY, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SY = ", &TEMP); + uop_stor(BANK_HI, CYCLE_SZ, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SZ = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_A, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_A = ", &TEMP); + uop_stor(BANK_HI, CYCLE_A2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_A2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_B, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_B = ", &TEMP); + uop_stor(BANK_HI, CYCLE_C, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C = ", &TEMP); + uop_stor(BANK_HI, CYCLE_C2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_C2_2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C2_2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_D, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_D = ", &TEMP); + uop_stor(BANK_HI, CYCLE_E, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_E = ", &TEMP); + uop_stor(BANK_HI, CYCLE_F, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_F = ", &TEMP); + uop_stor(BANK_HI, CYCLE_G, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_G = ", &TEMP); + uop_stor(BANK_HI, CYCLE_H, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_H = ", &TEMP); + uop_stor(BANK_HI, CYCLE_J, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_J = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_Z2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_Z2 = ", &TEMP); + + uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T1 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_T2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T2 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_T3, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T3 = ", &TEMP); + uop_stor(BANK_HI, CYCLE_T4, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T4 = ", &TEMP); + + } + + // now convert to affine coordinates + fpga_modular_inv23_microcode(); + + /* BEGIN_MICROCODE: CONVERT */ + + uop_calc(MUL, BANK_HI, INVERT_A2, CYCLE_RX, BANK_LO, CYCLE_SX); + uop_calc(MUL, BANK_HI, INVERT_A3, CYCLE_RY, BANK_LO, CYCLE_SY); + uop_cmpz(BANK_LO, CYCLE_RZ); + + /* END_MICROCODE */ + + if (uop_flagz_rz) + { + /* BEGIN_MICROCODE: CONVERT_AT_INFINITY */ + + uop_move(BANK_LO, CONST_ZERO, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CONST_ZERO, BANK_HI, CYCLE_RY); + + /* END_MICROCODE */ + } + else + { + /* BEGIN_MICROCODE: CONVERT_REGULAR */ + + uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_RX); + uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_RY); + + /* END_MICROCODE */ + } + + // return + uop_stor(BANK_HI, CYCLE_RX, qx); + uop_stor(BANK_HI, CYCLE_RY, qy); +} +#endif USE_MICROCODE + + +//------------------------------------------------------------------------------ +void fpga_curve_double_jacobian_microcode_wrapper(const FPGA_BUFFER *rx, + const FPGA_BUFFER *ry, + const FPGA_BUFFER *rz, + FPGA_BUFFER *sx, + FPGA_BUFFER *sy, + FPGA_BUFFER *sz) +//------------------------------------------------------------------------------ +{ + uop_load(rx, BANK_HI, CYCLE_RX); + uop_load(ry, BANK_HI, CYCLE_RY); + uop_load(rz, BANK_LO, CYCLE_RZ); + + fpga_curve_double_jacobian_microcode(); + + uop_stor(BANK_LO, CYCLE_SX, sx); + uop_stor(BANK_LO, CYCLE_SY, sy); + uop_stor(BANK_HI, CYCLE_SZ, sz); +} + + +//------------------------------------------------------------------------------ +void fpga_curve_add_jacobian_microcode_wrapper(const FPGA_BUFFER *sx, + const FPGA_BUFFER *sy, + const FPGA_BUFFER *sz, + FPGA_BUFFER *rx, + FPGA_BUFFER *ry, + FPGA_BUFFER *rz) +//------------------------------------------------------------------------------ +{ + uop_load(sx, BANK_LO, CYCLE_SX); + uop_load(sy, BANK_LO, CYCLE_SY); + uop_load(sz, BANK_HI, CYCLE_SZ); + + fpga_curve_add_jacobian_microcode(); + + uop_stor(BANK_HI, CYCLE_RX, rx); + uop_stor(BANK_HI, CYCLE_RY, ry); + uop_stor(BANK_LO, CYCLE_RZ, rz); +} + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ |