aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2021-04-11 17:21:36 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2021-04-11 17:21:36 +0300
commit516ca870fd3ad2a87e0ac56f0d453667e021c52d (patch)
treede39313a891c387b1ee2f2cc6e7e892a4d263676
parent1b3b4b655b171e2e016970b19f0b43fdd2da5f8d (diff)
* Microcode layer redesigned to take advantage of the Montgomery ladder architecture. Instead of R and S there are now two working ("cycle") registers, R0 and R1. After every cycle, R0+R1 is placed in register S ("sum"), and either 2*R0 or 2*R1 (depending on the current multiplier bit) is placed in register T. Then the working variables are updated; the final result ends up in R0.
* Due to the change of working registers, the modular inversion routines were updated accordingly.
* Added optional debugging output control.
-rw-r--r-- ecdsa_fpga_microcode.cpp 49
-rw-r--r-- ecdsa_fpga_microcode.h 96
2 files changed, 68 insertions, 77 deletions
diff --git a/ecdsa_fpga_microcode.cpp b/ecdsa_fpga_microcode.cpp
index f02dc8a..2171ac2 100644
--- a/ecdsa_fpga_microcode.cpp
+++ b/ecdsa_fpga_microcode.cpp
@@ -59,10 +59,8 @@ FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT];
//------------------------------------------------------------------------------
// Global Flags
//------------------------------------------------------------------------------
-bool uop_flagz_sz;
-bool uop_flagz_rz;
-bool uop_flagz_e;
-bool uop_flagz_f;
+bool uop_flagz_r0z;
+bool uop_flagz_r1z;
//------------------------------------------------------------------------------
@@ -96,17 +94,11 @@ void uop_cmpz(UOP_BANK src, int s_op)
switch (s_op)
{
- case CYCLE_SZ:
- uop_flagz_sz = flagz;
+ case CYCLE_R0Z:
+ uop_flagz_r0z = flagz;
break;
- case CYCLE_RZ:
- uop_flagz_rz = flagz;
- break;
- case CYCLE_E:
- uop_flagz_e = flagz;
- break;
- case CYCLE_F:
- uop_flagz_f = flagz;
+ case CYCLE_R1Z:
+ uop_flagz_r1z = flagz;
break;
}
}
@@ -141,6 +133,13 @@ void uop_calc(UOP_MATH math,
if (math == ADD) fpga_modular_add(s_ptr1, s_ptr2, d_ptr);
if (math == SUB) fpga_modular_sub(s_ptr1, s_ptr2, d_ptr);
if (math == MUL) fpga_modular_mul(s_ptr1, s_ptr2, d_ptr);
+
+#ifdef DUMP_UOP_OUTPUTS
+ if (math == ADD) dump_uop_output("ADD", d_ptr);
+ if (math == SUB) dump_uop_output("SUB", d_ptr);
+ if (math == MUL) dump_uop_output("MUL", d_ptr);
+#endif
+
}
@@ -201,16 +200,16 @@ void fpga_modular_inv23_p256_microcode()
// first obtain intermediate helper quantities (X#)
// mirror X1 to HI bank (don't waste time copying to X1, just use RZ)
- uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ);
+ uop_move(BANK_LO, CYCLE_R0Z, BANK_HI, CYCLE_R0Z);
// compute X2 and mirror to the other bank
- uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1);
- uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2);
+ uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_R0Z, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, CYCLE_R0Z, INVERT_R1, BANK_LO, INVERT_X2);
uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2);
// compute X3 and mirror to the other bank
uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1);
- uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3);
+ uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_R0Z, BANK_LO, INVERT_X3);
uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3);
// compute X6 (stored in the lower bank)
@@ -257,7 +256,7 @@ void fpga_modular_inv23_p256_microcode()
uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
uop_repeat();
- uop_calc(MUL, BANK_LO, INVERT_R2, CYCLE_RZ, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R2, CYCLE_R0Z, BANK_HI, INVERT_R1);
uop_cycle(128);
uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
@@ -287,7 +286,7 @@ void fpga_modular_inv23_p256_microcode()
// A3 ends up in the upper bank by itself
uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1);
- uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3);
+ uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_R0Z, BANK_HI, INVERT_A3);
/* END_MICROCODE */
}
@@ -322,16 +321,16 @@ void fpga_modular_inv23_p384_microcode()
// first obtain intermediate helper quantities (X#)
// mirror X1 to HI bank (don't waste time copying to X1, just use RZ)
- uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ);
+ uop_move(BANK_LO, CYCLE_R0Z, BANK_HI, CYCLE_R0Z);
// compute X2 and mirror to the other bank
- uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1);
- uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2);
+ uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_R0Z, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, CYCLE_R0Z, INVERT_R1, BANK_LO, INVERT_X2);
uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2);
// compute X3 and mirror to the other bank
uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1);
- uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3);
+ uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_R0Z, BANK_LO, INVERT_X3);
uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3);
// compute X6 (stored in the lower bank)
@@ -421,7 +420,7 @@ void fpga_modular_inv23_p384_microcode()
// A3 ends up in the upper bank by itself
uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1);
- uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3);
+ uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_R0Z, BANK_HI, INVERT_A3);
/* END_MICROCODE */
}
diff --git a/ecdsa_fpga_microcode.h b/ecdsa_fpga_microcode.h
index f551d96..32e061e 100644
--- a/ecdsa_fpga_microcode.h
+++ b/ecdsa_fpga_microcode.h
@@ -57,56 +57,50 @@ enum UOP_OPERAND
CONST_ZERO, // 0
CONST_ONE, // 1
CONST_DELTA, // 2
-
+
CONST_GX, // 3
CONST_GY, // 4
-
- CONST_HX, // 5
- CONST_HY, // 6
-
- CYCLE_RX, // 7
- CYCLE_RY, // 8
- CYCLE_RZ, // 9
-
- CYCLE_SX, // 10
- CYCLE_SY, // 11
- CYCLE_SZ, // 12
-
- CYCLE_A, // 13
- CYCLE_A2, // 14
- CYCLE_B, // 15
- CYCLE_C, // 16
- CYCLE_C2, // 17
- CYCLE_C2_2, // 18
- CYCLE_D, // 19
- CYCLE_E, // 20
- CYCLE_F, // 21
- CYCLE_G, // 22
- CYCLE_H, // 23
- CYCLE_J, // 24
-
- CYCLE_Z2, // 25
-
- CYCLE_T1, // 26
- CYCLE_T2, // 27
- CYCLE_T3, // 28
- CYCLE_T4, // 29
-
- INVERT_R1, // 30
- INVERT_R2, // 31
-
- INVERT_X2, // 32
- INVERT_X3, // 33
- INVERT_X6, // 34
- INVERT_X12, // 35
- INVERT_X15, // 36
- INVERT_X30, // 37
- INVERT_X32, // 38
- INVERT_X60, // 39
- INVERT_X120, // 40
-
- INVERT_A2, // 41
- INVERT_A3, // 42
+
+ CYCLE_R0X, // 5
+ CYCLE_R0Y, // 6
+ CYCLE_R0Z, // 7
+
+ CYCLE_R1X, // 8
+ CYCLE_R1Y, // 9
+ CYCLE_R1Z, // 10
+
+ CYCLE_SX, // 11
+ CYCLE_SY, // 12
+ CYCLE_SZ, // 13
+
+ CYCLE_TX, // 14
+ CYCLE_TY, // 15
+ CYCLE_TZ, // 16
+
+ CYCLE_T1, // 17
+ CYCLE_T2, // 18
+ CYCLE_T3, // 19
+ CYCLE_T4, // 20
+ CYCLE_T5, // 21
+ CYCLE_T6, // 22
+ CYCLE_T7, // 23
+ CYCLE_T8, // 24
+
+ INVERT_R1, // 25
+ INVERT_R2, // 26
+
+ INVERT_X2, // 27
+ INVERT_X3, // 28
+ INVERT_X6, // 29
+ INVERT_X12, // 30
+ INVERT_X15, // 31
+ INVERT_X30, // 32
+ INVERT_X32, // 33
+ INVERT_X60, // 34
+ INVERT_X120, // 35
+
+ INVERT_A2, // 36
+ INVERT_A3, // 37
ECDSA_UOP_OPERAND_COUNT
};
@@ -129,10 +123,8 @@ extern FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT];
//------------------------------------------------------------------------------
// Global Flags
//------------------------------------------------------------------------------
-extern bool uop_flagz_sz;
-extern bool uop_flagz_rz;
-extern bool uop_flagz_e;
-extern bool uop_flagz_f;
+extern bool uop_flagz_r0z;
+extern bool uop_flagz_r1z;
//------------------------------------------------------------------------------