diff options
-rw-r--r-- | curve25519/curve25519_fpga_microcode.cpp | 105 | ||||
-rw-r--r-- | curve25519/curve25519_fpga_microcode.h | 4 |
2 files changed, 57 insertions, 52 deletions
diff --git a/curve25519/curve25519_fpga_microcode.cpp b/curve25519/curve25519_fpga_microcode.cpp index 37d2f8e..e0ddf44 100644 --- a/curve25519/curve25519_fpga_microcode.cpp +++ b/curve25519/curve25519_fpga_microcode.cpp @@ -44,6 +44,14 @@ //------------------------------------------------------------------------------ +// Macros +//------------------------------------------------------------------------------ +#define uop_cycle(iters); for (cyc_count=0; cyc_count<iters; cyc_count++) { +#define uop_repeat(); } +#define uop_calc_if_even(...) if (!(cyc_count % 2)) uop_calc(__VA_ARGS__) +#define uop_calc_if_odd(...) else uop_calc(__VA_ARGS__) + +//------------------------------------------------------------------------------ void uop_move (UOP_BANK src, int s_op, UOP_BANK dst, int d_op, FPGA_BUFFER *buf_lo, FPGA_BUFFER *buf_hi) @@ -62,17 +70,6 @@ void uop_move (UOP_BANK src, int s_op, //------------------------------------------------------------------------------ -void uop_move2 (UOP_BANK src, int s_op1, int s_op2, - UOP_BANK dst, int d_op1, int d_op2, - FPGA_BUFFER *buf_lo, FPGA_BUFFER *buf_hi) -//------------------------------------------------------------------------------ -{ - uop_move(src, s_op1, dst, d_op1, buf_lo, buf_hi); - uop_move(src, s_op2, dst, d_op2, buf_lo, buf_hi); -} - - -//------------------------------------------------------------------------------ void uop_calc (UOP_MATH math, UOP_BANK src, int s_op1, int s_op2, UOP_BANK dst, int d_op, @@ -143,8 +140,10 @@ void fpga_modular_inv_microcode(FPGA_BUFFER *buf_lo, FPGA_BUFFER *buf_hi) { int cyc_count; // counters + /* BEGIN_MICROCODE: DURING_INVERSION */ + // T_1 - uop_move2(BANK_LO, INVERT_T_1, INVERT_T_1, BANK_HI, INVERT_T_1, INVERT_T_1, buf_lo, buf_hi); + uop_move(BANK_LO, INVERT_T_1, BANK_HI, INVERT_T_1, buf_lo, buf_hi); // T_10 uop_calc(MUL, BANK_LO, INVERT_T_1, INVERT_T_1, BANK_HI, INVERT_T_10, buf_lo, buf_hi, MOD_2P); @@ -155,7 +154,7 @@ void fpga_modular_inv_microcode(FPGA_BUFFER *buf_lo, FPGA_BUFFER *buf_hi) uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_T_1, BANK_LO, INVERT_T_1001, buf_lo, buf_hi, MOD_2P); // T_1011 - uop_move2(BANK_HI, INVERT_T_10, INVERT_T_10, BANK_LO, INVERT_T_10, INVERT_T_10, buf_lo, buf_hi); + uop_move(BANK_HI, INVERT_T_10, BANK_LO, INVERT_T_10, buf_lo, buf_hi); uop_calc(MUL, BANK_LO, INVERT_T_1001, INVERT_T_10, BANK_HI, INVERT_T_1011, buf_lo, buf_hi, MOD_2P); // T_X5 @@ -163,78 +162,88 @@ void fpga_modular_inv_microcode(FPGA_BUFFER *buf_lo, FPGA_BUFFER *buf_hi) uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_T_1001, BANK_HI, INVERT_T_X5, buf_lo, buf_hi, MOD_2P); // T_X10 - uop_move2(BANK_HI, INVERT_T_X5, INVERT_T_X5, BANK_LO, INVERT_R1, INVERT_R1, buf_lo, buf_hi); + uop_move(BANK_HI, INVERT_T_X5, BANK_LO, INVERT_R1, buf_lo, buf_hi); - for (cyc_count=0; cyc_count<4; cyc_count++) - if (!(cyc_count % 2)) uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2, buf_lo, buf_hi, MOD_2P); - else uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_cycle(4); + uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2, buf_lo, buf_hi, MOD_2P); + uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_repeat(); uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2, buf_lo, buf_hi, MOD_2P); uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_T_X5, BANK_LO, INVERT_T_X10, buf_lo, buf_hi, MOD_2P); // T_X20 - uop_move2(BANK_LO, INVERT_T_X10, INVERT_T_X10, BANK_HI, INVERT_R1, INVERT_R1, buf_lo, buf_hi); - uop_move2(BANK_LO, INVERT_T_X10, INVERT_T_X10, BANK_HI, INVERT_T_X10, INVERT_T_X10, buf_lo, buf_hi); + uop_move(BANK_LO, INVERT_T_X10, BANK_HI, INVERT_R1, buf_lo, buf_hi); + uop_move(BANK_LO, INVERT_T_X10, BANK_HI, INVERT_T_X10, buf_lo, buf_hi); - for (cyc_count=0; cyc_count<10; cyc_count++) - if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); - else uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_cycle(10); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_repeat(); uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_T_X10, BANK_LO, INVERT_T_X20, buf_lo, buf_hi, MOD_2P); // T_X40 - uop_move2(BANK_LO, INVERT_T_X20, INVERT_T_X20, BANK_HI, INVERT_R1, INVERT_R1, buf_lo, buf_hi); - uop_move2(BANK_LO, INVERT_T_X20, INVERT_T_X20, BANK_HI, INVERT_T_X20, INVERT_T_X20, buf_lo, buf_hi); + uop_move(BANK_LO, INVERT_T_X20, BANK_HI, INVERT_R1, buf_lo, buf_hi); + uop_move(BANK_LO, INVERT_T_X20, BANK_HI, INVERT_T_X20, buf_lo, buf_hi); - for (cyc_count=0; cyc_count<20; cyc_count++) - if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); - else uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_cycle(20); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); + uop_calc_if_odd(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_repeat(); uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_T_X20, BANK_LO, INVERT_T_X40, buf_lo, buf_hi, MOD_2P); // T_X50 - uop_move2(BANK_LO, INVERT_T_X40, INVERT_T_X40, BANK_HI, INVERT_R1, INVERT_R1, buf_lo, buf_hi); + uop_move(BANK_LO, INVERT_T_X40, BANK_HI, INVERT_R1, buf_lo, buf_hi); - for (cyc_count=0; cyc_count<10; cyc_count++) - if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); - else uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_cycle(10); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); + uop_calc_if_odd(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_repeat(); uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_T_X10, BANK_LO, INVERT_T_X50, buf_lo, buf_hi, MOD_2P); // T_X100 - uop_move2(BANK_LO, INVERT_T_X50, INVERT_T_X50, BANK_HI, INVERT_R1, INVERT_R1, buf_lo, buf_hi); - uop_move2(BANK_LO, INVERT_T_X50, INVERT_T_X50, BANK_HI, INVERT_T_X50, INVERT_T_X50, buf_lo, buf_hi); + uop_move(BANK_LO, INVERT_T_X50, BANK_HI, INVERT_R1, buf_lo, buf_hi); + uop_move(BANK_LO, INVERT_T_X50, BANK_HI, INVERT_T_X50, buf_lo, buf_hi); - for (cyc_count=0; cyc_count<50; cyc_count++) - if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); - else uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_cycle(50); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); + uop_calc_if_odd(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_repeat(); uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_T_X50, BANK_LO, INVERT_T_X100, buf_lo, buf_hi, MOD_2P); - uop_move2(BANK_LO, INVERT_T_X100, INVERT_T_X100, BANK_HI, INVERT_R1, INVERT_R1, buf_lo, buf_hi); - uop_move2(BANK_LO, INVERT_T_X100, INVERT_T_X100, BANK_HI, INVERT_T_X100, INVERT_T_X100, buf_lo, buf_hi); + uop_move(BANK_LO, INVERT_T_X100, BANK_HI, INVERT_R1, buf_lo, buf_hi); + uop_move(BANK_LO, INVERT_T_X100, BANK_HI, INVERT_T_X100, buf_lo, buf_hi); - for (cyc_count=0; cyc_count<100; cyc_count++) - if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); - else uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_cycle(100); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); + uop_calc_if_odd(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_repeat(); uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_T_X100, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); - for (cyc_count=0; cyc_count<50; cyc_count++) - if (!(cyc_count % 2)) uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); - else uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); + uop_cycle(50); + uop_calc_if_even(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_calc_if_odd(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); + uop_repeat(); uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_T_X50, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); - for (cyc_count=0; cyc_count<4; cyc_count++) - if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); - else uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_cycle(4); + uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); + uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + uop_repeat(); uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2, buf_lo, buf_hi, MOD_2P); - uop_move2(BANK_HI, INVERT_T_1011, INVERT_T_1011, BANK_LO, INVERT_T_1011, INVERT_T_1011, buf_lo, buf_hi); + uop_move(BANK_HI, INVERT_T_1011, BANK_LO, INVERT_T_1011, buf_lo, buf_hi); uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_T_1011, BANK_HI, INVERT_R1, buf_lo, buf_hi, MOD_2P); + + /* END_MICROCODE */ } diff --git a/curve25519/curve25519_fpga_microcode.h b/curve25519/curve25519_fpga_microcode.h index 10d29ca..dfe09b5 100644 --- a/curve25519/curve25519_fpga_microcode.h +++ b/curve25519/curve25519_fpga_microcode.h @@ -101,10 +101,6 @@ void uop_move (UOP_BANK src, int s_op1, UOP_BANK dst, int d_op1, FPGA_BUFFER *buf_lo, FPGA_BUFFER *buf_hi); -void uop_move2 (UOP_BANK src, int s_op1, int s_op2, - UOP_BANK dst, int d_op1, int d_op2, - FPGA_BUFFER *buf_lo, FPGA_BUFFER *buf_hi); - void uop_calc (UOP_MATH math, UOP_BANK src, int s_op1, int s_op2, UOP_BANK dst, int d_op, |