From 22f6cc0496f29d909c3f777d7c9b59559ab5723d Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Sat, 24 Jun 2017 23:29:33 +0300 Subject: Improved the model: * added CRT support * fixed bug in systolic array when operand width is not a multiple of array width --- modexp_fpga_model.cpp | 171 +++++++++++++++++++++++++++++++++++---- modexp_fpga_model.h | 2 +- modexp_fpga_model_montgomery.cpp | 22 +++-- modexp_fpga_model_montgomery.h | 3 +- test/format_test_vectors.py | 99 +++++++++++++++++++---- test/modexp_fpga_model_vectors.h | 48 +++++++++++ 6 files changed, 306 insertions(+), 39 deletions(-) diff --git a/modexp_fpga_model.cpp b/modexp_fpga_model.cpp index b19cdeb..e1c7f4e 100644 --- a/modexp_fpga_model.cpp +++ b/modexp_fpga_model.cpp @@ -59,16 +59,30 @@ //---------------------------------------------------------------- // Test vectors //---------------------------------------------------------------- -static const FPGA_WORD N_384_ROM[] = N_384; // 384-bit -static const FPGA_WORD M_384_ROM[] = M_384; // -static const FPGA_WORD D_384_ROM[] = D_384; // -static const FPGA_WORD S_384_ROM[] = S_384; // +static const FPGA_WORD N_384_ROM[] = N_384; // 384-bit +static const FPGA_WORD M_384_ROM[] = M_384; // +static const FPGA_WORD D_384_ROM[] = D_384; // +static const FPGA_WORD S_384_ROM[] = S_384; // + +static const FPGA_WORD P_384_ROM[] = P_384; // 192-bit +static const FPGA_WORD Q_384_ROM[] = Q_384; // +static const FPGA_WORD DP_384_ROM[] = DP_384; // +static const FPGA_WORD DQ_384_ROM[] = DQ_384; // +static const FPGA_WORD MP_384_ROM[] = MP_384; // +static const FPGA_WORD MQ_384_ROM[] = MQ_384; // static const FPGA_WORD N_512_ROM[] = N_512; // 512-bit static const FPGA_WORD M_512_ROM[] = M_512; // static const FPGA_WORD D_512_ROM[] = D_512; // static const FPGA_WORD S_512_ROM[] = S_512; // +static const FPGA_WORD P_512_ROM[] = P_512; // 256-bit +static const FPGA_WORD Q_512_ROM[] = Q_512; // +static const FPGA_WORD DP_512_ROM[] = DP_512; // +static const FPGA_WORD DQ_512_ROM[] = DQ_512; // +static const FPGA_WORD MP_512_ROM[] = MP_512; // +static const FPGA_WORD MQ_512_ROM[] = MQ_512; // + //---------------------------------------------------------------- // Prototypes @@ -77,11 +91,17 @@ void print_fpga_buffer (const char *str, const FPGA_WORD *buf, size_t len); bool compare_fpga_buffers (const FPGA_WORD *src, const FPGA_WORD *dst, size_t len); void load_value_from_rom (const FPGA_WORD *src, FPGA_WORD *dst, size_t len); -void modexp (const FPGA_WORD *M, const FPGA_WORD *D, - const FPGA_WORD *N, FPGA_WORD *R, size_t len); +void modexp (const FPGA_WORD *M, const FPGA_WORD *D, + const FPGA_WORD *N, FPGA_WORD *R, size_t len); + +void modexp_crt (const FPGA_WORD *M, const FPGA_WORD *D, + const FPGA_WORD *N, FPGA_WORD *R, size_t len); + +bool test_modexp (const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, + const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len); -bool test_modexp (const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, - const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len); +bool test_modexp_crt (const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, + const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len); //---------------------------------------------------------------- @@ -94,10 +114,26 @@ int main() ok = test_modexp(N_384_ROM, M_384_ROM, D_384_ROM, S_384_ROM, OPERAND_NUM_WORDS_384); if (!ok) return EXIT_FAILURE; + printf("Trying to exponentiate 384-bit message with 192-bit prime P and exponent dP...\n\n"); + ok = test_modexp_crt(P_384_ROM, M_384_ROM, DP_384_ROM, MP_384_ROM, OPERAND_NUM_WORDS_384 >> 1); + if (!ok) return EXIT_FAILURE; + + printf("Trying to exponentiate 384-bit message with 192-bit prime Q and exponent dQ...\n\n"); + ok = test_modexp_crt(Q_384_ROM, M_384_ROM, DQ_384_ROM, MQ_384_ROM, OPERAND_NUM_WORDS_384 >> 1); + if (!ok) return EXIT_FAILURE; + printf("Trying to sign 512-bit message...\n\n"); ok = test_modexp(N_512_ROM, M_512_ROM, D_512_ROM, S_512_ROM, OPERAND_NUM_WORDS_512); if (!ok) return EXIT_FAILURE; + printf("Trying to exponentiate 512-bit message with 256-bit prime P and exponent dP...\n\n"); + ok = test_modexp_crt(P_512_ROM, M_512_ROM, DP_512_ROM, MP_512_ROM, OPERAND_NUM_WORDS_512 >> 1); + if (!ok) return EXIT_FAILURE; + + printf("Trying to exponentiate 512-bit message with 256-bit prime Q and exponent dQ...\n\n"); + ok = test_modexp_crt(Q_512_ROM, M_512_ROM, DQ_512_ROM, MQ_512_ROM, OPERAND_NUM_WORDS_512 >> 1); + if (!ok) return EXIT_FAILURE; + return EXIT_SUCCESS; } @@ -126,7 +162,7 @@ void modexp( const FPGA_WORD *M, montgomery_calc_n_coeff(N, N_COEFF, len); // bring M into Montgomery domain - montgomery_multiply(M, FACTOR, N, N_COEFF, M_FACTOR, len); + montgomery_multiply(M, FACTOR, N, N_COEFF, M_FACTOR, len, false); /* * Montgomery multiplication adds an extra factor of 2 ^ -w to every product. @@ -155,6 +191,69 @@ void modexp( const FPGA_WORD *M, } +//---------------------------------------------------------------- +// Modular exponentiation routine with CRT support +//---------------------------------------------------------------- +void modexp_crt( const FPGA_WORD *M, + const FPGA_WORD *D, + const FPGA_WORD *N, + FPGA_WORD *R, + size_t len) +//---------------------------------------------------------------- +// +// R = (A mod N) ** B mod N +// +//---------------------------------------------------------------- +{ + // temporary buffers + FPGA_WORD M0 [MAX_OPERAND_WORDS]; + FPGA_WORD M1 [MAX_OPERAND_WORDS]; + FPGA_WORD FACTOR [MAX_OPERAND_WORDS]; + FPGA_WORD N_COEFF[MAX_OPERAND_WORDS]; + FPGA_WORD M_FACTOR[MAX_OPERAND_WORDS]; + + // pre-calculate modulus-dependant coefficients + montgomery_calc_factor(N, FACTOR, len); + montgomery_calc_n_coeff(N, N_COEFF, len); + + // reduce M to make it smaller than N + montgomery_multiply(M, FACTOR, N, N_COEFF, M0, len, true); + + // bring M into Montgomery domain + montgomery_multiply(M0, FACTOR, N, N_COEFF, M1, len, false); + montgomery_multiply(M1, FACTOR, N, N_COEFF, M_FACTOR, len, false); + + /* + * Montgomery multiplication adds an extra factor of 2 ^ -w to every product, + * Montgomery reduction adds that factor too. The message must be reduced before + * exponentiation, because in CRT mode it is twice larger, than the modulus + * and the exponent. After reduction the message carries an extra factor of + * 2 ^ -w. We pre-calculate a special factor of 2 ^ 2w and multiply the message + * by this factor *twice* using our Montgomery multiplier. This way we get the + * message with an extra factor of just 2 ^ w: + * 1. (m * 2 ^ -w) * (2 ^ 2w) * (2 ^ -w) = m + * 2. (m) * (2 ^ 2w) * (2 ^ -w) = m * 2 ^ w + * + * Now we feed this message with that extra factor to the binary exponentiation + * routine. The current power of m will always keep that additional factor: + * (p * 2 ^ w) * (p * 2 ^ w) * (2 ^ -w) = p ^ 2 * 2 ^ w + * + * The result starts at 1, i.e. without any extra factors. If at any particular + * iteration it gets multiplied with the current power of m, the product will + * not carry any extra factors, because the power's factor gets eliminated + * by the extra factor of Montgomery multiplication: + * (r) * (p * 2 ^ w) * (2 ^ -w) = r * p + * + * This way we don't need any extra post-processing to convert the final result + * from Montgomery domain. + * + */ + + // exponentiate + montgomery_exponentiate(M_FACTOR, D, N, N_COEFF, R, len); +} + + //---------------------------------------------------------------- // Copies words from src into dst reversing their order //---------------------------------------------------------------- @@ -246,13 +345,13 @@ void print_fpga_buffer(const char *str, const FPGA_WORD *buf, size_t len) //---------------------------------------------------------------- -// Test the modular multiplication model +// Test the modular exponentiation model //---------------------------------------------------------------- bool test_modexp(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len) //---------------------------------------------------------------- // -// This routine uses the Montgomery exponentiation routine to -// calculate r = m ** d mod m, and then compares it to the +// This routine uses the Montgomery exponentiation model to +// calculate r = m ** d mod n, and then compares it to the // reference value s. // //---------------------------------------------------------------- @@ -278,12 +377,56 @@ bool test_modexp(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, const FPGA_WORD // check result ok = compare_fpga_buffers(S, R, len); if (!ok) - { printf("\n ERROR\n\n"); + { printf(" ERROR\n\n\n"); + return false; + } + + // everything went just fine + printf(" OK\n\n\n"); + return true; +} + + +//---------------------------------------------------------------- +// Test the modular exponentiation model with CRT enabled +//---------------------------------------------------------------- +bool test_modexp_crt(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len) +//---------------------------------------------------------------- +// +// This routine uses the Montgomery exponentiation model to +// calculate r = (m mod n) ** d mod n, and then compares it to the +// reference value s. The difference from test_modexp() is that +// m_rom is twice larger than n_rom and d_rom. +// +//---------------------------------------------------------------- +{ + bool ok; // flag + + // buffers + FPGA_WORD N[MAX_OPERAND_WORDS]; + FPGA_WORD M[MAX_OPERAND_WORDS]; + FPGA_WORD D[MAX_OPERAND_WORDS]; + FPGA_WORD S[MAX_OPERAND_WORDS]; + FPGA_WORD R[MAX_OPERAND_WORDS]; + + // fill buffers with test vector (message is twice is large!) + load_value_from_rom(n_rom, N, len); + load_value_from_rom(m_rom, M, len << 1); + load_value_from_rom(d_rom, D, len); + load_value_from_rom(s_rom, S, len); + + // calculate power + modexp_crt(M, D, N, R, len); + + // check result + ok = compare_fpga_buffers(S, R, len); + if (!ok) + { printf(" ERROR\n\n\n"); return false; } // everything went just fine - printf("\n OK\n\n"); + printf(" OK\n\n\n"); return true; } diff --git a/modexp_fpga_model.h b/modexp_fpga_model.h index 2a91d32..f30a41b 100644 --- a/modexp_fpga_model.h +++ b/modexp_fpga_model.h @@ -31,7 +31,7 @@ // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // diff --git a/modexp_fpga_model_montgomery.cpp b/modexp_fpga_model_montgomery.cpp index d1cca60..34ef2b6 100644 --- a/modexp_fpga_model_montgomery.cpp +++ b/modexp_fpga_model_montgomery.cpp @@ -46,7 +46,7 @@ //---------------------------------------------------------------- // Montgomery modular multiplier //---------------------------------------------------------------- -void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD *N, const FPGA_WORD *N_COEFF, FPGA_WORD *R, size_t len) +void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD *N, const FPGA_WORD *N_COEFF, FPGA_WORD *R, size_t len, bool reduce_only) //---------------------------------------------------------------- // // R = A * B * 2^-len mod N @@ -94,8 +94,11 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD FPGA_WORD S [2 * MAX_OPERAND_WORDS]; // final sum FPGA_WORD SN[2 * MAX_OPERAND_WORDS]; // final difference - // number of systolic cycles needed to multiply entire B by one word of A + // number of full systolic cycles needed to multiply entire B by one word of A size_t num_systolic_cycles = len / SYSTOLIC_NUM_WORDS; + + // adjust number of cycles + if ((num_systolic_cycles * SYSTOLIC_NUM_WORDS) < len) num_systolic_cycles++; // initialize arrays of accumulators and carries to zeroes for (i=0; i