From 22f6cc0496f29d909c3f777d7c9b59559ab5723d Mon Sep 17 00:00:00 2001
From: "Pavel V. Shatov (Meister)" <meisterpaul1@yandex.ru>
Date: Sat, 24 Jun 2017 23:29:33 +0300
Subject: Improved the model:  * added CRT support  * fixed bug in systolic
 array when operand width is not a multiple of array width

---
 modexp_fpga_model.cpp | 171 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 157 insertions(+), 14 deletions(-)

(limited to 'modexp_fpga_model.cpp')

diff --git a/modexp_fpga_model.cpp b/modexp_fpga_model.cpp
index b19cdeb..e1c7f4e 100644
--- a/modexp_fpga_model.cpp
+++ b/modexp_fpga_model.cpp
@@ -59,16 +59,30 @@
 //----------------------------------------------------------------
 // Test vectors
 //----------------------------------------------------------------
-static const FPGA_WORD N_384_ROM[] = N_384;		// 384-bit
-static const FPGA_WORD M_384_ROM[] = M_384;		//
-static const FPGA_WORD D_384_ROM[] = D_384;		//
-static const FPGA_WORD S_384_ROM[] = S_384;		//
+static const FPGA_WORD N_384_ROM[]  = N_384;	// 384-bit
+static const FPGA_WORD M_384_ROM[]  = M_384;	//
+static const FPGA_WORD D_384_ROM[]  = D_384;	//
+static const FPGA_WORD S_384_ROM[]  = S_384;	//
+
+static const FPGA_WORD P_384_ROM[]  = P_384;	// 192-bit
+static const FPGA_WORD Q_384_ROM[]  = Q_384;	//
+static const FPGA_WORD DP_384_ROM[] = DP_384;	//
+static const FPGA_WORD DQ_384_ROM[] = DQ_384;	//
+static const FPGA_WORD MP_384_ROM[] = MP_384;	//
+static const FPGA_WORD MQ_384_ROM[] = MQ_384;	//
 
 static const FPGA_WORD N_512_ROM[] = N_512;		// 512-bit
 static const FPGA_WORD M_512_ROM[] = M_512;		//
 static const FPGA_WORD D_512_ROM[] = D_512;		//
 static const FPGA_WORD S_512_ROM[] = S_512;		//
 
+static const FPGA_WORD P_512_ROM[]  = P_512;	// 256-bit
+static const FPGA_WORD Q_512_ROM[]  = Q_512;	//
+static const FPGA_WORD DP_512_ROM[] = DP_512;	//
+static const FPGA_WORD DQ_512_ROM[] = DQ_512;	//
+static const FPGA_WORD MP_512_ROM[] = MP_512;	//
+static const FPGA_WORD MQ_512_ROM[] = MQ_512;	//
+
 
 //----------------------------------------------------------------
 // Prototypes
@@ -77,11 +91,17 @@ void print_fpga_buffer		(const char *str, const FPGA_WORD *buf, size_t len);
 bool compare_fpga_buffers	(const FPGA_WORD *src, const FPGA_WORD *dst, size_t len);
 void load_value_from_rom	(const FPGA_WORD *src, FPGA_WORD *dst, size_t len);
 
-void modexp			(const FPGA_WORD *M, const FPGA_WORD *D,
-					 const FPGA_WORD *N,       FPGA_WORD *R, size_t len);
+void modexp				(const FPGA_WORD *M, const FPGA_WORD *D,
+						 const FPGA_WORD *N,       FPGA_WORD *R, size_t len);
+
+void modexp_crt			(const FPGA_WORD *M, const FPGA_WORD *D,
+						 const FPGA_WORD *N,       FPGA_WORD *R, size_t len);
+
+bool test_modexp		(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom,
+						 const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len);
 
-bool test_modexp	(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom,
-					 const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len);
+bool test_modexp_crt	(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom,
+						 const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len);
 
 
 //----------------------------------------------------------------
@@ -94,10 +114,26 @@ int main()
 	ok = test_modexp(N_384_ROM, M_384_ROM, D_384_ROM, S_384_ROM, OPERAND_NUM_WORDS_384);
 	if (!ok) return EXIT_FAILURE;
 
+	printf("Trying to exponentiate 384-bit message with 192-bit prime P and exponent dP...\n\n");
+	ok = test_modexp_crt(P_384_ROM, M_384_ROM, DP_384_ROM, MP_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
+	if (!ok) return EXIT_FAILURE;
+
+	printf("Trying to exponentiate 384-bit message with 192-bit prime Q and exponent dQ...\n\n");
+	ok = test_modexp_crt(Q_384_ROM, M_384_ROM, DQ_384_ROM, MQ_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
+	if (!ok) return EXIT_FAILURE;
+
 	printf("Trying to sign 512-bit message...\n\n");
 	ok = test_modexp(N_512_ROM, M_512_ROM, D_512_ROM, S_512_ROM, OPERAND_NUM_WORDS_512);
 	if (!ok) return EXIT_FAILURE;
 
+	printf("Trying to exponentiate 512-bit message with 256-bit prime P and exponent dP...\n\n");
+	ok = test_modexp_crt(P_512_ROM, M_512_ROM, DP_512_ROM, MP_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
+	if (!ok) return EXIT_FAILURE;
+
+	printf("Trying to exponentiate 512-bit message with 256-bit prime Q and exponent dQ...\n\n");
+	ok = test_modexp_crt(Q_512_ROM, M_512_ROM, DQ_512_ROM, MQ_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
+	if (!ok) return EXIT_FAILURE;
+
 	return EXIT_SUCCESS;
 }
 
@@ -126,7 +162,7 @@ void modexp(	const FPGA_WORD *M,
 	montgomery_calc_n_coeff(N, N_COEFF, len);
 		
 		// bring M into Montgomery domain
-	montgomery_multiply(M, FACTOR, N, N_COEFF, M_FACTOR, len);
+	montgomery_multiply(M, FACTOR, N, N_COEFF, M_FACTOR, len, false);
 
 		/*
 		 * Montgomery multiplication adds an extra factor of 2 ^ -w to every product.
@@ -155,6 +191,69 @@ void modexp(	const FPGA_WORD *M,
 }
 
 
+//----------------------------------------------------------------
+// Modular exponentiation routine with CRT support
+//----------------------------------------------------------------
+void modexp_crt(	const FPGA_WORD *M,
+					const FPGA_WORD *D,
+					const FPGA_WORD *N,
+					      FPGA_WORD *R,
+						  size_t     len)
+//----------------------------------------------------------------
+//
+// R = (A mod N) ** B mod N
+//
+//----------------------------------------------------------------
+{
+		// temporary buffers
+	FPGA_WORD M0     [MAX_OPERAND_WORDS];
+	FPGA_WORD M1     [MAX_OPERAND_WORDS];
+	FPGA_WORD FACTOR [MAX_OPERAND_WORDS];
+	FPGA_WORD N_COEFF[MAX_OPERAND_WORDS];
+	FPGA_WORD M_FACTOR[MAX_OPERAND_WORDS];
+
+		// pre-calculate modulus-dependant coefficients
+	montgomery_calc_factor(N, FACTOR, len);
+	montgomery_calc_n_coeff(N, N_COEFF, len);
+	
+		// reduce M to make it smaller than N
+	montgomery_multiply(M, FACTOR, N, N_COEFF, M0, len, true);
+
+		// bring M into Montgomery domain
+	montgomery_multiply(M0, FACTOR, N, N_COEFF, M1,       len, false);
+	montgomery_multiply(M1, FACTOR, N, N_COEFF, M_FACTOR, len, false);
+
+		/*
+		 * Montgomery multiplication adds an extra factor of 2 ^ -w to every product,
+		 * Montgomery reduction adds that factor too. The message must be reduced before
+		 * exponentiation, because in CRT mode it is twice larger, than the modulus
+		 * and the exponent. After reduction the message carries an extra factor of
+		 * 2 ^ -w. We pre-calculate a special factor of 2 ^ 2w and multiply the message
+		 * by this factor *twice* using our Montgomery multiplier. This way we get the
+		 * message with an extra factor of just 2 ^ w:
+		 * 1. (m * 2 ^ -w) * (2 ^ 2w) * (2 ^ -w) = m
+		 * 2. (m) * (2 ^ 2w) * (2 ^ -w) = m * 2 ^ w
+		 *
+		 * Now we feed this message with that extra factor to the binary exponentiation
+		 * routine. The current power of m will always keep that additional factor:
+		 * (p * 2 ^ w) * (p * 2 ^ w) * (2 ^ -w) = p ^ 2 * 2 ^ w
+		 *
+		 * The result starts at 1, i.e. without any extra factors. If at any particular
+		 * iteration it gets multiplied with the current power of m, the product will
+		 * not carry any extra factors, because the power's factor gets eliminated
+		 * by the extra factor of Montgomery multiplication:
+		 * (r) * (p * 2 ^ w) * (2 ^ -w) = r * p
+		 *
+		 * This way we don't need any extra post-processing to convert the final result
+		 * from Montgomery domain. 
+		 *
+		 */
+
+		// exponentiate
+	montgomery_exponentiate(M_FACTOR, D, N, N_COEFF, R, len);
+}
+
+
 //----------------------------------------------------------------
 // Copies words from src into dst reversing their order
 //----------------------------------------------------------------
@@ -246,13 +345,13 @@ void print_fpga_buffer(const char *str, const FPGA_WORD *buf, size_t len)
 
 
 //----------------------------------------------------------------
-// Test the modular multiplication model
+// Test the modular exponentiation model
 //----------------------------------------------------------------
 bool test_modexp(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len)
 //----------------------------------------------------------------
 //
-// This routine uses the Montgomery exponentiation routine to
-// calculate r = m ** d mod m, and then compares it to the
+// This routine uses the Montgomery exponentiation model to
+// calculate r = m ** d mod n, and then compares it to the
 // reference value s.
 //
 //----------------------------------------------------------------
@@ -278,12 +377,56 @@ bool test_modexp(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, const FPGA_WORD
 		// check result
 	ok = compare_fpga_buffers(S, R, len);
 	if (!ok)
-	{	printf("\n    ERROR\n\n");
+	{	printf("    ERROR\n\n\n");
+		return false;
+	}
+
+		// everything went just fine
+	printf("    OK\n\n\n");
+	return true;
+}
+
+
+//----------------------------------------------------------------
+// Test the modular exponentiation model with CRT enabled
+//----------------------------------------------------------------
+bool test_modexp_crt(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len)
+//----------------------------------------------------------------
+//
+// This routine uses the Montgomery exponentiation model to
+// calculate r = (m mod n) ** d mod n, and then compares it to the
+// reference value s. The difference from test_modexp() is that
+// m_rom is twice larger than n_rom and d_rom.
+//
+//----------------------------------------------------------------
+{
+	bool ok;	// flag
+
+		// buffers
+	FPGA_WORD N[MAX_OPERAND_WORDS];
+	FPGA_WORD M[MAX_OPERAND_WORDS];
+	FPGA_WORD D[MAX_OPERAND_WORDS];
+	FPGA_WORD S[MAX_OPERAND_WORDS];
+	FPGA_WORD R[MAX_OPERAND_WORDS];
+
+		// fill buffers with test vector (message is twice is large!)
+	load_value_from_rom(n_rom, N, len);
+	load_value_from_rom(m_rom, M, len << 1);
+	load_value_from_rom(d_rom, D, len);
+	load_value_from_rom(s_rom, S, len);
+
+		// calculate power
+	modexp_crt(M, D, N, R, len);
+
+		// check result
+	ok = compare_fpga_buffers(S, R, len);
+	if (!ok)
+	{	printf("    ERROR\n\n\n");
 		return false;
 	}
 
 		// everything went just fine
-	printf("\n    OK\n\n");
+	printf("    OK\n\n\n");
 	return true;
 }
 
-- 
cgit v1.2.3