From 22f6cc0496f29d909c3f777d7c9b59559ab5723d Mon Sep 17 00:00:00 2001
From: "Pavel V. Shatov (Meister)" <meisterpaul1@yandex.ru>
Date: Sat, 24 Jun 2017 23:29:33 +0300
Subject: Improved the model:  * added CRT support  * fixed bug in systolic
 array when operand width is not a multiple of array width

---
 modexp_fpga_model.cpp            | 171 +++++++++++++++++++++++++++++++++++----
 modexp_fpga_model.h              |   2 +-
 modexp_fpga_model_montgomery.cpp |  22 +++--
 modexp_fpga_model_montgomery.h   |   3 +-
 test/format_test_vectors.py      |  99 +++++++++++++++++++----
 test/modexp_fpga_model_vectors.h |  48 +++++++++++
 6 files changed, 306 insertions(+), 39 deletions(-)

diff --git a/modexp_fpga_model.cpp b/modexp_fpga_model.cpp
index b19cdeb..e1c7f4e 100644
--- a/modexp_fpga_model.cpp
+++ b/modexp_fpga_model.cpp
@@ -59,16 +59,30 @@
 //----------------------------------------------------------------
 // Test vectors
 //----------------------------------------------------------------
-static const FPGA_WORD N_384_ROM[] = N_384;		// 384-bit
-static const FPGA_WORD M_384_ROM[] = M_384;		//
-static const FPGA_WORD D_384_ROM[] = D_384;		//
-static const FPGA_WORD S_384_ROM[] = S_384;		//
+static const FPGA_WORD N_384_ROM[]  = N_384;	// 384-bit
+static const FPGA_WORD M_384_ROM[]  = M_384;	//
+static const FPGA_WORD D_384_ROM[]  = D_384;	//
+static const FPGA_WORD S_384_ROM[]  = S_384;	//
+
+static const FPGA_WORD P_384_ROM[]  = P_384;	// 192-bit
+static const FPGA_WORD Q_384_ROM[]  = Q_384;	//
+static const FPGA_WORD DP_384_ROM[] = DP_384;	//
+static const FPGA_WORD DQ_384_ROM[] = DQ_384;	//
+static const FPGA_WORD MP_384_ROM[] = MP_384;	//
+static const FPGA_WORD MQ_384_ROM[] = MQ_384;	//
 
 static const FPGA_WORD N_512_ROM[] = N_512;		// 512-bit
 static const FPGA_WORD M_512_ROM[] = M_512;		//
 static const FPGA_WORD D_512_ROM[] = D_512;		//
 static const FPGA_WORD S_512_ROM[] = S_512;		//
 
+static const FPGA_WORD P_512_ROM[]  = P_512;	// 256-bit
+static const FPGA_WORD Q_512_ROM[]  = Q_512;	//
+static const FPGA_WORD DP_512_ROM[] = DP_512;	//
+static const FPGA_WORD DQ_512_ROM[] = DQ_512;	//
+static const FPGA_WORD MP_512_ROM[] = MP_512;	//
+static const FPGA_WORD MQ_512_ROM[] = MQ_512;	//
+
 
 //----------------------------------------------------------------
 // Prototypes
@@ -77,11 +91,17 @@ void print_fpga_buffer		(const char *str, const FPGA_WORD *buf, size_t len);
 bool compare_fpga_buffers	(const FPGA_WORD *src, const FPGA_WORD *dst, size_t len);
 void load_value_from_rom	(const FPGA_WORD *src, FPGA_WORD *dst, size_t len);
 
-void modexp			(const FPGA_WORD *M, const FPGA_WORD *D,
-					 const FPGA_WORD *N,       FPGA_WORD *R, size_t len);
+void modexp				(const FPGA_WORD *M, const FPGA_WORD *D,
+						 const FPGA_WORD *N,       FPGA_WORD *R, size_t len);
+
+void modexp_crt			(const FPGA_WORD *M, const FPGA_WORD *D,
+						 const FPGA_WORD *N,       FPGA_WORD *R, size_t len);
+
+bool test_modexp		(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom,
+						 const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len);
 
-bool test_modexp	(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom,
-					 const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len);
+bool test_modexp_crt	(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom,
+						 const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len);
 
 
 //----------------------------------------------------------------
@@ -94,10 +114,26 @@ int main()
 	ok = test_modexp(N_384_ROM, M_384_ROM, D_384_ROM, S_384_ROM, OPERAND_NUM_WORDS_384);
 	if (!ok) return EXIT_FAILURE;
 
+	printf("Trying to exponentiate 384-bit message with 192-bit prime P and exponent dP...\n\n");
+	ok = test_modexp_crt(P_384_ROM, M_384_ROM, DP_384_ROM, MP_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
+	if (!ok) return EXIT_FAILURE;
+
+	printf("Trying to exponentiate 384-bit message with 192-bit prime Q and exponent dQ...\n\n");
+	ok = test_modexp_crt(Q_384_ROM, M_384_ROM, DQ_384_ROM, MQ_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
+	if (!ok) return EXIT_FAILURE;
+
 	printf("Trying to sign 512-bit message...\n\n");
 	ok = test_modexp(N_512_ROM, M_512_ROM, D_512_ROM, S_512_ROM, OPERAND_NUM_WORDS_512);
 	if (!ok) return EXIT_FAILURE;
 
+	printf("Trying to exponentiate 512-bit message with 256-bit prime P and exponent dP...\n\n");
+	ok = test_modexp_crt(P_512_ROM, M_512_ROM, DP_512_ROM, MP_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
+	if (!ok) return EXIT_FAILURE;
+
+	printf("Trying to exponentiate 512-bit message with 256-bit prime Q and exponent dQ...\n\n");
+	ok = test_modexp_crt(Q_512_ROM, M_512_ROM, DQ_512_ROM, MQ_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
+	if (!ok) return EXIT_FAILURE;
+
 	return EXIT_SUCCESS;
 }
 
@@ -126,7 +162,7 @@ void modexp(	const FPGA_WORD *M,
 	montgomery_calc_n_coeff(N, N_COEFF, len);
 		
 		// bring M into Montgomery domain
-	montgomery_multiply(M, FACTOR, N, N_COEFF, M_FACTOR, len);
+	montgomery_multiply(M, FACTOR, N, N_COEFF, M_FACTOR, len, false);
 
 		/*
 		 * Montgomery multiplication adds an extra factor of 2 ^ -w to every product.
@@ -155,6 +191,69 @@ void modexp(	const FPGA_WORD *M,
 }
 
 
+//----------------------------------------------------------------
+// Modular exponentiation routine with CRT support
+//----------------------------------------------------------------
+void modexp_crt(	const FPGA_WORD *M,
+					const FPGA_WORD *D,
+					const FPGA_WORD *N,
+					      FPGA_WORD *R,
+						  size_t     len)
+//----------------------------------------------------------------
+//
+// R = (M mod N) ** D mod N
+//
+//----------------------------------------------------------------
+{
+		// temporary buffers
+	FPGA_WORD M0     [MAX_OPERAND_WORDS];
+	FPGA_WORD M1     [MAX_OPERAND_WORDS];
+	FPGA_WORD FACTOR [MAX_OPERAND_WORDS];
+	FPGA_WORD N_COEFF[MAX_OPERAND_WORDS];
+	FPGA_WORD M_FACTOR[MAX_OPERAND_WORDS];
+
+		// pre-calculate modulus-dependent coefficients
+	montgomery_calc_factor(N, FACTOR, len);
+	montgomery_calc_n_coeff(N, N_COEFF, len);
+	
+		// reduce M to make it smaller than N
+	montgomery_multiply(M, FACTOR, N, N_COEFF, M0, len, true);
+
+		// bring M into Montgomery domain
+	montgomery_multiply(M0, FACTOR, N, N_COEFF, M1,       len, false);
+	montgomery_multiply(M1, FACTOR, N, N_COEFF, M_FACTOR, len, false);
+
+		/*
+		 * Montgomery multiplication adds an extra factor of 2 ^ -w to every product,
+		 * Montgomery reduction adds that factor too. The message must be reduced before
+		 * exponentiation, because in CRT mode it is twice as large as the modulus
+		 * and the exponent. After reduction the message carries an extra factor of
+		 * 2 ^ -w. We pre-calculate a special factor of 2 ^ 2w and multiply the message
+		 * by this factor *twice* using our Montgomery multiplier. This way we get the
+		 * message with an extra factor of just 2 ^ w:
+		 * 1. (m * 2 ^ -w) * (2 ^ 2w) * (2 ^ -w) = m
+		 * 2. (m) * (2 ^ 2w) * (2 ^ -w) = m * 2 ^ w
+		 *
+		 * Now we feed this message with that extra factor to the binary exponentiation
+		 * routine. The current power of m will always keep that additional factor:
+		 * (p * 2 ^ w) * (p * 2 ^ w) * (2 ^ -w) = p ^ 2 * 2 ^ w
+		 *
+		 * The result starts at 1, i.e. without any extra factors. If at any particular
+		 * iteration it gets multiplied with the current power of m, the product will
+		 * not carry any extra factors, because the power's factor gets eliminated
+		 * by the extra factor of Montgomery multiplication:
+		 * (r) * (p * 2 ^ w) * (2 ^ -w) = r * p
+		 *
+		 * This way we don't need any extra post-processing to convert the final result
+		 * from Montgomery domain. 
+		 *
+		 */
+
+		// exponentiate
+	montgomery_exponentiate(M_FACTOR, D, N, N_COEFF, R, len);
+}
+
+
 //----------------------------------------------------------------
 // Copies words from src into dst reversing their order
 //----------------------------------------------------------------
@@ -246,13 +345,13 @@ void print_fpga_buffer(const char *str, const FPGA_WORD *buf, size_t len)
 
 
 //----------------------------------------------------------------
-// Test the modular multiplication model
+// Test the modular exponentiation model
 //----------------------------------------------------------------
 bool test_modexp(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len)
 //----------------------------------------------------------------
 //
-// This routine uses the Montgomery exponentiation routine to
-// calculate r = m ** d mod m, and then compares it to the
+// This routine uses the Montgomery exponentiation model to
+// calculate r = m ** d mod n, and then compares it to the
 // reference value s.
 //
 //----------------------------------------------------------------
@@ -278,12 +377,56 @@ bool test_modexp(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, const FPGA_WORD
 		// check result
 	ok = compare_fpga_buffers(S, R, len);
 	if (!ok)
-	{	printf("\n    ERROR\n\n");
+	{	printf("    ERROR\n\n\n");
+		return false;
+	}
+
+		// everything went just fine
+	printf("    OK\n\n\n");
+	return true;
+}
+
+
+//----------------------------------------------------------------
+// Test the modular exponentiation model with CRT enabled
+//----------------------------------------------------------------
+bool test_modexp_crt(const FPGA_WORD *n_rom, const FPGA_WORD *m_rom, const FPGA_WORD *d_rom, const FPGA_WORD *s_rom, size_t len)
+//----------------------------------------------------------------
+//
+// This routine uses the Montgomery exponentiation model to
+// calculate r = (m mod n) ** d mod n, and then compares it to the
+// reference value s. The difference from test_modexp() is that
+// m_rom is twice as large as n_rom and d_rom.
+//
+//----------------------------------------------------------------
+{
+	bool ok;	// flag
+
+		// buffers
+	FPGA_WORD N[MAX_OPERAND_WORDS];
+	FPGA_WORD M[MAX_OPERAND_WORDS];
+	FPGA_WORD D[MAX_OPERAND_WORDS];
+	FPGA_WORD S[MAX_OPERAND_WORDS];
+	FPGA_WORD R[MAX_OPERAND_WORDS];
+
+		// fill buffers with test vector (message is twice as large!)
+	load_value_from_rom(n_rom, N, len);
+	load_value_from_rom(m_rom, M, len << 1);
+	load_value_from_rom(d_rom, D, len);
+	load_value_from_rom(s_rom, S, len);
+
+		// calculate power
+	modexp_crt(M, D, N, R, len);
+
+		// check result
+	ok = compare_fpga_buffers(S, R, len);
+	if (!ok)
+	{	printf("    ERROR\n\n\n");
 		return false;
 	}
 
 		// everything went just fine
-	printf("\n    OK\n\n");
+	printf("    OK\n\n\n");
 	return true;
 }
 
diff --git a/modexp_fpga_model.h b/modexp_fpga_model.h
index 2a91d32..f30a41b 100644
--- a/modexp_fpga_model.h
+++ b/modexp_fpga_model.h
@@ -31,7 +31,7 @@
 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 
 
diff --git a/modexp_fpga_model_montgomery.cpp b/modexp_fpga_model_montgomery.cpp
index d1cca60..34ef2b6 100644
--- a/modexp_fpga_model_montgomery.cpp
+++ b/modexp_fpga_model_montgomery.cpp
@@ -46,7 +46,7 @@
 //----------------------------------------------------------------
 // Montgomery modular multiplier
 //----------------------------------------------------------------
-void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD *N, const FPGA_WORD *N_COEFF, FPGA_WORD *R, size_t len)
+void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD *N, const FPGA_WORD *N_COEFF, FPGA_WORD *R, size_t len, bool reduce_only)
 //----------------------------------------------------------------
 //
 // R = A * B * 2^-len mod N
@@ -94,8 +94,11 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
 	FPGA_WORD S [2 * MAX_OPERAND_WORDS];							// final sum
 	FPGA_WORD SN[2 * MAX_OPERAND_WORDS];							// final difference
 
-		// number of systolic cycles needed to multiply entire B by one word of A
+		// number of full systolic cycles needed to multiply entire B by one word of A
 	size_t num_systolic_cycles = len / SYSTOLIC_NUM_WORDS;
+	
+		// adjust number of cycles
+	if ((num_systolic_cycles * SYSTOLIC_NUM_WORDS) < len) num_systolic_cycles++;
 
 		// initialize arrays of accumulators and carries to zeroes
 	for (i=0; i<num_systolic_cycles; i++)
@@ -117,10 +120,12 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
 				// simulate how a systolic array would work
 			for (j = 0; j < SYSTOLIC_NUM_WORDS; j++)
 			{
+				size_t j_index = k * SYSTOLIC_NUM_WORDS + j;
+
 					// current words of B, N_COEFF, N
-				FPGA_WORD Bj       = B      [k * SYSTOLIC_NUM_WORDS + j];
-				FPGA_WORD N_COEFFj = N_COEFF[k * SYSTOLIC_NUM_WORDS + j];
-				FPGA_WORD Nj       = N      [k * SYSTOLIC_NUM_WORDS + j];
+				FPGA_WORD Bj       = (j_index < len) ? B      [k * SYSTOLIC_NUM_WORDS + j] : 0;
+				FPGA_WORD N_COEFFj = (j_index < len) ? N_COEFF[k * SYSTOLIC_NUM_WORDS + j] : 0;
+				FPGA_WORD Nj       = (j_index < len) ? N      [k * SYSTOLIC_NUM_WORDS + j] : 0;
 
 					// current word of A
 				FPGA_WORD Aj_ab = (i < len) ? A[i] : 0;
@@ -129,7 +134,7 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
 				pe_mul(Aj_ab, Bj, t_ab[k][j], c_in_ab[k][j], &s_ab[k][j], &c_out_ab[k][j]);
 
 					// store current word of AB
-				if ((k == 0) && (j == 0)) AB[i] = s_ab[0][0];
+				if ((k == 0) && (j == 0)) AB[i] = reduce_only ? A[i] : s_ab[0][0];
 
 					// current word of AB
 				FPGA_WORD Aj_q = (i < len) ? AB[i] : 0;
@@ -225,8 +230,8 @@ void montgomery_exponentiate(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_
 		// scan all bits of the exponent
 	for (bit_cnt=0; bit_cnt<(len * CHAR_BIT * sizeof(FPGA_WORD)); bit_cnt++)
 	{
-		montgomery_multiply(P, P, N, N_COEFF, M_PP, len);	// M_PP = P * P
-		montgomery_multiply(R, P, N, N_COEFF, M_RP, len);	// M_RP = R * P
+		montgomery_multiply(P, P, N, N_COEFF, M_PP, len, false);	// M_PP = P * P
+		montgomery_multiply(R, P, N, N_COEFF, M_RP, len, false);	// M_RP = R * P
 		
 		word_index = bit_cnt / (CHAR_BIT * sizeof(FPGA_WORD));
 		bit_index = bit_cnt & ((CHAR_BIT * sizeof(FPGA_WORD)) - 1);
@@ -308,6 +313,7 @@ void montgomery_calc_factor(const FPGA_WORD *N, FPGA_WORD *FACTOR, size_t len)
 		for (j=0; j<len; j++)
 			FACTOR[j] = flag_keep_f ? FACTOR[j] : FACTOR_N[j];
 	}
+
 }
 
 
diff --git a/modexp_fpga_model_montgomery.h b/modexp_fpga_model_montgomery.h
index bb4dbae..3f9bc9f 100644
--- a/modexp_fpga_model_montgomery.h
+++ b/modexp_fpga_model_montgomery.h
@@ -43,7 +43,8 @@ void montgomery_multiply(		const FPGA_WORD *A,
 								const FPGA_WORD *N,
 								const FPGA_WORD *N_COEFF,
 								      FPGA_WORD *R,
-								      size_t     len);
+								      size_t     len,
+									  bool       reduce_only);
 
 void montgomery_exponentiate(	const FPGA_WORD *A,
 								const FPGA_WORD *B,
diff --git a/test/format_test_vectors.py b/test/format_test_vectors.py
index dd8670d..21b9262 100644
--- a/test/format_test_vectors.py
+++ b/test/format_test_vectors.py
@@ -79,6 +79,46 @@ def read_secret(key):
 	openssl_secret = openssl_secret.replace(" ", "")	
 	return openssl_secret
 
+#
+# read part of private key from file
+#
+def read_prime1(key):
+	openssl_command = ["openssl", "rsa", "-in", key + ".key", "-noout", "-text"]
+	openssl_stdout = subprocess.check_output(openssl_command).decode("utf-8")
+	openssl_secret = string_between(openssl_stdout, "prime1", "prime2")
+	openssl_secret = openssl_secret.replace(":", "")
+	openssl_secret = openssl_secret.replace("\n", "")
+	openssl_secret = openssl_secret.replace(" ", "")	
+	return openssl_secret
+def read_prime2(key):
+	openssl_command = ["openssl", "rsa", "-in", key + ".key", "-noout", "-text"]
+	openssl_stdout = subprocess.check_output(openssl_command).decode("utf-8")
+	openssl_secret = string_between(openssl_stdout, "prime2", "exponent1")
+	openssl_secret = openssl_secret.replace(":", "")
+	openssl_secret = openssl_secret.replace("\n", "")
+	openssl_secret = openssl_secret.replace(" ", "")	
+	return openssl_secret
+
+#
+# read private exponents (exponent1, exponent2) from file
+#
+def read_exponent1(key):
+	openssl_command = ["openssl", "rsa", "-in", key + ".key", "-noout", "-text"]
+	openssl_stdout = subprocess.check_output(openssl_command).decode("utf-8")
+	openssl_secret = string_between(openssl_stdout, "exponent1", "exponent2")
+	openssl_secret = openssl_secret.replace(":", "")
+	openssl_secret = openssl_secret.replace("\n", "")
+	openssl_secret = openssl_secret.replace(" ", "")	
+	return openssl_secret
+def read_exponent2(key):
+	openssl_command = ["openssl", "rsa", "-in", key + ".key", "-noout", "-text"]
+	openssl_stdout = subprocess.check_output(openssl_command).decode("utf-8")
+	openssl_secret = string_between(openssl_stdout, "exponent2", "coefficient")
+	openssl_secret = openssl_secret.replace(":", "")
+	openssl_secret = openssl_secret.replace("\n", "")
+	openssl_secret = openssl_secret.replace(" ", "")	
+	return openssl_secret
+
 # 
 # https://en.wikibooks.org/wiki/Algorithm_Implementation/Mathematics/Extended_Euclidean_algorithm
 #
@@ -99,13 +139,19 @@ def modinv(a, m):
 #
 # format one test vector
 #
-def format_c_header(f, key, n, m, d, s):
+def format_c_header(f, key, n, m, d, s, p, q, dp, dq, mp, mq):
 
 		# write all numbers in vector
-	format_c_array(f, n, "#define N_" + str(key) + " \\\n")
-	format_c_array(f, m, "#define M_" + str(key) + " \\\n")
-	format_c_array(f, d, "#define D_" + str(key) + " \\\n")
-	format_c_array(f, s, "#define S_" + str(key) + " \\\n")
+	format_c_array(f, n,  "#define N_"  + str(key) + " \\\n")
+	format_c_array(f, m,  "#define M_"  + str(key) + " \\\n")
+	format_c_array(f, d,  "#define D_"  + str(key) + " \\\n")
+	format_c_array(f, s,  "#define S_"  + str(key) + " \\\n")
+	format_c_array(f, p,  "#define P_"  + str(key) + " \\\n")
+	format_c_array(f, q,  "#define Q_"  + str(key) + " \\\n")
+	format_c_array(f, dp, "#define DP_" + str(key) + " \\\n")
+	format_c_array(f, dq, "#define DQ_" + str(key) + " \\\n")
+	format_c_array(f, mp, "#define MP_" + str(key) + " \\\n")
+	format_c_array(f, mq, "#define MQ_" + str(key) + " \\\n")
 
 #
 # calculate Montgomery factor
@@ -274,20 +320,43 @@ if __name__ == "__main__":
 	for key in keys:
 	
 			# prepare all the numbers
-		modulus = int(read_modulus(key), 16)		# read number n from .key file
-		message = int(read_message(key), 16)		# read number m from .txt file
-		secret  = int(read_secret(key),  16)		# read number d from .key file
-		signature = pow(message, secret, modulus)	# calculate signature
+		modulus = int(read_modulus(key), 16)			# read number n from .key file
+		message = int(read_message(key), 16)			# read number m from .txt file
+		secret  = int(read_secret(key),  16)			# read number d from .key file
+		signature = pow(message, secret, modulus)		# calculate signature
+		prime1 = int(read_prime1(key), 16)				# read p
+		prime2 = int(read_prime2(key), 16)				# read q
+		exponent1 = int(read_exponent1(key), 16)		# read dp
+		exponent2 = int(read_exponent2(key), 16)		# read dq
+		message1 = pow(message, exponent1, prime1)		# calculate mp = m ^ dp mod p
+		message2 = pow(message, exponent2, prime2)		# calculate mq = m ^ dq mod q
+		coefficient = modinv(prime2, prime1)			# calculate qinv = q ^ -1 mod p
+
+			# do CRT to make sure everything is correct
+		h = coefficient * (message1 - message2) % prime1
+		crt = message2 + h * prime2
 		
 			# print all the numbers
 		print("key = " + key)
-		print("  modulus   = " + hex(modulus))
-		print("  message   = " + hex(message))
-		print("  secret    = " + hex(secret))
-		print("  signature = " + hex(signature))
-
+		print("  modulus     = " + hex(modulus))
+		print("  message     = " + hex(message))
+		print("  secret      = " + hex(secret))
+		print("  signature   = " + hex(signature))
+		print("  prime1      = " + hex(prime1))
+		print("  prime2      = " + hex(prime2))
+		print("  exponent1   = " + hex(exponent1))
+		print("  exponent2   = " + hex(exponent2))
+		print("  message1    = " + hex(message1))
+		print("  message2    = " + hex(message2))
+		print("  coefficient = " + hex(coefficient))
+		print("  crt         = " + hex(crt))
+		
+			# check
+		if crt != signature:
+			raise Exception("Error, crt != signature (?)")			
+			
 			# format numbers and write to file
-		format_c_header(file_h, key, modulus, message, secret, signature)
+		format_c_header(file_h, key, modulus, message, secret, signature, prime1, prime2, exponent1, exponent2, message1, message2)
 		format_verilog_include(file_v, key, modulus, message)
 
 
diff --git a/test/modexp_fpga_model_vectors.h b/test/modexp_fpga_model_vectors.h
index d889ada..622b16c 100644
--- a/test/modexp_fpga_model_vectors.h
+++ b/test/modexp_fpga_model_vectors.h
@@ -20,6 +20,30 @@
 	 0xa76b945b, 0x49a3f645, 0x76801499, 0xb98e6a16, \
 	 0xd2467b6a, 0x75b7d614, 0x0fff0fde, 0xb31d1819}
 
+#define P_384 \
+	{0xe9ac4cf6, 0x03b2d80a, 0x7f1d091e, 0x49d5f1a0, \
+	 0xac2ae4ff, 0xbf9bf375}
+
+#define Q_384 \
+	{0xc1468f3e, 0xc6909231, 0x5a4d74ba, 0x477b303f, \
+	 0x4b2e10d1, 0x1f44e815}
+
+#define DP_384 \
+	{0x69b6c286, 0x95fbc613, 0x51988034, 0x8cb0d684, \
+	 0x9aff38e4, 0x9ef9ddb5}
+
+#define DQ_384 \
+	{0x1eda82b7, 0x84bf4377, 0x39712ff7, 0x24be179f, \
+	 0xa302c190, 0x80ab6159}
+
+#define MP_384 \
+	{0x9e163bb5, 0x35e718cb, 0xcde52b7b, 0x5db8552b, \
+	 0x46a300e0, 0x34f91e6b}
+
+#define MQ_384 \
+	{0x7b01a724, 0x90f0d5f9, 0x9e237ce5, 0x6d31fd28, \
+	 0x4ecb9dad, 0x58bf366a}
+
 #define N_512 \
 	{0xef78b4ed, 0xaee1cc78, 0x659b9935, 0x39d5f5e1, \
 	 0xa47c2b29, 0x5a38e8c4, 0x85e2b846, 0xa354614f, \
@@ -44,3 +68,27 @@
 	 0xfd1e029d, 0xfe887387, 0x4312635f, 0xb2b54b8d, \
 	 0x5d3b379e, 0x161eaa4f, 0xedfd932b, 0x780f0203}
 
+#define P_512 \
+	{0xfedea889, 0x97cfdb79, 0xcca87074, 0xe5abcda1, \
+	 0x3be201c4, 0xc416fd15, 0xf2130931, 0x61ff5937}
+
+#define Q_512 \
+	{0xf0889147, 0x5aa60f93, 0xb9927d86, 0x8f795c5c, \
+	 0x8e98dcf2, 0xad3aad74, 0x9441583a, 0x967dce41}
+
+#define DP_512 \
+	{0x2504d437, 0xfffbe9e5, 0xfc0aef22, 0x9b8563bd, \
+	 0xaa83fe3b, 0xc53b8d91, 0x15731c5f, 0xb6db2eeb}
+
+#define DQ_512 \
+	{0xd3265fba, 0x2eb65638, 0x4d106ec7, 0x000dfe69, \
+	 0x75f87505, 0x47d299d0, 0x1c115cdd, 0x599ca8c1}
+
+#define MP_512 \
+	{0x23359955, 0xcad299b6, 0x049bb248, 0x3828b6a5, \
+	 0x74c85825, 0x7dd8e109, 0x07edbda9, 0x4980c2c9}
+
+#define MQ_512 \
+	{0x8578120b, 0x91f4ca9e, 0x371d3e70, 0x0005bb89, \
+	 0xd31ed864, 0x477bd9cf, 0x65a1f03b, 0x606d3bc8}
+
-- 
cgit v1.2.3