From 2db58a7ba317da318eca5ae19dcc0e4899c423e1 Mon Sep 17 00:00:00 2001
From: "Pavel V. Shatov (Meister)" <meisterpaul1@yandex.ru>
Date: Tue, 18 Jul 2017 02:14:27 +0300
Subject: Changes to the model:  * Follow what Verilog does more closely: the
 FPGA can't do PP = P * P, because it can't read from two different locations
 of block memory P at the same time, so we have to do P1 = P2 = P,
 PP = P1 * P2  * Updated the test vector script to format additional
 quantities to help debug the Verilog exponentiation module  * Added the trick
 suggested by Bernd Paysan to better conceal whether we're discarding the
 multiplication result when the current exponent bit is not set

---
 modexp_fpga_model.h              |  6 +++++
 modexp_fpga_model_montgomery.cpp | 47 +++++++++++++++++++++++++++-------------
 test/format_test_vectors.py      |  6 +++--
 test/modexp_fpga_model_vectors.v | 22 +++++++++++++++++++
 4 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/modexp_fpga_model.h b/modexp_fpga_model.h
index 2a91d32..567b625 100644
--- a/modexp_fpga_model.h
+++ b/modexp_fpga_model.h
@@ -57,6 +57,12 @@ typedef uint64_t _WIDE_WORD;		// only used internally to mimic DSP slice operati
 #define SYSTOLIC_WIDTH		128		// width of systolic array in bits
 
 
+//----------------------------------------------------------------
+// Power Consumption Masking Constant
+//----------------------------------------------------------------
+#define POWER_MASK	0x5A5A5A5A
+
+
 //----------------------------------------------------------------
 // Handy values
 //----------------------------------------------------------------
diff --git a/modexp_fpga_model_montgomery.cpp b/modexp_fpga_model_montgomery.cpp
index 5bc5ba4..92a5e47 100644
--- a/modexp_fpga_model_montgomery.cpp
+++ b/modexp_fpga_model_montgomery.cpp
@@ -132,22 +132,35 @@ void montgomery_exponentiate(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_
 
 	bool flag_update_r;					// flag
 
-	FPGA_WORD P[MAX_OPERAND_WORDS];		// power of A
-	FPGA_WORD mask;						// mask		
+	FPGA_WORD T0[MAX_OPERAND_WORDS];	//
+	FPGA_WORD T1[MAX_OPERAND_WORDS];	//
+	FPGA_WORD T2[MAX_OPERAND_WORDS];	//
+
+	FPGA_WORD P1[MAX_OPERAND_WORDS];	//
+	FPGA_WORD P2[MAX_OPERAND_WORDS];	//
+	FPGA_WORD P3[MAX_OPERAND_WORDS];	//
+
+	FPGA_WORD mask;						//
 
 		// R = 1, P = 1
 	for (word_cnt=0; word_cnt<len; word_cnt++)
-		R[word_cnt] = (word_cnt > 0) ? 0 : 1,
-		P[word_cnt] = A[word_cnt];
+		T1[word_cnt] = (word_cnt > 0) ? 0 : 1,
+		T2[word_cnt] = (word_cnt > 0) ? 0 : 1,
+		P1[word_cnt] = A[word_cnt],
+		P2[word_cnt] = A[word_cnt],
+		P3[word_cnt] = A[word_cnt];
 
-	FPGA_WORD M_PP[MAX_OPERAND_WORDS];	// intermediate buffer for next power
-	FPGA_WORD M_RP[MAX_OPERAND_WORDS];	// intermediate buffer for next result
+	FPGA_WORD PP[MAX_OPERAND_WORDS];	// intermediate buffer for next power
+	FPGA_WORD TP[MAX_OPERAND_WORDS];	// intermediate buffer for next result
 
 		// scan all bits of the exponent
 	for (bit_cnt=0; bit_cnt<(len * CHAR_BIT * sizeof(FPGA_WORD)); bit_cnt++)
 	{
-		montgomery_multiply(P, P, N, N_COEFF, M_PP, len, false);	// M_PP = P * P
-		montgomery_multiply(R, P, N, N_COEFF, M_RP, len, false);	// M_RP = R * P
+		for (word_cnt=0; word_cnt<len; word_cnt++)
+			T0[word_cnt] = T1[word_cnt] ^ POWER_MASK;
+
+		montgomery_multiply(P1, P2, N, N_COEFF, PP, len, false);	// PP = P1 * P2
+		montgomery_multiply(T2, P3, N, N_COEFF, TP, len, false);	// TP =  T * P3
 		
 		word_index = bit_cnt / (CHAR_BIT * sizeof(FPGA_WORD));
 		bit_index = bit_cnt & ((CHAR_BIT * sizeof(FPGA_WORD)) - 1);
@@ -159,15 +172,19 @@ void montgomery_exponentiate(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_
 
 			// always update P
 		for (word_cnt=0; word_cnt<len; word_cnt++)
-			P[word_cnt] = M_PP[word_cnt];
+			P1[word_cnt] = PP[word_cnt],
+			P2[word_cnt] = PP[word_cnt],
+			P3[word_cnt] = PP[word_cnt];
 
-			// only update R when necessary
-		if (flag_update_r)
-		{
-			for (word_cnt=0; word_cnt<len; word_cnt++)
-				R[word_cnt] = M_RP[word_cnt];
-		}
+			// update T
+		for (word_cnt=0; word_cnt<len; word_cnt++)
+			T1[word_cnt] = flag_update_r ? TP[word_cnt] : T0[word_cnt] ^ POWER_MASK,
+			T2[word_cnt] = flag_update_r ? TP[word_cnt] : T0[word_cnt] ^ POWER_MASK;
 	}
+
+		// store result
+	for (word_cnt=0; word_cnt<len; word_cnt++)
+		R[word_cnt] = T1[word_cnt];
 }
 
 
diff --git a/test/format_test_vectors.py b/test/format_test_vectors.py
index 21b9262..c56fe18 100644
--- a/test/format_test_vectors.py
+++ b/test/format_test_vectors.py
@@ -194,7 +194,7 @@ def calc_montgomery_n_coeff(k, n):
 #
 # format one test vector
 #
-def format_verilog_include(f, key, n, m):
+def format_verilog_include(f, key, n, m, d, s):
 
 		# calculate factor to bring message into Montgomery domain
 	factor = calc_montgomery_factor(int(key), n)
@@ -215,6 +215,8 @@ def format_verilog_include(f, key, n, m):
 	format_verilog_concatenation(f, factor,   "localparam [" + str(int(key)-1) + ":0] FACTOR_"   + str(key) + " =\n")
 	format_verilog_concatenation(f, coeff,    "localparam [" + str(int(key)-1) + ":0] COEFF_"    + str(key) + " =\n")
 	format_verilog_concatenation(f, m_factor, "localparam [" + str(int(key)-1) + ":0] M_FACTOR_" + str(key) + " =\n")
+	format_verilog_concatenation(f, d,        "localparam [" + str(int(key)-1) + ":0] D_"        + str(key) + " =\n")
+	format_verilog_concatenation(f, s,        "localparam [" + str(int(key)-1) + ":0] S_"        + str(key) + " =\n")
 	
 	
 #
@@ -357,7 +359,7 @@ if __name__ == "__main__":
 			
 			# format numbers and write to file
 		format_c_header(file_h, key, modulus, message, secret, signature, prime1, prime2, exponent1, exponent2, message1, message2)
-		format_verilog_include(file_v, key, modulus, message)
+		format_verilog_include(file_v, key, modulus, message, secret, signature)
 
 
 		# done
diff --git a/test/modexp_fpga_model_vectors.v b/test/modexp_fpga_model_vectors.v
index 7a2b8e9..d5284c9 100644
--- a/test/modexp_fpga_model_vectors.v
+++ b/test/modexp_fpga_model_vectors.v
@@ -30,6 +30,16 @@ localparam [383:0] M_FACTOR_384 =
 	 32'h91e92683, 32'hc483bb6c, 32'h0ee1571d, 32'h6e28c2f5, 
 	 32'hff5e6b61, 32'h65fb6164, 32'hd3651e5a, 32'h746b8ca0};
 
+localparam [383:0] D_384 =
+	{32'had24a30c, 32'h766d8dc3, 32'he2100b02, 32'h24d1c4b0, 
+	 32'hbb6a6342, 32'h577df9be, 32'h89bb1ec3, 32'hdc3259f0, 
+	 32'h1a343f93, 32'h57a12599, 32'ha328ae2f, 32'hf85ef401};
+
+localparam [383:0] S_384 =
+	{32'h65752d0f, 32'h9a017293, 32'h36bfa115, 32'h4a7a81fc, 
+	 32'ha76b945b, 32'h49a3f645, 32'h76801499, 32'hb98e6a16, 
+	 32'hd2467b6a, 32'h75b7d614, 32'h0fff0fde, 32'hb31d1819};
+
 localparam [511:0] M_512 =
 	{32'h005536b6, 32'h43ea651f, 32'h2fd3c70a, 32'ha83659cb, 
 	 32'hd0c1f47b, 32'ha8033730, 32'h29c6b082, 32'h6db48613, 
@@ -66,3 +76,15 @@ localparam [511:0] M_FACTOR_512 =
 	 32'h663032a3, 32'h70734b62, 32'h2d30c132, 32'hefa75cc6, 
 	 32'h9f18b32a, 32'h97d6ddf8, 32'h2f6df2d0, 32'he9098874};
 
+localparam [511:0] D_512 =
+	{32'hc9686c43, 32'hbbe28d66, 32'h758ef8bc, 32'h9b7828e5, 
+	 32'h2ec2804a, 32'hb76745de, 32'h83fcbba0, 32'h2d9eba78, 
+	 32'h215f4cc2, 32'hf49387b3, 32'h8ed0b9dc, 32'h6c129231, 
+	 32'h944368be, 32'hdbf2db79, 32'h16323c49, 32'h34cdf801};
+
+localparam [511:0] S_512 =
+	{32'hcc2fc6b6, 32'he4849987, 32'h75773499, 32'hcb0792b0, 
+	 32'he79f4600, 32'hb2d739c5, 32'h1a661ac6, 32'hd3bf2db5, 
+	 32'hfd1e029d, 32'hfe887387, 32'h4312635f, 32'hb2b54b8d, 
+	 32'h5d3b379e, 32'h161eaa4f, 32'hedfd932b, 32'h780f0203};
+
-- 
cgit v1.2.3