1 files changed, 92 insertions, 27 deletions
diff --git a/modexpng_fpga_model.py b/modexpng_fpga_model.py
index 0726eaa..d33f314 100644
--- a/modexpng_fpga_model.py
+++ b/modexpng_fpga_model.py
@@ -79,7 +79,7 @@ DUMP_INDICES = False
 DUMP_MACS_CLEARING = False
 DUMP_MACS_ACCUMULATION = False
 DUMP_MULT_PARTS = False
-DUMP_RCMB = True
+DUMP_RCMB = False
 
 
 #
@@ -119,7 +119,7 @@ class ModExpNG_Operand():
         for i in range(count):
 
             # word must not exceed 17 bits
-            if words[i] >= (2 ** (_WORD_WIDTH + 1)):
+            if words[i] >= (2 ** (_WORD_WIDTH + 2)):
                 raise Exception("Word is too large!")
 
         self.words = words
@@ -274,13 +274,12 @@ class ModExpNG_PartRecombinator():
         # merge upper half adding the two overlapping words
         for x in range(ab_num_words):
             next_word = words_msb[x]
-            if x < 2:
+            if x < 2:                
                 next_word += words_lsb[x + ab_num_words]
             words.append(next_word)
 
         return words
 
-
     def recombine_triangle(self, parts, ab_num_words, dump):
 
         # empty result so far
@@ -303,21 +302,62 @@ class ModExpNG_PartRecombinator():
     def recombine_rectangle(self, parts, ab_num_words, dump):
 
         # empty result so far
+        words_lsb = list()  # n words
+        words_msb = list()  # n+1 words
+
+        # recombine the lower half (n parts)
+        # the first tick produces null result, the last part
+        # produces three words and needs two extra ticks
+        self._flush_pipeline(dump)
+        for i in range(ab_num_words + 1 + 2):
+            next_part = parts[i] if i < ab_num_words else 0
+            next_word = self._push_pipeline(next_part, dump)
+
+            if i > 0:
+                words_lsb.append(next_word)
+
+        # recombine the upper half (n parts)
+        # the first tick produces null result, the last part
+        # produces two words and needs an extra tick
+        self._flush_pipeline(dump)
+        for i in range(ab_num_words + 2):
+            next_part = parts[i + ab_num_words] if i < ab_num_words else 0
+            next_word = self._push_pipeline(next_part, dump)
+
+            if i > 0:
+                words_msb.append(next_word)
+                
+        # merge words
         words = list()
 
+        # merge lower half
+        for x in range(ab_num_words):
+            next_word = words_lsb[x]
+            words.append(next_word)
+
+        # merge upper half adding the two overlapping words
+        for x in range(ab_num_words + 1):
+            next_word = words_msb[x]
+            if x < 2:
+                next_word += words_lsb[x + ab_num_words]
+            words.append(next_word)
+
+        return words
+
+                
         # flush recombinator pipeline
-        self._flush_pipeline(dump)
+        #self._flush_pipeline(dump)
 
         # the first tick produces null result, the last part produces
         # two words, so we need 2 * n + 2 ticks total and should only save
         # the result word during the last 2 * n + 1 ticks
-        for i in range(2 * ab_num_words + 2):
+        #for i in range(2 * ab_num_words + 2):
 
-            next_part = parts[i] if i < (2 * ab_num_words) else 0
-            next_word = self._push_pipeline(next_part, dump)
+            #next_part = parts[i] if i < (2 * ab_num_words) else 0
+            #next_word = self._push_pipeline(next_part, dump)
 
-            if i > 0:
-                words.append(next_word)
+            #if i > 0:
+                #words.append(next_word)
 
         return words
 
@@ -369,11 +409,11 @@ class ModExpNG_WordMultiplier():
         if b > 0xFFFF:
             self._b_seen_17 = True
 
-        if a > 0x1FFFF:
-            raise("a > 0x1FFFF!")
+        if a > 0x3FFFF:
+            raise Exception("a > 0x3FFFF!")
 
         if b > 0x1FFFF:
-            raise("b > 0x1FFFF!")
+            raise Exception("b > 0x1FFFF!")
 
         p = a * b
         self._macs[x] += p
@@ -451,6 +491,8 @@ class ModExpNG_WordMultiplier():
 
         for col in range(num_cols):
 
+            bt_carry = 0
+        
             for t in range(ab_num_words):
 
                 # take care of indices
@@ -469,7 +511,10 @@ class ModExpNG_WordMultiplier():
                 if dump and DUMP_INDICES: self._dump_indices(t, col)
 
                 # current b-word
-                bt = b_narrow.words[t]
+                bt = b_narrow.words[t] + bt_carry
+                bt_carry = bt >> _WORD_WIDTH
+                bt &= 0xffff
+                
 
                 # multiply by a-words
                 for x in range(NUM_MULTS):
@@ -659,6 +704,8 @@ class ModExpNG_LowlevelOperator():
 
 class ModExpNG_Worker():
 
+    max_zzz = 0
+
     def __init__(self):
         self.recombinator = ModExpNG_PartRecombinator()
         self.multiplier   = ModExpNG_WordMultiplier()
@@ -764,26 +811,40 @@ class ModExpNG_Worker():
         m = ModExpNG_Operand(None, 2 * ab_num_words + 1, m_words)
 
         # 4.
-        r_xwords = list()
-        for i in range(2*ab_num_words):
-            r_xwords.append(ab.words[i] + m.words[i])
-
-        r_xwords.append(m.words[2 * ab_num_words])
-
         cy = 0
-        for i in range(ab_num_words+1):
-            s = r_xwords[i] + cy
+        for i in range(ab_num_words + 1):
+            s = ab.words[i] + m.words[i] + cy
             cy = s >> 16
-
+            
         R = list()
         for i in range(ab_num_words):
             R.append(0)
 
-        R[0] += cy # !!!
-
+        R[0] = cy # !!! (cy is 2 bits, i.e. 0..3)
+        
+        if dump:
+            if ab.words[ab_num_words + 2] > 0:
+                ab.words[ab_num_words + 2] -= 1
+                ab.words[ab_num_words + 1] += 0x10000
+            if m.words[ab_num_words + 2] > 0:
+                m.words[ab_num_words + 2] -= 1
+                m.words[ab_num_words + 1] += 0x10000
+        
         for i in range(ab_num_words):
-            R[i] += r_xwords[ab_num_words + i + 1]
-
+            ab_word = ab.words[ab_num_words + i + 1] if i < (ab_num_words - 1) else 0
+            m_word = m.words[ab_num_words + i + 1]
+            
+            R[i] += ab_word + m_word
+
+            #if i == 0:
+                #if R[i] > self.max_zzz:
+                    #self.max_zzz = R[i]
+                    #print("self.max_zzz = %05x" % R[i])
+                #if R[i] > 0x1ffff:
+                    #sys.exit(123)
+                
+            
+                
         return ModExpNG_Operand(None, ab_num_words, R)
 
     def reduce(self, a):
@@ -816,6 +877,7 @@ if __name__ == "__main__":
     x_mutated_known = pow(vector.x.number(), 2, vector.n.number())
     y_mutated_known = pow(vector.y.number(), 2, vector.n.number())
 
+    
     # bring one into Montgomery domain (glue 2**r to one)
     # bring blinding coefficients into Montgomery domain (glue 2**(2*r) to x and y)
     # blind message
@@ -864,6 +926,7 @@ if __name__ == "__main__":
     else:
         print("17-bit wide B's not detected.")
 
+        
 
     sp_blind                     = worker.multiply(i,                            sp_blind_factor,  vector.p, vector.p_coeff, pq_num_words)
     sq_blind                     = worker.multiply(i,                            sq_blind_factor,  vector.q, vector.q_coeff, pq_num_words)
@@ -874,6 +937,8 @@ if __name__ == "__main__":
     sr_qinv_blind                = worker.multiply(sr_qinv_blind_inverse_factor, vector.p_factor,  vector.p, vector.p_coeff, pq_num_words)
     q_sr_qinv_blind              = worker.multiply(vector.q,                     sr_qinv_blind,    None,     None,           pq_num_words, multiply_only=True)
 
+    worker.reduce(q_sr_qinv_blind)
+    
     s_crt_blinded                = worker.add(sq_blind, q_sr_qinv_blind, pq_num_words)
 
     s_crt_unblinded              = worker.multiply(s_crt_blinded,                x_factor,         vector.n, vector.n_coeff, n_num_words)