From 766bb937c472d027b217216859d57b90e6bc6a6e Mon Sep 17 00:00:00 2001
From: "Pavel V. Shatov (Meister)"
Date: Tue, 2 Apr 2019 01:02:15 +0300
Subject: Rewrote "square" recombination to match how it works in hardware.

---
NOTE(review): this patch was reconstructed from a copy whose line breaks and
indentation had been collapsed. Hunk line counts and the diffstat below were
re-checked and are consistent, but the indentation of context lines and any
whitespace on blank lines are inferred, so applying it may need
`git apply --ignore-whitespace`.

The only intentional content change: _update_one_mac() now raises
ValueError(...) instead of `raise("...")`. Raising a plain str is itself a
TypeError in Python 3, so the operand range check never reported the intended
message. The blob hash on the `index` line predates this change.

 modexpng_fpga_model.py | 94 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 72 insertions(+), 22 deletions(-)

diff --git a/modexpng_fpga_model.py b/modexpng_fpga_model.py
index c73532f..73a21d3 100644
--- a/modexpng_fpga_model.py
+++ b/modexpng_fpga_model.py
@@ -238,24 +238,48 @@ class ModExpNG_PartRecombinator():
 
     def recombine_square(self, parts, ab_num_words, dump):
 
-        # empty result so far
-        words = list()
-
-        # flush recombinator pipeline
+        # empty results so far
+        words_lsb = list() # n words
+        words_msb = list() # n words
+
+        # recombine the lower half (n parts)
+        # the first tick produces null result, the last part
+        # produces three words and needs two extra ticks
         self._flush_pipeline(dump)
-
-        # the first tick produces null result, the last part produces
-        # two words, so we need (2*n - 1) + 2 = 2*n + 1 ticks total
-        # and should only save the result word during the last 2 * n ticks
-        for i in range(2 * ab_num_words + 1):
-
-            next_part = parts[i] if i < (2 * ab_num_words - 1) else 0
+        for i in range(ab_num_words + 1 + 2):
+            next_part = parts[i] if i < ab_num_words else 0
             next_word = self._push_pipeline(next_part, dump)
-
+
             if i > 0:
-                words.append(next_word)
-
+                words_lsb.append(next_word)
+
+        # recombine the upper half (n-1 parts)
+        # the first tick produces null result
+        self._flush_pipeline(dump)
+        for i in range(ab_num_words + 1):
+            next_part = parts[i + ab_num_words] if i < (ab_num_words - 1) else 0
+            next_word = self._push_pipeline(next_part, dump)
+
+            if i > 0:
+                words_msb.append(next_word)
+
+        # merge words
+        words = list()
+
+        # merge lower half
+        for x in range(ab_num_words):
+            next_word = words_lsb[x]
+            words.append(next_word)
+
+        # merge upper half adding the two overlapping words
+        for x in range(ab_num_words):
+            next_word = words_msb[x]
+            if x < 2:
+                next_word += words_lsb[x + ab_num_words]
+            words.append(next_word)
+
         return words
+
 
     def recombine_triangle(self, parts, ab_num_words, dump):
 
@@ -301,6 +325,9 @@ class ModExpNG_PartRecombinator():
 
 class ModExpNG_WordMultiplier():
 
+    _a_seen_17 = False
+    _b_seen_17 = False
+
     def __init__(self):
 
         self._macs = list()
@@ -326,8 +353,22 @@ class ModExpNG_WordMultiplier():
     def _clear_mac_aux(self):
         self._mac_aux[0] = 0
 
-    def _update_one_mac(self, x, value):
-        self._macs[x] += value
+    def _update_one_mac(self, x, a, b):
+
+        if a > 0xFFFF:
+            self._a_seen_17 = True
+
+        if b > 0xFFFF:
+            self._b_seen_17 = True
+
+        if a > 0x1FFFF:
+            raise ValueError("a > 0x1FFFF!")
+
+        if b > 0x1FFFF:
+            raise ValueError("b > 0x1FFFF!")
+
+        p = a * b
+        self._macs[x] += p
 
     def _update_mac_aux(self, value):
         self._mac_aux[0] += value
@@ -396,11 +437,10 @@ class ModExpNG_WordMultiplier():
                 # multiply by a-words
                 for x in range(NUM_MULTS):
                     ax = a_wide.words[self._indices[x]]
-                    self._update_one_mac(x, ax * bt)
+                    self._update_one_mac(x, ax, bt)
 
                     if t == (col * NUM_MULTS + x):
                         part_index = t
-                        #self._mult_store_part(parts, t, col, part_index, self._macs[x], dump)
                         self._mult_store_part(parts, t, col, part_index, x, dump)
 
 
@@ -418,7 +458,6 @@ class ModExpNG_WordMultiplier():
             for x in range(NUM_MULTS):
                 if not (col == (num_cols - 1) and x == (NUM_MULTS - 1)):
                     part_index = ab_num_words + col * NUM_MULTS + x
-                    #self._mult_store_part(parts, t, col, part_index, self._macs[x], dump)
                     self._mult_store_part(parts, t, col, part_index, x, dump)
 
         return parts
@@ -450,7 +489,7 @@ class ModExpNG_WordMultiplier():
                 # multiply by a-words
                 for x in range(NUM_MULTS):
                     ax = a_wide.words[self._indices[x]]
-                    self._update_one_mac(x, ax * bt)
+                    self._update_one_mac(x, ax, bt)
 
                     if t == (col * NUM_MULTS + x):
                         parts[t] = self._macs[x]
@@ -495,7 +534,7 @@ class ModExpNG_WordMultiplier():
                 # multiply by a-words
                 for x in range(NUM_MULTS):
                     ax = a_wide.words[self._indices[x]]
-                    self._update_one_mac(x, ax * bt)
+                    self._update_one_mac(x, ax, bt)
 
                     # don't save one value for the very last time instant per column
                     if t < ab_num_words and t == (col * NUM_MULTS + x):
@@ -750,10 +789,21 @@ if __name__ == "__main__":
 
     mp_blind_factor = worker.multiply(mp_blind, vector.p_factor, vector.p, vector.p_coeff, pq_num_words, dump=True)
     mq_blind_factor = worker.multiply(mq_blind, vector.q_factor, vector.q, vector.q_coeff, pq_num_words)
-
+
     sp_blind_factor = worker.exponentiate(ip_factor, mp_blind_factor, vector.dp, vector.p, vector.p_factor, vector.p_coeff, pq_num_words)
     sq_blind_factor = worker.exponentiate(iq_factor, mq_blind_factor, vector.dq, vector.q, vector.q_factor, vector.q_coeff, pq_num_words)
 
+    if worker.multiplier._a_seen_17:
+        print("17-bit wide A's seen.")
+    else:
+        print("17-bit wide A's not detected.")
+
+    if worker.multiplier._b_seen_17:
+        print("17-bit wide B's seen.")
+    else:
+        print("17-bit wide B's not detected.")
+
+
     sp_blind = worker.multiply(i, sp_blind_factor, vector.p, vector.p_coeff, pq_num_words)
     sq_blind = worker.multiply(i, sq_blind_factor, vector.q, vector.q_coeff, pq_num_words)
 
-- 
cgit v1.2.3