From b5a8b522c917633e0a0db034c1135453d40d8105 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Sat, 30 Mar 2019 15:29:56 +0300 Subject: * more debugging output * more precise modelling of DSP slice --- modexpng_fpga_model.py | 56 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/modexpng_fpga_model.py b/modexpng_fpga_model.py index 844cc86..c73532f 100644 --- a/modexpng_fpga_model.py +++ b/modexpng_fpga_model.py @@ -77,7 +77,9 @@ _VECTOR_CLASS = "Vector" DUMP_VECTORS = False DUMP_INDICES = False DUMP_MACS_CLEARING = False -DUMP_MACS_ACCUMULATION = True +DUMP_MACS_ACCUMULATION = False +DUMP_MULT_PARTS = False +DUMP_RCMB = True # @@ -204,13 +206,15 @@ class ModExpNG_PartRecombinator(): y |= (x & (1 << pos)) >> lsb return y - def _flush_pipeline(self): + def _flush_pipeline(self, dump): self.z0, self.y0, self.x0 = 0, 0, 0 + if dump and DUMP_RCMB: + print("RCMB -> flush()") - def _push_pipeline(self, part): + def _push_pipeline(self, part, dump): # split next part into 16-bit words - z = self._bit_select(part, 47, 32) + z = self._bit_select(part, 46, 32) y = self._bit_select(part, 31, 16) x = self._bit_select(part, 15, 0) @@ -225,16 +229,20 @@ class ModExpNG_PartRecombinator(): # update internal latches self.z0, self.y0, self.x0 = z1, y1, x1 + # dump + if dump and DUMP_RCMB: + print("RCMB -> push(): part = 0x%012x, word = 0x%04x" % (part, t)) + # done return t - def recombine_square(self, parts, ab_num_words): + def recombine_square(self, parts, ab_num_words, dump): # empty result so far words = list() # flush recombinator pipeline - self._flush_pipeline() + self._flush_pipeline(dump) # the first tick produces null result, the last part produces # two words, so we need (2*n - 1) + 2 = 2*n + 1 ticks total @@ -242,40 +250,40 @@ class ModExpNG_PartRecombinator(): for i in range(2 * ab_num_words + 1): next_part = parts[i] if i < (2 * ab_num_words - 1) else 0 - next_word = self._push_pipeline(next_part) + next_word = self._push_pipeline(next_part, dump) if i > 0: words.append(next_word) return words - def recombine_triangle(self, parts, ab_num_words): + def recombine_triangle(self, parts, ab_num_words, dump): # empty result so far words = list() # flush recombinator pipeline - self._flush_pipeline() + self._flush_pipeline(dump) # the first tick produces null result, so we need n + 1 + 1 = n + 2 # ticks total and should only save the result word during the last n ticks for i in range(ab_num_words + 2): next_part = parts[i] if i < (ab_num_words + 1) else 0 - next_word = self._push_pipeline(next_part) + next_word = self._push_pipeline(next_part, dump) if i > 0: words.append(next_word) return words - def recombine_rectangle(self, parts, ab_num_words): + def recombine_rectangle(self, parts, ab_num_words, dump): # empty result so far words = list() # flush recombinator pipeline - self._flush_pipeline() + self._flush_pipeline(dump) # the first tick produces null result, the last part produces # two words, so we need 2 * n + 2 ticks total and should only save @@ -283,7 +291,7 @@ class ModExpNG_PartRecombinator(): for i in range(2 * ab_num_words + 2): next_part = parts[i] if i < (2 * ab_num_words) else 0 - next_word = self._push_pipeline(next_part) + next_word = self._push_pipeline(next_part, dump) if i > 0: words.append(next_word) @@ -341,6 +349,12 @@ class ModExpNG_WordMultiplier(): def _rotate_index_aux(self): self._index_aux[0] -= 1 + def _mult_store_part(self, parts, time, column, part_index, mac_index, dump): + parts[part_index] = self._macs[mac_index] + if dump and DUMP_MULT_PARTS: + print("t=%2d, col=%2d > parts[%2d]: mac[%d] = 0x%012x" % + (time, column, part_index, mac_index, parts[part_index])) + def multiply_square(self, a_wide, b_narrow, ab_num_words, dump=False): if dump: print("multiply_square()") @@ -385,7 +399,11 @@ class ModExpNG_WordMultiplier(): self._update_one_mac(x, ax * bt) if t == (col * NUM_MULTS + x): - parts[t] = self._macs[x] + part_index = t + #self._mult_store_part(parts, t, col, part_index, self._macs[x], dump) + self._mult_store_part(parts, t, col, part_index, x, dump) + + if dump and DUMP_MACS_ACCUMULATION: print("t=%2d, col=%2d > "% (t, col), end='') @@ -399,7 +417,9 @@ class ModExpNG_WordMultiplier(): if t == (ab_num_words - 1): for x in range(NUM_MULTS): if not (col == (num_cols - 1) and x == (NUM_MULTS - 1)): - parts[ab_num_words + col * NUM_MULTS + x] = self._macs[x] + part_index = ab_num_words + col * NUM_MULTS + x + #self._mult_store_part(parts, t, col, part_index, self._macs[x], dump) + self._mult_store_part(parts, t, col, part_index, x, dump) return parts @@ -627,7 +647,7 @@ class ModExpNG_Worker(): ab = a else: ab_parts = self.multiplier.multiply_square(a, b, ab_num_words, dump) - ab_words = self.recombinator.recombine_square(ab_parts, ab_num_words) + ab_words = self.recombinator.recombine_square(ab_parts, ab_num_words, dump) ab = ModExpNG_Operand(None, 2 * ab_num_words, ab_words) if multiply_only: @@ -635,12 +655,12 @@ class ModExpNG_Worker(): # 2. q_parts = self.multiplier.multiply_triangle(ab, n_coeff, ab_num_words) - q_words = self.recombinator.recombine_triangle(q_parts, ab_num_words) + q_words = self.recombinator.recombine_triangle(q_parts, ab_num_words, dump) q = ModExpNG_Operand(None, ab_num_words + 1, q_words) # 3. m_parts = self.multiplier.multiply_rectangle(n, q, ab_num_words) - m_words = self.recombinator.recombine_rectangle(m_parts, ab_num_words) + m_words = self.recombinator.recombine_rectangle(m_parts, ab_num_words, dump) m = ModExpNG_Operand(None, 2 * ab_num_words + 1, m_words) # 4. -- cgit v1.2.3