From 9b11ba2da63fb37fd6276afdc428f5a5de77c2af Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 11 Feb 2020 15:42:54 +0300 Subject: More closely follow how things work in hardware. --- modexpng_fpga_model.py | 252 +++++++++++++++++++++++++++---------------------- 1 file changed, 141 insertions(+), 111 deletions(-) diff --git a/modexpng_fpga_model.py b/modexpng_fpga_model.py index 334eecc..220be44 100644 --- a/modexpng_fpga_model.py +++ b/modexpng_fpga_model.py @@ -572,150 +572,180 @@ class ModExpNG_BanksCRT(): class ModExpNG_PartRecombinator(): + def __init__(self): + self._WORD_MASK_2X = (_WORD_MASK << _WORD_WIDTH) | _WORD_MASK + self._WORD_WIDTH_2X = 2 * _WORD_WIDTH + def _bit_select(self, x, msb, lsb): y = 0 for pos in range(lsb, msb+1): y |= (x & (1 << pos)) >> lsb return y + def _update_delays(self, x1, y1, z1, z2): + self._x_dly1, self._y_dly1, self._z_dly1, self._z_dly2 = x1, y1, z1, z2 + + def _update_carries(self, cm, cs): + self._cry_master, self._cry_slave = cm, cs + + def _clear_words(self): + self._words, self._wordsx = [], [] + + def _store_words(self, w, wx): + self._words.append(w) + self._wordsx.append(wx) + def _flush_pipeline(self, dump): - self.z0, self.y0, self.x0 = 0, 0, 0 + self._phase = False + self._master_p = None + self._update_carries(0, 0) + self._update_delays(0, 0, 0, 0) + self._clear_words() + if dump and DUMP_RECOMBINATION: print("RCMB -> flush()") + print("RCMB: master_ab | master_c | slave_ab") def _push_pipeline(self, part, dump): - - # split next part into 16-bit words - z = self._bit_select(part, 46, 32) - y = self._bit_select(part, 31, 16) x = self._bit_select(part, 15, 0) + y = self._bit_select(part, 31, 16) + z = self._bit_select(part, 45, 32) - # shift to the right - z1 = z - y1 = y + self.z0 - x1 = x + self.y0 + (self.x0 >> _WORD_WIDTH) # IMPORTANT: This carry can be up to two bits wide!! 
- - # save lower 16 bits of the rightmost cell - t = self.x0 & _WORD_MASK - - # update internal latches - self.z0, self.y0, self.x0 = z1, y1, x1 - - # dump + master_ab = (y << 16) | self._y_dly1 + master_c = (self._z_dly1 << 16) | self._z_dly2 + slave_ab = (x << 16) | self._x_dly1 + if dump and DUMP_RECOMBINATION: - print("RCMB -> push(): part = 0x%012x, word = 0x%04x" % (part, t)) - - # done - return t + print("PUSH: 0x%08x | 0x%08x | 0x%08x > " % (master_ab, master_c, slave_ab), end='') + + if not self._phase: + master_p = master_ab + master_c + self._cry_master + self._update_carries(master_p >> self._WORD_WIDTH_2X, self._cry_slave) + self._master_p = master_p & self._WORD_MASK_2X + if dump and DUMP_RECOMBINATION: + #print("MASTER: {0x%1d, 0x%08x}" % (self._cry_master, self._master_p)) + print("") + else: + slave_p = self._master_p + slave_ab + self._cry_slave + self._update_carries(self._cry_master, slave_p >> self._WORD_WIDTH_2X) + slave_p &= self._WORD_MASK_2X + if dump and DUMP_RECOMBINATION: + print("SLAVE: {0x%1d, 0x%08x}" % (self._cry_slave, slave_p)) + #print("") + slave_p_msb, slave_p_lsb = slave_p >> _WORD_WIDTH, slave_p & _WORD_MASK + self._store_words(slave_p_lsb, slave_p_lsb) + self._store_words(slave_p_msb, (self._cry_slave << _WORD_WIDTH) | slave_p_msb) + + self._phase = not self._phase + self._update_delays(x, y, z, self._z_dly1) + def _purge_pipeline(self, dump): + + slave_ab = self._x_dly1 + + if not self._phase: + raise Exception("RCMB: Can only purge pipeline after odd number of pushes!") + else: + slave_p = self._master_p + slave_ab + self._cry_slave + self._update_carries(self._cry_master, slave_p >> self._WORD_WIDTH_2X) + slave_p &= self._WORD_MASK_2X + + slave_p_msb, slave_p_lsb = slave_p >> _WORD_WIDTH, slave_p & _WORD_MASK + self._store_words(slave_p_lsb, slave_p_lsb) + self._store_words(slave_p_msb, (self._cry_slave << _WORD_WIDTH) | slave_p_msb) + + self._master_p = None + self._phase = None + self._update_carries(None, None) + 
self._update_delays(None, None, None, None) + + @property + def words(self): + return self._words + + @property + def wordsx(self): + return self._wordsx + def recombine_square(self, parts, ab_num_words, dump): - # empty results so far - words_lsb = list() # n words - words_msb = list() # n words + # hardware computes LSB and MSB words simultaneously, we can't + # simulate that here, so we compute sequentially + + # the first two words from MSB overlap with the last two words from + # LSB, so we compute MSB first + + # LSB has N parts and produces N+2 words (two last cycles accommodate + # the two "carry" words from MSB) + # MSB has N-1 parts and produces N words + # total number of output words is 2*N - # recombine the lower half (n parts) - # the first tick produces null result, the last part - # produces three words and needs two extra ticks + # recombine the upper half self._flush_pipeline(dump) - for i in range(ab_num_words + 1 + 2): - next_part = parts[i] if i < ab_num_words else 0 - next_word = self._push_pipeline(next_part, dump) - - if i > 0: - words_lsb.append(next_word) - - # recombine the upper half (n-1 parts) - # the first tick produces null result + for i in range(ab_num_words): + din = parts[ab_num_words + i] if i < (ab_num_words - 1) else 0 + self._push_pipeline(din, dump) + words_msb_cry, words_msb = self.words[0:2], self.words[2:] + + # recombine the lower half + # note that the very last word is 1 bit wider! 
self._flush_pipeline(dump) - for i in range(ab_num_words + 1): - next_part = parts[i + ab_num_words] if i < (ab_num_words - 1) else 0 - next_word = self._push_pipeline(next_part, dump) - - if i > 0: - words_msb.append(next_word) - - # merge words - words = list() - - # merge lower half - for x in range(ab_num_words): - next_word = words_lsb[x] - words.append(next_word) - - # merge upper half adding the two overlapping words - for x in range(ab_num_words): - next_word = words_msb[x] - if x < 2: - next_word += words_lsb[x + ab_num_words] - words.append(next_word) - - return words + for i in range(ab_num_words + 2): + din = parts[i] if i < ab_num_words else words_msb_cry[i - ab_num_words] + self._push_pipeline(din, dump) + words_lsb = self.words[:-1] + [self.wordsx[-1]] # + + return words_lsb + words_msb def recombine_triangle(self, parts, ab_num_words, dump): - # empty result so far - words_lsb = list() - - # recombine the lower half (n+1 parts) - # the first tick produces null result, so we need n + 1 + 1 = n + 2 - # ticks total and should only save the result word during the last - # n + 1 ticks + # hardware computes only LSB, so there's no overlap with MSB + + # LSB has N+1 parts and produces N+1 words, since the recombinator only + # outputs two words every other cycle, we need to manually purge the + # internal pipeline + self._flush_pipeline(dump) - for i in range(ab_num_words + 2): - - next_part = parts[i] if i < (ab_num_words + 1) else 0 - next_word = self._push_pipeline(next_part, dump) - - if i > 0: - words_lsb.append(next_word) + for i in range(ab_num_words + 1): + din = parts[i] + self._push_pipeline(din, dump) + self._purge_pipeline(dump) + words_lsb = self.words[:-1] return words_lsb def recombine_rectangle(self, parts, ab_num_words, dump): - # empty result so far - words_lsb = list() # n words - words_msb = list() # n+1 words - - # recombine the lower half (n parts) - # the first tick produces null result, the last part - # produces three words and 
needs two extra ticks + # hardware computes LSB and MSB words simultaneously, we can't + # simulate that here, so we compute sequentially + + # the first two words from MSB overlap with the last two words from + # LSB, so we compute MSB first + + # LSB has N parts and produces N+2 words (two last cycles accommodate + # the two "carry" words from MSB) + # MSB has N parts and produces N+1 words, since the recombinator only + # outputs two words every other cycle, we need to manually purge the + # internal pipeline + # total number of output words is 2*N+1 + + # recombine the upper half self._flush_pipeline(dump) - for i in range(ab_num_words + 1 + 2): - next_part = parts[i] if i < ab_num_words else 0 - next_word = self._push_pipeline(next_part, dump) - - if i > 0: - words_lsb.append(next_word) - - # recombine the upper half (n parts) - # the first tick produces null result, the last part - # produces two words and needs an extra tick + for i in range(ab_num_words + 1): + din = parts[ab_num_words + i] if i < ab_num_words else 0 + self._push_pipeline(din, dump) + self._purge_pipeline(dump) + words_msb_cry, words_msb = self.words[0:2], self.words[2:-1] + + # recombine the lower half + # note that the very last word is 1 bit wider! 
self._flush_pipeline(dump) for i in range(ab_num_words + 2): - next_part = parts[i + ab_num_words] if i < ab_num_words else 0 - next_word = self._push_pipeline(next_part, dump) - - if i > 0: - words_msb.append(next_word) - - # merge words - words = list() - - # merge lower half - for x in range(ab_num_words): - next_word = words_lsb[x] - words.append(next_word) - - # merge upper half adding the two overlapping words - for x in range(ab_num_words + 1): - next_word = words_msb[x] - if x < 2: - next_word += words_lsb[x + ab_num_words] - words.append(next_word) + din = parts[i] if i < ab_num_words else words_msb_cry[i - ab_num_words] + self._push_pipeline(din, dump) + words_lsb = self.words[:-1] + [self.wordsx[-1]] - return words + return words_lsb + words_msb class ModExpNG_WordMultiplier(): -- cgit v1.2.3