aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2020-02-11 15:42:54 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2020-02-11 15:42:54 +0300
commit9b11ba2da63fb37fd6276afdc428f5a5de77c2af (patch)
tree335d4bda77891c888cba09731598d3b788654d78
parent9519ec41975ddfa8c66a79c4ebc45ef9ebd4243d (diff)
More closely follow how things work in hardware.
-rw-r--r--modexpng_fpga_model.py252
1 files changed, 141 insertions, 111 deletions
diff --git a/modexpng_fpga_model.py b/modexpng_fpga_model.py
index 334eecc..220be44 100644
--- a/modexpng_fpga_model.py
+++ b/modexpng_fpga_model.py
@@ -572,150 +572,180 @@ class ModExpNG_BanksCRT():
class ModExpNG_PartRecombinator():
+ def __init__(self):
+ self._WORD_MASK_2X = (_WORD_MASK << _WORD_WIDTH) | _WORD_MASK
+ self._WORD_WIDTH_2X = 2 * _WORD_WIDTH
+
def _bit_select(self, x, msb, lsb):
y = 0
for pos in range(lsb, msb+1):
y |= (x & (1 << pos)) >> lsb
return y
+ def _update_delays(self, x1, y1, z1, z2):
+ self._x_dly1, self._y_dly1, self._z_dly1, self._z_dly2 = x1, y1, z1, z2
+
+ def _update_carries(self, cm, cs):
+ self._cry_master, self._cry_slave = cm, cs
+
+ def _clear_words(self):
+ self._words, self._wordsx = [], []
+
+ def _store_words(self, w, wx):
+ self._words.append(w)
+ self._wordsx.append(wx)
+
def _flush_pipeline(self, dump):
- self.z0, self.y0, self.x0 = 0, 0, 0
+ self._phase = False
+ self._master_p = None
+ self._update_carries(0, 0)
+ self._update_delays(0, 0, 0, 0)
+ self._clear_words()
+
if dump and DUMP_RECOMBINATION:
print("RCMB -> flush()")
+ print("RCMB: master_ab | master_c | slave_ab")
def _push_pipeline(self, part, dump):
-
- # split next part into 16-bit words
- z = self._bit_select(part, 46, 32)
- y = self._bit_select(part, 31, 16)
x = self._bit_select(part, 15, 0)
+ y = self._bit_select(part, 31, 16)
+ z = self._bit_select(part, 45, 32)
- # shift to the right
- z1 = z
- y1 = y + self.z0
- x1 = x + self.y0 + (self.x0 >> _WORD_WIDTH) # IMPORTANT: This carry can be up to two bits wide!!
-
- # save lower 16 bits of the rightmost cell
- t = self.x0 & _WORD_MASK
-
- # update internal latches
- self.z0, self.y0, self.x0 = z1, y1, x1
-
- # dump
+ master_ab = (y << 16) | self._y_dly1
+ master_c = (self._z_dly1 << 16) | self._z_dly2
+ slave_ab = (x << 16) | self._x_dly1
+
if dump and DUMP_RECOMBINATION:
- print("RCMB -> push(): part = 0x%012x, word = 0x%04x" % (part, t))
-
- # done
- return t
+ print("PUSH: 0x%08x | 0x%08x | 0x%08x > " % (master_ab, master_c, slave_ab), end='')
+
+ if not self._phase:
+ master_p = master_ab + master_c + self._cry_master
+ self._update_carries(master_p >> self._WORD_WIDTH_2X, self._cry_slave)
+ self._master_p = master_p & self._WORD_MASK_2X
+ if dump and DUMP_RECOMBINATION:
+ #print("MASTER: {0x%1d, 0x%08x}" % (self._cry_master, self._master_p))
+ print("")
+ else:
+ slave_p = self._master_p + slave_ab + self._cry_slave
+ self._update_carries(self._cry_master, slave_p >> self._WORD_WIDTH_2X)
+ slave_p &= self._WORD_MASK_2X
+ if dump and DUMP_RECOMBINATION:
+ print("SLAVE: {0x%1d, 0x%08x}" % (self._cry_slave, slave_p))
+ #print("")
+ slave_p_msb, slave_p_lsb = slave_p >> _WORD_WIDTH, slave_p & _WORD_MASK
+ self._store_words(slave_p_lsb, slave_p_lsb)
+ self._store_words(slave_p_msb, (self._cry_slave << _WORD_WIDTH) | slave_p_msb)
+
+ self._phase = not self._phase
+ self._update_delays(x, y, z, self._z_dly1)
+ def _purge_pipeline(self, dump):
+
+ slave_ab = self._x_dly1
+
+ if not self._phase:
+ raise Exception("RCMB: Can only purge pipeline after odd number of pushes!")
+ else:
+ slave_p = self._master_p + slave_ab + self._cry_slave
+ self._update_carries(self._cry_master, slave_p >> self._WORD_WIDTH_2X)
+ slave_p &= self._WORD_MASK_2X
+
+ slave_p_msb, slave_p_lsb = slave_p >> _WORD_WIDTH, slave_p & _WORD_MASK
+ self._store_words(slave_p_lsb, slave_p_lsb)
+ self._store_words(slave_p_msb, (self._cry_slave << _WORD_WIDTH) | slave_p_msb)
+
+ self._master_p = None
+ self._phase = None
+ self._update_carries(None, None)
+ self._update_delays(None, None, None, None)
+
+ @property
+ def words(self):
+ return self._words
+
+ @property
+ def wordsx(self):
+ return self._wordsx
+
def recombine_square(self, parts, ab_num_words, dump):
- # empty results so far
- words_lsb = list() # n words
- words_msb = list() # n words
+ # hardware computes LSB and MSB words simultaneously, we can't
+ # simulate that here, so we compute sequentially
+
+ # the first two words from MSB overlap with the last two words from
+ # LSB, so we compute MSB first
+
+ # LSB has N parts and produces N+2 words (the last two cycles accommodate
+ # the two "carry" words from MSB)
+ # MSB has N-1 parts and produces N words
+ # total number of output words is 2*N
- # recombine the lower half (n parts)
- # the first tick produces null result, the last part
- # produces three words and needs two extra ticks
+ # recombine the upper half
self._flush_pipeline(dump)
- for i in range(ab_num_words + 1 + 2):
- next_part = parts[i] if i < ab_num_words else 0
- next_word = self._push_pipeline(next_part, dump)
-
- if i > 0:
- words_lsb.append(next_word)
-
- # recombine the upper half (n-1 parts)
- # the first tick produces null result
+ for i in range(ab_num_words):
+ din = parts[ab_num_words + i] if i < (ab_num_words - 1) else 0
+ self._push_pipeline(din, dump)
+ words_msb_cry, words_msb = self.words[0:2], self.words[2:]
+
+ # recombine the lower half
+ # note that the very last word is 1 bit wider!
self._flush_pipeline(dump)
- for i in range(ab_num_words + 1):
- next_part = parts[i + ab_num_words] if i < (ab_num_words - 1) else 0
- next_word = self._push_pipeline(next_part, dump)
-
- if i > 0:
- words_msb.append(next_word)
-
- # merge words
- words = list()
-
- # merge lower half
- for x in range(ab_num_words):
- next_word = words_lsb[x]
- words.append(next_word)
-
- # merge upper half adding the two overlapping words
- for x in range(ab_num_words):
- next_word = words_msb[x]
- if x < 2:
- next_word += words_lsb[x + ab_num_words]
- words.append(next_word)
-
- return words
+ for i in range(ab_num_words + 2):
+ din = parts[i] if i < ab_num_words else words_msb_cry[i - ab_num_words]
+ self._push_pipeline(din, dump)
+ words_lsb = self.words[:-1] + [self.wordsx[-1]] #
+
+ return words_lsb + words_msb
def recombine_triangle(self, parts, ab_num_words, dump):
- # empty result so far
- words_lsb = list()
-
- # recombine the lower half (n+1 parts)
- # the first tick produces null result, so we need n + 1 + 1 = n + 2
- # ticks total and should only save the result word during the last
- # n + 1 ticks
+ # hardware computes only LSB, so there's no overlap with MSB
+
+ # LSB has N+1 parts and produces N+1 words, since the recombinator only
+ # outputs two words every other cycle, we need to manually purge the
+ # internal pipeline
+
self._flush_pipeline(dump)
- for i in range(ab_num_words + 2):
-
- next_part = parts[i] if i < (ab_num_words + 1) else 0
- next_word = self._push_pipeline(next_part, dump)
-
- if i > 0:
- words_lsb.append(next_word)
+ for i in range(ab_num_words + 1):
+ din = parts[i]
+ self._push_pipeline(din, dump)
+ self._purge_pipeline(dump)
+ words_lsb = self.words[:-1]
return words_lsb
def recombine_rectangle(self, parts, ab_num_words, dump):
- # empty result so far
- words_lsb = list() # n words
- words_msb = list() # n+1 words
-
- # recombine the lower half (n parts)
- # the first tick produces null result, the last part
- # produces three words and needs two extra ticks
+ # hardware computes LSB and MSB words simultaneously, we can't
+ # simulate that here, so we compute sequentially
+
+ # the first two words from MSB overlap with the last two words from
+ # LSB, so we compute MSB first
+
+ # LSB has N parts and produces N+2 words (the last two cycles accommodate
+ # the two "carry" words from MSB)
+ # MSB has N parts and produces N+1 words, since the recombinator only
+ # outputs two words every other cycle, we need to manually purge the
+ # internal pipeline
+ # total number of output words is 2*N+1
+
+ # recombine the upper half
self._flush_pipeline(dump)
- for i in range(ab_num_words + 1 + 2):
- next_part = parts[i] if i < ab_num_words else 0
- next_word = self._push_pipeline(next_part, dump)
-
- if i > 0:
- words_lsb.append(next_word)
-
- # recombine the upper half (n parts)
- # the first tick produces null result, the last part
- # produces two words and needs an extra tick
+ for i in range(ab_num_words + 1):
+ din = parts[ab_num_words + i] if i < ab_num_words else 0
+ self._push_pipeline(din, dump)
+ self._purge_pipeline(dump)
+ words_msb_cry, words_msb = self.words[0:2], self.words[2:-1]
+
+ # recombine the lower half
+ # note that the very last word is 1 bit wider!
self._flush_pipeline(dump)
for i in range(ab_num_words + 2):
- next_part = parts[i + ab_num_words] if i < ab_num_words else 0
- next_word = self._push_pipeline(next_part, dump)
-
- if i > 0:
- words_msb.append(next_word)
-
- # merge words
- words = list()
-
- # merge lower half
- for x in range(ab_num_words):
- next_word = words_lsb[x]
- words.append(next_word)
-
- # merge upper half adding the two overlapping words
- for x in range(ab_num_words + 1):
- next_word = words_msb[x]
- if x < 2:
- next_word += words_lsb[x + ab_num_words]
- words.append(next_word)
+ din = parts[i] if i < ab_num_words else words_msb_cry[i - ab_num_words]
+ self._push_pipeline(din, dump)
+ words_lsb = self.words[:-1] + [self.wordsx[-1]]
- return words
+ return words_lsb + words_msb
class ModExpNG_WordMultiplier():