aboutsummaryrefslogtreecommitdiff
path: root/modexpng_fpga_model.py
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-01 14:25:27 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-01 14:25:27 +0300
commitec07464d239f7f6379a682ac57b58b863d3f0374 (patch)
tree4b825dc642cb6eb9a060e54bf8d69288fbee4904 /modexpng_fpga_model.py
parent0beee226e63b3a62ba32bc588e40eaeef01eac2b (diff)
Moved to "modexpng_fpga_model" repo, this one was meant for Verilog.
Diffstat (limited to 'modexpng_fpga_model.py')
-rw-r--r--modexpng_fpga_model.py1745
1 files changed, 0 insertions, 1745 deletions
diff --git a/modexpng_fpga_model.py b/modexpng_fpga_model.py
deleted file mode 100644
index 325f544..0000000
--- a/modexpng_fpga_model.py
+++ /dev/null
@@ -1,1745 +0,0 @@
-#!/usr/bin/python3
-#
-#
-# ModExpNG core math model.
-#
-#
-# Copyright (c) 2019, NORDUnet A/S
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-# - Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# - Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-#
-# - Neither the name of the NORDUnet nor the names of its contributors may
-# be used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-
-
-# -------
-# Imports
-#--------
-
-import sys
-import importlib
-from enum import Enum, auto
-
-
-# --------------
-# Model Settings
-# --------------
-
-# length of public key
-KEY_LENGTH = 1024
-
-# how many parallel multipliers to use
-NUM_MULTS = 8
-
-
-# ---------------
-# Internal Values
-# ---------------
-
-# half of key length
-_KEY_LENGTH_HALF = KEY_LENGTH // 2
-
-# width of internal math pipeline
-_WORD_WIDTH = 16
-_WORD_WIDTH_EXT = 18
-
-_WORD_MASK = 2 ** _WORD_WIDTH - 1
-_WORD_MASK_EXT = 2 ** _WORD_WIDTH_EXT - 1
-_CARRY_MASK = _WORD_MASK ^ _WORD_MASK_EXT
-
-# folder with test vector scripts
-_VECTOR_PATH = "/vector"
-
-# name of test vector class
-_VECTOR_CLASS = "Vector"
-
-
-# ------------------
-# Debugging Settings
-# ------------------
-DUMP_LADDER_INDEX = -1 # at which ladder step to print debug vector
-DUMP_VECTORS = False # print entire debug vector components
-DUMP_INDICES = False # print indices of words at MAC inputs
-DUMP_MACS_INPUTS = False # print MAC input words
-DUMP_MACS_CLEARING = False # print MAC clearing bitmaps
-DUMP_MACS_ACCUMULATION = False # print MAC accumulators contents
-DUMP_MULT_PARTS = False # print multiplication output parts
-DUMP_RECOMBINATION = False # print recombination internals
-DUMP_REDUCTION = False # print reduction internals
-FORCE_OVERFLOW = False # force rarely seen internal overflow situation to verify how its handler works
-DUMP_PROGRESS_FACTOR = 16 # once per how many ladder steps to update progress indicator
-
-#
-# Multi-Precision Integer
-#
-class ModExpNG_Operand():
-
- def __init__(self, number, length, words = None):
-
- if words is None:
-
- # length must be divisible by word width
- if (length % _WORD_WIDTH) > 0:
- raise Exception("Bad number length!")
-
- self._init_from_number(number, length)
-
- else:
-
- # length must match words count
- if len(words) != length:
- raise Exception("Bad words count!")
-
- self._init_from_words(words, length)
-
- def format_verilog_concat(self, name):
-
- for i in range(len(self.words)):
- if i > 0:
- if (i % 4) == 0: print("")
- else: print(" ", end='')
- print("%s[%3d] = 18'h%05x;" % (name, i, self.words[i]), end='')
- print("")
-
- def _init_from_words(self, words, count):
-
- for i in range(count):
-
- # word must not exceed 18 bits
- if words[i] >= (2 ** (_WORD_WIDTH_EXT)):
- raise Exception("Word is too large!")
-
- self.words = list(words)
-
- def _init_from_number(self, number, length):
-
- num_hexchars_per_word = _WORD_WIDTH // 4
- num_hexchars_total = length // num_hexchars_per_word
-
- value_hex = format(number, 'x')
-
- # value must not be larger than specified, but it can be smaller, so
- # we may need to prepend it with zeroes
- if len(value_hex) > num_hexchars_total:
- raise Exception("Number is too large!")
- else:
- while len(value_hex) < num_hexchars_total:
- value_hex = "0" + value_hex
-
- # create empty list
- self.words = list()
-
- # fill in words
- while len(value_hex) > 0:
- value_hex_part = value_hex[-num_hexchars_per_word:]
- value_hex = value_hex[:-num_hexchars_per_word]
- self.words.append(int(value_hex_part, 16))
-
- def number(self):
- ret = 0
- shift = 0
- for word in self.words:
- ret += word << shift
- shift += _WORD_WIDTH
- return ret
-
- def _get_half(self, part):
- num_words = len(self.words)
- num_words_half = num_words // 2
- if not part: return ModExpNG_Operand(None, num_words_half, self.words[:num_words_half])
- else: return ModExpNG_Operand(None, num_words_half, self.words[num_words_half:])
-
- def lower_half(self):
- return self._get_half(False)
-
- def upper_half(self):
- return self._get_half(True)
-
-#
-# Test Vector
-#
-class ModExpNG_TestVector():
-
- def __init__(self):
-
- # format target filename
- filename = "vector_" + str(KEY_LENGTH) + "_randomized"
-
- # add ./vector to import search path
- sys.path.insert(1, sys.path[0] + _VECTOR_PATH)
-
- # import from filename
- vector_module = importlib.import_module(filename)
-
- # get vector class
- vector_class = getattr(vector_module, _VECTOR_CLASS)
-
- # instantiate vector class
- vector_inst = vector_class()
-
- # obtain parts of vector
- self.m = ModExpNG_Operand(vector_inst.m, KEY_LENGTH)
- self.n = ModExpNG_Operand(vector_inst.n, KEY_LENGTH)
- self.d = ModExpNG_Operand(vector_inst.d, KEY_LENGTH)
- self.p = ModExpNG_Operand(vector_inst.p, _KEY_LENGTH_HALF)
- self.q = ModExpNG_Operand(vector_inst.q, _KEY_LENGTH_HALF)
- self.dp = ModExpNG_Operand(vector_inst.dp, _KEY_LENGTH_HALF)
- self.dq = ModExpNG_Operand(vector_inst.dq, _KEY_LENGTH_HALF)
- self.qinv = ModExpNG_Operand(vector_inst.qinv, _KEY_LENGTH_HALF)
- self.n_factor = ModExpNG_Operand(vector_inst.n_factor, KEY_LENGTH)
- self.p_factor = ModExpNG_Operand(vector_inst.p_factor, _KEY_LENGTH_HALF)
- self.q_factor = ModExpNG_Operand(vector_inst.q_factor, _KEY_LENGTH_HALF)
- self.n_coeff = ModExpNG_Operand(vector_inst.n_coeff, KEY_LENGTH + _WORD_WIDTH)
- self.p_coeff = ModExpNG_Operand(vector_inst.p_coeff, _KEY_LENGTH_HALF + _WORD_WIDTH)
- self.q_coeff = ModExpNG_Operand(vector_inst.q_coeff, _KEY_LENGTH_HALF + _WORD_WIDTH)
- self.x = ModExpNG_Operand(vector_inst.x, KEY_LENGTH)
- self.y = ModExpNG_Operand(vector_inst.y, KEY_LENGTH)
-
-class ModExpNG_WideBankEnum(Enum):
- A = auto()
- B = auto()
- C = auto()
- D = auto()
- E = auto()
- N = auto()
- L = auto()
- H = auto()
-
-class ModExpNG_NarrowBankEnum(Enum):
- A = auto()
- B = auto()
- C = auto()
- D = auto()
- E = auto()
- N_COEFF = auto()
- I = auto()
-
-class ModExpNG_CoreInputEnum(Enum):
- M = auto()
-
- N = auto()
- P = auto()
- Q = auto()
-
- N_COEFF = auto()
- P_COEFF = auto()
- Q_COEFF = auto()
-
- N_FACTOR = auto()
- P_FACTOR = auto()
- Q_FACTOR = auto()
-
- X = auto()
- Y = auto()
-
- QINV = auto()
-
-class ModExpNG_CoreOutputEnum(Enum):
- XM = auto()
- YM = auto()
- S = auto()
-
-class ModExpNG_WideBank():
-
- def __init__(self):
- self.a = None
- self.b = None
- self.c = None
- self.d = None
- self.e = None
- self.n = None
- self.l = None
- self.h = None
-
- def _get_value(self, sel):
- if sel == ModExpNG_WideBankEnum.A: return self.a
- elif sel == ModExpNG_WideBankEnum.B: return self.b
- elif sel == ModExpNG_WideBankEnum.C: return self.c
- elif sel == ModExpNG_WideBankEnum.D: return self.d
- elif sel == ModExpNG_WideBankEnum.E: return self.e
- elif sel == ModExpNG_WideBankEnum.N: return self.n
- elif sel == ModExpNG_WideBankEnum.L: return self.l
- elif sel == ModExpNG_WideBankEnum.H: return self.h
- else: raise Exception("ModExpNG_WideBank._get_value(): Invalid selector!")
-
- def _set_value(self, sel, value):
- if sel == ModExpNG_WideBankEnum.A: self.a = value
- elif sel == ModExpNG_WideBankEnum.B: self.b = value
- elif sel == ModExpNG_WideBankEnum.C: self.c = value
- elif sel == ModExpNG_WideBankEnum.D: self.d = value
- elif sel == ModExpNG_WideBankEnum.E: self.e = value
- elif sel == ModExpNG_WideBankEnum.N: self.n = value
- elif sel == ModExpNG_WideBankEnum.L: self.l = value
- elif sel == ModExpNG_WideBankEnum.H: self.h = value
- else: raise Exception("ModExpNG_WideBank._set_value(): Invalid selector!")
-
-class ModExpNG_NarrowBank():
-
- def __init__(self, i):
- self.a = None
- self.b = None
- self.c = None
- self.d = None
- self.e = None
- self.n_coeff = None
- self.i = i
-
- def _get_value(self, sel):
- if sel == ModExpNG_NarrowBankEnum.A: return self.a
- elif sel == ModExpNG_NarrowBankEnum.B: return self.b
- elif sel == ModExpNG_NarrowBankEnum.C: return self.c
- elif sel == ModExpNG_NarrowBankEnum.D: return self.d
- elif sel == ModExpNG_NarrowBankEnum.E: return self.e
- elif sel == ModExpNG_NarrowBankEnum.N_COEFF: return self.n_coeff
- elif sel == ModExpNG_NarrowBankEnum.I: return self.i
- else: raise Exception("ModExpNG_NarrowBank._get_value(): Invalid selector!")
-
- def _set_value(self, sel, value):
- if sel == ModExpNG_NarrowBankEnum.A: self.a = value
- elif sel == ModExpNG_NarrowBankEnum.B: self.b = value
- elif sel == ModExpNG_NarrowBankEnum.C: self.c = value
- elif sel == ModExpNG_NarrowBankEnum.D: self.d = value
- elif sel == ModExpNG_NarrowBankEnum.E: self.e = value
- elif sel == ModExpNG_NarrowBankEnum.N_COEFF: self.n_coeff = value
- else: raise Exception("ModExpNG_NarrowBank._set_value(): Invalid selector!")
-
-class ModExpNG_CoreInput():
-
- def __init__(self):
- self._m = None
-
- self._n = None
- self._p = None
- self._q = None
-
- self._n_coeff = None
- self._p_coeff = None
- self._q_coeff = None
-
- self._n_factor = None
- self._p_factor = None
- self._q_factor = None
-
- self._x = None
- self._y = None
-
- self._qinv = None
-
- def set_value(self, sel, value):
- if sel == ModExpNG_CoreInputEnum.M: self._m = value
-
- elif sel == ModExpNG_CoreInputEnum.N: self._n = value
- elif sel == ModExpNG_CoreInputEnum.P: self._p = value
- elif sel == ModExpNG_CoreInputEnum.Q: self._q = value
-
- elif sel == ModExpNG_CoreInputEnum.N_COEFF: self._n_coeff = value
- elif sel == ModExpNG_CoreInputEnum.P_COEFF: self._p_coeff = value
- elif sel == ModExpNG_CoreInputEnum.Q_COEFF: self._q_coeff = value
-
- elif sel == ModExpNG_CoreInputEnum.N_FACTOR: self._n_factor = value
- elif sel == ModExpNG_CoreInputEnum.P_FACTOR: self._p_factor = value
- elif sel == ModExpNG_CoreInputEnum.Q_FACTOR: self._q_factor = value
-
- elif sel == ModExpNG_CoreInputEnum.X: self._x = value
- elif sel == ModExpNG_CoreInputEnum.Y: self._y = value
-
- elif sel == ModExpNG_CoreInputEnum.QINV: self._qinv = value
-
- else: raise Exception("ModExpNG_CoreInput.set_value(): invalid selector!")
-
- def _get_value(self, sel):
- if sel == ModExpNG_CoreInputEnum.M: return self._m
-
- elif sel == ModExpNG_CoreInputEnum.N: return self._n
- elif sel == ModExpNG_CoreInputEnum.P: return self._p
- elif sel == ModExpNG_CoreInputEnum.Q: return self._q
-
- elif sel == ModExpNG_CoreInputEnum.N_COEFF: return self._n_coeff
- elif sel == ModExpNG_CoreInputEnum.P_COEFF: return self._p_coeff
- elif sel == ModExpNG_CoreInputEnum.Q_COEFF: return self._q_coeff
-
- elif sel == ModExpNG_CoreInputEnum.N_FACTOR: return self._n_factor
- elif sel == ModExpNG_CoreInputEnum.P_FACTOR: return self._p_factor
- elif sel == ModExpNG_CoreInputEnum.Q_FACTOR: return self._q_factor
-
- elif sel == ModExpNG_CoreInputEnum.X: return self._x
- elif sel == ModExpNG_CoreInputEnum.Y: return self._y
-
- elif sel == ModExpNG_CoreInputEnum.QINV: return self._qinv
-
- else: raise Exception("ModExpNG_CoreInput._get_value(): invalid selector!")
-
-class ModExpNG_CoreOutput():
-
- def __init__(self):
- self._xm = None
- self._ym = None
- self._s = None
-
- def _set_value(self, sel, value):
- if sel == ModExpNG_CoreOutputEnum.XM: self._xm = value
- elif sel == ModExpNG_CoreOutputEnum.YM: self._ym = value
- elif sel == ModExpNG_CoreOutputEnum.S: self._s = value
- else: raise Exception("ModExpNG_CoreOutput._set_value(): invalid selector!")
-
- def get_value(self, sel):
- if sel == ModExpNG_CoreOutputEnum.XM: return self._xm
- elif sel == ModExpNG_CoreOutputEnum.YM: return self._ym
- elif sel == ModExpNG_CoreOutputEnum.S: return self._s
- else: raise Exception("ModExpNG_CoreOutput.get_value(): invalid selector!")
-
-class ModExpNG_BanksPair():
-
- def __init__(self, i):
- self.wide = ModExpNG_WideBank()
- self.narrow = ModExpNG_NarrowBank(i)
-
- def _get_wide(self, sel):
- return self.wide._get_value(sel)
-
- def _get_narrow(self, sel):
- return self.narrow._get_value(sel)
-
- def _set_wide(self, sel, value):
- self.wide._set_value(sel, value)
-
- def _set_narrow(self, sel, value):
- self.narrow._set_value(sel, value)
-
-class ModExpNG_BanksLadder():
-
- def __init__(self, i):
- self.ladder_x = ModExpNG_BanksPair(i)
- self.ladder_y = ModExpNG_BanksPair(i)
-
-class ModExpNG_BanksCRT():
-
- def __init__(self, i):
- self.crt_x = ModExpNG_BanksLadder(i)
- self.crt_y = ModExpNG_BanksLadder(i)
-
-class ModExpNG_PartRecombinator():
-
- def _bit_select(self, x, msb, lsb):
- y = 0
- for pos in range(lsb, msb+1):
- y |= (x & (1 << pos)) >> lsb
- return y
-
- def _flush_pipeline(self, dump):
- self.z0, self.y0, self.x0 = 0, 0, 0
- if dump and DUMP_RECOMBINATION:
- print("RCMB -> flush()")
-
- def _push_pipeline(self, part, dump):
-
- # split next part into 16-bit words
- z = self._bit_select(part, 46, 32)
- y = self._bit_select(part, 31, 16)
- x = self._bit_select(part, 15, 0)
-
- # shift to the right
- z1 = z
- y1 = y + self.z0
- x1 = x + self.y0 + (self.x0 >> _WORD_WIDTH) # IMPORTANT: This carry can be up to two bits wide!!
-
- # save lower 16 bits of the rightmost cell
- t = self.x0 & _WORD_MASK
-
- # update internal latches
- self.z0, self.y0, self.x0 = z1, y1, x1
-
- # dump
- if dump and DUMP_RECOMBINATION:
- print("RCMB -> push(): part = 0x%012x, word = 0x%04x" % (part, t))
-
- # done
- return t
-
- def recombine_square(self, parts, ab_num_words, dump):
-
- # empty results so far
- words_lsb = list() # n words
- words_msb = list() # n words
-
- # recombine the lower half (n parts)
- # the first tick produces null result, the last part
- # produces three words and needs two extra ticks
- self._flush_pipeline(dump)
- for i in range(ab_num_words + 1 + 2):
- next_part = parts[i] if i < ab_num_words else 0
- next_word = self._push_pipeline(next_part, dump)
-
- if i > 0:
- words_lsb.append(next_word)
-
- # recombine the upper half (n-1 parts)
- # the first tick produces null result
- self._flush_pipeline(dump)
- for i in range(ab_num_words + 1):
- next_part = parts[i + ab_num_words] if i < (ab_num_words - 1) else 0
- next_word = self._push_pipeline(next_part, dump)
-
- if i > 0:
- words_msb.append(next_word)
-
- # merge words
- words = list()
-
- # merge lower half
- for x in range(ab_num_words):
- next_word = words_lsb[x]
- words.append(next_word)
-
- # merge upper half adding the two overlapping words
- for x in range(ab_num_words):
- next_word = words_msb[x]
- if x < 2:
- next_word += words_lsb[x + ab_num_words]
- words.append(next_word)
-
- return words
-
- def recombine_triangle(self, parts, ab_num_words, dump):
-
- # empty result so far
- words_lsb = list()
-
- # recombine the lower half (n+1 parts)
- # the first tick produces null result, so we need n + 1 + 1 = n + 2
- # ticks total and should only save the result word during the last
- # n + 1 ticks
- self._flush_pipeline(dump)
- for i in range(ab_num_words + 2):
-
- next_part = parts[i] if i < (ab_num_words + 1) else 0
- next_word = self._push_pipeline(next_part, dump)
-
- if i > 0:
- words_lsb.append(next_word)
-
- return words_lsb
-
- def recombine_rectangle(self, parts, ab_num_words, dump):
-
- # empty result so far
- words_lsb = list() # n words
- words_msb = list() # n+1 words
-
- # recombine the lower half (n parts)
- # the first tick produces null result, the last part
- # produces three words and needs two extra ticks
- self._flush_pipeline(dump)
- for i in range(ab_num_words + 1 + 2):
- next_part = parts[i] if i < ab_num_words else 0
- next_word = self._push_pipeline(next_part, dump)
-
- if i > 0:
- words_lsb.append(next_word)
-
- # recombine the upper half (n parts)
- # the first tick produces null result, the last part
- # produces two words and needs an extra tick
- self._flush_pipeline(dump)
- for i in range(ab_num_words + 2):
- next_part = parts[i + ab_num_words] if i < ab_num_words else 0
- next_word = self._push_pipeline(next_part, dump)
-
- if i > 0:
- words_msb.append(next_word)
-
- # merge words
- words = list()
-
- # merge lower half
- for x in range(ab_num_words):
- next_word = words_lsb[x]
- words.append(next_word)
-
- # merge upper half adding the two overlapping words
- for x in range(ab_num_words + 1):
- next_word = words_msb[x]
- if x < 2:
- next_word += words_lsb[x + ab_num_words]
- words.append(next_word)
-
- return words
-
-class ModExpNG_WordMultiplier():
-
- def __init__(self):
-
- self._macs = list()
- self._indices = list()
-
- self._mac_aux = list()
- self._index_aux = list()
-
- for x in range(NUM_MULTS):
- self._macs.append(0)
- self._indices.append(0)
-
- self._mac_aux.append(0)
- self._index_aux.append(0)
-
- def _clear_all_macs(self, t, col, dump):
- for x in range(NUM_MULTS):
- self._macs[x] = 0
- if dump and DUMP_MACS_CLEARING:
- print("t=%2d, col=%2d > clear > all" % (t, col))
-
- def _clear_one_mac(self, x, t, col, dump):
- self._macs[x] = 0
- if dump and DUMP_MACS_CLEARING:
- print("t=%2d, col=%2d > clear > x=%d" % (t, col, x))
-
- def _clear_mac_aux(self, t, col, dump):
- self._mac_aux[0] = 0
- if dump and DUMP_MACS_CLEARING:
- print("t= 0, col=%2d > clear > aux" % (col))
-
- def _update_one_mac(self, x, t, col, a, b, dump, need_aux=False):
-
- if a >= (2 ** _WORD_WIDTH_EXT):
- raise Exception("a > 0x3FFFF!")
-
- if b >= (2 ** _WORD_WIDTH):
- raise Exception("b > 0xFFFF!")
-
- p = a * b
- if dump and DUMP_MACS_INPUTS:
- if x == 0: print("t=%2d, col=%2d > b=%05x > " % (t, col, b), end='')
- if x > 0: print("; ", end='')
- print("MAC[%d]: a=%05x" % (x, a), end='')
- if x == (NUM_MULTS-1) and not need_aux: print("")
-
- self._macs[x] += p
-
- def _update_mac_aux(self, y, col, a, b, dump):
-
- if a >= (2 ** _WORD_WIDTH_EXT):
- raise Exception("a > 0x3FFFF!")
-
- if b >= (2 ** _WORD_WIDTH):
- raise Exception("b > 0xFFFF!")
-
- p = a * b
- if dump and DUMP_MACS_INPUTS:
- print("; AUX: a=%05x" % a)
-
- self._mac_aux[0] += p
-
- def _preset_indices(self, col):
- for x in range(len(self._indices)):
- self._indices[x] = col * len(self._indices) + x
-
- def _preset_index_aux(self, num_cols):
- self._index_aux[0] = num_cols * len(self._indices)
-
- def _dump_macs_helper(self, t, col, aux=False):
- print("t=%2d, col=%2d > "% (t, col), end='')
- for i in range(NUM_MULTS):
- if i > 0: print(" | ", end='')
- print("mac[%d]: 0x%012x" % (i, self._macs[i]), end='')
- if aux:
- print(" | mac_aux[ 0]: 0x%012x" % (self._mac_aux[0]), end='')
- print("")
-
- def _dump_macs(self, t, col):
- self._dump_macs_helper(t, col)
-
- def _dump_macs_with_aux(self, t, col):
- self._dump_macs_helper(t, col, True)
-
- def _dump_indices_helper(self, t, col, aux=False):
- print("t=%2d, col=%2d > indices:" % (t, col), end='')
- for i in range(NUM_MULTS):
- print(" %2d" % self._indices[i], end='')
- if aux:
- print(" %2d" % self._index_aux[0], end='')
- print("")
-
- def _dump_indices(self, t, col):
- self._dump_indices_helper(t, col)
-
- def _dump_indices_with_aux(self, t, col):
- self._dump_indices_helper(t, col, True)
-
- def _rotate_indices(self, num_words):
- for x in range(len(self._indices)):
- if self._indices[x] > 0:
- self._indices[x] -= 1
- else:
- self._indices[x] = num_words - 1
-
- def _rotate_index_aux(self):
- self._index_aux[0] -= 1
-
- def _mult_store_part(self, parts, time, column, part_index, mac_index, dump):
- parts[part_index] = self._macs[mac_index]
- if dump and DUMP_MULT_PARTS:
- print("t=%2d, col=%2d > parts[%2d]: mac[%d] = 0x%012x" %
- (time, column, part_index, mac_index, parts[part_index]))
-
- def _mult_store_part_aux(self, parts, time, column, part_index, dump):
- parts[part_index] = self._mac_aux[0]
- if dump and DUMP_MULT_PARTS:
- print("t=%2d, col=%2d > parts[%2d]: mac_aux[%d] = 0x%012x" %
- (time, column, part_index, 0, parts[part_index]))
-
- def multiply_square(self, a_wide, b_narrow, ab_num_words, dump=False):
-
- num_cols = ab_num_words // NUM_MULTS
-
- parts = list()
- for i in range(2 * ab_num_words - 1):
- parts.append(0)
-
- for col in range(num_cols):
-
- b_carry = 0
-
- for t in range(ab_num_words):
-
- # take care of indices
- if t == 0: self._preset_indices(col)
- else: self._rotate_indices(ab_num_words)
-
- # take care of macs
- if t == 0:
- self._clear_all_macs(t, col, dump)
- else:
- t1 = t - 1
- if (t1 // 8) == col:
- self._clear_one_mac(t1 % NUM_MULTS, t, col, dump)
-
- # debug output
- if dump and DUMP_INDICES: self._dump_indices(t, col)
-
- # current b-word
- # multiplier's b-input is limited to 16-bit words, so we need to propagate
- # carries on the fly here, carry can be up to two bits
- bt = b_narrow.words[t] + b_carry
- b_carry = (bt & _CARRY_MASK) >> _WORD_WIDTH
- if dump and b_carry > 1:
- print("Rare overflow case was detected and then successfully corrected.")
- bt &= _WORD_MASK
-
- # multiply by a-words
- for x in range(NUM_MULTS):
- ax = a_wide.words[self._indices[x]]
- self._update_one_mac(x, t, col, ax, bt, dump)
-
- if t == (col * NUM_MULTS + x):
- part_index = t
- self._mult_store_part(parts, t, col, part_index, x, dump)
-
- # debug output
- if dump and DUMP_MACS_ACCUMULATION: self._dump_macs(t, col)
-
- # save the uppers part of product at end of column,
- # for the last column don't save the very last part
- if t == (ab_num_words - 1):
- for x in range(NUM_MULTS):
- if not (col == (num_cols - 1) and x == (NUM_MULTS - 1)):
- part_index = ab_num_words + col * NUM_MULTS + x
- self._mult_store_part(parts, t, col, part_index, x, dump)
-
- return parts
-
- def multiply_triangle(self, a_wide, b_narrow, ab_num_words, dump=False):
-
- num_cols = ab_num_words // NUM_MULTS
-
- parts = list()
- for i in range(ab_num_words + 1):
- parts.append(0)
-
- for col in range(num_cols):
-
- last_col = col == (num_cols - 1)
-
- for t in range(ab_num_words + 1):
-
- # take care of indices
- if t == 0: self._preset_indices(col)
- else: self._rotate_indices(ab_num_words)
-
- # take care of auxilary index
- if last_col:
- if t == 0: self._preset_index_aux(num_cols)
- else: self._rotate_index_aux()
-
- # take care of macs
- if t == 0: self._clear_all_macs(t, col, dump)
-
- # take care of auxilary mac
- if last_col:
- if t == 0: self._clear_mac_aux(t, col, dump)
-
- # debug output
- if dump and DUMP_INDICES: self._dump_indices_with_aux(t, col)
-
- # current b-word
- bt = b_narrow.words[t]
-
- # multiply by a-words
- for x in range(NUM_MULTS):
- ax = a_wide.words[self._indices[x]]
- self._update_one_mac(x, t, col, ax, bt, dump, last_col)
-
- if t == (col * NUM_MULTS + x):
- part_index = t
- self._mult_store_part(parts, t, col, part_index, x, dump)
-
- # aux multiplier
- if last_col:
- ax = a_wide.words[self._index_aux[0]]
- self._update_mac_aux(t, col, ax, bt, dump)
-
- if t == ab_num_words:
- part_index = t
- self._mult_store_part_aux(parts, t, col, part_index, dump)
-
- # debug output
- if dump and DUMP_MACS_ACCUMULATION: self._dump_macs_with_aux(t, col)
-
- # shortcut
- if not last_col:
- if t == (NUM_MULTS * (col + 1) - 1): break
-
- return parts
-
- def multiply_rectangle(self, a_wide, b_narrow, ab_num_words, dump=False):
-
- num_cols = ab_num_words // NUM_MULTS
-
- parts = list()
- for i in range(2 * ab_num_words):
- parts.append(0)
-
- for col in range(num_cols):
-
- for t in range(ab_num_words + 1):
-
- # take care of indices
- if t == 0: self._preset_indices(col)
- else: self._rotate_indices(ab_num_words)
-
- # take care of macs
- if t == 0:
- self._clear_all_macs(t, col, dump)
- else:
- t1 = t - 1
- if (t1 // 8) == col:
- self._clear_one_mac(t1 % NUM_MULTS, t, col, dump)
-
- # debug output
- if dump and DUMP_INDICES: self._dump_indices(t, col)
-
- # current b-word
- bt = b_narrow.words[t]
-
- # multiply by a-words
- for x in range(NUM_MULTS):
- ax = a_wide.words[self._indices[x]]
- self._update_one_mac(x, t, col, ax, bt, dump)
-
- # don't save one value for the very last time instant per column
- if t < ab_num_words and t == (col * NUM_MULTS + x):
- part_index = t
- self._mult_store_part(parts, t, col, part_index, x, dump)
-
- # debug output
- if dump and DUMP_MACS_ACCUMULATION: self._dump_macs(t, col)
-
- # save the upper parts of product at end of column
- if t == ab_num_words:
- for x in range(NUM_MULTS):
- part_index = ab_num_words + col * NUM_MULTS + x
- self._mult_store_part(parts, t, col, part_index, x, dump)
-
- return parts
-
-class ModExpNG_LowlevelOperator():
-
- def _check_word(self, a):
- if a < 0 or a > _WORD_MASK:
- raise Exception("Word out of range!")
-
- def _check_carry_borrow(self, cb):
- if cb < 0 or cb > 1:
- raise Exception("Carry or borrow out of range!")
-
- def add_words(self, a, b, c_in):
-
- self._check_word(a)
- self._check_word(b)
- self._check_carry_borrow(c_in)
-
- sum = a + b + c_in
-
- sum_s = sum & _WORD_MASK
- sum_c = sum >> _WORD_WIDTH
-
- return (sum_c, sum_s)
-
- def sub_words(self, a, b, b_in):
-
- self._check_word(a)
- self._check_word(b)
- self._check_carry_borrow(b_in)
-
- dif = a - b - b_in
-
- if dif < 0:
- dif_b = 1
- dif_d = dif + 2 ** _WORD_WIDTH
- else:
- dif_b = 0
- dif_d = dif
-
- return (dif_b, dif_d)
-
-class ModExpNG_Worker():
-
- def __init__(self):
- self.lowlevel = ModExpNG_LowlevelOperator()
- self.multiplier = ModExpNG_WordMultiplier()
- self.recombinator = ModExpNG_PartRecombinator()
-
- def serial_subtract_modular(self, a, b, n, ab_num_words):
- c_in = 0
- b_in = 0
- ab = list()
- ab_n = list()
- for x in range(ab_num_words):
- a_word = a.words[x]
- b_word = b.words[x]
- (b_out, d_out) = self.lowlevel.sub_words(a_word, b_word, b_in)
- (c_out, s_out) = self.lowlevel.add_words(d_out, n.words[x], c_in)
- ab.append(d_out)
- ab_n.append(s_out)
- (c_in, b_in) = (c_out, b_out)
- d = ab if not b_out else ab_n
- return ModExpNG_Operand(None, ab_num_words, d)
-
- def serial_add_uneven(self, a, b, ab_num_words):
- c_in = 0
- ab = list()
- for x in range(2 * ab_num_words):
- a_word = a.words[x] if x < ab_num_words else 0
- b_word = b.words[x]
- (c_out, s_out) = self.lowlevel.add_words(a_word, b_word, c_in)
- ab.append(s_out)
- c_in = c_out
- return ModExpNG_Operand(None, 2*ab_num_words, ab)
-
- def multipurpose_multiply(self, a, b, n, n_coeff, ab_num_words, reduce_only=False, multiply_only=False, dump=False, dump_crt="", dump_ladder=""):
-
- #
- # 1. AB = A * B
- #
- if dump: print("multiply_square(%s_%s)" % (dump_crt, dump_ladder))
-
- if reduce_only:
- ab = b
- else:
- ab_parts = self.multiplier.multiply_square(a, b, ab_num_words, dump)
- ab_words = self.recombinator.recombine_square(ab_parts, ab_num_words, dump)
- ab = ModExpNG_Operand(None, 2 * ab_num_words, ab_words)
-
- if dump and DUMP_VECTORS:
- ab.format_verilog_concat("%s_%s_AB" % (dump_crt, dump_ladder))
-
- if multiply_only:
- return ModExpNG_Operand(None, 2*ab_num_words, ab_words)
-
- #
- # 2. Q = LSB(AB) * N_COEFF
- #
- if dump: print("multiply_triangle(%s_%s)" % (dump_crt, dump_ladder))
-
- q_parts = self.multiplier.multiply_triangle(ab, n_coeff, ab_num_words, dump)
- q_words = self.recombinator.recombine_triangle(q_parts, ab_num_words, dump)
- q = ModExpNG_Operand(None, ab_num_words + 1, q_words)
-
- if dump and DUMP_VECTORS:
- q.format_verilog_concat("%s_%s_Q" % (dump_crt, dump_ladder))
-
- #
- # 3. M = Q * N
- #
- if dump: print("multiply_rectangle(%s_%s)" % (dump_crt, dump_ladder))
-
- m_parts = self.multiplier.multiply_rectangle(n, q, ab_num_words, dump)
- m_words = self.recombinator.recombine_rectangle(m_parts, ab_num_words, dump)
- m = ModExpNG_Operand(None, 2 * ab_num_words + 1, m_words)
-
- if dump and DUMP_VECTORS:
- m.format_verilog_concat("%s_%s_M" % (dump_crt, dump_ladder))
-
- #
- # 4. R = AB + M
- #
-
- #
- # 4a. compute carry (actual sum is all zeroes and need not be stored)
- #
-
- r_cy = 0 # this can be up to two bits, since we're adding extended words!!
- for i in range(ab_num_words + 1):
- s = ab.words[i] + m.words[i] + r_cy
- r_cy_new = s >> _WORD_WIDTH
-
- if dump and DUMP_REDUCTION:
- print("[%2d] 0x%05x + 0x%05x + 0x%x => {0x%x, [0x%05x]}" %
- (i, ab.words[i], m.words[i], r_cy, r_cy_new, s & 0xffff)) # ???
-
- r_cy = r_cy_new
-
-
- #
- # 4b. Initialize empty result
- #
-
- R = list()
- for i in range(ab_num_words):
- R.append(0)
-
- #
- # 4c. compute the actual upper part of sum (take carry into account)
- #
-
- for i in range(ab_num_words):
-
- if dump and DUMP_REDUCTION:
- print("[%2d]" % i, end='')
-
- ab_word = ab.words[ab_num_words + i + 1] if i < (ab_num_words - 1) else 0
- if dump and DUMP_REDUCTION:
- print(" 0x%05x" % ab_word, end='')
-
- m_word = m.words[ab_num_words + i + 1]
- if dump and DUMP_REDUCTION:
- print(" + 0x%05x" % m_word, end='')
-
- if i == 0: R[i] = r_cy
- else: R[i] = 0
-
- if dump and DUMP_REDUCTION:
- print(" + 0x%x" % R[i], end='')
-
- R[i] += ab_word
- R[i] += m_word
- if dump and DUMP_REDUCTION:
- print(" = 0x%05x" % R[i])
-
- return ModExpNG_Operand(None, ab_num_words, R)
-
- def convert_nonredundant(self, a, num_words):
- carry = 0
- for x in range(num_words):
- a.words[x] += carry
- carry = a.words[x] >> _WORD_WIDTH
- a.words[x] &= _WORD_MASK
- return carry
-
-class ModExpNG_Core():
-
- def __init__(self, i):
- self.wrk = ModExpNG_Worker()
- self.bnk = ModExpNG_BanksCRT(i)
- self.inp = ModExpNG_CoreInput()
- self.out = ModExpNG_CoreOutput()
-
- #
- # CRT_(X|Y) means either CRT_X or CRT_Y
- # LADDER_{X,Y} means both LADDER_X and LADDER_Y
- #
-
- #
- # copy from CRT_(X|Y).LADDER_X.NARROW to OUTPUT
- #
- def set_output_from_narrow(self, sel_output, bank_crt, sel_narrow):
- self.out._set_value(sel_output, bank_crt.ladder_x._get_narrow(sel_narrow))
-
- #
- # copy from INPUT to CRT_(X|Y).LADDER_{X,Y}.NARROW
- #
- def set_narrow_from_input(self, bank_crt, sel_narrow, sel_input):
- bank_crt.ladder_x._set_narrow(sel_narrow, self.inp._get_value(sel_input))
- bank_crt.ladder_y._set_narrow(sel_narrow, self.inp._get_value(sel_input))
-
- #
- # copy from INPUT to CRT_(X|Y).LADDER_{X,Y}.WIDE
- #
- def set_wide_from_input(self, bank_crt, sel_wide, sel_input):
- bank_crt.ladder_x._set_wide(sel_wide, self.inp._get_value(sel_input))
- bank_crt.ladder_y._set_wide(sel_wide, self.inp._get_value(sel_input))
-
- #
- # copy from CRT_Y.LADDER_{X,Y}.{WIDE,NARROW} to CRT_X.LADDER_{X,Y}.{WIDE,NARROW}
- #
- def copy_crt_y2x(self, sel_wide, sel_narrow):
-
- self.bnk.crt_x.ladder_x._set_wide(sel_wide, self.bnk.crt_y.ladder_x._get_wide(sel_wide))
- self.bnk.crt_x.ladder_y._set_wide(sel_wide, self.bnk.crt_y.ladder_y._get_wide(sel_wide))
-
- self.bnk.crt_x.ladder_x._set_narrow(sel_narrow, self.bnk.crt_y.ladder_x._get_narrow(sel_narrow))
- self.bnk.crt_x.ladder_y._set_narrow(sel_narrow, self.bnk.crt_y.ladder_y._get_narrow(sel_narrow))
-
- #
- # copy from CRT_{X,Y}.LADDER_X.{WIDE,NARROW} to CRT_{X,Y}.LADDER_Y.{WIDE,NARROW}
- #
- def copy_ladders_x2y(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out):
-
- self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, self.bnk.crt_x.ladder_x._get_wide(sel_wide_in))
- self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, self.bnk.crt_y.ladder_x._get_wide(sel_wide_in))
-
- self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in))
- self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in))
-
- #
- # copy from CRT_{X,Y}.LADDER_Y.{WIDE,NARROW} to CRT_{X,Y}.LADDER_X.{WIDE,NARROW}
- #
- def copy_ladders_y2x(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out):
-
- self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, self.bnk.crt_x.ladder_y._get_wide(sel_wide_in))
- self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, self.bnk.crt_y.ladder_y._get_wide(sel_wide_in))
-
- self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_in))
- self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_in))
-
- #
- # copy from CRT_{X,Y}.LADDER_X.{WIDE,NARROW} to CRT_{Y,X}.LADDER_Y.{WIDE,NARROW}
- #
- def cross_ladders_x2y(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out):
-
- self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, self.bnk.crt_y.ladder_x._get_wide(sel_wide_in))
- self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, self.bnk.crt_x.ladder_x._get_wide(sel_wide_in))
-
- self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in))
- self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in))
-
- #
- # modular multiply sel_wide_in by sel_narrow_in
- # stores intermediate result in WIDE.L and WIDE.H
- # needs modulus WIDE.N and speed-up coefficients NARROW.N_COEFF to be filled
- # places two copies of resulting quantity in sel_wide_out and sel_narrow_out
- # sel_*_in and sel_*_out can overlap (overwriting of input operands is ok)
- #
- def modular_multiply(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out, num_words, mode=(True, True), d=False):
-
- xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
- yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
-
- xn_coeff = self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF)
- yn_coeff = self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF)
-
- xxa = self.bnk.crt_x.ladder_x._get_wide(sel_wide_in)
- xya = self.bnk.crt_x.ladder_y._get_wide(sel_wide_in)
-
- yxa = self.bnk.crt_y.ladder_x._get_wide(sel_wide_in)
- yya = self.bnk.crt_y.ladder_y._get_wide(sel_wide_in)
-
- xxb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in)
- xyb = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_in)
-
- yxb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in)
- yyb = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_in)
-
- if not mode[0]: xb = xxb
- else: xb = xyb
-
- if not mode[1]: yb = yxb
- else: yb = yyb
-
- xxp = self.wrk.multipurpose_multiply(xxa, xb, xn, xn_coeff, num_words, dump=d, dump_crt="X", dump_ladder="X")
- xyp = self.wrk.multipurpose_multiply(xya, xb, xn, xn_coeff, num_words, dump=d, dump_crt="X", dump_ladder="Y")
-
- yxp = self.wrk.multipurpose_multiply(yxa, yb, yn, yn_coeff, num_words, dump=d, dump_crt="Y", dump_ladder="X")
- yyp = self.wrk.multipurpose_multiply(yya, yb, yn, yn_coeff, num_words, dump=d, dump_crt="Y", dump_ladder="Y")
-
- self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, xxp)
- self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, xyp)
- self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, yxp)
- self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, yyp)
-
- self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xxp)
- self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, xyp)
- self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yxp)
- self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, yyp)
-
- #
- # modular subtract values in sel_narrow_in (X-Y)
- # stores two copies of the result in sel_*_out
- #
- def modular_subtract(self, sel_narrow_in, sel_narrow_out, sel_wide_out, num_words):
-
- xa = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in)
- xb = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_in)
- xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
-
- ya = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in)
- yb = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_in)
- yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
-
- xd = self.wrk.serial_subtract_modular(xa, xb, xn, num_words)
- yd = self.wrk.serial_subtract_modular(ya, yb, yn, num_words)
-
- self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xd)
- self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yd)
-
- self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, xd)
- self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, yd)
-
- #
- # modular reduce sel_narrow_in
- # stores two copies of the result in sel_*_out
- #
- def modular_reduce(self, sel_narrow_in, sel_wide_out, sel_narrow_out, num_words):
-
- xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
- yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
-
- xn_coeff = self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF)
- yn_coeff = self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF)
-
- xb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in)
- yb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in)
-
- xp = self.wrk.multipurpose_multiply(None, xb, xn, xn_coeff, num_words, reduce_only=True)
- yp = self.wrk.multipurpose_multiply(None, yb, yn, yn_coeff, num_words, reduce_only=True)
-
- self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, xp)
- self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, xp)
- self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, yp)
- self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, yp)
-
- self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xp)
- self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, xp)
- self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yp)
- self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, yp)
-
- #
- # propagate carries (convert to non-redundant representation) content in sel_narrow
- # overwrites input value
- #
- def propagate_carries(self, sel_narrow, num_words):
- self.wrk.convert_nonredundant(self.bnk.crt_x.ladder_x._get_narrow(sel_narrow), num_words)
- self.wrk.convert_nonredundant(self.bnk.crt_x.ladder_y._get_narrow(sel_narrow), num_words)
- self.wrk.convert_nonredundant(self.bnk.crt_y.ladder_x._get_narrow(sel_narrow), num_words)
- self.wrk.convert_nonredundant(self.bnk.crt_y.ladder_y._get_narrow(sel_narrow), num_words)
-
- #
- # copy from CRT_{X,Y}.LADDER_{X,Y}.WIDE.{H,L} to CRT_{X,Y}.LADDER_{X,Y}.NARROW
- #
- def merge_lha(self, sel_narrow, num_words):
- xx_lsb = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.L)
- xy_lsb = self.bnk.crt_x.ladder_y._get_wide(ModExpNG_WideBankEnum.L)
- yx_lsb = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.L)
- yy_lsb = self.bnk.crt_y.ladder_y._get_wide(ModExpNG_WideBankEnum.L)
-
- xx_msb = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.H)
- xy_msb = self.bnk.crt_x.ladder_y._get_wide(ModExpNG_WideBankEnum.H)
- yx_msb = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.H)
- yy_msb = self.bnk.crt_y.ladder_y._get_wide(ModExpNG_WideBankEnum.H)
-
- xx = xx_lsb.words + xx_msb.words
- xy = xy_lsb.words + xy_msb.words
- yx = yx_lsb.words + yx_msb.words
- yy = yy_lsb.words + yy_msb.words
-
- self.bnk.crt_x.ladder_x._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, xx))
- self.bnk.crt_x.ladder_y._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, xy))
- self.bnk.crt_y.ladder_x._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, yx))
- self.bnk.crt_y.ladder_y._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, yy))
-
- #
- # multiply sel_wide_in by sel_narrow_in
- # stores twice larger product in WIDE.L and WIDE.H
- #
- def regular_multiply(self, sel_wide_in, sel_narrow_in, num_words):
-
- xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
- yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
-
- xn_coeff = self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF)
- yn_coeff = self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF)
-
- xxa = self.bnk.crt_x.ladder_x._get_wide(sel_wide_in)
- xya = self.bnk.crt_x.ladder_y._get_wide(sel_wide_in)
-
- yxa = self.bnk.crt_y.ladder_x._get_wide(sel_wide_in)
- yya = self.bnk.crt_y.ladder_y._get_wide(sel_wide_in)
-
- xb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in)
- yb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in)
-
- xxp = self.wrk.multipurpose_multiply(xxa, xb, None, None, num_words, multiply_only=True)
- xyp = self.wrk.multipurpose_multiply(xya, xb, None, None, num_words, multiply_only=True)
-
- yxp = self.wrk.multipurpose_multiply(yxa, yb, None, None, num_words, multiply_only=True)
- yyp = self.wrk.multipurpose_multiply(yya, yb, None, None, num_words, multiply_only=True)
-
- xxp_lsb = xxp.lower_half()
- xxp_msb = xxp.upper_half()
-
- xyp_lsb = xyp.lower_half()
- xyp_msb = xyp.upper_half()
-
- yxp_lsb = yxp.lower_half()
- yxp_msb = yxp.upper_half()
-
- yyp_lsb = yyp.lower_half()
- yyp_msb = yyp.upper_half()
-
- self.bnk.crt_x.ladder_x._set_wide(ModExpNG_WideBankEnum.L, xxp_lsb)
- self.bnk.crt_x.ladder_y._set_wide(ModExpNG_WideBankEnum.L, xyp_lsb)
- self.bnk.crt_y.ladder_x._set_wide(ModExpNG_WideBankEnum.L, yxp_lsb)
- self.bnk.crt_y.ladder_y._set_wide(ModExpNG_WideBankEnum.L, yyp_lsb)
-
- self.bnk.crt_x.ladder_x._set_wide(ModExpNG_WideBankEnum.H, xxp_msb)
- self.bnk.crt_x.ladder_y._set_wide(ModExpNG_WideBankEnum.H, xyp_msb)
- self.bnk.crt_y.ladder_x._set_wide(ModExpNG_WideBankEnum.H, yxp_msb)
- self.bnk.crt_y.ladder_y._set_wide(ModExpNG_WideBankEnum.H, yyp_msb)
-
- #
- # adds sel_narrow_a_in to sel_narrow_b_in
- # stores result in sel_narrow_out
- #
- def regular_add(self, sel_narrow_a_in, sel_narrow_b_in, sel_narrow_out, num_words):
- xxa = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_a_in)
- xya = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_a_in)
- yxa = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_a_in)
- yya = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_a_in)
-
- xxb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_b_in)
- xyb = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_b_in)
- yxb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_b_in)
- yyb = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_b_in)
-
- xxc = self.wrk.serial_add_uneven(xxa, xxb, num_words)
- xyc = self.wrk.serial_add_uneven(xya, xyb, num_words)
- yxc = self.wrk.serial_add_uneven(yxa, yxb, num_words)
- yyc = self.wrk.serial_add_uneven(yya, yyb, num_words)
-
- self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xxc)
- self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, xyc)
- self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yxc)
- self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, yyc)
-
- #
- # dump working variables before ladder step
- #
- def dump_before_step_using_crt(self, pq, m):
- print("num_words = %d" % pq)
- print("\rladder_mode_x = %d" % m[0])
- print("\rladder_mode_y = %d" % m[1])
- self.bnk.crt_x.ladder_x._get_narrow(N.C).format_verilog_concat("X_X")
- self.bnk.crt_x.ladder_y._get_narrow(N.C).format_verilog_concat("X_Y")
- self.bnk.crt_y.ladder_x._get_narrow(N.C).format_verilog_concat("Y_X")
- self.bnk.crt_y.ladder_y._get_narrow(N.C).format_verilog_concat("Y_Y")
- self.bnk.crt_x.ladder_x._get_wide(W.N).format_verilog_concat("X_N")
- self.bnk.crt_x.ladder_x._get_wide(W.N).format_verilog_concat("Y_N")
- self.bnk.crt_x.ladder_x._get_narrow(N.N_COEFF).format_verilog_concat("X_N_COEFF")
- self.bnk.crt_x.ladder_x._get_narrow(N.N_COEFF).format_verilog_concat("Y_N_COEFF")
-
- #
- # dump working variables after ladder step
- #
- def dump_after_step_using_crt(self):
- self.bnk.crt_x.ladder_x._get_narrow(N.C).format_verilog_concat("X_X")
- self.bnk.crt_x.ladder_y._get_narrow(N.C).format_verilog_concat("X_Y")
- self.bnk.crt_y.ladder_x._get_narrow(N.C).format_verilog_concat("Y_X")
- self.bnk.crt_y.ladder_y._get_narrow(N.C).format_verilog_concat("Y_Y")
-
- #
- # this deliberately converts narrow operand into redundant representation
- #
- def _force_overflow(self, bank_crt, sel_narrow):
-
- # original words
- T = bank_crt.ladder_x._get_narrow(sel_narrow).words
-
- # loop through upper N-1 words
- for i in range(1, len(T)):
-
- # get msbs of the previous word
- upper_bits = T[i-1] & _CARRY_MASK
-
- # if the previous msbs are empty, force lsbs of the current word
- # into them and then wipe the current lsbs
- if upper_bits == 0:
- lower_bits = T[i] & (_CARRY_MASK >> _WORD_WIDTH)
- T[i] ^= lower_bits
- T[i-1] |= (lower_bits << _WORD_WIDTH)
-
- # overwrite original words
- bank_crt.ladder_x._set_narrow(sel_narrow, ModExpNG_Operand(None, len(T), T))
-
- print("Forced overflow.")
-
-#
-# read content of core's output bank and compare it against known good values
-#
-def compare_signature():
-
- c = core
- s = s_known
- xm = xm_known
- ym = ym_known
-
- core_s = c.out.get_value(O.S)
- core_xm = c.out.get_value(O.XM)
- core_ym = c.out.get_value(O.YM)
-
- if core_s.number() != s: print("ERROR: core_s != s!")
- else: print("s is OK")
-
- if core_xm.number() != xm: print("ERROR: core_xm != xm!")
- else: print("x_mutated is OK")
-
- if core_ym.number() != ym: print("ERROR: core_ym != ym!")
- else: print("y_mutated is OK")
-
-#
-# get current ladder mode based on two exponents' bits
-#
-def get_ladder_mode_using_crt(v, bit):
-
- bit_value_p = (v.dp.number() & (1 << bit)) >> bit
- bit_value_q = (v.dq.number() & (1 << bit)) >> bit
-
- bit_value_p = bit_value_p > 0
- bit_value_q = bit_value_q > 0
-
- return (bit_value_p, bit_value_q)
-
-#
-# get current ladder mode based on private exponent's bit
-#
-def get_ladder_mode_without_crt(v, bit):
-
- bit_value_d = (v.d.number() & (1 << bit)) >> bit
-
- bit_value_d = bit_value_d > 0
-
- return (not bit_value_d, bit_value_d)
-
-#
-# print current exponentiation progress
-#
-def print_ladder_progress(current, total):
-
- # this will always print "100.0%" at the very last iteration, since we're
- # counting bits from msb to lsb and the very last index is zero, which
- # is congruent to 0 mod DUMP_PROGRESS_FACTOR
- if (current % DUMP_PROGRESS_FACTOR) == 0:
- pct = float((_WORD_WIDTH * total - current) / (_WORD_WIDTH * total)) * 100.0
- print("\rdone: %5.1f%%" % pct, end='')
-
- # move to next line after the very last iteration
- if current == 0: print("")
-
-#
-# try to exponentiate using the quad-multiplier (dual-core, dual-ladder) scheme
-#
-def sign_using_crt():
-
- c = core
- v = vector
- n = n_num_words
- pq = pq_num_words
-
- ff = (False, False)
- #
- # A / B => different content in banks (A in WIDE, B in NARROW)
- # [XY]Z => different content in ladders (XZ in X, YZ in Y)
- # .. => temporarily half-filled bank (omitted to save space)
- # * => "crossed" content (X.Y == Y.X and Y.Y == X.X)
- #
- # +------------------------+-------+------------------+---------+-----------+
- # | A | B | C | D | E |
- # +------------------------+-------+------------------+---------+-----------+
- c.set_wide_from_input (c.bnk.crt_x, W.N, I.N) # | ? | ? | ? | ? | ? |
- c.set_wide_from_input (c.bnk.crt_y, W.N, I.N) # | ? | ? | ? | ? | ? |
- c.set_wide_from_input (c.bnk.crt_x, W.A, I.X) # | .. | ? | ? | ? | ? |
- c.set_wide_from_input (c.bnk.crt_y, W.A, I.Y) # | [XY] / ? | ? | ? | ? | ? |
- c.set_wide_from_input (c.bnk.crt_x, W.E, I.M) # | [XY] / ? | ? | ? | ? | .. / ? |
- c.set_wide_from_input (c.bnk.crt_y, W.E, I.M) # | [XY] / ? | ? | ? | ? | M / ? |
- # +------------------------+-------+------------------+---------+-----------+
- c.set_narrow_from_input (c.bnk.crt_x, N.N_COEFF, I.N_COEFF) # | [XY] / ? | ? | ? | ? | M / ? |
- c.set_narrow_from_input (c.bnk.crt_y, N.N_COEFF, I.N_COEFF) # | [XY] / ? | ? | ? | ? | M / ? |
- c.set_narrow_from_input (c.bnk.crt_x, N.A, I.N_FACTOR) # | [XY] / .. | ? | ? | ? | M / ? |
- c.set_narrow_from_input (c.bnk.crt_y, N.A, I.N_FACTOR) # | [XY] / N_FACTOR | ? | ? | ? | M / ? |
- c.set_narrow_from_input (c.bnk.crt_x, N.E, I.M) # | [XY] / N_FACTOR | ? | ? | ? | M / .. |
- c.set_narrow_from_input (c.bnk.crt_y, N.E, I.M) # | [XY] / N_FACTOR | ? | ? | ? | M |
- # +------------------------+-------+------------------+---------+-----------+
- c.modular_multiply(W.A, N.A, W.B, N.B, n) # | [XY] / N_FACTOR | [XY]F | ? | ? | M | [XY]F = [XY] * N_FACTOR
- c.modular_multiply(W.B, N.B, W.C, N.C, n, mode=ff) # | [XY] / N_FACTOR | [XY]F | [XY]YM | ? | M | [XY]MF = [XY]F * [XY]F
- c.modular_multiply(W.C, N.I, W.D, N.D, n) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M | [XY]M = [XY]MF * 1
- # +------------------------+-------+------------------+---------+-----------+
- c.propagate_carries(N.D, n_num_words) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M |
- # +------------------------+-------+------------------+---------+-----------+
- c.set_output_from_narrow(O.XM, c.bnk.crt_x, N.D) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M |
- c.set_output_from_narrow(O.YM, c.bnk.crt_y, N.D) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M |
- # +------------------------+-------+------------------+---------+-----------+
- c.modular_multiply(W.E, N.B, W.C, N.C, n) # | [XY] / N_FACTOR | [XY]F | [XY]MB | [XY]M | M | [XY]MB = M * [XY]F
- # +------------------------+-------+------------------+---------+-----------+
- c.propagate_carries(N.C, n_num_words) # | [XY] / N_FACTOR | [XY]F | [XY]MB | [XY]M | M |
- # +------------------------+-------+------------------+---------+-----------+
- c.copy_crt_y2x(W.C, N.C) # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M |
- # +------------------------+-------+------------------+---------+-----------+
- c.set_wide_from_input (c.bnk.crt_x, W.N, I.P) # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M |
- c.set_wide_from_input (c.bnk.crt_y, W.N, I.Q) # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M |
- c.set_wide_from_input (c.bnk.crt_x, W.A, I.P_FACTOR) # | ... / N_FACTOR | [XY]F | YMB | [XY]M | M |
- c.set_wide_from_input (c.bnk.crt_y, W.A, I.Q_FACTOR) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | M |
- c.set_wide_from_input (c.bnk.crt_x, W.E, I.QINV) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | .. |
- c.set_wide_from_input (c.bnk.crt_x, W.E, I.QINV) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M |
- # +------------------------+-------+------------------+---------+-----------+
- c.set_narrow_from_input(c.bnk.crt_x, N.N_COEFF, I.P_COEFF) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M |
- c.set_narrow_from_input(c.bnk.crt_y, N.N_COEFF, I.Q_COEFF) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M |
- c.set_narrow_from_input(c.bnk.crt_x, N.A, I.P_FACTOR) # | [PQ]_FACTOR / ... | [XY]F | YMB | [XY]M | QINV / M |
- c.set_narrow_from_input(c.bnk.crt_y, N.A, I.Q_FACTOR) # | [PQ]_FACTOR | [XY]F | YMB | [XY]M | QINV / M |
- c.set_narrow_from_input(c.bnk.crt_x, N.E, I.QINV) # | [PQ]_FACTOR | [XY]F | YMB | [XY]M | QINV / .. |
- c.set_narrow_from_input(c.bnk.crt_x, N.E, I.QINV) # | [PQ]_FACTOR | [XY]F | YMB | [XY]M | QINV |
- # +------------------------+-------+------------------+---------+-----------+
- c.modular_reduce(N.C, W.D, N.D, pq) # | [PQ]_FACTOR | [XY]F | YMB | [PQ]MBZ | QINV | [PQ]MBZ = YMB mod [PQ]
- c.modular_multiply(W.D, N.A, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | [PQ]MB | [PQ]MBZ | QINV | [PQ]MB = [PQ]MBZ * [PQ]_FACTOR
- c.modular_multiply(W.C, N.A, W.D, N.D, pq) # | [PQ]_FACTOR | [XY]F | [PQ]MB | [PQ]MBF | QINV | [PQ]MBF = [PQ]MB * [PQ]_FACTOR
- c.modular_multiply(W.A, N.I, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | [PQ]IF | [PQ]MBF | QINV | [PQ]IF = 1 * [PQ]_FACTOR
- # +------------------------+-------+------------------+---------+-----------+
- c.copy_ladders_x2y(W.D, N.D, W.C, N.C) # | [PQ]_FACTOR | [XY]F | [PQ]IF / [PQ]MBF | [PQ]MBF | QINV |
- # +------------------------+-------+------------------+---------+-----------+
- ########################### # | | | | | |
- # Begin Montgomery Ladder # # | | | | | |
- ########################### # | | | | | |
- # | | | | | |
- for bit in range(_WORD_WIDTH * pq - 1, -1, -1): # | | | | | |
- # | | | | | |
- m = get_ladder_mode_using_crt(v, bit) # | | | | | |
- dbg = bit == DUMP_LADDER_INDEX # | | | | | |
- # | | | | | |
- if dbg: # | | | | | |
- if FORCE_OVERFLOW: c._force_overflow(c.bnk.crt_x, N.C) # | | | | | |
- if DUMP_VECTORS: c.dump_before_step_using_crt(pq, m) # | | | | | |
- # +------------------------+-------+------------------+---------+-----------+
- c.modular_multiply(W.C, N.C, W.C, N.C, pq, mode=m, d=dbg) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]MBF | QINV | <LADDER>
- # +------------------------+-------+------------------+---------+-----------+
- if dbg and DUMP_VECTORS: c.dump_after_step_using_crt() # | | | | | |
- print_ladder_progress(bit, pq) # | | | | | |
- # | | | | | |
- ######################### # | | | | | |
- # End Montgomery Ladder # # | | | | | |
- ######################### # | | | | | |
- # +------------------------+-------+------------------+---------+-----------+
- c.modular_multiply(W.C, N.I, W.D, N.D, pq) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB | QINV | [PQ]SB = [PQ]SBF * 1
- # +------------------------+-------+------------------+---------+-----------+
- c.propagate_carries(N.D, pq) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB | QINV |
- # +------------------------+-------+------------------+---------+-----------+
- c.cross_ladders_x2y(W.D, N.D, W.D, N.D) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB* | QINV |
- # +------------------------+-------+------------------+---------+-----------+
- c.modular_subtract(N.D, N.C, W.C, pq) # | [PQ]_FACTOR | [XY]F | RSB | [PQ]SB* | QINV | RSB = PSB - QSB
- # +------------------------+-------+------------------+---------+-----------+
- c.modular_multiply(W.C, N.E, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | RSBIZ | [PQ]SB* | QINV | RSBIZ = RSB * QINV
- c.modular_multiply(W.C, N.A, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | QINV | RSBI = RSBIZ * P_FACTOR
- # +------------------------+-------+------------------+---------+-----------+
- c.set_wide_from_input (c.bnk.crt_x, W.E, I.Q) # | [PQ]_FACTOR / N_FACTOR | [XY]F | RSBI | [PQ]SB* | .. |
- c.set_wide_from_input (c.bnk.crt_x, W.E, I.Q) # | [PQ]_FACTOR / N_FACTOR | [XY]F | RSBI | [PQ]SB* | Q / QINV |
- # +------------------------+-------+------------------+---------+-----------+
- c.set_narrow_from_input(c.bnk.crt_x, N.E, I.Q) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | Q / .. |
- c.set_narrow_from_input(c.bnk.crt_x, N.E, I.Q) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | Q |
- # +------------------------+-------+------------------+---------+-----------+
- c.regular_multiply(W.E, N.C, pq) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | Q | = RSBI * Q
- # +------------------------+-------+------------------+---------+-----------+
- c.merge_lha(N.A, pq) # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | [PQ]SB* | Q |
- # +------------------------+-------+------------------+---------+-----------+
- c.propagate_carries(N.A, n) # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | [PQ]SB* | Q |
- # +------------------------+-------+------------------+---------+-----------+
- c.copy_crt_y2x(W.D, N.D) # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | QSB* | Q |
- # +------------------------+-------+------------------+---------+-----------+
- c.regular_add(N.D, N.A, N.C, pq) # | [PQ]_FACTOR / QRSBI | [XY]F | SB | QSB* | Q | SB = QSB + RSBI
- # +------------------------+-------+------------------+---------+-----------+
- c.set_wide_from_input (c.bnk.crt_x, W.N, I.N) # | | | | | |
- c.set_wide_from_input (c.bnk.crt_y, W.N, I.N) # | | | | | |
- # +------------------------+-------+------------------+---------+-----------+
- c.set_narrow_from_input(c.bnk.crt_x, N.N_COEFF, I.N_COEFF) # | | | | | |
- c.set_narrow_from_input(c.bnk.crt_y, N.N_COEFF, I.N_COEFF) # | | | | | |
- # +------------------------+-------+------------------+---------+-----------+
- c.modular_multiply(W.B, N.C, W.A, N.A, n, ff) # | S | | | | | S = XF * SB
- # +------------------------+-------+------------------+---------+-----------+
- c.propagate_carries(N.A, n) # | S | | | | |
- # +------------------------+-------+------------------+---------+-----------+
- c.set_output_from_narrow(O.S, c.bnk.crt_x, N.A) # | S | | | | |
- # +------------------------+-------+------------------+---------+-----------+
-
-#
-# try to exponentiate using only half of the quad-multiplier (one dual-ladder core)
-#
-def sign_without_crt():
-
- c = core
- v = vector
- n = n_num_words
-
- ff = (False, False)
-
- c.set_wide_from_input (c.bnk.crt_x, W.N, I.N)
- c.set_wide_from_input (c.bnk.crt_y, W.N, I.N)
- c.set_wide_from_input (c.bnk.crt_x, W.A, I.X)
- c.set_wide_from_input (c.bnk.crt_y, W.A, I.Y)
- c.set_wide_from_input (c.bnk.crt_x, W.E, I.M)
- c.set_wide_from_input (c.bnk.crt_y, W.E, I.M)
-
- c.set_narrow_from_input (c.bnk.crt_x, N.N_COEFF, I.N_COEFF)
- c.set_narrow_from_input (c.bnk.crt_y, N.N_COEFF, I.N_COEFF)
- c.set_narrow_from_input (c.bnk.crt_x, N.A, I.N_FACTOR)
- c.set_narrow_from_input (c.bnk.crt_y, N.A, I.N_FACTOR)
- c.set_narrow_from_input (c.bnk.crt_x, N.E, I.M)
- c.set_narrow_from_input (c.bnk.crt_y, N.E, I.M)
-
- c.modular_multiply(W.A, N.A, W.B, N.B, n) # [XY]F = [XY] * N_FACTOR
- c.modular_multiply(W.B, N.B, W.C, N.C, n, mode=ff) # [XY]MF = [XY]F * [XY]F
- c.modular_multiply(W.C, N.I, W.D, N.D, n) # [XY]M = [XY]MF * 1
-
- c.propagate_carries(N.D, n)
-
- c.set_output_from_narrow(O.XM, c.bnk.crt_x, N.D)
- c.set_output_from_narrow(O.YM, c.bnk.crt_y, N.D)
-
- c.modular_multiply(W.E, N.B, W.C, N.C, n) # [XY]MB = M * [XY]F
-
- XF = c.bnk.crt_x.ladder_x._get_narrow(N.B)
-
- c.set_wide_from_input(c.bnk.crt_x, W.A, I.N_FACTOR)
- c.set_wide_from_input(c.bnk.crt_y, W.A, I.N_FACTOR)
-
- c.modular_multiply(W.C, N.A, W.D, N.D, n) # MBF = MB * N_FACTOR
- c.modular_multiply(W.A, N.I, W.C, N.C, n) # IF = 1 * N_FACTOR
-
- c.copy_ladders_x2y(W.D, N.D, W.C, N.C)
-
- ###########################
- # Begin Montgomery Ladder #
- ###########################
-
- for bit in range(_WORD_WIDTH * n - 1, -1, -1):
-
- m = get_ladder_mode_without_crt(v, bit)
- dbg = bit == DUMP_LADDER_INDEX
-
- if dbg:
- if FORCE_OVERFLOW: c._force_overflow(c.bnk.crt_x, N.C)
- if DUMP_VECTORS: c.dump_before_step_without_crt(n, m)
-
- c.modular_multiply(W.C, N.C, W.C, N.C, n, mode=m, d=dbg)
-
- if dbg and DUMP_VECTORS: c.dump_after_step_without_crt()
- print_ladder_progress(bit, n)
-
- #########################
- # End Montgomery Ladder #
- #########################
-
- c.cross_ladders_x2y(W.B, N.B, W.B, N.B)
-
- c.modular_multiply(W.C, N.I, W.D, N.D, n) # SB = SBF * 1
- c.modular_multiply(W.B, N.D, W.A, N.A, n, mode=ff) # S = XF * SB
-
- c.copy_ladders_y2x(W.A, N.A, W.B, N.B)
-
- c.propagate_carries(N.B, n)
-
- c.set_output_from_narrow(O.S, c.bnk.crt_y, N.B)
-
-
-#
-# main()
-#
-if __name__ == "__main__":
-
- # handy shortcuts
- W = ModExpNG_WideBankEnum
- N = ModExpNG_NarrowBankEnum
- I = ModExpNG_CoreInputEnum
- O = ModExpNG_CoreOutputEnum
-
- # set helper quantity
- # instantiate core
- # load test vector
- # transfer numbers from vector to core
- # set numbers of words
- # obtain known good reference value with built-in math
- # mutate blinding quantities with built-in math
-
- i = ModExpNG_Operand(1, KEY_LENGTH)
-
- core = ModExpNG_Core(i)
- vector = ModExpNG_TestVector()
-
- core.inp.set_value(I.M, vector.m)
-
- core.inp.set_value(I.N, vector.n)
- core.inp.set_value(I.P, vector.p)
- core.inp.set_value(I.Q, vector.q)
-
- core.inp.set_value(I.N_COEFF, vector.n_coeff)
- core.inp.set_value(I.P_COEFF, vector.p_coeff)
- core.inp.set_value(I.Q_COEFF, vector.q_coeff)
-
- core.inp.set_value(I.N_FACTOR, vector.n_factor)
- core.inp.set_value(I.P_FACTOR, vector.p_factor)
- core.inp.set_value(I.Q_FACTOR, vector.q_factor)
-
- core.inp.set_value(I.X, vector.x)
- core.inp.set_value(I.Y, vector.y)
-
- core.inp.set_value(I.QINV, vector.qinv)
-
- n_num_words = KEY_LENGTH // _WORD_WIDTH
- pq_num_words = n_num_words // 2
-
- s_known = pow(vector.m.number(), vector.d.number(), vector.n.number())
-
- xm_known = pow(vector.x.number(), 2, vector.n.number())
- ym_known = pow(vector.y.number(), 2, vector.n.number())
-
- # sign using CRT and check
- print("Signing using CRT...")
- sign_using_crt()
- compare_signature()
-
- # sign without CRT and check
- print("Signing without CRT...")
- sign_without_crt()
- compare_signature()
-
-
-#
-# End-of-File
-#