From 00fd9dea38057e0f5e8d3e66722c8cb3ef9fba6b Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 14:26:55 +0300 Subject: Moved here from "modexpng" repo. --- modexpng_fpga_model.py | 1745 +++++++++++++++++++++++++++++++++++++++++++ vector/.gitignore | 3 + vector/README.md | 10 + vector/vector_format.py | 67 ++ vector/vector_regenerate.py | 48 ++ vector/vector_util.py | 319 ++++++++ 6 files changed, 2192 insertions(+) create mode 100644 modexpng_fpga_model.py create mode 100644 vector/.gitignore create mode 100644 vector/README.md create mode 100644 vector/vector_format.py create mode 100644 vector/vector_regenerate.py create mode 100644 vector/vector_util.py diff --git a/modexpng_fpga_model.py b/modexpng_fpga_model.py new file mode 100644 index 0000000..325f544 --- /dev/null +++ b/modexpng_fpga_model.py @@ -0,0 +1,1745 @@ +#!/usr/bin/python3 +# +# +# ModExpNG core math model. +# +# +# Copyright (c) 2019, NORDUnet A/S +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the NORDUnet nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +# ------- +# Imports +#-------- + +import sys +import importlib +from enum import Enum, auto + + +# -------------- +# Model Settings +# -------------- + +# length of public key +KEY_LENGTH = 1024 + +# how many parallel multipliers to use +NUM_MULTS = 8 + + +# --------------- +# Internal Values +# --------------- + +# half of key length +_KEY_LENGTH_HALF = KEY_LENGTH // 2 + +# width of internal math pipeline +_WORD_WIDTH = 16 +_WORD_WIDTH_EXT = 18 + +_WORD_MASK = 2 ** _WORD_WIDTH - 1 +_WORD_MASK_EXT = 2 ** _WORD_WIDTH_EXT - 1 +_CARRY_MASK = _WORD_MASK ^ _WORD_MASK_EXT + +# folder with test vector scripts +_VECTOR_PATH = "/vector" + +# name of test vector class +_VECTOR_CLASS = "Vector" + + +# ------------------ +# Debugging Settings +# ------------------ +DUMP_LADDER_INDEX = -1 # at which ladder step to print debug vector +DUMP_VECTORS = False # print entire debug vector components +DUMP_INDICES = False # print indices of words at MAC inputs +DUMP_MACS_INPUTS = False # print MAC input words +DUMP_MACS_CLEARING = False # print MAC clearing bitmaps +DUMP_MACS_ACCUMULATION = False # print MAC accumulators contents +DUMP_MULT_PARTS = False # print multiplication output parts +DUMP_RECOMBINATION = False # print recombination internals +DUMP_REDUCTION = False # print reduction internals +FORCE_OVERFLOW = False # force rarely seen internal overflow situation to verify how its handler works +DUMP_PROGRESS_FACTOR = 16 # once per how many ladder steps to update progress indicator + +# +# Multi-Precision Integer +# +class ModExpNG_Operand(): + + def __init__(self, number, length, words = None): + + if words is None: + + # length must be divisible by word width + if (length % _WORD_WIDTH) > 0: + raise Exception("Bad number length!") + + self._init_from_number(number, length) + + else: + + # length must match words count + if len(words) != length: + raise Exception("Bad words count!") + + self._init_from_words(words, length) + + def format_verilog_concat(self, name): + + for i in range(len(self.words)): + if i > 0: + if (i % 4) == 0: print("") + else: print(" ", end='') + print("%s[%3d] = 18'h%05x;" % (name, i, self.words[i]), end='') + print("") + + def _init_from_words(self, words, count): + + for i in range(count): + + # word must not exceed 18 bits + if words[i] >= (2 ** (_WORD_WIDTH_EXT)): + raise Exception("Word is too large!") + + self.words = list(words) + + def _init_from_number(self, number, length): + + num_hexchars_per_word = _WORD_WIDTH // 4 + num_hexchars_total = length // num_hexchars_per_word + + value_hex = format(number, 'x') + + # value must not be larger than specified, but it can be smaller, so + # we may need to prepend it with zeroes + if len(value_hex) > num_hexchars_total: + raise Exception("Number is too large!") + else: + while len(value_hex) < num_hexchars_total: + value_hex = "0" + value_hex + + # create empty list + self.words = list() + + # fill in words + while len(value_hex) > 0: + value_hex_part = value_hex[-num_hexchars_per_word:] + value_hex = value_hex[:-num_hexchars_per_word] + self.words.append(int(value_hex_part, 16)) + + def number(self): + ret = 0 + shift = 0 + for word in self.words: + ret += word << shift + shift += _WORD_WIDTH + return ret + + def _get_half(self, part): + num_words = len(self.words) + num_words_half = num_words // 2 + if not part: return ModExpNG_Operand(None, num_words_half, self.words[:num_words_half]) + else: return ModExpNG_Operand(None, num_words_half, self.words[num_words_half:]) + + def lower_half(self): + return self._get_half(False) + + def upper_half(self): + return self._get_half(True) + +# +# Test Vector +# +class ModExpNG_TestVector(): + + def __init__(self): + + # format target filename + filename = "vector_" + str(KEY_LENGTH) + "_randomized" + + # add ./vector to import search path + sys.path.insert(1, sys.path[0] + _VECTOR_PATH) + + # import from filename + vector_module = importlib.import_module(filename) + + # get vector class + vector_class = getattr(vector_module, _VECTOR_CLASS) + + # instantiate vector class + vector_inst = vector_class() + + # obtain parts of vector + self.m = ModExpNG_Operand(vector_inst.m, KEY_LENGTH) + self.n = ModExpNG_Operand(vector_inst.n, KEY_LENGTH) + self.d = ModExpNG_Operand(vector_inst.d, KEY_LENGTH) + self.p = ModExpNG_Operand(vector_inst.p, _KEY_LENGTH_HALF) + self.q = ModExpNG_Operand(vector_inst.q, _KEY_LENGTH_HALF) + self.dp = ModExpNG_Operand(vector_inst.dp, _KEY_LENGTH_HALF) + self.dq = ModExpNG_Operand(vector_inst.dq, _KEY_LENGTH_HALF) + self.qinv = ModExpNG_Operand(vector_inst.qinv, _KEY_LENGTH_HALF) + self.n_factor = ModExpNG_Operand(vector_inst.n_factor, KEY_LENGTH) + self.p_factor = ModExpNG_Operand(vector_inst.p_factor, _KEY_LENGTH_HALF) + self.q_factor = ModExpNG_Operand(vector_inst.q_factor, _KEY_LENGTH_HALF) + self.n_coeff = ModExpNG_Operand(vector_inst.n_coeff, KEY_LENGTH + _WORD_WIDTH) + self.p_coeff = ModExpNG_Operand(vector_inst.p_coeff, _KEY_LENGTH_HALF + _WORD_WIDTH) + self.q_coeff = ModExpNG_Operand(vector_inst.q_coeff, _KEY_LENGTH_HALF + _WORD_WIDTH) + self.x = ModExpNG_Operand(vector_inst.x, KEY_LENGTH) + self.y = ModExpNG_Operand(vector_inst.y, KEY_LENGTH) + +class ModExpNG_WideBankEnum(Enum): + A = auto() + B = auto() + C = auto() + D = auto() + E = auto() + N = auto() + L = auto() + H = auto() + +class ModExpNG_NarrowBankEnum(Enum): + A = auto() + B = auto() + C = auto() + D = auto() + E = auto() + N_COEFF = auto() + I = auto() + +class ModExpNG_CoreInputEnum(Enum): + M = auto() + + N = auto() + P = auto() + Q = auto() + + N_COEFF = auto() + P_COEFF = auto() + Q_COEFF = auto() + + N_FACTOR = auto() + P_FACTOR = auto() + Q_FACTOR = auto() + + X = auto() + Y = auto() + + QINV = auto() + +class ModExpNG_CoreOutputEnum(Enum): + XM = auto() + YM = auto() + S = auto() + +class ModExpNG_WideBank(): + + def __init__(self): + self.a = None + self.b = None + self.c = None + self.d = None + self.e = None + self.n = None + self.l = None + self.h = None + + def _get_value(self, sel): + if sel == ModExpNG_WideBankEnum.A: return self.a + elif sel == ModExpNG_WideBankEnum.B: return self.b + elif sel == ModExpNG_WideBankEnum.C: return self.c + elif sel == ModExpNG_WideBankEnum.D: return self.d + elif sel == ModExpNG_WideBankEnum.E: return self.e + elif sel == ModExpNG_WideBankEnum.N: return self.n + elif sel == ModExpNG_WideBankEnum.L: return self.l + elif sel == ModExpNG_WideBankEnum.H: return self.h + else: raise Exception("ModExpNG_WideBank._get_value(): Invalid selector!") + + def _set_value(self, sel, value): + if sel == ModExpNG_WideBankEnum.A: self.a = value + elif sel == ModExpNG_WideBankEnum.B: self.b = value + elif sel == ModExpNG_WideBankEnum.C: self.c = value + elif sel == ModExpNG_WideBankEnum.D: self.d = value + elif sel == ModExpNG_WideBankEnum.E: self.e = value + elif sel == ModExpNG_WideBankEnum.N: self.n = value + elif sel == ModExpNG_WideBankEnum.L: self.l = value + elif sel == ModExpNG_WideBankEnum.H: self.h = value + else: raise Exception("ModExpNG_WideBank._set_value(): Invalid selector!") + +class ModExpNG_NarrowBank(): + + def __init__(self, i): + self.a = None + self.b = None + self.c = None + self.d = None + self.e = None + self.n_coeff = None + self.i = i + + def _get_value(self, sel): + if sel == ModExpNG_NarrowBankEnum.A: return self.a + elif sel == ModExpNG_NarrowBankEnum.B: return self.b + elif sel == ModExpNG_NarrowBankEnum.C: return self.c + elif sel == ModExpNG_NarrowBankEnum.D: return self.d + elif sel == ModExpNG_NarrowBankEnum.E: return self.e + elif sel == ModExpNG_NarrowBankEnum.N_COEFF: return self.n_coeff + elif sel == ModExpNG_NarrowBankEnum.I: return self.i + else: raise Exception("ModExpNG_NarrowBank._get_value(): Invalid selector!") + + def _set_value(self, sel, value): + if sel == ModExpNG_NarrowBankEnum.A: self.a = value + elif sel == ModExpNG_NarrowBankEnum.B: self.b = value + elif sel == ModExpNG_NarrowBankEnum.C: self.c = value + elif sel == ModExpNG_NarrowBankEnum.D: self.d = value + elif sel == ModExpNG_NarrowBankEnum.E: self.e = value + elif sel == ModExpNG_NarrowBankEnum.N_COEFF: self.n_coeff = value + else: raise Exception("ModExpNG_NarrowBank._set_value(): Invalid selector!") + +class ModExpNG_CoreInput(): + + def __init__(self): + self._m = None + + self._n = None + self._p = None + self._q = None + + self._n_coeff = None + self._p_coeff = None + self._q_coeff = None + + self._n_factor = None + self._p_factor = None + self._q_factor = None + + self._x = None + self._y = None + + self._qinv = None + + def set_value(self, sel, value): + if sel == ModExpNG_CoreInputEnum.M: self._m = value + + elif sel == ModExpNG_CoreInputEnum.N: self._n = value + elif sel == ModExpNG_CoreInputEnum.P: self._p = value + elif sel == ModExpNG_CoreInputEnum.Q: self._q = value + + elif sel == ModExpNG_CoreInputEnum.N_COEFF: self._n_coeff = value + elif sel == ModExpNG_CoreInputEnum.P_COEFF: self._p_coeff = value + elif sel == ModExpNG_CoreInputEnum.Q_COEFF: self._q_coeff = value + + elif sel == ModExpNG_CoreInputEnum.N_FACTOR: self._n_factor = value + elif sel == ModExpNG_CoreInputEnum.P_FACTOR: self._p_factor = value + elif sel == ModExpNG_CoreInputEnum.Q_FACTOR: self._q_factor = value + + elif sel == ModExpNG_CoreInputEnum.X: self._x = value + elif sel == ModExpNG_CoreInputEnum.Y: self._y = value + + elif sel == ModExpNG_CoreInputEnum.QINV: self._qinv = value + + else: raise Exception("ModExpNG_CoreInput.set_value(): invalid selector!") + + def _get_value(self, sel): + if sel == ModExpNG_CoreInputEnum.M: return self._m + + elif sel == ModExpNG_CoreInputEnum.N: return self._n + elif sel == ModExpNG_CoreInputEnum.P: return self._p + elif sel == ModExpNG_CoreInputEnum.Q: return self._q + + elif sel == ModExpNG_CoreInputEnum.N_COEFF: return self._n_coeff + elif sel == ModExpNG_CoreInputEnum.P_COEFF: return self._p_coeff + elif sel == ModExpNG_CoreInputEnum.Q_COEFF: return self._q_coeff + + elif sel == ModExpNG_CoreInputEnum.N_FACTOR: return self._n_factor + elif sel == ModExpNG_CoreInputEnum.P_FACTOR: return self._p_factor + elif sel == ModExpNG_CoreInputEnum.Q_FACTOR: return self._q_factor + + elif sel == ModExpNG_CoreInputEnum.X: return self._x + elif sel == ModExpNG_CoreInputEnum.Y: return self._y + + elif sel == ModExpNG_CoreInputEnum.QINV: return self._qinv + + else: raise Exception("ModExpNG_CoreInput._get_value(): invalid selector!") + +class ModExpNG_CoreOutput(): + + def __init__(self): + self._xm = None + self._ym = None + self._s = None + + def _set_value(self, sel, value): + if sel == ModExpNG_CoreOutputEnum.XM: self._xm = value + elif sel == ModExpNG_CoreOutputEnum.YM: self._ym = value + elif sel == ModExpNG_CoreOutputEnum.S: self._s = value + else: raise Exception("ModExpNG_CoreOutput._set_value(): invalid selector!") + + def get_value(self, sel): + if sel == ModExpNG_CoreOutputEnum.XM: return self._xm + elif sel == ModExpNG_CoreOutputEnum.YM: return self._ym + elif sel == ModExpNG_CoreOutputEnum.S: return self._s + else: raise Exception("ModExpNG_CoreOutput.get_value(): invalid selector!") + +class ModExpNG_BanksPair(): + + def __init__(self, i): + self.wide = ModExpNG_WideBank() + self.narrow = ModExpNG_NarrowBank(i) + + def _get_wide(self, sel): + return self.wide._get_value(sel) + + def _get_narrow(self, sel): + return self.narrow._get_value(sel) + + def _set_wide(self, sel, value): + self.wide._set_value(sel, value) + + def _set_narrow(self, sel, value): + self.narrow._set_value(sel, value) + +class ModExpNG_BanksLadder(): + + def __init__(self, i): + self.ladder_x = ModExpNG_BanksPair(i) + self.ladder_y = ModExpNG_BanksPair(i) + +class ModExpNG_BanksCRT(): + + def __init__(self, i): + self.crt_x = ModExpNG_BanksLadder(i) + self.crt_y = ModExpNG_BanksLadder(i) + +class ModExpNG_PartRecombinator(): + + def _bit_select(self, x, msb, lsb): + y = 0 + for pos in range(lsb, msb+1): + y |= (x & (1 << pos)) >> lsb + return y + + def _flush_pipeline(self, dump): + self.z0, self.y0, self.x0 = 0, 0, 0 + if dump and DUMP_RECOMBINATION: + print("RCMB -> flush()") + + def _push_pipeline(self, part, dump): + + # split next part into 16-bit words + z = self._bit_select(part, 46, 32) + y = self._bit_select(part, 31, 16) + x = self._bit_select(part, 15, 0) + + # shift to the right + z1 = z + y1 = y + self.z0 + x1 = x + self.y0 + (self.x0 >> _WORD_WIDTH) # IMPORTANT: This carry can be up to two bits wide!! + + # save lower 16 bits of the rightmost cell + t = self.x0 & _WORD_MASK + + # update internal latches + self.z0, self.y0, self.x0 = z1, y1, x1 + + # dump + if dump and DUMP_RECOMBINATION: + print("RCMB -> push(): part = 0x%012x, word = 0x%04x" % (part, t)) + + # done + return t + + def recombine_square(self, parts, ab_num_words, dump): + + # empty results so far + words_lsb = list() # n words + words_msb = list() # n words + + # recombine the lower half (n parts) + # the first tick produces null result, the last part + # produces three words and needs two extra ticks + self._flush_pipeline(dump) + for i in range(ab_num_words + 1 + 2): + next_part = parts[i] if i < ab_num_words else 0 + next_word = self._push_pipeline(next_part, dump) + + if i > 0: + words_lsb.append(next_word) + + # recombine the upper half (n-1 parts) + # the first tick produces null result + self._flush_pipeline(dump) + for i in range(ab_num_words + 1): + next_part = parts[i + ab_num_words] if i < (ab_num_words - 1) else 0 + next_word = self._push_pipeline(next_part, dump) + + if i > 0: + words_msb.append(next_word) + + # merge words + words = list() + + # merge lower half + for x in range(ab_num_words): + next_word = words_lsb[x] + words.append(next_word) + + # merge upper half adding the two overlapping words + for x in range(ab_num_words): + next_word = words_msb[x] + if x < 2: + next_word += words_lsb[x + ab_num_words] + words.append(next_word) + + return words + + def recombine_triangle(self, parts, ab_num_words, dump): + + # empty result so far + words_lsb = list() + + # recombine the lower half (n+1 parts) + # the first tick produces null result, so we need n + 1 + 1 = n + 2 + # ticks total and should only save the result word during the last + # n + 1 ticks + self._flush_pipeline(dump) + for i in range(ab_num_words + 2): + + next_part = parts[i] if i < (ab_num_words + 1) else 0 + next_word = self._push_pipeline(next_part, dump) + + if i > 0: + words_lsb.append(next_word) + + return words_lsb + + def recombine_rectangle(self, parts, ab_num_words, dump): + + # empty result so far + words_lsb = list() # n words + words_msb = list() # n+1 words + + # recombine the lower half (n parts) + # the first tick produces null result, the last part + # produces three words and needs two extra ticks + self._flush_pipeline(dump) + for i in range(ab_num_words + 1 + 2): + next_part = parts[i] if i < ab_num_words else 0 + next_word = self._push_pipeline(next_part, dump) + + if i > 0: + words_lsb.append(next_word) + + # recombine the upper half (n parts) + # the first tick produces null result, the last part + # produces two words and needs an extra tick + self._flush_pipeline(dump) + for i in range(ab_num_words + 2): + next_part = parts[i + ab_num_words] if i < ab_num_words else 0 + next_word = self._push_pipeline(next_part, dump) + + if i > 0: + words_msb.append(next_word) + + # merge words + words = list() + + # merge lower half + for x in range(ab_num_words): + next_word = words_lsb[x] + words.append(next_word) + + # merge upper half adding the two overlapping words + for x in range(ab_num_words + 1): + next_word = words_msb[x] + if x < 2: + next_word += words_lsb[x + ab_num_words] + words.append(next_word) + + return words + +class ModExpNG_WordMultiplier(): + + def __init__(self): + + self._macs = list() + self._indices = list() + + self._mac_aux = list() + self._index_aux = list() + + for x in range(NUM_MULTS): + self._macs.append(0) + self._indices.append(0) + + self._mac_aux.append(0) + self._index_aux.append(0) + + def _clear_all_macs(self, t, col, dump): + for x in range(NUM_MULTS): + self._macs[x] = 0 + if dump and DUMP_MACS_CLEARING: + print("t=%2d, col=%2d > clear > all" % (t, col)) + + def _clear_one_mac(self, x, t, col, dump): + self._macs[x] = 0 + if dump and DUMP_MACS_CLEARING: + print("t=%2d, col=%2d > clear > x=%d" % (t, col, x)) + + def _clear_mac_aux(self, t, col, dump): + self._mac_aux[0] = 0 + if dump and DUMP_MACS_CLEARING: + print("t= 0, col=%2d > clear > aux" % (col)) + + def _update_one_mac(self, x, t, col, a, b, dump, need_aux=False): + + if a >= (2 ** _WORD_WIDTH_EXT): + raise Exception("a > 0x3FFFF!") + + if b >= (2 ** _WORD_WIDTH): + raise Exception("b > 0xFFFF!") + + p = a * b + if dump and DUMP_MACS_INPUTS: + if x == 0: print("t=%2d, col=%2d > b=%05x > " % (t, col, b), end='') + if x > 0: print("; ", end='') + print("MAC[%d]: a=%05x" % (x, a), end='') + if x == (NUM_MULTS-1) and not need_aux: print("") + + self._macs[x] += p + + def _update_mac_aux(self, y, col, a, b, dump): + + if a >= (2 ** _WORD_WIDTH_EXT): + raise Exception("a > 0x3FFFF!") + + if b >= (2 ** _WORD_WIDTH): + raise Exception("b > 0xFFFF!") + + p = a * b + if dump and DUMP_MACS_INPUTS: + print("; AUX: a=%05x" % a) + + self._mac_aux[0] += p + + def _preset_indices(self, col): + for x in range(len(self._indices)): + self._indices[x] = col * len(self._indices) + x + + def _preset_index_aux(self, num_cols): + self._index_aux[0] = num_cols * len(self._indices) + + def _dump_macs_helper(self, t, col, aux=False): + print("t=%2d, col=%2d > "% (t, col), end='') + for i in range(NUM_MULTS): + if i > 0: print(" | ", end='') + print("mac[%d]: 0x%012x" % (i, self._macs[i]), end='') + if aux: + print(" | mac_aux[ 0]: 0x%012x" % (self._mac_aux[0]), end='') + print("") + + def _dump_macs(self, t, col): + self._dump_macs_helper(t, col) + + def _dump_macs_with_aux(self, t, col): + self._dump_macs_helper(t, col, True) + + def _dump_indices_helper(self, t, col, aux=False): + print("t=%2d, col=%2d > indices:" % (t, col), end='') + for i in range(NUM_MULTS): + print(" %2d" % self._indices[i], end='') + if aux: + print(" %2d" % self._index_aux[0], end='') + print("") + + def _dump_indices(self, t, col): + self._dump_indices_helper(t, col) + + def _dump_indices_with_aux(self, t, col): + self._dump_indices_helper(t, col, True) + + def _rotate_indices(self, num_words): + for x in range(len(self._indices)): + if self._indices[x] > 0: + self._indices[x] -= 1 + else: + self._indices[x] = num_words - 1 + + def _rotate_index_aux(self): + self._index_aux[0] -= 1 + + def _mult_store_part(self, parts, time, column, part_index, mac_index, dump): + parts[part_index] = self._macs[mac_index] + if dump and DUMP_MULT_PARTS: + print("t=%2d, col=%2d > parts[%2d]: mac[%d] = 0x%012x" % + (time, column, part_index, mac_index, parts[part_index])) + + def _mult_store_part_aux(self, parts, time, column, part_index, dump): + parts[part_index] = self._mac_aux[0] + if dump and DUMP_MULT_PARTS: + print("t=%2d, col=%2d > parts[%2d]: mac_aux[%d] = 0x%012x" % + (time, column, part_index, 0, parts[part_index])) + + def multiply_square(self, a_wide, b_narrow, ab_num_words, dump=False): + + num_cols = ab_num_words // NUM_MULTS + + parts = list() + for i in range(2 * ab_num_words - 1): + parts.append(0) + + for col in range(num_cols): + + b_carry = 0 + + for t in range(ab_num_words): + + # take care of indices + if t == 0: self._preset_indices(col) + else: self._rotate_indices(ab_num_words) + + # take care of macs + if t == 0: + self._clear_all_macs(t, col, dump) + else: + t1 = t - 1 + if (t1 // 8) == col: + self._clear_one_mac(t1 % NUM_MULTS, t, col, dump) + + # debug output + if dump and DUMP_INDICES: self._dump_indices(t, col) + + # current b-word + # multiplier's b-input is limited to 16-bit words, so we need to propagate + # carries on the fly here, carry can be up to two bits + bt = b_narrow.words[t] + b_carry + b_carry = (bt & _CARRY_MASK) >> _WORD_WIDTH + if dump and b_carry > 1: + print("Rare overflow case was detected and then successfully corrected.") + bt &= _WORD_MASK + + # multiply by a-words + for x in range(NUM_MULTS): + ax = a_wide.words[self._indices[x]] + self._update_one_mac(x, t, col, ax, bt, dump) + + if t == (col * NUM_MULTS + x): + part_index = t + self._mult_store_part(parts, t, col, part_index, x, dump) + + # debug output + if dump and DUMP_MACS_ACCUMULATION: self._dump_macs(t, col) + + # save the uppers part of product at end of column, + # for the last column don't save the very last part + if t == (ab_num_words - 1): + for x in range(NUM_MULTS): + if not (col == (num_cols - 1) and x == (NUM_MULTS - 1)): + part_index = ab_num_words + col * NUM_MULTS + x + self._mult_store_part(parts, t, col, part_index, x, dump) + + return parts + + def multiply_triangle(self, a_wide, b_narrow, ab_num_words, dump=False): + + num_cols = ab_num_words // NUM_MULTS + + parts = list() + for i in range(ab_num_words + 1): + parts.append(0) + + for col in range(num_cols): + + last_col = col == (num_cols - 1) + + for t in range(ab_num_words + 1): + + # take care of indices + if t == 0: self._preset_indices(col) + else: self._rotate_indices(ab_num_words) + + # take care of auxilary index + if last_col: + if t == 0: self._preset_index_aux(num_cols) + else: self._rotate_index_aux() + + # take care of macs + if t == 0: self._clear_all_macs(t, col, dump) + + # take care of auxilary mac + if last_col: + if t == 0: self._clear_mac_aux(t, col, dump) + + # debug output + if dump and DUMP_INDICES: self._dump_indices_with_aux(t, col) + + # current b-word + bt = b_narrow.words[t] + + # multiply by a-words + for x in range(NUM_MULTS): + ax = a_wide.words[self._indices[x]] + self._update_one_mac(x, t, col, ax, bt, dump, last_col) + + if t == (col * NUM_MULTS + x): + part_index = t + self._mult_store_part(parts, t, col, part_index, x, dump) + + # aux multiplier + if last_col: + ax = a_wide.words[self._index_aux[0]] + self._update_mac_aux(t, col, ax, bt, dump) + + if t == ab_num_words: + part_index = t + self._mult_store_part_aux(parts, t, col, part_index, dump) + + # debug output + if dump and DUMP_MACS_ACCUMULATION: self._dump_macs_with_aux(t, col) + + # shortcut + if not last_col: + if t == (NUM_MULTS * (col + 1) - 1): break + + return parts + + def multiply_rectangle(self, a_wide, b_narrow, ab_num_words, dump=False): + + num_cols = ab_num_words // NUM_MULTS + + parts = list() + for i in range(2 * ab_num_words): + parts.append(0) + + for col in range(num_cols): + + for t in range(ab_num_words + 1): + + # take care of indices + if t == 0: self._preset_indices(col) + else: self._rotate_indices(ab_num_words) + + # take care of macs + if t == 0: + self._clear_all_macs(t, col, dump) + else: + t1 = t - 1 + if (t1 // 8) == col: + self._clear_one_mac(t1 % NUM_MULTS, t, col, dump) + + # debug output + if dump and DUMP_INDICES: self._dump_indices(t, col) + + # current b-word + bt = b_narrow.words[t] + + # multiply by a-words + for x in range(NUM_MULTS): + ax = a_wide.words[self._indices[x]] + self._update_one_mac(x, t, col, ax, bt, dump) + + # don't save one value for the very last time instant per column + if t < ab_num_words and t == (col * NUM_MULTS + x): + part_index = t + self._mult_store_part(parts, t, col, part_index, x, dump) + + # debug output + if dump and DUMP_MACS_ACCUMULATION: self._dump_macs(t, col) + + # save the upper parts of product at end of column + if t == ab_num_words: + for x in range(NUM_MULTS): + part_index = ab_num_words + col * NUM_MULTS + x + self._mult_store_part(parts, t, col, part_index, x, dump) + + return parts + +class ModExpNG_LowlevelOperator(): + + def _check_word(self, a): + if a < 0 or a > _WORD_MASK: + raise Exception("Word out of range!") + + def _check_carry_borrow(self, cb): + if cb < 0 or cb > 1: + raise Exception("Carry or borrow out of range!") + + def add_words(self, a, b, c_in): + + self._check_word(a) + self._check_word(b) + self._check_carry_borrow(c_in) + + sum = a + b + c_in + + sum_s = sum & _WORD_MASK + sum_c = sum >> _WORD_WIDTH + + return (sum_c, sum_s) + + def sub_words(self, a, b, b_in): + + self._check_word(a) + self._check_word(b) + self._check_carry_borrow(b_in) + + dif = a - b - b_in + + if dif < 0: + dif_b = 1 + dif_d = dif + 2 ** _WORD_WIDTH + else: + dif_b = 0 + dif_d = dif + + return (dif_b, dif_d) + +class ModExpNG_Worker(): + + def __init__(self): + self.lowlevel = ModExpNG_LowlevelOperator() + self.multiplier = ModExpNG_WordMultiplier() + self.recombinator = ModExpNG_PartRecombinator() + + def serial_subtract_modular(self, a, b, n, ab_num_words): + c_in = 0 + b_in = 0 + ab = list() + ab_n = list() + for x in range(ab_num_words): + a_word = a.words[x] + b_word = b.words[x] + (b_out, d_out) = self.lowlevel.sub_words(a_word, b_word, b_in) + (c_out, s_out) = self.lowlevel.add_words(d_out, n.words[x], c_in) + ab.append(d_out) + ab_n.append(s_out) + (c_in, b_in) = (c_out, b_out) + d = ab if not b_out else ab_n + return ModExpNG_Operand(None, ab_num_words, d) + + def serial_add_uneven(self, a, b, ab_num_words): + c_in = 0 + ab = list() + for x in range(2 * ab_num_words): + a_word = a.words[x] if x < ab_num_words else 0 + b_word = b.words[x] + (c_out, s_out) = self.lowlevel.add_words(a_word, b_word, c_in) + ab.append(s_out) + c_in = c_out + return ModExpNG_Operand(None, 2*ab_num_words, ab) + + def multipurpose_multiply(self, a, b, n, n_coeff, ab_num_words, reduce_only=False, multiply_only=False, dump=False, dump_crt="", dump_ladder=""): + + # + # 1. AB = A * B + # + if dump: print("multiply_square(%s_%s)" % (dump_crt, dump_ladder)) + + if reduce_only: + ab = b + else: + ab_parts = self.multiplier.multiply_square(a, b, ab_num_words, dump) + ab_words = self.recombinator.recombine_square(ab_parts, ab_num_words, dump) + ab = ModExpNG_Operand(None, 2 * ab_num_words, ab_words) + + if dump and DUMP_VECTORS: + ab.format_verilog_concat("%s_%s_AB" % (dump_crt, dump_ladder)) + + if multiply_only: + return ModExpNG_Operand(None, 2*ab_num_words, ab_words) + + # + # 2. Q = LSB(AB) * N_COEFF + # + if dump: print("multiply_triangle(%s_%s)" % (dump_crt, dump_ladder)) + + q_parts = self.multiplier.multiply_triangle(ab, n_coeff, ab_num_words, dump) + q_words = self.recombinator.recombine_triangle(q_parts, ab_num_words, dump) + q = ModExpNG_Operand(None, ab_num_words + 1, q_words) + + if dump and DUMP_VECTORS: + q.format_verilog_concat("%s_%s_Q" % (dump_crt, dump_ladder)) + + # + # 3. M = Q * N + # + if dump: print("multiply_rectangle(%s_%s)" % (dump_crt, dump_ladder)) + + m_parts = self.multiplier.multiply_rectangle(n, q, ab_num_words, dump) + m_words = self.recombinator.recombine_rectangle(m_parts, ab_num_words, dump) + m = ModExpNG_Operand(None, 2 * ab_num_words + 1, m_words) + + if dump and DUMP_VECTORS: + m.format_verilog_concat("%s_%s_M" % (dump_crt, dump_ladder)) + + # + # 4. R = AB + M + # + + # + # 4a. compute carry (actual sum is all zeroes and need not be stored) + # + + r_cy = 0 # this can be up to two bits, since we're adding extended words!! + for i in range(ab_num_words + 1): + s = ab.words[i] + m.words[i] + r_cy + r_cy_new = s >> _WORD_WIDTH + + if dump and DUMP_REDUCTION: + print("[%2d] 0x%05x + 0x%05x + 0x%x => {0x%x, [0x%05x]}" % + (i, ab.words[i], m.words[i], r_cy, r_cy_new, s & 0xffff)) # ??? + + r_cy = r_cy_new + + + # + # 4b. Initialize empty result + # + + R = list() + for i in range(ab_num_words): + R.append(0) + + # + # 4c. compute the actual upper part of sum (take carry into account) + # + + for i in range(ab_num_words): + + if dump and DUMP_REDUCTION: + print("[%2d]" % i, end='') + + ab_word = ab.words[ab_num_words + i + 1] if i < (ab_num_words - 1) else 0 + if dump and DUMP_REDUCTION: + print(" 0x%05x" % ab_word, end='') + + m_word = m.words[ab_num_words + i + 1] + if dump and DUMP_REDUCTION: + print(" + 0x%05x" % m_word, end='') + + if i == 0: R[i] = r_cy + else: R[i] = 0 + + if dump and DUMP_REDUCTION: + print(" + 0x%x" % R[i], end='') + + R[i] += ab_word + R[i] += m_word + if dump and DUMP_REDUCTION: + print(" = 0x%05x" % R[i]) + + return ModExpNG_Operand(None, ab_num_words, R) + + def convert_nonredundant(self, a, num_words): + carry = 0 + for x in range(num_words): + a.words[x] += carry + carry = a.words[x] >> _WORD_WIDTH + a.words[x] &= _WORD_MASK + return carry + +class ModExpNG_Core(): + + def __init__(self, i): + self.wrk = ModExpNG_Worker() + self.bnk = ModExpNG_BanksCRT(i) + self.inp = ModExpNG_CoreInput() + self.out = ModExpNG_CoreOutput() + + # + # CRT_(X|Y) means either CRT_X or CRT_Y + # LADDER_{X,Y} means both LADDER_X and LADDER_Y + # + + # + # copy from CRT_(X|Y).LADDER_X.NARROW to OUTPUT + # + def set_output_from_narrow(self, sel_output, bank_crt, sel_narrow): + self.out._set_value(sel_output, bank_crt.ladder_x._get_narrow(sel_narrow)) + + # + # copy from INPUT to CRT_(X|Y).LADDER_{X,Y}.NARROW + # + def set_narrow_from_input(self, bank_crt, sel_narrow, sel_input): + bank_crt.ladder_x._set_narrow(sel_narrow, self.inp._get_value(sel_input)) + bank_crt.ladder_y._set_narrow(sel_narrow, self.inp._get_value(sel_input)) + + # + # copy from INPUT to CRT_(X|Y).LADDER_{X,Y}.WIDE + # + def set_wide_from_input(self, bank_crt, sel_wide, sel_input): + bank_crt.ladder_x._set_wide(sel_wide, self.inp._get_value(sel_input)) + bank_crt.ladder_y._set_wide(sel_wide, self.inp._get_value(sel_input)) + + # + # copy from CRT_Y.LADDER_{X,Y}.{WIDE,NARROW} to CRT_X.LADDER_{X,Y}.{WIDE,NARROW} + # + def copy_crt_y2x(self, sel_wide, sel_narrow): + + self.bnk.crt_x.ladder_x._set_wide(sel_wide, self.bnk.crt_y.ladder_x._get_wide(sel_wide)) + self.bnk.crt_x.ladder_y._set_wide(sel_wide, self.bnk.crt_y.ladder_y._get_wide(sel_wide)) + + self.bnk.crt_x.ladder_x._set_narrow(sel_narrow, self.bnk.crt_y.ladder_x._get_narrow(sel_narrow)) + self.bnk.crt_x.ladder_y._set_narrow(sel_narrow, self.bnk.crt_y.ladder_y._get_narrow(sel_narrow)) + + # + # copy from CRT_{X,Y}.LADDER_X.{WIDE,NARROW} to CRT_{X,Y}.LADDER_Y.{WIDE,NARROW} + # + def copy_ladders_x2y(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out): + + self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, self.bnk.crt_x.ladder_x._get_wide(sel_wide_in)) + self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, self.bnk.crt_y.ladder_x._get_wide(sel_wide_in)) + + self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in)) + self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in)) + + # + # copy from CRT_{X,Y}.LADDER_Y.{WIDE,NARROW} to CRT_{X,Y}.LADDER_X.{WIDE,NARROW} + # + def copy_ladders_y2x(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out): + + self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, self.bnk.crt_x.ladder_y._get_wide(sel_wide_in)) + self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, self.bnk.crt_y.ladder_y._get_wide(sel_wide_in)) + + self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_in)) + self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_in)) + + # + # copy from CRT_{X,Y}.LADDER_X.{WIDE,NARROW} to CRT_{Y,X}.LADDER_Y.{WIDE,NARROW} + # + def cross_ladders_x2y(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out): + + self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, self.bnk.crt_y.ladder_x._get_wide(sel_wide_in)) + self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, self.bnk.crt_x.ladder_x._get_wide(sel_wide_in)) + + self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in)) + self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in)) + + # + # modular multiply sel_wide_in by sel_narrow_in + # stores intermediate result in WIDE.L and WIDE.H + # needs modulus WIDE.N and speed-up coefficients NARROW.N_COEFF to be filled + # places two copies of resulting quantity in sel_wide_out and sel_narrow_out + # sel_*_in and sel_*_out can overlap (overwriting of input operands is ok) + # + def modular_multiply(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out, num_words, mode=(True, True), d=False): + + xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N) + yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N) + + xn_coeff = self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF) + yn_coeff = self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF) + + xxa = self.bnk.crt_x.ladder_x._get_wide(sel_wide_in) + xya = self.bnk.crt_x.ladder_y._get_wide(sel_wide_in) + + yxa = self.bnk.crt_y.ladder_x._get_wide(sel_wide_in) + yya = self.bnk.crt_y.ladder_y._get_wide(sel_wide_in) + + xxb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in) + xyb = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_in) + + yxb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in) + yyb = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_in) + + if not mode[0]: xb = xxb + else: xb = xyb + + if not mode[1]: yb = yxb + else: yb = yyb + + xxp = self.wrk.multipurpose_multiply(xxa, xb, xn, xn_coeff, num_words, dump=d, dump_crt="X", dump_ladder="X") + xyp = self.wrk.multipurpose_multiply(xya, xb, xn, xn_coeff, num_words, dump=d, dump_crt="X", dump_ladder="Y") + + yxp = self.wrk.multipurpose_multiply(yxa, yb, yn, yn_coeff, num_words, dump=d, dump_crt="Y", dump_ladder="X") + yyp = self.wrk.multipurpose_multiply(yya, yb, yn, yn_coeff, num_words, dump=d, dump_crt="Y", dump_ladder="Y") + + self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, xxp) + self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, xyp) + self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, yxp) + self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, yyp) + + self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xxp) + self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, xyp) + self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yxp) + self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, yyp) + + # + # modular subtract values in sel_narrow_in (X-Y) + # stores two copies of the result in sel_*_out + # + def modular_subtract(self, sel_narrow_in, sel_narrow_out, sel_wide_out, num_words): + + xa = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in) + xb = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_in) + xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N) + + ya = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in) + yb = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_in) + yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N) + + xd = self.wrk.serial_subtract_modular(xa, xb, xn, num_words) + yd = self.wrk.serial_subtract_modular(ya, yb, yn, num_words) + + self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xd) + self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yd) + + self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, xd) + self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, yd) + + # + # modular reduce sel_narrow_in + # stores two copies of the result in sel_*_out + # + def modular_reduce(self, sel_narrow_in, sel_wide_out, sel_narrow_out, num_words): + + xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N) + yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N) + + xn_coeff = self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF) + yn_coeff = self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF) + + xb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in) + yb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in) + + xp = self.wrk.multipurpose_multiply(None, xb, xn, xn_coeff, num_words, reduce_only=True) + yp = self.wrk.multipurpose_multiply(None, yb, yn, yn_coeff, num_words, reduce_only=True) + + self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, xp) + self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, xp) + self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, yp) + self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, yp) + + self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xp) + self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, xp) + self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yp) + self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, yp) + + # + # propagate carries (convert to non-redundant representation) content in sel_narrow + # overwrites input value + # + def propagate_carries(self, sel_narrow, num_words): + self.wrk.convert_nonredundant(self.bnk.crt_x.ladder_x._get_narrow(sel_narrow), num_words) + self.wrk.convert_nonredundant(self.bnk.crt_x.ladder_y._get_narrow(sel_narrow), num_words) + self.wrk.convert_nonredundant(self.bnk.crt_y.ladder_x._get_narrow(sel_narrow), num_words) + self.wrk.convert_nonredundant(self.bnk.crt_y.ladder_y._get_narrow(sel_narrow), num_words) + + # + # copy from CRT_{X,Y}.LADDER_{X,Y}.WIDE.{H,L} to CRT_{X,Y}.LADDER_{X,Y}.NARROW + # + def merge_lha(self, sel_narrow, num_words): + xx_lsb = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.L) + xy_lsb = self.bnk.crt_x.ladder_y._get_wide(ModExpNG_WideBankEnum.L) + yx_lsb = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.L) + yy_lsb = self.bnk.crt_y.ladder_y._get_wide(ModExpNG_WideBankEnum.L) + + xx_msb = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.H) + xy_msb = self.bnk.crt_x.ladder_y._get_wide(ModExpNG_WideBankEnum.H) + yx_msb = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.H) + yy_msb = self.bnk.crt_y.ladder_y._get_wide(ModExpNG_WideBankEnum.H) + + xx = xx_lsb.words + xx_msb.words + xy = xy_lsb.words + xy_msb.words + yx = yx_lsb.words + yx_msb.words + yy = yy_lsb.words + yy_msb.words + + self.bnk.crt_x.ladder_x._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, xx)) + self.bnk.crt_x.ladder_y._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, xy)) + self.bnk.crt_y.ladder_x._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, yx)) + self.bnk.crt_y.ladder_y._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, yy)) + + # + # multiply sel_wide_in by sel_narrow_in + # stores twice larger product in WIDE.L and WIDE.H + # + def regular_multiply(self, sel_wide_in, sel_narrow_in, num_words): + + xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N) + yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N) + + xn_coeff = self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF) + yn_coeff = self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF) + + xxa = self.bnk.crt_x.ladder_x._get_wide(sel_wide_in) + xya = self.bnk.crt_x.ladder_y._get_wide(sel_wide_in) + + yxa = self.bnk.crt_y.ladder_x._get_wide(sel_wide_in) + yya = self.bnk.crt_y.ladder_y._get_wide(sel_wide_in) + + xb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in) + yb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in) + + xxp = self.wrk.multipurpose_multiply(xxa, xb, None, None, num_words, multiply_only=True) + xyp = self.wrk.multipurpose_multiply(xya, xb, None, None, num_words, multiply_only=True) + + yxp = self.wrk.multipurpose_multiply(yxa, yb, None, None, num_words, multiply_only=True) + yyp = self.wrk.multipurpose_multiply(yya, yb, None, None, num_words, multiply_only=True) + + xxp_lsb = xxp.lower_half() + xxp_msb = xxp.upper_half() + + xyp_lsb = xyp.lower_half() + xyp_msb = xyp.upper_half() + + yxp_lsb = yxp.lower_half() + yxp_msb = yxp.upper_half() + + yyp_lsb = yyp.lower_half() + yyp_msb = yyp.upper_half() + + self.bnk.crt_x.ladder_x._set_wide(ModExpNG_WideBankEnum.L, xxp_lsb) + self.bnk.crt_x.ladder_y._set_wide(ModExpNG_WideBankEnum.L, xyp_lsb) + self.bnk.crt_y.ladder_x._set_wide(ModExpNG_WideBankEnum.L, yxp_lsb) + self.bnk.crt_y.ladder_y._set_wide(ModExpNG_WideBankEnum.L, yyp_lsb) + + self.bnk.crt_x.ladder_x._set_wide(ModExpNG_WideBankEnum.H, xxp_msb) + self.bnk.crt_x.ladder_y._set_wide(ModExpNG_WideBankEnum.H, xyp_msb) + self.bnk.crt_y.ladder_x._set_wide(ModExpNG_WideBankEnum.H, yxp_msb) + self.bnk.crt_y.ladder_y._set_wide(ModExpNG_WideBankEnum.H, yyp_msb) + + # + # adds sel_narrow_a_in to sel_narrow_b_in + # stores result in sel_narrow_out + # + def regular_add(self, sel_narrow_a_in, sel_narrow_b_in, sel_narrow_out, num_words): + xxa = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_a_in) + xya = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_a_in) + yxa = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_a_in) + yya = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_a_in) + + xxb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_b_in) + xyb = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_b_in) + yxb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_b_in) + yyb = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_b_in) + + xxc = self.wrk.serial_add_uneven(xxa, xxb, num_words) + xyc = self.wrk.serial_add_uneven(xya, xyb, num_words) + yxc = self.wrk.serial_add_uneven(yxa, yxb, num_words) + yyc = self.wrk.serial_add_uneven(yya, yyb, num_words) + + self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xxc) + self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, xyc) + self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yxc) + self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, yyc) + + # + # dump working variables before ladder step + # + def dump_before_step_using_crt(self, pq, m): + print("num_words = %d" % pq) + print("\rladder_mode_x = %d" % m[0]) + print("\rladder_mode_y = %d" % m[1]) + self.bnk.crt_x.ladder_x._get_narrow(N.C).format_verilog_concat("X_X") + self.bnk.crt_x.ladder_y._get_narrow(N.C).format_verilog_concat("X_Y") + self.bnk.crt_y.ladder_x._get_narrow(N.C).format_verilog_concat("Y_X") + self.bnk.crt_y.ladder_y._get_narrow(N.C).format_verilog_concat("Y_Y") + self.bnk.crt_x.ladder_x._get_wide(W.N).format_verilog_concat("X_N") + self.bnk.crt_x.ladder_x._get_wide(W.N).format_verilog_concat("Y_N") + self.bnk.crt_x.ladder_x._get_narrow(N.N_COEFF).format_verilog_concat("X_N_COEFF") + self.bnk.crt_x.ladder_x._get_narrow(N.N_COEFF).format_verilog_concat("Y_N_COEFF") + + # + # dump working variables after ladder step + # + def dump_after_step_using_crt(self): + self.bnk.crt_x.ladder_x._get_narrow(N.C).format_verilog_concat("X_X") + self.bnk.crt_x.ladder_y._get_narrow(N.C).format_verilog_concat("X_Y") + self.bnk.crt_y.ladder_x._get_narrow(N.C).format_verilog_concat("Y_X") + self.bnk.crt_y.ladder_y._get_narrow(N.C).format_verilog_concat("Y_Y") + + # + # this deliberately converts narrow operand into redundant representation + # + def _force_overflow(self, bank_crt, sel_narrow): + + # original words + T = bank_crt.ladder_x._get_narrow(sel_narrow).words + + # loop through upper N-1 words + for i in range(1, len(T)): + + # get msbs of the previous word + upper_bits = T[i-1] & _CARRY_MASK + + # if the previous msbs are empty, force lsbs of the current word + # into them and then wipe the current lsbs + if upper_bits == 0: + lower_bits = T[i] & (_CARRY_MASK >> _WORD_WIDTH) + T[i] ^= lower_bits + T[i-1] |= (lower_bits << _WORD_WIDTH) + + # overwrite original words + bank_crt.ladder_x._set_narrow(sel_narrow, ModExpNG_Operand(None, len(T), T)) + + print("Forced overflow.") + +# +# read content of core's output bank and compare it against known good values +# +def compare_signature(): + + c = core + s = s_known + xm = xm_known + ym = ym_known + + core_s = c.out.get_value(O.S) + core_xm = c.out.get_value(O.XM) + core_ym = c.out.get_value(O.YM) + + if core_s.number() != s: print("ERROR: core_s != s!") + else: print("s is OK") + + if core_xm.number() != xm: print("ERROR: core_xm != xm!") + else: print("x_mutated is OK") + + if core_ym.number() != ym: print("ERROR: core_ym != ym!") + else: print("y_mutated is OK") + +# +# get current ladder mode based on two exponents' bits +# +def get_ladder_mode_using_crt(v, bit): + + bit_value_p = (v.dp.number() & (1 << bit)) >> bit + bit_value_q = (v.dq.number() & (1 << bit)) >> bit + + bit_value_p = bit_value_p > 0 + bit_value_q = bit_value_q > 0 + + return (bit_value_p, bit_value_q) + +# +# get current ladder mode based on private exponent's bit +# +def get_ladder_mode_without_crt(v, bit): + + bit_value_d = (v.d.number() & (1 << bit)) >> bit + + bit_value_d = bit_value_d > 0 + + return (not bit_value_d, bit_value_d) + +# +# print current exponentiation progress +# +def print_ladder_progress(current, total): + + # this will always print "100.0%" at the very last iteration, since we're + # counting bits from msb to lsb and the very last index is zero, which + # is congruent to 0 mod DUMP_PROGRESS_FACTOR + if (current % DUMP_PROGRESS_FACTOR) == 0: + pct = float((_WORD_WIDTH * total - current) / (_WORD_WIDTH * total)) * 100.0 + print("\rdone: %5.1f%%" % pct, end='') + + # move to next line after the very last iteration + if current == 0: print("") + +# +# try to exponentiate using the quad-multiplier (dual-core, dual-ladder) scheme +# +def sign_using_crt(): + + c = core + v = vector + n = n_num_words + pq = pq_num_words + + ff = (False, False) + # + # A / B => different content in banks (A in WIDE, B in NARROW) + # [XY]Z => different content in ladders (XZ in X, YZ in Y) + # .. => temporarily half-filled bank (omitted to save space) + # * => "crossed" content (X.Y == Y.X and Y.Y == X.X) + # + # +------------------------+-------+------------------+---------+-----------+ + # | A | B | C | D | E | + # +------------------------+-------+------------------+---------+-----------+ + c.set_wide_from_input (c.bnk.crt_x, W.N, I.N) # | ? | ? | ? | ? | ? | + c.set_wide_from_input (c.bnk.crt_y, W.N, I.N) # | ? | ? | ? | ? | ? | + c.set_wide_from_input (c.bnk.crt_x, W.A, I.X) # | .. | ? | ? | ? | ? | + c.set_wide_from_input (c.bnk.crt_y, W.A, I.Y) # | [XY] / ? | ? | ? | ? | ? | + c.set_wide_from_input (c.bnk.crt_x, W.E, I.M) # | [XY] / ? | ? | ? | ? | .. / ? | + c.set_wide_from_input (c.bnk.crt_y, W.E, I.M) # | [XY] / ? | ? | ? | ? | M / ? | + # +------------------------+-------+------------------+---------+-----------+ + c.set_narrow_from_input (c.bnk.crt_x, N.N_COEFF, I.N_COEFF) # | [XY] / ? | ? | ? | ? | M / ? | + c.set_narrow_from_input (c.bnk.crt_y, N.N_COEFF, I.N_COEFF) # | [XY] / ? | ? | ? | ? | M / ? | + c.set_narrow_from_input (c.bnk.crt_x, N.A, I.N_FACTOR) # | [XY] / .. | ? | ? | ? | M / ? | + c.set_narrow_from_input (c.bnk.crt_y, N.A, I.N_FACTOR) # | [XY] / N_FACTOR | ? | ? | ? | M / ? | + c.set_narrow_from_input (c.bnk.crt_x, N.E, I.M) # | [XY] / N_FACTOR | ? | ? | ? | M / .. | + c.set_narrow_from_input (c.bnk.crt_y, N.E, I.M) # | [XY] / N_FACTOR | ? | ? | ? | M | + # +------------------------+-------+------------------+---------+-----------+ + c.modular_multiply(W.A, N.A, W.B, N.B, n) # | [XY] / N_FACTOR | [XY]F | ? | ? | M | [XY]F = [XY] * N_FACTOR + c.modular_multiply(W.B, N.B, W.C, N.C, n, mode=ff) # | [XY] / N_FACTOR | [XY]F | [XY]YM | ? | M | [XY]MF = [XY]F * [XY]F + c.modular_multiply(W.C, N.I, W.D, N.D, n) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M | [XY]M = [XY]MF * 1 + # +------------------------+-------+------------------+---------+-----------+ + c.propagate_carries(N.D, n_num_words) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M | + # +------------------------+-------+------------------+---------+-----------+ + c.set_output_from_narrow(O.XM, c.bnk.crt_x, N.D) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M | + c.set_output_from_narrow(O.YM, c.bnk.crt_y, N.D) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M | + # +------------------------+-------+------------------+---------+-----------+ + c.modular_multiply(W.E, N.B, W.C, N.C, n) # | [XY] / N_FACTOR | [XY]F | [XY]MB | [XY]M | M | [XY]MB = M * [XY]F + # +------------------------+-------+------------------+---------+-----------+ + c.propagate_carries(N.C, n_num_words) # | [XY] / N_FACTOR | [XY]F | [XY]MB | [XY]M | M | + # +------------------------+-------+------------------+---------+-----------+ + c.copy_crt_y2x(W.C, N.C) # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M | + # +------------------------+-------+------------------+---------+-----------+ + c.set_wide_from_input (c.bnk.crt_x, W.N, I.P) # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M | + c.set_wide_from_input (c.bnk.crt_y, W.N, I.Q) # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M | + c.set_wide_from_input (c.bnk.crt_x, W.A, I.P_FACTOR) # | ... / N_FACTOR | [XY]F | YMB | [XY]M | M | + c.set_wide_from_input (c.bnk.crt_y, W.A, I.Q_FACTOR) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | M | + c.set_wide_from_input (c.bnk.crt_x, W.E, I.QINV) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | .. | + c.set_wide_from_input (c.bnk.crt_x, W.E, I.QINV) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M | + # +------------------------+-------+------------------+---------+-----------+ + c.set_narrow_from_input(c.bnk.crt_x, N.N_COEFF, I.P_COEFF) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M | + c.set_narrow_from_input(c.bnk.crt_y, N.N_COEFF, I.Q_COEFF) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M | + c.set_narrow_from_input(c.bnk.crt_x, N.A, I.P_FACTOR) # | [PQ]_FACTOR / ... | [XY]F | YMB | [XY]M | QINV / M | + c.set_narrow_from_input(c.bnk.crt_y, N.A, I.Q_FACTOR) # | [PQ]_FACTOR | [XY]F | YMB | [XY]M | QINV / M | + c.set_narrow_from_input(c.bnk.crt_x, N.E, I.QINV) # | [PQ]_FACTOR | [XY]F | YMB | [XY]M | QINV / .. | + c.set_narrow_from_input(c.bnk.crt_x, N.E, I.QINV) # | [PQ]_FACTOR | [XY]F | YMB | [XY]M | QINV | + # +------------------------+-------+------------------+---------+-----------+ + c.modular_reduce(N.C, W.D, N.D, pq) # | [PQ]_FACTOR | [XY]F | YMB | [PQ]MBZ | QINV | [PQ]MBZ = YMB mod [PQ] + c.modular_multiply(W.D, N.A, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | [PQ]MB | [PQ]MBZ | QINV | [PQ]MB = [PQ]MBZ * [PQ]_FACTOR + c.modular_multiply(W.C, N.A, W.D, N.D, pq) # | [PQ]_FACTOR | [XY]F | [PQ]MB | [PQ]MBF | QINV | [PQ]MBF = [PQ]MB * [PQ]_FACTOR + c.modular_multiply(W.A, N.I, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | [PQ]IF | [PQ]MBF | QINV | [PQ]IF = 1 * [PQ]_FACTOR + # +------------------------+-------+------------------+---------+-----------+ + c.copy_ladders_x2y(W.D, N.D, W.C, N.C) # | [PQ]_FACTOR | [XY]F | [PQ]IF / [PQ]MBF | [PQ]MBF | QINV | + # +------------------------+-------+------------------+---------+-----------+ + ########################### # | | | | | | + # Begin Montgomery Ladder # # | | | | | | + ########################### # | | | | | | + # | | | | | | + for bit in range(_WORD_WIDTH * pq - 1, -1, -1): # | | | | | | + # | | | | | | + m = get_ladder_mode_using_crt(v, bit) # | | | | | | + dbg = bit == DUMP_LADDER_INDEX # | | | | | | + # | | | | | | + if dbg: # | | | | | | + if FORCE_OVERFLOW: c._force_overflow(c.bnk.crt_x, N.C) # | | | | | | + if DUMP_VECTORS: c.dump_before_step_using_crt(pq, m) # | | | | | | + # +------------------------+-------+------------------+---------+-----------+ + c.modular_multiply(W.C, N.C, W.C, N.C, pq, mode=m, d=dbg) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]MBF | QINV | + # +------------------------+-------+------------------+---------+-----------+ + if dbg and DUMP_VECTORS: c.dump_after_step_using_crt() # | | | | | | + print_ladder_progress(bit, pq) # | | | | | | + # | | | | | | + ######################### # | | | | | | + # End Montgomery Ladder # # | | | | | | + ######################### # | | | | | | + # +------------------------+-------+------------------+---------+-----------+ + c.modular_multiply(W.C, N.I, W.D, N.D, pq) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB | QINV | [PQ]SB = [PQ]SBF * 1 + # +------------------------+-------+------------------+---------+-----------+ + c.propagate_carries(N.D, pq) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB | QINV | + # +------------------------+-------+------------------+---------+-----------+ + c.cross_ladders_x2y(W.D, N.D, W.D, N.D) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB* | QINV | + # +------------------------+-------+------------------+---------+-----------+ + c.modular_subtract(N.D, N.C, W.C, pq) # | [PQ]_FACTOR | [XY]F | RSB | [PQ]SB* | QINV | RSB = PSB - QSB + # +------------------------+-------+------------------+---------+-----------+ + c.modular_multiply(W.C, N.E, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | RSBIZ | [PQ]SB* | QINV | RSBIZ = RSB * QINV + c.modular_multiply(W.C, N.A, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | QINV | RSBI = RSBIZ * P_FACTOR + # +------------------------+-------+------------------+---------+-----------+ + c.set_wide_from_input (c.bnk.crt_x, W.E, I.Q) # | [PQ]_FACTOR / N_FACTOR | [XY]F | RSBI | [PQ]SB* | .. | + c.set_wide_from_input (c.bnk.crt_x, W.E, I.Q) # | [PQ]_FACTOR / N_FACTOR | [XY]F | RSBI | [PQ]SB* | Q / QINV | + # +------------------------+-------+------------------+---------+-----------+ + c.set_narrow_from_input(c.bnk.crt_x, N.E, I.Q) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | Q / .. | + c.set_narrow_from_input(c.bnk.crt_x, N.E, I.Q) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | Q | + # +------------------------+-------+------------------+---------+-----------+ + c.regular_multiply(W.E, N.C, pq) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | Q | = RSBI * Q + # +------------------------+-------+------------------+---------+-----------+ + c.merge_lha(N.A, pq) # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | [PQ]SB* | Q | + # +------------------------+-------+------------------+---------+-----------+ + c.propagate_carries(N.A, n) # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | [PQ]SB* | Q | + # +------------------------+-------+------------------+---------+-----------+ + c.copy_crt_y2x(W.D, N.D) # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | QSB* | Q | + # +------------------------+-------+------------------+---------+-----------+ + c.regular_add(N.D, N.A, N.C, pq) # | [PQ]_FACTOR / QRSBI | [XY]F | SB | QSB* | Q | SB = QSB + RSBI + # +------------------------+-------+------------------+---------+-----------+ + c.set_wide_from_input (c.bnk.crt_x, W.N, I.N) # | | | | | | + c.set_wide_from_input (c.bnk.crt_y, W.N, I.N) # | | | | | | + # +------------------------+-------+------------------+---------+-----------+ + c.set_narrow_from_input(c.bnk.crt_x, N.N_COEFF, I.N_COEFF) # | | | | | | + c.set_narrow_from_input(c.bnk.crt_y, N.N_COEFF, I.N_COEFF) # | | | | | | + # +------------------------+-------+------------------+---------+-----------+ + c.modular_multiply(W.B, N.C, W.A, N.A, n, ff) # | S | | | | | S = XF * SB + # +------------------------+-------+------------------+---------+-----------+ + c.propagate_carries(N.A, n) # | S | | | | | + # +------------------------+-------+------------------+---------+-----------+ + c.set_output_from_narrow(O.S, c.bnk.crt_x, N.A) # | S | | | | | + # +------------------------+-------+------------------+---------+-----------+ + +# +# try to exponentiate using only half of the quad-multiplier (one dual-ladder core) +# +def sign_without_crt(): + + c = core + v = vector + n = n_num_words + + ff = (False, False) + + c.set_wide_from_input (c.bnk.crt_x, W.N, I.N) + c.set_wide_from_input (c.bnk.crt_y, W.N, I.N) + c.set_wide_from_input (c.bnk.crt_x, W.A, I.X) + c.set_wide_from_input (c.bnk.crt_y, W.A, I.Y) + c.set_wide_from_input (c.bnk.crt_x, W.E, I.M) + c.set_wide_from_input (c.bnk.crt_y, W.E, I.M) + + c.set_narrow_from_input (c.bnk.crt_x, N.N_COEFF, I.N_COEFF) + c.set_narrow_from_input (c.bnk.crt_y, N.N_COEFF, I.N_COEFF) + c.set_narrow_from_input (c.bnk.crt_x, N.A, I.N_FACTOR) + c.set_narrow_from_input (c.bnk.crt_y, N.A, I.N_FACTOR) + c.set_narrow_from_input (c.bnk.crt_x, N.E, I.M) + c.set_narrow_from_input (c.bnk.crt_y, N.E, I.M) + + c.modular_multiply(W.A, N.A, W.B, N.B, n) # [XY]F = [XY] * N_FACTOR + c.modular_multiply(W.B, N.B, W.C, N.C, n, mode=ff) # [XY]MF = [XY]F * [XY]F + c.modular_multiply(W.C, N.I, W.D, N.D, n) # [XY]M = [XY]MF * 1 + + c.propagate_carries(N.D, n) + + c.set_output_from_narrow(O.XM, c.bnk.crt_x, N.D) + c.set_output_from_narrow(O.YM, c.bnk.crt_y, N.D) + + c.modular_multiply(W.E, N.B, W.C, N.C, n) # [XY]MB = M * [XY]F + + XF = c.bnk.crt_x.ladder_x._get_narrow(N.B) + + c.set_wide_from_input(c.bnk.crt_x, W.A, I.N_FACTOR) + c.set_wide_from_input(c.bnk.crt_y, W.A, I.N_FACTOR) + + c.modular_multiply(W.C, N.A, W.D, N.D, n) # MBF = MB * N_FACTOR + c.modular_multiply(W.A, N.I, W.C, N.C, n) # IF = 1 * N_FACTOR + + c.copy_ladders_x2y(W.D, N.D, W.C, N.C) + + ########################### + # Begin Montgomery Ladder # + ########################### + + for bit in range(_WORD_WIDTH * n - 1, -1, -1): + + m = get_ladder_mode_without_crt(v, bit) + dbg = bit == DUMP_LADDER_INDEX + + if dbg: + if FORCE_OVERFLOW: c._force_overflow(c.bnk.crt_x, N.C) + if DUMP_VECTORS: c.dump_before_step_without_crt(n, m) + + c.modular_multiply(W.C, N.C, W.C, N.C, n, mode=m, d=dbg) + + if dbg and DUMP_VECTORS: c.dump_after_step_without_crt() + print_ladder_progress(bit, n) + + ######################### + # End Montgomery Ladder # + ######################### + + c.cross_ladders_x2y(W.B, N.B, W.B, N.B) + + c.modular_multiply(W.C, N.I, W.D, N.D, n) # SB = SBF * 1 + c.modular_multiply(W.B, N.D, W.A, N.A, n, mode=ff) # S = XF * SB + + c.copy_ladders_y2x(W.A, N.A, W.B, N.B) + + c.propagate_carries(N.B, n) + + c.set_output_from_narrow(O.S, c.bnk.crt_y, N.B) + + +# +# main() +# +if __name__ == "__main__": + + # handy shortcuts + W = ModExpNG_WideBankEnum + N = ModExpNG_NarrowBankEnum + I = ModExpNG_CoreInputEnum + O = ModExpNG_CoreOutputEnum + + # set helper quantity + # instantiate core + # load test vector + # transfer numbers from vector to core + # set numbers of words + # obtain known good reference value with built-in math + # mutate blinding quantities with built-in math + + i = ModExpNG_Operand(1, KEY_LENGTH) + + core = ModExpNG_Core(i) + vector = ModExpNG_TestVector() + + core.inp.set_value(I.M, vector.m) + + core.inp.set_value(I.N, vector.n) + core.inp.set_value(I.P, vector.p) + core.inp.set_value(I.Q, vector.q) + + core.inp.set_value(I.N_COEFF, vector.n_coeff) + core.inp.set_value(I.P_COEFF, vector.p_coeff) + core.inp.set_value(I.Q_COEFF, vector.q_coeff) + + core.inp.set_value(I.N_FACTOR, vector.n_factor) + core.inp.set_value(I.P_FACTOR, vector.p_factor) + core.inp.set_value(I.Q_FACTOR, vector.q_factor) + + core.inp.set_value(I.X, vector.x) + core.inp.set_value(I.Y, vector.y) + + core.inp.set_value(I.QINV, vector.qinv) + + n_num_words = KEY_LENGTH // _WORD_WIDTH + pq_num_words = n_num_words // 2 + + s_known = pow(vector.m.number(), vector.d.number(), vector.n.number()) + + xm_known = pow(vector.x.number(), 2, vector.n.number()) + ym_known = pow(vector.y.number(), 2, vector.n.number()) + + # sign using CRT and check + print("Signing using CRT...") + sign_using_crt() + compare_signature() + + # sign without CRT and check + print("Signing without CRT...") + sign_without_crt() + compare_signature() + + +# +# End-of-File +# diff --git a/vector/.gitignore b/vector/.gitignore new file mode 100644 index 0000000..f9daf64 --- /dev/null +++ b/vector/.gitignore @@ -0,0 +1,3 @@ +/__pycache__/ +/*_randomized.key +/*_randomized.py diff --git a/vector/README.md b/vector/README.md new file mode 100644 index 0000000..3bd1853 --- /dev/null +++ b/vector/README.md @@ -0,0 +1,10 @@ +ModExpNG +======== + +Ranzomized test vector generation scripts for ModExpNG core model. + + * `vector_regenerate.py` generates a new random RSA keypair using OpenSSL. Each invocation overwrites the keypair, the old one is not retained. **Never use the generated keypair for anything outside of this model!** + * `vector_format.py` processes the previously generated keypair. It first generates a "random" demo message to be signed and a "random" blinding factor, signs the message and checks the signature using Python's built-in math. If everything goes well, it writes the formatted test vector to a file. + * `vector_util.py` is a helper module. + +To obtain a test vector, optionally edit _KEY_LENGTH_ in `vector_regenerate.py` to set desired key length, then run the script to generate randomized key file. Then optionally edit _KEY_LENGTH_ in `vector_format.py` to match key length and change _RND_SEED_MESSAGE_ to get a different demo message and _RND_SEED_BLINDING_ to get a different blinding factor. Finally run the script to obtain randomized test vector module. diff --git a/vector/vector_format.py b/vector/vector_format.py new file mode 100644 index 0000000..a3e7e81 --- /dev/null +++ b/vector/vector_format.py @@ -0,0 +1,67 @@ +#!/usr/bin/python3 +# +# +# Formats a new test vector for ModExpNG core model. +# +# +# Copyright (c) 2019, NORDUnet A/S +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the NORDUnet nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +import sys +import vector_util + +SCRIPT_USAGE = "USAGE: vector_format.py [openssl_binary]" + +KEY_LENGTH = 1024 + +RNG_SEED_MESSAGE = 1 +RNG_SEED_BLINDING = 2 + + +if __name__ == "__main__": + + # ModInv fails otherwise... + sys.setrecursionlimit(int(1.5 * KEY_LENGTH)) + + OPENSSL_BINARY = vector_util.openssl_binary(SCRIPT_USAGE) + + if len(OPENSSL_BINARY) > 0: + + MESSAGE = vector_util.random_message(RNG_SEED_MESSAGE, KEY_LENGTH) + BLINDING = vector_util.random_blinding(RNG_SEED_BLINDING, KEY_LENGTH) + VECTOR = vector_util.load_vector(OPENSSL_BINARY, KEY_LENGTH) + + vector_ok = VECTOR.selfcheck(MESSAGE, BLINDING) + if vector_ok: + vector_util.save_vector(VECTOR) + print("Test vector formatted.") + else: + print("Failed to format test vector.") + diff --git a/vector/vector_regenerate.py b/vector/vector_regenerate.py new file mode 100644 index 0000000..34c6384 --- /dev/null +++ b/vector/vector_regenerate.py @@ -0,0 +1,48 @@ +#!/usr/bin/python3 +# +# +# Generates a new ranzomized test vector for ModExpNG core model. +# +# +# Copyright (c) 2019, NORDUnet A/S +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the NORDUnet nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +import vector_util + +SCRIPT_USAGE = "USAGE: vector_regenerate.py [openssl_binary]" + +KEY_LENGTH = 1024 + +if __name__ == "__main__": + + OPENSSL_BINARY = vector_util.openssl_binary(SCRIPT_USAGE) + + if len(OPENSSL_BINARY) > 0: + vector_util.openssl_genrsa(OPENSSL_BINARY, KEY_LENGTH) diff --git a/vector/vector_util.py b/vector/vector_util.py new file mode 100644 index 0000000..37e4fb6 --- /dev/null +++ b/vector/vector_util.py @@ -0,0 +1,319 @@ +#!/usr/bin/python3 +# +# +# Helper routines for ModExpNG randomized test vector generator. +# +# +# Copyright (c) 2019, NORDUnet A/S +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the NORDUnet nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +import sys +import random +import subprocess +from enum import Enum, auto + + +class VectorPiece(Enum): + VectorPieceOther = auto() + VectorPieceN = auto() + VectorPieceD = auto() + VectorPieceP = auto() + VectorPieceQ = auto() + VectorPieceDP = auto() + VectorPieceDQ = auto() + VectorPieceQINV = auto() + + +class Vector: + + # public exponent + _f4 = 0x10001 + + def __init__(self, length): + self._bits = length + self._n = "" + self._d = "" + self._p = "" + self._q = "" + self._dp = "" + self._dq = "" + self._qinv = "" + + def _add_piece(self, type, value): + value = value.replace(":", "") + value = value.replace("\r", "") + value = value.replace("\n", "") + value = value.replace(" ", "") + + if type == VectorPiece.VectorPieceN: self._n += value + elif type == VectorPiece.VectorPieceD: self._d += value + elif type == VectorPiece.VectorPieceP: self._p += value + elif type == VectorPiece.VectorPieceQ: self._q += value + elif type == VectorPiece.VectorPieceDP: self._dp += value + elif type == VectorPiece.VectorPieceDQ: self._dq += value + elif type == VectorPiece.VectorPieceQINV: self._qinv += value + else: raise Exception("Invalid vector piece type!") + + def _calc_mont_factor(self, length, modulus): + return pow(2, 2*length, modulus) + + def _calc_mod_coeff(self, length, modulus): + + pwr = pow(2, length) + pwr_mask = pwr - 1 + + r = 1 + b = 1 + + nn = ((modulus ^ pwr_mask) + 1) % pwr + + for i in range(1, length): + + b = (b << 1) % pwr + t = (r * nn) % pwr + + if t & (1 << i): r += b + + return r + + def _calc_blind_y(self, x, modulus): + x_inv = self._modinv(x, modulus) + return pow(x_inv, self._f4, modulus) + + def _egcd(self, a, b): + if a == 0: + return (b, 0, 1) + else: + g, y, x = self._egcd(b % a, a) + return (g, x - (b // a) * y, y) + + def _modinv(self, a, m): + g, x, y = self._egcd(a, m) + if g != 1: + raise Exception("_modinv() failed!") + else: + return x % m + + def selfcheck(self, message, blinding): + + self.m = message # message (padded) + self.n = int(self._n, 16) # modulus + self.d = int(self._d, 16) # private key + self.p = int(self._p, 16) # part of modulus + self.q = int(self._q, 16) # part of modulus + self.dp = int(self._dp, 16) # smaller exponent + self.dq = int(self._dq, 16) # smaller exponent + self.qinv = int(self._qinv, 16) # helper coefficient + + self.x = blinding + self.y = self._calc_blind_y(self.x, self.n) + + # check modulus + if self.n == 0: + print("ERROR: n == 0") + return False + + if self.n != self.p * self.q: + print("ERROR: n != (p * q)") + return False + + # check smaller exponents + if self.dp != (self.d % (self.p-1)): + print("ERROR: dp != (d % (p-1))") + return False + + if self.dq != (self.d % (self.q-1)): + print("ERROR: dq != (d % (q-1))") + return False + + # sign to obtain known good value + s_reference = pow(message, self.d, self.n) + + # blind message + message_blinded = (message * self.y) % self.n + + # sign blinded message + s_blinded = pow(message_blinded, self.d, self.n) + + # unblind signature + s_unblinded = (s_blinded * self.x) % self.n + + # check, that x and y actually work + if s_unblinded != s_reference: + print("ERROR: s_unblinded != s_reference!") + return False + + # try to do crt with the blinded message + sp_blinded = pow(message_blinded, self.dp, self.p) + sq_blinded = pow(message_blinded, self.dq, self.q) + + # recover full blinded signature + sr_blinded = sp_blinded - sq_blinded + if sr_blinded < 0: sr_blinded += self.p + + sr_qinv_blinded = (sr_blinded * self.qinv) % self.p + + s_crt_blinded = sq_blinded + self.q * sr_qinv_blinded + + # unblind crt signature + s_crt_unblinded = (s_crt_blinded * self.x) % self.n + + if s_crt_unblinded != s_reference: + print("ERROR: s_crt_unblinded != s_reference!") + return False + + self.n_factor = self._calc_mont_factor(self._bits + 16, self.n) + self.p_factor = self._calc_mont_factor(self._bits // 2 + 16, self.p) + self.q_factor = self._calc_mont_factor(self._bits // 2 + 16, self.q) + + self.n_coeff = self._calc_mod_coeff(self._bits + 16, self.n) + self.p_coeff = self._calc_mod_coeff(self._bits // 2 + 16, self.p) + self.q_coeff = self._calc_mod_coeff(self._bits // 2 + 16, self.q) + + print("Test vector checked.") + + return True + + +def openssl_binary(usage): + + # nothing so far + openssl = "" + + # user didn't overide anything + if len(sys.argv) == 1: + openssl = "openssl" + print("Using system OpenSSL library.") + + # user requested some specific binary + elif len(sys.argv) == 2: + openssl = sys.argv[1] + print("Using OpenSSL binary '" + openssl + "'...") + + # didn't understand command line + else: + print(usage) + + # return path to selected binary (if any) + return openssl + + +def openssl_genrsa(binary, length): + + filename = str(length) + "_randomized.key" + subprocess.call([binary, "genrsa", "-out", filename, str(length)]) + + +def random_message(seed, length): + + message = 0 + num_bytes = length // 8 - 1 + + random.seed(seed) + + for i in range(num_bytes): + message <<= 8 + message += random.getrandbits(8) + + return message + + +def random_blinding(seed, length): + + blinding = 0 + num_bytes = length // 8 - 1 + + random.seed(seed) + + for i in range(num_bytes): + blinding <<= 8 + blinding += random.getrandbits(8) + + return blinding + + +def load_vector(binary, length): + + vector = Vector(length) + piece_type = VectorPiece.VectorPieceOther + + filename = str(length) + "_randomized.key" + openssl_command = [binary, "rsa", "-in", filename, "-noout", "-text"] + openssl_stdout = subprocess.check_output(openssl_command).decode("utf-8").splitlines() + + for line in openssl_stdout: + if line.startswith("RSA Private-Key:"): piece_type = VectorPiece.VectorPieceOther + elif line.startswith("modulus:"): piece_type = VectorPiece.VectorPieceN + elif line.startswith("publicExponent:"): piece_type = VectorPiece.VectorPieceOther + elif line.startswith("privateExponent:"): piece_type = VectorPiece.VectorPieceD + elif line.startswith("prime1:"): piece_type = VectorPiece.VectorPieceP + elif line.startswith("prime2:"): piece_type = VectorPiece.VectorPieceQ + elif line.startswith("exponent1:"): piece_type = VectorPiece.VectorPieceDP + elif line.startswith("exponent2:"): piece_type = VectorPiece.VectorPieceDQ + elif line.startswith("coefficient:"): piece_type = VectorPiece.VectorPieceQINV + else: vector._add_piece(piece_type, line) + + return vector + + +def save_vector(vector): + + filename = "vector_" + str(vector._bits) + "_randomized.py" + print("Writing to '%s'..." % filename) + + f = open(filename, 'w') + + f.write("# Generated automatically, do not edit.\n\n") + + f.write("class Vector:\n") + f.write(" m = 0x%x\n" % vector.m) + f.write(" n = 0x%x\n" % vector.n) + f.write(" d = 0x%x\n" % vector.d) + f.write(" p = 0x%x\n" % vector.p) + f.write(" q = 0x%x\n" % vector.q) + f.write(" dp = 0x%x\n" % vector.dp) + f.write(" dq = 0x%x\n" % vector.dq) + f.write(" qinv = 0x%x\n" % vector.qinv) + f.write(" n_factor = 0x%x\n" % vector.n_factor) + f.write(" p_factor = 0x%x\n" % vector.p_factor) + f.write(" q_factor = 0x%x\n" % vector.q_factor) + f.write(" n_coeff = 0x%x\n" % vector.n_coeff) + f.write(" p_coeff = 0x%x\n" % vector.p_coeff) + f.write(" q_coeff = 0x%x\n" % vector.q_coeff) + f.write(" x = 0x%x\n" % vector.x) + f.write(" y = 0x%x\n" % vector.y) + + f.close() + + +# +# End of file +# -- cgit v1.2.3