#!/usr/bin/python3 # # # ModExpNG core math model. # # # Copyright 2019 The Commons Conservancy Cryptech Project # SPDX-License-Identifier: BSD-3-Clause # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # - Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # ------- # Imports #-------- import sys import importlib from enum import Enum, auto # -------------- # Model Settings # -------------- # length of public key KEY_LENGTH = 512 # how many parallel multipliers to use NUM_MULTS = 8 # --------------- # Internal Values # --------------- # half of key length _KEY_LENGTH_HALF = KEY_LENGTH // 2 # width of internal math pipeline _WORD_WIDTH = 16 _WORD_WIDTH_EXT = 18 _WORD_MASK = 2 ** _WORD_WIDTH - 1 _WORD_MASK_EXT = 2 ** _WORD_WIDTH_EXT - 1 _CARRY_MASK = _WORD_MASK ^ _WORD_MASK_EXT # folder with test vector scripts _VECTOR_PATH = "/vector" # name of test vector class _VECTOR_CLASS = "Vector" # ------------------ # Debugging Settings # ------------------ DUMP_LADDER_INDEX = -1 # at which ladder step to print intermediate debug vector (-1 doesn't print internals) DUMP_VECTORS = False # print entire debug vector components DUMP_INDICES = False # print indices of words at MAC inputs DUMP_MACS_INPUTS = False # print MAC input words DUMP_MACS_CLEARING = False # print MAC clearing bitmaps DUMP_MACS_ACCUMULATION = False # print MAC accumulators contents DUMP_MULT_PARTS = False # print multiplication output parts DUMP_RECOMBINATION = False # print recombination internals DUMP_REDUCTION = False # print reduction internals DUMP_EXPONENTS = False # dump secret exponents FORCE_OVERFLOW = False # force rarely seen internal overflow situation to verify how its handler works DUMP_PROGRESS_FACTOR = 16 # once per how many ladder steps to update progress indicator DUMP_FORMAT_BUS = True # False: dump 18-bit words, True: dump 32-bit words DUMP_FORMAT_C_ARRAY = False # False: dump in Verilog format, True: dump as C array initializer # # Multi-Precision Integer # class ModExpNG_Operand(): def __init__(self, number, length, words = None): if words is None: # length must be divisible by word width if (length % _WORD_WIDTH) > 0: raise Exception("Bad number length!") self._init_from_number(number, length) else: # length must match words count if len(words) != length: raise Exception("Bad words count!") self._init_from_words(words, length) def copy(self): return ModExpNG_Operand(None, len(self.words), self.words) def _format_verilog(self, name): if not DUMP_FORMAT_BUS: for i in range(len(self.words)): if i > 0: if (i % 4) == 0: print("") else: print(" ", end='') print("%s[%3d] = 18'h%05x;" % (name, i, self.words[i]), end='') else: _words = list(self.words) num_words = len(_words) if num_words % 2 > 0: _words.append(0) num_words += 1 for i in range(num_words // 2): if i > 0: if (i % 4) == 0: print("") else: print(" ", end='') print("%s[%3d] = 32'h%04x%04x;" % (name, i, _words[2*i+1], _words[2*i]), end='') print("") def _format_c_array(self, name): words = list(reversed(self.words)) if len(words) % 2 > 0: words.insert(0, 0) print("#define %s_%d_INIT \\\n\t{" % (name, KEY_LENGTH), end='') for i in range(0, len(words), 2): print("0x%04x%04x" % (words[i], words[i+1]), end='') if (i + 2) < len(words): print(", ", end='') if ((i + 2) % 8) == 0: print("\\\n\t ", end='') else: print("}") def format(self, name): if not DUMP_FORMAT_C_ARRAY: self._format_verilog(name) else: self._format_c_array(name) def _init_from_words(self, words, count): for i in range(count): # word must not exceed 18 bits if words[i] >= (2 ** (_WORD_WIDTH_EXT)): raise Exception("Word is too large!") self.words = list(words) def _init_from_number(self, number, length): num_hexchars_per_word = _WORD_WIDTH // 4 num_hexchars_total = length // num_hexchars_per_word value_hex = format(number, 'x') # value must not be larger than specified, but it can be smaller, so # we may need to prepend it with zeroes if len(value_hex) > num_hexchars_total: raise Exception("Number is too large!") else: while len(value_hex) < num_hexchars_total: value_hex = "0" + value_hex # create empty list self.words = list() # fill in words while len(value_hex) > 0: value_hex_part = value_hex[-num_hexchars_per_word:] value_hex = value_hex[:-num_hexchars_per_word] self.words.append(int(value_hex_part, 16)) def number(self): ret = 0 shift = 0 for word in self.words: ret += word << shift shift += _WORD_WIDTH return ret def _get_half(self, part): num_words = len(self.words) num_words_half = num_words // 2 if not part: return ModExpNG_Operand(None, num_words_half, self.words[:num_words_half]) else: return ModExpNG_Operand(None, num_words_half, self.words[num_words_half:]) def lower_half(self): return self._get_half(False) def upper_half(self): return self._get_half(True) # # Test Vector # class ModExpNG_TestVector(): def __init__(self): # format target filename filename = "vector_" + str(KEY_LENGTH) + "_randomized" # add ./vector to import search path sys.path.insert(1, sys.path[0] + _VECTOR_PATH) # import from filename vector_module = importlib.import_module(filename) # get vector class vector_class = getattr(vector_module, _VECTOR_CLASS) # instantiate vector class vector_inst = vector_class() # obtain parts of vector self.m = ModExpNG_Operand(vector_inst.m, KEY_LENGTH) self.n = ModExpNG_Operand(vector_inst.n, KEY_LENGTH) self.d = ModExpNG_Operand(vector_inst.d, KEY_LENGTH) self.p = ModExpNG_Operand(vector_inst.p, _KEY_LENGTH_HALF) self.q = ModExpNG_Operand(vector_inst.q, _KEY_LENGTH_HALF) self.dp = ModExpNG_Operand(vector_inst.dp, _KEY_LENGTH_HALF) self.dq = ModExpNG_Operand(vector_inst.dq, _KEY_LENGTH_HALF) self.qinv = ModExpNG_Operand(vector_inst.qinv, _KEY_LENGTH_HALF) self.n_factor = ModExpNG_Operand(vector_inst.n_factor, KEY_LENGTH) self.p_factor = ModExpNG_Operand(vector_inst.p_factor, _KEY_LENGTH_HALF) self.q_factor = ModExpNG_Operand(vector_inst.q_factor, _KEY_LENGTH_HALF) self.n_coeff = ModExpNG_Operand(vector_inst.n_coeff, KEY_LENGTH + _WORD_WIDTH) self.p_coeff = ModExpNG_Operand(vector_inst.p_coeff, _KEY_LENGTH_HALF + _WORD_WIDTH) self.q_coeff = ModExpNG_Operand(vector_inst.q_coeff, _KEY_LENGTH_HALF + _WORD_WIDTH) self.x = ModExpNG_Operand(vector_inst.x, KEY_LENGTH) self.y = ModExpNG_Operand(vector_inst.y, KEY_LENGTH) class ModExpNG_WideBankEnum(Enum): A = auto() B = auto() C = auto() D = auto() E = auto() N = auto() L = auto() H = auto() class ModExpNG_NarrowBankEnum(Enum): A = auto() B = auto() C = auto() D = auto() E = auto() N_COEFF = auto() Q = auto() EXT = auto() I = auto() class ModExpNG_CoreInputEnum(Enum): M = auto() N = auto() P = auto() Q = auto() N_COEFF = auto() P_COEFF = auto() Q_COEFF = auto() N_FACTOR = auto() P_FACTOR = auto() Q_FACTOR = auto() X = auto() Y = auto() QINV = auto() class ModExpNG_CoreOutputEnum(Enum): XM = auto() YM = auto() S = auto() class ModExpNG_WideBank(): def __init__(self): self.a = None self.b = None self.c = None self.d = None self.e = None self.n = None self.l = None self.h = None self.a_cache = ModExpNG_Operand(0, KEY_LENGTH) self.b_cache = ModExpNG_Operand(0, KEY_LENGTH) self.c_cache = ModExpNG_Operand(0, KEY_LENGTH) self.d_cache = ModExpNG_Operand(0, KEY_LENGTH) self.e_cache = ModExpNG_Operand(0, KEY_LENGTH) self.n_cache = ModExpNG_Operand(0, KEY_LENGTH) self.l_cache = ModExpNG_Operand(0, KEY_LENGTH) self.h_cache = ModExpNG_Operand(0, KEY_LENGTH) def _get_value(self, sel): if sel == ModExpNG_WideBankEnum.A: return self.a elif sel == ModExpNG_WideBankEnum.B: return self.b elif sel == ModExpNG_WideBankEnum.C: return self.c elif sel == ModExpNG_WideBankEnum.D: return self.d elif sel == ModExpNG_WideBankEnum.E: return self.e elif sel == ModExpNG_WideBankEnum.N: return self.n elif sel == ModExpNG_WideBankEnum.L: return self.l elif sel == ModExpNG_WideBankEnum.H: return self.h else: raise Exception("ModExpNG_WideBank._get_value(): Invalid selector!") def _get_value_cache(self, sel): if sel == ModExpNG_WideBankEnum.A: return self.a_cache elif sel == ModExpNG_WideBankEnum.B: return self.b_cache elif sel == ModExpNG_WideBankEnum.C: return self.c_cache elif sel == ModExpNG_WideBankEnum.D: return self.d_cache elif sel == ModExpNG_WideBankEnum.E: return self.e_cache elif sel == ModExpNG_WideBankEnum.N: return self.n_cache elif sel == ModExpNG_WideBankEnum.L: return self.l_cache elif sel == ModExpNG_WideBankEnum.H: return self.h_cache else: raise Exception("ModExpNG_WideBank._get_value(): Invalid selector!") def _set_value(self, sel, value): if sel == ModExpNG_WideBankEnum.A: self.a = value.copy() elif sel == ModExpNG_WideBankEnum.B: self.b = value.copy() elif sel == ModExpNG_WideBankEnum.C: self.c = value.copy() elif sel == ModExpNG_WideBankEnum.D: self.d = value.copy() elif sel == ModExpNG_WideBankEnum.E: self.e = value.copy() elif sel == ModExpNG_WideBankEnum.N: self.n = value.copy() elif sel == ModExpNG_WideBankEnum.L: self.l = value.copy() elif sel == ModExpNG_WideBankEnum.H: self.h = value.copy() else: raise Exception("ModExpNG_WideBank._set_value(): Invalid selector!") if sel == ModExpNG_WideBankEnum.A: for i in range(len(value.words)): self.a_cache.words[i] = value.words[i] elif sel == ModExpNG_WideBankEnum.B: for i in range(len(value.words)): self.b_cache.words[i] = value.words[i] elif sel == ModExpNG_WideBankEnum.C: for i in range(len(value.words)): self.c_cache.words[i] = value.words[i] elif sel == ModExpNG_WideBankEnum.D: for i in range(len(value.words)): self.d_cache.words[i] = value.words[i] elif sel == ModExpNG_WideBankEnum.E: for i in range(len(value.words)): self.e_cache.words[i] = value.words[i] elif sel == ModExpNG_WideBankEnum.N: for i in range(len(value.words)): self.n_cache.words[i] = value.words[i] elif sel == ModExpNG_WideBankEnum.L: for i in range(len(value.words)): self.l_cache.words[i] = value.words[i] elif sel == ModExpNG_WideBankEnum.H: for i in range(len(value.words)): self.h_cache.words[i] = value.words[i] class ModExpNG_NarrowBank(): def __init__(self, i): self.a = None self.b = None self.c = None self.d = None self.e = None self.n_coeff = None self.q = None self.ext = ModExpNG_Operand(0, 2*_WORD_WIDTH) self.i = i self.a_cache = ModExpNG_Operand(0, KEY_LENGTH) self.b_cache = ModExpNG_Operand(0, KEY_LENGTH) self.c_cache = ModExpNG_Operand(0, KEY_LENGTH) self.d_cache = ModExpNG_Operand(0, KEY_LENGTH) self.e_cache = ModExpNG_Operand(0, KEY_LENGTH) self.n_coeff_cache = ModExpNG_Operand(0, KEY_LENGTH) self.q_cache = ModExpNG_Operand(0, KEY_LENGTH) def _get_value(self, sel): if sel == ModExpNG_NarrowBankEnum.A: return self.a elif sel == ModExpNG_NarrowBankEnum.B: return self.b elif sel == ModExpNG_NarrowBankEnum.C: return self.c elif sel == ModExpNG_NarrowBankEnum.D: return self.d elif sel == ModExpNG_NarrowBankEnum.E: return self.e elif sel == ModExpNG_NarrowBankEnum.N_COEFF: return self.n_coeff elif sel == ModExpNG_NarrowBankEnum.Q: return self.q elif sel == ModExpNG_NarrowBankEnum.EXT: return self.ext elif sel == ModExpNG_NarrowBankEnum.I: return self.i else: raise Exception("ModExpNG_NarrowBank._get_value(): Invalid selector!") def _get_value_cache(self, sel): if sel == ModExpNG_NarrowBankEnum.A: return self.a_cache elif sel == ModExpNG_NarrowBankEnum.B: return self.b_cache elif sel == ModExpNG_NarrowBankEnum.C: return self.c_cache elif sel == ModExpNG_NarrowBankEnum.D: return self.d_cache elif sel == ModExpNG_NarrowBankEnum.E: return self.e_cache elif sel == ModExpNG_NarrowBankEnum.N_COEFF: return self.n_coeff_cache elif sel == ModExpNG_NarrowBankEnum.Q: return self.q_cache else: raise Exception("ModExpNG_NarrowBank._get_value(): Invalid selector!") def _set_value(self, sel, value): if sel == ModExpNG_NarrowBankEnum.A: self.a = value.copy() elif sel == ModExpNG_NarrowBankEnum.B: self.b = value.copy() elif sel == ModExpNG_NarrowBankEnum.C: self.c = value.copy() elif sel == ModExpNG_NarrowBankEnum.D: self.d = value.copy() elif sel == ModExpNG_NarrowBankEnum.E: self.e = value.copy() elif sel == ModExpNG_NarrowBankEnum.N_COEFF: self.n_coeff = ModExpNG_Operand(None, len(value.words)-1, value.words[:-1]) self.ext.words[0] = value.words[-1] elif sel == ModExpNG_NarrowBankEnum.Q: self.q = ModExpNG_Operand(None, len(value.words)-1, value.words[:-1]) self.ext.words[1] = value.words[-1] else: raise Exception("ModExpNG_NarrowBank._set_value(): Invalid selector!") if sel == ModExpNG_NarrowBankEnum.A: for i in range(len(value.words)): self.a_cache.words[i] = value.words[i] elif sel == ModExpNG_NarrowBankEnum.B: for i in range(len(value.words)): self.b_cache.words[i] = value.words[i] elif sel == ModExpNG_NarrowBankEnum.C: for i in range(len(value.words)): self.c_cache.words[i] = value.words[i] elif sel == ModExpNG_NarrowBankEnum.D: for i in range(len(value.words)): self.d_cache.words[i] = value.words[i] elif sel == ModExpNG_NarrowBankEnum.E: for i in range(len(value.words)): self.e_cache.words[i] = value.words[i] elif sel == ModExpNG_NarrowBankEnum.N_COEFF: for i in range(len(value.words)-1): self.n_coeff_cache.words[i] = value.words[i] elif sel == ModExpNG_NarrowBankEnum.Q: for i in range(len(value.words)-1): self.q_cache.words[i] = value.words[i] class ModExpNG_CoreInput(): def __init__(self): self._m = None self._n = None self._p = None self._q = None self._n_coeff = None self._p_coeff = None self._q_coeff = None self._n_factor = None self._p_factor = None self._q_factor = None self._x = None self._y = None self._qinv = None def set_value(self, sel, value): if sel == ModExpNG_CoreInputEnum.M: self._m = value elif sel == ModExpNG_CoreInputEnum.N: self._n = value elif sel == ModExpNG_CoreInputEnum.P: self._p = value elif sel == ModExpNG_CoreInputEnum.Q: self._q = value elif sel == ModExpNG_CoreInputEnum.N_COEFF: self._n_coeff = value elif sel == ModExpNG_CoreInputEnum.P_COEFF: self._p_coeff = value elif sel == ModExpNG_CoreInputEnum.Q_COEFF: self._q_coeff = value elif sel == ModExpNG_CoreInputEnum.N_FACTOR: self._n_factor = value elif sel == ModExpNG_CoreInputEnum.P_FACTOR: self._p_factor = value elif sel == ModExpNG_CoreInputEnum.Q_FACTOR: self._q_factor = value elif sel == ModExpNG_CoreInputEnum.X: self._x = value elif sel == ModExpNG_CoreInputEnum.Y: self._y = value elif sel == ModExpNG_CoreInputEnum.QINV: self._qinv = value else: raise Exception("ModExpNG_CoreInput.set_value(): invalid selector!") def _get_value(self, sel): if sel == ModExpNG_CoreInputEnum.M: return self._m elif sel == ModExpNG_CoreInputEnum.N: return self._n elif sel == ModExpNG_CoreInputEnum.P: return self._p elif sel == ModExpNG_CoreInputEnum.Q: return self._q elif sel == ModExpNG_CoreInputEnum.N_COEFF: return self._n_coeff elif sel == ModExpNG_CoreInputEnum.P_COEFF: return self._p_coeff elif sel == ModExpNG_CoreInputEnum.Q_COEFF: return self._q_coeff elif sel == ModExpNG_CoreInputEnum.N_FACTOR: return self._n_factor elif sel == ModExpNG_CoreInputEnum.P_FACTOR: return self._p_factor elif sel == ModExpNG_CoreInputEnum.Q_FACTOR: return self._q_factor elif sel == ModExpNG_CoreInputEnum.X: return self._x elif sel == ModExpNG_CoreInputEnum.Y: return self._y elif sel == ModExpNG_CoreInputEnum.QINV: return self._qinv else: raise Exception("ModExpNG_CoreInput._get_value(): invalid selector!") class ModExpNG_CoreOutput(): def __init__(self): self._xm = None self._ym = None self._s = None def _set_value(self, sel, value): if sel == ModExpNG_CoreOutputEnum.XM: self._xm = value elif sel == ModExpNG_CoreOutputEnum.YM: self._ym = value elif sel == ModExpNG_CoreOutputEnum.S: self._s = value else: raise Exception("ModExpNG_CoreOutput._set_value(): invalid selector!") def get_value(self, sel): if sel == ModExpNG_CoreOutputEnum.XM: return self._xm elif sel == ModExpNG_CoreOutputEnum.YM: return self._ym elif sel == ModExpNG_CoreOutputEnum.S: return self._s else: raise Exception("ModExpNG_CoreOutput.get_value(): invalid selector!") class ModExpNG_BanksPair(): def __init__(self, i): self.wide = ModExpNG_WideBank() self.narrow = ModExpNG_NarrowBank(i) def _get_wide(self, sel): return self.wide._get_value(sel) def _get_narrow(self, sel): return self.narrow._get_value(sel) def _get_wide_cache(self, sel): return self.wide._get_value_cache(sel) def _get_narrow_cache(self, sel): return self.narrow._get_value_cache(sel) def _set_wide(self, sel, value): self.wide._set_value(sel, value) def _set_narrow(self, sel, value): self.narrow._set_value(sel, value) class ModExpNG_BanksLadder(): def __init__(self, i): self.ladder_x = ModExpNG_BanksPair(i) self.ladder_y = ModExpNG_BanksPair(i) class ModExpNG_BanksCRT(): def __init__(self, i): self.crt_x = ModExpNG_BanksLadder(i) self.crt_y = ModExpNG_BanksLadder(i) class ModExpNG_PartRecombinator(): def __init__(self): self._WORD_MASK_2X = (_WORD_MASK << _WORD_WIDTH) | _WORD_MASK self._WORD_WIDTH_2X = 2 * _WORD_WIDTH def _bit_select(self, x, msb, lsb): y = 0 for pos in range(lsb, msb+1): y |= (x & (1 << pos)) >> lsb return y def _update_delays(self, x1, y1, z1, z2): self._x_dly1, self._y_dly1, self._z_dly1, self._z_dly2 = x1, y1, z1, z2 def _update_carries(self, cm, cs): self._cry_master, self._cry_slave = cm, cs def _clear_words(self): self._words, self._wordsx = [], [] def _store_words(self, w, wx): self._words.append(w) self._wordsx.append(wx) def _flush_pipeline(self, dump): self._phase = False self._master_p = None self._update_carries(0, 0) self._update_delays(0, 0, 0, 0) self._clear_words() if dump and DUMP_RECOMBINATION: print("RCMB -> flush()") print("RCMB: master_ab | master_c | slave_ab") def _push_pipeline(self, part, dump): x = self._bit_select(part, 15, 0) y = self._bit_select(part, 31, 16) z = self._bit_select(part, 45, 32) master_ab = (y << 16) | self._y_dly1 master_c = (self._z_dly1 << 16) | self._z_dly2 slave_ab = (x << 16) | self._x_dly1 if dump and DUMP_RECOMBINATION: print("PUSH: 0x%08x | 0x%08x | 0x%08x > " % (master_ab, master_c, slave_ab), end='') if not self._phase: master_p = master_ab + master_c + self._cry_master self._update_carries(master_p >> self._WORD_WIDTH_2X, self._cry_slave) self._master_p = master_p & self._WORD_MASK_2X if dump and DUMP_RECOMBINATION: #print("MASTER: {0x%1d, 0x%08x}" % (self._cry_master, self._master_p)) print("") else: slave_p = self._master_p + slave_ab + self._cry_slave self._update_carries(self._cry_master, slave_p >> self._WORD_WIDTH_2X) slave_p &= self._WORD_MASK_2X if dump and DUMP_RECOMBINATION: print("SLAVE: {0x%1d, 0x%08x}" % (self._cry_slave, slave_p)) #print("") slave_p_msb, slave_p_lsb = slave_p >> _WORD_WIDTH, slave_p & _WORD_MASK self._store_words(slave_p_lsb, slave_p_lsb) self._store_words(slave_p_msb, (self._cry_slave << _WORD_WIDTH) | slave_p_msb) self._phase = not self._phase self._update_delays(x, y, z, self._z_dly1) def _purge_pipeline(self, dump): slave_ab = self._x_dly1 if not self._phase: raise Exception("RCMB: Can only purge pipeline after odd number of pushes!") else: slave_p = self._master_p + slave_ab + self._cry_slave self._update_carries(self._cry_master, slave_p >> self._WORD_WIDTH_2X) slave_p &= self._WORD_MASK_2X slave_p_msb, slave_p_lsb = slave_p >> _WORD_WIDTH, slave_p & _WORD_MASK self._store_words(slave_p_lsb, slave_p_lsb) self._store_words(slave_p_msb, (self._cry_slave << _WORD_WIDTH) | slave_p_msb) self._master_p = None self._phase = None self._update_carries(None, None) self._update_delays(None, None, None, None) @property def words(self): return self._words @property def wordsx(self): return self._wordsx def recombine_square(self, parts, ab_num_words, dump): # hardware computes LSB and MSB words simultaneously, we can't # simulate that here, so we compute sequentially # the first two words from MSB overlap with the last two words from # LSB, so we compute MSB first # LSB has N parts and produces N+2 words (two last cycles accomodate # the two "carry" words from MSB # MSB has N-1 parts and produces N words # total number of output words is 2*N # recombine the upper half self._flush_pipeline(dump) for i in range(ab_num_words): din = parts[ab_num_words + i] if i < (ab_num_words - 1) else 0 self._push_pipeline(din, dump) words_msb_cry, words_msb = self.words[0:2], self.words[2:] # recombine the lower half # note, that the very last word is 1 bit wider! self._flush_pipeline(dump) for i in range(ab_num_words + 2): din = parts[i] if i < ab_num_words else words_msb_cry[i - ab_num_words] self._push_pipeline(din, dump) words_lsb = self.words[:-1] + [self.wordsx[-1]] # return words_lsb + words_msb def recombine_triangle(self, parts, ab_num_words, dump): # hardware computes only LSB, so there's no overlap with MSB # LSB has N+1 parts and produces N+1 words, since the recombinator only # outputs two words every other cycle, we need to manually purge the # internal pipeline self._flush_pipeline(dump) for i in range(ab_num_words + 1): din = parts[i] self._push_pipeline(din, dump) self._purge_pipeline(dump) words_lsb = self.words[:-1] return words_lsb def recombine_rectangle(self, parts, ab_num_words, dump): # hardware computes LSB and MSB words simultaneously, we can't # simulate that here, so we compute sequentially # the first two words from MSB overlap with the last two words from # LSB, so we compute MSB first # LSB has N parts and produces N+2 words (two last cycles accomodate # the two "carry" words from MSB # MSB has N parts and produces N+1 words, since the recombinator only # outputs two words every other cycle, we need to manually purge the # internal pipeline # total number of output words is 2*N+1 # recombine the upper half self._flush_pipeline(dump) for i in range(ab_num_words + 1): din = parts[ab_num_words + i] if i < ab_num_words else 0 self._push_pipeline(din, dump) self._purge_pipeline(dump) words_msb_cry, words_msb = self.words[0:2], self.words[2:-1] # recombine the lower half # note, that the very last word is 1 bit wider! self._flush_pipeline(dump) for i in range(ab_num_words + 2): din = parts[i] if i < ab_num_words else words_msb_cry[i - ab_num_words] self._push_pipeline(din, dump) words_lsb = self.words[:-1] + [self.wordsx[-1]] return words_lsb + words_msb class ModExpNG_WordMultiplier(): def __init__(self): self._macs = list() self._indices = list() self._mac_aux = list() self._index_aux = list() for x in range(NUM_MULTS): self._macs.append(0) self._indices.append(0) self._mac_aux.append(0) self._index_aux.append(0) def _clear_all_macs(self, t, col, dump): for x in range(NUM_MULTS): self._macs[x] = 0 if dump and DUMP_MACS_CLEARING: print("t=%2d, col=%2d > clear > all" % (t, col)) def _clear_one_mac(self, x, t, col, dump): self._macs[x] = 0 if dump and DUMP_MACS_CLEARING: print("t=%2d, col=%2d > clear > x=%d" % (t, col, x)) def _clear_mac_aux(self, t, col, dump): self._mac_aux[0] = 0 if dump and DUMP_MACS_CLEARING: print("t= 0, col=%2d > clear > aux" % (col)) def _update_one_mac(self, x, t, col, a, b, dump, need_aux=False): if a >= (2 ** _WORD_WIDTH_EXT): raise Exception("a > 0x3FFFF!") if b >= (2 ** _WORD_WIDTH): raise Exception("b > 0xFFFF!") p = a * b if dump and DUMP_MACS_INPUTS: if x == 0: print("t=%2d, col=%2d > b=%05x > " % (t, col, b), end='') if x > 0: print("; ", end='') print("MAC[%d]: a=%05x" % (x, a), end='') if x == (NUM_MULTS-1) and not need_aux: print("") self._macs[x] += p def _update_mac_aux(self, y, col, a, b, dump): if a >= (2 ** _WORD_WIDTH_EXT): raise Exception("a > 0x3FFFF!") if b >= (2 ** _WORD_WIDTH): raise Exception("b > 0xFFFF!") p = a * b if dump and DUMP_MACS_INPUTS: print("; AUX: a=%05x" % a) self._mac_aux[0] += p def _preset_indices(self, col): for x in range(len(self._indices)): self._indices[x] = col * len(self._indices) + x def _preset_index_aux(self, num_cols): self._index_aux[0] = num_cols * len(self._indices) def _dump_macs_helper(self, t, col, aux=False): print("t=%2d, col=%2d > "% (t, col), end='') for i in range(NUM_MULTS): if i > 0: print(" | ", end='') print("mac[%d]: 0x%012x" % (i, self._macs[i]), end='') if aux: print(" | mac_aux[ 0]: 0x%012x" % (self._mac_aux[0]), end='') print("") def _dump_macs(self, t, col): self._dump_macs_helper(t, col) def _dump_macs_with_aux(self, t, col): self._dump_macs_helper(t, col, True) def _dump_indices_helper(self, t, col, aux=False): print("t=%2d, col=%2d > indices:" % (t, col), end='') for i in range(NUM_MULTS): print(" %2d" % self._indices[i], end='') if aux: print(" %2d" % self._index_aux[0], end='') print("") def _dump_indices(self, t, col): self._dump_indices_helper(t, col) def _dump_indices_with_aux(self, t, col): self._dump_indices_helper(t, col, True) def _rotate_indices(self, num_words): for x in range(len(self._indices)): if self._indices[x] > 0: self._indices[x] -= 1 else: self._indices[x] = num_words - 1 def _rotate_index_aux(self): self._index_aux[0] -= 1 def _mult_store_part(self, parts, time, column, part_index, mac_index, dump): parts[part_index] = self._macs[mac_index] if dump and DUMP_MULT_PARTS: print("t=%2d, col=%2d > parts[%2d]: mac[%d] = 0x%012x" % (time, column, part_index, mac_index, parts[part_index])) def _mult_store_part_aux(self, parts, time, column, part_index, dump): parts[part_index] = self._mac_aux[0] if dump and DUMP_MULT_PARTS: print("t=%2d, col=%2d > parts[%2d]: mac_aux[%d] = 0x%012x" % (time, column, part_index, 0, parts[part_index])) def multiply_square(self, a_wide, b_narrow, ab_num_words, dump=False): num_cols = ab_num_words // NUM_MULTS parts = list() for i in range(2 * ab_num_words - 1): parts.append(0) for col in range(num_cols): b_carry = 0 for t in range(ab_num_words): # take care of indices if t == 0: self._preset_indices(col) else: self._rotate_indices(ab_num_words) # take care of macs if t == 0: self._clear_all_macs(t, col, dump) else: t1 = t - 1 if (t1 // 8) == col: self._clear_one_mac(t1 % NUM_MULTS, t, col, dump) # debug output if dump and DUMP_INDICES: self._dump_indices(t, col) # current b-word # multiplier's b-input is limited to 16-bit words, so we need to propagate # carries on the fly here, carry can be up to two bits bt = b_narrow.words[t] + b_carry b_carry = (bt & _CARRY_MASK) >> _WORD_WIDTH if dump and b_carry > 1: print("Rare overflow case was detected and then successfully corrected.") bt &= _WORD_MASK # multiply by a-words for x in range(NUM_MULTS): ax = a_wide.words[self._indices[x]] self._update_one_mac(x, t, col, ax, bt, dump) if t == (col * NUM_MULTS + x): part_index = t self._mult_store_part(parts, t, col, part_index, x, dump) # debug output if dump and DUMP_MACS_ACCUMULATION: self._dump_macs(t, col) # save the uppers part of product at end of column, # for the last column don't save the very last part if t == (ab_num_words - 1): for x in range(NUM_MULTS): if not (col == (num_cols - 1) and x == (NUM_MULTS - 1)): part_index = ab_num_words + col * NUM_MULTS + x self._mult_store_part(parts, t, col, part_index, x, dump) return parts def multiply_triangle(self, a_wide, b_narrow, ab_num_words, dump=False): num_cols = ab_num_words // NUM_MULTS parts = list() for i in range(ab_num_words + 1): parts.append(0) for col in range(num_cols): last_col = col == (num_cols - 1) for t in range(ab_num_words + 1): # take care of indices if t == 0: self._preset_indices(col) else: self._rotate_indices(ab_num_words) # take care of auxilary index if last_col: if t == 0: self._preset_index_aux(num_cols) else: self._rotate_index_aux() # take care of macs if t == 0: self._clear_all_macs(t, col, dump) # take care of auxilary mac if last_col: if t == 0: self._clear_mac_aux(t, col, dump) # debug output if dump and DUMP_INDICES: self._dump_indices_with_aux(t, col) # current b-word bt = b_narrow.words[t] # multiply by a-words for x in range(NUM_MULTS): ax = a_wide.words[self._indices[x]] self._update_one_mac(x, t, col, ax, bt, dump, last_col) if t == (col * NUM_MULTS + x): part_index = t self._mult_store_part(parts, t, col, part_index, x, dump) # aux multiplier if last_col: ax = a_wide.words[self._index_aux[0]] self._update_mac_aux(t, col, ax, bt, dump) if t == ab_num_words: part_index = t self._mult_store_part_aux(parts, t, col, part_index, dump) # debug output if dump and DUMP_MACS_ACCUMULATION: self._dump_macs_with_aux(t, col) # shortcut if not last_col: if t == (NUM_MULTS * (col + 1) - 1): break return parts def multiply_rectangle(self, a_wide, b_narrow, ab_num_words, dump=False): num_cols = ab_num_words // NUM_MULTS parts = list() for i in range(2 * ab_num_words): parts.append(0) for col in range(num_cols): for t in range(ab_num_words + 1): # take care of indices if t == 0: self._preset_indices(col) else: self._rotate_indices(ab_num_words) # take care of macs if t == 0: self._clear_all_macs(t, col, dump) else: t1 = t - 1 if (t1 // 8) == col: self._clear_one_mac(t1 % NUM_MULTS, t, col, dump) # debug output if dump and DUMP_INDICES: self._dump_indices(t, col) # current b-word bt = b_narrow.words[t] # multiply by a-words for x in range(NUM_MULTS): ax = a_wide.words[self._indices[x]] self._update_one_mac(x, t, col, ax, bt, dump) # don't save one value for the very last time instant per column if t < ab_num_words and t == (col * NUM_MULTS + x): part_index = t self._mult_store_part(parts, t, col, part_index, x, dump) # debug output if dump and DUMP_MACS_ACCUMULATION: self._dump_macs(t, col) # save the upper parts of product at end of column if t == ab_num_words: for x in range(NUM_MULTS): part_index = ab_num_words + col * NUM_MULTS + x self._mult_store_part(parts, t, col, part_index, x, dump) return parts class ModExpNG_LowlevelOperator(): def _check_word(self, a): if a < 0 or a > _WORD_MASK: raise Exception("Word out of range!") def _check_carry_borrow(self, cb): if cb < 0 or cb > 1: raise Exception("Carry or borrow out of range!") def add_words(self, a, b, c_in): self._check_word(a) self._check_word(b) self._check_carry_borrow(c_in) sum = a + b + c_in sum_s = sum & _WORD_MASK sum_c = sum >> _WORD_WIDTH return (sum_c, sum_s) def sub_words(self, a, b, b_in): self._check_word(a) self._check_word(b) self._check_carry_borrow(b_in) dif = a - b - b_in if dif < 0: dif_b = 1 dif_d = dif + 2 ** _WORD_WIDTH else: dif_b = 0 dif_d = dif return (dif_b, dif_d) class ModExpNG_Worker(): def __init__(self): self.lowlevel = ModExpNG_LowlevelOperator() self.multiplier = ModExpNG_WordMultiplier() self.recombinator = ModExpNG_PartRecombinator() def serial_subtract_modular(self, a, b, n, ab_num_words): c_in = 0 b_in = 0 ab = list() ab_n = list() for x in range(ab_num_words): a_word = a.words[x] b_word = b.words[x] (b_out, d_out) = self.lowlevel.sub_words(a_word, b_word, b_in) (c_out, s_out) = self.lowlevel.add_words(d_out, n.words[x], c_in) ab.append(d_out) ab_n.append(s_out) (c_in, b_in) = (c_out, b_out) d = ab if not b_out else ab_n return ModExpNG_Operand(None, ab_num_words, d) def serial_add_uneven(self, a, b, ab_num_words): c_in = 0 ab = list() for x in range(2 * ab_num_words): a_word = a.words[x] if x < ab_num_words else 0 b_word = b.words[x] (c_out, s_out) = self.lowlevel.add_words(a_word, b_word, c_in) ab.append(s_out) c_in = c_out return ModExpNG_Operand(None, 2*ab_num_words, ab) def multipurpose_multiply(self, a, b, n, n_coeff, ab_num_words, bnk, reduce_only=False, multiply_only=False, dump=False, dump_crt="", dump_ladder=""): # # 1. AB = A * B # if dump: print("multiply_square(%s_%s)" % (dump_crt, dump_ladder)) if reduce_only: ab = b else: ab_parts = self.multiplier.multiply_square(a, b, ab_num_words, dump) ab_words = self.recombinator.recombine_square(ab_parts, ab_num_words, dump) ab = ModExpNG_Operand(None, 2 * ab_num_words, ab_words) if dump and DUMP_VECTORS: ab.format("%s_%s_AB" % (dump_crt, dump_ladder)) if not bnk is None: bnk._set_wide(ModExpNG_WideBankEnum.L, ab.lower_half()) bnk._set_wide(ModExpNG_WideBankEnum.H, ab.upper_half()) if multiply_only: return ModExpNG_Operand(None, 2*ab_num_words, ab_words) # # 2. Q = LSB(AB) * N_COEFF # if dump: print("multiply_triangle(%s_%s)" % (dump_crt, dump_ladder)) q_parts = self.multiplier.multiply_triangle(ab, n_coeff, ab_num_words, dump) q_words = self.recombinator.recombine_triangle(q_parts, ab_num_words, dump) q = ModExpNG_Operand(None, ab_num_words + 1, q_words) if dump and DUMP_VECTORS: q.format("%s_%s_Q" % (dump_crt, dump_ladder)) if not bnk is None: bnk._set_narrow(ModExpNG_NarrowBankEnum.Q, q) q_words = list(bnk._get_narrow(ModExpNG_NarrowBankEnum.Q).words) q_words.append(bnk._get_narrow(ModExpNG_NarrowBankEnum.EXT).words[1]) q = ModExpNG_Operand(None, len(q_words), q_words) # # 3. M = Q * N # if dump: print("multiply_rectangle(%s_%s)" % (dump_crt, dump_ladder)) m_parts = self.multiplier.multiply_rectangle(n, q, ab_num_words, dump) m_words = self.recombinator.recombine_rectangle(m_parts, ab_num_words, dump) m = ModExpNG_Operand(None, 2 * ab_num_words + 1, m_words) if dump and DUMP_VECTORS: m.format("%s_%s_M" % (dump_crt, dump_ladder)) # # 4. R = AB + M # # # 4a. compute carry (actual sum is all zeroes and need not be stored) # r_cy = 0 # this can be up to two bits, since we're adding extended words!! for i in range(ab_num_words + 1): s = ab.words[i] + m.words[i] + r_cy r_cy_new = s >> _WORD_WIDTH if dump and DUMP_REDUCTION: print("[%2d] 0x%05x + 0x%05x + 0x%x => {0x%x, [0x%05x]}" % (i, ab.words[i], m.words[i], r_cy, r_cy_new, s & 0xffff)) # ??? r_cy = r_cy_new # # 4b. Initialize empty result # R = list() for i in range(ab_num_words): R.append(0) # # 4c. compute the actual upper part of sum (take carry into account) # for i in range(ab_num_words): if dump and DUMP_REDUCTION: print("[%2d]" % i, end='') ab_word = ab.words[ab_num_words + i + 1] if i < (ab_num_words - 1) else 0 if dump and DUMP_REDUCTION: print(" 0x%05x" % ab_word, end='') m_word = m.words[ab_num_words + i + 1] if dump and DUMP_REDUCTION: print(" + 0x%05x" % m_word, end='') if i == 0: R[i] = r_cy else: R[i] = 0 if dump and DUMP_REDUCTION: print(" + 0x%x" % R[i], end='') R[i] += ab_word R[i] += m_word if dump and DUMP_REDUCTION: print(" = 0x%05x" % R[i]) return ModExpNG_Operand(None, ab_num_words, R) def convert_nonredundant(self, a, num_words): carry = 0 for x in range(num_words): a.words[x] += carry carry = a.words[x] >> _WORD_WIDTH a.words[x] &= _WORD_MASK class ModExpNG_Core(): def __init__(self, i): self.wrk = ModExpNG_Worker() self.bnk = ModExpNG_BanksCRT(i) self.inp = ModExpNG_CoreInput() self.out = ModExpNG_CoreOutput() def _dump_bank_indices(self, n): print(" ", end='') for i in range(n): print("[ %3d ] " % i, end='') print(""); def _dump_bank_seps(self, n): print(" ", end='') for i in range(n): print("------- ", end='') print(""); def _dump_bank_entry_narrow(self, name, op, val, n): print("%s.NARROW.%s " % (name, op), end='') for i in range(n): if i < len(val.words) and not val is None: print("0x%05x " % val.words[i], end='') continue print("0xxxxxx ", end='') print("") def _dump_bank_entry_wide(self, name, op, val, n): print("%s.WIDE.%s " % (name, op), end='') for i in range(n): if i < len(val.words) and not val is None: print("0x%05x " % val.words[i], end='') else: print("0xxxxxx ", end='') print("") def _dump_bank(self, name, banks_pair): n = KEY_LENGTH // _WORD_WIDTH self._dump_bank_indices(n) self._dump_bank_entry_wide(name, "A: ", banks_pair._get_wide_cache(W.A), n) self._dump_bank_entry_wide(name, "B: ", banks_pair._get_wide_cache(W.B), n) self._dump_bank_entry_wide(name, "C: ", banks_pair._get_wide_cache(W.C), n) self._dump_bank_entry_wide(name, "D: ", banks_pair._get_wide_cache(W.D), n) self._dump_bank_entry_wide(name, "E: ", banks_pair._get_wide_cache(W.E), n) self._dump_bank_entry_wide(name, "N: ", banks_pair._get_wide_cache(W.N), n) self._dump_bank_entry_wide(name, "L: ", banks_pair._get_wide_cache(W.L), n) self._dump_bank_entry_wide(name, "H: ", banks_pair._get_wide_cache(W.H), n) self._dump_bank_seps(n) self._dump_bank_entry_narrow(name, "A: ", banks_pair._get_narrow_cache(N.A), n) self._dump_bank_entry_narrow(name, "B: ", banks_pair._get_narrow_cache(N.B), n) self._dump_bank_entry_narrow(name, "C: ", banks_pair._get_narrow_cache(N.C), n) self._dump_bank_entry_narrow(name, "D: ", banks_pair._get_narrow_cache(N.D), n) self._dump_bank_entry_narrow(name, "E: ", banks_pair._get_narrow_cache(N.E), n) self._dump_bank_entry_narrow(name, "COEFF:", banks_pair._get_narrow_cache(N.N_COEFF), n) self._dump_bank_entry_narrow(name, "Q: ", banks_pair._get_narrow_cache(N.Q), n) self._dump_bank_entry_narrow(name, "EXT: ", banks_pair._get_narrow(N.EXT), n) def dump_banks(self): print("OPCODE == STOP: BANKS DUMP FOLLOWS") self._dump_bank("X.X", self.bnk.crt_x.ladder_x) self._dump_bank("X.Y", self.bnk.crt_x.ladder_y) self._dump_bank("Y.X", self.bnk.crt_y.ladder_x) self._dump_bank("Y.Y", self.bnk.crt_y.ladder_y) sys.exit() # # CRT_(X|Y) means either CRT_X or CRT_Y # LADDER_{X,Y} means both LADDER_X and LADDER_Y # # # copy from CRT_(X|Y).LADDER_X.NARROW to OUTPUT # def set_output_from_narrow_x(self, sel_output, bank_crt, sel_narrow): self.out._set_value(sel_output, bank_crt.ladder_x._get_narrow(sel_narrow)) # # copy from CRT_(X|Y).LADDER_Y.NARROW to OUTPUT # def set_output_from_narrow_y(self, sel_output, bank_crt, sel_narrow): self.out._set_value(sel_output, bank_crt.ladder_y._get_narrow(sel_narrow)) # # copy from INPUT to CRT_(X|Y).LADDER_{X,Y}.NARROW # def set_narrow_from_input(self, bank_crt, sel_narrow, sel_input): bank_crt.ladder_x._set_narrow(sel_narrow, self.inp._get_value(sel_input)) bank_crt.ladder_y._set_narrow(sel_narrow, self.inp._get_value(sel_input)) # # copy from INPUT to CRT_(X|Y).LADDER_{X,Y}.WIDE # def set_wide_from_input(self, bank_crt, sel_wide, sel_input): bank_crt.ladder_x._set_wide(sel_wide, self.inp._get_value(sel_input)) bank_crt.ladder_y._set_wide(sel_wide, self.inp._get_value(sel_input)) # # copy from CRT_Y.LADDER_{X,Y}.{WIDE,NARROW} to CRT_X.LADDER_{X,Y}.{WIDE,NARROW} # def copy_crt_y2x(self, sel_wide, sel_narrow): self.bnk.crt_x.ladder_x._set_wide(sel_wide, self.bnk.crt_y.ladder_x._get_wide(sel_wide)) self.bnk.crt_x.ladder_y._set_wide(sel_wide, self.bnk.crt_y.ladder_y._get_wide(sel_wide)) self.bnk.crt_x.ladder_x._set_narrow(sel_narrow, self.bnk.crt_y.ladder_x._get_narrow(sel_narrow)) self.bnk.crt_x.ladder_y._set_narrow(sel_narrow, self.bnk.crt_y.ladder_y._get_narrow(sel_narrow)) # # copy from CRT_{X,Y}.LADDER_X.{WIDE,NARROW} to CRT_{X,Y}.LADDER_Y.{WIDE,NARROW} # def copy_ladders_x2y(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out): self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, self.bnk.crt_x.ladder_x._get_wide(sel_wide_in)) self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, self.bnk.crt_y.ladder_x._get_wide(sel_wide_in)) self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in)) self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in)) # # copy from CRT_{X,Y}.LADDER_X.{WIDE,NARROW} to CRT_{Y,X}.LADDER_Y.{WIDE,NARROW} # def cross_ladders_x2y(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out): self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, self.bnk.crt_y.ladder_x._get_wide(sel_wide_in)) self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, self.bnk.crt_x.ladder_x._get_wide(sel_wide_in)) self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in)) self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in)) # # modular multiply sel_wide_in by sel_narrow_in # stores intermediate result in WIDE.L and WIDE.H # needs modulus WIDE.N and speed-up coefficients NARROW.N_COEFF to be filled # places two copies of resulting quantity in sel_wide_out and sel_narrow_out # sel_*_in and sel_*_out can overlap (overwriting of input operands is ok) # def modular_multiply(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out, num_words, mode=(True, True), d=False): xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N) yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N) xn_coeff_words = list(self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF).words) yn_coeff_words = list(self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF).words) xn_coeff_words.append(self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.EXT).words[0]) yn_coeff_words.append(self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.EXT).words[0]) xn_coeff = ModExpNG_Operand(None, len(xn_coeff_words), xn_coeff_words) yn_coeff = ModExpNG_Operand(None, len(yn_coeff_words), yn_coeff_words) xxa = self.bnk.crt_x.ladder_x._get_wide(sel_wide_in) xya = self.bnk.crt_x.ladder_y._get_wide(sel_wide_in) yxa = self.bnk.crt_y.ladder_x._get_wide(sel_wide_in) yya = self.bnk.crt_y.ladder_y._get_wide(sel_wide_in) xxb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in) xyb = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_in) yxb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in) yyb = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_in) if not mode[0]: xb = xxb else: xb = xyb if not mode[1]: yb = yxb else: yb = yyb xxp = self.wrk.multipurpose_multiply(xxa, xb, xn, xn_coeff, num_words, self.bnk.crt_x.ladder_x, dump=d, dump_crt="X", dump_ladder="X") xyp = self.wrk.multipurpose_multiply(xya, xb, xn, xn_coeff, num_words, self.bnk.crt_x.ladder_y, dump=d, dump_crt="X", dump_ladder="Y") yxp = self.wrk.multipurpose_multiply(yxa, yb, yn, yn_coeff, num_words, self.bnk.crt_y.ladder_x, dump=d, dump_crt="Y", dump_ladder="X") yyp = self.wrk.multipurpose_multiply(yya, yb, yn, yn_coeff, num_words, self.bnk.crt_y.ladder_y, dump=d, dump_crt="Y", dump_ladder="Y") self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, xxp) self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, xyp) self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, yxp) self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, yyp) self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xxp) self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, xyp) self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yxp) self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, yyp) # # modular subtract values in sel_narrow_in (X-Y) # stores two copies of the result in sel_*_out # def modular_subtract(self, sel_narrow_in, sel_narrow_out, sel_wide_out, num_words): xa = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in) xb = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_in) xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N) ya = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in) yb = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_in) yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N) xd = self.wrk.serial_subtract_modular(xa, xb, xn, num_words) yd = self.wrk.serial_subtract_modular(ya, yb, yn, num_words) self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xd) self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, xd) self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yd) self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, yd) self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, xd) self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, xd) self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, yd) self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, yd) # # modular reduce sel_narrow_in # stores two copies of the result in sel_*_out # def modular_reduce(self, sel_narrow_in, sel_wide_out, sel_narrow_out, num_words): xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N) yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N) xn_coeff_words = list(self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF).words) yn_coeff_words = list(self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF).words) xn_coeff_words.append(self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.EXT).words[0]) yn_coeff_words.append(self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.EXT).words[0]) xn_coeff = ModExpNG_Operand(None, len(xn_coeff_words), xn_coeff_words) yn_coeff = ModExpNG_Operand(None, len(yn_coeff_words), yn_coeff_words) xxb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in) xyb = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_in) yxb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in) yyb = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_in) xxp = self.wrk.multipurpose_multiply(None, xxb, xn, xn_coeff, num_words, self.bnk.crt_x.ladder_x, reduce_only=True) xyp = self.wrk.multipurpose_multiply(None, xyb, xn, xn_coeff, num_words, self.bnk.crt_x.ladder_y, reduce_only=True) yxp = self.wrk.multipurpose_multiply(None, yxb, yn, yn_coeff, num_words, self.bnk.crt_y.ladder_x, reduce_only=True) yyp = self.wrk.multipurpose_multiply(None, yyb, yn, yn_coeff, num_words, self.bnk.crt_y.ladder_y, reduce_only=True) self.bnk.crt_x.ladder_x._set_wide(sel_wide_out, xxp) self.bnk.crt_x.ladder_y._set_wide(sel_wide_out, xyp) self.bnk.crt_y.ladder_x._set_wide(sel_wide_out, yxp) self.bnk.crt_y.ladder_y._set_wide(sel_wide_out, yyp) self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xxp) self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, xyp) self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yxp) self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, yyp) # # propagate carries (convert to non-redundant representation) content in sel_narrow # overwrites input value # def propagate_carries(self, sel_narrow, num_words): xx = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow) xy = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow) yx = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow) yy = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow) self.wrk.convert_nonredundant(xx, num_words) self.wrk.convert_nonredundant(xy, num_words) self.wrk.convert_nonredundant(yx, num_words) self.wrk.convert_nonredundant(yy, num_words) self.bnk.crt_x.ladder_x._set_narrow(sel_narrow, xx) self.bnk.crt_x.ladder_y._set_narrow(sel_narrow, xy) self.bnk.crt_y.ladder_x._set_narrow(sel_narrow, yx) self.bnk.crt_y.ladder_y._set_narrow(sel_narrow, yy) # # copy from CRT_{X,Y}.LADDER_{X,Y}.WIDE.{H,L} to CRT_{X,Y}.LADDER_{X,Y}.NARROW # def merge_lha(self, sel_narrow, num_words): xx_lsb = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.L) xy_lsb = self.bnk.crt_x.ladder_y._get_wide(ModExpNG_WideBankEnum.L) yx_lsb = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.L) yy_lsb = self.bnk.crt_y.ladder_y._get_wide(ModExpNG_WideBankEnum.L) xx_msb = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.H) xy_msb = self.bnk.crt_x.ladder_y._get_wide(ModExpNG_WideBankEnum.H) yx_msb = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.H) yy_msb = self.bnk.crt_y.ladder_y._get_wide(ModExpNG_WideBankEnum.H) xx = xx_lsb.words + xx_msb.words xy = xy_lsb.words + xy_msb.words yx = yx_lsb.words + yx_msb.words yy = yy_lsb.words + yy_msb.words self.bnk.crt_x.ladder_x._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, xx)) self.bnk.crt_x.ladder_y._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, xy)) self.bnk.crt_y.ladder_x._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, yx)) self.bnk.crt_y.ladder_y._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, yy)) # # multiply sel_wide_in by sel_narrow_in # stores twice larger product in WIDE.L and WIDE.H # def regular_multiply(self, sel_wide_in, sel_narrow_in, num_words): xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N) yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N) xn_coeff = self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF) yn_coeff = self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF) xxa = self.bnk.crt_x.ladder_x._get_wide(sel_wide_in) xya = self.bnk.crt_x.ladder_y._get_wide(sel_wide_in) yxa = self.bnk.crt_y.ladder_x._get_wide(sel_wide_in) yya = self.bnk.crt_y.ladder_y._get_wide(sel_wide_in) xb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in) yb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in) xxp = self.wrk.multipurpose_multiply(xxa, xb, None, None, num_words, None, multiply_only=True) xyp = self.wrk.multipurpose_multiply(xya, xb, None, None, num_words, None, multiply_only=True) yxp = self.wrk.multipurpose_multiply(yxa, yb, None, None, num_words, None, multiply_only=True) yyp = self.wrk.multipurpose_multiply(yya, yb, None, None, num_words, None, multiply_only=True) xxp_lsb = xxp.lower_half() xxp_msb = xxp.upper_half() xyp_lsb = xyp.lower_half() xyp_msb = xyp.upper_half() yxp_lsb = yxp.lower_half() yxp_msb = yxp.upper_half() yyp_lsb = yyp.lower_half() yyp_msb = yyp.upper_half() self.bnk.crt_x.ladder_x._set_wide(ModExpNG_WideBankEnum.L, xxp_lsb) self.bnk.crt_x.ladder_y._set_wide(ModExpNG_WideBankEnum.L, xyp_lsb) self.bnk.crt_y.ladder_x._set_wide(ModExpNG_WideBankEnum.L, yxp_lsb) self.bnk.crt_y.ladder_y._set_wide(ModExpNG_WideBankEnum.L, yyp_lsb) self.bnk.crt_x.ladder_x._set_wide(ModExpNG_WideBankEnum.H, xxp_msb) self.bnk.crt_x.ladder_y._set_wide(ModExpNG_WideBankEnum.H, xyp_msb) self.bnk.crt_y.ladder_x._set_wide(ModExpNG_WideBankEnum.H, yxp_msb) self.bnk.crt_y.ladder_y._set_wide(ModExpNG_WideBankEnum.H, yyp_msb) # # adds sel_narrow_a_in to sel_narrow_b_in # stores result in sel_narrow_out # def regular_add(self, sel_wide_a_in, sel_narrow_b_in, sel_narrow_out, num_words): xxa = self.bnk.crt_x.ladder_x._get_wide(sel_wide_a_in) xya = self.bnk.crt_x.ladder_y._get_wide(sel_wide_a_in) yxa = self.bnk.crt_y.ladder_x._get_wide(sel_wide_a_in) yya = self.bnk.crt_y.ladder_y._get_wide(sel_wide_a_in) xxb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_b_in) xyb = self.bnk.crt_x.ladder_y._get_narrow(sel_narrow_b_in) yxb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_b_in) yyb = self.bnk.crt_y.ladder_y._get_narrow(sel_narrow_b_in) xxc = self.wrk.serial_add_uneven(xxa, xxb, num_words) xyc = self.wrk.serial_add_uneven(xya, xyb, num_words) yxc = self.wrk.serial_add_uneven(yxa, yxb, num_words) yyc = self.wrk.serial_add_uneven(yya, yyb, num_words) self.bnk.crt_x.ladder_x._set_narrow(sel_narrow_out, xxc) self.bnk.crt_x.ladder_y._set_narrow(sel_narrow_out, xyc) self.bnk.crt_y.ladder_x._set_narrow(sel_narrow_out, yxc) self.bnk.crt_y.ladder_y._set_narrow(sel_narrow_out, yyc) # # dump working variables before ladder step # def dump_before_step_using_crt(self, pq, m): print("num_words = %d" % pq) print("\rladder_mode_x = %d" % m[0]) print("\rladder_mode_y = %d" % m[1]) self.bnk.crt_x.ladder_x._get_narrow(N.C).format("X_X") self.bnk.crt_x.ladder_y._get_narrow(N.C).format("X_Y") self.bnk.crt_y.ladder_x._get_narrow(N.C).format("Y_X") self.bnk.crt_y.ladder_y._get_narrow(N.C).format("Y_Y") self.bnk.crt_x.ladder_x._get_wide(W.N).format("X_N") self.bnk.crt_x.ladder_x._get_wide(W.N).format("Y_N") self.bnk.crt_x.ladder_x._get_narrow(N.N_COEFF).format("X_N_COEFF") self.bnk.crt_x.ladder_x._get_narrow(N.N_COEFF).format("Y_N_COEFF") # # dump working variables after ladder step # def dump_after_step_using_crt(self): self.bnk.crt_x.ladder_x._get_narrow(N.C).format("X_X") self.bnk.crt_x.ladder_y._get_narrow(N.C).format("X_Y") self.bnk.crt_y.ladder_x._get_narrow(N.C).format("Y_X") self.bnk.crt_y.ladder_y._get_narrow(N.C).format("Y_Y") # # this deliberately converts narrow operand into redundant representation # def _force_overflow(self, bank_crt, sel_narrow): # original words T = bank_crt.ladder_x._get_narrow(sel_narrow).words # loop through upper N-1 words for i in range(1, len(T)): # get msbs of the previous word upper_bits = T[i-1] & _CARRY_MASK # if the previous msbs are empty, force lsbs of the current word # into them and then wipe the current lsbs if upper_bits == 0: lower_bits = T[i] & (_CARRY_MASK >> _WORD_WIDTH) T[i] ^= lower_bits T[i-1] |= (lower_bits << _WORD_WIDTH) # overwrite original words bank_crt.ladder_x._set_narrow(sel_narrow, ModExpNG_Operand(None, len(T), T)) print("Forced overflow.") # # read content of core's output bank and compare it against known good values # def compare_signature(): c = core s = s_known xm = xm_known ym = ym_known core_s = c.out.get_value(O.S) core_xm = c.out.get_value(O.XM) core_ym = c.out.get_value(O.YM) if core_s.number() != s: print("ERROR: core_s != s!") else: print("s is OK") if core_xm.number() != xm: print("ERROR: core_xm != xm!") else: print("x_mutated is OK") if core_ym.number() != ym: print("ERROR: core_ym != ym!") else: print("y_mutated is OK") # # get current ladder mode based on two exponents' bits # def get_ladder_mode_using_crt(v, bit): bit_value_p = (v.dp.number() & (1 << bit)) >> bit bit_value_q = (v.dq.number() & (1 << bit)) >> bit bit_value_p = bit_value_p > 0 bit_value_q = bit_value_q > 0 return (bit_value_p, bit_value_q) # # get current ladder mode based on private exponent's bit # def get_ladder_mode_without_crt(v, bit): bit_value_d = (v.d.number() & (1 << bit)) >> bit bit_value_d = bit_value_d > 0 return (not bit_value_d, bit_value_d) # # print current exponentiation progress # def print_ladder_progress(current, total): # this will always print "100.0%" at the very last iteration, since we're # counting bits from msb to lsb and the very last index is zero, which # is congruent to 0 mod DUMP_PROGRESS_FACTOR if (current % DUMP_PROGRESS_FACTOR) == 0: pct = float((_WORD_WIDTH * total - current) / (_WORD_WIDTH * total)) * 100.0 print("\rdone: %5.1f%%" % pct, end='') # move to next line after the very last iteration if current == 0: print("") # # try to exponentiate using the quad-multiplier (dual-core, dual-ladder) scheme # def sign_using_crt(): c = core v = vector n = n_num_words pq = pq_num_words ff = (False, False) # # A / B => different content in banks (A in WIDE, B in NARROW) # [XY]Z => different content in ladders (XZ in X, YZ in Y) # .. => temporarily half-filled bank (omitted to save space) # * => "crossed" content (X.Y == Y.X and Y.Y == X.X) # # +------------------------+-------+------------------+---------+-----------+ # | A | B | C | D | E | # +------------------------+-------+------------------+---------+-----------+ c.set_wide_from_input (c.bnk.crt_x, W.N, I.N) # | ? | ? | ? | ? | ? | c.set_wide_from_input (c.bnk.crt_y, W.N, I.N) # | ? | ? | ? | ? | ? | c.set_wide_from_input (c.bnk.crt_x, W.A, I.X) # | .. | ? | ? | ? | ? | c.set_wide_from_input (c.bnk.crt_y, W.A, I.Y) # | [XY] / ? | ? | ? | ? | ? | c.set_wide_from_input (c.bnk.crt_x, W.E, I.M) # | [XY] / ? | ? | ? | ? | .. / ? | c.set_wide_from_input (c.bnk.crt_y, W.E, I.M) # | [XY] / ? | ? | ? | ? | M / ? | # +------------------------+-------+------------------+---------+-----------+ c.set_narrow_from_input (c.bnk.crt_x, N.N_COEFF, I.N_COEFF) # | [XY] / ? | ? | ? | ? | M / ? | c.set_narrow_from_input (c.bnk.crt_y, N.N_COEFF, I.N_COEFF) # | [XY] / ? | ? | ? | ? | M / ? | c.set_narrow_from_input (c.bnk.crt_x, N.A, I.N_FACTOR) # | [XY] / .. | ? | ? | ? | M / ? | c.set_narrow_from_input (c.bnk.crt_y, N.A, I.N_FACTOR) # | [XY] / N_FACTOR | ? | ? | ? | M / ? | c.set_narrow_from_input (c.bnk.crt_x, N.E, I.M) # | [XY] / N_FACTOR | ? | ? | ? | M / .. | c.set_narrow_from_input (c.bnk.crt_y, N.E, I.M) # | [XY] / N_FACTOR | ? | ? | ? | M | # +------------------------+-------+------------------+---------+-----------+ c.modular_multiply(W.A, N.A, W.B, N.B, n) # | [XY] / N_FACTOR | [XY]F | ? | ? | M | [XY]F = [XY] * N_FACTOR c.modular_multiply(W.B, N.B, W.C, N.C, n) # | [XY] / N_FACTOR | [XY]F | [XY]YM | ? | M | [XY]MF = [XY]F * [XY]F c.modular_multiply(W.C, N.I, W.D, N.D, n) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M | [XY]M = [XY]MF * 1 # +------------------------+-------+------------------+---------+-----------+ c.propagate_carries(N.D, n) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M | # +------------------------+-------+------------------+---------+-----------+ c.set_output_from_narrow_x(O.XM, c.bnk.crt_x, N.D) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M | c.set_output_from_narrow_x(O.YM, c.bnk.crt_y, N.D) # | [XY] / N_FACTOR | [XY]F | [XY]YM | [XY]M | M | # +------------------------+-------+------------------+---------+-----------+ c.modular_multiply(W.E, N.B, W.C, N.C, n) # | [XY] / N_FACTOR | [XY]F | [XY]MB | [XY]M | M | [XY]MB = M * [XY]F # +------------------------+-------+------------------+---------+-----------+ c.propagate_carries(N.C, n) # | [XY] / N_FACTOR | [XY]F | [XY]MB | [XY]M | M | # +------------------------+-------+------------------+---------+-----------+ c.copy_crt_y2x(W.C, N.C) # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M | # +------------------------+-------+------------------+---------+-----------+ c.set_wide_from_input (c.bnk.crt_x, W.N, I.P) # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M | c.set_wide_from_input (c.bnk.crt_y, W.N, I.Q) # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M | c.set_wide_from_input (c.bnk.crt_x, W.A, I.P_FACTOR) # | ... / N_FACTOR | [XY]F | YMB | [XY]M | M | c.set_wide_from_input (c.bnk.crt_y, W.A, I.Q_FACTOR) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | M | c.set_wide_from_input (c.bnk.crt_x, W.E, I.QINV) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M | # +------------------------+-------+------------------+---------+-----------+ c.set_narrow_from_input(c.bnk.crt_x, N.N_COEFF, I.P_COEFF) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M | c.set_narrow_from_input(c.bnk.crt_y, N.N_COEFF, I.Q_COEFF) # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M | c.set_narrow_from_input(c.bnk.crt_x, N.A, I.P_FACTOR) # | [PQ]_FACTOR / ... | [XY]F | YMB | [XY]M | QINV / M | c.set_narrow_from_input(c.bnk.crt_y, N.A, I.Q_FACTOR) # | [PQ]_FACTOR | [XY]F | YMB | [XY]M | QINV / M | c.set_narrow_from_input(c.bnk.crt_x, N.E, I.QINV) # | [PQ]_FACTOR | [XY]F | YMB | [XY]M | QINV | # +------------------------+-------+------------------+---------+-----------+ c.modular_reduce(N.C, W.D, N.D, pq) # | [PQ]_FACTOR | [XY]F | YMB | [PQ]MBZ | QINV | [PQ]MBZ = YMB mod [PQ] # +------------------------+-------+------------------+---------+-----------+ c.modular_multiply(W.D, N.A, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | [PQ]MB | [PQ]MBZ | QINV | [PQ]MB = [PQ]MBZ * [PQ]_FACTOR c.modular_multiply(W.C, N.A, W.D, N.D, pq) # | [PQ]_FACTOR | [XY]F | [PQ]MB | [PQ]MBF | QINV | [PQ]MBF = [PQ]MB * [PQ]_FACTOR c.modular_multiply(W.A, N.I, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | [PQ]IF | [PQ]MBF | QINV | [PQ]IF = 1 * [PQ]_FACTOR # +------------------------+-------+------------------+---------+-----------+ c.copy_ladders_x2y(W.D, N.D, W.C, N.C) # | [PQ]_FACTOR | [XY]F | [PQ]IF / [PQ]MBF | [PQ]MBF | QINV | # +------------------------+-------+------------------+---------+-----------+ ########################### # | | | | | | # Begin Montgomery Ladder # # | | | | | | ########################### # | | | | | | # | | | | | | for bit in range(_WORD_WIDTH * pq - 1, -1, -1): # | | | | | | m = get_ladder_mode_using_crt(v, bit) # | | | | | | dbg = bit == DUMP_LADDER_INDEX # | | | | | | # | | | | | | if dbg: # | | | | | | if FORCE_OVERFLOW: c._force_overflow(c.bnk.crt_x, N.C) # | | | | | | if DUMP_VECTORS: c.dump_before_step_using_crt(pq, m) # | | | | | | # +------------------------+-------+------------------+---------+-----------+ c.modular_multiply(W.C, N.C, W.C, N.C, pq, mode=m, d=dbg) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]MBF | QINV | # +------------------------+-------+------------------+---------+-----------+ if dbg and DUMP_VECTORS: c.dump_after_step_using_crt() # | | | | | | print_ladder_progress(bit, pq) # | | | | | | # | | | | | | ######################### # | | | | | | # End Montgomery Ladder # # | | | | | | ######################### # | | | | | | # +------------------------+-------+------------------+---------+-----------+ c.modular_multiply(W.C, N.I, W.D, N.D, pq) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB | QINV | [PQ]SB = [PQ]SBF * 1 # +------------------------+-------+------------------+---------+-----------+ c.propagate_carries(N.D, pq) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB | QINV | # +------------------------+-------+------------------+---------+-----------+ c.cross_ladders_x2y(W.D, N.D, W.D, N.D) # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB* | QINV | # +------------------------+-------+------------------+---------+-----------+ c.modular_subtract(N.D, N.C, W.C, pq) # | [PQ]_FACTOR | [XY]F | RSB | [PQ]SB* | QINV | RSB = PSB - QSB # +------------------------+-------+------------------+---------+-----------+ c.modular_multiply(W.C, N.E, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | RSBIZ | [PQ]SB* | QINV | RSBIZ = RSB * QINV c.modular_multiply(W.C, N.A, W.C, N.C, pq) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | QINV | RSBI = RSBIZ * P_FACTOR # +------------------------+-------+------------------+---------+-----------+ c.set_wide_from_input (c.bnk.crt_x, W.E, I.Q) # | [PQ]_FACTOR / N_FACTOR | [XY]F | RSBI | [PQ]SB* | | # +------------------------+-------+------------------+---------+-----------+ c.set_narrow_from_input(c.bnk.crt_x, N.E, I.Q) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | | # +------------------------+-------+------------------+---------+-----------+ c.regular_multiply(W.E, N.C, pq) # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | | = RSBI * Q # +------------------------+-------+------------------+---------+-----------+ c.merge_lha(N.A, pq) # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | [PQ]SB* | | # +------------------------+-------+------------------+---------+-----------+ c.propagate_carries(N.A, n) # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | [PQ]SB* | | # +------------------------+-------+------------------+---------+-----------+ c.copy_crt_y2x(W.D, N.D) # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | QSB* | | # +------------------------+-------+------------------+---------+-----------+ c.regular_add(W.D, N.A, N.C, pq) # | [PQ]_FACTOR / QRSBI | [XY]F | SB | QSB* | | SB = QSB + RSBI # +------------------------+-------+------------------+---------+-----------+ c.set_wide_from_input (c.bnk.crt_x, W.N, I.N) # | | | | | | c.set_wide_from_input (c.bnk.crt_y, W.N, I.N) # | | | | | | # +------------------------+-------+------------------+---------+-----------+ c.set_narrow_from_input(c.bnk.crt_x, N.N_COEFF, I.N_COEFF) # | | | | | | c.set_narrow_from_input(c.bnk.crt_y, N.N_COEFF, I.N_COEFF) # | | | | | | # +------------------------+-------+------------------+---------+-----------+ c.modular_multiply(W.B, N.C, W.A, N.A, n, ff) # | S | | | | | S = XF * SB # +------------------------+-------+------------------+---------+-----------+ c.propagate_carries(N.A, n) # | S | | | | | # +------------------------+-------+------------------+---------+-----------+ c.set_output_from_narrow_x(O.S, c.bnk.crt_x, N.A) # | S | | | | | # +------------------------+-------+------------------+---------+-----------+ #c.dump_banks() # # try to exponentiate using only half of the quad-multiplier (one dual-ladder core) # def sign_without_crt(): c = core v = vector n = n_num_words ff = (False, False) c.set_wide_from_input (c.bnk.crt_x, W.N, I.N) c.set_wide_from_input (c.bnk.crt_y, W.N, I.N) c.set_wide_from_input (c.bnk.crt_x, W.A, I.X) c.set_wide_from_input (c.bnk.crt_y, W.A, I.Y) c.set_wide_from_input (c.bnk.crt_x, W.E, I.M) c.set_wide_from_input (c.bnk.crt_y, W.E, I.M) c.set_narrow_from_input (c.bnk.crt_x, N.N_COEFF, I.N_COEFF) c.set_narrow_from_input (c.bnk.crt_y, N.N_COEFF, I.N_COEFF) c.set_narrow_from_input (c.bnk.crt_x, N.A, I.N_FACTOR) c.set_narrow_from_input (c.bnk.crt_y, N.A, I.N_FACTOR) c.set_narrow_from_input (c.bnk.crt_x, N.E, I.M) c.set_narrow_from_input (c.bnk.crt_y, N.E, I.M) c.modular_multiply(W.A, N.A, W.B, N.B, n) # [XY]F = [XY] * N_FACTOR c.modular_multiply(W.B, N.B, W.C, N.C, n) # [XY]MF = [XY]F * [XY]F c.modular_multiply(W.C, N.I, W.D, N.D, n) # [XY]M = [XY]MF * 1 c.propagate_carries(N.D, n) c.set_output_from_narrow_x(O.XM, c.bnk.crt_x, N.D) c.set_output_from_narrow_x(O.YM, c.bnk.crt_y, N.D) c.modular_multiply(W.E, N.B, W.C, N.C, n) # [XY]MB = M * [XY]F c.set_wide_from_input(c.bnk.crt_x, W.A, I.N_FACTOR) c.set_wide_from_input(c.bnk.crt_y, W.A, I.N_FACTOR) c.modular_multiply(W.C, N.A, W.D, N.D, n) # MBF = MB * N_FACTOR c.modular_multiply(W.A, N.I, W.C, N.C, n) # IF = 1 * N_FACTOR c.copy_ladders_x2y(W.D, N.D, W.C, N.C) ########################### # Begin Montgomery Ladder # ########################### for bit in range(_WORD_WIDTH * n - 1, -1, -1): m = get_ladder_mode_without_crt(v, bit) dbg = bit == DUMP_LADDER_INDEX if dbg: if FORCE_OVERFLOW: c._force_overflow(c.bnk.crt_x, N.C) if DUMP_VECTORS: c.dump_before_step_without_crt(n, m) c.modular_multiply(W.C, N.C, W.C, N.C, n, mode=m, d=dbg) if dbg and DUMP_VECTORS: c.dump_after_step_without_crt() print_ladder_progress(bit, n) ######################### # End Montgomery Ladder # ######################### c.cross_ladders_x2y(W.B, N.B, W.B, N.B) c.modular_multiply(W.C, N.I, W.D, N.D, n) # SB = SBF * 1 c.modular_multiply(W.B, N.D, W.A, N.A, n, mode=ff) # S = XF * SB c.propagate_carries(N.A, n) c.set_output_from_narrow_y(O.S, c.bnk.crt_y, N.A) #c.dump_banks() # # main() # if __name__ == "__main__": # handy shortcuts W = ModExpNG_WideBankEnum N = ModExpNG_NarrowBankEnum I = ModExpNG_CoreInputEnum O = ModExpNG_CoreOutputEnum # set helper quantity # instantiate core # load test vector # transfer numbers from vector to core # set numbers of words # obtain known good reference value with built-in math # mutate blinding quantities with built-in math i = ModExpNG_Operand(1, KEY_LENGTH) core = ModExpNG_Core(i) vector = ModExpNG_TestVector() core.inp.set_value(I.M, vector.m) core.inp.set_value(I.N, vector.n) core.inp.set_value(I.P, vector.p) core.inp.set_value(I.Q, vector.q) core.inp.set_value(I.N_COEFF, vector.n_coeff) core.inp.set_value(I.P_COEFF, vector.p_coeff) core.inp.set_value(I.Q_COEFF, vector.q_coeff) core.inp.set_value(I.N_FACTOR, vector.n_factor) core.inp.set_value(I.P_FACTOR, vector.p_factor) core.inp.set_value(I.Q_FACTOR, vector.q_factor) core.inp.set_value(I.X, vector.x) core.inp.set_value(I.Y, vector.y) core.inp.set_value(I.QINV, vector.qinv) n_num_words = KEY_LENGTH // _WORD_WIDTH pq_num_words = n_num_words // 2 s_known = pow(vector.m.number(), vector.d.number(), vector.n.number()) xm_known = pow(vector.x.number(), 2, vector.n.number()) ym_known = pow(vector.y.number(), 2, vector.n.number()) if DUMP_VECTORS: vector.m.format("M") vector.n.format("N") vector.n_factor.format("N_FACTOR") vector.n_coeff.format("N_COEFF") vector.x.format("X") vector.y.format("Y") vector.p.format("P") vector.q.format("Q") vector.p_factor.format("P_FACTOR") vector.q_factor.format("Q_FACTOR") vector.p_coeff.format("P_COEFF") vector.q_coeff.format("Q_COEFF") vector.d.format("D") vector.dp.format("DP") vector.dq.format("DQ") vector.qinv.format("QINV") xm_known_operand = ModExpNG_Operand(xm_known, KEY_LENGTH) ym_known_operand = ModExpNG_Operand(ym_known, KEY_LENGTH) s_known_operand = ModExpNG_Operand(s_known, KEY_LENGTH) xm_known_operand.format("XM") ym_known_operand.format("YM") s_known_operand.format("S") # sign using CRT and check print("Signing using CRT...") sign_using_crt() compare_signature() # sign without CRT and check print("Signing without CRT...") sign_without_crt() compare_signature() # # End-of-File #