#!/usr/bin/python3
#
#
# ModExpNG core math model.
#
#
# Copyright 2019 The Commons Conservancy Cryptech Project
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# - Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# -------
# Imports
#--------
import sys
import importlib
from enum import Enum, auto
# --------------
# Model Settings
# --------------
# length of public key
KEY_LENGTH = 512

# how many parallel multipliers to use
NUM_MULTS = 8

# ---------------
# Internal Values
# ---------------

# half of key length
_KEY_LENGTH_HALF = KEY_LENGTH // 2

# width of internal math pipeline: regular words and "extended" words
# that have room for extra bits above the regular width
_WORD_WIDTH = 16
_WORD_WIDTH_EXT = 18

# all-ones masks for regular and extended words
_WORD_MASK = (1 << _WORD_WIDTH) - 1
_WORD_MASK_EXT = (1 << _WORD_WIDTH_EXT) - 1

# bits of an extended word that lie above the regular word width
_CARRY_MASK = _WORD_MASK_EXT & ~_WORD_MASK

# folder with test vector scripts
_VECTOR_PATH = "/vector"

# name of test vector class
_VECTOR_CLASS = "Vector"

# ------------------
# Debugging Settings
# ------------------

# at which ladder step to print intermediate debug vector
# (-1 doesn't print internals)
DUMP_LADDER_INDEX = -1

# print entire debug vector components
DUMP_VECTORS = False

# print indices of words at MAC inputs
DUMP_INDICES = False

# print MAC input words
DUMP_MACS_INPUTS = False

# print MAC clearing bitmaps
DUMP_MACS_CLEARING = False

# print MAC accumulators contents
DUMP_MACS_ACCUMULATION = False

# print multiplication output parts
DUMP_MULT_PARTS = False

# print recombination internals
DUMP_RECOMBINATION = False

# print reduction internals
DUMP_REDUCTION = False

# dump secret exponents
DUMP_EXPONENTS = False

# force rarely seen internal overflow situation to verify how its
# handler works
FORCE_OVERFLOW = False

# once per how many ladder steps to update progress indicator
DUMP_PROGRESS_FACTOR = 16

# False: dump 18-bit words, True: dump 32-bit words
DUMP_FORMAT_BUS = True

# False: dump in Verilog format, True: dump as C array initializer
DUMP_FORMAT_C_ARRAY = False
#
# Multi-Precision Integer
#
class ModExpNG_Operand():
    """Multi-precision integer held as a little-endian list of words.

    ``self.words[0]`` is the least significant word.  Operands built from a
    number contain ``_WORD_WIDTH``-bit words; operands built from an explicit
    word list may hold up to ``_WORD_WIDTH_EXT`` bits per word.
    """

    def __init__(self, number, length, words=None):
        """Create an operand from an integer or from a ready-made word list.

        number -- non-negative integer to split into words; ignored when
                  `words` is given
        length -- when `words` is None: width of `number` in bits, must be a
                  positive multiple of _WORD_WIDTH; otherwise: the expected
                  number of entries in `words`
        words  -- optional list of words, least significant first

        Raises ValueError (a subclass of Exception) on any bad argument.
        """
        if words is None:
            # length must be positive and divisible by word width
            if length <= 0 or (length % _WORD_WIDTH) > 0:
                raise ValueError("Bad number length!")
            self._init_from_number(number, length)
        else:
            # length must match words count
            if len(words) != length:
                raise ValueError("Bad words count!")
            self._init_from_words(words, length)

    def copy(self):
        """Return a new operand holding a copy of this operand's word list."""
        return ModExpNG_Operand(None, len(self.words), self.words)

    def _format_verilog(self, name):
        """Print the words as Verilog-style assignments, four per line.

        Depending on DUMP_FORMAT_BUS either every word is printed on its own
        (18'h...) or pairs of words are packed together (32'h..., the odd
        word in the upper half); an odd word count is padded with a zero.
        """
        if not DUMP_FORMAT_BUS:
            items = ["%s[%3d] = 18'h%05x;" % (name, i, word)
                     for i, word in enumerate(self.words)]
        else:
            padded = list(self.words)
            if len(padded) % 2 > 0:
                padded.append(0)
            items = ["%s[%3d] = 32'h%04x%04x;" % (name, i, padded[2*i+1], padded[2*i])
                     for i in range(len(padded) // 2)]
        # items on one line are separated by a space, every fourth item
        # starts a new line, and the output always ends with a newline
        rows = (" ".join(items[start:start+4]) for start in range(0, len(items), 4))
        print("\n".join(rows))

    def _format_c_array(self, name):
        """Print the words as a C `#define` array initializer.

        Words are emitted most significant first, packed in pairs into
        32-bit literals; an odd word count is padded with a leading zero.
        """
        words = list(reversed(self.words))
        if len(words) % 2 > 0:
            words.insert(0, 0)
        print("#define %s_%d_INIT \\\n\t{" % (name, KEY_LENGTH), end='')
        for i in range(0, len(words), 2):
            print("0x%04x%04x" % (words[i], words[i+1]), end='')
            if (i + 2) < len(words):
                print(", ", end='')
                # continue on a new line after every fourth literal
                if ((i + 2) % 8) == 0:
                    print("\\\n\t ", end='')
            else:
                # last literal closes the initializer
                print("}")

    def format(self, name):
        """Print the operand under `name` in the format selected by
        DUMP_FORMAT_C_ARRAY."""
        if not DUMP_FORMAT_C_ARRAY:
            self._format_verilog(name)
        else:
            self._format_c_array(name)

    def _init_from_words(self, words, count):
        """Store a copy of `words` after checking the first `count` entries.

        Raises ValueError when a word does not fit into _WORD_WIDTH_EXT bits.
        """
        limit = 1 << _WORD_WIDTH_EXT
        for i in range(count):
            # word must not exceed 18 bits
            if words[i] >= limit:
                raise ValueError("Word is too large!")
        self.words = list(words)

    def _init_from_number(self, number, length):
        """Split `number` into `length` bits worth of _WORD_WIDTH-bit words.

        The value may be narrower than `length` bits (upper words are then
        zero) but it must be non-negative and must not be wider.

        Raises ValueError when `number` is negative or needs more than
        `length` bits.
        """
        # a negative value has no representation as unsigned words; reject it
        # explicitly instead of letting a sign leak into the word list
        if number < 0:
            raise ValueError("Number must not be negative!")
        if (number >> length) != 0:
            raise ValueError("Number is too large!")
        # least significant word first
        self.words = [(number >> shift) & _WORD_MASK
                      for shift in range(0, length, _WORD_WIDTH)]

    def number(self):
        """Return the integer value of the operand.

        Every word is weighted by its position in units of _WORD_WIDTH bits,
        so any bits a word carries above _WORD_WIDTH add into the next
        position.
        """
        return sum(word << (i * _WORD_WIDTH) for i, word in enumerate(self.words))

    def _get_half(self, part):
        """Return the lower (`part` false) or upper (`part` true) half of the
        words as a new operand."""
        num_words_half = len(self.words) // 2
        if not part:
            half = self.words[:num_words_half]
        else:
            half = self.words[num_words_half:]
        return ModExpNG_Operand(None, num_words_half, half)

    def lower_half(self):
        """Return the least significant half of the words as a new operand."""
        return self._get_half(False)

    def upper_half(self):
        """Return the most significant half of the words as a new operand."""
        return self._get_half(True)
#
# Test Vector
#
class ModExpNG_TestVector():
    """Test vector loaded from a vector script and converted to operands.

    Every quantity of the vector becomes an attribute of the same name
    holding a ModExpNG_Operand.
    """

    def __init__(self):
        """Import the vector module matching KEY_LENGTH and wrap its values."""

        # the module is called vector_<KEY_LENGTH>_randomized and is looked
        # up in the vector folder appended to the first import search path
        module_name = "vector_{}_randomized".format(KEY_LENGTH)
        sys.path.insert(1, sys.path[0] + _VECTOR_PATH)
        vector_module = importlib.import_module(module_name)

        # instantiate the vector class found in the module
        vector_inst = getattr(vector_module, _VECTOR_CLASS)()

        full = KEY_LENGTH
        half = _KEY_LENGTH_HALF

        # (attribute name, operand length); the coefficients are one word
        # longer than the quantity they belong to
        layout = (
            ("m", full),
            ("n", full),
            ("d", full),
            ("p", half),
            ("q", half),
            ("dp", half),
            ("dq", half),
            ("qinv", half),
            ("n_factor", full),
            ("p_factor", half),
            ("q_factor", half),
            ("n_coeff", full + _WORD_WIDTH),
            ("p_coeff", half + _WORD_WIDTH),
            ("q_coeff", half + _WORD_WIDTH),
            ("x", full),
            ("y", full),
        )

        # obtain parts of vector
        for field, length in layout:
            setattr(self, field, ModExpNG_Operand(getattr(vector_inst, field), length))
class ModExpNG_WideBankEnum(Enum):
    """Selectors for the value slots of a ModExpNG_WideBank."""

    # general-purpose slots
    A = auto()
    B = auto()
    C = auto()
    D = auto()
    E = auto()

    N = auto()

    # L and H receive the lower and upper half of a product
    L = auto()
    H = auto()
class ModExpNG_NarrowBankEnum(Enum):
    """Selectors for the value slots of a ModExpNG_NarrowBank."""

    # general-purpose slots
    A = auto()
    B = auto()
    C = auto()
    D = auto()
    E = auto()

    N_COEFF = auto()
    Q = auto()

    # EXT holds the words split off from N_COEFF and Q, I holds the value
    # the bank was constructed with
    EXT = auto()
    I = auto()
class ModExpNG_CoreInputEnum(Enum):
    """Selectors for the values stored in a ModExpNG_CoreInput."""

    M = auto()

    # moduli
    N = auto()
    P = auto()
    Q = auto()

    # per-modulus coefficients
    N_COEFF = auto()
    P_COEFF = auto()
    Q_COEFF = auto()

    # per-modulus factors
    N_FACTOR = auto()
    P_FACTOR = auto()
    Q_FACTOR = auto()

    X = auto()
    Y = auto()
    QINV = auto()
class ModExpNG_CoreOutputEnum(Enum):
    """Selectors for the values stored in a ModExpNG_CoreOutput."""

    XM = auto()
    YM = auto()
    S = auto()
class ModExpNG_WideBank():
    """Bank of operands addressed by ModExpNG_WideBankEnum selectors.

    For every selector the bank keeps two attributes named after the
    lower-cased selector (a, b, c, d, e, n, l, h):

    * ``<name>``       -- the value last written, None until the first write
    * ``<name>_cache`` -- a KEY_LENGTH operand, initially zero, whose words
                          are overwritten in place on every write; words
                          beyond the length of the written value keep their
                          previous contents
    """

    def __init__(self):
        """Create an empty bank with zeroed caches."""
        for sel in ModExpNG_WideBankEnum:
            slot = sel.name.lower()
            setattr(self, slot, None)
            setattr(self, slot + "_cache", ModExpNG_Operand(0, KEY_LENGTH))

    def _slot_name(self, sel, caller):
        """Translate a selector into the name of its value attribute.

        `caller` is the method name to mention in the error message.
        Raises Exception for anything that is not a ModExpNG_WideBankEnum
        member.
        """
        if not isinstance(sel, ModExpNG_WideBankEnum):
            raise Exception("ModExpNG_WideBank.%s(): Invalid selector!" % caller)
        return sel.name.lower()

    def _get_value(self, sel):
        """Return the value stored under `sel` (None if never written)."""
        return getattr(self, self._slot_name(sel, "_get_value"))

    def _get_value_cache(self, sel):
        """Return the cache operand that belongs to `sel`."""
        return getattr(self, self._slot_name(sel, "_get_value_cache") + "_cache")

    def _set_value(self, sel, value):
        """Store a copy of `value` under `sel` and mirror it into the cache.

        The cache operand object is kept and only its leading words are
        overwritten, so references to it obtained earlier stay valid.
        """
        slot = self._slot_name(sel, "_set_value")
        setattr(self, slot, value.copy())
        cache_words = getattr(self, slot + "_cache").words
        for i, word in enumerate(value.words):
            cache_words[i] = word
class ModExpNG_NarrowBank():
    """Bank of operands addressed by ModExpNG_NarrowBankEnum selectors.

    Selectors A, B, C, D, E, N_COEFF and Q each have a value attribute
    (a, b, c, d, e, n_coeff, q; None until written) and a KEY_LENGTH cache
    operand (``<name>_cache``) that is overwritten in place on every write.

    Values written to N_COEFF and Q are stored without their last word;
    that word goes into ``ext`` (index 0 for N_COEFF, index 1 for Q).
    ``ext`` and ``i`` (the constructor argument) are readable through the
    EXT and I selectors but cannot be written via _set_value().
    """

    # names of the selectors that have both a value and a cache
    _CACHED = ("A", "B", "C", "D", "E", "N_COEFF", "Q")

    # selectors whose last word is diverted into self.ext, and at which index
    _EXT_INDEX = {"N_COEFF": 0, "Q": 1}

    def __init__(self, i):
        """Create an empty bank; `i` is stored as-is and returned for the
        I selector."""
        for name in self._CACHED:
            slot = name.lower()
            setattr(self, slot, None)
            setattr(self, slot + "_cache", ModExpNG_Operand(0, KEY_LENGTH))
        # two words: one for N_COEFF, one for Q
        self.ext = ModExpNG_Operand(0, 2*_WORD_WIDTH)
        self.i = i

    def _cached_slot(self, sel, caller):
        """Translate a cached selector into the name of its value attribute.

        `caller` is the method name to mention in the error message.
        Raises Exception for EXT, I and anything that is not a
        ModExpNG_NarrowBankEnum member.
        """
        if isinstance(sel, ModExpNG_NarrowBankEnum) and sel.name in self._CACHED:
            return sel.name.lower()
        raise Exception("ModExpNG_NarrowBank.%s(): Invalid selector!" % caller)

    def _get_value(self, sel):
        """Return the value stored under `sel`, including EXT and I."""
        if sel is ModExpNG_NarrowBankEnum.EXT:
            return self.ext
        if sel is ModExpNG_NarrowBankEnum.I:
            return self.i
        return getattr(self, self._cached_slot(sel, "_get_value"))

    def _get_value_cache(self, sel):
        """Return the cache operand that belongs to `sel` (not available for
        EXT and I)."""
        return getattr(self, self._cached_slot(sel, "_get_value_cache") + "_cache")

    def _set_value(self, sel, value):
        """Store `value` under `sel` and mirror it into the cache.

        For N_COEFF and Q the last word of `value` is written to ``ext``
        and only the remaining words are stored and cached; all other
        selectors store a full copy.
        """
        slot = self._cached_slot(sel, "_set_value")
        ext_index = self._EXT_INDEX.get(sel.name)
        if ext_index is None:
            setattr(self, slot, value.copy())
            num_cached = len(value.words)
        else:
            num_cached = len(value.words) - 1
            setattr(self, slot, ModExpNG_Operand(None, num_cached, value.words[:-1]))
            self.ext.words[ext_index] = value.words[-1]
        # overwrite the cache in place so earlier references stay valid
        cache_words = getattr(self, slot + "_cache").words
        for i in range(num_cached):
            cache_words[i] = value.words[i]
class ModExpNG_CoreInput():
    """Holder for the input values of the core.

    Each ModExpNG_CoreInputEnum member is backed by a private attribute
    named after it (M -> _m, N_COEFF -> _n_coeff, ...), None until set.
    Values are stored by reference, no copy is made.
    """

    def __init__(self):
        """Start with every input unset."""
        for sel in ModExpNG_CoreInputEnum:
            setattr(self, self._field(sel), None)

    @staticmethod
    def _field(sel):
        """Return the backing attribute name for `sel`, or None when `sel`
        is not a ModExpNG_CoreInputEnum member."""
        if isinstance(sel, ModExpNG_CoreInputEnum):
            return "_" + sel.name.lower()
        return None

    def set_value(self, sel, value):
        """Store `value` under selector `sel`."""
        field = self._field(sel)
        if field is None:
            raise Exception("ModExpNG_CoreInput.set_value(): invalid selector!")
        setattr(self, field, value)

    def _get_value(self, sel):
        """Return the value stored under selector `sel`."""
        field = self._field(sel)
        if field is None:
            raise Exception("ModExpNG_CoreInput._get_value(): invalid selector!")
        return getattr(self, field)
class ModExpNG_CoreOutput():
    """Holder for the output values of the core.

    Each ModExpNG_CoreOutputEnum member is backed by a private attribute
    named after it (XM -> _xm, YM -> _ym, S -> _s), None until set.
    Values are stored by reference, no copy is made.
    """

    def __init__(self):
        """Start with every output unset."""
        for sel in ModExpNG_CoreOutputEnum:
            setattr(self, self._field(sel), None)

    @staticmethod
    def _field(sel):
        """Return the backing attribute name for `sel`, or None when `sel`
        is not a ModExpNG_CoreOutputEnum member."""
        if isinstance(sel, ModExpNG_CoreOutputEnum):
            return "_" + sel.name.lower()
        return None

    def _set_value(self, sel, value):
        """Store `value` under selector `sel`."""
        field = self._field(sel)
        if field is None:
            raise Exception("ModExpNG_CoreOutput._set_value(): invalid selector!")
        setattr(self, field, value)

    def get_value(self, sel):
        """Return the value stored under selector `sel`."""
        field = self._field(sel)
        if field is None:
            raise Exception("ModExpNG_CoreOutput.get_value(): invalid selector!")
        return getattr(self, field)
class ModExpNG_BanksPair():
    """One wide bank and one narrow bank, with forwarding accessors."""

    def __init__(self, i):
        """Create both banks; `i` is handed to the narrow bank."""
        self.wide = ModExpNG_WideBank()
        self.narrow = ModExpNG_NarrowBank(i)

    # ---- read access ----

    def _get_wide(self, sel):
        """Return the wide bank value selected by `sel`."""
        bank = self.wide
        return bank._get_value(sel)

    def _get_narrow(self, sel):
        """Return the narrow bank value selected by `sel`."""
        bank = self.narrow
        return bank._get_value(sel)

    def _get_wide_cache(self, sel):
        """Return the wide bank cache selected by `sel`."""
        bank = self.wide
        return bank._get_value_cache(sel)

    def _get_narrow_cache(self, sel):
        """Return the narrow bank cache selected by `sel`."""
        bank = self.narrow
        return bank._get_value_cache(sel)

    # ---- write access ----

    def _set_wide(self, sel, value):
        """Write `value` into the wide bank slot selected by `sel`."""
        bank = self.wide
        bank._set_value(sel, value)

    def _set_narrow(self, sel, value):
        """Write `value` into the narrow bank slot selected by `sel`."""
        bank = self.narrow
        bank._set_value(sel, value)
class ModExpNG_BanksLadder():
    """Two bank pairs, one for each of the ladder's x and y sides."""

    def __init__(self, i):
        """Create both bank pairs, passing `i` on to each of them."""
        self.ladder_x, self.ladder_y = ModExpNG_BanksPair(i), ModExpNG_BanksPair(i)
class ModExpNG_BanksCRT():
    """Two ladder bank sets, one for each of the CRT's x and y sides."""

    def __init__(self, i):
        """Create both ladder bank sets, passing `i` on to each of them."""
        self.crt_x, self.crt_y = ModExpNG_BanksLadder(i), ModExpNG_BanksLadder(i)
class ModExpNG_PartRecombinator():
    """Recombines multiplier output parts into product words.

    Parts are pushed one at a time through a two-phase pipeline.  Each part
    is split into three bit fields (x: 15..0, y: 31..16, z: 45..32).  On
    even pushes the "master" adder sums the paired y and z fields, on odd
    pushes the "slave" adder adds the paired x fields to the master result
    and two output words are produced.
    """

    def __init__(self):
        """Precompute width and mask of a double word."""
        self._WORD_WIDTH_2X = 2 * _WORD_WIDTH
        self._WORD_MASK_2X = _WORD_MASK | (_WORD_MASK << _WORD_WIDTH)

    def _bit_select(self, x, msb, lsb):
        """Return bits msb..lsb (inclusive) of `x`, shifted down to bit 0."""
        num_bits = max(msb - lsb + 1, 0)
        return (x >> lsb) & ((1 << num_bits) - 1)

    def _update_delays(self, x1, y1, z1, z2):
        """Load the delay registers of the x, y and z fields."""
        self._x_dly1 = x1
        self._y_dly1 = y1
        self._z_dly1 = z1
        self._z_dly2 = z2

    def _update_carries(self, cm, cs):
        """Load the master and slave carry registers."""
        self._cry_master = cm
        self._cry_slave = cs

    def _clear_words(self):
        """Start fresh (empty) output word lists."""
        self._words = []
        self._wordsx = []

    def _store_words(self, w, wx):
        """Append an output word and its carry-extended variant."""
        self._words.append(w)
        self._wordsx.append(wx)

    def _flush_pipeline(self, dump):
        """Reset the pipeline state and the output lists."""
        self._phase = False
        self._master_p = None
        self._update_carries(0, 0)
        self._update_delays(0, 0, 0, 0)
        self._clear_words()
        if dump and DUMP_RECOMBINATION:
            print("RCMB -> flush()")
            print("RCMB: master_ab | master_c | slave_ab")

    def _slave_stage(self, slave_ab, trace):
        """Run the slave adder and store the two words it produces.

        The second stored word also has an extended variant with the slave
        carry placed above the regular word width.
        """
        total = self._master_p + slave_ab + self._cry_slave
        self._update_carries(self._cry_master, total >> self._WORD_WIDTH_2X)
        total &= self._WORD_MASK_2X
        if trace:
            print("SLAVE: {0x%1d, 0x%08x}" % (self._cry_slave, total))
        word_lsb = total & _WORD_MASK
        word_msb = total >> _WORD_WIDTH
        self._store_words(word_lsb, word_lsb)
        self._store_words(word_msb, (self._cry_slave << _WORD_WIDTH) | word_msb)

    def _push_pipeline(self, part, dump):
        """Feed one part into the pipeline and advance it by one step."""
        trace = dump and DUMP_RECOMBINATION

        # split the part into its three fields
        x = self._bit_select(part, 15, 0)
        y = self._bit_select(part, 31, 16)
        z = self._bit_select(part, 45, 32)

        # pair every field with its delayed counterpart
        master_ab = (y << 16) | self._y_dly1
        master_c = (self._z_dly1 << 16) | self._z_dly2
        slave_ab = (x << 16) | self._x_dly1

        if trace:
            print("PUSH: 0x%08x | 0x%08x | 0x%08x > " % (master_ab, master_c, slave_ab), end='')

        if self._phase:
            # odd push: the slave adder produces two output words
            self._slave_stage(slave_ab, trace)
        else:
            # even push: the master adder only updates internal state
            master_p = master_ab + master_c + self._cry_master
            self._update_carries(master_p >> self._WORD_WIDTH_2X, self._cry_slave)
            self._master_p = master_p & self._WORD_MASK_2X
            if trace:
                print("")

        self._phase = not self._phase
        self._update_delays(x, y, z, self._z_dly1)

    def _purge_pipeline(self, dump):
        """Run the pending slave step without pushing a new part.

        Only allowed after an odd number of pushes; afterwards the pipeline
        state is invalidated until the next flush.
        """
        slave_ab = self._x_dly1
        if not self._phase:
            raise Exception("RCMB: Can only purge pipeline after odd number of pushes!")
        self._slave_stage(slave_ab, False)
        self._master_p = None
        self._phase = None
        self._update_carries(None, None)
        self._update_delays(None, None, None, None)

    @property
    def words(self):
        """Output words produced since the last flush."""
        return self._words

    @property
    def wordsx(self):
        """Carry-extended variants of the output words."""
        return self._wordsx

    def _run_pipeline(self, inputs, dump):
        """Flush the pipeline, then push every item of `inputs` in order."""
        self._flush_pipeline(dump)
        for din in inputs:
            self._push_pipeline(din, dump)

    def recombine_square(self, parts, ab_num_words, dump):
        """Recombine the 2*N-1 parts of a square multiplication into 2*N
        words (N = ab_num_words)."""

        # hardware computes LSB and MSB words simultaneously, we can't
        # simulate that here, so we compute sequentially
        #
        # the first two words from MSB overlap with the last two words from
        # LSB, so we compute MSB first
        #
        # LSB has N parts and produces N+2 words (the two last cycles
        # accommodate the two "carry" words from MSB)
        # MSB has N-1 parts and produces N words
        # total number of output words is 2*N

        # recombine the upper half, padded with one zero part
        upper = (parts[ab_num_words + i] if i < (ab_num_words - 1) else 0
                 for i in range(ab_num_words))
        self._run_pipeline(upper, dump)
        words_msb_cry = self.words[0:2]
        words_msb = self.words[2:]

        # recombine the lower half followed by the two carry words
        lower = (parts[i] if i < ab_num_words else words_msb_cry[i - ab_num_words]
                 for i in range(ab_num_words + 2))
        self._run_pipeline(lower, dump)

        # note, that the very last word is 1 bit wider!
        words_lsb = self.words[:-1] + [self.wordsx[-1]]
        return words_lsb + words_msb

    def recombine_triangle(self, parts, ab_num_words, dump):
        """Recombine the N+1 parts of a triangle multiplication into N+1
        words (N = ab_num_words)."""

        # hardware computes only LSB, so there's no overlap with MSB
        #
        # LSB has N+1 parts and produces N+1 words, since the recombinator
        # only outputs two words every other cycle, we need to manually
        # purge the internal pipeline

        lower = (parts[i] for i in range(ab_num_words + 1))
        self._run_pipeline(lower, dump)
        self._purge_pipeline(dump)

        # the purge emits one word more than needed
        return self.words[:-1]

    def recombine_rectangle(self, parts, ab_num_words, dump):
        """Recombine the 2*N parts of a rectangle multiplication into 2*N+1
        words (N = ab_num_words)."""

        # hardware computes LSB and MSB words simultaneously, we can't
        # simulate that here, so we compute sequentially
        #
        # the first two words from MSB overlap with the last two words from
        # LSB, so we compute MSB first
        #
        # LSB has N parts and produces N+2 words (the two last cycles
        # accommodate the two "carry" words from MSB)
        # MSB has N parts and produces N+1 words, since the recombinator
        # only outputs two words every other cycle, we need to manually
        # purge the internal pipeline
        # total number of output words is 2*N+1

        # recombine the upper half, padded with one zero part
        upper = (parts[ab_num_words + i] if i < ab_num_words else 0
                 for i in range(ab_num_words + 1))
        self._run_pipeline(upper, dump)
        self._purge_pipeline(dump)
        words_msb_cry = self.words[0:2]
        words_msb = self.words[2:-1]

        # recombine the lower half followed by the two carry words
        lower = (parts[i] if i < ab_num_words else words_msb_cry[i - ab_num_words]
                 for i in range(ab_num_words + 2))
        self._run_pipeline(lower, dump)

        # note, that the very last word is 1 bit wider!
        words_lsb = self.words[:-1] + [self.wordsx[-1]]
        return words_lsb + words_msb
class ModExpNG_WordMultiplier():
    """Model of NUM_MULTS parallel multiply-accumulate (MAC) units plus one
    auxiliary MAC.

    Operands are processed in columns of NUM_MULTS words.  Within a column
    every MAC multiplies its currently selected a-word with the shared
    b-word of the time step and accumulates the product; the a-word
    indices rotate by one on each step.  Accumulator contents are copied
    into the `parts` list at the step where they hold a complete part.
    """

    def __init__(self):
        """Create zeroed accumulators and indices."""
        self._macs = [0] * NUM_MULTS
        self._indices = [0] * NUM_MULTS
        # only element 0 of the auxiliary lists is ever used
        self._mac_aux = [0] * NUM_MULTS
        self._index_aux = [0] * NUM_MULTS

    def _clear_all_macs(self, t, col, dump):
        """Zero all regular accumulators."""
        for x in range(NUM_MULTS):
            self._macs[x] = 0
        if dump and DUMP_MACS_CLEARING:
            print("t=%2d, col=%2d > clear > all" % (t, col))

    def _clear_one_mac(self, x, t, col, dump):
        """Zero the accumulator of MAC `x`."""
        self._macs[x] = 0
        if dump and DUMP_MACS_CLEARING:
            print("t=%2d, col=%2d > clear > x=%d" % (t, col, x))

    def _clear_mac_aux(self, t, col, dump):
        """Zero the auxiliary accumulator."""
        self._mac_aux[0] = 0
        if dump and DUMP_MACS_CLEARING:
            print("t= 0, col=%2d > clear > aux" % (col))

    def _check_inputs(self, a, b):
        """Verify that `a` fits into an extended word and `b` into a
        regular word."""
        if a >= (2 ** _WORD_WIDTH_EXT):
            raise Exception("a > 0x3FFFF!")
        if b >= (2 ** _WORD_WIDTH):
            raise Exception("b > 0xFFFF!")

    def _update_one_mac(self, x, t, col, a, b, dump, need_aux=False):
        """Accumulate a*b into MAC `x`.

        `need_aux` only affects debug output: when set, the line is left
        open for the auxiliary MAC to finish.
        """
        self._check_inputs(a, b)
        p = a * b
        if dump and DUMP_MACS_INPUTS:
            if x == 0:
                print("t=%2d, col=%2d > b=%05x > " % (t, col, b), end='')
            if x > 0:
                print("; ", end='')
            print("MAC[%d]: a=%05x" % (x, a), end='')
            if x == (NUM_MULTS-1) and not need_aux:
                print("")
        self._macs[x] += p

    def _update_mac_aux(self, y, col, a, b, dump):
        """Accumulate a*b into the auxiliary MAC."""
        self._check_inputs(a, b)
        p = a * b
        if dump and DUMP_MACS_INPUTS:
            print("; AUX: a=%05x" % a)
        self._mac_aux[0] += p

    def _preset_indices(self, col):
        """Point the MACs at the consecutive a-words of column `col`."""
        for x in range(len(self._indices)):
            self._indices[x] = col * len(self._indices) + x

    def _preset_index_aux(self, num_cols):
        """Point the auxiliary MAC at the a-word right after the last
        column."""
        self._index_aux[0] = num_cols * len(self._indices)

    def _dump_macs_helper(self, t, col, aux=False):
        """Print all accumulators, optionally with the auxiliary one."""
        print("t=%2d, col=%2d > "% (t, col), end='')
        for i in range(NUM_MULTS):
            if i > 0:
                print(" | ", end='')
            print("mac[%d]: 0x%012x" % (i, self._macs[i]), end='')
        if aux:
            print(" | mac_aux[ 0]: 0x%012x" % (self._mac_aux[0]), end='')
        print("")

    def _dump_macs(self, t, col):
        """Print the regular accumulators."""
        self._dump_macs_helper(t, col)

    def _dump_macs_with_aux(self, t, col):
        """Print the regular and the auxiliary accumulators."""
        self._dump_macs_helper(t, col, True)

    def _dump_indices_helper(self, t, col, aux=False):
        """Print all a-word indices, optionally with the auxiliary one."""
        print("t=%2d, col=%2d > indices:" % (t, col), end='')
        for i in range(NUM_MULTS):
            print(" %2d" % self._indices[i], end='')
        if aux:
            print(" %2d" % self._index_aux[0], end='')
        print("")

    def _dump_indices(self, t, col):
        """Print the regular a-word indices."""
        self._dump_indices_helper(t, col)

    def _dump_indices_with_aux(self, t, col):
        """Print the regular and the auxiliary a-word indices."""
        self._dump_indices_helper(t, col, True)

    def _rotate_indices(self, num_words):
        """Decrement every index, wrapping from 0 to num_words-1."""
        for x in range(len(self._indices)):
            if self._indices[x] > 0:
                self._indices[x] -= 1
            else:
                self._indices[x] = num_words - 1

    def _rotate_index_aux(self):
        """Decrement the auxiliary index (no wrapping)."""
        self._index_aux[0] -= 1

    def _mult_store_part(self, parts, time, column, part_index, mac_index, dump):
        """Copy the accumulator of MAC `mac_index` into parts[part_index]."""
        parts[part_index] = self._macs[mac_index]
        if dump and DUMP_MULT_PARTS:
            print("t=%2d, col=%2d > parts[%2d]: mac[%d] = 0x%012x" %
                (time, column, part_index, mac_index, parts[part_index]))

    def _mult_store_part_aux(self, parts, time, column, part_index, dump):
        """Copy the auxiliary accumulator into parts[part_index]."""
        parts[part_index] = self._mac_aux[0]
        if dump and DUMP_MULT_PARTS:
            print("t=%2d, col=%2d > parts[%2d]: mac_aux[%d] = 0x%012x" %
                (time, column, part_index, 0, parts[part_index]))

    def _step_indices(self, t, col, num_words):
        """Preset the indices at the start of a column, rotate them on every
        later step."""
        if t == 0:
            self._preset_indices(col)
        else:
            self._rotate_indices(num_words)

    def _step_clear_macs(self, t, col, dump):
        """Clear accumulators for step `t` of column `col`.

        Everything is cleared at the start of a column.  Later on, the MAC
        whose lower part was stored on the previous step is cleared so it
        can start accumulating its upper part.
        """
        if t == 0:
            self._clear_all_macs(t, col, dump)
            return
        t1 = t - 1
        # column membership must use the configured number of multipliers,
        # matching the modulo on the next line
        if (t1 // NUM_MULTS) == col:
            self._clear_one_mac(t1 % NUM_MULTS, t, col, dump)

    def multiply_square(self, a_wide, b_narrow, ab_num_words, dump=False):
        """Multiply two operands of `ab_num_words` words each.

        a_wide       -- operand feeding the a-inputs (extended words allowed)
        b_narrow     -- operand feeding the b-input; carries above
                        _WORD_WIDTH are propagated on the fly
        ab_num_words -- number of words per operand
        dump         -- enable debug output (subject to the DUMP_* settings)

        Returns a list of 2*ab_num_words-1 parts.
        """
        num_cols = ab_num_words // NUM_MULTS
        parts = [0] * (2 * ab_num_words - 1)

        for col in range(num_cols):
            b_carry = 0
            for t in range(ab_num_words):
                # take care of indices
                self._step_indices(t, col, ab_num_words)

                # take care of macs
                self._step_clear_macs(t, col, dump)

                # debug output
                if dump and DUMP_INDICES:
                    self._dump_indices(t, col)

                # current b-word
                # multiplier's b-input is limited to 16-bit words, so we
                # need to propagate carries on the fly here, carry can be
                # up to two bits
                bt = b_narrow.words[t] + b_carry
                b_carry = (bt & _CARRY_MASK) >> _WORD_WIDTH
                if dump and b_carry > 1:
                    print("Rare overflow case was detected and then successfully corrected.")
                bt &= _WORD_MASK

                # multiply by a-words
                for x in range(NUM_MULTS):
                    ax = a_wide.words[self._indices[x]]
                    self._update_one_mac(x, t, col, ax, bt, dump)
                    # lower part is complete once the MAC reaches its own word
                    if t == (col * NUM_MULTS + x):
                        self._mult_store_part(parts, t, col, t, x, dump)

                # debug output
                if dump and DUMP_MACS_ACCUMULATION:
                    self._dump_macs(t, col)

                # save the upper parts of product at end of column,
                # for the last column don't save the very last part
                if t == (ab_num_words - 1):
                    for x in range(NUM_MULTS):
                        if not (col == (num_cols - 1) and x == (NUM_MULTS - 1)):
                            part_index = ab_num_words + col * NUM_MULTS + x
                            self._mult_store_part(parts, t, col, part_index, x, dump)

        return parts

    def multiply_triangle(self, a_wide, b_narrow, ab_num_words, dump=False):
        """Compute only the lower parts of a product.

        a_wide       -- operand feeding the a-inputs; the word at index
                        ab_num_words is read as well (by the auxiliary MAC)
        b_narrow     -- operand feeding the b-input, ab_num_words+1 words
        ab_num_words -- number of words per column sweep
        dump         -- enable debug output (subject to the DUMP_* settings)

        Returns a list of ab_num_words+1 parts; the last one comes from the
        auxiliary MAC, which is active in the last column only.
        """
        num_cols = ab_num_words // NUM_MULTS
        parts = [0] * (ab_num_words + 1)

        for col in range(num_cols):
            last_col = col == (num_cols - 1)
            for t in range(ab_num_words + 1):
                # take care of indices
                self._step_indices(t, col, ab_num_words)

                # take care of auxiliary index
                if last_col:
                    if t == 0:
                        self._preset_index_aux(num_cols)
                    else:
                        self._rotate_index_aux()

                # take care of macs
                if t == 0:
                    self._clear_all_macs(t, col, dump)

                # take care of auxiliary mac
                if last_col and t == 0:
                    self._clear_mac_aux(t, col, dump)

                # debug output
                if dump and DUMP_INDICES:
                    self._dump_indices_with_aux(t, col)

                # current b-word
                bt = b_narrow.words[t]

                # multiply by a-words
                for x in range(NUM_MULTS):
                    ax = a_wide.words[self._indices[x]]
                    self._update_one_mac(x, t, col, ax, bt, dump, last_col)
                    if t == (col * NUM_MULTS + x):
                        self._mult_store_part(parts, t, col, t, x, dump)

                # aux multiplier
                if last_col:
                    ax = a_wide.words[self._index_aux[0]]
                    self._update_mac_aux(t, col, ax, bt, dump)
                    if t == ab_num_words:
                        self._mult_store_part_aux(parts, t, col, t, dump)

                # debug output
                if dump and DUMP_MACS_ACCUMULATION:
                    self._dump_macs_with_aux(t, col)

                # shortcut: all parts of this column are stored, only the
                # last column has to run to the end
                if not last_col and t == (NUM_MULTS * (col + 1) - 1):
                    break

        return parts

    def multiply_rectangle(self, a_wide, b_narrow, ab_num_words, dump=False):
        """Multiply an ab_num_words-word operand by one that is a word
        longer.

        a_wide       -- operand feeding the a-inputs, ab_num_words words
        b_narrow     -- operand feeding the b-input, ab_num_words+1 words
        ab_num_words -- number of words of `a_wide`
        dump         -- enable debug output (subject to the DUMP_* settings)

        Returns a list of 2*ab_num_words parts.
        """
        num_cols = ab_num_words // NUM_MULTS
        parts = [0] * (2 * ab_num_words)

        for col in range(num_cols):
            for t in range(ab_num_words + 1):
                # take care of indices
                self._step_indices(t, col, ab_num_words)

                # take care of macs
                self._step_clear_macs(t, col, dump)

                # debug output
                if dump and DUMP_INDICES:
                    self._dump_indices(t, col)

                # current b-word
                bt = b_narrow.words[t]

                # multiply by a-words
                for x in range(NUM_MULTS):
                    ax = a_wide.words[self._indices[x]]
                    self._update_one_mac(x, t, col, ax, bt, dump)
                    # don't save one value for the very last time instant per column
                    if t < ab_num_words and t == (col * NUM_MULTS + x):
                        self._mult_store_part(parts, t, col, t, x, dump)

                # debug output
                if dump and DUMP_MACS_ACCUMULATION:
                    self._dump_macs(t, col)

                # save the upper parts of product at end of column
                if t == ab_num_words:
                    for x in range(NUM_MULTS):
                        part_index = ab_num_words + col * NUM_MULTS + x
                        self._mult_store_part(parts, t, col, part_index, x, dump)

        return parts
class ModExpNG_LowlevelOperator():
    """Single-word addition and subtraction with carry/borrow."""

    def _check_word(self, a):
        """Raise unless `a` lies within 0.._WORD_MASK."""
        if a > _WORD_MASK or a < 0:
            raise Exception("Word out of range!")

    def _check_carry_borrow(self, cb):
        """Raise unless `cb` is 0 or 1."""
        if cb > 1 or cb < 0:
            raise Exception("Carry or borrow out of range!")

    def add_words(self, a, b, c_in):
        """Return (carry_out, sum_word) of a + b + c_in."""
        self._check_word(a)
        self._check_word(b)
        self._check_carry_borrow(c_in)

        total = a + b + c_in
        carry_out = total >> _WORD_WIDTH
        return (carry_out, total & _WORD_MASK)

    def sub_words(self, a, b, b_in):
        """Return (borrow_out, difference_word) of a - b - b_in.

        On underflow the difference wraps around by 2**_WORD_WIDTH and the
        borrow is set.
        """
        self._check_word(a)
        self._check_word(b)
        self._check_carry_borrow(b_in)

        diff = a - b - b_in
        borrow_out = 1 if diff < 0 else 0
        return (borrow_out, diff + (borrow_out << _WORD_WIDTH))
class ModExpNG_Worker():
def __init__(self):
    """Create the three helper units used by the worker's operations."""
    # word-level adder/subtractor
    self.lowlevel = ModExpNG_LowlevelOperator()
    # parallel word multiplier
    self.multiplier = ModExpNG_WordMultiplier()
    # converts multiplier parts into product words
    self.recombinator = ModExpNG_PartRecombinator()
def serial_subtract_modular(self, a, b, n, ab_num_words):
    """Return a - b, with n added back when the subtraction underflows.

    Both candidates (a - b and a - b + n) are computed word by word in a
    single pass; the final borrow decides which one is returned.  The
    result is a new operand of `ab_num_words` words.
    """
    carry, borrow = 0, 0
    plain_diff = []
    wrapped_diff = []

    for idx in range(ab_num_words):
        word_a = a.words[idx]
        word_b = b.words[idx]
        (b_out, diff_word) = self.lowlevel.sub_words(word_a, word_b, borrow)
        (c_out, sum_word) = self.lowlevel.add_words(diff_word, n.words[idx], carry)
        plain_diff.append(diff_word)
        wrapped_diff.append(sum_word)
        carry, borrow = c_out, b_out

    # a borrow out of the top word means a < b
    result = wrapped_diff if b_out else plain_diff
    return ModExpNG_Operand(None, ab_num_words, result)
def serial_add_uneven(self, a, b, ab_num_words):
c_in = 0
ab = list()
for x in range(2 * ab_num_words):
a_word = a.words[x] if x < ab_num_words else 0
b_word = b.words[x]
(c_out, s_out) = self.lowlevel.add_words(a_word, b_word, c_in)
ab.append(s_out)
c_in = c_out
return ModExpNG_Operand(None, 2*ab_num_words, ab)
def multipurpose_multiply(self, a, b, n, n_coeff, ab_num_words, bnk, reduce_only=False, multiply_only=False, dump=False, dump_crt="", dump_ladder=""):
#
# 1. AB = A * B
#
if dump: print("multiply_square(%s_%s)" % (dump_crt, dump_ladder))
if reduce_only:
ab = b
else:
ab_parts = self.multiplier.multiply_square(a, b, ab_num_words, dump)
ab_words = self.recombinator.recombine_square(ab_parts, ab_num_words, dump)
ab = ModExpNG_Operand(None, 2 * ab_num_words, ab_words)
if dump and DUMP_VECTORS:
ab.format("%s_%s_AB" % (dump_crt, dump_ladder))
if not bnk is None:
bnk._set_wide(ModExpNG_WideBankEnum.L, ab.lower_half())
bnk._set_wide(ModExpNG_WideBankEnum.H, ab.upper_half())
if multiply_only:
return ModExpNG_Operand(None, 2*ab_num_words, ab_words)
#
# 2. Q = LSB(AB) * N_COEFF
#
if dump: print("multiply_triangle(%s_%s)" % (dump_crt, dump_ladder))
q_parts = self.multiplier.multiply_triangle(ab, n_coeff, ab_num_words, dump)
q_words = self.recombinator.recombine_triangle(q_parts, ab_num_words, dump)
q = ModExpNG_Operand(None, ab_num_words + 1, q_words)
if dump and DUMP_VECTORS:
q.format("%s_%s_Q" % (dump_crt, dump_ladder))
if not bnk is None:
bnk._set_narrow(ModExpNG_NarrowBankEnum.Q, q)
q_words = list(bnk._get_narrow(ModExpNG_NarrowBankEnum.Q).words)
q_words.append(bnk._get_narrow(ModExpNG_NarrowBankEnum.EXT).words[1])
q = ModExpNG_Operand(None, len(q_words), q_words)
#
# 3. M = Q * N
#
if dump: print("multiply_rectangle(%s_%s)" % (dump_crt, dump_ladder))
m_parts = self.multiplier.multiply_rectangle(n, q, ab_num_words, dump)
m_words = self.recombinator.recombine_rectangle(m_parts, ab_num_words, dump)
m = ModExpNG_Operand(None, 2 * ab_num_words + 1, m_words)
if dump and DUMP_VECTORS:
m.format("%s_%s_M" % (dump_crt, dump_ladder))
#
# 4. R = AB + M
#
#
# 4a. compute carry (actual sum is all zeroes and need not be stored)
#
r_cy = 0 # this can be up to two bits, since we're adding extended words!!
for i in range(ab_num_words + 1):
s = ab.words[i] + m.words[i] + r_cy
r_cy_new = s >> _WORD_WIDTH
if dump and DUMP_REDUCTION:
print("[%2d] 0x%05x + 0x%05x + 0x%x => {0x%x, [0x%05x]}" %
(i, ab.words[i], m.words[i], r_cy, r_cy_new, s & 0xffff)) # ???
r_cy = r_cy_new
#
# 4b. Initialize empty result
#
R = list()
for i in range(ab_num_words):
R.append(0)
#
# 4c. compute the actual upper part of sum (take carry into account)
#
for i in range(ab_num_words):
if dump and DUMP_REDUCTION:
print("[%2d]" % i, end='')
ab_word = ab.words[ab_num_words + i + 1] if i < (ab_num_words - 1) else 0
if dump and DUMP_REDUCTION:
print(" 0x%05x" % ab_word, end='')
m_word = m.words[ab_num_words + i + 1]
if dump and DUMP_REDUCTION:
print(" + 0x%05x" % m_word, end='')
if i == 0: R[i] = r_cy
else: R[i] = 0
if dump and DUMP_REDUCTION:
print(" + 0x%x" % R[i], end='')
R[i] += ab_word
R[i] += m_word
if dump and DUMP_REDUCTION:
print(" = 0x%05x" % R[i])
return ModExpNG_Operand(None, ab_num_words, R)
def convert_nonredundant(self, a, num_words):
carry = 0
for x in range(num_words):
a.words[x] += carry
carry = a.words[x] >> _WORD_WIDTH
a.words[x] &= _WORD_MASK
class ModExpNG_Core():
    """Model of the core: a worker, the CRT bank set, and input/output banks.

    The bank set has two CRT halves (`crt_x`, `crt_y`), each holding two
    ladders (`ladder_x`, `ladder_y`) with WIDE and NARROW banks.  Operations
    that touch all four ladders always visit them in the order
    X.X, X.Y, Y.X, Y.Y.

    In the comments below:
      CRT_(X|Y) means either CRT_X or CRT_Y
      LADDER_{X,Y} means both LADDER_X and LADDER_Y
    """

    def __init__(self, i):
        self.wrk = ModExpNG_Worker()
        self.bnk = ModExpNG_BanksCRT(i)
        self.inp = ModExpNG_CoreInput()
        self.out = ModExpNG_CoreOutput()

    #
    # private helpers
    #
    def _all_ladders(self):
        """Return the four ladder banks in X.X, X.Y, Y.X, Y.Y order."""
        crt_x, crt_y = self.bnk.crt_x, self.bnk.crt_y
        return (crt_x.ladder_x, crt_x.ladder_y, crt_y.ladder_x, crt_y.ladder_y)

    def _extended_n_coeff(self, ladder):
        """Return NARROW.N_COEFF of `ladder` with slot 0 of NARROW.EXT appended."""
        words = list(ladder._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF).words)
        words.append(ladder._get_narrow(ModExpNG_NarrowBankEnum.EXT).words[0])
        return ModExpNG_Operand(None, len(words), words)

    def _dump_bank_words(self, val, n):
        """Print `n` word columns of `val`, using a placeholder for absent words."""
        for i in range(n):
            # test for None first: the original order dereferenced val.words
            # before the None check could protect it
            if val is not None and i < len(val.words):
                print("0x%05x " % val.words[i], end='')
            else:
                print("0xxxxxx ", end='')
        print("")

    #
    # bank dump
    #
    def _dump_bank_indices(self, n):
        """Print the header row of column indices."""
        print(" ", end='')
        for i in range(n): print("[ %3d ] " % i, end='')
        print("")

    def _dump_bank_seps(self, n):
        """Print the separator row between the WIDE and NARROW sections."""
        print(" ", end='')
        for i in range(n): print("------- ", end='')
        print("")

    def _dump_bank_entry_narrow(self, name, op, val, n):
        """Print one NARROW bank row; `val` may be None (all placeholders)."""
        print("%s.NARROW.%s " % (name, op), end='')
        self._dump_bank_words(val, n)

    def _dump_bank_entry_wide(self, name, op, val, n):
        """Print one WIDE bank row; `val` may be None (all placeholders)."""
        print("%s.WIDE.%s " % (name, op), end='')
        self._dump_bank_words(val, n)

    def _dump_bank(self, name, banks_pair):
        """Print every WIDE and NARROW entry of one ladder bank."""
        n = KEY_LENGTH // _WORD_WIDTH
        self._dump_bank_indices(n)
        for label, sel in (("A: ", W.A), ("B: ", W.B), ("C: ", W.C), ("D: ", W.D),
                           ("E: ", W.E), ("N: ", W.N), ("L: ", W.L), ("H: ", W.H)):
            self._dump_bank_entry_wide(name, label, banks_pair._get_wide_cache(sel), n)
        self._dump_bank_seps(n)
        for label, sel in (("A: ", N.A), ("B: ", N.B), ("C: ", N.C), ("D: ", N.D),
                           ("E: ", N.E), ("COEFF:", N.N_COEFF), ("Q: ", N.Q)):
            self._dump_bank_entry_narrow(name, label, banks_pair._get_narrow_cache(sel), n)
        # NOTE(review): EXT is the only entry read through _get_narrow rather
        # than _get_narrow_cache; kept as found -- confirm this is intended
        self._dump_bank_entry_narrow(name, "EXT: ", banks_pair._get_narrow(N.EXT), n)

    def dump_banks(self):
        """Dump all four ladder banks, then terminate the program."""
        print("OPCODE == STOP: BANKS DUMP FOLLOWS")
        self._dump_bank("X.X", self.bnk.crt_x.ladder_x)
        self._dump_bank("X.Y", self.bnk.crt_x.ladder_y)
        self._dump_bank("Y.X", self.bnk.crt_y.ladder_x)
        self._dump_bank("Y.Y", self.bnk.crt_y.ladder_y)
        sys.exit()

    #
    # copy from CRT_(X|Y).LADDER_X.NARROW to OUTPUT
    #
    def set_output_from_narrow_x(self, sel_output, bank_crt, sel_narrow):
        self.out._set_value(sel_output, bank_crt.ladder_x._get_narrow(sel_narrow))

    #
    # copy from CRT_(X|Y).LADDER_Y.NARROW to OUTPUT
    #
    def set_output_from_narrow_y(self, sel_output, bank_crt, sel_narrow):
        self.out._set_value(sel_output, bank_crt.ladder_y._get_narrow(sel_narrow))

    #
    # copy from INPUT to CRT_(X|Y).LADDER_{X,Y}.NARROW
    #
    def set_narrow_from_input(self, bank_crt, sel_narrow, sel_input):
        # the input is fetched once per ladder, exactly as many times as it is stored
        for ladder in (bank_crt.ladder_x, bank_crt.ladder_y):
            ladder._set_narrow(sel_narrow, self.inp._get_value(sel_input))

    #
    # copy from INPUT to CRT_(X|Y).LADDER_{X,Y}.WIDE
    #
    def set_wide_from_input(self, bank_crt, sel_wide, sel_input):
        for ladder in (bank_crt.ladder_x, bank_crt.ladder_y):
            ladder._set_wide(sel_wide, self.inp._get_value(sel_input))

    #
    # copy from CRT_Y.LADDER_{X,Y}.{WIDE,NARROW} to CRT_X.LADDER_{X,Y}.{WIDE,NARROW}
    #
    def copy_crt_y2x(self, sel_wide, sel_narrow):
        src, dst = self.bnk.crt_y, self.bnk.crt_x
        dst.ladder_x._set_wide(sel_wide, src.ladder_x._get_wide(sel_wide))
        dst.ladder_y._set_wide(sel_wide, src.ladder_y._get_wide(sel_wide))
        dst.ladder_x._set_narrow(sel_narrow, src.ladder_x._get_narrow(sel_narrow))
        dst.ladder_y._set_narrow(sel_narrow, src.ladder_y._get_narrow(sel_narrow))

    #
    # copy from CRT_{X,Y}.LADDER_X.{WIDE,NARROW} to CRT_{X,Y}.LADDER_Y.{WIDE,NARROW}
    #
    def copy_ladders_x2y(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out):
        crt_x, crt_y = self.bnk.crt_x, self.bnk.crt_y
        crt_x.ladder_y._set_wide(sel_wide_out, crt_x.ladder_x._get_wide(sel_wide_in))
        crt_y.ladder_y._set_wide(sel_wide_out, crt_y.ladder_x._get_wide(sel_wide_in))
        crt_x.ladder_y._set_narrow(sel_narrow_out, crt_x.ladder_x._get_narrow(sel_narrow_in))
        crt_y.ladder_y._set_narrow(sel_narrow_out, crt_y.ladder_x._get_narrow(sel_narrow_in))

    #
    # copy from CRT_{X,Y}.LADDER_X.{WIDE,NARROW} to CRT_{Y,X}.LADDER_Y.{WIDE,NARROW}
    #
    def cross_ladders_x2y(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out):
        crt_x, crt_y = self.bnk.crt_x, self.bnk.crt_y
        crt_x.ladder_y._set_wide(sel_wide_out, crt_y.ladder_x._get_wide(sel_wide_in))
        crt_y.ladder_y._set_wide(sel_wide_out, crt_x.ladder_x._get_wide(sel_wide_in))
        crt_x.ladder_y._set_narrow(sel_narrow_out, crt_y.ladder_x._get_narrow(sel_narrow_in))
        crt_y.ladder_y._set_narrow(sel_narrow_out, crt_x.ladder_x._get_narrow(sel_narrow_in))

    #
    # modular multiply sel_wide_in by sel_narrow_in
    # stores intermediate result in WIDE.L and WIDE.H
    # needs modulus WIDE.N and speed-up coefficients NARROW.N_COEFF to be filled
    # places two copies of resulting quantity in sel_wide_out and sel_narrow_out
    # sel_*_in and sel_*_out can overlap (overwriting of input operands is ok)
    #
    # mode[0] picks the narrow operand shared by both CRT_X ladders
    # (False: the one from LADDER_X, True: the one from LADDER_Y);
    # mode[1] does the same for CRT_Y
    #
    def modular_multiply(self, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out, num_words, mode=(True, True), d=False):
        ladders = self._all_ladders()
        xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
        yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
        xn_coeff = self._extended_n_coeff(self.bnk.crt_x.ladder_x)
        yn_coeff = self._extended_n_coeff(self.bnk.crt_y.ladder_x)
        # all operands are read before anything is written back
        wide_in = [ladder._get_wide(sel_wide_in) for ladder in ladders]
        narrow_in = [ladder._get_narrow(sel_narrow_in) for ladder in ladders]
        xb = narrow_in[1] if mode[0] else narrow_in[0]
        yb = narrow_in[3] if mode[1] else narrow_in[2]
        jobs = (
            (wide_in[0], xb, xn, xn_coeff, ladders[0], "X", "X"),
            (wide_in[1], xb, xn, xn_coeff, ladders[1], "X", "Y"),
            (wide_in[2], yb, yn, yn_coeff, ladders[2], "Y", "X"),
            (wide_in[3], yb, yn, yn_coeff, ladders[3], "Y", "Y"),
        )
        products = [
            self.wrk.multipurpose_multiply(a, b, n, n_coeff, num_words, ladder,
                                           dump=d, dump_crt=crt, dump_ladder=lad)
            for (a, b, n, n_coeff, ladder, crt, lad) in jobs
        ]
        for ladder, product in zip(ladders, products):
            ladder._set_wide(sel_wide_out, product)
        for ladder, product in zip(ladders, products):
            ladder._set_narrow(sel_narrow_out, product)

    #
    # modular subtract values in sel_narrow_in (X-Y)
    # stores two copies of the result in sel_*_out
    #
    def modular_subtract(self, sel_narrow_in, sel_narrow_out, sel_wide_out, num_words):
        crt_x, crt_y = self.bnk.crt_x, self.bnk.crt_y
        xa = crt_x.ladder_x._get_narrow(sel_narrow_in)
        xb = crt_x.ladder_y._get_narrow(sel_narrow_in)
        xn = crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
        ya = crt_y.ladder_x._get_narrow(sel_narrow_in)
        yb = crt_y.ladder_y._get_narrow(sel_narrow_in)
        yn = crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
        xd = self.wrk.serial_subtract_modular(xa, xb, xn, num_words)
        yd = self.wrk.serial_subtract_modular(ya, yb, yn, num_words)
        # both ladders of a CRT half receive the very same result object
        ladders = self._all_ladders()
        results = (xd, xd, yd, yd)
        for ladder, diff in zip(ladders, results):
            ladder._set_narrow(sel_narrow_out, diff)
        for ladder, diff in zip(ladders, results):
            ladder._set_wide(sel_wide_out, diff)

    #
    # modular reduce sel_narrow_in
    # stores two copies of the result in sel_*_out
    #
    def modular_reduce(self, sel_narrow_in, sel_wide_out, sel_narrow_out, num_words):
        ladders = self._all_ladders()
        xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
        yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
        xn_coeff = self._extended_n_coeff(self.bnk.crt_x.ladder_x)
        yn_coeff = self._extended_n_coeff(self.bnk.crt_y.ladder_x)
        narrow_in = [ladder._get_narrow(sel_narrow_in) for ladder in ladders]
        moduli = ((xn, xn_coeff), (xn, xn_coeff), (yn, yn_coeff), (yn, yn_coeff))
        reduced = [
            self.wrk.multipurpose_multiply(None, b, n, n_coeff, num_words, ladder, reduce_only=True)
            for (b, (n, n_coeff), ladder) in zip(narrow_in, moduli, ladders)
        ]
        for ladder, value in zip(ladders, reduced):
            ladder._set_wide(sel_wide_out, value)
        for ladder, value in zip(ladders, reduced):
            ladder._set_narrow(sel_narrow_out, value)

    #
    # propagate carries (convert to non-redundant representation) content in sel_narrow
    # overwrites input value
    #
    def propagate_carries(self, sel_narrow, num_words):
        ladders = self._all_ladders()
        operands = [ladder._get_narrow(sel_narrow) for ladder in ladders]
        for operand in operands:
            self.wrk.convert_nonredundant(operand, num_words)
        for ladder, operand in zip(ladders, operands):
            ladder._set_narrow(sel_narrow, operand)

    #
    # copy from CRT_{X,Y}.LADDER_{X,Y}.WIDE.{H,L} to CRT_{X,Y}.LADDER_{X,Y}.NARROW
    #
    def merge_lha(self, sel_narrow, num_words):
        ladders = self._all_ladders()
        lsbs = [ladder._get_wide(ModExpNG_WideBankEnum.L) for ladder in ladders]
        msbs = [ladder._get_wide(ModExpNG_WideBankEnum.H) for ladder in ladders]
        # lower half first, upper half after it
        merged = [lsb.words + msb.words for lsb, msb in zip(lsbs, msbs)]
        for ladder, words in zip(ladders, merged):
            ladder._set_narrow(sel_narrow, ModExpNG_Operand(None, 2*num_words, words))

    #
    # multiply sel_wide_in by sel_narrow_in
    # stores twice larger product in WIDE.L and WIDE.H
    #
    def regular_multiply(self, sel_wide_in, sel_narrow_in, num_words):
        ladders = self._all_ladders()
        # NOTE(review): the modulus and its coefficient are read here but never
        # used by the plain multiplication; the reads are kept only in case
        # the bank getters have side effects -- confirm and drop
        xn = self.bnk.crt_x.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
        yn = self.bnk.crt_y.ladder_x._get_wide(ModExpNG_WideBankEnum.N)
        xn_coeff = self.bnk.crt_x.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF)
        yn_coeff = self.bnk.crt_y.ladder_x._get_narrow(ModExpNG_NarrowBankEnum.N_COEFF)
        wide_in = [ladder._get_wide(sel_wide_in) for ladder in ladders]
        # both ladders of a CRT half multiply by LADDER_X's narrow operand
        xb = self.bnk.crt_x.ladder_x._get_narrow(sel_narrow_in)
        yb = self.bnk.crt_y.ladder_x._get_narrow(sel_narrow_in)
        products = [
            self.wrk.multipurpose_multiply(a, b, None, None, num_words, None, multiply_only=True)
            for (a, b) in zip(wide_in, (xb, xb, yb, yb))
        ]
        halves = [(product.lower_half(), product.upper_half()) for product in products]
        for ladder, (lsb, _) in zip(ladders, halves):
            ladder._set_wide(ModExpNG_WideBankEnum.L, lsb)
        for ladder, (_, msb) in zip(ladders, halves):
            ladder._set_wide(ModExpNG_WideBankEnum.H, msb)

    #
    # adds sel_wide_a_in to sel_narrow_b_in
    # (the narrow operand is the twice longer one, see serial_add_uneven)
    # stores result in sel_narrow_out
    #
    def regular_add(self, sel_wide_a_in, sel_narrow_b_in, sel_narrow_out, num_words):
        ladders = self._all_ladders()
        wide_a = [ladder._get_wide(sel_wide_a_in) for ladder in ladders]
        narrow_b = [ladder._get_narrow(sel_narrow_b_in) for ladder in ladders]
        sums = [self.wrk.serial_add_uneven(a, b, num_words) for a, b in zip(wide_a, narrow_b)]
        for ladder, total in zip(ladders, sums):
            ladder._set_narrow(sel_narrow_out, total)

    #
    # dump working variables before ladder step
    #
    def dump_before_step_using_crt(self, pq, m):
        print("num_words = %d" % pq)
        print("\rladder_mode_x = %d" % m[0])
        print("\rladder_mode_y = %d" % m[1])
        self.bnk.crt_x.ladder_x._get_narrow(N.C).format("X_X")
        self.bnk.crt_x.ladder_y._get_narrow(N.C).format("X_Y")
        self.bnk.crt_y.ladder_x._get_narrow(N.C).format("Y_X")
        self.bnk.crt_y.ladder_y._get_narrow(N.C).format("Y_Y")
        # the Y_* entries come from CRT_Y (they used to repeat CRT_X's values)
        self.bnk.crt_x.ladder_x._get_wide(W.N).format("X_N")
        self.bnk.crt_y.ladder_x._get_wide(W.N).format("Y_N")
        self.bnk.crt_x.ladder_x._get_narrow(N.N_COEFF).format("X_N_COEFF")
        self.bnk.crt_y.ladder_x._get_narrow(N.N_COEFF).format("Y_N_COEFF")

    #
    # dump working variables after ladder step
    #
    def dump_after_step_using_crt(self):
        self.bnk.crt_x.ladder_x._get_narrow(N.C).format("X_X")
        self.bnk.crt_x.ladder_y._get_narrow(N.C).format("X_Y")
        self.bnk.crt_y.ladder_x._get_narrow(N.C).format("Y_X")
        self.bnk.crt_y.ladder_y._get_narrow(N.C).format("Y_Y")

    #
    # this deliberately converts narrow operand into redundant representation
    #
    def _force_overflow(self, bank_crt, sel_narrow):
        # original words (this is the operand's own list, edited in place)
        T = bank_crt.ladder_x._get_narrow(sel_narrow).words
        # loop through upper N-1 words
        for i in range(1, len(T)):
            # get msbs of the previous word
            upper_bits = T[i-1] & _CARRY_MASK
            # if the previous msbs are empty, force lsbs of the current word
            # into them and then wipe the current lsbs
            if upper_bits == 0:
                lower_bits = T[i] & (_CARRY_MASK >> _WORD_WIDTH)
                T[i] ^= lower_bits
                T[i-1] |= (lower_bits << _WORD_WIDTH)
        # overwrite original words
        bank_crt.ladder_x._set_narrow(sel_narrow, ModExpNG_Operand(None, len(T), T))
        print("Forced overflow.")
#
# read content of core's output bank and compare it against known good values
#
def compare_signature():
    """Check the core's S, XM and YM outputs against the known good values.

    Works on the module-level `core`, `s_known`, `xm_known` and `ym_known`
    and prints one verdict line per quantity.
    """
    expected = (s_known, xm_known, ym_known)
    # all three outputs are fetched before the first comparison
    fetched = [core.out.get_value(sel) for sel in (O.S, O.XM, O.YM)]
    verdicts = (
        ("ERROR: core_s != s!", "s is OK"),
        ("ERROR: core_xm != xm!", "x_mutated is OK"),
        ("ERROR: core_ym != ym!", "y_mutated is OK"),
    )
    for value, reference, (bad, good) in zip(fetched, expected, verdicts):
        if value.number() != reference:
            print(bad)
        else:
            print(good)
#
# get current ladder mode based on two exponents' bits
#
def get_ladder_mode_using_crt(v, bit):
    """Return (dp_bit, dq_bit): whether bit `bit` is set in v.dp and in v.dq.

    Both flags are plain booleans, ready to be used as the ladder mode of
    the X and the Y half respectively.
    """
    mask = 1 << bit
    p_is_set = (v.dp.number() & mask) != 0
    q_is_set = (v.dq.number() & mask) != 0
    return (p_is_set, q_is_set)
#
# get current ladder mode based on private exponent's bit
#
def get_ladder_mode_without_crt(v, bit):
    """Return (not d_bit, d_bit) for bit `bit` of the private exponent v.d.

    The two flags are always complementary booleans.
    """
    d_is_set = (v.d.number() & (1 << bit)) != 0
    return (not d_is_set, d_is_set)
#
# print current exponentiation progress
#
def print_ladder_progress(current, total):
    """Print the ladder progress as a percentage on a self-overwriting line.

    `current` is the bit index being processed (counting down to zero) and
    `total` the operand length in words.  Only every
    DUMP_PROGRESS_FACTOR-th index produces output.
    """
    # bits run from msb down to lsb, so the final index is zero; zero is a
    # multiple of DUMP_PROGRESS_FACTOR, hence the last call always reports
    # "100.0%"
    is_reporting_step = (current % DUMP_PROGRESS_FACTOR) == 0
    if is_reporting_step:
        bit_count = _WORD_WIDTH * total
        done_fraction = float((bit_count - current) / bit_count)
        print("\rdone: %5.1f%%" % (done_fraction * 100.0), end='')
    # terminate the progress line once the last bit has been handled
    if current == 0:
        print("")
#
# try to exponentiate using the quad-multiplier (dual-core, dual-ladder) scheme
#
def sign_using_crt():
    """Run the complete CRT signing sequence on the module-level `core`.

    Reads the module-level `core`, `vector`, `n_num_words` and
    `pq_num_words`.  The sequence, as annotated step by step below:

      * load N, X, Y, M and the N-related constants, compute [XY]F, [XY]MF
        and [XY]M modulo N, and publish [XY]M as outputs XM / YM;
      * compute [XY]MB = M * [XY]F, then switch the CRT_X / CRT_Y halves
        to moduli P / Q and reduce;
      * run the ladder for `_WORD_WIDTH * pq` steps, each step's mode taken
        from the corresponding bits of `vector.dp` and `vector.dq`;
      * recombine the two halves (subtract, multiply by QINV, multiply by
        Q, add), switch back to modulus N and publish the result as
        output S.

    The statement order is the algorithm; the table in the trailing
    comments tracks the content of banks A..E after every step.
    """
    c = core
    v = vector
    n = n_num_words
    pq = pq_num_words
    ff = (False, False)
    #
    # A / B => different content in banks (A in WIDE, B in NARROW)
    # [XY]Z => different content in ladders (XZ in X, YZ in Y)
    # .. => temporarily half-filled bank (omitted to save space)
    # * => "crossed" content (X.Y == Y.X and Y.Y == X.X)
    #
    # +------------------------+-------+------------------+---------+-----------+
    # | A | B | C | D | E |
    # +------------------------+-------+------------------+---------+-----------+
    c.set_wide_from_input (c.bnk.crt_x, W.N, I.N)  # | ? | ? | ? | ? | ? |
    c.set_wide_from_input (c.bnk.crt_y, W.N, I.N)  # | ? | ? | ? | ? | ? |
    c.set_wide_from_input (c.bnk.crt_x, W.A, I.X)  # | .. | ? | ? | ? | ? |
    c.set_wide_from_input (c.bnk.crt_y, W.A, I.Y)  # | [XY] / ? | ? | ? | ? | ? |
    c.set_wide_from_input (c.bnk.crt_x, W.E, I.M)  # | [XY] / ? | ? | ? | ? | .. / ? |
    c.set_wide_from_input (c.bnk.crt_y, W.E, I.M)  # | [XY] / ? | ? | ? | ? | M / ? |
    # +------------------------+-------+------------------+---------+-----------+
    c.set_narrow_from_input (c.bnk.crt_x, N.N_COEFF, I.N_COEFF)  # | [XY] / ? | ? | ? | ? | M / ? |
    c.set_narrow_from_input (c.bnk.crt_y, N.N_COEFF, I.N_COEFF)  # | [XY] / ? | ? | ? | ? | M / ? |
    c.set_narrow_from_input (c.bnk.crt_x, N.A, I.N_FACTOR)  # | [XY] / .. | ? | ? | ? | M / ? |
    c.set_narrow_from_input (c.bnk.crt_y, N.A, I.N_FACTOR)  # | [XY] / N_FACTOR | ? | ? | ? | M / ? |
    c.set_narrow_from_input (c.bnk.crt_x, N.E, I.M)  # | [XY] / N_FACTOR | ? | ? | ? | M / .. |
    c.set_narrow_from_input (c.bnk.crt_y, N.E, I.M)  # | [XY] / N_FACTOR | ? | ? | ? | M |
    # +------------------------+-------+------------------+---------+-----------+
    c.modular_multiply(W.A, N.A, W.B, N.B, n)  # | [XY] / N_FACTOR | [XY]F | ? | ? | M | [XY]F = [XY] * N_FACTOR
    c.modular_multiply(W.B, N.B, W.C, N.C, n)  # | [XY] / N_FACTOR | [XY]F | [XY]MF | ? | M | [XY]MF = [XY]F * [XY]F
    c.modular_multiply(W.C, N.I, W.D, N.D, n)  # | [XY] / N_FACTOR | [XY]F | [XY]MF | [XY]M | M | [XY]M = [XY]MF * 1
    # +------------------------+-------+------------------+---------+-----------+
    c.propagate_carries(N.D, n)  # | [XY] / N_FACTOR | [XY]F | [XY]MF | [XY]M | M |
    # +------------------------+-------+------------------+---------+-----------+
    c.set_output_from_narrow_x(O.XM, c.bnk.crt_x, N.D)  # | [XY] / N_FACTOR | [XY]F | [XY]MF | [XY]M | M |
    c.set_output_from_narrow_x(O.YM, c.bnk.crt_y, N.D)  # | [XY] / N_FACTOR | [XY]F | [XY]MF | [XY]M | M |
    # +------------------------+-------+------------------+---------+-----------+
    c.modular_multiply(W.E, N.B, W.C, N.C, n)  # | [XY] / N_FACTOR | [XY]F | [XY]MB | [XY]M | M | [XY]MB = M * [XY]F
    # +------------------------+-------+------------------+---------+-----------+
    c.propagate_carries(N.C, n)  # | [XY] / N_FACTOR | [XY]F | [XY]MB | [XY]M | M |
    # +------------------------+-------+------------------+---------+-----------+
    c.copy_crt_y2x(W.C, N.C)  # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M |
    # +------------------------+-------+------------------+---------+-----------+
    c.set_wide_from_input (c.bnk.crt_x, W.N, I.P)  # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M |
    c.set_wide_from_input (c.bnk.crt_y, W.N, I.Q)  # | [XY] / N_FACTOR | [XY]F | YMB | [XY]M | M |
    c.set_wide_from_input (c.bnk.crt_x, W.A, I.P_FACTOR)  # | ... / N_FACTOR | [XY]F | YMB | [XY]M | M |
    c.set_wide_from_input (c.bnk.crt_y, W.A, I.Q_FACTOR)  # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | M |
    c.set_wide_from_input (c.bnk.crt_x, W.E, I.QINV)  # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M |
    # +------------------------+-------+------------------+---------+-----------+
    c.set_narrow_from_input(c.bnk.crt_x, N.N_COEFF, I.P_COEFF)  # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M |
    c.set_narrow_from_input(c.bnk.crt_y, N.N_COEFF, I.Q_COEFF)  # | [PQ]_FACTOR / N_FACTOR | [XY]F | YMB | [XY]M | QINV / M |
    c.set_narrow_from_input(c.bnk.crt_x, N.A, I.P_FACTOR)  # | [PQ]_FACTOR / ... | [XY]F | YMB | [XY]M | QINV / M |
    c.set_narrow_from_input(c.bnk.crt_y, N.A, I.Q_FACTOR)  # | [PQ]_FACTOR | [XY]F | YMB | [XY]M | QINV / M |
    c.set_narrow_from_input(c.bnk.crt_x, N.E, I.QINV)  # | [PQ]_FACTOR | [XY]F | YMB | [XY]M | QINV |
    # +------------------------+-------+------------------+---------+-----------+
    c.modular_reduce(N.C, W.D, N.D, pq)  # | [PQ]_FACTOR | [XY]F | YMB | [PQ]MBZ | QINV | [PQ]MBZ = YMB mod [PQ]
    # +------------------------+-------+------------------+---------+-----------+
    c.modular_multiply(W.D, N.A, W.C, N.C, pq)  # | [PQ]_FACTOR | [XY]F | [PQ]MB | [PQ]MBZ | QINV | [PQ]MB = [PQ]MBZ * [PQ]_FACTOR
    c.modular_multiply(W.C, N.A, W.D, N.D, pq)  # | [PQ]_FACTOR | [XY]F | [PQ]MB | [PQ]MBF | QINV | [PQ]MBF = [PQ]MB * [PQ]_FACTOR
    c.modular_multiply(W.A, N.I, W.C, N.C, pq)  # | [PQ]_FACTOR | [XY]F | [PQ]IF | [PQ]MBF | QINV | [PQ]IF = 1 * [PQ]_FACTOR
    # +------------------------+-------+------------------+---------+-----------+
    c.copy_ladders_x2y(W.D, N.D, W.C, N.C)  # | [PQ]_FACTOR | [XY]F | [PQ]IF / [PQ]MBF | [PQ]MBF | QINV |
    # +------------------------+-------+------------------+---------+-----------+
    ###########################  # | | | | | |
    # Begin Montgomery Ladder #  # | | | | | |
    ###########################  # | | | | | |
    # | | | | | |
    # one ladder step per exponent bit, from the most significant bit down
    for bit in range(_WORD_WIDTH * pq - 1, -1, -1):  # | | | | | |
        m = get_ladder_mode_using_crt(v, bit)  # | | | | | |
        # debug actions are limited to the single step DUMP_LADDER_INDEX
        dbg = bit == DUMP_LADDER_INDEX  # | | | | | |
        # | | | | | |
        if dbg:  # | | | | | |
            if FORCE_OVERFLOW: c._force_overflow(c.bnk.crt_x, N.C)  # | | | | | |
            if DUMP_VECTORS: c.dump_before_step_using_crt(pq, m)  # | | | | | |
        # +------------------------+-------+------------------+---------+-----------+
        c.modular_multiply(W.C, N.C, W.C, N.C, pq, mode=m, d=dbg)  # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]MBF | QINV | <LADDER>
        # +------------------------+-------+------------------+---------+-----------+
        if dbg and DUMP_VECTORS: c.dump_after_step_using_crt()  # | | | | | |
        print_ladder_progress(bit, pq)  # | | | | | |
    # | | | | | |
    #########################  # | | | | | |
    # End Montgomery Ladder #  # | | | | | |
    #########################  # | | | | | |
    # +------------------------+-------+------------------+---------+-----------+
    c.modular_multiply(W.C, N.I, W.D, N.D, pq)  # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB | QINV | [PQ]SB = [PQ]SBF * 1
    # +------------------------+-------+------------------+---------+-----------+
    c.propagate_carries(N.D, pq)  # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB | QINV |
    # +------------------------+-------+------------------+---------+-----------+
    c.cross_ladders_x2y(W.D, N.D, W.D, N.D)  # | [PQ]_FACTOR | [XY]F | [PQ]SBF | [PQ]SB* | QINV |
    # +------------------------+-------+------------------+---------+-----------+
    c.modular_subtract(N.D, N.C, W.C, pq)  # | [PQ]_FACTOR | [XY]F | RSB | [PQ]SB* | QINV | RSB = PSB - QSB
    # +------------------------+-------+------------------+---------+-----------+
    c.modular_multiply(W.C, N.E, W.C, N.C, pq)  # | [PQ]_FACTOR | [XY]F | RSBIZ | [PQ]SB* | QINV | RSBIZ = RSB * QINV
    c.modular_multiply(W.C, N.A, W.C, N.C, pq)  # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | QINV | RSBI = RSBIZ * P_FACTOR
    # +------------------------+-------+------------------+---------+-----------+
    c.set_wide_from_input (c.bnk.crt_x, W.E, I.Q)  # | [PQ]_FACTOR / N_FACTOR | [XY]F | RSBI | [PQ]SB* | |
    # +------------------------+-------+------------------+---------+-----------+
    c.set_narrow_from_input(c.bnk.crt_x, N.E, I.Q)  # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | |
    # +------------------------+-------+------------------+---------+-----------+
    c.regular_multiply(W.E, N.C, pq)  # | [PQ]_FACTOR | [XY]F | RSBI | [PQ]SB* | | = RSBI * Q
    # +------------------------+-------+------------------+---------+-----------+
    c.merge_lha(N.A, pq)  # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | [PQ]SB* | |
    # +------------------------+-------+------------------+---------+-----------+
    # the merged product spans 2 * pq words, hence n rather than pq here
    c.propagate_carries(N.A, n)  # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | [PQ]SB* | |
    # +------------------------+-------+------------------+---------+-----------+
    c.copy_crt_y2x(W.D, N.D)  # | [PQ]_FACTOR / QRSBI | [XY]F | RSBI | QSB* | |
    # +------------------------+-------+------------------+---------+-----------+
    c.regular_add(W.D, N.A, N.C, pq)  # | [PQ]_FACTOR / QRSBI | [XY]F | SB | QSB* | | SB = QSB + RSBI
    # +------------------------+-------+------------------+---------+-----------+
    c.set_wide_from_input (c.bnk.crt_x, W.N, I.N)  # | | | | | |
    c.set_wide_from_input (c.bnk.crt_y, W.N, I.N)  # | | | | | |
    # +------------------------+-------+------------------+---------+-----------+
    c.set_narrow_from_input(c.bnk.crt_x, N.N_COEFF, I.N_COEFF)  # | | | | | |
    c.set_narrow_from_input(c.bnk.crt_y, N.N_COEFF, I.N_COEFF)  # | | | | | |
    # +------------------------+-------+------------------+---------+-----------+
    # `ff` is passed positionally as the `mode` argument
    c.modular_multiply(W.B, N.C, W.A, N.A, n, ff)  # | S | | | | | S = XF * SB
    # +------------------------+-------+------------------+---------+-----------+
    c.propagate_carries(N.A, n)  # | S | | | | |
    # +------------------------+-------+------------------+---------+-----------+
    c.set_output_from_narrow_x(O.S, c.bnk.crt_x, N.A)  # | S | | | | |
    # +------------------------+-------+------------------+---------+-----------+
    #c.dump_banks()
#
# try to exponentiate using only half of the quad-multiplier (one dual-ladder core)
#
def sign_without_crt():
    """Run the non-CRT signing sequence on the module-level `core`.

    The routine reads the core inputs (filled from `vector` by the main
    block), drives the multiplier through a pre-computation phase, a
    bit-serial ladder and a final reduction, and stores three outputs:
    O.XM and O.YM (from narrow bank D) and O.S (from narrow bank A).
    Nothing is returned; all results live inside the core.
    """

    mdl = core              # model instance created by the main block
    tv = vector             # test vector, only consulted for the ladder mode
    words = n_num_words     # operand length in words
    mode_ff = (False, False)    # mode pair handed to the last multiplication

    def load_both(loader, dst, src):
        # Load the same input into the X bank first, then into the Y bank.
        loader(mdl.bnk.crt_x, dst, src)
        loader(mdl.bnk.crt_y, dst, src)

    # wide banks: modulus, blinding pair (X -> crt_x, Y -> crt_y), message
    load_both(mdl.set_wide_from_input, W.N, I.N)
    mdl.set_wide_from_input(mdl.bnk.crt_x, W.A, I.X)
    mdl.set_wide_from_input(mdl.bnk.crt_y, W.A, I.Y)
    load_both(mdl.set_wide_from_input, W.E, I.M)

    # narrow banks: reduction coefficient, N_FACTOR, message
    load_both(mdl.set_narrow_from_input, N.N_COEFF, I.N_COEFF)
    load_both(mdl.set_narrow_from_input, N.A, I.N_FACTOR)
    load_both(mdl.set_narrow_from_input, N.E, I.M)

    # [XY]F  = [XY]   * N_FACTOR
    mdl.modular_multiply(W.A, N.A, W.B, N.B, words)
    # [XY]MF = [XY]F  * [XY]F
    mdl.modular_multiply(W.B, N.B, W.C, N.C, words)
    # [XY]M  = [XY]MF * 1
    mdl.modular_multiply(W.C, N.I, W.D, N.D, words)

    # publish the XM / YM outputs from narrow bank D
    mdl.propagate_carries(N.D, words)
    mdl.set_output_from_narrow_x(O.XM, mdl.bnk.crt_x, N.D)
    mdl.set_output_from_narrow_x(O.YM, mdl.bnk.crt_y, N.D)

    # [XY]MB = M * [XY]F
    mdl.modular_multiply(W.E, N.B, W.C, N.C, words)

    # wide bank A is reused for N_FACTOR from here on
    load_both(mdl.set_wide_from_input, W.A, I.N_FACTOR)

    # MBF = MB * N_FACTOR
    mdl.modular_multiply(W.C, N.A, W.D, N.D, words)
    # IF  = 1  * N_FACTOR
    mdl.modular_multiply(W.A, N.I, W.C, N.C, words)

    mdl.copy_ladders_x2y(W.D, N.D, W.C, N.C)

    ###########################
    # Begin Montgomery Ladder #
    ###########################

    # walk the bit positions from the most significant one down to bit 0
    for bit in reversed(range(_WORD_WIDTH * words)):

        step_mode = get_ladder_mode_without_crt(tv, bit)
        dump_step = (bit == DUMP_LADDER_INDEX)

        # debug hooks fire only on the step selected by DUMP_LADDER_INDEX
        if dump_step and FORCE_OVERFLOW:
            mdl._force_overflow(mdl.bnk.crt_x, N.C)
        if dump_step and DUMP_VECTORS:
            mdl.dump_before_step_without_crt(words, step_mode)

        mdl.modular_multiply(W.C, N.C, W.C, N.C, words, mode=step_mode, d=dump_step)

        if dump_step and DUMP_VECTORS:
            mdl.dump_after_step_without_crt()

        print_ladder_progress(bit, words)

    #########################
    # End Montgomery Ladder #
    #########################

    mdl.cross_ladders_x2y(W.B, N.B, W.B, N.B)

    # SB = SBF * 1
    mdl.modular_multiply(W.C, N.I, W.D, N.D, words)
    # S  = XF * SB
    mdl.modular_multiply(W.B, N.D, W.A, N.A, words, mode=mode_ff)

    # publish the signature from narrow bank A of the Y bank
    mdl.propagate_carries(N.A, words)
    mdl.set_output_from_narrow_y(O.S, mdl.bnk.crt_y, N.A)

    #mdl.dump_banks()
#
# main()
#
if __name__ == "__main__":

    # handy shortcuts (also used as globals by the signing routines)
    W, N = ModExpNG_WideBankEnum, ModExpNG_NarrowBankEnum
    I, O = ModExpNG_CoreInputEnum, ModExpNG_CoreOutputEnum

    # set helper quantity
    i = ModExpNG_Operand(1, KEY_LENGTH)

    # instantiate core
    core = ModExpNG_Core(i)

    # load test vector
    vector = ModExpNG_TestVector()

    # transfer numbers from vector to core, one input slot at a time
    for _slot, _operand in (
        (I.M, vector.m),
        (I.N, vector.n),
        (I.P, vector.p),
        (I.Q, vector.q),
        (I.N_COEFF, vector.n_coeff),
        (I.P_COEFF, vector.p_coeff),
        (I.Q_COEFF, vector.q_coeff),
        (I.N_FACTOR, vector.n_factor),
        (I.P_FACTOR, vector.p_factor),
        (I.Q_FACTOR, vector.q_factor),
        (I.X, vector.x),
        (I.Y, vector.y),
        (I.QINV, vector.qinv),
    ):
        core.inp.set_value(_slot, _operand)

    # set numbers of words
    n_num_words = KEY_LENGTH // _WORD_WIDTH
    pq_num_words = n_num_words // 2

    # obtain known good reference value with built-in math
    s_known = pow(vector.m.number(), vector.d.number(), vector.n.number())

    # mutate blinding quantities with built-in math
    xm_known = pow(vector.x.number(), 2, vector.n.number())
    ym_known = pow(vector.y.number(), 2, vector.n.number())

    if DUMP_VECTORS:

        # print every quantity of the test vector under its label
        for _operand, _label in (
            (vector.m, "M"),
            (vector.n, "N"),
            (vector.n_factor, "N_FACTOR"),
            (vector.n_coeff, "N_COEFF"),
            (vector.x, "X"),
            (vector.y, "Y"),
            (vector.p, "P"),
            (vector.q, "Q"),
            (vector.p_factor, "P_FACTOR"),
            (vector.q_factor, "Q_FACTOR"),
            (vector.p_coeff, "P_COEFF"),
            (vector.q_coeff, "Q_COEFF"),
            (vector.d, "D"),
            (vector.dp, "DP"),
            (vector.dq, "DQ"),
            (vector.qinv, "QINV"),
        ):
            _operand.format(_label)

        # wrap the reference integers into operands so they print the same way
        xm_known_operand = ModExpNG_Operand(xm_known, KEY_LENGTH)
        ym_known_operand = ModExpNG_Operand(ym_known, KEY_LENGTH)
        s_known_operand = ModExpNG_Operand(s_known, KEY_LENGTH)

        for _operand, _label in (
            (xm_known_operand, "XM"),
            (ym_known_operand, "YM"),
            (s_known_operand, "S"),
        ):
            _operand.format(_label)

    # sign using CRT and check
    print("Signing using CRT...")
    sign_using_crt()
    compare_signature()

    # sign without CRT and check
    print("Signing without CRT...")
    sign_without_crt()
    compare_signature()
#
# End-of-File
#