/*
* modexpa7_driver_sample.c
* ----------------------------------------------
* Demo program to test ModExpA7 core in hardware
*
* Authors: Pavel Shatov
* Copyright (c) 2017, NORDUnet A/S
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the NORDUnet nor the names of its contributors may
* be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Note that this test program needs a custom bitstream without
 * the core selector, in which the DUT is located at offset 0.
 */
// stm32 headers
#include "stm-init.h"
#include "stm-led.h"
#include "stm-fmc.h"
// test vectors
#include "test/modexp_fpga_model_vectors.h"
// Core registers: each one is identified by its register number, which is
// multiplied by four (shifted left by two) to obtain the offset that is
// handed to the FMC access routines.
#define CORE_REG_OFFSET(reg_num)    ((reg_num) << 2)

#define CORE_ADDR_NAME0             CORE_REG_OFFSET(0x00)
#define CORE_ADDR_NAME1             CORE_REG_OFFSET(0x01)
#define CORE_ADDR_VERSION           CORE_REG_OFFSET(0x02)

#define CORE_ADDR_CONTROL           CORE_REG_OFFSET(0x08)
#define CORE_ADDR_STATUS            CORE_REG_OFFSET(0x09)

#define CORE_ADDR_MODE              CORE_REG_OFFSET(0x10)
#define CORE_ADDR_MODULUS_BITS      CORE_REG_OFFSET(0x11)
#define CORE_ADDR_EXPONENT_BITS     CORE_REG_OFFSET(0x12)
#define CORE_ADDR_BUFFER_BITS       CORE_REG_OFFSET(0x13)
#define CORE_ADDR_ARRAY_BITS        CORE_REG_OFFSET(0x14)

// Operand buffers: four banks that start at offset 0x800 and are spaced
// 0x200 apart (modulus, message, exponent, result -- in that order).
#define CORE_BANK_OFFSET(bank_num)  (0x800 + (bank_num) * 0x200)

#define CORE_ADDR_BANK_MODULUS      CORE_BANK_OFFSET(0)
#define CORE_ADDR_BANK_MESSAGE      CORE_BANK_OFFSET(1)
#define CORE_ADDR_BANK_EXPONENT     CORE_BANK_OFFSET(2)
#define CORE_ADDR_BANK_RESULT       CORE_BANK_OFFSET(3)

// Bit masks for the control, status and mode registers.
#define CORE_CONTROL_BIT_INIT       (1 << 0)
#define CORE_CONTROL_BIT_NEXT       (1 << 1)

#define CORE_STATUS_BIT_READY       (1 << 0)
#define CORE_STATUS_BIT_VALID       (1 << 1)

#define CORE_MODE_BIT_CRT           (1 << 1)
/*
 * test vectors
 *
 * The initializers are macros that are expected to come from
 * test/modexp_fpga_model_vectors.h. The arrays are named after the role
 * they play in main():
 *
 *   n_<bits>               modulus passed to setup_modexpa7()
 *   m_<bits>               message that gets signed
 *   d_<bits>               exponent for the regular (non-CRT) test
 *   s_<bits>               reference result of the regular test
 *
 *   p_<bits>,  q_<bits>    the two half-width moduli used by the CRT tests
 *   dp_<bits>, dq_<bits>   exponents used together with p and q
 *   mp_<bits>, mq_<bits>   reference results of the two CRT tests
 */

/* 384-bit regular vector */
static const uint32_t m_384[] = M_384;
static const uint32_t n_384[] = N_384;
static const uint32_t d_384[] = D_384;
static const uint32_t s_384[] = S_384;
/* 512-bit regular vector */
static const uint32_t m_512[] = M_512;
static const uint32_t n_512[] = N_512;
static const uint32_t d_512[] = D_512;
static const uint32_t s_512[] = S_512;
/* 192-bit CRT vectors (used together with the 384-bit message) */
static const uint32_t p_192[] = P_192;
static const uint32_t q_192[] = Q_192;
static const uint32_t dp_192[] = DP_192;
static const uint32_t dq_192[] = DQ_192;
static const uint32_t mp_192[] = MP_192;
static const uint32_t mq_192[] = MQ_192;
/* 256-bit CRT vectors (used together with the 512-bit message) */
static const uint32_t p_256[] = P_256;
static const uint32_t q_256[] = Q_256;
static const uint32_t dp_256[] = DP_256;
static const uint32_t dq_256[] = DQ_256;
static const uint32_t mp_256[] = MP_256;
static const uint32_t mq_256[] = MQ_256;
/*
 * prototypes
 *
 * Throughout, 'l' is the operand width in bits; the routines process
 * l / 32 words of each operand (twice as many words of the message in
 * the CRT variant).
 */

/* flips the yellow led on every call */
void toggle_yellow_led(void);
/* loads modulus 'n' into the core and waits for the precomputation to finish */
void setup_modexpa7( const uint32_t *n, size_t l);
/*
 * exponentiates message 'm' with exponent 'd' (CRT mode disabled) and
 * compares the result with reference 's'; returns 1 on match, 0 otherwise
 */
int test_modexpa7( const uint32_t *m,
const uint32_t *d,
const uint32_t *s,
size_t l);
/*
 * same as test_modexpa7(), but with CRT mode enabled: the message is
 * twice as wide as the exponent and the reference result
 */
int test_modexpa7_crt( const uint32_t *m,
const uint32_t *d,
const uint32_t *s,
size_t l);
/*
* test routine
*/
int main()
{
int ok;
stm_init();
fmc_init();
// turn on the green led
led_on(LED_GREEN);
led_off(LED_RED);
led_off(LED_YELLOW);
led_off(LED_BLUE);
// check, that core is present
uint32_t core_name0;
uint32_t core_name1;
uint32_t core_version;
fmc_read_32(CORE_ADDR_NAME0, &core_name0);
fmc_read_32(CORE_ADDR_NAME1, &core_name1);
fmc_read_32(CORE_ADDR_VERSION, &core_version);
// must be "mode", "xpa7", "0.20"
if ( (core_name0 != 0x6D6F6465) ||
(core_name1 != 0x78706137) ||
(core_version != 0x302E3230))
{
led_off(LED_GREEN);
led_on(LED_RED);
while (1);
}
// read compile-time settings
uint32_t core_buffer_bits;
uint32_t core_array_bits;
// largest supported operand width, systolic array "power"
fmc_read_32(CORE_ADDR_BUFFER_BITS, &core_buffer_bits);
fmc_read_32(CORE_ADDR_ARRAY_BITS, &core_array_bits);
// repeat forever
while (1)
{
// New modulus requires precomputation of modulus-dependent
// speed-up coefficient, this must be done once per new
// modulus, i.e. when we're repeatedly signing with the
// same key, we only need to do precomputation once before
// starting the very first signing operation.
// fresh start
ok = 1;
{
// run precomputation of modulus-dependent factor for the 384-bit modulus
setup_modexpa7(n_384, 384);
// try signing the message from the 384-bit test vector
ok = ok && test_modexpa7(m_384, d_384, s_384, 384);
}
{
// run precomputation of modulus-dependent factor for the 512-bit modulus
setup_modexpa7(n_512, 512);
// try signing the message from the 512-bit test vector
ok = ok && test_modexpa7(m_512, d_512, s_512, 512);
}
{
// run precomputation of modulus-dependent factor for the first 192-bit part of 384-bit modulus
setup_modexpa7(p_192, 192);
// try signing 384-bit base using 192-bit exponent
ok = ok && test_modexpa7_crt(m_384, dp_192, mp_192, 192);
// run precomputation of modulus-dependent factor for the second 192-bit part of 384-bit modulus
setup_modexpa7(q_192, 192);
// try signing 384-bit base using 192-bit exponent
ok = ok && test_modexpa7_crt(m_384, dq_192, mq_192, 192);
}
{
// run precomputation of modulus-dependent factor for the first 256-bit part of 512-bit modulus
setup_modexpa7(p_256, 256);
// try signing 512-bit base using 256-bit exponent
ok = ok && test_modexpa7_crt(m_512, dp_256, mp_256, 256);
// run precomputation of modulus-dependent factor for the second 256-bit part of 512-bit modulus
setup_modexpa7(q_256, 256);
// try signing 512-bit base using 256-bit exponent
ok = ok && test_modexpa7_crt(m_512, dq_256, mq_256, 256);
}
// turn on the red led to indicate something went wrong
if (!ok)
{ led_off(LED_GREEN);
led_on(LED_RED);
}
// indicate, that we're alive doing something...
toggle_yellow_led();
}
}
/*
 * Load new modulus and do the necessary precomputations.
 *
 * n - words of the modulus; l / 32 words are read. They are written into
 *     the modulus bank in reverse index order, i.e. n[0] ends up at the
 *     highest offset and the last word at the lowest one.
 * l - width of the modulus in bits
 *
 * The routine returns after the core has set its 'ready' status bit. The
 * wait is a busy loop without a timeout.
 */
void setup_modexpa7(const uint32_t *n, size_t l)
{
    size_t i, num_words;
    uint32_t num_bits;
    uint32_t reg_control;
    uint32_t reg_status = 0;    // defined value even if a read leaves it untouched
    uint32_t n_word;
    uint32_t dummy_num_cyc;     // number of status polls; not used by the code
                                // itself, presumably meant to be watched in a
                                // debugger

    // determine number of 32-bit words
    num_words = l >> 5;

    // set modulus width
    num_bits = (uint32_t)l;
    fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);

    // fill modulus bank (the least significant word
    // is at the lowest offset)
    for (i = 0; i < num_words; i++)
    {
        n_word = n[i];
        fmc_write_32(CORE_ADDR_BANK_MODULUS + ((num_words - (i + 1)) * sizeof(uint32_t)), &n_word);
    }

    // clear 'init' control bit, then set 'init' control bit again
    // to trigger precomputation (core is edge-triggered)
    reg_control = 0;
    fmc_write_32(CORE_ADDR_CONTROL, &reg_control);
    reg_control = CORE_CONTROL_BIT_INIT;
    fmc_write_32(CORE_ADDR_CONTROL, &reg_control);

    // wait for 'ready' status bit to be set
    dummy_num_cyc = 0;
    do
    {
        dummy_num_cyc++;
        fmc_read_32(CORE_ADDR_STATUS, &reg_status);
    }
    while (!(reg_status & CORE_STATUS_BIT_READY));
}
//
// Sign the message and compare it against the correct reference value
// (regular mode, CRT disabled).
//
// m - words of the message, l / 32 of them are used
// d - words of the exponent, l / 32 of them are used
// s - words of the reference result, l / 32 of them are compared
// l - operand width in bits
//
// Message and exponent are written into their banks in reverse index order
// (word [0] goes to the highest offset); the result bank is compared against
// the reference in reverse index order, too. The modulus bank is not touched
// here, so the modulus has to be loaded before this routine is called.
//
// Returns 1 when all the result words match, 0 at the first mismatch. The
// wait for the 'valid' status bit is a busy loop without a timeout.
//
int test_modexpa7(const uint32_t *m,
                  const uint32_t *d,
                  const uint32_t *s,
                  size_t l)
{
    size_t i, num_words;
    uint32_t num_bits;
    uint32_t reg_control;
    uint32_t reg_status = 0;    // defined value even if a read leaves it untouched
    uint32_t m_word, d_word;
    uint32_t s_word = 0;
    uint32_t dummy_num_cyc;     // number of status polls; not used by the code
                                // itself, presumably meant to be watched in a
                                // debugger
    uint32_t mode;

    // determine number of 32-bit words
    num_words = l >> 5;

    // set exponent width
    num_bits = (uint32_t)l;
    fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);

    // disable CRT mode
    mode = 0;
    fmc_write_32(CORE_ADDR_MODE, &mode);

    // fill message and exponent banks (the least significant
    // word is at the lowest offset)
    for (i = 0; i < num_words; i++)
    {
        m_word = m[i];
        d_word = d[i];
        fmc_write_32(CORE_ADDR_BANK_MESSAGE  + ((num_words - (i + 1)) * sizeof(uint32_t)), &m_word);
        fmc_write_32(CORE_ADDR_BANK_EXPONENT + ((num_words - (i + 1)) * sizeof(uint32_t)), &d_word);
    }

    // clear 'next' control bit, then set 'next' control bit again
    // to trigger exponentiation (core is edge-triggered)
    reg_control = 0;
    fmc_write_32(CORE_ADDR_CONTROL, &reg_control);
    reg_control = CORE_CONTROL_BIT_NEXT;
    fmc_write_32(CORE_ADDR_CONTROL, &reg_control);

    // wait for 'valid' status bit to be set
    dummy_num_cyc = 0;
    do
    {
        dummy_num_cyc++;
        fmc_read_32(CORE_ADDR_STATUS, &reg_status);
    }
    while (!(reg_status & CORE_STATUS_BIT_VALID));

    // read back the result word-by-word, then compare to the reference values
    for (i = 0; i < num_words; i++)
    {
        fmc_read_32(CORE_ADDR_BANK_RESULT + (i * sizeof(uint32_t)), &s_word);
        if (s_word != s[num_words - (i + 1)])
            return 0;
    }

    // everything went just fine
    return 1;
}
//
// Sign the message with CRT mode enabled and compare the result against
// the correct reference value.
//
// m - words of the message, 2 * (l / 32) of them are used (the message is
//     twice as wide as the modulus in CRT mode)
// d - words of the exponent, l / 32 of them are used
// s - words of the reference result, l / 32 of them are compared
// l - width of exponent and result in bits
//
// Exponent and message are written into their banks in reverse index order
// (word [0] goes to the highest offset that is written); the result bank is
// compared against the reference in reverse index order, too. The modulus
// bank is not touched here, so the modulus has to be loaded before this
// routine is called.
//
// Returns 1 when all the result words match, 0 at the first mismatch. The
// wait for the 'valid' status bit is a busy loop without a timeout.
//
int test_modexpa7_crt(const uint32_t *m,
                      const uint32_t *d,
                      const uint32_t *s,
                      size_t l)
{
    size_t i, num_words;
    uint32_t num_bits;
    uint32_t reg_control;
    uint32_t reg_status = 0;    // defined value even if a read leaves it untouched
    uint32_t m_word, d_word;
    uint32_t s_word = 0;
    uint32_t dummy_num_cyc;     // number of status polls; not used by the code
                                // itself, presumably meant to be watched in a
                                // debugger
    uint32_t mode;

    // determine number of 32-bit words
    num_words = l >> 5;

    // set exponent width
    num_bits = (uint32_t)l;
    fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);

    // enable CRT mode
    mode = CORE_MODE_BIT_CRT;
    fmc_write_32(CORE_ADDR_MODE, &mode);

    // fill exponent bank (the least significant word
    // is at the lowest offset)
    for (i = 0; i < num_words; i++)
    {
        d_word = d[i];
        fmc_write_32(CORE_ADDR_BANK_EXPONENT + ((num_words - (i + 1)) * sizeof(uint32_t)), &d_word);
    }

    // fill message bank (the least significant word
    // is at the lowest offset, message is twice larger
    // than the modulus in CRT mode!)
    for (i = 0; i < (2 * num_words); i++)
    {
        m_word = m[i];
        fmc_write_32(CORE_ADDR_BANK_MESSAGE + ((2 * num_words - (i + 1)) * sizeof(uint32_t)), &m_word);
    }

    // clear 'next' control bit, then set 'next' control bit again
    // to trigger exponentiation (core is edge-triggered)
    reg_control = 0;
    fmc_write_32(CORE_ADDR_CONTROL, &reg_control);
    reg_control = CORE_CONTROL_BIT_NEXT;
    fmc_write_32(CORE_ADDR_CONTROL, &reg_control);

    // wait for 'valid' status bit to be set
    dummy_num_cyc = 0;
    do
    {
        dummy_num_cyc++;
        fmc_read_32(CORE_ADDR_STATUS, &reg_status);
    }
    while (!(reg_status & CORE_STATUS_BIT_VALID));

    // read back the result word-by-word, then compare to the reference values
    for (i = 0; i < num_words; i++)
    {
        fmc_read_32(CORE_ADDR_BANK_RESULT + (i * sizeof(uint32_t)), &s_word);
        if (s_word != s[num_words - (i + 1)])
            return 0;
    }

    // everything went just fine
    return 1;
}
//
// Flip the yellow led on every call: a blinking led tells that the test
// loop is still running and we're not stuck somewhere.
//
void toggle_yellow_led(void)
{
    // what the previous call left behind (nonzero: led was switched on)
    static int yellow_is_on = 0;

    if (yellow_is_on)
    {
        yellow_is_on = 0;
        led_off(LED_YELLOW);
    }
    else
    {
        yellow_is_on = 1;
        led_on(LED_YELLOW);
    }
}
//
// end of file
//