aboutsummaryrefslogtreecommitdiff
path: root/src/stm32/modexpa7_driver_sample.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/stm32/modexpa7_driver_sample.c')
-rw-r--r--src/stm32/modexpa7_driver_sample.c236
1 files changed, 165 insertions, 71 deletions
diff --git a/src/stm32/modexpa7_driver_sample.c b/src/stm32/modexpa7_driver_sample.c
index 390c949..e1de2bd 100644
--- a/src/stm32/modexpa7_driver_sample.c
+++ b/src/stm32/modexpa7_driver_sample.c
@@ -59,12 +59,19 @@
#define CORE_ADDR_BUFFER_BITS (0x13 << 2)
#define CORE_ADDR_ARRAY_BITS (0x14 << 2)
+ // operand bank size
+#define BANK_LENGTH 0x200 // 0x200 = 512 bytes = 4096 bits
// locations of operand buffers
-#define CORE_ADDR_BANK_MODULUS (0x800 + 0 * 0x200)
-#define CORE_ADDR_BANK_MESSAGE (0x800 + 1 * 0x200)
-#define CORE_ADDR_BANK_EXPONENT (0x800 + 2 * 0x200)
-#define CORE_ADDR_BANK_RESULT (0x800 + 3 * 0x200)
+#define CORE_ADDR_BANK_MODULUS (BANK_LENGTH * (8 + 0))
+#define CORE_ADDR_BANK_MESSAGE (BANK_LENGTH * (8 + 1))
+#define CORE_ADDR_BANK_EXPONENT (BANK_LENGTH * (8 + 2))
+#define CORE_ADDR_BANK_RESULT (BANK_LENGTH * (8 + 3))
+
+#define CORE_ADDR_BANK_MODULUS_COEFF_OUT (BANK_LENGTH * (8 + 4))
+#define CORE_ADDR_BANK_MODULUS_COEFF_IN (BANK_LENGTH * (8 + 5))
+#define CORE_ADDR_BANK_MONTGOMERY_FACTOR_OUT (BANK_LENGTH * (8 + 6))
+#define CORE_ADDR_BANK_MONTGOMERY_FACTOR_IN (BANK_LENGTH * (8 + 7))
// bit maps
#define CORE_CONTROL_BIT_INIT 0x00000001
@@ -75,6 +82,27 @@
#define CORE_MODE_BIT_CRT 0x00000002
+ /*
+ * zero operands
+ */
+#define Z_384 \
+ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000}
+
+#define Z_192 \
+ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000}
+
+#define Z_512 \
+ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000}
+
+#define Z_256 \
+ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000}
/*
* test vectors
@@ -83,11 +111,15 @@ static const uint32_t m_384[] = M_384;
static const uint32_t n_384[] = N_384;
static const uint32_t d_384[] = D_384;
static const uint32_t s_384[] = S_384;
+static uint32_t n_coeff_384[] = Z_384;
+static uint32_t factor_384[] = Z_384;
static const uint32_t m_512[] = M_512;
static const uint32_t n_512[] = N_512;
static const uint32_t d_512[] = D_512;
static const uint32_t s_512[] = S_512;
+static uint32_t n_coeff_512[] = Z_512;
+static uint32_t factor_512[] = Z_512;
static const uint32_t p_192[] = P_192;
static const uint32_t q_192[] = Q_192;
@@ -95,6 +127,10 @@ static const uint32_t dp_192[] = DP_192;
static const uint32_t dq_192[] = DQ_192;
static const uint32_t mp_192[] = MP_192;
static const uint32_t mq_192[] = MQ_192;
+static uint32_t p_coeff_192[] = Z_192;
+static uint32_t q_coeff_192[] = Z_192;
+static uint32_t factor_p_192[] = Z_192;
+static uint32_t factor_q_192[] = Z_192;
static const uint32_t p_256[] = P_256;
static const uint32_t q_256[] = Q_256;
@@ -102,7 +138,10 @@ static const uint32_t dp_256[] = DP_256;
static const uint32_t dq_256[] = DQ_256;
static const uint32_t mp_256[] = MP_256;
static const uint32_t mq_256[] = MQ_256;
-
+static uint32_t p_coeff_256[] = Z_256;
+static uint32_t q_coeff_256[] = Z_256;
+static uint32_t factor_p_256[] = Z_256;
+static uint32_t factor_q_256[] = Z_256;
/*
@@ -110,16 +149,25 @@ static const uint32_t mq_256[] = MQ_256;
*/
void toggle_yellow_led(void);
-void setup_modexpa7( const uint32_t *n, size_t l);
+void setup_modexpa7( const uint32_t *n,
+ uint32_t *coeff,
+ uint32_t *factor,
+ size_t l);
-int test_modexpa7( const uint32_t *m,
+int test_modexpa7( const uint32_t *n,
+ const uint32_t *m,
const uint32_t *d,
const uint32_t *s,
+ const uint32_t *coeff,
+ const uint32_t *factor,
size_t l);
-int test_modexpa7_crt( const uint32_t *m,
+int test_modexpa7_crt( const uint32_t *n,
+ const uint32_t *m,
const uint32_t *d,
const uint32_t *s,
+ const uint32_t *coeff,
+ const uint32_t *factor,
size_t l);
@@ -148,10 +196,10 @@ int main()
fmc_read_32(CORE_ADDR_NAME1, &core_name1);
fmc_read_32(CORE_ADDR_VERSION, &core_version);
- // must be "mode", "xpa7", "0.20"
+ // must be "mode", "xpa7", "0.25"
if ( (core_name0 != 0x6D6F6465) ||
(core_name1 != 0x78706137) ||
- (core_version != 0x302E3230))
+ (core_version != 0x302E3235))
{
led_off(LED_GREEN);
led_on(LED_RED);
@@ -164,61 +212,63 @@ int main()
// largest supported operand width, systolic array "power"
fmc_read_32(CORE_ADDR_BUFFER_BITS, &core_buffer_bits);
- fmc_read_32(CORE_ADDR_ARRAY_BITS, &core_array_bits);
+ fmc_read_32(CORE_ADDR_ARRAY_BITS, &core_array_bits);
+
+ //
+ // do pre-computation for all the moduli and store speed-up quantities,
+ // note that each key requires three precomputations: one for the entire
+ // public key and one for each of the two corresponding private key components
+ //
+ // we set the 'init' control bit, wait for the 'ready' status bit to go high,
+ // then retrieve the calculated values from the corresponding "output" banks
+ //
+ // we turn off the green led and turn on the yellow led during the process to
+ // get an idea of how long it takes
+ //
+
+ led_off(LED_GREEN);
+ led_on(LED_YELLOW);
+
+ // 384-bit key and 192-bit primes
+ setup_modexpa7(n_384, n_coeff_384, factor_384, 384);
+ setup_modexpa7(p_192, p_coeff_192, factor_p_192, 192);
+ setup_modexpa7(q_192, q_coeff_192, factor_q_192, 192);
+
+ // 512-bit key and 256-bit primes
+ setup_modexpa7(n_512, n_coeff_512, factor_512, 512);
+ setup_modexpa7(p_256, p_coeff_256, factor_p_256, 256);
+ setup_modexpa7(q_256, q_coeff_256, factor_q_256, 256);
+
+ led_off(LED_YELLOW);
+ led_on(LED_GREEN);
+
// repeat forever
while (1)
- {
- // New modulus requires precomputation of modulus-dependent
- // speed-up coefficient, this must be done once per new
- // modulus, i.e. when we're repeatedly signing with the
- // same key, we only need to do precomputation once before
- // starting the very first signing operation.
-
+ {
// fresh start
ok = 1;
-
- {
- // run precomputation of modulus-dependent factor for the 384-bit modulus
- setup_modexpa7(n_384, 384);
-
- // try signing the message from the 384-bit test vector
- ok = ok && test_modexpa7(m_384, d_384, s_384, 384);
- }
- {
- // run precomputation of modulus-dependent factor for the 512-bit modulus
- setup_modexpa7(n_512, 512);
-
- // try signing the message from the 512-bit test vector
- ok = ok && test_modexpa7(m_512, d_512, s_512, 512);
- }
- {
- // run precomputation of modulus-dependent factor for the first 192-bit part of 384-bit modulus
- setup_modexpa7(p_192, 192);
-
+ {
+ // try signing the message with the 384-bit test vector
+ ok = ok && test_modexpa7(n_384, m_384, d_384, s_384, n_coeff_384, factor_384, 384);
+
// try signing 384-bit base using 192-bit exponent
- ok = ok && test_modexpa7_crt(m_384, dp_192, mp_192, 192);
-
- // run precomputation of modulus-dependent factor for the second 192-bit part of 384-bit modulus
- setup_modexpa7(q_192, 192);
+ ok = ok && test_modexpa7_crt(p_192, m_384, dp_192, mp_192, p_coeff_192, factor_p_192, 192);
// try signing 384-bit base using 192-bit exponent
- ok = ok && test_modexpa7_crt(m_384, dq_192, mq_192, 192);
+ ok = ok && test_modexpa7_crt(q_192, m_384, dq_192, mq_192, q_coeff_192, factor_q_192, 192);
}
+
+ {
+ // try signing the message with the 512-bit test vector
+ ok = ok && test_modexpa7(n_512, m_512, d_512, s_512, n_coeff_512, factor_512, 512);
- {
- // run precomputation of modulus-dependent factor for the first 256-bit part of 512-bit modulus
- setup_modexpa7(p_256, 256);
-
// try signing 512-bit base using 256-bit exponent
- ok = ok && test_modexpa7_crt(m_512, dp_256, mp_256, 256);
-
- // run precomputation of modulus-dependent factor for the second 256-bit part of 512-bit modulus
- setup_modexpa7(q_256, 256);
+ ok = ok && test_modexpa7_crt(p_256, m_512, dp_256, mp_256, p_coeff_256, factor_p_256, 256);
// try signing 512-bit base using 256-bit exponent
- ok = ok && test_modexpa7_crt(m_512, dq_256, mq_256, 256);
+ ok = ok && test_modexpa7_crt(q_256, m_512, dq_256, mq_256, q_coeff_256, factor_q_256, 256);
}
// turn on the red led to indicate something went wrong
@@ -234,15 +284,18 @@ int main()
/*
- * Load new modulus and do the necessary precomputations.
+ * Load new modulus and do all the necessary precomputations.
*/
void setup_modexpa7( const uint32_t *n,
+ uint32_t *coeff,
+ uint32_t *factor,
size_t l)
{
size_t i, num_words;
uint32_t num_bits;
uint32_t reg_control, reg_status;
uint32_t n_word;
+ uint32_t coeff_word, factor_word;
uint32_t dummy_num_cyc;
// determine numbers of 32-bit words
@@ -250,10 +303,9 @@ void setup_modexpa7( const uint32_t *n,
// set modulus width
num_bits = l;
- fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
+ fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
- // fill modulus bank (the least significant word
- // is at the lowest offset)
+ // fill modulus bank (the least significant word is at the lowest offset)
for (i=0; i<num_words; i++)
{ n_word = n[i];
fmc_write_32(CORE_ADDR_BANK_MODULUS + ((num_words - (i + 1)) * sizeof(uint32_t)), &n_word);
@@ -273,42 +325,70 @@ void setup_modexpa7( const uint32_t *n,
fmc_read_32(CORE_ADDR_STATUS, &reg_status);
}
while (!(reg_status & CORE_STATUS_BIT_READY));
+
+ // retrieve the modulus-dependent coefficient and Montgomery factor
+ // from the corresponding core "output" banks and store them for later use
+ for (i=0; i<num_words; i++)
+ {
+ fmc_read_32(CORE_ADDR_BANK_MODULUS_COEFF_OUT + i * sizeof(uint32_t), &coeff_word);
+ coeff[i] = coeff_word;
+
+ fmc_read_32(CORE_ADDR_BANK_MONTGOMERY_FACTOR_OUT + i * sizeof(uint32_t), &factor_word);
+ factor[i] = factor_word;
+ }
}
//
// Sign the message and compare it against the correct reference value.
//
-int test_modexpa7( const uint32_t *m,
+int test_modexpa7( const uint32_t *n,
+ const uint32_t *m,
const uint32_t *d,
const uint32_t *s,
+ const uint32_t *coeff,
+ const uint32_t *factor,
size_t l)
{
size_t i, num_words;
uint32_t num_bits;
uint32_t reg_control, reg_status;
- uint32_t m_word, d_word, s_word;
+ uint32_t n_word, m_word, d_word, s_word;
+ uint32_t coeff_word, factor_word;
uint32_t dummy_num_cyc;
uint32_t mode;
// determine numbers of 32-bit words
num_words = l >> 5;
- // set exponent width
+ // set modulus width, exponent width
num_bits = l;
- fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);
+ fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
+ fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);
// disable CRT mode
mode = 0;
fmc_write_32(CORE_ADDR_MODE, &mode);
- // fill message and exponent banks (the least significant
- // word is at the lowest offset)
+ // fill modulus, message and exponent banks (the least significant
+ // word is at the lowest offset), we also need to fill "input" core
+ // banks with previously pre-calculated and saved modulus-dependent
+ // speed-up coefficient and Montgomery factor
for (i=0; i<num_words; i++)
- { m_word = m[i];
+ {
+ n_word = n[i];
+ m_word = m[i];
d_word = d[i];
+
+ fmc_write_32(CORE_ADDR_BANK_MODULUS + ((num_words - (i + 1)) * sizeof(uint32_t)), &n_word);
fmc_write_32(CORE_ADDR_BANK_MESSAGE + ((num_words - (i + 1)) * sizeof(uint32_t)), &m_word);
fmc_write_32(CORE_ADDR_BANK_EXPONENT + ((num_words - (i + 1)) * sizeof(uint32_t)), &d_word);
+
+ coeff_word = coeff[i];
+ factor_word = factor[i];
+
+ fmc_write_32(CORE_ADDR_BANK_MODULUS_COEFF_IN + i * sizeof(uint32_t), &coeff_word);
+ fmc_write_32(CORE_ADDR_BANK_MONTGOMERY_FACTOR_IN + i * sizeof(uint32_t), &factor_word);
}
// clear 'next' control bit, then set 'next' control bit again
@@ -331,8 +411,7 @@ int test_modexpa7( const uint32_t *m,
{
fmc_read_32(CORE_ADDR_BANK_RESULT + (i * sizeof(uint32_t)), &s_word);
- if (s_word != s[num_words - (i + 1)])
- return 0;
+ if (s_word != s[num_words - (i + 1)]) return 0;
}
// everything went just fine
@@ -340,34 +419,49 @@ int test_modexpa7( const uint32_t *m,
}
-int test_modexpa7_crt( const uint32_t *m,
+int test_modexpa7_crt( const uint32_t *n,
+ const uint32_t *m,
const uint32_t *d,
const uint32_t *s,
+ const uint32_t *coeff,
+ const uint32_t *factor,
size_t l)
{
size_t i, num_words;
uint32_t num_bits;
uint32_t reg_control, reg_status;
- uint32_t m_word, d_word, s_word;
+ uint32_t n_word, m_word, d_word, s_word;
+ uint32_t coeff_word, factor_word;
uint32_t dummy_num_cyc;
uint32_t mode;
// determine numbers of 32-bit words
num_words = l >> 5;
- // set exponent width
+ // set modulus width, exponent width
num_bits = l;
- fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);
+ fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
+ fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);
// enable CRT mode
mode = CORE_MODE_BIT_CRT;
fmc_write_32(CORE_ADDR_MODE, &mode);
- // fill exponent bank (the least significant word
- // is at the lowest offset)
+ // fill modulus and exponent banks (the least significant word is at
+ // the lowest offset), we also need to fill "input" core banks with
+ // previously pre-calculated and saved modulus-dependent speed-up
+ // coefficient and Montgomery factor
for (i=0; i<num_words; i++)
- { d_word = d[i];
+ { n_word = n[i];
+ d_word = d[i];
+ fmc_write_32(CORE_ADDR_BANK_MODULUS + ((num_words - (i + 1)) * sizeof(uint32_t)), &n_word);
fmc_write_32(CORE_ADDR_BANK_EXPONENT + ((num_words - (i + 1)) * sizeof(uint32_t)), &d_word);
+
+ coeff_word = coeff[i];
+ factor_word = factor[i];
+
+ fmc_write_32(CORE_ADDR_BANK_MODULUS_COEFF_IN + i * sizeof(uint32_t), &coeff_word);
+ fmc_write_32(CORE_ADDR_BANK_MONTGOMERY_FACTOR_IN + i * sizeof(uint32_t), &factor_word);
}
// fill message bank (the least significant word