diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-23 17:03:17 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-23 17:03:17 +0300 |
commit | 0f111bf1ea0e5310852c50448a7226db57e61d5b (patch) | |
tree | 9f05558be50dd93a523ee343d1ecd73235879cb9 /stm32/modexpng_util.c | |
parent | d50bb6039649a61b2048447a63a9670895e1e2be (diff) |
Added demo driver code for STM32.
Diffstat (limited to 'stm32/modexpng_util.c')
-rw-r--r-- | stm32/modexpng_util.c | 189 |
1 files changed, 189 insertions, 0 deletions
diff --git a/stm32/modexpng_util.c b/stm32/modexpng_util.c new file mode 100644 index 0000000..27afb19 --- /dev/null +++ b/stm32/modexpng_util.c @@ -0,0 +1,189 @@ +// +// helper precomputation routines for the "modexpng" core +// +#include "modexpng_util.h" + + +// +// internal buffers +// +static uint32_t MOD_FACTOR_N[BUF_NUM_WORDS]; +static uint32_t MOD_NN[BUF_NUM_WORDS+1]; +static uint32_t MOD_T[BUF_NUM_WORDS+1]; + +static void _add32(uint32_t, uint32_t, uint32_t, uint32_t *, uint32_t *); +static void _sub32(uint32_t, uint32_t, uint32_t, uint32_t *, uint32_t *); +static void _mul32(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t *, uint32_t *); + + +// +// calculation of the Montgomery factor +// +void _calc_montgomery_factor(uint32_t num_words, const uint32_t *N, uint32_t *N_FACTOR) +{ + // counters + uint32_t i, j; + + // flag + uint32_t flag_keep; + + // carry and borrow + uint32_t cry_in, cry_out; + uint32_t brw_in, brw_out; + + // initially set N_FACTOR = 1 + for (i=0; i<num_words; i++) + N_FACTOR[i] = i ? 0 : 1; + + // do the math + for (i=0; i<2*(num_words * UINT32_BITS + UINT16_BITS); i++) + { + // clear carry and borrow + cry_in = 0, brw_in = 0; + + // calculate N_FACTOR = N_FACTOR << 1, MOD_FACTOR_N = N_FACTOR - N + for (j=0; j<num_words; j++) + { + cry_out = N_FACTOR[j] >> (UINT32_BITS - 1); // | N_FACTOR <<= 1 + N_FACTOR[j] <<= 1; N_FACTOR[j] |= cry_in; // | + + _sub32(N_FACTOR[j], N[j], brw_in, &MOD_FACTOR_N[j], &brw_out); // MOD_FACTOR_N = N_FACTOR - N + + // propagate carry & borrow + cry_in = cry_out, brw_in = brw_out; + } + + // obtain flag + flag_keep = brw_out && !cry_out; + + // now select the right value + for (j=0; j<num_words; j++) + N_FACTOR[j] = flag_keep ? N_FACTOR[j] : MOD_FACTOR_N[j]; + } +} + + +// +// calculation of the modulus-dependent speed-up coefficient +// +void _calc_modulus_coeff(uint32_t num_words, const uint32_t *N, uint32_t *N_COEFF) +{ + // counters + uint32_t i, j, k, jk; + + // indices + uint32_t word_index, bit_index; + + // flag + uint32_t flag_update; + + // carries + uint32_t cry_in, cry_out; + + // temporary variables + uint32_t mod_p, add_s, b_word; + + // initially set N_COEFF to 1 + for (i=0; i<=num_words; i++) + N_COEFF[i] = i ? 0 : 1; + + // also set NN to ~N+1 + // note that since N must be odd, ~N is even, so adding 1 to it doesn't need + // any carry propagation + for (i=0; i<num_words; i++) MOD_NN[i] = ~N[i]; + MOD_NN[0] += 1; + MOD_NN[num_words] = 0xffffffff; + + // do the math + for (i=1; i<(num_words * UINT32_BITS + UINT16_BITS); i++) + { + word_index = i / UINT32_BITS; + bit_index = i & (UINT32_BITS - 1); + + // clear T + for (j=0; j<=num_words; j++) MOD_T[j] = 0; + + // T = N_COEFF * NN mod 2 ** (modulus_length + 16) + /* + * Note, that we only need the lower half of the product T, so in + * the outer loop we always scan entire N_COEFF, but the inner + * loop only scans entire NN during the first iteration, and then + * keeps skipping one more word every iteration, during the last + * iteration we only scan one word of NN. + * + */ + for (j=0; j<=num_words; j++) + { cry_in = 0; + for (k=0; k<=(num_words-j); k++) + { jk = j + k; + _mul32(N_COEFF[j], MOD_NN[k], MOD_T[jk], cry_in, &mod_p, &cry_out); + MOD_T[jk] = mod_p; + cry_in = cry_out; + if (word_index == jk) + flag_update = MOD_T[jk] & (1 << bit_index) ? 1 : 0; + } + } + if (flag_update) + { cry_in = 0; + for (j=0; j<=num_words; j++) + { b_word = (j == word_index) ? (1 << bit_index) : 0; + _add32(b_word, N_COEFF[j], cry_in, &add_s, &cry_out); + N_COEFF[j] = add_s; + cry_in = cry_out; + } + } + } +} + + +// +// low-level addition w/ carry +// +static void _add32(uint32_t a, uint32_t b, uint32_t c_in, uint32_t *s, uint32_t *c_out) +{ + uint64_t t; // intermediate var + + t = (uint64_t)a + (uint64_t)b; // obtain "wide" difference + t += (uint64_t)(c_in & 1); // take borrow into account + + *s = (uint32_t)t; // return the lower part of result + *c_out = (uint32_t)(t >> UINT32_BITS); // return the higher part of result, ... + *c_out &= (uint32_t)1; // ...but truncate it to 1 bit +} + + +// +// low-level subtraction w/ borrow +// +static void _sub32(uint32_t a, uint32_t b, uint32_t b_in, uint32_t *d, uint32_t *b_out) +{ + uint64_t t; // intermediate var + + t = (uint64_t)a - (uint64_t)b; // obtain "wide" difference + t -= (uint64_t)(b_in & 1); // take borrow into account + + *d = (uint32_t)t; // return the lower part of result + *b_out = (uint32_t)(t >> UINT32_BITS); // return the higher part of result, ... + *b_out &= (uint32_t)1; // ...but truncate it to 1 bit +} + + +// +// low-level multiplication w/ carry and pre-adder +// +static void _mul32(uint32_t a, uint32_t b, uint32_t t, uint32_t c_in, uint32_t *p, uint32_t *c_out) +{ + uint64_t r; // intermediate result + + r = (uint64_t)a * (uint64_t)b; // obtain wide product + r += (uint64_t)t; // handle pre-addition + r += (uint64_t)c_in; // take carry into account + + *p = (uint32_t)r; // return the lower part of result + *c_out = (uint32_t)(r >> UINT32_BITS); // return the higher part of result, ... +} + + +// +// end-of-file +// |