// path: root/stm32/modexpng_driver_sample.c
// blob: d87926a924a7cc714deb69a2e14f204aed4ae1f1
                                                                                
  


                                                        
  































                                                                                


































































































































































                                                                                                                      
 
                





































                                                                                                                             




               



                                    















                                                    


                                                               









                                                                             
                    








                                                                                
                        





                                                                           
                                                                                 

                                                                           
                    






                                                   
                                                 
                         
                        



                                                                                             
                                








                                                                                             
                        





                                                                                                         
                                












                                                                                                         
                                




                                                                   
                                                       




















































































                                                                                                 







                                                                                                                                              
                 

                                                                                                                                  



                                                                                                       








                                                                                                                                            
                           

                                                                                                                                                       







                                                                                                   


                                                                          


                                                                                                       
                                                              
                                                    
                                                              





                                                                                             
                                                                            













                                                                                                            


                                                                                                 












































                                                                               
//------------------------------------------------------------------------------
//
// modexpng_driver_sample.c
// -----------------------------------------------------
// Sample driver to test the "modexpng" core in hardware
//
// Authors: Pavel Shatov
//
// Copyright (c) 2019, NORDUnet A/S
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// - Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//------------------------------------------------------------------------------


//
// Note that the test program needs a custom bitstream where
// the core is located at offset 0 (without the core selector).
//

// stm32 headers
#include "stm-init.h"
#include "stm-led.h"
#include "stm-fmc.h"

// test vectors (generated by the supplied python math model)
#include "modexpng_vector_1024.h"
#include "modexpng_vector_2048.h"
#include "modexpng_vector_4096.h"

// reference code
#include "modexpng_util.h"

// locations of core registers
// every offset is written as a register index shifted left by two, i.e. the
// index times four (presumably index * sizeof(uint32_t), since all the
// accesses in this file are 32 bits wide)
// CORE_ADDR_BANK_BITS and CORE_ADDR_NUM_MULTS are not referenced in this file
#define CORE_ADDR_NAME0					(0x00 << 2)
#define CORE_ADDR_NAME1					(0x01 << 2)
#define CORE_ADDR_VERSION				(0x02 << 2)
#define CORE_ADDR_CONTROL				(0x08 << 2)
#define CORE_ADDR_STATUS				(0x09 << 2)
#define CORE_ADDR_MODE		      (0x10 << 2)
#define CORE_ADDR_MODULUS_BITS	(0x11 << 2)
#define CORE_ADDR_EXPONENT_BITS	(0x12 << 2)
#define CORE_ADDR_BANK_BITS			(0x13 << 2)
#define CORE_ADDR_NUM_MULTS			(0x14 << 2)

// locations of data buffers
// addresses are composed as <group> * 0x1000 + <slot> * 0x200, the word
// index times sizeof(uint32_t) is added on top by _sign_handler()
// group 1 holds the operands written for every operation
#define CORE_ADDR_BANK_M					(1 * 0x1000 + 0 * 0x200)
#define CORE_ADDR_BANK_N					(1 * 0x1000 + 1 * 0x200)
#define CORE_ADDR_BANK_N_FACTOR		(1 * 0x1000 + 2 * 0x200)
#define CORE_ADDR_BANK_N_COEFF		(1 * 0x1000 + 3 * 0x200)
#define CORE_ADDR_BANK_X	 				(1 * 0x1000 + 5 * 0x200)
#define CORE_ADDR_BANK_Y	 				(1 * 0x1000 + 6 * 0x200)

// group 2 holds the private exponent and the CRT-only operands
// note, that DP and DQ are the only banks using 0x100 steps, they start
// 0x100 above P (0x2300 vs 0x2200) and 0x100 above Q (0x2900 vs 0x2800)
#define CORE_ADDR_BANK_D					(2 * 0x1000 + 0 * 0x200)
#define CORE_ADDR_BANK_P					(2 * 0x1000 + 1 * 0x200)
#define CORE_ADDR_BANK_DP					(2 * 0x1000 + 3 * 0x100)
#define CORE_ADDR_BANK_P_FACTOR		(2 * 0x1000 + 2 * 0x200)
#define CORE_ADDR_BANK_P_COEFF		(2 * 0x1000 + 3 * 0x200)
#define CORE_ADDR_BANK_Q					(2 * 0x1000 + 4 * 0x200)
#define CORE_ADDR_BANK_DQ					(2 * 0x1000 + 9 * 0x100)
#define CORE_ADDR_BANK_Q_FACTOR		(2 * 0x1000 + 5 * 0x200)
#define CORE_ADDR_BANK_Q_COEFF		(2 * 0x1000 + 6 * 0x200)
#define CORE_ADDR_BANK_QINV				(2 * 0x1000 + 7 * 0x200)

// group 3 holds the values _sign_handler() reads back after an operation
#define CORE_ADDR_BANK_S					(3 * 0x1000 + 0 * 0x200)
#define CORE_ADDR_BANK_XM					(3 * 0x1000 + 1 * 0x200)
#define CORE_ADDR_BANK_YM					(3 * 0x1000 + 2 * 0x200)

// bit maps
// CORE_CONTROL_BIT_NEXT is written to the control register to start an
// operation, CORE_STATUS_BIT_VALID is polled in the status register
#define CORE_CONTROL_BIT_NEXT		0x00000002
#define CORE_STATUS_BIT_VALID		0x00000002

// values written to the mode register
#define CORE_MODE_USING_CRT			0x00000002
#define CORE_MODE_WITHOUT_CRT		0x00000000


//
// test vectors
//
// one set of arrays per key length (1024, 2048 and 4096 bits), the *_INIT
// initializers are not defined in this file (presumably they come from the
// modexpng_vector_*.h headers included above)
//
// X and Y are the only non-const arrays: starting from the second pass
// _sign_handler() overwrites them with the values read back from the
// XM and YM banks
//
// the *_COEFF arrays are read one word past the length of the matching
// modulus (see check_modulus_coeff() and _sign_handler())
//
static const uint32_t M_1024[] 			       = M_1024_INIT;
static const uint32_t N_1024[]			 	     = N_1024_INIT;
static const uint32_t N_FACTOR_1024[]      = N_FACTOR_1024_INIT;
static const uint32_t N_COEFF_1024[]	     = N_COEFF_1024_INIT;
static       uint32_t X_1024[]				     = X_1024_INIT;
static       uint32_t Y_1024[]				     = Y_1024_INIT;
static const uint32_t P_1024[]				     = P_1024_INIT;
static const uint32_t Q_1024[]				     = Q_1024_INIT;
static const uint32_t P_FACTOR_1024[]      = P_FACTOR_1024_INIT;
static const uint32_t Q_FACTOR_1024[]      = Q_FACTOR_1024_INIT;
static const uint32_t P_COEFF_1024[]	     = P_COEFF_1024_INIT;
static const uint32_t Q_COEFF_1024[]	     = Q_COEFF_1024_INIT;
static const uint32_t D_1024[]				     = D_1024_INIT;
static const uint32_t DP_1024[]			       = DP_1024_INIT;
static const uint32_t DQ_1024[]			       = DQ_1024_INIT;
static const uint32_t QINV_1024[]		       = QINV_1024_INIT;
static const uint32_t XM_1024[]			       = XM_1024_INIT;
static const uint32_t YM_1024[]			       = YM_1024_INIT;
static const uint32_t S_1024[]				     = S_1024_INIT;

static const uint32_t M_2048[] 			       = M_2048_INIT;
static const uint32_t N_2048[]			 	     = N_2048_INIT;
static const uint32_t N_FACTOR_2048[]      = N_FACTOR_2048_INIT;
static const uint32_t N_COEFF_2048[]	     = N_COEFF_2048_INIT;
static       uint32_t X_2048[]				     = X_2048_INIT;
static       uint32_t Y_2048[]				     = Y_2048_INIT;
static const uint32_t P_2048[]				     = P_2048_INIT;
static const uint32_t Q_2048[]				     = Q_2048_INIT;
static const uint32_t P_FACTOR_2048[]      = P_FACTOR_2048_INIT;
static const uint32_t Q_FACTOR_2048[]      = Q_FACTOR_2048_INIT;
static const uint32_t P_COEFF_2048[]	     = P_COEFF_2048_INIT;
static const uint32_t Q_COEFF_2048[]	     = Q_COEFF_2048_INIT;
static const uint32_t D_2048[]				     = D_2048_INIT;
static const uint32_t DP_2048[]			       = DP_2048_INIT;
static const uint32_t DQ_2048[]			       = DQ_2048_INIT;
static const uint32_t QINV_2048[]		       = QINV_2048_INIT;
static const uint32_t XM_2048[]			       = XM_2048_INIT;
static const uint32_t YM_2048[]			       = YM_2048_INIT;
static const uint32_t S_2048[]				     = S_2048_INIT;

static const uint32_t M_4096[] 			       = M_4096_INIT;
static const uint32_t N_4096[]			 	     = N_4096_INIT;
static const uint32_t N_FACTOR_4096[]      = N_FACTOR_4096_INIT;
static const uint32_t N_COEFF_4096[]	     = N_COEFF_4096_INIT;
static       uint32_t X_4096[]				     = X_4096_INIT;
static       uint32_t Y_4096[]				     = Y_4096_INIT;
static const uint32_t P_4096[]				     = P_4096_INIT;
static const uint32_t Q_4096[]				     = Q_4096_INIT;
static const uint32_t P_FACTOR_4096[]      = P_FACTOR_4096_INIT;
static const uint32_t Q_FACTOR_4096[]      = Q_FACTOR_4096_INIT;
static const uint32_t P_COEFF_4096[]	     = P_COEFF_4096_INIT;
static const uint32_t Q_COEFF_4096[]	     = Q_COEFF_4096_INIT;
static const uint32_t D_4096[]				     = D_4096_INIT;
static const uint32_t DP_4096[]			       = DP_4096_INIT;
static const uint32_t DQ_4096[]			       = DQ_4096_INIT;
static const uint32_t QINV_4096[]		       = QINV_4096_INIT;
static const uint32_t XM_4096[]			       = XM_4096_INIT;
static const uint32_t YM_4096[]			       = YM_4096_INIT;
static const uint32_t S_4096[]				     = S_4096_INIT;


//
// buffers
//
// scratch space shared by check_montgomery_factor() and check_modulus_coeff():
// mod_rev receives the word-reversed modulus, the other two are handed to
// the reference routines as output buffers, the coefficient buffer has one
// extra word, because the coefficient is one word longer, than the modulus
//
static uint32_t mod_rev[BUF_NUM_WORDS];
static uint32_t mod_factor_rev[BUF_NUM_WORDS];
static uint32_t mod_coeff_rev[BUF_NUM_WORDS+1];


//
// prototypes
//

// inverts the state of the yellow led on every call
void toggle_yellow_led(void);

// self-tests of the reference code, both return 1 when the value computed
// from mod matches the supplied reference value and 0 otherwise,
// key_length is the length of mod in bits
int check_montgomery_factor(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_factor);
int check_modulus_coeff(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_coeff);

// runs one signing operation on the core and returns 1 when the values read
// back match the reference and 0 otherwise, not meant to be called directly,
// use the sign_without_crt() and sign_using_crt() wrappers instead
// x and y are not const, because they get overwritten when first_run is zero
int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run,
		const uint32_t *m,        const uint32_t *n,
		const uint32_t *n_factor, const uint32_t *n_coeff,
		      uint32_t *x,              uint32_t *y,
		const uint32_t *p,        const uint32_t *q,
		const uint32_t *p_factor, const uint32_t *p_coeff,
		const uint32_t *q_factor, const uint32_t *q_coeff,
		const uint32_t *dp,       const uint32_t *dq,
		const uint32_t *d,
		const uint32_t *qinv,
		const uint32_t *s,
		const uint32_t *xm,       const uint32_t *ym);	

//
// easier calls
//
// both wrappers expand to a single _sign_handler() call and thus evaluate
// to its return value
//
// sign_without_crt() passes use_crt = 0 and NULL for all the operands only
// needed in CRT mode (p, q, p/q factors, p/q coefficients, dp, dq and qinv)
//
// sign_using_crt() passes use_crt = 1 and NULL for d
//
#define sign_without_crt(k,f,m,n,nf,nc,x,y,d,s,xm,ym) \
			 _sign_handler    (k,0,f,m,n,nf,nc,x,y,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,d,NULL,s,xm,ym)
		
#define sign_using_crt(k,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,qinv,s,xm,ym) \
			 _sign_handler  (k,1,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,NULL,qinv,s,xm,ym)

		
//
// dirty workarounds
//
// local stand-ins for ntohl()/htonl(): both unconditionally reverse the
// order of the four low bytes of the argument, the result has the type
// unsigned long, note, that the argument is evaluated four times, so it
// must not have any side effects
//
#define _ntohl(n) (((((unsigned long)(n)) << 24) & 0xFF000000UL) | \
                   ((((unsigned long)(n)) <<  8) & 0x00FF0000UL) | \
                   ((((unsigned long)(n)) >>  8) & 0x0000FF00UL) | \
                   ((((unsigned long)(n)) >> 24) & 0x000000FFUL))

// swapping bytes is its own inverse, so the other direction is the same operation
#define _htonl(n) _ntohl(n)


//
// Core Offset
//
// _fmc_read_32() and _fmc_write_32() add (256 << 2) * MODEXPNG_CORE_NUM
// to every core-relative address before accessing the FPGA
// NOTE(review): the note at the top of the file says the core is expected
// at offset 0, which does not match a non-zero value here -- confirm which
// of the two is current
//
#define MODEXPNG_CORE_NUM 0x26


//
// more dirty workarounds
//

//
// read one 32-bit word from the core
//
// from_addr - address relative to the start of the core, the offset of the
//             core itself is added here and the sum is masked with
//             FMC_FPGA_ADDR_MASK
// to_ptr    - receives the word after its bytes were reversed by _ntohl()
//
static void _fmc_read_32(uint32_t from_addr, uint32_t *to_ptr)
{
		uint32_t src_addr = FMC_FPGA_BASE_ADDR + (((256 << 2) * MODEXPNG_CORE_NUM + from_addr) & FMC_FPGA_ADDR_MASK);

		// the location is a device register, not ordinary memory, so the access
		// has to be volatile: without it the compiler is free to assume the
		// value never changes and read it only once when this function gets
		// inlined into a polling loop
		uint32_t t = *((volatile uint32_t *)src_addr);

		*to_ptr = _ntohl(t);
}

//
// write one 32-bit word to the core
//
// to_addr - address relative to the start of the core, the offset of the
//           core itself is added here and the sum is masked with
//           FMC_FPGA_ADDR_MASK
// value   - word to write, its bytes are reversed by _htonl() first
//
static void _fmc_write_32(uint32_t to_addr, uint32_t value)
{
		uint32_t t = _htonl(value);
		uint32_t dst_addr = FMC_FPGA_BASE_ADDR + (((256 << 2) * MODEXPNG_CORE_NUM + to_addr) & FMC_FPGA_ADDR_MASK);

		// the location is a device register, not ordinary memory, so the access
		// has to be volatile: without it the compiler is allowed to merge or
		// drop stores, e.g. two writes to the same register in a row
		*(volatile uint32_t *)dst_addr = t;
}


//
// test routine
//
int main()
{
		int ok, first_run;
		long long int iters;
	
		ok = sizeof iters;
	
		// initialize
		stm_init();
		fmc_init();
	
		// initialize 
		led_on(LED_GREEN);
		led_off(LED_RED);
		led_off(LED_YELLOW);
		led_off(LED_BLUE);

		// make sure, that ModExpNG is there
		uint32_t core_name0;
		uint32_t core_name1;
		uint32_t core_version;
	
		_fmc_read_32(CORE_ADDR_NAME0,   &core_name0);
		_fmc_read_32(CORE_ADDR_NAME1,   &core_name1);
		_fmc_read_32(CORE_ADDR_VERSION, &core_version);
	
		// "mode", "xpng"
		if ((core_name0 != 0x6D6F6465) || (core_name1 != 0x78706E67))
		{		led_off(LED_GREEN);
				led_on(LED_RED);
				while (1);
		}
		
		// check, that reference code works correctly
		ok = 1;
		/**/
		ok = ok && check_montgomery_factor(1024, N_1024, N_FACTOR_1024);
		ok = ok && check_montgomery_factor( 512, P_1024, P_FACTOR_1024);
		ok = ok && check_montgomery_factor( 512, Q_1024, Q_FACTOR_1024);
		ok = ok && check_montgomery_factor(2048, N_2048, N_FACTOR_2048);
		ok = ok && check_montgomery_factor(1024, P_2048, P_FACTOR_2048);
		ok = ok && check_montgomery_factor(1024, Q_2048, Q_FACTOR_2048);
		ok = ok && check_montgomery_factor(4096, N_4096, N_FACTOR_4096);
		ok = ok && check_montgomery_factor(2048, P_4096, P_FACTOR_4096);
		ok = ok && check_montgomery_factor(2048, Q_4096, Q_FACTOR_4096);
		/**//**/
		ok = ok && check_modulus_coeff(1024, N_1024, N_COEFF_1024);
		ok = ok && check_modulus_coeff( 512, P_1024, P_COEFF_1024);
		ok = ok && check_modulus_coeff( 512, Q_1024, Q_COEFF_1024);
		ok = ok && check_modulus_coeff(2048, N_2048, N_COEFF_2048);
		ok = ok && check_modulus_coeff(1024, P_2048, P_COEFF_2048);
		ok = ok && check_modulus_coeff(1024, Q_2048, Q_COEFF_2048);
//  ok = ok && check_modulus_coeff(4096, N_4096, N_COEFF_4096); // SLOW (~20 sec)
		ok = ok && check_modulus_coeff(2048, P_4096, P_COEFF_4096);
		ok = ok && check_modulus_coeff(2048, Q_4096, Q_COEFF_4096);
		/**/
		if (!ok)
		{		led_off(LED_GREEN);
				led_on(LED_RED);
				while (1);
		}
		
		// repeat forever
		ok = 1, first_run = 1, iters = 0;
		while (1)
		{	
				ok = ok && sign_without_crt(1024, first_run,
						M_1024,  N_1024, N_FACTOR_1024, N_COEFF_1024,
						X_1024,  Y_1024, D_1024,        S_1024,
						XM_1024, YM_1024);
				
				ok = ok && sign_without_crt(2048, first_run,
						M_2048,  N_2048, N_FACTOR_2048, N_COEFF_2048,
						X_2048,  Y_2048, D_2048,        S_2048,
						XM_2048, YM_2048);

				ok = ok && sign_without_crt(4096, first_run,
						M_4096,  N_4096, N_FACTOR_4096, N_COEFF_4096,
						X_4096,  Y_4096, D_4096,        S_4096,
						XM_4096, YM_4096);
			
				ok = ok && sign_using_crt(1024, first_run,
						M_1024,        N_1024,       N_FACTOR_1024, N_COEFF_1024,
						X_1024,        Y_1024,       P_1024,        Q_1024,
						P_FACTOR_1024, P_COEFF_1024, Q_FACTOR_1024, Q_COEFF_1024,
						DP_1024,       DQ_1024,      QINV_1024,     S_1024,
						XM_1024,       YM_1024);
				
				ok = ok && sign_using_crt(2048, first_run,
						M_2048,        N_2048,       N_FACTOR_2048, N_COEFF_2048,
						X_2048,        Y_2048,       P_2048,        Q_2048,
						P_FACTOR_2048, P_COEFF_2048, Q_FACTOR_2048, Q_COEFF_2048,
						DP_2048,       DQ_2048,      QINV_2048,     S_2048,
						XM_2048,       YM_2048);

				ok = ok && sign_using_crt(4096, first_run,
						M_4096,        N_4096,       N_FACTOR_4096, N_COEFF_4096,
						X_4096,        Y_4096,       P_4096,        Q_4096,
						P_FACTOR_4096, P_COEFF_4096, Q_FACTOR_4096, Q_COEFF_4096,
						DP_4096,       DQ_4096,      QINV_4096,     S_4096,
						XM_4096,       YM_4096);
				
				if (!ok)
				{		led_off(LED_GREEN);
						led_on(LED_RED);
				}

				first_run = 0, iters++;
				
				toggle_yellow_led();
		}
}

//
// self-test: compute the Montgomery factor of a modulus with the reference
// code and compare it to a known good value
//
// key_length - length of the modulus in bits
// mod        - modulus, most significant word in [0]
// mod_factor - expected factor, most significant word in [0]
//
// returns 1 when the computed factor matches, 0 when it differs or when
// the modulus does not fit in the scratch buffers
//
int check_montgomery_factor(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_factor)
{
		uint32_t i, j;
		uint32_t num_words = key_length / UINT32_BITS;

		// the scratch buffers have a fixed size, so refuse anything, that
		// would make us write past their end
		if (num_words > sizeof mod_rev / sizeof mod_rev[0]) return 0;
		if (num_words > sizeof mod_factor_rev / sizeof mod_factor_rev[0]) return 0;

		// _calc_montgomery_factor() expects the least significant word in [0],
		// but C array initialization places it in [N-1], so we need to
		// reverse the array before passing it to the function
		for (i=0, j=num_words-1; i<num_words; i++, j--)
				mod_rev[i] = mod[j];
	
		// compute Montgomery factor
		_calc_montgomery_factor(num_words, mod_rev, mod_factor_rev);
	
		// we now need to compare the calculated factor to the reference value,
		// _calc_montgomery_factor() places the least significant word in [0],
		// but C array initialization places the least significant word of the 
		// reference value in [N-1], so we need to go in opposite directions
		// when comparing
		for (i=0, j=num_words-1; i<num_words; i++, j--)
				if (mod_factor_rev[i] != mod_factor[j]) return 0;
	
		// everything went just fine
		return 1;
}


//
// self-test: compute the modulus-dependent speed-up coefficient with the
// reference code and compare it to a known good value
//
// key_length - length of the modulus in bits
// mod        - modulus, most significant word in [0]
// mod_coeff  - expected coefficient, most significant word in [0], one word
//              longer, than the modulus
//
// returns 1 when the computed coefficient matches, 0 when it differs or
// when the operands do not fit in the scratch buffers
//
int check_modulus_coeff(uint32_t key_length, const uint32_t *mod, const uint32_t *mod_coeff)
{
		uint32_t i, j;
		uint32_t num_words = key_length / UINT32_BITS;

		// the scratch buffers have a fixed size, so refuse anything, that
		// would make us write past their end (the coefficient takes N+1 words)
		if (num_words > sizeof mod_rev / sizeof mod_rev[0]) return 0;
		if (num_words >= sizeof mod_coeff_rev / sizeof mod_coeff_rev[0]) return 0;

		// _calc_modulus_coeff() expects the least significant word in [0],
		// but C array initialization places it in [N-1], so we need to
		// reverse the array before passing it to the function
		for (i=0, j=num_words-1; i<num_words; i++, j--)
				mod_rev[i] = mod[j];
	
		// compute modulus-dependent speed-up coefficient
		_calc_modulus_coeff(num_words, mod_rev, mod_coeff_rev);
	
		// we now need to compare the calculated coefficient to the reference value,
		// _calc_modulus_coeff() places the least significant word in [0],
		// but C array initialization places the least significant word of the 
		// reference value in [N], so we need to go in opposite directions
		// when comparing, also note, that we should process N+1 words, since the
		// coefficient is slightly longer, than the modulus
		for (i=0, j=num_words; i<=num_words; i++, j--)
				if (mod_coeff_rev[i] != mod_coeff[j]) return 0;
	
		// everything went just fine
		return 1;
}


//
// load one set of operands into the core, run a single operation and
// check the values the core produced
//
// key_length - length of the modulus in bits
// use_crt    - non-zero: CRT mode, the p/q related operands, dp, dq and qinv
//              are required and d is ignored (the D bank is zeroed)
//              zero: d is required and the CRT operands are ignored
// first_run  - non-zero: the values read from the XM and YM banks are
//              compared to xm and ym, x and y are left untouched
//              zero: the values read from the XM and YM banks are stored
//              into x and y, xm and ym are ignored
// all the arrays have the most significant word in [0]
//
// returns 1 when all the checked values match the reference, 0 on the first
// mismatch or when an operand required by the selected mode is missing
//
int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run,
		const uint32_t *m,        const uint32_t *n,
		const uint32_t *n_factor, const uint32_t *n_coeff,
		      uint32_t *x,              uint32_t *y,
		const uint32_t *p,        const uint32_t *q,
		const uint32_t *p_factor, const uint32_t *p_coeff,
		const uint32_t *q_factor, const uint32_t *q_coeff,
		const uint32_t *dp,       const uint32_t *dq,		
		const uint32_t *d,
		const uint32_t *qinv,
		const uint32_t *s,
		const uint32_t *xm,       const uint32_t *ym)
{
		uint32_t i, j, num_cyc;
		uint32_t num_words = (key_length / sizeof(uint32_t)) >> 3;	// bits -> 32-bit words
		uint32_t num_words_half = num_words >> 1;
		uint32_t reg_control, reg_status;
		uint32_t reg_mode;
		uint32_t reg_modulus_bits, reg_exponent_bits;

		// the wrapper macros pass null pointers for the operands the selected mode
		// does not need, so make sure, that everything we are about to dereference
		// is really there before touching the hardware
		if (!m || !n || !n_factor || !n_coeff || !x || !y || !s) return 0;
		if (first_run && (!xm || !ym)) return 0;
		if (use_crt)
		{		if (!p  || !q  || !p_factor || !p_coeff || !q_factor || !q_coeff) return 0;
				if (!dp || !dq || !qinv) return 0;
		}
		else if (!d) return 0;

		// fill in all the necessary input values
		// d is only written when CRT is not enabled (we wipe it otherwise just in case)
		// note, that n_coeff is one word larger, than the modulus, so we need a single
		// extra write after the word-by-word loop
		for (i=0, j=num_words-1; i<num_words; i++, j--)
		{		              _fmc_write_32(CORE_ADDR_BANK_M        + i * sizeof(uint32_t), m[j]);
				              _fmc_write_32(CORE_ADDR_BANK_N        + i * sizeof(uint32_t), n[j]);
				              _fmc_write_32(CORE_ADDR_BANK_N_FACTOR + i * sizeof(uint32_t), n_factor[j]);
				              _fmc_write_32(CORE_ADDR_BANK_N_COEFF  + i * sizeof(uint32_t), n_coeff[j+1]);	// mind the +1
				              _fmc_write_32(CORE_ADDR_BANK_X        + i * sizeof(uint32_t), x[j]);
				              _fmc_write_32(CORE_ADDR_BANK_Y        + i * sizeof(uint32_t), y[j]);
				if (!use_crt) _fmc_write_32(CORE_ADDR_BANK_D        + i * sizeof(uint32_t), d[j]);
				else          _fmc_write_32(CORE_ADDR_BANK_D        + i * sizeof(uint32_t), 0);
		}
		_fmc_write_32(CORE_ADDR_BANK_N_COEFF  + i * sizeof(uint32_t), n_coeff[0]);	// j+1 is 0 by now, i is num_words
				
		// also fill in all the input values necessary for CRT mode
		// again, we need to write a pair of extra words for p_coeff and q_coeff after the loop
		if (use_crt)
		{		for (i=0, j=num_words_half-1; i<num_words_half; i++, j--)
				{		_fmc_write_32(CORE_ADDR_BANK_P        + i * sizeof(uint32_t), p[j]);
						_fmc_write_32(CORE_ADDR_BANK_Q        + i * sizeof(uint32_t), q[j]);
						_fmc_write_32(CORE_ADDR_BANK_P_FACTOR + i * sizeof(uint32_t), p_factor[j]);
						_fmc_write_32(CORE_ADDR_BANK_P_COEFF  + i * sizeof(uint32_t), p_coeff[j+1]); // mind the +1!
						_fmc_write_32(CORE_ADDR_BANK_Q_FACTOR + i * sizeof(uint32_t), q_factor[j]);
						_fmc_write_32(CORE_ADDR_BANK_Q_COEFF  + i * sizeof(uint32_t), q_coeff[j+1]); // mind the +1!
						_fmc_write_32(CORE_ADDR_BANK_DP       + i * sizeof(uint32_t), dp[j]);
						_fmc_write_32(CORE_ADDR_BANK_DQ       + i * sizeof(uint32_t), dq[j]);
						_fmc_write_32(CORE_ADDR_BANK_QINV     + i * sizeof(uint32_t), qinv[j]);
				}
				_fmc_write_32(CORE_ADDR_BANK_P_COEFF  + i * sizeof(uint32_t), p_coeff[0]);	// j+1 is 0 by now, i is num_words_half
				_fmc_write_32(CORE_ADDR_BANK_Q_COEFF  + i * sizeof(uint32_t), q_coeff[0]);	// j+1 is 0 by now, i is num_words_half
		}
		
		// set parameters (there's no need to divide key length by two when CRT is enabled,
		// the core takes care of that by itself automatically)
		reg_mode          = use_crt ? CORE_MODE_USING_CRT : CORE_MODE_WITHOUT_CRT;
		reg_modulus_bits  = key_length;
		reg_exponent_bits = key_length;
		
		_fmc_write_32(CORE_ADDR_MODE,          reg_mode);
		_fmc_write_32(CORE_ADDR_MODULUS_BITS,  reg_modulus_bits);
		_fmc_write_32(CORE_ADDR_EXPONENT_BITS, reg_exponent_bits);
	
		// clear 'next' control bit, then set 'next' control bit again to trigger new operation
		reg_control = 0;
		_fmc_write_32(CORE_ADDR_CONTROL, reg_control);
		reg_control = CORE_CONTROL_BIT_NEXT;
		_fmc_write_32(CORE_ADDR_CONTROL, reg_control);

		// wait for the 'valid' status bit to be set, there is no timeout, so this
		// spins forever, if the core never reports completion
		// num_cyc counts the polls, it is not used otherwise (presumably kept
		// to be inspected with a debugger)
		// NOTE(review): this comment used to say, that the blue LED is turned on
		// while the core is busy, but no LED is driven anywhere in this function
		num_cyc = 0;
		do
		{		num_cyc++;
				_fmc_read_32(CORE_ADDR_STATUS, &reg_status);
		}
		while (!(reg_status & CORE_STATUS_BIT_VALID));
		
		// read back s, xm and ym word-by-word
		// the signature should always stay the same, so we always verify it
		// during the first run we also compare the mutated blinding factors to
		// the known correct reference values and leave x and y untouched
		// we only know the mutated pair of factors beforehand for the original
		// x and y, so starting from the second run we don't verify them anymore
		// and store them in x and y instead, so the next time we sign, the new
		// mutated factors will be used
		// note, that a signature mismatch returns immediately, so x and y can be
		// left partially updated in that case
		uint32_t s_word, xm_word, ym_word;
		for (i=0, j=num_words-1; i<num_words; i++, j--)
		{		_fmc_read_32(CORE_ADDR_BANK_S  + i * sizeof(uint32_t), &s_word);
				_fmc_read_32(CORE_ADDR_BANK_XM + i * sizeof(uint32_t), &xm_word);
				_fmc_read_32(CORE_ADDR_BANK_YM + i * sizeof(uint32_t), &ym_word);

				if (s_word != s[j]) return 0;
				
				if (first_run)
				{		if (xm_word != xm[j]) return 0;
						if (ym_word != ym[j]) return 0;
				}
				else
				{		x[j] = xm_word;
						y[j] = ym_word;
				}
		}
		
		// everything went just fine
		return 1;
}


//
// flip the yellow led on every call, so a blinking led shows,
// that the main loop is still running
//
void toggle_yellow_led(void)
{
		// remembers across calls, whether the led is currently turned on
		static int is_lit = 0;

		if (is_lit)
		{		is_lit = 0;
				led_off(LED_YELLOW);
		}
		else
		{		is_lit = 1;
				led_on(LED_YELLOW);
		}
}


//
// SysTick
//
// passes every tick on to the HAL by calling HAL_IncTick() first and
// HAL_SYSTICK_IRQHandler() second
// NOTE(review): presumably this is the handler the startup vector table
// refers to by name -- confirm against the startup code
//
void SysTick_Handler(void)
{
		HAL_IncTick();
		HAL_SYSTICK_IRQHandler();
}


//
// End-of-File
//