aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2017-09-04 00:14:09 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2017-09-04 00:14:09 +0300
commit058c54213a307fd360df1486f5d369d04b3a84d9 (patch)
treec5c363d03f476858bcd0fab6edc676db7f4583f0
parent32f31c9a447ed600f0924c02d07bbf945988d10c (diff)
Updated STM32 demo program to show how to use the precomputation block.
-rw-r--r--src/rtl/modexpa7_top.v40
-rw-r--r--src/stm32/modexpa7_driver_sample.c236
-rw-r--r--src/tb/tb_wrapper.v131
3 files changed, 322 insertions, 85 deletions
diff --git a/src/rtl/modexpa7_top.v b/src/rtl/modexpa7_top.v
index 7723b88..ea3d2c2 100644
--- a/src/rtl/modexpa7_top.v
+++ b/src/rtl/modexpa7_top.v
@@ -109,24 +109,38 @@ module modexpa7_top #
reg valid_reg = 1'b0;
assign ready = ready_reg;
- assign valid = valid_reg;
+ assign valid = valid_reg;
+
+ reg init_trig_latch;
+ reg next_trig_latch;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_IDLE)
+ //
+ case ({next_trig, init_trig})
+ 2'b00: {next_trig_latch, init_trig_latch} <= 2'b00; // do nothing
+ 2'b01: {next_trig_latch, init_trig_latch} <= 2'b01; // precalculate
+ 2'b10: {next_trig_latch, init_trig_latch} <= 2'b10; // exponentiate
+ 2'b11: {next_trig_latch, init_trig_latch} <= 2'b01; // 'init' has priority over 'next'
+ endcase
// ready flag logic
always @(posedge clk or negedge rst_n)
//
- if (rst_n == 1'b0) ready_reg <= 1'b0; // reset flag to default state
+ if (rst_n == 1'b0) ready_reg <= 1'b0; // reset flag to default state
else case (fsm_state)
- FSM_STATE_IDLE: if (init_trig) ready_reg <= 1'b0; // clear flag when operation is started
- FSM_STATE_STOP: if (!ready_reg) ready_reg <= 1'b1; // set flag after operation is finished
+ FSM_STATE_IDLE: if (init_trig) ready_reg <= 1'b0; // clear flag when operation is started
+ FSM_STATE_STOP: if (init_trig_latch) ready_reg <= 1'b1; // set flag after operation is finished
endcase
// valid flag logic
always @(posedge clk or negedge rst_n)
//
- if (rst_n == 1'b0) valid_reg <= 1'b0; // reset flag to default state
+ if (rst_n == 1'b0) valid_reg <= 1'b0; // reset flag to default state
else case (fsm_state)
- FSM_STATE_IDLE: if (next_trig) valid_reg <= 1'b0; // clear flag when operation is started
- FSM_STATE_STOP: if (!valid_reg) valid_reg <= 1'b1; // set flag after operation is finished
+ FSM_STATE_IDLE: if (next_trig) valid_reg <= 1'b0; // clear flag when operation is started
+ FSM_STATE_STOP: if (next_trig_latch) valid_reg <= 1'b1; // set flag after operation is finished
endcase
@@ -137,14 +151,20 @@ module modexpa7_top #
reg [OPERAND_ADDR_WIDTH+4:0] exponent_num_bits_latch;
// save number of words in modulus when pre-calculation has been triggered,
- // i.e. user has apparently loaded a new modulus into the core
+ // i.e. user has apparently loaded a new modulus into the core
+ //
+ // we also need to update modulus length when user wants to exponentiate,
+ // because he could have done precomputation for some modulus, then used
+ // a different length modulus and then reverted back to the original modulus
+ // without doing precomputation (dammit, spent whole day chasing this bug :(
always @(posedge clk)
//
- if (fsm_next_state == FSM_STATE_PRECALC_START)
+ if ((fsm_next_state == FSM_STATE_PRECALC_START) ||
+ (fsm_next_state == FSM_STATE_EXPONENT_START))
modulus_num_words_latch <= modulus_num_words;
// save number of bits in exponent when exponentiation has been triggered,
- // i.e. user has loaded a new message into the core and wants exponentiate
+ // i.e. user has loaded a new message into the core and wants to exponentiate
always @(posedge clk)
//
if (fsm_next_state == FSM_STATE_EXPONENT_START)
diff --git a/src/stm32/modexpa7_driver_sample.c b/src/stm32/modexpa7_driver_sample.c
index 390c949..e1de2bd 100644
--- a/src/stm32/modexpa7_driver_sample.c
+++ b/src/stm32/modexpa7_driver_sample.c
@@ -59,12 +59,19 @@
#define CORE_ADDR_BUFFER_BITS (0x13 << 2)
#define CORE_ADDR_ARRAY_BITS (0x14 << 2)
+ // operand bank size
+#define BANK_LENGTH 0x200 // 0x200 = 512 bytes = 4096 bits
// locations of operand buffers
-#define CORE_ADDR_BANK_MODULUS (0x800 + 0 * 0x200)
-#define CORE_ADDR_BANK_MESSAGE (0x800 + 1 * 0x200)
-#define CORE_ADDR_BANK_EXPONENT (0x800 + 2 * 0x200)
-#define CORE_ADDR_BANK_RESULT (0x800 + 3 * 0x200)
+#define CORE_ADDR_BANK_MODULUS (BANK_LENGTH * (8 + 0))
+#define CORE_ADDR_BANK_MESSAGE (BANK_LENGTH * (8 + 1))
+#define CORE_ADDR_BANK_EXPONENT (BANK_LENGTH * (8 + 2))
+#define CORE_ADDR_BANK_RESULT (BANK_LENGTH * (8 + 3))
+
+#define CORE_ADDR_BANK_MODULUS_COEFF_OUT (BANK_LENGTH * (8 + 4))
+#define CORE_ADDR_BANK_MODULUS_COEFF_IN (BANK_LENGTH * (8 + 5))
+#define CORE_ADDR_BANK_MONTGOMERY_FACTOR_OUT (BANK_LENGTH * (8 + 6))
+#define CORE_ADDR_BANK_MONTGOMERY_FACTOR_IN (BANK_LENGTH * (8 + 7))
// bit maps
#define CORE_CONTROL_BIT_INIT 0x00000001
@@ -75,6 +82,27 @@
#define CORE_MODE_BIT_CRT 0x00000002
+ /*
+ * zero operands
+ */
+#define Z_384 \
+ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000}
+
+#define Z_192 \
+ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000}
+
+#define Z_512 \
+ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000}
+
+#define Z_256 \
+ {0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000}
/*
* test vectors
@@ -83,11 +111,15 @@ static const uint32_t m_384[] = M_384;
static const uint32_t n_384[] = N_384;
static const uint32_t d_384[] = D_384;
static const uint32_t s_384[] = S_384;
+static uint32_t n_coeff_384[] = Z_384;
+static uint32_t factor_384[] = Z_384;
static const uint32_t m_512[] = M_512;
static const uint32_t n_512[] = N_512;
static const uint32_t d_512[] = D_512;
static const uint32_t s_512[] = S_512;
+static uint32_t n_coeff_512[] = Z_512;
+static uint32_t factor_512[] = Z_512;
static const uint32_t p_192[] = P_192;
static const uint32_t q_192[] = Q_192;
@@ -95,6 +127,10 @@ static const uint32_t dp_192[] = DP_192;
static const uint32_t dq_192[] = DQ_192;
static const uint32_t mp_192[] = MP_192;
static const uint32_t mq_192[] = MQ_192;
+static uint32_t p_coeff_192[] = Z_192;
+static uint32_t q_coeff_192[] = Z_192;
+static uint32_t factor_p_192[] = Z_192;
+static uint32_t factor_q_192[] = Z_192;
static const uint32_t p_256[] = P_256;
static const uint32_t q_256[] = Q_256;
@@ -102,7 +138,10 @@ static const uint32_t dp_256[] = DP_256;
static const uint32_t dq_256[] = DQ_256;
static const uint32_t mp_256[] = MP_256;
static const uint32_t mq_256[] = MQ_256;
-
+static uint32_t p_coeff_256[] = Z_256;
+static uint32_t q_coeff_256[] = Z_256;
+static uint32_t factor_p_256[] = Z_256;
+static uint32_t factor_q_256[] = Z_256;
/*
@@ -110,16 +149,25 @@ static const uint32_t mq_256[] = MQ_256;
*/
void toggle_yellow_led(void);
-void setup_modexpa7( const uint32_t *n, size_t l);
+void setup_modexpa7( const uint32_t *n,
+ uint32_t *coeff,
+ uint32_t *factor,
+ size_t l);
-int test_modexpa7( const uint32_t *m,
+int test_modexpa7( const uint32_t *n,
+ const uint32_t *m,
const uint32_t *d,
const uint32_t *s,
+ const uint32_t *coeff,
+ const uint32_t *factor,
size_t l);
-int test_modexpa7_crt( const uint32_t *m,
+int test_modexpa7_crt( const uint32_t *n,
+ const uint32_t *m,
const uint32_t *d,
const uint32_t *s,
+ const uint32_t *coeff,
+ const uint32_t *factor,
size_t l);
@@ -148,10 +196,10 @@ int main()
fmc_read_32(CORE_ADDR_NAME1, &core_name1);
fmc_read_32(CORE_ADDR_VERSION, &core_version);
- // must be "mode", "xpa7", "0.20"
+ // must be "mode", "xpa7", "0.25"
if ( (core_name0 != 0x6D6F6465) ||
(core_name1 != 0x78706137) ||
- (core_version != 0x302E3230))
+ (core_version != 0x302E3235))
{
led_off(LED_GREEN);
led_on(LED_RED);
@@ -164,61 +212,63 @@ int main()
// largest supported operand width, systolic array "power"
fmc_read_32(CORE_ADDR_BUFFER_BITS, &core_buffer_bits);
- fmc_read_32(CORE_ADDR_ARRAY_BITS, &core_array_bits);
+ fmc_read_32(CORE_ADDR_ARRAY_BITS, &core_array_bits);
+
+ //
+ // do pre-computation for all the moduli and store speed-up quantities,
+ // note that each key requires three precomputations: one for the entire
+ // public key and one for each of the two corresponding private key components
+ //
+ // we set the 'init' control bit, wait for `ready' status bit to go high,
+ // then retrieve the calculated values from the corresponding "output" banks
+ //
+ // we turn off the green led and turn on the yellow led during the process to
+ // get an idea of how long it takes
+ //
+
+ led_off(LED_GREEN);
+ led_on(LED_YELLOW);
+
+ // 384-bit key and 192-bit primes
+ setup_modexpa7(n_384, n_coeff_384, factor_384, 384);
+ setup_modexpa7(p_192, p_coeff_192, factor_p_192, 192);
+ setup_modexpa7(q_192, q_coeff_192, factor_q_192, 192);
+
+ // 512-bit key and 256-bit primes
+ setup_modexpa7(n_512, n_coeff_512, factor_512, 512);
+ setup_modexpa7(p_256, p_coeff_256, factor_p_256, 256);
+ setup_modexpa7(q_256, q_coeff_256, factor_q_256, 256);
+
+ led_off(LED_YELLOW);
+ led_on(LED_GREEN);
+
// repeat forever
while (1)
- {
- // New modulus requires precomputation of modulus-dependent
- // speed-up coefficient, this must be done once per new
- // modulus, i.e. when we're repeatedly signing with the
- // same key, we only need to do precomputation once before
- // starting the very first signing operation.
-
+ {
// fresh start
ok = 1;
-
- {
- // run precomputation of modulus-dependent factor for the 384-bit modulus
- setup_modexpa7(n_384, 384);
-
- // try signing the message from the 384-bit test vector
- ok = ok && test_modexpa7(m_384, d_384, s_384, 384);
- }
- {
- // run precomputation of modulus-dependent factor for the 512-bit modulus
- setup_modexpa7(n_512, 512);
-
- // try signing the message from the 512-bit test vector
- ok = ok && test_modexpa7(m_512, d_512, s_512, 512);
- }
- {
- // run precomputation of modulus-dependent factor for the first 192-bit part of 384-bit modulus
- setup_modexpa7(p_192, 192);
-
+ {
+ // try signing the message with the 384-bit test vector
+ ok = ok && test_modexpa7(n_384, m_384, d_384, s_384, n_coeff_384, factor_384, 384);
+
// try signing 384-bit base using 192-bit exponent
- ok = ok && test_modexpa7_crt(m_384, dp_192, mp_192, 192);
-
- // run precomputation of modulus-dependent factor for the second 192-bit part of 384-bit modulus
- setup_modexpa7(q_192, 192);
+ ok = ok && test_modexpa7_crt(p_192, m_384, dp_192, mp_192, p_coeff_192, factor_p_192, 192);
// try signing 384-bit base using 192-bit exponent
- ok = ok && test_modexpa7_crt(m_384, dq_192, mq_192, 192);
+ ok = ok && test_modexpa7_crt(q_192, m_384, dq_192, mq_192, q_coeff_192, factor_q_192, 192);
}
+
+ {
+ // try signing the message with the 512-bit test vector
+ ok = ok && test_modexpa7(n_512, m_512, d_512, s_512, n_coeff_512, factor_512, 512);
- {
- // run precomputation of modulus-dependent factor for the first 256-bit part of 512-bit modulus
- setup_modexpa7(p_256, 256);
-
// try signing 512-bit base using 256-bit exponent
- ok = ok && test_modexpa7_crt(m_512, dp_256, mp_256, 256);
-
- // run precomputation of modulus-dependent factor for the second 256-bit part of 512-bit modulus
- setup_modexpa7(q_256, 256);
+ ok = ok && test_modexpa7_crt(p_256, m_512, dp_256, mp_256, p_coeff_256, factor_p_256, 256);
// try signing 512-bit base using 256-bit exponent
- ok = ok && test_modexpa7_crt(m_512, dq_256, mq_256, 256);
+ ok = ok && test_modexpa7_crt(q_256, m_512, dq_256, mq_256, q_coeff_256, factor_q_256, 256);
}
// turn on the red led to indicate something went wrong
@@ -234,15 +284,18 @@ int main()
/*
- * Load new modulus and do the necessary precomputations.
+ * Load new modulus and do all the necessary precomputations.
*/
void setup_modexpa7( const uint32_t *n,
+ uint32_t *coeff,
+ uint32_t *factor,
size_t l)
{
size_t i, num_words;
uint32_t num_bits;
uint32_t reg_control, reg_status;
uint32_t n_word;
+ uint32_t coeff_word, factor_word;
uint32_t dummy_num_cyc;
// determine numbers of 32-bit words
@@ -250,10 +303,9 @@ void setup_modexpa7( const uint32_t *n,
// set modulus width
num_bits = l;
- fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
+ fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
- // fill modulus bank (the least significant word
- // is at the lowest offset)
+ // fill modulus bank (the least significant word is at the lowest offset)
for (i=0; i<num_words; i++)
{ n_word = n[i];
fmc_write_32(CORE_ADDR_BANK_MODULUS + ((num_words - (i + 1)) * sizeof(uint32_t)), &n_word);
@@ -273,42 +325,70 @@ void setup_modexpa7( const uint32_t *n,
fmc_read_32(CORE_ADDR_STATUS, &reg_status);
}
while (!(reg_status & CORE_STATUS_BIT_READY));
+
+ // retrieve the modulus-dependent coefficient and Montgomery factor
+ // from the corresponding core "output" banks and store them for later use
+ for (i=0; i<num_words; i++)
+ {
+ fmc_read_32(CORE_ADDR_BANK_MODULUS_COEFF_OUT + i * sizeof(uint32_t), &coeff_word);
+ coeff[i] = coeff_word;
+
+ fmc_read_32(CORE_ADDR_BANK_MONTGOMERY_FACTOR_OUT + i * sizeof(uint32_t), &factor_word);
+ factor[i] = factor_word;
+ }
}
//
// Sign the message and compare it against the correct reference value.
//
-int test_modexpa7( const uint32_t *m,
+int test_modexpa7( const uint32_t *n,
+ const uint32_t *m,
const uint32_t *d,
const uint32_t *s,
+ const uint32_t *coeff,
+ const uint32_t *factor,
size_t l)
{
size_t i, num_words;
uint32_t num_bits;
uint32_t reg_control, reg_status;
- uint32_t m_word, d_word, s_word;
+ uint32_t n_word, m_word, d_word, s_word;
+ uint32_t coeff_word, factor_word;
uint32_t dummy_num_cyc;
uint32_t mode;
// determine numbers of 32-bit words
num_words = l >> 5;
- // set exponent width
+ // set modulus width, exponent width
num_bits = l;
- fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);
+ fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
+ fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);
// disable CRT mode
mode = 0;
fmc_write_32(CORE_ADDR_MODE, &mode);
- // fill message and exponent banks (the least significant
- // word is at the lowest offset)
+ // fill modulus, message and exponent banks (the least significant
+ // word is at the lowest offset), we also need to fill "input" core
+ // banks with previously pre-calculated and saved modulus-dependent
+ // speed-up coefficient and Montgomery factor
for (i=0; i<num_words; i++)
- { m_word = m[i];
+ {
+ n_word = n[i];
+ m_word = m[i];
d_word = d[i];
+
+ fmc_write_32(CORE_ADDR_BANK_MODULUS + ((num_words - (i + 1)) * sizeof(uint32_t)), &n_word);
fmc_write_32(CORE_ADDR_BANK_MESSAGE + ((num_words - (i + 1)) * sizeof(uint32_t)), &m_word);
fmc_write_32(CORE_ADDR_BANK_EXPONENT + ((num_words - (i + 1)) * sizeof(uint32_t)), &d_word);
+
+ coeff_word = coeff[i];
+ factor_word = factor[i];
+
+ fmc_write_32(CORE_ADDR_BANK_MODULUS_COEFF_IN + i * sizeof(uint32_t), &coeff_word);
+ fmc_write_32(CORE_ADDR_BANK_MONTGOMERY_FACTOR_IN + i * sizeof(uint32_t), &factor_word);
}
// clear 'next' control bit, then set 'next' control bit again
@@ -331,8 +411,7 @@ int test_modexpa7( const uint32_t *m,
{
fmc_read_32(CORE_ADDR_BANK_RESULT + (i * sizeof(uint32_t)), &s_word);
- if (s_word != s[num_words - (i + 1)])
- return 0;
+ if (s_word != s[num_words - (i + 1)]) return 0;
}
// everything went just fine
@@ -340,34 +419,49 @@ int test_modexpa7( const uint32_t *m,
}
-int test_modexpa7_crt( const uint32_t *m,
+int test_modexpa7_crt( const uint32_t *n,
+ const uint32_t *m,
const uint32_t *d,
const uint32_t *s,
+ const uint32_t *coeff,
+ const uint32_t *factor,
size_t l)
{
size_t i, num_words;
uint32_t num_bits;
uint32_t reg_control, reg_status;
- uint32_t m_word, d_word, s_word;
+ uint32_t n_word, m_word, d_word, s_word;
+ uint32_t coeff_word, factor_word;
uint32_t dummy_num_cyc;
uint32_t mode;
// determine numbers of 32-bit words
num_words = l >> 5;
- // set exponent width
+ // set modulus width, exponent width
num_bits = l;
- fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);
+ fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
+ fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);
// enable CRT mode
mode = CORE_MODE_BIT_CRT;
fmc_write_32(CORE_ADDR_MODE, &mode);
- // fill exponent bank (the least significant word
- // is at the lowest offset)
+ // fill modulus and exponent banks (the least significant word is at
+ // the lowest offset), we also need to fill "input" core banks with
+ // previously pre-calculated and saved modulus-dependent speed-up
+ // coefficient and Montgomery factor
for (i=0; i<num_words; i++)
- { d_word = d[i];
+ { n_word = n[i];
+ d_word = d[i];
+ fmc_write_32(CORE_ADDR_BANK_MODULUS + ((num_words - (i + 1)) * sizeof(uint32_t)), &n_word);
fmc_write_32(CORE_ADDR_BANK_EXPONENT + ((num_words - (i + 1)) * sizeof(uint32_t)), &d_word);
+
+ coeff_word = coeff[i];
+ factor_word = factor[i];
+
+ fmc_write_32(CORE_ADDR_BANK_MODULUS_COEFF_IN + i * sizeof(uint32_t), &coeff_word);
+ fmc_write_32(CORE_ADDR_BANK_MONTGOMERY_FACTOR_IN + i * sizeof(uint32_t), &factor_word);
}
// fill message bank (the least significant word
diff --git a/src/tb/tb_wrapper.v b/src/tb/tb_wrapper.v
index fae0934..054333e 100644
--- a/src/tb/tb_wrapper.v
+++ b/src/tb/tb_wrapper.v
@@ -2,6 +2,13 @@
module tb_wrapper;
+
+ //
+ // Test Vectors
+ //
+ `include "modexp_fpga_model_vectors.v";
+
+
/*
* Settings
*/
@@ -25,7 +32,7 @@ module tb_wrapper;
*/
reg bus_cs;
reg bus_we;
- reg [USE_OPERAND_ADDR_WIDTH+2:0] bus_addr;
+ reg [USE_OPERAND_ADDR_WIDTH+3:0] bus_addr;
reg [ 32-1:0] bus_wr_data;
wire [ 32-1:0] bus_rd_data;
@@ -47,7 +54,10 @@ module tb_wrapper;
.read_data (bus_rd_data)
);
+ integer i;
reg [31: 0] tmp;
+ reg [383:0] shreg;
+ reg poll;
initial begin
//
rst_n = 0;
@@ -74,11 +84,95 @@ module tb_wrapper;
write_reg('h11, 32'd384); // MODULUS_BITS
read_reg ('h11, tmp);
//
+ write_reg('h10, 32'd0); // MODE
+ read_reg ('h10, tmp);
+ //
+ // pre-calculate 384-bit quantities
+ //
+ shreg = N_384;
+ for (i=0; i<384/32; i=i+1) begin
+ write_bank(3'b000, i[USE_OPERAND_ADDR_WIDTH-1:0], shreg[31:0]);
+ shreg = shreg >> 32;
+ end
+ //
+ write_reg('h08, 32'd0); // CONTROL.init = 0
+ write_reg('h08, 32'd1); // CONTROL.init = 1
+ //
+ poll = 1;
+ while (poll) begin
+ #10;
+ read_reg('h09, tmp); // tmp = STATUS
+ poll = ~tmp[0]; // poll = ~STATUS.ready (keep polling until 'ready' is set)
+ end
+ //
+ // fill banks
+ //
+ for (i=0; i<384/32; i=i+1) begin
+ read_bank(3'b100, i[USE_OPERAND_ADDR_WIDTH-1:0], tmp);
+ write_bank(3'b101, i[USE_OPERAND_ADDR_WIDTH-1:0], tmp);
+ read_bank(3'b110, i[USE_OPERAND_ADDR_WIDTH-1:0], tmp);
+ write_bank(3'b111, i[USE_OPERAND_ADDR_WIDTH-1:0], tmp);
+ end
+ //
+ shreg = M_384;
+ for (i=0; i<384/32; i=i+1) begin
+ write_bank(3'b001, i[USE_OPERAND_ADDR_WIDTH-1:0], shreg[31:0]);
+ shreg = shreg >> 32;
+ end
+ //
+ shreg = D_384;
+ for (i=0; i<384/32; i=i+1) begin
+ write_bank(3'b010, i[USE_OPERAND_ADDR_WIDTH-1:0], shreg[31:0]);
+ shreg = shreg >> 32;
+ end
+ //
+ // wipe
+ //
+ shreg = {384{1'b0}};
+ for (i=0; i<384/32; i=i+1) begin
+ write_bank(3'b000, i[USE_OPERAND_ADDR_WIDTH-1:0], shreg[31:0]);
+ shreg = shreg >> 32;
+ end
+ //
+ write_reg('h08, 32'd0); // CONTROL.init = 0
+ write_reg('h08, 32'd1); // CONTROL.init = 1
+ //
+ poll = 1;
+ while (poll) begin
+ #10;
+ read_reg('h09, tmp); // tmp = STATUS
+ poll = ~tmp[0]; // poll = ~STATUS.ready (keep polling until 'ready' is set)
+ end
+ //
+ // restore
+ //
+ shreg = N_384;
+ for (i=0; i<384/32; i=i+1) begin
+ write_bank(3'b000, i[USE_OPERAND_ADDR_WIDTH-1:0], shreg[31:0]);
+ shreg = shreg >> 32;
+ end
+ //
+ //
+ //
+ write_reg('h08, 32'd0); // CONTROL.next = 0
+ write_reg('h08, 32'd2); // CONTROL.next = 1
+ //
+ poll = 1;
+ while (poll) begin
+ #10;
+ read_reg('h09, tmp); // tmp = STATUS
+ poll = ~tmp[1]; // poll = ~STATUS.valid (keep polling until 'valid' is set)
+ end
+ //
+ for (i=0; i<384/32; i=i+1) begin
+ read_bank(3'b011, i[USE_OPERAND_ADDR_WIDTH-1:0], tmp);
+ shreg = {tmp, shreg[383:32]};
+ end
//
end
task read_reg;
- input [USE_OPERAND_ADDR_WIDTH+1:0] addr;
+ input [USE_OPERAND_ADDR_WIDTH+2:0] addr;
output [ 32-1:0] data;
begin
bus_cs = 1;
@@ -89,9 +183,23 @@ module tb_wrapper;
data = bus_rd_data;
end
endtask
+
+ task read_bank;
+ input [ 2:0] bank;
+ input [USE_OPERAND_ADDR_WIDTH-1:0] addr;
+ output [ 32-1:0] data;
+ begin
+ bus_cs = 1;
+ bus_addr = {1'b1, bank, addr};
+ #10;
+ bus_cs = 0;
+ bus_addr = 'bX;
+ data = bus_rd_data;
+ end
+ endtask
task write_reg;
- input [USE_OPERAND_ADDR_WIDTH+1:0] addr;
+ input [USE_OPERAND_ADDR_WIDTH+2:0] addr;
input [ 32-1:0] data;
begin
bus_cs = 1;
@@ -104,6 +212,21 @@ module tb_wrapper;
bus_addr = 'bX;
end
endtask
-
+
+ task write_bank;
+ input [ 2:0] bank;
+ input [USE_OPERAND_ADDR_WIDTH-1:0] addr;
+ input [ 32-1:0] data;
+ begin
+ bus_cs = 1;
+ bus_we = 1;
+ bus_addr = {1'b1, bank, addr};
+ bus_wr_data = data;
+ #10;
+ bus_cs = 0;
+ bus_we = 0;
+ bus_addr = 'bX;
+ end
+ endtask
endmodule