aboutsummaryrefslogtreecommitdiff
path: root/src/rtl
diff options
context:
space:
mode:
Diffstat (limited to 'src/rtl')
-rw-r--r--src/rtl/modexpa7_top.v120
-rw-r--r--src/rtl/modexpa7_wrapper.v30
2 files changed, 100 insertions, 50 deletions
diff --git a/src/rtl/modexpa7_top.v b/src/rtl/modexpa7_top.v
index ad101dd..ea3d2c2 100644
--- a/src/rtl/modexpa7_top.v
+++ b/src/rtl/modexpa7_top.v
@@ -54,7 +54,7 @@ module modexpa7_top #
input bus_cs,
input bus_we,
- input [OPERAND_ADDR_WIDTH+1:0] bus_addr,
+ input [OPERAND_ADDR_WIDTH+2:0] bus_addr,
input [ 32-1:0] bus_data_wr,
output [ 32-1:0] bus_data_rd
);
@@ -109,24 +109,38 @@ module modexpa7_top #
reg valid_reg = 1'b0;
assign ready = ready_reg;
- assign valid = valid_reg;
+ assign valid = valid_reg;
+
+ reg init_trig_latch;
+ reg next_trig_latch;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_IDLE)
+ //
+ case ({next_trig, init_trig})
+ 2'b00: {next_trig_latch, init_trig_latch} <= 2'b00; // do nothing
+ 2'b01: {next_trig_latch, init_trig_latch} <= 2'b01; // precalculate
+ 2'b10: {next_trig_latch, init_trig_latch} <= 2'b10; // exponentiate
+ 2'b11: {next_trig_latch, init_trig_latch} <= 2'b01; // 'init' has priority over 'next'
+ endcase
// ready flag logic
always @(posedge clk or negedge rst_n)
//
- if (rst_n == 1'b0) ready_reg <= 1'b0; // reset flag to default state
+ if (rst_n == 1'b0) ready_reg <= 1'b0; // reset flag to default state
else case (fsm_state)
- FSM_STATE_IDLE: if (init_trig) ready_reg <= 1'b0; // clear flag when operation is started
- FSM_STATE_STOP: if (!ready_reg) ready_reg <= 1'b1; // set flag after operation is finished
+ FSM_STATE_IDLE: if (init_trig) ready_reg <= 1'b0; // clear flag when operation is started
+ FSM_STATE_STOP: if (init_trig_latch) ready_reg <= 1'b1; // set flag after operation is finished
endcase
// valid flag logic
always @(posedge clk or negedge rst_n)
//
- if (rst_n == 1'b0) valid_reg <= 1'b0; // reset flag to default state
+ if (rst_n == 1'b0) valid_reg <= 1'b0; // reset flag to default state
else case (fsm_state)
- FSM_STATE_IDLE: if (next_trig) valid_reg <= 1'b0; // clear flag when operation is started
- FSM_STATE_STOP: if (!valid_reg) valid_reg <= 1'b1; // set flag after operation is finished
+ FSM_STATE_IDLE: if (next_trig) valid_reg <= 1'b0; // clear flag when operation is started
+ FSM_STATE_STOP: if (next_trig_latch) valid_reg <= 1'b1; // set flag after operation is finished
endcase
@@ -137,14 +151,20 @@ module modexpa7_top #
reg [OPERAND_ADDR_WIDTH+4:0] exponent_num_bits_latch;
// save number of words in modulus when pre-calculation has been triggered,
- // i.e. user has apparently loaded a new modulus into the core
+ // i.e. user has apparently loaded a new modulus into the core
+ //
+ // we also need to update modulus length when user wants to exponentiate,
+ // because he could have done precomputation for some modulus, then used
+ // a different length modulus and then reverted back the original modulus
+ // without doing precomputation (dammit, spent whole day chasing this bug :(
always @(posedge clk)
//
- if (fsm_next_state == FSM_STATE_PRECALC_START)
+ if ((fsm_next_state == FSM_STATE_PRECALC_START) ||
+ (fsm_next_state == FSM_STATE_EXPONENT_START))
modulus_num_words_latch <= modulus_num_words;
// save number of bits in exponent when exponentiation has been triggered,
- // i.e. user has loaded a new message into the core and wants exponentiate
+ // i.e. user has loaded a new message into the core and wants to exponentiate
always @(posedge clk)
//
if (fsm_next_state == FSM_STATE_EXPONENT_START)
@@ -154,17 +174,21 @@ module modexpa7_top #
/*
* Split bus address into bank/word parts.
*/
- wire [ 2 - 1 : 0] bus_addr_bank = bus_addr[OPERAND_ADDR_WIDTH+1:OPERAND_ADDR_WIDTH];
+ wire [ 3 - 1 : 0] bus_addr_bank = bus_addr[OPERAND_ADDR_WIDTH+2:OPERAND_ADDR_WIDTH];
wire [OPERAND_ADDR_WIDTH - 1 : 0] bus_addr_word = bus_addr[OPERAND_ADDR_WIDTH-1:0];
/*
* Define bank offsets.
*/
- localparam [ 1: 0] BANK_MODULUS = 2'b00; // 0
- localparam [ 1: 0] BANK_MESSAGE = 2'b01; // 1
- localparam [ 1: 0] BANK_EXPONENT = 2'b10; // 2
- localparam [ 1: 0] BANK_RESULT = 2'b11; // 3
+ localparam [ 2: 0] BANK_MODULUS = 3'b000; // 0
+ localparam [ 2: 0] BANK_MESSAGE = 3'b001; // 1
+ localparam [ 2: 0] BANK_EXPONENT = 3'b010; // 2
+ localparam [ 2: 0] BANK_RESULT = 3'b011; // 3
+ localparam [ 2: 0] BANK_MODULUS_COEFF_OUT = 3'b100; // 5
+ localparam [ 2: 0] BANK_MODULUS_COEFF_IN = 3'b101; // 4
+ localparam [ 2: 0] BANK_MONTGOMERY_FACTOR_OUT = 3'b110; // 7
+ localparam [ 2: 0] BANK_MONTGOMERY_FACTOR_IN = 3'b111; // 6
/*
@@ -176,7 +200,7 @@ module modexpa7_top #
*
* Note, that the core does squaring and multiplication simultaneously, so
* there are two identical systolic multipliers inside. It's better to have two
- * copies of modulus to give router some freeding in placing the multipliers,
+ * copies of modulus to give router some freedom in placing the multipliers,
* that's why there are actually two identical block memories N1 and N2 instead of N.
* User reads from the first one, but writes to both of them. Note that the synthesis
* tool might get too clever and find out that N1 and N2 are identical and decide
@@ -250,14 +274,18 @@ module modexpa7_top #
/*
- * Instantiate internal memories.
+ * Instantiate more block memories.
+ *
+ * Fast modular exponentiation requires two pre-calculated helper quantities: Montgomery
+ * factor F and modulus-dependent speed-up coefficient N_COEFF. This core has two separate
+ * buffers for each of those quantities, during pre-computation F and N_COEFF are written to
+ * the "output" buffers, so that user can retrieve them and store along with the key for
+ * future use. During exponentiation F and N_COEFF are read from the "input" buffers and
+ * must be supplied by user along with the modulus.
*
- * We have two block memories: F for Montgomery factor and N_COEFF for modulus-dependent
- * coefficient, they are written to during pre-calculation and read from during exponentiation.
- *
- * Note, that there are actually two identical block memories N_COEFF1 and N_COEFF2 instead of
- * just one N_COEFF, read the explanation above. F is only used by one of the multipliers, so
- * we don't need F1 and F2.
+ * Note, that there are actually two identical input block memories N_COEFF1 and N_COEFF2
+ * instead of just one N_COEFF, read the explanation above. F is only used by one of
+ * the multipliers, so we don't need F1 and F2.
*/
wire [OPERAND_ADDR_WIDTH-1:0] core_f_addr_wr;
@@ -274,20 +302,38 @@ module modexpa7_top #
wire core_f_wren;
wire core_n_coeff_wren;
+
+ wire [ 32-1:0] user_f_out_data;
+ wire [ 32-1:0] user_f_in_data;
+ wire [ 32-1:0] user_n_coeff_out_data;
+ wire [ 32-1:0] user_n_coeff_in_data;
+
+ wire user_f_in_wren = bus_cs && bus_we && (bus_addr_bank == BANK_MONTGOMERY_FACTOR_IN);
+ wire user_n_coeff_in_wren = bus_cs && bus_we && (bus_addr_bank == BANK_MODULUS_COEFF_IN);
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
- bram_f (.clk(clk),
+ bram_f_out (.clk(clk),
.a_addr(core_f_addr_wr), .a_out(), .a_wr(core_f_wren), .a_in(core_f_data_wr),
+ .b_addr(bus_addr_word), .b_out(user_f_out_data));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_f_in (.clk(clk),
+ .a_addr(bus_addr_word), .a_out(user_f_in_data), .a_wr(user_f_in_wren), .a_in(bus_data_wr),
.b_addr(core_f_addr_rd), .b_out(core_f_data_rd));
-
+
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
- bram_n_coeff1 (.clk(clk),
+ bram_n_coeff_out (.clk(clk),
.a_addr(core_n_coeff_addr_wr), .a_out(), .a_wr(core_n_coeff_wren), .a_in(core_n_coeff_data_wr),
+ .b_addr(bus_addr_word), .b_out(user_n_coeff_out_data));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_n_coeff_in1 (.clk(clk),
+ .a_addr(bus_addr_word), .a_out(user_n_coeff_in_data), .a_wr(user_n_coeff_in_wren), .a_in(bus_data_wr),
.b_addr(core_n_coeff1_addr_rd), .b_out(core_n_coeff1_data_rd));
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
- bram_n_coeff2 (.clk(clk),
- .a_addr(core_n_coeff_addr_wr), .a_out(), .a_wr(core_n_coeff_wren), .a_in(core_n_coeff_data_wr),
+ bram_n_coeff_in2 (.clk(clk),
+ .a_addr(bus_addr_word), .a_out(), .a_wr(user_n_coeff_in_wren), .a_in(bus_data_wr),
.b_addr(core_n_coeff2_addr_rd), .b_out(core_n_coeff2_data_rd));
@@ -461,7 +507,7 @@ module modexpa7_top #
*/
// delay bus_addr_bank by 1 clock cycle to remember from where we've just been reading
- reg [1: 0] bus_addr_bank_dly;
+ reg [2: 0] bus_addr_bank_dly;
always @(posedge clk)
if (bus_cs) bus_addr_bank_dly <= bus_addr_bank;
@@ -474,12 +520,16 @@ module modexpa7_top #
//
case (bus_addr_bank_dly)
//
- BANK_MODULUS: bus_data_rd_mux = user_n_data;
- BANK_MESSAGE: bus_data_rd_mux = user_m_data;
- BANK_EXPONENT: bus_data_rd_mux = user_d_data;
- BANK_RESULT: bus_data_rd_mux = user_r_data;
+ BANK_MODULUS: bus_data_rd_mux = user_n_data;
+ BANK_MESSAGE: bus_data_rd_mux = user_m_data;
+ BANK_EXPONENT: bus_data_rd_mux = user_d_data;
+ BANK_RESULT: bus_data_rd_mux = user_r_data;
+ //
+ BANK_MODULUS_COEFF_OUT: bus_data_rd_mux = user_n_coeff_out_data;
+ BANK_MODULUS_COEFF_IN: bus_data_rd_mux = user_n_coeff_in_data;
+ BANK_MONTGOMERY_FACTOR_OUT: bus_data_rd_mux = user_f_out_data;
+ BANK_MONTGOMERY_FACTOR_IN: bus_data_rd_mux = user_f_in_data;
//
endcase
-
endmodule
diff --git a/src/rtl/modexpa7_wrapper.v b/src/rtl/modexpa7_wrapper.v
index a4e2319..8ebc22a 100644
--- a/src/rtl/modexpa7_wrapper.v
+++ b/src/rtl/modexpa7_wrapper.v
@@ -42,7 +42,7 @@ module modexpa7_wrapper #
input cs,
input we,
- input [OPERAND_ADDR_WIDTH+2:0] address,
+ input [OPERAND_ADDR_WIDTH+3:0] address,
input [ 32-1:0] write_data,
output [ 32-1:0] read_data
);
@@ -54,8 +54,8 @@ module modexpa7_wrapper #
localparam ADDR_MSB_REGS = 1'b0;
localparam ADDR_MSB_CORE = 1'b1;
- wire address_msb = address[OPERAND_ADDR_WIDTH+2];
- wire [OPERAND_ADDR_WIDTH+1:0] address_lsb = address[OPERAND_ADDR_WIDTH+1:0];
+ wire address_msb = address[OPERAND_ADDR_WIDTH+3];
+ wire [OPERAND_ADDR_WIDTH+2:0] address_lsb = address[OPERAND_ADDR_WIDTH+2:0];
/*
@@ -68,17 +68,17 @@ module modexpa7_wrapper #
/*
* Registers
*/
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_NAME0 = 'h00; //
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_NAME1 = 'h01; //
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_VERSION = 'h02; //
-
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_CONTROL = 'h08; // {next, init}
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_STATUS = 'h09; // {valid, ready}
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_MODE = 'h10; // {crt, dummy}
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_MODULUS_BITS = 'h11; // number of bits in modulus
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_EXPONENT_BITS = 'h12; // number of bits in exponent
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_BUFFER_BITS = 'h13; // largest supported number of bits
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_ARRAY_BITS = 'h14; // number of bits in systolic array
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_NAME0 = 'h00; //
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_NAME1 = 'h01; //
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_VERSION = 'h02; //
+
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_CONTROL = 'h08; // {next, init}
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_STATUS = 'h09; // {valid, ready}
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_MODE = 'h10; // {crt, dummy}
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_MODULUS_BITS = 'h11; // number of bits in modulus
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_EXPONENT_BITS = 'h12; // number of bits in exponent
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_BUFFER_BITS = 'h13; // largest supported number of bits
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_ARRAY_BITS = 'h14; // number of bits in systolic array
localparam CONTROL_INIT_BIT = 0;
localparam CONTROL_NEXT_BIT = 1;
@@ -91,7 +91,7 @@ module modexpa7_wrapper #
localparam CORE_NAME0 = 32'h6D6F6465; // "mode"
localparam CORE_NAME1 = 32'h78706137; // "xpa7"
- localparam CORE_VERSION = 32'h302E3230; // "0.20"
+ localparam CORE_VERSION = 32'h302E3235; // "0.25"
/*