aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2017-08-28 15:02:30 +0300
committer Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2017-08-28 15:02:30 +0300
commit 3538f0350c462cef3326b29a180536be0a403390 (patch)
tree e2bf7ba72d3144b5aac155cd2ed003b8c86e2e2f
parent fc1c4fcdc95bf85b71f778a941e631fc573db0c3 (diff)
Exposed internal buffers for Montgomery factor F and modulus-dependent speed-up coefficient N_COEFF
so that they can be retrieved and stored along with the modulus. Split the corresponding buffers into "input" and "output" banks: during pre-computation F and N_COEFF are written to read-only "output" banks; during exponentiation F and N_COEFF are read from read-write "input" banks and must be supplied by the user.
-rw-r--r-- src/rtl/modexpa7_top.v 80
-rw-r--r-- src/rtl/modexpa7_wrapper.v 30
2 files changed, 70 insertions, 40 deletions
diff --git a/src/rtl/modexpa7_top.v b/src/rtl/modexpa7_top.v
index ad101dd..7723b88 100644
--- a/src/rtl/modexpa7_top.v
+++ b/src/rtl/modexpa7_top.v
@@ -54,7 +54,7 @@ module modexpa7_top #
input bus_cs,
input bus_we,
- input [OPERAND_ADDR_WIDTH+1:0] bus_addr,
+ input [OPERAND_ADDR_WIDTH+2:0] bus_addr,
input [ 32-1:0] bus_data_wr,
output [ 32-1:0] bus_data_rd
);
@@ -154,17 +154,21 @@ module modexpa7_top #
/*
* Split bus address into bank/word parts.
*/
- wire [ 2 - 1 : 0] bus_addr_bank = bus_addr[OPERAND_ADDR_WIDTH+1:OPERAND_ADDR_WIDTH];
+ wire [ 3 - 1 : 0] bus_addr_bank = bus_addr[OPERAND_ADDR_WIDTH+2:OPERAND_ADDR_WIDTH];
wire [OPERAND_ADDR_WIDTH - 1 : 0] bus_addr_word = bus_addr[OPERAND_ADDR_WIDTH-1:0];
/*
* Define bank offsets.
*/
- localparam [ 1: 0] BANK_MODULUS = 2'b00; // 0
- localparam [ 1: 0] BANK_MESSAGE = 2'b01; // 1
- localparam [ 1: 0] BANK_EXPONENT = 2'b10; // 2
- localparam [ 1: 0] BANK_RESULT = 2'b11; // 3
+ localparam [ 2: 0] BANK_MODULUS = 3'b000; // 0
+ localparam [ 2: 0] BANK_MESSAGE = 3'b001; // 1
+ localparam [ 2: 0] BANK_EXPONENT = 3'b010; // 2
+ localparam [ 2: 0] BANK_RESULT = 3'b011; // 3
+ localparam [ 2: 0] BANK_MODULUS_COEFF_OUT = 3'b100; // 4
+ localparam [ 2: 0] BANK_MODULUS_COEFF_IN = 3'b101; // 5
+ localparam [ 2: 0] BANK_MONTGOMERY_FACTOR_OUT = 3'b110; // 6
+ localparam [ 2: 0] BANK_MONTGOMERY_FACTOR_IN = 3'b111; // 7
/*
@@ -176,7 +180,7 @@ module modexpa7_top #
*
* Note, that the core does squaring and multiplication simultaneously, so
* there are two identical systolic multipliers inside. It's better to have two
- * copies of modulus to give router some freeding in placing the multipliers,
+ * copies of modulus to give router some freedom in placing the multipliers,
* that's why there are actually two identical block memories N1 and N2 instead of N.
* User reads from the first one, but writes to both of them. Note that the synthesis
* tool might get too clever and find out that N1 and N2 are identical and decide
@@ -250,14 +254,18 @@ module modexpa7_top #
/*
- * Instantiate internal memories.
+ * Instantiate more block memories.
+ *
+ * Fast modular exponentiation requires two pre-calculated helper quantities: Montgomery
+ * factor F and modulus-dependent speed-up coefficient N_COEFF. This core has two separate
+ * buffers for each of those quantities. During pre-computation F and N_COEFF are written to
+ * the "output" buffers, so that the user can retrieve them and store them along with the key
+ * for future use. During exponentiation F and N_COEFF are read from the "input" buffers and
+ * must be supplied by the user along with the modulus.
*
- * We have two block memories: F for Montgomery factor and N_COEFF for modulus-dependent
- * coefficient, they are written to during pre-calculation and read from during exponentiation.
- *
- * Note, that there are actually two identical block memories N_COEFF1 and N_COEFF2 instead of
- * just one N_COEFF, read the explanation above. F is only used by one of the multipliers, so
- * we don't need F1 and F2.
+ * Note, that there are actually two identical input block memories N_COEFF1 and N_COEFF2
+ * instead of just one N_COEFF, read the explanation above. F is only used by one of
+ * the multipliers, so we don't need F1 and F2.
*/
wire [OPERAND_ADDR_WIDTH-1:0] core_f_addr_wr;
@@ -274,20 +282,38 @@ module modexpa7_top #
wire core_f_wren;
wire core_n_coeff_wren;
+
+ wire [ 32-1:0] user_f_out_data;
+ wire [ 32-1:0] user_f_in_data;
+ wire [ 32-1:0] user_n_coeff_out_data;
+ wire [ 32-1:0] user_n_coeff_in_data;
+
+ wire user_f_in_wren = bus_cs && bus_we && (bus_addr_bank == BANK_MONTGOMERY_FACTOR_IN);
+ wire user_n_coeff_in_wren = bus_cs && bus_we && (bus_addr_bank == BANK_MODULUS_COEFF_IN);
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
- bram_f (.clk(clk),
+ bram_f_out (.clk(clk),
.a_addr(core_f_addr_wr), .a_out(), .a_wr(core_f_wren), .a_in(core_f_data_wr),
+ .b_addr(bus_addr_word), .b_out(user_f_out_data));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_f_in (.clk(clk),
+ .a_addr(bus_addr_word), .a_out(user_f_in_data), .a_wr(user_f_in_wren), .a_in(bus_data_wr),
.b_addr(core_f_addr_rd), .b_out(core_f_data_rd));
-
+
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
- bram_n_coeff1 (.clk(clk),
+ bram_n_coeff_out (.clk(clk),
.a_addr(core_n_coeff_addr_wr), .a_out(), .a_wr(core_n_coeff_wren), .a_in(core_n_coeff_data_wr),
+ .b_addr(bus_addr_word), .b_out(user_n_coeff_out_data));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_n_coeff_in1 (.clk(clk),
+ .a_addr(bus_addr_word), .a_out(user_n_coeff_in_data), .a_wr(user_n_coeff_in_wren), .a_in(bus_data_wr),
.b_addr(core_n_coeff1_addr_rd), .b_out(core_n_coeff1_data_rd));
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
- bram_n_coeff2 (.clk(clk),
- .a_addr(core_n_coeff_addr_wr), .a_out(), .a_wr(core_n_coeff_wren), .a_in(core_n_coeff_data_wr),
+ bram_n_coeff_in2 (.clk(clk),
+ .a_addr(bus_addr_word), .a_out(), .a_wr(user_n_coeff_in_wren), .a_in(bus_data_wr),
.b_addr(core_n_coeff2_addr_rd), .b_out(core_n_coeff2_data_rd));
@@ -461,7 +487,7 @@ module modexpa7_top #
*/
// delay bus_addr_bank by 1 clock cycle to remember from where we've just been reading
- reg [1: 0] bus_addr_bank_dly;
+ reg [2: 0] bus_addr_bank_dly;
always @(posedge clk)
if (bus_cs) bus_addr_bank_dly <= bus_addr_bank;
@@ -474,12 +500,16 @@ module modexpa7_top #
//
case (bus_addr_bank_dly)
//
- BANK_MODULUS: bus_data_rd_mux = user_n_data;
- BANK_MESSAGE: bus_data_rd_mux = user_m_data;
- BANK_EXPONENT: bus_data_rd_mux = user_d_data;
- BANK_RESULT: bus_data_rd_mux = user_r_data;
+ BANK_MODULUS: bus_data_rd_mux = user_n_data;
+ BANK_MESSAGE: bus_data_rd_mux = user_m_data;
+ BANK_EXPONENT: bus_data_rd_mux = user_d_data;
+ BANK_RESULT: bus_data_rd_mux = user_r_data;
+ //
+ BANK_MODULUS_COEFF_OUT: bus_data_rd_mux = user_n_coeff_out_data;
+ BANK_MODULUS_COEFF_IN: bus_data_rd_mux = user_n_coeff_in_data;
+ BANK_MONTGOMERY_FACTOR_OUT: bus_data_rd_mux = user_f_out_data;
+ BANK_MONTGOMERY_FACTOR_IN: bus_data_rd_mux = user_f_in_data;
//
endcase
-
endmodule
diff --git a/src/rtl/modexpa7_wrapper.v b/src/rtl/modexpa7_wrapper.v
index a4e2319..8ebc22a 100644
--- a/src/rtl/modexpa7_wrapper.v
+++ b/src/rtl/modexpa7_wrapper.v
@@ -42,7 +42,7 @@ module modexpa7_wrapper #
input cs,
input we,
- input [OPERAND_ADDR_WIDTH+2:0] address,
+ input [OPERAND_ADDR_WIDTH+3:0] address,
input [ 32-1:0] write_data,
output [ 32-1:0] read_data
);
@@ -54,8 +54,8 @@ module modexpa7_wrapper #
localparam ADDR_MSB_REGS = 1'b0;
localparam ADDR_MSB_CORE = 1'b1;
- wire address_msb = address[OPERAND_ADDR_WIDTH+2];
- wire [OPERAND_ADDR_WIDTH+1:0] address_lsb = address[OPERAND_ADDR_WIDTH+1:0];
+ wire address_msb = address[OPERAND_ADDR_WIDTH+3];
+ wire [OPERAND_ADDR_WIDTH+2:0] address_lsb = address[OPERAND_ADDR_WIDTH+2:0];
/*
@@ -68,17 +68,17 @@ module modexpa7_wrapper #
/*
* Registers
*/
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_NAME0 = 'h00; //
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_NAME1 = 'h01; //
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_VERSION = 'h02; //
-
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_CONTROL = 'h08; // {next, init}
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_STATUS = 'h09; // {valid, ready}
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_MODE = 'h10; // {crt, dummy}
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_MODULUS_BITS = 'h11; // number of bits in modulus
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_EXPONENT_BITS = 'h12; // number of bits in exponent
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_BUFFER_BITS = 'h13; // largest supported number of bits
- localparam [OPERAND_ADDR_WIDTH+1:0] ADDR_ARRAY_BITS = 'h14; // number of bits in systolic array
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_NAME0 = 'h00; //
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_NAME1 = 'h01; //
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_VERSION = 'h02; //
+
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_CONTROL = 'h08; // {next, init}
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_STATUS = 'h09; // {valid, ready}
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_MODE = 'h10; // {crt, dummy}
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_MODULUS_BITS = 'h11; // number of bits in modulus
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_EXPONENT_BITS = 'h12; // number of bits in exponent
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_BUFFER_BITS = 'h13; // largest supported number of bits
+ localparam [OPERAND_ADDR_WIDTH+2:0] ADDR_ARRAY_BITS = 'h14; // number of bits in systolic array
localparam CONTROL_INIT_BIT = 0;
localparam CONTROL_NEXT_BIT = 1;
@@ -91,7 +91,7 @@ module modexpa7_wrapper #
localparam CORE_NAME0 = 32'h6D6F6465; // "mode"
localparam CORE_NAME1 = 32'h78706137; // "xpa7"
- localparam CORE_VERSION = 32'h302E3230; // "0.20"
+ localparam CORE_VERSION = 32'h302E3235; // "0.25"
/*