diff options
-rw-r--r-- | rtl/ed25519_settings.vh | 39 | ||||
-rw-r--r-- | rtl/modular_multiplier/ed25519_modular_multiplier.v | 451 |
2 files changed, 267 insertions, 223 deletions
diff --git a/rtl/ed25519_settings.vh b/rtl/ed25519_settings.vh new file mode 100644 index 0000000..08fe8af --- /dev/null +++ b/rtl/ed25519_settings.vh @@ -0,0 +1,39 @@ +//====================================================================== +// +// Copyright (c) 2018, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`define ED25519_MAC16_PRIMITIVE mac16_generic +`define ED25519_ADD47_PRIMITIVE adder47_generic + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular_multiplier/ed25519_modular_multiplier.v b/rtl/modular_multiplier/ed25519_modular_multiplier.v index 9f8ead7..56c8537 100644 --- a/rtl/modular_multiplier/ed25519_modular_multiplier.v +++ b/rtl/modular_multiplier/ed25519_modular_multiplier.v @@ -36,6 +36,7 @@ // //------------------------------------------------------------------------------ + module ed25519_modular_multiplier ( clk, rst_n, @@ -46,6 +47,12 @@ module ed25519_modular_multiplier // + // Settings + // +`include "../ed25519_settings.vh" + + + // // Constants // localparam integer OPERAND_NUM_WORDS = 8; @@ -105,7 +112,7 @@ module ed25519_modular_multiplier reg [WORD_COUNTER_WIDTH-1:0] index_a; reg [WORD_COUNTER_WIDTH-1:0] index_b; - /* map registers to output ports */ + // map registers to output ports assign a_addr = index_a; assign b_addr = index_b; @@ -125,37 +132,39 @@ module ed25519_modular_multiplier assign rdy = fsm_shreg[0]; - wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; - //wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; - //wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; - //wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; - //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)]; - //wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)]; - //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)]; - //wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)]; - //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)]; - //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)]; - - wire inc_index_a = |fsm_shreg_inc_index_a; -// wire store_word_a = |fsm_shreg_store_word_a; -// wire inc_index_b = |fsm_shreg_inc_index_b; -// wire clear_mac_ab = |fsm_shreg_inc_index_b; -// wire shift_wide_a = |fsm_shreg_inc_index_b; -// wire enable_mac_ab = |fsm_shreg_inc_index_b; -// wire store_si_msb = |fsm_shreg_store_si_msb; -// wire store_si_lsb = fsm_shreg_store_si_lsb; -// wire shift_si = |fsm_shreg_shift_si; -// wire mask_cw1_sum = fsm_shreg_mask_cw1_sum; -// wire store_c_word = |fsm_shreg_store_c_word; -// wire reduce_start = fsm_shreg_reduce_start; -// wire reduce_stop = fsm_shreg_reduce_stop; + wire [1*OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH-(0*OPERAND_NUM_WORDS+1):FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0)]; + wire [1*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH-(0*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+1)]; + wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_part_b = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+0)]; + wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_dec_index_b = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+0)]; + wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_enable_mac_ab = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+1):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+1)]; + wire [2*OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+1)]; + wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+2)]; + wire [2*OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+3):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+1)]; + wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+4):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+4)]; + wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+5):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+4)]; + wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+5):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+5)]; + wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_wait = fsm_shreg[FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+6):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+6)]; + + wire inc_index_a = |fsm_shreg_inc_index_a; + wire store_word_a = |fsm_shreg_store_word_a; + wire store_part_b = |fsm_shreg_store_part_b; + wire dec_index_b = |fsm_shreg_dec_index_b; + wire enable_mac_ab = |fsm_shreg_enable_mac_ab; + wire store_si_msb = |fsm_shreg_store_si_msb; + wire store_si_lsb = |fsm_shreg_store_si_lsb; + wire shift_si = |fsm_shreg_shift_si; + wire mask_cw1_sum = |fsm_shreg_mask_cw1_sum; + wire store_c_word = |fsm_shreg_store_c_word; + wire reduce_start = |fsm_shreg_reduce_start; + wire reduce_wait = |fsm_shreg_reduce_wait; // // FSM Logic // -// wire reduce_done; - + wire reduce_done; + wire fsm_freeze = reduce_wait && !reduce_done; + always @(posedge clk or negedge rst_n) // if (rst_n == 1'b0) @@ -164,242 +173,238 @@ module ed25519_modular_multiplier // else begin // - if (rdy) - fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; - else - /*if (!reduce_stop || reduce_done)*/ - fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + else if (!fsm_freeze) fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; end -// -// Word Index Increment Logic -// - -/* -reg index_b_ff; - -always @(posedge clk) -// -if (inc_index_b) index_b_ff <= ~index_b_ff; -else index_b_ff <= 1'b0; - -always @(posedge clk) -// -if (rdy) begin -// -index_a <= WORD_INDEX_ZERO; -index_b <= WORD_INDEX_LAST; -// -end else begin -// -if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); -if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); -// -end - - -// -// Wide Operand Buffer -// -reg [255:0] buf_a_wide; - -always @(posedge clk) -// -if (store_word_a) -buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]}; -else if (shift_wide_a) -buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]}; - - -// -// Multiplier Array -// -wire mac_inhibit; // control signal to pause all accumulators - -wire [46: 0] mac[0:15]; // outputs of all accumulators -reg [15: 0] mac_clear; // individual per-accumulator clear flag - -assign mac_inhibit = ~enable_mac_ab; - -always @(posedge clk) -// -if (!clear_mac_ab) -mac_clear <= {16{1'b1}}; -else begin - -if (mac_clear == {16{1'b1}}) -mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}}; -else -mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}}; + // + // A Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) index_a <= WORD_INDEX_ZERO; + else if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); -end + // + // B Word Index Decrement Logic + // + always @(posedge clk) + // + if (rdy) index_b <= WORD_INDEX_LAST; + else if (dec_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); -// -// Array of parallel multipliers -// -genvar i; -generate for (i=0; i<16; i=i+1) -begin : gen_mac_array -// -mac16_wrapper mac16_inst -( -.clk (clk), -.ce (~mac_inhibit), + + // + // Wide Operand Buffer + // + reg [255:0] buf_a_wide; -.clr (mac_clear[i]), + always @(posedge clk) + // + if (store_word_a) buf_a_wide <= {buf_a_wide[16+:256-3*16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256-2*16+:16]}; + else if (enable_mac_ab) buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]}; + -.a (buf_a_wide[16*i+:16]), -.b (index_b_ff ? b_din[15:0] : b_din[31:16]), -.s (mac[i]) -); -// -end -endgenerate + // + // B Word Splitter + // + + /* + * 0: store the upper 16-bit part of the current B word + * 1: store the lower 16-bit part of the current B word + */ + + reg index_b_ff = 1'b0; -// -// Intermediate Words -// -reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb; -reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb; + always @(posedge clk) + // + if (dec_index_b) index_b_ff <= ~index_b_ff; + else index_b_ff <= 1'b0; + + // + // Narrow Operand Buffer + // + reg [15:0] buf_b_narrow; + + always @(posedge clk) + // + if (store_part_b) buf_b_narrow <= !index_b_ff ? b_din[31:16] : b_din[15:0]; -wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new; -wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new; -generate for (i=0; i<16; i=i+1) -begin : gen_si_lsb_new -assign si_lsb_new[47*i+:47] = mac[15-i]; -end -endgenerate + // + // MAC Clear Logic + // + reg [15:0] mac_clear; -generate for (i=1; i<16; i=i+1) -begin : gen_si_msb_new -assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47]; -end -endgenerate + always @(posedge clk) + // + if (!enable_mac_ab) mac_clear <= {16{1'b1}}; + else begin + if (mac_clear[0]) mac_clear <= 16'b0000000000000010; + else if (mac_clear[15]) mac_clear <= 16'b1111111111111111; + else mac_clear <= {mac_clear[14:0], 1'b0}; + end -always @(posedge clk) begin -// -if (shift_si) begin -si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]}; -si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]}; -end else begin -if (store_si_msb) -si_msb <= si_msb_new; + // + // MAC Array + // + wire [46:0] mac_accum[0:15]; -if (store_si_lsb) -si_lsb <= si_lsb_new; -end + genvar i; + + generate for (i=0; i<16; i=i+1) + // + begin : gen_mac16_array + // + `ED25519_MAC16_PRIMITIVE mac16_inst + ( + .clk (clk), + .ce (enable_mac_ab), -end + .clr (mac_clear[i]), + .a (buf_a_wide[16 * i +: 16]), + .b (buf_b_narrow), + .s (mac_accum[i]) + ); + // + end + // + endgenerate -// -// Accumulators -// -wire [46: 0] add47_cw0_s; -wire [46: 0] add47_cw1_s; + // + // Intermediate Words + // + reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb; + reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb; -// -// cw0, b, cw1, b -// -reg [30: 0] si_prev_dly; -reg [15: 0] si_next_dly; -always @(posedge clk) -// -if (shift_si) -si_prev_dly <= si_lsb[93:63]; -else -si_prev_dly <= {31{1'b0}}; + wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new; + wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new; + + generate for (i=0; i<16; i=i+1) + begin : gen_si_lsb_new + assign si_lsb_new[47*i+:47] = mac_accum[15-i]; + end + endgenerate -always @(posedge clk) -// -si_next_dly <= si_lsb[62:47]; + generate for (i=1; i<16; i=i+1) + begin : gen_si_msb_new + assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac_accum[i] : si_msb[47*(15-i)+:47]; + end + endgenerate -wire [46: 0] add47_cw0_a = si_lsb[46:0]; -wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly}; + always @(posedge clk) + // + if (shift_si) begin + si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]}; + si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]}; + end else begin + if (store_si_msb) si_msb <= si_msb_new; + if (store_si_lsb) si_lsb <= si_lsb_new; + end -wire [46: 0] add47_cw1_a = add47_cw0_s; -wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}}; -adder47_wrapper add47_cw0_inst -( -.clk (clk), -.a (add47_cw0_a), -.b (add47_cw0_b), -.s (add47_cw0_s) -); + // + // Accumulators + // + wire [46: 0] add47_cw0_s; + wire [46: 0] add47_cw1_s; -adder47_wrapper add47_cw1_inst -( -.clk (clk), -.a (add47_cw1_a), -.b (add47_cw1_b), -.s (add47_cw1_s) -); + // + // cw0, cw1 + // + reg [30: 0] si_prev_dly; + reg [15: 0] si_next_dly; + always @(posedge clk) + // + if (shift_si) si_prev_dly <= si_lsb[93:63]; + else si_prev_dly <= {31{1'b0}}; -// -// Full-Size Product -// -reg [WORD_COUNTER_WIDTH:0] bram_c_addr; + always @(posedge clk) + // + si_next_dly <= si_lsb[47+:16]; + + wire [46:0] add47_cw0_a = si_lsb[46:0]; + wire [46:0] add47_cw0_b = {{16{1'b0}}, si_prev_dly}; + + wire [46:0] add47_cw1_a = add47_cw0_s; + wire [46:0] add47_cw1_b = {{15{1'b0}}, si_next_dly, 1'b0, mask_cw1_sum ? {15{1'b0}} : add47_cw1_s[32+:15]}; + + `ED25519_ADD47_PRIMITIVE add47_cw0_inst + ( + .clk (clk), + .a (add47_cw0_a), + .b (add47_cw0_b), + .s (add47_cw0_s) + ); + + `ED25519_ADD47_PRIMITIVE add47_cw1_inst + ( + .clk (clk), + .a (add47_cw1_a), + .b (add47_cw1_b), + .s (add47_cw1_s) + ); -wire [WORD_COUNTER_WIDTH:0] reduce_c_addr; -wire [ 31:0] reduce_c_word; + + // + // Full-Size Product + // + reg [WORD_COUNTER_WIDTH:0] bram_c_addr; + + wire [WORD_COUNTER_WIDTH:0] reduce_c_addr; + wire [ 31:0] reduce_c_word; -always @(posedge clk) -// -if (store_c_word) -bram_c_addr <= bram_c_addr + 1'b1; -else -bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}}; + always @(posedge clk) + // + if (store_c_word) bram_c_addr <= bram_c_addr + 1'b1; + else bram_c_addr <= {(2*WORD_COUNTER_WIDTH){1'b0}}; -bram_1rw_1ro_readfirst # -( -.MEM_WIDTH (32), -.MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1) -) -bram_c_inst -( -.clk (clk), + bram_1rw_1ro_readfirst # + ( + .MEM_WIDTH(32), + .MEM_ADDR_BITS(WORD_COUNTER_WIDTH + 1) + ) + bram_c_inst + ( + .clk (clk), -.a_addr (bram_c_addr), -.a_wr (store_c_word), -.a_in (add47_cw1_s[31:0]), -.a_out (), + .a_addr (bram_c_addr), + .a_wr (store_c_word), + .a_in (add47_cw1_s[31:0]), + .a_out (), -.b_addr (reduce_c_addr), -.b_out (reduce_c_word) -); + .b_addr (reduce_c_addr), + .b_out (reduce_c_word) + ); -// -// Reduction Stage -// -modular_reductor_256 reduce_256_inst -( -.clk (clk), -.rst_n (rst_n), + // + // Reduction Stage + // + ed25519_modular_reductor reductor_inst + ( + .clk (clk), + .rst_n (rst_n), -.ena (reduce_start), -.rdy (reduce_done), + .ena (reduce_start), + .rdy (reduce_done), -.x_addr (reduce_c_addr), -.n_addr (n_addr), -.p_addr (p_addr), -.p_wren (p_wren), + .x_addr (reduce_c_addr), + .y_addr (p_addr), + .y_wren (p_wren), -.x_din (reduce_c_word), -.n_din (n_din), -.p_dout (p_dout) -); -*/ + .x_din (reduce_c_word), + .y_dout (p_dout) + ); endmodule |