diff options
Diffstat (limited to 'rtl/modular_multiplier/ed25519_modular_multiplier.v')
-rw-r--r-- | rtl/modular_multiplier/ed25519_modular_multiplier.v | 410 |
1 files changed, 410 insertions, 0 deletions
// Extracted from: diff --git a/rtl/modular_multiplier/ed25519_modular_multiplier.v
//                 b/rtl/modular_multiplier/ed25519_modular_multiplier.v
//                 (new file mode 100644, index 0000000..9f8ead7, @@ -0,0 +1,410 @@)
//------------------------------------------------------------------------------
//
// ed25519_modular_multiplier.v
// -----------------------------------------------------------------------------
// Curve25519 Modular Multiplier.
//
// Authors: Pavel Shatov
//
// Copyright (c) 2015-2016, 2018 NORDUnet A/S
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// - Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//------------------------------------------------------------------------------

//
// Module overview (this revision)
// -------------------------------
// Only the control skeleton is live: a one-hot shift-register FSM that
// produces `rdy`, plus the operand word indices that drive `a_addr` and
// `b_addr`.  The datapath (wide operand buffer, MAC array, accumulators,
// product memory and reduction stage) is still disabled and is kept below
// as commented-out reference code.
//
// Until the datapath is enabled the P-port outputs are tied to idle values
// so that the module never presents a floating write enable.
//
module ed25519_modular_multiplier
(
    clk, rst_n,
    ena, rdy,
    a_addr, b_addr, p_addr, p_wren,
    a_din, b_din, p_dout
);


    //
    // Constants
    //
    localparam integer OPERAND_NUM_WORDS  = 8;   // words per operand
    localparam integer WORD_COUNTER_WIDTH = 3;   // bits needed to index a word


    //
    // Handy Numbers
    //
    localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
    localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;


    //
    // Handy Functions
    //

    // Returns the index following WORD_INDEX_CURRENT, wrapping from
    // WORD_INDEX_LAST back to WORD_INDEX_ZERO.
    function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
        input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
        begin
            WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
                WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
        end
    endfunction

    // Returns the index preceding WORD_INDEX_CURRENT, wrapping from
    // WORD_INDEX_ZERO back to WORD_INDEX_LAST.
    function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST;
        input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
        begin
            WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
                WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
        end
    endfunction


    //
    // Ports
    //
    input                           clk;    // system clock
    input                           rst_n;  // active-low async reset

    input                           ena;    // enable input
    output                          rdy;    // ready output

    output [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word
    output [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word
    output [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word

    output                          p_wren; // store current P word now

    input  [31:0]                   a_din;  // current word of A
    input  [31:0]                   b_din;  // current word of B
    output [31:0]                   p_dout; // current word of P


    //
    // Word Indices
    //
    reg [WORD_COUNTER_WIDTH-1:0] index_a;
    reg [WORD_COUNTER_WIDTH-1:0] index_b;

    /* map registers to output ports */
    assign a_addr = index_a;
    assign b_addr = index_b;


    //
    // FSM
    //
    // One-hot shift register.  Bit 0 is the idle/ready state; a single set
    // bit is injected at the MSB when an operation starts and is shifted
    // towards bit 0 once per clock.  With OPERAND_NUM_WORDS = 8 the sum
    // below evaluates to 47 bits.
    //
    localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 1 +
                                 2 * OPERAND_NUM_WORDS + 1 +
                                 2 * OPERAND_NUM_WORDS + 2 +
                                 0 * OPERAND_NUM_WORDS + 2 +
                                 0 * OPERAND_NUM_WORDS + 1;

    localparam [FSM_SHREG_WIDTH-1:0] FSM_SHREG_INIT = {{(FSM_SHREG_WIDTH-1){1'b0}}, 1'b1};

    reg [FSM_SHREG_WIDTH-1:0] fsm_shreg = FSM_SHREG_INIT;

    assign rdy = fsm_shreg[0];

    // Top OPERAND_NUM_WORDS bits of the shift register: while the hot bit is
    // inside this window the A word index is advanced.
    wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a  = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];

    // Remaining strobe windows are not enabled yet.
    // NOTE(review): for fsm_shreg_inc_index_b and fsm_shreg_store_si_msb the
    // selected slice is one bit wider than the declared wire (17 vs 16 and
    // 16 vs 15 bits with the current constants) -- confirm the intended
    // bounds before enabling them.
    //wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
    //wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b  = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
    //wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
    //wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si     = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
    //wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop  = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];

    wire inc_index_a   = |fsm_shreg_inc_index_a;
//  wire store_word_a  = |fsm_shreg_store_word_a;
//  wire inc_index_b   = |fsm_shreg_inc_index_b;
//  wire clear_mac_ab  = |fsm_shreg_inc_index_b;
//  wire shift_wide_a  = |fsm_shreg_inc_index_b;
//  wire enable_mac_ab = |fsm_shreg_inc_index_b;
//  wire store_si_msb  = |fsm_shreg_store_si_msb;
//  wire store_si_lsb  = fsm_shreg_store_si_lsb;
//  wire shift_si      = |fsm_shreg_shift_si;
//  wire mask_cw1_sum  = fsm_shreg_mask_cw1_sum;
//  wire store_c_word  = |fsm_shreg_store_c_word;
//  wire reduce_start  = fsm_shreg_reduce_start;
//  wire reduce_stop   = fsm_shreg_reduce_stop;


    //
    // FSM Logic
    //
//  wire reduce_done;

    always @(posedge clk or negedge rst_n)
        //
        if (rst_n == 1'b0)
            //
            fsm_shreg <= FSM_SHREG_INIT;
            //
        else begin
            //
            if (rdy)
                // idle: stay in bit 0 unless `ena` is set, in which case the
                // hot bit is moved to the MSB and the operation starts
                fsm_shreg <= {ena, {(FSM_SHREG_WIDTH-2){1'b0}}, ~ena};
            else
                // busy: advance one state per clock
                // (disabled stall condition: if (!reduce_stop || reduce_done))
                fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
            //
        end


    //
    // Word Index Logic (interim)
    //
    // Both indices are re-initialised while the module is idle, and index_a
    // is advanced while inc_index_a is active.  This is the subset of the
    // disabled "Word Index Increment Logic" below whose control strobes
    // already exist; the index_b decrement still needs inc_index_b and
    // index_b_ff.  Without this block index_a/index_b were never assigned
    // and a_addr/b_addr were undefined.
    //
    always @(posedge clk)
        //
        if (rdy) begin
            //
            index_a <= WORD_INDEX_ZERO;
            index_b <= WORD_INDEX_LAST;
            //
        end else begin
            //
            if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
            //
        end


    //
    // P Port Idle Drivers (interim)
    //
    // The reduction stage that is meant to drive these outputs is still
    // disabled, so they are held at idle values instead of being left
    // undriven.  Remove these three assignments when reduce_256_inst is
    // enabled, otherwise the nets will have two drivers.
    //
    assign p_addr = WORD_INDEX_ZERO;
    assign p_wren = 1'b0;
    assign p_dout = {32{1'b0}};


//
// Disabled datapath (reference code, not compiled)
//
// NOTE(review): when the "Word Index Increment Logic" below is enabled it
// must replace the interim index block above.  The reductor instantiation
// references n_addr / n_din, which are not declared anywhere in this module.
//

/*

//
// Word Index Increment Logic
//
reg index_b_ff;

always @(posedge clk)
    //
    if (inc_index_b) index_b_ff <= ~index_b_ff;
    else             index_b_ff <= 1'b0;

always @(posedge clk)
    //
    if (rdy) begin
        //
        index_a <= WORD_INDEX_ZERO;
        index_b <= WORD_INDEX_LAST;
        //
    end else begin
        //
        if (inc_index_a)                index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
        if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
        //
    end


//
// Wide Operand Buffer
//
reg [255:0] buf_a_wide;

always @(posedge clk)
    //
    if (store_word_a)
        buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]};
    else if (shift_wide_a)
        buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};


//
// Multiplier Array
//
wire        mac_inhibit;  // control signal to pause all accumulators

wire [46: 0] mac[0:15];   // outputs of all accumulators
reg  [15: 0] mac_clear;   // individual per-accumulator clear flag

assign mac_inhibit = ~enable_mac_ab;

always @(posedge clk)
    //
    if (!clear_mac_ab)
        mac_clear <= {16{1'b1}};
    else begin

        if (mac_clear == {16{1'b1}})
            mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}};
        else
            mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}};


    end

//
// Array of parallel multipliers
//
genvar i;
generate for (i=0; i<16; i=i+1)
    begin : gen_mac_array
        //
        mac16_wrapper mac16_inst
        (
            .clk (clk),
            .ce  (~mac_inhibit),

            .clr (mac_clear[i]),

            .a   (buf_a_wide[16*i+:16]),
            .b   (index_b_ff ? b_din[15:0] : b_din[31:16]),
            .s   (mac[i])
        );
        //
    end
endgenerate

//
// Intermediate Words
//
reg  [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
reg  [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;


wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;

generate for (i=0; i<16; i=i+1)
    begin : gen_si_lsb_new
        assign si_lsb_new[47*i+:47] = mac[15-i];
    end
endgenerate

generate for (i=1; i<16; i=i+1)
    begin : gen_si_msb_new
        assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47];
    end
endgenerate

always @(posedge clk) begin
    //
    if (shift_si) begin
        si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
        si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
    end else begin

        if (store_si_msb)
            si_msb <= si_msb_new;

        if (store_si_lsb)
            si_lsb <= si_lsb_new;
    end

end


//
// Accumulators
//
wire [46: 0] add47_cw0_s;
wire [46: 0] add47_cw1_s;


//
// cw0, b, cw1, b
//
reg [30: 0] si_prev_dly;
reg [15: 0] si_next_dly;

always @(posedge clk)
    //
    if (shift_si)
        si_prev_dly <= si_lsb[93:63];
    else
        si_prev_dly <= {31{1'b0}};

always @(posedge clk)
    //
    si_next_dly <= si_lsb[62:47];

wire [46: 0] add47_cw0_a = si_lsb[46:0];
wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly};

wire [46: 0] add47_cw1_a = add47_cw0_s;
wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}};

adder47_wrapper add47_cw0_inst
(
    .clk (clk),
    .a   (add47_cw0_a),
    .b   (add47_cw0_b),
    .s   (add47_cw0_s)
);

adder47_wrapper add47_cw1_inst
(
    .clk (clk),
    .a   (add47_cw1_a),
    .b   (add47_cw1_b),
    .s   (add47_cw1_s)
);



//
// Full-Size Product
//
reg  [WORD_COUNTER_WIDTH:0] bram_c_addr;

wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
wire [                31:0] reduce_c_word;

always @(posedge clk)
    //
    if (store_c_word)
        bram_c_addr <= bram_c_addr + 1'b1;
    else
        bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}};

bram_1rw_1ro_readfirst #
(
    .MEM_WIDTH     (32),
    .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1)
)
bram_c_inst
(
    .clk    (clk),

    .a_addr (bram_c_addr),
    .a_wr   (store_c_word),
    .a_in   (add47_cw1_s[31:0]),
    .a_out  (),

    .b_addr (reduce_c_addr),
    .b_out  (reduce_c_word)
);


//
// Reduction Stage
//
modular_reductor_256 reduce_256_inst
(
    .clk    (clk),
    .rst_n  (rst_n),

    .ena    (reduce_start),
    .rdy    (reduce_done),

    .x_addr (reduce_c_addr),
    .n_addr (n_addr),
    .p_addr (p_addr),
    .p_wren (p_wren),

    .x_din  (reduce_c_word),
    .n_din  (n_din),
    .p_dout (p_dout)
);
*/


endmodule


//------------------------------------------------------------------------------
// End-of-File
//------------------------------------------------------------------------------