From 9fa6e368879d30835880b3bb0e87c8cf13dd9874 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Sun, 12 Feb 2017 22:21:57 +0300 Subject: Various clean-ups * Added sample C program for STM32 to test the core in hardware * Parametrized math modules are now instantiated with explicit operand width for clarity (previously relied on default parameter values in underlying modules) * Fixed some comments --- rtl/curve/curve_dbl_add_256.v | 12 +- rtl/modular/modular_multiplier_256.v | 30 +-- rtl/modular/modular_reductor_256.v | 352 +++++++++++++++++++---------------- 3 files changed, 213 insertions(+), 181 deletions(-) (limited to 'rtl') diff --git a/rtl/curve/curve_dbl_add_256.v b/rtl/curve/curve_dbl_add_256.v index 08a9931..8ef505d 100644 --- a/rtl/curve/curve_dbl_add_256.v +++ b/rtl/curve/curve_dbl_add_256.v @@ -1,8 +1,8 @@ //------------------------------------------------------------------------------ // -// curve_adder_256.v +// curve_dbl_add_256.v // ----------------------------------------------------------------------------- -// Elliptic curve point adder. +// Elliptic curve point adder and doubler. 
// // Authors: Pavel Shatov // @@ -292,7 +292,13 @@ module curve_dbl_add_256 wire [ 32-1:0] mw_mov_din_x; wire [ 32-1:0] mw_mov_dout_y; - mw_mover mw_mover_inst + mw_mover # + ( + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH), + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS) + + ) + mw_mover_inst ( .clk (clk), .rst_n (rst_n), diff --git a/rtl/modular/modular_multiplier_256.v b/rtl/modular/modular_multiplier_256.v index 8487aee..c2f2661 100644 --- a/rtl/modular/modular_multiplier_256.v +++ b/rtl/modular/modular_multiplier_256.v @@ -293,8 +293,8 @@ module modular_multiplier_256 // // Accumulators // - wire [46: 0] add48_cw0_s; - wire [46: 0] add48_cw1_s; + wire [46: 0] add47_cw0_s; + wire [46: 0] add47_cw1_s; // @@ -314,26 +314,26 @@ module modular_multiplier_256 // si_next_dly <= si_lsb[62:47]; - wire [46: 0] add48_cw0_a = si_lsb[46:0]; - wire [46: 0] add48_cw0_b = {{16{1'b0}}, si_prev_dly}; + wire [46: 0] add47_cw0_a = si_lsb[46:0]; + wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly}; - wire [46: 0] add48_cw1_a = add48_cw0_s; - wire [46: 0] add48_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add48_cw1_s[46:32]}}; + wire [46: 0] add47_cw1_a = add47_cw0_s; + wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? 
{16{1'b0}} : {1'b0, add47_cw1_s[46:32]}}; - adder47_wrapper add48_cw0_inst + adder47_wrapper add47_cw0_inst ( .clk (clk), - .a (add48_cw0_a), - .b (add48_cw0_b), - .s (add48_cw0_s) + .a (add47_cw0_a), + .b (add47_cw0_b), + .s (add47_cw0_s) ); - adder47_wrapper add48_cw1_inst + adder47_wrapper add47_cw1_inst ( .clk (clk), - .a (add48_cw1_a), - .b (add48_cw1_b), - .s (add48_cw1_s) + .a (add47_cw1_a), + .b (add47_cw1_b), + .s (add47_cw1_s) ); @@ -364,7 +364,7 @@ module modular_multiplier_256 .a_addr (bram_c_addr), .a_wr (store_c_word), - .a_in (add48_cw1_s[31:0]), + .a_in (add47_cw1_s[31:0]), .a_out (), .b_addr (reduce_c_addr), diff --git a/rtl/modular/modular_reductor_256.v b/rtl/modular/modular_reductor_256.v index 774f42e..e4b346a 100644 --- a/rtl/modular/modular_reductor_256.v +++ b/rtl/modular/modular_reductor_256.v @@ -1,39 +1,39 @@ -//------------------------------------------------------------------------------ -// -// modular_reductor_256.v -// ----------------------------------------------------------------------------- +//------------------------------------------------------------------------------ +// +// modular_reductor_256.v +// ----------------------------------------------------------------------------- // Modular reductor. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. 
-// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// //------------------------------------------------------------------------------ module modular_reductor_256 @@ -44,62 +44,62 @@ module modular_reductor_256 x_din, n_din, p_dout ); - // - // Constants - // + // + // Constants + // localparam OPERAND_NUM_WORDS = 8; localparam WORD_COUNTER_WIDTH = 3; - // - // Handy Numbers - // - localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0; + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0; localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1; - // - // Handy Functions - // - function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST; - input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT; - begin - WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? - WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; - end + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST; + input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? 
+ WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end endfunction // // Ports - // + // input wire clk; // system clock - input wire rst_n; // active-low async reset - - input wire ena; // enable input + input wire rst_n; // active-low async reset + + input wire ena; // enable input output wire rdy; // ready output - - output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word - output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word - output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word + + output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word output wire p_wren; // store current P word now - + input wire [ 31:0] x_din; // X input wire [ 31:0] n_din; // N (must be P-256!) output wire [ 31:0] p_dout; // P = X mod N - // - // Word Indices - // + // + // Word Indices + // reg [WORD_COUNTER_WIDTH:0] index_x; - - - /* map registers to output ports */ + + + /* map registers to output ports */ assign x_addr = index_x; - - // + + // // FSM // localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1; @@ -136,34 +136,34 @@ module modular_reductor_256 wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done; - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) - // - fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; - // - else begin - // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // if (rdy) // fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; - // + // else if (!reduce_stop || reduce_done_all) // - fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; - // + fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // end - // - // Word Index Increment Logic - // - always @(posedge clk) - // - if (rdy) - // - 
index_x <= WORD_INDEX_LAST; - // + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) + // + index_x <= WORD_INDEX_LAST; + // else if (inc_index_x) // index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x); @@ -173,14 +173,27 @@ module modular_reductor_256 // Look-up Table // - // TODO: Explain s5!!! + // + // Take a look at the corresponding C model for more information + // on how exactly the math behind reduction works. The first step + // is to assemble nine 256-bit values ("z-words") from 32-bit parts + // of the full 512-bit product ("c-word"). The problem with z5 is + // that it contains c13 two times. This implementation scans from + // c15 to c0 and writes current part of c-word into corresponding + // parts of z-words. Since those 32-bit parts are stored in block + // memories, one source word can only be written to one location in + // every z-word at a time. The trick is to delay c13 and then write + // the delayed value at the corresponding location in z5 instead of + // the next c12. "z_save" flag is used to indicate that the current + // word should be delayed and written once again during the next cycle. 
+ // reg [9*WORD_COUNTER_WIDTH-1:0] z_addr; // reg [9 -1:0] z_wren; // reg [9 -1:0] z_mask; // mask input to store zero word reg [9 -1:0] z_save; // save previous word once again - always @(posedge clk) + always @(posedge clk) // if (inc_index_x) // @@ -204,9 +217,9 @@ module modular_reductor_256 4'd13: z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx}; 4'd14: z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx}; 4'd15: z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx}; - // + // default: z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}}; - // + // endcase always @(posedge clk) @@ -231,9 +244,9 @@ module modular_reductor_256 4'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; 4'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; 4'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - // + // default: z_wren <= {9{1'b0}}; - // + // endcase always @(posedge clk) @@ -260,11 +273,11 @@ module modular_reductor_256 4'd13: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0}; 4'd14: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; 4'd15: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - // + // default: z_mask <= {9{1'bX}}; - // + // endcase - + always @(posedge clk) // if (inc_index_x) @@ -289,9 +302,9 @@ module modular_reductor_256 4'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; 4'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; 4'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - // + // default: z_save <= {9{1'bX}}; - // + // endcase @@ -312,18 +325,18 @@ module modular_reductor_256 // begin : gen_z_bram // - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) - bram_c_inst - ( - .clk (clk), - - .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]), - .a_wr (z_wren[z-1] & store_word_z), - .a_in 
(z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)), - .a_out (), - - .b_addr (reduce_z_addr[z]), - .b_out (reduce_z_dout[z]) + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_c_inst + ( + .clk (clk), + + .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]), + .a_wr (z_wren[z-1] & store_word_z), + .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)), + .a_out (), + + .b_addr (reduce_z_addr[z]), + .b_out (reduce_z_dout[z]) ); // end @@ -355,50 +368,49 @@ module modular_reductor_256 reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr; - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) - bram_sum0_inst - ( - .clk (clk), - - .a_addr (bram_sum0_wr_addr), - .a_wr (bram_sum0_wr_wren), - .a_in (bram_sum0_wr_din), - .a_out (), - - .b_addr (bram_sum0_rd_addr), - .b_out (bram_sum0_rd_dout) + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_sum0_inst + ( + .clk (clk), + + .a_addr (bram_sum0_wr_addr), + .a_wr (bram_sum0_wr_wren), + .a_in (bram_sum0_wr_din), + .a_out (), + + .b_addr (bram_sum0_rd_addr), + .b_out (bram_sum0_rd_dout) ); - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) - bram_sum1_inst - ( - .clk (clk), - - .a_addr (bram_sum1_wr_addr), - .a_wr (bram_sum1_wr_wren), - .a_in (bram_sum1_wr_din), - .a_out (), - - .b_addr (bram_sum1_rd_addr), - .b_out (bram_sum1_rd_dout) + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_sum1_inst + ( + .clk (clk), + + .a_addr (bram_sum1_wr_addr), + .a_wr (bram_sum1_wr_wren), + .a_in (bram_sum1_wr_din), + .a_out (), + + .b_addr (bram_sum1_rd_addr), + .b_out (bram_sum1_rd_dout) ); - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) - bram_diff_inst - ( - .clk (clk), - - .a_addr (bram_diff_wr_addr), - .a_wr (bram_diff_wr_wren), - .a_in (bram_diff_wr_din), - .a_out (), - - .b_addr (bram_diff_rd_addr), - .b_out 
(bram_diff_rd_dout) + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_diff_inst + ( + .clk (clk), + + .a_addr (bram_diff_wr_addr), + .a_wr (bram_diff_wr_wren), + .a_in (bram_diff_wr_din), + .a_out (), + + .b_addr (bram_diff_rd_addr), + .b_out (bram_diff_rd_dout) ); - wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr; wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr; wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr; @@ -414,7 +426,12 @@ module modular_reductor_256 // n_addr - only 1 output, because all modules are in sync - modular_adder adder_inst0 + modular_adder # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) + ) + adder_inst0 ( .clk (clk), .rst_n (rst_n), @@ -426,14 +443,19 @@ module modular_reductor_256 .n_addr (), .s_addr (bram_sum0_wr_addr), .s_wren (bram_sum0_wr_wren), - + .a_din (adder0_a_din), .b_din (adder0_b_din), .n_din (n_din), .s_dout (bram_sum0_wr_din) ); - modular_adder adder_inst1 + modular_adder # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) + ) + adder_inst1 ( .clk (clk), .rst_n (rst_n), @@ -445,14 +467,19 @@ module modular_reductor_256 .n_addr (), .s_addr (bram_sum1_wr_addr), .s_wren (bram_sum1_wr_wren), - + .a_din (adder1_a_din), .b_din (adder1_b_din), .n_din (n_din), .s_dout (bram_sum1_wr_din) ); - modular_subtractor subtractor_inst + modular_subtractor # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) + ) + subtractor_inst ( .clk (clk), .rst_n (rst_n), @@ -464,7 +491,7 @@ module modular_reductor_256 .n_addr (n_addr), .d_addr (bram_diff_wr_addr), .d_wren (bram_diff_wr_wren), - + .a_din (subtractor_a_din), .b_din (subtractor_b_din), .n_din (n_din), @@ -473,7 +500,7 @@ module modular_reductor_256 // - // address + // Address (Operand) Selector // always @(*) // @@ -572,7 +599,6 @@ module modular_reductor_256 endcase - // // adder 0 // @@ -650,17 +676,17 @@ module modular_reductor_256 end - - 
- assign p_addr = bram_sum0_wr_addr; + // + // Address Mapping + // + assign p_addr = bram_sum0_wr_addr; assign p_wren = bram_sum0_wr_wren & store_p; assign p_dout = bram_sum0_wr_din; - - + endmodule -//------------------------------------------------------------------------------ -// End-of-File +//------------------------------------------------------------------------------ +// End-of-File //------------------------------------------------------------------------------ -- cgit v1.2.3