From d88715489690e1d77558bb2d89adce92ecabdc84 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 18 Jul 2017 02:26:18 +0300 Subject: Started adding exponentiator module w/ testbench. --- src/rtl/modexpa7_exponentiator.v | 578 +++++++++++++++++++++++++++++++++++++ src/tb/modexp_fpga_model_vectors.v | 22 ++ src/tb/tb_exponentiator.v | 424 +++++++++++++++++++++++++++ 3 files changed, 1024 insertions(+) create mode 100644 src/rtl/modexpa7_exponentiator.v create mode 100644 src/tb/tb_exponentiator.v diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v new file mode 100644 index 0000000..1f55cec --- /dev/null +++ b/src/rtl/modexpa7_exponentiator.v @@ -0,0 +1,578 @@ +//====================================================================== +// +// modexpa7_exponentiator.v +// ----------------------------------------------------------------------------- +// Modular Montgomery Exponentiator. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2017, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +module modexpa7_exponentiator # + ( + // + // This sets the address widths of memory buffers. Internal data + // width is 32 bits, so for e.g. 2048-bit operands buffers must store + // 2048 / 32 = 64 words, and these need 6-bit address bus, because + // 2 ** 6 = 64. + // + parameter OPERAND_ADDR_WIDTH = 4, + + // + // Passed to both systolic multipliers: 2 ** SYSTOLIC_ARRAY_POWER taps (2 -> 4-tap array).
+ // + parameter SYSTOLIC_ARRAY_POWER = 2 + ) + ( + input clk, + input rst_n, + + input ena, + output rdy, + + output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] n1_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] n2_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] n_coeff1_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] n_coeff2_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] r_bram_addr, + + input [ 32-1:0] m_bram_out, + input [ 32-1:0] d_bram_out, + input [ 32-1:0] n1_bram_out, + input [ 32-1:0] n2_bram_out, + input [ 32-1:0] n_coeff1_bram_out, + input [ 32-1:0] n_coeff2_bram_out, + + output [ 32-1:0] r_bram_in, + output r_bram_wr, + + input [OPERAND_ADDR_WIDTH-1:0] n_num_words, + input [OPERAND_ADDR_WIDTH+4:0] d_num_bits + ); + + + // + // FSM Declaration + // + localparam [ 7: 0] FSM_STATE_IDLE = 8'h00; + + localparam [ 7: 0] FSM_STATE_INIT_1 = 8'hA1; + localparam [ 7: 0] FSM_STATE_INIT_2 = 8'hA2; + localparam [ 7: 0] FSM_STATE_INIT_3 = 8'hA3; + localparam [ 7: 0] FSM_STATE_INIT_4 = 8'hA4; + + localparam [ 7: 0] FSM_STATE_LOAD_1 = 8'hB1; + localparam [ 7: 0] FSM_STATE_LOAD_2 = 8'hB2; + localparam [ 7: 0] FSM_STATE_LOAD_3 = 8'hB3; + localparam [ 7: 0] FSM_STATE_LOAD_4 = 8'hB4; + + localparam [ 7: 0] FSM_STATE_CALC_1 = 8'hC1; + localparam [ 7: 0] FSM_STATE_CALC_2 = 8'hC2; + localparam [ 7: 0] FSM_STATE_CALC_3 = 8'hC3; + + localparam [ 7: 0] FSM_STATE_FILL_1 = 8'hD1; + localparam [ 7: 0] FSM_STATE_FILL_2 = 8'hD2; + localparam [ 7: 0] FSM_STATE_FILL_3 = 8'hD3; + localparam [ 7: 0] FSM_STATE_FILL_4 = 8'hD4; + + localparam [ 7: 0] FSM_STATE_NEXT = 8'hE0; + + localparam [ 7: 0] FSM_STATE_STOP = 8'hFF; + + // + // FSM State / Next State + // + reg [ 7: 0] fsm_state = FSM_STATE_IDLE; + reg [ 7: 0] fsm_next_state; + + + // + // Enable Delay and Trigger + // + reg ena_dly = 1'b0; + + /* delay enable by one clock cycle */ + always @(posedge clk) ena_dly <= ena; + + /* trigger new operation when enable goes high 
*/ + wire ena_trig = ena && !ena_dly; + + + // + // Ready Flag Logic + // + reg rdy_reg = 1'b1; + assign rdy = rdy_reg; + + always @(posedge clk or negedge rst_n) + + /* reset flag */ + if (rst_n == 1'b0) rdy_reg <= 1'b1; + else begin + + /* clear flag when operation is started */ + if (fsm_state == FSM_STATE_IDLE) rdy_reg <= ~ena_trig; + + /* set flag after operation is finished */ + if (fsm_state == FSM_STATE_STOP) rdy_reg <= 1'b1; + + end + + + // + // Parameters Latch + // + reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch; + reg [OPERAND_ADDR_WIDTH+4:0] d_num_bits_latch; + + /* save number of words in n and number of bits in d when new operation starts */ + always @(posedge clk) + // + if (fsm_next_state == FSM_STATE_INIT_1) + {n_num_words_latch, d_num_bits_latch} <= {n_num_words, d_num_bits}; + + + // + // Block Memory Addresses + // + + /* + * Explain what every memory does. + * + */ + + /* the very first addresses */ + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}}; + + /* the very last addresses */ + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {n_num_words_latch}; + + /* address registers */ + reg [OPERAND_ADDR_WIDTH-1:0] m_addr; + reg [OPERAND_ADDR_WIDTH-1:0] d_addr; + reg [OPERAND_ADDR_WIDTH-1:0] r_addr; + reg [OPERAND_ADDR_WIDTH-1:0] t0_addr; + reg [OPERAND_ADDR_WIDTH-1:0] t1_addr; + reg [OPERAND_ADDR_WIDTH-1:0] t2_addr_wr; + wire [OPERAND_ADDR_WIDTH-1:0] t2_addr_rd; + reg [OPERAND_ADDR_WIDTH-1:0] p_addr_wr; + wire [OPERAND_ADDR_WIDTH-1:0] p1_addr_rd; + wire [OPERAND_ADDR_WIDTH-1:0] p2_addr_rd; + wire [OPERAND_ADDR_WIDTH-1:0] p3_addr_rd; + wire [OPERAND_ADDR_WIDTH-1:0] pp_addr_wr; + reg [OPERAND_ADDR_WIDTH-1:0] pp_addr_rd; + wire [OPERAND_ADDR_WIDTH-1:0] tp_addr_wr; + reg [OPERAND_ADDR_WIDTH-1:0] tp_addr_rd; + + /* handy increment values */ + wire [OPERAND_ADDR_WIDTH-1:0] m_addr_next = m_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] d_addr_next = d_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] r_addr_next = r_addr + 1'b1; + wire
[OPERAND_ADDR_WIDTH-1:0] t0_addr_next = t0_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] t1_addr_next = t1_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] t2_addr_wr_next = t2_addr_wr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] p_addr_wr_next = p_addr_wr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] pp_addr_rd_next = pp_addr_rd + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] tp_addr_rd_next = tp_addr_rd + 1'b1; + + /* handy stop flags */ + wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire t2_addr_wr_done = (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0; + wire p_addr_wr_done = (p_addr_wr == bram_addr_last) ? 1'b1 : 1'b0; + wire pp_addr_rd_done = (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0; + wire tp_addr_rd_done = (tp_addr_rd == bram_addr_last) ? 
1'b1 : 1'b0; + + /* map registers to top-level ports */ + assign m_bram_addr = m_addr; + assign d_bram_addr = d_addr; + assign r_bram_addr = r_addr; + + // + // Internal Memories + // + + /* memory inputs */ + reg [31: 0] t0_data_in; + reg [31: 0] t1_data_in; + reg [31: 0] t2_data_in; + reg [31: 0] p_data_in; + wire [31: 0] pp_data_in; + wire [31: 0] tp_data_in; + + /* memory outputs */ + wire [31: 0] t0_data_out; + wire [31: 0] t1_data_out; + wire [31: 0] t2_data_out; + wire [31: 0] p1_data_out; + wire [31: 0] p2_data_out; + wire [31: 0] p3_data_out; + wire [31: 0] pp_data_out; + wire [31: 0] tp_data_out; + + /* write enables */ + reg t0_wren; + reg t1_wren; + reg t2_wren; + reg p_wren; + wire pp_wren; + wire tp_wren; + + bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_t0 (.clk(clk), .a_addr(t0_addr), .a_wr(t0_wren), .a_in(t0_data_in), .a_out(t0_data_out)); + + bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_t1 (.clk(clk), .a_addr(t1_addr), .a_wr(t1_wren), .a_in(t1_data_in), .a_out(t1_data_out)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_t2 (.clk(clk), + .a_addr(t2_addr_wr), .a_wr(t2_wren), .a_in(t2_data_in), .a_out(), + .b_addr(t2_addr_rd), .b_out(t2_data_out)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_p1 (.clk(clk), + .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), + .b_addr(p1_addr_rd), .b_out(p1_data_out)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_p2 (.clk(clk), + .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), + .b_addr(p2_addr_rd), .b_out(p2_data_out)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_p3 (.clk(clk), + .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), + .b_addr(p3_addr_rd), .b_out(p3_data_out)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), 
.MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_pp (.clk(clk), + .a_addr(pp_addr_wr), .a_wr(pp_wren), .a_in(pp_data_in), .a_out(), + .b_addr(pp_addr_rd), .b_out(pp_data_out)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_tp (.clk(clk), + .a_addr(tp_addr_wr), .a_wr(tp_wren), .a_in(tp_data_in), .a_out(), + .b_addr(tp_addr_rd), .b_out(tp_data_out)); + + + + // + // Memory Address Control Logic + // + always @(posedge clk) begin + // + // m_addr + // + case (fsm_next_state) + FSM_STATE_INIT_1: m_addr <= bram_addr_zero; + FSM_STATE_INIT_2, + FSM_STATE_INIT_3, + FSM_STATE_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr; + endcase + // + // p_addr_wr + // + case (fsm_next_state) + FSM_STATE_INIT_3, + FSM_STATE_FILL_3: p_addr_wr <= bram_addr_zero; + FSM_STATE_INIT_4, + FSM_STATE_FILL_4: p_addr_wr <= p_addr_wr_next; + endcase + // + // t0_addr + // + case (fsm_next_state) + FSM_STATE_LOAD_3: t0_addr <= bram_addr_zero; + FSM_STATE_LOAD_4: t0_addr <= t0_addr_next; + endcase + // + // t1_addr + // + case (fsm_next_state) + FSM_STATE_INIT_3: t1_addr <= bram_addr_zero; + FSM_STATE_INIT_4: t1_addr <= t1_addr_next; + // + FSM_STATE_LOAD_1: t1_addr <= bram_addr_zero; + FSM_STATE_LOAD_2, + FSM_STATE_LOAD_3, + FSM_STATE_LOAD_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr; + endcase + // + // t2_addr_wr + // + case (fsm_next_state) + FSM_STATE_INIT_3: t2_addr_wr <= bram_addr_zero; + FSM_STATE_INIT_4: t2_addr_wr <= t2_addr_wr_next; + endcase + // + // pp_addr_rd + // + case (fsm_next_state) + FSM_STATE_FILL_1: pp_addr_rd <= bram_addr_zero; + FSM_STATE_FILL_2, + FSM_STATE_FILL_3, + FSM_STATE_FILL_4: pp_addr_rd <= !pp_addr_rd_done ? 
pp_addr_rd_next : pp_addr_rd; + endcase + // + end + + + // + // Memory Write Enable Logic + // + always @(posedge clk) begin + // + // p_wren + // + case (fsm_next_state) + FSM_STATE_INIT_3, + FSM_STATE_INIT_4, + FSM_STATE_FILL_3, + FSM_STATE_FILL_4: p_wren <= 1'b1; + default: p_wren <= 1'b0; + endcase + // + // t0_wren + // + case (fsm_next_state) + FSM_STATE_LOAD_3, + FSM_STATE_LOAD_4: t0_wren <= 1'b1; + default: t0_wren <= 1'b0; + endcase + // + // t1_wren + // + case (fsm_next_state) + FSM_STATE_INIT_3, + FSM_STATE_INIT_4: t1_wren <= 1'b1; + default: t1_wren <= 1'b0; + endcase + // + // t2_wren + // + case (fsm_next_state) + FSM_STATE_INIT_3, + FSM_STATE_INIT_4: t2_wren <= 1'b1; + default: t2_wren <= 1'b0; + endcase + // + end + + + // + // Memory Input Selector + // + always @(posedge clk) begin + // + case (fsm_next_state) + FSM_STATE_INIT_3: {t2_data_in, t1_data_in} <= {2{32'd1}}; + FSM_STATE_INIT_4: {t2_data_in, t1_data_in} <= {2{32'd0}}; + default: {t2_data_in, t1_data_in} <= {2{32'dX}}; + endcase + // + case (fsm_next_state) + FSM_STATE_INIT_3, + FSM_STATE_INIT_4: p_data_in <= m_bram_out; + // + FSM_STATE_FILL_3, + FSM_STATE_FILL_4: p_data_in <= pp_data_out; + // + default: p_data_in <= 32'dX; + endcase + // + // t0_data_in + // + case (fsm_next_state) + FSM_STATE_LOAD_3, + FSM_STATE_LOAD_4: t0_data_in <= t1_data_out; + default: t0_data_in <= 32'dX; + endcase + // + end + + + // + // Double Multiplier + // + reg mul_ena; + wire mul_rdy_pp; + wire mul_rdy_tp; + wire mul_rdy_all = mul_rdy_pp & mul_rdy_tp; + + modexpa7_systolic_multiplier # + ( + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH), + .SYSTOLIC_ARRAY_POWER (SYSTOLIC_ARRAY_POWER) + ) + mul_pp + ( + .clk (clk), + .rst_n (rst_n), + + .ena (mul_ena), + .rdy (mul_rdy_pp), + + .a_bram_addr (p1_addr_rd), + .b_bram_addr (p2_addr_rd), + .n_bram_addr (n1_bram_addr), + .n_coeff_bram_addr (n_coeff1_bram_addr), + .r_bram_addr (pp_addr_wr), + + .a_bram_out (p1_data_out), + .b_bram_out (p2_data_out), + .n_bram_out 
(n1_bram_out), + .n_coeff_bram_out (n_coeff1_bram_out), + + .r_bram_in (pp_data_in), + .r_bram_wr (pp_wren), + + .ab_num_words (n_num_words_latch) + ); + + modexpa7_systolic_multiplier # + ( + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH), + .SYSTOLIC_ARRAY_POWER (SYSTOLIC_ARRAY_POWER) + ) + mul_tp + ( + .clk (clk), + .rst_n (rst_n), + + .ena (mul_ena), + .rdy (mul_rdy_tp), + + .a_bram_addr (t2_addr_rd), + .b_bram_addr (p3_addr_rd), + .n_bram_addr (n2_bram_addr), + .n_coeff_bram_addr (n_coeff2_bram_addr), + .r_bram_addr (tp_addr_wr), + + .a_bram_out (t2_data_out), + .b_bram_out (p3_data_out), + .n_bram_out (n2_bram_out), + .n_coeff_bram_out (n_coeff2_bram_out), + + .r_bram_in (tp_data_in), + .r_bram_wr (tp_wren), + + .ab_num_words (n_num_words_latch) + ); + + + always @(posedge clk) + // + mul_ena <= (fsm_next_state == FSM_STATE_CALC_1) ? 1'b1 : 1'b0; + + + // + // Bit Counter + // + reg [OPERAND_ADDR_WIDTH+4:0] bit_cnt; + + wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_zero = {{OPERAND_ADDR_WIDTH{1'b0}}, {5{1'b0}}}; + wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_last = d_num_bits_latch; + wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_next = bit_cnt + 1'b1; + + /* handy flag */ + wire bit_cnt_done = (bit_cnt == bit_cnt_last) ? 1'b1 : 1'b0; + + always @(posedge clk) + // + if (fsm_next_state == FSM_STATE_LOAD_1) + // + case (fsm_state) + FSM_STATE_INIT_4: bit_cnt <= bit_cnt_zero; + FSM_STATE_NEXT: bit_cnt <= !bit_cnt_done ? 
bit_cnt_next : bit_cnt; + endcase + + + + // + // FSM Process + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; + else fsm_state <= fsm_next_state; + + + // + // FSM Transition Logic + // + always @* begin + // + fsm_next_state = FSM_STATE_STOP; + // + case (fsm_state) + // + FSM_STATE_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_INIT_1; + else fsm_next_state = FSM_STATE_IDLE; + // + FSM_STATE_INIT_1: fsm_next_state = FSM_STATE_INIT_2; + FSM_STATE_INIT_2: fsm_next_state = FSM_STATE_INIT_3; + FSM_STATE_INIT_3: fsm_next_state = FSM_STATE_INIT_4; + FSM_STATE_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_LOAD_1; + else fsm_next_state = FSM_STATE_INIT_4; + // + FSM_STATE_LOAD_1: fsm_next_state = FSM_STATE_LOAD_2; + FSM_STATE_LOAD_2: fsm_next_state = FSM_STATE_LOAD_3; + FSM_STATE_LOAD_3: fsm_next_state = FSM_STATE_LOAD_4; + FSM_STATE_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_CALC_1; + else fsm_next_state = FSM_STATE_LOAD_4; + // + FSM_STATE_CALC_1: fsm_next_state = FSM_STATE_CALC_2; + FSM_STATE_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_CALC_3; + else fsm_next_state = FSM_STATE_CALC_2; + FSM_STATE_CALC_3: fsm_next_state = FSM_STATE_FILL_1; + // + FSM_STATE_FILL_1: fsm_next_state = FSM_STATE_FILL_2; + FSM_STATE_FILL_2: fsm_next_state = FSM_STATE_FILL_3; + FSM_STATE_FILL_3: fsm_next_state = FSM_STATE_FILL_4; + FSM_STATE_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_NEXT; + else fsm_next_state = FSM_STATE_FILL_4; + // + FSM_STATE_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_STOP; + else fsm_next_state = FSM_STATE_LOAD_1; + // + FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE; + // + endcase + // + end + + +endmodule diff --git a/src/tb/modexp_fpga_model_vectors.v b/src/tb/modexp_fpga_model_vectors.v index 7a2b8e9..d5284c9 100644 --- a/src/tb/modexp_fpga_model_vectors.v +++ b/src/tb/modexp_fpga_model_vectors.v @@ -30,6 +30,16 @@ localparam [383:0] M_FACTOR_384 = 32'h91e92683, 
32'hc483bb6c, 32'h0ee1571d, 32'h6e28c2f5, 32'hff5e6b61, 32'h65fb6164, 32'hd3651e5a, 32'h746b8ca0}; +localparam [383:0] D_384 = + {32'had24a30c, 32'h766d8dc3, 32'he2100b02, 32'h24d1c4b0, + 32'hbb6a6342, 32'h577df9be, 32'h89bb1ec3, 32'hdc3259f0, + 32'h1a343f93, 32'h57a12599, 32'ha328ae2f, 32'hf85ef401}; + +localparam [383:0] S_384 = + {32'h65752d0f, 32'h9a017293, 32'h36bfa115, 32'h4a7a81fc, + 32'ha76b945b, 32'h49a3f645, 32'h76801499, 32'hb98e6a16, + 32'hd2467b6a, 32'h75b7d614, 32'h0fff0fde, 32'hb31d1819}; + localparam [511:0] M_512 = {32'h005536b6, 32'h43ea651f, 32'h2fd3c70a, 32'ha83659cb, 32'hd0c1f47b, 32'ha8033730, 32'h29c6b082, 32'h6db48613, @@ -66,3 +76,15 @@ localparam [511:0] M_FACTOR_512 = 32'h663032a3, 32'h70734b62, 32'h2d30c132, 32'hefa75cc6, 32'h9f18b32a, 32'h97d6ddf8, 32'h2f6df2d0, 32'he9098874}; +localparam [511:0] D_512 = + {32'hc9686c43, 32'hbbe28d66, 32'h758ef8bc, 32'h9b7828e5, + 32'h2ec2804a, 32'hb76745de, 32'h83fcbba0, 32'h2d9eba78, + 32'h215f4cc2, 32'hf49387b3, 32'h8ed0b9dc, 32'h6c129231, + 32'h944368be, 32'hdbf2db79, 32'h16323c49, 32'h34cdf801}; + +localparam [511:0] S_512 = + {32'hcc2fc6b6, 32'he4849987, 32'h75773499, 32'hcb0792b0, + 32'he79f4600, 32'hb2d739c5, 32'h1a661ac6, 32'hd3bf2db5, + 32'hfd1e029d, 32'hfe887387, 32'h4312635f, 32'hb2b54b8d, + 32'h5d3b379e, 32'h161eaa4f, 32'hedfd932b, 32'h780f0203}; + diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v new file mode 100644 index 0000000..8ca9444 --- /dev/null +++ b/src/tb/tb_exponentiator.v @@ -0,0 +1,424 @@ +//====================================================================== +// +// tb_exponentiator.v +// ----------------------------------------------------------------------------- +// Testbench for Montgomery modular exponentiation block. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2017, NORDUnet A/S All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +//====================================================================== + +`timescale 1ns / 1ps + +module tb_exponentiator; + + // + // Test Vectors + // + `include "modexp_fpga_model_vectors.v"; + + // + // Parameters + // + localparam NUM_WORDS_384 = 384 / 32; + localparam NUM_WORDS_512 = 512 / 32; + + // + // Clock (100 MHz) + // + reg clk = 1'b0; + always #5 clk = ~clk; + + // + // Inputs + // + reg rst_n; + reg ena; + + reg [ 3: 0] n_num_words; + reg [ 8: 0] d_num_bits; + + // + // Outputs + // + wire rdy; + + // + // Integers + // + integer w; + + // + // BRAM Interfaces + // + wire [ 3: 0] core_m_addr; + wire [ 3: 0] core_d_addr; + wire [ 3: 0] core_n1_addr; + wire [ 3: 0] core_n2_addr; + wire [ 3: 0] core_n_coeff1_addr; + wire [ 3: 0] core_n_coeff2_addr; + wire [ 3: 0] core_r_addr; + + wire [31: 0] core_m_data; + wire [31: 0] core_d_data; + wire [31: 0] core_n1_data; + wire [31: 0] core_n2_data; + wire [31: 0] core_n_coeff1_data; + wire [31: 0] core_n_coeff2_data; + wire [31: 0] core_r_data_in; + + wire core_r_wren; + + reg [ 3: 0] tb_mdn_addr; + reg [ 3: 0] tb_r_addr; + + reg [31:0] tb_m_data; + reg [31:0] tb_d_data; + reg [31:0] tb_n_data; + reg [31:0] tb_n_coeff_data; + wire [31:0] tb_r_data; + + reg tb_mdn_wren; + + // + // BRAMs + // + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_m (.clk(clk), + .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_m_data), .a_out(), + .b_addr(core_m_addr), .b_out(core_m_data)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_d (.clk(clk), + .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_d_data), .a_out(), + .b_addr(core_d_addr), .b_out(core_d_data)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_n1 (.clk(clk), + .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_data), .a_out(), + .b_addr(core_n1_addr), .b_out(core_n1_data)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_n2 (.clk(clk), + .a_addr(tb_mdn_addr), 
.a_wr(tb_mdn_wren), .a_in(tb_n_data), .a_out(), + .b_addr(core_n2_addr), .b_out(core_n2_data)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_n_coeff1 (.clk(clk), + .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_coeff_data), .a_out(), + .b_addr(core_n_coeff1_addr), .b_out(core_n_coeff1_data)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_n_coeff2 (.clk(clk), + .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_coeff_data), .a_out(), + .b_addr(core_n_coeff2_addr), .b_out(core_n_coeff2_data)); + + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_r (.clk(clk), + .a_addr(core_r_addr), .a_wr(core_r_wren), .a_in(core_r_data_in), .a_out(), + .b_addr(tb_r_addr), .b_out(tb_r_data)); + + // + // UUT + // + modexpa7_exponentiator # + ( + .OPERAND_ADDR_WIDTH (4), // 32 * (2**4) = 512-bit operands + .SYSTOLIC_ARRAY_POWER (2) // 2 ** 2 = 4-tap systolic array + ) + uut + ( + .clk (clk), + .rst_n (rst_n), + + .ena (ena), + .rdy (rdy), + + .m_bram_addr (core_m_addr), + .d_bram_addr (core_d_addr), + .n1_bram_addr (core_n1_addr), + .n2_bram_addr (core_n2_addr), + .n_coeff1_bram_addr (core_n_coeff1_addr), + .n_coeff2_bram_addr (core_n_coeff2_addr), + .r_bram_addr (core_r_addr), + + .m_bram_out (core_m_data), + .d_bram_out (core_d_data), + .n1_bram_out (core_n1_data), + .n2_bram_out (core_n2_data), + .n_coeff1_bram_out (core_n_coeff1_data), + .n_coeff2_bram_out (core_n_coeff2_data), // read port of bram_n_coeff2, addressed by core_n_coeff2_addr + + .r_bram_in (core_r_data_in), + .r_bram_wr (core_r_wren), + + .n_num_words (n_num_words), + .d_num_bits (d_num_bits) + ); + + + // + // Script + // + initial begin + + rst_n = 1'b0; + ena = 1'b0; + + #200; + rst_n = 1'b1; + #100; + + test_exponent_384(M_FACTOR_384, D_384, N_384, N_COEFF_384, S_384); + //test_exponent_512(M_512); + + end + + + // + // Test Tasks + // + + task test_exponent_384; + // + input [383:0] m; + input [383:0] d; + input [383:0] n; + input [383:0] n_coeff; + input [383:0] s; + reg [383:0] r; + // + integer
i; + // + begin + // + n_num_words = 4'd11; // set number of words + d_num_bits = 9'd383; // set number of bits + // + write_memory_384(m, d, n, n_coeff); // fill memory + + ena = 1; // start operation + #10; // + ena = 0; // clear flag + + while (!rdy) #10; // wait for operation to complete + read_memory_384(r); // get result from memory + + $display(" calculated: %x", r); // display result + $display(" expected: %x", s); // + + // check calculated value + if (r === s) begin + $display(" OK"); + $display("SUCCESS: Test passed."); + end else begin + $display(" ERROR"); + $display("FAILURE: Test not passed."); + end + // + end + // + endtask + /* + task test_factor_512; + // + input [511:0] n; + reg [511:0] f; + reg [511:0] factor; + integer i; + // + begin + // + calc_factor_512(n, f); // calculate factor on-the-fly + + // make sure, that the value matches the one saved in the include file + if (f !== FACTOR_512) begin + $display("ERROR: Calculated factor value differs from the one in the test vector!"); + $finish; + end + + + n_num_words = 4'd15; // set number of words + write_memory_512(n); // fill memory + + ena = 1; // start operation + #10; // + ena = 0; // clear flag + + while (!rdy) #10; // wait for operation to complete + read_memory_512(factor); // get result from memory + + $display(" calculated: %x", factor); // display result + $display(" expected: %x", f); // + + // check calculated value + if (f === factor) begin + $display(" OK"); + $display("SUCCESS: Test passed."); + end else begin + $display(" ERROR"); + $display("FAILURE: Test not passed."); + end + // + end + // + endtask + */ + + // + // write_memory_384 + // + task write_memory_384; + // + input [383:0] m; + input [383:0] d; + input [383:0] n; + input [383:0] n_coeff; + reg [383:0] m_shreg; + reg [383:0] d_shreg; + reg [383:0] n_shreg; + reg [383:0] n_coeff_shreg; + // + begin + // + tb_mdn_wren = 1; // start filling memories + m_shreg = m; // preload shift register + d_shreg = d; // preload 
shift register + n_shreg = n; // preload shift register + n_coeff_shreg = n_coeff; // preload shift register + // + for (w=0; w