aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2017-07-18 02:26:18 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2017-07-18 02:26:18 +0300
commitd88715489690e1d77558bb2d89adce92ecabdc84 (patch)
treee1e778d89fd8590cab6204204f7cff4f055a2e03
parent72a67f04a21ba4006c7b5bf38e01a3aa6592740f (diff)
Started adding exponentiator module w/ testbench.
-rw-r--r--src/rtl/modexpa7_exponentiator.v578
-rw-r--r--src/tb/modexp_fpga_model_vectors.v22
-rw-r--r--src/tb/tb_exponentiator.v424
3 files changed, 1024 insertions, 0 deletions
diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v
new file mode 100644
index 0000000..1f55cec
--- /dev/null
+++ b/src/rtl/modexpa7_exponentiator.v
@@ -0,0 +1,578 @@
+//======================================================================
+//
+// modexpa7_exponentiator.v
+// -----------------------------------------------------------------------------
+// Modular Montgomery Exponentiator.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2017, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module modexpa7_exponentiator #
+ (
+ //
+ // This sets the address width of the memory buffers. Internal data
+ // width is 32 bits, so e.g. for 2048-bit operands the buffers must store
+ // 2048 / 32 = 64 words, and these need a 6-bit address bus, because
+ // 2 ** 6 = 64.
+ //
+ parameter OPERAND_ADDR_WIDTH = 4,
+
+ //
+ // Systolic array size for both multipliers: 2 ** SYSTOLIC_ARRAY_POWER taps.
+ //
+ parameter SYSTOLIC_ARRAY_POWER = 2
+ )
+ (
+ input clk,
+ input rst_n,
+
+ input ena,
+ output rdy,
+
+ output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] n1_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] n2_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] n_coeff1_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] n_coeff2_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] r_bram_addr,
+
+ input [ 32-1:0] m_bram_out,
+ input [ 32-1:0] d_bram_out,
+ input [ 32-1:0] n1_bram_out,
+ input [ 32-1:0] n2_bram_out,
+ input [ 32-1:0] n_coeff1_bram_out,
+ input [ 32-1:0] n_coeff2_bram_out,
+
+ output [ 32-1:0] r_bram_in,
+ output r_bram_wr,
+
+ input [OPERAND_ADDR_WIDTH-1:0] n_num_words,
+ input [OPERAND_ADDR_WIDTH+4:0] d_num_bits
+ );
+
+
+ //
+ // FSM Declaration
+ //
+ localparam [ 7: 0] FSM_STATE_IDLE = 8'h00; // wait for a start trigger
+
+ localparam [ 7: 0] FSM_STATE_INIT_1 = 8'hA1; // INIT_*: copy M into P1..P3,
+ localparam [ 7: 0] FSM_STATE_INIT_2 = 8'hA2; // write 1 into the first word of
+ localparam [ 7: 0] FSM_STATE_INIT_3 = 8'hA3; // T1 and T2 and 0 into all the
+ localparam [ 7: 0] FSM_STATE_INIT_4 = 8'hA4; // remaining words
+
+ localparam [ 7: 0] FSM_STATE_LOAD_1 = 8'hB1; // LOAD_*: copy T1 into T0
+ localparam [ 7: 0] FSM_STATE_LOAD_2 = 8'hB2;
+ localparam [ 7: 0] FSM_STATE_LOAD_3 = 8'hB3;
+ localparam [ 7: 0] FSM_STATE_LOAD_4 = 8'hB4;
+
+ localparam [ 7: 0] FSM_STATE_CALC_1 = 8'hC1; // pulse mul_ena for both multipliers
+ localparam [ 7: 0] FSM_STATE_CALC_2 = 8'hC2; // wait until both multipliers are ready
+ localparam [ 7: 0] FSM_STATE_CALC_3 = 8'hC3; // single pass-through cycle
+
+ localparam [ 7: 0] FSM_STATE_FILL_1 = 8'hD1; // FILL_*: copy PP back into P1..P3
+ localparam [ 7: 0] FSM_STATE_FILL_2 = 8'hD2;
+ localparam [ 7: 0] FSM_STATE_FILL_3 = 8'hD3;
+ localparam [ 7: 0] FSM_STATE_FILL_4 = 8'hD4;
+
+ localparam [ 7: 0] FSM_STATE_NEXT = 8'hE0; // stop after the last bit, otherwise loop back to LOAD_1
+
+ localparam [ 7: 0] FSM_STATE_STOP = 8'hFF; // raise rdy, then return to IDLE
+
+ //
+ // FSM State / Next State
+ //
+ reg [ 7: 0] fsm_state = FSM_STATE_IDLE; // registered current state
+ reg [ 7: 0] fsm_next_state; // combinational, driven by the FSM transition logic
+
+
+ //
+ // Enable Delay and Trigger
+ //
+ reg ena_dly = 1'b0;
+
+ /* ena_dly holds the value ena had on the previous rising clock edge */
+ always @(posedge clk)
+    ena_dly <= ena;
+
+ /* rising-edge detector: ena is high now, but was still low one clock ago */
+ wire ena_trig;
+ assign ena_trig = ena & ~ena_dly;
+
+
+ //
+ // Ready Flag Logic
+ //
+ reg rdy_reg = 1'b1;
+ assign rdy = rdy_reg;
+
+ always @(posedge clk or negedge rst_n) begin
+    //
+    if (!rst_n)
+       /* asynchronous reset: report ready */
+       rdy_reg <= 1'b1;
+    else if (fsm_state == FSM_STATE_IDLE)
+       /* idle: the flag drops as soon as a start trigger is seen */
+       rdy_reg <= !ena_trig;
+    else if (fsm_state == FSM_STATE_STOP)
+       /* operation finished: report ready again */
+       rdy_reg <= 1'b1;
+    //
+ end
+
+
+ //
+ // Parameters Latch
+ //
+ reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch;
+ reg [OPERAND_ADDR_WIDTH+4:0] d_num_bits_latch;
+
+ /* capture n_num_words and d_num_bits on the clock edge that enters INIT_1, */
+ /* i.e. once per operation, so later changes on the ports are ignored */
+ always @(posedge clk) begin
+    //
+    if (fsm_next_state == FSM_STATE_INIT_1) begin
+       n_num_words_latch <= n_num_words;
+       d_num_bits_latch <= d_num_bits;
+    end
+    //
+ end
+
+
+ //
+ // Block Memory Addresses
+ //
+
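+ /*
+ * M - external input, copied into P1..P3 during initialization.
+ * P1..P3 - three copies of the same operand (shared write port), each
+ * with its own read port for one multiplier input.
+ * PP, TP - results written by the two multipliers (PP from P1 and P2,
+ * TP from T2 and P3).
+ * T0..T2 - temporaries: T1 and T2 are initialized to 1, T0 is loaded
+ * from T1.
+ */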
+
+ /* the very first addresses */
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}};
+
+ /* the very last addresses */
+ /* (the latched n_num_words is used directly as the last address, so the done flags below fire on address n_num_words) */
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {n_num_words_latch};
+
+ /* address registers */
+ /* reg-typed addresses are meant to be driven by this module, wire-typed ones are driven by the multiplier instances */
+ /* NOTE(review): d_addr, r_addr and tp_addr_rd are not assigned anywhere in this module in this revision */
+ reg [OPERAND_ADDR_WIDTH-1:0] m_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] d_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] r_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] t0_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] t1_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] t2_addr_wr;
+ wire [OPERAND_ADDR_WIDTH-1:0] t2_addr_rd;
+ reg [OPERAND_ADDR_WIDTH-1:0] p_addr_wr;
+ wire [OPERAND_ADDR_WIDTH-1:0] p1_addr_rd;
+ wire [OPERAND_ADDR_WIDTH-1:0] p2_addr_rd;
+ wire [OPERAND_ADDR_WIDTH-1:0] p3_addr_rd;
+ wire [OPERAND_ADDR_WIDTH-1:0] pp_addr_wr;
+ reg [OPERAND_ADDR_WIDTH-1:0] pp_addr_rd;
+ wire [OPERAND_ADDR_WIDTH-1:0] tp_addr_wr;
+ reg [OPERAND_ADDR_WIDTH-1:0] tp_addr_rd;
+
+ /* handy increment values */
+ /* (same width as the address, so the sum wraps around after the highest address) */
+ wire [OPERAND_ADDR_WIDTH-1:0] m_addr_next = m_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] d_addr_next = d_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] r_addr_next = r_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] t0_addr_next = t0_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] t1_addr_next = t1_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] t2_addr_wr_next = t2_addr_wr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] p_addr_wr_next = p_addr_wr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] pp_addr_rd_next = pp_addr_rd + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] tp_addr_rd_next = tp_addr_rd + 1'b1;
+
+ /* handy stop flags */
+ /* (high while the corresponding address points at the last word) */
+ wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t2_addr_wr_done = (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire p_addr_wr_done = (p_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire pp_addr_rd_done = (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
+ wire tp_addr_rd_done = (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
+
+ /* map registers to top-level ports */
+ assign m_bram_addr = m_addr;
+ assign d_bram_addr = d_addr;
+ assign r_bram_addr = r_addr;
+
+ //
+ // Internal Memories
+ //
+
+ /* memory inputs */
+ reg [31: 0] t0_data_in;
+ reg [31: 0] t1_data_in;
+ reg [31: 0] t2_data_in;
+ reg [31: 0] p_data_in; // shared by P1, P2 and P3
+ wire [31: 0] pp_data_in; // driven by multiplier mul_pp
+ wire [31: 0] tp_data_in; // driven by multiplier mul_tp
+
+ /* memory outputs */
+ wire [31: 0] t0_data_out;
+ wire [31: 0] t1_data_out;
+ wire [31: 0] t2_data_out;
+ wire [31: 0] p1_data_out;
+ wire [31: 0] p2_data_out;
+ wire [31: 0] p3_data_out;
+ wire [31: 0] pp_data_out;
+ wire [31: 0] tp_data_out;
+
+ /* write enables */
+ reg t0_wren;
+ reg t1_wren;
+ reg t2_wren;
+ reg p_wren; // shared by P1, P2 and P3
+ wire pp_wren; // driven by multiplier mul_pp
+ wire tp_wren; // driven by multiplier mul_tp
+
+ /* T0 and T1: single-port memories (one address is used for both reading and writing) */
+ bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_t0 (.clk(clk), .a_addr(t0_addr), .a_wr(t0_wren), .a_in(t0_data_in), .a_out(t0_data_out));
+
+ bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_t1 (.clk(clk), .a_addr(t1_addr), .a_wr(t1_wren), .a_in(t1_data_in), .a_out(t1_data_out));
+
+ /* T2: written by this module through port A, read by mul_tp through port B */
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_t2 (.clk(clk),
+ .a_addr(t2_addr_wr), .a_wr(t2_wren), .a_in(t2_data_in), .a_out(),
+ .b_addr(t2_addr_rd), .b_out(t2_data_out));
+
+ /* P1, P2, P3: identical write-side connections (p_addr_wr, p_wren, p_data_in), so all three */
+ /* receive the same writes; each one has its own read port for one multiplier operand */
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_p1 (.clk(clk),
+ .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+ .b_addr(p1_addr_rd), .b_out(p1_data_out));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_p2 (.clk(clk),
+ .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+ .b_addr(p2_addr_rd), .b_out(p2_data_out));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_p3 (.clk(clk),
+ .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+ .b_addr(p3_addr_rd), .b_out(p3_data_out));
+
+ /* PP and TP: written by the multipliers through port A, read by this module through port B */
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_pp (.clk(clk),
+ .a_addr(pp_addr_wr), .a_wr(pp_wren), .a_in(pp_data_in), .a_out(),
+ .b_addr(pp_addr_rd), .b_out(pp_data_out));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_tp (.clk(clk),
+ .a_addr(tp_addr_wr), .a_wr(tp_wren), .a_in(tp_data_in), .a_out(),
+ .b_addr(tp_addr_rd), .b_out(tp_data_out));
+
+
+
+ //
+ // Memory Address Control Logic
+ //
+ /* m_addr: rewound on entry to INIT_1, then stepped once per INIT state until it rests on the last word */
+ always @(posedge clk)
+    case (fsm_next_state)
+       FSM_STATE_INIT_1: m_addr <= bram_addr_zero;
+       FSM_STATE_INIT_2,
+       FSM_STATE_INIT_3,
+       FSM_STATE_INIT_4: m_addr <= m_addr_done ? m_addr : m_addr_next;
+    endcase
+
+ /* p_addr_wr: write pointer into P1..P3, used both for the initial copy and for the refill */
+ always @(posedge clk)
+    case (fsm_next_state)
+       FSM_STATE_INIT_3,
+       FSM_STATE_FILL_3: p_addr_wr <= bram_addr_zero;
+       FSM_STATE_INIT_4,
+       FSM_STATE_FILL_4: p_addr_wr <= p_addr_wr_next;
+    endcase
+
+ /* t0_addr: write pointer into T0 while it is being loaded */
+ always @(posedge clk)
+    case (fsm_next_state)
+       FSM_STATE_LOAD_3: t0_addr <= bram_addr_zero;
+       FSM_STATE_LOAD_4: t0_addr <= t0_addr_next;
+    endcase
+
+ /* t1_addr: write pointer during INIT, read pointer (resting on the last word) during LOAD */
+ always @(posedge clk)
+    case (fsm_next_state)
+       FSM_STATE_INIT_3: t1_addr <= bram_addr_zero;
+       FSM_STATE_INIT_4: t1_addr <= t1_addr_next;
+       //
+       FSM_STATE_LOAD_1: t1_addr <= bram_addr_zero;
+       FSM_STATE_LOAD_2,
+       FSM_STATE_LOAD_3,
+       FSM_STATE_LOAD_4: t1_addr <= t1_addr_done ? t1_addr : t1_addr_next;
+    endcase
+
+ /* t2_addr_wr: write pointer into T2 during INIT */
+ always @(posedge clk)
+    case (fsm_next_state)
+       FSM_STATE_INIT_3: t2_addr_wr <= bram_addr_zero;
+       FSM_STATE_INIT_4: t2_addr_wr <= t2_addr_wr_next;
+    endcase
+
+ /* pp_addr_rd: read pointer into PP during FILL, rests on the last word */
+ always @(posedge clk)
+    case (fsm_next_state)
+       FSM_STATE_FILL_1: pp_addr_rd <= bram_addr_zero;
+       FSM_STATE_FILL_2,
+       FSM_STATE_FILL_3,
+       FSM_STATE_FILL_4: pp_addr_rd <= pp_addr_rd_done ? pp_addr_rd : pp_addr_rd_next;
+    endcase
+
+
+ //
+ // Memory Write Enable Logic
+ //
+ /* p_wren: P1..P3 are written during the initial copy (INIT) and during the refill (FILL) */
+ always @(posedge clk)
+    case (fsm_next_state)
+       FSM_STATE_INIT_3,
+       FSM_STATE_INIT_4,
+       FSM_STATE_FILL_3,
+       FSM_STATE_FILL_4: p_wren <= 1'b1;
+       default: p_wren <= 1'b0;
+    endcase
+
+ /* t0_wren: T0 is written while it is loaded (LOAD) */
+ always @(posedge clk)
+    case (fsm_next_state)
+       FSM_STATE_LOAD_3,
+       FSM_STATE_LOAD_4: t0_wren <= 1'b1;
+       default: t0_wren <= 1'b0;
+    endcase
+
+ /* t1_wren: T1 is written only during initialization */
+ always @(posedge clk)
+    case (fsm_next_state)
+       FSM_STATE_INIT_3,
+       FSM_STATE_INIT_4: t1_wren <= 1'b1;
+       default: t1_wren <= 1'b0;
+    endcase
+
+ /* t2_wren: T2 is written only during initialization */
+ always @(posedge clk)
+    case (fsm_next_state)
+       FSM_STATE_INIT_3,
+       FSM_STATE_INIT_4: t2_wren <= 1'b1;
+       default: t2_wren <= 1'b0;
+    endcase
+
+
+ //
+ // Memory Input Selector
+ //
+ always @(posedge clk) begin
+    //
+    // t1_data_in, t2_data_in: the first word written is 1, every following
+    // word is 0; outside of INIT the value is a don't-care
+    //
+    case (fsm_next_state)
+       FSM_STATE_INIT_3: begin
+          t1_data_in <= 32'd1;
+          t2_data_in <= 32'd1;
+       end
+       FSM_STATE_INIT_4: begin
+          t1_data_in <= 32'd0;
+          t2_data_in <= 32'd0;
+       end
+       default: begin
+          t1_data_in <= {32{1'bX}};
+          t2_data_in <= {32{1'bX}};
+       end
+    endcase
+    //
+    // p_data_in: taken from M during INIT and from PP during FILL
+    //
+    case (fsm_next_state)
+       FSM_STATE_INIT_3,
+       FSM_STATE_INIT_4: p_data_in <= m_bram_out;
+       FSM_STATE_FILL_3,
+       FSM_STATE_FILL_4: p_data_in <= pp_data_out;
+       default: p_data_in <= {32{1'bX}};
+    endcase
+    //
+    // t0_data_in: taken from T1 during LOAD
+    //
+    case (fsm_next_state)
+       FSM_STATE_LOAD_3,
+       FSM_STATE_LOAD_4: t0_data_in <= t1_data_out;
+       default: t0_data_in <= {32{1'bX}};
+    endcase
+    //
+ end
+
+
+ //
+ // Double Multiplier
+ //
+ reg mul_ena; // common start strobe, both multipliers are started together
+ wire mul_rdy_pp;
+ wire mul_rdy_tp;
+ wire mul_rdy_all = mul_rdy_pp & mul_rdy_tp; // high only when both multipliers report ready
+
+ /* mul_pp: operands are read from P1 and P2, the result is written into PP */
+ modexpa7_systolic_multiplier #
+ (
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH),
+ .SYSTOLIC_ARRAY_POWER (SYSTOLIC_ARRAY_POWER)
+ )
+ mul_pp
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mul_ena),
+ .rdy (mul_rdy_pp),
+
+ .a_bram_addr (p1_addr_rd),
+ .b_bram_addr (p2_addr_rd),
+ .n_bram_addr (n1_bram_addr),
+ .n_coeff_bram_addr (n_coeff1_bram_addr),
+ .r_bram_addr (pp_addr_wr),
+
+ .a_bram_out (p1_data_out),
+ .b_bram_out (p2_data_out),
+ .n_bram_out (n1_bram_out),
+ .n_coeff_bram_out (n_coeff1_bram_out),
+
+ .r_bram_in (pp_data_in),
+ .r_bram_wr (pp_wren),
+
+ .ab_num_words (n_num_words_latch)
+ );
+
+ /* mul_tp: operands are read from T2 and P3, the result is written into TP */
+ modexpa7_systolic_multiplier #
+ (
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH),
+ .SYSTOLIC_ARRAY_POWER (SYSTOLIC_ARRAY_POWER)
+ )
+ mul_tp
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mul_ena),
+ .rdy (mul_rdy_tp),
+
+ .a_bram_addr (t2_addr_rd),
+ .b_bram_addr (p3_addr_rd),
+ .n_bram_addr (n2_bram_addr),
+ .n_coeff_bram_addr (n_coeff2_bram_addr),
+ .r_bram_addr (tp_addr_wr),
+
+ .a_bram_out (t2_data_out),
+ .b_bram_out (p3_data_out),
+ .n_bram_out (n2_bram_out),
+ .n_coeff_bram_out (n_coeff2_bram_out),
+
+ .r_bram_in (tp_data_in),
+ .r_bram_wr (tp_wren),
+
+ .ab_num_words (n_num_words_latch)
+ );
+
+
+ /* mul_ena is high for the single cycle spent in CALC_1 */
+ always @(posedge clk)
+ //
+ mul_ena <= (fsm_next_state == FSM_STATE_CALC_1) ? 1'b1 : 1'b0;
+
+
+ //
+ // Bit Counter
+ //
+ reg [OPERAND_ADDR_WIDTH+4:0] bit_cnt;
+
+ wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_zero = {(OPERAND_ADDR_WIDTH+5){1'b0}};
+ wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_last = d_num_bits_latch;
+ wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_next = bit_cnt + 1'b1;
+
+ /* high while the counter sits on the last bit index (the latched d_num_bits) */
+ wire bit_cnt_done = (bit_cnt == bit_cnt_last);
+
+ /* the counter only changes on the clock edge that enters LOAD_1: it is cleared when */
+ /* coming from INIT_4 and advanced (saturating at the last index) when coming from NEXT */
+ always @(posedge clk) begin
+    //
+    if (fsm_next_state == FSM_STATE_LOAD_1) begin
+       if (fsm_state == FSM_STATE_INIT_4)
+          bit_cnt <= bit_cnt_zero;
+       else if (fsm_state == FSM_STATE_NEXT)
+          bit_cnt <= bit_cnt_done ? bit_cnt : bit_cnt_next;
+    end
+    //
+ end
+
+
+
+ //
+ // FSM Process
+ //
+ always @(posedge clk or negedge rst_n) begin
+    /* asynchronous active-low reset returns the FSM to IDLE */
+    if (!rst_n)
+       fsm_state <= FSM_STATE_IDLE;
+    else
+       fsm_state <= fsm_next_state;
+ end
+
+
+ //
+ // FSM Transition Logic
+ //
+ always @* begin
+    //
+    case (fsm_state)
+       //
+       // wait for the start trigger
+       //
+       FSM_STATE_IDLE:
+          if (ena_trig) fsm_next_state = FSM_STATE_INIT_1;
+          else fsm_next_state = FSM_STATE_IDLE;
+       //
+       // initialization, INIT_4 repeats until T1 has been written completely
+       //
+       FSM_STATE_INIT_1: fsm_next_state = FSM_STATE_INIT_2;
+       FSM_STATE_INIT_2: fsm_next_state = FSM_STATE_INIT_3;
+       FSM_STATE_INIT_3: fsm_next_state = FSM_STATE_INIT_4;
+       FSM_STATE_INIT_4:
+          if (t1_addr_done) fsm_next_state = FSM_STATE_LOAD_1;
+          else fsm_next_state = FSM_STATE_INIT_4;
+       //
+       // load, LOAD_4 repeats until T0 has been written completely
+       //
+       FSM_STATE_LOAD_1: fsm_next_state = FSM_STATE_LOAD_2;
+       FSM_STATE_LOAD_2: fsm_next_state = FSM_STATE_LOAD_3;
+       FSM_STATE_LOAD_3: fsm_next_state = FSM_STATE_LOAD_4;
+       FSM_STATE_LOAD_4:
+          if (t0_addr_done) fsm_next_state = FSM_STATE_CALC_1;
+          else fsm_next_state = FSM_STATE_LOAD_4;
+       //
+       // multiplication, CALC_2 repeats until both multipliers are ready
+       //
+       FSM_STATE_CALC_1: fsm_next_state = FSM_STATE_CALC_2;
+       FSM_STATE_CALC_2:
+          if (mul_rdy_all) fsm_next_state = FSM_STATE_CALC_3;
+          else fsm_next_state = FSM_STATE_CALC_2;
+       FSM_STATE_CALC_3: fsm_next_state = FSM_STATE_FILL_1;
+       //
+       // refill, FILL_4 repeats until P has been written completely
+       //
+       FSM_STATE_FILL_1: fsm_next_state = FSM_STATE_FILL_2;
+       FSM_STATE_FILL_2: fsm_next_state = FSM_STATE_FILL_3;
+       FSM_STATE_FILL_3: fsm_next_state = FSM_STATE_FILL_4;
+       FSM_STATE_FILL_4:
+          if (p_addr_wr_done) fsm_next_state = FSM_STATE_NEXT;
+          else fsm_next_state = FSM_STATE_FILL_4;
+       //
+       // either finish or go around once more
+       //
+       FSM_STATE_NEXT:
+          if (bit_cnt_done) fsm_next_state = FSM_STATE_STOP;
+          else fsm_next_state = FSM_STATE_LOAD_1;
+       //
+       FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE;
+       //
+       // any state value that is not listed above leads to STOP
+       //
+       default: fsm_next_state = FSM_STATE_STOP;
+       //
+    endcase
+    //
+ end
+
+
+endmodule
diff --git a/src/tb/modexp_fpga_model_vectors.v b/src/tb/modexp_fpga_model_vectors.v
index 7a2b8e9..d5284c9 100644
--- a/src/tb/modexp_fpga_model_vectors.v
+++ b/src/tb/modexp_fpga_model_vectors.v
@@ -30,6 +30,16 @@ localparam [383:0] M_FACTOR_384 =
32'h91e92683, 32'hc483bb6c, 32'h0ee1571d, 32'h6e28c2f5,
32'hff5e6b61, 32'h65fb6164, 32'hd3651e5a, 32'h746b8ca0};
+localparam [383:0] D_384 =
+ {32'had24a30c, 32'h766d8dc3, 32'he2100b02, 32'h24d1c4b0,
+ 32'hbb6a6342, 32'h577df9be, 32'h89bb1ec3, 32'hdc3259f0,
+ 32'h1a343f93, 32'h57a12599, 32'ha328ae2f, 32'hf85ef401};
+
+localparam [383:0] S_384 =
+ {32'h65752d0f, 32'h9a017293, 32'h36bfa115, 32'h4a7a81fc,
+ 32'ha76b945b, 32'h49a3f645, 32'h76801499, 32'hb98e6a16,
+ 32'hd2467b6a, 32'h75b7d614, 32'h0fff0fde, 32'hb31d1819};
+
localparam [511:0] M_512 =
{32'h005536b6, 32'h43ea651f, 32'h2fd3c70a, 32'ha83659cb,
32'hd0c1f47b, 32'ha8033730, 32'h29c6b082, 32'h6db48613,
@@ -66,3 +76,15 @@ localparam [511:0] M_FACTOR_512 =
32'h663032a3, 32'h70734b62, 32'h2d30c132, 32'hefa75cc6,
32'h9f18b32a, 32'h97d6ddf8, 32'h2f6df2d0, 32'he9098874};
+localparam [511:0] D_512 =
+ {32'hc9686c43, 32'hbbe28d66, 32'h758ef8bc, 32'h9b7828e5,
+ 32'h2ec2804a, 32'hb76745de, 32'h83fcbba0, 32'h2d9eba78,
+ 32'h215f4cc2, 32'hf49387b3, 32'h8ed0b9dc, 32'h6c129231,
+ 32'h944368be, 32'hdbf2db79, 32'h16323c49, 32'h34cdf801};
+
+localparam [511:0] S_512 =
+ {32'hcc2fc6b6, 32'he4849987, 32'h75773499, 32'hcb0792b0,
+ 32'he79f4600, 32'hb2d739c5, 32'h1a661ac6, 32'hd3bf2db5,
+ 32'hfd1e029d, 32'hfe887387, 32'h4312635f, 32'hb2b54b8d,
+ 32'h5d3b379e, 32'h161eaa4f, 32'hedfd932b, 32'h780f0203};
+
diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v
new file mode 100644
index 0000000..8ca9444
--- /dev/null
+++ b/src/tb/tb_exponentiator.v
@@ -0,0 +1,424 @@
+//======================================================================
+//
+// tb_exponentiator.v
+// -----------------------------------------------------------------------------
+// Testbench for Montgomery modular exponentiation block.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2017, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module tb_exponentiator;
+
+ //
+ // Test Vectors
+ //
+ `include "modexp_fpga_model_vectors.v";
+
+ //
+ // Parameters
+ //
+ localparam NUM_WORDS_384 = 384 / 32;
+ localparam NUM_WORDS_512 = 512 / 32;
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+ //
+ // Inputs
+ //
+ reg rst_n;
+ reg ena;
+
+ reg [ 3: 0] n_num_words;
+ reg [ 8: 0] d_num_bits;
+
+ //
+ // Outputs
+ //
+ wire rdy;
+
+ //
+ // Integers
+ //
+ integer w;
+
+ //
+ // BRAM Interfaces
+ //
+ wire [ 3: 0] core_m_addr;
+ wire [ 3: 0] core_d_addr;
+ wire [ 3: 0] core_n1_addr;
+ wire [ 3: 0] core_n2_addr;
+ wire [ 3: 0] core_n_coeff1_addr;
+ wire [ 3: 0] core_n_coeff2_addr;
+ wire [ 3: 0] core_r_addr;
+
+ wire [31: 0] core_m_data;
+ wire [31: 0] core_d_data;
+ wire [31: 0] core_n1_data;
+ wire [31: 0] core_n2_data;
+ wire [31: 0] core_n_coeff1_data;
+ wire [31: 0] core_n_coeff2_data;
+ wire [31: 0] core_r_data_in;
+
+ wire core_r_wren;
+
+ reg [ 3: 0] tb_mdn_addr;
+ reg [ 3: 0] tb_r_addr;
+
+ reg [31:0] tb_m_data;
+ reg [31:0] tb_d_data;
+ reg [31:0] tb_n_data;
+ reg [31:0] tb_n_coeff_data;
+ wire [31:0] tb_r_data;
+
+ reg tb_mdn_wren;
+
+ //
+ // BRAMs
+ //
+ /* input memories: port A is written by the testbench (all of them share tb_mdn_addr and */
+ /* tb_mdn_wren, so they are filled simultaneously), port B is read by the core */
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+ bram_m (.clk(clk),
+ .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_m_data), .a_out(),
+ .b_addr(core_m_addr), .b_out(core_m_data));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+ bram_d (.clk(clk),
+ .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_d_data), .a_out(),
+ .b_addr(core_d_addr), .b_out(core_d_data));
+
+ /* n1/n2 receive the same data (tb_n_data) but have separate read ports for the core */
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+ bram_n1 (.clk(clk),
+ .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_data), .a_out(),
+ .b_addr(core_n1_addr), .b_out(core_n1_data));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+ bram_n2 (.clk(clk),
+ .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_data), .a_out(),
+ .b_addr(core_n2_addr), .b_out(core_n2_data));
+
+ /* n_coeff1/n_coeff2 receive the same data (tb_n_coeff_data) but have separate read ports for the core */
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+ bram_n_coeff1 (.clk(clk),
+ .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_coeff_data), .a_out(),
+ .b_addr(core_n_coeff1_addr), .b_out(core_n_coeff1_data));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+ bram_n_coeff2 (.clk(clk),
+ .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_coeff_data), .a_out(),
+ .b_addr(core_n_coeff2_addr), .b_out(core_n_coeff2_data));
+
+ /* result memory: port A is written by the core, port B is read by the testbench */
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+ bram_r (.clk(clk),
+ .a_addr(core_r_addr), .a_wr(core_r_wren), .a_in(core_r_data_in), .a_out(),
+ .b_addr(tb_r_addr), .b_out(tb_r_data));
+
+ //
+ // UUT
+ //
+ modexpa7_exponentiator #
+ (
+ .OPERAND_ADDR_WIDTH (4), // 32 * (2**4) = 512-bit operands
+ .SYSTOLIC_ARRAY_POWER (2) // 2 ** 2 = 4-tap systolic array
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .m_bram_addr (core_m_addr),
+ .d_bram_addr (core_d_addr),
+ .n1_bram_addr (core_n1_addr),
+ .n2_bram_addr (core_n2_addr),
+ .n_coeff1_bram_addr (core_n_coeff1_addr),
+ .n_coeff2_bram_addr (core_n_coeff2_addr),
+ .r_bram_addr (core_r_addr),
+
+ .m_bram_out (core_m_data),
+ .d_bram_out (core_d_data),
+ .n1_bram_out (core_n1_data),
+ .n2_bram_out (core_n2_data),
+ .n_coeff1_bram_out (core_n_coeff1_data),
+ .n_coeff2_bram_out (core_n_coeff2_data), // each coefficient port reads its own BRAM (bram_n_coeff2 here)
+
+ .r_bram_in (core_r_data_in),
+ .r_bram_wr (core_r_wren),
+
+ .n_num_words (n_num_words),
+ .d_num_bits (d_num_bits)
+ );
+
+
+ //
+ // Script
+ //
+ initial begin
+
+ /* hold the core in reset with the enable input low */
+ rst_n = 1'b0;
+ ena = 1'b0;
+
+ /* release the reset after 200 ns, then wait another 100 ns before the first test */
+ #200;
+ rst_n = 1'b1;
+ #100;
+
+ /* run the 384-bit test vector (the 512-bit test is not enabled yet) */
+ test_exponent_384(M_FACTOR_384, D_384, N_384, N_COEFF_384, S_384);
+ //test_exponent_512(M_512);
+
+ /* NOTE(review): there is no $finish here, and the clock generator never stops, */
+ /* so the simulation has to be ended by the simulator's run-time limit */
+
+ end
+
+
+ //
+ // Test Tasks
+ //
+
+ //
+ // test_exponent_384
+ //
+ // Runs one 384-bit operation on the core and compares the result with the
+ // expected value.
+ //
+ // m, d, n, n_coeff - values written into the M, D, N and N_COEFF memories
+ // s - expected result
+ //
+ task test_exponent_384;
+ //
+ input [383:0] m;
+ input [383:0] d;
+ input [383:0] n;
+ input [383:0] n_coeff;
+ input [383:0] s;
+ reg [383:0] r;
+ //
+ begin
+ //
+ n_num_words = NUM_WORDS_384 - 1; // index of the last operand word
+ d_num_bits = 32 * NUM_WORDS_384 - 1; // index of the last exponent bit
+ //
+ write_memory_384(m, d, n, n_coeff); // fill memory
+
+ ena = 1; // start operation
+ #10; //
+ ena = 0; // clear flag
+
+ while (!rdy) #10; // wait for operation to complete
+ read_memory_384(r); // get result from memory
+
+ $display(" calculated: %x", r); // display result
+ $display(" expected: %x", s); //
+
+ // check calculated value
+ if (r === s) begin
+ $display(" OK");
+ $display("SUCCESS: Test passed.");
+ end else begin
+ $display(" ERROR");
+ $display("FAILURE: Test not passed.");
+ end
+ //
+ end
+ //
+ endtask
+ /*
+ task test_factor_512;
+ //
+ input [511:0] n;
+ reg [511:0] f;
+ reg [511:0] factor;
+ integer i;
+ //
+ begin
+ //
+ calc_factor_512(n, f); // calculate factor on-the-fly
+
+ // make sure, that the value matches the one saved in the include file
+ if (f !== FACTOR_512) begin
+ $display("ERROR: Calculated factor value differs from the one in the test vector!");
+ $finish;
+ end
+
+
+ n_num_words = 4'd15; // set number of words
+ write_memory_512(n); // fill memory
+
+ ena = 1; // start operation
+ #10; //
+ ena = 0; // clear flag
+
+ while (!rdy) #10; // wait for operation to complete
+ read_memory_512(factor); // get result from memory
+
+ $display(" calculated: %x", factor); // display result
+ $display(" expected: %x", f); //
+
+ // check calculated value
+ if (f === factor) begin
+ $display(" OK");
+ $display("SUCCESS: Test passed.");
+ end else begin
+ $display(" ERROR");
+ $display("FAILURE: Test not passed.");
+ end
+ //
+ end
+ //
+ endtask
+ */
+
+ //
+ // write_memory_384
+ //
+ task write_memory_384;
+    //
+    // Writes the four 384-bit values into the M, D, N and N_COEFF memories,
+    // least significant 32-bit word first, one word per 10 ns clock period.
+    //
+    input [383:0] m;
+    input [383:0] d;
+    input [383:0] n;
+    input [383:0] n_coeff;
+    //
+    begin
+       //
+       tb_mdn_wren = 1; // open the write ports
+       //
+       for (w=0; w<NUM_WORDS_384; w=w+1) begin
+          tb_mdn_addr = w[3:0]; // word index is the address
+          tb_m_data = m[32*w +: 32]; // pick word w of every operand
+          tb_d_data = d[32*w +: 32];
+          tb_n_data = n[32*w +: 32];
+          tb_n_coeff_data = n_coeff[32*w +: 32];
+          #10; // hold for 1 clock tick
+       end
+       //
+       // leave the buses undefined and close the write ports
+       //
+       tb_mdn_addr = {4{1'bX}};
+       tb_m_data = {32{1'bX}};
+       tb_d_data = {32{1'bX}};
+       tb_n_data = {32{1'bX}};
+       tb_n_coeff_data = {32{1'bX}};
+       tb_mdn_wren = 0;
+       //
+    end
+    //
+ endtask
+
+ /*
+ //
+ // write_memory_512
+ //
+ task write_memory_512;
+ //
+ input [511:0] n;
+ reg [511:0] n_shreg;
+ //
+ begin
+ //
+ tb_n_wren = 1; // start filling memories
+ n_shreg = n; // preload shift register
+ //
+ for (w=0; w<NUM_WORDS_512; w=w+1) begin // write all words
+ tb_n_addr = w[3:0]; // set address
+ tb_n_data = n_shreg[31:0]; // set data
+ n_shreg = {{32{1'bX}}, n_shreg[511:32]}; // update shift register
+ #10; // wait for 1 clock tick
+ end
+ //
+ tb_n_addr = {4{1'bX}}; // wipe addresses
+ tb_n_data = {32{1'bX}}; // wipe data
+ tb_n_wren = 0; // stop filling memory
+ //
+ end
+ //
+ endtask
+ */
+
+ //
+ // read_memory_384
+ //
+ task read_memory_384;
+    //
+    // Reads the result memory word by word, least significant 32-bit word
+    // first, and returns the assembled 384-bit value.
+    //
+    output [383:0] r;
+    reg [383:0] r_words;
+    //
+    begin
+       //
+       for (w=0; w<NUM_WORDS_384; w=w+1) begin
+          tb_r_addr = w[3:0]; // select word w
+          #10; // wait for 1 clock tick
+          r_words[32*w +: 32] = tb_r_data; // place it at its final position
+       end
+       //
+       tb_r_addr = {4{1'bX}}; // leave the address undefined
+       r = r_words; // return
+       //
+    end
+    //
+ endtask
+
+ /*
+ //
+ // read_memory_512
+ //
+ task read_memory_512;
+ //
+ output [511:0] f;
+ reg [511:0] f_shreg;
+ //
+ begin
+ //
+ for (w=0; w<NUM_WORDS_512; w=w+1) begin // read result word-by-word
+ tb_f_addr = w[3:0]; // set address
+ #10; // wait for 1 clock tick
+ f_shreg = {tb_f_data, f_shreg[511:32]}; // store data word
+ end
+ //
+ tb_f_addr = {4{1'bX}}; // wipe address
+ f = f_shreg; // return
+ //
+ end
+ //
+ endtask
+ */
+
+endmodule
+
+//======================================================================
+// End of file
+//======================================================================