aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2017-06-27 13:48:50 +0300
committer Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2017-06-27 13:48:50 +0300
commit e4b70153e20ba3f6231a580d748d5eacecfd54ec (patch)
tree 72766a157ff9a9ea4dd2ecf628da4576d627eb80
parent 61206120195522734a3762d25e9d057c01ade087 (diff)
Added Montgomery modulus-dependent coefficient calculation block
* work in progress
-rw-r--r-- src/rtl/modexpa7_n_coeff.v 498
-rw-r--r-- src/tb/tb_n_coeff.v 292
2 files changed, 790 insertions, 0 deletions
diff --git a/src/rtl/modexpa7_n_coeff.v b/src/rtl/modexpa7_n_coeff.v
new file mode 100644
index 0000000..1e763ba
--- /dev/null
+++ b/src/rtl/modexpa7_n_coeff.v
@@ -0,0 +1,498 @@
+//======================================================================
+//
+// modexpa7_n_coeff.v
+// -----------------------------------------------------------------------------
+// Montgomery modulus-dependent coefficient calculation block.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2017, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module modexpa7_n_coeff #
+ (
+ //
+ // Address width of the operand memory buffers. Words are 32 bits
+ // wide, so e.g. 1024-bit operands take 1024 / 32 = 32 words, which
+ // need a 5-bit address bus because 2 ** 5 = 32. The n_num_words
+ // port and every internal BRAM address share this width.
+ //
+ parameter OPERAND_ADDR_WIDTH = 5
+ )
+ (
+ input clk, // clock, all registers update on the rising edge
+ input rst_n, // asynchronous reset, active low (resets the FSM state and the ready flag)
+
+ input ena, // a 0 -> 1 transition starts the operation
+ output rdy, // high while idle, low from the start trigger until the FSM reaches STOP
+
+ output [OPERAND_ADDR_WIDTH-1:0] n_bram_addr, // word address presented to the n memory
+ output [OPERAND_ADDR_WIDTH-1:0] n_coeff_bram_addr, // NOTE(review): not driven yet in this revision
+
+ input [ 32-1:0] n_bram_out, // word of n, presumably the read data for n_bram_addr
+
+ output [ 32-1:0] n_coeff_bram_in, // NOTE(review): not driven yet in this revision
+ output n_coeff_bram_wr, // NOTE(review): not driven yet in this revision
+
+ input [OPERAND_ADDR_WIDTH-1:0] n_num_words // used as the address of the last operand word (latched on start)
+ );
+
+ //
+ // FSM Declaration
+ // (INIT_* states fill the nn/r/b memories, CALC_* states are still work in progress)
+ localparam [ 7: 0] FSM_STATE_IDLE = 8'h00; // waiting for a rising edge on ena
+
+ localparam [ 7: 0] FSM_STATE_INIT_1 = 8'hA1;
+ localparam [ 7: 0] FSM_STATE_INIT_2 = 8'hA2;
+ localparam [ 7: 0] FSM_STATE_INIT_3 = 8'hA3;
+ localparam [ 7: 0] FSM_STATE_INIT_4 = 8'hA4;
+ localparam [ 7: 0] FSM_STATE_INIT_5 = 8'hA5; // repeated until nn_addr reaches the last word
+
+ localparam [ 7: 0] FSM_STATE_CALC_1 = 8'hB1;
+ localparam [ 7: 0] FSM_STATE_CALC_2 = 8'hB2;
+ localparam [ 7: 0] FSM_STATE_CALC_3 = 8'hB3;
+ localparam [ 7: 0] FSM_STATE_CALC_4 = 8'hB4; // currently jumps straight to STOP
+ /*
+ localparam [ 7: 0] FSM_STATE_CALC_5 = 8'hB5;
+ localparam [ 7: 0] FSM_STATE_CALC_6 = 8'hB6;
+ localparam [ 7: 0] FSM_STATE_CALC_7 = 8'hB7;
+ localparam [ 7: 0] FSM_STATE_CALC_8 = 8'hB8;
+
+ localparam [ 7: 0] FSM_STATE_SAVE_1 = 8'hC1;
+ localparam [ 7: 0] FSM_STATE_SAVE_2 = 8'hC2;
+ localparam [ 7: 0] FSM_STATE_SAVE_3 = 8'hC3;
+ localparam [ 7: 0] FSM_STATE_SAVE_4 = 8'hC4;
+ localparam [ 7: 0] FSM_STATE_SAVE_5 = 8'hC5;
+ */
+ localparam [ 7: 0] FSM_STATE_STOP = 8'hFF; // lasts one cycle, then back to IDLE
+
+ reg [ 7: 0] fsm_state = FSM_STATE_IDLE; // current state (registered)
+ reg [ 7: 0] fsm_next_state; // next state (combinational)
+
+
+ //
+ // Enable Delay (Trigger)
+ // ena_trig is high only while ena is 1 and its value sampled on the previous clock edge was 0
+ reg ena_dly = 1'b0;
+ wire ena_trig = ena && !ena_dly;
+ always @(posedge clk) ena_dly <= ena;
+
+
+ //
+ // Parameters Latch
+ // n_num_words is captured when the FSM is about to enter INIT_1, i.e. once per start
+ reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch;
+
+ always @(posedge clk)
+ // INIT_1 is only ever the next state in IDLE with ena_trig set
+ if (fsm_next_state == FSM_STATE_INIT_1)
+ n_num_words_latch <= n_num_words;
+
+
+ //
+ // Addresses
+ // First and last word address; the last one is the latched n_num_words value itself
+ localparam [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}};
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = n_num_words_latch;
+
+
+ /*
+ //
+ // Cycle Counters
+ //
+ reg [OPERAND_ADDR_WIDTH+5:0] cyc_cnt; // cycle counter
+
+ wire [OPERAND_ADDR_WIDTH+5:0] cyc_cnt_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}, {5{1'b0}}};
+ wire [OPERAND_ADDR_WIDTH+5:0] cyc_cnt_last = {n_num_words, 1'b1, {5{1'b1}}};
+ wire [OPERAND_ADDR_WIDTH+5:0] cyc_cnt_next = cyc_cnt + 1'b1;
+
+ wire cyc_cnt_done = (cyc_cnt == cyc_cnt_last) ? 1'b1 : 1'b0;
+
+
+ always @(posedge clk)
+ //
+ if (fsm_next_state == FSM_STATE_CALC_1)
+ //
+ case (fsm_state)
+ FSM_STATE_INIT_2: cyc_cnt <= cyc_cnt_zero;
+ FSM_STATE_SAVE_5: cyc_cnt <= cyc_cnt_done ? cyc_cnt : cyc_cnt_next;
+ endcase
+ */
+
+
+
+
+ //
+ // Ready Flag
+ // (set by reset and in STOP; in IDLE it drops as soon as the start trigger fires)
+ reg rdy_reg = 1'b1;
+ assign rdy = rdy_reg;
+
+ always @(posedge clk or negedge rst_n)
+ if (!rst_n) rdy_reg <= 1'b1;
+ else
+ case (fsm_state)
+ FSM_STATE_IDLE: rdy_reg <= !ena_trig;
+ FSM_STATE_STOP: rdy_reg <= 1'b1;
+ endcase
+
+
+ //
+ // Block Memories
+ // Internal buffers r, b and nn (one read/write port each) and t (write port A plus read-only port B)
+ reg [OPERAND_ADDR_WIDTH-1:0] n_addr; // drives n_bram_addr (external n memory)
+ reg [OPERAND_ADDR_WIDTH-1:0] r_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] b_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] nn_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] t_addr_wr; // NOTE(review): never assigned in this revision
+ reg [OPERAND_ADDR_WIDTH-1:0] t_addr_rd; // driven combinationally as r_addr + nn_addr
+
+ reg [31: 0] r_data_in;
+ reg [31: 0] b_data_in;
+ reg [31: 0] nn_data_in;
+ reg [31: 0] t_data_in; // NOTE(review): never assigned in this revision
+
+ wire [31: 0] r_data_out;
+ wire [31: 0] b_data_out;
+ wire [31: 0] nn_data_out;
+ wire [31: 0] t_data_out;
+
+ reg r_wren;
+ reg b_wren;
+ reg nn_wren;
+ reg t_wren; // NOTE(review): never assigned in this revision
+
+ bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_r (.clk(clk), .a_addr(r_addr), .a_wr(r_wren), .a_in(r_data_in), .a_out(r_data_out));
+
+ bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_b (.clk(clk), .a_addr(b_addr), .a_wr(b_wren), .a_in(b_data_in), .a_out(b_data_out));
+
+ bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_nn (.clk(clk), .a_addr(nn_addr), .a_wr(nn_wren), .a_in(nn_data_in), .a_out(nn_data_out));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_t (.clk(clk), .a_addr(t_addr_wr), .a_wr(t_wren), .a_in(t_data_in), .a_out(), .b_addr(t_addr_rd), .b_out(t_data_out));
+
+ assign n_bram_addr = n_addr;
+ // incremented addresses (the sum is truncated to OPERAND_ADDR_WIDTH bits)
+ wire [OPERAND_ADDR_WIDTH-1:0] n_addr_next = n_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] r_addr_next = r_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] b_addr_next = b_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] nn_addr_next = nn_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] t_addr_wr_next = t_addr_wr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] t_addr_rd_next = t_addr_rd + 1'b1;
+ // "done" flags: the address currently equals the last operand word address
+ wire n_addr_done = (n_addr == bram_addr_last);
+ wire r_addr_done = (r_addr == bram_addr_last);
+ wire b_addr_done = (b_addr == bram_addr_last);
+ wire nn_addr_done = (nn_addr == bram_addr_last);
+ wire t_addr_wr_done = (t_addr_wr == bram_addr_last);
+ wire t_addr_rd_done = (t_addr_rd == bram_addr_last);
+
+
+ //
+ // Subtractor
+ // (an adder fed with ~n_bram_out plus a one on the first word and a chained carry, i.e. it forms ~n + 1)
+ wire [31: 0] add_s;
+ wire add_c_in; // previous carry-out, forced to 0 while add_c_in_mask is set
+ reg add_b_lsb; // the "+1" addend, set only while fsm_state is INIT_2
+ reg add_c_in_mask; // set only while fsm_state is INIT_2
+ reg add_c_in_mask_dly; // add_c_in_mask delayed by one clock
+ wire add_c_out;
+
+ assign add_c_in = add_c_out & ~add_c_in_mask;
+
+ always @(posedge clk)
+ // registered from fsm_next_state, so the flag is high during INIT_2 itself
+ add_c_in_mask <= (fsm_next_state == FSM_STATE_INIT_2) ? 1'b1 : 1'b0;
+
+ always @(posedge clk)
+ // same timing as add_c_in_mask
+ add_b_lsb <= (fsm_next_state == FSM_STATE_INIT_2) ? 1'b1 : 1'b0;
+
+ always @(posedge clk)
+ // high during INIT_3; reused as the data bit written to the first word of r and b
+ add_c_in_mask_dly <= add_c_in_mask;
+
+ ip_add32 add_inst
+ (
+ .clk (clk), // NOTE(review): clocked adder, its latency is not visible here
+ .a (~n_bram_out),
+ .b ({{31{1'b0}}, add_b_lsb}),
+ .c_in (add_c_in),
+ .s (add_s),
+ .c_out (add_c_out)
+ );
+
+
+ //
+ // Multiplier
+ // NOTE(review): pe_a, pe_b, pe_t and pe_c_in are never assigned in this revision, pe_p and pe_c_out are unused
+ reg [31: 0] pe_a;
+ reg [31: 0] pe_b;
+ reg [31: 0] pe_t;
+ reg [31: 0] pe_c_in;
+ wire [31: 0] pe_p;
+ wire [31: 0] pe_c_out;
+
+ modexpa7_pe_mul pe2
+ (
+ .clk (clk),
+ .a (pe_a),
+ .b (pe_b),
+ .t (pe_t),
+ .c_in (pe_c_in),
+ .p (pe_p),
+ .c_out (pe_c_out)
+ );
+
+
+ /*
+ always @(posedge clk)
+ //
+ case (fsm_next_state)
+ FSM_STATE_CALC_2: f0_data_out_carry <= 1'b0;
+ FSM_STATE_CALC_3,
+ FSM_STATE_CALC_4,
+ FSM_STATE_CALC_5,
+ FSM_STATE_CALC_6: f0_data_out_carry <= f0_data_out[31];
+ default: f0_data_out_carry <= 1'bX;
+ endcase
+ */
+
+ /*
+ reg sub_b_out_dly1;
+ reg f0_data_out_carry_dly1;
+ reg f0_data_out_carry_dly2;
+
+ always @(posedge clk) sub_b_out_dly1 <= sub_b_out;
+
+ always @(posedge clk) f0_data_out_carry_dly1 <= f0_data_out_carry;
+ always @(posedge clk) f0_data_out_carry_dly2 <= f0_data_out_carry_dly1;
+
+ reg flag_keep_f;
+
+ always @(posedge clk)
+ //
+ if (fsm_next_state == FSM_STATE_SAVE_1)
+ flag_keep_f <= sub_b_out_dly1 & ~f0_data_out_carry_dly2;
+ */
+
+ always @* t_addr_rd = r_addr + nn_addr; // sum is truncated to OPERAND_ADDR_WIDTH bits
+
+ always @(posedge clk) begin
+ // n_addr: cleared on start, advanced on every INIT_2..INIT_5 cycle, held once it reaches the last word
+ case (fsm_next_state)
+
+ FSM_STATE_INIT_1: n_addr <= bram_addr_zero;
+
+ FSM_STATE_INIT_2,
+ FSM_STATE_INIT_3,
+ FSM_STATE_INIT_4,
+ FSM_STATE_INIT_5: n_addr <= !n_addr_done ? n_addr_next : n_addr;
+
+ endcase
+ // nn_addr: write pointer during INIT_4/INIT_5, rewound to zero on the INIT_5 -> CALC_1 transition
+ case (fsm_next_state)
+ FSM_STATE_INIT_4: nn_addr <= bram_addr_zero;
+ FSM_STATE_INIT_5: nn_addr <= nn_addr_next;
+ FSM_STATE_CALC_1:
+ case (fsm_state)
+ FSM_STATE_INIT_5: nn_addr <= bram_addr_zero;
+ endcase
+ endcase
+ // r_addr: write pointer during INIT_4/INIT_5, restarted at CALC_1 and stepped through CALC_2..CALC_4
+ case (fsm_next_state)
+ FSM_STATE_INIT_4: r_addr <= bram_addr_zero;
+ FSM_STATE_INIT_5: r_addr <= r_addr_next;
+ FSM_STATE_CALC_1: r_addr <= bram_addr_zero;
+ FSM_STATE_CALC_2,
+ FSM_STATE_CALC_3,
+ FSM_STATE_CALC_4: r_addr <= r_addr_next;
+
+ endcase
+ // b_addr: write pointer during INIT_4/INIT_5 only, not touched by the CALC states yet
+ case (fsm_next_state)
+
+ FSM_STATE_INIT_4: b_addr <= bram_addr_zero;
+
+ FSM_STATE_INIT_5: b_addr <= b_addr_next;
+
+ endcase
+ //
+ end
+
+
+ always @(posedge clk) begin
+ //
+ // The nn, r and b memories are filled in lock-step: all three write
+ // enables are registered from fsm_next_state, so they are high while
+ // fsm_state is INIT_4 or INIT_5 and low on every other cycle.
+ //
+ if ((fsm_next_state == FSM_STATE_INIT_4) ||
+ (fsm_next_state == FSM_STATE_INIT_5)) begin
+ nn_wren <= 1'b1;
+ r_wren <= 1'b1;
+ b_wren <= 1'b1;
+ end else begin
+ nn_wren <= 1'b0;
+ r_wren <= 1'b0;
+ b_wren <= 1'b0;
+ end
+ //
+ // placeholder for a further write enable, still disabled:
+ //
+ /*
+ case (fsm_next_state)
+ FSM_STATE_SAVE_3,
+ FSM_STATE_SAVE_4,
+ FSM_STATE_SAVE_5: f_wren <= cyc_cnt_done;
+ default: f_wren <= 1'b0;
+ endcase
+ */
+ end
+
+ always @(posedge clk) begin
+ // nn takes the adder output; outside INIT_4/INIT_5 all data buses are deliberately driven to X
+ case (fsm_next_state)
+ FSM_STATE_INIT_4,
+ FSM_STATE_INIT_5: nn_data_in <= add_s;
+ default: nn_data_in <= {32{1'bX}};
+ endcase
+ // r: add_c_in_mask_dly is 1 only when entering INIT_4, so the first word written is 1 and the rest are 0
+ case (fsm_next_state)
+ FSM_STATE_INIT_4,
+ FSM_STATE_INIT_5: r_data_in <= {{31{1'b0}}, add_c_in_mask_dly};
+ default: r_data_in <= {32{1'bX}};
+ endcase
+ // b: initialised with the same pattern as r
+ case (fsm_next_state)
+ FSM_STATE_INIT_4,
+ FSM_STATE_INIT_5: b_data_in <= {{31{1'b0}}, add_c_in_mask_dly};
+ default: b_data_in <= {32{1'bX}};
+ endcase
+ /*
+ case (fsm_next_state)
+ FSM_STATE_CALC_3,
+ FSM_STATE_CALC_4,
+ FSM_STATE_CALC_5,
+ FSM_STATE_CALC_6: f1_data_in <= f0_data_out_shifted;
+ default: f1_data_in <= {32{1'bX}};
+ endcase
+ //
+ case (fsm_next_state)
+ FSM_STATE_CALC_5,
+ FSM_STATE_CALC_6,
+ FSM_STATE_CALC_7,
+ FSM_STATE_CALC_8: f2_data_in <= sub_d;
+ default: f2_data_in <= {32{1'bX}};
+ endcase
+ //
+ case (fsm_next_state)
+ FSM_STATE_SAVE_3,
+ FSM_STATE_SAVE_4,
+ FSM_STATE_SAVE_5: f_data_in <= flag_keep_f ? f1_data_out : f2_data_out;
+ default: f_data_in <= {32{1'bX}};
+ endcase
+ */
+ end
+
+
+
+ //
+ // FSM Transition Logic
+ // (state register with asynchronous active-low reset, followed by the combinational next-state function)
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
+ else fsm_state <= fsm_next_state;
+
+
+ always @* begin
+ // default: any state without a case item below (e.g. an illegal encoding) leads to STOP
+ fsm_next_state = FSM_STATE_STOP;
+ //
+ case (fsm_state)
+
+ FSM_STATE_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_INIT_1;
+ else fsm_next_state = FSM_STATE_IDLE;
+
+ FSM_STATE_INIT_1: fsm_next_state = FSM_STATE_INIT_2;
+
+ FSM_STATE_INIT_2: fsm_next_state = FSM_STATE_INIT_3;
+
+ FSM_STATE_INIT_3: fsm_next_state = FSM_STATE_INIT_4;
+
+ FSM_STATE_INIT_4: fsm_next_state = FSM_STATE_INIT_5;
+
+ FSM_STATE_INIT_5: if (nn_addr_done) fsm_next_state = FSM_STATE_CALC_1;
+ else fsm_next_state = FSM_STATE_INIT_5;
+
+ FSM_STATE_CALC_1: fsm_next_state = FSM_STATE_CALC_2;
+
+ FSM_STATE_CALC_2: fsm_next_state = FSM_STATE_CALC_3;
+
+ FSM_STATE_CALC_3: fsm_next_state = FSM_STATE_CALC_4;
+
+ FSM_STATE_CALC_4: fsm_next_state = FSM_STATE_STOP;//FSM_STATE_CALC_5;
+ /*
+ FSM_STATE_CALC_5: fsm_next_state = FSM_STATE_CALC_6;
+
+ FSM_STATE_CALC_6: if (f1_addr_done) fsm_next_state = FSM_STATE_CALC_7;
+ else fsm_next_state = FSM_STATE_CALC_6;
+
+ FSM_STATE_CALC_7: fsm_next_state = FSM_STATE_CALC_8;
+
+ FSM_STATE_CALC_8: fsm_next_state = FSM_STATE_SAVE_1;
+
+ FSM_STATE_SAVE_1: fsm_next_state = FSM_STATE_SAVE_2;
+
+ FSM_STATE_SAVE_2: fsm_next_state = FSM_STATE_SAVE_3;
+
+ FSM_STATE_SAVE_3: fsm_next_state = FSM_STATE_SAVE_4;
+
+ FSM_STATE_SAVE_4: if (f12_addr_done_dly) fsm_next_state = FSM_STATE_SAVE_5;
+ else fsm_next_state = FSM_STATE_SAVE_4;
+
+ FSM_STATE_SAVE_5: if (cyc_cnt_done) fsm_next_state = FSM_STATE_STOP;
+ else fsm_next_state = FSM_STATE_CALC_1;
+ */
+ FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE;
+
+ endcase
+ end
+
+
+endmodule
+
+//======================================================================
+// End of file
+//======================================================================
diff --git a/src/tb/tb_n_coeff.v b/src/tb/tb_n_coeff.v
new file mode 100644
index 0000000..6ab824a
--- /dev/null
+++ b/src/tb/tb_n_coeff.v
@@ -0,0 +1,292 @@
+//======================================================================
+//
+// tb_n_coeff.v
+// -----------------------------------------------------------------------------
+// Testbench for Montgomery modulus-dependent coefficient calculation block.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2017, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module tb_n_coeff;
+
+ //
+ // Test Vectors
+ // (N_384 and N_COEFF_384 used below are not defined in this file, presumably they come from this include)
+ `include "../modexp_fpga_model_vectors.v";
+
+ //
+ // Parameters
+ // (a 384-bit operand takes 12 words of 32 bits)
+ localparam NUM_WORDS_384 = 384 / 32;
+
+ //
+ // Clock (100 MHz)
+ // (toggles every 5 ns, rising edges at 5 ns, 15 ns, 25 ns, ...)
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+ //
+ // Inputs
+ //
+ reg rst_n;
+ reg ena;
+
+ reg [ 3: 0] n_num_words;
+
+ //
+ // Outputs
+ //
+ wire rdy;
+
+ //
+ // Integers
+ // (w is the word counter shared by the memory access tasks)
+ integer w;
+
+ //
+ // BRAM Interfaces
+ // (core_* signals connect to the UUT, tb_* signals are driven or read by the tasks below)
+ wire [ 3: 0] core_n_addr;
+ wire [ 3: 0] core_n_coeff_addr;
+
+ wire [31: 0] core_n_data;
+ wire [31: 0] core_n_coeff_data_in;
+
+ wire core_n_coeff_wren;
+
+ reg [ 3: 0] tb_n_addr;
+ reg [ 3: 0] tb_n_coeff_addr;
+
+ reg [31:0] tb_n_data;
+ wire [31:0] tb_n_coeff_data;
+
+ reg tb_n_wren;
+
+ //
+ // BRAMs
+ // (bram_n: testbench writes on port A, core reads on port B; bram_n_coeff: core writes on port A, testbench reads on port B)
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+ bram_n (.clk(clk),
+ .a_addr(tb_n_addr), .a_wr(tb_n_wren), .a_in(tb_n_data), .a_out(),
+ .b_addr(core_n_addr), .b_out(core_n_data));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+ bram_n_coeff (.clk(clk),
+ .a_addr(core_n_coeff_addr), .a_wr(core_n_coeff_wren), .a_in(core_n_coeff_data_in), .a_out(),
+ .b_addr(tb_n_coeff_addr), .b_out(tb_n_coeff_data));
+
+ //
+ // UUT
+ // (4 address bits give 16 words; the 384-bit test uses 12 of them)
+ modexpa7_n_coeff #
+ (
+ .OPERAND_ADDR_WIDTH (4) // 32 * (2**4) = 512-bit operands
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .n_bram_addr (core_n_addr),
+ .n_coeff_bram_addr (core_n_coeff_addr),
+
+ .n_bram_out (core_n_data),
+
+ .n_coeff_bram_in (core_n_coeff_data_in),
+ .n_coeff_bram_wr (core_n_coeff_wren),
+
+ .n_num_words (n_num_words)
+ );
+
+
+ //
+ // Script
+ // NOTE(review): there is no $finish after the test, so the free-running clock keeps the simulation going
+ initial begin
+
+ rst_n = 1'b0; // hold the core in reset
+ ena = 1'b0;
+
+ #200;
+ rst_n = 1'b1; // release reset after 200 ns
+ #100; // leave the core idle for another 100 ns
+
+ test_n_coeff_384(N_384); // run the single 384-bit test
+
+ end
+
+
+ //
+ // Test Tasks
+ //
+
+ task test_n_coeff_384;
+ input [383:0] n; // value written to the n memory before the core is started
+ reg [383:0] n_coeff; // reference value computed by calc_n_coeff_384
+ reg [383:0] result; // value read back from the n_coeff memory
+ // Runs one 384-bit test and reports whether the core output matches the reference.
+ begin
+
+ calc_n_coeff_384(n, n_coeff); // calculate n_coeff on-the-fly
+
+ // make sure that the value matches the one saved in the include file
+ if (n_coeff !== N_COEFF_384) begin
+ $display("ERROR: Calculated coefficient value differs from the one in the test vector!");
+ $finish;
+ end
+
+ // the core uses this value as the address of the last word, hence "- 1"
+ n_num_words = NUM_WORDS_384 - 1; // set index of the last word
+
+ write_memory_384(n); // fill memory
+
+ ena = 1; // start operation
+ #10; // keep the flag up for 1 clock tick
+ ena = 0; // clear flag
+
+ while (!rdy) #10; // wait for operation to complete
+ read_memory_384(result); // get result from memory
+
+ $display(" calculated: %x", result); //
+ $display(" expected: %x", n_coeff); //
+
+ // check calculated value
+ if (n_coeff === result) begin
+ $display(" OK");
+ $display("SUCCESS: Test passed.");
+ end else begin
+ $display(" ERROR");
+ $display("FAILURE: Test not passed.");
+ end
+
+ end
+
+ endtask
+
+
+ task write_memory_384;
+ //
+ // Writes a 384-bit value into the n memory, least significant
+ // word at address 0, one word per clock tick.
+ //
+ input [383:0] n;
+
+ begin
+
+ // raise the write enable for the whole burst
+ tb_n_wren = 1;
+
+ w = 0;
+ while (w < NUM_WORDS_384) begin
+
+ // present the address together with the matching 32-bit slice of n
+ tb_n_addr = w[3:0];
+ tb_n_data = n[32 * w +: 32];
+
+ // hold both for 1 clock tick
+ #10;
+ w = w + 1;
+
+ end
+
+ // wipe address and data, then drop the write enable
+ tb_n_addr = {4{1'bX}};
+ tb_n_data = {32{1'bX}};
+ tb_n_wren = 0;
+
+ end
+
+ endtask
+
+
+ task read_memory_384;
+
+ output [383:0] n_coeff;
+ reg [383:0] words; // value assembled from the individual reads
+
+ begin
+
+ // fetch one 32-bit word per clock tick, lowest address first
+ for (w = 0; w < NUM_WORDS_384; w = w + 1) begin
+ tb_n_coeff_addr = w[3:0];
+ #10;
+ words[32 * w +: 32] = tb_n_coeff_data;
+ end
+
+ tb_n_coeff_addr = {4{1'bX}}; // leave the address bus undefined
+ n_coeff = words; // hand the value back to the caller
+
+ end
+
+ endtask
+
+
+ task calc_n_coeff_384;
+ // Reference model: builds n_coeff bit by bit so that, for odd n,
+ // n_coeff * (~n + 1) equals 1 modulo 2 ** 384.
+ input [383:0] n;
+ output [383:0] n_coeff;
+ reg [383:0] acc; // coefficient found so far
+ reg [383:0] neg_n; // two's complement of n
+ reg [383:0] prod; // acc * neg_n, truncated to 384 bits
+ reg [383:0] weight; // 2 ** k for the bit being decided
+ integer k;
+ begin
+
+ acc = 384'd1;
+ weight = 384'd1;
+ neg_n = ~n + 1'b1;
+
+ // bit k of the product tells whether bit k of the coefficient must be set
+ for (k = 1; k < 384; k = k + 1) begin
+ weight = weight << 1;
+ prod = acc * neg_n;
+ if (prod[k])
+ acc = acc + weight;
+ end
+
+ n_coeff = acc;
+
+ end
+
+ endtask
+
+
+endmodule
+
+//======================================================================
+// End of file
+//======================================================================