From 72902f5b40ac695786f5103d2a5a456c6c7ee83f Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Mon, 21 Oct 2019 13:04:07 +0300 Subject: Redesigned the testbench. Core clock does not necessarily need to be twice faster than the bus clock now. It can be the same, or say four times faster. --- bench/tb_core_full_512.v | 294 ++++++++++++++++--------- rtl/modexpng_dsp48e1.vh | 1 + rtl/modexpng_dsp_array_block.v | 84 ++++++- rtl/modexpng_dsp_slice_primitive.vh | 9 + rtl/modexpng_dsp_slice_wrapper.v | 135 ------------ rtl/modexpng_dsp_slice_wrapper_generic.v | 189 ++++++++++++++++ rtl/modexpng_dsp_slice_wrapper_xilinx.v | 135 ++++++++++++ rtl/modexpng_io_block.v | 19 +- rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v | 66 +++--- rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v | 59 +++-- rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v | 43 ++++ rtl/modexpng_uop_rom.v | 20 ++ 12 files changed, 727 insertions(+), 327 deletions(-) create mode 100644 rtl/modexpng_dsp_slice_primitive.vh delete mode 100644 rtl/modexpng_dsp_slice_wrapper.v create mode 100644 rtl/modexpng_dsp_slice_wrapper_generic.v create mode 100644 rtl/modexpng_dsp_slice_wrapper_xilinx.v diff --git a/bench/tb_core_full_512.v b/bench/tb_core_full_512.v index 221a2c6..75accff 100644 --- a/bench/tb_core_full_512.v +++ b/bench/tb_core_full_512.v @@ -31,6 +31,7 @@ module tb_core_full_512; reg [31:0] Q_FACTOR[0:TB_NUM_WORDS_PQ-1]; reg [31:0] P_COEFF[0:TB_NUM_WORDS_PQ]; reg [31:0] Q_COEFF[0:TB_NUM_WORDS_PQ]; + reg [31:0] D[0:TB_NUM_WORDS_N-1]; reg [31:0] DP[0:TB_NUM_WORDS_PQ-1]; reg [31:0] DQ[0:TB_NUM_WORDS_PQ-1]; reg [31:0] QINV[0:TB_NUM_WORDS_PQ-1]; @@ -81,6 +82,10 @@ module tb_core_full_512; Q_COEFF[ 0] = 32'h5eee9ecd; Q_COEFF[ 1] = 32'h085153b0; Q_COEFF[ 2] = 32'h85326da6; Q_COEFF[ 3] = 32'h7521931a; Q_COEFF[ 4] = 32'h99e0eef1; Q_COEFF[ 5] = 32'ha219917b; Q_COEFF[ 6] = 32'he8e9087a; Q_COEFF[ 7] = 32'h5239d12b; Q_COEFF[ 8] = 32'h0000ed92; + D[ 0] = 32'hf127ca41; D[ 1] = 32'hc4975ff0; D[ 2] = 
32'h69ebbe13; D[ 3] = 32'h66fe0018; + D[ 4] = 32'hf2089237; D[ 5] = 32'hfa3f05ab; D[ 6] = 32'h2ab183c4; D[ 7] = 32'h1e4b3c04; + D[ 8] = 32'ha67974e8; D[ 9] = 32'ha6714d63; D[ 10] = 32'hfe5cd801; D[ 11] = 32'h13f2071a; + D[ 12] = 32'h0b978309; D[ 13] = 32'hb0ddb4a0; D[ 14] = 32'ha437a2cc; D[ 15] = 32'h2391b2fb; DP[ 0] = 32'h3891ed91; DP[ 1] = 32'h775046c2; DP[ 2] = 32'h60180c26; DP[ 3] = 32'h5130700a; DP[ 4] = 32'hb13c8216; DP[ 5] = 32'h833fcf78; DP[ 6] = 32'h7ab89b12; DP[ 7] = 32'hb976758c; DQ[ 0] = 32'h28cc59ad; DQ[ 1] = 32'h3ce6ed45; DQ[ 2] = 32'ha1f53aeb; DQ[ 3] = 32'h06ca05e1; @@ -100,26 +105,45 @@ module tb_core_full_512; S[ 8] = 32'h2854a51a; S[ 9] = 32'h0245619b; S[ 10] = 32'hfb67ef8f; S[ 11] = 32'hcc5bdd4f; S[ 12] = 32'ha70f58bd; S[ 13] = 32'h31f15702; S[ 14] = 32'hd6f36259; S[ 15] = 32'h280e67e0; end - // // Clocks // - `define CLK_FREQUENCY_MHZ (100.0) - `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) - `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + `define CLK_FREQUENCY_MHZ (100.0) + `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) + `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + `define CLK_PERIOD_QUARTER_NS (0.5 * `CLK_PERIOD_HALF_NS) - `define CLK_BUS_FREQUENCY_MHZ (50.0) - `define CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ) - `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS) - - reg clk = 1'b1; - reg clk_bus = 1'b0; + `define CLK_BUS_FREQUENCY_MHZ (25.0) + `define CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ) + `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS) + + reg clk = 1'b1; + reg clk_dly = 1'b0; + wire clk_idle = clk & clk_dly; + + reg clk_bus = 1'b1; + reg clk_bus_dly = 1'b0; + wire clk_bus_idle = clk_bus & clk_bus_dly; - always #`CLK_PERIOD_HALF_NS clk = ~clk; + always #`CLK_PERIOD_HALF_NS clk <= ~clk; + always #`CLK_BUS_PERIOD_HALF_NS clk_bus <= ~clk_bus; + + always @(clk ) clk_dly <= #(`CLK_PERIOD_HALF_NS - `CLK_PERIOD_QUARTER_NS) clk; + always @(clk_bus) clk_bus_dly <= 
#(`CLK_BUS_PERIOD_HALF_NS - `CLK_PERIOD_QUARTER_NS) clk_bus; + + + // + // Clock Sync + // + task sync_clk; + while (clk_idle !== 1) _wait_quarter_clk_tick; + endtask - always #`CLK_BUS_PERIOD_HALF_NS clk_bus = ~clk_bus; + task sync_clk_bus; + while (clk_bus_idle !== 1) _wait_quarter_clk_tick; + endtask // @@ -143,7 +167,6 @@ module tb_core_full_512; // // System Bus // - reg bus_ready; reg bus_cs = 1'b0; reg bus_we = 1'b0; reg [11:0] bus_addr; @@ -185,67 +208,102 @@ module tb_core_full_512; // - // Routine (Bus) + // Bus Init Routine // - initial begin - - bus_ready = 1'b0; + task core_set_input; + begin + core_set_input_1; + core_set_input_2; + wait_clk_bus_ticks(10); + $display("Core input banks written."); + end + endtask - while (rst) wait_clock_bus_tick; - wait_clock_bus_ticks(10); - $display("Core came out of reset."); - - set_input_1; - set_input_2; - - wait_clock_bus_ticks(10); - bus_ready = 1'b1; - end + // + // Script + // + initial main; // - // Routine (Control/Status, Bus) + // Main Routine (Control/Status, Bus) // - initial begin - - _wait_half_clock_tick; - wait_clock_ticks(100); - rst = 1'b0; - - while (!bus_ready) wait_clock_tick; - wait_clock_ticks(10); - $display("Core input banks written."); - - word_index_last_n = CORE_NUM_WORDS_N - 1; - word_index_last_pq = CORE_NUM_WORDS_PQ - 1; - - bit_index_last_n = TB_MODULUS_LENGTH_N - 1; - bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1; + task main; + begin - core_crt_mode = 1'b1; + sync_clk; // switch to fast core clock + core_reset; // reset core + core_set_params; // set parameters (modulus width, exponent length) - core_next = 1'b1; - wait_clock_tick; - core_next = 1'b0; - $display("Pulsed 'next' control signal."); + sync_clk_bus; // switch to slow bus clock + core_set_input; // write to core input banks + /* + sync_clk; // switch to fast core clock + core_set_crt_mode(1); // enable CRT signing + core_pulse_next; // assert 'next' bit for one cycle + core_wait_valid; // wait till 'valid' bit gets 
asserted + + sync_clk_bus; // switch to slow bus clock + core_get_output; // read from core output banks + core_verify_output; // check whether core output matches precomputed known good reference values + */ + sync_clk; // switch to fast core clock + core_set_crt_mode(0); // disable CRT signing + core_pulse_next; // assert 'next' bit for one cycle + core_wait_valid; // wait till 'valid' bit gets asserted + + sync_clk_bus; // switch to slow bus clock + core_get_output; // read from core output banks + core_verify_output; // check whether core output matches precomputed known good reference values + end + endtask - while (!core_valid) wait_clock_tick; - wait_clock_ticks(10); - - $display("Detected high 'valid' status signal."); - core_crt_mode = 1'bX; - - wait_clock_ticks(10); - get_output; - wait_clock_ticks(10); - - $display("Core output banks read."); - - verify; + task core_reset; + begin + wait_clk_ticks(100); + rst = 1'b0; + wait_clk_ticks(10); + $display("Core reset finished."); + end + endtask - end + task core_set_params; + begin + word_index_last_n = CORE_NUM_WORDS_N - 1; + word_index_last_pq = CORE_NUM_WORDS_PQ - 1; + bit_index_last_n = TB_MODULUS_LENGTH_N - 1; + bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1; + $display("Core parameters set."); + end + endtask + task core_set_crt_mode; + input _crt; + begin + core_crt_mode = _crt; + if (_crt) $display("Enabled CRT mode."); + else $display("Disabled CRT mode."); + end + endtask + + task core_pulse_next; + begin + core_next = 1'b1; + wait_clk_tick; + core_next = 1'b0; + $display("Pulsed core 'next' control signal."); + end + endtask + + task core_wait_valid; + begin + while (!core_valid) wait_clk_tick; + wait_clk_ticks(10); + $display("Detected high core 'valid' status signal."); + end + endtask + // // Variables @@ -254,58 +312,61 @@ module tb_core_full_512; // - // set_input_1; + // core_set_input_1 // - task set_input_1; - reg [9:0] _tn; + task core_set_input_1; + reg [9:0] _tn; + reg [31:0] zzz; begin
_tn = BANK_IN_1_N_COEFF * 2 ** BUS_OP_ADDR_W + TB_NUM_WORDS_N; // trick to write extra trailer word - for (_w=0; _w