diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-21 13:04:07 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-21 13:04:07 +0300 |
commit | 72902f5b40ac695786f5103d2a5a456c6c7ee83f (patch) | |
tree | 59a644e74fa4fdc25c92b8d261150ef4899323f5 | |
parent | 9eac252242c69e51a38a9a88c87b564dd40b6257 (diff) |
Redesigned the testbench. The core clock no longer needs to be exactly twice as
fast as the bus clock: it can now run at the same frequency or, say, four times faster.
-rw-r--r-- | bench/tb_core_full_512.v | 294 | ||||
-rw-r--r-- | rtl/modexpng_dsp48e1.vh | 1 | ||||
-rw-r--r-- | rtl/modexpng_dsp_array_block.v | 84 | ||||
-rw-r--r-- | rtl/modexpng_dsp_slice_primitive.vh | 9 | ||||
-rw-r--r-- | rtl/modexpng_dsp_slice_wrapper_generic.v | 189 | ||||
-rw-r--r-- | rtl/modexpng_dsp_slice_wrapper_xilinx.v (renamed from rtl/modexpng_dsp_slice_wrapper.v) | 4 | ||||
-rw-r--r-- | rtl/modexpng_io_block.v | 19 | ||||
-rw-r--r-- | rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v | 66 | ||||
-rw-r--r-- | rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v | 59 | ||||
-rw-r--r-- | rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v | 43 | ||||
-rw-r--r-- | rtl/modexpng_uop_rom.v | 20 |
11 files changed, 594 insertions, 194 deletions
diff --git a/bench/tb_core_full_512.v b/bench/tb_core_full_512.v index 221a2c6..75accff 100644 --- a/bench/tb_core_full_512.v +++ b/bench/tb_core_full_512.v @@ -31,6 +31,7 @@ module tb_core_full_512; reg [31:0] Q_FACTOR[0:TB_NUM_WORDS_PQ-1]; reg [31:0] P_COEFF[0:TB_NUM_WORDS_PQ]; reg [31:0] Q_COEFF[0:TB_NUM_WORDS_PQ]; + reg [31:0] D[0:TB_NUM_WORDS_N-1]; reg [31:0] DP[0:TB_NUM_WORDS_PQ-1]; reg [31:0] DQ[0:TB_NUM_WORDS_PQ-1]; reg [31:0] QINV[0:TB_NUM_WORDS_PQ-1]; @@ -81,6 +82,10 @@ module tb_core_full_512; Q_COEFF[ 0] = 32'h5eee9ecd; Q_COEFF[ 1] = 32'h085153b0; Q_COEFF[ 2] = 32'h85326da6; Q_COEFF[ 3] = 32'h7521931a; Q_COEFF[ 4] = 32'h99e0eef1; Q_COEFF[ 5] = 32'ha219917b; Q_COEFF[ 6] = 32'he8e9087a; Q_COEFF[ 7] = 32'h5239d12b; Q_COEFF[ 8] = 32'h0000ed92; + D[ 0] = 32'hf127ca41; D[ 1] = 32'hc4975ff0; D[ 2] = 32'h69ebbe13; D[ 3] = 32'h66fe0018; + D[ 4] = 32'hf2089237; D[ 5] = 32'hfa3f05ab; D[ 6] = 32'h2ab183c4; D[ 7] = 32'h1e4b3c04; + D[ 8] = 32'ha67974e8; D[ 9] = 32'ha6714d63; D[ 10] = 32'hfe5cd801; D[ 11] = 32'h13f2071a; + D[ 12] = 32'h0b978309; D[ 13] = 32'hb0ddb4a0; D[ 14] = 32'ha437a2cc; D[ 15] = 32'h2391b2fb; DP[ 0] = 32'h3891ed91; DP[ 1] = 32'h775046c2; DP[ 2] = 32'h60180c26; DP[ 3] = 32'h5130700a; DP[ 4] = 32'hb13c8216; DP[ 5] = 32'h833fcf78; DP[ 6] = 32'h7ab89b12; DP[ 7] = 32'hb976758c; DQ[ 0] = 32'h28cc59ad; DQ[ 1] = 32'h3ce6ed45; DQ[ 2] = 32'ha1f53aeb; DQ[ 3] = 32'h06ca05e1; @@ -100,26 +105,45 @@ module tb_core_full_512; S[ 8] = 32'h2854a51a; S[ 9] = 32'h0245619b; S[ 10] = 32'hfb67ef8f; S[ 11] = 32'hcc5bdd4f; S[ 12] = 32'ha70f58bd; S[ 13] = 32'h31f15702; S[ 14] = 32'hd6f36259; S[ 15] = 32'h280e67e0; end - // // Clocks // - `define CLK_FREQUENCY_MHZ (100.0) - `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) - `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + `define CLK_FREQUENCY_MHZ (100.0) + `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) + `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + `define CLK_PERIOD_QUARTER_NS (0.5 * `CLK_PERIOD_HALF_NS) 
- `define CLK_BUS_FREQUENCY_MHZ (50.0) - `define CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ) - `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS) - - reg clk = 1'b1; - reg clk_bus = 1'b0; + `define CLK_BUS_FREQUENCY_MHZ (25.0) + `define CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ) + `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS) + + reg clk = 1'b1; + reg clk_dly = 1'b0; + wire clk_idle = clk & clk_dly; + + reg clk_bus = 1'b1; + reg clk_bus_dly = 1'b0; + wire clk_bus_idle = clk_bus & clk_bus_dly; - always #`CLK_PERIOD_HALF_NS clk = ~clk; + always #`CLK_PERIOD_HALF_NS clk <= ~clk; + always #`CLK_BUS_PERIOD_HALF_NS clk_bus <= ~clk_bus; + + always @(clk ) clk_dly <= #(`CLK_PERIOD_HALF_NS - `CLK_PERIOD_QUARTER_NS) clk; + always @(clk_bus) clk_bus_dly <= #(`CLK_BUS_PERIOD_HALF_NS - `CLK_PERIOD_QUARTER_NS) clk_bus; + + + // + // Clock Sync + // + task sync_clk; + while (clk_idle !== 1) _wait_quarter_clk_tick; + endtask - always #`CLK_BUS_PERIOD_HALF_NS clk_bus = ~clk_bus; + task sync_clk_bus; + while (clk_bus_idle !== 1) _wait_quarter_clk_tick; + endtask // @@ -143,7 +167,6 @@ module tb_core_full_512; // // System Bus // - reg bus_ready; reg bus_cs = 1'b0; reg bus_we = 1'b0; reg [11:0] bus_addr; @@ -185,67 +208,102 @@ module tb_core_full_512; // - // Routine (Bus) + // Bus Init Routine // - initial begin - - bus_ready = 1'b0; + task core_set_input; + begin + core_set_input_1; + core_set_input_2; + wait_clk_bus_ticks(10); + $display("Core input banks written."); + end + endtask - while (rst) wait_clock_bus_tick; - wait_clock_bus_ticks(10); - $display("Core came out of reset."); - - set_input_1; - set_input_2; - - wait_clock_bus_ticks(10); - bus_ready = 1'b1; - end + // + // Script + // + initial main; // - // Routine (Control/Status, Bus) + // Main Routine (Control/Status, Bus) // - initial begin - - _wait_half_clock_tick; - wait_clock_ticks(100); - rst = 1'b0; - - while (!bus_ready) wait_clock_tick; - wait_clock_ticks(10); - $display("Core 
input banks written."); - - word_index_last_n = CORE_NUM_WORDS_N - 1; - word_index_last_pq = CORE_NUM_WORDS_PQ - 1; - - bit_index_last_n = TB_MODULUS_LENGTH_N - 1; - bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1; + task main; + begin - core_crt_mode = 1'b1; + sync_clk; // switch to fast core clock + core_reset; // reset core + core_set_params; // set parameters (modulus width, exponent length) - core_next = 1'b1; - wait_clock_tick; - core_next = 1'b0; - $display("Pulsed 'next' control signal."); + sync_clk_bus; // switch to slow bus clock + core_set_input; // write to core input banks + /* + sync_clk; // switch to fast core clock + core_set_crt_mode(1); // enable CRT signing + core_pulse_next; // assert 'next' bit for one cycle + core_wait_valid; // wait till 'valid' bit gets asserted + + sync_clk_bus; // switch to slow bus clock + core_get_output; // read from core output banks + core_verify_output; // check, whether core output matches precomputed known good refrence values + */ + sync_clk; // switch to fast core clock + core_set_crt_mode(0); // disable CRT signing + core_pulse_next; // assert 'next' bit for one cycle + core_wait_valid; // wait till 'valid' bit gets asserted + + sync_clk_bus; // switch to slow bus clock + core_get_output; // read from core output banks + core_verify_output; // check, whether core output matches precomputed known good refrence values + end + endtask - while (!core_valid) wait_clock_tick; - wait_clock_ticks(10); - - $display("Detected high 'valid' status signal."); - core_crt_mode = 1'bX; - - wait_clock_ticks(10); - get_output; - wait_clock_ticks(10); - - $display("Core output banks read."); - - verify; + task core_reset; + begin + wait_clk_ticks(100); + rst = 1'b0; + wait_clk_ticks(10); + $display("Core reset finished."); + end + endtask - end + task core_set_params; + begin + word_index_last_n = CORE_NUM_WORDS_N - 1; + word_index_last_pq = CORE_NUM_WORDS_PQ - 1; + bit_index_last_n = TB_MODULUS_LENGTH_N - 1; + bit_index_last_pq 
= TB_MODULUS_LENGTH_N / 2 - 1; + $display("Core parameters set."); + end + endtask + task core_set_crt_mode; + input _crt; + begin + core_crt_mode = _crt; + if (_crt) $display("Enabled CRT mode."); + else $display("Disabled CRT mode."); + end + endtask + + task core_pulse_next; + begin + core_next = 1'b1; + wait_clk_tick; + core_next = 1'b0; + $display("Pulsed core 'next' control signal."); + end + endtask + + task core_wait_valid; + begin + while (!core_valid) wait_clk_tick; + wait_clk_ticks(10); + $display("Detected high core 'valid' status signal."); + end + endtask + // // Variables @@ -254,58 +312,61 @@ module tb_core_full_512; // - // set_input_1; + // core_set_input_1 // - task set_input_1; - reg [9:0] _tn; + task core_set_input_1; + reg [9:0] _tn; + reg [31:0] zzz; begin _tn = BANK_IN_1_N_COEFF * 2 ** BUS_OP_ADDR_W + TB_NUM_WORDS_N; // trick to write extra trailer word - for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_M, _w[6:0], M[_w]); - for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N, _w[6:0], N[_w]); - for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N_FACTOR, _w[6:0], N_FACTOR[_w]); - for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N_COEFF, _w[6:0], N_COEFF[_w]); - bus_write(2'd0, _tn[9:7], _tn[6:0], N_COEFF[TB_NUM_WORDS_N]); - for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_X, _w[6:0], X[_w]); - for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_Y, _w[6:0], Y[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_M, _w[6:0], M[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_N, _w[6:0], N[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_N_FACTOR, _w[6:0], N_FACTOR[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_N_COEFF, _w[6:0], N_COEFF[_w]); + bus_write(2'd1, _tn[9:7], _tn[6:0], N_COEFF[TB_NUM_WORDS_N]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, 
BANK_IN_1_X, _w[6:0], X[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_Y, _w[6:0], Y[_w]); end endtask // - // set_input_2; + // core_set_input_2 // - task set_input_2; + task core_set_input_2; begin -// for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_M, _w[6:0], M[_w]); - for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P, {1'b0, _w[5:0]}, P [_w]); - for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P, {1'b1, _w[5:0]}, DP [_w]); - for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P_FACTOR, { _w[6:0]}, P_FACTOR[_w]); - for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P_COEFF, { _w[6:0]}, P_COEFF [_w]); - for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q, {1'b0, _w[5:0]}, Q [_w]); - for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q, {1'b1, _w[5:0]}, DQ [_w]); - for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q_FACTOR, { _w[6:0]}, Q_FACTOR[_w]); - for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q_COEFF, { _w[6:0]}, Q_COEFF [_w]); - for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_QINV, { _w[6:0]}, QINV [_w]); + //for (_w=0; _w< TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd2, BANK_IN_2_D, { _w[6:0]}, D [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_P, {1'b0, _w[5:0]}, P [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_P, {1'b1, _w[5:0]}, DP [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_P_FACTOR, { _w[6:0]}, P_FACTOR[_w]); + for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_P_COEFF, { _w[6:0]}, P_COEFF [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_Q, {1'b0, _w[5:0]}, Q [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_Q, {1'b1, _w[5:0]}, DQ [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, 
BANK_IN_2_Q_FACTOR, { _w[6:0]}, Q_FACTOR[_w]); + for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_Q_COEFF, { _w[6:0]}, Q_COEFF [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_QINV, { _w[6:0]}, QINV [_w]); end endtask // - // get_output; + // core_get_output // - task get_output; + task core_get_output; begin - for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_XM, _w[6:0], XM_READBACK[_w]); - for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_YM, _w[6:0], YM_READBACK[_w]); - for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_S, _w[6:0], S_READBACK[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd3, BANK_OUT_XM, _w[6:0], XM_READBACK[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd3, BANK_OUT_YM, _w[6:0], YM_READBACK[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd3, BANK_OUT_S, _w[6:0], S_READBACK[_w]); + wait_clk_bus_ticks(10); + $display("Core output banks read."); end endtask // - // verify; + // core_verify_output // - task verify; + task core_verify_output; // reg xm_ok; reg ym_ok; @@ -387,7 +448,7 @@ module tb_core_full_512; input [31:0] data; begin _bus_drive(1'b1, 1'b1, {sel, bank, addr}, data); - wait_clock_bus_tick; + wait_clk_bus_tick; _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX); end endtask @@ -403,7 +464,7 @@ module tb_core_full_512; output [31:0] data; begin _bus_drive(1'b1, 1'b0, {sel, bank, addr}, 32'hXXXXXXXX); - wait_clock_bus_tick; + wait_clk_bus_tick; data = bus_data_rd; _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX); end @@ -411,48 +472,61 @@ module tb_core_full_512; // - // _wait_half_clock_tick() + // _wait_quarter_clk_tick() // - task _wait_half_clock_tick; - #`CLK_PERIOD_HALF_NS; + task _wait_quarter_clk_tick; + #`CLK_PERIOD_QUARTER_NS; endtask + // - // wait_clock_tick() + // _wait_half_clk_tick() // - task wait_clock_tick; + task _wait_half_clk_tick; begin - _wait_half_clock_tick; - _wait_half_clock_tick; + 
_wait_quarter_clk_tick; + _wait_quarter_clk_tick; end endtask // - // wait_clock_bus_tick() + // wait_clk_tick() // - task wait_clock_bus_tick; + task wait_clk_tick; + begin + _wait_half_clk_tick; + _wait_half_clk_tick; + end + endtask + + + // + // wait_clk_bus_tick() + // + task wait_clk_bus_tick; #`CLK_BUS_PERIOD_NS; endtask // - // wait_clock_ticks() + // wait_clk_ticks() // - task wait_clock_ticks; + task wait_clk_ticks; input integer num_ticks; for (_n=0; _n<num_ticks; _n=_n+1) - wait_clock_tick; + wait_clk_tick; endtask - + // - // wait_clock_bus_ticks() + // wait_clk_bus_ticks() // - task wait_clock_bus_ticks; + task wait_clk_bus_ticks; input integer num_ticks; for (_n=0; _n<num_ticks; _n=_n+1) - wait_clock_bus_tick; + wait_clk_bus_tick; endtask - + + endmodule diff --git a/rtl/modexpng_dsp48e1.vh b/rtl/modexpng_dsp48e1.vh index bc3d55c..410ad41 100644 --- a/rtl/modexpng_dsp48e1.vh +++ b/rtl/modexpng_dsp48e1.vh @@ -6,3 +6,4 @@ localparam DSP48E1_P_W = 48; localparam DSP48E1_INMODE_W = 5; localparam DSP48E1_OPMODE_W = 7; localparam DSP48E1_ALUMODE_W = 4; + diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v index 8c4e844..6b4ad3c 100644 --- a/rtl/modexpng_dsp_array_block.v +++ b/rtl/modexpng_dsp_array_block.v @@ -6,8 +6,9 @@ module modexpng_dsp_array_block a, b, p ); - `include "modexpng_dsp48e1.vh" `include "modexpng_parameters.vh" + `include "modexpng_dsp48e1.vh" + `include "modexpng_dsp_slice_primitive.vh" input clk; @@ -38,14 +39,87 @@ module modexpng_dsp_array_block ce_a2 <= ce_a1; ce_b1 <= ce_b0; end - + + /// + wire [46:0] p_debug_direct; + wire [17:0] casc_a_debug_direct; + wire [15:0] casc_b_debug_direct; + + wire [46:0] p_debug_cascade; + + wire [46:0] p_ref_direct = p[ 0 +: MAC_W]; + wire [46:0] p_ref_cascade = p[MAC_W +: MAC_W]; + + modexpng_dsp_slice_wrapper_xilinx # + ( + .AB_INPUT("DIRECT"), + .B_REG(2) + ) + dsp_debug_direct + ( + .clk (clk), + + .ce_a1 (ce_a0), + .ce_b1 (ce_b0), + .ce_a2 (ce_a1), + .ce_b2 (ce_b1), + 
.ce_m (ce_m), + .ce_p (ce_p), + .ce_mode (ce_mode), + + .a (a[0 +: 18]), + .b (b), + .p (p_debug_direct), + + .inmode ({DSP48E1_INMODE_W{1'b0}}), + .opmode ({1'b0, mode_z[0], 1'b0, 2'b01, 2'b01}), + .alumode ({DSP48E1_ALUMODE_W{1'b0}}), + + .casc_a_in (WORD_EXT_ZERO), + .casc_b_in (WORD_ZERO), + + .casc_a_out (casc_a_debug_direct), + .casc_b_out (casc_b_debug_direct) + ); + + modexpng_dsp_slice_wrapper_xilinx # + ( + .AB_INPUT("CASCADE"), + .B_REG(1) + ) + dsp_debug_cascade + ( + .clk (clk), + + .ce_a1 (ce_a1), + .ce_b1 (1'b0), + .ce_a2 (ce_a2), + .ce_b2 (ce_b1), + .ce_m (ce_m), + .ce_p (ce_p), + .ce_mode (ce_mode), + + .a (a[0 +: 18]), + .b (b), + .p (p_debug_cascade), + + .inmode ({DSP48E1_INMODE_W{1'b0}}), + .opmode ({1'b0, mode_z[1], 1'b0, 2'b01, 2'b01}), + .alumode ({DSP48E1_ALUMODE_W{1'b0}}), + + .casc_a_in (casc_a_debug_direct), + .casc_b_in (casc_b_debug_direct), + + .casc_a_out (), + .casc_b_out () + ); genvar z; generate for (z=0; z<NUM_MULTS_HALF; z=z+1) // begin : gen_DSP48E1 // - modexpng_dsp_slice_wrapper # + `MODEXPNG_DSP_SLICE # ( .AB_INPUT("DIRECT"), .B_REG(2) @@ -77,7 +151,7 @@ module modexpng_dsp_array_block .casc_b_out (casc_b[z]) ); // - modexpng_dsp_slice_wrapper # + `MODEXPNG_DSP_SLICE # ( .AB_INPUT("CASCADE"), .B_REG(1) @@ -113,7 +187,7 @@ module modexpng_dsp_array_block // endgenerate - modexpng_dsp_slice_wrapper # + `MODEXPNG_DSP_SLICE # ( .AB_INPUT("DIRECT"), .B_REG(2) diff --git a/rtl/modexpng_dsp_slice_primitive.vh b/rtl/modexpng_dsp_slice_primitive.vh new file mode 100644 index 0000000..02d9a5d --- /dev/null +++ b/rtl/modexpng_dsp_slice_primitive.vh @@ -0,0 +1,9 @@ +`ifndef MODEXPNG_ENABLE_DEBUG + +`define MODEXPNG_DSP_SLICE modexpng_dsp_slice_wrapper_xilinx + +`else + +`define MODEXPNG_DSP_SLICE modexpng_dsp_slice_wrapper_generic + +`endif diff --git a/rtl/modexpng_dsp_slice_wrapper_generic.v b/rtl/modexpng_dsp_slice_wrapper_generic.v new file mode 100644 index 0000000..7183d74 --- /dev/null +++ 
b/rtl/modexpng_dsp_slice_wrapper_generic.v @@ -0,0 +1,189 @@ +module modexpng_dsp_slice_wrapper_generic # +( + AB_INPUT = "DIRECT", + B_REG = 2 +) +( + clk, + ce_a1, ce_b1, ce_a2, ce_b2, + ce_m, ce_p, ce_mode, + a, b, p, + inmode, opmode, alumode, + casc_a_in, casc_b_in, + casc_a_out, casc_b_out +); + + `include "modexpng_parameters.vh" + `include "modexpng_dsp48e1.vh" + + input clk; // + input ce_a1; // + input ce_b1; // + input ce_a2; // + input ce_b2; // + input ce_m; // + input ce_p; // + input ce_mode; // + input [ WORD_EXT_W -1:0] a; // + input [ WORD_W -1:0] b; // + output [ MAC_W -1:0] p; // + input [ DSP48E1_INMODE_W -1:0] inmode; // + input [ DSP48E1_OPMODE_W -1:0] opmode; // + input [DSP48E1_ALUMODE_W -1:0] alumode; // + input [ WORD_EXT_W -1:0] casc_a_in; // + input [ WORD_W -1:0] casc_b_in; // + output [ WORD_EXT_W -1:0] casc_a_out; // + output [ WORD_W -1:0] casc_b_out; // + + // + // A Port + // + wire [WORD_EXT_W -1:0] a_mux = AB_INPUT == "DIRECT" ? a : casc_a_in; + reg [WORD_EXT_W -1:0] a_reg1; + reg [WORD_EXT_W -1:0] a_reg2; + + assign casc_a_out = a_reg1; + + always @(posedge clk) begin + if (ce_a1) a_reg1 <= a_mux; + if (ce_a2) a_reg2 <= a_reg1; + end + + // + // B Port + // + wire [WORD_W -1:0] b_mux = AB_INPUT == "DIRECT" ? b : casc_b_in; + reg [WORD_W -1:0] b_reg1; + reg [WORD_W -1:0] b_reg2; + + assign casc_b_out = b_reg1; + + always @(posedge clk) begin + if (ce_b1) b_reg1 <= b_mux; + if (ce_b2) b_reg2 <= B_REG == 2 ? b_reg1 : b_mux; + end + + // + // OPMODE Port + // + reg [DSP48E1_OPMODE_W -1:0] opmode_reg; + + always @(posedge clk) begin + if (ce_mode) opmode_reg <= opmode; + end + + // + // M, P + // + reg [MAC_W-1:0] m_reg; + reg [MAC_W-1:0] p_reg; + + wire [MAC_W-1:0] a_pad = {{MAC_W-WORD_EXT_W{1'b0}}, a_reg2}; + wire [MAC_W-1:0] b_pad = {{MAC_W-WORD_W{1'b0}}, b_reg2}; + wire [MAC_W-1:0] p_pad = opmode_reg[5] ? 
p_reg : {MAC_W{1'b0}}; + + assign p = p_reg; + + always @(posedge clk) begin + if (ce_m) m_reg <= a_pad * b_pad; + if (ce_p) p_reg <= m_reg + p_pad; + end + + + /* + DSP48E1 # + ( + .AREG (2), + .BREG (B_REG), + .CREG (0), + .DREG (0), + .ADREG (0), + .MREG (1), + .PREG (1), + .ACASCREG (1), + .BCASCREG (1), + .INMODEREG (0), + .OPMODEREG (1), + .ALUMODEREG (0), + .CARRYINREG (0), + .CARRYINSELREG (0), + + .A_INPUT (AB_INPUT), + .B_INPUT (AB_INPUT), + + .USE_DPORT ("FALSE"), + .USE_MULT ("DYNAMIC"), + .USE_SIMD ("ONE48"), + + .MASK ({DSP48E1_P_W{1'b1}}), + .PATTERN ({DSP48E1_P_W{1'b0}}), + .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), + + .USE_PATTERN_DETECT ("NO_PATDET"), + .AUTORESET_PATDET ("NO_RESET") + ) + DSP48E1_inst + ( + .CLK (clk), + + .CEA1 (ce_a1), + .CEB1 (ce_b1), + .CEA2 (ce_a2), + .CEB2 (ce_b2), + .CEAD (1'b0), + .CEC (1'b0), + .CED (1'b0), + .CEM (ce_m), + .CEP (ce_p), + .CEINMODE (1'b0), + .CECTRL (ce_mode), + .CEALUMODE (1'b0), + .CECARRYIN (1'b0), + + .A ({{(DSP48E1_A_W-WORD_EXT_W){1'b0}}, a}), + .B ({{(DSP48E1_B_W-WORD_W){1'b0}}, b}), + .C ({DSP48E1_C_W{1'b0}}), + .D ({DSP48E1_D_W{1'b0}}), + .P ({p_dummy, p}), + + .INMODE (inmode), + .OPMODE (opmode), + .ALUMODE (alumode), + + .ACIN ({{(DSP48E1_A_W-WORD_EXT_W){1'b0}}, casc_a_in}), + .BCIN ({{(DSP48E1_B_W-WORD_W){1'b0}}, casc_b_in}), + .ACOUT ({casc_a_dummy, casc_a_out}), + .BCOUT ({casc_b_dummy, casc_b_out}), + .PCIN ({DSP48E1_P_W{1'b0}}), + .PCOUT (), + .CARRYCASCIN (1'b0), + .CARRYCASCOUT (), + + .RSTA (1'b0), + .RSTB (1'b0), + .RSTC (1'b0), + .RSTD (1'b0), + .RSTM (1'b0), + .RSTP (1'b0), + .RSTINMODE (1'b0), + .RSTCTRL (1'b0), + .RSTALUMODE (1'b0), + .RSTALLCARRYIN (1'b0), + + .UNDERFLOW (), + .OVERFLOW (), + .PATTERNDETECT (), + .PATTERNBDETECT (), + + .CARRYIN (1'b0), + .CARRYOUT (), + .CARRYINSEL (3'b000), + + .MULTSIGNIN (1'b0), + .MULTSIGNOUT () + ); + */ + + +endmodule diff --git a/rtl/modexpng_dsp_slice_wrapper.v b/rtl/modexpng_dsp_slice_wrapper_xilinx.v index 3d13570..9c1a60d 100644 
--- a/rtl/modexpng_dsp_slice_wrapper.v +++ b/rtl/modexpng_dsp_slice_wrapper_xilinx.v @@ -1,4 +1,4 @@ -module modexpng_dsp_slice_wrapper # +module modexpng_dsp_slice_wrapper_xilinx # ( AB_INPUT = "DIRECT", B_REG = 2 @@ -130,6 +130,6 @@ module modexpng_dsp_slice_wrapper # .MULTSIGNIN (1'b0), .MULTSIGNOUT () - ); + ); endmodule diff --git a/rtl/modexpng_io_block.v b/rtl/modexpng_io_block.v index 06ce2b1..6d008aa 100644 --- a/rtl/modexpng_io_block.v +++ b/rtl/modexpng_io_block.v @@ -82,12 +82,12 @@ module modexpng_io_block wire [ BUS_DATA_W -1:0] bus_data_rd_input_1; wire [ BUS_DATA_W -1:0] bus_data_rd_output; - wire bus_we_input_1 = bus_we && (bus_addr_msb == 2'd0); - wire bus_we_input_2 = bus_we && (bus_addr_msb == 2'd1); + wire bus_we_input_1 = bus_we && (bus_addr_msb == 2'd1); + wire bus_we_input_2 = bus_we && (bus_addr_msb == 2'd2); - wire bus_cs_input_1 = bus_cs && (bus_addr_msb == 2'b00); - wire bus_cs_input_2 = bus_cs && (bus_addr_msb == 2'b01); - wire bus_cs_output = bus_cs && (bus_addr_msb == 2'b10); + wire bus_cs_input_1 = bus_cs && (bus_addr_msb == 2'd1); + wire bus_cs_input_2 = bus_cs && (bus_addr_msb == 2'd2); + wire bus_cs_output = bus_cs && (bus_addr_msb == 2'd3); /* INPUT_1 */ `MODEXPNG_TDP_36K_X16_X32 bram_input_1 @@ -125,7 +125,6 @@ module modexpng_io_block .doutb (in_2_dout) // ); - /* OUTPUT */ `MODEXPNG_SDP_36K_X32_X16 bram_output ( @@ -152,10 +151,10 @@ module modexpng_io_block // case (bus_addr_msb_dly) // - 2'd0: bus_data_rd_mux = bus_data_rd_input_1; - 2'd1: bus_data_rd_mux = 32'hDEADC0DE; - 2'd2: bus_data_rd_mux = bus_data_rd_output; - 2'd3: bus_data_rd_mux = 32'hDEADC0DE; + 2'd0: bus_data_rd_mux = 32'hDEADC0DE; + 2'd1: bus_data_rd_mux = bus_data_rd_input_1; + 2'd2: bus_data_rd_mux = 32'hDEADC0DE; + 2'd3: bus_data_rd_mux = bus_data_rd_output; // endcase diff --git a/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v b/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v index 034b00b..3797e41 100644 --- a/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v 
+++ b/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v @@ -34,42 +34,40 @@ module modexpng_sdp_36k_x16_x32_wrapper_generic // - // BRAM_SDP_MACRO + // Memory // - BRAM_SDP_MACRO # - ( - .DEVICE ("7SERIES"), - - .BRAM_SIZE ("36Kb"), - - .WRITE_WIDTH (BUS_DATA_W), - .READ_WIDTH (WORD_W), - - .DO_REG (1), - .WRITE_MODE ("READ_FIRST"), - - .SRVAL (72'h000000000000000000), - .INIT (72'h000000000000000000), - - .INIT_FILE ("NONE"), - .SIM_COLLISION_CHECK ("NONE") - ) - BRAM_SDP_MACRO_inst - ( - .RST (1'b0), + reg [BUS_DATA_W -1:0] mem[0:2**(BANK_ADDR_W+BUS_OP_ADDR_W)-1]; + + // + // Write Port + // + always @(posedge clk_bus) + // + if (ena && wea) + mem[addra] <= dina; + + // + // Read Port + // + reg [WORD_W -1:0] doutb_reg1; + reg [WORD_W -1:0] doutb_reg2; + + assign doutb = doutb_reg2; + + wire [BUS_DATA_W -1:0] mem_addrb = mem[addrb[BANK_ADDR_W + OP_ADDR_W -1:1]]; + + wire [ WORD_W -1:0] mem_addrb_msb = mem_addrb[ BUS_DATA_W -1:WORD_W]; + wire [ WORD_W -1:0] mem_addrb_lsb = mem_addrb[ WORD_W -1: 0]; - .WRCLK (clk_bus), - .WREN (ena), - .WE ({4{wea}}), - .WRADDR (addra), - .DI (dina), - - .RDCLK (clk), - .RDEN (enb), - .REGCE (regceb), - .RDADDR (addrb), - .DO (doutb) - ); + always @(posedge clk) + // + if (enb) + doutb_reg1 <= addrb[0] ? 
mem_addrb_msb : mem_addrb_lsb; + + always @(posedge clk) + // + if (regceb) + doutb_reg2 <= doutb_reg1; endmodule diff --git a/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v b/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v index c74daac..586cadf 100644 --- a/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v +++ b/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v @@ -33,41 +33,34 @@ module modexpng_sdp_36k_x32_x16_wrapper_generic // - // BRAM_SDP_MACRO + // Memory // - BRAM_SDP_MACRO # - ( - .DEVICE ("7SERIES"), - - .BRAM_SIZE ("36Kb"), - - .WRITE_WIDTH (WORD_W), - .READ_WIDTH (BUS_DATA_W), - - .DO_REG (0), - .WRITE_MODE ("READ_FIRST"), - - .SRVAL (72'h000000000000000000), - .INIT (72'h000000000000000000), - - .INIT_FILE ("NONE"), - .SIM_COLLISION_CHECK ("NONE") - ) - BRAM_SDP_MACRO_inst - ( - .RST (1'b0), + reg [BUS_DATA_W -1:0] mem[0:2**(BANK_ADDR_W+BUS_OP_ADDR_W)-1]; + + // + // Write Port + // + wire [BANK_ADDR_W + BUS_OP_ADDR_W -2:0] addra_msb = addra[BANK_ADDR_W + BUS_OP_ADDR_W -1:1]; + wire addra_lsb = addra[0]; + + always @(posedge clk) + // + if (ena && wea) begin + if (addra_lsb) mem[addra_msb][BUS_DATA_W-1:WORD_W] <= dina; + else mem[addra_msb][ WORD_W-1: 0] <= dina; + end + + // + // Read Port + // + reg [BUS_DATA_W -1:0] doutb_reg; - .WRCLK (clk), - .WREN (ena), - .WE ({2{wea}}), - .WRADDR (addra), - .DI (dina), + assign doutb = doutb_reg; - .RDCLK (clk_bus), - .RDEN (enb), - .REGCE (1'b0), - .RDADDR (addrb), - .DO (doutb) - ); + always @(posedge clk_bus) + // + if (enb) + doutb_reg <= mem[addrb]; + endmodule diff --git a/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v b/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v index 5e69bef..fda7cf6 100644 --- a/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v +++ b/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v @@ -35,6 +35,48 @@ module modexpng_tdp_36k_x16_x32_wrapper_generic // + // Memory + // + reg [BUS_DATA_W -1:0] mem[0:2**(BANK_ADDR_W+BUS_OP_ADDR_W)-1]; + + // + // Read-Write Port + // + reg [BUS_DATA_W -1:0] 
douta_reg; + + assign douta = douta_reg; + + always @(posedge clk_bus) + // + if (ena) begin + if (wea) mem[addra] <= dina; + douta_reg <= mem[addra]; + end + + // + // Read Port + // + reg [WORD_W -1:0] doutb_reg1; + reg [WORD_W -1:0] doutb_reg2; + + assign doutb = doutb_reg2; + + wire [BUS_DATA_W -1:0] mem_addrb = mem[addrb[BANK_ADDR_W + OP_ADDR_W -1:1]]; + + wire [ WORD_W -1:0] mem_addrb_msb = mem_addrb[ BUS_DATA_W -1:WORD_W]; + wire [ WORD_W -1:0] mem_addrb_lsb = mem_addrb[ WORD_W -1: 0]; + + always @(posedge clk) + // + if (enb) + doutb_reg1 <= addrb[0] ? mem_addrb_msb : mem_addrb_lsb; + + always @(posedge clk) + // + if (regceb) + doutb_reg2 <= doutb_reg1; +/* + // // BRAM_TDP_MACRO // BRAM_TDP_MACRO # @@ -84,5 +126,6 @@ module modexpng_tdp_36k_x16_x32_wrapper_generic .DIB ({WORD_W{1'b0}}), .DOB (doutb) ); + */ endmodule diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v index 522e9ca..5d6308c 100644 --- a/rtl/modexpng_uop_rom.v +++ b/rtl/modexpng_uop_rom.v @@ -16,6 +16,8 @@ module modexpng_uop_rom // case (addr) // + // CRT mode + // 7'd000: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // 7'd001: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // 7'd002: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; // @@ -107,9 +109,27 @@ module modexpng_uop_rom 7'd058: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_A, BANK_DNC, BANK_NARROW_A }; // // 7'd059: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_A, BANK_DNC, BANK_OUT_S }; // + // + // Non-CRT Mode (i.e. 
only when "D" is known) + // + 7'd064: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // + 7'd065: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // + 7'd066: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; // + 7'd067: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; // + 7'd068: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // + 7'd069: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // + // + 7'd070: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 7'd071: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 7'd072: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 7'd073: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 7'd074: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // + 7'd075: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // // default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_DNC, BANK_DNC }; // // + + endcase endmodule |