From 584393ac5fc9bbe80887702ec2fc97bee999c5e7 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Mon, 21 Oct 2019 15:13:01 +0300 Subject: Further work: - added core wrapper - fixed module resets across entire core (all the resets are now consistently active-low) - continued refactoring --- bench/tb_core_full_512.v | 4 +- bench/tb_wrapper.v | 221 +++++++++++++++++ rtl/modexpng_core_top.v | 28 +-- rtl/modexpng_dsp_array_block.v | 76 +----- rtl/modexpng_general_worker.v | 20 +- rtl/modexpng_io_block.v | 9 +- rtl/modexpng_io_manager.v | 24 +- rtl/modexpng_mmm_dual.v | 496 +++++++++++++++++++------------------- rtl/modexpng_mmm_dual_fsm.vh | 47 ++++ rtl/modexpng_mmm_fsm.vh | 43 ---- rtl/modexpng_parameters.vh | 2 +- rtl/modexpng_recombinator_block.v | 51 ++-- rtl/modexpng_reductor.v | 29 +-- rtl/modexpng_storage_block.v | 8 +- rtl/modexpng_storage_manager.v | 12 +- rtl/modexpng_uop_engine.v | 18 +- rtl/modexpng_wrapper.v | 393 ++++++++++++++++++++++++++++++ 17 files changed, 1010 insertions(+), 471 deletions(-) create mode 100644 bench/tb_wrapper.v create mode 100644 rtl/modexpng_mmm_dual_fsm.vh delete mode 100644 rtl/modexpng_mmm_fsm.vh create mode 100644 rtl/modexpng_wrapper.v diff --git a/bench/tb_core_full_512.v b/bench/tb_core_full_512.v index e2604f0..cbcdd1d 100644 --- a/bench/tb_core_full_512.v +++ b/bench/tb_core_full_512.v @@ -238,7 +238,7 @@ module tb_core_full_512; sync_clk_bus; // switch to slow bus clock core_set_input; // write to core input banks - /* + sync_clk; // switch to fast core clock core_set_crt_mode(1); // enable CRT signing core_pulse_next; // assert 'next' bit for one cycle @@ -247,7 +247,7 @@ module tb_core_full_512; sync_clk_bus; // switch to slow bus clock core_get_output; // read from core output banks core_verify_output; // check, whether core output matches precomputed known good refrence values - */ + sync_clk; // switch to fast core clock core_set_crt_mode(0); // disable CRT signing core_pulse_next; // assert 'next' bit for one cycle diff --git a/bench/tb_wrapper.v b/bench/tb_wrapper.v new file mode 100644 index 0000000..2000ec5 --- /dev/null +++ b/bench/tb_wrapper.v @@ -0,0 +1,221 @@ +`timescale 1ns / 1ps + +module tb_wrapper; + + + // + // Clocks + // + `define CLK_FREQUENCY_MHZ (100.0) + `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) + `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + `define CLK_PERIOD_QUARTER_NS (0.5 * `CLK_PERIOD_HALF_NS) + + `define CLK_BUS_FREQUENCY_MHZ (25.0) + `define CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ) + `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS) + + reg clk = 1'b1; + reg clk_bus = 1'b1; + reg clk_bus_dly = 1'b0; + wire clk_bus_idle = clk_bus & clk_bus_dly; + + always #`CLK_PERIOD_HALF_NS clk <= ~clk; + always #`CLK_BUS_PERIOD_HALF_NS clk_bus <= ~clk_bus; + + always @(clk_bus) clk_bus_dly <= #(`CLK_BUS_PERIOD_HALF_NS - `CLK_PERIOD_QUARTER_NS) clk_bus; + + + // + // Clock Sync + // + task sync_clk_bus; + while (clk_bus_idle !== 1) _wait_quarter_clk_tick; + endtask + + + // + // Reset + // + reg rst_n = 1'b0; + + + // + // System Bus + // + reg bus_cs = 1'b0; + reg bus_we = 1'b0; + reg [11:0] bus_addr; + reg [31:0] bus_data_wr; + wire [31:0] bus_data_rd; + + + // + // UUT + // + modexpng_wrapper uut + ( + .clk (clk_bus), + .rst_n (rst_n), + + .clk_core (clk), + + .cs (bus_cs), + .we (bus_we), + + .address (bus_addr), + .write_data (bus_data_wr), + .read_data (bus_data_rd) + ); + + + // + // Script + // + initial main; + + + // + // Main Routine (Control/Status, Bus) + // + reg [31:0] ti, to; + task main; + begin + + sync_clk_bus; + wait_clk_bus_ticks(10); + rst_n = 1'b1; + wait_clk_bus_ticks(10); + + bus_read('h11, to); $display("modulus_bits = %0d", to); + ti = 100; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 510; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 511; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 512; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 513; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 514; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + + + ti = 1022; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 1023; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 1024; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 1025; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 1026; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + + ti = 4094; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 4095; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 4096; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 4097; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 4098; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + + + bus_read('h12, to); $display("exponent_bits = %0d", to); + ti = 0; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 1; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 2; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 3; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 4; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 5; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 6; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 7; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 8; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + + ti = 100; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 510; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 511; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 512; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 513; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 514; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + + + ti = 1022; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 1023; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 1024; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 1025; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 1026; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + + ti = 4094; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 4095; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 4096; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 4097; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 4098; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + + + end + endtask + + + // + // _bus_drive() + // + task _bus_drive; + input cs; + input we; + input [11:0] addr; + input [31:0] data; + {bus_cs, bus_we, bus_addr, bus_data_wr} <= {cs, we, addr, data}; + endtask + + + // + // bus_write() + // + task bus_write; + input [ 9:0] offset; + input [31:0] data; + begin + _bus_drive(1'b1, 1'b1, {2'b00, offset}, data); + wait_clk_bus_tick; + _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX); + end + endtask + + + // + // bus_read() + // + task bus_read; + input [ 9:0] offset; + output [31:0] data; + begin + _bus_drive(1'b1, 1'b0, {2'b00, offset}, 32'hXXXXXXXX); + wait_clk_bus_tick; + data = bus_data_rd; + _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX); + end + endtask + + + // + // Variables + // + integer _n; + + + // + // _wait_quarter_clk_tick() + // + task _wait_quarter_clk_tick; + #`CLK_PERIOD_QUARTER_NS; + endtask + + + // + // wait_clk_bus_tick() + // + task wait_clk_bus_tick; + #`CLK_BUS_PERIOD_NS; + endtask + + + // + // wait_clk_bus_ticks() + // + task wait_clk_bus_ticks; + input integer num_ticks; + for (_n=0; _n MAX_OP_W) fix_modulus_bits = MAX_OP_W[BIT_INDEX_W:ZEROES_BIT_INDEX_W]; + else fix_modulus_bits = width [BIT_INDEX_W:ZEROES_BIT_INDEX_W]; + endfunction + + function [OP_ADDR_W-1: 0] calc_modulus_num_words_n; + input [BIT_INDEX_W:ZEROES_BIT_INDEX_W] width; + calc_modulus_num_words_n = {width, {(ZEROES_BIT_INDEX_W-WORD_MUX_W){1'b0}}} - 1'b1; // truncates msb + endfunction + + function [OP_ADDR_W-1: 0] calc_modulus_num_words_pq; + input [BIT_INDEX_W:ZEROES_BIT_INDEX_W] width; + calc_modulus_num_words_pq = {width, {(ZEROES_BIT_INDEX_W-WORD_MUX_W-1){1'b0}}} - 1'b1; // fits exactly + endfunction + + task write_modulus_bits; + input [BIT_INDEX_W:0] width; + begin + wrap_modulus_bits_msb <= fix_modulus_bits(width); + wrap_word_index_last_n <= calc_modulus_num_words_n(fix_modulus_bits(width)); + wrap_word_index_last_pq <= calc_modulus_num_words_pq(fix_modulus_bits(width)); + end + endtask + + + // + // Update exponent width + // + function [BIT_INDEX_W:0] fix_exponent_bits; + input [BIT_INDEX_W:0] width; + if (width < MIN_EXP_W) fix_exponent_bits = MIN_EXP_W; + else if (width > MAX_OP_W ) fix_exponent_bits = MAX_OP_W; + else fix_exponent_bits = width; + endfunction + + function [BIT_INDEX_W-1:0] calc_exponent_num_bits_n; + input [BIT_INDEX_W :0] width; + calc_exponent_num_bits_n = width - 1'b1; // truncates msb + endfunction + + function [BIT_INDEX_W-1:0] calc_exponent_num_bits_pq; + input [BIT_INDEX_W: 0] width; + calc_exponent_num_bits_pq = width[BIT_INDEX_W:1] - 1'b1; // fits exactly + endfunction + + task write_exponent_bits; + input [BIT_INDEX_W:0] width; + begin + wrap_exponent_bits <= fix_exponent_bits(width); + wrap_bit_index_last_n <= calc_exponent_num_bits_n(fix_exponent_bits(width)); + wrap_bit_index_last_pq <= calc_exponent_num_bits_pq(fix_exponent_bits(width)); + end + endtask + + + // + // Read Interface + // + always @(posedge clk) + // + if (cs && addr_msb_is_wrap) + // + case (address) + // + ADDR_NAME0: wrap_read_data <= CORE_NAME0; + ADDR_NAME1: wrap_read_data <= CORE_NAME1; + ADDR_VERSION: wrap_read_data <= CORE_VERSION; + ADDR_CONTROL: wrap_read_data <= {{30{1'b0}}, wrap_reg_control, 1'b0}; + ADDR_STATUS: wrap_read_data <= {{30{1'b0}}, wrap_reg_status, 1'b1}; + // + ADDR_MODE: wrap_read_data <= {{30{1'b0}}, wrap_reg_mode, 1'b0}; + ADDR_MODULUS_BITS: wrap_read_data <= {{(31-BIT_INDEX_W){1'b0}}, wrap_modulus_bits_msb, {ZEROES_BIT_INDEX_W{1'b0}}}; + ADDR_EXPONENT_BITS: wrap_read_data <= {{(31-BIT_INDEX_W){1'b0}}, wrap_exponent_bits}; + ADDR_BANK_BITS: wrap_read_data <= MAX_OP_W; + ADDR_NUM_MULTS: wrap_read_data <= NUM_MULTS; + // + default: wrap_read_data <= 32'h00000000; + // + endcase + + + // + // Register / Core Memory Selector + // + reg [1:0] addr_msb_last; + + wire addr_msb_last_is_wrap = addr_msb_last == ADDR_MSB_WRAP; + + always @(posedge clk) + addr_msb_last <= addr_msb; + + assign read_data = addr_msb_last_is_wrap ? wrap_read_data : core_read_data; + + +endmodule -- cgit v1.2.3