diff options
-rw-r--r-- | bench/tb_core_selector.v | 192
-rw-r--r-- | config/core.cfg | 50
-rwxr-xr-x | config/core_config.py | 341
-rw-r--r-- | extra/reset_replicator.v | 93
4 files changed, 550 insertions, 126 deletions
diff --git a/bench/tb_core_selector.v b/bench/tb_core_selector.v new file mode 100644 index 0000000..a8a174b --- /dev/null +++ b/bench/tb_core_selector.v @@ -0,0 +1,192 @@ +`timescale 1ns / 1ps + +module tb_core_selector; + + + // + // System Clock, System Reset + // + `define SYS_CLK_FREQUENCY_MHZ ( 100.0 ) + `define SYS_CLK_PERIOD_NS (1000.0 / `SYS_CLK_FREQUENCY_MHZ) + `define SYS_CLK_PERIOD_HALF_NS ( 0.5 * `SYS_CLK_PERIOD_NS ) + + reg sys_clk = 1'b0; + initial forever #`SYS_CLK_PERIOD_HALF_NS sys_clk = ~sys_clk; + + `define SYS_RST_N_ACTIVE 1'b0 + `define SYS_RST_N_INACTIVE 1'b1 + + reg sys_rst_n = `SYS_RST_N_ACTIVE; + + + // + // System Bus + // + reg [23: 0] sys_fmc_addr; + reg sys_fmc_wr = 1'b0; + reg sys_fmc_rd = 1'b0; + wire [31: 0] sys_read_data; + reg [31: 0] sys_write_data; + wire sys_error; + + + // + // UUT + // + core_selector uut + ( + .sys_clk (sys_clk), + .sys_rst_n (sys_rst_n), + + .sys_fmc_addr (sys_fmc_addr), + .sys_fmc_wr (sys_fmc_wr), + .sys_fmc_rd (sys_fmc_rd), + .sys_read_data (sys_read_data), + .sys_write_data (sys_write_data), + .sys_error (sys_error), + + .mkm_sclk (), + .mkm_cs_n (), + .mkm_do (1'b0), + .mkm_di (), + + .core_clk (1'b0), + + .noise (1'b0), + .debug () + ); + + + // + // Script + // + + // + // Here's what the following routine does. We know that at address 0 there's always a BOARD_REGS core, which + // has a 32-bit dummy register at offset 255. ECDSA cores also have a 32-bit dummy register at offset 15. + // We write some values into the two dummy registers to test the address decoding logic (we write to two cores + // with different numbers, the offsets of registers are also different). Then we do a readback and compare + // the read value with the written one. The code assumes, that the default "hsm" core configuration is used, + // where the number of ECDSA-256 is 0x37. If this is not the case, adapt the first parameter passed to the + // sys_bus_write() calls. 
+ // + + localparam [31:0] MAGIC_1 = 32'hCCAA5533; + localparam [31:0] MAGIC_2 = 32'hCA5335AC; + + reg [31:0] wr, rd; + initial begin + + wait_sys_clk_ticks(200); + sys_rst_n = `SYS_RST_N_INACTIVE; + wait_sys_clk_ticks(100); + + wr = MAGIC_1; + sys_bus_write(16'h0000, 8'd255, wr); + wait_sys_clk_ticks(10); + + wr = MAGIC_2; + sys_bus_write(16'h0037, 8'd15, wr); + wait_sys_clk_ticks(10); + + wr = MAGIC_1; + sys_bus_read(16'h0000, 8'd255, rd); + wait_sys_clk_ticks(10); + if (rd !== wr) begin + $display("ERROR: wr = 0x%08x, rd = 0x%08x", wr, rd); + wait_sys_clk_ticks(100); + $finish; + end + + wr = MAGIC_2; + sys_bus_read(16'h0037, 8'd15, rd); + wait_sys_clk_ticks(10); + if (rd !== wr) begin + $display("ERROR: wr = 0x%08x, rd = 0x%08x", wr, rd); + wait_sys_clk_ticks(100); + $finish; + end + + $display("Test passed."); + $finish; + + end + + + // + // _wait_half_sys_clk_tick() + // + task _wait_half_sys_clk_tick; + #`SYS_CLK_PERIOD_HALF_NS; + endtask + + + // + // wait_sys_clk_tick() + // + task wait_sys_clk_tick; + begin + _wait_half_sys_clk_tick; + _wait_half_sys_clk_tick; + end + endtask + + + // + // wait_sys_clk_ticks() + // + task wait_sys_clk_ticks; + input integer _num_ticks; + integer _n; + for (_n=0; _n<_num_ticks; _n=_n+1) + wait_sys_clk_tick; + endtask + + + // + // _sys_bus_drive() + // + task _sys_bus_drive; + input [23: 0] _addr; + input _wr; + input _rd; + input [31: 0] _write_data; + {sys_fmc_addr, sys_fmc_wr, sys_fmc_rd, sys_write_data} <= + { _addr, _wr, _rd, _write_data} ; + endtask + + + // + // sys_bus_read() + // + task sys_bus_read; + input [15:0] _num; + input [ 7:0] _reg; + output [31:0] _data; + begin + _sys_bus_drive({_num, _reg}, 1'b0, 1'b1, {32{1'bX}}); + wait_sys_clk_tick; + _sys_bus_drive(24'hXXXX, 1'b0, 1'b0, {32{1'bX}}); + wait_sys_clk_ticks(3); + _data = sys_read_data; + _sys_bus_drive(24'hXXXX, 1'b0, 1'b0, {32{1'bX}}); + end + endtask + + + // + // sys_bus_write() + // + task sys_bus_write; + input [15:0] _num; + input [ 7:0] _reg; + 
input [31:0] _data; + begin + _sys_bus_drive({_num, _reg}, 1'b1, 1'b0, _data); + wait_sys_clk_tick; + _sys_bus_drive(24'hXXXX, 1'b0, 1'b0, {32{1'bX}}); + end + endtask + + +endmodule diff --git a/config/core.cfg b/config/core.cfg index 6520393..697f8bc 100644 --- a/config/core.cfg +++ b/config/core.cfg @@ -36,8 +36,9 @@ modexp = modexpa7 extra wires = output wire mkm_sclk, output wire mkm_cs_n, - input wire mkm_do, + input wire mkm_do, output wire mkm_di, + input wire core_clk, requires = mkmif/dummy-mkmif [board dev-bridge] @@ -73,6 +74,10 @@ cores = trng # for testing just the Modular Exponentiation cores = modexp +[project modexpng] +# for testing just the ModExpNG +cores = modexpng + [project mkmif] # for testing just the Master Key Memory Interface cores = mkmif @@ -86,13 +91,13 @@ cores = sha256 aes trng modexp mkmif # Make me one with everything, except we want two modexp cores for parallel CRT cores = sha1 sha256 sha512 aes trng modexp modexp mkmif ecdsa256 ecdsa384 -[project keywrap] -# for testing Joachim's keywrap core with RSA signing -cores = mkmif sha256 aes trng modexp modexp ecdsa256 ecdsa384 keywrap +[project hsm_ng] +# Make me one with everything, except we want the new ModExpNG core +cores = sha1 sha256 sha512 aes trng modexp modexp modexpng mkmif ecdsa256 ecdsa384 -[project keywrap] -# for testing Joachim's keywrap core with RSA signing -cores = mkmif sha256 aes trng modexp modexp ecdsa256 ecdsa384 keywrap +[project hsm_ng_keywrap] +# everything, with the full range of modexp and keywrap options +cores = sha1 sha256 sha512 aes keywrap trng modexp modexp modexpng mkmif ecdsa256 ecdsa384 # [core] sections # @@ -219,6 +224,37 @@ vfiles = lib/lowlevel/artix7/dsp48e1_wrapper.v lib/lowlevel/artix7/dsp48e1_wrapper_modexp.v +[core modexpng] +# ModExpNG for Xilinx Artix-7 +core blocks = 16 +block memory = yes +error wire = no +module name = modexpng_wrapper +reset name = rst_n +extra ports = + .clk_core(core_clk), +vfiles = + 
../user/shatov/modexpng/rtl/modexpng_wrapper.v + ../user/shatov/modexpng/rtl/modexpng_core_top.v + ../user/shatov/modexpng/rtl/modexpng_general_worker.v + ../user/shatov/modexpng/rtl/modexpng_mmm_dual.v + ../user/shatov/modexpng/rtl/modexpng_reductor.v + ../user/shatov/modexpng/rtl/modexpng_dsp_array_block.v + ../user/shatov/modexpng/rtl/modexpng_io_block.v + ../user/shatov/modexpng/rtl/modexpng_io_manager.v + ../user/shatov/modexpng/rtl/modexpng_storage_block.v + ../user/shatov/modexpng/rtl/modexpng_storage_manager.v + ../user/shatov/modexpng/rtl/modexpng_uop_rom.v + ../user/shatov/modexpng/rtl/modexpng_uop_engine.v + ../user/shatov/modexpng/rtl/modexpng_recombinator_block.v + ../user/shatov/modexpng/rtl/modexpng_recombinator_cell.v + ../user/shatov/modexpng/rtl/modexpng_dsp_slice_mult_wrapper_xilinx.v + ../user/shatov/modexpng/rtl/modexpng_dsp_slice_addsub_wrapper_xilinx.v + ../user/shatov/modexpng/rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v + ../user/shatov/modexpng/rtl/modexpng_sdp_36k_x16_x32_wrapper_xilinx.v + ../user/shatov/modexpng/rtl/modexpng_sdp_36k_x32_x16_wrapper_xilinx.v + ../user/shatov/modexpng/rtl/modexpng_tdp_36k_x16_x32_wrapper_xilinx.v + [core modexps6] # ModExp for Xilinx Spartan-6 core blocks = 4 diff --git a/config/core_config.py b/config/core_config.py index d84f8f1..d511228 100755 --- a/config/core_config.py +++ b/config/core_config.py @@ -5,7 +5,7 @@ Generate core_selector.v and core_vfiles.mk for a set of cores. """ #======================================================================= -# Copyright (c) 2015-2017, NORDUnet A/S All rights reserved. +# Copyright (c) 2015-2017, 2019 NORDUnet A/S All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are @@ -34,21 +34,6 @@ Generate core_selector.v and core_vfiles.mk for a set of cores. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#======================================================================= -# The modexpa7 core drags in a one clock cycle delay to other cores, -# to compensate for the extra clock cycle consumed by the block -# memories used in the modexpa7 core. We probably want a general -# solution for this, because we're going to run into this problem for -# any core that handles arguments big enough to require block memory. - -# To Do: -# -# - Consider automating the one-clock-cycle delay stuff by adding -# another boolean flag to the config file. Default would be no -# delay, if any included core sets the "I use block memories" flag, -# all other cores would get the delay. Slightly tedious but -# something we can calculate easily enough, and probably an -# improvement over wiring in the delay when nothing needs it. - def main(): """ Parse arguments and config file, generate core list, generate output. @@ -82,7 +67,7 @@ def main(): Core.modexp = cfg.get(board_section, "modexp") if Core.extra_wires: # restore formatting - Core.extra_wires = Core.extra_wires.replace("\n", "\n ") + "\n" + Core.extra_wires = Core.extra_wires.replace("\n", "\n ") + "\n" if args.core: cores = args.core @@ -98,7 +83,6 @@ def main(): except ValueError: if core not in cores: cores.append(core) - cores.insert(0, "board_regs") cores.insert(1, "comm_regs") @@ -111,21 +95,41 @@ def main(): core_number = 0 for core in cores: core_number = core.assign_core_number(core_number) - + + for i, core in enumerate(cores): + core.assign_seq_number(i) + + # On the unused piece of code below: we really should not try to + # optimize out the delay. This may have worked earlier, when we only + # had a small set of simple cores. There are a lot of complex cores + # by now, so the readback multiplexer gets pretty wide and will never + # meet timing if we make it purely combinatorial. Moreover, it turns + # out that additional delays are necessary to make it work at higher + # clock speeds. 
if False: # For some reason, attempting to optimize out the delay # code entirely results in a non-working bitstream. Don't # know why, disabling the optimization works, so just do # that for now. - + Core.need_one_cycle_delay = any(core.block_memory for core in cores) + # longest core/subcore instance name + max_name_len = 0 + for core in cores: + if len(core.instance_name) > max_name_len: + max_name_len = len(core.instance_name) + for subcore in core.subcores: + if len(subcore.instance_name) > max_name_len: + max_name_len = len(subcore.instance_name) + args.verilog.write(createModule_template.format( + core_count = len(cores), core = cores[0], - addrs = "".join(core.createAddr() for core in cores), - insts = "".join(core.createInstance() for core in cores), - muxes = "".join(core.createMux() for core in cores) )) + addrs = "".join(core.createAddr(max_name_len) for core in cores), + insts = "".join(core.createInstance() for core in cores), + muxes = "".join(core.createMux() for core in cores) )) args.makefile.write(listVfiles_template.format( vfiles = "".join(core.listVfiles() for core in cores))) @@ -193,6 +197,7 @@ class Core(object): self.name = name self.cfg_section = "core " + name self.core_number = None + self.seq_number = None self.vfiles = [] self.error_wire = True self.block_memory = False @@ -211,6 +216,9 @@ class Core(object): subcore.assign_core_number(n + i + 1) return n + self.blocks + def assign_seq_number(self, n): + self.seq_number = n + def configure(self, cfg): if self.instance_number == 0: self.vfiles.extend(cfg.getvalues(self.cfg_section, "vfiles")) @@ -221,7 +229,7 @@ class Core(object): self.block_memory = cfg.getboolean(self.cfg_section, "block memory", self.block_memory) self.extra_ports = cfg.get(self.cfg_section, "extra ports") if self.extra_ports: - self.extra_ports = self.extra_ports.replace("\n", "\n ") + "\n" + self.extra_ports = self.extra_ports.replace("\n", "\n ") + "\n" self.blocks = int(cfg.get(self.cfg_section, "core blocks") 
or 1) self.block_max = self.blocks - 1 if self.blocks > 1: @@ -257,28 +265,44 @@ class Core(object): @property def error_wire_decl(self): - return "\n wire error_{core.instance_name};".format(core = self) if self.error_wire else "" + return "\n wire error_{core.instance_name};".format(core = self) if self.error_wire else "" @property def error_port(self): - return ",\n .error(error_{core.instance_name})".format(core = self) if self.error_wire else "" + return ",\n .error(error_{core.instance_name})".format(core = self) if self.error_wire else "" @property def one_cycle_delay(self): return one_cycle_delay_template.format(core = self) if self.need_one_cycle_delay and not self.block_memory else "" @property + def extra_pipeline_stage(self): + return extra_pipeline_stage_template.format(core = self) + + @property def mux_core_addr(self): if self.blocks == 1 or self.subcores: return "CORE_ADDR_{core.upper_instance_name}".format(core=self) else: - return ",\n ".join("CORE_ADDR_{core.upper_instance_name} + {0}".format(i, core=self) for i in range(self.blocks)) + return ",\n ".join("CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{0:04X}".format(i, core=self) for i in range(self.blocks)) @property - def mux_data_reg(self): - return "read_data_" + self.instance_name + ("_reg" if self.need_one_cycle_delay and not self.block_memory else "") + def reg_data_out(self): + return "reg_read_data_" + self.instance_name + + @property + def comb_data_out(self): + return "comb_read_data_" + self.instance_name + + @property + def wire_data_out(self): + return self.comb_data_out if self.need_one_cycle_delay and not self.block_memory else self.reg_data_out @property + def pipe_data_out(self): + return "pipe_read_data_" + self.instance_name + + @property def mux_error_reg(self): return "error_" + self.instance_name if self.error_wire else "0" @@ -293,10 +317,10 @@ class Core(object): template = createInstance_template_dummy if self.dummy else createInstance_template_generic if 
self.blocks == 1 else createInstance_template_multi_block return template.format(core = self) - def createAddr(self): + def createAddr(self, max_name_len): if self.dummy: return "" - return createAddr_template.format(core = self) + "".join(subcore.createAddr() for subcore in self.subcores) + return createAddr_template.format(core = self, name_pad = max_name_len) + "".join(subcore.createAddr(max_name_len) for subcore in self.subcores) def createMux(self): if self.dummy: @@ -328,32 +352,44 @@ class SubCore(Core): # Template used by .createAddr() methods. createAddr_template = """\ - localparam CORE_ADDR_{core.upper_instance_name:21s} = {core.addr_width}'h{core.core_number:02x}; + localparam CORE_ADDR_{core.upper_instance_name:{name_pad}s} = {core.addr_width}'h{core.core_number:02x}; """ # Template used by Core.createInstance(). createInstance_template_generic = """\ - //---------------------------------------------------------------- - // {core.upper_instance_name} - //---------------------------------------------------------------- - wire enable_{core.instance_name} = (addr_core_num == CORE_ADDR_{core.upper_instance_name}); - wire [31: 0] read_data_{core.instance_name};{core.error_wire_decl} - - {core.module_name} {core.parameters}{core.instance_name}_inst - ( - .clk(sys_clk), - .{core.reset_name}(sys_rst_n), + //---------------------------------------------------------------- + // {core.upper_instance_name} + //---------------------------------------------------------------- + wire enable_{core.instance_name} = (addr_core_num == CORE_ADDR_{core.upper_instance_name}); + wire [31: 0] {core.wire_data_out};{core.error_wire_decl} + + reg select_{core.instance_name} = 1'b0; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg write_{core.instance_name} = 1'b0; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [31: 0] write_data_{core.instance_name}; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [ 7: 0] 
addr_{core.instance_name}; + + always @(posedge sys_clk) begin + select_{core.instance_name} <= enable_{core.instance_name} && sys_{core.bus_name}_cs; + write_{core.instance_name} <= sys_{core.bus_name}_wr; + write_data_{core.instance_name} <= sys_write_data; + addr_{core.instance_name} <= addr_core_reg; + end + + {core.module_name} {core.parameters}{core.instance_name}_inst + ( + .clk(sys_clk), + .{core.reset_name}(sys_rst_n_fanout[{core.seq_number}]), {core.extra_ports} - .cs(enable_{core.instance_name} & (sys_{core.bus_name}_rd | sys_{core.bus_name}_wr)), - .we(sys_{core.bus_name}_wr), - - .address(addr_core_reg), - .write_data(sys_write_data), - .read_data(read_data_{core.instance_name}){core.error_port} - ); + .cs(select_{core.instance_name}), + .we(write_{core.instance_name}), + .address(addr_{core.instance_name}), + .write_data(write_data_{core.instance_name}), + .read_data({core.wire_data_out}){core.error_port} + ); {core.one_cycle_delay} +{core.extra_pipeline_stage} """ @@ -361,27 +397,39 @@ createInstance_template_generic = """\ # enough from the base template that it's easier to make this separate. 
createInstance_template_multi_block = """\ - //---------------------------------------------------------------- - // {core.upper_instance_name} - //---------------------------------------------------------------- - wire enable_{core.instance_name} = (addr_core_num >= CORE_ADDR_{core.upper_instance_name}) && (addr_core_num <= CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{core.block_max:02x}); - wire [31: 0] read_data_{core.instance_name};{core.error_wire_decl} - wire [{core.block_bit_max}:0] {core.instance_name}_prefix = addr_core_num[{core.block_bit_max}:0] - CORE_ADDR_{core.upper_instance_name}; - - {core.module_name} {core.parameters}{core.instance_name}_inst - ( - .clk(sys_clk), - .{core.reset_name}(sys_rst_n), + //---------------------------------------------------------------- + // {core.upper_instance_name} + //---------------------------------------------------------------- + wire enable_{core.instance_name} = (addr_core_num >= CORE_ADDR_{core.upper_instance_name}) && (addr_core_num <= (CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{core.block_max:02x})); + wire [31: 0] {core.wire_data_out};{core.error_wire_decl} + wire [{core.block_bit_max:>2}: 0] prefix_{core.instance_name} = addr_core_num[{core.block_bit_max}:0] - CORE_ADDR_{core.upper_instance_name}[{core.block_bit_max}:0]; + + reg select_{core.instance_name} = 1'b0; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg write_{core.instance_name} = 1'b0; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [ 31: 0] write_data_{core.instance_name}; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.block_bits}+7: 0] addr_{core.instance_name}; + + always @(posedge sys_clk) begin + select_{core.instance_name} <= enable_{core.instance_name} && sys_{core.bus_name}_cs; + write_{core.instance_name} <= sys_{core.bus_name}_wr; + write_data_{core.instance_name} <= sys_write_data; + addr_{core.instance_name} <= 
{{prefix_{core.instance_name}, addr_core_reg}}; + end + + {core.module_name} {core.parameters}{core.instance_name}_inst + ( + .clk(sys_clk), + .{core.reset_name}(sys_rst_n_fanout[{core.seq_number}]), {core.extra_ports} - .cs(enable_{core.instance_name} & (sys_{core.bus_name}_rd | sys_{core.bus_name}_wr)), - .we(sys_{core.bus_name}_wr), - - .address({{{core.instance_name}_prefix, addr_core_reg}}), - .write_data(sys_write_data), - .read_data(read_data_{core.instance_name}){core.error_port} - ); + .cs(select_{core.instance_name}), + .we(write_{core.instance_name}), + .address(addr_{core.instance_name}), + .write_data(write_data_{core.instance_name}), + .read_data({core.wire_data_out}){core.error_port} + ); {core.one_cycle_delay} +{core.extra_pipeline_stage} """ @@ -395,19 +443,28 @@ createInstance_template_dummy = """\ # Template for one-cycle delay code. one_cycle_delay_template = """\ - reg [31: 0] read_data_{core.instance_name}_reg; - always @(posedge sys_clk) - read_data_{core.instance_name}_reg <= read_data_{core.instance_name}; + (* SHREG_EXTRACT="NO" *) + reg [31: 0] {core.reg_data_out}; + always @(posedge sys_clk) + {core.reg_data_out} <= {core.wire_data_out}; +""" + +# Template for an extra delay cycle code. + +extra_pipeline_stage_template = """\ + (* SHREG_EXTRACT="NO" *) + reg [31: 0] {core.pipe_data_out}; + always @(posedge sys_clk) + {core.pipe_data_out} <= {core.reg_data_out}; """ # Template for .createMux() methods. createMux_template = """\ - {core.mux_core_addr}: - begin - sys_read_data_mux = {core0.mux_data_reg}; - sys_error_mux = {core0.mux_error_reg}; - end + {core.mux_core_addr}: begin + sys_read_data_mux <= {core0.pipe_data_out}; + sys_error_mux <= {core0.mux_error_reg}; + end """ # Top-level (createModule) template. @@ -416,56 +473,102 @@ createModule_template = """\ // NOTE: This file is generated; do not edit. 
module core_selector - ( - input wire sys_clk, - input wire sys_rst_n, - - input wire [{core.bus_max}: 0] sys_{core.bus_name}_addr, - input wire sys_{core.bus_name}_wr, - input wire sys_{core.bus_name}_rd, - output wire [31: 0] sys_read_data, - input wire [31: 0] sys_write_data, - output wire sys_error, -{core.extra_wires} - input wire noise, - output wire [7 : 0] debug - ); - - - //---------------------------------------------------------------- - // Address Decoder - //---------------------------------------------------------------- - // upper {core.addr_width} bits specify core being addressed - wire [{core.addr_max:>2}: 0] addr_core_num = sys_{core.bus_name}_addr[{core.bus_max}: 8]; - // lower 8 bits specify register offset in core - wire [ 7: 0] addr_core_reg = sys_{core.bus_name}_addr[ 7: 0]; +( + input wire sys_clk, + input wire sys_rst_n, + + input wire [{core.bus_max}: 0] sys_{core.bus_name}_addr, + input wire sys_{core.bus_name}_wr, + input wire sys_{core.bus_name}_rd, + output wire [31: 0] sys_read_data, + input wire [31: 0] sys_write_data, + output wire sys_error, + {core.extra_wires} + input wire noise, + output wire [ 7 :0] debug +); + + + //---------------------------------------------------------------- + // Localized Resets Generator + //---------------------------------------------------------------- + wire [{core_count}-1:0] sys_rst_n_fanout; + reset_replicator # + ( + .SHREG_WIDTH(8), + .FANOUT_WIDTH({core_count}) + ) + reset_replicator_inst + ( + .sys_clk_in (sys_clk), + .sys_rst_n_in (sys_rst_n), + .sys_rst_n_out (sys_rst_n_fanout) + ); + + + //---------------------------------------------------------------- + // Address Decoder + //---------------------------------------------------------------- + // upper {core.addr_width} bits specify core being addressed + // lower 8 bits specify register offset in core + wire [{core.addr_max:>2}: 0] addr_core_num = sys_{core.bus_name}_addr[{core.bus_max}: 8]; + wire [ 7: 0] addr_core_reg = 
sys_{core.bus_name}_addr[ 7: 0]; + + + //---------------------------------------------------------------- + // Core Address Table + //---------------------------------------------------------------- +{addrs} - //---------------------------------------------------------------- - // Core Address Table - //---------------------------------------------------------------- -{addrs} + //---------------------------------------------------------------- + // Core Instances + //---------------------------------------------------------------- + wire sys_{core.bus_name}_cs = sys_{core.bus_name}_rd || sys_{core.bus_name}_wr; {insts} - //---------------------------------------------------------------- - // Output (Read Data) Multiplexer - //---------------------------------------------------------------- - reg [31: 0] sys_read_data_mux; - assign sys_read_data = sys_read_data_mux; - reg sys_error_mux; - assign sys_error = sys_error_mux; - always @* - - case (addr_core_num) + + //---------------------------------------------------------------- + // Output (Read Data) Multiplexer + //---------------------------------------------------------------- + (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly1 = 1'b0; + (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly2 = 1'b0; + (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly3 = 1'b0; + + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly1; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly2; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly3; + + always @(posedge sys_clk) begin + sys_{core.bus_name}_cs_dly1 <= sys_{core.bus_name}_cs; + sys_{core.bus_name}_cs_dly2 <= sys_{core.bus_name}_cs_dly1; + sys_{core.bus_name}_cs_dly3 <= sys_{core.bus_name}_cs_dly2; + end + + always @(posedge sys_clk) begin + if (sys_{core.bus_name}_cs) 
addr_core_num_dly1 <= addr_core_num; + if (sys_{core.bus_name}_cs_dly1) addr_core_num_dly2 <= addr_core_num_dly1; + if (sys_{core.bus_name}_cs_dly2) addr_core_num_dly3 <= addr_core_num_dly2; + end + + reg [31: 0] sys_read_data_mux; + reg sys_error_mux; + + assign sys_read_data = sys_read_data_mux; + assign sys_error = sys_error_mux; + + always @(posedge sys_clk) + + if (sys_{core.bus_name}_cs_dly3) + + case (addr_core_num_dly3) {muxes} - default: - begin - sys_read_data_mux = {{32{{1'b0}}}}; - sys_error_mux = 1; - end - endcase - + default: begin + sys_read_data_mux <= {{32{{1'b0}}}}; + sys_error_mux <= 1'b1; + end + endcase endmodule diff --git a/extra/reset_replicator.v b/extra/reset_replicator.v new file mode 100644 index 0000000..ccb704b --- /dev/null +++ b/extra/reset_replicator.v @@ -0,0 +1,93 @@ +//====================================================================== +// +// reset_replicator.v +// ------------------ +// +// Generates localized copies of the system-wide reset so that each core can +// have its own copy. This way there's more room for the placer to do its job. +// +// Author: Pavel Shatov +// Copyright (c) 2016, 2018-2019 NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +module reset_replicator +( + sys_clk_in, + sys_rst_n_in, + sys_rst_n_out +); + + // + // Parameters + // + parameter integer SHREG_WIDTH = 8; + parameter integer FANOUT_WIDTH = 8; + + // + // Ports + // + input sys_clk_in; + input sys_rst_n_in; + output [FANOUT_WIDTH-1:0] sys_rst_n_out; + + // + // Internals + // + wire [FANOUT_WIDTH-1:0] sys_rst_int; + + // + // Localized Reset Replication + // + genvar i; + generate for (i=0; i<FANOUT_WIDTH; i=i+1) + // + begin : gen_sys_rst_n_out + // + LUT1 #(.INIT(2'b01)) LUT1_inst + ( .I0(sys_rst_n_in), + .O(sys_rst_int[i]) + ); + // + (* SHREG_EXTRACT="NO" *) + (* EQUIVALENT_REGISTER_REMOVAL="NO" *) + reg [SHREG_WIDTH-1:0] sys_rst_n_shreg_copy = {SHREG_WIDTH{1'b0}}; + // + always @(posedge sys_clk_in or posedge sys_rst_int[i]) + // + if (sys_rst_int[i]) sys_rst_n_shreg_copy <= {SHREG_WIDTH{1'b0}}; + else sys_rst_n_shreg_copy <= {sys_rst_n_shreg_copy[SHREG_WIDTH-2:0], 1'b1}; + // + assign sys_rst_n_out[i] = sys_rst_n_shreg_copy[SHREG_WIDTH-1]; + // + end + // + endgenerate + +endmodule |