diff options
Diffstat (limited to 'config')
-rwxr-xr-x | config/core_config.py | 341 |
1 files changed, 222 insertions, 119 deletions
diff --git a/config/core_config.py b/config/core_config.py index 4033279..61e77d5 100755 --- a/config/core_config.py +++ b/config/core_config.py @@ -5,7 +5,7 @@ Generate core_selector.v and core_vfiles.mk for a set of cores. """ #======================================================================= -# Copyright (c) 2015-2017, NORDUnet A/S All rights reserved. +# Copyright (c) 2015-2017, 2019 NORDUnet A/S All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are @@ -34,21 +34,6 @@ Generate core_selector.v and core_vfiles.mk for a set of cores. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #======================================================================= -# The modexpa7 core drags in a one clock cycle delay to other cores, -# to compensate for the extra clock cycle consumed by the block -# memories used in the modexpa7 core. We probably want a general -# solution for this, because we're going to run into this problem for -# any core that handles arguments big enough to require block memory. - -# To Do: -# -# - Consider automating the one-clock-cycle delay stuff by adding -# another boolean flag to the config file. Default would be no -# delay, if any included core sets the "I use block memories" flag, -# all other cores would get the delay. Slightly tedious but -# something we can calculate easily enough, and probably an -# improvement over wiring in the delay when nothing needs it. - def main(): """ Parse arguments and config file, generate core list, generate output. @@ -82,7 +67,7 @@ def main(): Core.modexp = cfg.get(board_section, "modexp") if Core.extra_wires: # restore formatting - Core.extra_wires = Core.extra_wires.replace("\n", "\n ") + "\n" + Core.extra_wires = Core.extra_wires.replace("\n", "\n ") + "\n" if args.core: cores = args.core @@ -98,7 +83,6 @@ def main(): except ValueError: if core not in cores: cores.append(core) - cores.insert(0, "board_regs") cores.insert(1, "comm_regs") @@ -111,21 +95,41 @@ def main(): core_number = 0 for core in cores: core_number = core.assign_core_number(core_number) - + + for i, core in enumerate(cores): + core.assign_seq_number(i) + + # On the unused piece of code below: we really should not try to + # optimize out the delay. This may have worked earlier, when we only + # had a small set of simple cores. There are a lot of complex cores + # by now, so the readback multiplexer gets pretty wide and will never + # meet timing if we make it purely combinatorial. Moreover, it turns + # out that additional delays are necessary to make it work at higher + # clock speeds. if False: # For some reason, attempting to optimize out the delay # code entirely results in a non-working bitstream. Don't # know why, disabling the optimization works, so just do # that for now. - + Core.need_one_cycle_delay = any(core.block_memory for core in cores) + # longest core/subcore instance name + max_name_len = 0 + for core in cores: + if len(core.instance_name) > max_name_len: + max_name_len = len(core.instance_name) + for subcore in core.subcores: + if len(subcore.instance_name) > max_name_len: + max_name_len = len(subcore.instance_name) + args.verilog.write(createModule_template.format( + core_count = len(cores), core = cores[0], - addrs = "".join(core.createAddr() for core in cores), - insts = "".join(core.createInstance() for core in cores), - muxes = "".join(core.createMux() for core in cores) )) + addrs = "".join(core.createAddr(max_name_len) for core in cores), + insts = "".join(core.createInstance() for core in cores), + muxes = "".join(core.createMux() for core in cores) )) args.makefile.write(listVfiles_template.format( vfiles = "".join(core.listVfiles() for core in cores))) @@ -193,6 +197,7 @@ class Core(object): self.name = name self.cfg_section = "core " + name self.core_number = None + self.seq_number = None self.vfiles = [] self.error_wire = True self.block_memory = False @@ -211,6 +216,9 @@ class Core(object): subcore.assign_core_number(n + i + 1) return n + self.blocks + def assign_seq_number(self, n): + self.seq_number = n + def configure(self, cfg): if self.instance_number == 0: self.vfiles.extend(cfg.getvalues(self.cfg_section, "vfiles")) @@ -221,7 +229,7 @@ class Core(object): self.block_memory = cfg.getboolean(self.cfg_section, "block memory", self.block_memory) self.extra_ports = cfg.get(self.cfg_section, "extra ports") if self.extra_ports: - self.extra_ports = self.extra_ports.replace("\n", "\n ") + "\n" + self.extra_ports = self.extra_ports.replace("\n", "\n ") + "\n" self.blocks = int(cfg.get(self.cfg_section, "core blocks") or 1) self.block_max = self.blocks - 1 if self.blocks > 1: @@ -257,28 +265,44 @@ class Core(object): @property def error_wire_decl(self): - return "\n wire error_{core.instance_name};".format(core = self) if self.error_wire else "" + return "\n wire error_{core.instance_name};".format(core = self) if self.error_wire else "" @property def error_port(self): - return ",\n .error(error_{core.instance_name})".format(core = self) if self.error_wire else "" + return ",\n .error(error_{core.instance_name})".format(core = self) if self.error_wire else "" @property def one_cycle_delay(self): return one_cycle_delay_template.format(core = self) if self.need_one_cycle_delay and not self.block_memory else "" @property + def extra_pipeline_stage(self): + return extra_pipeline_stage_template.format(core = self) + + @property def mux_core_addr(self): if self.blocks == 1 or self.subcores: return "CORE_ADDR_{core.upper_instance_name}".format(core=self) else: - return ",\n ".join("CORE_ADDR_{core.upper_instance_name} + {0}".format(i, core=self) for i in range(self.blocks)) + return ",\n ".join("CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{0:04X}".format(i, core=self) for i in range(self.blocks)) @property - def mux_data_reg(self): - return "read_data_" + self.instance_name + ("_reg" if self.need_one_cycle_delay and not self.block_memory else "") + def reg_data_out(self): + return "reg_read_data_" + self.instance_name + + @property + def comb_data_out(self): + return "comb_read_data_" + self.instance_name + + @property + def wire_data_out(self): + return self.comb_data_out if self.need_one_cycle_delay and not self.block_memory else self.reg_data_out @property + def pipe_data_out(self): + return "pipe_read_data_" + self.instance_name + + @property def mux_error_reg(self): return "error_" + self.instance_name if self.error_wire else "0" @@ -293,10 +317,10 @@ class Core(object): template = createInstance_template_dummy if self.dummy else createInstance_template_generic if self.blocks == 1 else createInstance_template_multi_block return template.format(core = self) - def createAddr(self): + def createAddr(self, max_name_len): if self.dummy: return "" - return createAddr_template.format(core = self) + "".join(subcore.createAddr() for subcore in self.subcores) + return createAddr_template.format(core = self, name_pad = max_name_len) + "".join(subcore.createAddr(max_name_len) for subcore in self.subcores) def createMux(self): if self.dummy: @@ -328,32 +352,44 @@ class SubCore(Core): # Template used by .createAddr() methods. createAddr_template = """\ - localparam CORE_ADDR_{core.upper_instance_name:21s} = {core.addr_width}'h{core.core_number:02x}; + localparam CORE_ADDR_{core.upper_instance_name:{name_pad}s} = {core.addr_width}'h{core.core_number:02x}; """ # Template used by Core.createInstance(). createInstance_template_generic = """\ - //---------------------------------------------------------------- - // {core.upper_instance_name} - //---------------------------------------------------------------- - wire enable_{core.instance_name} = (addr_core_num == CORE_ADDR_{core.upper_instance_name}); - wire [31: 0] read_data_{core.instance_name};{core.error_wire_decl} - - {core.module_name} {core.parameters}{core.instance_name}_inst - ( - .clk(sys_clk), - .{core.reset_name}(sys_rst_n), + //---------------------------------------------------------------- + // {core.upper_instance_name} + //---------------------------------------------------------------- + wire enable_{core.instance_name} = (addr_core_num == CORE_ADDR_{core.upper_instance_name}); + wire [31: 0] {core.wire_data_out};{core.error_wire_decl} + + reg select_{core.instance_name} = 1'b0; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg write_{core.instance_name} = 1'b0; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [31: 0] write_data_{core.instance_name}; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [ 7: 0] addr_{core.instance_name}; + + always @(posedge sys_clk) begin + select_{core.instance_name} <= enable_{core.instance_name} && sys_{core.bus_name}_cs; + write_{core.instance_name} <= sys_{core.bus_name}_wr; + write_data_{core.instance_name} <= sys_write_data; + addr_{core.instance_name} <= addr_core_reg; + end + + {core.module_name} {core.parameters}{core.instance_name}_inst + ( + .clk(sys_clk), + .{core.reset_name}(sys_rst_n_fanout[{core.seq_number}]), {core.extra_ports} - .cs(enable_{core.instance_name} & (sys_{core.bus_name}_rd | sys_{core.bus_name}_wr)), - .we(sys_{core.bus_name}_wr), - - .address(addr_core_reg), - .write_data(sys_write_data), - .read_data(read_data_{core.instance_name}){core.error_port} - ); + .cs(select_{core.instance_name}), + .we(write_{core.instance_name}), + .address(addr_{core.instance_name}), + .write_data(write_data_{core.instance_name}), + .read_data({core.wire_data_out}){core.error_port} + ); {core.one_cycle_delay} +{core.extra_pipeline_stage} """ @@ -361,27 +397,39 @@ createInstance_template_generic = """\ # enough from the base template that it's easier to make this separate. createInstance_template_multi_block = """\ - //---------------------------------------------------------------- - // {core.upper_instance_name} - //---------------------------------------------------------------- - wire enable_{core.instance_name} = (addr_core_num >= CORE_ADDR_{core.upper_instance_name}) && (addr_core_num <= CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{core.block_max:02x}); - wire [31: 0] read_data_{core.instance_name};{core.error_wire_decl} - wire [{core.block_bit_max}:0] {core.instance_name}_prefix = addr_core_num[{core.block_bit_max}:0] - CORE_ADDR_{core.upper_instance_name}; - - {core.module_name} {core.parameters}{core.instance_name}_inst - ( - .clk(sys_clk), - .{core.reset_name}(sys_rst_n), + //---------------------------------------------------------------- + // {core.upper_instance_name} + //---------------------------------------------------------------- + wire enable_{core.instance_name} = (addr_core_num >= CORE_ADDR_{core.upper_instance_name}) && (addr_core_num <= (CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{core.block_max:02x})); + wire [31: 0] {core.wire_data_out};{core.error_wire_decl} + wire [{core.block_bit_max:>2}: 0] prefix_{core.instance_name} = addr_core_num[{core.block_bit_max}:0] - CORE_ADDR_{core.upper_instance_name}[{core.block_bit_max}:0]; + + reg select_{core.instance_name} = 1'b0; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg write_{core.instance_name} = 1'b0; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [ 31: 0] write_data_{core.instance_name}; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.block_bits}+7: 0] addr_{core.instance_name}; + + always @(posedge sys_clk) begin + select_{core.instance_name} <= enable_{core.instance_name} && sys_{core.bus_name}_cs; + write_{core.instance_name} <= sys_{core.bus_name}_wr; + write_data_{core.instance_name} <= sys_write_data; + addr_{core.instance_name} <= {{prefix_{core.instance_name}, addr_core_reg}}; + end + + {core.module_name} {core.parameters}{core.instance_name}_inst + ( + .clk(sys_clk), + .{core.reset_name}(sys_rst_n_fanout[{core.seq_number}]), {core.extra_ports} - .cs(enable_{core.instance_name} & (sys_{core.bus_name}_rd | sys_{core.bus_name}_wr)), - .we(sys_{core.bus_name}_wr), - - .address({{{core.instance_name}_prefix, addr_core_reg}}), - .write_data(sys_write_data), - .read_data(read_data_{core.instance_name}){core.error_port} - ); + .cs(select_{core.instance_name}), + .we(write_{core.instance_name}), + .address(addr_{core.instance_name}), + .write_data(write_data_{core.instance_name}), + .read_data({core.wire_data_out}){core.error_port} + ); {core.one_cycle_delay} +{core.extra_pipeline_stage} """ @@ -395,19 +443,28 @@ createInstance_template_dummy = """\ # Template for one-cycle delay code. one_cycle_delay_template = """\ - reg [31: 0] read_data_{core.instance_name}_reg; - always @(posedge sys_clk) - read_data_{core.instance_name}_reg <= read_data_{core.instance_name}; + (* SHREG_EXTRACT="NO" *) + reg [31: 0] {core.reg_data_out}; + always @(posedge sys_clk) + {core.reg_data_out} <= {core.wire_data_out}; +""" + +# Template for an extra delay cycle code. + +extra_pipeline_stage_template = """\ + (* SHREG_EXTRACT="NO" *) + reg [31: 0] {core.pipe_data_out}; + always @(posedge sys_clk) + {core.pipe_data_out} <= {core.reg_data_out}; """ # Template for .createMux() methods. createMux_template = """\ - {core.mux_core_addr}: - begin - sys_read_data_mux = {core0.mux_data_reg}; - sys_error_mux = {core0.mux_error_reg}; - end + {core.mux_core_addr}: begin + sys_read_data_mux <= {core0.pipe_data_out}; + sys_error_mux <= {core0.mux_error_reg}; + end """ # Top-level (createModule) template. @@ -416,56 +473,102 @@ createModule_template = """\ // NOTE: This file is generated; do not edit. module core_selector - ( - input wire sys_clk, - input wire sys_rst_n, - - input wire [{core.bus_max}: 0] sys_{core.bus_name}_addr, - input wire sys_{core.bus_name}_wr, - input wire sys_{core.bus_name}_rd, - output wire [31: 0] sys_read_data, - input wire [31: 0] sys_write_data, - output wire sys_error, -{core.extra_wires} - input wire noise, - output wire [7 : 0] debug - ); - - - //---------------------------------------------------------------- - // Address Decoder - //---------------------------------------------------------------- - // upper {core.addr_width} bits specify core being addressed - wire [{core.addr_max:>2}: 0] addr_core_num = sys_{core.bus_name}_addr[{core.bus_max}: 8]; - // lower 8 bits specify register offset in core - wire [ 7: 0] addr_core_reg = sys_{core.bus_name}_addr[ 7: 0]; +( + input wire sys_clk, + input wire sys_rst_n, + + input wire [{core.bus_max}: 0] sys_{core.bus_name}_addr, + input wire sys_{core.bus_name}_wr, + input wire sys_{core.bus_name}_rd, + output wire [31: 0] sys_read_data, + input wire [31: 0] sys_write_data, + output wire sys_error, + {core.extra_wires} + input wire noise, + output wire [ 7 :0] debug +); + + + //---------------------------------------------------------------- + // Localized Resets Generator + //---------------------------------------------------------------- + wire [{core_count}-1:0] sys_rst_n_fanout; + reset_replicator # + ( + .SHREG_WIDTH(8), + .FANOUT_WIDTH({core_count}) + ) + reset_replicator_inst + ( + .sys_clk_in (sys_clk), + .sys_rst_n_in (sys_rst_n), + .sys_rst_n_out (sys_rst_n_fanout) + ); + + + //---------------------------------------------------------------- + // Address Decoder + //---------------------------------------------------------------- + // upper {core.addr_width} bits specify core being addressed + // lower 8 bits specify register offset in core + wire [{core.addr_max:>2}: 0] addr_core_num = sys_{core.bus_name}_addr[{core.bus_max}: 8]; + wire [ 7: 0] addr_core_reg = sys_{core.bus_name}_addr[ 7: 0]; + + + //---------------------------------------------------------------- + // Core Address Table + //---------------------------------------------------------------- +{addrs} - //---------------------------------------------------------------- - // Core Address Table - //---------------------------------------------------------------- -{addrs} + //---------------------------------------------------------------- + // Core Instances + //---------------------------------------------------------------- + wire sys_{core.bus_name}_cs = sys_{core.bus_name}_rd || sys_{core.bus_name}_wr; {insts} - //---------------------------------------------------------------- - // Output (Read Data) Multiplexer - //---------------------------------------------------------------- - reg [31: 0] sys_read_data_mux; - assign sys_read_data = sys_read_data_mux; - reg sys_error_mux; - assign sys_error = sys_error_mux; - always @* - - case (addr_core_num) + + //---------------------------------------------------------------- + // Output (Read Data) Multiplexer + //---------------------------------------------------------------- + (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly1 = 1'b0; + (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly2 = 1'b0; + (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly3 = 1'b0; + + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly1; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly2; + (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly3; + + always @(posedge sys_clk) begin + sys_{core.bus_name}_cs_dly1 <= sys_{core.bus_name}_cs; + sys_{core.bus_name}_cs_dly2 <= sys_{core.bus_name}_cs_dly1; + sys_{core.bus_name}_cs_dly3 <= sys_{core.bus_name}_cs_dly2; + end + + always @(posedge sys_clk) begin + if (sys_{core.bus_name}_cs) addr_core_num_dly1 <= addr_core_num; + if (sys_{core.bus_name}_cs_dly1) addr_core_num_dly2 <= addr_core_num_dly1; + if (sys_{core.bus_name}_cs_dly2) addr_core_num_dly3 <= addr_core_num_dly2; + end + + reg [31: 0] sys_read_data_mux; + reg sys_error_mux; + + assign sys_read_data = sys_read_data_mux; + assign sys_error = sys_error_mux; + + always @(posedge sys_clk) + + if (sys_{core.bus_name}_cs_dly3) + + case (addr_core_num_dly3) {muxes} - default: - begin - sys_read_data_mux = {{32{{1'b0}}}}; - sys_error_mux = 1; - end - endcase - + default: begin + sys_read_data_mux <= {{32{{1'b0}}}}; + sys_error_mux <= 1'b1; + end + endcase endmodule |