about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x config/core_config.py 341
1 files changed, 222 insertions, 119 deletions
diff --git a/config/core_config.py b/config/core_config.py
index 4033279..61e77d5 100755
--- a/config/core_config.py
+++ b/config/core_config.py
@@ -5,7 +5,7 @@ Generate core_selector.v and core_vfiles.mk for a set of cores.
"""
#=======================================================================
-# Copyright (c) 2015-2017, NORDUnet A/S All rights reserved.
+# Copyright (c) 2015-2017, 2019 NORDUnet A/S All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
@@ -34,21 +34,6 @@ Generate core_selector.v and core_vfiles.mk for a set of cores.
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=======================================================================
-# The modexpa7 core drags in a one clock cycle delay to other cores,
-# to compensate for the extra clock cycle consumed by the block
-# memories used in the modexpa7 core. We probably want a general
-# solution for this, because we're going to run into this problem for
-# any core that handles arguments big enough to require block memory.
-
-# To Do:
-#
-# - Consider automating the one-clock-cycle delay stuff by adding
-# another boolean flag to the config file. Default would be no
-# delay, if any included core sets the "I use block memories" flag,
-# all other cores would get the delay. Slightly tedious but
-# something we can calculate easily enough, and probably an
-# improvement over wiring in the delay when nothing needs it.
-
def main():
"""
Parse arguments and config file, generate core list, generate output.
@@ -82,7 +67,7 @@ def main():
Core.modexp = cfg.get(board_section, "modexp")
if Core.extra_wires:
# restore formatting
- Core.extra_wires = Core.extra_wires.replace("\n", "\n ") + "\n"
+ Core.extra_wires = Core.extra_wires.replace("\n", "\n ") + "\n"
if args.core:
cores = args.core
@@ -98,7 +83,6 @@ def main():
except ValueError:
if core not in cores:
cores.append(core)
-
cores.insert(0, "board_regs")
cores.insert(1, "comm_regs")
@@ -111,21 +95,41 @@ def main():
core_number = 0
for core in cores:
core_number = core.assign_core_number(core_number)
-
+
+ for i, core in enumerate(cores):
+ core.assign_seq_number(i)
+
+ # On the unused piece of code below: we really should not try to
+ # optimize out the delay. This may have worked earlier, when we only
+ # had a small set of simple cores. There are a lot of complex cores
+ # by now, so the readback multiplexer gets pretty wide and will never
+ # meet timing if we make it purely combinatorial. Moreover, it turns
+ # out that additional delays are necessary to make it work at higher
+ # clock speeds.
if False:
# For some reason, attempting to optimize out the delay
# code entirely results in a non-working bitstream. Don't
# know why, disabling the optimization works, so just do
# that for now.
-
+
Core.need_one_cycle_delay = any(core.block_memory for core in cores)
+ # longest core/subcore instance name
+ max_name_len = 0
+ for core in cores:
+ if len(core.instance_name) > max_name_len:
+ max_name_len = len(core.instance_name)
+ for subcore in core.subcores:
+ if len(subcore.instance_name) > max_name_len:
+ max_name_len = len(subcore.instance_name)
+
args.verilog.write(createModule_template.format(
+ core_count = len(cores),
core = cores[0],
- addrs = "".join(core.createAddr() for core in cores),
- insts = "".join(core.createInstance() for core in cores),
- muxes = "".join(core.createMux() for core in cores) ))
+ addrs = "".join(core.createAddr(max_name_len) for core in cores),
+ insts = "".join(core.createInstance() for core in cores),
+ muxes = "".join(core.createMux() for core in cores) ))
args.makefile.write(listVfiles_template.format(
vfiles = "".join(core.listVfiles() for core in cores)))
@@ -193,6 +197,7 @@ class Core(object):
self.name = name
self.cfg_section = "core " + name
self.core_number = None
+ self.seq_number = None
self.vfiles = []
self.error_wire = True
self.block_memory = False
@@ -211,6 +216,9 @@ class Core(object):
subcore.assign_core_number(n + i + 1)
return n + self.blocks
+ def assign_seq_number(self, n):
+ self.seq_number = n
+
def configure(self, cfg):
if self.instance_number == 0:
self.vfiles.extend(cfg.getvalues(self.cfg_section, "vfiles"))
@@ -221,7 +229,7 @@ class Core(object):
self.block_memory = cfg.getboolean(self.cfg_section, "block memory", self.block_memory)
self.extra_ports = cfg.get(self.cfg_section, "extra ports")
if self.extra_ports:
- self.extra_ports = self.extra_ports.replace("\n", "\n ") + "\n"
+ self.extra_ports = self.extra_ports.replace("\n", "\n ") + "\n"
self.blocks = int(cfg.get(self.cfg_section, "core blocks") or 1)
self.block_max = self.blocks - 1
if self.blocks > 1:
@@ -257,28 +265,44 @@ class Core(object):
@property
def error_wire_decl(self):
- return "\n wire error_{core.instance_name};".format(core = self) if self.error_wire else ""
+ return "\n wire error_{core.instance_name};".format(core = self) if self.error_wire else ""
@property
def error_port(self):
- return ",\n .error(error_{core.instance_name})".format(core = self) if self.error_wire else ""
+ return ",\n .error(error_{core.instance_name})".format(core = self) if self.error_wire else ""
@property
def one_cycle_delay(self):
return one_cycle_delay_template.format(core = self) if self.need_one_cycle_delay and not self.block_memory else ""
@property
+ def extra_pipeline_stage(self):
+ return extra_pipeline_stage_template.format(core = self)
+
+ @property
def mux_core_addr(self):
if self.blocks == 1 or self.subcores:
return "CORE_ADDR_{core.upper_instance_name}".format(core=self)
else:
- return ",\n ".join("CORE_ADDR_{core.upper_instance_name} + {0}".format(i, core=self) for i in range(self.blocks))
+ return ",\n ".join("CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{0:04X}".format(i, core=self) for i in range(self.blocks))
@property
- def mux_data_reg(self):
- return "read_data_" + self.instance_name + ("_reg" if self.need_one_cycle_delay and not self.block_memory else "")
+ def reg_data_out(self):
+ return "reg_read_data_" + self.instance_name
+
+ @property
+ def comb_data_out(self):
+ return "comb_read_data_" + self.instance_name
+
+ @property
+ def wire_data_out(self):
+ return self.comb_data_out if self.need_one_cycle_delay and not self.block_memory else self.reg_data_out
@property
+ def pipe_data_out(self):
+ return "pipe_read_data_" + self.instance_name
+
+ @property
def mux_error_reg(self):
return "error_" + self.instance_name if self.error_wire else "0"
@@ -293,10 +317,10 @@ class Core(object):
template = createInstance_template_dummy if self.dummy else createInstance_template_generic if self.blocks == 1 else createInstance_template_multi_block
return template.format(core = self)
- def createAddr(self):
+ def createAddr(self, max_name_len):
if self.dummy:
return ""
- return createAddr_template.format(core = self) + "".join(subcore.createAddr() for subcore in self.subcores)
+ return createAddr_template.format(core = self, name_pad = max_name_len) + "".join(subcore.createAddr(max_name_len) for subcore in self.subcores)
def createMux(self):
if self.dummy:
@@ -328,32 +352,44 @@ class SubCore(Core):
# Template used by .createAddr() methods.
createAddr_template = """\
- localparam CORE_ADDR_{core.upper_instance_name:21s} = {core.addr_width}'h{core.core_number:02x};
+ localparam CORE_ADDR_{core.upper_instance_name:{name_pad}s} = {core.addr_width}'h{core.core_number:02x};
"""
# Template used by Core.createInstance().
createInstance_template_generic = """\
- //----------------------------------------------------------------
- // {core.upper_instance_name}
- //----------------------------------------------------------------
- wire enable_{core.instance_name} = (addr_core_num == CORE_ADDR_{core.upper_instance_name});
- wire [31: 0] read_data_{core.instance_name};{core.error_wire_decl}
-
- {core.module_name} {core.parameters}{core.instance_name}_inst
- (
- .clk(sys_clk),
- .{core.reset_name}(sys_rst_n),
+ //----------------------------------------------------------------
+ // {core.upper_instance_name}
+ //----------------------------------------------------------------
+ wire enable_{core.instance_name} = (addr_core_num == CORE_ADDR_{core.upper_instance_name});
+ wire [31: 0] {core.wire_data_out};{core.error_wire_decl}
+
+ reg select_{core.instance_name} = 1'b0;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg write_{core.instance_name} = 1'b0;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [31: 0] write_data_{core.instance_name};
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [ 7: 0] addr_{core.instance_name};
+
+ always @(posedge sys_clk) begin
+ select_{core.instance_name} <= enable_{core.instance_name} && sys_{core.bus_name}_cs;
+ write_{core.instance_name} <= sys_{core.bus_name}_wr;
+ write_data_{core.instance_name} <= sys_write_data;
+ addr_{core.instance_name} <= addr_core_reg;
+ end
+
+ {core.module_name} {core.parameters}{core.instance_name}_inst
+ (
+ .clk(sys_clk),
+ .{core.reset_name}(sys_rst_n_fanout[{core.seq_number}]),
{core.extra_ports}
- .cs(enable_{core.instance_name} & (sys_{core.bus_name}_rd | sys_{core.bus_name}_wr)),
- .we(sys_{core.bus_name}_wr),
-
- .address(addr_core_reg),
- .write_data(sys_write_data),
- .read_data(read_data_{core.instance_name}){core.error_port}
- );
+ .cs(select_{core.instance_name}),
+ .we(write_{core.instance_name}),
+ .address(addr_{core.instance_name}),
+ .write_data(write_data_{core.instance_name}),
+ .read_data({core.wire_data_out}){core.error_port}
+ );
{core.one_cycle_delay}
+{core.extra_pipeline_stage}
"""
@@ -361,27 +397,39 @@ createInstance_template_generic = """\
# enough from the base template that it's easier to make this separate.
createInstance_template_multi_block = """\
- //----------------------------------------------------------------
- // {core.upper_instance_name}
- //----------------------------------------------------------------
- wire enable_{core.instance_name} = (addr_core_num >= CORE_ADDR_{core.upper_instance_name}) && (addr_core_num <= CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{core.block_max:02x});
- wire [31: 0] read_data_{core.instance_name};{core.error_wire_decl}
- wire [{core.block_bit_max}:0] {core.instance_name}_prefix = addr_core_num[{core.block_bit_max}:0] - CORE_ADDR_{core.upper_instance_name};
-
- {core.module_name} {core.parameters}{core.instance_name}_inst
- (
- .clk(sys_clk),
- .{core.reset_name}(sys_rst_n),
+ //----------------------------------------------------------------
+ // {core.upper_instance_name}
+ //----------------------------------------------------------------
+ wire enable_{core.instance_name} = (addr_core_num >= CORE_ADDR_{core.upper_instance_name}) && (addr_core_num <= (CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{core.block_max:02x}));
+ wire [31: 0] {core.wire_data_out};{core.error_wire_decl}
+ wire [{core.block_bit_max:>2}: 0] prefix_{core.instance_name} = addr_core_num[{core.block_bit_max}:0] - CORE_ADDR_{core.upper_instance_name}[{core.block_bit_max}:0];
+
+ reg select_{core.instance_name} = 1'b0;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg write_{core.instance_name} = 1'b0;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [ 31: 0] write_data_{core.instance_name};
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.block_bits}+7: 0] addr_{core.instance_name};
+
+ always @(posedge sys_clk) begin
+ select_{core.instance_name} <= enable_{core.instance_name} && sys_{core.bus_name}_cs;
+ write_{core.instance_name} <= sys_{core.bus_name}_wr;
+ write_data_{core.instance_name} <= sys_write_data;
+ addr_{core.instance_name} <= {{prefix_{core.instance_name}, addr_core_reg}};
+ end
+
+ {core.module_name} {core.parameters}{core.instance_name}_inst
+ (
+ .clk(sys_clk),
+ .{core.reset_name}(sys_rst_n_fanout[{core.seq_number}]),
{core.extra_ports}
- .cs(enable_{core.instance_name} & (sys_{core.bus_name}_rd | sys_{core.bus_name}_wr)),
- .we(sys_{core.bus_name}_wr),
-
- .address({{{core.instance_name}_prefix, addr_core_reg}}),
- .write_data(sys_write_data),
- .read_data(read_data_{core.instance_name}){core.error_port}
- );
+ .cs(select_{core.instance_name}),
+ .we(write_{core.instance_name}),
+ .address(addr_{core.instance_name}),
+ .write_data(write_data_{core.instance_name}),
+ .read_data({core.wire_data_out}){core.error_port}
+ );
{core.one_cycle_delay}
+{core.extra_pipeline_stage}
"""
@@ -395,19 +443,28 @@ createInstance_template_dummy = """\
# Template for one-cycle delay code.
one_cycle_delay_template = """\
- reg [31: 0] read_data_{core.instance_name}_reg;
- always @(posedge sys_clk)
- read_data_{core.instance_name}_reg <= read_data_{core.instance_name};
+ (* SHREG_EXTRACT="NO" *)
+ reg [31: 0] {core.reg_data_out};
+ always @(posedge sys_clk)
+ {core.reg_data_out} <= {core.wire_data_out};
+"""
+
+# Template for an extra delay cycle code.
+
+extra_pipeline_stage_template = """\
+ (* SHREG_EXTRACT="NO" *)
+ reg [31: 0] {core.pipe_data_out};
+ always @(posedge sys_clk)
+ {core.pipe_data_out} <= {core.reg_data_out};
"""
# Template for .createMux() methods.
createMux_template = """\
- {core.mux_core_addr}:
- begin
- sys_read_data_mux = {core0.mux_data_reg};
- sys_error_mux = {core0.mux_error_reg};
- end
+ {core.mux_core_addr}: begin
+ sys_read_data_mux <= {core0.pipe_data_out};
+ sys_error_mux <= {core0.mux_error_reg};
+ end
"""
# Top-level (createModule) template.
@@ -416,56 +473,102 @@ createModule_template = """\
// NOTE: This file is generated; do not edit.
module core_selector
- (
- input wire sys_clk,
- input wire sys_rst_n,
-
- input wire [{core.bus_max}: 0] sys_{core.bus_name}_addr,
- input wire sys_{core.bus_name}_wr,
- input wire sys_{core.bus_name}_rd,
- output wire [31: 0] sys_read_data,
- input wire [31: 0] sys_write_data,
- output wire sys_error,
-{core.extra_wires}
- input wire noise,
- output wire [7 : 0] debug
- );
-
-
- //----------------------------------------------------------------
- // Address Decoder
- //----------------------------------------------------------------
- // upper {core.addr_width} bits specify core being addressed
- wire [{core.addr_max:>2}: 0] addr_core_num = sys_{core.bus_name}_addr[{core.bus_max}: 8];
- // lower 8 bits specify register offset in core
- wire [ 7: 0] addr_core_reg = sys_{core.bus_name}_addr[ 7: 0];
+(
+ input wire sys_clk,
+ input wire sys_rst_n,
+
+ input wire [{core.bus_max}: 0] sys_{core.bus_name}_addr,
+ input wire sys_{core.bus_name}_wr,
+ input wire sys_{core.bus_name}_rd,
+ output wire [31: 0] sys_read_data,
+ input wire [31: 0] sys_write_data,
+ output wire sys_error,
+ {core.extra_wires}
+ input wire noise,
+ output wire [ 7 :0] debug
+);
+
+
+ //----------------------------------------------------------------
+ // Localized Resets Generator
+ //----------------------------------------------------------------
+ wire [{core_count}-1:0] sys_rst_n_fanout;
+ reset_replicator #
+ (
+ .SHREG_WIDTH(8),
+ .FANOUT_WIDTH({core_count})
+ )
+ reset_replicator_inst
+ (
+ .sys_clk_in (sys_clk),
+ .sys_rst_n_in (sys_rst_n),
+ .sys_rst_n_out (sys_rst_n_fanout)
+ );
+
+
+ //----------------------------------------------------------------
+ // Address Decoder
+ //----------------------------------------------------------------
+ // upper {core.addr_width} bits specify core being addressed
+ // lower 8 bits specify register offset in core
+ wire [{core.addr_max:>2}: 0] addr_core_num = sys_{core.bus_name}_addr[{core.bus_max}: 8];
+ wire [ 7: 0] addr_core_reg = sys_{core.bus_name}_addr[ 7: 0];
+
+
+ //----------------------------------------------------------------
+ // Core Address Table
+ //----------------------------------------------------------------
+{addrs}
- //----------------------------------------------------------------
- // Core Address Table
- //----------------------------------------------------------------
-{addrs}
+ //----------------------------------------------------------------
+ // Core Instances
+ //----------------------------------------------------------------
+ wire sys_{core.bus_name}_cs = sys_{core.bus_name}_rd || sys_{core.bus_name}_wr;
{insts}
- //----------------------------------------------------------------
- // Output (Read Data) Multiplexer
- //----------------------------------------------------------------
- reg [31: 0] sys_read_data_mux;
- assign sys_read_data = sys_read_data_mux;
- reg sys_error_mux;
- assign sys_error = sys_error_mux;
- always @*
-
- case (addr_core_num)
+
+ //----------------------------------------------------------------
+ // Output (Read Data) Multiplexer
+ //----------------------------------------------------------------
+ (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly1 = 1'b0;
+ (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly2 = 1'b0;
+ (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly3 = 1'b0;
+
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly1;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly2;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly3;
+
+ always @(posedge sys_clk) begin
+ sys_{core.bus_name}_cs_dly1 <= sys_{core.bus_name}_cs;
+ sys_{core.bus_name}_cs_dly2 <= sys_{core.bus_name}_cs_dly1;
+ sys_{core.bus_name}_cs_dly3 <= sys_{core.bus_name}_cs_dly2;
+ end
+
+ always @(posedge sys_clk) begin
+ if (sys_{core.bus_name}_cs) addr_core_num_dly1 <= addr_core_num;
+ if (sys_{core.bus_name}_cs_dly1) addr_core_num_dly2 <= addr_core_num_dly1;
+ if (sys_{core.bus_name}_cs_dly2) addr_core_num_dly3 <= addr_core_num_dly2;
+ end
+
+ reg [31: 0] sys_read_data_mux;
+ reg sys_error_mux;
+
+ assign sys_read_data = sys_read_data_mux;
+ assign sys_error = sys_error_mux;
+
+ always @(posedge sys_clk)
+
+ if (sys_{core.bus_name}_cs_dly3)
+
+ case (addr_core_num_dly3)
{muxes}
- default:
- begin
- sys_read_data_mux = {{32{{1'b0}}}};
- sys_error_mux = 1;
- end
- endcase
-
+ default: begin
+ sys_read_data_mux <= {{32{{1'b0}}}};
+ sys_error_mux <= 1'b1;
+ end
+ endcase
endmodule