aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2020-01-23 13:08:59 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2020-01-23 13:22:13 +0300
commit5807b0bfd7efe8dd3f83d679730241847517980b (patch)
treed57844e74edb8cec0ac967497085201a0f9f4773
parent4f5c4453343430cce0067007deace5c1ddd7c29e (diff)
Reworked core selector generation script.
The core selector is now multi-cycle (see the message of commit 35359243a63cac4a9e8cce6bd718f17756ce8a98 in /core/platform/alpha for more details). In short, for write operations, every core now has its own copy of the chip select, address and write data registers. For read operations, we should never ever need the combinational readback multiplexor again: it just won't meet timing with so many complex cores. Cores with combinational outputs, primarily those that don't have block memory inside, always have additional output registers. Moreover, the readback multiplexor is now registered too; this is required to get the multicycle constraint to work properly (again, refer to the aforementioned commit message).
-rwxr-xr-xconfig/core_config.py341
1 files changed, 222 insertions, 119 deletions
diff --git a/config/core_config.py b/config/core_config.py
index 4033279..61e77d5 100755
--- a/config/core_config.py
+++ b/config/core_config.py
@@ -5,7 +5,7 @@ Generate core_selector.v and core_vfiles.mk for a set of cores.
"""
#=======================================================================
-# Copyright (c) 2015-2017, NORDUnet A/S All rights reserved.
+# Copyright (c) 2015-2017, 2019 NORDUnet A/S All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
@@ -34,21 +34,6 @@ Generate core_selector.v and core_vfiles.mk for a set of cores.
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=======================================================================
-# The modexpa7 core drags in a one clock cycle delay to other cores,
-# to compensate for the extra clock cycle consumed by the block
-# memories used in the modexpa7 core. We probably want a general
-# solution for this, because we're going to run into this problem for
-# any core that handles arguments big enough to require block memory.
-
-# To Do:
-#
-# - Consider automating the one-clock-cycle delay stuff by adding
-# another boolean flag to the config file. Default would be no
-# delay, if any included core sets the "I use block memories" flag,
-# all other cores would get the delay. Slightly tedious but
-# something we can calculate easily enough, and probably an
-# improvement over wiring in the delay when nothing needs it.
-
def main():
"""
Parse arguments and config file, generate core list, generate output.
@@ -82,7 +67,7 @@ def main():
Core.modexp = cfg.get(board_section, "modexp")
if Core.extra_wires:
# restore formatting
- Core.extra_wires = Core.extra_wires.replace("\n", "\n ") + "\n"
+ Core.extra_wires = Core.extra_wires.replace("\n", "\n ") + "\n"
if args.core:
cores = args.core
@@ -98,7 +83,6 @@ def main():
except ValueError:
if core not in cores:
cores.append(core)
-
cores.insert(0, "board_regs")
cores.insert(1, "comm_regs")
@@ -111,21 +95,41 @@ def main():
core_number = 0
for core in cores:
core_number = core.assign_core_number(core_number)
-
+
+ for i, core in enumerate(cores):
+ core.assign_seq_number(i)
+
+ # On the unused piece of code below: we really should not try to
+ # optimize out the delay. This may have worked earlier, when we only
+ # had a small set of simple cores. There are a lot of complex cores
+ # by now, so the readback multiplexer gets pretty wide and will never
+ # meet timing if we make it purely combinatorial. Moreover, it turns
+ # out that additional delays are necessary to make it work at higher
+ # clock speeds.
if False:
# For some reason, attempting to optimize out the delay
# code entirely results in a non-working bitstream. Don't
# know why, disabling the optimization works, so just do
# that for now.
-
+
Core.need_one_cycle_delay = any(core.block_memory for core in cores)
+ # longest core/subcore instance name
+ max_name_len = 0
+ for core in cores:
+ if len(core.instance_name) > max_name_len:
+ max_name_len = len(core.instance_name)
+ for subcore in core.subcores:
+ if len(subcore.instance_name) > max_name_len:
+ max_name_len = len(subcore.instance_name)
+
args.verilog.write(createModule_template.format(
+ core_count = len(cores),
core = cores[0],
- addrs = "".join(core.createAddr() for core in cores),
- insts = "".join(core.createInstance() for core in cores),
- muxes = "".join(core.createMux() for core in cores) ))
+ addrs = "".join(core.createAddr(max_name_len) for core in cores),
+ insts = "".join(core.createInstance() for core in cores),
+ muxes = "".join(core.createMux() for core in cores) ))
args.makefile.write(listVfiles_template.format(
vfiles = "".join(core.listVfiles() for core in cores)))
@@ -193,6 +197,7 @@ class Core(object):
self.name = name
self.cfg_section = "core " + name
self.core_number = None
+ self.seq_number = None
self.vfiles = []
self.error_wire = True
self.block_memory = False
@@ -211,6 +216,9 @@ class Core(object):
subcore.assign_core_number(n + i + 1)
return n + self.blocks
+ def assign_seq_number(self, n):
+ self.seq_number = n
+
def configure(self, cfg):
if self.instance_number == 0:
self.vfiles.extend(cfg.getvalues(self.cfg_section, "vfiles"))
@@ -221,7 +229,7 @@ class Core(object):
self.block_memory = cfg.getboolean(self.cfg_section, "block memory", self.block_memory)
self.extra_ports = cfg.get(self.cfg_section, "extra ports")
if self.extra_ports:
- self.extra_ports = self.extra_ports.replace("\n", "\n ") + "\n"
+ self.extra_ports = self.extra_ports.replace("\n", "\n ") + "\n"
self.blocks = int(cfg.get(self.cfg_section, "core blocks") or 1)
self.block_max = self.blocks - 1
if self.blocks > 1:
@@ -257,28 +265,44 @@ class Core(object):
@property
def error_wire_decl(self):
- return "\n wire error_{core.instance_name};".format(core = self) if self.error_wire else ""
+ return "\n wire error_{core.instance_name};".format(core = self) if self.error_wire else ""
@property
def error_port(self):
- return ",\n .error(error_{core.instance_name})".format(core = self) if self.error_wire else ""
+ return ",\n .error(error_{core.instance_name})".format(core = self) if self.error_wire else ""
@property
def one_cycle_delay(self):
return one_cycle_delay_template.format(core = self) if self.need_one_cycle_delay and not self.block_memory else ""
@property
+ def extra_pipeline_stage(self):
+ return extra_pipeline_stage_template.format(core = self)
+
+ @property
def mux_core_addr(self):
if self.blocks == 1 or self.subcores:
return "CORE_ADDR_{core.upper_instance_name}".format(core=self)
else:
- return ",\n ".join("CORE_ADDR_{core.upper_instance_name} + {0}".format(i, core=self) for i in range(self.blocks))
+ return ",\n ".join("CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{0:04X}".format(i, core=self) for i in range(self.blocks))
@property
- def mux_data_reg(self):
- return "read_data_" + self.instance_name + ("_reg" if self.need_one_cycle_delay and not self.block_memory else "")
+ def reg_data_out(self):
+ return "reg_read_data_" + self.instance_name
+
+ @property
+ def comb_data_out(self):
+ return "comb_read_data_" + self.instance_name
+
+ @property
+ def wire_data_out(self):
+ return self.comb_data_out if self.need_one_cycle_delay and not self.block_memory else self.reg_data_out
@property
+ def pipe_data_out(self):
+ return "pipe_read_data_" + self.instance_name
+
+ @property
def mux_error_reg(self):
return "error_" + self.instance_name if self.error_wire else "0"
@@ -293,10 +317,10 @@ class Core(object):
template = createInstance_template_dummy if self.dummy else createInstance_template_generic if self.blocks == 1 else createInstance_template_multi_block
return template.format(core = self)
- def createAddr(self):
+ def createAddr(self, max_name_len):
if self.dummy:
return ""
- return createAddr_template.format(core = self) + "".join(subcore.createAddr() for subcore in self.subcores)
+ return createAddr_template.format(core = self, name_pad = max_name_len) + "".join(subcore.createAddr(max_name_len) for subcore in self.subcores)
def createMux(self):
if self.dummy:
@@ -328,32 +352,44 @@ class SubCore(Core):
# Template used by .createAddr() methods.
createAddr_template = """\
- localparam CORE_ADDR_{core.upper_instance_name:21s} = {core.addr_width}'h{core.core_number:02x};
+ localparam CORE_ADDR_{core.upper_instance_name:{name_pad}s} = {core.addr_width}'h{core.core_number:02x};
"""
# Template used by Core.createInstance().
createInstance_template_generic = """\
- //----------------------------------------------------------------
- // {core.upper_instance_name}
- //----------------------------------------------------------------
- wire enable_{core.instance_name} = (addr_core_num == CORE_ADDR_{core.upper_instance_name});
- wire [31: 0] read_data_{core.instance_name};{core.error_wire_decl}
-
- {core.module_name} {core.parameters}{core.instance_name}_inst
- (
- .clk(sys_clk),
- .{core.reset_name}(sys_rst_n),
+ //----------------------------------------------------------------
+ // {core.upper_instance_name}
+ //----------------------------------------------------------------
+ wire enable_{core.instance_name} = (addr_core_num == CORE_ADDR_{core.upper_instance_name});
+ wire [31: 0] {core.wire_data_out};{core.error_wire_decl}
+
+ reg select_{core.instance_name} = 1'b0;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg write_{core.instance_name} = 1'b0;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [31: 0] write_data_{core.instance_name};
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [ 7: 0] addr_{core.instance_name};
+
+ always @(posedge sys_clk) begin
+ select_{core.instance_name} <= enable_{core.instance_name} && sys_{core.bus_name}_cs;
+ write_{core.instance_name} <= sys_{core.bus_name}_wr;
+ write_data_{core.instance_name} <= sys_write_data;
+ addr_{core.instance_name} <= addr_core_reg;
+ end
+
+ {core.module_name} {core.parameters}{core.instance_name}_inst
+ (
+ .clk(sys_clk),
+ .{core.reset_name}(sys_rst_n_fanout[{core.seq_number}]),
{core.extra_ports}
- .cs(enable_{core.instance_name} & (sys_{core.bus_name}_rd | sys_{core.bus_name}_wr)),
- .we(sys_{core.bus_name}_wr),
-
- .address(addr_core_reg),
- .write_data(sys_write_data),
- .read_data(read_data_{core.instance_name}){core.error_port}
- );
+ .cs(select_{core.instance_name}),
+ .we(write_{core.instance_name}),
+ .address(addr_{core.instance_name}),
+ .write_data(write_data_{core.instance_name}),
+ .read_data({core.wire_data_out}){core.error_port}
+ );
{core.one_cycle_delay}
+{core.extra_pipeline_stage}
"""
@@ -361,27 +397,39 @@ createInstance_template_generic = """\
# enough from the base template that it's easier to make this separate.
createInstance_template_multi_block = """\
- //----------------------------------------------------------------
- // {core.upper_instance_name}
- //----------------------------------------------------------------
- wire enable_{core.instance_name} = (addr_core_num >= CORE_ADDR_{core.upper_instance_name}) && (addr_core_num <= CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{core.block_max:02x});
- wire [31: 0] read_data_{core.instance_name};{core.error_wire_decl}
- wire [{core.block_bit_max}:0] {core.instance_name}_prefix = addr_core_num[{core.block_bit_max}:0] - CORE_ADDR_{core.upper_instance_name};
-
- {core.module_name} {core.parameters}{core.instance_name}_inst
- (
- .clk(sys_clk),
- .{core.reset_name}(sys_rst_n),
+ //----------------------------------------------------------------
+ // {core.upper_instance_name}
+ //----------------------------------------------------------------
+ wire enable_{core.instance_name} = (addr_core_num >= CORE_ADDR_{core.upper_instance_name}) && (addr_core_num <= (CORE_ADDR_{core.upper_instance_name} + {core.addr_width}'h{core.block_max:02x}));
+ wire [31: 0] {core.wire_data_out};{core.error_wire_decl}
+ wire [{core.block_bit_max:>2}: 0] prefix_{core.instance_name} = addr_core_num[{core.block_bit_max}:0] - CORE_ADDR_{core.upper_instance_name}[{core.block_bit_max}:0];
+
+ reg select_{core.instance_name} = 1'b0;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg write_{core.instance_name} = 1'b0;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [ 31: 0] write_data_{core.instance_name};
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.block_bits}+7: 0] addr_{core.instance_name};
+
+ always @(posedge sys_clk) begin
+ select_{core.instance_name} <= enable_{core.instance_name} && sys_{core.bus_name}_cs;
+ write_{core.instance_name} <= sys_{core.bus_name}_wr;
+ write_data_{core.instance_name} <= sys_write_data;
+ addr_{core.instance_name} <= {{prefix_{core.instance_name}, addr_core_reg}};
+ end
+
+ {core.module_name} {core.parameters}{core.instance_name}_inst
+ (
+ .clk(sys_clk),
+ .{core.reset_name}(sys_rst_n_fanout[{core.seq_number}]),
{core.extra_ports}
- .cs(enable_{core.instance_name} & (sys_{core.bus_name}_rd | sys_{core.bus_name}_wr)),
- .we(sys_{core.bus_name}_wr),
-
- .address({{{core.instance_name}_prefix, addr_core_reg}}),
- .write_data(sys_write_data),
- .read_data(read_data_{core.instance_name}){core.error_port}
- );
+ .cs(select_{core.instance_name}),
+ .we(write_{core.instance_name}),
+ .address(addr_{core.instance_name}),
+ .write_data(write_data_{core.instance_name}),
+ .read_data({core.wire_data_out}){core.error_port}
+ );
{core.one_cycle_delay}
+{core.extra_pipeline_stage}
"""
@@ -395,19 +443,28 @@ createInstance_template_dummy = """\
# Template for one-cycle delay code.
one_cycle_delay_template = """\
- reg [31: 0] read_data_{core.instance_name}_reg;
- always @(posedge sys_clk)
- read_data_{core.instance_name}_reg <= read_data_{core.instance_name};
+ (* SHREG_EXTRACT="NO" *)
+ reg [31: 0] {core.reg_data_out};
+ always @(posedge sys_clk)
+ {core.reg_data_out} <= {core.wire_data_out};
+"""
+
+# Template for an extra delay cycle code.
+
+extra_pipeline_stage_template = """\
+ (* SHREG_EXTRACT="NO" *)
+ reg [31: 0] {core.pipe_data_out};
+ always @(posedge sys_clk)
+ {core.pipe_data_out} <= {core.reg_data_out};
"""
# Template for .createMux() methods.
createMux_template = """\
- {core.mux_core_addr}:
- begin
- sys_read_data_mux = {core0.mux_data_reg};
- sys_error_mux = {core0.mux_error_reg};
- end
+ {core.mux_core_addr}: begin
+ sys_read_data_mux <= {core0.pipe_data_out};
+ sys_error_mux <= {core0.mux_error_reg};
+ end
"""
# Top-level (createModule) template.
@@ -416,56 +473,102 @@ createModule_template = """\
// NOTE: This file is generated; do not edit.
module core_selector
- (
- input wire sys_clk,
- input wire sys_rst_n,
-
- input wire [{core.bus_max}: 0] sys_{core.bus_name}_addr,
- input wire sys_{core.bus_name}_wr,
- input wire sys_{core.bus_name}_rd,
- output wire [31: 0] sys_read_data,
- input wire [31: 0] sys_write_data,
- output wire sys_error,
-{core.extra_wires}
- input wire noise,
- output wire [7 : 0] debug
- );
-
-
- //----------------------------------------------------------------
- // Address Decoder
- //----------------------------------------------------------------
- // upper {core.addr_width} bits specify core being addressed
- wire [{core.addr_max:>2}: 0] addr_core_num = sys_{core.bus_name}_addr[{core.bus_max}: 8];
- // lower 8 bits specify register offset in core
- wire [ 7: 0] addr_core_reg = sys_{core.bus_name}_addr[ 7: 0];
+(
+ input wire sys_clk,
+ input wire sys_rst_n,
+
+ input wire [{core.bus_max}: 0] sys_{core.bus_name}_addr,
+ input wire sys_{core.bus_name}_wr,
+ input wire sys_{core.bus_name}_rd,
+ output wire [31: 0] sys_read_data,
+ input wire [31: 0] sys_write_data,
+ output wire sys_error,
+ {core.extra_wires}
+ input wire noise,
+ output wire [ 7 :0] debug
+);
+
+
+ //----------------------------------------------------------------
+ // Localized Resets Generator
+ //----------------------------------------------------------------
+ wire [{core_count}-1:0] sys_rst_n_fanout;
+ reset_replicator #
+ (
+ .SHREG_WIDTH(8),
+ .FANOUT_WIDTH({core_count})
+ )
+ reset_replicator_inst
+ (
+ .sys_clk_in (sys_clk),
+ .sys_rst_n_in (sys_rst_n),
+ .sys_rst_n_out (sys_rst_n_fanout)
+ );
+
+
+ //----------------------------------------------------------------
+ // Address Decoder
+ //----------------------------------------------------------------
+ // upper {core.addr_width} bits specify core being addressed
+ // lower 8 bits specify register offset in core
+ wire [{core.addr_max:>2}: 0] addr_core_num = sys_{core.bus_name}_addr[{core.bus_max}: 8];
+ wire [ 7: 0] addr_core_reg = sys_{core.bus_name}_addr[ 7: 0];
+
+
+ //----------------------------------------------------------------
+ // Core Address Table
+ //----------------------------------------------------------------
+{addrs}
- //----------------------------------------------------------------
- // Core Address Table
- //----------------------------------------------------------------
-{addrs}
+ //----------------------------------------------------------------
+ // Core Instances
+ //----------------------------------------------------------------
+ wire sys_{core.bus_name}_cs = sys_{core.bus_name}_rd || sys_{core.bus_name}_wr;
{insts}
- //----------------------------------------------------------------
- // Output (Read Data) Multiplexer
- //----------------------------------------------------------------
- reg [31: 0] sys_read_data_mux;
- assign sys_read_data = sys_read_data_mux;
- reg sys_error_mux;
- assign sys_error = sys_error_mux;
- always @*
-
- case (addr_core_num)
+
+ //----------------------------------------------------------------
+ // Output (Read Data) Multiplexer
+ //----------------------------------------------------------------
+ (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly1 = 1'b0;
+ (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly2 = 1'b0;
+ (* SHREG_EXTRACT="NO" *) reg sys_{core.bus_name}_cs_dly3 = 1'b0;
+
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly1;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly2;
+ (* SHREG_EXTRACT="NO" *) (* EQUIVALENT_REGISTER_REMOVAL="NO" *) reg [{core.addr_max:>2}: 0] addr_core_num_dly3;
+
+ always @(posedge sys_clk) begin
+ sys_{core.bus_name}_cs_dly1 <= sys_{core.bus_name}_cs;
+ sys_{core.bus_name}_cs_dly2 <= sys_{core.bus_name}_cs_dly1;
+ sys_{core.bus_name}_cs_dly3 <= sys_{core.bus_name}_cs_dly2;
+ end
+
+ always @(posedge sys_clk) begin
+ if (sys_{core.bus_name}_cs) addr_core_num_dly1 <= addr_core_num;
+ if (sys_{core.bus_name}_cs_dly1) addr_core_num_dly2 <= addr_core_num_dly1;
+ if (sys_{core.bus_name}_cs_dly2) addr_core_num_dly3 <= addr_core_num_dly2;
+ end
+
+ reg [31: 0] sys_read_data_mux;
+ reg sys_error_mux;
+
+ assign sys_read_data = sys_read_data_mux;
+ assign sys_error = sys_error_mux;
+
+ always @(posedge sys_clk)
+
+ if (sys_{core.bus_name}_cs_dly3)
+
+ case (addr_core_num_dly3)
{muxes}
- default:
- begin
- sys_read_data_mux = {{32{{1'b0}}}};
- sys_error_mux = 1;
- end
- endcase
-
+ default: begin
+ sys_read_data_mux <= {{32{{1'b0}}}};
+ sys_error_mux <= 1'b1;
+ end
+ endcase
endmodule