about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-10-01 15:16:58 +0300
committer Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-10-01 15:16:58 +0300
commit fde62e373fdfcefefb7da10757a3db933160c911 (patch)
tree 7f0a5b37be84af4399a7f629ca062a3cbb147f37
parent 3ea94c872afe6309c43ac7eccf877734c33f5421 (diff)
Major rewrite (different core hierarchy, buses, wrappers, etc).
-rw-r--r-- rtl/modexpng_dsp_array_block.v | 143
-rw-r--r-- rtl/modexpng_dsp_slice_wrapper.v | 125
-rw-r--r-- rtl/modexpng_mac.v | 54
-rw-r--r-- rtl/modexpng_mac_array.v | 116
-rw-r--r-- rtl/modexpng_mem.v | 93
-rw-r--r-- rtl/modexpng_mmm_col_index.v | 90
-rw-r--r-- rtl/modexpng_mmm_din_addr.v | 167
-rw-r--r-- rtl/modexpng_mmm_dout_addr.v | 167
-rw-r--r-- rtl/modexpng_mmm_dual_x8.v | 951
-rw-r--r-- rtl/modexpng_mmm_fsm_old.vh (renamed from rtl/modexpng_mmm_fsm.vh) | 0
-rw-r--r-- rtl/modexpng_mmm_pad.v | 153
-rw-r--r-- rtl/modexpng_mmm_transporter.v | 157
-rw-r--r-- rtl/modexpng_mmm_x8_dual.v | 550
-rw-r--r-- rtl/modexpng_parameters.vh | 39
-rw-r--r-- rtl/modexpng_parameters_old.vh | 40
-rw-r--r-- rtl/modexpng_parameters_x8_old.vh (renamed from rtl/modexpng_parameters_x8.vh) | 0
-rw-r--r-- rtl/modexpng_part_recombinator.v | 1128
-rw-r--r-- rtl/modexpng_recombinator_block.v | 1234
-rw-r--r-- rtl/modexpng_recombinator_cell.v | 35
-rw-r--r-- rtl/modexpng_reductor.v | 270
-rw-r--r-- rtl/modexpng_storage_block.v | 226
-rw-r--r-- rtl/modexpng_storage_manager.v | 200
22 files changed, 3202 insertions, 2736 deletions
diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v
new file mode 100644
index 0000000..9c4ee93
--- /dev/null
+++ b/rtl/modexpng_dsp_array_block.v
@@ -0,0 +1,143 @@
+module modexpng_dsp_array_block // Array of modexpng_dsp_slice_wrapper instances: NUM_MULTS/2 direct+cascade pairs plus one auxiliary slice, all sharing b.
+(
+ input clk,
+ // Clock enables. ce_a / ce_b are also registered below to produce the delayed enables used by later register stages.
+ input ce_a, // undelayed A enable (ce_a0); ce_a1 / ce_a2 are derived from it
+ input ce_b, // undelayed B enable (ce_b0); ce_b1 is derived from it
+ input ce_m, // forwarded unchanged to ce_m of every slice
+ input ce_p, // forwarded unchanged to ce_p of every slice
+ input ce_mode, // forwarded unchanged to ce_mode of every slice
+ // One mode bit per slice: bit i is placed into opmode[5] of the slice that drives p[47*i +: 47].
+ input [9 -1:0] mode_z,
+ // a: 18-bit words (word z feeds pair z, word 4 feeds dsp_aux); b: one 16-bit word wired to every slice; p: 47-bit result words.
+ input [5*18-1:0] a,
+ input [1*16-1:0] b,
+ output [9*47-1:0] p
+);
+ // NOTE(review): NUM_MULTS is presumably defined by this header; this commit's file list shows modexpng_parameters_x8.vh being renamed to *_old.vh — confirm the include still resolves.
+ `include "modexpng_parameters_x8.vh"
+ // A/B cascade wires from each dsp_direct to its dsp_cascade partner. NOTE(review): [0:3] and the port widths only match the generate loop when NUM_MULTS/2 == 4 — confirm.
+ wire [17:0] casc_a[0:3];
+ wire [15:0] casc_b[0:3];
+ // Clock-enable pipeline: *0 is the undelayed input, *1 is one clk later, *2 is two clks later.
+ wire ce_a0 = ce_a;
+ reg ce_a1 = 1'b0;
+ reg ce_a2 = 1'b0;
+
+ wire ce_b0 = ce_b;
+ reg ce_b1 = 1'b0;
+
+ always @(posedge clk) begin
+ ce_a1 <= ce_a0;
+ ce_a2 <= ce_a1;
+ ce_b1 <= ce_b0;
+ end
+ // Slice pairs: dsp_direct reads a/b from the ports; dsp_cascade is built with AB_INPUT("CASCADE") and is fed from dsp_direct's casc_*_out.
+
+ genvar z;
+ generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ // z indexes the pair; pair z drives p words 2*z (dsp_direct) and 2*z+1 (dsp_cascade) and uses mode_z bits 2*z and 2*z+1.
+ begin : gen_DSP48E1
+ //
+ modexpng_dsp_slice_wrapper #
+ (
+ .AB_INPUT("DIRECT"),
+ .B_REG(2)
+ )
+ dsp_direct
+ (
+ .clk (clk),
+
+ .ce_a1 (ce_a0), // first A stage: undelayed enable
+ .ce_b1 (ce_b0), // first B stage: undelayed enable
+ .ce_a2 (ce_a1), // second A stage: enable delayed by one clk
+ .ce_b2 (ce_b1), // second B stage: enable delayed by one clk
+ .ce_m (ce_m),
+ .ce_p (ce_p),
+ .ce_mode (ce_mode),
+
+ .a (a[z*18+:18]),
+ .b (b),
+ .p (p[47*2*z+:47]),
+
+ .inmode (5'b00000),
+ .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}), // NOTE(review): per UG479 this is {Z,Y,X} with X=Y=01 (multiplier) and Z=010 (P) or 000 (zero), i.e. mode bit 1 = accumulate — confirm
+ .alumode (4'b0000),
+
+ .casc_a_in ({18{1'b0}}), // cascade inputs unused on the direct slice
+ .casc_b_in ({16{1'b0}}),
+
+ .casc_a_out (casc_a[z]),
+ .casc_b_out (casc_b[z])
+ );
+ // Second slice of the pair: A enables are shifted one clk later (ce_a1/ce_a2 instead of ce_a0/ce_a1); it is built with B_REG(1).
+ modexpng_dsp_slice_wrapper #
+ (
+ .AB_INPUT("CASCADE"),
+ .B_REG(1)
+ )
+ dsp_cascade
+ (
+ .clk (clk),
+
+ .ce_a1 (ce_a1),
+ .ce_b1 (1'b0), // tied off on this slice
+ .ce_a2 (ce_a2),
+ .ce_b2 (ce_b1),
+ .ce_m (ce_m),
+ .ce_p (ce_p),
+ .ce_mode (ce_mode),
+
+ .a (a[z*18+:18]), // NOTE(review): presumably ignored by the primitive when its input mode is "CASCADE" — confirm
+ .b (b), // NOTE(review): same assumption as for .a
+ .p (p[47*(2*z+1)+:47]),
+
+ .inmode (5'b00000),
+ .opmode ({1'b0, mode_z[2*z+1], 1'b0, 2'b01, 2'b01}), // same field layout as dsp_direct, using the odd mode bit
+ .alumode (4'b0000),
+
+ .casc_a_in (casc_a[z]),
+ .casc_b_in (casc_b[z]),
+
+ .casc_a_out (), // the cascade chain ends after two slices
+ .casc_b_out ()
+ );
+ //
+ end
+ //
+ endgenerate
+ // Auxiliary slice outside the generate loop: direct inputs, word 4 of a, mode bit 8, result in p word 8; cascade ports unused.
+ modexpng_dsp_slice_wrapper #
+ (
+ .AB_INPUT("DIRECT"),
+ .B_REG(2)
+ )
+ dsp_aux
+ (
+ .clk (clk),
+
+ .ce_a1 (ce_a0), // same enable assignment as dsp_direct
+ .ce_b1 (ce_b0),
+ .ce_a2 (ce_a1),
+ .ce_b2 (ce_b1),
+ .ce_m (ce_m),
+ .ce_p (ce_p),
+ .ce_mode (ce_mode),
+
+ .a (a[4*18+:18]),
+ .b (b),
+ .p (p[47*2*4+:47]),
+
+ .inmode (5'b00000),
+ .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}),
+ .alumode (4'b0000),
+
+ .casc_a_in ({18{1'b0}}),
+ .casc_b_in ({16{1'b0}}),
+
+ .casc_a_out (),
+ .casc_b_out ()
+ );
+
+
+endmodule
diff --git a/rtl/modexpng_dsp_slice_wrapper.v b/rtl/modexpng_dsp_slice_wrapper.v
new file mode 100644
index 0000000..f565eec
--- /dev/null
+++ b/rtl/modexpng_dsp_slice_wrapper.v
@@ -0,0 +1,125 @@
+module modexpng_dsp_slice_wrapper # // Wraps one DSP48E1 primitive behind 18-bit A, 16-bit B and 47-bit P ports, tying off everything that is not used.
+( // 'parameter' is spelled out on every entry so that this list is legal in plain Verilog-2001, not only in SystemVerilog
+ parameter AB_INPUT = "DIRECT", // forwarded to both A_INPUT and B_INPUT of the primitive ("DIRECT" or "CASCADE")
+ parameter B_REG = 2 // forwarded to BREG of the primitive (number of B register stages)
+)
+(
+ input clk, // drives CLK
+ input ce_a1, // drives CEA1
+ input ce_b1, // drives CEB1
+ input ce_a2, // drives CEA2
+ input ce_b2, // drives CEB2
+ input ce_m, // drives CEM
+ input ce_p, // drives CEP
+ input ce_mode, // drives CECTRL
+ input [17:0] a, // zero-extended to the 30-bit A port
+ input [15:0] b, // zero-extended to the 18-bit B port
+ output [46:0] p, // low 47 bits of the 48-bit P port
+ input [ 4:0] inmode, // passed straight to INMODE
+ input [ 6:0] opmode, // passed straight to OPMODE
+ input [ 3:0] alumode, // passed straight to ALUMODE
+ input [17:0] casc_a_in, // zero-extended to the 30-bit ACIN port
+ input [15:0] casc_b_in, // zero-extended to the 18-bit BCIN port
+ output [17:0] casc_a_out, // low 18 bits of ACOUT
+ output [15:0] casc_b_out // low 16 bits of BCOUT
+);
+ // Sinks for primitive output bits that exceed the wrapper's port widths (12 bits of ACOUT, 2 bits of BCOUT, 1 bit of P).
+ wire [30-18-1:0] casc_a_dummy;
+ wire [18-16-1:0] casc_b_dummy;
+ wire [48-47-1:0] p_dummy;
+ // Register configuration: AREG=2, BREG=B_REG, MREG=1, PREG=1 and OPMODEREG=1; every other *REG attribute is 0.
+ DSP48E1 #
+ (
+ .AREG (2),
+ .BREG (B_REG),
+ .CREG (0),
+ .DREG (0),
+ .ADREG (0),
+ .MREG (1),
+ .PREG (1),
+ .ACASCREG (1),
+ .BCASCREG (1),
+ .INMODEREG (0),
+ .OPMODEREG (1),
+ .ALUMODEREG (0),
+ .CARRYINREG (0),
+ .CARRYINSELREG (0),
+ // A and B always use the same input source.
+ .A_INPUT (AB_INPUT),
+ .B_INPUT (AB_INPUT),
+
+ .USE_DPORT ("FALSE"),
+ .USE_MULT ("DYNAMIC"),
+ .USE_SIMD ("ONE48"),
+
+ .MASK (48'h3fffffffffff),
+ .PATTERN (48'h000000000000),
+ .SEL_MASK ("MASK"),
+ .SEL_PATTERN ("PATTERN"),
+
+ .USE_PATTERN_DETECT ("NO_PATDET"),
+ .AUTORESET_PATDET ("NO_RESET")
+ )
+ DSP48E1_inst
+ (
+ .CLK (clk),
+ // Clock enables: only CEA1/CEA2, CEB1/CEB2, CEM, CEP and CECTRL are driven by ports; all others are tied low.
+ .CEA1 (ce_a1),
+ .CEB1 (ce_b1),
+ .CEA2 (ce_a2),
+ .CEB2 (ce_b2),
+ .CEAD (1'b0),
+ .CEC (1'b0),
+ .CED (1'b0),
+ .CEM (ce_m),
+ .CEP (ce_p),
+ .CEINMODE (1'b0),
+ .CECTRL (ce_mode),
+ .CEALUMODE (1'b0),
+ .CECARRYIN (1'b0),
+ // Data ports: a and b are zero-extended, C and D are tied to zero, and the top bit of P goes to p_dummy.
+ .A ({{(30-18){1'b0}}, a}),
+ .B ({{(18-16){1'b0}}, b}),
+ .C ({48{1'b0}}),
+ .D ({25{1'b0}}),
+ .P ({p_dummy, p}),
+
+ .INMODE (inmode),
+ .OPMODE (opmode),
+ .ALUMODE (alumode),
+ // Cascade ports: the A/B cascade is zero-extended on the way in and truncated on the way out; the P and carry cascades are unused.
+ .ACIN ({{(30-18){1'b0}}, casc_a_in}),
+ .BCIN ({{(18-16){1'b0}}, casc_b_in}),
+ .ACOUT ({casc_a_dummy, casc_a_out}),
+ .BCOUT ({casc_b_dummy, casc_b_out}),
+ .PCIN ({48{1'b0}}),
+ .PCOUT (),
+ .CARRYCASCIN (1'b0),
+ .CARRYCASCOUT (),
+ // Every reset input of the primitive is tied low.
+ .RSTA (1'b0),
+ .RSTB (1'b0),
+ .RSTC (1'b0),
+ .RSTD (1'b0),
+ .RSTM (1'b0),
+ .RSTP (1'b0),
+ .RSTINMODE (1'b0),
+ .RSTCTRL (1'b0),
+ .RSTALUMODE (1'b0),
+ .RSTALLCARRYIN (1'b0),
+ // Status outputs are left unconnected.
+ .UNDERFLOW (),
+ .OVERFLOW (),
+ .PATTERNDETECT (),
+ .PATTERNBDETECT (),
+ // Carry logic is unused.
+ .CARRYIN (1'b0),
+ .CARRYOUT (),
+ .CARRYINSEL (3'b000),
+
+ .MULTSIGNIN (1'b0),
+ .MULTSIGNOUT ()
+ );
+
+
+endmodule
diff --git a/rtl/modexpng_mac.v b/rtl/modexpng_mac.v
deleted file mode 100644
index 9105dab..0000000
--- a/rtl/modexpng_mac.v
+++ /dev/null
@@ -1,54 +0,0 @@
-module modexpng_mac
-(
- clk,
- ce, clr,
- casc_a,
- a_in, b_in, p_out,
- a_casc_in, a_casc_out
-);
-
- input clk;
- input ce;
- input clr;
- input casc_a;
- input [16:0] a_in;
- input [16:0] b_in;
- output [46:0] p_out;
- input [16:0] a_casc_in;
- output [16:0] a_casc_out;
-
- reg [16:0] a_reg;
- reg [16:0] b_reg;
- assign a_casc_out = a_reg;
- always @(posedge clk)
- //
- if (ce) {b_reg, a_reg} <= {b_in, casc_a ? a_casc_in : a_in};
-
- reg ce_dly1;
- reg ce_dly2;
- always @(posedge clk)
- //
- {ce_dly2, ce_dly1} <= {ce_dly1, ce};
-
- reg clr_dly1;
- reg clr_dly2;
- always @(posedge clk) begin
- //
- if (ce) clr_dly1 <= clr;
- if (ce_dly1) clr_dly2 <= clr_dly1;
- //
- end
-
- reg [33:0] m_reg;
- wire [46:0] m_reg_ext = {{13{1'b0}}, m_reg};
- always @(posedge clk)
- //
- if (ce_dly1) m_reg <= {{17{1'b0}}, a_reg} * {{17{1'b0}}, b_reg};
-
- reg [46:0] p_reg;
- assign p_out = p_reg;
- always @(posedge clk)
- //
- if (ce_dly2) p_reg <= clr_dly2 ? m_reg_ext : p_reg + m_reg_ext;
-
-endmodule
diff --git a/rtl/modexpng_mac_array.v b/rtl/modexpng_mac_array.v
deleted file mode 100644
index 067929e..0000000
--- a/rtl/modexpng_mac_array.v
+++ /dev/null
@@ -1,116 +0,0 @@
-module modexpng_mac_array
-(
- clk,
- ce, clr,
- ce_aux, clr_aux,
- casc_a, casc_a_aux,
- a_in, b_in, p_out,
- a_in_aux, p_out_aux
-);
-
-
- //
- // Includes
- //
- `include "modexpng_parameters.vh"
- `include "modexpng_parameters_x8.vh"
-
-
- //
- // Ports
- //
- input clk;
- input ce;
- input [NUM_MULTS -1:0] clr;
- input ce_aux;
- input clr_aux;
- input [NUM_MULTS -2:0] casc_a;
- input casc_a_aux;
- input [NUM_MULTS * WORD_WIDTH -1:0] a_in;
- input [ 1 * WORD_WIDTH -1:0] b_in;
- output [NUM_MULTS * MAC_WIDTH -1:0] p_out;
- input [ 1 * WORD_WIDTH -1:0] a_in_aux;
- output [ 1 * MAC_WIDTH -1:0] p_out_aux;
-
-
- //
- // A-Cascade Paths
- //
- wire [WORD_WIDTH-1:0] a_casc_int[0:NUM_MULTS-2];
- wire [WORD_WIDTH-1:0] a_casc_int_aux;
-
-
- //
- // LSB
- //
- modexpng_mac mac_lsb
- (
- .clk (clk),
- .ce (ce),
- .clr (clr[0]),
- .casc_a (1'b0),
- .a_in (a_in[0+:WORD_WIDTH]),
- .b_in (b_in),
- .p_out (p_out[0+:MAC_WIDTH]),
- .a_casc_in ({WORD_WIDTH{1'b0}}),
- .a_casc_out (a_casc_int[0])
- );
-
-
- //
- // INT
- //
- genvar z;
- generate for (z=1; z<(NUM_MULTS-1); z=z+1)
- begin : gen_modexpng_mac_int
- modexpng_mac mac_int
- (
- .clk (clk),
- .ce (ce),
- .clr (clr[z]),
- .casc_a (casc_a[z-1]),
- .a_in (a_in[z*WORD_WIDTH+:WORD_WIDTH]),
- .b_in (b_in),
- .p_out (p_out[z*MAC_WIDTH+:MAC_WIDTH]),
- .a_casc_in (a_casc_int[z-1]),
- .a_casc_out (a_casc_int[z])
- );
- end
- endgenerate
-
-
- //
- // MSB
- //
- modexpng_mac mac_msb
- (
- .clk (clk),
- .ce (ce),
- .clr (clr[NUM_MULTS-1]),
- .casc_a (casc_a[NUM_MULTS-2]),
- .a_in (a_in[(NUM_MULTS-1)*WORD_WIDTH+:WORD_WIDTH]),
- .b_in (b_in),
- .p_out (p_out[(NUM_MULTS-1)*MAC_WIDTH+:MAC_WIDTH]),
- .a_casc_in (a_casc_int[NUM_MULTS-2]),
- .a_casc_out (a_casc_int_aux)
- );
-
-
- //
- // AUX
- //
- modexpng_mac mac_aux
- (
- .clk (clk),
- .ce (ce_aux),
- .clr (clr_aux),
- .casc_a (casc_a_aux),
- .a_in (a_in_aux),
- .b_in (b_in),
- .p_out (p_out_aux),
- .a_casc_in (a_casc_int_aux),
- .a_casc_out ()
- );
-
-
-endmodule
diff --git a/rtl/modexpng_mem.v b/rtl/modexpng_mem.v
deleted file mode 100644
index ca89214..0000000
--- a/rtl/modexpng_mem.v
+++ /dev/null
@@ -1,93 +0,0 @@
-//
-// TODO: Add license text!
-//
-
-module modexpng_mem #
-(
- parameter MEM_WIDTH = 17,
- parameter MEM_ADDR_BITS = 6
-)
-(
- input clk,
-
- input [MEM_ADDR_BITS-1:0] a_addr,
- input a_en,
- input a_wr,
- input [MEM_WIDTH -1:0] a_in,
- output [MEM_WIDTH -1:0] a_out,
-
- input [MEM_ADDR_BITS-1:0] b_addr,
- input b_en,
- input b_reg_en,
- output [MEM_WIDTH -1:0] b_out
-);
-
-
- //
- // BRAM
- //
- (* RAM_STYLE="BLOCK" *)
- reg [MEM_WIDTH-1:0] bram[0:(2**MEM_ADDR_BITS)-1];
-
-
- //
- // Initialization for Simulation
- //
- /*
- integer c;
- initial begin
- for (c=0; c<(2**MEM_ADDR_BITS); c=c+1)
- bram[c] = {MEM_WIDTH{1'b0}};
- end
- */
-
-
-
- //
- // Output Registers
- //
- reg [MEM_WIDTH-1:0] bram_b;
- reg [MEM_WIDTH-1:0] bram_b_reg;
-
- assign a_out = 32'hDEADCE11;
- assign b_out = bram_b_reg;
-
-
- //
- // Note, that when both ports are accessing the same location, conflict can
- // potentionally arise. See Xilinx UG473 (pages 19-20, "Conflict
- // Avoidance") for more information. In our configuration to avoid that the
- // write port must be coded to operate in READ_FIRST mode. If the write
- // port is overwriting the same address the read port is accessing, the
- // write port must read the previously stored data (not the data it is
- // writing, as that would be WRITE_FIRST mode).
- //
-
-
- //
- // Write-Only Port A
- //
- always @(posedge clk)
- //
- if (a_en)
- //
- if (a_wr) bram[a_addr] <= a_in;
-
-
- //
- // Read-Only Port B
- //
- always @(posedge clk)
- //
- if (b_en)
- //
- bram_b <= bram[b_addr];
-
- always @(posedge clk)
- //
- if (b_reg_en)
- //
- bram_b_reg <= bram_b;
-
-
-endmodule
diff --git a/rtl/modexpng_mmm_col_index.v b/rtl/modexpng_mmm_col_index.v
deleted file mode 100644
index b904795..0000000
--- a/rtl/modexpng_mmm_col_index.v
+++ /dev/null
@@ -1,90 +0,0 @@
-module modexpng_mmm_col_index
-(
- clk,
- index_last,
- fsm_state_next,
- col_index,
- col_index_done,
- col_index_zero,
- col_index_next,
- col_index_prev
-);
-
-
- //
- // Includes
- //
- //`include "modexpng_parameters.vh"
- //`include "modexpng_parameters_x8.vh"
- `include "modexpng_mmm_fsm.vh"
-
-
- //
- // Parameters
- //
- parameter INDEX_WIDTH = 6;
-
-
- //
- // Ports
- //
- input clk;
- input [ INDEX_WIDTH-1:0] index_last;
- input [FSM_STATE_WIDTH-1:0] fsm_state_next;
- output [ INDEX_WIDTH-4:0] col_index;
- output col_index_done;
- output [ INDEX_WIDTH-4:0] col_index_zero;
- output [ INDEX_WIDTH-4:0] col_index_next;
- output [ INDEX_WIDTH-4:0] col_index_prev;
-
-
- //
- // Registers
- //
- reg [INDEX_WIDTH-4:0] col_index_reg;
- reg [INDEX_WIDTH-4:0] col_index_last;
- reg [INDEX_WIDTH-4:0] col_index_dly;
-
-
- //
- // Mapping
- //
- assign col_index = col_index_reg;
- assign col_index_prev = col_index_dly;
-
-
- //
- // Handy Wires
- //
- assign col_index_done = col_index == col_index_last;
- assign col_index_zero = {(INDEX_WIDTH-3){1'b0}};
- assign col_index_next = col_index + 1'b1;
-
-
- //
- // Increment Logic
- //
- always @(posedge clk)
- //
- case (fsm_state_next)
- //
- FSM_STATE_MULT_SQUARE_COL_0_TRIG: begin
- col_index_reg <= col_index_zero;
- col_index_last <= index_last[INDEX_WIDTH-1:3];
- end
- //
- FSM_STATE_MULT_SQUARE_COL_N_TRIG:
- col_index_reg <= col_index_next;
- //
- endcase
-
-
- //
- // Delay Logic
- //
- always @(posedge clk)
- //
- col_index_dly <= col_index;
-
-
-endmodule
diff --git a/rtl/modexpng_mmm_din_addr.v b/rtl/modexpng_mmm_din_addr.v
deleted file mode 100644
index 565c7e0..0000000
--- a/rtl/modexpng_mmm_din_addr.v
+++ /dev/null
@@ -1,167 +0,0 @@
-module modexpng_mmm_din_addr
-(
- clk, rst_n,
- index_last,
- fsm_state_next,
- col_index_zero, col_index_next,
- din_addr, din_bank, din_ena, din_reg_ena,
- din_addr_cnt, din_addr_cnt_last,
- din_addr_cnt_lower_prev, din_addr_cnt_upper_prev
-);
-
-
- //
- // Includes
- //
- `include "modexpng_parameters.vh"
- //`include "modexpng_parameters_x8.vh"
- `include "modexpng_mmm_fsm.vh"
-
-
- //
- // Parameters
- //
- parameter INDEX_WIDTH = 6;
-
-
- //
- // Ports
- //
- input clk;
- input rst_n;
- input [ INDEX_WIDTH-1:0] index_last;
- input [FSM_STATE_WIDTH-1:0] fsm_state_next;
- input [ INDEX_WIDTH-4:0] col_index_zero;
- input [ INDEX_WIDTH-4:0] col_index_next;
- output [ INDEX_WIDTH-4:0] din_addr;
- output [ 3-1:0] din_bank;
- output [ 1-1:0] din_ena;
- output [ 1-1:0] din_reg_ena;
- output [ INDEX_WIDTH-1:0] din_addr_cnt;
- output [ INDEX_WIDTH-1:0] din_addr_cnt_last;
- output [ 3-1:0] din_addr_cnt_lower_prev;
- output [ INDEX_WIDTH-4:0] din_addr_cnt_upper_prev;
-
-
- //
- // Address
- //
- reg [INDEX_WIDTH-1:0] din_addr_reg;
- wire [INDEX_WIDTH-1:0] din_addr_zero = {INDEX_WIDTH{1'b0}};
- reg [INDEX_WIDTH-1:0] din_addr_last;
- wire [INDEX_WIDTH-1:0] din_addr_prev = (din_addr_reg == din_addr_zero) ? din_addr_last : din_addr_reg - 1'b1;
-
- reg [INDEX_WIDTH-1:0] din_addr_cnt_reg;
- wire [INDEX_WIDTH-1:0] din_addr_cnt_zero = {INDEX_WIDTH{1'b0}};
- wire [INDEX_WIDTH-1:0] din_addr_cnt_next = din_addr_cnt_reg + 1'b1;
- reg [INDEX_WIDTH-1:0] din_addr_cnt_last_reg;
- wire [ 3-1:0] din_addr_cnt_lower = din_addr_cnt_reg[ 3-1:0];
- wire [INDEX_WIDTH-4:0] din_addr_cnt_upper = din_addr_cnt_reg[INDEX_WIDTH-1:3];
- reg [ 3-1:0] din_addr_cnt_lower_dly;
- reg [INDEX_WIDTH-4:0] din_addr_cnt_upper_dly;
-
- reg [ 3-1:0] din_bank_reg;
-
-
- //
- // Enables
- //
- reg din_ena_reg = 1'b0;
- reg din_reg_ena_reg = 1'b0;
-
- always @(posedge clk or negedge rst_n)
- //
- if (!rst_n)
- din_ena_reg <= 1'b0;
- else case (fsm_state_next)
- //
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY:
- din_ena_reg <= 1'b1;
- //
- default:
- din_ena_reg <= 1'b0;
- //
- endcase
-
- always @(posedge clk or negedge rst_n)
- //
- if (!rst_n)
- din_reg_ena_reg <= 1'b0;
- else
- din_reg_ena_reg <= din_ena_reg;
-
-
- //
- // Address Mapping
- //
- assign din_addr = din_addr_reg[INDEX_WIDTH-1:3];
-
- assign din_addr_cnt = din_addr_cnt_reg;
- assign din_addr_cnt_last = din_addr_cnt_last_reg;
- assign din_addr_cnt_lower_prev = din_addr_cnt_lower_dly;
- assign din_addr_cnt_upper_prev = din_addr_cnt_upper_dly;
-
- assign din_bank = din_bank_reg;
-
-
- //
- // Enable Mapping
- //
- assign din_ena = din_ena_reg;
- assign din_reg_ena = din_reg_ena_reg;
-
-
- //
- // Delay
- //
- always @(posedge clk) begin
- din_addr_cnt_lower_dly <= din_addr_cnt_lower;
- din_addr_cnt_upper_dly <= din_addr_cnt_upper;
- end
-
-
- always @(posedge clk)
- //
- case (fsm_state_next)
- //
- FSM_STATE_MULT_SQUARE_COL_0_TRIG: begin
- din_addr_reg <= {col_index_zero, {3{1'b0}}};
- din_addr_last <= index_last;
- din_addr_cnt_reg <= din_addr_cnt_zero;
- din_addr_cnt_last_reg <= index_last;
- end
- //
- FSM_STATE_MULT_SQUARE_COL_N_TRIG: begin
- din_addr_reg <= {col_index_next, {3{1'b0}}};
- din_addr_cnt_reg <= din_addr_cnt_zero;
- end
- //
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
- din_addr_reg <= din_addr_prev;
- din_addr_cnt_reg <= din_addr_cnt_next;
- end
- //
- //default:
- //
- endcase
-
- always @(posedge clk)
- //
- case (fsm_state_next)
- //
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY:
- din_bank_reg = BANK_XY_T1T2;
- //
- default:
- din_bank_reg = BANK_XY_ANY;
- //
- endcase
-
-endmodule
diff --git a/rtl/modexpng_mmm_dout_addr.v b/rtl/modexpng_mmm_dout_addr.v
deleted file mode 100644
index 3749d82..0000000
--- a/rtl/modexpng_mmm_dout_addr.v
+++ /dev/null
@@ -1,167 +0,0 @@
-module modexpng_mmm_dout_addr
-(
- clk, rst_n,
- //index_last,
- fsm_state,
- load_xy_addr,
- load_addr_zero,
- load_nn_coeff_addr_done,
- /*
-
- col_index_zero, col_index_next,*/
- x_dout_addr, y_dout_addr,
- x_dout_ena, y_dout_ena,
- x_dout_bank, y_dout_bank
-
-);
-
-
- //
- // Includes
- //
- `include "modexpng_parameters.vh"
- `include "modexpng_parameters_x8.vh"
- `include "modexpng_mmm_fsm.vh"
-
-
- //
- // Parameters
- //
- parameter INDEX_WIDTH = 6;
-
-
- //
- // Ports
- //
- input clk;
- input rst_n;
- //input [ INDEX_WIDTH-1:0] index_last;
- input [FSM_STATE_WIDTH-1:0] fsm_state;
- input [INDEX_WIDTH:0] load_xy_addr; // address
- input load_addr_zero;
- input load_nn_coeff_addr_done;
- //input [ INDEX_WIDTH-4:0] col_index_zero;
- //input [ INDEX_WIDTH-4:0] col_index_next;
- output [INDEX_WIDTH-4:0] x_dout_addr;
- output [INDEX_WIDTH-4:0] y_dout_addr;
-
- output [NUM_MULTS-1:0] x_dout_ena;
- output [NUM_MULTS-1:0] y_dout_ena;
-
- output [3-1:0] x_dout_bank;
- output [3-1:0] y_dout_bank;
-
-
- //
- // Registers
- //
- reg [INDEX_WIDTH-4:0] x_dout_addr_reg; //clog2
- reg [INDEX_WIDTH-4:0] y_dout_addr_reg; //clog2
-
- reg [NUM_MULTS-1:0] x_dout_ena_reg = {NUM_MULTS{1'b0}};
- reg [NUM_MULTS-1:0] y_dout_ena_reg = {NUM_MULTS{1'b0}};
-
- reg [NUM_MULTS-1:0] x_dout_ena_int;
- reg [NUM_MULTS-1:0] y_dout_ena_int;
-
- reg [3-1:0] x_dout_bank_reg;
- reg [3-1:0] y_dout_bank_reg;
-
-
- //
- // Mapping
- //
- assign x_dout_addr = x_dout_addr_reg;
- assign y_dout_addr = y_dout_addr_reg;
-
- assign x_dout_ena = x_dout_ena_reg;
- assign y_dout_ena = y_dout_ena_reg;
-
- assign x_dout_bank = x_dout_bank_reg;
- assign y_dout_bank = y_dout_bank_reg;
-
-
- always @(posedge clk)
- //
- case (fsm_state)
- //
- FSM_STATE_LOAD_T1T2_3: begin
- x_dout_addr_reg <= load_xy_addr[INDEX_WIDTH-1:3];
- y_dout_addr_reg <= load_xy_addr[INDEX_WIDTH-1:3];
- end
- //
- FSM_STATE_LOAD_NN_COEFF_3: begin
- x_dout_addr_reg <= !load_nn_coeff_addr_done ? load_xy_addr[INDEX_WIDTH-1:3] : BANK_XY_AUX_ADDR_N_COEFF[INDEX_WIDTH-4:0];
- y_dout_addr_reg <= !load_nn_coeff_addr_done ? load_xy_addr[INDEX_WIDTH-1:3] : BANK_XY_AUX_ADDR_N_COEFF[INDEX_WIDTH-4:0];
- end
- //
- default: begin
- x_dout_addr_reg <= {INDEX_WIDTH-3{1'bX}};
- y_dout_addr_reg <= {INDEX_WIDTH-3{1'bX}};
- end
- //
- endcase
-
- wire [NUM_MULTS-1:0] load_xy_ena_init = {{NUM_MULTS-1{1'b0}}, 1'b1};
-
- always @(posedge clk)
- //
- case (fsm_state)
- //
- FSM_STATE_LOAD_T1T2_2: begin
- x_dout_ena_int <= load_addr_zero ? load_xy_ena_init : {x_dout_ena_int[NUM_MULTS-2:0], x_dout_ena_int[NUM_MULTS-1]};
- y_dout_ena_int <= load_addr_zero ? load_xy_ena_init : {y_dout_ena_int[NUM_MULTS-2:0], y_dout_ena_int[NUM_MULTS-1]};
- end
- //
- FSM_STATE_LOAD_NN_COEFF_2: begin
- x_dout_ena_int <= load_addr_zero ? load_xy_ena_init : {x_dout_ena_int[NUM_MULTS-2:0], x_dout_ena_int[NUM_MULTS-1] & ~load_nn_coeff_addr_done};
- y_dout_ena_int <= load_addr_zero ? load_xy_ena_init : {y_dout_ena_int[NUM_MULTS-2:0], y_dout_ena_int[NUM_MULTS-1]};
- end
- //
- endcase
-
-
- always @(posedge clk or negedge rst_n)
- //
- if (!rst_n) begin
- x_dout_ena_reg <= {NUM_MULTS{1'b0}};
- y_dout_ena_reg <= {NUM_MULTS{1'b0}};
- end else case (fsm_state)
- //
- FSM_STATE_LOAD_T1T2_3,
- FSM_STATE_LOAD_NN_COEFF_3: begin
- x_dout_ena_reg <= x_dout_ena_int;
- y_dout_ena_reg <= y_dout_ena_int;
- end
- //
- default: begin
- x_dout_ena_reg <= {NUM_MULTS{1'b0}};
- y_dout_ena_reg <= {NUM_MULTS{1'b0}};
- end
- //
- endcase
-
-
- always @(posedge clk)
- //
- case (fsm_state)
- //
- FSM_STATE_LOAD_T1T2_3: begin
- x_dout_bank_reg <= BANK_X_T1;
- y_dout_bank_reg <= BANK_Y_T2;
- end
- //
- FSM_STATE_LOAD_NN_COEFF_3: begin
- x_dout_bank_reg <= !load_nn_coeff_addr_done ? BANK_X_N : BANK_XY_AUX;
- y_dout_bank_reg <= !load_nn_coeff_addr_done ? BANK_Y_N_COEFF : BANK_XY_AUX;
- end
- //
- default: begin
- x_dout_bank_reg <= BANK_XY_ANY;
- y_dout_bank_reg <= BANK_XY_ANY;
- end
- //
- endcase
-
-
-endmodule
diff --git a/rtl/modexpng_mmm_dual_x8.v b/rtl/modexpng_mmm_dual_x8.v
new file mode 100644
index 0000000..2e4f4e0
--- /dev/null
+++ b/rtl/modexpng_mmm_dual_x8.v
@@ -0,0 +1,951 @@
+module modexpng_mmm_dual_x8
+(
+ clk, rst,
+
+ ena, rdy,
+
+
+ ladder_mode,
+ word_index_last,
+ word_index_last_minus1,
+
+ rd_wide_xy_ena,
+ rd_wide_xy_ena_aux,
+ rd_wide_xy_bank,
+ rd_wide_xy_bank_aux,
+ rd_wide_xy_addr,
+ rd_wide_xy_addr_aux,
+ rd_wide_x_dout,
+ rd_wide_y_dout,
+ rd_wide_x_dout_aux,
+ rd_wide_y_dout_aux,
+
+ rd_narrow_xy_ena,
+ rd_narrow_xy_bank,
+ rd_narrow_xy_addr,
+ rd_narrow_x_dout,
+ rd_narrow_y_dout,
+
+ rcmb_wide_xy_bank,
+ rcmb_wide_xy_addr,
+ rcmb_wide_x_dout,
+ rcmb_wide_y_dout,
+ rcmb_wide_xy_valid,
+
+ rcmb_narrow_xy_bank,
+ rcmb_narrow_xy_addr,
+ rcmb_narrow_x_dout,
+ rcmb_narrow_y_dout,
+ rcmb_narrow_xy_valid,
+
+ rcmb_xy_bank,
+ rcmb_xy_addr,
+ rcmb_x_dout,
+ rcmb_y_dout,
+ rcmb_xy_valid,
+
+ rdct_ena
+);
+
+
+ //
+ // Headers
+ //
+ `include "../rtl_1/modexpng_mmm_fsm_old.vh"
+ `include "../rtl_1/modexpng_parameters_old.vh"
+ `include "../rtl_1/modexpng_parameters_x8_old.vh"
+
+
+ //
+ // Ports
+ //
+ input clk;
+ input rst;
+
+ input ena;
+ output rdy;
+
+ input ladder_mode;
+ input [7:0] word_index_last;
+ input [7:0] word_index_last_minus1;
+
+ output rd_wide_xy_ena;
+ output rd_wide_xy_ena_aux;
+ output [ 1:0] rd_wide_xy_bank;
+ output [ 1:0] rd_wide_xy_bank_aux;
+ output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr;
+ output [ 8-1:0] rd_wide_xy_addr_aux;
+ input [18*NUM_MULTS/2-1:0] rd_wide_x_dout;
+ input [18*NUM_MULTS/2-1:0] rd_wide_y_dout;
+ input [ 18-1:0] rd_wide_x_dout_aux;
+ input [ 18-1:0] rd_wide_y_dout_aux;
+
+ output rd_narrow_xy_ena;
+ output [ 1:0] rd_narrow_xy_bank;
+ output [ 7:0] rd_narrow_xy_addr;
+ input [18-1:0] rd_narrow_x_dout;
+ input [18-1:0] rd_narrow_y_dout;
+
+ output [ 1:0] rcmb_wide_xy_bank;
+ output [ 7:0] rcmb_wide_xy_addr;
+ output [17:0] rcmb_wide_x_dout;
+ output [17:0] rcmb_wide_y_dout;
+ output rcmb_wide_xy_valid;
+
+ output [ 1:0] rcmb_narrow_xy_bank;
+ output [ 7:0] rcmb_narrow_xy_addr;
+ output [17:0] rcmb_narrow_x_dout;
+ output [17:0] rcmb_narrow_y_dout;
+ output rcmb_narrow_xy_valid;
+
+ output [ 1:0] rcmb_xy_bank;
+ output [ 7:0] rcmb_xy_addr;
+ output [17:0] rcmb_x_dout;
+ output [17:0] rcmb_y_dout;
+ output rcmb_xy_valid;
+
+ output rdct_ena;
+
+
+ //
+ // FSM Declaration
+ //
+ reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
+ reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
+
+ wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
+ wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
+ wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
+
+
+ //
+ // FSM Process
+ //
+ always @(posedge clk)
+ //
+ if (rst) fsm_state <= FSM_STATE_IDLE;
+ else fsm_state <= fsm_state_next;
+
+
+ //
+ // Storage Control Interface
+ //
+ reg wide_xy_ena = 1'b0;
+ reg wide_xy_ena_aux = 1'b0;
+ reg [ 1:0] wide_xy_bank;
+ reg [ 1:0] wide_xy_bank_aux;
+ reg [ 8-1:0] wide_xy_addr[0:3];
+ reg [ 8-1:0] wide_xy_addr_aux;
+
+ reg narrow_xy_ena = 1'b0;
+ reg [ 1:0] narrow_xy_bank;
+ reg [ 7:0] narrow_xy_addr;
+ reg [ 7:0] narrow_xy_addr_dly;
+
+ assign rd_wide_xy_ena = wide_xy_ena;
+ assign rd_wide_xy_ena_aux = wide_xy_ena_aux;
+ assign rd_wide_xy_bank = wide_xy_bank;
+ assign rd_wide_xy_bank_aux = wide_xy_bank_aux;
+ assign rd_wide_xy_addr_aux = wide_xy_addr_aux;
+
+ assign rd_narrow_xy_ena = narrow_xy_ena;
+ assign rd_narrow_xy_bank = narrow_xy_bank;
+ assign rd_narrow_xy_addr = narrow_xy_addr;
+
+ genvar z;
+ generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ begin : gen_rd_wide_xy_addr
+ assign rd_wide_xy_addr[8*z+:8] = wide_xy_addr[z];
+ end
+ endgenerate
+
+ //
+ // Column Counter
+ //
+ reg [4:0] col_index; // current column index
+ reg [4:0] col_index_prev; // delayed column index value
+ reg [4:0] col_index_last; // index of the very last column
+ reg [4:0] col_index_next; // precomputed next column index
+ reg col_is_last; // flag set during the very last column
+
+ always @(posedge clk)
+ //
+ col_index_prev <= col_index;
+
+ //
+ // Column Counter Increment Logic
+ //
+ always @(posedge clk)
+ // Decoded from fsm_state_next, so these registers update on the same clock edge on which fsm_state takes the listed value.
+ case (fsm_state_next)
+ // First column of any of the three multiplication phases: restart the column counter.
+ FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
+ col_index <= 5'd0;
+ col_index_last <= word_index_last[7:3]; // upper five bits of the last word index
+ col_index_next <= 5'd1;
+ col_is_last <= 1'b0; // NOTE(review): cleared even when word_index_last[7:3] is 0 — confirm single-column operands cannot occur
+
+ end
+ // Any later column: advance to the precomputed index and precompute the following one (wrapping to 0 after the last column).
+ FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
+ col_index <= col_index_next;
+ col_is_last <= col_index_next == col_index_last; // flag is set while the column being entered is the last one
+ col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1;
+ end
+ // No default branch: in every other state all four registers hold their values.
+ endcase
+
+
+ //
+ // Completion Flags
+ //
+ wire square_almost_done_comb;
+ reg square_almost_done_flop = 1'b0;
+ reg square_surely_done_flop = 1'b0;
+
+ wire triangle_almost_done_comb;
+ reg triangle_almost_done_flop = 1'b0;
+ reg triangle_surely_done_flop = 1'b0;
+ reg triangle_tardy_done_flop = 1'b0;
+
+ wire rectangle_almost_done_comb;
+ reg rectangle_almost_done_flop = 1'b0;
+ reg rectangle_surely_done_flop = 1'b0;
+ reg rectangle_tardy_done_flop = 1'b0;
+
+ assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
+ assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index);
+ assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
+
+ //
+ // Square Completion Flags
+ //
+ always @(posedge clk) begin
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY:
+ square_almost_done_flop <= square_almost_done_comb;
+ //
+ default:
+ square_almost_done_flop <= 1'b0;
+ //
+ endcase
+ //
+ square_surely_done_flop <= square_almost_done_flop;
+ //
+ end
+
+ //
+ // Triangle Completion Flags
+ //
+ always @(posedge clk) begin
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
+ triangle_almost_done_flop <= triangle_almost_done_comb;
+ //
+ default:
+ triangle_almost_done_flop <= 1'b0;
+ //
+ endcase
+ //
+ triangle_surely_done_flop <= triangle_almost_done_flop;
+ triangle_tardy_done_flop <= triangle_surely_done_flop;
+ //
+ end
+
+ //
+ // Rectangle Completion Flags
+ //
+ always @(posedge clk) begin
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
+ rectangle_almost_done_flop <= rectangle_almost_done_comb;
+ //
+ default:
+ rectangle_almost_done_flop <= 1'b0;
+ //
+ endcase
+ //
+ rectangle_surely_done_flop <= rectangle_almost_done_flop;
+ rectangle_tardy_done_flop <= rectangle_surely_done_flop;
+ //
+ end
+
+
+ //
+ // Narrow Storage Control Logic
+ //
+ always @(posedge clk)
+ //
+ if (rst) narrow_xy_ena <= 1'b0;
+ else begin
+ //
+ // Narrow Address
+ //
+ case (fsm_state_next)
+ //
+ FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ FSM_STATE_MULT_SQUARE_COL_N_INIT: narrow_xy_addr <= 8'd0;
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0;
+ //
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ?
+ 8'd0 : narrow_xy_addr + 1'b1;
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
+ 8'd1 : narrow_xy_addr + 1'b1;
+ //
+ default: narrow_xy_addr <= 8'dX;
+ //
+ endcase
+ //
+ // Narrow Bank
+ //
+ case (fsm_state_next)
+ //
+ FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= BANK_NARROW_T1T2;
+ //
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ?
+ BANK_NARROW_EXT : BANK_NARROW_N_COEFF;
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
+ BANK_NARROW_EXT : BANK_NARROW_Q;
+ //
+ default: narrow_xy_bank <= 2'bXX;
+ //
+ endcase
+ //
+ case (fsm_state_next)
+ //
+ FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop;
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop;
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop;
+ //
+ default: narrow_xy_ena <= 1'b0;
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Wide Storage Control Logic
+ //
+
+ wire [2:0] wide_offset_rom[0:3];
+
+ generate for (z=1; z<NUM_MULTS; z=z+2)
+ begin : gen_wide_offset_rom
+ assign wide_offset_rom[(z-1)/2] = z[2:0];
+ end
+ endgenerate
+
+ function [7:0] wide_xy_addr_next; // Next wide read address: counts down by one and wraps from zero to the supplied last address.
+ input [7:0] wide_xy_addr_current; // address register value to advance
+ input [7:0] wide_xy_addr_last; // wrap-around target (the visible call sites pass word_index_last)
+ begin
+ if (wide_xy_addr_current > 8'd0) // unsigned compare, i.e. "current is non-zero"
+ wide_xy_addr_next = wide_xy_addr_current - 1'b1; // not at zero yet: step down
+ else
+ wide_xy_addr_next = wide_xy_addr_last; // at zero: wrap to the last address
+ end
+ endfunction
+
+ integer j;
+ always @(posedge clk)
+     //
+     // Wide storage control: addresses, banks and enables of the main and the
+     // auxiliary wide ports, all decoded from fsm_state_next.
+     //
+     if (rst) begin
+         // only the enables are reset; addresses and banks keep their values
+         wide_xy_ena     <= 1'b0;
+         wide_xy_ena_aux <= 1'b0;
+     end else begin
+         //
+         // Wide Address (identical handling in all three multiplication phases)
+         //
+         // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+         //
+         for (j=0; j<(NUM_MULTS/2); j=j+1)
+             case (fsm_state_next)
+                 // first column: column part of the address is zero
+                 FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                 FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                 FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
+                 // later columns: column part comes from col_index_next
+                 FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                 FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                 FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+                 // while a column is processed: count down, wrap to word_index_last
+                 FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                 FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                 FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                 FSM_STATE_MULT_SQUARE_COL_N_BUSY,
+                 FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                 FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                 FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                 FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
+                 FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                 FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                 FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                 FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+                 //
+                 default: wide_xy_addr[j] <= 8'dX;
+             endcase
+         //
+         // Wide Aux Address
+         //
+         case (fsm_state_next)
+             // square and triangle phases always start the aux address at 1
+             FSM_STATE_MULT_SQUARE_COL_0_INIT,
+             FSM_STATE_MULT_SQUARE_COL_N_INIT,
+             FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+             FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1};
+             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+             FSM_STATE_MULT_SQUARE_COL_N_BUSY,
+             FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
+             // rectangle phase: don't care in the very first state, afterwards
+             // the aux address follows rcmb_xy_addr whenever rcmb_xy_valid is set
+             FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;
+             FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+             FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+             FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+             FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+             FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+             FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX;
+             //
+             default: wide_xy_addr_aux <= 8'dX;
+         endcase
+         //
+         // Wide Bank (one fixed bank per multiplication phase)
+         //
+         case (fsm_state_next)
+             FSM_STATE_MULT_SQUARE_COL_0_INIT,
+             FSM_STATE_MULT_SQUARE_COL_N_INIT,
+             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+             FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_T1T2;
+             FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+             FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+             FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_ABL;
+             FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+             FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+             FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+             FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+             FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+             FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N;
+             default: wide_xy_bank <= 3'bXXX;
+         endcase
+         //
+         // Wide Aux Bank
+         //
+         case (fsm_state_next)
+             FSM_STATE_MULT_SQUARE_COL_0_INIT,
+             FSM_STATE_MULT_SQUARE_COL_N_INIT,
+             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+             FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_T1T2;
+             FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+             FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+             FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_ABH;
+             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_ABL;
+             FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+             FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+             FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+             FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+             FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+             FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+             FSM_STATE_MULT_RECTANGLE_HOLDOFF: begin
+                 // rewrite using "Kolya-style" here (get rid of too many xxx's)
+                 // don't care unless rcmb_xy_valid is set and names a known bank
+                 wide_xy_bank_aux <= 3'bXXX;
+                 if (rcmb_xy_valid)
+                     case (rcmb_xy_bank)
+                         BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_ABL;
+                         BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_ABH;
+                     endcase
+             end
+             default: wide_xy_bank_aux <= 3'bXXX;
+         endcase
+         //
+         // Wide Enable (active in every INIT/TRIG/BUSY state of all three phases)
+         //
+         case (fsm_state_next)
+             FSM_STATE_MULT_SQUARE_COL_0_INIT,
+             FSM_STATE_MULT_SQUARE_COL_N_INIT,
+             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+             FSM_STATE_MULT_SQUARE_COL_N_BUSY,
+             FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+             FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+             FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
+             FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+             FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+             FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+             FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+             FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+             FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1;
+             default: wide_xy_ena <= 1'b0;
+         endcase
+         //
+         // Wide Aux Enable (never active during the square phase)
+         //
+         case (fsm_state_next)
+             FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+             FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+             FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1;
+             FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;
+             FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+             FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+             FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+             FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+             FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+             FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;
+             default: wide_xy_ena_aux <= 1'b0;
+         endcase
+         //
+     end
+
+
+ //
+ // Delay Lines
+ //
+ always @(posedge clk) begin
+     // copy of the narrow address delayed by one clock
+     narrow_xy_addr_dly <= narrow_xy_addr;
+ end
+
+
+ //
+ // DSP Array Logic
+ //
+ // Clock enables shared by both DSP array blocks (ports ce_a, ce_b, ce_m, ce_p,
+ // ce_mode); dsp_xy_ce_b_dly is dsp_xy_ce_b delayed by one clock.
+ reg dsp_xy_ce_a = 1'b0;
+ reg dsp_xy_ce_b = 1'b0;
+ reg dsp_xy_ce_b_dly = 1'b0;
+ reg dsp_xy_ce_m = 1'b0;
+ reg dsp_xy_ce_p = 1'b0;
+ reg dsp_xy_ce_mode = 1'b0;
+
+ // mode_z word shared by both DSP array blocks, powers up as all ones
+ reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}};
+
+ // "a" operand buses of the X and Y arrays (five 18-bit lanes each)
+ wire [5*18-1:0] dsp_x_a;
+ wire [5*18-1:0] dsp_y_a;
+
+ // "b" operands of the X and Y arrays (one 16-bit word each)
+ reg [1*16-1:0] dsp_x_b;
+ reg [1*16-1:0] dsp_y_b;
+
+ // upper two bits left over from the carry-merged narrow word, added to the next word
+ reg [ 1:0] dsp_xy_b_carry;
+
+ // product buses of the X and Y arrays (nine 47-bit lanes each)
+ wire [9*47-1:0] dsp_x_p;
+ wire [9*47-1:0] dsp_y_p;
+
+ //generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ //begin : gen_dsp_xy_a_split
+ //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z];
+ //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z];
+ //end
+ //endgenerate
+
+ // the auxiliary wide word occupies the top of the "a" bus, above the main wide words
+ // NOTE(review): presumably 18 aux bits above 4*18 main bits -- confirm against the declarations
+ assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout};
+ assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout};
+
+ //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux;
+ //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux;
+
+ // one-clock delay of ce_b; it drives ce_m and ce_mode in the DSP control logic
+ always @(posedge clk)
+ //
+ dsp_xy_ce_b_dly <= dsp_xy_ce_b;
+
+
+ // Two instances of the same DSP array block, one for the X operands and one
+ // for the Y operands. Both receive identical clock enables and the same
+ // mode_z word; only the a/b inputs and the p output differ.
+ modexpng_dsp_array_block dsp_array_block_x
+ (
+ .clk (clk),
+
+ .ce_a (dsp_xy_ce_a),
+ .ce_b (dsp_xy_ce_b),
+ .ce_m (dsp_xy_ce_m),
+ .ce_p (dsp_xy_ce_p),
+ .ce_mode (dsp_xy_ce_mode),
+
+ .mode_z (dsp_xy_mode_z),
+
+ .a (dsp_x_a),
+ .b (dsp_x_b),
+ .p (dsp_x_p)
+ );
+
+ // Y-side twin of the instance above
+ modexpng_dsp_array_block dsp_array_block_y
+ (
+ .clk (clk),
+
+ .ce_a (dsp_xy_ce_a),
+ .ce_b (dsp_xy_ce_b),
+ .ce_m (dsp_xy_ce_m),
+ .ce_p (dsp_xy_ce_p),
+ .ce_mode (dsp_xy_ce_mode),
+
+ .mode_z (dsp_xy_mode_z),
+
+ .a (dsp_y_a),
+ .b (dsp_y_b),
+ .p (dsp_y_p)
+ );
+
+
+
+
+ //
+ // DSP Control Logic
+ //
+ // narrow enable delayed by one and by two clocks
+ reg narrow_xy_ena_dly1 = 1'b0;
+ reg narrow_xy_ena_dly2 = 1'b0;
+
+ // Derives the DSP clock enables from the delayed narrow enable; every flop
+ // here is cleared by the synchronous reset.
+ always @(posedge clk)
+     if (rst) begin
+         {narrow_xy_ena_dly2, narrow_xy_ena_dly1} <= 2'b00;
+         {dsp_xy_ce_mode, dsp_xy_ce_p, dsp_xy_ce_m, dsp_xy_ce_b, dsp_xy_ce_a} <= 5'b00000;
+     end else begin
+         // two-stage delay of the narrow enable
+         narrow_xy_ena_dly1 <= narrow_xy_ena;
+         narrow_xy_ena_dly2 <= narrow_xy_ena_dly1;
+         // ce_a covers both delayed copies, ce_b only the later one
+         dsp_xy_ce_a <= narrow_xy_ena_dly2 | narrow_xy_ena_dly1;
+         dsp_xy_ce_b <= narrow_xy_ena_dly2;
+         // ce_m and ce_mode both follow the delayed ce_b, ce_p follows ce_m
+         {dsp_xy_ce_mode, dsp_xy_ce_m} <= {2{dsp_xy_ce_b_dly}};
+         dsp_xy_ce_p <= dsp_xy_ce_m;
+     end
+
+ //
+ // DSP Feed Logic
+ //
+ // Selects how the DSP "b" operands are fed: set in the first TRIG state of the
+ // square phase, cleared in the first TRIG state of the triangle phase, and
+ // held in every other state.
+ reg dsp_merge_xy_b;
+
+ always @(posedge clk)
+     if (fsm_state == FSM_STATE_MULT_SQUARE_COL_0_TRIG)
+         dsp_merge_xy_b <= 1'b1;
+     else if (fsm_state == FSM_STATE_MULT_TRIANGLE_COL_0_TRIG)
+         dsp_merge_xy_b <= 1'b0;
+
+ //
+ // On-the-fly Carry Recombination
+ //
+ // narrow words with the carry kept from the previous word added in
+ wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {16'd0, dsp_xy_b_carry};
+ wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {16'd0, dsp_xy_b_carry};
+ // in merge mode both arrays get the same word, picked by ladder_mode
+ wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry;
+
+ always @(posedge clk) begin // rewrite
+     //
+     // idle defaults: operands are don't care, no carry is pending
+     //
+     dsp_x_b        <= {16{1'bX}};
+     dsp_y_b        <= {16{1'bX}};
+     dsp_xy_b_carry <= 2'b00;
+     //
+     if (narrow_xy_ena_dly2) begin
+         if (!dsp_merge_xy_b) begin
+             // plain mode: each array takes the low 16 bits of its own word,
+             // the carry stays at its default of zero
+             dsp_x_b <= rd_narrow_x_dout[15:0];
+             dsp_y_b <= rd_narrow_y_dout[15:0];
+         end else begin
+             // merge mode: low 16 bits go to both arrays, the upper two bits
+             // become the carry for the next word
+             {dsp_xy_b_carry, dsp_x_b} <= rd_narrow_xy_dout_carry_mux;
+             dsp_y_b                   <= rd_narrow_xy_dout_carry_mux[15:0];
+         end
+     end
+     //
+ end
+
+
+ // Advance copies of the mode_z word: adv4 is computed from the FSM state and
+ // moves through adv3, adv2 and adv1 before reaching dsp_xy_mode_z.
+ reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}};
+ reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}};
+ reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
+ reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}};
+
+ //
+ // mode_z word for the square phase: all ones, except that when the upper five
+ // address bits equal the given column index, the bit selected by the lower
+ // three address bits is cleared. The top bit (index NUM_MULTS) is always set.
+ // Assumes NUM_MULTS is 8, so the 3-bit selector covers every lower bit.
+ //
+ function [NUM_MULTS:0] calc_mac_mode_z_square;
+     input [ 4:0] col_index_value;
+     input [ 7:0] narrow_xy_addr_value;
+     begin
+         calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
+         if (narrow_xy_addr_value[7:3] == col_index_value)
+             calc_mac_mode_z_square[narrow_xy_addr_value[2:0]] = 1'b0;
+     end
+ endfunction
+
+ //
+ // mode_z word for the rectangle phase. The pattern is the same as in the
+ // square phase (one cleared bit inside the matching column, all ones
+ // elsewhere), so the square function is reused.
+ //
+ function [NUM_MULTS:0] calc_mac_mode_z_rectangle;
+     input [ 4:0] col_index_value;
+     input [ 7:0] narrow_xy_addr_value;
+     begin
+         calc_mac_mode_z_rectangle = calc_mac_mode_z_square(col_index_value, narrow_xy_addr_value);
+     end
+ endfunction
+
+ // Earliest stage of the mode_z pipeline, decoded from fsm_state_next.
+ always @(posedge clk)
+     case (fsm_state_next)
+         // TRIG state of any phase: all bits low (so easy)
+         FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+         FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+         FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+         FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+         FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+         FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}};
+         // square / rectangle BUSY: per-address pattern from the helper functions
+         FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+         FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);
+         FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+         FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);
+         // triangle BUSY and every remaining state: all bits high
+         default: dsp_xy_mode_z_adv4 <= {9{1'b1}};
+     endcase
+
+ // Four-stage shift of the mode_z word: adv4 -> adv3 -> adv2 -> adv1 -> dsp_xy_mode_z
+ always @(posedge clk)
+     {dsp_xy_mode_z, dsp_xy_mode_z_adv1, dsp_xy_mode_z_adv2, dsp_xy_mode_z_adv3} <=
+         {dsp_xy_mode_z_adv1, dsp_xy_mode_z_adv2, dsp_xy_mode_z_adv3, dsp_xy_mode_z_adv4};
+
+
+
+
+
+ //
+ // Recombinator
+ //
+ // Start strobe for the recombinator and its ready flag; the FSM waits for
+ // rcmb_rdy in each *_HOLDOFF state.
+ reg rcmb_ena = 1'b0;
+ wire rcmb_rdy;
+
+ // The recombinator receives the product buses of both DSP arrays together with
+ // the FSM/column/address context, and drives three output groups:
+ // rcmb_wide_*, rcmb_narrow_* and rdct_narrow_*.
+ modexpng_recombinator_block recombinator_block
+ (
+ .clk (clk),
+
+ .ena (rcmb_ena),
+ .rdy (rcmb_rdy),
+
+ .fsm_state_next (fsm_state_next),
+
+ .word_index_last (word_index_last),
+
+ .dsp_xy_ce_p (dsp_xy_ce_p),
+ .dsp_x_p (dsp_x_p),
+ .dsp_y_p (dsp_y_p),
+
+ .col_index (col_index),
+ .col_index_last (col_index_last),
+
+ .rd_narrow_xy_addr (narrow_xy_addr),
+ .rd_narrow_xy_bank (narrow_xy_bank),
+
+ .rcmb_wide_xy_bank (rcmb_wide_xy_bank),
+ .rcmb_wide_xy_addr (rcmb_wide_xy_addr),
+ .rcmb_wide_x_dout (rcmb_wide_x_dout),
+ .rcmb_wide_y_dout (rcmb_wide_y_dout),
+ .rcmb_wide_xy_valid (rcmb_wide_xy_valid),
+
+ .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank),
+ .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr),
+ .rcmb_narrow_x_dout (rcmb_narrow_x_dout),
+ .rcmb_narrow_y_dout (rcmb_narrow_y_dout),
+ .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid),
+
+ // the rdct_narrow_* ports connect to the rcmb_xy_* nets; rcmb_xy_valid,
+ // rcmb_xy_addr and rcmb_xy_bank also steer the wide aux port during the
+ // rectangle phase
+ .rdct_narrow_xy_bank (rcmb_xy_bank),
+ .rdct_narrow_xy_addr (rcmb_xy_addr),
+ .rdct_narrow_x_dout (rcmb_x_dout),
+ .rdct_narrow_y_dout (rcmb_y_dout),
+ .rdct_narrow_xy_valid (rcmb_xy_valid)
+
+ );
+
+
+ //
+ // Recombinator Enable Logic
+ //
+ // rcmb_ena is set for the clock after ce_a is the only active DSP enable,
+ // i.e. ce_a high while ce_b, ce_m and ce_p are all low.
+ always @(posedge clk) begin
+     if (rst)
+         rcmb_ena <= 1'b0;
+     else
+         rcmb_ena <= dsp_xy_ce_a && !(dsp_xy_ce_b || dsp_xy_ce_m || dsp_xy_ce_p);
+ end
+
+
+ //
+ // Handy Completion Flags
+ //
+ // per-phase "column finished" flags used by the FSM transition logic
+ wire square_done    = square_surely_done_flop;
+ wire rectangle_done = rectangle_tardy_done_flop;
+ // the triangle phase uses the later ("tardy") flag on the last column only
+ wire triangle_done  = col_is_last ? triangle_tardy_done_flop : triangle_surely_done_flop;
+
+
+ //
+ // FSM Transition Logic
+ //
+ // Successor of each COL_N_BUSY state once its column is done: the phase's
+ // HOLDOFF state after the last column, otherwise another COL_N_INIT.
+ assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
+ assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
+ assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
+
+ // Combinational next-state function. Three phases run back to back
+ // (square, triangle, rectangle); each phase steps INIT -> TRIG -> BUSY per
+ // column, stays in BUSY until its *_done flag is set, and ends in a HOLDOFF
+ // state that waits for rcmb_rdy. Any state without a case item (including
+ // FSM_STATE_STOP) returns to FSM_STATE_IDLE.
+ // NOTE(review): COL_0_BUSY always proceeds to COL_N_INIT without checking
+ // col_is_last, so presumably there are always at least two columns -- confirm.
+ always @* begin
+ //
+ fsm_state_next = FSM_STATE_IDLE;
+ //
+ case (fsm_state)
+ FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE;
+
+ FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
+
+ FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+
+ FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF;
+
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
+
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
+
+ FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF;
+
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
+
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
+
+ FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
+
+ default: fsm_state_next = FSM_STATE_IDLE ;
+
+ endcase
+ //
+ end
+
+
+ //
+ // Reductor Control Logic
+ //
+ // Registered start strobe for the reductor, exported as rdct_ena.
+ reg rdct_ena_reg = 1'b0;
+
+ assign rdct_ena = rdct_ena_reg;
+
+ // The strobe is high for the clock after fsm_state was
+ // FSM_STATE_MULT_RECTANGLE_COL_0_INIT and low otherwise. The synchronous
+ // reset takes priority, so a reset arriving in that state cannot start the
+ // reductor; this matches the other control flops, which are all cleared by rst.
+ always @(posedge clk)
+     //
+     if (rst) rdct_ena_reg <= 1'b0;
+     else case (fsm_state)
+         FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
+         default: rdct_ena_reg <= 1'b0;
+     endcase
+
+
+
+endmodule
diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm_old.vh
index 3bdae66..3bdae66 100644
--- a/rtl/modexpng_mmm_fsm.vh
+++ b/rtl/modexpng_mmm_fsm_old.vh
diff --git a/rtl/modexpng_mmm_pad.v b/rtl/modexpng_mmm_pad.v
deleted file mode 100644
index a2a21ff..0000000
--- a/rtl/modexpng_mmm_pad.v
+++ /dev/null
@@ -1,153 +0,0 @@
-module modexpng_mmm_pad
-(
- clk, rst_n,
- fsm_state,
- load_xy_addr_lsb,
- pad_x_rd_addr, pad_y_rd_addr,
- pad_x_rd_ena, pad_y_rd_ena,
- pad_x_rd_dout, pad_y_rd_dout,
- load_x_din, load_y_din
-);
-
-
- //
- // Includes
- //
- `include "modexpng_parameters.vh"
- //`include "modexpng_parameters_x8.vh"
- `include "modexpng_mmm_fsm.vh"
-
-
- //
- // Parameters
- //
- parameter INDEX_WIDTH = 6;
-
-
- //
- // Ports
- //
- input clk;
- input rst_n;
- input [FSM_STATE_WIDTH-1:0] fsm_state;
-
- input [INDEX_WIDTH-1:0] load_xy_addr_lsb;
-
- input [WORD_WIDTH-1:0] load_x_din;
- input [WORD_WIDTH-1:0] load_y_din;
-
- input [INDEX_WIDTH-1:0] pad_x_rd_addr;
- input [INDEX_WIDTH-1:0] pad_y_rd_addr;
-
- input pad_x_rd_ena;
- input pad_y_rd_ena;
-
- output [WORD_WIDTH-1:0] pad_x_rd_dout;
- output [WORD_WIDTH-1:0] pad_y_rd_dout;
-
-
- //
- // Registers
- //
- reg [INDEX_WIDTH-1:0] pad_x_wr_addr;
- reg [INDEX_WIDTH-1:0] pad_y_wr_addr;
- reg pad_x_wr_ena;
- reg pad_y_wr_ena;
- reg [ WORD_WIDTH-1:0] pad_x_wr_din;
- reg [ WORD_WIDTH-1:0] pad_y_wr_din;
-
- bram_1wo_1ro_readfirst_ce #
- (
- .MEM_WIDTH (WORD_WIDTH),
- .MEM_ADDR_BITS (INDEX_WIDTH)
- )
- pad_x
- (
- .clk (clk),
-
- .a_addr (pad_x_wr_addr),
- .a_en (pad_x_wr_ena),
- .a_wr (pad_x_wr_ena),
- .a_in (pad_x_wr_din),
- .a_out (), // unused
-
- .b_addr (pad_x_rd_addr),
- .b_en (pad_x_rd_ena),
- .b_out (pad_x_rd_dout)
- );
-
- bram_1wo_1ro_readfirst_ce #
- (
- .MEM_WIDTH (WORD_WIDTH),
- .MEM_ADDR_BITS (INDEX_WIDTH)
- )
- pad_y
- (
- .clk (clk),
-
- .a_addr (pad_y_wr_addr),
- .a_en (pad_y_wr_ena),
- .a_wr (pad_y_wr_ena),
- .a_in (pad_y_wr_din),
- .a_out (), // unused
-
- .b_addr (pad_y_rd_addr),
- .b_en (pad_y_rd_ena),
- .b_out (pad_y_rd_dout)
- );
-
-
- always @(posedge clk)
- //
- case (fsm_state)
- //
- FSM_STATE_LOAD_T1T2_3: begin
- pad_x_wr_addr <= load_xy_addr_lsb;
- pad_y_wr_addr <= load_xy_addr_lsb;
- end
- //
- default: begin
- pad_x_wr_addr <= {INDEX_WIDTH{1'bX}};
- pad_y_wr_addr <= {INDEX_WIDTH{1'bX}};
- end
- //
- endcase
-
- always @(posedge clk)
- //
- case (fsm_state)
- //
- FSM_STATE_LOAD_T1T2_3: begin
- pad_x_wr_din <= load_x_din;
- pad_y_wr_din <= load_y_din;
- end
- //
- default: begin
- pad_x_wr_din <= load_x_din;
- pad_y_wr_din <= load_y_din;
- end
- //
- endcase
-
-
- always @(posedge clk or negedge rst_n)
- //
- if (!rst_n) begin
- pad_x_wr_ena <= 1'b0;
- pad_y_wr_ena <= 1'b0;
- end else case (fsm_state)
- //
- FSM_STATE_LOAD_T1T2_3: begin
- pad_x_wr_ena <= 1'b1;
- pad_y_wr_ena <= 1'b1;
- end
- //
- default: begin
- pad_x_wr_ena <= 1'b0;
- pad_y_wr_ena <= 1'b0;
- end
- //
- endcase
-
-
-endmodule
diff --git a/rtl/modexpng_mmm_transporter.v b/rtl/modexpng_mmm_transporter.v
deleted file mode 100644
index a8f309a..0000000
--- a/rtl/modexpng_mmm_transporter.v
+++ /dev/null
@@ -1,157 +0,0 @@
-module modexpng_mmm_transporter
-(
- clk,
- ena,
- index_last,
- fsm_state,
- fsm_state_next,
- load_phase,
- load_xy_addr,
- load_xy_addr_vld,
- load_xy_req,
- load_addr_zero,
- load_t1t2_addr_done,
- load_nn_coeff_addr_done
-);
-
-
- //
- // Includes
- //
- //`include "modexpng_parameters.vh"
- //`include "modexpng_parameters_x8.vh"
- `include "modexpng_mmm_fsm.vh"
-
-
- //
- // Parameters
- //
- parameter INDEX_WIDTH = 6;
-
-
- //
- // Ports
- //
- input clk;
- input ena;
- input [ INDEX_WIDTH-1:0] index_last;
- input [FSM_STATE_WIDTH-1:0] fsm_state;
- input [FSM_STATE_WIDTH-1:0] fsm_state_next;
- output load_phase;
- output [ INDEX_WIDTH:0] load_xy_addr;
- output load_xy_addr_vld;
- output load_xy_req;
- output load_addr_zero;
- output load_t1t2_addr_done;
- output load_nn_coeff_addr_done;
-
-
- //
- // Load Address Generator
- //
- reg load_phase_reg;
- reg [INDEX_WIDTH:0] load_xy_addr_reg;
- reg load_xy_addr_vld_reg;
- reg load_xy_req_reg;
-
-
- //
- // Mapping
- //
- assign load_phase = load_phase_reg;
- assign load_xy_addr = load_xy_addr_reg;
- assign load_xy_addr_vld = load_xy_addr_vld_reg;
- assign load_xy_req = load_xy_req_reg;
-
-
- //
- // Handy Quantities
- //
- wire [INDEX_WIDTH:0] load_xy_addr_zero = {{INDEX_WIDTH{1'b0}}, 1'b0};
- wire [INDEX_WIDTH:0] load_xy_addr_next = load_xy_addr_reg + 1'b1;
- wire [INDEX_WIDTH:0] load_xy_addr_xxx = {{INDEX_WIDTH{1'bX}}, 1'bX};
-
-
- //
- // More Handy Quantities
- //
- reg [INDEX_WIDTH:0] load_t1t2_addr_last;
- reg [INDEX_WIDTH:0] load_nn_coeff_addr_last;
-
-
- //
- // Flags
- //
- assign load_addr_zero = load_xy_addr_reg == load_xy_addr_zero;
- assign load_t1t2_addr_done = load_xy_addr_reg == load_t1t2_addr_last;
- assign load_nn_coeff_addr_done = load_xy_addr_reg == load_nn_coeff_addr_last;
-
-
- //
- // Last Index Latch
- //
- always @(posedge clk)
- //
- if (ena && (fsm_state == FSM_STATE_IDLE)) begin
- load_t1t2_addr_last <= {1'b0, index_last};
- load_nn_coeff_addr_last <= {1'b0, index_last} + 1'b1;
- end
-
-
- //
- // Update Load Phase
- //
- always @(posedge clk)
- //
- case (fsm_state_next)
- FSM_STATE_LOAD_T1T2_1,
- FSM_STATE_LOAD_T1T2_2,
- FSM_STATE_LOAD_T1T2_3: load_phase_reg <= 1'b0;
- FSM_STATE_LOAD_NN_COEFF_1,
- FSM_STATE_LOAD_NN_COEFF_2,
- FSM_STATE_LOAD_NN_COEFF_3: load_phase_reg <= 1'b1;
- default: load_phase_reg <= 1'bX;
- endcase
-
-
- //
- // Update Load Address
- //
- always @(posedge clk)
- //
- case (fsm_state_next)
- FSM_STATE_LOAD_T1T2_1: load_xy_addr_reg <= (fsm_state == FSM_STATE_LOAD_T1T2_3) ? load_xy_addr_next : load_xy_addr_zero;
- FSM_STATE_LOAD_T1T2_2,
- FSM_STATE_LOAD_T1T2_3: load_xy_addr_reg <= load_xy_addr_reg;
- FSM_STATE_LOAD_NN_COEFF_1: load_xy_addr_reg <= (fsm_state == FSM_STATE_LOAD_NN_COEFF_3) ? load_xy_addr_next : load_xy_addr_zero;
- FSM_STATE_LOAD_NN_COEFF_2,
- FSM_STATE_LOAD_NN_COEFF_3: load_xy_addr_reg <= load_xy_addr_reg;
- default load_xy_addr_reg <= load_xy_addr_xxx;
- endcase
-
-
- //
- // Update Address Valid Flag
- //
- always @(posedge clk)
- //
- case (fsm_state_next)
- FSM_STATE_LOAD_T1T2_1,
- FSM_STATE_LOAD_NN_COEFF_1: load_xy_addr_vld_reg <= 1'b1;
- default load_xy_addr_vld_reg <= 1'b0;
- endcase
-
-
- //
- // Update Load Request Flag
- //
- always @(posedge clk)
- //
- case (fsm_state_next)
- FSM_STATE_LOAD_T1T2_2,
- FSM_STATE_LOAD_NN_COEFF_2: load_xy_req_reg <= 1'b1;
- default load_xy_req_reg <= 1'b0;
- endcase
-
-
-endmodule
diff --git a/rtl/modexpng_mmm_x8_dual.v b/rtl/modexpng_mmm_x8_dual.v
deleted file mode 100644
index 99a37fa..0000000
--- a/rtl/modexpng_mmm_x8_dual.v
+++ /dev/null
@@ -1,550 +0,0 @@
-module modexpng_mmm_x8_dual
-(
- clk, rst_n,
- ena, rdy,
- mode, transfer,
- index_last,
- x_din, y_din, x_dout, y_dout,
- x_din_addr, y_din_addr, x_dout_addr, y_dout_addr,
- x_din_ena, y_din_ena, x_dout_ena, y_dout_ena, x_din_reg_ena, y_din_reg_ena,
- x_din_bank, y_din_bank, x_dout_bank, y_dout_bank,
- load_phase, load_xy_addr, load_xy_addr_vld, load_xy_req,
- load_x_din, load_y_din
-);
-
-
- //
- // Includes
- //
- `include "modexpng_parameters.vh"
- `include "modexpng_parameters_x8.vh"
- `include "modexpng_mmm_fsm.vh"
-
-
- //
- // Parameters
- //
- parameter INDEX_WIDTH = 6;
-
-
- //
- // Ports
- //
- input clk;
- input rst_n;
-
- input ena;
- output rdy;
-
- input mode; // multiply: 0 = T1:T1*T1, T2:T2*T1, 1 = T1:T1*T2, T2:T2*T2
- // load/unload: 0 = load, 1 = unload
- input transfer; // 0 = multiply, 1 = load/unload
-
- input [INDEX_WIDTH-1:0] index_last;
-
- input [NUM_MULTS*WORD_WIDTH-1:0] x_din;
- input [NUM_MULTS*WORD_WIDTH-1:0] y_din;
- output [NUM_MULTS*WORD_WIDTH-1:0] x_dout;
- output [NUM_MULTS*WORD_WIDTH-1:0] y_dout;
-
- output [INDEX_WIDTH-4:0] x_din_addr;
- output [INDEX_WIDTH-4:0] y_din_addr;
- output [INDEX_WIDTH-4:0] x_dout_addr;
- output [INDEX_WIDTH-4:0] y_dout_addr;
-
- output [ 1-1:0] x_din_ena;
- output [ 1-1:0] y_din_ena;
- output [NUM_MULTS-1:0] x_dout_ena;
- output [NUM_MULTS-1:0] y_dout_ena;
- output [ 1-1:0] x_din_reg_ena;
- output [ 1-1:0] y_din_reg_ena;
-
- output [3-1:0] x_din_bank;
- output [3-1:0] y_din_bank;
- output [3-1:0] x_dout_bank;
- output [3-1:0] y_dout_bank;
-
- output load_phase; // 0 = T1, T2; 1 = N, N_COEFF
- output [ INDEX_WIDTH:0] load_xy_addr; // address
- output load_xy_addr_vld; // address valid
- output load_xy_req; // data request
-
- input [WORD_WIDTH-1:0] load_x_din; // data input
- input [WORD_WIDTH-1:0] load_y_din; // data input
-
-
- //
- // FSM State and Next States
- //
- reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
- reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
- reg [FSM_STATE_WIDTH-1:0] fsm_state_after_idle;
- reg [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
-
-
- //
- // FSM Idle Next State
- //
- always @*
- //
- case ({transfer, mode})
- 2'b00,
- 2'b01: fsm_state_after_idle = FSM_STATE_MULT_SQUARE_COL_0_TRIG;
- 2'b10: fsm_state_after_idle = FSM_STATE_LOAD_T1T2_1;
- 2'b11: fsm_state_after_idle = FSM_STATE_IDLE; //unload?
- endcase
-
-
- //
- // Column Counter
- //
- wire [ INDEX_WIDTH-4:0] col_index;
- wire col_index_done;
- wire [ INDEX_WIDTH-4:0] col_index_zero;
- wire [ INDEX_WIDTH-4:0] col_index_next;
- wire [ INDEX_WIDTH-4:0] col_index_prev;
-
- modexpng_mmm_col_index #
- (
- .INDEX_WIDTH(INDEX_WIDTH)
- )
- mmm_col_index
- (
- .clk (clk),
- .index_last (index_last),
- .fsm_state_next (fsm_state_next),
- .col_index (col_index),
- .col_index_done (col_index_done),
- .col_index_zero (col_index_zero),
- .col_index_next (col_index_next),
- .col_index_prev (col_index_prev)
- );
-
-
- //
- // Load Address Generator
- //
- wire [INDEX_WIDTH-1:0] load_xy_addr_lsb = load_xy_addr[INDEX_WIDTH-1:0];
- wire load_addr_zero;
- wire load_t1t2_addr_done;
- wire load_nn_coeff_addr_done;
-
- modexpng_mmm_transporter #
- (
- .INDEX_WIDTH(INDEX_WIDTH)
- )
- transporter
- (
- .clk (clk),
- .ena (ena),
- .index_last (index_last),
- .fsm_state (fsm_state),
- .fsm_state_next (fsm_state_next),
- .load_phase (load_phase),
- .load_xy_addr (load_xy_addr),
- .load_xy_addr_vld (load_xy_addr_vld),
- .load_xy_req (load_xy_req),
- .load_addr_zero (load_addr_zero),
- .load_t1t2_addr_done (load_t1t2_addr_done),
- .load_nn_coeff_addr_done (load_nn_coeff_addr_done)
- );
-
-
- //
- // X, Y Address
- //
- wire [INDEX_WIDTH-1:0] x_din_addr_cnt;
- wire [INDEX_WIDTH-1:0] x_din_addr_cnt_last;
- wire [ 3-1:0] x_din_addr_cnt_lower_prev;
- wire [INDEX_WIDTH-4:0] x_din_addr_cnt_upper_prev;
-
- modexpng_mmm_din_addr #
- (
- .INDEX_WIDTH(INDEX_WIDTH)
- )
- din_addr_x
- (
- .clk (clk),
- .rst_n (rst_n),
- .index_last (index_last),
- .fsm_state_next (fsm_state_next),
- .col_index_zero (col_index_zero),
- .col_index_next (col_index_next),
- .din_addr (x_din_addr),
- .din_bank (x_din_bank),
- .din_ena (x_din_ena),
- .din_reg_ena (x_din_reg_ena),
- .din_addr_cnt (x_din_addr_cnt),
- .din_addr_cnt_last (x_din_addr_cnt_last),
- .din_addr_cnt_lower_prev (x_din_addr_cnt_lower_prev),
- .din_addr_cnt_upper_prev (x_din_addr_cnt_upper_prev)
- );
-
- modexpng_mmm_dout_addr #
- (
- .INDEX_WIDTH(INDEX_WIDTH)
- )
- dout_addr_xy
- (
- .clk (clk),
- .rst_n (rst_n),
- .fsm_state (fsm_state),
- .load_xy_addr (load_xy_addr),
- .load_addr_zero (load_addr_zero),
- .load_nn_coeff_addr_done (load_nn_coeff_addr_done),
- .x_dout_addr (x_dout_addr),
- .y_dout_addr (y_dout_addr),
- .x_dout_ena (x_dout_ena),
- .y_dout_ena (y_dout_ena),
- .x_dout_bank (x_dout_bank),
- .y_dout_bank (y_dout_bank)
- );
-
-
- //
- // Helper Memories ("Scratchpad")
- //
- reg [INDEX_WIDTH-1:0] pad_xy_rd_addr;
- reg pad_xy_rd_ena = 1'b0;
- wire [ WORD_WIDTH-1:0] pad_x_rd_dout;
- wire [ WORD_WIDTH-1:0] pad_y_rd_dout;
-
- wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_zero = {INDEX_WIDTH{1'b0}};
- wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_next = pad_xy_rd_addr + 1'b1;
-
- modexpng_mmm_pad pad
- (
- .clk (clk),
- .rst_n (rst_n),
- .fsm_state (fsm_state),
- .load_xy_addr_lsb (load_xy_addr_lsb),
- .load_x_din (load_x_din),
- .load_y_din (load_y_din),
- .pad_x_rd_addr (pad_xy_rd_addr),
- .pad_y_rd_addr (pad_xy_rd_addr),
- .pad_x_rd_ena (pad_xy_rd_ena),
- .pad_y_rd_ena (pad_xy_rd_ena),
- .pad_x_rd_dout (pad_x_rd_dout),
- .pad_y_rd_dout (pad_y_rd_dout)
- );
-
-
- always @(posedge clk or negedge rst_n)
- //
- if (!rst_n) begin
- pad_xy_rd_ena <= 1'b0;
- end else case (fsm_state_next)
-
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY:
- pad_xy_rd_ena <= 1'b1;
-
- default:
- pad_xy_rd_ena <= 1'b0;
-
- endcase
-
- always @(posedge clk)
- //
- case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG:
- pad_xy_rd_addr <= pad_xy_rd_addr_zero;
-
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY:
- pad_xy_rd_addr <= pad_xy_rd_addr_next;
-
- default:
- pad_xy_rd_addr <= {INDEX_WIDTH{1'bX}};
-
- endcase
-
-
-
-
- //
- // Flags
- //
-
- wire mult_square_addr_done = x_din_addr_cnt == x_din_addr_cnt_last;
-
- always @*
- //
- fsm_state_after_mult_square = col_index_done ? /*FSM_STATE_MULT_TRIANGLE_TRIG*/FSM_STATE_STOP : FSM_STATE_MULT_SQUARE_COL_N_TRIG;;
-
-
- //
- // MAC Arrays
- //
- reg mac_x_ce = 1'b0;
- reg mac_x_ce_aux = 1'b0;
- reg [NUM_MULTS -1:0] mac_x_clr;
- reg mac_x_clr_aux;
- reg [NUM_MULTS -2:0] mac_x_casc_a;
- reg mac_x_casc_a_aux;
- wire [NUM_MULTS * WORD_WIDTH -1:0] mac_x_a;
- reg [ 1 * WORD_WIDTH -1:0] mac_x_a_aux;
- //wire [ 1 * WORD_WIDTH -1:0] mac_x_a_split[0:NUM_MULTS-1];
- reg [ 1 * WORD_WIDTH -1:0] mac_x_b;
- wire [NUM_MULTS * MAC_WIDTH -1:0] mac_x_p;
- wire [ 1 * MAC_WIDTH -1:0] mac_x_p_aux;
-
- reg mac_y_ce = 1'b0;
- reg mac_y_ce_aux = 1'b0;
- reg [NUM_MULTS -1:0] mac_y_clr;
- reg mac_y_clr_aux;
- reg [NUM_MULTS -2:0] mac_y_casc_a;
- reg mac_y_casc_a_aux;
- wire [NUM_MULTS * WORD_WIDTH -1:0] mac_y_a;
- reg [ 1 * WORD_WIDTH -1:0] mac_y_a_aux;
- //wire [ 1 * WORD_WIDTH -1:0] mac_y_a_split[0:NUM_MULTS-1];
- reg [ 1 * WORD_WIDTH -1:0] mac_y_b;
- wire [NUM_MULTS * MAC_WIDTH -1:0] mac_y_p;
- wire [ 1 * MAC_WIDTH -1:0] mac_y_p_aux;
-
- modexpng_mac_array mac_array_x
- (
- .clk (clk),
- .ce (mac_x_ce),
- .ce_aux (mac_x_ce_aux),
- .clr (mac_x_clr),
- .clr_aux (mac_x_clr_aux),
- .casc_a (mac_x_casc_a),
- .casc_a_aux (mac_x_casc_a_aux),
- .a_in (mac_x_a),
- .a_in_aux (mac_x_a_aux),
- .b_in (mac_x_b),
- .p_out (mac_x_p),
- .p_out_aux (mac_x_p_aux)
- );
-
- // Y-channel MAC array: main-array ports first, then the auxiliary ports.
- modexpng_mac_array mac_array_y
- (
-     .clk        (clk),
-     //
-     .ce         (mac_y_ce),
-     .clr        (mac_y_clr),
-     .casc_a     (mac_y_casc_a),
-     .a_in       (mac_y_a),
-     .b_in       (mac_y_b),
-     .p_out      (mac_y_p),
-     //
-     .ce_aux     (mac_y_ce_aux),
-     .clr_aux    (mac_y_clr_aux),
-     .casc_a_aux (mac_y_casc_a_aux),
-     .a_in_aux   (mac_y_a_aux),
-     .p_out_aux  (mac_y_p_aux)
- );
-
- // Loop variable shared by the generate blocks of this module.
- genvar gen_z;
-
- // Word-by-word mapping of the flat x_din bus onto the A inputs of the X MAC array.
- generate for (gen_z=0; gen_z<NUM_MULTS; gen_z=gen_z+1)
- begin : gen_xy_din
- // NOTE(review): only the X channel is mapped here; no driver for mac_y_a
- // is visible in this part of the file - confirm whether a matching
- // y_din assignment is missing.
- assign mac_x_a[gen_z*WORD_WIDTH+:WORD_WIDTH] = x_din[gen_z*WORD_WIDTH+:WORD_WIDTH];
-
- // (the x_dout/y_dout mapping is done in the separate gen_xy_dout block)
- end
- endgenerate
-
-
- //
- // MAC Clock Enable Logic
- //
- // Advance copy of the MAC clock enable; it is registered once more
- // before reaching mac_x_ce / mac_y_ce.
- reg mac_xy_ce_adv = 1'b0;
-
- always @(posedge clk or negedge rst_n)
-     if (rst_n == 1'b0)
-         mac_xy_ce_adv <= 1'b0;
-     else begin
-         // off by default, on only while a square multiplication is running
-         mac_xy_ce_adv <= 1'b0;
-         case (fsm_state)
-             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-             FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_xy_ce_adv <= 1'b1;
-         endcase
-     end
-
- // Both MAC arrays get the same enable, one clock after mac_xy_ce_adv.
- always @(posedge clk or negedge rst_n)
-     if (rst_n == 1'b0) begin
-         mac_x_ce <= 1'b0;
-         mac_y_ce <= 1'b0;
-     end else begin
-         mac_x_ce <= mac_xy_ce_adv;
-         mac_y_ce <= mac_xy_ce_adv;
-     end
-
-
- //
- // MAC Clear Logic
- //
- // Per-multiplier clear mask for the BUSY states, derived from the delayed
- // column index and the delayed address counter halves.
- wire [NUM_MULTS-1:0] calc_mac_x_clear_square_value =
-     calc_mac_clear_square(col_index_prev, x_din_addr_cnt_lower_prev, x_din_addr_cnt_upper_prev);
-
- reg [NUM_MULTS-1:0] mac_xy_clr_adv;
-
- always @(posedge clk) begin
-     // don't-care outside of the square multiplication states
-     mac_xy_clr_adv <= {NUM_MULTS{1'bX}};
-     case (fsm_state)
-         // TRIG: clear every accumulator
-         FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-         FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_xy_clr_adv <= {NUM_MULTS{1'b1}};
-         // BUSY: clear only the accumulator selected by the mask
-         FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-         FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_xy_clr_adv <= calc_mac_x_clear_square_value;
-     endcase
- end
-
- // X and Y arrays share the clear mask, delayed by one clock.
- always @(posedge clk) begin
-     mac_x_clr <= mac_xy_clr_adv;
-     mac_y_clr <= mac_xy_clr_adv;
- end
-
-
- //
- // MAC Cascade Logic
- //
- // Advance copy of the cascade select bits: all zeros in the TRIG states,
- // all ones in the BUSY states, don't-care elsewhere.
- reg [NUM_MULTS-2:0] mac_xy_casc_a_adv;
-
- always @(posedge clk) begin
-     mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'bX}};
-     case (fsm_state)
-         FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-         FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'b0}};
-         FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-         FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'b1}};
-     endcase
- end
-
- // X and Y arrays share the cascade select, delayed by one clock.
- always @(posedge clk) begin
-     mac_x_casc_a <= mac_xy_casc_a_adv;
-     mac_y_casc_a <= mac_xy_casc_a_adv;
- end
-
-
-
- //
- // DOUT Mapping
- //
- //
- // DOUT registers. Declared ahead of the mapping below so that the
- // continuous assignments never reference an undeclared identifier.
- //
- reg [WORD_WIDTH-1:0] x_dout_reg[0:NUM_MULTS-1];
- reg [WORD_WIDTH-1:0] y_dout_reg[0:NUM_MULTS-1];
-
- //
- // Pack the per-multiplier output registers into the flat x_dout/y_dout buses.
- //
- generate for (gen_z=0; gen_z<NUM_MULTS; gen_z=gen_z+1)
-     begin : gen_xy_dout
-         assign x_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = x_dout_reg[gen_z];
-         assign y_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = y_dout_reg[gen_z];
-     end
- endgenerate
-
-
-
-
- // Output word registers: every entry receives load_x_din/load_y_din in the
- // third cycle of either load sequence and is don't-care in any other state.
- integer int_z;
- always @(posedge clk)
-     for (int_z=0; int_z<NUM_MULTS; int_z=int_z+1) begin
-         x_dout_reg[int_z] <= {WORD_WIDTH{1'bX}};
-         y_dout_reg[int_z] <= {WORD_WIDTH{1'bX}};
-         case (fsm_state)
-             FSM_STATE_LOAD_T1T2_3,
-             FSM_STATE_LOAD_NN_COEFF_3: begin
-                 x_dout_reg[int_z] <= load_x_din;
-                 y_dout_reg[int_z] <= load_y_din;
-             end
-         endcase
-     end
-
-
-
- //
- // FSM Process
- //
- // State register with asynchronous active-low reset to IDLE.
- always @(posedge clk or negedge rst_n) begin
-     if (!rst_n)
-         fsm_state <= FSM_STATE_IDLE;
-     else
-         fsm_state <= fsm_state_next;
- end
-
-
- //
- // FSM Transition Logic
- //
- // Combinational next-state function of the FSM.
- always @* begin
- //
- // default: any state without an entry below returns to IDLE
- fsm_state_next = FSM_STATE_IDLE;
- //
- case (fsm_state)
- // leave IDLE only when ena is set; the target comes from fsm_state_after_idle
- FSM_STATE_IDLE: fsm_state_next = ena ? fsm_state_after_idle : FSM_STATE_IDLE;
-
- // T1T2 load: three-state loop repeated until load_t1t2_addr_done
- FSM_STATE_LOAD_T1T2_1: fsm_state_next = FSM_STATE_LOAD_T1T2_2 ;
- FSM_STATE_LOAD_T1T2_2: fsm_state_next = FSM_STATE_LOAD_T1T2_3 ;
- FSM_STATE_LOAD_T1T2_3: fsm_state_next = load_t1t2_addr_done ? FSM_STATE_LOAD_NN_COEFF_1 : FSM_STATE_LOAD_T1T2_1;
-
- // N_COEFF load: same three-state loop, then STOP
- FSM_STATE_LOAD_NN_COEFF_1: fsm_state_next = FSM_STATE_LOAD_NN_COEFF_2 ;
- FSM_STATE_LOAD_NN_COEFF_2: fsm_state_next = FSM_STATE_LOAD_NN_COEFF_3 ;
- FSM_STATE_LOAD_NN_COEFF_3: fsm_state_next = load_nn_coeff_addr_done ? FSM_STATE_STOP : FSM_STATE_LOAD_NN_COEFF_1;
-
- // square multiplication: TRIG lasts one cycle, BUSY holds until mult_square_addr_done
- FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
- FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_done ? FSM_STATE_MULT_SQUARE_COL_N_TRIG : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
- FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
-
- // triangle and rectangle stages are disabled in this version
- /*
- FSM_STATE_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_TRIANGLE_COL_0_BUSY ;
- FSM_STATE_TRIANGLE_COL_0_BUSY: fsm_state_next = din_addr_narrow_done ? FSM_STATE_TRIANGLE_COL_N_TRIG : FSM_STATE_TRIANGLE_COL_0_BUSY;
- FSM_STATE_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_TRIANGLE_COL_N_BUSY ;
- FSM_STATE_TRIANGLE_COL_N_BUSY: fsm_state_next = din_addr_narrow_done ? fsm_state_after_triangle : FSM_STATE_TRIANGLE_COL_N_BUSY;
-
- FSM_STATE_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_RECTANGLE_COL_0_BUSY ;
- FSM_STATE_RECTANGLE_COL_0_BUSY: fsm_state_next = din_addr_narrow_done ? FSM_STATE_RECTANGLE_COL_N_TRIG : FSM_STATE_RECTANGLE_COL_0_BUSY;
- FSM_STATE_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_RECTANGLE_COL_N_BUSY ;
- FSM_STATE_RECTANGLE_COL_N_BUSY: fsm_state_next = din_addr_narrow_done ? fsm_state_after_rectangle : FSM_STATE_RECTANGLE_COL_N_BUSY;
- */
-
- // STOP lasts a single cycle
- FSM_STATE_STOP: fsm_state_next = FSM_STATE_IDLE ;
-
- endcase
- //
- end
-
-
- //
- // Ready Output
- //
- // Ready flag: dropped when ena is seen in IDLE, raised again in STOP,
- // held in every other state.
- reg rdy_reg = 1'b1;
- assign rdy = rdy_reg;
-
- always @(posedge clk or negedge rst_n)
-     if (!rst_n)
-         rdy_reg <= 1'b1;
-     else if (fsm_state == FSM_STATE_IDLE) begin
-         if (ena) rdy_reg <= 1'b0;
-     end else if (fsm_state == FSM_STATE_STOP)
-         rdy_reg <= 1'b1;
-
- // Clear mask for the square multiplication: one-hot at the position given
- // by the lower address bits when the upper address bits equal the column
- // index, all zeros otherwise.
- function [ NUM_MULTS-1:0] calc_mac_clear_square;
-     input [INDEX_WIDTH-4:0] col_index_delayed;
-     input [ 3-1:0] x_din_addr_cnt_lower_delayed;
-     input [INDEX_WIDTH-4:0] x_din_addr_cnt_upper_delayed;
-     begin
-         calc_mac_clear_square = {NUM_MULTS{1'b0}};
-         if (x_din_addr_cnt_upper_delayed == col_index_delayed)
-             calc_mac_clear_square = 8'b00000001 << x_din_addr_cnt_lower_delayed;
-     end
- endfunction
-
-
-endmodule
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
deleted file mode 100644
index 77b57f3..0000000
--- a/rtl/modexpng_parameters.vh
+++ /dev/null
@@ -1,39 +0,0 @@
-//localparam WORD_WIDTH = 17;
-//localparam MAC_WIDTH = 47;
-
-//localparam BANK_ADDR_WIDTH = 3; // TODO: Replace everywhere!
-
-// 3-bit bank selector encodings (BANK_FAT_*), one value per bank 0..7.
-localparam [2:0] BANK_FAT_T1T2 = 3'd0;
-localparam [2:0] BANK_FAT_ABL = 3'd1;
-localparam [2:0] BANK_FAT_ABH = 3'd2;
-localparam [2:0] BANK_FAT_N = 3'd3;
-localparam [2:0] BANK_FAT_ML = 3'd4; // not needed
-localparam [2:0] BANK_FAT_MH = 3'd5; // not needed
-localparam [2:0] BANK_FAT_EXT = 3'd6; // 0 -> MH'
-localparam [2:0] BANK_FAT_UNUSED = 3'd7; // not needed
-
-// 2-bit bank selector encodings (BANK_SLIM_*), one value per bank 0..3.
-localparam [1:0] BANK_SLIM_T1T2 = 2'd0;
-localparam [1:0] BANK_SLIM_N_COEFF = 2'd1;
-localparam [1:0] BANK_SLIM_Q = 2'd2;
-localparam [1:0] BANK_SLIM_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q'
-
-
-//localparam BANK_Y_T2 = 3'd0;
-//localparam BANK_XY_T1T2 = 3'd0;
-
-//localparam BANK_XY_AB_LSB = 3'd1;
-//localparam BANK_XY_AB_MSB = 3'd2;
-
-//localparam BANK_X_N = 3'd3;
-//localparam BANK_Y_N_COEFF = 3'd3;
-
-//localparam BANK_XY_M = 3'd4;
-
-//localparam BANK_XY_Q_LSB = 3'd5;
-//localparam BANK_XY_Q_MSB = 3'd6;
-
-//localparam BANK_XY_AUX = 3'd7;
-
-//localparam BANK_XY_ANY = 3'bXXX;
-
-//localparam BANK_XY_AUX_ADDR_N_COEFF = 0;
diff --git a/rtl/modexpng_parameters_old.vh b/rtl/modexpng_parameters_old.vh
new file mode 100644
index 0000000..d30b751
--- /dev/null
+++ b/rtl/modexpng_parameters_old.vh
@@ -0,0 +1,40 @@
+
+//localparam WORD_WIDTH = 17;
+//localparam MAC_WIDTH = 47;
+
+// Width in bits of every bank selector defined below.
+localparam BANK_ADDR_WIDTH = 2; // TODO: Replace everywhere!
+
+// BANK_WIDE_* selector encodings.
+localparam [1:0] BANK_WIDE_T1T2 = 2'd0;
+localparam [1:0] BANK_WIDE_ABL = 2'd1;
+localparam [1:0] BANK_WIDE_ABH = 2'd2;
+localparam [1:0] BANK_WIDE_N = 2'd3;
+
+// BANK_RCMB_* selector encodings (value 2'd3 is not assigned).
+localparam [1:0] BANK_RCMB_ML = 2'd0;
+localparam [1:0] BANK_RCMB_MH = 2'd1;
+localparam [1:0] BANK_RCMB_EXT = 2'd2; // 0 -> MH'
+
+// BANK_NARROW_* selector encodings.
+localparam [1:0] BANK_NARROW_T1T2 = 2'd0;
+localparam [1:0] BANK_NARROW_N_COEFF = 2'd1;
+localparam [1:0] BANK_NARROW_Q = 2'd2;
+localparam [1:0] BANK_NARROW_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q'
+
+
+//localparam BANK_Y_T2 = 3'd0;
+//localparam BANK_XY_T1T2 = 3'd0;
+
+//localparam BANK_XY_AB_LSB = 3'd1;
+//localparam BANK_XY_AB_MSB = 3'd2;
+
+//localparam BANK_X_N = 3'd3;
+//localparam BANK_Y_N_COEFF = 3'd3;
+
+//localparam BANK_XY_M = 3'd4;
+
+//localparam BANK_XY_Q_LSB = 3'd5;
+//localparam BANK_XY_Q_MSB = 3'd6;
+
+//localparam BANK_XY_AUX = 3'd7;
+
+//localparam BANK_XY_ANY = 3'bXXX;
+
+//localparam BANK_XY_AUX_ADDR_N_COEFF = 0;
diff --git a/rtl/modexpng_parameters_x8.vh b/rtl/modexpng_parameters_x8_old.vh
index 8734354..8734354 100644
--- a/rtl/modexpng_parameters_x8.vh
+++ b/rtl/modexpng_parameters_x8_old.vh
diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v
deleted file mode 100644
index 957ba8e..0000000
--- a/rtl/modexpng_part_recombinator.v
+++ /dev/null
@@ -1,1128 +0,0 @@
-module modexpng_part_recombinator
-(
- clk,
- rdy,
- fsm_state_next,
- index_last,
- dsp_x_ce_p, dsp_y_ce_p,
- ena_x, ena_y,
- dsp_x_p, dsp_y_p,
- col_index, col_index_last,
- slim_bram_xy_addr, slim_bram_xy_bank,
- rcmb_fat_bram_xy_bank, rcmb_fat_bram_xy_addr, rcmb_fat_bram_x_dout, rcmb_fat_bram_y_dout, rcmb_fat_bram_xy_dout_valid,
- rcmb_slim_bram_xy_bank, rcmb_slim_bram_xy_addr, rcmb_slim_bram_x_dout, rcmb_slim_bram_y_dout, rcmb_slim_bram_xy_dout_valid
-);
-
-
- //
- // Headers
- //
- `include "../rtl/modexpng_mmm_fsm.vh"
- `include "../rtl/modexpng_parameters.vh"
- `include "../rtl/modexpng_parameters_x8.vh"
-
-
- input clk;
- output rdy;
- input [FSM_STATE_WIDTH-1:0] fsm_state_next;
- input [7:0] index_last;
- input dsp_x_ce_p;
- input dsp_y_ce_p;
- input ena_x;
- input ena_y;
- input [9*47-1:0] dsp_x_p;
- input [9*47-1:0] dsp_y_p;
- input [ 4:0] col_index;
- input [ 4:0] col_index_last;
- input [ 7:0] slim_bram_xy_addr;
- input [ 1:0] slim_bram_xy_bank;
-
- output [ 2:0] rcmb_fat_bram_xy_bank;
- output [ 7:0] rcmb_fat_bram_xy_addr;
- output [ 17:0] rcmb_fat_bram_x_dout;
- output [ 17:0] rcmb_fat_bram_y_dout;
- output rcmb_fat_bram_xy_dout_valid;
-
- output [ 2:0] rcmb_slim_bram_xy_bank;
- output [ 7:0] rcmb_slim_bram_xy_addr;
- output [ 17:0] rcmb_slim_bram_x_dout;
- output [ 17:0] rcmb_slim_bram_y_dout;
- output rcmb_slim_bram_xy_dout_valid;
-
-
- //
- // Latches
- //
- reg [1*47-1:0] dsp_x_p_latch[0:8];
- reg [1*47-1:0] dsp_y_p_latch[0:8];
-
-
- //
- // Mapping
- //
- // Per-word views of the flat dsp_x_p/dsp_y_p buses: nine 47-bit entries each.
- wire [46:0] dsp_x_p_split[0:8];
- wire [46:0] dsp_y_p_split[0:8];
-
- // Entry z is bits [47*z +: 47] of the corresponding flat bus.
- // NOTE(review): the arrays hold 9 entries while the loop runs NUM_MULTS+1
- // times, so this presumably relies on NUM_MULTS == 8 - confirm.
- genvar z;
- generate for (z=0; z<(NUM_MULTS+1); z=z+1)
- begin : gen_dsp_xy_p_split
- assign dsp_x_p_split[z] = dsp_x_p[47*z+:47];
- assign dsp_y_p_split[z] = dsp_y_p[47*z+:47];
- end
- endgenerate
-
-
- //
- // Delays
- //
- // dsp_x_ce_p / dsp_y_ce_p delayed by one clock.
- reg dsp_x_ce_p_dly1 = 1'b0;
- reg dsp_y_ce_p_dly1 = 1'b0;
-
- always @(posedge clk) begin
-     dsp_x_ce_p_dly1 <= dsp_x_ce_p;
-     dsp_y_ce_p_dly1 <= dsp_y_ce_p;
- end
-
-
- //
- // Registers
- //
-
- // valid
- reg x_valid_lsb = 1'b0;
- reg y_valid_lsb = 1'b0;
- reg x_aux_lsb = 1'b0;
- reg y_aux_lsb = 1'b0;
- reg x_valid_msb = 1'b0;
- reg y_valid_msb = 1'b0;
-
- // bitmap
- reg [7:0] x_bitmap_lsb = {8{1'b0}};
- reg [7:0] y_bitmap_lsb = {8{1'b0}};
- reg [7:0] x_bitmap_msb = {8{1'b0}};
- reg [7:0] y_bitmap_msb = {8{1'b0}};
-
- // index
- reg [2:0] x_index_lsb = 3'dX;
- reg [2:0] y_index_lsb = 3'dX;
-
- // purge
- reg x_purge_lsb = 1'b0;
- reg y_purge_lsb = 1'b0;
- reg x_purge_msb = 1'b0;
- reg y_purge_msb = 1'b0;
-
- // valid - latch
- reg x_valid_latch_lsb = 1'b0;
- reg y_valid_latch_lsb = 1'b0;
-
- // aux - latch
- reg x_aux_latch_lsb = 1'b0;
- reg y_aux_latch_lsb = 1'b0;
-
- // bitmap - latch
- reg [7:0] x_bitmap_latch_lsb = {8{1'b0}};
- reg [7:0] y_bitmap_latch_lsb = {8{1'b0}};
- reg [7:0] x_bitmap_latch_msb = {8{1'b0}};
- reg [7:0] y_bitmap_latch_msb = {8{1'b0}};
-
- // index - latch
- reg [2:0] x_index_latch_lsb = 3'dX;
- reg [2:0] y_index_latch_lsb = 3'dX;
-
- // purge - latch
- reg x_purge_latch_lsb = 1'b0;
- reg y_purge_latch_lsb = 1'b0;
- reg x_purge_latch_msb = 1'b0;
- reg y_purge_latch_msb = 1'b0;
-
- //
- reg xy_valid_lsb_adv[1:6];
- reg xy_valid_msb_adv[1:6];
- reg xy_aux_lsb_adv[1:6];
- reg [7:0] xy_bitmap_lsb_adv[1:6];
- reg [7:0] xy_bitmap_msb_adv[1:6];
- reg [2:0] xy_index_lsb_adv[1:6];
- reg [2:0] xy_index_msb_adv[1:6];
- reg xy_purge_lsb_adv[1:6];
- reg xy_purge_msb_adv[1:6];
-
- // Operation mode, sampled only while both ena_x and ena_y are set:
- // 1 = square, 2 = triangle, 3 = rectangle, 0 = anything else.
- reg [1:0] rcmb_mode;
-
- always @(posedge clk)
-     if (ena_x && ena_y) begin
-         rcmb_mode <= 2'd0;
-         case (fsm_state_next)
-             FSM_STATE_MULT_SQUARE_COL_0_BUSY:    rcmb_mode <= 2'd1;
-             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:  rcmb_mode <= 2'd2;
-             FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
-         endcase
-     end
-
-
- // Power-up values of the advance pipelines. The arrays are indexed [1:6],
- // so the loop covers all six stages; previously stage 6 was left
- // uninitialised and its X was shifted into stage 5 on the first clock.
- integer i;
- initial for (i=1; i<=6; i=i+1) begin
-     xy_valid_lsb_adv[i] = 1'b0;
-     xy_valid_msb_adv[i] = 1'b0;
-     xy_aux_lsb_adv[i] = 1'b0;
-     xy_bitmap_lsb_adv[i] = {8{1'b0}};
-     xy_bitmap_msb_adv[i] = {8{1'b0}};
-     xy_index_lsb_adv[i] = 3'dX;   // index is don't-care while valid is low
-     xy_index_msb_adv[i] = 3'dX;
-     xy_purge_lsb_adv[i] = 1'b0;
-     xy_purge_msb_adv[i] = 1'b0;
- end
-
- // 1 when address bits [7:3] equal the column index, else 0.
- // col_index_last_value is accepted but not used.
- function calc_square_valid_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     begin
-         calc_square_valid_lsb = 1'b0;
-         if (slim_bram_xy_addr_value[7:3] == col_index_value)
-             calc_square_valid_lsb = 1'b1;
-     end
- endfunction
-
- // 1 when address bits [7:3] equal the column index, else 0.
- // col_index_last_value is accepted but not used.
- function calc_triangle_valid_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     begin
-         calc_triangle_valid_lsb = 1'b0;
-         if (slim_bram_xy_addr_value[7:3] == col_index_value)
-             calc_triangle_valid_lsb = 1'b1;
-     end
- endfunction
-
- // 1 when the bank is BANK_SLIM_EXT, else 0. The column and address
- // arguments are accepted but do not influence the result.
- function calc_triangle_aux_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     input [1:0] slim_bram_xy_bank_value;
-     begin
-         calc_triangle_aux_lsb = 1'b0;
-         if (slim_bram_xy_bank_value == BANK_SLIM_EXT)
-             calc_triangle_aux_lsb = 1'b1;
-     end
- endfunction
-
- // When address bits [7:3] equal the column index the result is 1 for any
- // bank other than BANK_SLIM_EXT; in all other cases it is 0.
- function calc_rectangle_valid_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     input [1:0] slim_bram_xy_bank_value;
-     begin
-         calc_rectangle_valid_lsb = 1'b0;
-         if (slim_bram_xy_addr_value[7:3] == col_index_value)
-             calc_rectangle_valid_lsb = (slim_bram_xy_bank_value != BANK_SLIM_EXT);
-     end
- endfunction
-
- // One-hot mask at position addr[2:0] when addr[7:3] equals the column
- // index, all zeros otherwise.
- function [7:0] calc_square_bitmap_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     begin
-         calc_square_bitmap_lsb = {8{1'b0}};
-         if (slim_bram_xy_addr_value[7:3] == col_index_value)
-             calc_square_bitmap_lsb = 8'b00000001 << slim_bram_xy_addr_value[2:0];
-     end
- endfunction
-
- // One-hot mask at position addr[2:0] when addr[7:3] equals the column
- // index, all zeros otherwise.
- function [7:0] calc_triangle_bitmap_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     begin
-         calc_triangle_bitmap_lsb = {8{1'b0}};
-         if (slim_bram_xy_addr_value[7:3] == col_index_value)
-             calc_triangle_bitmap_lsb = 8'b00000001 << slim_bram_xy_addr_value[2:0];
-     end
- endfunction
-
- // One-hot mask at position addr[2:0] when addr[7:3] equals the column
- // index and the bank is not BANK_SLIM_EXT; all zeros otherwise.
- function [7:0] calc_rectangle_bitmap_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     input [1:0] slim_bram_xy_bank_value;
-     begin
-         calc_rectangle_bitmap_lsb = {8{1'b0}};
-         if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT))
-             calc_rectangle_bitmap_lsb = 8'b00000001 << slim_bram_xy_addr_value[2:0];
-     end
- endfunction
-
- // Returns addr[2:0] when addr[7:3] equals the column index; don't-care
- // otherwise.
- function [2:0] calc_square_index_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     begin
-         calc_square_index_lsb = 3'dX;
-         if (slim_bram_xy_addr_value[7:3] == col_index_value)
-             calc_square_index_lsb = slim_bram_xy_addr_value[2:0];
-     end
- endfunction
-
- // Returns addr[2:0] when addr[7:3] equals the column index; don't-care
- // otherwise.
- function [2:0] calc_triangle_index_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     begin
-         calc_triangle_index_lsb = 3'dX;
-         if (slim_bram_xy_addr_value[7:3] == col_index_value)
-             calc_triangle_index_lsb = slim_bram_xy_addr_value[2:0];
-     end
- endfunction
-
- // Returns addr[2:0] when addr[7:3] equals the column index and the bank
- // is not BANK_SLIM_EXT; don't-care otherwise.
- function [2:0] calc_rectangle_index_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     input [1:0] slim_bram_xy_bank_value;
-     begin
-         calc_rectangle_index_lsb = 3'dX;
-         if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT))
-             calc_rectangle_index_lsb = slim_bram_xy_addr_value[2:0];
-     end
- endfunction
-
- // Set only when addr[7:3] equals the column index and also equals the
- // last column index.
- function calc_square_purge_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     begin
-         calc_square_purge_lsb = 1'b0;
-         if (slim_bram_xy_addr_value[7:3] == col_index_value)
-             calc_square_purge_lsb = (slim_bram_xy_addr_value[7:3] == col_index_last_value);
-     end
- endfunction
-
- // Set only when addr[7:3] equals the column index and also equals the
- // last column index.
- function calc_rectangle_purge_lsb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     begin
-         calc_rectangle_purge_lsb = 1'b0;
-         if (slim_bram_xy_addr_value[7:3] == col_index_value)
-             calc_rectangle_purge_lsb = (slim_bram_xy_addr_value[7:3] == col_index_last_value);
-     end
- endfunction
-
- // 1 when the full address equals index_last_value, else 0. The column
- // arguments are accepted but not used.
- function calc_square_valid_msb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     input [7:0] index_last_value;
-     begin
-         calc_square_valid_msb = 1'b0;
-         if (slim_bram_xy_addr_value == index_last_value)
-             calc_square_valid_msb = 1'b1;
-     end
- endfunction
-
- // 1 when the address is 1 and the bank is BANK_SLIM_EXT, else 0. The
- // column and index_last arguments are accepted but not used.
- function calc_rectangle_valid_msb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     input [1:0] slim_bram_xy_bank_value;
-     input [7:0] index_last_value;
-     begin
-         calc_rectangle_valid_msb = 1'b0;
-         if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
-             calc_rectangle_valid_msb = 1'b1;
-     end
- endfunction
-
- // When the address equals index_last_value: bits [6:0] are all ones and
- // bit 7 is set unless the column is the last one. All zeros otherwise.
- function [7:0] calc_square_bitmap_msb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     input [7:0] index_last_value;
-     begin
-         calc_square_bitmap_msb = 8'b00000000;
-         if (slim_bram_xy_addr_value == index_last_value)
-             calc_square_bitmap_msb = {col_index_value != col_index_last_value, 7'b1111111};
-     end
- endfunction
-
- // All ones when the address is 1 and the bank is BANK_SLIM_EXT, all zeros
- // otherwise.
- function [7:0] calc_rectangle_bitmap_msb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     input [1:0] slim_bram_xy_bank_value;
-     input [7:0] index_last_value;
-     begin
-         calc_rectangle_bitmap_msb = 8'b00000000;
-         if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
-             calc_rectangle_bitmap_msb = 8'b11111111;
-     end
- endfunction
-
- // Set only when the address equals index_last_value and the column is
- // the last one.
- function calc_square_purge_msb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     input [7:0] index_last_value;
-     begin
-         calc_square_purge_msb = 1'b0;
-         if (slim_bram_xy_addr_value == index_last_value)
-             calc_square_purge_msb = (col_index_value == col_index_last_value);
-     end
- endfunction
-
- // Set only when the address is 1, the bank is BANK_SLIM_EXT and the
- // column is the last one.
- function calc_rectangle_purge_msb;
-     input [4:0] col_index_value;
-     input [4:0] col_index_last_value;
-     input [7:0] slim_bram_xy_addr_value;
-     input [1:0] slim_bram_xy_bank_value;
-     input [7:0] index_last_value;
-     begin
-         calc_rectangle_purge_msb = 1'b0;
-         if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
-             calc_rectangle_purge_msb = (col_index_value == col_index_last_value);
-     end
- endfunction
-
-
- reg recomb_lsb_ce = 1'b0;
- reg recomb_lsb_ce_aux;
- reg [ 2:0] recomb_lsb_ce_purge = 3'b000;
- wire recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_aux | recomb_lsb_ce_purge[0];
- reg recomb_lsb_clr;
-
- reg [46:0] recomb_lsb_din;
- wire [15:0] recomb_lsb_dout;
-
- reg recomb_msb_ce = 1'b0;
- reg [ 1:0] recomb_msb_ce_purge = 2'b00;
- wire recomb_msb_ce_combined = recomb_msb_ce | recomb_msb_ce_purge[0];
- reg recomb_msb_clr;
-
- reg [46:0] recomb_msb_din;
- wire [15:0] recomb_msb_dout;
-
- // LSB recombinator of the X channel.
- modexpng_recombinator_block recomb_x_lsb
- (
-     .clk  (clk),
-     .clr  (recomb_lsb_clr),
-     .ce   (recomb_lsb_ce_combined),
-     .din  (recomb_lsb_din),
-     .dout (recomb_lsb_dout)
- );
-
- // MSB recombinator of the X channel.
- modexpng_recombinator_block recomb_x_msb
- (
-     .clk  (clk),
-     .clr  (recomb_msb_clr),
-     .ce   (recomb_msb_ce_combined),
-     .din  (recomb_msb_din),
-     .dout (recomb_msb_dout)
- );
-
- // Clock-enable generation for both recombinators, driven by the X-channel
- // latch flags. Every register here uses a non-blocking assignment; the
- // 2'b11 load of recomb_msb_ce_purge used to be a blocking '=', which let
- // recomb_msb_ce_combined change inside the active clock edge and raced
- // with the other clocked processes that sample it.
- always @(posedge clk) begin
-     //
-     recomb_lsb_ce     <= x_valid_latch_lsb;
-     recomb_lsb_ce_aux <= x_aux_latch_lsb;
-     recomb_msb_ce     <= x_bitmap_latch_msb[0];
-     //
-     // LSB purge: load three extra enable cycles, then shift them out
-     if (x_purge_latch_lsb)
-         recomb_lsb_ce_purge <= 3'b111;
-     else
-         recomb_lsb_ce_purge <= {1'b0, recomb_lsb_ce_purge[2:1]};
-     //
-     // MSB purge: load two extra enable cycles when only bit 0 of the MSB
-     // bitmap latch is still set, then shift them out
-     if (x_purge_latch_msb && x_bitmap_latch_msb[0] && !x_bitmap_latch_msb[1])
-         recomb_msb_ce_purge <= 2'b11;
-     else
-         recomb_msb_ce_purge <= {1'b0, recomb_msb_ce_purge[1]};
-     //
- end
-
-
- // Clear flags: armed while both ena_x and ena_y are set, each released by
- // the first enable of its own recombinator.
- always @(posedge clk) begin
-     if (ena_x & ena_y)      recomb_lsb_clr <= 1'b1;
-     else if (recomb_lsb_ce) recomb_lsb_clr <= 1'b0;
-     //
-     if (ena_x & ena_y)      recomb_msb_clr <= 1'b1;
-     else if (recomb_msb_ce) recomb_msb_clr <= 1'b0;
- end
-
- // LSB recombinator input: the indexed latch word when the valid flag is
- // set, else latch word 8 when the aux flag is set, else zero. The
- // later assignment wins, so the valid case has priority over aux.
- always @(posedge clk) begin
-     recomb_lsb_din <= {47{1'b0}};
-     if (x_aux_latch_lsb)
-         recomb_lsb_din <= dsp_x_p_latch[8];
-     if (x_valid_latch_lsb)
-         recomb_lsb_din <= dsp_x_p_latch[x_index_latch_lsb];
- end
-
- // MSB recombinator input: latch word 0 while bit 0 of the MSB bitmap
- // latch is set, zero otherwise.
- always @(posedge clk) begin
-     recomb_msb_din <= {47{1'b0}};
-     if (x_bitmap_latch_msb[0])
-         recomb_msb_din <= dsp_x_p_latch[0];
- end
-
-
- // Entry stage (index 6) of the advance pipelines. Idle values are assigned
- // first; the case below overrides only the flags that the upcoming state
- // actually computes, so the last non-blocking assignment wins.
- always @(posedge clk) begin
-     //
-     xy_valid_lsb_adv [6] <= 1'b0;
-     xy_aux_lsb_adv   [6] <= 1'b0;
-     xy_bitmap_lsb_adv[6] <= {8{1'b0}};
-     xy_index_lsb_adv [6] <= 3'dX;
-     xy_purge_lsb_adv [6] <= 1'b0;
-     //
-     xy_valid_msb_adv [6] <= 1'b0;
-     xy_bitmap_msb_adv[6] <= {8{1'b0}};
-     xy_purge_msb_adv [6] <= 1'b0;
-     //
-     case (fsm_state_next)
-         //
-         // square: everything except the aux flag
-         FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-         FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-         FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-         FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
-             xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, slim_bram_xy_addr);
-             xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr);
-             xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, slim_bram_xy_addr);
-             xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
-             //
-             xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, slim_bram_xy_addr, index_last);
-             xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, index_last);
-             xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, slim_bram_xy_addr, index_last);
-         end
-         //
-         // triangle: LSB valid/aux/bitmap/index only, no purge, no MSB part
-         FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-         FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-         FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-         FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
-             xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr);
-             xy_aux_lsb_adv   [6] <= calc_triangle_aux_lsb   (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
-             xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr);
-             xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr);
-         end
-         //
-         // rectangle: everything except the aux flag, bank-aware
-         FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-         FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-         FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-         FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
-             xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
-             xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
-             xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
-             xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
-             //
-             xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
-             xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
-             xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
-         end
-         //
-     endcase
-     //
- end
-
-
- // Flag pipeline: shifts the advance stages towards index 1, copies stage 1
- // into the x/y flag registers and keeps a further delayed "latch" copy.
- always @(posedge clk) begin
- //
- // stage 1 of the advance pipeline feeds both channels identically
- {y_valid_lsb, x_valid_lsb} <= {2{xy_valid_lsb_adv [1]}};
- {y_aux_lsb, x_aux_lsb} <= {2{xy_aux_lsb_adv [1]}};
- {y_bitmap_lsb, x_bitmap_lsb} <= {2{xy_bitmap_lsb_adv[1]}};
- {y_index_lsb, x_index_lsb} <= {2{xy_index_lsb_adv [1]}};
- {y_purge_lsb, x_purge_lsb} <= {2{xy_purge_lsb_adv [1]}};
- //
- // LSB latch registers: the flag registers delayed by one more clock
- {y_valid_latch_lsb, x_valid_latch_lsb} <= {y_valid_lsb, x_valid_lsb};
- {y_aux_latch_lsb, x_aux_latch_lsb} <= {y_aux_lsb, x_aux_lsb};
- {y_bitmap_latch_lsb, x_bitmap_latch_lsb} <= {y_bitmap_lsb, x_bitmap_lsb};
- {y_index_latch_lsb, x_index_latch_lsb} <= {y_index_lsb, x_index_lsb};
- {y_purge_latch_lsb, x_purge_latch_lsb} <= {y_purge_lsb, x_purge_lsb};
- //
- {y_valid_msb, x_valid_msb} <= {2{xy_valid_msb_adv[1]}};
- {y_bitmap_msb, x_bitmap_msb} <= {2{xy_bitmap_msb_adv[1]}};
- {y_purge_msb, x_purge_msb} <= {2{xy_purge_msb_adv[1]}};
- //
- // MSB latch: loaded when x_valid_msb is set, otherwise the bitmap is
- // shifted right by one bit per clock while the purge latch holds.
- // Only the X-channel MSB latches are updated in this process.
- if (x_valid_msb) begin
- x_bitmap_latch_msb <= x_bitmap_msb;
- x_purge_latch_msb <= x_purge_msb;
- end else begin
- x_bitmap_latch_msb <= {1'b0, x_bitmap_latch_msb[7:1]};
- end
- //
- //
- // advance pipeline shift: stage i takes the value of stage i+1
- // (stage 6 is written by a separate process)
- for (i=1; i<6; i=i+1) begin
- xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
- xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1];
- xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
- xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
- xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
- //
- xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1];
- xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1];
- xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1];
- end
- //
- end
-
- // X-channel product latch array (entries 0..7 plus auxiliary entry 8).
- // Shifting has priority over capturing new products.
- always @(posedge clk)
- //
- if (x_bitmap_latch_msb[1]) // only shift 7 times
- //
- // move entries 1..7 down by one position; entry 7 becomes don't-care
- for (i=0; i<8; i=i+1)
- if (i < 7)
- dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
- else
- dsp_x_p_latch[i] <= {47{1'bX}};
- //
- else if (dsp_x_ce_p_dly1) begin
- //
- // capture the product words selected by the LSB bitmap, or by the MSB
- // bitmap while x_valid_msb is set; unselected entries keep their value
- for (i=0; i<8; i=i+1)
- //
- if (x_bitmap_lsb[i])
- dsp_x_p_latch[i] <= dsp_x_p_split[i];
- else if (x_valid_msb && x_bitmap_msb[i])
- dsp_x_p_latch[i] <= dsp_x_p_split[i];
- //
- // auxiliary product word is captured only when the aux flag is set
- if (x_aux_lsb)
- dsp_x_p_latch[8] <= dsp_x_p_split[8];
- //
- end
-
- // Output-valid flags: the combined clock enables delayed by one clock.
- reg recomb_x_lsb_dout_valid = 1'b0;
- reg recomb_x_msb_dout_valid = 1'b0;
-
- always @(posedge clk)
-     {recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid} <= {recomb_msb_ce_combined, recomb_lsb_ce_combined};
-
-
- reg [ 2:0] fat_bram_xy_bank_reg;
- reg [ 7:0] fat_bram_xy_addr_reg;
- reg [17:0] fat_bram_x_dout_reg;
- reg [17:0] fat_bram_y_dout_reg;
- reg fat_bram_xy_dout_valid_reg = 1'b0;
-
- reg [ 2:0] slim_bram_xy_bank_reg;
- reg [ 7:0] slim_bram_xy_addr_reg;
- reg [17:0] slim_bram_x_dout_reg;
- reg [17:0] slim_bram_y_dout_reg;
- reg slim_bram_xy_dout_valid_reg = 1'b0;
-
- reg [ 7:0] bram_xy_cnt_lsb;
- reg [ 7:0] bram_xy_cnt_msb;
-
- reg bram_xy_cnt_lsb_wrapped;
- reg bram_xy_cnt_msb_wrapped;
-
- reg [15:0] recomb_msb_dout_carry_0;
- reg [15:0] recomb_msb_dout_carry_1;
-
- reg [15:0] recomb_msb_dout_delay_0;
- reg [15:0] recomb_msb_dout_delay_1;
- reg [15:0] recomb_msb_dout_delay_2;
-
- reg [ 7:0] recomb_msb_cnt_delay_0 = 8'd0;
- reg [ 7:0] recomb_msb_cnt_delay_1 = 8'd0;
- reg [ 7:0] recomb_msb_cnt_delay_2 = 8'd0;
-
- reg recomb_msb_flag_delay_0;
- reg recomb_msb_flag_delay_1;
- reg recomb_msb_flag_delay_2;
-
- assign rcmb_fat_bram_xy_bank = fat_bram_xy_bank_reg;
- assign rcmb_fat_bram_xy_addr = fat_bram_xy_addr_reg;
- assign rcmb_fat_bram_x_dout = fat_bram_x_dout_reg;
- assign rcmb_fat_bram_y_dout = fat_bram_y_dout_reg;
- assign rcmb_fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg;
-
- assign rcmb_slim_bram_xy_bank = slim_bram_xy_bank_reg;
- assign rcmb_slim_bram_xy_addr = slim_bram_xy_addr_reg;
- assign rcmb_slim_bram_x_dout = slim_bram_x_dout_reg;
- assign rcmb_slim_bram_y_dout = slim_bram_y_dout_reg;
- assign rcmb_slim_bram_xy_dout_valid = slim_bram_xy_dout_valid_reg;
-
- reg rdy_reg = 1'b1;
- reg rdy_adv = 1'b1;
-
- assign rdy = rdy_reg;
-
-
- // Ready register: follows rdy_adv, forced low while both enables are set.
- always @(posedge clk) begin
-     rdy_reg <= rdy_adv;
-     if (ena_x & ena_y)
-         rdy_reg <= 1'b0;
- end
-
-
- // Pushes one (dout, cnt, flag) triple into the three-stage MSB delay
- // lines; stage 0 is the newest entry, stage 2 the oldest.
- task advance_recomb_msb_dout_delay;
-     input [15:0] dout;
-     input [ 7:0] cnt;
-     input flag;
-     begin
-         {recomb_msb_dout_delay_2, recomb_msb_dout_delay_1, recomb_msb_dout_delay_0} <=
-             {recomb_msb_dout_delay_1, recomb_msb_dout_delay_0, dout};
-         //
-         {recomb_msb_cnt_delay_2, recomb_msb_cnt_delay_1, recomb_msb_cnt_delay_0} <=
-             {recomb_msb_cnt_delay_1, recomb_msb_cnt_delay_0, cnt};
-         //
-         {recomb_msb_flag_delay_2, recomb_msb_flag_delay_1, recomb_msb_flag_delay_0} <=
-             {recomb_msb_flag_delay_1, recomb_msb_flag_delay_0, flag};
-     end
- endtask
-
- task shift_recomb_msb_dout_carry;
- input [15:0] dout;
- begin
- recomb_msb_dout_carry_0 <= dout;
- recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;
- end
- endtask
-
- task _update_fat_bram_regs;
- input [ 2:0] bank;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- input valid;
- begin
- fat_bram_xy_bank_reg <= bank;
- fat_bram_xy_addr_reg <= addr;
- fat_bram_x_dout_reg <= dout_x;
- fat_bram_y_dout_reg <= dout_y;
- fat_bram_xy_dout_valid_reg <= valid;
- end
- endtask
-
- task _update_slim_bram_regs;
- input [ 2:0] bank;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- input valid;
- begin
- slim_bram_xy_bank_reg <= bank;
- slim_bram_xy_addr_reg <= addr;
- slim_bram_x_dout_reg <= dout_x;
- slim_bram_y_dout_reg <= dout_y;
- slim_bram_xy_dout_valid_reg <= valid;
- end
- endtask
-
- task set_fat_bram_regs;
- input [ 2:0] bank;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- begin
- _update_fat_bram_regs(bank, addr, dout_x, dout_y, 1'b1);
- end
- endtask
-
- task set_slim_bram_regs;
- input [ 2:0] bank;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- begin
- _update_slim_bram_regs(bank, addr, dout_x, dout_y, 1'b1);
- end
- endtask
-
- task clear_fat_bram_regs;
- begin
- _update_fat_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
- end
- endtask
-
- task clear_slim_bram_regs;
- begin
- _update_slim_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
- end
- endtask
-
- task _set_bram_cnt_lsb;
- input [7:0] cnt;
- input wrapped;
- begin
- bram_xy_cnt_lsb <= cnt;
- bram_xy_cnt_lsb_wrapped <= wrapped;
- end
- endtask
-
- task _set_bram_cnt_msb;
- input [7:0] cnt;
- input wrapped;
- begin
- bram_xy_cnt_msb <= cnt;
- bram_xy_cnt_msb_wrapped <= wrapped;
- end
- endtask
-
- task inc_bram_cnt_lsb;
- begin
- if (bram_xy_cnt_lsb == index_last)
- _set_bram_cnt_lsb(8'd0, 1'b1);
- else
- _set_bram_cnt_lsb(bram_xy_cnt_lsb + 1'b1, bram_xy_cnt_lsb_wrapped);
- end
- endtask
-
- task inc_bram_cnt_msb;
- begin
- if (bram_xy_cnt_msb == index_last)
- _set_bram_cnt_msb(8'd0, 1'b1);
- else
- _set_bram_cnt_msb(bram_xy_cnt_msb + 1'b1, bram_xy_cnt_msb_wrapped);
- end
- endtask
-
- task clr_bram_cnt_lsb;
- begin
- _set_bram_cnt_lsb(8'd0, 1'b0);
- end
- endtask
-
- task clr_bram_cnt_msb;
- begin
- _set_bram_cnt_msb(8'd0, 1'b0);
- end
- endtask
-
-
-
-
-
- wire [1:0] rcmb_xy_dout_valid = {recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid};
-
- always @(posedge clk)
- //
- if (ena_x & ena_y) begin
- clr_bram_cnt_lsb();
- clr_bram_cnt_msb();
- end else begin // if not ready???
- //
- case (rcmb_mode)
- 2'd1: recombine_square();
- 2'd2: recombine_triangle();
- 2'd3: recombine_rectangle();
- endcase
- //
- end
-
- task recombine_square;
- //
- begin
- //
- case (rcmb_xy_dout_valid)
- //
- 2'b01: inc_bram_cnt_lsb();
- 2'b10: inc_bram_cnt_msb();
- 2'b11: begin
- inc_bram_cnt_lsb();
- inc_bram_cnt_msb();
- end
- //
- endcase
- //
- case (rcmb_xy_dout_valid)
- //
- 2'b00: if (recomb_msb_flag_delay_2) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
- else clear_fat_bram_regs();
- 2'b01: set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
- 2'b10: if (bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs();
- else set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
- 2'b11: if (bram_xy_cnt_lsb_wrapped) set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}});
- else set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
- default: clear_fat_bram_regs(); // DEBUG!!!
- //
- endcase
- //
- case (rcmb_xy_dout_valid)
- //
- 2'b00: if (recomb_msb_flag_delay_2) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
- 2'b10: if (bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout);
-// //
- 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1);
- if (bram_xy_cnt_lsb_wrapped) shift_recomb_msb_dout_carry({16{1'bX}});
- end
- //
- endcase
- //
- end
- //
- endtask
-
-
- task recombine_triangle;
- //
- begin
- //
- case (rcmb_xy_dout_valid)
- //
- 2'b01: inc_bram_cnt_lsb();
- //
- endcase
- //
- case (rcmb_xy_dout_valid)
- //
- 2'b00: clear_slim_bram_regs();
- 2'b01: if (!bram_xy_cnt_lsb_wrapped) set_slim_bram_regs(BANK_SLIM_Q, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
- else set_slim_bram_regs(BANK_SLIM_EXT, 8'd1, {2'b00, recomb_lsb_dout}, {18{1'bX}});
- 2'b10: clear_slim_bram_regs();
- 2'b11: clear_slim_bram_regs();
- //
- endcase
- //
- end
- //
- endtask
-
-
- task recombine_rectangle;
- //
- begin
- //
- case (rcmb_xy_dout_valid)
- //
- 2'b01: inc_bram_cnt_lsb();
- 2'b10: inc_bram_cnt_msb();
- 2'b11: begin
- inc_bram_cnt_lsb();
- inc_bram_cnt_msb();
- end
- //
- endcase
-// //
- case (rcmb_xy_dout_valid)
-// //
- 2'b00: if (recomb_msb_flag_delay_2) set_fat_bram_regs(BANK_FAT_MH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
- else clear_fat_bram_regs();
- 2'b01: set_fat_bram_regs(BANK_FAT_ML, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
- 2'b10: if (!bram_xy_cnt_msb_wrapped) begin
- if (bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs();
- else set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
- end else
- set_fat_bram_regs(BANK_FAT_EXT, 8'd0, {2'b00, recomb_msb_dout}, {18{1'bX}});
-
- 2'b11: set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}});
-// //
- endcase
-// //
- case (rcmb_xy_dout_valid)
-// //
- 2'b00: if (recomb_msb_flag_delay_2) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
- 2'b10: begin
- if ((bram_xy_cnt_msb < 8'd2) && !bram_xy_cnt_msb_wrapped) shift_recomb_msb_dout_carry(recomb_msb_dout);
- if (bram_xy_cnt_msb_wrapped) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
- end
-// //
- 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1);
- shift_recomb_msb_dout_carry({16{1'bX}});
- end
-// //
- endcase
- //
- end
- //
- endtask
-
-
- always @(posedge clk)
- //
- if (ena_x & ena_y) begin
- rdy_adv <= 1'b0;
- end else if (!rdy_reg) begin
- //
- case (rcmb_mode)
- //
- 2'd1: case (rcmb_xy_dout_valid)
- //
- 2'b00: begin
- //
- if (recomb_msb_flag_delay_2) begin
- //
- rdy_adv <= ~recomb_msb_flag_delay_1;
- //
- end
- //
- end
- //
- endcase
- //
- 2'd2: case (rcmb_xy_dout_valid)
- //
- 2'b01: rdy_adv <= bram_xy_cnt_lsb_wrapped; //
- //
- endcase
- //
- 2'd3: case (rcmb_xy_dout_valid)
- //
- 2'b00: begin
- //
- if (recomb_msb_flag_delay_2) begin
- //
- rdy_adv <= ~recomb_msb_flag_delay_1;
- //
- end
- //
- end
- //
- endcase
- //
- endcase
- //
- end
-
-
-
- // add ready for mode=3
-endmodule
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index efe0ac5..d6b1ad1 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -1,35 +1,1225 @@
module modexpng_recombinator_block
(
- clk,
- ce, clr,
- din, dout
+ clk, rst,
+ ena, rdy,
+ fsm_state_next,
+ word_index_last,
+ dsp_xy_ce_p,
+ dsp_x_p, dsp_y_p,
+ col_index, col_index_last,
+ rd_narrow_xy_addr, rd_narrow_xy_bank,
+ rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid,
+ rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid,
+ rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
);
- input clk;
- input ce;
- input clr;
- input [46:0] din;
- output [15:0] dout;
- reg [14:0] z;
- reg [16:0] y;
- reg [17:0] x;
- //reg [15:0] w;
+ //
+ // Headers
+ //
+ `include "../rtl_1/modexpng_mmm_fsm_old.vh"
+ `include "../rtl_1/modexpng_parameters_old.vh"
+ `include "../rtl_1/modexpng_parameters_x8_old.vh"
- //assign dout = w;
- assign dout = x[15:0];
+
+ input clk;
+ input rst;
+ input ena;
+ output rdy;
+ input [FSM_STATE_WIDTH-1:0] fsm_state_next;
+ input [7:0] word_index_last;
+ input dsp_xy_ce_p;
+ input [9*47-1:0] dsp_x_p;
+ input [9*47-1:0] dsp_y_p;
+ input [ 4:0] col_index;
+ input [ 4:0] col_index_last;
+
+ input [ 7:0] rd_narrow_xy_addr;
+ input [ 1:0] rd_narrow_xy_bank;
+
+ output [ 1:0] rcmb_wide_xy_bank;
+ output [ 7:0] rcmb_wide_xy_addr;
+ output [ 17:0] rcmb_wide_x_dout;
+ output [ 17:0] rcmb_wide_y_dout;
+ output rcmb_wide_xy_valid;
+
+ output [ 1:0] rcmb_narrow_xy_bank;
+ output [ 7:0] rcmb_narrow_xy_addr;
+ output [ 17:0] rcmb_narrow_x_dout;
+ output [ 17:0] rcmb_narrow_y_dout;
+ output rcmb_narrow_xy_valid;
+
+ output [ 1:0] rdct_narrow_xy_bank;
+ output [ 7:0] rdct_narrow_xy_addr;
+ output [ 17:0] rdct_narrow_x_dout;
+ output [ 17:0] rdct_narrow_y_dout;
+ output rdct_narrow_xy_valid;
+
+
+ //
+ // Latches
+ //
+ reg [1*47-1:0] dsp_x_p_latch[0:8];
+ reg [1*47-1:0] dsp_y_p_latch[0:8];
+
+
+ //
+ // Mapping
+ //
+ wire [46:0] dsp_x_p_split[0:8];
+ wire [46:0] dsp_y_p_split[0:8];
+
+ genvar z;
+ generate for (z=0; z<(NUM_MULTS+1); z=z+1)
+ begin : gen_dsp_xy_p_split
+ assign dsp_x_p_split[z] = dsp_x_p[47*z+:47];
+ assign dsp_y_p_split[z] = dsp_y_p[47*z+:47];
+ end
+ endgenerate
+
+
+ //
+ // Delays
+ //
+ reg dsp_xy_ce_p_dly1 = 1'b0;
+
+ always @(posedge clk)
+ // one-cycle delayed copy of dsp_xy_ce_p, synchronously cleared by rst
+ if (rst) dsp_xy_ce_p_dly1 <= 1'b0;
+ else dsp_xy_ce_p_dly1 <= dsp_xy_ce_p;
+
+
+ //
+ // Registers
+ //
+
+ // valid / aux flags (loaded from stage 1 of the *_adv pipelines)
+ reg xy_valid_lsb = 1'b0;
+ reg xy_aux_lsb = 1'b0;
+ reg xy_valid_msb = 1'b0;
+
+ // bitmap (one bit per DSP product 0..7)
+ reg [7:0] xy_bitmap_lsb = {8{1'b0}};
+ reg [7:0] xy_bitmap_msb = {8{1'b0}};
+
+ // index (don't-care until loaded)
+ reg [2:0] xy_index_lsb = 3'dX;
+
+ // purge
+ reg xy_purge_lsb = 1'b0;
+ reg xy_purge_msb = 1'b0;
+
+ // valid - latch (xy_valid_lsb delayed by one more clock)
+ reg xy_valid_latch_lsb = 1'b0;
+
+ // aux - latch
+ reg xy_aux_latch_lsb = 1'b0;
+
+ // bitmap - latch
+ reg [7:0] xy_bitmap_latch_lsb = {8{1'b0}};
+ reg [7:0] xy_bitmap_latch_msb = {8{1'b0}};
+
+ // index - latch
+ reg [2:0] xy_index_latch_lsb = 3'dX;
+
+ // purge - latch
+ reg xy_purge_latch_lsb = 1'b0;
+ reg xy_purge_latch_msb = 1'b0;
+
+ // advance pipelines: stage 6 is written from the FSM state decode, stage 1 feeds the registers above
+ reg xy_valid_lsb_adv[1:6];
+ reg xy_valid_msb_adv[1:6];
+ reg xy_aux_lsb_adv[1:6];
+ reg [7:0] xy_bitmap_lsb_adv[1:6];
+ reg [7:0] xy_bitmap_msb_adv[1:6];
+ reg [2:0] xy_index_lsb_adv[1:6];
+ reg [2:0] xy_index_msb_adv[1:6]; // NOTE(review): never shifted or read in the visible part of this file - confirm it is still needed
+ reg xy_purge_lsb_adv[1:6];
+ reg xy_purge_msb_adv[1:6];
+
+ reg [1:0] rcmb_mode;
+
+ always @(posedge clk)
+ // the mode is sampled only while ena is high and holds its value otherwise
+ if (ena)
+ // 1 = square, 2 = triangle, 3 = rectangle (COL_0_BUSY states), 0 = any other next state
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1;
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2;
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
+ default: rcmb_mode <= 2'd0;
+ endcase
+
+
+ integer i; // shared loop index, also used by the clocked for-loops in this module
+ initial for (i=1; i<=6; i=i+1) begin // cover every stage of the [1:6] arrays; stage 6 was previously left at X
+ xy_valid_lsb_adv[i] = 1'b0;
+ xy_valid_msb_adv[i] = 1'b0;
+ xy_aux_lsb_adv[i] = 1'b0;
+ xy_bitmap_lsb_adv[i] = {8{1'b0}};
+ xy_bitmap_msb_adv[i] = {8{1'b0}};
+ xy_index_lsb_adv[i] = 3'dX; // index is don't-care while the valid flag is low
+ xy_index_msb_adv[i] = 3'dX;
+ xy_purge_lsb_adv[i] = 1'b0;
+ xy_purge_msb_adv[i] = 1'b0;
+ end
+
+ function calc_square_triangle_valid_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value; // not read by this function
+ input [1:0] narrow_xy_bank_value; // not read by this function
+ input [7:0] narrow_xy_addr_value;
+ begin
+ // A word is valid when the upper five address bits equal the current column index.
+ // Start from "not valid" and override on a match; the low three address bits are ignored.
+ calc_square_triangle_valid_lsb = 1'b0;
+ if (narrow_xy_addr_value[7:3] == col_index_value)
+ calc_square_triangle_valid_lsb = 1'b1;
+ //
+ end
+ endfunction
+
+ function calc_square_valid_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
+ end
+ endfunction
+
+ function calc_triangle_valid_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
+ end
+ endfunction
+
+ function calc_rectangle_valid_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ //
+ if (narrow_xy_addr_value[7:3] == col_index_value)
+ calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT;
+ else
+ calc_rectangle_valid_lsb = 1'b0;
+ //
+ end
+ endfunction
+
+ function calc_triangle_aux_lsb;
+ input [4:0] col_index_value; // not read by this function
+ input [4:0] col_index_last_value; // not read by this function
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value; // not read by this function
+ begin
+ // The aux flag depends on the bank alone: it is set for BANK_NARROW_EXT.
+ // Start from "clear" and override when the bank matches.
+ calc_triangle_aux_lsb = 1'b0;
+ if (narrow_xy_bank_value == BANK_NARROW_EXT)
+ calc_triangle_aux_lsb = 1'b1;
+ //
+ end
+ endfunction
+
+ function [7:0] calc_square_triangle_bitmap_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ //
+ if (narrow_xy_addr_value[7:3] == col_index_value)
+ //
+ case (narrow_xy_addr_value[2:0])
+ 3'b000: calc_square_triangle_bitmap_lsb = 8'b00000001;
+ 3'b001: calc_square_triangle_bitmap_lsb = 8'b00000010;
+ 3'b010: calc_square_triangle_bitmap_lsb = 8'b00000100;
+ 3'b011: calc_square_triangle_bitmap_lsb = 8'b00001000;
+ 3'b100: calc_square_triangle_bitmap_lsb = 8'b00010000;
+ 3'b101: calc_square_triangle_bitmap_lsb = 8'b00100000;
+ 3'b110: calc_square_triangle_bitmap_lsb = 8'b01000000;
+ 3'b111: calc_square_triangle_bitmap_lsb = 8'b10000000;
+ endcase
+ //
+ else
+ calc_square_triangle_bitmap_lsb = {8{1'b0}};
+ //
+ end
+ endfunction
+
+ function [7:0] calc_square_bitmap_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
+ end
+ endfunction
+
+ function [7:0] calc_triangle_bitmap_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
+ end
+ endfunction
+
+ function [7:0] calc_rectangle_bitmap_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ //
+ if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
+ //
+ case (narrow_xy_addr_value[2:0])
+ 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001;
+ 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010;
+ 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100;
+ 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000;
+ 3'b100: calc_rectangle_bitmap_lsb = 8'b00010000;
+ 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000;
+ 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000;
+ 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000;
+ endcase
+ //
+ else
+ calc_rectangle_bitmap_lsb = {8{1'b0}};
+ //
+ end
+ endfunction
+
+ /*
+ * These can be simplified (the difference between square/triangle and
+ * rectangle is that the bank is checked or not). A universal function would
+ * accept a parameter that tells it whether it should check the bank or not.
+ * Let's do it later, too early to optimize now, it seems.
+ *
+ *
+ */
+
+ function [2:0] calc_square_triangle_index_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ //
+ if (narrow_xy_addr_value[7:3] == col_index_value)
+ //
+ case (narrow_xy_addr_value[2:0])
+ 3'b000: calc_square_triangle_index_lsb = 3'd0;
+ 3'b001: calc_square_triangle_index_lsb = 3'd1;
+ 3'b010: calc_square_triangle_index_lsb = 3'd2;
+ 3'b011: calc_square_triangle_index_lsb = 3'd3;
+ 3'b100: calc_square_triangle_index_lsb = 3'd4;
+ 3'b101: calc_square_triangle_index_lsb = 3'd5;
+ 3'b110: calc_square_triangle_index_lsb = 3'd6;
+ 3'b111: calc_square_triangle_index_lsb = 3'd7;
+ endcase
+ //
+ else
+ calc_square_triangle_index_lsb = 3'dX;
+ //
+ end
+ endfunction
+
+ function [2:0] calc_square_index_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
+ end
+ endfunction
+
+ function [2:0] calc_triangle_index_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
+ end
+ endfunction
+
+ function [2:0] calc_rectangle_index_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value; // not read by this function
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ // the index is defined only for a word of the current column that is not in BANK_NARROW_EXT
+ if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
+ // the index equals the low three address bits
+ case (narrow_xy_addr_value[2:0])
+ 3'b000: calc_rectangle_index_lsb = 3'd0;
+ 3'b001: calc_rectangle_index_lsb = 3'd1;
+ 3'b010: calc_rectangle_index_lsb = 3'd2;
+ 3'b011: calc_rectangle_index_lsb = 3'd3;
+ 3'b100: calc_rectangle_index_lsb = 3'd4;
+ 3'b101: calc_rectangle_index_lsb = 3'd5;
+ 3'b110: calc_rectangle_index_lsb = 3'd6;
+ 3'b111: calc_rectangle_index_lsb = 3'd7;
+ endcase
+ // otherwise the index is don't-care
+ else
+ calc_rectangle_index_lsb = 3'dX;
+ //
+ end
+ endfunction
+
+ function calc_square_rectangle_purge_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ // purge is flagged for a word of the current column when that column is also the last column
+ if (narrow_xy_addr_value[7:3] == col_index_value)
+ calc_square_rectangle_purge_lsb = narrow_xy_addr_value[7:3] == col_index_last_value;
+ else
+ calc_square_rectangle_purge_lsb = 1'b0;
+ // narrow_xy_bank_value is not read here
+ end
+ endfunction
+
+ function calc_square_purge_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
+ end
+ endfunction
+
+ function calc_rectangle_purge_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ begin
+ calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
+ end
+ endfunction
+
+ function calc_square_valid_msb;
+ input [4:0] col_index_value; // not read by this function
+ input [4:0] col_index_last_value; // not read by this function
+ input [1:0] narrow_xy_bank_value; // not read by this function
+ input [7:0] narrow_xy_addr_value;
+ input [7:0] index_last_value;
+ begin
+ // The MSB part is valid only on the word whose full address equals index_last_value.
+ // Start from "not valid" and override on an exact address match.
+ calc_square_valid_msb = 1'b0;
+ if (narrow_xy_addr_value == index_last_value)
+ calc_square_valid_msb = 1'b1;
+ //
+ end
+ endfunction
+
+ function calc_rectangle_valid_msb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ input [7:0] index_last_value;
+ begin
+ //
+ if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
+ calc_rectangle_valid_msb = 1'b1;
+ else
+ calc_rectangle_valid_msb = 1'b0;
+ //
+ end
+ endfunction
+
+ function [7:0] calc_square_bitmap_msb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ input [7:0] index_last_value;
+ begin
+ // on the last word: bits 6:0 are always set, bit 7 is set unless the current column is the last one
+ if (narrow_xy_addr_value == index_last_value) begin
+ calc_square_bitmap_msb[7] = col_index_value != col_index_last_value;
+ calc_square_bitmap_msb[6:0] = 7'b1111111;
+ end else
+ calc_square_bitmap_msb[7:0] = 8'b00000000;
+ // narrow_xy_bank_value is not read here
+ end
+ endfunction
+
+ function [7:0] calc_rectangle_bitmap_msb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ input [7:0] index_last_value;
+ begin
+ //
+ if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) begin
+ calc_rectangle_bitmap_msb[7:0] = 8'b11111111;
+ end else
+ calc_rectangle_bitmap_msb[7:0] = 8'b00000000;
+ //
+ end
+ endfunction
+
+ function calc_square_purge_msb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ input [7:0] index_last_value;
+ begin
+ //
+ if (narrow_xy_addr_value == index_last_value)
+ calc_square_purge_msb = col_index_value == col_index_last_value;
+ else
+ calc_square_purge_msb = 1'b0;
+ //
+ end
+ endfunction
+
+ function calc_rectangle_purge_msb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [1:0] narrow_xy_bank_value;
+ input [7:0] narrow_xy_addr_value;
+ input [7:0] index_last_value;
+ begin
+ //
+ if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
+ calc_rectangle_purge_msb = col_index_value == col_index_last_value;
+ else
+ calc_rectangle_purge_msb = 1'b0;
+ //
+ end
+ endfunction
+
+
+ reg rcmb_xy_lsb_ce = 1'b0; // regular clock enable of the LSB recombinator cells
+ reg rcmb_xy_lsb_ce_aux = 1'b0; // auxiliary enable; initialised like its siblings so the combined enable is not X at start
+ reg [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000; // purge enables, bit 0 is the active one
+ wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
+ reg rcmb_xy_lsb_clr; // set while ena is high, cleared after the first rcmb_xy_lsb_ce
+
+ reg [46:0] rcmb_x_lsb_din;
+ reg [46:0] rcmb_y_lsb_din;
+ wire [15:0] rcmb_x_lsb_dout;
+ wire [15:0] rcmb_y_lsb_dout;
+
+ reg rcmb_xy_msb_ce = 1'b0;
+ reg [ 1:0] rcmb_xy_msb_ce_purge = 2'b00;
+ wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
+ reg rcmb_xy_msb_clr;
+
+ reg [46:0] rcmb_x_msb_din;
+ reg [46:0] rcmb_y_msb_din;
+ wire [15:0] rcmb_x_msb_dout;
+ wire [15:0] rcmb_y_msb_dout;
+
+ modexpng_recombinator_cell recomb_x_lsb
+ (
+ .clk (clk),
+ .ce (rcmb_xy_lsb_ce_combined),
+ .clr (rcmb_xy_lsb_clr),
+ .din (rcmb_x_lsb_din),
+ .dout (rcmb_x_lsb_dout)
+ );
+ modexpng_recombinator_cell recomb_y_lsb
+ (
+ .clk (clk),
+ .ce (rcmb_xy_lsb_ce_combined),
+ .clr (rcmb_xy_lsb_clr),
+ .din (rcmb_y_lsb_din),
+ .dout (rcmb_y_lsb_dout)
+ );
+
+ modexpng_recombinator_cell recomb_x_msb
+ (
+ .clk (clk),
+ .ce (rcmb_xy_msb_ce_combined),
+ .clr (rcmb_xy_msb_clr),
+ .din (rcmb_x_msb_din),
+ .dout (rcmb_x_msb_dout)
+ );
+
+ modexpng_recombinator_cell recomb_y_msb
+ (
+ .clk (clk),
+ .ce (rcmb_xy_msb_ce_combined),
+ .clr (rcmb_xy_msb_clr),
+ .din (rcmb_y_msb_din),
+ .dout (rcmb_y_msb_dout)
+ );
+
+ always @(posedge clk) begin
+ // the clock enables follow the latched flags with one cycle of delay
+ rcmb_xy_lsb_ce <= xy_valid_latch_lsb;
+ rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb;
+ rcmb_xy_msb_ce <= xy_bitmap_latch_msb[0];
+ // LSB purge: load three enable bits, then shift them out towards bit 0
+ if (xy_purge_latch_lsb)
+ rcmb_xy_lsb_ce_purge <= 3'b111;
+ else
+ rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]};
+ // MSB purge: load two enable bits when bit 0 of the latched MSB bitmap is set and bit 1 is clear
+ if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1])
+ rcmb_xy_msb_ce_purge <= 2'b11; // non-blocking, matching the else branch (was a blocking '=')
+ else
+ rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]};
+ //
+ end
+
+
+ always @(posedge clk)
+ // both clr flags are raised while ena is high; each one is dropped once its own ce has been seen
+ if (ena) begin
+ rcmb_xy_lsb_clr <= 1'b1;
+ rcmb_xy_msb_clr <= 1'b1;
+ end else begin
+ if (rcmb_xy_lsb_ce) rcmb_xy_lsb_clr <= 1'b0;
+ if (rcmb_xy_msb_ce) rcmb_xy_msb_clr <= 1'b0;
+ end
+
+ always @(posedge clk)
+ // LSB cell input: the indexed latched product has priority, then latch 8 when aux is set, else zero
+ if (xy_valid_latch_lsb) begin
+ rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb];
+ rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb];
+ end else if (xy_aux_latch_lsb) begin
+ rcmb_x_lsb_din <= dsp_x_p_latch[8];
+ rcmb_y_lsb_din <= dsp_y_p_latch[8];
+ end else begin
+ rcmb_x_lsb_din <= {47{1'b0}};
+ rcmb_y_lsb_din <= {47{1'b0}};
+ end
+
+ always @(posedge clk)
+ // MSB cell input: always taken from latch 0 while bit 0 of the latched MSB bitmap is set, else zero
+ if (xy_bitmap_latch_msb[0]) begin
+ rcmb_x_msb_din <= dsp_x_p_latch[0];
+ rcmb_y_msb_din <= dsp_y_p_latch[0];
+ end else begin
+ rcmb_x_msb_din <= {47{1'b0}};
+ rcmb_y_msb_din <= {47{1'b0}};
+ end
+
+
+ always @(posedge clk)
+ //
+ case (fsm_state_next)
+ //
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
+ //
+ xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ xy_aux_lsb_adv [6] <= 1'b0;
+ xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ //
+ xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
+ xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
+ xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
+ //
+ end
+ //
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
+ //
+ xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ xy_aux_lsb_adv [6] <= calc_triangle_aux_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ xy_purge_lsb_adv [6] <= 1'b0;
+ //
+ xy_valid_msb_adv [6] <= 1'b0;
+ xy_bitmap_msb_adv[6] <= {8{1'b0}};
+ xy_purge_msb_adv [6] <= 1'b0;
+ //
+ end
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
+ //
+ xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ xy_aux_lsb_adv [6] <= 1'b0;
+ xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
+ //
+ xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
+ xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
+ xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
+ //
+ end
+ //
+ default: begin
+ //
+ xy_valid_lsb_adv [6] <= 1'b0;
+ xy_aux_lsb_adv [6] <= 1'b0;
+ xy_bitmap_lsb_adv[6] <= {8{1'b0}};
+ xy_index_lsb_adv [6] <= 3'dX;
+ xy_purge_lsb_adv [6] <= 1'b0;
+ //
+ xy_valid_msb_adv [6] <= 1'b0;
+ xy_bitmap_msb_adv[6] <= {8{1'b0}};
+ xy_purge_msb_adv [6] <= 1'b0;
+ //
+ end
+ //
+ endcase
+
+
+ always @(posedge clk) begin
+ // LSB flags: taken from stage 1 of the advance pipelines
+ xy_valid_lsb <= xy_valid_lsb_adv [1];
+ xy_aux_lsb <= xy_aux_lsb_adv [1];
+ xy_bitmap_lsb <= xy_bitmap_lsb_adv[1];
+ xy_index_lsb <= xy_index_lsb_adv [1];
+ xy_purge_lsb <= xy_purge_lsb_adv [1];
+ // LSB latches: the same flags delayed by one more clock
+ xy_valid_latch_lsb <= xy_valid_lsb;
+ xy_aux_latch_lsb <= xy_aux_lsb;
+ xy_bitmap_latch_lsb <= xy_bitmap_lsb;
+ xy_index_latch_lsb <= xy_index_lsb;
+ xy_purge_latch_lsb <= xy_purge_lsb;
+ // MSB flags: taken from stage 1 of the advance pipelines
+ xy_valid_msb <= xy_valid_msb_adv[1];
+ xy_bitmap_msb <= xy_bitmap_msb_adv[1];
+ xy_purge_msb <= xy_purge_msb_adv[1];
+ // MSB latches: loaded when xy_valid_msb is set; otherwise the bitmap shifts right by one
+ if (xy_valid_msb) begin
+ xy_bitmap_latch_msb <= xy_bitmap_msb;
+ xy_purge_latch_msb <= xy_purge_msb;
+ end else begin
+ xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[7:1]};
+ end
+ // (xy_purge_latch_msb keeps its value in the else branch above)
+ // advance pipelines: stages 1..5 take the value of the next stage; stage 6 is written elsewhere
+ for (i=1; i<6; i=i+1) begin
+ xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
+ xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1];
+ xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
+ xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
+ xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
+ //
+ xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1];
+ xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1];
+ xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1];
+ end
+ //
+ end
+
+ always @(posedge clk)
+ // shifting has priority over capturing new products (if / else if)
+ if (xy_bitmap_latch_msb[1]) // only shift 7 times
+ // shift latches 0..7 down by one position
+ for (i=0; i<8; i=i+1)
+ // entries 0..6 take the next entry, entry 7 becomes don't-care; latch 8 is not touched
+ if (i < 7) begin
+ dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
+ dsp_y_p_latch[i] <= dsp_y_p_latch[i+1];
+ end else begin
+ dsp_x_p_latch[i] <= {47{1'bX}};
+ dsp_y_p_latch[i] <= {47{1'bX}};
+ end
+ // capture is qualified by the delayed DSP product clock enable
+ else if (dsp_xy_ce_p_dly1) begin
+ // products 0..7: captured when selected by the LSB bitmap, or by the MSB bitmap while xy_valid_msb is set
+ for (i=0; i<8; i=i+1)
+ // both branches store the same product; they differ only in the selecting bitmap
+ if (xy_bitmap_lsb[i]) begin
+ dsp_x_p_latch[i] <= dsp_x_p_split[i];
+ dsp_y_p_latch[i] <= dsp_y_p_split[i];
+ end else if (xy_valid_msb && xy_bitmap_msb[i]) begin
+ dsp_x_p_latch[i] <= dsp_x_p_split[i];
+ dsp_y_p_latch[i] <= dsp_y_p_split[i];
+ end
+ // product 8 is captured only when the aux flag is set
+ if (xy_aux_lsb) begin
+ dsp_x_p_latch[8] <= dsp_x_p_split[8];
+ dsp_y_p_latch[8] <= dsp_y_p_split[8];
+ end
+ //
+ end
+
+ reg rcmb_xy_lsb_valid = 1'b0;
+ reg rcmb_xy_msb_valid = 1'b0;
+
+ always @(posedge clk)
+ // valid flags are the combined cell clock enables delayed by one clock, cleared by rst
+ if (rst) begin
+ rcmb_xy_lsb_valid <= 1'b0;
+ rcmb_xy_msb_valid <= 1'b0;
+ end else begin
+ rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined;
+ rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined;
+ end
+
+
+ reg [ 1:0] wide_xy_bank;
+ reg [ 7:0] wide_xy_addr;
+ reg [17:0] wide_x_dout;
+ reg [17:0] wide_y_dout;
+ reg wide_xy_valid = 1'b0;
+
+ reg [ 1:0] narrow_xy_bank;
+ reg [ 7:0] narrow_xy_addr;
+ reg [17:0] narrow_x_dout;
+ reg [17:0] narrow_y_dout;
+ reg narrow_xy_valid = 1'b0;
+
+ reg [ 1:0] rdct_xy_bank;
+ reg [ 7:0] rdct_xy_addr;
+ reg [17:0] rdct_x_dout;
+ reg [17:0] rdct_y_dout;
+ reg rdct_xy_valid = 1'b0;
+
+ reg [ 7:0] cnt_lsb;
+ reg [ 7:0] cnt_msb;
+
+ reg cnt_lsb_wrapped;
+ reg cnt_msb_wrapped;
+
+ reg [31:0] rcmb_xy_msb_carry_0;
+ reg [31:0] rcmb_xy_msb_carry_1;
+
+ reg [31:0] rcmb_xy_msb_delay_0;
+ reg [31:0] rcmb_xy_msb_delay_1;
+ reg [31:0] rcmb_xy_msb_delay_2;
+
+ reg [ 7:0] rcmb_msb_cnt_delay_0 = 8'd0;
+ reg [ 7:0] rcmb_msb_cnt_delay_1 = 8'd0;
+ reg [ 7:0] rcmb_msb_cnt_delay_2 = 8'd0;
+
+ reg rcmb_msb_flag_delay_0;
+ reg rcmb_msb_flag_delay_1;
+ reg rcmb_msb_flag_delay_2;
+
+ assign rcmb_wide_xy_bank = wide_xy_bank;
+ assign rcmb_wide_xy_addr = wide_xy_addr;
+ assign rcmb_wide_x_dout = wide_x_dout;
+ assign rcmb_wide_y_dout = wide_y_dout;
+ assign rcmb_wide_xy_valid = wide_xy_valid;
+
+ assign rcmb_narrow_xy_bank = narrow_xy_bank;
+ assign rcmb_narrow_xy_addr = narrow_xy_addr;
+ assign rcmb_narrow_x_dout = narrow_x_dout;
+ assign rcmb_narrow_y_dout = narrow_y_dout;
+ assign rcmb_narrow_xy_valid = narrow_xy_valid;
+
+ assign rdct_narrow_xy_bank = rdct_xy_bank;
+ assign rdct_narrow_xy_addr = rdct_xy_addr;
+ assign rdct_narrow_x_dout = rdct_x_dout;
+ assign rdct_narrow_y_dout = rdct_y_dout;
+ assign rdct_narrow_xy_valid = rdct_xy_valid;
+
+ reg rdy_reg = 1'b1; // registered ready flag exported on rdy; starts high
+ reg rdy_adv = 1'b1; // value copied into rdy_reg on every cycle in which ena is low
+
+ assign rdy = rdy_reg;
- wire [14:0] din_z = din[46:32]; // TODO: maybe determine more precise bound here
- wire [15:0] din_y = din[31:16];
- wire [15:0] din_x = din[15: 0];
always @(posedge clk)
//
- if (ce) begin
- z <= din_z;
- y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z};
- x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {{16{1'b0}}, x[17:16]};
- //w <= clr ? {16{1'bX}} : x[15:0];
+ if (ena) rdy_reg <= 1'b0; // ena drops ready on the next clock edge
+ else rdy_reg <= rdy_adv; // otherwise rdy_reg trails rdy_adv by one cycle
+
+ task advance_rcmb_msb_delay; // shift one {y, x} word, its counter value and a flag into the three-stage delay line
+ input [15:0] dout_x;
+ input [15:0] dout_y;
+ input [ 7:0] cnt;
+ input flag;
+ begin
+ // data: {dout_y, dout_x} -> delay_0 -> delay_1 -> delay_2
+ rcmb_xy_msb_delay_0 <= {dout_y, dout_x};
+ rcmb_xy_msb_delay_1 <= rcmb_xy_msb_delay_0;
+ rcmb_xy_msb_delay_2 <= rcmb_xy_msb_delay_1;
+ // counter value moves in lockstep with the data
+ rcmb_msb_cnt_delay_0 <= cnt;
+ rcmb_msb_cnt_delay_1 <= rcmb_msb_cnt_delay_0;
+ rcmb_msb_cnt_delay_2 <= rcmb_msb_cnt_delay_1;
+ // flag moves in lockstep with the data; callers pass 1'b1 with real data and 1'b0 when flushing
+ rcmb_msb_flag_delay_0 <= flag;
+ rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0;
+ rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1;
+ //
+ end
+ endtask
+
+ task shift_rcmb_msb_carry; // shift one {y, x} word into the two-stage carry register (carry_0 -> carry_1)
+ input [15:0] dout_x;
+ input [15:0] dout_y;
+ begin
+ rcmb_xy_msb_carry_0 <= {dout_y, dout_x}; // y in bits [31:16], x in bits [15:0]
+ rcmb_xy_msb_carry_1 <= rcmb_xy_msb_carry_0; // previous stage-0 word becomes the one consumed via *_carry_1_pad
+ end
+ endtask
+
+ task _update_wide; // low-level writer for all five "wide" output registers (used by set_wide / clear_wide)
+ input [ 1:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ input valid;
+ begin
+ wide_xy_bank <= bank;
+ wide_xy_addr <= addr;
+ wide_x_dout <= dout_x;
+ wide_y_dout <= dout_y;
+ wide_xy_valid <= valid; // exported as rcmb_wide_xy_valid
+ end
+ endtask
+
+ task _update_narrow; // low-level writer for all five "narrow" output registers (used by set_narrow / clear_narrow)
+ input [ 1:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ input valid;
+ begin
+ narrow_xy_bank <= bank;
+ narrow_xy_addr <= addr;
+ narrow_x_dout <= dout_x;
+ narrow_y_dout <= dout_y;
+ narrow_xy_valid <= valid; // exported as rcmb_narrow_xy_valid
end
+ endtask
+
+ task _update_rdct; // low-level writer for all five rdct_* output registers (used by set_rdct / clear_rdct)
+ input [ 1:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ input valid;
+ begin
+ rdct_xy_bank <= bank;
+ rdct_xy_addr <= addr;
+ rdct_x_dout <= dout_x;
+ rdct_y_dout <= dout_y;
+ rdct_xy_valid <= valid; // exported as rdct_narrow_xy_valid
+ end
+ endtask
+
+ task set_wide; // present a word on the "wide" output port with valid asserted
+ input [ 1:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ begin
+ _update_wide(bank, addr, dout_x, dout_y, 1'b1); // same fields, valid forced to 1
+ end
+ endtask
+
+ task set_narrow; // present a word on the "narrow" output port with valid asserted
+ input [ 1:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ begin
+ _update_narrow(bank, addr, dout_x, dout_y, 1'b1); // same fields, valid forced to 1
+ end
+ endtask
+
+ task set_rdct; // present a word on the rdct_* output port with valid asserted
+ input [ 1:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ begin
+ _update_rdct(bank, addr, dout_x, dout_y, 1'b1); // same fields, valid forced to 1
+ end
+ endtask
+
+ task clear_wide; // deassert the "wide" valid flag; bank, address and data are driven to X (don't care)
+ begin
+ _update_wide(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ end
+ endtask
+
+ task clear_narrow; // deassert the "narrow" valid flag; bank, address and data are driven to X (don't care)
+ begin
+ _update_narrow(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ end
+ endtask
+
+ task clear_rdct; // deassert the rdct valid flag; bank, address and data are driven to X (don't care)
+ begin
+ _update_rdct(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ end
+ endtask
+
+ task _set_cnt_lsb; // low-level writer for cnt_lsb and its wrapped flag
+ input [7:0] cnt;
+ input wrapped;
+ begin
+ cnt_lsb <= cnt;
+ cnt_lsb_wrapped <= wrapped;
+ end
+ endtask
+
+ task _set_cnt_msb; // low-level writer for cnt_msb and its wrapped flag
+ input [7:0] cnt;
+ input wrapped;
+ begin
+ cnt_msb <= cnt;
+ cnt_msb_wrapped <= wrapped;
+ end
+ endtask
+
+ task inc_cnt_lsb; // advance cnt_lsb by one, wrapping to 0 after word_index_last
+ begin
+ if (cnt_lsb == word_index_last)
+ _set_cnt_lsb(8'd0, 1'b1); // wrap and latch the wrapped flag
+ else
+ _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped); // plain increment, wrapped flag keeps its value
+ end
+ endtask
+
+ task inc_cnt_both; // advance cnt_lsb and cnt_msb in the same clock cycle
+ begin
+ inc_cnt_lsb;
+ inc_cnt_msb; // declared further down in the file
+ end
+ endtask
+
+ task inc_cnt_msb; // advance cnt_msb by one, wrapping to 0 after word_index_last
+ begin
+ if (cnt_msb == word_index_last)
+ _set_cnt_msb(8'd0, 1'b1); // wrap and latch the wrapped flag
+ else
+ _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped); // plain increment, wrapped flag keeps its value
+ end
+ endtask
+
+ task clr_cnt_lsb; // restart cnt_lsb at 0 and drop its wrapped flag
+ begin
+ _set_cnt_lsb(8'd0, 1'b0);
+ end
+ endtask
+
+ task clr_cnt_msb; // restart cnt_msb at 0 and drop its wrapped flag
+ begin
+ _set_cnt_msb(8'd0, 1'b0);
+ end
+ endtask
+
+
+
+ wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid}; // bit 1 = MSB valid, bit 0 = LSB valid
+
+ always @(posedge clk)
+ // ena restarts both word counters; while rdy is low the task matching rcmb_mode runs every cycle
+ if (ena) begin
+ clr_cnt_lsb();
+ clr_cnt_msb();
+ end else if (!rdy)
+ // rcmb_mode 2'd0 has no branch, so nothing is updated in that mode
+ case (rcmb_mode)
+ 2'd1: recombine_square();
+ 2'd2: recombine_triangle();
+ 2'd3: recombine_rectangle();
+ endcase
+
+ wire [17:0] rcmb_x_lsb_dout_pad = {2'b00, rcmb_x_lsb_dout}; // LSB words zero-extended by two bits to the 18-bit output width
+ wire [17:0] rcmb_y_lsb_dout_pad = {2'b00, rcmb_y_lsb_dout};
+
+ wire [17:0] rcmb_x_msb_dout_pad = {2'b00, rcmb_x_msb_dout}; // MSB words zero-extended the same way
+ wire [17:0] rcmb_y_msb_dout_pad = {2'b00, rcmb_y_msb_dout};
+
+ wire [17:0] rcmb_x_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[15: 0]}; // last delay-line stage: x is the low half,
+ wire [17:0] rcmb_y_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[31:16]}; // y is the high half
+
+ wire [17:0] rcmb_x_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_x_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[15: 0]}}; // LSB word plus the stored carry_1 word; operands are extended by one bit before the add
+ wire [17:0] rcmb_y_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_y_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[31:16]}}; // same for y, taken from the high half of carry_1
+
+
+ task recombine_square;
+ // One clock step of rcmb_mode 2'd1: drives the "wide" output port from the LSB/MSB input words.
+ begin
+ // Step 1: advance the word counter(s) of whichever inputs are valid this cycle.
+ case (rcmb_xy_valid)
+ //
+ 2'b01: inc_cnt_lsb;
+ 2'b10: inc_cnt_msb;
+ 2'b11: inc_cnt_both;
+ //
+ endcase
+ // Step 2: choose the "wide" output word. All updates are non-blocking, so cnt_* read here are the pre-increment values.
+ case (rcmb_xy_valid)
+ // nothing valid: drain the delay line into BANK_WIDE_ABH while its last stage is flagged, otherwise idle
+ 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_ABH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
+ else clear_wide;
+ // LSB only: pass the LSB word to BANK_WIDE_ABL
+ 2'b01: set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ // MSB only: the first two MSB words (cnt_msb < 2) are not output here; step 3 stores them in the carry register
+ 2'b10: if (cnt_msb < 8'd2) clear_wide;
+ else set_wide(BANK_WIDE_ABH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+ // both valid: after the LSB counter has wrapped, output LSB + stored carry to BANK_WIDE_ABH; before that, plain LSB to BANK_WIDE_ABL
+ 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_ABH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad);
+ else set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ //
+ endcase
+ // Step 3: maintain the MSB delay line and the carry register.
+ case (rcmb_xy_valid)
+ // flush: shift don't-care data with flag 0 while the last stage is still flagged
+ 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
+ 2'b10: if (cnt_msb < 8'd2) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
+ // both valid: the MSB word always enters the delay line; the carry register only shifts once cnt_lsb has wrapped
+ 2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
+ if (cnt_lsb_wrapped) shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}});
+ end
+ //
+ endcase
+ //
+ end
+ //
+ endtask
+
+
+ task recombine_triangle;
+ // One clock step of rcmb_mode 2'd2: only the LSB input is used and results go to the "narrow" output port.
+ begin
+ // Step 1: only an LSB-only cycle advances the LSB word counter.
+ case (rcmb_xy_valid)
+ //
+ 2'b01: inc_cnt_lsb();
+ //
+ endcase
+ // Step 2: choose the "narrow" output word (cnt_lsb read here is the pre-increment value).
+ case (rcmb_xy_valid)
+ // before the counter wraps, LSB words go to BANK_NARROW_Q; after the wrap they go to BANK_NARROW_EXT at fixed address 1
+ 2'b00: clear_narrow;
+ 2'b01: if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ else set_narrow(BANK_NARROW_EXT, 8'd1, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ 2'b10: clear_narrow;
+ 2'b11: clear_narrow;
+ //
+ endcase
+ //
+ end
+ //
+ endtask
+
+
+ task recombine_rectangle;
+ // One clock step of rcmb_mode 2'd3: same structure as recombine_square, but drives the rdct_* output port.
+ begin
+ // Step 1: advance the word counter(s) of whichever inputs are valid this cycle.
+ case (rcmb_xy_valid)
+ //
+ 2'b01: inc_cnt_lsb;
+ 2'b10: inc_cnt_msb;
+ 2'b11: inc_cnt_both;
+ //
+ endcase
+// //
+ case (rcmb_xy_valid)
+// //
+ 2'b00: if (rcmb_msb_flag_delay_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); // nothing valid: drain the delay line while its last stage is flagged
+ else clear_rdct;
+ 2'b01: set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); // LSB only: pass the LSB word to BANK_RCMB_ML
+ 2'b10: if (!cnt_msb_wrapped) begin // MSB only, counter not yet wrapped:
+ if (cnt_msb < 8'd2) clear_rdct; // first two MSB words are stored in the carry register below instead of being output
+ else set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+ end else set_rdct(BANK_RCMB_EXT, 8'd0, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); // after the MSB counter wrapped: word goes to BANK_RCMB_EXT, address 0
+
+ 2'b11: set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); // both valid: LSB + stored carry, unconditionally (no cnt_lsb_wrapped check here, unlike recombine_square)
+// //
+ endcase
+// //
+ case (rcmb_xy_valid)
+// //
+ 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); // flush the delay line with flag 0
+ 2'b10: begin
+ if ((cnt_msb < 8'd2) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout); // capture the first two MSB words as carries
+ if (cnt_msb_wrapped) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); // keep flushing the delay line after the wrap
+ end
+// //
+ 2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1); // MSB word enters the delay line together with its counter value
+ shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}}); // carry register shifts on every both-valid cycle
+ end
+// //
+ endcase
+ //
+ end
+ //
+ endtask
+
+
+ always @(posedge clk)
+ // rdy_adv generation: cleared by ena, then updated per mode while rdy_reg is low; rdy_reg copies it one cycle later
+ if (ena) begin
+ rdy_adv <= 1'b0;
+ end else if (!rdy_reg) begin
+ //
+ case (rcmb_mode)
+ // mode 1: only updated on cycles with no valid input while the last delay stage is still flagged
+ 2'd1: case (rcmb_xy_valid)
+ //
+ 2'b00: begin
+ //
+ if (rcmb_msb_flag_delay_2) begin
+ // rdy_adv goes high when stage 1 of the flag delay line is already clear
+ rdy_adv <= ~rcmb_msb_flag_delay_1;
+ //
+ end
+ //
+ end
+ //
+ endcase
+ // mode 2: on every LSB-only cycle rdy_adv takes the current value of cnt_lsb_wrapped
+ 2'd2: case (rcmb_xy_valid)
+ //
+ 2'b01: rdy_adv <= cnt_lsb_wrapped; //
+ //
+ endcase
+ // mode 3: same condition as mode 1
+ 2'd3: case (rcmb_xy_valid)
+ //
+ 2'b00: begin
+ //
+ if (rcmb_msb_flag_delay_2) begin
+ //
+ rdy_adv <= ~rcmb_msb_flag_delay_1;
+ //
+ end
+ //
+ end
+ //
+ endcase
+ //
+ endcase
+ //
+ end
+
+
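+ // TODO: add ready for mode=3 (NOTE(review): the rdy_adv block above already has a 2'd3 branch -- confirm whether this is still open)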
endmodule
diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v
new file mode 100644
index 0000000..1ecf56a
--- /dev/null
+++ b/rtl/modexpng_recombinator_cell.v
@@ -0,0 +1,35 @@
+module modexpng_recombinator_cell // accumulates overlapping 16-bit slices of successive 47-bit din words and emits 16 bits per word
+(
+ clk,
+ ce, clr,
+ din, dout
+);
+
+ input clk;
+ input ce; // clock enable: z, y and x only update on cycles with ce high
+ input clr; // when high, y and x are loaded from din alone, without the terms held over from the previous word
+ input [46:0] din; // split below into din_z = [46:32], din_y = [31:16], din_x = [15:0]
+ output [15:0] dout; // low 16 bits of the x register
+
+ reg [14:0] z; // upper slice of the previous word; added into y on the next enabled cycle
+ reg [16:0] y; // din_y plus the previous z (one extra bit for the carry-out)
+ reg [17:0] x; // din_x plus the previous y plus the previous overflow bits x[17:16]
+ //reg [15:0] w;
+
+ //assign dout = w;
+ assign dout = x[15:0];
+
+ wire [14:0] din_z = din[46:32]; // TODO: maybe determine more precise bound here
+ wire [15:0] din_y = din[31:16];
+ wire [15:0] din_x = din[15: 0];
+
+ always @(posedge clk)
+ // all right-hand sides use the register values from before this clock edge (non-blocking assignments)
+ if (ce) begin
+ z <= din_z; // loaded regardless of clr
+ y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z};
+ x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {{16{1'b0}}, x[17:16]};
+ //w <= clr ? {16{1'bX}} : x[15:0];
+ end
+
+endmodule
diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v
new file mode 100644
index 0000000..0f5e461
--- /dev/null
+++ b/rtl/modexpng_reductor.v
@@ -0,0 +1,270 @@
+module modexpng_reductor // adds delayed recombinator words to the rd_wide_*_dout_aux inputs and emits the sums on the rdct_final_* port
+(
+ clk, rst,
+ ena, rdy,
+ //fsm_state_next,
+ word_index_last,
+ //dsp_xy_ce_p,
+ //dsp_x_p, dsp_y_p,
+ //col_index, col_index_last,
+ rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux,
+ //rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid,
+ rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid,
+ rdct_final_xy_addr, rdct_final_x_dout, rdct_final_y_dout, rdct_final_xy_valid
+);
+
+
+ //
+ // Headers
+ //
+ //`include "../rtl_1/modexpng_mmm_fsm.vh"
+ `include "../rtl_1/modexpng_parameters_old.vh"
+ //`include "../rtl_1/modexpng_parameters_x8.vh"
+
+
+ input clk;
+ input rst;
+ input ena;
+ output rdy;
+ /*
+ input [FSM_STATE_WIDTH-1:0] fsm_state_next;*/
+ input [7:0] word_index_last;/*
+ input dsp_xy_ce_p;
+ *//*
+ input [9*47-1:0] dsp_x_p;
+ input [9*47-1:0] dsp_y_p;
+ input [ 4:0] col_index;
+ input [ 4:0] col_index_last;
+ *//*
+ input [ 7:0] rd_narrow_xy_addr;
+ input [ 1:0] rd_narrow_xy_bank;
+ */
+ input [ 1:0] rd_wide_xy_bank_aux; // declared as an input but not referenced by any logic in this module
+ input [ 7:0] rd_wide_xy_addr_aux; // declared as an input but not referenced by any logic in this module
+ input [ 17:0] rd_wide_x_dout_aux; // second addend for the x sums below
+ input [ 17:0] rd_wide_y_dout_aux; // second addend for the y sums below
+ //
+ input [ 1:0] rcmb_final_xy_bank;
+ input [ 7:0] rcmb_final_xy_addr;
+ input [ 17:0] rcmb_final_x_dout;
+ input [ 17:0] rcmb_final_y_dout;
+ input rcmb_final_xy_valid;
+
+ output [ 7:0] rdct_final_xy_addr;
+ output [ 17:0] rdct_final_x_dout;
+ output [ 17:0] rdct_final_y_dout;
+ output rdct_final_xy_valid;
+
+
+ //
+ // Ready
+ //
+ reg rdy_reg = 1'b1;
+ reg busy_now = 1'b0; // set in the "Busy" block at the bottom of the module
+
+ assign rdy = rdy_reg;
+
+ always @(posedge clk)
+ // NOTE(review): busy_now is set on the same edge that clears rdy_reg and is only cleared by rst, so the second condition below can never be true after ena -- rdy appears to stay low until reset; confirm intent
+ if (rst) rdy_reg <= 1'b1;
+ else begin
+ if (rdy && ena) rdy_reg <= 1'b0;
+ if (!rdy && !busy_now) rdy_reg <= 1'b1;
+ end
+
+
+
+ //
+ // Pipeline (Delay Match)
+ //
+ reg rcmb_xy_valid_dly1 = 1'b0; // rcmb_final_xy_valid delayed by 1, 2 and 3 clocks
+ reg rcmb_xy_valid_dly2 = 1'b0;
+ reg rcmb_xy_valid_dly3 = 1'b0;
+
+ reg [2:0] rcmb_xy_bank_dly1; // NOTE(review): 3 bits wide although rcmb_final_xy_bank is [1:0]; the value is zero-extended -- confirm the width
+ reg [2:0] rcmb_xy_bank_dly2;
+ reg [2:0] rcmb_xy_bank_dly3;
+
+ reg [7:0] rcmb_xy_addr_dly1;
+ reg [7:0] rcmb_xy_addr_dly2;
+ reg [7:0] rcmb_xy_addr_dly3;
+
+ reg [17:0] rcmb_x_dout_dly1;
+ reg [17:0] rcmb_x_dout_dly2;
+ reg [17:0] rcmb_x_dout_dly3;
+
+ reg [17:0] rcmb_y_dout_dly1;
+ reg [17:0] rcmb_y_dout_dly2;
+ reg [17:0] rcmb_y_dout_dly3;
+
+ always @(posedge clk)
+ // valid flags: reset to 0, otherwise shift unconditionally every clock
+ if (rst) begin
+ rcmb_xy_valid_dly1 <= 1'b0;
+ rcmb_xy_valid_dly2 <= 1'b0;
+ rcmb_xy_valid_dly3 <= 1'b0;
+ end else begin
+ rcmb_xy_valid_dly1 <= rcmb_final_xy_valid;
+ rcmb_xy_valid_dly2 <= rcmb_xy_valid_dly1;
+ rcmb_xy_valid_dly3 <= rcmb_xy_valid_dly2;
+ end
+
+
+ always @(posedge clk) begin
+ // payload registers: each stage only captures when the matching valid flag of the previous stage is set
+ if (rcmb_final_xy_valid) begin
+ rcmb_xy_bank_dly1 <= rcmb_final_xy_bank;
+ rcmb_xy_addr_dly1 <= rcmb_final_xy_addr;
+ rcmb_x_dout_dly1 <= rcmb_final_x_dout;
+ rcmb_y_dout_dly1 <= rcmb_final_y_dout;
+ end
+ //
+ if (rcmb_xy_valid_dly1) begin
+ rcmb_xy_bank_dly2 <= rcmb_xy_bank_dly1;
+ rcmb_xy_addr_dly2 <= rcmb_xy_addr_dly1;
+ rcmb_x_dout_dly2 <= rcmb_x_dout_dly1;
+ rcmb_y_dout_dly2 <= rcmb_y_dout_dly1;
+ end
+ //
+ if (rcmb_xy_valid_dly2) begin
+ rcmb_xy_bank_dly3 <= rcmb_xy_bank_dly2;
+ rcmb_xy_addr_dly3 <= rcmb_xy_addr_dly2;
+ rcmb_x_dout_dly3 <= rcmb_x_dout_dly2;
+ rcmb_y_dout_dly3 <= rcmb_y_dout_dly2;
+ end
+ //
+ end
+
+
+ reg [ 1:0] rcmb_x_lsb_carry; // running carry: top 2 bits of the 18-bit x sum in "Carry Computation"
+ reg [15:0] rcmb_x_lsb_dummy; // low 16 bits of that sum; written but never read in this module
+ reg [17:0] rcmb_x_lsb_dout; // declared but not used in this module
+
+ reg [ 1:0] rcmb_y_lsb_carry; // running carry: top 2 bits of the 18-bit y sum
+ reg [15:0] rcmb_y_lsb_dummy; // low 16 bits of that sum; written but never read in this module
+ reg [17:0] rcmb_y_lsb_dout; // declared but not used in this module
+
+ //reg [17:0] reductor_fat_bram_x_msb_dout;
+ //reg reductor_fat_bram_x_msb_dout_valid = 1'b0;
+ //reg [ 7:0] reductor_fat_bram_x_msb_addr;
+
+ //
+ // Carry Computation
+ //
+ always @(posedge clk)
+ // carries restart at zero on ena (not on rst) and accumulate over BANK_RCMB_ML words plus the BANK_RCMB_MH word at address 0
+ if (ena) begin
+ rcmb_x_lsb_carry <= 2'b00;
+ rcmb_y_lsb_carry <= 2'b00;
+ end else if (rcmb_xy_valid_dly3)
+ // other banks, and BANK_RCMB_MH at non-zero addresses, leave the carries unchanged
+ case (rcmb_xy_bank_dly3)
+
+ BANK_RCMB_ML: begin
+ {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
+ {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
+ end
+
+ BANK_RCMB_MH:
+ if (rcmb_xy_addr_dly3 == 8'd0) begin
+ {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
+ {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
+ end
+
+ endcase
+
+
+ //
+ // Reduction
+ //
+ reg [ 7:0] rdct_xy_addr; // output port registers, written only through _update_rdct
+ reg [ 17:0] rdct_x_dout;
+ reg [ 17:0] rdct_y_dout;
+ reg rdct_xy_valid = 1'b0;
+
+ assign rdct_final_xy_addr = rdct_xy_addr;
+ assign rdct_final_x_dout = rdct_x_dout;
+ assign rdct_final_y_dout = rdct_y_dout;
+ assign rdct_final_xy_valid = rdct_xy_valid;
+
+ task _update_rdct; // low-level writer for the four output registers
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ input valid;
+ begin
+ rdct_xy_addr <= addr;
+ rdct_x_dout <= dout_x;
+ rdct_y_dout <= dout_y;
+ rdct_xy_valid <= valid;
+ end
+ endtask
+
+ task set_rdct; // present a word on the output port with valid asserted
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ begin
+ _update_rdct(addr, dout_x, dout_y, 1'b1);
+ end
+ endtask
+
+ task clear_rdct; // deassert valid; address and data are driven to X (don't care)
+ begin
+ _update_rdct(8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ end
+ endtask
+
+
+ //
+ // Sums of the delayed recombinator word and the aux read data, without and with the running carry
+ //
+ wire [17:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux;
+ wire [17:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux;
+
+ wire [17:0] sum_rdct_x_carry = sum_rdct_x + {16'h0000, rcmb_x_lsb_carry};
+ wire [17:0] sum_rdct_y_carry = sum_rdct_y + {16'h0000, rcmb_y_lsb_carry};
+
+
+ //
+ // Output stage
+ //
+ always @(posedge clk)
+ //
+ if (rst) clear_rdct;
+ else begin
+ // default: no output this cycle; a later set_rdct in the same cycle overrides it (last non-blocking assignment wins)
+ clear_rdct;
+ //
+ if (busy_now && rcmb_xy_valid_dly3)
+ // BANK_RCMB_MH address 0 and all other banks produce no output
+ case (rcmb_xy_bank_dly3)
+
+ BANK_RCMB_MH:
+ if (rcmb_xy_addr_dly3 == 8'd1)
+ set_rdct(8'd0, sum_rdct_x_carry, sum_rdct_y_carry); // first output word also absorbs the accumulated carry
+ else if (rcmb_xy_addr_dly3 > 8'd1)
+ set_rdct(rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y); // output address is the input address minus one
+
+ BANK_RCMB_EXT:
+ set_rdct(word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3); // passed through without addition, to the last word index
+
+ endcase
+ //
+ end
+
+
+
+ //
+ // Busy
+ //
+ always @(posedge clk)
+ // set when an operation is accepted (rdy && ena); there is no clearing path other than rst
+ if (rst) busy_now <= 1'b0;
+ else begin
+ if (rdy && ena) busy_now <= 1'b1;
+ //if (!rdy && !busy_now) rdy <= 1'b1;
+ end
+
+
+endmodule
diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v
new file mode 100644
index 0000000..d6f9fb1
--- /dev/null
+++ b/rtl/modexpng_storage_block.v
@@ -0,0 +1,226 @@
+module modexpng_storage_block // operand storage built from ip_bram_18k instances: NUM_MULTS/2 "wide" x/y pairs, one auxiliary pair and one "narrow" pair
+(
+ clk, rst,
+
+ wr_wide_xy_ena,
+ wr_wide_xy_bank,
+ wr_wide_xy_addr,
+ wr_wide_x_din,
+ wr_wide_y_din,
+
+ wr_narrow_xy_ena,
+ wr_narrow_xy_bank,
+ wr_narrow_xy_addr,
+ wr_narrow_x_din,
+ wr_narrow_y_din,
+
+ rd_wide_xy_ena,
+ rd_wide_xy_ena_aux,
+ rd_wide_xy_bank,
+ rd_wide_xy_bank_aux,
+ rd_wide_xy_addr,
+ rd_wide_xy_addr_aux,
+ rd_wide_x_dout,
+ rd_wide_y_dout,
+ rd_wide_x_dout_aux,
+ rd_wide_y_dout_aux,
+
+ rd_narrow_xy_ena,
+ rd_narrow_xy_bank,
+ rd_narrow_xy_addr,
+ rd_narrow_x_dout,
+ rd_narrow_y_dout
+);
+
+
+ //
+ // Headers
+ //
+ `include "../rtl_1/modexpng_parameters_x8_old.vh"
+
+
+ //
+ // Ports
+ //
+ input clk;
+ input rst; // only resets the registered read enables below
+
+ input wr_wide_xy_ena; // single write port shared by every "wide" and auxiliary memory
+ input [ 1:0] wr_wide_xy_bank;
+ input [ 7:0] wr_wide_xy_addr;
+ input [17:0] wr_wide_x_din;
+ input [17:0] wr_wide_y_din;
+
+ input wr_narrow_xy_ena; // write port of the "narrow" memories
+ input [ 1:0] wr_narrow_xy_bank;
+ input [ 7:0] wr_narrow_xy_addr;
+ input [17:0] wr_narrow_x_din;
+ input [17:0] wr_narrow_y_din;
+
+ input rd_wide_xy_ena;
+ input rd_wide_xy_ena_aux;
+ input [ 1:0] rd_wide_xy_bank; // one bank select shared by all "wide" read ports
+ input [ 1:0] rd_wide_xy_bank_aux;
+ input [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; // one 8-bit read address per "wide" memory pair, packed
+ input [ 8-1:0] rd_wide_xy_addr_aux;
+ output [18*NUM_MULTS/2-1:0] rd_wide_x_dout; // one 18-bit word per "wide" memory pair, packed
+ output [18*NUM_MULTS/2-1:0] rd_wide_y_dout;
+ output [ 18-1:0] rd_wide_x_dout_aux;
+ output [ 18-1:0] rd_wide_y_dout_aux;
+
+ input rd_narrow_xy_ena;
+ input [ 1:0] rd_narrow_xy_bank;
+ input [ 7:0] rd_narrow_xy_addr;
+ output [18-1:0] rd_narrow_x_dout;
+ output [18-1:0] rd_narrow_y_dout;
+
+
+ //
+ // Internal Registers
+ //
+ reg rd_wide_xy_reg_ena = 1'b0; // read enables delayed by one clock; they drive the regceb inputs of the memories
+ reg rd_wide_xy_reg_ena_aux = 1'b0;
+ reg rd_narrow_xy_reg_ena = 1'b0;
+
+ always @(posedge clk) begin
+ // synchronous reset to 0, otherwise follow the read enables
+ rd_wide_xy_reg_ena <= rst ? 1'b0 : rd_wide_xy_ena;
+ rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux;
+ rd_narrow_xy_reg_ena <= rst ? 1'b0 : rd_narrow_xy_ena;
+ //
+ end
+
+
+ //
+ // Helper Signals
+ //
+ wire [2+8-1:0] wr_wide_xy_offset; // 10-bit memory addresses: {bank, addr}
+ wire [2+8-1:0] rd_wide_xy_offset[0:NUM_MULTS/2-1];
+ wire [2+8-1:0] rd_wide_xy_offset_aux;
+ wire [2+8-1:0] wr_narrow_xy_offset;
+ wire [2+8-1:0] rd_narrow_xy_offset;
+
+ assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr};
+ assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux};
+ assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr};
+ assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr};
+
+
+ //
+ // "Wide" Storage
+ //
+ genvar z;
+ generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ begin : gen_wide_bram
+ // per-instance read address: shared bank, own 8-bit slice of rd_wide_xy_addr
+ assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[8*z+:8]};
+ // every instance gets the same write port, so all copies receive identical writes
+ ip_bram_18k wide_bram_x
+ (
+ .clka (clk),
+ .clkb (clk),
+
+ .ena (wr_wide_xy_ena),
+ .wea (wr_wide_xy_ena),
+ .addra (wr_wide_xy_offset),
+ .dina (wr_wide_x_din),
+
+ .enb (rd_wide_xy_ena),
+ .regceb (rd_wide_xy_reg_ena),
+ .addrb (rd_wide_xy_offset[z]),
+ .doutb (rd_wide_x_dout[18*z+:18])
+ );
+ //
+ ip_bram_18k wide_bram_y
+ (
+ .clka (clk),
+ .clkb (clk),
+
+ .ena (wr_wide_xy_ena),
+ .wea (wr_wide_xy_ena),
+ .addra (wr_wide_xy_offset),
+ .dina (wr_wide_y_din),
+
+ .enb (rd_wide_xy_ena),
+ .regceb (rd_wide_xy_reg_ena),
+ .addrb (rd_wide_xy_offset[z]),
+ .doutb (rd_wide_y_dout[18*z+:18])
+ );
+ //
+ end
+ endgenerate
+
+
+ //
+ // Auxilary Storage
+ //
+ ip_bram_18k wide_bram_x_aux // written through the same "wide" write port, read through its own *_aux port
+ (
+ .clka (clk),
+ .clkb (clk),
+
+ .ena (wr_wide_xy_ena),
+ .wea (wr_wide_xy_ena),
+ .addra (wr_wide_xy_offset),
+ .dina (wr_wide_x_din),
+
+ .enb (rd_wide_xy_ena_aux),
+ .regceb (rd_wide_xy_reg_ena_aux),
+ .addrb (rd_wide_xy_offset_aux),
+ .doutb (rd_wide_x_dout_aux)
+ );
+ //
+ ip_bram_18k wide_bram_y_aux
+ (
+ .clka (clk),
+ .clkb (clk),
+
+ .ena (wr_wide_xy_ena),
+ .wea (wr_wide_xy_ena),
+ .addra (wr_wide_xy_offset),
+ .dina (wr_wide_y_din),
+
+ .enb (rd_wide_xy_ena_aux),
+ .regceb (rd_wide_xy_reg_ena_aux),
+ .addrb (rd_wide_xy_offset_aux),
+ .doutb (rd_wide_y_dout_aux)
+ );
+
+
+ //
+ // "Narrow" Storage
+ //
+ ip_bram_18k narrow_bram_x // single x/y pair with independent write and read ports
+ (
+ .clka (clk),
+ .clkb (clk),
+
+ .ena (wr_narrow_xy_ena),
+ .wea (wr_narrow_xy_ena),
+ .addra (wr_narrow_xy_offset),
+ .dina (wr_narrow_x_din),
+
+ .enb (rd_narrow_xy_ena),
+ .regceb (rd_narrow_xy_reg_ena),
+ .addrb (rd_narrow_xy_offset),
+ .doutb (rd_narrow_x_dout)
+ );
+
+ ip_bram_18k narrow_bram_y
+ (
+ .clka (clk),
+ .clkb (clk),
+
+ .ena (wr_narrow_xy_ena),
+ .wea (wr_narrow_xy_ena),
+ .addra (wr_narrow_xy_offset),
+ .dina (wr_narrow_y_din),
+
+ .enb (rd_narrow_xy_ena),
+ .regceb (rd_narrow_xy_reg_ena),
+ .addrb (rd_narrow_xy_offset),
+ .doutb (rd_narrow_y_dout)
+ );
+
+
+endmodule
diff --git a/rtl/modexpng_storage_manager.v b/rtl/modexpng_storage_manager.v
new file mode 100644
index 0000000..fa1e4a1
--- /dev/null
+++ b/rtl/modexpng_storage_manager.v
@@ -0,0 +1,200 @@
+module modexpng_storage_manager // registered 2:1 selector: forwards ext_* or rcmb_* write requests to the wr_* ports, ext_* taking priority
+(
+ clk, rst,
+
+ wr_wide_xy_ena,
+ wr_wide_xy_bank,
+ wr_wide_xy_addr,
+ wr_wide_x_din,
+ wr_wide_y_din,
+
+ wr_narrow_xy_ena,
+ wr_narrow_xy_bank,
+ wr_narrow_xy_addr,
+ wr_narrow_x_din,
+ wr_narrow_y_din,
+
+ ext_wide_xy_ena,
+ ext_wide_xy_bank,
+ ext_wide_xy_addr,
+ ext_wide_x_din,
+ ext_wide_y_din,
+
+ ext_narrow_xy_ena,
+ ext_narrow_xy_bank,
+ ext_narrow_xy_addr,
+ ext_narrow_x_din,
+ ext_narrow_y_din,
+
+ rcmb_wide_xy_ena,
+ rcmb_wide_xy_bank,
+ rcmb_wide_xy_addr,
+ rcmb_wide_x_din,
+ rcmb_wide_y_din,
+
+ rcmb_narrow_xy_ena,
+ rcmb_narrow_xy_bank,
+ rcmb_narrow_xy_addr,
+ rcmb_narrow_x_din,
+ rcmb_narrow_y_din
+);
+
+
+ //
+ // Headers
+ //
+ `include "../rtl_1/modexpng_parameters_x8_old.vh"
+
+
+ //
+ // Ports
+ //
+ input clk;
+ input rst;
+
+ output wr_wide_xy_ena; // selected "wide" write request, one clock after the source request
+ output [ 1:0] wr_wide_xy_bank;
+ output [ 7:0] wr_wide_xy_addr;
+ output [17:0] wr_wide_x_din;
+ output [17:0] wr_wide_y_din;
+
+ output wr_narrow_xy_ena; // selected "narrow" write request, one clock after the source request
+ output [ 1:0] wr_narrow_xy_bank;
+ output [ 7:0] wr_narrow_xy_addr;
+ output [17:0] wr_narrow_x_din;
+ output [17:0] wr_narrow_y_din;
+
+ input ext_wide_xy_ena; // first write source for the "wide" port (wins when both sources are enabled)
+ input [ 1:0] ext_wide_xy_bank;
+ input [ 7:0] ext_wide_xy_addr;
+ input [17:0] ext_wide_x_din;
+ input [17:0] ext_wide_y_din;
+
+ input ext_narrow_xy_ena; // first write source for the "narrow" port (wins when both sources are enabled)
+ input [ 1:0] ext_narrow_xy_bank;
+ input [ 7:0] ext_narrow_xy_addr;
+ input [17:0] ext_narrow_x_din;
+ input [17:0] ext_narrow_y_din;
+
+ input rcmb_wide_xy_ena; // second write source for the "wide" port
+ input [ 1:0] rcmb_wide_xy_bank;
+ input [ 7:0] rcmb_wide_xy_addr;
+ input [17:0] rcmb_wide_x_din;
+ input [17:0] rcmb_wide_y_din;
+
+ input rcmb_narrow_xy_ena; // second write source for the "narrow" port
+ input [ 1:0] rcmb_narrow_xy_bank;
+ input [ 7:0] rcmb_narrow_xy_addr;
+ input [17:0] rcmb_narrow_x_din;
+ input [17:0] rcmb_narrow_y_din;
+
+
+ reg wr_wide_xy_ena_reg = 1'b0; // output registers behind the wr_wide_* ports
+ reg [ 1:0] wr_wide_xy_bank_reg;
+ reg [ 7:0] wr_wide_xy_addr_reg;
+ reg [17:0] wr_wide_x_din_reg;
+ reg [17:0] wr_wide_y_din_reg;
+
+ reg wr_narrow_xy_ena_reg = 1'b0; // output registers behind the wr_narrow_* ports
+ reg [ 1:0] wr_narrow_xy_bank_reg;
+ reg [ 7:0] wr_narrow_xy_addr_reg;
+ reg [17:0] wr_narrow_x_din_reg;
+ reg [17:0] wr_narrow_y_din_reg;
+
+ task _update_wide; // low-level writer for all five "wide" output registers
+ input xy_ena;
+ input [ 1:0] xy_bank;
+ input [ 7:0] xy_addr;
+ input [17:0] x_din;
+ input [17:0] y_din;
+ begin
+ wr_wide_xy_ena_reg <= xy_ena;
+ wr_wide_xy_bank_reg <= xy_bank;
+ wr_wide_xy_addr_reg <= xy_addr;
+ wr_wide_x_din_reg <= x_din;
+ wr_wide_y_din_reg <= y_din;
+ end
+ endtask
+
+ task _update_narrow; // low-level writer for all five "narrow" output registers
+ input xy_ena;
+ input [ 1:0] xy_bank;
+ input [ 7:0] xy_addr;
+ input [17:0] x_din;
+ input [17:0] y_din;
+ begin
+ wr_narrow_xy_ena_reg <= xy_ena;
+ wr_narrow_xy_bank_reg <= xy_bank;
+ wr_narrow_xy_addr_reg <= xy_addr;
+ wr_narrow_x_din_reg <= x_din;
+ wr_narrow_y_din_reg <= y_din;
+ end
+ endtask
+
+ task enable_wide; // issue a "wide" write: enable forced to 1
+ input [ 1:0] xy_bank;
+ input [ 7:0] xy_addr;
+ input [17:0] x_din;
+ input [17:0] y_din;
+ begin
+ _update_wide(1'b1, xy_bank, xy_addr, x_din, y_din);
+ end
+ endtask
+
+ task enable_narrow; // issue a "narrow" write: enable forced to 1
+ input [ 1:0] xy_bank;
+ input [ 7:0] xy_addr;
+ input [17:0] x_din;
+ input [17:0] y_din;
+ begin
+ _update_narrow(1'b1, xy_bank, xy_addr, x_din, y_din);
+ end
+ endtask
+
+ task disable_wide; // no "wide" write: enable 0, remaining fields driven to X (don't care)
+ begin
+ _update_wide(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}});
+ end
+ endtask
+
+ task disable_narrow; // no "narrow" write: enable 0, remaining fields driven to X (don't care)
+ begin
+ _update_narrow(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}});
+ end
+ endtask
+
+ always @(posedge clk)
+ // "wide" port selection
+ if (rst) disable_wide;
+ else begin
+ // ext_* is checked first, so an rcmb_* request arriving in the same cycle is dropped
+ if (ext_wide_xy_ena) enable_wide(ext_wide_xy_bank, ext_wide_xy_addr, ext_wide_x_din, ext_wide_y_din);
+ else if (rcmb_wide_xy_ena) enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din);
+ else disable_wide;
+ //
+ end
+
+ always @(posedge clk)
+ // "narrow" port selection
+ if (rst) disable_narrow;
+ else begin
+ // ext_* is checked first, so an rcmb_* request arriving in the same cycle is dropped
+ if (ext_narrow_xy_ena) enable_narrow(ext_narrow_xy_bank, ext_narrow_xy_addr, ext_narrow_x_din, ext_narrow_y_din);
+ else if (rcmb_narrow_xy_ena) enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din);
+ else disable_narrow;
+ //
+ end
+
+ assign wr_wide_xy_ena = wr_wide_xy_ena_reg; // outputs are plain copies of the registers
+ assign wr_wide_xy_bank = wr_wide_xy_bank_reg;
+ assign wr_wide_xy_addr = wr_wide_xy_addr_reg;
+ assign wr_wide_x_din = wr_wide_x_din_reg;
+ assign wr_wide_y_din = wr_wide_y_din_reg;
+
+ assign wr_narrow_xy_ena = wr_narrow_xy_ena_reg;
+ assign wr_narrow_xy_bank = wr_narrow_xy_bank_reg;
+ assign wr_narrow_xy_addr = wr_narrow_xy_addr_reg;
+ assign wr_narrow_x_din = wr_narrow_x_din_reg;
+ assign wr_narrow_y_din = wr_narrow_y_din_reg;
+
+endmodule