From fde62e373fdfcefefb7da10757a3db933160c911 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 15:16:58 +0300 Subject: Major rewrite (different core hierarchy, buses, wrappers, etc). --- rtl/modexpng_dsp_array_block.v | 143 +++++ rtl/modexpng_dsp_slice_wrapper.v | 125 ++++ rtl/modexpng_mac.v | 54 -- rtl/modexpng_mac_array.v | 116 ---- rtl/modexpng_mem.v | 93 --- rtl/modexpng_mmm_col_index.v | 90 --- rtl/modexpng_mmm_din_addr.v | 167 ----- rtl/modexpng_mmm_dout_addr.v | 167 ----- rtl/modexpng_mmm_dual_x8.v | 951 ++++++++++++++++++++++++++++ rtl/modexpng_mmm_fsm.vh | 43 -- rtl/modexpng_mmm_fsm_old.vh | 43 ++ rtl/modexpng_mmm_pad.v | 153 ----- rtl/modexpng_mmm_transporter.v | 157 ----- rtl/modexpng_mmm_x8_dual.v | 550 ----------------- rtl/modexpng_parameters.vh | 39 -- rtl/modexpng_parameters_old.vh | 40 ++ rtl/modexpng_parameters_x8.vh | 1 - rtl/modexpng_parameters_x8_old.vh | 1 + rtl/modexpng_part_recombinator.v | 1128 --------------------------------- rtl/modexpng_recombinator_block.v | 1234 ++++++++++++++++++++++++++++++++++++- rtl/modexpng_recombinator_cell.v | 35 ++ rtl/modexpng_reductor.v | 270 ++++++++ rtl/modexpng_storage_block.v | 226 +++++++ rtl/modexpng_storage_manager.v | 200 ++++++ 24 files changed, 3246 insertions(+), 2780 deletions(-) create mode 100644 rtl/modexpng_dsp_array_block.v create mode 100644 rtl/modexpng_dsp_slice_wrapper.v delete mode 100644 rtl/modexpng_mac.v delete mode 100644 rtl/modexpng_mac_array.v delete mode 100644 rtl/modexpng_mem.v delete mode 100644 rtl/modexpng_mmm_col_index.v delete mode 100644 rtl/modexpng_mmm_din_addr.v delete mode 100644 rtl/modexpng_mmm_dout_addr.v create mode 100644 rtl/modexpng_mmm_dual_x8.v delete mode 100644 rtl/modexpng_mmm_fsm.vh create mode 100644 rtl/modexpng_mmm_fsm_old.vh delete mode 100644 rtl/modexpng_mmm_pad.v delete mode 100644 rtl/modexpng_mmm_transporter.v delete mode 100644 rtl/modexpng_mmm_x8_dual.v delete mode 100644 rtl/modexpng_parameters.vh create mode 
100644 rtl/modexpng_parameters_old.vh delete mode 100644 rtl/modexpng_parameters_x8.vh create mode 100644 rtl/modexpng_parameters_x8_old.vh delete mode 100644 rtl/modexpng_part_recombinator.v create mode 100644 rtl/modexpng_recombinator_cell.v create mode 100644 rtl/modexpng_reductor.v create mode 100644 rtl/modexpng_storage_block.v create mode 100644 rtl/modexpng_storage_manager.v diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v new file mode 100644 index 0000000..9c4ee93 --- /dev/null +++ b/rtl/modexpng_dsp_array_block.v @@ -0,0 +1,143 @@ +module modexpng_dsp_array_block +( + input clk, + + input ce_a, + input ce_b, + input ce_m, + input ce_p, + input ce_mode, + + input [9 -1:0] mode_z, + + input [5*18-1:0] a, + input [1*16-1:0] b, + output [9*47-1:0] p +); + + `include "modexpng_parameters_x8.vh" + + wire [17:0] casc_a[0:3]; + wire [15:0] casc_b[0:3]; + + wire ce_a0 = ce_a; + reg ce_a1 = 1'b0; + reg ce_a2 = 1'b0; + + wire ce_b0 = ce_b; + reg ce_b1 = 1'b0; + + always @(posedge clk) begin + ce_a1 <= ce_a0; + ce_a2 <= ce_a1; + ce_b1 <= ce_b0; + end + + + genvar z; + generate for (z=0; z<(NUM_MULTS/2); z=z+1) + // + begin : gen_DSP48E1 + // + modexpng_dsp_slice_wrapper # + ( + .AB_INPUT("DIRECT"), + .B_REG(2) + ) + dsp_direct + ( + .clk (clk), + + .ce_a1 (ce_a0), + .ce_b1 (ce_b0), + .ce_a2 (ce_a1), + .ce_b2 (ce_b1), + .ce_m (ce_m), + .ce_p (ce_p), + .ce_mode (ce_mode), + + .a (a[z*18+:18]), + .b (b), + .p (p[47*2*z+:47]), + + .inmode (5'b00000), + .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}), + .alumode (4'b0000), + + .casc_a_in ({18{1'b0}}), + .casc_b_in ({16{1'b0}}), + + .casc_a_out (casc_a[z]), + .casc_b_out (casc_b[z]) + ); + // + modexpng_dsp_slice_wrapper # + ( + .AB_INPUT("CASCADE"), + .B_REG(1) + ) + dsp_cascade + ( + .clk (clk), + + .ce_a1 (ce_a1), + .ce_b1 (1'b0), + .ce_a2 (ce_a2), + .ce_b2 (ce_b1), + .ce_m (ce_m), + .ce_p (ce_p), + .ce_mode (ce_mode), + + .a (a[z*18+:18]), + .b (b), + .p (p[47*(2*z+1)+:47]), + + .inmode 
(5'b00000), + .opmode ({1'b0, mode_z[2*z+1], 1'b0, 2'b01, 2'b01}), + .alumode (4'b0000), + + .casc_a_in (casc_a[z]), + .casc_b_in (casc_b[z]), + + .casc_a_out (), + .casc_b_out () + ); + // + end + // + endgenerate + + modexpng_dsp_slice_wrapper # + ( + .AB_INPUT("DIRECT"), + .B_REG(2) + ) + dsp_aux + ( + .clk (clk), + + .ce_a1 (ce_a0), + .ce_b1 (ce_b0), + .ce_a2 (ce_a1), + .ce_b2 (ce_b1), + .ce_m (ce_m), + .ce_p (ce_p), + .ce_mode (ce_mode), + + .a (a[4*18+:18]), + .b (b), + .p (p[47*2*4+:47]), + + .inmode (5'b00000), + .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}), + .alumode (4'b0000), + + .casc_a_in ({18{1'b0}}), + .casc_b_in ({16{1'b0}}), + + .casc_a_out (), + .casc_b_out () + ); + + +endmodule diff --git a/rtl/modexpng_dsp_slice_wrapper.v b/rtl/modexpng_dsp_slice_wrapper.v new file mode 100644 index 0000000..f565eec --- /dev/null +++ b/rtl/modexpng_dsp_slice_wrapper.v @@ -0,0 +1,125 @@ +module modexpng_dsp_slice_wrapper # +( + AB_INPUT = "DIRECT", + B_REG = 2 +) +( + input clk, + input ce_a1, + input ce_b1, + input ce_a2, + input ce_b2, + input ce_m, + input ce_p, + input ce_mode, + input [17:0] a, + input [15:0] b, + output [46:0] p, + input [ 4:0] inmode, + input [ 6:0] opmode, + input [ 3:0] alumode, + input [17:0] casc_a_in, + input [15:0] casc_b_in, + output [17:0] casc_a_out, + output [15:0] casc_b_out +); + + wire [30-18-1:0] casc_a_dummy; + wire [18-16-1:0] casc_b_dummy; + wire [48-47-1:0] p_dummy; + + DSP48E1 # + ( + .AREG (2), + .BREG (B_REG), + .CREG (0), + .DREG (0), + .ADREG (0), + .MREG (1), + .PREG (1), + .ACASCREG (1), + .BCASCREG (1), + .INMODEREG (0), + .OPMODEREG (1), + .ALUMODEREG (0), + .CARRYINREG (0), + .CARRYINSELREG (0), + + .A_INPUT (AB_INPUT), + .B_INPUT (AB_INPUT), + + .USE_DPORT ("FALSE"), + .USE_MULT ("DYNAMIC"), + .USE_SIMD ("ONE48"), + + .MASK (48'h3fffffffffff), + .PATTERN (48'h000000000000), + .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), + + .USE_PATTERN_DETECT ("NO_PATDET"), + .AUTORESET_PATDET ("NO_RESET") + ) + 
DSP48E1_inst + ( + .CLK (clk), + + .CEA1 (ce_a1), + .CEB1 (ce_b1), + .CEA2 (ce_a2), + .CEB2 (ce_b2), + .CEAD (1'b0), + .CEC (1'b0), + .CED (1'b0), + .CEM (ce_m), + .CEP (ce_p), + .CEINMODE (1'b0), + .CECTRL (ce_mode), + .CEALUMODE (1'b0), + .CECARRYIN (1'b0), + + .A ({{(30-18){1'b0}}, a}), + .B ({{(18-16){1'b0}}, b}), + .C ({48{1'b0}}), + .D ({25{1'b0}}), + .P ({p_dummy, p}), + + .INMODE (inmode), + .OPMODE (opmode), + .ALUMODE (alumode), + + .ACIN ({{(30-18){1'b0}}, casc_a_in}), + .BCIN ({{(18-16){1'b0}}, casc_b_in}), + .ACOUT ({casc_a_dummy, casc_a_out}), + .BCOUT ({casc_b_dummy, casc_b_out}), + .PCIN ({48{1'b0}}), + .PCOUT (), + .CARRYCASCIN (1'b0), + .CARRYCASCOUT (), + + .RSTA (1'b0), + .RSTB (1'b0), + .RSTC (1'b0), + .RSTD (1'b0), + .RSTM (1'b0), + .RSTP (1'b0), + .RSTINMODE (1'b0), + .RSTCTRL (1'b0), + .RSTALUMODE (1'b0), + .RSTALLCARRYIN (1'b0), + + .UNDERFLOW (), + .OVERFLOW (), + .PATTERNDETECT (), + .PATTERNBDETECT (), + + .CARRYIN (1'b0), + .CARRYOUT (), + .CARRYINSEL (3'b000), + + .MULTSIGNIN (1'b0), + .MULTSIGNOUT () + ); + + +endmodule diff --git a/rtl/modexpng_mac.v b/rtl/modexpng_mac.v deleted file mode 100644 index 9105dab..0000000 --- a/rtl/modexpng_mac.v +++ /dev/null @@ -1,54 +0,0 @@ -module modexpng_mac -( - clk, - ce, clr, - casc_a, - a_in, b_in, p_out, - a_casc_in, a_casc_out -); - - input clk; - input ce; - input clr; - input casc_a; - input [16:0] a_in; - input [16:0] b_in; - output [46:0] p_out; - input [16:0] a_casc_in; - output [16:0] a_casc_out; - - reg [16:0] a_reg; - reg [16:0] b_reg; - assign a_casc_out = a_reg; - always @(posedge clk) - // - if (ce) {b_reg, a_reg} <= {b_in, casc_a ? 
a_casc_in : a_in}; - - reg ce_dly1; - reg ce_dly2; - always @(posedge clk) - // - {ce_dly2, ce_dly1} <= {ce_dly1, ce}; - - reg clr_dly1; - reg clr_dly2; - always @(posedge clk) begin - // - if (ce) clr_dly1 <= clr; - if (ce_dly1) clr_dly2 <= clr_dly1; - // - end - - reg [33:0] m_reg; - wire [46:0] m_reg_ext = {{13{1'b0}}, m_reg}; - always @(posedge clk) - // - if (ce_dly1) m_reg <= {{17{1'b0}}, a_reg} * {{17{1'b0}}, b_reg}; - - reg [46:0] p_reg; - assign p_out = p_reg; - always @(posedge clk) - // - if (ce_dly2) p_reg <= clr_dly2 ? m_reg_ext : p_reg + m_reg_ext; - -endmodule diff --git a/rtl/modexpng_mac_array.v b/rtl/modexpng_mac_array.v deleted file mode 100644 index 067929e..0000000 --- a/rtl/modexpng_mac_array.v +++ /dev/null @@ -1,116 +0,0 @@ -module modexpng_mac_array -( - clk, - ce, clr, - ce_aux, clr_aux, - casc_a, casc_a_aux, - a_in, b_in, p_out, - a_in_aux, p_out_aux -); - - - // - // Includes - // - `include "modexpng_parameters.vh" - `include "modexpng_parameters_x8.vh" - - - // - // Ports - // - input clk; - input ce; - input [NUM_MULTS -1:0] clr; - input ce_aux; - input clr_aux; - input [NUM_MULTS -2:0] casc_a; - input casc_a_aux; - input [NUM_MULTS * WORD_WIDTH -1:0] a_in; - input [ 1 * WORD_WIDTH -1:0] b_in; - output [NUM_MULTS * MAC_WIDTH -1:0] p_out; - input [ 1 * WORD_WIDTH -1:0] a_in_aux; - output [ 1 * MAC_WIDTH -1:0] p_out_aux; - - - // - // A-Cascade Paths - // - wire [WORD_WIDTH-1:0] a_casc_int[0:NUM_MULTS-2]; - wire [WORD_WIDTH-1:0] a_casc_int_aux; - - - // - // LSB - // - modexpng_mac mac_lsb - ( - .clk (clk), - .ce (ce), - .clr (clr[0]), - .casc_a (1'b0), - .a_in (a_in[0+:WORD_WIDTH]), - .b_in (b_in), - .p_out (p_out[0+:MAC_WIDTH]), - .a_casc_in ({WORD_WIDTH{1'b0}}), - .a_casc_out (a_casc_int[0]) - ); - - - // - // INT - // - genvar z; - generate for (z=1; z<(NUM_MULTS-1); z=z+1) - begin : gen_modexpng_mac_int - modexpng_mac mac_int - ( - .clk (clk), - .ce (ce), - .clr (clr[z]), - .casc_a (casc_a[z-1]), - .a_in 
(a_in[z*WORD_WIDTH+:WORD_WIDTH]), - .b_in (b_in), - .p_out (p_out[z*MAC_WIDTH+:MAC_WIDTH]), - .a_casc_in (a_casc_int[z-1]), - .a_casc_out (a_casc_int[z]) - ); - end - endgenerate - - - // - // MSB - // - modexpng_mac mac_msb - ( - .clk (clk), - .ce (ce), - .clr (clr[NUM_MULTS-1]), - .casc_a (casc_a[NUM_MULTS-2]), - .a_in (a_in[(NUM_MULTS-1)*WORD_WIDTH+:WORD_WIDTH]), - .b_in (b_in), - .p_out (p_out[(NUM_MULTS-1)*MAC_WIDTH+:MAC_WIDTH]), - .a_casc_in (a_casc_int[NUM_MULTS-2]), - .a_casc_out (a_casc_int_aux) - ); - - - // - // AUX - // - modexpng_mac mac_aux - ( - .clk (clk), - .ce (ce_aux), - .clr (clr_aux), - .casc_a (casc_a_aux), - .a_in (a_in_aux), - .b_in (b_in), - .p_out (p_out_aux), - .a_casc_in (a_casc_int_aux), - .a_casc_out () - ); - - -endmodule diff --git a/rtl/modexpng_mem.v b/rtl/modexpng_mem.v deleted file mode 100644 index ca89214..0000000 --- a/rtl/modexpng_mem.v +++ /dev/null @@ -1,93 +0,0 @@ -// -// TODO: Add license text! -// - -module modexpng_mem # -( - parameter MEM_WIDTH = 17, - parameter MEM_ADDR_BITS = 6 -) -( - input clk, - - input [MEM_ADDR_BITS-1:0] a_addr, - input a_en, - input a_wr, - input [MEM_WIDTH -1:0] a_in, - output [MEM_WIDTH -1:0] a_out, - - input [MEM_ADDR_BITS-1:0] b_addr, - input b_en, - input b_reg_en, - output [MEM_WIDTH -1:0] b_out -); - - - // - // BRAM - // - (* RAM_STYLE="BLOCK" *) - reg [MEM_WIDTH-1:0] bram[0:(2**MEM_ADDR_BITS)-1]; - - - // - // Initialization for Simulation - // - /* - integer c; - initial begin - for (c=0; c<(2**MEM_ADDR_BITS); c=c+1) - bram[c] = {MEM_WIDTH{1'b0}}; - end - */ - - - - // - // Output Registers - // - reg [MEM_WIDTH-1:0] bram_b; - reg [MEM_WIDTH-1:0] bram_b_reg; - - assign a_out = 32'hDEADCE11; - assign b_out = bram_b_reg; - - - // - // Note, that when both ports are accessing the same location, conflict can - // potentionally arise. See Xilinx UG473 (pages 19-20, "Conflict - // Avoidance") for more information. 
In our configuration to avoid that the - // write port must be coded to operate in READ_FIRST mode. If the write - // port is overwriting the same address the read port is accessing, the - // write port must read the previously stored data (not the data it is - // writing, as that would be WRITE_FIRST mode). - // - - - // - // Write-Only Port A - // - always @(posedge clk) - // - if (a_en) - // - if (a_wr) bram[a_addr] <= a_in; - - - // - // Read-Only Port B - // - always @(posedge clk) - // - if (b_en) - // - bram_b <= bram[b_addr]; - - always @(posedge clk) - // - if (b_reg_en) - // - bram_b_reg <= bram_b; - - -endmodule diff --git a/rtl/modexpng_mmm_col_index.v b/rtl/modexpng_mmm_col_index.v deleted file mode 100644 index b904795..0000000 --- a/rtl/modexpng_mmm_col_index.v +++ /dev/null @@ -1,90 +0,0 @@ -module modexpng_mmm_col_index -( - clk, - index_last, - fsm_state_next, - col_index, - col_index_done, - col_index_zero, - col_index_next, - col_index_prev -); - - - // - // Includes - // - //`include "modexpng_parameters.vh" - //`include "modexpng_parameters_x8.vh" - `include "modexpng_mmm_fsm.vh" - - - // - // Parameters - // - parameter INDEX_WIDTH = 6; - - - // - // Ports - // - input clk; - input [ INDEX_WIDTH-1:0] index_last; - input [FSM_STATE_WIDTH-1:0] fsm_state_next; - output [ INDEX_WIDTH-4:0] col_index; - output col_index_done; - output [ INDEX_WIDTH-4:0] col_index_zero; - output [ INDEX_WIDTH-4:0] col_index_next; - output [ INDEX_WIDTH-4:0] col_index_prev; - - - // - // Registers - // - reg [INDEX_WIDTH-4:0] col_index_reg; - reg [INDEX_WIDTH-4:0] col_index_last; - reg [INDEX_WIDTH-4:0] col_index_dly; - - - // - // Mapping - // - assign col_index = col_index_reg; - assign col_index_prev = col_index_dly; - - - // - // Handy Wires - // - assign col_index_done = col_index == col_index_last; - assign col_index_zero = {(INDEX_WIDTH-3){1'b0}}; - assign col_index_next = col_index + 1'b1; - - - // - // Increment Logic - // - always @(posedge clk) - // - case 
(fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_TRIG: begin - col_index_reg <= col_index_zero; - col_index_last <= index_last[INDEX_WIDTH-1:3]; - end - // - FSM_STATE_MULT_SQUARE_COL_N_TRIG: - col_index_reg <= col_index_next; - // - endcase - - - // - // Delay Logic - // - always @(posedge clk) - // - col_index_dly <= col_index; - - -endmodule diff --git a/rtl/modexpng_mmm_din_addr.v b/rtl/modexpng_mmm_din_addr.v deleted file mode 100644 index 565c7e0..0000000 --- a/rtl/modexpng_mmm_din_addr.v +++ /dev/null @@ -1,167 +0,0 @@ -module modexpng_mmm_din_addr -( - clk, rst_n, - index_last, - fsm_state_next, - col_index_zero, col_index_next, - din_addr, din_bank, din_ena, din_reg_ena, - din_addr_cnt, din_addr_cnt_last, - din_addr_cnt_lower_prev, din_addr_cnt_upper_prev -); - - - // - // Includes - // - `include "modexpng_parameters.vh" - //`include "modexpng_parameters_x8.vh" - `include "modexpng_mmm_fsm.vh" - - - // - // Parameters - // - parameter INDEX_WIDTH = 6; - - - // - // Ports - // - input clk; - input rst_n; - input [ INDEX_WIDTH-1:0] index_last; - input [FSM_STATE_WIDTH-1:0] fsm_state_next; - input [ INDEX_WIDTH-4:0] col_index_zero; - input [ INDEX_WIDTH-4:0] col_index_next; - output [ INDEX_WIDTH-4:0] din_addr; - output [ 3-1:0] din_bank; - output [ 1-1:0] din_ena; - output [ 1-1:0] din_reg_ena; - output [ INDEX_WIDTH-1:0] din_addr_cnt; - output [ INDEX_WIDTH-1:0] din_addr_cnt_last; - output [ 3-1:0] din_addr_cnt_lower_prev; - output [ INDEX_WIDTH-4:0] din_addr_cnt_upper_prev; - - - // - // Address - // - reg [INDEX_WIDTH-1:0] din_addr_reg; - wire [INDEX_WIDTH-1:0] din_addr_zero = {INDEX_WIDTH{1'b0}}; - reg [INDEX_WIDTH-1:0] din_addr_last; - wire [INDEX_WIDTH-1:0] din_addr_prev = (din_addr_reg == din_addr_zero) ? 
din_addr_last : din_addr_reg - 1'b1; - - reg [INDEX_WIDTH-1:0] din_addr_cnt_reg; - wire [INDEX_WIDTH-1:0] din_addr_cnt_zero = {INDEX_WIDTH{1'b0}}; - wire [INDEX_WIDTH-1:0] din_addr_cnt_next = din_addr_cnt_reg + 1'b1; - reg [INDEX_WIDTH-1:0] din_addr_cnt_last_reg; - wire [ 3-1:0] din_addr_cnt_lower = din_addr_cnt_reg[ 3-1:0]; - wire [INDEX_WIDTH-4:0] din_addr_cnt_upper = din_addr_cnt_reg[INDEX_WIDTH-1:3]; - reg [ 3-1:0] din_addr_cnt_lower_dly; - reg [INDEX_WIDTH-4:0] din_addr_cnt_upper_dly; - - reg [ 3-1:0] din_bank_reg; - - - // - // Enables - // - reg din_ena_reg = 1'b0; - reg din_reg_ena_reg = 1'b0; - - always @(posedge clk or negedge rst_n) - // - if (!rst_n) - din_ena_reg <= 1'b0; - else case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: - din_ena_reg <= 1'b1; - // - default: - din_ena_reg <= 1'b0; - // - endcase - - always @(posedge clk or negedge rst_n) - // - if (!rst_n) - din_reg_ena_reg <= 1'b0; - else - din_reg_ena_reg <= din_ena_reg; - - - // - // Address Mapping - // - assign din_addr = din_addr_reg[INDEX_WIDTH-1:3]; - - assign din_addr_cnt = din_addr_cnt_reg; - assign din_addr_cnt_last = din_addr_cnt_last_reg; - assign din_addr_cnt_lower_prev = din_addr_cnt_lower_dly; - assign din_addr_cnt_upper_prev = din_addr_cnt_upper_dly; - - assign din_bank = din_bank_reg; - - - // - // Enable Mapping - // - assign din_ena = din_ena_reg; - assign din_reg_ena = din_reg_ena_reg; - - - // - // Delay - // - always @(posedge clk) begin - din_addr_cnt_lower_dly <= din_addr_cnt_lower; - din_addr_cnt_upper_dly <= din_addr_cnt_upper; - end - - - always @(posedge clk) - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_TRIG: begin - din_addr_reg <= {col_index_zero, {3{1'b0}}}; - din_addr_last <= index_last; - din_addr_cnt_reg <= din_addr_cnt_zero; - din_addr_cnt_last_reg <= index_last; - end - // - FSM_STATE_MULT_SQUARE_COL_N_TRIG: begin - 
din_addr_reg <= {col_index_next, {3{1'b0}}}; - din_addr_cnt_reg <= din_addr_cnt_zero; - end - // - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin - din_addr_reg <= din_addr_prev; - din_addr_cnt_reg <= din_addr_cnt_next; - end - // - //default: - // - endcase - - always @(posedge clk) - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: - din_bank_reg = BANK_XY_T1T2; - // - default: - din_bank_reg = BANK_XY_ANY; - // - endcase - -endmodule diff --git a/rtl/modexpng_mmm_dout_addr.v b/rtl/modexpng_mmm_dout_addr.v deleted file mode 100644 index 3749d82..0000000 --- a/rtl/modexpng_mmm_dout_addr.v +++ /dev/null @@ -1,167 +0,0 @@ -module modexpng_mmm_dout_addr -( - clk, rst_n, - //index_last, - fsm_state, - load_xy_addr, - load_addr_zero, - load_nn_coeff_addr_done, - /* - - col_index_zero, col_index_next,*/ - x_dout_addr, y_dout_addr, - x_dout_ena, y_dout_ena, - x_dout_bank, y_dout_bank - -); - - - // - // Includes - // - `include "modexpng_parameters.vh" - `include "modexpng_parameters_x8.vh" - `include "modexpng_mmm_fsm.vh" - - - // - // Parameters - // - parameter INDEX_WIDTH = 6; - - - // - // Ports - // - input clk; - input rst_n; - //input [ INDEX_WIDTH-1:0] index_last; - input [FSM_STATE_WIDTH-1:0] fsm_state; - input [INDEX_WIDTH:0] load_xy_addr; // address - input load_addr_zero; - input load_nn_coeff_addr_done; - //input [ INDEX_WIDTH-4:0] col_index_zero; - //input [ INDEX_WIDTH-4:0] col_index_next; - output [INDEX_WIDTH-4:0] x_dout_addr; - output [INDEX_WIDTH-4:0] y_dout_addr; - - output [NUM_MULTS-1:0] x_dout_ena; - output [NUM_MULTS-1:0] y_dout_ena; - - output [3-1:0] x_dout_bank; - output [3-1:0] y_dout_bank; - - - // - // Registers - // - reg [INDEX_WIDTH-4:0] x_dout_addr_reg; //clog2 - reg [INDEX_WIDTH-4:0] y_dout_addr_reg; //clog2 - - reg [NUM_MULTS-1:0] x_dout_ena_reg = {NUM_MULTS{1'b0}}; - reg 
[NUM_MULTS-1:0] y_dout_ena_reg = {NUM_MULTS{1'b0}}; - - reg [NUM_MULTS-1:0] x_dout_ena_int; - reg [NUM_MULTS-1:0] y_dout_ena_int; - - reg [3-1:0] x_dout_bank_reg; - reg [3-1:0] y_dout_bank_reg; - - - // - // Mapping - // - assign x_dout_addr = x_dout_addr_reg; - assign y_dout_addr = y_dout_addr_reg; - - assign x_dout_ena = x_dout_ena_reg; - assign y_dout_ena = y_dout_ena_reg; - - assign x_dout_bank = x_dout_bank_reg; - assign y_dout_bank = y_dout_bank_reg; - - - always @(posedge clk) - // - case (fsm_state) - // - FSM_STATE_LOAD_T1T2_3: begin - x_dout_addr_reg <= load_xy_addr[INDEX_WIDTH-1:3]; - y_dout_addr_reg <= load_xy_addr[INDEX_WIDTH-1:3]; - end - // - FSM_STATE_LOAD_NN_COEFF_3: begin - x_dout_addr_reg <= !load_nn_coeff_addr_done ? load_xy_addr[INDEX_WIDTH-1:3] : BANK_XY_AUX_ADDR_N_COEFF[INDEX_WIDTH-4:0]; - y_dout_addr_reg <= !load_nn_coeff_addr_done ? load_xy_addr[INDEX_WIDTH-1:3] : BANK_XY_AUX_ADDR_N_COEFF[INDEX_WIDTH-4:0]; - end - // - default: begin - x_dout_addr_reg <= {INDEX_WIDTH-3{1'bX}}; - y_dout_addr_reg <= {INDEX_WIDTH-3{1'bX}}; - end - // - endcase - - wire [NUM_MULTS-1:0] load_xy_ena_init = {{NUM_MULTS-1{1'b0}}, 1'b1}; - - always @(posedge clk) - // - case (fsm_state) - // - FSM_STATE_LOAD_T1T2_2: begin - x_dout_ena_int <= load_addr_zero ? load_xy_ena_init : {x_dout_ena_int[NUM_MULTS-2:0], x_dout_ena_int[NUM_MULTS-1]}; - y_dout_ena_int <= load_addr_zero ? load_xy_ena_init : {y_dout_ena_int[NUM_MULTS-2:0], y_dout_ena_int[NUM_MULTS-1]}; - end - // - FSM_STATE_LOAD_NN_COEFF_2: begin - x_dout_ena_int <= load_addr_zero ? load_xy_ena_init : {x_dout_ena_int[NUM_MULTS-2:0], x_dout_ena_int[NUM_MULTS-1] & ~load_nn_coeff_addr_done}; - y_dout_ena_int <= load_addr_zero ? 
load_xy_ena_init : {y_dout_ena_int[NUM_MULTS-2:0], y_dout_ena_int[NUM_MULTS-1]}; - end - // - endcase - - - always @(posedge clk or negedge rst_n) - // - if (!rst_n) begin - x_dout_ena_reg <= {NUM_MULTS{1'b0}}; - y_dout_ena_reg <= {NUM_MULTS{1'b0}}; - end else case (fsm_state) - // - FSM_STATE_LOAD_T1T2_3, - FSM_STATE_LOAD_NN_COEFF_3: begin - x_dout_ena_reg <= x_dout_ena_int; - y_dout_ena_reg <= y_dout_ena_int; - end - // - default: begin - x_dout_ena_reg <= {NUM_MULTS{1'b0}}; - y_dout_ena_reg <= {NUM_MULTS{1'b0}}; - end - // - endcase - - - always @(posedge clk) - // - case (fsm_state) - // - FSM_STATE_LOAD_T1T2_3: begin - x_dout_bank_reg <= BANK_X_T1; - y_dout_bank_reg <= BANK_Y_T2; - end - // - FSM_STATE_LOAD_NN_COEFF_3: begin - x_dout_bank_reg <= !load_nn_coeff_addr_done ? BANK_X_N : BANK_XY_AUX; - y_dout_bank_reg <= !load_nn_coeff_addr_done ? BANK_Y_N_COEFF : BANK_XY_AUX; - end - // - default: begin - x_dout_bank_reg <= BANK_XY_ANY; - y_dout_bank_reg <= BANK_XY_ANY; - end - // - endcase - - -endmodule diff --git a/rtl/modexpng_mmm_dual_x8.v b/rtl/modexpng_mmm_dual_x8.v new file mode 100644 index 0000000..2e4f4e0 --- /dev/null +++ b/rtl/modexpng_mmm_dual_x8.v @@ -0,0 +1,951 @@ +module modexpng_mmm_dual_x8 +( + clk, rst, + + ena, rdy, + + + ladder_mode, + word_index_last, + word_index_last_minus1, + + rd_wide_xy_ena, + rd_wide_xy_ena_aux, + rd_wide_xy_bank, + rd_wide_xy_bank_aux, + rd_wide_xy_addr, + rd_wide_xy_addr_aux, + rd_wide_x_dout, + rd_wide_y_dout, + rd_wide_x_dout_aux, + rd_wide_y_dout_aux, + + rd_narrow_xy_ena, + rd_narrow_xy_bank, + rd_narrow_xy_addr, + rd_narrow_x_dout, + rd_narrow_y_dout, + + rcmb_wide_xy_bank, + rcmb_wide_xy_addr, + rcmb_wide_x_dout, + rcmb_wide_y_dout, + rcmb_wide_xy_valid, + + rcmb_narrow_xy_bank, + rcmb_narrow_xy_addr, + rcmb_narrow_x_dout, + rcmb_narrow_y_dout, + rcmb_narrow_xy_valid, + + rcmb_xy_bank, + rcmb_xy_addr, + rcmb_x_dout, + rcmb_y_dout, + rcmb_xy_valid, + + rdct_ena +); + + + // + // Headers + // + `include 
"../rtl_1/modexpng_mmm_fsm_old.vh" + `include "../rtl_1/modexpng_parameters_old.vh" + `include "../rtl_1/modexpng_parameters_x8_old.vh" + + + // + // Ports + // + input clk; + input rst; + + input ena; + output rdy; + + input ladder_mode; + input [7:0] word_index_last; + input [7:0] word_index_last_minus1; + + output rd_wide_xy_ena; + output rd_wide_xy_ena_aux; + output [ 1:0] rd_wide_xy_bank; + output [ 1:0] rd_wide_xy_bank_aux; + output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; + output [ 8-1:0] rd_wide_xy_addr_aux; + input [18*NUM_MULTS/2-1:0] rd_wide_x_dout; + input [18*NUM_MULTS/2-1:0] rd_wide_y_dout; + input [ 18-1:0] rd_wide_x_dout_aux; + input [ 18-1:0] rd_wide_y_dout_aux; + + output rd_narrow_xy_ena; + output [ 1:0] rd_narrow_xy_bank; + output [ 7:0] rd_narrow_xy_addr; + input [18-1:0] rd_narrow_x_dout; + input [18-1:0] rd_narrow_y_dout; + + output [ 1:0] rcmb_wide_xy_bank; + output [ 7:0] rcmb_wide_xy_addr; + output [17:0] rcmb_wide_x_dout; + output [17:0] rcmb_wide_y_dout; + output rcmb_wide_xy_valid; + + output [ 1:0] rcmb_narrow_xy_bank; + output [ 7:0] rcmb_narrow_xy_addr; + output [17:0] rcmb_narrow_x_dout; + output [17:0] rcmb_narrow_y_dout; + output rcmb_narrow_xy_valid; + + output [ 1:0] rcmb_xy_bank; + output [ 7:0] rcmb_xy_addr; + output [17:0] rcmb_x_dout; + output [17:0] rcmb_y_dout; + output rcmb_xy_valid; + + output rdct_ena; + + + // + // FSM Declaration + // + reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE; + reg [FSM_STATE_WIDTH-1:0] fsm_state_next; + + wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; + wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle; + wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle; + + + // + // FSM Process + // + always @(posedge clk) + // + if (rst) fsm_state <= FSM_STATE_IDLE; + else fsm_state <= fsm_state_next; + + + // + // Storage Control Interface + // + reg wide_xy_ena = 1'b0; + reg wide_xy_ena_aux = 1'b0; + reg [ 1:0] wide_xy_bank; + reg [ 1:0] wide_xy_bank_aux; + reg [ 8-1:0] 
wide_xy_addr[0:3]; + reg [ 8-1:0] wide_xy_addr_aux; + + reg narrow_xy_ena = 1'b0; + reg [ 1:0] narrow_xy_bank; + reg [ 7:0] narrow_xy_addr; + reg [ 7:0] narrow_xy_addr_dly; + + assign rd_wide_xy_ena = wide_xy_ena; + assign rd_wide_xy_ena_aux = wide_xy_ena_aux; + assign rd_wide_xy_bank = wide_xy_bank; + assign rd_wide_xy_bank_aux = wide_xy_bank_aux; + assign rd_wide_xy_addr_aux = wide_xy_addr_aux; + + assign rd_narrow_xy_ena = narrow_xy_ena; + assign rd_narrow_xy_bank = narrow_xy_bank; + assign rd_narrow_xy_addr = narrow_xy_addr; + + genvar z; + generate for (z=0; z<(NUM_MULTS/2); z=z+1) + begin : gen_rd_wide_xy_addr + assign rd_wide_xy_addr[8*z+:8] = wide_xy_addr[z]; + end + endgenerate + + // + // Column Counter + // + reg [4:0] col_index; // current column index + reg [4:0] col_index_prev; // delayed column index value + reg [4:0] col_index_last; // index of the very last column + reg [4:0] col_index_next; // precomputed next column index + reg col_is_last; // flag set during the very last column + + always @(posedge clk) + // + col_index_prev <= col_index; + + // + // Column Counter Increment Logic + // + always @(posedge clk) + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin + col_index <= 5'd0; + col_index_last <= word_index_last[7:3]; + col_index_next <= 5'd1; + col_is_last <= 1'b0; + + end + // + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin + col_index <= col_index_next; + col_is_last <= col_index_next == col_index_last; + col_index_next <= col_index_next == col_index_last ? 
5'd0 : col_index_next + 5'd1; + end + // + endcase + + + // + // Completion Flags + // + wire square_almost_done_comb; + reg square_almost_done_flop = 1'b0; + reg square_surely_done_flop = 1'b0; + + wire triangle_almost_done_comb; + reg triangle_almost_done_flop = 1'b0; + reg triangle_surely_done_flop = 1'b0; + reg triangle_tardy_done_flop = 1'b0; + + wire rectangle_almost_done_comb; + reg rectangle_almost_done_flop = 1'b0; + reg rectangle_surely_done_flop = 1'b0; + reg rectangle_tardy_done_flop = 1'b0; + + assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1; + assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index); + assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1; + + // + // Square Completion Flags + // + always @(posedge clk) begin + // + case (fsm_state) + // + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: + square_almost_done_flop <= square_almost_done_comb; + // + default: + square_almost_done_flop <= 1'b0; + // + endcase + // + square_surely_done_flop <= square_almost_done_flop; + // + end + + // + // Triangle Completion Flags + // + always @(posedge clk) begin + // + case (fsm_state) + // + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: + triangle_almost_done_flop <= triangle_almost_done_comb; + // + default: + triangle_almost_done_flop <= 1'b0; + // + endcase + // + triangle_surely_done_flop <= triangle_almost_done_flop; + triangle_tardy_done_flop <= triangle_surely_done_flop; + // + end + + // + // Rectangle Completion Flags + // + always @(posedge clk) begin + // + case (fsm_state) + // + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: + rectangle_almost_done_flop <= rectangle_almost_done_comb; + // + default: + rectangle_almost_done_flop <= 1'b0; + // + endcase + // + rectangle_surely_done_flop <= rectangle_almost_done_flop; + rectangle_tardy_done_flop <= 
rectangle_surely_done_flop; + // + end + + + // + // Narrow Storage Control Logic + // + always @(posedge clk) + // + if (rst) narrow_xy_ena <= 1'b0; + else begin + // + // Narrow Address + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT: narrow_xy_addr <= 8'd0; + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0; + // + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? + 8'd0 : narrow_xy_addr + 1'b1; + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? + 8'd1 : narrow_xy_addr + 1'b1; + // + default: narrow_xy_addr <= 8'dX; + // + endcase + // + // Narrow Bank + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= BANK_NARROW_T1T2; + // + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? 
+ BANK_NARROW_EXT : BANK_NARROW_N_COEFF; + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? + BANK_NARROW_EXT : BANK_NARROW_Q; + // + default: narrow_xy_bank <= 2'bXX; + // + endcase + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop; + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? 
~triangle_almost_done_flop : ~triangle_surely_done_flop; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop; + // + default: narrow_xy_ena <= 1'b0; + // + endcase + // + end + + + // + // Wide Storage Control Logic + // + + wire [2:0] wide_offset_rom[0:3]; + + generate for (z=1; z 8'd0) + wide_xy_addr_next = wide_xy_addr_current - 1'b1; + else + wide_xy_addr_next = wide_xy_addr_last; + end + endfunction + + integer j; + always @(posedge clk) + // + if (rst) begin + wide_xy_ena <= 1'b0; + wide_xy_ena_aux <= 1'b0; + end else begin + // + // Wide Address + // + for (j=0; j<(NUM_MULTS/2); j=j+1) + // + case (fsm_state_next) + // + // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // + FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + 
FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + default: wide_xy_addr[j] <= 8'dX; + endcase + // + // Wide Aux Address + // + case (fsm_state_next) + // + // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // + FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0}; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX; + //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ? 
+ //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4]; + // + default: wide_xy_addr_aux <= 8'dX; + endcase + // + // Wide Bank + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_T1T2; + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_ABL; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_ABL; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N; + default: wide_xy_bank <= 3'bXXX; + endcase + // + // Wide Aux Bank + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_T1T2; + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_ABH; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_ABL; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's) + case (rcmb_xy_bank) + 
BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_ABL; + BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_ABH; + //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX + default: wide_xy_bank_aux <= 3'bXXX; + endcase + else wide_xy_bank_aux <= 3'bXXX; + default: wide_xy_bank_aux <= 3'bXXX; + endcase + // + // Wide Enable + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1; + default: wide_xy_ena <= 1'b0; + endcase + // + // Wide Aux Enable + // + case (fsm_state_next) + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML); + default: wide_xy_ena_aux <= 1'b0; + endcase + // + end + + + // + // Delay Lines + // + always @(posedge clk) + // + narrow_xy_addr_dly <= narrow_xy_addr; + + + // + // DSP Array Logic + // + reg dsp_xy_ce_a = 1'b0; + 
reg dsp_xy_ce_b = 1'b0; + reg dsp_xy_ce_b_dly = 1'b0; + reg dsp_xy_ce_m = 1'b0; + reg dsp_xy_ce_p = 1'b0; + reg dsp_xy_ce_mode = 1'b0; + + reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}}; + + wire [5*18-1:0] dsp_x_a; + wire [5*18-1:0] dsp_y_a; + + reg [1*16-1:0] dsp_x_b; + reg [1*16-1:0] dsp_y_b; + + reg [ 1:0] dsp_xy_b_carry; + + wire [9*47-1:0] dsp_x_p; + wire [9*47-1:0] dsp_y_p; + + //generate for (z=0; z<(NUM_MULTS/2); z=z+1) + //begin : gen_dsp_xy_a_split + //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z]; + //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z]; + //end + //endgenerate + + assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout}; + assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout}; + + //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux; + //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux; + + always @(posedge clk) + // + dsp_xy_ce_b_dly <= dsp_xy_ce_b; + + + modexpng_dsp_array_block dsp_array_block_x + ( + .clk (clk), + + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), + + .mode_z (dsp_xy_mode_z), + + .a (dsp_x_a), + .b (dsp_x_b), + .p (dsp_x_p) + ); + + modexpng_dsp_array_block dsp_array_block_y + ( + .clk (clk), + + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), + + .mode_z (dsp_xy_mode_z), + + .a (dsp_y_a), + .b (dsp_y_b), + .p (dsp_y_p) + ); + + + + + // + // DSP Control Logic + // + reg narrow_xy_ena_dly1 = 1'b0; + reg narrow_xy_ena_dly2 = 1'b0; + + always @(posedge clk) + // + if (rst) begin + // + narrow_xy_ena_dly1 <= 1'b0; + narrow_xy_ena_dly2 <= 1'b0; + // + dsp_xy_ce_a <= 1'b0; + dsp_xy_ce_b <= 1'b0; + dsp_xy_ce_m <= 1'b0; + dsp_xy_ce_p <= 1'b0; + dsp_xy_ce_mode <= 1'b0; + // + end else begin + // + narrow_xy_ena_dly1 <= narrow_xy_ena; + narrow_xy_ena_dly2 <= narrow_xy_ena_dly1; + // + dsp_xy_ce_a <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2; + dsp_xy_ce_b <= narrow_xy_ena_dly2; + dsp_xy_ce_m <= dsp_xy_ce_b_dly; + 
dsp_xy_ce_p <= dsp_xy_ce_m; + dsp_xy_ce_mode <= dsp_xy_ce_b_dly; + // + end + + // + // DSP Feed Logic + // + reg dsp_merge_xy_b; + + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_merge_xy_b <= 1'b1; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0; + endcase + + // + // On-the-fly Carry Recombination + // + wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry; + + always @(posedge clk) + // + if (narrow_xy_ena_dly2) begin // rewrite + // + if (!dsp_merge_xy_b) begin + dsp_x_b <= rd_narrow_x_dout[15:0]; + dsp_y_b <= rd_narrow_y_dout[15:0]; + dsp_xy_b_carry <= 2'b00; + end else begin + dsp_x_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_y_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16]; + end + // + end else begin + // + dsp_x_b <= {16{1'bX}}; + dsp_y_b <= {16{1'bX}}; + // + dsp_xy_b_carry <= 2'b00; + // + end + + + reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}}; + + function [NUM_MULTS:0] calc_mac_mode_z_square; + input [ 4:0] col_index_value; + input [ 7:0] narrow_xy_addr_value; + begin + if (narrow_xy_addr_value[7:3] == col_index_value) + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_square = {1'b1, 
8'b01111111}; + endcase + else + calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; + end + endfunction + + function [NUM_MULTS:0] calc_mac_mode_z_rectangle; + input [ 4:0] col_index_value; + input [ 7:0] narrow_xy_addr_value; + begin + if (narrow_xy_addr_value[7:3] == col_index_value) + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111}; + endcase + else + calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; + end + endfunction + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly); + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly); + default: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + endcase + + always @(posedge clk) begin + dsp_xy_mode_z <= dsp_xy_mode_z_adv1; + // + dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2; + dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3; + dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4; + end + + + + + + // 
+ // Recombinator + // + reg rcmb_ena = 1'b0; + wire rcmb_rdy; + + modexpng_recombinator_block recombinator_block + ( + .clk (clk), + + .ena (rcmb_ena), + .rdy (rcmb_rdy), + + .fsm_state_next (fsm_state_next), + + .word_index_last (word_index_last), + + .dsp_xy_ce_p (dsp_xy_ce_p), + .dsp_x_p (dsp_x_p), + .dsp_y_p (dsp_y_p), + + .col_index (col_index), + .col_index_last (col_index_last), + + .rd_narrow_xy_addr (narrow_xy_addr), + .rd_narrow_xy_bank (narrow_xy_bank), + + .rcmb_wide_xy_bank (rcmb_wide_xy_bank), + .rcmb_wide_xy_addr (rcmb_wide_xy_addr), + .rcmb_wide_x_dout (rcmb_wide_x_dout), + .rcmb_wide_y_dout (rcmb_wide_y_dout), + .rcmb_wide_xy_valid (rcmb_wide_xy_valid), + + .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), + .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), + .rcmb_narrow_x_dout (rcmb_narrow_x_dout), + .rcmb_narrow_y_dout (rcmb_narrow_y_dout), + .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid), + + .rdct_narrow_xy_bank (rcmb_xy_bank), + .rdct_narrow_xy_addr (rcmb_xy_addr), + .rdct_narrow_x_dout (rcmb_x_dout), + .rdct_narrow_y_dout (rcmb_y_dout), + .rdct_narrow_xy_valid (rcmb_xy_valid) + + ); + + + // + // Recombinator Enable Logic + // + always @(posedge clk) + // + if (rst) rcmb_ena <= 1'b0; + else rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p; + + + // + // Handy Completion Flags + // + wire square_done = square_surely_done_flop; + wire triangle_done = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop; + wire rectangle_done = rectangle_tardy_done_flop; + + + // + // FSM Transition Logic + // + assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; + assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT; + assign fsm_state_after_mult_rectangle = col_is_last ? 
FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; + + always @* begin + // + fsm_state_next = FSM_STATE_IDLE; + // + case (fsm_state) + FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE; + + FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ; + FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ; + FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY; + + FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ; + FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY; + + FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF; + + FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY; + + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ; + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ; + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY; + + FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? 
FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF; + + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ; + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY; + + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ; + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ; + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY; + + FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF; + + default: fsm_state_next = FSM_STATE_IDLE ; + + endcase + // + end + + + // + // Reductor Control Logic + // + reg rdct_ena_reg = 1'b0; + + assign rdct_ena = rdct_ena_reg; + + always @(posedge clk) // add reset!!! 
+ // + case (fsm_state) + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1; + default: rdct_ena_reg <= 1'b0; + endcase + + + +endmodule diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh deleted file mode 100644 index 3bdae66..0000000 --- a/rtl/modexpng_mmm_fsm.vh +++ /dev/null @@ -1,43 +0,0 @@ -localparam FSM_STATE_WIDTH = 32; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_IDLE = 0; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_1 = 1; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_2 = 2; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_3 = 3; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_1 = 4; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_2 = 5; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_3 = 6; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_INIT = 11; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_INIT = 14; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32; -localparam 
[FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999; diff --git a/rtl/modexpng_mmm_fsm_old.vh b/rtl/modexpng_mmm_fsm_old.vh new file mode 100644 index 0000000..3bdae66 --- /dev/null +++ b/rtl/modexpng_mmm_fsm_old.vh @@ -0,0 +1,43 @@ +localparam FSM_STATE_WIDTH = 32; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_IDLE = 0; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_1 = 1; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_2 = 2; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_3 = 3; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_1 = 4; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_2 = 5; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_3 = 6; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_INIT = 11; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_INIT = 14; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25; +localparam 
[FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999; diff --git a/rtl/modexpng_mmm_pad.v b/rtl/modexpng_mmm_pad.v deleted file mode 100644 index a2a21ff..0000000 --- a/rtl/modexpng_mmm_pad.v +++ /dev/null @@ -1,153 +0,0 @@ -module modexpng_mmm_pad -( - clk, rst_n, - fsm_state, - load_xy_addr_lsb, - pad_x_rd_addr, pad_y_rd_addr, - pad_x_rd_ena, pad_y_rd_ena, - pad_x_rd_dout, pad_y_rd_dout, - load_x_din, load_y_din -); - - - // - // Includes - // - `include "modexpng_parameters.vh" - //`include "modexpng_parameters_x8.vh" - `include "modexpng_mmm_fsm.vh" - - - // - // Parameters - // - parameter INDEX_WIDTH = 6; - - - // - // Ports - // - input clk; - input rst_n; - input [FSM_STATE_WIDTH-1:0] fsm_state; - - input [INDEX_WIDTH-1:0] load_xy_addr_lsb; - - input [WORD_WIDTH-1:0] load_x_din; - input [WORD_WIDTH-1:0] load_y_din; - - input [INDEX_WIDTH-1:0] pad_x_rd_addr; - input [INDEX_WIDTH-1:0] pad_y_rd_addr; - - input pad_x_rd_ena; - input pad_y_rd_ena; - - output [WORD_WIDTH-1:0] pad_x_rd_dout; - output [WORD_WIDTH-1:0] pad_y_rd_dout; - - - // - // Registers - // - reg [INDEX_WIDTH-1:0] pad_x_wr_addr; - reg [INDEX_WIDTH-1:0] pad_y_wr_addr; - reg pad_x_wr_ena; - reg pad_y_wr_ena; - reg [ WORD_WIDTH-1:0] pad_x_wr_din; - reg [ WORD_WIDTH-1:0] pad_y_wr_din; - - bram_1wo_1ro_readfirst_ce # - ( - .MEM_WIDTH 
(WORD_WIDTH), - .MEM_ADDR_BITS (INDEX_WIDTH) - ) - pad_x - ( - .clk (clk), - - .a_addr (pad_x_wr_addr), - .a_en (pad_x_wr_ena), - .a_wr (pad_x_wr_ena), - .a_in (pad_x_wr_din), - .a_out (), // unused - - .b_addr (pad_x_rd_addr), - .b_en (pad_x_rd_ena), - .b_out (pad_x_rd_dout) - ); - - bram_1wo_1ro_readfirst_ce # - ( - .MEM_WIDTH (WORD_WIDTH), - .MEM_ADDR_BITS (INDEX_WIDTH) - ) - pad_y - ( - .clk (clk), - - .a_addr (pad_y_wr_addr), - .a_en (pad_y_wr_ena), - .a_wr (pad_y_wr_ena), - .a_in (pad_y_wr_din), - .a_out (), // unused - - .b_addr (pad_y_rd_addr), - .b_en (pad_y_rd_ena), - .b_out (pad_y_rd_dout) - ); - - - always @(posedge clk) - // - case (fsm_state) - // - FSM_STATE_LOAD_T1T2_3: begin - pad_x_wr_addr <= load_xy_addr_lsb; - pad_y_wr_addr <= load_xy_addr_lsb; - end - // - default: begin - pad_x_wr_addr <= {INDEX_WIDTH{1'bX}}; - pad_y_wr_addr <= {INDEX_WIDTH{1'bX}}; - end - // - endcase - - always @(posedge clk) - // - case (fsm_state) - // - FSM_STATE_LOAD_T1T2_3: begin - pad_x_wr_din <= load_x_din; - pad_y_wr_din <= load_y_din; - end - // - default: begin - pad_x_wr_din <= load_x_din; - pad_y_wr_din <= load_y_din; - end - // - endcase - - - always @(posedge clk or negedge rst_n) - // - if (!rst_n) begin - pad_x_wr_ena <= 1'b0; - pad_y_wr_ena <= 1'b0; - end else case (fsm_state) - // - FSM_STATE_LOAD_T1T2_3: begin - pad_x_wr_ena <= 1'b1; - pad_y_wr_ena <= 1'b1; - end - // - default: begin - pad_x_wr_ena <= 1'b0; - pad_y_wr_ena <= 1'b0; - end - // - endcase - - -endmodule diff --git a/rtl/modexpng_mmm_transporter.v b/rtl/modexpng_mmm_transporter.v deleted file mode 100644 index a8f309a..0000000 --- a/rtl/modexpng_mmm_transporter.v +++ /dev/null @@ -1,157 +0,0 @@ -module modexpng_mmm_transporter -( - clk, - ena, - index_last, - fsm_state, - fsm_state_next, - load_phase, - load_xy_addr, - load_xy_addr_vld, - load_xy_req, - load_addr_zero, - load_t1t2_addr_done, - load_nn_coeff_addr_done -); - - - // - // Includes - // - //`include "modexpng_parameters.vh" - 
//`include "modexpng_parameters_x8.vh" - `include "modexpng_mmm_fsm.vh" - - - // - // Parameters - // - parameter INDEX_WIDTH = 6; - - - // - // Ports - // - input clk; - input ena; - input [ INDEX_WIDTH-1:0] index_last; - input [FSM_STATE_WIDTH-1:0] fsm_state; - input [FSM_STATE_WIDTH-1:0] fsm_state_next; - output load_phase; - output [ INDEX_WIDTH:0] load_xy_addr; - output load_xy_addr_vld; - output load_xy_req; - output load_addr_zero; - output load_t1t2_addr_done; - output load_nn_coeff_addr_done; - - - // - // Load Address Generator - // - reg load_phase_reg; - reg [INDEX_WIDTH:0] load_xy_addr_reg; - reg load_xy_addr_vld_reg; - reg load_xy_req_reg; - - - // - // Mapping - // - assign load_phase = load_phase_reg; - assign load_xy_addr = load_xy_addr_reg; - assign load_xy_addr_vld = load_xy_addr_vld_reg; - assign load_xy_req = load_xy_req_reg; - - - // - // Handy Quantities - // - wire [INDEX_WIDTH:0] load_xy_addr_zero = {{INDEX_WIDTH{1'b0}}, 1'b0}; - wire [INDEX_WIDTH:0] load_xy_addr_next = load_xy_addr_reg + 1'b1; - wire [INDEX_WIDTH:0] load_xy_addr_xxx = {{INDEX_WIDTH{1'bX}}, 1'bX}; - - - // - // More Handy Quantities - // - reg [INDEX_WIDTH:0] load_t1t2_addr_last; - reg [INDEX_WIDTH:0] load_nn_coeff_addr_last; - - - // - // Flags - // - assign load_addr_zero = load_xy_addr_reg == load_xy_addr_zero; - assign load_t1t2_addr_done = load_xy_addr_reg == load_t1t2_addr_last; - assign load_nn_coeff_addr_done = load_xy_addr_reg == load_nn_coeff_addr_last; - - - // - // Last Index Latch - // - always @(posedge clk) - // - if (ena && (fsm_state == FSM_STATE_IDLE)) begin - load_t1t2_addr_last <= {1'b0, index_last}; - load_nn_coeff_addr_last <= {1'b0, index_last} + 1'b1; - end - - - // - // Update Load Phase - // - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_LOAD_T1T2_1, - FSM_STATE_LOAD_T1T2_2, - FSM_STATE_LOAD_T1T2_3: load_phase_reg <= 1'b0; - FSM_STATE_LOAD_NN_COEFF_1, - FSM_STATE_LOAD_NN_COEFF_2, - FSM_STATE_LOAD_NN_COEFF_3: load_phase_reg <= 
1'b1; - default: load_phase_reg <= 1'bX; - endcase - - - // - // Update Load Address - // - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_LOAD_T1T2_1: load_xy_addr_reg <= (fsm_state == FSM_STATE_LOAD_T1T2_3) ? load_xy_addr_next : load_xy_addr_zero; - FSM_STATE_LOAD_T1T2_2, - FSM_STATE_LOAD_T1T2_3: load_xy_addr_reg <= load_xy_addr_reg; - FSM_STATE_LOAD_NN_COEFF_1: load_xy_addr_reg <= (fsm_state == FSM_STATE_LOAD_NN_COEFF_3) ? load_xy_addr_next : load_xy_addr_zero; - FSM_STATE_LOAD_NN_COEFF_2, - FSM_STATE_LOAD_NN_COEFF_3: load_xy_addr_reg <= load_xy_addr_reg; - default load_xy_addr_reg <= load_xy_addr_xxx; - endcase - - - // - // Update Address Valid Flag - // - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_LOAD_T1T2_1, - FSM_STATE_LOAD_NN_COEFF_1: load_xy_addr_vld_reg <= 1'b1; - default load_xy_addr_vld_reg <= 1'b0; - endcase - - - // - // Update Load Request Flag - // - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_LOAD_T1T2_2, - FSM_STATE_LOAD_NN_COEFF_2: load_xy_req_reg <= 1'b1; - default load_xy_req_reg <= 1'b0; - endcase - - -endmodule diff --git a/rtl/modexpng_mmm_x8_dual.v b/rtl/modexpng_mmm_x8_dual.v deleted file mode 100644 index 99a37fa..0000000 --- a/rtl/modexpng_mmm_x8_dual.v +++ /dev/null @@ -1,550 +0,0 @@ -module modexpng_mmm_x8_dual -( - clk, rst_n, - ena, rdy, - mode, transfer, - index_last, - x_din, y_din, x_dout, y_dout, - x_din_addr, y_din_addr, x_dout_addr, y_dout_addr, - x_din_ena, y_din_ena, x_dout_ena, y_dout_ena, x_din_reg_ena, y_din_reg_ena, - x_din_bank, y_din_bank, x_dout_bank, y_dout_bank, - load_phase, load_xy_addr, load_xy_addr_vld, load_xy_req, - load_x_din, load_y_din -); - - - // - // Includes - // - `include "modexpng_parameters.vh" - `include "modexpng_parameters_x8.vh" - `include "modexpng_mmm_fsm.vh" - - - // - // Parameters - // - parameter INDEX_WIDTH = 6; - - - // - // Ports - // - input clk; - input rst_n; - - input ena; - output rdy; - - input mode; // multiply: 0 = 
T1:T1*T1, T2:T2*T1, 1 = T1:T1*T2, T2:T2*T2 - // load/unload: 0 = load, 1 = unload - input transfer; // 0 = multiply, 1 = load/unload - - input [INDEX_WIDTH-1:0] index_last; - - input [NUM_MULTS*WORD_WIDTH-1:0] x_din; - input [NUM_MULTS*WORD_WIDTH-1:0] y_din; - output [NUM_MULTS*WORD_WIDTH-1:0] x_dout; - output [NUM_MULTS*WORD_WIDTH-1:0] y_dout; - - output [INDEX_WIDTH-4:0] x_din_addr; - output [INDEX_WIDTH-4:0] y_din_addr; - output [INDEX_WIDTH-4:0] x_dout_addr; - output [INDEX_WIDTH-4:0] y_dout_addr; - - output [ 1-1:0] x_din_ena; - output [ 1-1:0] y_din_ena; - output [NUM_MULTS-1:0] x_dout_ena; - output [NUM_MULTS-1:0] y_dout_ena; - output [ 1-1:0] x_din_reg_ena; - output [ 1-1:0] y_din_reg_ena; - - output [3-1:0] x_din_bank; - output [3-1:0] y_din_bank; - output [3-1:0] x_dout_bank; - output [3-1:0] y_dout_bank; - - output load_phase; // 0 = T1, T2; 1 = N, N_COEFF - output [ INDEX_WIDTH:0] load_xy_addr; // address - output load_xy_addr_vld; // address valid - output load_xy_req; // data request - - input [WORD_WIDTH-1:0] load_x_din; // data input - input [WORD_WIDTH-1:0] load_y_din; // data input - - - // - // FSM State and Next States - // - reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE; - reg [FSM_STATE_WIDTH-1:0] fsm_state_next; - reg [FSM_STATE_WIDTH-1:0] fsm_state_after_idle; - reg [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; - - - // - // FSM Idle Next State - // - always @* - // - case ({transfer, mode}) - 2'b00, - 2'b01: fsm_state_after_idle = FSM_STATE_MULT_SQUARE_COL_0_TRIG; - 2'b10: fsm_state_after_idle = FSM_STATE_LOAD_T1T2_1; - 2'b11: fsm_state_after_idle = FSM_STATE_IDLE; //unload? 
- endcase - - - // - // Column Counter - // - wire [ INDEX_WIDTH-4:0] col_index; - wire col_index_done; - wire [ INDEX_WIDTH-4:0] col_index_zero; - wire [ INDEX_WIDTH-4:0] col_index_next; - wire [ INDEX_WIDTH-4:0] col_index_prev; - - modexpng_mmm_col_index # - ( - .INDEX_WIDTH(INDEX_WIDTH) - ) - mmm_col_index - ( - .clk (clk), - .index_last (index_last), - .fsm_state_next (fsm_state_next), - .col_index (col_index), - .col_index_done (col_index_done), - .col_index_zero (col_index_zero), - .col_index_next (col_index_next), - .col_index_prev (col_index_prev) - ); - - - // - // Load Address Generator - // - wire [INDEX_WIDTH-1:0] load_xy_addr_lsb = load_xy_addr[INDEX_WIDTH-1:0]; - wire load_addr_zero; - wire load_t1t2_addr_done; - wire load_nn_coeff_addr_done; - - modexpng_mmm_transporter # - ( - .INDEX_WIDTH(INDEX_WIDTH) - ) - transporter - ( - .clk (clk), - .ena (ena), - .index_last (index_last), - .fsm_state (fsm_state), - .fsm_state_next (fsm_state_next), - .load_phase (load_phase), - .load_xy_addr (load_xy_addr), - .load_xy_addr_vld (load_xy_addr_vld), - .load_xy_req (load_xy_req), - .load_addr_zero (load_addr_zero), - .load_t1t2_addr_done (load_t1t2_addr_done), - .load_nn_coeff_addr_done (load_nn_coeff_addr_done) - ); - - - // - // X, Y Address - // - wire [INDEX_WIDTH-1:0] x_din_addr_cnt; - wire [INDEX_WIDTH-1:0] x_din_addr_cnt_last; - wire [ 3-1:0] x_din_addr_cnt_lower_prev; - wire [INDEX_WIDTH-4:0] x_din_addr_cnt_upper_prev; - - modexpng_mmm_din_addr # - ( - .INDEX_WIDTH(INDEX_WIDTH) - ) - din_addr_x - ( - .clk (clk), - .rst_n (rst_n), - .index_last (index_last), - .fsm_state_next (fsm_state_next), - .col_index_zero (col_index_zero), - .col_index_next (col_index_next), - .din_addr (x_din_addr), - .din_bank (x_din_bank), - .din_ena (x_din_ena), - .din_reg_ena (x_din_reg_ena), - .din_addr_cnt (x_din_addr_cnt), - .din_addr_cnt_last (x_din_addr_cnt_last), - .din_addr_cnt_lower_prev (x_din_addr_cnt_lower_prev), - .din_addr_cnt_upper_prev (x_din_addr_cnt_upper_prev) 
- ); - - modexpng_mmm_dout_addr # - ( - .INDEX_WIDTH(INDEX_WIDTH) - ) - dout_addr_xy - ( - .clk (clk), - .rst_n (rst_n), - .fsm_state (fsm_state), - .load_xy_addr (load_xy_addr), - .load_addr_zero (load_addr_zero), - .load_nn_coeff_addr_done (load_nn_coeff_addr_done), - .x_dout_addr (x_dout_addr), - .y_dout_addr (y_dout_addr), - .x_dout_ena (x_dout_ena), - .y_dout_ena (y_dout_ena), - .x_dout_bank (x_dout_bank), - .y_dout_bank (y_dout_bank) - ); - - - // - // Helper Memories ("Scratchpad") - // - reg [INDEX_WIDTH-1:0] pad_xy_rd_addr; - reg pad_xy_rd_ena = 1'b0; - wire [ WORD_WIDTH-1:0] pad_x_rd_dout; - wire [ WORD_WIDTH-1:0] pad_y_rd_dout; - - wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_zero = {INDEX_WIDTH{1'b0}}; - wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_next = pad_xy_rd_addr + 1'b1; - - modexpng_mmm_pad pad - ( - .clk (clk), - .rst_n (rst_n), - .fsm_state (fsm_state), - .load_xy_addr_lsb (load_xy_addr_lsb), - .load_x_din (load_x_din), - .load_y_din (load_y_din), - .pad_x_rd_addr (pad_xy_rd_addr), - .pad_y_rd_addr (pad_xy_rd_addr), - .pad_x_rd_ena (pad_xy_rd_ena), - .pad_y_rd_ena (pad_xy_rd_ena), - .pad_x_rd_dout (pad_x_rd_dout), - .pad_y_rd_dout (pad_y_rd_dout) - ); - - - always @(posedge clk or negedge rst_n) - // - if (!rst_n) begin - pad_xy_rd_ena <= 1'b0; - end else case (fsm_state_next) - - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: - pad_xy_rd_ena <= 1'b1; - - default: - pad_xy_rd_ena <= 1'b0; - - endcase - - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG: - pad_xy_rd_addr <= pad_xy_rd_addr_zero; - - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: - pad_xy_rd_addr <= pad_xy_rd_addr_next; - - default: - pad_xy_rd_addr <= {INDEX_WIDTH{1'bX}}; - - endcase - - - - - // - // Flags - // - - wire mult_square_addr_done = x_din_addr_cnt == x_din_addr_cnt_last; - - always @* - // - 
fsm_state_after_mult_square = col_index_done ? /*FSM_STATE_MULT_TRIANGLE_TRIG*/FSM_STATE_STOP : FSM_STATE_MULT_SQUARE_COL_N_TRIG;; - - - // - // MAC Arrays - // - reg mac_x_ce = 1'b0; - reg mac_x_ce_aux = 1'b0; - reg [NUM_MULTS -1:0] mac_x_clr; - reg mac_x_clr_aux; - reg [NUM_MULTS -2:0] mac_x_casc_a; - reg mac_x_casc_a_aux; - wire [NUM_MULTS * WORD_WIDTH -1:0] mac_x_a; - reg [ 1 * WORD_WIDTH -1:0] mac_x_a_aux; - //wire [ 1 * WORD_WIDTH -1:0] mac_x_a_split[0:NUM_MULTS-1]; - reg [ 1 * WORD_WIDTH -1:0] mac_x_b; - wire [NUM_MULTS * MAC_WIDTH -1:0] mac_x_p; - wire [ 1 * MAC_WIDTH -1:0] mac_x_p_aux; - - reg mac_y_ce = 1'b0; - reg mac_y_ce_aux = 1'b0; - reg [NUM_MULTS -1:0] mac_y_clr; - reg mac_y_clr_aux; - reg [NUM_MULTS -2:0] mac_y_casc_a; - reg mac_y_casc_a_aux; - wire [NUM_MULTS * WORD_WIDTH -1:0] mac_y_a; - reg [ 1 * WORD_WIDTH -1:0] mac_y_a_aux; - //wire [ 1 * WORD_WIDTH -1:0] mac_y_a_split[0:NUM_MULTS-1]; - reg [ 1 * WORD_WIDTH -1:0] mac_y_b; - wire [NUM_MULTS * MAC_WIDTH -1:0] mac_y_p; - wire [ 1 * MAC_WIDTH -1:0] mac_y_p_aux; - - modexpng_mac_array mac_array_x - ( - .clk (clk), - .ce (mac_x_ce), - .ce_aux (mac_x_ce_aux), - .clr (mac_x_clr), - .clr_aux (mac_x_clr_aux), - .casc_a (mac_x_casc_a), - .casc_a_aux (mac_x_casc_a_aux), - .a_in (mac_x_a), - .a_in_aux (mac_x_a_aux), - .b_in (mac_x_b), - .p_out (mac_x_p), - .p_out_aux (mac_x_p_aux) - ); - - modexpng_mac_array mac_array_y - ( - .clk (clk), - .ce (mac_y_ce), - .ce_aux (mac_y_ce_aux), - .clr (mac_y_clr), - .clr_aux (mac_y_clr_aux), - .casc_a (mac_y_casc_a), - .casc_a_aux (mac_y_casc_a_aux), - .a_in (mac_y_a), - .a_in_aux (mac_y_a_aux), - .b_in (mac_y_b), - .p_out (mac_y_p), - .p_out_aux (mac_y_p_aux) - ); - - genvar gen_z; - - generate for (gen_z=0; gen_z MH' -localparam [2:0] BANK_FAT_UNUSED = 3'd7; // not needed - -localparam [1:0] BANK_SLIM_T1T2 = 2'd0; -localparam [1:0] BANK_SLIM_N_COEFF = 2'd1; -localparam [1:0] BANK_SLIM_Q = 2'd2; -localparam [1:0] BANK_SLIM_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q' - - 
-//localparam BANK_Y_T2 = 3'd0; -//localparam BANK_XY_T1T2 = 3'd0; - -//localparam BANK_XY_AB_LSB = 3'd1; -//localparam BANK_XY_AB_MSB = 3'd2; - -//localparam BANK_X_N = 3'd3; -//localparam BANK_Y_N_COEFF = 3'd3; - -//localparam BANK_XY_M = 3'd4; - -//localparam BANK_XY_Q_LSB = 3'd5; -//localparam BANK_XY_Q_MSB = 3'd6; - -//localparam BANK_XY_AUX = 3'd7; - -//localparam BANK_XY_ANY = 3'bXXX; - -//localparam BANK_XY_AUX_ADDR_N_COEFF = 0; diff --git a/rtl/modexpng_parameters_old.vh b/rtl/modexpng_parameters_old.vh new file mode 100644 index 0000000..d30b751 --- /dev/null +++ b/rtl/modexpng_parameters_old.vh @@ -0,0 +1,40 @@ + +//localparam WORD_WIDTH = 17; +//localparam MAC_WIDTH = 47; + +localparam BANK_ADDR_WIDTH = 2; // TODO: Replace everywhere! + +localparam [1:0] BANK_WIDE_T1T2 = 2'd0; +localparam [1:0] BANK_WIDE_ABL = 2'd1; +localparam [1:0] BANK_WIDE_ABH = 2'd2; +localparam [1:0] BANK_WIDE_N = 2'd3; + +localparam [1:0] BANK_RCMB_ML = 2'd0; +localparam [1:0] BANK_RCMB_MH = 2'd1; +localparam [1:0] BANK_RCMB_EXT = 2'd2; // 0 -> MH' + +localparam [1:0] BANK_NARROW_T1T2 = 2'd0; +localparam [1:0] BANK_NARROW_N_COEFF = 2'd1; +localparam [1:0] BANK_NARROW_Q = 2'd2; +localparam [1:0] BANK_NARROW_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q' + + +//localparam BANK_Y_T2 = 3'd0; +//localparam BANK_XY_T1T2 = 3'd0; + +//localparam BANK_XY_AB_LSB = 3'd1; +//localparam BANK_XY_AB_MSB = 3'd2; + +//localparam BANK_X_N = 3'd3; +//localparam BANK_Y_N_COEFF = 3'd3; + +//localparam BANK_XY_M = 3'd4; + +//localparam BANK_XY_Q_LSB = 3'd5; +//localparam BANK_XY_Q_MSB = 3'd6; + +//localparam BANK_XY_AUX = 3'd7; + +//localparam BANK_XY_ANY = 3'bXXX; + +//localparam BANK_XY_AUX_ADDR_N_COEFF = 0; diff --git a/rtl/modexpng_parameters_x8.vh b/rtl/modexpng_parameters_x8.vh deleted file mode 100644 index 8734354..0000000 --- a/rtl/modexpng_parameters_x8.vh +++ /dev/null @@ -1 +0,0 @@ -localparam NUM_MULTS = 8; diff --git a/rtl/modexpng_parameters_x8_old.vh b/rtl/modexpng_parameters_x8_old.vh new file 
mode 100644 index 0000000..8734354 --- /dev/null +++ b/rtl/modexpng_parameters_x8_old.vh @@ -0,0 +1 @@ +localparam NUM_MULTS = 8; diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v deleted file mode 100644 index 957ba8e..0000000 --- a/rtl/modexpng_part_recombinator.v +++ /dev/null @@ -1,1128 +0,0 @@ -module modexpng_part_recombinator -( - clk, - rdy, - fsm_state_next, - index_last, - dsp_x_ce_p, dsp_y_ce_p, - ena_x, ena_y, - dsp_x_p, dsp_y_p, - col_index, col_index_last, - slim_bram_xy_addr, slim_bram_xy_bank, - rcmb_fat_bram_xy_bank, rcmb_fat_bram_xy_addr, rcmb_fat_bram_x_dout, rcmb_fat_bram_y_dout, rcmb_fat_bram_xy_dout_valid, - rcmb_slim_bram_xy_bank, rcmb_slim_bram_xy_addr, rcmb_slim_bram_x_dout, rcmb_slim_bram_y_dout, rcmb_slim_bram_xy_dout_valid -); - - - // - // Headers - // - `include "../rtl/modexpng_mmm_fsm.vh" - `include "../rtl/modexpng_parameters.vh" - `include "../rtl/modexpng_parameters_x8.vh" - - - input clk; - output rdy; - input [FSM_STATE_WIDTH-1:0] fsm_state_next; - input [7:0] index_last; - input dsp_x_ce_p; - input dsp_y_ce_p; - input ena_x; - input ena_y; - input [9*47-1:0] dsp_x_p; - input [9*47-1:0] dsp_y_p; - input [ 4:0] col_index; - input [ 4:0] col_index_last; - input [ 7:0] slim_bram_xy_addr; - input [ 1:0] slim_bram_xy_bank; - - output [ 2:0] rcmb_fat_bram_xy_bank; - output [ 7:0] rcmb_fat_bram_xy_addr; - output [ 17:0] rcmb_fat_bram_x_dout; - output [ 17:0] rcmb_fat_bram_y_dout; - output rcmb_fat_bram_xy_dout_valid; - - output [ 2:0] rcmb_slim_bram_xy_bank; - output [ 7:0] rcmb_slim_bram_xy_addr; - output [ 17:0] rcmb_slim_bram_x_dout; - output [ 17:0] rcmb_slim_bram_y_dout; - output rcmb_slim_bram_xy_dout_valid; - - - // - // Latches - // - reg [1*47-1:0] dsp_x_p_latch[0:8]; - reg [1*47-1:0] dsp_y_p_latch[0:8]; - - - // - // Mapping - // - wire [46:0] dsp_x_p_split[0:8]; - wire [46:0] dsp_y_p_split[0:8]; - - genvar z; - generate for (z=0; z<(NUM_MULTS+1); z=z+1) - begin : gen_dsp_xy_p_split - assign 
dsp_x_p_split[z] = dsp_x_p[47*z+:47]; - assign dsp_y_p_split[z] = dsp_y_p[47*z+:47]; - end - endgenerate - - - // - // Delays - // - reg dsp_y_ce_p_dly1 = 1'b0; - reg dsp_x_ce_p_dly1 = 1'b0; - - always @(posedge clk) begin - // - {dsp_y_ce_p_dly1, dsp_x_ce_p_dly1} <= {dsp_y_ce_p, dsp_x_ce_p}; - // - end - - - // - // Registers - // - - // valid - reg x_valid_lsb = 1'b0; - reg y_valid_lsb = 1'b0; - reg x_aux_lsb = 1'b0; - reg y_aux_lsb = 1'b0; - reg x_valid_msb = 1'b0; - reg y_valid_msb = 1'b0; - - // bitmap - reg [7:0] x_bitmap_lsb = {8{1'b0}}; - reg [7:0] y_bitmap_lsb = {8{1'b0}}; - reg [7:0] x_bitmap_msb = {8{1'b0}}; - reg [7:0] y_bitmap_msb = {8{1'b0}}; - - // index - reg [2:0] x_index_lsb = 3'dX; - reg [2:0] y_index_lsb = 3'dX; - - // purge - reg x_purge_lsb = 1'b0; - reg y_purge_lsb = 1'b0; - reg x_purge_msb = 1'b0; - reg y_purge_msb = 1'b0; - - // valid - latch - reg x_valid_latch_lsb = 1'b0; - reg y_valid_latch_lsb = 1'b0; - - // aux - latch - reg x_aux_latch_lsb = 1'b0; - reg y_aux_latch_lsb = 1'b0; - - // bitmap - latch - reg [7:0] x_bitmap_latch_lsb = {8{1'b0}}; - reg [7:0] y_bitmap_latch_lsb = {8{1'b0}}; - reg [7:0] x_bitmap_latch_msb = {8{1'b0}}; - reg [7:0] y_bitmap_latch_msb = {8{1'b0}}; - - // index - latch - reg [2:0] x_index_latch_lsb = 3'dX; - reg [2:0] y_index_latch_lsb = 3'dX; - - // purge - index - reg x_purge_latch_lsb = 1'b0; - reg y_purge_latch_lsb = 1'b0; - reg x_purge_latch_msb = 1'b0; - reg y_purge_latch_msb = 1'b0; - - // - reg xy_valid_lsb_adv[1:6]; - reg xy_valid_msb_adv[1:6]; - reg xy_aux_lsb_adv[1:6]; - reg [7:0] xy_bitmap_lsb_adv[1:6]; - reg [7:0] xy_bitmap_msb_adv[1:6]; - reg [2:0] xy_index_lsb_adv[1:6]; - reg [2:0] xy_index_msb_adv[1:6]; - reg xy_purge_lsb_adv[1:6]; - reg xy_purge_msb_adv[1:6]; - - reg [1:0] rcmb_mode; - - always @(posedge clk) - // - if (ena_x && ena_y) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1; - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2; - 
FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3; - default: rcmb_mode <= 2'd0; - endcase - - - integer i; - initial for (i=1; i<6; i=i+1) begin - xy_valid_lsb_adv[i] = 1'b0; - xy_valid_msb_adv[i] = 1'b0; - xy_aux_lsb_adv[i] = 1'b0; - xy_bitmap_lsb_adv[i] = {8{1'b0}}; - xy_bitmap_msb_adv[i] = {8{1'b0}}; - xy_index_lsb_adv[i] = 3'dX; - xy_index_msb_adv[i] = 3'dX; - xy_purge_lsb_adv[i] = 1'b0; - xy_purge_msb_adv[i] = 1'b0; - end - - function calc_square_valid_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - begin - // - if (slim_bram_xy_addr_value[7:3] == col_index_value) - calc_square_valid_lsb = 1'b1; - else - calc_square_valid_lsb = 1'b0; - // - end - endfunction - - function calc_triangle_valid_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - begin - // - if (slim_bram_xy_addr_value[7:3] == col_index_value) - calc_triangle_valid_lsb = 1'b1; - else - calc_triangle_valid_lsb = 1'b0; - // - end - endfunction - - function calc_triangle_aux_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - input [1:0] slim_bram_xy_bank_value; - begin - // - if (slim_bram_xy_bank_value == BANK_SLIM_EXT) - calc_triangle_aux_lsb = 1'b1; - else - calc_triangle_aux_lsb = 1'b0; - // - //if (slim_bram_xy_addr_value[7:3] == col_index_value) - //calc_triangle_aux_lsb = 1'b1; - //else - //calc_triangle_aux_lsb = 1'b0; - // - end - endfunction - - function calc_rectangle_valid_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - input [1:0] slim_bram_xy_bank_value; - begin - // - if (slim_bram_xy_addr_value[7:3] == col_index_value) - calc_rectangle_valid_lsb = slim_bram_xy_bank_value != BANK_SLIM_EXT; - else - calc_rectangle_valid_lsb = 1'b0; - // - end - endfunction - - function [7:0] calc_square_bitmap_lsb; - input [4:0] col_index_value; - 
input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - begin - // - if (slim_bram_xy_addr_value[7:3] == col_index_value) - // - case (slim_bram_xy_addr_value[2:0]) - 3'b000: calc_square_bitmap_lsb = 8'b00000001; - 3'b001: calc_square_bitmap_lsb = 8'b00000010; - 3'b010: calc_square_bitmap_lsb = 8'b00000100; - 3'b011: calc_square_bitmap_lsb = 8'b00001000; - 3'b100: calc_square_bitmap_lsb = 8'b00010000; - 3'b101: calc_square_bitmap_lsb = 8'b00100000; - 3'b110: calc_square_bitmap_lsb = 8'b01000000; - 3'b111: calc_square_bitmap_lsb = 8'b10000000; - endcase - // - else - calc_square_bitmap_lsb = {8{1'b0}}; - // - end - endfunction - - function [7:0] calc_triangle_bitmap_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - begin - // - if (slim_bram_xy_addr_value[7:3] == col_index_value) - // - case (slim_bram_xy_addr_value[2:0]) - 3'b000: calc_triangle_bitmap_lsb = 8'b00000001; - 3'b001: calc_triangle_bitmap_lsb = 8'b00000010; - 3'b010: calc_triangle_bitmap_lsb = 8'b00000100; - 3'b011: calc_triangle_bitmap_lsb = 8'b00001000; - 3'b100: calc_triangle_bitmap_lsb = 8'b00010000; - 3'b101: calc_triangle_bitmap_lsb = 8'b00100000; - 3'b110: calc_triangle_bitmap_lsb = 8'b01000000; - 3'b111: calc_triangle_bitmap_lsb = 8'b10000000; - endcase - // - else - calc_triangle_bitmap_lsb = {8{1'b0}}; - // - end - endfunction - - function [7:0] calc_rectangle_bitmap_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - input [1:0] slim_bram_xy_bank_value; - begin - // - if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT)) - // - case (slim_bram_xy_addr_value[2:0]) - 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001; - 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010; - 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100; - 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000; - 3'b100: calc_rectangle_bitmap_lsb = 
8'b00010000; - 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000; - 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000; - 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000; - endcase - // - else - calc_rectangle_bitmap_lsb = {8{1'b0}}; - // - end - endfunction - - function [2:0] calc_square_index_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - begin - // - if (slim_bram_xy_addr_value[7:3] == col_index_value) - // - case (slim_bram_xy_addr_value[2:0]) - 3'b000: calc_square_index_lsb = 3'd0; - 3'b001: calc_square_index_lsb = 3'd1; - 3'b010: calc_square_index_lsb = 3'd2; - 3'b011: calc_square_index_lsb = 3'd3; - 3'b100: calc_square_index_lsb = 3'd4; - 3'b101: calc_square_index_lsb = 3'd5; - 3'b110: calc_square_index_lsb = 3'd6; - 3'b111: calc_square_index_lsb = 3'd7; - endcase - // - else - calc_square_index_lsb = 3'dX; - // - end - endfunction - - function [2:0] calc_triangle_index_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - begin - // - if (slim_bram_xy_addr_value[7:3] == col_index_value) - // - case (slim_bram_xy_addr_value[2:0]) - 3'b000: calc_triangle_index_lsb = 3'd0; - 3'b001: calc_triangle_index_lsb = 3'd1; - 3'b010: calc_triangle_index_lsb = 3'd2; - 3'b011: calc_triangle_index_lsb = 3'd3; - 3'b100: calc_triangle_index_lsb = 3'd4; - 3'b101: calc_triangle_index_lsb = 3'd5; - 3'b110: calc_triangle_index_lsb = 3'd6; - 3'b111: calc_triangle_index_lsb = 3'd7; - endcase - // - else - calc_triangle_index_lsb = 3'dX; - // - end - endfunction - - function [2:0] calc_rectangle_index_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - input [1:0] slim_bram_xy_bank_value; - begin - // - if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT)) - // - case (slim_bram_xy_addr_value[2:0]) - 3'b000: calc_rectangle_index_lsb = 3'd0; - 3'b001: 
calc_rectangle_index_lsb = 3'd1; - 3'b010: calc_rectangle_index_lsb = 3'd2; - 3'b011: calc_rectangle_index_lsb = 3'd3; - 3'b100: calc_rectangle_index_lsb = 3'd4; - 3'b101: calc_rectangle_index_lsb = 3'd5; - 3'b110: calc_rectangle_index_lsb = 3'd6; - 3'b111: calc_rectangle_index_lsb = 3'd7; - endcase - // - else - calc_rectangle_index_lsb = 3'dX; - // - end - endfunction - - function calc_square_purge_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - begin - // - if (slim_bram_xy_addr_value[7:3] == col_index_value) - calc_square_purge_lsb = slim_bram_xy_addr_value[7:3] == col_index_last_value; - else - calc_square_purge_lsb = 1'b0; - // - end - endfunction - - function calc_rectangle_purge_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - begin - // - if (slim_bram_xy_addr_value[7:3] == col_index_value) - calc_rectangle_purge_lsb = slim_bram_xy_addr_value[7:3] == col_index_last_value; - else - calc_rectangle_purge_lsb = 1'b0; - // - end - endfunction - - function calc_square_valid_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - input [7:0] index_last_value; - begin - // - if (slim_bram_xy_addr_value == index_last_value) - calc_square_valid_msb = 1'b1; - else - calc_square_valid_msb = 1'b0; - // - end - endfunction - - function calc_rectangle_valid_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - input [1:0] slim_bram_xy_bank_value; - input [7:0] index_last_value; - begin - // - if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT)) - calc_rectangle_valid_msb = 1'b1; - else - calc_rectangle_valid_msb = 1'b0; - // - end - endfunction - - function [7:0] calc_square_bitmap_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - input 
[7:0] index_last_value; - begin - // - if (slim_bram_xy_addr_value == index_last_value) begin - calc_square_bitmap_msb[7] = col_index_value != col_index_last_value; - calc_square_bitmap_msb[6:0] = 7'b1111111; - end else - calc_square_bitmap_msb[7:0] = 8'b00000000; - // - end - endfunction - - function [7:0] calc_rectangle_bitmap_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - input [1:0] slim_bram_xy_bank_value; - input [7:0] index_last_value; - begin - // - if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT)) begin - calc_rectangle_bitmap_msb[7:0] = 8'b11111111; - end else - calc_rectangle_bitmap_msb[7:0] = 8'b00000000; - // - end - endfunction - - function calc_square_purge_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - input [7:0] index_last_value; - begin - // - if (slim_bram_xy_addr_value == index_last_value) - calc_square_purge_msb = col_index_value == col_index_last_value; - else - calc_square_purge_msb = 1'b0; - // - end - endfunction - - function calc_rectangle_purge_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [7:0] slim_bram_xy_addr_value; - input [1:0] slim_bram_xy_bank_value; - input [7:0] index_last_value; - begin - // - if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT)) - calc_rectangle_purge_msb = col_index_value == col_index_last_value; - else - calc_rectangle_purge_msb = 1'b0; - // - end - endfunction - - - reg recomb_lsb_ce = 1'b0; - reg recomb_lsb_ce_aux; - reg [ 2:0] recomb_lsb_ce_purge = 3'b000; - wire recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_aux | recomb_lsb_ce_purge[0]; - reg recomb_lsb_clr; - - reg [46:0] recomb_lsb_din; - wire [15:0] recomb_lsb_dout; - - reg recomb_msb_ce = 1'b0; - reg [ 1:0] recomb_msb_ce_purge = 2'b00; - wire recomb_msb_ce_combined = recomb_msb_ce | recomb_msb_ce_purge[0]; - reg 
recomb_msb_clr; - - reg [46:0] recomb_msb_din; - wire [15:0] recomb_msb_dout; - - modexpng_recombinator_block recomb_x_lsb - ( - .clk (clk), - .ce (recomb_lsb_ce_combined), - .clr (recomb_lsb_clr), - .din (recomb_lsb_din), - .dout (recomb_lsb_dout) - ); - - modexpng_recombinator_block recomb_x_msb - ( - .clk (clk), - .ce (recomb_msb_ce_combined), - .clr (recomb_msb_clr), - .din (recomb_msb_din), - .dout (recomb_msb_dout) - ); - - always @(posedge clk) begin - // - recomb_lsb_ce <= x_valid_latch_lsb; - recomb_lsb_ce_aux <= x_aux_latch_lsb; - recomb_msb_ce <= x_bitmap_latch_msb[0]; - // - if (x_purge_latch_lsb) - recomb_lsb_ce_purge <= 3'b111; - else - recomb_lsb_ce_purge <= {1'b0, recomb_lsb_ce_purge[2:1]}; - // - if (x_purge_latch_msb && x_bitmap_latch_msb[0] && !x_bitmap_latch_msb[1]) - recomb_msb_ce_purge = 2'b11; - else - recomb_msb_ce_purge <= {1'b0, recomb_msb_ce_purge[1]}; - // - end - - - always @(posedge clk) - // - if (ena_x & ena_y) begin - recomb_lsb_clr <= 1'b1; - recomb_msb_clr <= 1'b1; - end else begin - if (recomb_lsb_ce) recomb_lsb_clr <= 1'b0; - if (recomb_msb_ce) recomb_msb_clr <= 1'b0; - end - - always @(posedge clk) - // - if (x_valid_latch_lsb) - recomb_lsb_din <= dsp_x_p_latch[x_index_latch_lsb]; - else if (x_aux_latch_lsb) - recomb_lsb_din <= dsp_x_p_latch[8]; - else - recomb_lsb_din <= {47{1'b0}}; - - always @(posedge clk) - // - if (x_bitmap_latch_msb[0]) - recomb_msb_din <= dsp_x_p_latch[0]; - else - recomb_msb_din <= {47{1'b0}}; - - - always @(posedge clk) - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin - // - xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, slim_bram_xy_addr); - xy_aux_lsb_adv [6] <= 1'b0; - xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr); - xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, 
slim_bram_xy_addr); - xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, slim_bram_xy_addr); - // - xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, slim_bram_xy_addr, index_last); - xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, index_last); - xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, slim_bram_xy_addr, index_last); - // - end - // - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin - // - xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr); /// bank - xy_aux_lsb_adv [6] <= calc_triangle_aux_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank); - xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr); //! bank - xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr); // ! bank!!! 
- xy_purge_lsb_adv [6] <= 1'b0; - // - xy_valid_msb_adv [6] <= 1'b0; - xy_bitmap_msb_adv[6] <= {8{1'b0}}; - xy_purge_msb_adv [6] <= 1'b0; - // - end - // - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin - // - xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank); - xy_aux_lsb_adv [6] <= 1'b0; - xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank); - xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank); - xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, slim_bram_xy_addr); - // - xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last); - xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last); - xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last); - // - end - // - default: begin - // - xy_valid_lsb_adv [6] <= 1'b0; - xy_aux_lsb_adv [6] <= 1'b0; - xy_bitmap_lsb_adv[6] <= {8{1'b0}}; - xy_index_lsb_adv [6] <= 3'dX; - xy_purge_lsb_adv [6] <= 1'b0; - // - xy_valid_msb_adv [6] <= 1'b0; - xy_bitmap_msb_adv[6] <= {8{1'b0}}; - xy_purge_msb_adv [6] <= 1'b0; - // - end - // - endcase - - - always @(posedge clk) begin - // - {y_valid_lsb, x_valid_lsb} <= {2{xy_valid_lsb_adv [1]}}; - {y_aux_lsb, x_aux_lsb} <= {2{xy_aux_lsb_adv [1]}}; - {y_bitmap_lsb, x_bitmap_lsb} <= {2{xy_bitmap_lsb_adv[1]}}; - {y_index_lsb, x_index_lsb} <= {2{xy_index_lsb_adv [1]}}; - {y_purge_lsb, x_purge_lsb} <= {2{xy_purge_lsb_adv [1]}}; - // - {y_valid_latch_lsb, x_valid_latch_lsb} <= {y_valid_lsb, x_valid_lsb}; - {y_aux_latch_lsb, x_aux_latch_lsb} <= {y_aux_lsb, 
x_aux_lsb}; - {y_bitmap_latch_lsb, x_bitmap_latch_lsb} <= {y_bitmap_lsb, x_bitmap_lsb}; - {y_index_latch_lsb, x_index_latch_lsb} <= {y_index_lsb, x_index_lsb}; - {y_purge_latch_lsb, x_purge_latch_lsb} <= {y_purge_lsb, x_purge_lsb}; - // - {y_valid_msb, x_valid_msb} <= {2{xy_valid_msb_adv[1]}}; - {y_bitmap_msb, x_bitmap_msb} <= {2{xy_bitmap_msb_adv[1]}}; - {y_purge_msb, x_purge_msb} <= {2{xy_purge_msb_adv[1]}}; - // - if (x_valid_msb) begin - x_bitmap_latch_msb <= x_bitmap_msb; - x_purge_latch_msb <= x_purge_msb; - end else begin - x_bitmap_latch_msb <= {1'b0, x_bitmap_latch_msb[7:1]}; - end - // - // - for (i=1; i<6; i=i+1) begin - xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1]; - xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1]; - xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1]; - xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1]; - xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1]; - // - xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1]; - xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1]; - xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1]; - end - // - end - - always @(posedge clk) - // - if (x_bitmap_latch_msb[1]) // only shift 7 times - // - for (i=0; i<8; i=i+1) - if (i < 7) - dsp_x_p_latch[i] <= dsp_x_p_latch[i+1]; - else - dsp_x_p_latch[i] <= {47{1'bX}}; - // - else if (dsp_x_ce_p_dly1) begin - // - for (i=0; i<8; i=i+1) - // - if (x_bitmap_lsb[i]) - dsp_x_p_latch[i] <= dsp_x_p_split[i]; - else if (x_valid_msb && x_bitmap_msb[i]) - dsp_x_p_latch[i] <= dsp_x_p_split[i]; - // - if (x_aux_lsb) - dsp_x_p_latch[8] <= dsp_x_p_split[8]; - // - end - - reg recomb_x_lsb_dout_valid = 1'b0; - reg recomb_x_msb_dout_valid = 1'b0; - - always @(posedge clk) begin - recomb_x_lsb_dout_valid <= recomb_lsb_ce_combined; - recomb_x_msb_dout_valid <= recomb_msb_ce_combined; - end - - - reg [ 2:0] fat_bram_xy_bank_reg; - reg [ 7:0] fat_bram_xy_addr_reg; - reg [17:0] fat_bram_x_dout_reg; - reg [17:0] fat_bram_y_dout_reg; - reg fat_bram_xy_dout_valid_reg = 1'b0; - - reg [ 2:0] slim_bram_xy_bank_reg; - 
reg [ 7:0] slim_bram_xy_addr_reg; - reg [17:0] slim_bram_x_dout_reg; - reg [17:0] slim_bram_y_dout_reg; - reg slim_bram_xy_dout_valid_reg = 1'b0; - - reg [ 7:0] bram_xy_cnt_lsb; - reg [ 7:0] bram_xy_cnt_msb; - - reg bram_xy_cnt_lsb_wrapped; - reg bram_xy_cnt_msb_wrapped; - - reg [15:0] recomb_msb_dout_carry_0; - reg [15:0] recomb_msb_dout_carry_1; - - reg [15:0] recomb_msb_dout_delay_0; - reg [15:0] recomb_msb_dout_delay_1; - reg [15:0] recomb_msb_dout_delay_2; - - reg [ 7:0] recomb_msb_cnt_delay_0 = 8'd0; - reg [ 7:0] recomb_msb_cnt_delay_1 = 8'd0; - reg [ 7:0] recomb_msb_cnt_delay_2 = 8'd0; - - reg recomb_msb_flag_delay_0; - reg recomb_msb_flag_delay_1; - reg recomb_msb_flag_delay_2; - - assign rcmb_fat_bram_xy_bank = fat_bram_xy_bank_reg; - assign rcmb_fat_bram_xy_addr = fat_bram_xy_addr_reg; - assign rcmb_fat_bram_x_dout = fat_bram_x_dout_reg; - assign rcmb_fat_bram_y_dout = fat_bram_y_dout_reg; - assign rcmb_fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg; - - assign rcmb_slim_bram_xy_bank = slim_bram_xy_bank_reg; - assign rcmb_slim_bram_xy_addr = slim_bram_xy_addr_reg; - assign rcmb_slim_bram_x_dout = slim_bram_x_dout_reg; - assign rcmb_slim_bram_y_dout = slim_bram_y_dout_reg; - assign rcmb_slim_bram_xy_dout_valid = slim_bram_xy_dout_valid_reg; - - reg rdy_reg = 1'b1; - reg rdy_adv = 1'b1; - - assign rdy = rdy_reg; - - - always @(posedge clk) - // - if (ena_x & ena_y) - rdy_reg <= 1'b0; - else - rdy_reg <= rdy_adv; - - - task advance_recomb_msb_dout_delay; - input [15:0] dout; - input [ 7:0] cnt; - input flag; - begin - // - recomb_msb_dout_delay_0 <= dout; - recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0; - recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1; - // - recomb_msb_cnt_delay_0 <= cnt; - recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0; - recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1; - // - recomb_msb_flag_delay_0 <= flag; - recomb_msb_flag_delay_1 <= recomb_msb_flag_delay_0; - recomb_msb_flag_delay_2 <= recomb_msb_flag_delay_1; - // - 
end - endtask - - task shift_recomb_msb_dout_carry; - input [15:0] dout; - begin - recomb_msb_dout_carry_0 <= dout; - recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0; - end - endtask - - task _update_fat_bram_regs; - input [ 2:0] bank; - input [ 7:0] addr; - input [17:0] dout_x; - input [17:0] dout_y; - input valid; - begin - fat_bram_xy_bank_reg <= bank; - fat_bram_xy_addr_reg <= addr; - fat_bram_x_dout_reg <= dout_x; - fat_bram_y_dout_reg <= dout_y; - fat_bram_xy_dout_valid_reg <= valid; - end - endtask - - task _update_slim_bram_regs; - input [ 2:0] bank; - input [ 7:0] addr; - input [17:0] dout_x; - input [17:0] dout_y; - input valid; - begin - slim_bram_xy_bank_reg <= bank; - slim_bram_xy_addr_reg <= addr; - slim_bram_x_dout_reg <= dout_x; - slim_bram_y_dout_reg <= dout_y; - slim_bram_xy_dout_valid_reg <= valid; - end - endtask - - task set_fat_bram_regs; - input [ 2:0] bank; - input [ 7:0] addr; - input [17:0] dout_x; - input [17:0] dout_y; - begin - _update_fat_bram_regs(bank, addr, dout_x, dout_y, 1'b1); - end - endtask - - task set_slim_bram_regs; - input [ 2:0] bank; - input [ 7:0] addr; - input [17:0] dout_x; - input [17:0] dout_y; - begin - _update_slim_bram_regs(bank, addr, dout_x, dout_y, 1'b1); - end - endtask - - task clear_fat_bram_regs; - begin - _update_fat_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); - end - endtask - - task clear_slim_bram_regs; - begin - _update_slim_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); - end - endtask - - task _set_bram_cnt_lsb; - input [7:0] cnt; - input wrapped; - begin - bram_xy_cnt_lsb <= cnt; - bram_xy_cnt_lsb_wrapped <= wrapped; - end - endtask - - task _set_bram_cnt_msb; - input [7:0] cnt; - input wrapped; - begin - bram_xy_cnt_msb <= cnt; - bram_xy_cnt_msb_wrapped <= wrapped; - end - endtask - - task inc_bram_cnt_lsb; - begin - if (bram_xy_cnt_lsb == index_last) - _set_bram_cnt_lsb(8'd0, 1'b1); - else - _set_bram_cnt_lsb(bram_xy_cnt_lsb + 1'b1, bram_xy_cnt_lsb_wrapped); - end - endtask 
- - task inc_bram_cnt_msb; - begin - if (bram_xy_cnt_msb == index_last) - _set_bram_cnt_msb(8'd0, 1'b1); - else - _set_bram_cnt_msb(bram_xy_cnt_msb + 1'b1, bram_xy_cnt_msb_wrapped); - end - endtask - - task clr_bram_cnt_lsb; - begin - _set_bram_cnt_lsb(8'd0, 1'b0); - end - endtask - - task clr_bram_cnt_msb; - begin - _set_bram_cnt_msb(8'd0, 1'b0); - end - endtask - - - - - - wire [1:0] rcmb_xy_dout_valid = {recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid}; - - always @(posedge clk) - // - if (ena_x & ena_y) begin - clr_bram_cnt_lsb(); - clr_bram_cnt_msb(); - end else begin // if not ready??? - // - case (rcmb_mode) - 2'd1: recombine_square(); - 2'd2: recombine_triangle(); - 2'd3: recombine_rectangle(); - endcase - // - end - - task recombine_square; - // - begin - // - case (rcmb_xy_dout_valid) - // - 2'b01: inc_bram_cnt_lsb(); - 2'b10: inc_bram_cnt_msb(); - 2'b11: begin - inc_bram_cnt_lsb(); - inc_bram_cnt_msb(); - end - // - endcase - // - case (rcmb_xy_dout_valid) - // - 2'b00: if (recomb_msb_flag_delay_2) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}}); - else clear_fat_bram_regs(); - 2'b01: set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); - 2'b10: if (bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs(); - else set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}}); - 2'b11: if (bram_xy_cnt_lsb_wrapped) set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); - else set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); - default: clear_fat_bram_regs(); // DEBUG!!! 
- // - endcase - // - case (rcmb_xy_dout_valid) - // - 2'b00: if (recomb_msb_flag_delay_2) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0); - 2'b10: if (bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout); -// // - 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1); - if (bram_xy_cnt_lsb_wrapped) shift_recomb_msb_dout_carry({16{1'bX}}); - end - // - endcase - // - end - // - endtask - - - task recombine_triangle; - // - begin - // - case (rcmb_xy_dout_valid) - // - 2'b01: inc_bram_cnt_lsb(); - // - endcase - // - case (rcmb_xy_dout_valid) - // - 2'b00: clear_slim_bram_regs(); - 2'b01: if (!bram_xy_cnt_lsb_wrapped) set_slim_bram_regs(BANK_SLIM_Q, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); - else set_slim_bram_regs(BANK_SLIM_EXT, 8'd1, {2'b00, recomb_lsb_dout}, {18{1'bX}}); - 2'b10: clear_slim_bram_regs(); - 2'b11: clear_slim_bram_regs(); - // - endcase - // - end - // - endtask - - - task recombine_rectangle; - // - begin - // - case (rcmb_xy_dout_valid) - // - 2'b01: inc_bram_cnt_lsb(); - 2'b10: inc_bram_cnt_msb(); - 2'b11: begin - inc_bram_cnt_lsb(); - inc_bram_cnt_msb(); - end - // - endcase -// // - case (rcmb_xy_dout_valid) -// // - 2'b00: if (recomb_msb_flag_delay_2) set_fat_bram_regs(BANK_FAT_MH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}}); - else clear_fat_bram_regs(); - 2'b01: set_fat_bram_regs(BANK_FAT_ML, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); - 2'b10: if (!bram_xy_cnt_msb_wrapped) begin - if (bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs(); - else set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}}); - end else - set_fat_bram_regs(BANK_FAT_EXT, 8'd0, {2'b00, recomb_msb_dout}, {18{1'bX}}); - - 2'b11: set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); -// // - endcase -// // - case (rcmb_xy_dout_valid) -// // - 2'b00: if 
(recomb_msb_flag_delay_2) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0); - 2'b10: begin - if ((bram_xy_cnt_msb < 8'd2) && !bram_xy_cnt_msb_wrapped) shift_recomb_msb_dout_carry(recomb_msb_dout); - if (bram_xy_cnt_msb_wrapped) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0); - end -// // - 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1); - shift_recomb_msb_dout_carry({16{1'bX}}); - end -// // - endcase - // - end - // - endtask - - - always @(posedge clk) - // - if (ena_x & ena_y) begin - rdy_adv <= 1'b0; - end else if (!rdy_reg) begin - // - case (rcmb_mode) - // - 2'd1: case (rcmb_xy_dout_valid) - // - 2'b00: begin - // - if (recomb_msb_flag_delay_2) begin - // - rdy_adv <= ~recomb_msb_flag_delay_1; - // - end - // - end - // - endcase - // - 2'd2: case (rcmb_xy_dout_valid) - // - 2'b01: rdy_adv <= bram_xy_cnt_lsb_wrapped; // - // - endcase - // - 2'd3: case (rcmb_xy_dout_valid) - // - 2'b00: begin - // - if (recomb_msb_flag_delay_2) begin - // - rdy_adv <= ~recomb_msb_flag_delay_1; - // - end - // - end - // - endcase - // - endcase - // - end - - - - // add ready for mode=3 -endmodule diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v index efe0ac5..d6b1ad1 100644 --- a/rtl/modexpng_recombinator_block.v +++ b/rtl/modexpng_recombinator_block.v @@ -1,35 +1,1225 @@ module modexpng_recombinator_block ( - clk, - ce, clr, - din, dout + clk, rst, + ena, rdy, + fsm_state_next, + word_index_last, + dsp_xy_ce_p, + dsp_x_p, dsp_y_p, + col_index, col_index_last, + rd_narrow_xy_addr, rd_narrow_xy_bank, + rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid, + rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid, + rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid ); - input clk; - input ce; - input clr; - input [46:0] din; - output [15:0] dout; - reg [14:0] z; 
- reg [16:0] y; - reg [17:0] x; - //reg [15:0] w; + // + // Headers + // + `include "../rtl_1/modexpng_mmm_fsm_old.vh" + `include "../rtl_1/modexpng_parameters_old.vh" + `include "../rtl_1/modexpng_parameters_x8_old.vh" - //assign dout = w; - assign dout = x[15:0]; + + input clk; + input rst; + input ena; + output rdy; + input [FSM_STATE_WIDTH-1:0] fsm_state_next; + input [7:0] word_index_last; + input dsp_xy_ce_p; + input [9*47-1:0] dsp_x_p; + input [9*47-1:0] dsp_y_p; + input [ 4:0] col_index; + input [ 4:0] col_index_last; + + input [ 7:0] rd_narrow_xy_addr; + input [ 1:0] rd_narrow_xy_bank; + + output [ 1:0] rcmb_wide_xy_bank; + output [ 7:0] rcmb_wide_xy_addr; + output [ 17:0] rcmb_wide_x_dout; + output [ 17:0] rcmb_wide_y_dout; + output rcmb_wide_xy_valid; + + output [ 1:0] rcmb_narrow_xy_bank; + output [ 7:0] rcmb_narrow_xy_addr; + output [ 17:0] rcmb_narrow_x_dout; + output [ 17:0] rcmb_narrow_y_dout; + output rcmb_narrow_xy_valid; + + output [ 1:0] rdct_narrow_xy_bank; + output [ 7:0] rdct_narrow_xy_addr; + output [ 17:0] rdct_narrow_x_dout; + output [ 17:0] rdct_narrow_y_dout; + output rdct_narrow_xy_valid; + + + // + // Latches + // + reg [1*47-1:0] dsp_x_p_latch[0:8]; + reg [1*47-1:0] dsp_y_p_latch[0:8]; + + + // + // Mapping + // + wire [46:0] dsp_x_p_split[0:8]; + wire [46:0] dsp_y_p_split[0:8]; + + genvar z; + generate for (z=0; z<(NUM_MULTS+1); z=z+1) + begin : gen_dsp_xy_p_split + assign dsp_x_p_split[z] = dsp_x_p[47*z+:47]; + assign dsp_y_p_split[z] = dsp_y_p[47*z+:47]; + end + endgenerate + + + // + // Delays + // + reg dsp_xy_ce_p_dly1 = 1'b0; + + always @(posedge clk) + // + if (rst) dsp_xy_ce_p_dly1 <= 1'b0; + else dsp_xy_ce_p_dly1 <= dsp_xy_ce_p; + + + // + // Registers + // + + // valid + reg xy_valid_lsb = 1'b0; + reg xy_aux_lsb = 1'b0; + reg xy_valid_msb = 1'b0; + + // bitmap + reg [7:0] xy_bitmap_lsb = {8{1'b0}}; + reg [7:0] xy_bitmap_msb = {8{1'b0}}; + + // index + reg [2:0] xy_index_lsb = 3'dX; + + // purge + reg xy_purge_lsb = 1'b0; + 
reg xy_purge_msb = 1'b0; + + // valid - latch + reg xy_valid_latch_lsb = 1'b0; + + // aux - latch + reg xy_aux_latch_lsb = 1'b0; + + // bitmap - latch + reg [7:0] xy_bitmap_latch_lsb = {8{1'b0}}; + reg [7:0] xy_bitmap_latch_msb = {8{1'b0}}; + + // index - latch + reg [2:0] xy_index_latch_lsb = 3'dX; + + // purge - latch + reg xy_purge_latch_lsb = 1'b0; + reg xy_purge_latch_msb = 1'b0; + + // + reg xy_valid_lsb_adv[1:6]; + reg xy_valid_msb_adv[1:6]; + reg xy_aux_lsb_adv[1:6]; + reg [7:0] xy_bitmap_lsb_adv[1:6]; + reg [7:0] xy_bitmap_msb_adv[1:6]; + reg [2:0] xy_index_lsb_adv[1:6]; + reg [2:0] xy_index_msb_adv[1:6]; + reg xy_purge_lsb_adv[1:6]; + reg xy_purge_msb_adv[1:6]; + + reg [1:0] rcmb_mode; + + always @(posedge clk) + // + if (ena) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2; + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3; + default: rcmb_mode <= 2'd0; + endcase + + + integer i; + initial for (i=1; i<=6; i=i+1) begin /* reset all six stages of the [1:6] delay lines, including input stage 6 */ + xy_valid_lsb_adv[i] = 1'b0; + xy_valid_msb_adv[i] = 1'b0; + xy_aux_lsb_adv[i] = 1'b0; + xy_bitmap_lsb_adv[i] = {8{1'b0}}; + xy_bitmap_msb_adv[i] = {8{1'b0}}; + xy_index_lsb_adv[i] = 3'dX; + xy_index_msb_adv[i] = 3'dX; + xy_purge_lsb_adv[i] = 1'b0; + xy_purge_msb_adv[i] = 1'b0; + end + + function calc_square_triangle_valid_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + calc_square_triangle_valid_lsb = 1'b1; + else + calc_square_triangle_valid_lsb = 1'b0; + // + end + endfunction + + function calc_square_valid_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value,
narrow_xy_addr_value); + end + endfunction + + function calc_triangle_valid_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_rectangle_valid_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT; + else + calc_rectangle_valid_lsb = 1'b0; + // + end + endfunction + + function calc_triangle_aux_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_bank_value == BANK_NARROW_EXT) + calc_triangle_aux_lsb = 1'b1; + else + calc_triangle_aux_lsb = 1'b0; + // + end + endfunction + + function [7:0] calc_square_triangle_bitmap_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + // + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_square_triangle_bitmap_lsb = 8'b00000001; + 3'b001: calc_square_triangle_bitmap_lsb = 8'b00000010; + 3'b010: calc_square_triangle_bitmap_lsb = 8'b00000100; + 3'b011: calc_square_triangle_bitmap_lsb = 8'b00001000; + 3'b100: calc_square_triangle_bitmap_lsb = 8'b00010000; + 3'b101: calc_square_triangle_bitmap_lsb = 8'b00100000; + 3'b110: calc_square_triangle_bitmap_lsb = 8'b01000000; + 3'b111: calc_square_triangle_bitmap_lsb = 8'b10000000; + endcase + // + else + calc_square_triangle_bitmap_lsb = {8{1'b0}}; + // + end + endfunction + + function [7:0] calc_square_bitmap_lsb; + input 
[4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [7:0] calc_triangle_bitmap_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [7:0] calc_rectangle_bitmap_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT)) + // + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001; + 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010; + 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100; + 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000; + 3'b100: calc_rectangle_bitmap_lsb = 8'b00010000; + 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000; + 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000; + 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000; + endcase + // + else + calc_rectangle_bitmap_lsb = {8{1'b0}}; + // + end + endfunction + + /* + * These can be simplified (the difference between square/triangle and + * rectangle is that the bank is checked or not). A universal function would + * accept a parameter that tells it whether it should check the bank or not. + * Let's do it later, too early to optimize now, it seems. 
+ * + * + */ + + function [2:0] calc_square_triangle_index_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + // + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_square_triangle_index_lsb = 3'd0; + 3'b001: calc_square_triangle_index_lsb = 3'd1; + 3'b010: calc_square_triangle_index_lsb = 3'd2; + 3'b011: calc_square_triangle_index_lsb = 3'd3; + 3'b100: calc_square_triangle_index_lsb = 3'd4; + 3'b101: calc_square_triangle_index_lsb = 3'd5; + 3'b110: calc_square_triangle_index_lsb = 3'd6; + 3'b111: calc_square_triangle_index_lsb = 3'd7; + endcase + // + else + calc_square_triangle_index_lsb = 3'dX; + // + end + endfunction + + function [2:0] calc_square_index_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [2:0] calc_triangle_index_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [2:0] calc_rectangle_index_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] slim_bram_xy_bank_value; + input [7:0] slim_bram_xy_addr_value; + begin + // + if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_NARROW_EXT)) + // + case (slim_bram_xy_addr_value[2:0]) + 3'b000: calc_rectangle_index_lsb = 3'd0; + 3'b001: calc_rectangle_index_lsb = 3'd1; + 3'b010: calc_rectangle_index_lsb = 3'd2; + 3'b011: 
calc_rectangle_index_lsb = 3'd3; + 3'b100: calc_rectangle_index_lsb = 3'd4; + 3'b101: calc_rectangle_index_lsb = 3'd5; + 3'b110: calc_rectangle_index_lsb = 3'd6; + 3'b111: calc_rectangle_index_lsb = 3'd7; + endcase + // + else + calc_rectangle_index_lsb = 3'dX; + // + end + endfunction + + function calc_square_rectangle_purge_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + calc_square_rectangle_purge_lsb = narrow_xy_addr_value[7:3] == col_index_last_value; + else + calc_square_rectangle_purge_lsb = 1'b0; + // + end + endfunction + + function calc_square_purge_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_rectangle_purge_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_square_valid_msb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if (narrow_xy_addr_value == index_last_value) + calc_square_valid_msb = 1'b1; + else + calc_square_valid_msb = 1'b0; + // + end + endfunction + + function calc_rectangle_valid_msb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if 
((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) + calc_rectangle_valid_msb = 1'b1; + else + calc_rectangle_valid_msb = 1'b0; + // + end + endfunction + + function [7:0] calc_square_bitmap_msb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if (narrow_xy_addr_value == index_last_value) begin + calc_square_bitmap_msb[7] = col_index_value != col_index_last_value; + calc_square_bitmap_msb[6:0] = 7'b1111111; + end else + calc_square_bitmap_msb[7:0] = 8'b00000000; + // + end + endfunction + + function [7:0] calc_rectangle_bitmap_msb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) begin + calc_rectangle_bitmap_msb[7:0] = 8'b11111111; + end else + calc_rectangle_bitmap_msb[7:0] = 8'b00000000; + // + end + endfunction + + function calc_square_purge_msb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if (narrow_xy_addr_value == index_last_value) + calc_square_purge_msb = col_index_value == col_index_last_value; + else + calc_square_purge_msb = 1'b0; + // + end + endfunction + + function calc_rectangle_purge_msb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) + calc_rectangle_purge_msb = col_index_value == col_index_last_value; + else + calc_rectangle_purge_msb = 1'b0; + // + end + endfunction + + + reg rcmb_xy_lsb_ce = 1'b0; + reg 
rcmb_xy_lsb_ce_aux; + reg [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000; + wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0]; + reg rcmb_xy_lsb_clr; + + reg [46:0] rcmb_x_lsb_din; + reg [46:0] rcmb_y_lsb_din; + wire [15:0] rcmb_x_lsb_dout; + wire [15:0] rcmb_y_lsb_dout; + + reg rcmb_xy_msb_ce = 1'b0; + reg [ 1:0] rcmb_xy_msb_ce_purge = 2'b00; + wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0]; + reg rcmb_xy_msb_clr; + + reg [46:0] rcmb_x_msb_din; + reg [46:0] rcmb_y_msb_din; + wire [15:0] rcmb_x_msb_dout; + wire [15:0] rcmb_y_msb_dout; + + modexpng_recombinator_cell recomb_x_lsb + ( + .clk (clk), + .ce (rcmb_xy_lsb_ce_combined), + .clr (rcmb_xy_lsb_clr), + .din (rcmb_x_lsb_din), + .dout (rcmb_x_lsb_dout) + ); + modexpng_recombinator_cell recomb_y_lsb + ( + .clk (clk), + .ce (rcmb_xy_lsb_ce_combined), + .clr (rcmb_xy_lsb_clr), + .din (rcmb_y_lsb_din), + .dout (rcmb_y_lsb_dout) + ); + + modexpng_recombinator_cell recomb_x_msb + ( + .clk (clk), + .ce (rcmb_xy_msb_ce_combined), + .clr (rcmb_xy_msb_clr), + .din (rcmb_x_msb_din), + .dout (rcmb_x_msb_dout) + ); + + modexpng_recombinator_cell recomb_y_msb + ( + .clk (clk), + .ce (rcmb_xy_msb_ce_combined), + .clr (rcmb_xy_msb_clr), + .din (rcmb_y_msb_din), + .dout (rcmb_y_msb_dout) + ); + + always @(posedge clk) begin + // + rcmb_xy_lsb_ce <= xy_valid_latch_lsb; + rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb; + rcmb_xy_msb_ce <= xy_bitmap_latch_msb[0]; + // + if (xy_purge_latch_lsb) + rcmb_xy_lsb_ce_purge <= 3'b111; + else + rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]}; + // + if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1]) + rcmb_xy_msb_ce_purge <= 2'b11; /* non-blocking: this register is sampled via rcmb_xy_msb_ce_combined by other clocked blocks */ + else + rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]}; + // + end + + + always @(posedge clk) + // + if (ena) begin + rcmb_xy_lsb_clr <= 1'b1; + rcmb_xy_msb_clr <= 1'b1; + end else begin + if (rcmb_xy_lsb_ce) rcmb_xy_lsb_clr <= 1'b0; + if (rcmb_xy_msb_ce) rcmb_xy_msb_clr
<= 1'b0; + end + + always @(posedge clk) + // + if (xy_valid_latch_lsb) begin + rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb]; + rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb]; + end else if (xy_aux_latch_lsb) begin + rcmb_x_lsb_din <= dsp_x_p_latch[8]; + rcmb_y_lsb_din <= dsp_y_p_latch[8]; + end else begin + rcmb_x_lsb_din <= {47{1'b0}}; + rcmb_y_lsb_din <= {47{1'b0}}; + end + + always @(posedge clk) + // + if (xy_bitmap_latch_msb[0]) begin + rcmb_x_msb_din <= dsp_x_p_latch[0]; + rcmb_y_msb_din <= dsp_y_p_latch[0]; + end else begin + rcmb_x_msb_din <= {47{1'b0}}; + rcmb_y_msb_din <= {47{1'b0}}; + end + + + always @(posedge clk) + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_aux_lsb_adv [6] <= 1'b0; + xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + // + xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + // + end + // + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, 
rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_aux_lsb_adv [6] <= calc_triangle_aux_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_purge_lsb_adv [6] <= 1'b0; + // + xy_valid_msb_adv [6] <= 1'b0; + xy_bitmap_msb_adv[6] <= {8{1'b0}}; + xy_purge_msb_adv [6] <= 1'b0; + // + end + // + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_aux_lsb_adv [6] <= 1'b0; + xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + // + xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + // + end + // + default: begin + // + xy_valid_lsb_adv [6] <= 1'b0; + xy_aux_lsb_adv [6] <= 1'b0; + xy_bitmap_lsb_adv[6] <= {8{1'b0}}; + xy_index_lsb_adv [6] <= 3'dX; + xy_purge_lsb_adv [6] <= 1'b0; + // + xy_valid_msb_adv [6] <= 1'b0; + xy_bitmap_msb_adv[6] <= {8{1'b0}}; + xy_purge_msb_adv [6] <= 1'b0; + // + end + // + endcase + + + always @(posedge clk) begin + // + 
xy_valid_lsb <= xy_valid_lsb_adv [1]; + xy_aux_lsb <= xy_aux_lsb_adv [1]; + xy_bitmap_lsb <= xy_bitmap_lsb_adv[1]; + xy_index_lsb <= xy_index_lsb_adv [1]; + xy_purge_lsb <= xy_purge_lsb_adv [1]; + // + xy_valid_latch_lsb <= xy_valid_lsb; + xy_aux_latch_lsb <= xy_aux_lsb; + xy_bitmap_latch_lsb <= xy_bitmap_lsb; + xy_index_latch_lsb <= xy_index_lsb; + xy_purge_latch_lsb <= xy_purge_lsb; + // + xy_valid_msb <= xy_valid_msb_adv[1]; + xy_bitmap_msb <= xy_bitmap_msb_adv[1]; + xy_purge_msb <= xy_purge_msb_adv[1]; + // + if (xy_valid_msb) begin + xy_bitmap_latch_msb <= xy_bitmap_msb; + xy_purge_latch_msb <= xy_purge_msb; + end else begin + xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[7:1]}; + end + // + // + for (i=1; i<6; i=i+1) begin + xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1]; + xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1]; + xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1]; + xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1]; + xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1]; + // + xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1]; + xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1]; + xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1]; + end + // + end + + always @(posedge clk) + // + if (xy_bitmap_latch_msb[1]) // only shift 7 times + // + for (i=0; i<8; i=i+1) + // + if (i < 7) begin + dsp_x_p_latch[i] <= dsp_x_p_latch[i+1]; + dsp_y_p_latch[i] <= dsp_y_p_latch[i+1]; + end else begin + dsp_x_p_latch[i] <= {47{1'bX}}; + dsp_y_p_latch[i] <= {47{1'bX}}; + end + // + else if (dsp_xy_ce_p_dly1) begin + // + for (i=0; i<8; i=i+1) + // + if (xy_bitmap_lsb[i]) begin + dsp_x_p_latch[i] <= dsp_x_p_split[i]; + dsp_y_p_latch[i] <= dsp_y_p_split[i]; + end else if (xy_valid_msb && xy_bitmap_msb[i]) begin + dsp_x_p_latch[i] <= dsp_x_p_split[i]; + dsp_y_p_latch[i] <= dsp_y_p_split[i]; + end + // + if (xy_aux_lsb) begin + dsp_x_p_latch[8] <= dsp_x_p_split[8]; + dsp_y_p_latch[8] <= dsp_y_p_split[8]; + end + // + end + + reg rcmb_xy_lsb_valid = 1'b0; + reg rcmb_xy_msb_valid = 1'b0; + + always 
@(posedge clk) + // + if (rst) begin + rcmb_xy_lsb_valid <= 1'b0; + rcmb_xy_msb_valid <= 1'b0; + end else begin + rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined; + rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined; + end + + + reg [ 1:0] wide_xy_bank; + reg [ 7:0] wide_xy_addr; + reg [17:0] wide_x_dout; + reg [17:0] wide_y_dout; + reg wide_xy_valid = 1'b0; + + reg [ 1:0] narrow_xy_bank; + reg [ 7:0] narrow_xy_addr; + reg [17:0] narrow_x_dout; + reg [17:0] narrow_y_dout; + reg narrow_xy_valid = 1'b0; + + reg [ 1:0] rdct_xy_bank; + reg [ 7:0] rdct_xy_addr; + reg [17:0] rdct_x_dout; + reg [17:0] rdct_y_dout; + reg rdct_xy_valid = 1'b0; + + reg [ 7:0] cnt_lsb; + reg [ 7:0] cnt_msb; + + reg cnt_lsb_wrapped; + reg cnt_msb_wrapped; + + reg [31:0] rcmb_xy_msb_carry_0; + reg [31:0] rcmb_xy_msb_carry_1; + + reg [31:0] rcmb_xy_msb_delay_0; + reg [31:0] rcmb_xy_msb_delay_1; + reg [31:0] rcmb_xy_msb_delay_2; + + reg [ 7:0] rcmb_msb_cnt_delay_0 = 8'd0; + reg [ 7:0] rcmb_msb_cnt_delay_1 = 8'd0; + reg [ 7:0] rcmb_msb_cnt_delay_2 = 8'd0; + + reg rcmb_msb_flag_delay_0; + reg rcmb_msb_flag_delay_1; + reg rcmb_msb_flag_delay_2; + + assign rcmb_wide_xy_bank = wide_xy_bank; + assign rcmb_wide_xy_addr = wide_xy_addr; + assign rcmb_wide_x_dout = wide_x_dout; + assign rcmb_wide_y_dout = wide_y_dout; + assign rcmb_wide_xy_valid = wide_xy_valid; + + assign rcmb_narrow_xy_bank = narrow_xy_bank; + assign rcmb_narrow_xy_addr = narrow_xy_addr; + assign rcmb_narrow_x_dout = narrow_x_dout; + assign rcmb_narrow_y_dout = narrow_y_dout; + assign rcmb_narrow_xy_valid = narrow_xy_valid; + + assign rdct_narrow_xy_bank = rdct_xy_bank; + assign rdct_narrow_xy_addr = rdct_xy_addr; + assign rdct_narrow_x_dout = rdct_x_dout; + assign rdct_narrow_y_dout = rdct_y_dout; + assign rdct_narrow_xy_valid = rdct_xy_valid; + + reg rdy_reg = 1'b1; + reg rdy_adv = 1'b1; + + assign rdy = rdy_reg; - wire [14:0] din_z = din[46:32]; // TODO: maybe determine more precise bound here - wire [15:0] din_y = din[31:16]; - wire [15:0] 
din_x = din[15: 0]; always @(posedge clk) // - if (ce) begin - z <= din_z; - y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z}; - x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {{16{1'b0}}, x[17:16]}; - //w <= clr ? {16{1'bX}} : x[15:0]; + if (ena) rdy_reg <= 1'b0; + else rdy_reg <= rdy_adv; + + task advance_rcmb_msb_delay; + input [15:0] dout_x; + input [15:0] dout_y; + input [ 7:0] cnt; + input flag; + begin + // + rcmb_xy_msb_delay_0 <= {dout_y, dout_x}; + rcmb_xy_msb_delay_1 <= rcmb_xy_msb_delay_0; + rcmb_xy_msb_delay_2 <= rcmb_xy_msb_delay_1; + // + rcmb_msb_cnt_delay_0 <= cnt; + rcmb_msb_cnt_delay_1 <= rcmb_msb_cnt_delay_0; + rcmb_msb_cnt_delay_2 <= rcmb_msb_cnt_delay_1; + // + rcmb_msb_flag_delay_0 <= flag; + rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0; + rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1; + // + end + endtask + + task shift_rcmb_msb_carry; + input [15:0] dout_x; + input [15:0] dout_y; + begin + rcmb_xy_msb_carry_0 <= {dout_y, dout_x}; + rcmb_xy_msb_carry_1 <= rcmb_xy_msb_carry_0; + end + endtask + + task _update_wide; + input [ 1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + wide_xy_bank <= bank; + wide_xy_addr <= addr; + wide_x_dout <= dout_x; + wide_y_dout <= dout_y; + wide_xy_valid <= valid; + end + endtask + + task _update_narrow; + input [ 1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + narrow_xy_bank <= bank; + narrow_xy_addr <= addr; + narrow_x_dout <= dout_x; + narrow_y_dout <= dout_y; + narrow_xy_valid <= valid; end + endtask + + task _update_rdct; + input [ 1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + rdct_xy_bank <= bank; + rdct_xy_addr <= addr; + rdct_x_dout <= dout_x; + rdct_y_dout <= dout_y; + rdct_xy_valid <= valid; + end + endtask + + task set_wide; + input [ 1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + 
_update_wide(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + + task set_narrow; + input [ 1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_narrow(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + + task set_rdct; + input [ 1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_rdct(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + + task clear_wide; + begin + _update_wide(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + task clear_narrow; + begin + _update_narrow(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + task clear_rdct; + begin + _update_rdct(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + task _set_cnt_lsb; + input [7:0] cnt; + input wrapped; + begin + cnt_lsb <= cnt; + cnt_lsb_wrapped <= wrapped; + end + endtask + + task _set_cnt_msb; + input [7:0] cnt; + input wrapped; + begin + cnt_msb <= cnt; + cnt_msb_wrapped <= wrapped; + end + endtask + + task inc_cnt_lsb; + begin + if (cnt_lsb == word_index_last) + _set_cnt_lsb(8'd0, 1'b1); + else + _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped); + end + endtask + + task inc_cnt_both; + begin + inc_cnt_lsb; + inc_cnt_msb; + end + endtask + + task inc_cnt_msb; + begin + if (cnt_msb == word_index_last) + _set_cnt_msb(8'd0, 1'b1); + else + _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped); + end + endtask + + task clr_cnt_lsb; + begin + _set_cnt_lsb(8'd0, 1'b0); + end + endtask + + task clr_cnt_msb; + begin + _set_cnt_msb(8'd0, 1'b0); + end + endtask + + + + wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid}; + + always @(posedge clk) + // + if (ena) begin + clr_cnt_lsb(); + clr_cnt_msb(); + end else if (!rdy) + // + case (rcmb_mode) + 2'd1: recombine_square(); + 2'd2: recombine_triangle(); + 2'd3: recombine_rectangle(); + endcase + + wire [17:0] rcmb_x_lsb_dout_pad = {2'b00, rcmb_x_lsb_dout}; + wire [17:0] rcmb_y_lsb_dout_pad = {2'b00, rcmb_y_lsb_dout}; + + wire 
[17:0] rcmb_x_msb_dout_pad = {2'b00, rcmb_x_msb_dout}; + wire [17:0] rcmb_y_msb_dout_pad = {2'b00, rcmb_y_msb_dout}; + + wire [17:0] rcmb_x_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[15: 0]}; + wire [17:0] rcmb_y_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[31:16]}; + + wire [17:0] rcmb_x_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_x_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[15: 0]}}; + wire [17:0] rcmb_y_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_y_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[31:16]}}; + + + task recombine_square; + // + begin + // + case (rcmb_xy_valid) + // + 2'b01: inc_cnt_lsb; + 2'b10: inc_cnt_msb; + 2'b11: inc_cnt_both; + // + endcase + // + case (rcmb_xy_valid) + // + 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_ABH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); + else clear_wide; + // + 2'b01: set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + // + 2'b10: if (cnt_msb < 8'd2) clear_wide; + else set_wide(BANK_WIDE_ABH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + // + 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_ABH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); + else set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + // + endcase + // + case (rcmb_xy_valid) + // + 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); + 2'b10: if (cnt_msb < 8'd2) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout); + // + 2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1); + if (cnt_lsb_wrapped) shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}}); + end + // + endcase + // + end + // + endtask + + + task recombine_triangle; + // + begin + // + case (rcmb_xy_valid) + // + 2'b01: inc_cnt_lsb(); + // + endcase + // + case (rcmb_xy_valid) + // + 2'b00: clear_narrow; + 2'b01: if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + else 
set_narrow(BANK_NARROW_EXT, 8'd1, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b10: clear_narrow; + 2'b11: clear_narrow; + // + endcase + // + end + // + endtask + + + task recombine_rectangle; + // + begin + // + case (rcmb_xy_valid) + // + 2'b01: inc_cnt_lsb; + 2'b10: inc_cnt_msb; + 2'b11: inc_cnt_both; + // + endcase +// // + case (rcmb_xy_valid) +// // + 2'b00: if (rcmb_msb_flag_delay_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); + else clear_rdct; + 2'b01: set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b10: if (!cnt_msb_wrapped) begin + if (cnt_msb < 8'd2) clear_rdct; + else set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + end else set_rdct(BANK_RCMB_EXT, 8'd0, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + + 2'b11: set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); +// // + endcase +// // + case (rcmb_xy_valid) +// // + 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); + 2'b10: begin + if ((cnt_msb < 8'd2) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout); + if (cnt_msb_wrapped) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); + end +// // + 2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1); + shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}}); + end +// // + endcase + // + end + // + endtask + + + always @(posedge clk) + // + if (ena) begin + rdy_adv <= 1'b0; + end else if (!rdy_reg) begin + // + case (rcmb_mode) + // + 2'd1: case (rcmb_xy_valid) + // + 2'b00: begin + // + if (rcmb_msb_flag_delay_2) begin + // + rdy_adv <= ~rcmb_msb_flag_delay_1; + // + end + // + end + // + endcase + // + 2'd2: case (rcmb_xy_valid) + // + 2'b01: rdy_adv <= cnt_lsb_wrapped; // + // + endcase + // + 2'd3: case (rcmb_xy_valid) + // + 2'b00: begin + // + if (rcmb_msb_flag_delay_2) begin + // + rdy_adv <= ~rcmb_msb_flag_delay_1; + 
// + end + // + end + // + endcase + // + endcase + // + end + + + // add ready for mode=3 endmodule diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v new file mode 100644 index 0000000..1ecf56a --- /dev/null +++ b/rtl/modexpng_recombinator_cell.v @@ -0,0 +1,35 @@ +module modexpng_recombinator_cell +( + clk, + ce, clr, + din, dout +); + + input clk; + input ce; + input clr; + input [46:0] din; + output [15:0] dout; + + reg [14:0] z; + reg [16:0] y; + reg [17:0] x; + //reg [15:0] w; + + //assign dout = w; + assign dout = x[15:0]; + + wire [14:0] din_z = din[46:32]; // TODO: maybe determine more precise bound here + wire [15:0] din_y = din[31:16]; + wire [15:0] din_x = din[15: 0]; + + always @(posedge clk) + // + if (ce) begin + z <= din_z; + y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z}; + x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {{16{1'b0}}, x[17:16]}; + //w <= clr ? {16{1'bX}} : x[15:0]; + end + +endmodule diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v new file mode 100644 index 0000000..0f5e461 --- /dev/null +++ b/rtl/modexpng_reductor.v @@ -0,0 +1,270 @@ +module modexpng_reductor +( + clk, rst, + ena, rdy, + //fsm_state_next, + word_index_last, + //dsp_xy_ce_p, + //dsp_x_p, dsp_y_p, + //col_index, col_index_last, + rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux, + //rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid, + rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid, + rdct_final_xy_addr, rdct_final_x_dout, rdct_final_y_dout, rdct_final_xy_valid +); + + + // + // Headers + // + //`include "../rtl_1/modexpng_mmm_fsm.vh" + `include "../rtl_1/modexpng_parameters_old.vh" + //`include "../rtl_1/modexpng_parameters_x8.vh" + + + input clk; + input rst; + input ena; + output rdy; + /* + input [FSM_STATE_WIDTH-1:0] fsm_state_next;*/ + input [7:0] word_index_last;/* + input 
dsp_xy_ce_p; + *//* + input [9*47-1:0] dsp_x_p; + input [9*47-1:0] dsp_y_p; + input [ 4:0] col_index; + input [ 4:0] col_index_last; + *//* + input [ 7:0] rd_narrow_xy_addr; + input [ 1:0] rd_narrow_xy_bank; + */ + input [ 1:0] rd_wide_xy_bank_aux; + input [ 7:0] rd_wide_xy_addr_aux; + input [ 17:0] rd_wide_x_dout_aux; + input [ 17:0] rd_wide_y_dout_aux; + // + input [ 1:0] rcmb_final_xy_bank; + input [ 7:0] rcmb_final_xy_addr; + input [ 17:0] rcmb_final_x_dout; + input [ 17:0] rcmb_final_y_dout; + input rcmb_final_xy_valid; + + output [ 7:0] rdct_final_xy_addr; + output [ 17:0] rdct_final_x_dout; + output [ 17:0] rdct_final_y_dout; + output rdct_final_xy_valid; + + + // + // Ready + // + reg rdy_reg = 1'b1; + reg busy_now = 1'b0; + + assign rdy = rdy_reg; + + always @(posedge clk) + // + if (rst) rdy_reg <= 1'b1; + else begin + if (rdy && ena) rdy_reg <= 1'b0; + if (!rdy && !busy_now) rdy_reg <= 1'b1; + end + + + + // + // Pipeline (Delay Match) + // + reg rcmb_xy_valid_dly1 = 1'b0; + reg rcmb_xy_valid_dly2 = 1'b0; + reg rcmb_xy_valid_dly3 = 1'b0; + + reg [2:0] rcmb_xy_bank_dly1; + reg [2:0] rcmb_xy_bank_dly2; + reg [2:0] rcmb_xy_bank_dly3; + + reg [7:0] rcmb_xy_addr_dly1; + reg [7:0] rcmb_xy_addr_dly2; + reg [7:0] rcmb_xy_addr_dly3; + + reg [17:0] rcmb_x_dout_dly1; + reg [17:0] rcmb_x_dout_dly2; + reg [17:0] rcmb_x_dout_dly3; + + reg [17:0] rcmb_y_dout_dly1; + reg [17:0] rcmb_y_dout_dly2; + reg [17:0] rcmb_y_dout_dly3; + + always @(posedge clk) + // + if (rst) begin + rcmb_xy_valid_dly1 <= 1'b0; + rcmb_xy_valid_dly2 <= 1'b0; + rcmb_xy_valid_dly3 <= 1'b0; + end else begin + rcmb_xy_valid_dly1 <= rcmb_final_xy_valid; + rcmb_xy_valid_dly2 <= rcmb_xy_valid_dly1; + rcmb_xy_valid_dly3 <= rcmb_xy_valid_dly2; + end + + + always @(posedge clk) begin + // + if (rcmb_final_xy_valid) begin + rcmb_xy_bank_dly1 <= rcmb_final_xy_bank; + rcmb_xy_addr_dly1 <= rcmb_final_xy_addr; + rcmb_x_dout_dly1 <= rcmb_final_x_dout; + rcmb_y_dout_dly1 <= rcmb_final_y_dout; + end + // + if 
(rcmb_xy_valid_dly1) begin + rcmb_xy_bank_dly2 <= rcmb_xy_bank_dly1; + rcmb_xy_addr_dly2 <= rcmb_xy_addr_dly1; + rcmb_x_dout_dly2 <= rcmb_x_dout_dly1; + rcmb_y_dout_dly2 <= rcmb_y_dout_dly1; + end + // + if (rcmb_xy_valid_dly2) begin + rcmb_xy_bank_dly3 <= rcmb_xy_bank_dly2; + rcmb_xy_addr_dly3 <= rcmb_xy_addr_dly2; + rcmb_x_dout_dly3 <= rcmb_x_dout_dly2; + rcmb_y_dout_dly3 <= rcmb_y_dout_dly2; + end + // + end + + + reg [ 1:0] rcmb_x_lsb_carry; + reg [15:0] rcmb_x_lsb_dummy; + reg [17:0] rcmb_x_lsb_dout; + + reg [ 1:0] rcmb_y_lsb_carry; + reg [15:0] rcmb_y_lsb_dummy; + reg [17:0] rcmb_y_lsb_dout; + + //reg [17:0] reductor_fat_bram_x_msb_dout; + //reg reductor_fat_bram_x_msb_dout_valid = 1'b0; + //reg [ 7:0] reductor_fat_bram_x_msb_addr; + + // + // Carry Computation + // + always @(posedge clk) + // + if (ena) begin + rcmb_x_lsb_carry <= 2'b00; + rcmb_y_lsb_carry <= 2'b00; + end else if (rcmb_xy_valid_dly3) + // + case (rcmb_xy_bank_dly3) + + BANK_RCMB_ML: begin + {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry; + {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry; + end + + BANK_RCMB_MH: + if (rcmb_xy_addr_dly3 == 8'd0) begin + {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry; + {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry; + end + + endcase + + + // + // Reduction + // + reg [ 7:0] rdct_xy_addr; + reg [ 17:0] rdct_x_dout; + reg [ 17:0] rdct_y_dout; + reg rdct_xy_valid = 1'b0; + + assign rdct_final_xy_addr = rdct_xy_addr; + assign rdct_final_x_dout = rdct_x_dout; + assign rdct_final_y_dout = rdct_y_dout; + assign rdct_final_xy_valid = rdct_xy_valid; + + task _update_rdct; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + rdct_xy_addr <= addr; + rdct_x_dout <= dout_x; + rdct_y_dout <= dout_y; + rdct_xy_valid <= valid; + end + 
endtask + + task set_rdct; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_rdct(addr, dout_x, dout_y, 1'b1); + end + endtask + + task clear_rdct; + begin + _update_rdct(8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + + // + // + // + wire [17:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux; + wire [17:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux; + + wire [17:0] sum_rdct_x_carry = sum_rdct_x + {16'h0000, rcmb_x_lsb_carry}; + wire [17:0] sum_rdct_y_carry = sum_rdct_y + {16'h0000, rcmb_y_lsb_carry}; + + + // + // + // + always @(posedge clk) + // + if (rst) clear_rdct; + else begin + // + clear_rdct; + // + if (busy_now && rcmb_xy_valid_dly3) + // + case (rcmb_xy_bank_dly3) + + BANK_RCMB_MH: + if (rcmb_xy_addr_dly3 == 8'd1) + set_rdct(8'd0, sum_rdct_x_carry, sum_rdct_y_carry); + else if (rcmb_xy_addr_dly3 > 8'd1) + set_rdct(rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y); + + BANK_RCMB_EXT: + set_rdct(word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3); + + endcase + // + end + + + + // + // Busy + // + always @(posedge clk) + // + if (rst) busy_now <= 1'b0; + else begin + if (rdy && ena) busy_now <= 1'b1; + //if (!rdy && !busy_now) rdy <= 1'b1; + end + + +endmodule diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v new file mode 100644 index 0000000..d6f9fb1 --- /dev/null +++ b/rtl/modexpng_storage_block.v @@ -0,0 +1,226 @@ +module modexpng_storage_block +( + clk, rst, + + wr_wide_xy_ena, + wr_wide_xy_bank, + wr_wide_xy_addr, + wr_wide_x_din, + wr_wide_y_din, + + wr_narrow_xy_ena, + wr_narrow_xy_bank, + wr_narrow_xy_addr, + wr_narrow_x_din, + wr_narrow_y_din, + + rd_wide_xy_ena, + rd_wide_xy_ena_aux, + rd_wide_xy_bank, + rd_wide_xy_bank_aux, + rd_wide_xy_addr, + rd_wide_xy_addr_aux, + rd_wide_x_dout, + rd_wide_y_dout, + rd_wide_x_dout_aux, + rd_wide_y_dout_aux, + + rd_narrow_xy_ena, + rd_narrow_xy_bank, + rd_narrow_xy_addr, + rd_narrow_x_dout, + rd_narrow_y_dout +); + + + // + // 
Headers + // + `include "../rtl_1/modexpng_parameters_x8_old.vh" + + + // + // Ports + // + input clk; + input rst; + + input wr_wide_xy_ena; + input [ 1:0] wr_wide_xy_bank; + input [ 7:0] wr_wide_xy_addr; + input [17:0] wr_wide_x_din; + input [17:0] wr_wide_y_din; + + input wr_narrow_xy_ena; + input [ 1:0] wr_narrow_xy_bank; + input [ 7:0] wr_narrow_xy_addr; + input [17:0] wr_narrow_x_din; + input [17:0] wr_narrow_y_din; + + input rd_wide_xy_ena; + input rd_wide_xy_ena_aux; + input [ 1:0] rd_wide_xy_bank; + input [ 1:0] rd_wide_xy_bank_aux; + input [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; + input [ 8-1:0] rd_wide_xy_addr_aux; + output [18*NUM_MULTS/2-1:0] rd_wide_x_dout; + output [18*NUM_MULTS/2-1:0] rd_wide_y_dout; + output [ 18-1:0] rd_wide_x_dout_aux; + output [ 18-1:0] rd_wide_y_dout_aux; + + input rd_narrow_xy_ena; + input [ 1:0] rd_narrow_xy_bank; + input [ 7:0] rd_narrow_xy_addr; + output [18-1:0] rd_narrow_x_dout; + output [18-1:0] rd_narrow_y_dout; + + + // + // Internal Registers + // + reg rd_wide_xy_reg_ena = 1'b0; + reg rd_wide_xy_reg_ena_aux = 1'b0; + reg rd_narrow_xy_reg_ena = 1'b0; + + always @(posedge clk) begin + // + rd_wide_xy_reg_ena <= rst ? 1'b0 : rd_wide_xy_ena; + rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux; + rd_narrow_xy_reg_ena <= rst ? 
1'b0 : rd_narrow_xy_ena; + // + end + + + // + // Helper Signals + // + wire [2+8-1:0] wr_wide_xy_offset; + wire [2+8-1:0] rd_wide_xy_offset[0:NUM_MULTS/2-1]; + wire [2+8-1:0] rd_wide_xy_offset_aux; + wire [2+8-1:0] wr_narrow_xy_offset; + wire [2+8-1:0] rd_narrow_xy_offset; + + assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr}; + assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux}; + assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr}; + assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr}; + + + // + // "Wide" Storage + // + genvar z; + generate for (z=0; z<(NUM_MULTS/2); z=z+1) + begin : gen_wide_bram + // + assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[8*z+:8]}; + // + ip_bram_18k wide_bram_x + ( + .clka (clk), + .clkb (clk), + + .ena (wr_wide_xy_ena), + .wea (wr_wide_xy_ena), + .addra (wr_wide_xy_offset), + .dina (wr_wide_x_din), + + .enb (rd_wide_xy_ena), + .regceb (rd_wide_xy_reg_ena), + .addrb (rd_wide_xy_offset[z]), + .doutb (rd_wide_x_dout[18*z+:18]) + ); + // + ip_bram_18k wide_bram_y + ( + .clka (clk), + .clkb (clk), + + .ena (wr_wide_xy_ena), + .wea (wr_wide_xy_ena), + .addra (wr_wide_xy_offset), + .dina (wr_wide_y_din), + + .enb (rd_wide_xy_ena), + .regceb (rd_wide_xy_reg_ena), + .addrb (rd_wide_xy_offset[z]), + .doutb (rd_wide_y_dout[18*z+:18]) + ); + // + end + endgenerate + + + // + // Auxilary Storage + // + ip_bram_18k wide_bram_x_aux + ( + .clka (clk), + .clkb (clk), + + .ena (wr_wide_xy_ena), + .wea (wr_wide_xy_ena), + .addra (wr_wide_xy_offset), + .dina (wr_wide_x_din), + + .enb (rd_wide_xy_ena_aux), + .regceb (rd_wide_xy_reg_ena_aux), + .addrb (rd_wide_xy_offset_aux), + .doutb (rd_wide_x_dout_aux) + ); + // + ip_bram_18k wide_bram_y_aux + ( + .clka (clk), + .clkb (clk), + + .ena (wr_wide_xy_ena), + .wea (wr_wide_xy_ena), + .addra (wr_wide_xy_offset), + .dina (wr_wide_y_din), + + .enb (rd_wide_xy_ena_aux), + .regceb (rd_wide_xy_reg_ena_aux), + .addrb 
(rd_wide_xy_offset_aux), + .doutb (rd_wide_y_dout_aux) + ); + + + // + // "Narrow" Storage + // + ip_bram_18k narrow_bram_x + ( + .clka (clk), + .clkb (clk), + + .ena (wr_narrow_xy_ena), + .wea (wr_narrow_xy_ena), + .addra (wr_narrow_xy_offset), + .dina (wr_narrow_x_din), + + .enb (rd_narrow_xy_ena), + .regceb (rd_narrow_xy_reg_ena), + .addrb (rd_narrow_xy_offset), + .doutb (rd_narrow_x_dout) + ); + + ip_bram_18k narrow_bram_y + ( + .clka (clk), + .clkb (clk), + + .ena (wr_narrow_xy_ena), + .wea (wr_narrow_xy_ena), + .addra (wr_narrow_xy_offset), + .dina (wr_narrow_y_din), + + .enb (rd_narrow_xy_ena), + .regceb (rd_narrow_xy_reg_ena), + .addrb (rd_narrow_xy_offset), + .doutb (rd_narrow_y_dout) + ); + + +endmodule diff --git a/rtl/modexpng_storage_manager.v b/rtl/modexpng_storage_manager.v new file mode 100644 index 0000000..fa1e4a1 --- /dev/null +++ b/rtl/modexpng_storage_manager.v @@ -0,0 +1,200 @@ +module modexpng_storage_manager +( + clk, rst, + + wr_wide_xy_ena, + wr_wide_xy_bank, + wr_wide_xy_addr, + wr_wide_x_din, + wr_wide_y_din, + + wr_narrow_xy_ena, + wr_narrow_xy_bank, + wr_narrow_xy_addr, + wr_narrow_x_din, + wr_narrow_y_din, + + ext_wide_xy_ena, + ext_wide_xy_bank, + ext_wide_xy_addr, + ext_wide_x_din, + ext_wide_y_din, + + ext_narrow_xy_ena, + ext_narrow_xy_bank, + ext_narrow_xy_addr, + ext_narrow_x_din, + ext_narrow_y_din, + + rcmb_wide_xy_ena, + rcmb_wide_xy_bank, + rcmb_wide_xy_addr, + rcmb_wide_x_din, + rcmb_wide_y_din, + + rcmb_narrow_xy_ena, + rcmb_narrow_xy_bank, + rcmb_narrow_xy_addr, + rcmb_narrow_x_din, + rcmb_narrow_y_din +); + + + // + // Headers + // + `include "../rtl_1/modexpng_parameters_x8_old.vh" + + + // + // Ports + // + input clk; + input rst; + + output wr_wide_xy_ena; + output [ 1:0] wr_wide_xy_bank; + output [ 7:0] wr_wide_xy_addr; + output [17:0] wr_wide_x_din; + output [17:0] wr_wide_y_din; + + output wr_narrow_xy_ena; + output [ 1:0] wr_narrow_xy_bank; + output [ 7:0] wr_narrow_xy_addr; + output [17:0] wr_narrow_x_din; + 
output [17:0] wr_narrow_y_din; + + input ext_wide_xy_ena; + input [ 1:0] ext_wide_xy_bank; + input [ 7:0] ext_wide_xy_addr; + input [17:0] ext_wide_x_din; + input [17:0] ext_wide_y_din; + + input ext_narrow_xy_ena; + input [ 1:0] ext_narrow_xy_bank; + input [ 7:0] ext_narrow_xy_addr; + input [17:0] ext_narrow_x_din; + input [17:0] ext_narrow_y_din; + + input rcmb_wide_xy_ena; + input [ 1:0] rcmb_wide_xy_bank; + input [ 7:0] rcmb_wide_xy_addr; + input [17:0] rcmb_wide_x_din; + input [17:0] rcmb_wide_y_din; + + input rcmb_narrow_xy_ena; + input [ 1:0] rcmb_narrow_xy_bank; + input [ 7:0] rcmb_narrow_xy_addr; + input [17:0] rcmb_narrow_x_din; + input [17:0] rcmb_narrow_y_din; + + + reg wr_wide_xy_ena_reg = 1'b0; + reg [ 1:0] wr_wide_xy_bank_reg; + reg [ 7:0] wr_wide_xy_addr_reg; + reg [17:0] wr_wide_x_din_reg; + reg [17:0] wr_wide_y_din_reg; + + reg wr_narrow_xy_ena_reg = 1'b0; + reg [ 1:0] wr_narrow_xy_bank_reg; + reg [ 7:0] wr_narrow_xy_addr_reg; + reg [17:0] wr_narrow_x_din_reg; + reg [17:0] wr_narrow_y_din_reg; + + task _update_wide; + input xy_ena; + input [ 1:0] xy_bank; + input [ 7:0] xy_addr; + input [17:0] x_din; + input [17:0] y_din; + begin + wr_wide_xy_ena_reg <= xy_ena; + wr_wide_xy_bank_reg <= xy_bank; + wr_wide_xy_addr_reg <= xy_addr; + wr_wide_x_din_reg <= x_din; + wr_wide_y_din_reg <= y_din; + end + endtask + + task _update_narrow; + input xy_ena; + input [ 1:0] xy_bank; + input [ 7:0] xy_addr; + input [17:0] x_din; + input [17:0] y_din; + begin + wr_narrow_xy_ena_reg <= xy_ena; + wr_narrow_xy_bank_reg <= xy_bank; + wr_narrow_xy_addr_reg <= xy_addr; + wr_narrow_x_din_reg <= x_din; + wr_narrow_y_din_reg <= y_din; + end + endtask + + task enable_wide; + input [ 1:0] xy_bank; + input [ 7:0] xy_addr; + input [17:0] x_din; + input [17:0] y_din; + begin + _update_wide(1'b1, xy_bank, xy_addr, x_din, y_din); + end + endtask + + task enable_narrow; + input [ 1:0] xy_bank; + input [ 7:0] xy_addr; + input [17:0] x_din; + input [17:0] y_din; + begin + 
_update_narrow(1'b1, xy_bank, xy_addr, x_din, y_din); + end + endtask + + task disable_wide; + begin + _update_wide(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}); + end + endtask + + task disable_narrow; + begin + _update_narrow(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}); + end + endtask + + always @(posedge clk) + // + if (rst) disable_wide; + else begin + // + if (ext_wide_xy_ena) enable_wide(ext_wide_xy_bank, ext_wide_xy_addr, ext_wide_x_din, ext_wide_y_din); + else if (rcmb_wide_xy_ena) enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din); + else disable_wide; + // + end + + always @(posedge clk) + // + if (rst) disable_narrow; + else begin + // + if (ext_narrow_xy_ena) enable_narrow(ext_narrow_xy_bank, ext_narrow_xy_addr, ext_narrow_x_din, ext_narrow_y_din); + else if (rcmb_narrow_xy_ena) enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din); + else disable_narrow; + // + end + + assign wr_wide_xy_ena = wr_wide_xy_ena_reg; + assign wr_wide_xy_bank = wr_wide_xy_bank_reg; + assign wr_wide_xy_addr = wr_wide_xy_addr_reg; + assign wr_wide_x_din = wr_wide_x_din_reg; + assign wr_wide_y_din = wr_wide_y_din_reg; + + assign wr_narrow_xy_ena = wr_narrow_xy_ena_reg; + assign wr_narrow_xy_bank = wr_narrow_xy_bank_reg; + assign wr_narrow_xy_addr = wr_narrow_xy_addr_reg; + assign wr_narrow_x_din = wr_narrow_x_din_reg; + assign wr_narrow_y_din = wr_narrow_y_din_reg; + +endmodule -- cgit v1.2.3