//======================================================================
//
// Copyright (c) 2019, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================
//
// Recombinator block.
//
// Captures the per-multiplier DSP outputs (dsp_x_p / dsp_y_p), runs selected
// products through four modexpng_recombinator_cell instances (X and Y, each
// with an "lsb" and an "msb" cell) and emits the resulting words on one of
// three write ports, depending on the mode sampled from fsm_state_next while
// ena is high:
//   - "square"    mode drives the rcmb_wide_*   outputs,
//   - "triangle"  mode drives the rcmb_narrow_* outputs,
//   - "rectangle" mode drives the rdct_narrow_* outputs.
//
module modexpng_recombinator_block
(
clk, rst_n,
ena, rdy,
fsm_state_next,
word_index_last,
dsp_xy_ce_p,
dsp_x_p, dsp_y_p,
col_index, col_index_last,
rd_narrow_xy_addr, rd_narrow_xy_bank,
rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid,
rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid,
rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
);
//
// Headers
//
// All widths, bank/address constants and MMM_FSM_STATE_* values used below
// come from these two headers (not visible in this file).
`include "modexpng_parameters.vh"
`include "modexpng_mmm_dual_fsm.vh"
// Clock: all sequential logic in this module is posedge-triggered.
input clk;
// Active-low asynchronous reset; only a few delay/valid registers use it,
// most state relies on declaration initializers or on ena.
input rst_n;
// Start strobe: samples the mode, clears the word counters, re-arms the cell
// clear flags and drives rdy low.
input ena;
// Registered ready flag; low while a recombination is in progress.
output rdy;
// Next state of the controlling FSM; compared against MMM_FSM_STATE_* values.
input [MMM_FSM_STATE_W -1:0] fsm_state_next;
// Value at which the internal word counters wrap back to zero; also passed
// to the calc_*_msb functions as index_last_value.
input [ OP_ADDR_W -1:0] word_index_last;
// DSP output enable; delayed by one cycle before it gates product capture.
input dsp_xy_ce_p;
// Concatenated DSP products, NUM_MULTS_AUX slices of MAC_W bits each.
input [ MAC_W * NUM_MULTS_AUX -1:0] dsp_x_p;
input [ MAC_W * NUM_MULTS_AUX -1:0] dsp_y_p;
// Current and last column indices, passed to the calc_* functions.
input [ COL_INDEX_W -1:0] col_index;
input [ COL_INDEX_W -1:0] col_index_last;
// Bank/address of the narrow operand read, passed to the calc_* functions.
input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
input [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
// Write port driven by recombine_square.
output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr;
output [ WORD_EXT_W -1:0] rcmb_wide_x_dout;
output [ WORD_EXT_W -1:0] rcmb_wide_y_dout;
output rcmb_wide_xy_valid;
// Write port driven by recombine_triangle.
output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout;
output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout;
output rcmb_narrow_xy_valid;
// Write port driven by recombine_rectangle.
output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank;
output [ OP_ADDR_W -1:0] rdct_narrow_xy_addr;
output [ WORD_EXT_W -1:0] rdct_narrow_x_dout;
output [ WORD_EXT_W -1:0] rdct_narrow_y_dout;
output rdct_narrow_xy_valid;
//
// Latches
//
// Captured products, one MAC_W-bit entry per multiplier (including the
// auxiliary one at index NUM_MULTS_AUX-1).
reg [MAC_W-1:0] dsp_x_p_latch[0:NUM_MULTS_AUX-1];
reg [MAC_W-1:0] dsp_y_p_latch[0:NUM_MULTS_AUX-1];
//
// Mapping
//
// Per-multiplier view of the flat dsp_x_p / dsp_y_p input buses: entry z is
// the MAC_W-bit slice starting at bit z*MAC_W.
wire [MAC_W-1:0] dsp_x_p_split[0:NUM_MULTS_AUX-1];
wire [MAC_W-1:0] dsp_y_p_split[0:NUM_MULTS_AUX-1];
genvar z;
generate
    for (z = 0; z < NUM_MULTS_AUX; z = z + 1) begin : gen_dsp_xy_p_split
        assign dsp_x_p_split[z] = dsp_x_p[(z + 1) * MAC_W - 1 : z * MAC_W];
        assign dsp_y_p_split[z] = dsp_y_p[(z + 1) * MAC_W - 1 : z * MAC_W];
    end
endgenerate
//
// Delays
//
// dsp_xy_ce_p delayed by one clock; asynchronously cleared by rst_n.
reg dsp_xy_ce_p_dly1 = 1'b0;
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        dsp_xy_ce_p_dly1 <= 1'b0;
    end else begin
        dsp_xy_ce_p_dly1 <= dsp_xy_ce_p;
    end
end
//
// Registers
//
// valid
// Stage registers fed from the *_adv[1] pipeline outputs, followed by a
// second "latch" stage one clock later. The "lsb" set steers the LSB
// recombinator cells, the "msb" set steers the MSB cells.
reg xy_valid_lsb = 1'b0;
reg xy_aux_lsb = 1'b0;
reg xy_valid_msb = 1'b0;
// bitmap
// One bit per multiplier: selects which dsp_*_p_split entries get captured.
reg [NUM_MULTS-1:0] xy_bitmap_lsb = {NUM_MULTS{1'b0}};
reg [NUM_MULTS-1:0] xy_bitmap_msb = {NUM_MULTS{1'b0}};
// index
// Index of the product latch that is fed to the LSB cells (no initializer).
reg [MAC_INDEX_W-1:0] xy_index_lsb;
// purge
reg xy_purge_lsb = 1'b0;
reg xy_purge_msb = 1'b0;
// valid - latch
reg xy_valid_latch_lsb = 1'b0;
// aux - latch
reg xy_aux_latch_lsb = 1'b0;
// bitmap - latch
// xy_bitmap_latch_msb is loaded when xy_valid_msb is set and otherwise
// shifted right by one position every clock.
reg [NUM_MULTS-1:0] xy_bitmap_latch_lsb = {NUM_MULTS{1'b0}};
reg [NUM_MULTS-1:0] xy_bitmap_latch_msb = {NUM_MULTS{1'b0}};
// index - latch
reg [MAC_INDEX_W-1:0] xy_index_latch_lsb;
// purge - latch
reg xy_purge_latch_lsb = 1'b0;
reg xy_purge_latch_msb = 1'b0;
//
// Anticipatory Values
//
// Six-deep shift pipelines: entry [6] is computed from the current inputs and
// moves one step towards entry [1] on every clock; entry [1] feeds the stage
// registers above.
// NOTE(review): xy_index_msb_adv is only initialized in this file; it is
// never shifted or read anywhere in the module.
reg xy_valid_lsb_adv [1:6];
reg xy_valid_msb_adv [1:6];
reg xy_aux_lsb_adv [1:6];
reg [NUM_MULTS -1:0] xy_bitmap_lsb_adv[1:6];
reg [NUM_MULTS -1:0] xy_bitmap_msb_adv[1:6];
reg [MAC_INDEX_W -1:0] xy_index_lsb_adv [1:6];
reg [MAC_INDEX_W -1:0] xy_index_msb_adv [1:6];
reg xy_purge_lsb_adv [1:6];
reg xy_purge_msb_adv [1:6];
//
// Recombination Mode
//
// Mode encodings for the recombination process.
localparam [1:0] RCMB_MODE_UNUSED    = 2'd0;
localparam [1:0] RCMB_MODE_SQUARE    = 2'd1;
localparam [1:0] RCMB_MODE_TRIANGLE  = 2'd2;
localparam [1:0] RCMB_MODE_RECTANGLE = 2'd3;
reg [1:0] rcmb_mode = RCMB_MODE_UNUSED;
// The mode is sampled only while ena is high; it is derived from which
// *_COL_0_BUSY state the controlling FSM is about to enter. Any other next
// state selects RCMB_MODE_UNUSED.
always @(posedge clk) begin
    if (ena) begin
        if (fsm_state_next == MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY)
            rcmb_mode <= RCMB_MODE_SQUARE;
        else if (fsm_state_next == MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY)
            rcmb_mode <= RCMB_MODE_TRIANGLE;
        else if (fsm_state_next == MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY)
            rcmb_mode <= RCMB_MODE_RECTANGLE;
        else
            rcmb_mode <= RCMB_MODE_UNUSED;
    end
end
//
// Initialization
//
// Shared loop index; also used by the for-loops in the always blocks below.
integer i;
// Power-up contents of the anticipatory pipelines: all flags and bitmaps
// cleared, indices left undefined.
initial begin
    for (i = 6; i >= 1; i = i - 1) begin
        xy_valid_lsb_adv [i] = 1'b0;
        xy_aux_lsb_adv   [i] = 1'b0;
        xy_bitmap_lsb_adv[i] = {NUM_MULTS{1'b0}};
        xy_index_lsb_adv [i] = {MAC_INDEX_W{1'bX}};
        xy_purge_lsb_adv [i] = 1'b0;
        xy_valid_msb_adv [i] = 1'b0;
        xy_bitmap_msb_adv[i] = {NUM_MULTS{1'b0}};
        xy_index_msb_adv [i] = {MAC_INDEX_W{1'bX}};
        xy_purge_msb_adv [i] = 1'b0;
    end
end
// Returns 1 when the upper address bits (above the MAC index field) equal the
// current column index, 0 otherwise. col_index_last_value and
// narrow_xy_bank_value are accepted for signature uniformity but not used.
function calc_square_triangle_valid_lsb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    begin
        calc_square_triangle_valid_lsb = 1'b0;
        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
            calc_square_triangle_valid_lsb = 1'b1;
    end
endfunction
// Square-mode LSB valid flag: forwards all four arguments unchanged to
// calc_square_triangle_valid_lsb.
function calc_square_valid_lsb;
input [COL_INDEX_W -1:0] col_index_value;
input [COL_INDEX_W -1:0] col_index_last_value;
input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [ OP_ADDR_W -1:0] narrow_xy_addr_value;
calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
endfunction
// Triangle-mode LSB valid flag: forwards all four arguments unchanged to
// calc_square_triangle_valid_lsb.
function calc_triangle_valid_lsb;
input [COL_INDEX_W -1:0] col_index_value;
input [COL_INDEX_W -1:0] col_index_last_value;
input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [ OP_ADDR_W -1:0] narrow_xy_addr_value;
calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
endfunction
// Rectangle-mode LSB valid flag: when the upper address bits match the
// current column index the result is (bank != BANK_NARROW_EXT); on a
// column mismatch the result is 0. col_index_last_value is unused.
function calc_rectangle_valid_lsb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    begin
        calc_rectangle_valid_lsb = 1'b0;
        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
            calc_rectangle_valid_lsb = (narrow_xy_bank_value != BANK_NARROW_EXT);
    end
endfunction
// Triangle-mode auxiliary flag: 1 exactly when the narrow bank equals
// BANK_NARROW_EXT. The column and address arguments are unused.
function calc_triangle_aux_lsb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    begin
        calc_triangle_aux_lsb = 1'b0;
        if (narrow_xy_bank_value == BANK_NARROW_EXT)
            calc_triangle_aux_lsb = 1'b1;
    end
endfunction
//
// TODO: This will need some generic replacement defined in modexpng_parameters.vh
// in case anything different from NUM_MULTS = 8 is used.
//
// One-hot capture bitmap for the LSB path (square and triangle modes).
//
// When the upper address bits match the current column index, the result has
// exactly one bit set, at the position given by the low MAC_INDEX_W address
// bits; otherwise the result is all zeroes.
//
// The decode is written in terms of NUM_MULTS / MAC_INDEX_W instead of 8-bit
// and 3-bit literals, so it follows the parameters. For NUM_MULTS = 8 and
// MAC_INDEX_W = 3 it produces the same values as the former case table.
// Requires NUM_MULTS >= 2 (the replication count must be positive).
// col_index_last_value and narrow_xy_bank_value are unused.
function [ NUM_MULTS -1:0] calc_square_triangle_bitmap_lsb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    begin
        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
            // 1 << index, evaluated at NUM_MULTS bits
            calc_square_triangle_bitmap_lsb = {{(NUM_MULTS-1){1'b0}}, 1'b1} << narrow_xy_addr_value[MAC_INDEX_W-1:0];
        else
            calc_square_triangle_bitmap_lsb = {NUM_MULTS{1'b0}};
    end
endfunction
// Square-mode LSB capture bitmap: forwards all four arguments unchanged to
// calc_square_triangle_bitmap_lsb.
function [ NUM_MULTS -1:0] calc_square_bitmap_lsb;
input [COL_INDEX_W -1:0] col_index_value;
input [COL_INDEX_W -1:0] col_index_last_value;
input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [ OP_ADDR_W -1:0] narrow_xy_addr_value;
calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
endfunction
// Triangle-mode LSB capture bitmap: forwards all four arguments unchanged to
// calc_square_triangle_bitmap_lsb.
function [ NUM_MULTS -1:0] calc_triangle_bitmap_lsb;
input [COL_INDEX_W -1:0] col_index_value;
input [COL_INDEX_W -1:0] col_index_last_value;
input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [ OP_ADDR_W -1:0] narrow_xy_addr_value;
calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
endfunction
// One-hot capture bitmap for the LSB path in rectangle mode.
//
// When the upper address bits match the current column index AND the bank is
// not BANK_NARROW_EXT, the result has exactly one bit set, at the position
// given by the low MAC_INDEX_W address bits; otherwise it is all zeroes.
//
// The decode is written in terms of NUM_MULTS / MAC_INDEX_W instead of 8-bit
// and 3-bit literals, so it follows the parameters. For NUM_MULTS = 8 and
// MAC_INDEX_W = 3 it produces the same values as the former case table.
// Requires NUM_MULTS >= 2 (the replication count must be positive).
// col_index_last_value is unused.
function [ NUM_MULTS -1:0] calc_rectangle_bitmap_lsb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    begin
        if ((narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
            // 1 << index, evaluated at NUM_MULTS bits
            calc_rectangle_bitmap_lsb = {{(NUM_MULTS-1){1'b0}}, 1'b1} << narrow_xy_addr_value[MAC_INDEX_W-1:0];
        else
            calc_rectangle_bitmap_lsb = {NUM_MULTS{1'b0}};
    end
endfunction
/*
* These can be simplified (the difference between square/triangle and
* rectangle is that the bank is checked or not). A universal function would
* accept a parameter that tells it whether it should check the bank or not.
* Let's do it later, too early to optimize now, it seems.
*/
// Product latch index for the LSB path (square and triangle modes).
//
// When the upper address bits match the current column index, the result is
// the low MAC_INDEX_W bits of the address; otherwise it is all-X (don't care).
//
// The former 3-bit case table was an identity mapping, so it is replaced by a
// direct slice that follows MAC_INDEX_W. Values are unchanged for
// MAC_INDEX_W = 3.
// col_index_last_value and narrow_xy_bank_value are unused.
function [MAC_INDEX_W -1:0] calc_square_triangle_index_lsb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    begin
        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
            calc_square_triangle_index_lsb = narrow_xy_addr_value[MAC_INDEX_W-1:0];
        else
            calc_square_triangle_index_lsb = {MAC_INDEX_W{1'bX}};
    end
endfunction
// Square-mode LSB latch index: forwards all four arguments unchanged to
// calc_square_triangle_index_lsb.
function [MAC_INDEX_W -1:0] calc_square_index_lsb;
input [COL_INDEX_W -1:0] col_index_value;
input [COL_INDEX_W -1:0] col_index_last_value;
input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [ OP_ADDR_W -1:0] narrow_xy_addr_value;
calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
endfunction
// Triangle-mode LSB latch index: forwards all four arguments unchanged to
// calc_square_triangle_index_lsb.
function [MAC_INDEX_W -1:0] calc_triangle_index_lsb;
input [COL_INDEX_W -1:0] col_index_value;
input [COL_INDEX_W -1:0] col_index_last_value;
input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [ OP_ADDR_W -1:0] narrow_xy_addr_value;
calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
endfunction
// Product latch index for the LSB path in rectangle mode.
//
// When the upper address bits match the current column index AND the bank is
// not BANK_NARROW_EXT, the result is the low MAC_INDEX_W bits of the address;
// otherwise it is all-X (don't care).
//
// The former 3-bit case table was an identity mapping, so it is replaced by a
// direct slice that follows MAC_INDEX_W. Values are unchanged for
// MAC_INDEX_W = 3. The argument names now match the other calc_* functions
// (all calls in this file are positional).
// col_index_last_value is unused.
function [MAC_INDEX_W -1:0] calc_rectangle_index_lsb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    begin
        if ((narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
            calc_rectangle_index_lsb = narrow_xy_addr_value[MAC_INDEX_W-1:0];
        else
            calc_rectangle_index_lsb = {MAC_INDEX_W{1'bX}};
    end
endfunction
// LSB purge flag shared by square and rectangle modes: when the upper address
// bits match the current column index, the result tells whether those same
// bits also equal the last column index; on a mismatch the result is 0.
// narrow_xy_bank_value is unused.
function calc_square_rectangle_purge_lsb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    begin
        calc_square_rectangle_purge_lsb = 1'b0;
        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
            calc_square_rectangle_purge_lsb = (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_last_value);
    end
endfunction
// Square-mode LSB purge flag: forwards all four arguments unchanged to
// calc_square_rectangle_purge_lsb.
function calc_square_purge_lsb;
input [COL_INDEX_W -1:0] col_index_value;
input [COL_INDEX_W -1:0] col_index_last_value;
input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [ OP_ADDR_W -1:0] narrow_xy_addr_value;
calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
endfunction
// Rectangle-mode LSB purge flag: forwards all four arguments unchanged to
// calc_square_rectangle_purge_lsb.
function calc_rectangle_purge_lsb;
input [COL_INDEX_W -1:0] col_index_value;
input [COL_INDEX_W -1:0] col_index_last_value;
input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [ OP_ADDR_W -1:0] narrow_xy_addr_value;
calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
endfunction
// Square-mode MSB valid flag: 1 exactly when the full narrow address equals
// index_last_value. The column and bank arguments are unused.
function calc_square_valid_msb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    input [  OP_ADDR_W -1:0] index_last_value;
    begin
        calc_square_valid_msb = 1'b0;
        if (narrow_xy_addr_value == index_last_value)
            calc_square_valid_msb = 1'b1;
    end
endfunction
// Rectangle-mode MSB valid flag: 1 exactly when the narrow address equals
// OP_ADDR_ONE and the bank equals BANK_NARROW_EXT. The column arguments and
// index_last_value are unused.
function calc_rectangle_valid_msb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    input [  OP_ADDR_W -1:0] index_last_value;
    begin
        calc_rectangle_valid_msb = 1'b0;
        if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT))
            calc_rectangle_valid_msb = 1'b1;
    end
endfunction
// Square-mode MSB capture bitmap.
//
// When the narrow address equals index_last_value, the low NUM_MULTS-1 bits
// are all set and the top bit is set only if the current column is not the
// last column; otherwise the result is all zeroes.
//
// Widths follow NUM_MULTS instead of the former 7-bit / 8-bit literals; the
// values are unchanged for NUM_MULTS = 8. Requires NUM_MULTS >= 2.
// narrow_xy_bank_value is unused.
function [ NUM_MULTS -1:0] calc_square_bitmap_msb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    input [  OP_ADDR_W -1:0] index_last_value;
    begin
        if (narrow_xy_addr_value == index_last_value)
            calc_square_bitmap_msb = {col_index_value != col_index_last_value, {(NUM_MULTS-1){1'b1}}};
        else
            calc_square_bitmap_msb = {NUM_MULTS{1'b0}};
    end
endfunction
// Rectangle-mode MSB capture bitmap.
//
// All NUM_MULTS bits are set when the narrow address equals OP_ADDR_ONE and
// the bank equals BANK_NARROW_EXT; otherwise the result is all zeroes.
//
// Widths follow NUM_MULTS instead of the former 8-bit literals; the values
// are unchanged for NUM_MULTS = 8.
// The column arguments and index_last_value are unused.
function [ NUM_MULTS -1:0] calc_rectangle_bitmap_msb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    input [  OP_ADDR_W -1:0] index_last_value;
    begin
        if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT))
            calc_rectangle_bitmap_msb = {NUM_MULTS{1'b1}};
        else
            calc_rectangle_bitmap_msb = {NUM_MULTS{1'b0}};
    end
endfunction
// Square-mode MSB purge flag: when the narrow address equals
// index_last_value, the result tells whether the current column is the last
// column; otherwise the result is 0. narrow_xy_bank_value is unused.
function calc_square_purge_msb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    input [  OP_ADDR_W -1:0] index_last_value;
    begin
        calc_square_purge_msb = 1'b0;
        if (narrow_xy_addr_value == index_last_value)
            calc_square_purge_msb = (col_index_value == col_index_last_value);
    end
endfunction
// Rectangle-mode MSB purge flag: when the narrow address equals OP_ADDR_ONE
// and the bank equals BANK_NARROW_EXT, the result tells whether the current
// column is the last column; otherwise the result is 0.
// index_last_value is unused.
function calc_rectangle_purge_msb;
    input [COL_INDEX_W -1:0] col_index_value;
    input [COL_INDEX_W -1:0] col_index_last_value;
    input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
    input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
    input [  OP_ADDR_W -1:0] index_last_value;
    begin
        calc_rectangle_purge_msb = 1'b0;
        if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT))
            calc_rectangle_purge_msb = (col_index_value == col_index_last_value);
    end
endfunction
//
// Recombinator Cell Instances
//
// Two-entry store for MSB cell outputs that are later fed back into the LSB
// cells (carry_0 is the newest entry, carry_1 the one consumed by the LSB
// input mux).
reg [WORD_W -1:0] rcmb_x_msb_carry_0;
reg [WORD_W -1:0] rcmb_y_msb_carry_0;
reg [WORD_W -1:0] rcmb_x_msb_carry_1;
reg [WORD_W -1:0] rcmb_y_msb_carry_1;
// LSB cell controls. The cells are clocked when a regular product, the
// auxiliary product or a purge cycle is being pushed.
reg rcmb_xy_lsb_ce = 1'b0;
reg rcmb_xy_lsb_ce_aux = 1'b0;
reg [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000;
wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
reg rcmb_xy_lsb_clr;
// High when no regular LSB product is pending and purge bit 1 is set; selects
// the stored MSB carry as LSB cell input and triggers pop_rcmb_msb_carry.
wire rcmb_xy_lsb_cry = !xy_valid_latch_lsb && rcmb_xy_lsb_ce_purge[1];
reg [ MAC_W -1:0] rcmb_x_lsb_din;
reg [ MAC_W -1:0] rcmb_y_lsb_din;
wire [WORD_W -1:0] rcmb_x_lsb_dout;
wire [WORD_W -1:0] rcmb_y_lsb_dout;
// One bit wider than dout; semantics are defined inside
// modexpng_recombinator_cell (not visible here).
wire [WORD_W :0] rcmb_x_lsb_doutw;
wire [WORD_W :0] rcmb_y_lsb_doutw;
// MSB cell controls.
reg rcmb_xy_msb_ce = 1'b0;
reg [ 1:0] rcmb_xy_msb_ce_purge = 2'b00;
wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
reg rcmb_xy_msb_clr;
reg [ MAC_W -1:0] rcmb_x_msb_din;
reg [ MAC_W -1:0] rcmb_y_msb_din;
wire [WORD_W -1:0] rcmb_x_msb_dout;
wire [WORD_W -1:0] rcmb_y_msb_dout;
// X and Y cells of each pair share the same ce/clr controls and differ only
// in their data input and outputs.
modexpng_recombinator_cell recomb_x_lsb
(
.clk (clk),
.ce (rcmb_xy_lsb_ce_combined),
.clr (rcmb_xy_lsb_clr),
.din (rcmb_x_lsb_din),
.dout (rcmb_x_lsb_dout),
.doutw (rcmb_x_lsb_doutw)
);
modexpng_recombinator_cell recomb_y_lsb
(
.clk (clk),
.ce (rcmb_xy_lsb_ce_combined),
.clr (rcmb_xy_lsb_clr),
.din (rcmb_y_lsb_din),
.dout (rcmb_y_lsb_dout),
.doutw (rcmb_y_lsb_doutw)
);
// The wide output of the MSB cells is left unconnected.
modexpng_recombinator_cell recomb_x_msb
(
.clk (clk),
.ce (rcmb_xy_msb_ce_combined),
.clr (rcmb_xy_msb_clr),
.din (rcmb_x_msb_din),
.dout (rcmb_x_msb_dout),
.doutw ()
);
modexpng_recombinator_cell recomb_y_msb
(
.clk (clk),
.ce (rcmb_xy_msb_ce_combined),
.clr (rcmb_xy_msb_clr),
.din (rcmb_y_msb_din),
.dout (rcmb_y_msb_dout),
.doutw ()
);
// Clock-enable generation for the recombinator cells.
always @(posedge clk) begin
    // Regular enables are the latch-stage flags delayed by one clock.
    rcmb_xy_msb_ce     <= xy_bitmap_latch_msb[0];
    rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb;
    rcmb_xy_lsb_ce     <= xy_valid_latch_lsb;

    // LSB purge: shift towards bit 0 by default, reload with all ones when
    // a purge is requested (the later assignment takes priority).
    rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]};
    if (xy_purge_latch_lsb)
        rcmb_xy_lsb_ce_purge <= 3'b111;

    // MSB purge: same scheme, reloaded only on the final bit of the MSB
    // bitmap (bit 0 set, bit 1 already clear).
    rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]};
    if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1])
        rcmb_xy_msb_ce_purge <= 2'b11;
end
// Cell clear flags: set together while ena is high, then dropped
// individually once the respective cell has seen its first regular enable.
always @(posedge clk) begin
    if (rcmb_xy_lsb_ce) rcmb_xy_lsb_clr <= 1'b0;
    if (rcmb_xy_msb_ce) rcmb_xy_msb_clr <= 1'b0;
    // ena has priority: these assignments come last and therefore win.
    if (ena) begin
        rcmb_xy_lsb_clr <= 1'b1;
        rcmb_xy_msb_clr <= 1'b1;
    end
end
// Input mux for the LSB cells. Priority: regular product (selected by
// xy_index_latch_lsb), then the auxiliary product, then the stored MSB carry
// (zero-extended to MAC_W); zero when none applies.
always @(posedge clk) begin
    rcmb_x_lsb_din <= {MAC_W{1'b0}};
    rcmb_y_lsb_din <= {MAC_W{1'b0}};
    if (xy_valid_latch_lsb) begin
        rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb];
        rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb];
    end else if (xy_aux_latch_lsb) begin
        rcmb_x_lsb_din <= dsp_x_p_latch[NUM_MULTS_AUX-1];
        rcmb_y_lsb_din <= dsp_y_p_latch[NUM_MULTS_AUX-1];
    end else if (rcmb_xy_lsb_cry) begin
        rcmb_x_lsb_din <= {{(MAC_W-WORD_W){1'b0}}, rcmb_x_msb_carry_1};
        rcmb_y_lsb_din <= {{(MAC_W-WORD_W){1'b0}}, rcmb_y_msb_carry_1};
    end
end
// Input mux for the MSB cells: product latch 0 while bit 0 of the MSB latch
// bitmap is set, zero otherwise.
always @(posedge clk) begin
    rcmb_x_msb_din <= {MAC_W{1'b0}};
    rcmb_y_msb_din <= {MAC_W{1'b0}};
    if (xy_bitmap_latch_msb[0]) begin
        rcmb_x_msb_din <= dsp_x_p_latch[0];
        rcmb_y_msb_din <= dsp_y_p_latch[0];
    end
end
// Head of the anticipatory pipelines: entry [6] is recomputed every clock
// from the upcoming FSM state and the current narrow read bank/address.
always @(posedge clk) begin
    // Idle values; each multiplication mode below overrides only the
    // entries it actually drives (later non-blocking assignments win).
    xy_valid_lsb_adv [6] <= 1'b0;
    xy_aux_lsb_adv   [6] <= 1'b0;
    xy_bitmap_lsb_adv[6] <= {NUM_MULTS{1'b0}};
    xy_index_lsb_adv [6] <= {MAC_INDEX_W{1'bX}};
    xy_purge_lsb_adv [6] <= 1'b0;
    xy_valid_msb_adv [6] <= 1'b0;
    xy_bitmap_msb_adv[6] <= {NUM_MULTS{1'b0}};
    xy_purge_msb_adv [6] <= 1'b0;

    case (fsm_state_next)

        // Square: LSB and MSB paths active, no auxiliary product.
        MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
            xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
            xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
            xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
        end

        // Triangle: LSB path plus auxiliary product; no purge, no MSB path.
        MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
            xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_aux_lsb_adv   [6] <= calc_triangle_aux_lsb   (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
        end

        // Rectangle: LSB and MSB paths active, no auxiliary product.
        MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
            xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
            xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
            xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
        end

        // Any other state keeps the idle values assigned above.
        default: ;

    endcase
end
// Pipeline advance: every clock the anticipatory entries move one step
// towards [1], entry [1] moves into the stage registers and the stage
// registers move into the latch stage.
always @(posedge clk) begin
    // *_adv[i+1] -> *_adv[i]
    for (i = 1; i < 6; i = i + 1) begin
        xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1];
        xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1];
        xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1];
        xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
        xy_aux_lsb_adv   [i] <= xy_aux_lsb_adv   [i+1];
        xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
        xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
        xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
    end

    // LSB path: *_adv[1] -> stage -> latch
    {xy_valid_lsb, xy_valid_latch_lsb} <= {xy_valid_lsb_adv[1], xy_valid_lsb};
    {xy_aux_lsb,   xy_aux_latch_lsb}   <= {xy_aux_lsb_adv  [1], xy_aux_lsb};
    {xy_purge_lsb, xy_purge_latch_lsb} <= {xy_purge_lsb_adv[1], xy_purge_lsb};
    xy_bitmap_lsb       <= xy_bitmap_lsb_adv[1];
    xy_bitmap_latch_lsb <= xy_bitmap_lsb;
    xy_index_lsb        <= xy_index_lsb_adv[1];
    xy_index_latch_lsb  <= xy_index_lsb;

    // MSB path: *_adv[1] -> stage
    xy_valid_msb  <= xy_valid_msb_adv [1];
    xy_bitmap_msb <= xy_bitmap_msb_adv[1];
    xy_purge_msb  <= xy_purge_msb_adv [1];

    // MSB latch stage: loaded only when the stage is valid; in all other
    // cycles the bitmap is shifted right by one and the purge flag is held.
    if (xy_valid_msb) begin
        xy_bitmap_latch_msb <= xy_bitmap_msb;
        xy_purge_latch_msb  <= xy_purge_msb;
    end else begin
        xy_bitmap_latch_msb <= xy_bitmap_latch_msb >> 1;
    end
end
// Product latch update. Shifting has priority over capturing.
always @(posedge clk) begin
    if (xy_bitmap_latch_msb[1]) begin
        // Shift entries 1..NUM_MULTS-1 down by one while bit 1 of the MSB
        // latch bitmap is set (the original comment: "only shift 7 times").
        // The vacated top entry becomes undefined; the auxiliary entry at
        // NUM_MULTS_AUX-1 is not written here.
        for (i = 0; i < (NUM_MULTS-1); i = i + 1) begin
            dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
            dsp_y_p_latch[i] <= dsp_y_p_latch[i+1];
        end
        dsp_x_p_latch[NUM_MULTS-1] <= {MAC_W{1'bX}};
        dsp_y_p_latch[NUM_MULTS-1] <= {MAC_W{1'bX}};
    end else if (dsp_xy_ce_p_dly1) begin
        // Capture every product selected by the LSB bitmap or, when the MSB
        // stage is valid, by the MSB bitmap.
        for (i = 0; i < NUM_MULTS; i = i + 1) begin
            if (xy_bitmap_lsb[i] || (xy_valid_msb && xy_bitmap_msb[i])) begin
                dsp_x_p_latch[i] <= dsp_x_p_split[i];
                dsp_y_p_latch[i] <= dsp_y_p_split[i];
            end
        end
        // The auxiliary product has its own enable.
        if (xy_aux_lsb) begin
            dsp_x_p_latch[NUM_MULTS_AUX-1] <= dsp_x_p_split[NUM_MULTS_AUX-1];
            dsp_y_p_latch[NUM_MULTS_AUX-1] <= dsp_y_p_split[NUM_MULTS_AUX-1];
        end
    end
end
// Four-stage delay lines for the combined cell enables: the new sample enters
// at bit 1 and reaches bit 4 three clocks later.
reg [4:1] rcmb_xy_lsb_ce_combined_dly;
reg [4:1] rcmb_xy_msb_ce_combined_dly;
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        rcmb_xy_lsb_ce_combined_dly <= {4{1'b0}};
        rcmb_xy_msb_ce_combined_dly <= {4{1'b0}};
    end else begin
        rcmb_xy_lsb_ce_combined_dly[1]   <= rcmb_xy_lsb_ce_combined;
        rcmb_xy_lsb_ce_combined_dly[4:2] <= rcmb_xy_lsb_ce_combined_dly[3:1];
        rcmb_xy_msb_ce_combined_dly[1]   <= rcmb_xy_msb_ce_combined;
        rcmb_xy_msb_ce_combined_dly[4:2] <= rcmb_xy_msb_ce_combined_dly[3:1];
    end
end
// Output-valid flags: the last tap of each enable delay line, registered once
// more. Asynchronously cleared by rst_n.
reg rcmb_xy_lsb_valid = 1'b0;
reg rcmb_xy_msb_valid = 1'b0;
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        {rcmb_xy_msb_valid, rcmb_xy_lsb_valid} <= 2'b00;
    end else begin
        rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly[4];
        rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly[4];
    end
end
//
// Output Registers
//
// Registers behind the rcmb_wide_* output ports (written by _update_wide).
reg [BANK_ADDR_W -1:0] wide_xy_bank;
reg [ OP_ADDR_W -1:0] wide_xy_addr;
reg [ WORD_EXT_W -1:0] wide_x_dout;
reg [ WORD_EXT_W -1:0] wide_y_dout;
reg wide_xy_valid = 1'b0;
// Registers behind the rcmb_narrow_* output ports (written by _update_narrow).
reg [BANK_ADDR_W -1:0] narrow_xy_bank;
reg [ OP_ADDR_W -1:0] narrow_xy_addr;
reg [ WORD_EXT_W -1:0] narrow_x_dout;
reg [ WORD_EXT_W -1:0] narrow_y_dout;
reg narrow_xy_valid = 1'b0;
// Registers behind the rdct_narrow_* output ports (written by _update_rdct).
reg [BANK_ADDR_W -1:0] rdct_xy_bank;
reg [ OP_ADDR_W -1:0] rdct_xy_addr;
reg [ WORD_EXT_W -1:0] rdct_x_dout;
reg [ WORD_EXT_W -1:0] rdct_y_dout;
reg rdct_xy_valid = 1'b0;
//
// Internal Counters
//
// Word counters for the LSB and MSB output streams; each counts up to
// word_index_last, then restarts at zero and sets its *_wrapped flag.
reg [OP_ADDR_W -1:0] cnt_lsb;
reg [OP_ADDR_W -1:0] cnt_msb;
reg cnt_lsb_wrapped;
reg cnt_msb_wrapped;
// Three-stage delay pipeline for MSB words (data, address counter and a
// flag marking occupied stages); advanced by advance_rcmb_msb_delay.
reg [WORD_W-1:0] rcmb_xy_msb_dly_0_x, rcmb_xy_msb_dly_0_y;
reg [WORD_W-1:0] rcmb_xy_msb_dly_1_x, rcmb_xy_msb_dly_1_y;
reg [WORD_W-1:0] rcmb_xy_msb_dly_2_x, rcmb_xy_msb_dly_2_y;
reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_0 = OP_ADDR_ZERO;
reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_1 = OP_ADDR_ZERO;
reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_2 = OP_ADDR_ZERO;
reg rcmb_msb_flag_dly_0 = 1'b0;
reg rcmb_msb_flag_dly_1 = 1'b0;
reg rcmb_msb_flag_dly_2 = 1'b0;
//
// Output Port Mapping
//
// Each output port group is driven directly by its register group; the
// fields on both sides of every assignment are listed in the same order and
// have pairwise identical widths.
assign {rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid} =
       {wide_xy_bank,      wide_xy_addr,      wide_x_dout,      wide_y_dout,      wide_xy_valid};
assign {rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid} =
       {narrow_xy_bank,      narrow_xy_addr,      narrow_x_dout,      narrow_y_dout,      narrow_xy_valid};
assign {rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid} =
       {rdct_xy_bank,        rdct_xy_addr,        rdct_x_dout,        rdct_y_dout,        rdct_xy_valid};
//
// Ready Logic
//
// rdy is a registered copy of rdy_adv, forced low in the cycle after ena.
reg rdy_reg = 1'b1;
reg rdy_adv = 1'b1;
assign rdy = rdy_reg;
always @(posedge clk) begin
    rdy_reg <= rdy_adv;
    // ena overrides the follow-up value (the later assignment wins).
    if (ena) rdy_reg <= 1'b0;
end
//
// Helper Tasks
//
// Advances the three-stage MSB delay pipeline by one step: the supplied
// word pair, counter value and flag enter stage 0, stage 0 moves to stage 1
// and stage 1 moves to stage 2.
task advance_rcmb_msb_delay;
    input [   WORD_W -1:0] dout_x;
    input [   WORD_W -1:0] dout_y;
    input [OP_ADDR_W -1:0] cnt;
    input                  flag;
    begin
        // stage 1 -> stage 2
        rcmb_xy_msb_dly_2_x <= rcmb_xy_msb_dly_1_x;
        rcmb_xy_msb_dly_2_y <= rcmb_xy_msb_dly_1_y;
        rcmb_msb_cnt_dly_2  <= rcmb_msb_cnt_dly_1;
        rcmb_msb_flag_dly_2 <= rcmb_msb_flag_dly_1;
        // stage 0 -> stage 1
        rcmb_xy_msb_dly_1_x <= rcmb_xy_msb_dly_0_x;
        rcmb_xy_msb_dly_1_y <= rcmb_xy_msb_dly_0_y;
        rcmb_msb_cnt_dly_1  <= rcmb_msb_cnt_dly_0;
        rcmb_msb_flag_dly_1 <= rcmb_msb_flag_dly_0;
        // new entry -> stage 0
        rcmb_xy_msb_dly_0_x <= dout_x;
        rcmb_xy_msb_dly_0_y <= dout_y;
        rcmb_msb_cnt_dly_0  <= cnt;
        rcmb_msb_flag_dly_0 <= flag;
    end
endtask
// Shifts the two-entry MSB carry store: the supplied words enter carry_0 and
// the previous carry_0 contents move to carry_1.
task _update_rcmb_msb_carry;
    input [WORD_W-1:0] dout_x;
    input [WORD_W-1:0] dout_y;
    begin
        {rcmb_x_msb_carry_1, rcmb_x_msb_carry_0} <= {rcmb_x_msb_carry_0, dout_x};
        {rcmb_y_msb_carry_1, rcmb_y_msb_carry_0} <= {rcmb_y_msb_carry_0, dout_y};
    end
endtask
// Stores a new X/Y word pair in the MSB carry store (shifts the store via
// _update_rcmb_msb_carry with the given words).
task push_rcmb_msb_carry;
input [WORD_W -1:0] dout_x;
input [WORD_W -1:0] dout_y;
_update_rcmb_msb_carry(dout_x, dout_y);
endtask
// Consumes one entry of the MSB carry store: shifts the store and fills the
// freed carry_0 slot with the WORD_DNC constant.
task pop_rcmb_msb_carry;
_update_rcmb_msb_carry(WORD_DNC, WORD_DNC);
endtask
// Loads all five registers of the wide output port in one step.
task _update_wide;
    input [BANK_ADDR_W -1:0] bank;
    input [  OP_ADDR_W -1:0] addr;
    input [ WORD_EXT_W -1:0] dout_x;
    input [ WORD_EXT_W -1:0] dout_y;
    input                    valid;
    begin
        {wide_xy_bank, wide_xy_addr, wide_x_dout, wide_y_dout, wide_xy_valid} <=
            {bank, addr, dout_x, dout_y, valid};
    end
endtask
// Loads all five registers of the narrow output port in one step.
task _update_narrow;
    input [BANK_ADDR_W -1:0] bank;
    input [  OP_ADDR_W -1:0] addr;
    input [ WORD_EXT_W -1:0] dout_x;
    input [ WORD_EXT_W -1:0] dout_y;
    input                    valid;
    begin
        {narrow_xy_bank, narrow_xy_addr, narrow_x_dout, narrow_y_dout, narrow_xy_valid} <=
            {bank, addr, dout_x, dout_y, valid};
    end
endtask
// Loads all five registers of the reductor output port in one step.
task _update_rdct;
    input [BANK_ADDR_W -1:0] bank;
    input [  OP_ADDR_W -1:0] addr;
    input [ WORD_EXT_W -1:0] dout_x;
    input [ WORD_EXT_W -1:0] dout_y;
    input                    valid;
    begin
        {rdct_xy_bank, rdct_xy_addr, rdct_x_dout, rdct_y_dout, rdct_xy_valid} <=
            {bank, addr, dout_x, dout_y, valid};
    end
endtask
// set_*: drive the respective output port with the given bank, address and
// data words and assert its valid flag.
task set_wide;
input [BANK_ADDR_W -1:0] bank;
input [ OP_ADDR_W -1:0] addr;
input [ WORD_EXT_W -1:0] dout_x;
input [ WORD_EXT_W -1:0] dout_y;
_update_wide(bank, addr, dout_x, dout_y, 1'b1);
endtask
task set_narrow;
input [BANK_ADDR_W -1:0] bank;
input [ OP_ADDR_W -1:0] addr;
input [ WORD_EXT_W -1:0] dout_x;
input [ WORD_EXT_W -1:0] dout_y;
_update_narrow(bank, addr, dout_x, dout_y, 1'b1);
endtask
task set_rdct;
input [BANK_ADDR_W -1:0] bank;
input [ OP_ADDR_W -1:0] addr;
input [ WORD_EXT_W -1:0] dout_x;
input [ WORD_EXT_W -1:0] dout_y;
_update_rdct(bank, addr, dout_x, dout_y, 1'b1);
endtask
// clear_*: deassert the respective valid flag and load the *_DNC constants
// into the bank, address and data registers.
task clear_wide;
_update_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
endtask
task clear_narrow;
_update_narrow(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
endtask
task clear_rdct;
_update_rdct(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
endtask
// Loads the LSB word counter and its wrap flag.
task _set_cnt_lsb;
    input [OP_ADDR_W-1:0] cnt;
    input                 wrapped;
    begin
        cnt_lsb         <= cnt;
        cnt_lsb_wrapped <= wrapped;
    end
endtask
// Loads the MSB word counter and its wrap flag.
task _set_cnt_msb;
    input [OP_ADDR_W-1:0] cnt;
    input                 wrapped;
    begin
        cnt_msb         <= cnt;
        cnt_msb_wrapped <= wrapped;
    end
endtask
// Advances the LSB word counter; after word_index_last it restarts at zero
// and the wrap flag is set (and stays set until the counter is cleared).
task inc_cnt_lsb;
    begin
        if (cnt_lsb == word_index_last) begin
            cnt_lsb         <= OP_ADDR_ZERO;
            cnt_lsb_wrapped <= 1'b1;
        end else begin
            cnt_lsb         <= cnt_lsb + 1'b1;
            cnt_lsb_wrapped <= cnt_lsb_wrapped;
        end
    end
endtask
// Advances the MSB word counter; after word_index_last it restarts at zero
// and the wrap flag is set (and stays set until the counter is cleared).
task inc_cnt_msb;
    begin
        if (cnt_msb == word_index_last) begin
            cnt_msb         <= OP_ADDR_ZERO;
            cnt_msb_wrapped <= 1'b1;
        end else begin
            cnt_msb         <= cnt_msb + 1'b1;
            cnt_msb_wrapped <= cnt_msb_wrapped;
        end
    end
endtask
// Advances the LSB and the MSB word counter in the same clock.
task inc_cnt_both;
begin
inc_cnt_lsb;
inc_cnt_msb;
end
endtask
// Resets the LSB word counter to zero and clears its wrap flag.
task clr_cnt_lsb;
_set_cnt_lsb(OP_ADDR_ZERO, 1'b0);
endtask
// Resets the MSB word counter to zero and clears its wrap flag.
task clr_cnt_msb;
_set_cnt_msb(OP_ADDR_ZERO, 1'b0);
endtask
//
// Main Process
//
// While ena is high both word counters are cleared; afterwards, as long as
// the block is busy (rdy low), the recombination task matching the sampled
// mode runs once per clock. RCMB_MODE_UNUSED does nothing.
always @(posedge clk) begin
    if (ena) begin
        clr_cnt_lsb;
        clr_cnt_msb;
    end else if (!rdy) begin
        case (rcmb_mode)
            RCMB_MODE_SQUARE:    recombine_square();
            RCMB_MODE_TRIANGLE:  recombine_triangle();
            RCMB_MODE_RECTANGLE: recombine_rectangle();
            default: ;
        endcase
    end
end
//
// Padding
//
// Cell outputs widened to the WORD_EXT_W output word format. WORD_W-wide
// values are prefixed with CARRY_ZERO, the (WORD_W+1)-wide doutw values with
// a single zero bit.
// NOTE(review): this assumes WORD_EXT_W = WORD_W + 2 and a 2-bit CARRY_ZERO;
// both come from modexpng_parameters.vh - confirm there.
wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_pad = {CARRY_ZERO, rcmb_x_lsb_dout};
wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_pad = {CARRY_ZERO, rcmb_y_lsb_dout};
wire [WORD_EXT_W-1:0] rcmb_x_lsb_doutw_pad = {1'b0, rcmb_x_lsb_doutw};
wire [WORD_EXT_W-1:0] rcmb_y_lsb_doutw_pad = {1'b0, rcmb_y_lsb_doutw};
wire [WORD_EXT_W-1:0] rcmb_x_msb_dout_pad = {CARRY_ZERO, rcmb_x_msb_dout};
wire [WORD_EXT_W-1:0] rcmb_y_msb_dout_pad = {CARRY_ZERO, rcmb_y_msb_dout};
wire [WORD_EXT_W-1:0] rcmb_xy_msb_dly_2_x_pad = {CARRY_ZERO, rcmb_xy_msb_dly_2_x};
wire [WORD_EXT_W-1:0] rcmb_xy_msb_dly_2_y_pad = {CARRY_ZERO, rcmb_xy_msb_dly_2_y};
//
// Handy Signal
//
// Bit 1 = MSB cells have a valid output, bit 0 = LSB cells have a valid
// output; the recombination tasks dispatch on this pair.
wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid};
//
// Recombination Task - Square
//
// One clock of square-mode recombination. Called from the main process while
// the block is busy. Three independent decisions are made from rcmb_xy_valid
// ({msb valid, lsb valid}): counter advance, wide-port write, and carry /
// delay-pipeline bookkeeping. All assignments are non-blocking, so every
// decision sees the register values from before this clock edge.
task recombine_square;
//
begin
//
// 1) Advance the word counter of every stream that produced a word.
case (rcmb_xy_valid)
2'b01: inc_cnt_lsb;
2'b10: inc_cnt_msb;
2'b11: inc_cnt_both;
endcase
//
// 2) Drive the wide output port.
case (rcmb_xy_valid)
//
// Nothing new: drain the MSB delay pipeline into BANK_WIDE_H, or idle.
2'b00: if (rcmb_msb_flag_dly_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_dly_2, rcmb_xy_msb_dly_2_x_pad, rcmb_xy_msb_dly_2_y_pad);
else clear_wide;
//
// LSB only: write the LSB word to BANK_WIDE_L.
2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
//
// MSB only: the first two MSB words (cnt_msb < 2) are not written (they are
// pushed to the carry store below); later ones go to BANK_WIDE_H.
2'b10: if (cnt_msb < OP_ADDR_TWO) clear_wide;
else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
//
// Both: the LSB word is written now (the MSB word is queued below). Before
// the LSB counter has wrapped it goes to BANK_WIDE_L; afterwards to
// BANK_WIDE_H, using the wide cell output except at address zero.
2'b11: if (!cnt_lsb_wrapped) set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
else begin
if (cnt_lsb == OP_ADDR_ZERO) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
else set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_doutw_pad, rcmb_y_lsb_doutw_pad);
end
//
endcase
//
// 3) Maintain the MSB carry store and the MSB delay pipeline.
case (rcmb_xy_valid)
//
// Draining: shift an empty entry in so the pipeline eventually runs dry.
2'b00: if (rcmb_msb_flag_dly_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
//
// A stored carry is being fed into the LSB cells: drop it from the store.
2'b01: if (rcmb_xy_lsb_cry) pop_rcmb_msb_carry;
//
// First two MSB words are kept as carries instead of being written.
2'b10: if (cnt_msb < OP_ADDR_TWO) push_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
//
// Port is busy with the LSB word: queue the MSB word for a later write.
2'b11: advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
//
endcase
//
end
//
endtask
//
// Recombination Task - Triangle
//
// One clock of triangle-mode recombination. Only the LSB stream is used:
// a word is written to the narrow port exactly when rcmb_xy_valid is 2'b01;
// in the other three combinations the narrow port is idled.
task recombine_triangle;
    begin
        // Count every LSB-only word.
        if (rcmb_xy_valid == 2'b01) inc_cnt_lsb;

        case (rcmb_xy_valid)
            // Before the counter has wrapped, words go to BANK_NARROW_Q at
            // the running address; once it has wrapped, they go to the
            // fixed location BANK_NARROW_EXT / OP_ADDR_EXT_Q.
            2'b01: begin
                if (!cnt_lsb_wrapped)
                    set_narrow(BANK_NARROW_Q, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
                else
                    set_narrow(BANK_NARROW_EXT, OP_ADDR_EXT_Q, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
            end

            2'b00,
            2'b10,
            2'b11: clear_narrow;
        endcase
    end
endtask
//
// Recombination Task - Rectangle
//
// One clock of rectangle-mode recombination. Same three-step structure as
// the square task (counter advance, port write, carry / delay bookkeeping),
// but the words are written to the rdct_* port and the MSB stream gets an
// extra final word after its counter has wrapped. All assignments are
// non-blocking, so every decision sees the register values from before this
// clock edge.
task recombine_rectangle;
//
begin
//
// 1) Advance the word counter of every stream that produced a word.
case (rcmb_xy_valid)
2'b01: inc_cnt_lsb;
2'b10: inc_cnt_msb;
2'b11: inc_cnt_both;
endcase
//
// 2) Drive the rdct output port.
case (rcmb_xy_valid)
//
// Nothing new: drain the MSB delay pipeline into BANK_RCMB_MH, or idle.
2'b00: if (rcmb_msb_flag_dly_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_dly_2, rcmb_xy_msb_dly_2_x_pad, rcmb_xy_msb_dly_2_y_pad);
else clear_rdct;
//
// LSB only: write the LSB word to BANK_RCMB_ML.
2'b01: set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
//
// MSB only: before the MSB counter has wrapped, the first two words are not
// written (they become carries below) and later ones go to BANK_RCMB_MH;
// after the wrap the word goes to BANK_RCMB_EXT at address zero.
2'b10: if (!cnt_msb_wrapped) begin
if (cnt_msb < OP_ADDR_TWO) clear_rdct;
else set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
end else set_rdct(BANK_RCMB_EXT, OP_ADDR_ZERO, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
//
// Both: the LSB word is written to BANK_RCMB_MH now (the MSB word is queued
// below), using the wide cell output except at address zero.
2'b11: if (cnt_lsb == OP_ADDR_ZERO) set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
else set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_doutw_pad, rcmb_y_lsb_doutw_pad);
//
endcase
//
// 3) Maintain the MSB carry store and the MSB delay pipeline.
case (rcmb_xy_valid)
//
// Draining: shift an empty entry in so the pipeline eventually runs dry.
2'b00: if (rcmb_msb_flag_dly_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
//
// A stored carry is being fed into the LSB cells: drop it from the store.
2'b01: if (rcmb_xy_lsb_cry) pop_rcmb_msb_carry;
//
// First two MSB words (before the wrap) are kept as carries; after the wrap
// the delay pipeline is advanced with an empty entry instead.
2'b10: begin
if ((cnt_msb < OP_ADDR_TWO) && !cnt_msb_wrapped) push_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
if (cnt_msb_wrapped) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
end
//
// Port is busy with the LSB word: queue the MSB word for a later write.
2'b11: advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
//
endcase
//
end
//
endtask
//
// Completion Logic
//
// rdy_adv is cleared by ena and re-evaluated while the block is busy:
//   - triangle: on every LSB-only word it follows cnt_lsb_wrapped;
//   - square / rectangle: while the MSB delay pipeline is being drained
//     (no valid word, stage 2 occupied) it becomes the inverse of the
//     stage-1 flag, i.e. it goes high when stage 2 holds the last entry.
always @(posedge clk) begin
    if (ena) begin
        rdy_adv <= 1'b0;
    end else if (!rdy_reg) begin
        if (rcmb_mode == RCMB_MODE_TRIANGLE) begin
            if (rcmb_xy_valid == 2'b01)
                rdy_adv <= cnt_lsb_wrapped;
        end else if ((rcmb_mode == RCMB_MODE_SQUARE) || (rcmb_mode == RCMB_MODE_RECTANGLE)) begin
            if ((rcmb_xy_valid == 2'b00) && rcmb_msb_flag_dly_2)
                rdy_adv <= ~rcmb_msb_flag_dly_1;
        end
    end
end
endmodule