//====================================================================== // // Copyright (c) 2019, NORDUnet A/S All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // - Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // - Neither the name of the NORDUnet nor the names of its contributors may // be used to endorse or promote products derived from this software // without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
//
//======================================================================

//
// modexpng_recombinator_block
//
// Consumes the nine 47-bit product words packed into dsp_x_p / dsp_y_p and
// drives three output groups (rcmb_wide_*, rcmb_narrow_*, rdct_narrow_*),
// each made of a bank, an address, X/Y data words and a valid flag.
//
// This first part of the module declares the ports, the control pipeline
// registers and the pure decode functions that turn the narrow operand read
// address/bank (rd_narrow_xy_addr / rd_narrow_xy_bank) and the column
// counters into per-word control flags:
//
//   valid  - a product word has to be taken this cycle
//   aux    - the ninth product word (index 8) has to be taken (triangle only)
//   bitmap - one bit per product word 0..7 telling which latch to load
//   index  - number (0..7) of the product word to forward
//   purge  - flag raised on the last column
//
// NOTE(review): the exact meaning of "square", "triangle" and "rectangle"
// is defined by the FSM header, not by this file; presumably they are the
// three multiplication phases of the Montgomery multiplier - confirm.
//
module modexpng_recombinator_block
(
    clk, rst_n,
    ena, rdy,
    fsm_state_next,
    word_index_last,
    dsp_xy_ce_p,
    dsp_x_p, dsp_y_p,
    col_index, col_index_last,
    rd_narrow_xy_addr, rd_narrow_xy_bank,
    rcmb_wide_xy_bank,   rcmb_wide_xy_addr,   rcmb_wide_x_dout,   rcmb_wide_y_dout,   rcmb_wide_xy_valid,
    rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid,
    rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
);

//
// Headers
//
// The headers are included before the port declarations because the port
// widths below depend on constants they provide (MMM_FSM_STATE_W,
// BANK_ADDR_W).
//
`include "modexpng_parameters.vh"
`include "modexpng_mmm_dual_fsm.vh"

//
// Ports
//
input                        clk;                   // clock, all registers are rising-edge
input                        rst_n;                 // asynchronous reset, active low
input                        ena;                   // start strobe: clears counters, drops rdy
output                       rdy;                   // low while an operation is in progress

input  [MMM_FSM_STATE_W-1:0] fsm_state_next;        // next state of the controlling FSM
input  [                7:0] word_index_last;       // index of the last operand word
input                        dsp_xy_ce_p;           // product clock enable
input  [           9*47-1:0] dsp_x_p;               // nine packed 47-bit X products
input  [           9*47-1:0] dsp_y_p;               // nine packed 47-bit Y products
input  [                4:0] col_index;             // current column
input  [                4:0] col_index_last;        // last column
input  [    BANK_ADDR_W-1:0] rd_narrow_xy_bank;     // narrow operand read bank
input  [                7:0] rd_narrow_xy_addr;     // narrow operand read address

output [    BANK_ADDR_W-1:0] rcmb_wide_xy_bank;
output [                7:0] rcmb_wide_xy_addr;
output [               17:0] rcmb_wide_x_dout;
output [               17:0] rcmb_wide_y_dout;
output                       rcmb_wide_xy_valid;

output [    BANK_ADDR_W-1:0] rcmb_narrow_xy_bank;
output [                7:0] rcmb_narrow_xy_addr;
output [               17:0] rcmb_narrow_x_dout;
output [               17:0] rcmb_narrow_y_dout;
output                       rcmb_narrow_xy_valid;

output [    BANK_ADDR_W-1:0] rdct_narrow_xy_bank;
output [                7:0] rdct_narrow_xy_addr;
output [               17:0] rdct_narrow_x_dout;
output [               17:0] rdct_narrow_y_dout;
output                       rdct_narrow_xy_valid;

//
// Latches
//
// One 47-bit holding register per product word (0..7 regular, 8 auxiliary).
//
reg [1*47-1:0] dsp_x_p_latch[0:8];
reg [1*47-1:0] dsp_y_p_latch[0:8];

//
// Mapping
//
// Unpack the flat product buses into arrays of 47-bit words.
//
wire [46:0] dsp_x_p_split[0:8];
wire [46:0] dsp_y_p_split[0:8];

genvar z;
generate for (z=0; z<(NUM_MULTS+1); z=z+1)
    begin : gen_dsp_xy_p_split
        assign dsp_x_p_split[z] = dsp_x_p[47*z+:47];
        assign dsp_y_p_split[z] = dsp_y_p[47*z+:47];
    end
endgenerate

//
// Delays
//
// Product clock enable delayed by one cycle.
//
reg dsp_xy_ce_p_dly1 = 1'b0;

always @(posedge clk or negedge rst_n)
    if (!rst_n) dsp_xy_ce_p_dly1 <= 1'b0;
    else        dsp_xy_ce_p_dly1 <= dsp_xy_ce_p;

//
// Registers
//

// valid
reg       xy_valid_lsb = 1'b0;
reg       xy_aux_lsb   = 1'b0;
reg       xy_valid_msb = 1'b0;

// bitmap
reg [7:0] xy_bitmap_lsb = {8{1'b0}};
reg [7:0] xy_bitmap_msb = {8{1'b0}};

// index (don't care until the first valid word arrives)
reg [2:0] xy_index_lsb = 3'dX;

// purge
reg       xy_purge_lsb = 1'b0;
reg       xy_purge_msb = 1'b0;

// valid - latch
reg       xy_valid_latch_lsb = 1'b0;

// aux - latch
reg       xy_aux_latch_lsb = 1'b0;

// bitmap - latch
reg [7:0] xy_bitmap_latch_lsb = {8{1'b0}};
reg [7:0] xy_bitmap_latch_msb = {8{1'b0}};

// index - latch
reg [2:0] xy_index_latch_lsb = 3'dX;

// purge - latch
reg       xy_purge_latch_lsb = 1'b0;
reg       xy_purge_latch_msb = 1'b0;

//
// Six-stage look-ahead pipeline for the flags above: stage 6 is the entry,
// stage 1 is the stage right before the xy_* registers.
//
reg       xy_valid_lsb_adv [1:6];
reg       xy_valid_msb_adv [1:6];
reg       xy_aux_lsb_adv   [1:6];
reg [7:0] xy_bitmap_lsb_adv[1:6];
reg [7:0] xy_bitmap_msb_adv[1:6];
reg [2:0] xy_index_lsb_adv [1:6];
reg [2:0] xy_index_msb_adv [1:6];
reg       xy_purge_lsb_adv [1:6];
reg       xy_purge_msb_adv [1:6];

//
// Recombination mode, sampled from fsm_state_next while ena is high:
//   1 = square, 2 = triangle, 3 = rectangle, 0 = none of the three.
//
reg [1:0] rcmb_mode;

always @(posedge clk)
    if (ena)
        case (fsm_state_next)
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:    rcmb_mode <= 2'd1;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:  rcmb_mode <= 2'd2;
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
            default:                                 rcmb_mode <= 2'd0;
        endcase

//
// Power-up values of the look-ahead pipeline.
//
// All six stages are initialised. The loop used to stop at stage 5, which
// left stage 6 undefined: its unknown value was then copied into stage 5 on
// the very first clock edge and travelled down the whole pipeline.
//
integer i;
initial for (i=1; i<=6; i=i+1) begin
    xy_valid_lsb_adv [i] = 1'b0;
    xy_valid_msb_adv [i] = 1'b0;
    xy_aux_lsb_adv   [i] = 1'b0;
    xy_bitmap_lsb_adv[i] = {8{1'b0}};
    xy_bitmap_msb_adv[i] = {8{1'b0}};
    xy_index_lsb_adv [i] = 3'dX;
    xy_index_msb_adv [i] = 3'dX;
    xy_purge_lsb_adv [i] = 1'b0;
    xy_purge_msb_adv [i] = 1'b0;
end

//
// Decode functions
//
// All functions of one family share the same argument list so that they
// can be called uniformly; several of them ignore part of their arguments.
// Address bits [7:3] are compared against the column, bits [2:0] select
// one of eight words inside the column.
//

// LSB valid (square and triangle): set when the address is in the current column.
function calc_square_triangle_valid_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        if (narrow_xy_addr_value[7:3] == col_index_value)
            calc_square_triangle_valid_lsb = 1'b1;
        else
            calc_square_triangle_valid_lsb = 1'b0;
    end
endfunction

// LSB valid, square mode: alias of the shared square/triangle function.
function calc_square_valid_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
    end
endfunction

// LSB valid, triangle mode: alias of the shared square/triangle function.
function calc_triangle_valid_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
    end
endfunction

// LSB valid, rectangle mode: address in the current column and bank is not BANK_NARROW_EXT.
function calc_rectangle_valid_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        if (narrow_xy_addr_value[7:3] == col_index_value)
            calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT;
        else
            calc_rectangle_valid_lsb = 1'b0;
    end
endfunction

// LSB aux, triangle mode: set whenever the bank is BANK_NARROW_EXT.
function calc_triangle_aux_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        if (narrow_xy_bank_value == BANK_NARROW_EXT)
            calc_triangle_aux_lsb = 1'b1;
        else
            calc_triangle_aux_lsb = 1'b0;
    end
endfunction

// LSB bitmap (square and triangle): one-hot of address bits [2:0] when the
// address is in the current column, all zeroes otherwise.
function [7:0] calc_square_triangle_bitmap_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        if (narrow_xy_addr_value[7:3] == col_index_value)
            calc_square_triangle_bitmap_lsb = 8'b00000001 << narrow_xy_addr_value[2:0];
        else
            calc_square_triangle_bitmap_lsb = {8{1'b0}};
    end
endfunction

// LSB bitmap, square mode: alias of the shared square/triangle function.
function [7:0] calc_square_bitmap_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
    end
endfunction

// LSB bitmap, triangle mode: alias of the shared square/triangle function.
function [7:0] calc_triangle_bitmap_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
    end
endfunction

// LSB bitmap, rectangle mode: same one-hot as above, additionally gated by
// the bank not being BANK_NARROW_EXT.
function [7:0] calc_rectangle_bitmap_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
            calc_rectangle_bitmap_lsb = 8'b00000001 << narrow_xy_addr_value[2:0];
        else
            calc_rectangle_bitmap_lsb = {8{1'b0}};
    end
endfunction

/*
 * These can be simplified (the difference between square/triangle and
 * rectangle is that the bank is checked or not). A universal function would
 * accept a parameter that tells it whether it should check the bank or not.
 * Let's do it later, too early to optimize now, it seems.
 *
 */

// LSB index (square and triangle): address bits [2:0] when the address is
// in the current column, don't care otherwise.
function [2:0] calc_square_triangle_index_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        if (narrow_xy_addr_value[7:3] == col_index_value)
            calc_square_triangle_index_lsb = narrow_xy_addr_value[2:0];
        else
            calc_square_triangle_index_lsb = 3'dX;
    end
endfunction

// LSB index, square mode: alias of the shared square/triangle function.
function [2:0] calc_square_index_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
    end
endfunction

// LSB index, triangle mode: alias of the shared square/triangle function.
function [2:0] calc_triangle_index_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
    end
endfunction

// LSB index, rectangle mode: address bits [2:0], additionally gated by the
// bank not being BANK_NARROW_EXT; don't care otherwise.
function [2:0] calc_rectangle_index_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] slim_bram_xy_bank_value;
    input [            7:0] slim_bram_xy_addr_value;
    begin
        if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_NARROW_EXT))
            calc_rectangle_index_lsb = slim_bram_xy_addr_value[2:0];
        else
            calc_rectangle_index_lsb = 3'dX;
    end
endfunction

// LSB purge (square and rectangle): address is in the current column and
// that column is also the last one.
function calc_square_rectangle_purge_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        if (narrow_xy_addr_value[7:3] == col_index_value)
            calc_square_rectangle_purge_lsb = narrow_xy_addr_value[7:3] == col_index_last_value;
        else
            calc_square_rectangle_purge_lsb = 1'b0;
    end
endfunction

// LSB purge, square mode: alias of the shared square/rectangle function.
function calc_square_purge_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
    end
endfunction

// LSB purge, rectangle mode: alias of the shared square/rectangle function.
function calc_rectangle_purge_lsb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    begin
        calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
    end
endfunction

// MSB valid, square mode: set when the full address equals the last word index.
function calc_square_valid_msb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    input [            7:0] index_last_value;
    begin
        if (narrow_xy_addr_value == index_last_value)
            calc_square_valid_msb = 1'b1;
        else
            calc_square_valid_msb = 1'b0;
    end
endfunction

// MSB valid, rectangle mode: set for address 1 of bank BANK_NARROW_EXT.
function calc_rectangle_valid_msb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    input [            7:0] index_last_value;
    begin
        if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
            calc_rectangle_valid_msb = 1'b1;
        else
            calc_rectangle_valid_msb = 1'b0;
    end
endfunction
// MSB bitmap, square mode: when the address equals the last word index,
// bits [6:0] are all set and bit 7 is set only if this is not the last
// column; all zeroes otherwise.
function [7:0] calc_square_bitmap_msb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    input [            7:0] index_last_value;
    begin
        if (narrow_xy_addr_value == index_last_value) begin
            calc_square_bitmap_msb[7]   = col_index_value != col_index_last_value;
            calc_square_bitmap_msb[6:0] = 7'b1111111;
        end else
            calc_square_bitmap_msb[7:0] = 8'b00000000;
    end
endfunction

// MSB bitmap, rectangle mode: all ones for address 1 of bank
// BANK_NARROW_EXT, all zeroes otherwise.
function [7:0] calc_rectangle_bitmap_msb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    input [            7:0] index_last_value;
    begin
        if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) begin
            calc_rectangle_bitmap_msb[7:0] = 8'b11111111;
        end else
            calc_rectangle_bitmap_msb[7:0] = 8'b00000000;
    end
endfunction

// MSB purge, square mode: address equals the last word index and the
// current column is the last one.
function calc_square_purge_msb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    input [            7:0] index_last_value;
    begin
        if (narrow_xy_addr_value == index_last_value)
            calc_square_purge_msb = col_index_value == col_index_last_value;
        else
            calc_square_purge_msb = 1'b0;
    end
endfunction

// MSB purge, rectangle mode: address 1 of bank BANK_NARROW_EXT and the
// current column is the last one.
function calc_rectangle_purge_msb;
    input [            4:0] col_index_value;
    input [            4:0] col_index_last_value;
    input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
    input [            7:0] narrow_xy_addr_value;
    input [            7:0] index_last_value;
    begin
        if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
            calc_rectangle_purge_msb = col_index_value == col_index_last_value;
        else
            calc_rectangle_purge_msb = 1'b0;
    end
endfunction

//
// Recombinator cells
//
// Two pairs (LSB and MSB) of X/Y cells. Each pair shares its clock enable
// and clear; each cell takes a 47-bit input and produces a 16-bit output.
//
// The combined clock enable is the OR of the regular enable, the auxiliary
// enable (LSB only) and bit 0 of a "purge" shift register that keeps the
// cell enabled for a few extra cycles.
//
reg         rcmb_xy_lsb_ce       = 1'b0;
// starts at zero like the other enable terms, so that the combined enable
// below is defined from time zero
reg         rcmb_xy_lsb_ce_aux   = 1'b0;
reg  [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000;
wire        rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
reg         rcmb_xy_lsb_clr;
reg  [46:0] rcmb_x_lsb_din;
reg  [46:0] rcmb_y_lsb_din;
wire [15:0] rcmb_x_lsb_dout;
wire [15:0] rcmb_y_lsb_dout;

reg         rcmb_xy_msb_ce       = 1'b0;
reg  [ 1:0] rcmb_xy_msb_ce_purge = 2'b00;
wire        rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
reg         rcmb_xy_msb_clr;
reg  [46:0] rcmb_x_msb_din;
reg  [46:0] rcmb_y_msb_din;
wire [15:0] rcmb_x_msb_dout;
wire [15:0] rcmb_y_msb_dout;

modexpng_recombinator_cell recomb_x_lsb
(
    .clk  (clk),
    .ce   (rcmb_xy_lsb_ce_combined),
    .clr  (rcmb_xy_lsb_clr),
    .din  (rcmb_x_lsb_din),
    .dout (rcmb_x_lsb_dout)
);

modexpng_recombinator_cell recomb_y_lsb
(
    .clk  (clk),
    .ce   (rcmb_xy_lsb_ce_combined),
    .clr  (rcmb_xy_lsb_clr),
    .din  (rcmb_y_lsb_din),
    .dout (rcmb_y_lsb_dout)
);

modexpng_recombinator_cell recomb_x_msb
(
    .clk  (clk),
    .ce   (rcmb_xy_msb_ce_combined),
    .clr  (rcmb_xy_msb_clr),
    .din  (rcmb_x_msb_din),
    .dout (rcmb_x_msb_dout)
);

modexpng_recombinator_cell recomb_y_msb
(
    .clk  (clk),
    .ce   (rcmb_xy_msb_ce_combined),
    .clr  (rcmb_xy_msb_clr),
    .din  (rcmb_y_msb_din),
    .dout (rcmb_y_msb_dout)
);

//
// Cell clock enables
//
always @(posedge clk) begin
    // regular enables follow the latched flags by one cycle
    rcmb_xy_lsb_ce     <= xy_valid_latch_lsb;
    rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb;
    rcmb_xy_msb_ce     <= xy_bitmap_latch_msb[0];

    // LSB purge: reloaded while the purge flag is set, then shifted right,
    // so bit 0 stays high for three more cycles after the flag drops
    if (xy_purge_latch_lsb)
        rcmb_xy_lsb_ce_purge <= 3'b111;
    else
        rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]};

    // MSB purge: loaded when only bit 0 of the lower two latched bitmap bits
    // is still set, then bit 0 stays high for two cycles
    if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1])
        rcmb_xy_msb_ce_purge <= 2'b11;
    else
        rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]};
end

//
// Cell clears: raised by ena, dropped one cycle after the first regular
// (not combined) enable of the respective pair.
//
always @(posedge clk)
    if (ena) begin
        rcmb_xy_lsb_clr <= 1'b1;
        rcmb_xy_msb_clr <= 1'b1;
    end else begin
        if (rcmb_xy_lsb_ce) rcmb_xy_lsb_clr <= 1'b0;
        if (rcmb_xy_msb_ce) rcmb_xy_msb_clr <= 1'b0;
    end

//
// LSB cell input: the indexed latch when valid, latch 8 when aux, else zero.
//
always @(posedge clk)
    if (xy_valid_latch_lsb) begin
        rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb];
        rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb];
    end else if (xy_aux_latch_lsb) begin
        rcmb_x_lsb_din <= dsp_x_p_latch[8];
        rcmb_y_lsb_din <= dsp_y_p_latch[8];
    end else begin
        rcmb_x_lsb_din <= {47{1'b0}};
        rcmb_y_lsb_din <= {47{1'b0}};
    end

//
// MSB cell input: always latch 0 (the latches are shifted down towards
// index 0 elsewhere in this file), else zero.
//
always @(posedge clk)
    if (xy_bitmap_latch_msb[0]) begin
        rcmb_x_msb_din <= dsp_x_p_latch[0];
        rcmb_y_msb_din <= dsp_y_p_latch[0];
    end else begin
        rcmb_x_msb_din <= {47{1'b0}};
        rcmb_y_msb_din <= {47{1'b0}};
    end

//
// Pipeline entry (stage 6): decode the flags for the current read address
// according to the multiplication phase the FSM is about to be in.
//
always @(posedge clk)
    case (fsm_state_next)

        MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
            xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_aux_lsb_adv   [6] <= 1'b0;
            xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);

            xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
            xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
            xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
        end

        // triangle: no purge and no MSB part at all
        MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
            xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_aux_lsb_adv   [6] <= calc_triangle_aux_lsb   (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_purge_lsb_adv [6] <= 1'b0;

            xy_valid_msb_adv [6] <= 1'b0;
            xy_bitmap_msb_adv[6] <= {8{1'b0}};
            xy_purge_msb_adv [6] <= 1'b0;
        end

        MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
            xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_aux_lsb_adv   [6] <= 1'b0;
            xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
            xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);

            xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
            xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
            xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
        end

        // any other state: feed idle flags into the pipeline
        default: begin
            xy_valid_lsb_adv [6] <= 1'b0;
            xy_aux_lsb_adv   [6] <= 1'b0;
            xy_bitmap_lsb_adv[6] <= {8{1'b0}};
            xy_index_lsb_adv [6] <= 3'dX;
            xy_purge_lsb_adv [6] <= 1'b0;

            xy_valid_msb_adv [6] <= 1'b0;
            xy_bitmap_msb_adv[6] <= {8{1'b0}};
            xy_purge_msb_adv [6] <= 1'b0;
        end

    endcase

//
// Pipeline body: stage 6 -> ... -> stage 1 -> xy_* -> xy_*_latch_*
//
always @(posedge clk) begin
    // stage 1 -> working registers (LSB)
    xy_valid_lsb  <= xy_valid_lsb_adv [1];
    xy_aux_lsb    <= xy_aux_lsb_adv   [1];
    xy_bitmap_lsb <= xy_bitmap_lsb_adv[1];
    xy_index_lsb  <= xy_index_lsb_adv [1];
    xy_purge_lsb  <= xy_purge_lsb_adv [1];

    // working registers -> latch registers (LSB)
    xy_valid_latch_lsb  <= xy_valid_lsb;
    xy_aux_latch_lsb    <= xy_aux_lsb;
    xy_bitmap_latch_lsb <= xy_bitmap_lsb;
    xy_index_latch_lsb  <= xy_index_lsb;
    xy_purge_latch_lsb  <= xy_purge_lsb;

    // stage 1 -> working registers (MSB)
    xy_valid_msb  <= xy_valid_msb_adv[1];
    xy_bitmap_msb <= xy_bitmap_msb_adv[1];
    xy_purge_msb  <= xy_purge_msb_adv[1];

    // MSB latch: loaded on valid, otherwise the bitmap is shifted right by
    // one bit per cycle while the purge latch keeps its value
    if (xy_valid_msb) begin
        xy_bitmap_latch_msb <= xy_bitmap_msb;
        xy_purge_latch_msb  <= xy_purge_msb;
    end else begin
        xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[7:1]};
    end

    // advance stages 6..2 into stages 5..1
    for (i=1; i<6; i=i+1) begin
        xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
        xy_aux_lsb_adv   [i] <= xy_aux_lsb_adv   [i+1];
        xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
        xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
        xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];

        xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1];
        xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1];
        xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1];
    end
end

//
// Product latches
//
// While bit 1 of the latched MSB bitmap is set the latches 0..7 are shifted
// down by one position (latch 7 becomes don't care). Otherwise, on the
// delayed product clock enable, each latch selected by the LSB bitmap, or by
// the MSB bitmap when the MSB part is valid, captures its product word;
// latch 8 captures product word 8 when the aux flag is set.
//
always @(posedge clk)
    if (xy_bitmap_latch_msb[1]) // only shift 7 times
        for (i=0; i<8; i=i+1)
            if (i < 7) begin
                dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
                dsp_y_p_latch[i] <= dsp_y_p_latch[i+1];
            end else begin
                dsp_x_p_latch[i] <= {47{1'bX}};
                dsp_y_p_latch[i] <= {47{1'bX}};
            end
    else if (dsp_xy_ce_p_dly1) begin
        for (i=0; i<8; i=i+1)
            if (xy_bitmap_lsb[i]) begin
                dsp_x_p_latch[i] <= dsp_x_p_split[i];
                dsp_y_p_latch[i] <= dsp_y_p_split[i];
            end else if (xy_valid_msb && xy_bitmap_msb[i]) begin
                dsp_x_p_latch[i] <= dsp_x_p_split[i];
                dsp_y_p_latch[i] <= dsp_y_p_split[i];
            end
        if (xy_aux_lsb) begin
            dsp_x_p_latch[8] <= dsp_x_p_split[8];
            dsp_y_p_latch[8] <= dsp_y_p_split[8];
        end
    end

//
// Cell output valid flags: the combined clock enables delayed by one cycle.
//
reg rcmb_xy_lsb_valid = 1'b0;
reg rcmb_xy_msb_valid = 1'b0;

always @(posedge clk or negedge rst_n)
    if (!rst_n) begin
        rcmb_xy_lsb_valid <= 1'b0;
        rcmb_xy_msb_valid <= 1'b0;
    end else begin
        rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined;
        rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined;
    end

//
// Output registers
//
reg [BANK_ADDR_W-1:0] wide_xy_bank;
reg [            7:0] wide_xy_addr;
reg [           17:0] wide_x_dout;
reg [           17:0] wide_y_dout;
reg                   wide_xy_valid = 1'b0;

reg [BANK_ADDR_W-1:0] narrow_xy_bank;
reg [            7:0] narrow_xy_addr;
reg [           17:0] narrow_x_dout;
reg [           17:0] narrow_y_dout;
reg                   narrow_xy_valid = 1'b0;

reg [BANK_ADDR_W-1:0] rdct_xy_bank;
reg [            7:0] rdct_xy_addr;
reg [           17:0] rdct_x_dout;
reg [           17:0] rdct_y_dout;
reg                   rdct_xy_valid = 1'b0;

//
// Word counters with "wrapped" flags (set once the counter has passed
// word_index_last and restarted from zero).
//
reg [7:0] cnt_lsb;
reg [7:0] cnt_msb;
reg       cnt_lsb_wrapped;
reg       cnt_msb_wrapped;

//
// MSB side storage, each entry packs {Y[15:0], X[15:0]}:
//   carry_0 -> carry_1          two-deep shift register
//   delay_0 -> delay_1 -> delay_2  three-deep delay line with matching
//                                  counter values and flags
//
reg [31:0] rcmb_xy_msb_carry_0;
reg [31:0] rcmb_xy_msb_carry_1;

reg [31:0] rcmb_xy_msb_delay_0;
reg [31:0] rcmb_xy_msb_delay_1;
reg [31:0] rcmb_xy_msb_delay_2;

reg [ 7:0] rcmb_msb_cnt_delay_0 = 8'd0;
reg [ 7:0] rcmb_msb_cnt_delay_1 = 8'd0;
reg [ 7:0] rcmb_msb_cnt_delay_2 = 8'd0;

reg        rcmb_msb_flag_delay_0;
reg        rcmb_msb_flag_delay_1;
reg        rcmb_msb_flag_delay_2;

//
// Output mapping
//
assign rcmb_wide_xy_bank    = wide_xy_bank;
assign rcmb_wide_xy_addr    = wide_xy_addr;
assign rcmb_wide_x_dout     = wide_x_dout;
assign rcmb_wide_y_dout     = wide_y_dout;
assign rcmb_wide_xy_valid   = wide_xy_valid;

assign rcmb_narrow_xy_bank  = narrow_xy_bank;
assign rcmb_narrow_xy_addr  = narrow_xy_addr;
assign rcmb_narrow_x_dout   = narrow_x_dout;
assign rcmb_narrow_y_dout   = narrow_y_dout;
assign rcmb_narrow_xy_valid = narrow_xy_valid;

assign rdct_narrow_xy_bank  = rdct_xy_bank;
assign rdct_narrow_xy_addr  = rdct_xy_addr;
assign rdct_narrow_x_dout   = rdct_x_dout;
assign rdct_narrow_y_dout   = rdct_y_dout;
assign rdct_narrow_xy_valid = rdct_xy_valid;

//
// Ready flag: dropped by ena, afterwards follows rdy_adv with one cycle of
// delay.
//
reg rdy_reg = 1'b1;
reg rdy_adv = 1'b1;

assign rdy = rdy_reg;

always @(posedge clk)
    if (ena) rdy_reg <= 1'b0;
    else     rdy_reg <= rdy_adv;

//
// Helper tasks. All of them use non-blocking assignments to module-level
// registers and are meant to be called from clocked always blocks only.
//

// Push a new {Y, X} word, counter value and flag into the three-deep MSB delay line.
task advance_rcmb_msb_delay;
    input [15:0] dout_x;
    input [15:0] dout_y;
    input [ 7:0] cnt;
    input        flag;
    begin
        rcmb_xy_msb_delay_0 <= {dout_y, dout_x};
        rcmb_xy_msb_delay_1 <= rcmb_xy_msb_delay_0;
        rcmb_xy_msb_delay_2 <= rcmb_xy_msb_delay_1;

        rcmb_msb_cnt_delay_0 <= cnt;
        rcmb_msb_cnt_delay_1 <= rcmb_msb_cnt_delay_0;
        rcmb_msb_cnt_delay_2 <= rcmb_msb_cnt_delay_1;

        rcmb_msb_flag_delay_0 <= flag;
        rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0;
        rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1;
    end
endtask

// Push a new {Y, X} word into the two-deep MSB carry shift register.
task shift_rcmb_msb_carry;
    input [15:0] dout_x;
    input [15:0] dout_y;
    begin
        rcmb_xy_msb_carry_0 <= {dout_y, dout_x};
        rcmb_xy_msb_carry_1 <= rcmb_xy_msb_carry_0;
    end
endtask

// Load all registers of the "wide" output group.
task _update_wide;
    input [BANK_ADDR_W-1:0] bank;
    input [            7:0] addr;
    input [           17:0] dout_x;
    input [           17:0] dout_y;
    input                   valid;
    begin
        wide_xy_bank  <= bank;
        wide_xy_addr  <= addr;
        wide_x_dout   <= dout_x;
        wide_y_dout   <= dout_y;
        wide_xy_valid <= valid;
    end
endtask

// Load all registers of the "narrow" output group.
task _update_narrow;
    input [BANK_ADDR_W-1:0] bank;
    input [            7:0] addr;
    input [           17:0] dout_x;
    input [           17:0] dout_y;
    input                   valid;
    begin
        narrow_xy_bank  <= bank;
        narrow_xy_addr  <= addr;
        narrow_x_dout   <= dout_x;
        narrow_y_dout   <= dout_y;
        narrow_xy_valid <= valid;
    end
endtask

// Load all registers of the "rdct" output group.
task _update_rdct;
    input [BANK_ADDR_W-1:0] bank;
    input [            7:0] addr;
    input [           17:0] dout_x;
    input [           17:0] dout_y;
    input                   valid;
    begin
        rdct_xy_bank  <= bank;
        rdct_xy_addr  <= addr;
        rdct_x_dout   <= dout_x;
        rdct_y_dout   <= dout_y;
        rdct_xy_valid <= valid;
    end
endtask

// Present a valid word on the "wide" output group.
task set_wide;
    input [BANK_ADDR_W-1:0] bank;
    input [            7:0] addr;
    input [           17:0] dout_x;
    input [           17:0] dout_y;
    begin
        _update_wide(bank, addr, dout_x, dout_y, 1'b1);
    end
endtask

// Present a valid word on the "narrow" output group.
task set_narrow;
    input [BANK_ADDR_W-1:0] bank;
    input [            7:0] addr;
    input [           17:0] dout_x;
    input [           17:0] dout_y;
    begin
        _update_narrow(bank, addr, dout_x, dout_y, 1'b1);
    end
endtask

// Present a valid word on the "rdct" output group.
task set_rdct;
    input [BANK_ADDR_W-1:0] bank;
    input [            7:0] addr;
    input [           17:0] dout_x;
    input [           17:0] dout_y;
    begin
        _update_rdct(bank, addr, dout_x, dout_y, 1'b1);
    end
endtask

// Deassert valid on the "wide" group and drive the *_DNC constants.
task clear_wide;
    begin
        _update_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
    end
endtask

// Deassert valid on the "narrow" group and drive the *_DNC constants.
task clear_narrow;
    begin
        _update_narrow(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
    end
endtask

// Deassert valid on the "rdct" group and drive the *_DNC constants.
task clear_rdct;
    begin
        _update_rdct(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
    end
endtask

// Load the LSB counter and its wrapped flag.
task _set_cnt_lsb;
    input [7:0] cnt;
    input       wrapped;
    begin
        cnt_lsb         <= cnt;
        cnt_lsb_wrapped <= wrapped;
    end
endtask

// Load the MSB counter and its wrapped flag.
task _set_cnt_msb;
    input [7:0] cnt;
    input       wrapped;
    begin
        cnt_msb         <= cnt;
        cnt_msb_wrapped <= wrapped;
    end
endtask

// Increment the LSB counter; after word_index_last restart from zero and
// set the wrapped flag (which then stays set).
task inc_cnt_lsb;
    begin
        if (cnt_lsb == word_index_last) _set_cnt_lsb(8'd0, 1'b1);
        else                            _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped);
    end
endtask

// Increment both counters.
task inc_cnt_both;
    begin
        inc_cnt_lsb;
        inc_cnt_msb;
    end
endtask

// Increment the MSB counter; after word_index_last restart from zero and
// set the wrapped flag (which then stays set).
task inc_cnt_msb;
    begin
        if (cnt_msb == word_index_last) _set_cnt_msb(8'd0, 1'b1);
        else                            _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped);
    end
endtask

// Reset the LSB counter and its wrapped flag.
task clr_cnt_lsb;
    begin
        _set_cnt_lsb(8'd0, 1'b0);
    end
endtask

// Reset the MSB counter and its wrapped flag.
task clr_cnt_msb;
    begin
        _set_cnt_msb(8'd0, 1'b0);
    end
endtask

//
// Main recombination process
//
// rcmb_xy_valid = {MSB cell valid, LSB cell valid}
//
wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid};

// Task enables are written without empty parentheses: "task_name();" is
// only legal in SystemVerilog, the bare form is accepted everywhere.
always @(posedge clk)
    if (ena) begin
        clr_cnt_lsb;
        clr_cnt_msb;
    end else if (!rdy)
        case (rcmb_mode)
            2'd1: recombine_square;
            2'd2: recombine_triangle;
            2'd3: recombine_rectangle;
        endcase

//
// 16-bit cell outputs zero-extended to the 18-bit output word width
//
wire [17:0] rcmb_x_lsb_dout_pad = {2'b00, rcmb_x_lsb_dout};
wire [17:0] rcmb_y_lsb_dout_pad = {2'b00, rcmb_y_lsb_dout};

wire [17:0] rcmb_x_msb_dout_pad = {2'b00, rcmb_x_msb_dout};
wire [17:0] rcmb_y_msb_dout_pad = {2'b00, rcmb_y_msb_dout};

wire [17:0] rcmb_x_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[15: 0]};
wire [17:0] rcmb_y_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[31:16]};

// 17-bit sum of the LSB cell output and the oldest carry word, zero-extended
wire [17:0] rcmb_x_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_x_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[15: 0]}};
wire [17:0] rcmb_y_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_y_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[31:16]}};

//
// Square mode: results go to the "wide" output group.
//
task recombine_square;
    begin
        // counters
        case (rcmb_xy_valid)
            2'b01: inc_cnt_lsb;
            2'b10: inc_cnt_msb;
            2'b11: inc_cnt_both;
        endcase

        // output word
        case (rcmb_xy_valid)
            // nothing from the cells: flush the delay line if it holds data
            2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
                   else                       clear_wide;
            2'b01:                            set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
            // the first two MSB words are not written directly, they go to
            // the carry registers instead (see the third case statement)
            2'b10: if (cnt_msb < 8'd2)        clear_wide;
                   else                       set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
            // both valid: the LSB word wins, with the carry added once the
            // LSB counter has wrapped
            2'b11: if (cnt_lsb_wrapped)       set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad);
                   else                       set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
        endcase

        // MSB carry registers and delay line
        case (rcmb_xy_valid)
            2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
            2'b10: if (cnt_msb < 8'd2)        shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
            2'b11: begin
                advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
                if (cnt_lsb_wrapped) shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}});
            end
        endcase
    end
endtask

//
// Triangle mode: only the LSB cell is used, results go to the "narrow"
// output group.
//
task recombine_triangle;
    begin
        // counter
        case (rcmb_xy_valid)
            2'b01: inc_cnt_lsb;
        endcase

        // output word: regular words go to BANK_NARROW_Q, the word that
        // arrives after the counter has wrapped goes to BANK_NARROW_EXT
        case (rcmb_xy_valid)
            2'b00: clear_narrow;
            2'b01: if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q,   cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
                   else                  set_narrow(BANK_NARROW_EXT, 8'd1,    rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
            2'b10: clear_narrow;
            2'b11: clear_narrow;
        endcase
    end
endtask

//
// Rectangle mode: results go to the "rdct" output group.
//
task recombine_rectangle;
    begin
        // counters
        case (rcmb_xy_valid)
            2'b01: inc_cnt_lsb;
            2'b10: inc_cnt_msb;
            2'b11: inc_cnt_both;
        endcase

        // output word
        case (rcmb_xy_valid)
            // nothing from the cells: flush the delay line if it holds data
            2'b00: if (rcmb_msb_flag_delay_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
                   else                       clear_rdct;
            2'b01:                            set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
            // MSB only: the first two words are held back for the carry,
            // the word after the wrap goes to BANK_RCMB_EXT
            2'b10: if (!cnt_msb_wrapped) begin
                       if (cnt_msb < 8'd2)    clear_rdct;
                       else                   set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
                   end else                   set_rdct(BANK_RCMB_EXT, 8'd0, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
            2'b11:                            set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad);
        endcase

        // MSB carry registers and delay line
        case (rcmb_xy_valid)
            2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
            2'b10: begin
                if ((cnt_msb < 8'd2) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
                if (cnt_msb_wrapped)                      advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
            end
            2'b11: begin
                advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
                shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}});
            end
        endcase
    end
endtask

//
// Early ready flag
//
// Square and rectangle: while the delay line is being flushed (no valid
// from the cells, flag_delay_2 set) rdy_adv rises as soon as the next entry
// of the delay line (flag_delay_1) is empty.
// Triangle: rdy_adv follows the LSB wrapped flag on every LSB word.
//
always @(posedge clk)
    if (ena) begin
        rdy_adv <= 1'b0;
    end else if (!rdy_reg) begin
        case (rcmb_mode)
            2'd1:
                case (rcmb_xy_valid)
                    2'b00: begin
                        if (rcmb_msb_flag_delay_2) begin
                            rdy_adv <= ~rcmb_msb_flag_delay_1;
                        end
                    end
                endcase
            2'd2:
                case (rcmb_xy_valid)
                    2'b01: rdy_adv <= cnt_lsb_wrapped;
                endcase
            2'd3:
                case (rcmb_xy_valid)
                    2'b00: begin
                        if (rcmb_msb_flag_delay_2) begin
                            rdy_adv <= ~rcmb_msb_flag_delay_1;
                        end
                    end
                endcase
        endcase
    end

endmodule