`timescale 1ns / 1ps
module tb_square;
//
// Headers
//
`include "../rtl/modexpng_parameters.vh"
`include "../rtl/modexpng_parameters_x8.vh"
`include "../rtl/modexpng_mmm_fsm.vh"
//
//
// Clock: free-running, starts low, toggles every half period
// (100 MHz as configured by CLK_FREQUENCY_MHZ below)
//
`define CLK_FREQUENCY_MHZ 100.0
`define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ)
`define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS)
reg clk = 1'b0;
always #`CLK_PERIOD_HALF_NS clk = ~clk;
//
// Reset: asserted from time zero, released by the stimulus block
//
reg rst = 1'b1;
//
// T1, T2: two 32-word arrays (18 bits per word) that the stimulus block
// writes into the X and Y sides of the fat and slim BRAMs.
// AB: 64-word reference array that verify_ab compares against AB_READ.
//
reg [17:0] T1[0:31];
reg [17:0] T2[0:31];
reg [17:0] AB[0:63];
//
// Init: fixed test vectors (operands T1/T2 and the expected result AB)
//
initial begin
// first operand
T1[ 0] = 18'h0f13e; T1[ 1] = 18'h0daf6; T1[ 2] = 18'h0aaa9; T1[ 3] = 18'h0c2c2;
T1[ 4] = 18'h0fc5f; T1[ 5] = 18'h12164; T1[ 6] = 18'h14375; T1[ 7] = 18'h15615;
T1[ 8] = 18'h0d8e2; T1[ 9] = 18'h0ec15; T1[10] = 18'h17c46; T1[11] = 18'h0c922;
T1[12] = 18'h08f00; T1[13] = 18'h152f9; T1[14] = 18'h0b0b6; T1[15] = 18'h0ce87;
T1[16] = 18'h178f2; T1[17] = 18'h09efb; T1[18] = 18'h0409d; T1[19] = 18'h11104;
T1[20] = 18'h0b4a6; T1[21] = 18'h158a6; T1[22] = 18'h0514e; T1[23] = 18'h0ec55;
T1[24] = 18'h11e73; T1[25] = 18'h11ddd; T1[26] = 18'h07bd4; T1[27] = 18'h0638b;
T1[28] = 18'h0e805; T1[29] = 18'h11c4f; T1[30] = 18'h0a2eb; T1[31] = 18'h05454;
// second operand
T2[ 0] = 18'h1a479; T2[ 1] = 18'h102f5; T2[ 2] = 18'h10e72; T2[ 3] = 18'h120b1;
T2[ 4] = 18'h169cd; T2[ 5] = 18'h1d0c4; T2[ 6] = 18'h11462; T2[ 7] = 18'h12015;
T2[ 8] = 18'h16fca; T2[ 9] = 18'h1044f; T2[10] = 18'h122b4; T2[11] = 18'h10a5a;
T2[12] = 18'h12620; T2[13] = 18'h0e01a; T2[14] = 18'h095cd; T2[15] = 18'h1278a;
T2[16] = 18'h10763; T2[17] = 18'h09fe7; T2[18] = 18'h0d35c; T2[19] = 18'h10e24;
T2[20] = 18'h1527d; T2[21] = 18'h115b3; T2[22] = 18'h05443; T2[23] = 18'h1190a;
T2[24] = 18'h0fcc3; T2[25] = 18'h115e2; T2[26] = 18'h0a398; T2[27] = 18'h0608d;
T2[28] = 18'h13075; T2[29] = 18'h0d816; T2[30] = 18'h0bb4c; T2[31] = 18'h04e8a;
// expected result words (checked word-by-word in verify_ab)
AB[ 0] = 18'h0be4e; AB[ 1] = 18'h0fed7; AB[ 2] = 18'h09496; AB[ 3] = 18'h07181;
AB[ 4] = 18'h0ee73; AB[ 5] = 18'h04692; AB[ 6] = 18'h0141a; AB[ 7] = 18'h0078c;
AB[ 8] = 18'h030eb; AB[ 9] = 18'h0217c; AB[10] = 18'h0696f; AB[11] = 18'h0a165;
AB[12] = 18'h0b753; AB[13] = 18'h04af9; AB[14] = 18'h0ed7c; AB[15] = 18'h079ce;
AB[16] = 18'h0e863; AB[17] = 18'h097df; AB[18] = 18'h07984; AB[19] = 18'h048af;
AB[20] = 18'h0197f; AB[21] = 18'h0206a; AB[22] = 18'h027e7; AB[23] = 18'h04b3a;
AB[24] = 18'h03312; AB[25] = 18'h03b56; AB[26] = 18'h04487; AB[27] = 18'h0bd6a;
AB[28] = 18'h04e4b; AB[29] = 18'h069ca; AB[30] = 18'h0f994; AB[31] = 18'h0dd4e;
AB[32] = 18'h1b024; AB[33] = 18'h0127f; AB[34] = 18'h02631; AB[35] = 18'h0186b;
AB[36] = 18'h03adb; AB[37] = 18'h05368; AB[38] = 18'h059a5; AB[39] = 18'h002e0;
AB[40] = 18'h0b78a; AB[41] = 18'h016f3; AB[42] = 18'h0b58d; AB[43] = 18'h03ddb;
AB[44] = 18'h078b0; AB[45] = 18'h0073b; AB[46] = 18'h07337; AB[47] = 18'h0c7b0;
AB[48] = 18'h00668; AB[49] = 18'h0106d; AB[50] = 18'h01a44; AB[51] = 18'h05ee3;
AB[52] = 18'h0462d; AB[53] = 18'h0fdeb; AB[54] = 18'h05f85; AB[55] = 18'h02af9;
AB[56] = 18'h0e1c0; AB[57] = 18'h00989; AB[58] = 18'h01201; AB[59] = 18'h0e194;
AB[60] = 18'h07f93; AB[61] = 18'h0e739; AB[62] = 18'h07cf6; AB[63] = 18'h019df;
//
end
//
// BRAMs
//
// Signal groups (prefix tells who drives them):
//   tb_*   - driven by the stimulus initial block while preloading T1/T2
//   mgr_*  - registered write-port signals of the "fat" BRAMs
//   mac_*  - read-port signals of the "fat" and "slim" BRAMs
//
// "fat" write side as driven by the stimulus block
reg tb_fat_bram_xy_ena = 1'b0;
reg [ 2:0] tb_fat_bram_xy_bank;
reg [ 7:0] tb_fat_bram_xy_addr;
reg [17:0] tb_fat_bram_x_din;
reg [17:0] tb_fat_bram_y_din;
// "fat" write port actually connected to the BRAM instances
reg mgr_fat_bram_xy_ena = 1'b0;
reg [ 2:0] mgr_fat_bram_xy_bank;
reg [ 7:0] mgr_fat_bram_xy_addr;
reg [17:0] mgr_fat_bram_x_din;
reg [17:0] mgr_fat_bram_y_din;
// "fat" read port: one shared bank, four independent word addresses/outputs
reg mac_fat_bram_xy_ena = 1'b0;
reg mac_fat_bram_xy_reg_ena = 1'b0;
reg [ 2:0] mac_fat_bram_xy_bank;
reg [ 7:0] mac_fat_bram_xy_addr[0:3];
wire [17:0] mac_fat_bram_x_dout[0:3];
wire [17:0] mac_fat_bram_y_dout[0:3];
// "slim" write port, driven directly by the stimulus block
reg tb_slim_bram_xy_ena = 1'b0;
reg [ 1:0] tb_slim_bram_xy_bank;
reg [ 7:0] tb_slim_bram_xy_addr;
reg [17:0] tb_slim_bram_x_din;
reg [17:0] tb_slim_bram_y_din;
// "slim" read port; *_dly is a registered copy of the read address
reg mac_slim_bram_xy_ena = 1'b0;
reg mac_slim_bram_xy_reg_ena = 1'b0;
reg [ 1:0] mac_slim_bram_xy_bank;
reg [ 7:0] mac_slim_bram_xy_addr;
reg [ 7:0] mac_slim_bram_xy_addr_dly;
wire [17:0] mac_slim_bram_x_dout;
wire [17:0] mac_slim_bram_y_dout;
// One-cycle delayed copies of the slim BRAM read address and of its
// output-register enable.
reg mac_slim_bram_xy_reg_ena_dly = 1'b0;
always @(posedge clk) begin
    mac_slim_bram_xy_addr_dly    <= mac_slim_bram_xy_addr;
    mac_slim_bram_xy_reg_ena_dly <= mac_slim_bram_xy_reg_ena;
end
// "Fat" BRAMs: NUM_MULTS/2 pairs of X/Y memories.
// Port A of every instance shares the same mgr_* write signals (ena doubles
// as write enable), so all copies receive identical data. Port B is read with
// a shared bank but a per-instance word address mac_fat_bram_xy_addr[z].
// NOTE(review): ip_bram_36k is defined outside this file; its read latency is
// not visible here.
genvar z;
generate for (z=0; z<(NUM_MULTS/2); z=z+1)
begin : gen_fat_bram
// X side
ip_bram_36k fat_bram_x
(
.clka (clk),
.ena (mgr_fat_bram_xy_ena),
.wea (mgr_fat_bram_xy_ena),
.addra ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}),
.dina (mgr_fat_bram_x_din),
.clkb (clk),
.enb (mac_fat_bram_xy_ena),
.regceb (mac_fat_bram_xy_reg_ena),
.addrb ({mac_fat_bram_xy_bank, mac_fat_bram_xy_addr[z]}),
.doutb (mac_fat_bram_x_dout[z])
);
// Y side
ip_bram_36k fat_bram_y
(
.clka (clk),
.ena (mgr_fat_bram_xy_ena),
.wea (mgr_fat_bram_xy_ena),
.addra ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}),
.dina (mgr_fat_bram_y_din),
.clkb (clk),
.enb (mac_fat_bram_xy_ena),
.regceb (mac_fat_bram_xy_reg_ena),
.addrb ({mac_fat_bram_xy_bank, mac_fat_bram_xy_addr[z]}),
.doutb (mac_fat_bram_y_dout[z])
);
//
end
endgenerate
// "Slim" BRAMs: a single X/Y pair.
// Port A is written directly by the stimulus block (tb_slim_* signals, ena
// doubles as write enable); port B is read through the mac_slim_* signals.
// NOTE(review): ip_bram_18k is defined outside this file.
ip_bram_18k slim_bram_x
(
.clka (clk),
.ena (tb_slim_bram_xy_ena),
.wea (tb_slim_bram_xy_ena),
.addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}),
.dina (tb_slim_bram_x_din),
.clkb (clk),
.enb (mac_slim_bram_xy_ena),
.regceb (mac_slim_bram_xy_reg_ena),
.addrb ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}),
.doutb (mac_slim_bram_x_dout)
);
ip_bram_18k slim_bram_y
(
.clka (clk),
.ena (tb_slim_bram_xy_ena),
.wea (tb_slim_bram_xy_ena),
.addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}),
.dina (tb_slim_bram_y_din),
.clkb (clk),
.enb (mac_slim_bram_xy_ena),
.regceb (mac_slim_bram_xy_reg_ena),
.addrb ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}),
.doutb (mac_slim_bram_y_dout)
);
//
// Enable, Ready
//
reg ena = 1'b0;
integer i;
//
// Stimulus sequence:
//   1. hold reset for 10 clock periods, release it, idle for 10 more
//   2. preload T1 (X side) and T2 (Y side) into the fat and slim BRAMs
//   3. park the write-side signals at X, idle for 10 periods
//   4. pulse `ena` for one clock period to start the operation
//   5. wait 10000 clock periods, compare the result with verify_ab
//   6. end the simulation (the clock is free-running, so without an
//      explicit $finish a batch-mode run would never terminate)
//
initial begin
    // 1. reset
    for (i=0; i<10; i=i+1)
        wait_clock_tick;
    rst = 1'b0;
    for (i=0; i<10; i=i+1)
        wait_clock_tick;
    // 2. preload operands, one word per clock period
    tb_fat_bram_xy_ena = 1'b1;
    tb_slim_bram_xy_ena = 1'b1;
    for (i=0; i<32; i=i+1) begin
        tb_fat_bram_xy_bank = BANK_FAT_T1T2;
        tb_fat_bram_xy_addr = i[7:0];
        tb_fat_bram_x_din = T1[i];
        tb_fat_bram_y_din = T2[i];
        tb_slim_bram_xy_bank = BANK_SLIM_T1T2;
        tb_slim_bram_xy_addr = i[7:0];
        tb_slim_bram_x_din = T1[i];
        tb_slim_bram_y_din = T2[i];
        wait_clock_tick;
    end
    // 3. stop writing; drive X on the idle buses so stray use shows up
    tb_fat_bram_xy_ena = 1'b0;
    tb_slim_bram_xy_ena = 1'b0;
    tb_fat_bram_xy_bank = {3{1'bX}};
    tb_fat_bram_xy_addr = {8{1'bX}};
    tb_fat_bram_x_din = {18{1'bX}};
    tb_fat_bram_y_din = {18{1'bX}};
    tb_slim_bram_xy_bank = {2{1'bX}};
    tb_slim_bram_xy_addr = {8{1'bX}};
    tb_slim_bram_x_din = {18{1'bX}};
    tb_slim_bram_y_din = {18{1'bX}};
    for (i=0; i<10; i=i+1)
        wait_clock_tick;
    // 4. one-period start pulse
    ena = 1'b1;
    wait_clock_tick;
    ena = 1'b0;
    // 5. let the operation run, then check the captured result
    for (i=0; i<10000; i=i+1)
        wait_clock_tick;
    verify_ab;
    // 6. done
    $finish;
end
//
// DSPs
//
// Control and data signals for the two DSP arrays (X and Y).
// ce_* are clock enables; mode_z is an 8-bit per-array mode word; `a` packs
// the four fat BRAM read outputs (18 bits each); `b` is a single 17-bit
// operand; `p` is the packed product bus (8 x 47 bits).
// NOTE(review): the widths 4 and 8 are hard-coded here and presumably
// correspond to NUM_MULTS/2 and NUM_MULTS - confirm against the parameter file.
//
reg dsp_x_ce_a;
reg dsp_x_ce_b;
reg dsp_x_ce_b_dly;
reg dsp_x_ce_m;
reg dsp_x_ce_p;
reg dsp_x_ce_mode;
reg [8 -1:0] dsp_x_mode_z = {8{1'b1}};
wire [4*18-1:0] dsp_x_a;
reg [1*17-1:0] dsp_x_b;
wire [8*47-1:0] dsp_x_p;
reg dsp_y_ce_a;
reg dsp_y_ce_b;
reg dsp_y_ce_b_dly;
reg dsp_y_ce_m;
reg dsp_y_ce_p;
reg dsp_y_ce_mode;
reg [8 -1:0] dsp_y_mode_z = {8{1'b1}};
wire [4*18-1:0] dsp_y_a;
reg [1*17-1:0] dsp_y_b;
wire [8*47-1:0] dsp_y_p;
// Pack the per-instance fat BRAM outputs into the flat `a` buses:
// slice z (18 bits) comes from fat BRAM instance z.
generate for (z=0; z<(NUM_MULTS/2); z=z+1)
begin : gen_dsp_xy_a_split
assign dsp_x_a[18*z+:18] = mac_fat_bram_x_dout[z];
assign dsp_y_a[18*z+:18] = mac_fat_bram_y_dout[z];
end
endgenerate
always @(posedge clk)
// one-cycle delayed copy of the B-register clock enables
{dsp_y_ce_b_dly, dsp_x_ce_b_dly} <= {dsp_y_ce_b, dsp_x_ce_b};
// Stages of the mode_z delay line (adv4 is the earliest stage, adv1 the
// last one before dsp_x_mode_z / dsp_y_mode_z); all start as all-ones.
reg [8 -1:0] dsp_xy_mode_z_adv1 = {8{1'b1}};
reg [8 -1:0] dsp_xy_mode_z_adv2 = {8{1'b1}};
reg [8 -1:0] dsp_xy_mode_z_adv3 = {8{1'b1}};
reg [8 -1:0] dsp_xy_mode_z_adv4 = {8{1'b1}};
// DSP array for the X channel.
// NOTE(review): dsp_array is defined outside this file; port semantics are
// taken from the port names only.
dsp_array dsp_x
(
.clk (clk),
.ce_a (dsp_x_ce_a),
.ce_b (dsp_x_ce_b),
.ce_m (dsp_x_ce_m),
.ce_p (dsp_x_ce_p),
.ce_mode (dsp_x_ce_mode),
.mode_z (dsp_x_mode_z),
.a (dsp_x_a),
.b (dsp_x_b),
.p (dsp_x_p)
);
// DSP array for the Y channel (same wiring pattern as X).
dsp_array dsp_y
(
.clk (clk),
.ce_a (dsp_y_ce_a),
.ce_b (dsp_y_ce_b),
.ce_m (dsp_y_ce_m),
.ce_p (dsp_y_ce_p),
.ce_mode (dsp_y_ce_mode),
.mode_z (dsp_y_mode_z),
.a (dsp_y_a),
.b (dsp_y_b),
.p (dsp_y_p)
);
//
// FSM state register and end-of-column detection
//
reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
reg [FSM_STATE_WIDTH-1:0] fsm_state_next;

// Current state: synchronous reset to IDLE, otherwise follow fsm_state_next.
always @(posedge clk) begin
    if (rst) begin
        fsm_state <= FSM_STATE_IDLE;
    end else begin
        fsm_state <= fsm_state_next;
    end
end

// Index of the last word handled by the address generators.
localparam [7:0] index_last = 8'd31;

// "almost done": slim read address is one before the last index.
// "surely done": slim read address equals the last index.
reg  mult_square_addr_almost_done_flop;
reg  mult_square_addr_surely_done_flop;
wire mult_square_addr_almost_done_comb = (mac_slim_bram_xy_addr == (index_last - 8'd1));
wire mult_square_addr_surely_done_comb = (mac_slim_bram_xy_addr == index_last);

// Registered versions of the flags; only meaningful in the BUSY states,
// forced low everywhere else.
always @(posedge clk)
    case (fsm_state)
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
            mult_square_addr_almost_done_flop <= mult_square_addr_almost_done_comb;
            mult_square_addr_surely_done_flop <= mult_square_addr_surely_done_comb;
        end
        default: begin
            mult_square_addr_almost_done_flop <= 1'b0;
            mult_square_addr_surely_done_flop <= 1'b0;
        end
    endcase
//
// Column
//
// col_index      - column currently being processed (updated in the increment logic)
// col_index_prev - col_index delayed by one clock cycle
// col_index_last - index of the final column (loaded when column 0 starts)
//
reg [4:0] col_index;
reg [4:0] col_index_prev;
reg [4:0] col_index_last;
always @(posedge clk)
// one-cycle delayed copy of the column index
col_index_prev <= col_index;
//
// FSM Transition Logic
//
// State entered after a non-zero column finishes (assigned further below).
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
// Slim BRAM read address, keyed on the NEXT state so it is valid on entry:
//   *_INIT       -> start at word 0
//   *_TRIG/_BUSY -> count up by one, wrapping to 0 once the registered
//                   "almost done" flag is set
//   otherwise    -> X (address is a don't-care)
always @(posedge clk)
//
case (fsm_state_next)
FSM_STATE_MULT_SQUARE_COL_0_INIT,
FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_addr <= !mult_square_addr_almost_done_flop ? mac_slim_bram_xy_addr + 1'b1 : 8'd0;
default: mac_slim_bram_xy_addr <= 8'dX;
endcase
// Fat BRAM read addresses, one per BRAM instance j:
//   COL_0_INIT   -> 1 + 2*j
//   COL_N_INIT   -> 8*(col_index + 1) + 1 + 2*j   (col_index not yet incremented)
//   *_TRIG/_BUSY -> step down by one, wrapping from 0 to index_last
//                   (see mac_fat_bram_xy_addr_next)
//   otherwise    -> X
integer j;
always @(posedge clk)
//
for (j=0; j<(NUM_MULTS/2); j=j+1)
case (fsm_state_next)
FSM_STATE_MULT_SQUARE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= 1 + 2 * j;
FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= 8 * (col_index + 1) + 1 + 2 * j;
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
default: mac_fat_bram_xy_addr[j] <= 8'dX;
endcase
// Read-side bank selects for both BRAM families.
// While the multiplier is in any INIT/TRIG/BUSY state both point at the
// T1/T2 operand bank; in every other state they are don't-care (X).
always @(posedge clk)
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_INIT,
        FSM_STATE_MULT_SQUARE_COL_N_INIT,
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
            mac_slim_bram_xy_bank <= BANK_SLIM_T1T2;
            mac_fat_bram_xy_bank  <= BANK_FAT_T1T2;
        end
        default: begin
            mac_slim_bram_xy_bank <= 2'bXX;
            mac_fat_bram_xy_bank  <= 3'bXXX;
        end
    endcase
// Read-port enables.
//   INIT/TRIG : both BRAM families enabled
//   BUSY      : fat stays enabled; slim is enabled until the registered
//               "almost done" flag is raised
//   otherwise : both disabled
always @(posedge clk)
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_INIT,
        FSM_STATE_MULT_SQUARE_COL_N_INIT,
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG: begin
            mac_slim_bram_xy_ena <= 1'b1;
            mac_fat_bram_xy_ena  <= 1'b1;
        end
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
            mac_slim_bram_xy_ena <= ~mult_square_addr_almost_done_flop;
            mac_fat_bram_xy_ena  <= 1'b1;
        end
        default: begin
            mac_slim_bram_xy_ena <= 1'b0;
            mac_fat_bram_xy_ena  <= 1'b0;
        end
    endcase

// Output-register enables trail the port enables by one clock cycle.
always @(posedge clk) begin
    mac_slim_bram_xy_reg_ena <= mac_slim_bram_xy_ena;
    mac_fat_bram_xy_reg_ena  <= mac_fat_bram_xy_ena;
end
// DSP B operands, taken from the low 17 bits of the slim BRAM outputs.
// The channels are crossed over: the Y array gets the slim X word and the
// X array gets the slim Y word. When no valid word is available the
// operands are driven to X.
always @(posedge clk)
    if (mac_slim_bram_xy_reg_ena_dly) begin
        dsp_y_b <= mac_slim_bram_x_dout[16:0];
        dsp_x_b <= mac_slim_bram_y_dout[16:0];
    end else begin
        dsp_y_b <= {17{1'bX}};
        dsp_x_b <= {17{1'bX}};
    end
// Next fat BRAM read address: count down by one, and wrap around to
// `addr_last` after address 0 has been used.
//   addr_now  - current read address
//   addr_last - address to wrap to
function [7:0] mac_fat_bram_xy_addr_next;
    input [7:0] addr_now;
    input [7:0] addr_last;
    begin
        // wrap-around is the fallback; overridden while still above zero
        mac_fat_bram_xy_addr_next = addr_last;
        if (addr_now > 0)
            mac_fat_bram_xy_addr_next = addr_now - 1'b1;
    end
endfunction
// DSP clock-enable pipeline (X and Y arrays are driven identically):
//   ce_a    <- slim reg enable OR its one-cycle delayed copy
//   ce_b    <- delayed slim reg enable
//   ce_m    <- ce_b delayed by one cycle
//   ce_p    <- previous ce_m
//   ce_mode <- ce_b delayed by one cycle (same source as ce_m)
always @(posedge clk) begin
    dsp_x_ce_a    <= mac_slim_bram_xy_reg_ena | mac_slim_bram_xy_reg_ena_dly;
    dsp_y_ce_a    <= mac_slim_bram_xy_reg_ena | mac_slim_bram_xy_reg_ena_dly;

    dsp_x_ce_b    <= mac_slim_bram_xy_reg_ena_dly;
    dsp_y_ce_b    <= mac_slim_bram_xy_reg_ena_dly;

    dsp_x_ce_m    <= dsp_x_ce_b_dly;
    dsp_y_ce_m    <= dsp_y_ce_b_dly;

    dsp_x_ce_p    <= dsp_x_ce_m;
    dsp_y_ce_p    <= dsp_y_ce_m;

    dsp_x_ce_mode <= dsp_x_ce_b_dly;
    dsp_y_ce_mode <= dsp_y_ce_b_dly;
end
// Block the caller for exactly one clock period (pure time delay; it does
// not wait for a clock edge).
task wait_clock_tick;
    #`CLK_PERIOD_NS;
endtask
//
// Increment Logic
//
// Column counter, keyed on the next state:
//   COL_0_INIT -> restart at column 0 and latch the last column index,
//                 which is index_last[7:3] (index_last divided by 8)
//   COL_N_INIT -> advance to the next column
//   otherwise  -> hold
//
always @(posedge clk)
//
case (fsm_state_next)
//
FSM_STATE_MULT_SQUARE_COL_0_INIT: begin
col_index <= 5'd0;
col_index_last <= index_last[7:3];
end
//
FSM_STATE_MULT_SQUARE_COL_N_INIT:
col_index <= col_index + 1'b1;
//
endcase
// After a column finishes: go to HOLDOFF if it was the last column,
// otherwise start the next column.
assign fsm_state_after_mult_square = (col_index == col_index_last) ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
// First stage of the mode_z delay line:
//   TRIG      -> all zeros
//   BUSY      -> pattern from calc_mac_mode_z_square for the delayed
//                column index and delayed slim read address
//   otherwise -> all ones
always @(posedge clk)
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG:
            dsp_xy_mode_z_adv4 <= {8{1'b0}};
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY:
            dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
        default:
            dsp_xy_mode_z_adv4 <= {8{1'b1}};
    endcase

// Remaining stages: adv4 -> adv3 -> adv2 -> adv1 -> mode_z of both arrays.
always @(posedge clk) begin
    dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4;
    dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3;
    dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2;
    dsp_x_mode_z       <= dsp_xy_mode_z_adv1;
    dsp_y_mode_z       <= dsp_xy_mode_z_adv1;
end
// Compute the mode_z pattern for one slim read address.
//   column    - column index to compare against
//   slim_addr - slim BRAM read address; bits [7:3] select the column,
//               bits [2:0] select the lane within it
// When the address belongs to `column`, the result is all ones except for a
// single zero at the lane position; otherwise it is all ones.
function [NUM_MULTS-1:0] calc_mac_mode_z_square;
    input [4:0] column;
    input [7:0] slim_addr;
    begin
        if (slim_addr[7:3] == column)
            case (slim_addr[2:0])
                3'd0: calc_mac_mode_z_square = 8'hFE;
                3'd1: calc_mac_mode_z_square = 8'hFD;
                3'd2: calc_mac_mode_z_square = 8'hFB;
                3'd3: calc_mac_mode_z_square = 8'hF7;
                3'd4: calc_mac_mode_z_square = 8'hEF;
                3'd5: calc_mac_mode_z_square = 8'hDF;
                3'd6: calc_mac_mode_z_square = 8'hBF;
                3'd7: calc_mac_mode_z_square = 8'h7F;
            endcase
        else
            calc_mac_mode_z_square = {NUM_MULTS{1'b1}};
    end
endfunction
// Recombinator start strobes: asserted for the cycle after ce_a is high
// while ce_b, ce_m and ce_p of the same array are all low.
reg recomb_x_ena = 1'b0;
reg recomb_y_ena = 1'b0;
always @(posedge clk) begin
    recomb_x_ena <= dsp_x_ce_a && !(dsp_x_ce_b || dsp_x_ce_m || dsp_x_ce_p);
    recomb_y_ena <= dsp_y_ce_a && !(dsp_y_ce_b || dsp_y_ce_m || dsp_y_ce_p);
end
// Recombinator outputs: a stream of (bank, addr, x word, y word) tuples
// qualified by recomb_fat_bram_xy_dout_valid, plus a ready flag that the
// FSM waits for in the HOLDOFF state.
// NOTE(review): modexpng_part_recombinator is defined outside this file;
// its internal behaviour is not visible here.
wire [ 2:0] recomb_fat_bram_xy_bank;
wire [ 7:0] recomb_fat_bram_xy_addr;
wire [17:0] recomb_fat_bram_x_dout;
wire [17:0] recomb_fat_bram_y_dout;
wire recomb_fat_bram_xy_dout_valid;
wire recomb_rdy;
modexpng_part_recombinator recomb
(
.clk (clk),
.rdy (recomb_rdy),
.fsm_state_next (fsm_state_next),
.index_last (index_last),
.dsp_x_ce_p (dsp_x_ce_p),
.dsp_y_ce_p (dsp_y_ce_p),
.ena_x (recomb_x_ena),
.ena_y (recomb_y_ena),
.dsp_x_p (dsp_x_p),
.dsp_y_p (dsp_y_p),
.col_index (col_index),
.col_index_last (col_index_last),
.slim_bram_xy_addr (mac_slim_bram_xy_addr),
.fat_bram_xy_bank (recomb_fat_bram_xy_bank),
.fat_bram_xy_addr (recomb_fat_bram_xy_addr),
.fat_bram_x_dout (recomb_fat_bram_x_dout),
.fat_bram_y_dout (recomb_fat_bram_y_dout),
.fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid)
);
// Capture of the recombinator output for later comparison in verify_ab.
// Words written to bank 1 land in AB_READ[0..], words written to bank 2 in
// AB_READ[32..]; other banks are ignored. Only the X-channel word is
// captured here - the Y-channel output is not checked by this testbench.
reg [17:0] AB_READ[0:63];
always @(posedge clk)
//
if (recomb_fat_bram_xy_dout_valid)
//
case (recomb_fat_bram_xy_bank)
3'd1: AB_READ[recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout;
3'd2: AB_READ[32 + recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout;
endcase
// Fat BRAM write-port source selection (registered):
//   1. the stimulus block has priority while it is preloading operands
//   2. otherwise a valid recombinator output word is written back
//   3. otherwise the port is idle and its address/data are driven to X
always @(posedge clk)
//
if (tb_fat_bram_xy_ena) begin
mgr_fat_bram_xy_ena <= 1'b1;
mgr_fat_bram_xy_bank <= tb_fat_bram_xy_bank;
mgr_fat_bram_xy_addr <= tb_fat_bram_xy_addr;
mgr_fat_bram_x_din <= tb_fat_bram_x_din;
mgr_fat_bram_y_din <= tb_fat_bram_y_din;
end else if (recomb_fat_bram_xy_dout_valid) begin
mgr_fat_bram_xy_ena <= 1'b1;
mgr_fat_bram_xy_bank <= recomb_fat_bram_xy_bank;
mgr_fat_bram_xy_addr <= recomb_fat_bram_xy_addr;
mgr_fat_bram_x_din <= recomb_fat_bram_x_dout;
mgr_fat_bram_y_din <= recomb_fat_bram_y_dout;
end else begin
mgr_fat_bram_xy_ena <= 1'b0;
mgr_fat_bram_xy_bank <= 3'bXXX;
mgr_fat_bram_xy_addr <= 8'hXX;
mgr_fat_bram_x_din <= {18{1'bX}};
mgr_fat_bram_y_din <= {18{1'bX}};
end
// Compare the captured result AB_READ against the reference AB.
// Prints one line per word (mismatches, including X/Z words, are tagged
// with "<???>") followed by an overall verdict.
// The loop uses a task-local index so that calling this task never
// disturbs the module-level loop variable of the caller.
task verify_ab;
    reg     verify_ab_ok;
    integer verify_ab_idx;
    begin
        verify_ab_ok = 1;
        for (verify_ab_idx=0; verify_ab_idx<64; verify_ab_idx=verify_ab_idx+1)
            // case equality: an X or Z in AB_READ counts as a mismatch
            if (AB_READ[verify_ab_idx] === AB[verify_ab_idx])
                $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x", verify_ab_idx, AB[verify_ab_idx], AB_READ[verify_ab_idx]);
            else begin
                $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x <???>", verify_ab_idx, AB[verify_ab_idx], AB_READ[verify_ab_idx]);
                verify_ab_ok = 0;
            end
        if (verify_ab_ok)
            $display("AB is OK.");
        else
            $display("AB is WRONG!");
    end
endtask
// Next-state logic (combinational).
//   IDLE        -> COL_0_INIT when `ena` is high
//   COL_0: INIT -> TRIG -> BUSY, BUSY repeats until the registered
//          "surely done" flag is set, then COL_N_INIT
//   COL_N: INIT -> TRIG -> BUSY, BUSY repeats until "surely done", then
//          either the next column or HOLDOFF (fsm_state_after_mult_square)
//   HOLDOFF     -> IDLE once the recombinator reports ready
always @* begin
// default keeps fsm_state_next fully assigned on every path
fsm_state_next = FSM_STATE_IDLE;
//
case (fsm_state)
FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE;
FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF;
default: fsm_state_next = FSM_STATE_IDLE ;
endcase
//
end
endmodule