`timescale 1ns / 1ps
module tb_square;
//
// Headers
//
`include "../rtl/modexpng_parameters.vh"
`include "../rtl/modexpng_parameters_x8.vh"
`include "../rtl/modexpng_mmm_fsm.vh"
//
// Clock
//
// Testbench clock: frequency is set in MHz, period and half-period are derived from it.
`define CLK_FREQUENCY_MHZ 100.0
`define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ)
`define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS)
reg clk = 1'b0;
// Starts low and toggles every half period (first rising edge after one half period).
always #`CLK_PERIOD_HALF_NS clk = ~clk;
//
// Reset
//
// Active-high reset: asserted from time zero, released by the stimulus block
// below, and sampled synchronously by the FSM state register.
reg rst = 1'b1;
//
// Test vector storage (18-bit words): T1, T2, AB, N_COEFF, Q, N, M
//
reg [17:0] T1[0:31]; // 32 words, written to the X side of the T1T2 banks
reg [17:0] T2[0:31]; // 32 words, written to the Y side of the T1T2 banks
reg [17:0] AB[0:63]; // 64 words, presumably the expected product checked by verify_ab -- TODO confirm
reg [17:0] N_COEFF[0:32]; // 33 words: 0..31 go to the slim N_COEFF bank, word 32 to the slim EXT bank
reg [17:0] Q[0:32]; // 33 words, presumably the expected value checked by verify_q -- TODO confirm
reg [17:0] N[0:31]; // 32 words, written to both X and Y sides of the fat N bank
reg [17:0] M[0:64]; // 65 words, presumably the expected value checked by verify_m -- TODO confirm
//
// Init
//
// Fill the test vector memories at time zero. T1, T2, N and N_COEFF are the
// values the stimulus block later writes into the BRAMs; AB, Q and M are not
// written anywhere in the visible code and are presumably the reference
// results used by the verify_* tasks (TODO confirm).
initial begin
//
// T1: 32 words
T1[ 0] = 18'h191c5; T1[ 1] = 18'h1a118; T1[ 2] = 18'h06e06; T1[ 3] = 18'h0ea69;
T1[ 4] = 18'h12944; T1[ 5] = 18'h0c242; T1[ 6] = 18'h0fc64; T1[ 7] = 18'h14efe;
T1[ 8] = 18'h113da; T1[ 9] = 18'h06ff7; T1[10] = 18'h0ef0d; T1[11] = 18'h18581;
T1[12] = 18'h1a62c; T1[13] = 18'h052b7; T1[14] = 18'h114f7; T1[15] = 18'h1c53e;
T1[16] = 18'h0c63e; T1[17] = 18'h0dd14; T1[18] = 18'h0fba8; T1[19] = 18'h1b8e6;
T1[20] = 18'h0d944; T1[21] = 18'h10292; T1[22] = 18'h0d276; T1[23] = 18'h027b1;
T1[24] = 18'h0c0c7; T1[25] = 18'h100a9; T1[26] = 18'h0a9ab; T1[27] = 18'h0e696;
T1[28] = 18'h10798; T1[29] = 18'h0ae91; T1[30] = 18'h08d4d; T1[31] = 18'h0080b;
//
// T2: 32 words
T2[ 0] = 18'h1193b; T2[ 1] = 18'h0de9c; T2[ 2] = 18'h0b993; T2[ 3] = 18'h0d2cd;
T2[ 4] = 18'h106ad; T2[ 5] = 18'h076da; T2[ 6] = 18'h10cab; T2[ 7] = 18'h15cd5;
T2[ 8] = 18'h15425; T2[ 9] = 18'h16287; T2[10] = 18'h0fd64; T2[11] = 18'h06ee0;
T2[12] = 18'h1b0c9; T2[13] = 18'h01a5e; T2[14] = 18'h1855c; T2[15] = 18'h17bf9;
T2[16] = 18'h1c83c; T2[17] = 18'h158ed; T2[18] = 18'h086df; T2[19] = 18'h16676;
T2[20] = 18'h0a0f8; T2[21] = 18'h14545; T2[22] = 18'h09641; T2[23] = 18'h16863;
T2[24] = 18'h17e20; T2[25] = 18'h0d457; T2[26] = 18'h05a9b; T2[27] = 18'h1a4cf;
T2[28] = 18'h1582a; T2[29] = 18'h1686c; T2[30] = 18'h1394e; T2[31] = 18'h0bdbc;
//
// N: 32 words
N[ 0] = 18'h00f97; N[ 1] = 18'h018bb; N[ 2] = 18'h08a44; N[ 3] = 18'h00858;
N[ 4] = 18'h06647; N[ 5] = 18'h0042c; N[ 6] = 18'h0fa09; N[ 7] = 18'h0c8d3;
N[ 8] = 18'h0bbc7; N[ 9] = 18'h0e2dd; N[10] = 18'h017fd; N[11] = 18'h0ef4a;
N[12] = 18'h002ef; N[13] = 18'h090c1; N[14] = 18'h032db; N[15] = 18'h028b1;
N[16] = 18'h05f0a; N[17] = 18'h0ebfd; N[18] = 18'h017ca; N[19] = 18'h09587;
N[20] = 18'h0d266; N[21] = 18'h0563c; N[22] = 18'h041af; N[23] = 18'h0433f;
N[24] = 18'h08e83; N[25] = 18'h0bc19; N[26] = 18'h000b2; N[27] = 18'h05b53;
N[28] = 18'h00e5d; N[29] = 18'h09bc5; N[30] = 18'h0a822; N[31] = 18'h0efff;
//
// N_COEFF: 33 words
N_COEFF[ 0] = 18'h09fd9; N_COEFF[ 1] = 18'h0b367; N_COEFF[ 2] = 18'h0e467; N_COEFF[ 3] = 18'h0de24;
N_COEFF[ 4] = 18'h02022; N_COEFF[ 5] = 18'h0f0e8; N_COEFF[ 6] = 18'h02919; N_COEFF[ 7] = 18'h09901;
N_COEFF[ 8] = 18'h0da43; N_COEFF[ 9] = 18'h0023b; N_COEFF[10] = 18'h0ebf8; N_COEFF[11] = 18'h0f04e;
N_COEFF[12] = 18'h0942f; N_COEFF[13] = 18'h029e9; N_COEFF[14] = 18'h07cb0; N_COEFF[15] = 18'h08c25;
N_COEFF[16] = 18'h04e60; N_COEFF[17] = 18'h05cdc; N_COEFF[18] = 18'h0dff7; N_COEFF[19] = 18'h0279b;
N_COEFF[20] = 18'h0610d; N_COEFF[21] = 18'h0f04a; N_COEFF[22] = 18'h001dc; N_COEFF[23] = 18'h03429;
N_COEFF[24] = 18'h0f78c; N_COEFF[25] = 18'h0c3e2; N_COEFF[26] = 18'h00ed8; N_COEFF[27] = 18'h039c0;
N_COEFF[28] = 18'h02ac2; N_COEFF[29] = 18'h0f703; N_COEFF[30] = 18'h0c54e; N_COEFF[31] = 18'h022d9;
N_COEFF[32] = 18'h0f994;
//
// AB: 64 words
AB[ 0] = 18'h0c199; AB[ 1] = 18'h0957a; AB[ 2] = 18'h070ad; AB[ 3] = 18'h0e5a6;
AB[ 4] = 18'h0fec9; AB[ 5] = 18'h00b73; AB[ 6] = 18'h09c72; AB[ 7] = 18'h0cdf0;
AB[ 8] = 18'h08755; AB[ 9] = 18'h07560; AB[10] = 18'h084b1; AB[11] = 18'h0ad3f;
AB[12] = 18'h074fe; AB[13] = 18'h04d74; AB[14] = 18'h00e16; AB[15] = 18'h0d3b3;
AB[16] = 18'h0d418; AB[17] = 18'h02f12; AB[18] = 18'h0c301; AB[19] = 18'h0be2b;
AB[20] = 18'h08222; AB[21] = 18'h0056c; AB[22] = 18'h01c7c; AB[23] = 18'h0bc95;
AB[24] = 18'h03427; AB[25] = 18'h0c65a; AB[26] = 18'h089ac; AB[27] = 18'h02117;
AB[28] = 18'h0ff7d; AB[29] = 18'h01cde; AB[30] = 18'h02709; AB[31] = 18'h01c56;
AB[32] = 18'h0f35a; AB[33] = 18'h08ce6; AB[34] = 18'h0a8e5; AB[35] = 18'h0d6d4;
AB[36] = 18'h06868; AB[37] = 18'h09105; AB[38] = 18'h0219e; AB[39] = 18'h0bc40;
AB[40] = 18'h00e0a; AB[41] = 18'h07783; AB[42] = 18'h0187a; AB[43] = 18'h0b922;
AB[44] = 18'h02609; AB[45] = 18'h0c64b; AB[46] = 18'h06b4b; AB[47] = 18'h04b79;
AB[48] = 18'h0fed6; AB[49] = 18'h03eac; AB[50] = 18'h04cac; AB[51] = 18'h0d47d;
AB[52] = 18'h045fd; AB[53] = 18'h04fa8; AB[54] = 18'h0597c; AB[55] = 18'h0a10d;
AB[56] = 18'h0bf44; AB[57] = 18'h08671; AB[58] = 18'h0112a; AB[59] = 18'h08ccf;
AB[60] = 18'h0cae5; AB[61] = 18'h04d94; AB[62] = 18'h0b95a; AB[63] = 18'h00040;
//
// Q: 33 words
Q[ 0] = 18'h021b1; Q[ 1] = 18'h0d2db; Q[ 2] = 18'h0754b; Q[ 3] = 18'h01fc1;
Q[ 4] = 18'h063f7; Q[ 5] = 18'h086e5; Q[ 6] = 18'h0bcea; Q[ 7] = 18'h02260;
Q[ 8] = 18'h0c54c; Q[ 9] = 18'h0e298; Q[10] = 18'h05d07; Q[11] = 18'h0f978;
Q[12] = 18'h0e742; Q[13] = 18'h0a3f0; Q[14] = 18'h0b31e; Q[15] = 18'h041b7;
Q[16] = 18'h06ed9; Q[17] = 18'h03ac5; Q[18] = 18'h0f8eb; Q[19] = 18'h0c619;
Q[20] = 18'h067e9; Q[21] = 18'h00350; Q[22] = 18'h00376; Q[23] = 18'h02ebf;
Q[24] = 18'h0b125; Q[25] = 18'h05f7d; Q[26] = 18'h0f121; Q[27] = 18'h07ba4;
Q[28] = 18'h03050; Q[29] = 18'h0642e; Q[30] = 18'h0c2fc; Q[31] = 18'h0dfcf;
Q[32] = 18'h03f9e;
//
// M: 65 words
M[ 0] = 18'h03e67; M[ 1] = 18'h06a85; M[ 2] = 18'h08f52; M[ 3] = 18'h01a59;
M[ 4] = 18'h00136; M[ 5] = 18'h0f48c; M[ 6] = 18'h0638d; M[ 7] = 18'h0320f;
M[ 8] = 18'h078aa; M[ 9] = 18'h08a9f; M[10] = 18'h07b4e; M[11] = 18'h052c0;
M[12] = 18'h08b01; M[13] = 18'h0b28b; M[14] = 18'h0f1e9; M[15] = 18'h02c4c;
M[16] = 18'h02be7; M[17] = 18'h0d0ed; M[18] = 18'h03cfe; M[19] = 18'h041d4;
M[20] = 18'h07ddd; M[21] = 18'h0fa93; M[22] = 18'h0e383; M[23] = 18'h0436a;
M[24] = 18'h0cbd8; M[25] = 18'h039a5; M[26] = 18'h07653; M[27] = 18'h0dee8;
M[28] = 18'h00082; M[29] = 18'h0e321; M[30] = 18'h0d8f6; M[31] = 18'h0e3a9;
M[32] = 18'h00ca5; M[33] = 18'h035ed; M[34] = 18'h02b8f; M[35] = 18'h063bd;
M[36] = 18'h0ec9f; M[37] = 18'h0b8bb; M[38] = 18'h00389; M[39] = 18'h0ca27;
M[40] = 18'h0bea7; M[41] = 18'h0df1e; M[42] = 18'h0d685; M[43] = 18'h0cc1b;
M[44] = 18'h036c4; M[45] = 18'h01ce9; M[46] = 18'h0c43b; M[47] = 18'h05f58;
M[48] = 18'h02c77; M[49] = 18'h03a12; M[50] = 18'h0eea8; M[51] = 18'h0ac31;
M[52] = 18'h05838; M[53] = 18'h093ac; M[54] = 18'h0fd54; M[55] = 18'h06e13;
M[56] = 18'h002e2; M[57] = 18'h06af4; M[58] = 18'h0ea18; M[59] = 18'h083b3;
M[60] = 18'h059f7; M[61] = 18'h016d3; M[62] = 18'h0c3ad; M[63] = 18'h0dbfc;
M[64] = 18'h03ba4;
//
end
//
// BRAMs
//
// "Fat" BRAM control signals.
//   tb_*  : write-side values produced by the stimulus initial block.
//   mgr_* : the signals actually wired to port A (write port) of the fat BRAMs.
//   mac_* : port B (read port) controls and read data.
// NOTE(review): no connection from tb_* to mgr_* is visible in this part of
// the file -- confirm that it exists elsewhere.
reg tb_fat_bram_xy_ena = 1'b0;
reg [ 2:0] tb_fat_bram_xy_bank;
reg [ 7:0] tb_fat_bram_xy_addr;
reg [17:0] tb_fat_bram_x_din;
reg [17:0] tb_fat_bram_y_din;
reg mgr_fat_bram_xy_ena = 1'b0; // used as both port A enable and write enable
reg [ 2:0] mgr_fat_bram_xy_bank;
reg [ 7:0] mgr_fat_bram_xy_addr;
reg [17:0] mgr_fat_bram_x_din;
reg [17:0] mgr_fat_bram_y_din;
reg mac_fat_bram_xy_ena = 1'b0;
reg mac_fat_bram_xy_ena_aux = 1'b0;
reg mac_fat_bram_xy_reg_ena = 1'b0; // one-cycle delayed copy of the read enable, drives regceb
reg mac_fat_bram_xy_reg_ena_aux = 1'b0;
reg [ 2:0] mac_fat_bram_xy_bank;
reg [ 2:0] mac_fat_bram_xy_bank_aux;
reg [ 7:0] mac_fat_bram_xy_addr[0:4]; // per-BRAM read address; the *_aux controls apply to the slots at or above NUM_MULTS/2
wire [17:0] mac_fat_bram_x_dout[0:4];
wire [17:0] mac_fat_bram_y_dout[0:4];
// Debug-only aliases of element 4 of the fat BRAM read address / data arrays,
// so that they can be viewed as plain signals.
wire [ 7:0] mac_fat_bram_xy_addr_aux = mac_fat_bram_xy_addr[4]; // handy for debug
wire [17:0] mac_fat_bram_x_dout_aux = mac_fat_bram_x_dout[4]; // handy for debug
// The Y alias must tap the Y read data (it previously mirrored the X output).
wire [17:0] mac_fat_bram_y_dout_aux = mac_fat_bram_y_dout[4]; // handy for debug
// "Slim" BRAM control signals.
//   tb_*  : write-side values produced by the stimulus initial block.
//   mgr_* : the signals actually wired to port A (write port) of the slim BRAMs.
//   mac_* : port B (read port) controls and read data.
// NOTE(review): no connection from tb_* to mgr_* is visible in this part of
// the file -- confirm that it exists elsewhere.
reg tb_slim_bram_xy_ena = 1'b0;
reg [ 1:0] tb_slim_bram_xy_bank;
reg [ 7:0] tb_slim_bram_xy_addr;
reg [17:0] tb_slim_bram_x_din;
reg [17:0] tb_slim_bram_y_din;
reg mgr_slim_bram_xy_ena = 1'b0; // used as both port A enable and write enable
reg [ 1:0] mgr_slim_bram_xy_bank;
reg [ 7:0] mgr_slim_bram_xy_addr;
reg [17:0] mgr_slim_bram_x_din;
reg [17:0] mgr_slim_bram_y_din;
reg mac_slim_bram_xy_ena = 1'b0;
reg mac_slim_bram_xy_reg_ena = 1'b0; // one-cycle delayed copy of the read enable, drives regceb
reg [ 1:0] mac_slim_bram_xy_bank;
reg [ 7:0] mac_slim_bram_xy_addr;
reg [ 7:0] mac_slim_bram_xy_addr_dly; // read address delayed by one clock
wire [17:0] mac_slim_bram_x_dout;
wire [17:0] mac_slim_bram_y_dout;
// One-clock delayed copies of the slim BRAM read address and of its
// output-register enable.
reg mac_slim_bram_xy_reg_ena_dly = 1'b0;
always @(posedge clk) begin
    mac_slim_bram_xy_addr_dly    <= mac_slim_bram_xy_addr;
    mac_slim_bram_xy_reg_ena_dly <= mac_slim_bram_xy_reg_ena;
end
genvar z;
// One X/Y pair of fat BRAMs per slot, (NUM_MULTS/2)+1 slots in total.
// Port A (write) is shared by all slots. Port B (read) uses the main
// enable / register enable / bank for slots below NUM_MULTS/2 and the *_aux
// versions for the remaining slot; every slot has its own read address.
generate for (z=0; z<((NUM_MULTS/2)+1); z=z+1)
    begin : gen_fat_bram
        // Port B control selection for this slot.
        wire       slot_rd_ena     = (z < (NUM_MULTS/2)) ? mac_fat_bram_xy_ena     : mac_fat_bram_xy_ena_aux;
        wire       slot_rd_reg_ena = (z < (NUM_MULTS/2)) ? mac_fat_bram_xy_reg_ena : mac_fat_bram_xy_reg_ena_aux;
        wire [2:0] slot_rd_bank    = (z < (NUM_MULTS/2)) ? mac_fat_bram_xy_bank    : mac_fat_bram_xy_bank_aux;

        ip_bram_36k fat_bram_x
        (
            // write port
            .clka   (clk),
            .ena    (mgr_fat_bram_xy_ena),
            .wea    (mgr_fat_bram_xy_ena),
            .addra  ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}),
            .dina   (mgr_fat_bram_x_din),
            // read port
            .clkb   (clk),
            .enb    (slot_rd_ena),
            .regceb (slot_rd_reg_ena),
            .addrb  ({slot_rd_bank, mac_fat_bram_xy_addr[z]}),
            .doutb  (mac_fat_bram_x_dout[z])
        );

        ip_bram_36k fat_bram_y
        (
            // write port
            .clka   (clk),
            .ena    (mgr_fat_bram_xy_ena),
            .wea    (mgr_fat_bram_xy_ena),
            .addra  ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}),
            .dina   (mgr_fat_bram_y_din),
            // read port
            .clkb   (clk),
            .enb    (slot_rd_ena),
            .regceb (slot_rd_reg_ena),
            .addrb  ({slot_rd_bank, mac_fat_bram_xy_addr[z]}),
            .doutb  (mac_fat_bram_y_dout[z])
        );
    end
endgenerate
// Slim BRAM pair (X and Y). Both share the same write-side and read-side
// controls and differ only in the data buses.
ip_bram_18k slim_bram_x
(
    // clocks
    .clka   (clk),
    .clkb   (clk),
    // port A: write (enable doubles as write enable)
    .ena    (mgr_slim_bram_xy_ena),
    .wea    (mgr_slim_bram_xy_ena),
    .addra  ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
    .dina   (mgr_slim_bram_x_din),
    // port B: read
    .enb    (mac_slim_bram_xy_ena),
    .regceb (mac_slim_bram_xy_reg_ena),
    .addrb  ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}),
    .doutb  (mac_slim_bram_x_dout)
);
ip_bram_18k slim_bram_y
(
    // clocks
    .clka   (clk),
    .clkb   (clk),
    // port A: write (enable doubles as write enable)
    .ena    (mgr_slim_bram_xy_ena),
    .wea    (mgr_slim_bram_xy_ena),
    .addra  ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
    .dina   (mgr_slim_bram_y_din),
    // port B: read
    .enb    (mac_slim_bram_xy_ena),
    .regceb (mac_slim_bram_xy_reg_ena),
    .addrb  ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}),
    .doutb  (mac_slim_bram_y_dout)
);
//
// Enable, Ready
//
// Main stimulus: release reset, present the operand write sequence on the
// tb_* buses, pulse `ena` for one clock period, wait, then run the checks.
// NOTE(review): the tb_* buses are not visibly connected to BRAM port A
// (which uses mgr_*) in this part of the file -- confirm the hook-up.
reg ena = 1'b0;
integer i;
initial begin
// hold reset for 10 clock periods
for (i=0; i<10; i=i+1)
wait_clock_tick;
rst = 1'b0;
// idle for 10 clock periods after reset
for (i=0; i<10; i=i+1)
wait_clock_tick;
tb_fat_bram_xy_ena = 1'b1;
tb_slim_bram_xy_ena = 1'b1;
// T1 -> X side, T2 -> Y side, for both the fat and the slim T1T2 banks
for (i=0; i<32; i=i+1) begin
tb_fat_bram_xy_bank = BANK_FAT_T1T2;
tb_fat_bram_xy_addr = i[7:0];
tb_fat_bram_x_din = T1[i];
tb_fat_bram_y_din = T2[i];
tb_slim_bram_xy_bank = BANK_SLIM_T1T2;
tb_slim_bram_xy_addr = i[7:0];
tb_slim_bram_x_din = T1[i];
tb_slim_bram_y_din = T2[i];
wait_clock_tick;
end
// N_COEFF words 0..31 -> slim N_COEFF bank (same value on X and Y)
for (i=0; i<32; i=i+1) begin
tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF;
tb_slim_bram_xy_addr = i[7:0];
tb_slim_bram_x_din = N_COEFF[i];
tb_slim_bram_y_din = N_COEFF[i];
wait_clock_tick;
end
// N_COEFF word 32 -> slim EXT bank
for (i=32; i<33; i=i+1) begin
tb_slim_bram_xy_bank = BANK_SLIM_EXT;
tb_slim_bram_xy_addr = 0; // ! the extra word goes to address 0, not to address i
tb_slim_bram_x_din = N_COEFF[i];
tb_slim_bram_y_din = N_COEFF[i];
wait_clock_tick;
end
// N -> fat N bank (same value on X and Y)
for (i=0; i<32; i=i+1) begin
tb_fat_bram_xy_bank = BANK_FAT_N;
tb_fat_bram_xy_addr = i[7:0];
tb_fat_bram_x_din = N[i];
tb_fat_bram_y_din = N[i];
wait_clock_tick;
end
// stop writing and invalidate the write buses
tb_fat_bram_xy_ena = 1'b0;
tb_slim_bram_xy_ena = 1'b0;
tb_fat_bram_xy_bank = {3{1'bX}};
tb_fat_bram_xy_addr = {8{1'bX}};
tb_fat_bram_x_din = {18{1'bX}};
tb_fat_bram_y_din = {18{1'bX}};
tb_slim_bram_xy_bank = {2{1'bX}};
tb_slim_bram_xy_addr = {8{1'bX}};
tb_slim_bram_x_din = {18{1'bX}};
tb_slim_bram_y_din = {18{1'bX}};
for (i=0; i<10; i=i+1)
wait_clock_tick;
// one-period start pulse
ena = 1'b1;
wait_clock_tick;
ena = 1'b0;
// fixed wait of 10000 clock periods before checking results
for (i=0; i<10000; i=i+1)
wait_clock_tick;
// verification tasks are defined outside the visible part of the file
verify_ab;
verify_q;
verify_m;
end
//
// DSPs
//
// DSP array control and data signals, one identical set for X and one for Y.
// ce_* are registered clock enables, mode_z is the per-multiplier mode vector,
// a is the packed fat BRAM read data (5 x 18 bits), b is the 17-bit operand
// taken from the slim BRAM read data, p is the packed product output.
reg dsp_x_ce_a;
reg dsp_x_ce_b;
reg dsp_x_ce_b_dly; // ce_b delayed by one clock
reg dsp_x_ce_m;
reg dsp_x_ce_p;
reg dsp_x_ce_mode;
reg [9 -1:0] dsp_x_mode_z = {9{1'b1}};
wire [5*18-1:0] dsp_x_a;
reg [1*17-1:0] dsp_x_b;
wire [9*47-1:0] dsp_x_p;
reg dsp_y_ce_a;
reg dsp_y_ce_b;
reg dsp_y_ce_b_dly; // ce_b delayed by one clock
reg dsp_y_ce_m;
reg dsp_y_ce_p;
reg dsp_y_ce_mode;
reg [9 -1:0] dsp_y_mode_z = {9{1'b1}};
wire [5*18-1:0] dsp_y_a;
reg [1*17-1:0] dsp_y_b;
wire [9*47-1:0] dsp_y_p;
// Pack the per-slot fat BRAM read data into the flat DSP "a" buses:
// slot z occupies bits [18*z +: 18] of both dsp_x_a and dsp_y_a.
generate for (z=0; z<((NUM_MULTS/2)+1); z=z+1)
    begin : gen_dsp_xy_a_split
        assign {dsp_y_a[18*z+:18], dsp_x_a[18*z+:18]} =
               {mac_fat_bram_y_dout[z], mac_fat_bram_x_dout[z]};
    end
endgenerate
// Delay the B-operand clock enables by one clock.
always @(posedge clk) begin
    dsp_x_ce_b_dly <= dsp_x_ce_b;
    dsp_y_ce_b_dly <= dsp_y_ce_b;
end
// Stages of the mode_z delay line, all initialised to all-ones.
reg [9 -1:0] dsp_xy_mode_z_adv1 = 9'b1_1111_1111;
reg [9 -1:0] dsp_xy_mode_z_adv2 = 9'b1_1111_1111;
reg [9 -1:0] dsp_xy_mode_z_adv3 = 9'b1_1111_1111;
reg [9 -1:0] dsp_xy_mode_z_adv4 = 9'b1_1111_1111;
// X and Y DSP arrays: identical instances, each fed by its own control and
// data signal set.
dsp_array dsp_x
(
    .clk     (clk),
    // clock enables
    .ce_a    (dsp_x_ce_a),
    .ce_b    (dsp_x_ce_b),
    .ce_m    (dsp_x_ce_m),
    .ce_p    (dsp_x_ce_p),
    .ce_mode (dsp_x_ce_mode),
    // mode and data
    .mode_z  (dsp_x_mode_z),
    .a       (dsp_x_a),
    .b       (dsp_x_b),
    .p       (dsp_x_p)
);
dsp_array dsp_y
(
    .clk     (clk),
    // clock enables
    .ce_a    (dsp_y_ce_a),
    .ce_b    (dsp_y_ce_b),
    .ce_m    (dsp_y_ce_m),
    .ce_p    (dsp_y_ce_p),
    .ce_mode (dsp_y_ce_mode),
    // mode and data
    .mode_z  (dsp_y_mode_z),
    .a       (dsp_y_a),
    .b       (dsp_y_b),
    .p       (dsp_y_p)
);
//
// FSM State and Next States
//
// FSM state register with synchronous reset to IDLE. fsm_state_next is the
// value loaded on every clock while reset is low.
reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
always @(posedge clk) begin
    fsm_state <= fsm_state_next;
    if (rst) fsm_state <= FSM_STATE_IDLE;   // reset overrides the normal update
end
// Index of the last operand word, and the word just before it.
localparam [7:0] index_last        = 8'd31;
localparam [7:0] index_last_minus1 = index_last - 8'd1;
//
// Column
//
// Column bookkeeping registers.
reg [4:0] col_index;        // column currently being processed
reg [4:0] col_index_prev;   // col_index delayed by one clock
reg [4:0] col_index_last;   // index of the final column
reg [4:0] col_index_next1;  // precomputed index of the following column
reg       col_is_last;      // set while the final column is being processed
always @(posedge clk) begin
    col_index_prev <= col_index;
end
// Registered "address sweep finished" flags: almost -> surely -> tardy are
// successive one-clock delays of the combinational detect.
reg mult_square_addr_almost_done_flop;
reg mult_square_addr_surely_done_flop;
reg mult_triangle_addr_almost_done_flop;
reg mult_triangle_addr_surely_done_flop;
reg mult_triangle_addr_tardy_done_flop;
reg mult_rectangle_addr_almost_done_flop;
reg mult_rectangle_addr_surely_done_flop;
reg mult_rectangle_addr_tardy_done_flop;
// Combinational detects on the slim read address.
// Square and rectangle: the address is one short of the last index.
wire mult_square_addr_almost_done_comb    = mac_slim_bram_xy_addr == index_last_minus1;
// Triangle: low three bits are one short of the last index and the upper
// five bits equal the current column.
wire mult_triangle_addr_almost_done_comb  = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) &&
                                            (mac_slim_bram_xy_addr[7:3] == col_index);
wire mult_rectangle_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
// Square phase: the "almost done" flag follows the combinational detect only
// in the BUSY states and is cleared otherwise; "surely done" is the same flag
// one clock later.
always @(posedge clk) begin
    case (fsm_state)
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY: mult_square_addr_almost_done_flop <= mult_square_addr_almost_done_comb;
        default:                          mult_square_addr_almost_done_flop <= 1'b0;
    endcase
    mult_square_addr_surely_done_flop <= mult_square_addr_almost_done_flop;
end
// Triangle phase: the "almost done" flag follows the combinational detect
// only in the BUSY states and is cleared otherwise; "surely" and "tardy" are
// that flag delayed by one and two clocks.
always @(posedge clk) begin
    case (fsm_state)
        FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mult_triangle_addr_almost_done_flop <= mult_triangle_addr_almost_done_comb;
        default:                            mult_triangle_addr_almost_done_flop <= 1'b0;
    endcase
    mult_triangle_addr_surely_done_flop <= mult_triangle_addr_almost_done_flop;
    mult_triangle_addr_tardy_done_flop  <= mult_triangle_addr_surely_done_flop;
end
// Rectangle phase: the "almost done" flag follows the combinational detect
// only in the BUSY states and is cleared otherwise; "surely" and "tardy" are
// that flag delayed by one and two clocks.
always @(posedge clk) begin
    case (fsm_state)
        FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mult_rectangle_addr_almost_done_flop <= mult_rectangle_addr_almost_done_comb;
        default:                             mult_rectangle_addr_almost_done_flop <= 1'b0;
    endcase
    mult_rectangle_addr_surely_done_flop <= mult_rectangle_addr_almost_done_flop;
    mult_rectangle_addr_tardy_done_flop  <= mult_rectangle_addr_surely_done_flop;
end
//
// Recombinator Interface
//
// Signals connected to the rcmb_* ports of the recombinator instance: a bank,
// address, X/Y data and valid flag for the fat side and for the slim side,
// plus the recombinator's rdy flag.
wire [ 2:0] recomb_fat_bram_xy_bank;
wire [ 7:0] recomb_fat_bram_xy_addr;
wire [17:0] recomb_fat_bram_x_dout;
wire [17:0] recomb_fat_bram_y_dout;
wire recomb_fat_bram_xy_dout_valid;
// NOTE(review): every other slim bank signal in this file is 2 bits wide;
// confirm that 3 bits matches the recombinator's rcmb_slim_bram_xy_bank port.
wire [ 2:0] recomb_slim_bram_xy_bank;
wire [ 7:0] recomb_slim_bram_xy_addr;
wire [17:0] recomb_slim_bram_x_dout;
wire [17:0] recomb_slim_bram_y_dout;
wire recomb_slim_bram_xy_dout_valid;
wire recomb_rdy;
//
// FSM Transition Logic
//
// State to enter once a column of the given phase has been finished.
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
//
// Slim - Address
//
// Slim BRAM read address, selected by the upcoming FSM state.
//   *_INIT        : restart at 0.
//   *_TRIG/*_BUSY : count up by one, with a phase-specific wrap:
//       square    -> 0 when the "almost done" flag is set
//       triangle  -> 0 on "almost done", or on "surely done" in the last column
//       rectangle -> 1 on "almost done" or "surely done"
//   anything else : don't care.
always @(posedge clk) begin
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_INIT,
        FSM_STATE_MULT_SQUARE_COL_N_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_N_INIT:
            mac_slim_bram_xy_addr <= 8'd0;

        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY:
            mac_slim_bram_xy_addr <= mult_square_addr_almost_done_flop ?
                                     8'd0 : mac_slim_bram_xy_addr + 1'b1;

        FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
            mac_slim_bram_xy_addr <= mult_triangle_addr_almost_done_flop || (col_is_last && mult_triangle_addr_surely_done_flop) ?
                                     8'd0 : mac_slim_bram_xy_addr + 1'b1;

        FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
            mac_slim_bram_xy_addr <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ?
                                     8'd1 : mac_slim_bram_xy_addr + 1'b1;

        default:
            mac_slim_bram_xy_addr <= 8'dX;
    endcase
end
// Constant table of initial low address bits for the main fat BRAM read
// addresses: entry k holds the odd value 2*k+1 (the generate loop visits
// z = 1, 3, 5, ... below NUM_MULTS and stores z at index (z-1)/2).
// The table has four entries, so it is only fully populated (and not
// over-indexed) when NUM_MULTS is 8.
wire [2:0] fat_bram_offset_rom[0:3];
generate for (z=1; z<NUM_MULTS; z=z+2)
begin : gen_fat_bram_offset
assign fat_bram_offset_rom[(z-1)/2] = z[2:0];
end
endgenerate
//
// Fat - Address
//
// Fat BRAM read addresses, selected by the upcoming FSM state.
// Note (kept from the original): this could be reworked by having 8 address
// regs instead of 4 and using shifts instead of subtractions.
integer j;
always @(posedge clk) begin
    // Main addresses (indices below NUM_MULTS/2). The three phases behave
    // identically: COL_0_INIT loads {0, offset}, COL_N_INIT loads
    // {next column, offset}, TRIG/BUSY step the address with
    // mac_fat_bram_xy_addr_next(); everything else is don't care.
    for (j=0; j<(NUM_MULTS/2); j=j+1)
        case (fsm_state_next)
            FSM_STATE_MULT_SQUARE_COL_0_INIT,
            FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            FSM_STATE_MULT_RECTANGLE_COL_0_INIT:
                mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};

            FSM_STATE_MULT_SQUARE_COL_N_INIT,
            FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            FSM_STATE_MULT_RECTANGLE_COL_N_INIT:
                mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};

            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY,
            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
                mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);

            default:
                mac_fat_bram_xy_addr[j] <= 8'dX;
        endcase

    // Auxiliary address (index 4). Square and triangle phases start it at 1
    // and step it like the main addresses; in the rectangle phase it follows
    // the recombinator's address whenever the recombinator flags valid data.
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_INIT,
        FSM_STATE_MULT_SQUARE_COL_N_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_N_INIT:
            mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};

        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
            mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);

        FSM_STATE_MULT_RECTANGLE_COL_0_INIT:
            mac_fat_bram_xy_addr[4] <= 8'dX;

        FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
        FSM_STATE_MULT_RECTANGLE_HOLDOFF:
            mac_fat_bram_xy_addr[4] <= recomb_fat_bram_xy_dout_valid ? recomb_fat_bram_xy_addr : 8'dX;

        default:
            mac_fat_bram_xy_addr[4] <= 8'dX;
    endcase
end
// Slim BRAM read bank, selected by the upcoming FSM state.
//   square    : always the T1T2 bank
//   triangle  : N_COEFF bank, switching to EXT in the last column once the
//               "almost done" or "surely done" flag is set
//   rectangle : Q bank, switching to EXT once the "almost done" or
//               "surely done" flag is set
//   otherwise : don't care
always @(posedge clk) begin
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_INIT,
        FSM_STATE_MULT_SQUARE_COL_N_INIT,
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY:
            mac_slim_bram_xy_bank <= BANK_SLIM_T1T2;

        FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
            mac_slim_bram_xy_bank <= col_is_last && (mult_triangle_addr_almost_done_flop || mult_triangle_addr_surely_done_flop) ?
                                     BANK_SLIM_EXT : BANK_SLIM_N_COEFF;

        FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
            mac_slim_bram_xy_bank <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ?
                                     BANK_SLIM_EXT : BANK_SLIM_Q;

        default:
            mac_slim_bram_xy_bank <= 2'bXX;
    endcase
end
// Main fat BRAM read bank: T1T2 during square, ABL during triangle, N during
// rectangle, don't care otherwise.
always @(posedge clk) begin
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_INIT,
        FSM_STATE_MULT_SQUARE_COL_N_INIT,
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY:
            mac_fat_bram_xy_bank <= BANK_FAT_T1T2;

        FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
            mac_fat_bram_xy_bank <= BANK_FAT_ABL;

        FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
            mac_fat_bram_xy_bank <= BANK_FAT_N;

        default:
            mac_fat_bram_xy_bank <= 3'bXXX;
    endcase
end
// Auxiliary fat BRAM read bank.
//   square    : T1T2
//   triangle  : ABH in INIT/TRIG, ABL in BUSY
//   rectangle : (including HOLDOFF) translated from the recombinator's bank
//               while it flags valid data: ML -> ABL, MH -> ABH, EXT -> EXT;
//               don't care when not valid or for any other bank
//   otherwise : don't care
always @(posedge clk) begin
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_INIT,
        FSM_STATE_MULT_SQUARE_COL_N_INIT,
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY:
            mac_fat_bram_xy_bank_aux <= BANK_FAT_T1T2;

        FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:
            mac_fat_bram_xy_bank_aux <= BANK_FAT_ABH;

        FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
            mac_fat_bram_xy_bank_aux <= BANK_FAT_ABL;

        FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
        FSM_STATE_MULT_RECTANGLE_HOLDOFF:
            if (recomb_fat_bram_xy_dout_valid)
                case (recomb_fat_bram_xy_bank)
                    BANK_FAT_ML:  mac_fat_bram_xy_bank_aux <= BANK_FAT_ABL;
                    BANK_FAT_MH:  mac_fat_bram_xy_bank_aux <= BANK_FAT_ABH;
                    BANK_FAT_EXT: mac_fat_bram_xy_bank_aux <= BANK_FAT_EXT;
                    default:      mac_fat_bram_xy_bank_aux <= 3'bXXX;
                endcase
            else
                mac_fat_bram_xy_bank_aux <= 3'bXXX;

        default:
            mac_fat_bram_xy_bank_aux <= 3'bXXX;
    endcase
end
// Slim BRAM read enable, selected by the upcoming FSM state.
//   any INIT/TRIG state : enabled
//   square BUSY         : enabled until "almost done"
//   triangle BUSY       : enabled until "almost done", or until "surely done"
//                         in the last column
//   rectangle BUSY      : enabled until "surely done"
//   otherwise           : disabled
always @(posedge clk) begin
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_INIT,
        FSM_STATE_MULT_SQUARE_COL_N_INIT,
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:
            mac_slim_bram_xy_ena <= 1'b1;

        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY:
            mac_slim_bram_xy_ena <= ~mult_square_addr_almost_done_flop;

        FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
            mac_slim_bram_xy_ena <= col_is_last ? ~mult_triangle_addr_surely_done_flop
                                                : ~mult_triangle_addr_almost_done_flop;

        FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
            mac_slim_bram_xy_ena <= ~mult_rectangle_addr_surely_done_flop;

        default:
            mac_slim_bram_xy_ena <= 1'b0;
    endcase
end
// Main fat BRAM read enable: on in every INIT/TRIG/BUSY state of all three
// phases, off otherwise.
always @(posedge clk) begin
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_INIT,
        FSM_STATE_MULT_SQUARE_COL_N_INIT,
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
            mac_fat_bram_xy_ena <= 1'b1;

        default:
            mac_fat_bram_xy_ena <= 1'b0;
    endcase
end
// Auxiliary fat BRAM read enable: on throughout the triangle phase, off in
// the first rectangle INIT state, then following the recombinator's valid
// flag for the rest of the rectangle phase (including HOLDOFF); off otherwise
// (which includes the whole square phase).
always @(posedge clk) begin
    case (fsm_state_next)
        FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
            mac_fat_bram_xy_ena_aux <= 1'b1;

        FSM_STATE_MULT_RECTANGLE_COL_0_INIT:
            mac_fat_bram_xy_ena_aux <= 1'b0;

        FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
        FSM_STATE_MULT_RECTANGLE_HOLDOFF:
            mac_fat_bram_xy_ena_aux <= recomb_fat_bram_xy_dout_valid;

        default:
            mac_fat_bram_xy_ena_aux <= 1'b0;
    endcase
end
// BRAM output-register enables are the read enables delayed by one clock.
always @(posedge clk) begin
    mac_slim_bram_xy_reg_ena    <= mac_slim_bram_xy_ena;
    mac_fat_bram_xy_reg_ena     <= mac_fat_bram_xy_ena;
    mac_fat_bram_xy_reg_ena_aux <= mac_fat_bram_xy_ena_aux;
end
// Operand pairing mode, fixed at 1 in the visible code.
reg ladder_mode = 1'b1; // 0 = X:T1*T2, Y:T2*T2
// 1 = X:T1*T2, Y:T2*T1
// Swap request for the slim operands: set when the FSM is in the first
// square TRIG state, cleared in the first triangle TRIG state, and held in
// every other state (the case has no default). It has no initial value, so it
// reads X until the first of those two states is reached.
reg dsp_swap_xy;
always @(posedge clk)
//
case (fsm_state)
FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_swap_xy <= 1'b1;
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_swap_xy <= 1'b0;
endcase
// DSP B operands: the low 17 bits of the slim BRAM read data, captured while
// the delayed output-register enable is high and forced to X otherwise.
// X and Y are exchanged only when a swap is requested and ladder_mode is 0.
always @(posedge clk) begin
    if (!mac_slim_bram_xy_reg_ena_dly) begin
        dsp_x_b <= {17{1'bX}};
        dsp_y_b <= {17{1'bX}};
    end
    else if (!dsp_swap_xy) begin
        // no swap requested: straight through
        dsp_x_b <= mac_slim_bram_x_dout[16:0];
        dsp_y_b <= mac_slim_bram_y_dout[16:0];
    end
    else if (!ladder_mode) begin
        // swap requested and ladder_mode == 0: exchange X and Y
        dsp_x_b <= mac_slim_bram_y_dout[16:0];
        dsp_y_b <= mac_slim_bram_x_dout[16:0];
    end
    else begin
        // swap requested but ladder_mode == 1: straight through
        dsp_x_b <= mac_slim_bram_x_dout[16:0];
        dsp_y_b <= mac_slim_bram_y_dout[16:0];
    end
end
// Next fat BRAM read address: counts down by one and reloads with the
// supplied last address after reaching zero.
//   mac_fat_bram_xy_addr_current : present address
//   mac_fat_bram_xy_addr_last    : value to reload with after zero
function [7:0] mac_fat_bram_xy_addr_next;
    input [7:0] mac_fat_bram_xy_addr_current;
    input [7:0] mac_fat_bram_xy_addr_last;
    begin
        // assume the wrap case, then override when there is still room to count down
        mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_last;
        if (mac_fat_bram_xy_addr_current > 8'd0)
            mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_current - 1'b1;
    end
endfunction
// DSP clock-enable pipeline, identical for X and Y.
//   ce_a    : slim output-register enable OR its delayed copy
//   ce_b    : delayed slim output-register enable
//   ce_m    : ce_b delayed by one clock (ce_b_dly), registered
//   ce_p    : ce_m one clock later
//   ce_mode : same source as ce_m
always @(posedge clk) begin
    dsp_x_ce_a    <= mac_slim_bram_xy_reg_ena | mac_slim_bram_xy_reg_ena_dly;
    dsp_y_ce_a    <= mac_slim_bram_xy_reg_ena | mac_slim_bram_xy_reg_ena_dly;

    dsp_x_ce_b    <= mac_slim_bram_xy_reg_ena_dly;
    dsp_y_ce_b    <= mac_slim_bram_xy_reg_ena_dly;

    dsp_x_ce_m    <= dsp_x_ce_b_dly;
    dsp_y_ce_m    <= dsp_y_ce_b_dly;

    dsp_x_ce_p    <= dsp_x_ce_m;
    dsp_y_ce_p    <= dsp_y_ce_m;

    dsp_x_ce_mode <= dsp_x_ce_b_dly;
    dsp_y_ce_mode <= dsp_y_ce_b_dly;
end
// Suspend the calling process for exactly one clock period. This is a plain
// time delay; it does not wait for a clock edge.
task wait_clock_tick;
    #(`CLK_PERIOD_NS);
endtask
//
// Increment Logic
//
// Column counters, updated according to the upcoming FSM state.
//   any COL_0_INIT : restart at column 0 and latch the last column index
//                    (upper five bits of index_last)
//   any COL_N_INIT : advance to the precomputed next column; the next-column
//                    value wraps to 0 after the last column
//   otherwise      : hold
always @(posedge clk) begin
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_0_INIT:
            begin
                col_index       <= 5'd0;
                col_index_last  <= index_last[7:3];
                col_index_next1 <= 5'd1;
                col_is_last     <= 1'b0;
            end

        FSM_STATE_MULT_SQUARE_COL_N_INIT,
        FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
        FSM_STATE_MULT_RECTANGLE_COL_N_INIT:
            begin
                col_index       <= col_index_next1;
                col_is_last     <= col_index_next1 == col_index_last;
                col_index_next1 <= col_index_next1 == col_index_last ? 5'd0 : col_index_next1 + 5'd1;
            end
    endcase
end
// State to enter after finishing a column: the phase's HOLDOFF state when the
// last column has just been processed, otherwise that phase's COL_N_INIT.
assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
// First stage of the mode_z delay line, selected by the upcoming FSM state.
//   any TRIG state : all zeros
//   square BUSY    : per-address mask from calc_mac_mode_z_square()
//   triangle BUSY  : all ones
//   rectangle BUSY : per-address mask from calc_mac_mode_z_rectangle()
//   otherwise      : all ones
always @(posedge clk) begin
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:
            dsp_xy_mode_z_adv4 <= {9{1'b0}};

        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY:
            dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);

        FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
            dsp_xy_mode_z_adv4 <= {9{1'b1}};

        FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
            dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, mac_slim_bram_xy_addr_dly);

        default:
            dsp_xy_mode_z_adv4 <= {9{1'b1}};
    endcase
end
// mode_z delay line: adv4 -> adv3 -> adv2 -> adv1 -> DSP mode_z registers.
// X and Y always receive the same value.
always @(posedge clk) begin
    dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4;
    dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3;
    dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2;
    dsp_x_mode_z       <= dsp_xy_mode_z_adv1;
    dsp_y_mode_z       <= dsp_xy_mode_z_adv1;
end
// mode_z mask for the square phase.
//   col_index_value             : column index to compare against
//   mac_slim_bram_xy_addr_value : slim BRAM address being evaluated
// Returns a (NUM_MULTS+1)-bit vector. When the upper five address bits equal
// the column index, the bit selected by the lower three address bits is 0 and
// all other bits are 1; otherwise every bit is 1.
// The mask is built with a shift rather than a table of 8-bit literals, so its
// width follows NUM_MULTS and the result is always assigned (the former
// case statement had no default).
function [NUM_MULTS:0] calc_mac_mode_z_square;
    input [ 4:0] col_index_value;
    input [ 7:0] mac_slim_bram_xy_addr_value;
    begin
        if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
            // one-hot at the selected position, inverted -> a single zero bit
            calc_mac_mode_z_square = ~({{NUM_MULTS{1'b0}}, 1'b1} << mac_slim_bram_xy_addr_value[2:0]);
        else
            calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
    end
endfunction
// Computes the mode_z word used in the rectangle-phase BUSY states.
//   col_index_value             - compared against address bits [7:3]
//   mac_slim_bram_xy_addr_value - bits [2:0] choose which of bits [7:0] is cleared
// When address bits [7:3] equal col_index_value, the result has bit 8 set and
// exactly one of bits [7:0] cleared (bit number = address bits [2:0]).
// Otherwise every bit of the (NUM_MULTS+1)-bit result is set.
// NOTE(review): the literals are 9 bits wide, i.e. they assume NUM_MULTS = 8 - confirm.
function [NUM_MULTS:0] calc_mac_mode_z_rectangle;
    input [ 4:0] col_index_value;
    input [ 7:0] mac_slim_bram_xy_addr_value;
    begin
        if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
            case (mac_slim_bram_xy_addr_value[2:0])
                3'd0: calc_mac_mode_z_rectangle = 9'b1_1111_1110;
                3'd1: calc_mac_mode_z_rectangle = 9'b1_1111_1101;
                3'd2: calc_mac_mode_z_rectangle = 9'b1_1111_1011;
                3'd3: calc_mac_mode_z_rectangle = 9'b1_1111_0111;
                3'd4: calc_mac_mode_z_rectangle = 9'b1_1110_1111;
                3'd5: calc_mac_mode_z_rectangle = 9'b1_1101_1111;
                3'd6: calc_mac_mode_z_rectangle = 9'b1_1011_1111;
                3'd7: calc_mac_mode_z_rectangle = 9'b1_0111_1111;
            endcase
        else
            calc_mac_mode_z_rectangle = {(NUM_MULTS+1){1'b1}};
    end
endfunction
// Registered enables handed to the recombinator (ports ena_x / ena_y).
// Each one is set for a cycle after the matching ce_a was high while
// ce_b, ce_m and ce_p were all low.
reg recomb_x_ena = 1'b0;
reg recomb_y_ena = 1'b0;
always @(posedge clk) begin
    recomb_x_ena <= dsp_x_ce_a && !(dsp_x_ce_b || dsp_x_ce_m || dsp_x_ce_p);
    recomb_y_ena <= dsp_y_ce_a && !(dsp_y_ce_b || dsp_y_ce_m || dsp_y_ce_p);
end
// Instance of the part recombinator under test. Port directions are not visible
// from this file section; the grouping below follows the signal names only.
modexpng_part_recombinator recomb
(
// clock; recomb_rdy is what the *_HOLDOFF states of the FSM wait for
.clk (clk),
.rdy (recomb_rdy),
// FSM / index context
.fsm_state_next (fsm_state_next),
.index_last (index_last),
// DSP product clock enables and the registered recombinator enables
.dsp_x_ce_p (dsp_x_ce_p),
.dsp_y_ce_p (dsp_y_ce_p),
.ena_x (recomb_x_ena),
.ena_y (recomb_y_ena),
// DSP product buses
.dsp_x_p (dsp_x_p),
.dsp_y_p (dsp_y_p),
// column bookkeeping and the slim BRAM address/bank used by the MAC side
.col_index (col_index),
.col_index_last (col_index_last),
.slim_bram_xy_addr (mac_slim_bram_xy_addr),
.slim_bram_xy_bank (mac_slim_bram_xy_bank),
// "fat" BRAM result channel (bank, address, x/y data, valid)
.rcmb_fat_bram_xy_bank (recomb_fat_bram_xy_bank),
.rcmb_fat_bram_xy_addr (recomb_fat_bram_xy_addr),
.rcmb_fat_bram_x_dout (recomb_fat_bram_x_dout),
.rcmb_fat_bram_y_dout (recomb_fat_bram_y_dout),
.rcmb_fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid),
// "slim" BRAM result channel (bank, address, x/y data, valid)
.rcmb_slim_bram_xy_bank (recomb_slim_bram_xy_bank),
.rcmb_slim_bram_xy_addr (recomb_slim_bram_xy_addr),
.rcmb_slim_bram_x_dout (recomb_slim_bram_x_dout),
.rcmb_slim_bram_y_dout (recomb_slim_bram_y_dout),
.rcmb_slim_bram_xy_dout_valid (recomb_slim_bram_xy_dout_valid)
);
// Shadow arrays filled from the recombinator's X-side outputs; the verify_* tasks
// compare them against the expected AB / Q / M tables.
reg [17:0] AB_READ[0:63];
reg [17:0] Q_READ[0:32];
reg [17:0] M_READ[0:64];

always @(posedge clk) begin
    // "fat" channel: the bank picks the array and the 32-word half,
    // the address modulo 32 picks the word inside that half
    if (recomb_fat_bram_xy_dout_valid) begin
        case (recomb_fat_bram_xy_bank)
            BANK_FAT_ABL: AB_READ[recomb_fat_bram_xy_addr % 32]        <= recomb_fat_bram_x_dout;
            BANK_FAT_ABH: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
            BANK_FAT_ML:  M_READ[recomb_fat_bram_xy_addr % 32]         <= recomb_fat_bram_x_dout;
            BANK_FAT_MH:  M_READ[32 + (recomb_fat_bram_xy_addr % 32)]  <= recomb_fat_bram_x_dout;
            BANK_FAT_EXT: M_READ[64 + (recomb_fat_bram_xy_addr % 32)]  <= recomb_fat_bram_x_dout;
        endcase
    end
    // "slim" channel: Q words are stored by address; from the EXT bank only
    // address 1 is recorded, as the extra word Q_READ[32]
    if (recomb_slim_bram_xy_dout_valid) begin
        case (recomb_slim_bram_xy_bank)
            BANK_SLIM_Q: begin
                Q_READ[recomb_slim_bram_xy_addr] <= recomb_slim_bram_x_dout;
            end
            BANK_SLIM_EXT: begin
                if (recomb_slim_bram_xy_addr == 8'd1)
                    Q_READ[32] <= recomb_slim_bram_x_dout;
            end
        endcase
    end
end
// Registered source select for the mgr_fat_* signals: values supplied by the
// testbench win, then recombinator results; with neither active the enable
// is dropped and bank/address/data are driven to X.
always @(posedge clk) begin
    if (tb_fat_bram_xy_ena) begin
        // testbench-supplied values
        mgr_fat_bram_xy_bank <= tb_fat_bram_xy_bank;
        mgr_fat_bram_xy_addr <= tb_fat_bram_xy_addr;
        mgr_fat_bram_x_din   <= tb_fat_bram_x_din;
        mgr_fat_bram_y_din   <= tb_fat_bram_y_din;
        mgr_fat_bram_xy_ena  <= 1'b1;
    end else if (recomb_fat_bram_xy_dout_valid) begin
        // recombinator output forwarded
        mgr_fat_bram_xy_bank <= recomb_fat_bram_xy_bank;
        mgr_fat_bram_xy_addr <= recomb_fat_bram_xy_addr;
        mgr_fat_bram_x_din   <= recomb_fat_bram_x_dout;
        mgr_fat_bram_y_din   <= recomb_fat_bram_y_dout;
        mgr_fat_bram_xy_ena  <= 1'b1;
    end else begin
        // idle
        mgr_fat_bram_xy_bank <= 3'bx;
        mgr_fat_bram_xy_addr <= 8'hx;
        mgr_fat_bram_x_din   <= 18'bx;
        mgr_fat_bram_y_din   <= 18'bx;
        mgr_fat_bram_xy_ena  <= 1'b0;
    end
end
// Registered source select for the mgr_slim_* signals: values supplied by the
// testbench win, then recombinator results; with neither active the enable
// is dropped and bank/address/data are driven to X.
always @(posedge clk) begin
    if (tb_slim_bram_xy_ena) begin
        // testbench-supplied values
        mgr_slim_bram_xy_bank <= tb_slim_bram_xy_bank;
        mgr_slim_bram_xy_addr <= tb_slim_bram_xy_addr;
        mgr_slim_bram_x_din   <= tb_slim_bram_x_din;
        mgr_slim_bram_y_din   <= tb_slim_bram_y_din;
        mgr_slim_bram_xy_ena  <= 1'b1;
    end else if (recomb_slim_bram_xy_dout_valid) begin
        // recombinator output forwarded
        mgr_slim_bram_xy_bank <= recomb_slim_bram_xy_bank;
        mgr_slim_bram_xy_addr <= recomb_slim_bram_xy_addr;
        mgr_slim_bram_x_din   <= recomb_slim_bram_x_dout;
        mgr_slim_bram_y_din   <= recomb_slim_bram_y_dout;
        mgr_slim_bram_xy_ena  <= 1'b1;
    end else begin
        // idle
        mgr_slim_bram_xy_bank <= 3'bx;
        mgr_slim_bram_xy_addr <= 8'hx;
        mgr_slim_bram_x_din   <= 18'bx;
        mgr_slim_bram_y_din   <= 18'bx;
        mgr_slim_bram_xy_ena  <= 1'b0;
    end
end
// Compares all 64 words of AB_READ with the expected AB table, printing one line
// per word (mismatches are tagged "<???>") followed by an overall verdict.
// Uses the module-level loop variable i. The comparison is 4-state (!==), so an
// X or Z in a captured word counts as a mismatch.
task verify_ab;
    reg verify_ab_ok;
    begin
        verify_ab_ok = 1'b1;
        for (i=0; i<64; i=i+1) begin
            if (AB_READ[i] !== AB[i]) begin
                $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x <???>", i, AB[i], AB_READ[i]);
                verify_ab_ok = 1'b0;
            end else begin
                $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x", i, AB[i], AB_READ[i]);
            end
        end
        if (!verify_ab_ok)
            $display("AB is WRONG!");
        else
            $display("AB is OK.");
    end
endtask
// Compares all 33 words of Q_READ with the expected Q table, printing one line
// per word (mismatches are tagged "<???>") followed by an overall verdict.
// Uses the module-level loop variable i. The comparison is 4-state (!==), so an
// X or Z in a captured word counts as a mismatch.
task verify_q;
    reg verify_q_ok;
    begin
        verify_q_ok = 1'b1;
        for (i=0; i<33; i=i+1) begin
            if (Q_READ[i] !== Q[i]) begin
                $display("Q / Q_READ [%02d] = 0x%05x / 0x%05x <???>", i, Q[i], Q_READ[i]);
                verify_q_ok = 1'b0;
            end else begin
                $display("Q / Q_READ [%02d] = 0x%05x / 0x%05x", i, Q[i], Q_READ[i]);
            end
        end
        if (!verify_q_ok)
            $display("Q is WRONG!");
        else
            $display("Q is OK.");
    end
endtask
// Compares all 65 words of M_READ with the expected M table, printing one line
// per word (mismatches are tagged "<???>") followed by an overall verdict.
// Uses the module-level loop variable i. The comparison is 4-state (!==), so an
// X or Z in a captured word counts as a mismatch.
task verify_m;
    reg verify_m_ok;
    begin
        verify_m_ok = 1'b1;
        for (i=0; i<65; i=i+1) begin
            if (M_READ[i] !== M[i]) begin
                $display("M / M_READ [%02d] = 0x%05x / 0x%05x <???>", i, M[i], M_READ[i]);
                verify_m_ok = 1'b0;
            end else begin
                $display("M / M_READ [%02d] = 0x%05x / 0x%05x", i, M[i], M_READ[i]);
            end
        end
        if (!verify_m_ok)
            $display("M is WRONG!");
        else
            $display("M is OK.");
    end
endtask
// Per-phase "address generation done" flags consumed by the BUSY states of the FSM.
// Square uses the "surely" flop, rectangle the "tardy" flop; triangle uses the
// "tardy" flop on the last column and the "surely" flop on every other column.
wire mult_square_addr_done    = mult_square_addr_surely_done_flop,
     mult_triangle_addr_done  = col_is_last ? mult_triangle_addr_tardy_done_flop
                                            : mult_triangle_addr_surely_done_flop,
     mult_rectangle_addr_done = mult_rectangle_addr_tardy_done_flop;
// Combinational next-state function of the multiplier FSM.
// Three phases run back to back: SQUARE -> TRIANGLE -> RECTANGLE. Inside a phase
// every column goes INIT -> TRIG -> BUSY; BUSY repeats until the phase's
// *_addr_done flag is set. Column 0 always continues with COL_N_INIT, later
// columns continue with fsm_state_after_mult_* (next column or HOLDOFF).
// Each HOLDOFF state waits for recomb_rdy before the next phase starts.
always @* begin
//
// fallback value, so fsm_state_next is assigned on every path
fsm_state_next = FSM_STATE_IDLE;
//
case (fsm_state)
// idle until ena is asserted
FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE;
// square phase
FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF;
// triangle phase
FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = mult_triangle_addr_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = mult_triangle_addr_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF;
// rectangle phase
FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = mult_rectangle_addr_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = mult_rectangle_addr_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
// FSM_STATE_STOP has no arm of its own, so it lands here and returns to IDLE
default: fsm_state_next = FSM_STATE_IDLE ;
endcase
//
end
//
// Reductor
//
// reductor_ena starts at 0 and is set once the FSM is seen in
// FSM_STATE_MULT_RECTANGLE_COL_0_INIT; nothing in this block clears it again.
reg reductor_ena = 1'b0;
always @(posedge clk)
    if (!reductor_ena && (fsm_state == FSM_STATE_MULT_RECTANGLE_COL_0_INIT))
        reductor_ena <= 1'b1;
// Delayed copies (1, 2 and 3 clocks) of the recombinator's fat-channel
// valid flag, bank and address.
reg       recomb_fat_bram_xy_dout_valid_dly1;
reg [2:0] recomb_fat_bram_xy_bank_dly1;
reg [7:0] recomb_fat_bram_xy_addr_dly1;

reg       recomb_fat_bram_xy_dout_valid_dly2;
reg [2:0] recomb_fat_bram_xy_bank_dly2;
reg [7:0] recomb_fat_bram_xy_addr_dly2;

reg       recomb_fat_bram_xy_dout_valid_dly3;
reg [2:0] recomb_fat_bram_xy_bank_dly3;
reg [7:0] recomb_fat_bram_xy_addr_dly3;

// The valid flag shifts unconditionally, one stage per clock.
always @(posedge clk) begin
    recomb_fat_bram_xy_dout_valid_dly3 <= recomb_fat_bram_xy_dout_valid_dly2;
    recomb_fat_bram_xy_dout_valid_dly2 <= recomb_fat_bram_xy_dout_valid_dly1;
    recomb_fat_bram_xy_dout_valid_dly1 <= recomb_fat_bram_xy_dout_valid;
end
// Three-stage delay of the fat-channel X data, bank and address. A stage only
// captures when the valid flag feeding it is set, so each payload register
// holds its last value while no valid word is passing through.
reg [17:0] recomb_fat_bram_x_dout_dly1;
reg [17:0] recomb_fat_bram_x_dout_dly2;
reg [17:0] recomb_fat_bram_x_dout_dly3;

always @(posedge clk) begin
    // stage 1: straight from the recombinator
    if (recomb_fat_bram_xy_dout_valid) begin
        recomb_fat_bram_x_dout_dly1  <= recomb_fat_bram_x_dout;
        recomb_fat_bram_xy_bank_dly1 <= recomb_fat_bram_xy_bank;
        recomb_fat_bram_xy_addr_dly1 <= recomb_fat_bram_xy_addr;
    end
    // stage 2
    if (recomb_fat_bram_xy_dout_valid_dly1) begin
        recomb_fat_bram_x_dout_dly2  <= recomb_fat_bram_x_dout_dly1;
        recomb_fat_bram_xy_bank_dly2 <= recomb_fat_bram_xy_bank_dly1;
        recomb_fat_bram_xy_addr_dly2 <= recomb_fat_bram_xy_addr_dly1;
    end
    // stage 3
    if (recomb_fat_bram_xy_dout_valid_dly2) begin
        recomb_fat_bram_x_dout_dly3  <= recomb_fat_bram_x_dout_dly2;
        recomb_fat_bram_xy_bank_dly3 <= recomb_fat_bram_xy_bank_dly2;
        recomb_fat_bram_xy_addr_dly3 <= recomb_fat_bram_xy_addr_dly2;
    end
end
// Reductor adder, X side. Works on the 3-clock delayed recombinator word
// (recomb_fat_bram_x_dout_dly3) together with mac_fat_bram_x_dout_aux.
// The carry is 2 bits and the discarded low part 16 bits, so the pair
// {carry, dummy} receives an 18-bit sum.
reg [ 1:0] reductor_fat_bram_x_lsb_carry;
reg [15:0] reductor_fat_bram_x_lsb_dummy;
// NOTE(review): reductor_fat_bram_x_lsb_dout is declared but not assigned in this block.
reg [17:0] reductor_fat_bram_x_lsb_dout;
reg [17:0] reductor_fat_bram_x_msb_dout;
always @(posedge clk)
//
if (!reductor_ena) begin
// carry is held at zero until the reductor is enabled
reductor_fat_bram_x_lsb_carry <= 2'b00;
end else if (recomb_fat_bram_xy_dout_valid_dly3) begin
case (recomb_fat_bram_xy_bank_dly3)
// ML bank: add with carry-in; keep only the new carry, the low 16 bits go to the dummy register
BANK_FAT_ML: {reductor_fat_bram_x_lsb_carry, reductor_fat_bram_x_lsb_dummy} <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_lsb_carry;
BANK_FAT_MH:
// MH address 0 is handled exactly like the ML words
if (recomb_fat_bram_xy_addr_dly3 == 8'd0)
{reductor_fat_bram_x_lsb_carry, reductor_fat_bram_x_lsb_dummy} <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_lsb_carry;
// MH address 1: the accumulated carry is added into the stored result
else if (recomb_fat_bram_xy_addr_dly3 == 8'd1)
reductor_fat_bram_x_msb_dout <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_lsb_carry;
// remaining MH addresses: plain sum, no carry-in
else
reductor_fat_bram_x_msb_dout <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux;
// EXT bank: the delayed word is passed through unchanged
BANK_FAT_EXT:
reductor_fat_bram_x_msb_dout <= recomb_fat_bram_x_dout_dly3;
endcase
//
end
/*
reg [17:0] recomb_fat_bram_x_dout_dly1;
reg [17:0] recomb_fat_bram_x_dout_dly2;
reg [ 2:0] recomb_fat_bram_xy_bank_dly1;
reg [ 2:0] recomb_fat_bram_xy_bank_dly2;
reg [1:0] reductor_fat_bram_x_carry;
reg [15:0] reductor_fat_bram_x_dummy;
reg [17:0] reductor_fat_bram_x_dout;
reg reductor_fat_bram_xy_dout_valid;
always @(posedge clk)
//
if (reductor_ena) begin
if (recomb_fat_bram_xy_dout_valid) begin
recomb_fat_bram_x_dout_dly1 <= recomb_fat_bram_x_dout;
recomb_fat_bram_xy_bank_dly1 <= recomb_fat_bram_xy_bank;
end
if (mac_fat_bram_xy_ena_aux) begin
recomb_fat_bram_x_dout_dly2 <= recomb_fat_bram_x_dout_dly1;
recomb_fat_bram_xy_bank_dly2 <= recomb_fat_bram_xy_bank_dly1;
end
if (mac_fat_bram_xy_reg_ena_aux)
case (recomb_fat_bram_xy_bank_dly2)
BANK_FAT_ML: {reductor_fat_bram_x_carry, reductor_fat_bram_x_dummy} <= recomb_fat_bram_x_dout_dly2 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_carry;
endcase
//reductor_fat_bram_xy_dout_valid <= mac_fat_bram_xy_reg_ena_aux;
end else begin
reductor_fat_bram_x_carry <= 2'b00;
reductor_fat_bram_xy_dout_valid <= 1'b0;
end
*/
endmodule