`timescale 1ns / 1ps

//======================================================================
// tb_square
//
// Self-checking simulation testbench. It:
//   1. preloads the "fat" and "slim" dual-port BRAMs with the operand
//      vectors T1/T2, N_COEFF and N,
//   2. pulses `ena` for one clock to start the multiplier FSM
//      (square -> triangle -> rectangle phases),
//   3. snoops the words emitted by the part recombinator into
//      AB_READ / Q_READ / M_READ,
//   4. after a fixed number of clocks compares those against the
//      reference vectors AB / Q / M and prints a verdict per vector.
//
// All data words are 18 bits wide; the clock is 100 MHz.
//
// NOTE(review): the file names of the included headers suggest this is
// the Montgomery modular multiplier of a "modexpng" core -- confirm.
//======================================================================
module tb_square;

    //
    // Headers (bank/state constants such as BANK_*, FSM_STATE_*, NUM_MULTS
    // are expected to come from these includes)
    //
    `include "../rtl/modexpng_parameters.vh"
    `include "../rtl/modexpng_parameters_x8.vh"
    `include "../rtl/modexpng_mmm_fsm.vh"

    //
    // Clock
    //
    `define CLK_FREQUENCY_MHZ   100.0
    `define CLK_PERIOD_NS       (1000.0 / `CLK_FREQUENCY_MHZ)
    `define CLK_PERIOD_HALF_NS  (0.5 * `CLK_PERIOD_NS)

    reg clk = 1'b0;

    // Free-running clock: first rising edge half a period after time 0.
    always begin
        #`CLK_PERIOD_HALF_NS clk = 1'b1;
        #`CLK_PERIOD_HALF_NS clk = 1'b0;
    end

    //
    // Reset (active high, released by the stimulus block)
    //
    reg rst = 1'b1;

    //
    // Test vectors: T1, T2, N_COEFF and N are inputs written into the
    // BRAMs; AB, Q and M are the expected results.
    //
    reg [17:0] T1[0:31];
    reg [17:0] T2[0:31];
    reg [17:0] AB[0:63];
    reg [17:0] N_COEFF[0:32];
    reg [17:0] Q[0:32];
    reg [17:0] N[0:31];
    reg [17:0] M[0:64];

    //
    // Init
    //
    initial begin
        //
        T1[ 0] = 18'h0f13e; T1[ 1] = 18'h0daf6; T1[ 2] = 18'h0aaa9; T1[ 3] = 18'h0c2c2;
        T1[ 4] = 18'h0fc5f; T1[ 5] = 18'h12164; T1[ 6] = 18'h14375; T1[ 7] = 18'h15615;
        T1[ 8] = 18'h0d8e2; T1[ 9] = 18'h0ec15; T1[10] = 18'h17c46; T1[11] = 18'h0c922;
        T1[12] = 18'h08f00; T1[13] = 18'h152f9; T1[14] = 18'h0b0b6; T1[15] = 18'h0ce87;
        T1[16] = 18'h178f2; T1[17] = 18'h09efb; T1[18] = 18'h0409d; T1[19] = 18'h11104;
        T1[20] = 18'h0b4a6; T1[21] = 18'h158a6; T1[22] = 18'h0514e; T1[23] = 18'h0ec55;
        T1[24] = 18'h11e73; T1[25] = 18'h11ddd; T1[26] = 18'h07bd4; T1[27] = 18'h0638b;
        T1[28] = 18'h0e805; T1[29] = 18'h11c4f; T1[30] = 18'h0a2eb; T1[31] = 18'h05454;
        //
        T2[ 0] = 18'h1a479; T2[ 1] = 18'h102f5; T2[ 2] = 18'h10e72; T2[ 3] = 18'h120b1;
        T2[ 4] = 18'h169cd; T2[ 5] = 18'h1d0c4; T2[ 6] = 18'h11462; T2[ 7] = 18'h12015;
        T2[ 8] = 18'h16fca; T2[ 9] = 18'h1044f; T2[10] = 18'h122b4; T2[11] = 18'h10a5a;
        T2[12] = 18'h12620; T2[13] = 18'h0e01a; T2[14] = 18'h095cd; T2[15] = 18'h1278a;
        T2[16] = 18'h10763; T2[17] = 18'h09fe7; T2[18] = 18'h0d35c; T2[19] = 18'h10e24;
        T2[20] = 18'h1527d; T2[21] = 18'h115b3; T2[22] = 18'h05443; T2[23] = 18'h1190a;
        T2[24] = 18'h0fcc3; T2[25] = 18'h115e2; T2[26] = 18'h0a398; T2[27] = 18'h0608d;
        T2[28] = 18'h13075; T2[29] = 18'h0d816; T2[30] = 18'h0bb4c; T2[31] = 18'h04e8a;
        //
        AB[ 0] = 18'h0be4e; AB[ 1] = 18'h0fed7; AB[ 2] = 18'h09496; AB[ 3] = 18'h07181;
        AB[ 4] = 18'h0ee73; AB[ 5] = 18'h04692; AB[ 6] = 18'h0141a; AB[ 7] = 18'h0078c;
        AB[ 8] = 18'h030eb; AB[ 9] = 18'h0217c; AB[10] = 18'h0696f; AB[11] = 18'h0a165;
        AB[12] = 18'h0b753; AB[13] = 18'h04af9; AB[14] = 18'h0ed7c; AB[15] = 18'h079ce;
        AB[16] = 18'h0e863; AB[17] = 18'h097df; AB[18] = 18'h07984; AB[19] = 18'h048af;
        AB[20] = 18'h0197f; AB[21] = 18'h0206a; AB[22] = 18'h027e7; AB[23] = 18'h04b3a;
        AB[24] = 18'h03312; AB[25] = 18'h03b56; AB[26] = 18'h04487; AB[27] = 18'h0bd6a;
        AB[28] = 18'h04e4b; AB[29] = 18'h069ca; AB[30] = 18'h0f994; AB[31] = 18'h0dd4e;
        AB[32] = 18'h1b024; AB[33] = 18'h0127f; AB[34] = 18'h02631; AB[35] = 18'h0186b;
        AB[36] = 18'h03adb; AB[37] = 18'h05368; AB[38] = 18'h059a5; AB[39] = 18'h002e0;
        AB[40] = 18'h0b78a; AB[41] = 18'h016f3; AB[42] = 18'h0b58d; AB[43] = 18'h03ddb;
        AB[44] = 18'h078b0; AB[45] = 18'h0073b; AB[46] = 18'h07337; AB[47] = 18'h0c7b0;
        AB[48] = 18'h00668; AB[49] = 18'h0106d; AB[50] = 18'h01a44; AB[51] = 18'h05ee3;
        AB[52] = 18'h0462d; AB[53] = 18'h0fdeb; AB[54] = 18'h05f85; AB[55] = 18'h02af9;
        AB[56] = 18'h0e1c0; AB[57] = 18'h00989; AB[58] = 18'h01201; AB[59] = 18'h0e194;
        AB[60] = 18'h07f93; AB[61] = 18'h0e739; AB[62] = 18'h07cf6; AB[63] = 18'h019df;
        //
        N_COEFF[ 0] = 18'h05a97; N_COEFF[ 1] = 18'h0ac69; N_COEFF[ 2] = 18'h0d51e; N_COEFF[ 3] = 18'h07326;
        N_COEFF[ 4] = 18'h01053; N_COEFF[ 5] = 18'h0f68a; N_COEFF[ 6] = 18'h09c70; N_COEFF[ 7] = 18'h064f7;
        N_COEFF[ 8] = 18'h01041; N_COEFF[ 9] = 18'h0c2bf; N_COEFF[10] = 18'h0f01f; N_COEFF[11] = 18'h01842;
        N_COEFF[12] = 18'h0e69a; N_COEFF[13] = 18'h037ea; N_COEFF[14] = 18'h0b4a0; N_COEFF[15] = 18'h0c1ab;
        N_COEFF[16] = 18'h0bd5b; N_COEFF[17] = 18'h09e5e; N_COEFF[18] = 18'h039bd; N_COEFF[19] = 18'h06430;
        N_COEFF[20] = 18'h0b460; N_COEFF[21] = 18'h08bd4; N_COEFF[22] = 18'h09fcd; N_COEFF[23] = 18'h05391;
        N_COEFF[24] = 18'h0fa45; N_COEFF[25] = 18'h08892; N_COEFF[26] = 18'h0732c; N_COEFF[27] = 18'h0baf6;
        N_COEFF[28] = 18'h067a9; N_COEFF[29] = 18'h0b184; N_COEFF[30] = 18'h02089; N_COEFF[31] = 18'h0297b;
        N_COEFF[32] = 18'h01810;
        //
        Q[ 0] = 18'h0ac02; Q[ 1] = 18'h0a026; Q[ 2] = 18'h06825; Q[ 3] = 18'h08f06;
        Q[ 4] = 18'h03783; Q[ 5] = 18'h04cb5; Q[ 6] = 18'h0e8ea; Q[ 7] = 18'h083d2;
        Q[ 8] = 18'h0fec9; Q[ 9] = 18'h066d9; Q[10] = 18'h0edad; Q[11] = 18'h06c12;
        Q[12] = 18'h0a5fb; Q[13] = 18'h07295; Q[14] = 18'h06a0c; Q[15] = 18'h081a5;
        Q[16] = 18'h03493; Q[17] = 18'h0a393; Q[18] = 18'h03da6; Q[19] = 18'h0beb1;
        Q[20] = 18'h0d138; Q[21] = 18'h02815; Q[22] = 18'h0f191; Q[23] = 18'h03617;
        Q[24] = 18'h08d4f; Q[25] = 18'h0f641; Q[26] = 18'h00e82; Q[27] = 18'h01774;
        Q[28] = 18'h0bf39; Q[29] = 18'h0929d; Q[30] = 18'h05273; Q[31] = 18'h0c30a;
        Q[32] = 18'h0eef3;
        //
        N[ 0] = 18'h03ad9; N[ 1] = 18'h046b4; N[ 2] = 18'h0e181; N[ 3] = 18'h0fac7;
        N[ 4] = 18'h0be72; N[ 5] = 18'h029ab; N[ 6] = 18'h07e51; N[ 7] = 18'h037a8;
        N[ 8] = 18'h0880c; N[ 9] = 18'h05a7d; N[10] = 18'h043c2; N[11] = 18'h038c9;
        N[12] = 18'h01275; N[13] = 18'h0aa0d; N[14] = 18'h0c0c1; N[15] = 18'h0d035;
        N[16] = 18'h04082; N[17] = 18'h0543c; N[18] = 18'h0dcb0; N[19] = 18'h0497c;
        N[20] = 18'h0b12c; N[21] = 18'h013d4; N[22] = 18'h0b80a; N[23] = 18'h051cf;
        N[24] = 18'h0286c; N[25] = 18'h0b600; N[26] = 18'h0d838; N[27] = 18'h0af4b;
        N[28] = 18'h08274; N[29] = 18'h06a07; N[30] = 18'h0beea; N[31] = 18'h0f000;
        //
        M[ 0] = 18'h041b2; M[ 1] = 18'h00128; M[ 2] = 18'h06b69; M[ 3] = 18'h08e7e;
        M[ 4] = 18'h0118c; M[ 5] = 18'h0b96d; M[ 6] = 18'h0ebe5; M[ 7] = 18'h0f873;
        M[ 8] = 18'h0cf14; M[ 9] = 18'h0de83; M[10] = 18'h09690; M[11] = 18'h05e9a;
        M[12] = 18'h048ac; M[13] = 18'h0b506; M[14] = 18'h01283; M[15] = 18'h08631;
        M[16] = 18'h0179c; M[17] = 18'h06820; M[18] = 18'h0867b; M[19] = 18'h0b750;
        M[20] = 18'h0e680; M[21] = 18'h0df95; M[22] = 18'h0d818; M[23] = 18'h0b4c5;
        M[24] = 18'h0cced; M[25] = 18'h0c4a9; M[26] = 18'h0bb78; M[27] = 18'h04295;
        M[28] = 18'h0b1b4; M[29] = 18'h09635; M[30] = 18'h0066b; M[31] = 18'h022b1;
        M[32] = 18'h04fdb; M[33] = 18'h0efc8; M[34] = 18'h00a14; M[35] = 18'h04bef;
        M[36] = 18'h006a1; M[37] = 18'h0f1a6; M[38] = 18'h0fc40; M[39] = 18'h0adb5;
        M[40] = 18'h06e8f; M[41] = 18'h02c60; M[42] = 18'h083e1; M[43] = 18'h0f862;
        M[44] = 18'h0da61; M[45] = 18'h0dd3d; M[46] = 18'h03381; M[47] = 18'h09db0;
        M[48] = 18'h05454; M[49] = 18'h07525; M[50] = 18'h0d9c7; M[51] = 18'h0a361;
        M[52] = 18'h049e0; M[53] = 18'h0a671; M[54] = 18'h0242e; M[55] = 18'h07cb2;
        M[56] = 18'h02021; M[57] = 18'h0bde1; M[58] = 18'h025aa; M[59] = 18'h0c615;
        M[60] = 18'h05645; M[61] = 18'h03b46; M[62] = 18'h065d6; M[63] = 18'h0390d;
        M[64] = 18'h0e005;
        //
    end

    //
    // BRAMs
    //
    // Naming: tb_*  = values driven by the stimulus block,
    //         mgr_* = registered write port (tb_* or recombinator output),
    //         mac_* = read port feeding the DSP arrays.
    //
    reg         tb_fat_bram_xy_ena = 1'b0;
    reg  [ 2:0] tb_fat_bram_xy_bank;
    reg  [ 7:0] tb_fat_bram_xy_addr;
    reg  [17:0] tb_fat_bram_x_din;
    reg  [17:0] tb_fat_bram_y_din;

    reg         mgr_fat_bram_xy_ena = 1'b0;
    reg  [ 2:0] mgr_fat_bram_xy_bank;
    reg  [ 7:0] mgr_fat_bram_xy_addr;
    reg  [17:0] mgr_fat_bram_x_din;
    reg  [17:0] mgr_fat_bram_y_din;

    reg         mac_fat_bram_xy_ena = 1'b0;
    reg         mac_fat_bram_xy_reg_ena = 1'b0;
    reg  [ 2:0] mac_fat_bram_xy_bank;
    reg  [ 2:0] mac_fat_bram_xy_bank_aux;
    reg  [ 7:0] mac_fat_bram_xy_addr[0:4];
    wire [17:0] mac_fat_bram_x_dout[0:4];
    wire [17:0] mac_fat_bram_y_dout[0:4];

    reg         tb_slim_bram_xy_ena = 1'b0;
    reg  [ 1:0] tb_slim_bram_xy_bank;
    reg  [ 7:0] tb_slim_bram_xy_addr;
    reg  [17:0] tb_slim_bram_x_din;
    reg  [17:0] tb_slim_bram_y_din;

    reg         mgr_slim_bram_xy_ena = 1'b0;
    reg  [ 1:0] mgr_slim_bram_xy_bank;
    reg  [ 7:0] mgr_slim_bram_xy_addr;
    reg  [17:0] mgr_slim_bram_x_din;
    reg  [17:0] mgr_slim_bram_y_din;

    reg         mac_slim_bram_xy_ena = 1'b0;
    reg         mac_slim_bram_xy_reg_ena = 1'b0;
    reg  [ 1:0] mac_slim_bram_xy_bank;
    reg  [ 7:0] mac_slim_bram_xy_addr;
    reg  [ 7:0] mac_slim_bram_xy_addr_dly;
    wire [17:0] mac_slim_bram_x_dout;
    wire [17:0] mac_slim_bram_y_dout;

    // One-cycle delayed copy of the slim read address (used by the mode_z logic).
    always @(posedge clk)
        mac_slim_bram_xy_addr_dly <= mac_slim_bram_xy_addr;

    // One-cycle delayed copy of the slim output-register enable (feeds the DSP CE chain).
    reg mac_slim_bram_xy_reg_ena_dly = 1'b0;
    always @(posedge clk)
        mac_slim_bram_xy_reg_ena_dly <= mac_slim_bram_xy_reg_ena;

    // (NUM_MULTS/2)+1 copies of the fat X/Y memories. All copies share one
    // write port; each has its own read address. The last copy
    // (z == NUM_MULTS/2) reads from the auxiliary bank select.
    genvar z;
    generate for (z=0; z<((NUM_MULTS/2)+1); z=z+1)
        begin : gen_fat_bram
            //
            ip_bram_36k fat_bram_x
            (
                .clka   (clk),
                .ena    (mgr_fat_bram_xy_ena),
                .wea    (mgr_fat_bram_xy_ena),
                .addra  ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}),
                .dina   (mgr_fat_bram_x_din),

                .clkb   (clk),
                .enb    (mac_fat_bram_xy_ena),
                .regceb (mac_fat_bram_xy_reg_ena),
                .addrb  ({(z < (NUM_MULTS/2) ? mac_fat_bram_xy_bank : mac_fat_bram_xy_bank_aux), mac_fat_bram_xy_addr[z]}),
                .doutb  (mac_fat_bram_x_dout[z])
            );
            //
            ip_bram_36k fat_bram_y
            (
                .clka   (clk),
                .ena    (mgr_fat_bram_xy_ena),
                .wea    (mgr_fat_bram_xy_ena),
                .addra  ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}),
                .dina   (mgr_fat_bram_y_din),

                .clkb   (clk),
                .enb    (mac_fat_bram_xy_ena),
                .regceb (mac_fat_bram_xy_reg_ena),
                // Parenthesised to match the fat_bram_x instance above.
                .addrb  ({(z < (NUM_MULTS/2) ? mac_fat_bram_xy_bank : mac_fat_bram_xy_bank_aux), mac_fat_bram_xy_addr[z]}),
                .doutb  (mac_fat_bram_y_dout[z])
            );
            //
        end
    endgenerate

    ip_bram_18k slim_bram_x
    (
        .clka   (clk),
        .ena    (mgr_slim_bram_xy_ena),
        .wea    (mgr_slim_bram_xy_ena),
        .addra  ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
        .dina   (mgr_slim_bram_x_din),

        .clkb   (clk),
        .enb    (mac_slim_bram_xy_ena),
        .regceb (mac_slim_bram_xy_reg_ena),
        .addrb  ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}),
        .doutb  (mac_slim_bram_x_dout)
    );

    ip_bram_18k slim_bram_y
    (
        .clka   (clk),
        .ena    (mgr_slim_bram_xy_ena),
        .wea    (mgr_slim_bram_xy_ena),
        .addra  ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
        .dina   (mgr_slim_bram_y_din),

        .clkb   (clk),
        .enb    (mac_slim_bram_xy_ena),
        .regceb (mac_slim_bram_xy_reg_ena),
        .addrb  ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}),
        .doutb  (mac_slim_bram_y_dout)
    );

    //
    // Enable, Ready
    //
    reg ena = 1'b0;

    integer i;

    // Main stimulus: release reset, preload memories, start the FSM,
    // wait a fixed time, check results, end the simulation.
    initial begin

        for (i=0; i<10; i=i+1)
            wait_clock_tick;

        rst = 1'b0;

        for (i=0; i<10; i=i+1)
            wait_clock_tick;

        tb_fat_bram_xy_ena  = 1'b1;
        tb_slim_bram_xy_ena = 1'b1;

        // T1 -> X memories, T2 -> Y memories (both fat and slim).
        for (i=0; i<32; i=i+1) begin
            tb_fat_bram_xy_bank  = BANK_FAT_T1T2;
            tb_fat_bram_xy_addr  = i[7:0];
            tb_fat_bram_x_din    = T1[i];
            tb_fat_bram_y_din    = T2[i];
            tb_slim_bram_xy_bank = BANK_SLIM_T1T2;
            tb_slim_bram_xy_addr = i[7:0];
            tb_slim_bram_x_din   = T1[i];
            tb_slim_bram_y_din   = T2[i];
            wait_clock_tick;
        end

        // First 32 words of N_COEFF -> slim memories (same data in X and Y).
        for (i=0; i<32; i=i+1) begin
            tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF;
            tb_slim_bram_xy_addr = i[7:0];
            tb_slim_bram_x_din   = N_COEFF[i];
            tb_slim_bram_y_din   = N_COEFF[i];
            wait_clock_tick;
        end

        // The extra 33rd word of N_COEFF goes to address 0 of the EXT bank.
        for (i=32; i<33; i=i+1) begin
            tb_slim_bram_xy_bank = BANK_SLIM_EXT;
            tb_slim_bram_xy_addr = 0; // !
            tb_slim_bram_x_din   = N_COEFF[i];
            tb_slim_bram_y_din   = N_COEFF[i];
            wait_clock_tick;
        end

        // N -> fat memories (same data in X and Y).
        for (i=0; i<32; i=i+1) begin
            tb_fat_bram_xy_bank = BANK_FAT_N;
            tb_fat_bram_xy_addr = i[7:0];
            tb_fat_bram_x_din   = N[i];
            tb_fat_bram_y_din   = N[i];
            wait_clock_tick;
        end

        // Stop writing and poison the write-side values with X.
        tb_fat_bram_xy_ena  = 1'b0;
        tb_slim_bram_xy_ena = 1'b0;

        tb_fat_bram_xy_bank  = {3{1'bX}};
        tb_fat_bram_xy_addr  = {8{1'bX}};
        tb_fat_bram_x_din    = {18{1'bX}};
        tb_fat_bram_y_din    = {18{1'bX}};
        tb_slim_bram_xy_bank = {2{1'bX}};
        tb_slim_bram_xy_addr = {8{1'bX}};
        tb_slim_bram_x_din   = {18{1'bX}};
        tb_slim_bram_y_din   = {18{1'bX}};

        for (i=0; i<10; i=i+1)
            wait_clock_tick;

        // One-clock start pulse.
        ena = 1'b1;
        wait_clock_tick;
        ena = 1'b0;

        // Fixed wait; the FSM is not polled for completion.
        for (i=0; i<10000; i=i+1)
            wait_clock_tick;

        verify_ab;
        verify_q;
        verify_m;

        // The clock generator never stops on its own, so end the run explicitly.
        $finish;

    end

    //
    // DSPs
    //
    reg                 dsp_x_ce_a;
    reg                 dsp_x_ce_b;
    reg                 dsp_x_ce_b_dly;
    reg                 dsp_x_ce_m;
    reg                 dsp_x_ce_p;
    reg                 dsp_x_ce_mode;
    reg  [9-1:0]        dsp_x_mode_z = {9{1'b1}};
    wire [5*18-1:0]     dsp_x_a;
    reg  [1*17-1:0]     dsp_x_b;
    wire [9*47-1:0]     dsp_x_p;

    reg                 dsp_y_ce_a;
    reg                 dsp_y_ce_b;
    reg                 dsp_y_ce_b_dly;
    reg                 dsp_y_ce_m;
    reg                 dsp_y_ce_p;
    reg                 dsp_y_ce_mode;
    reg  [9-1:0]        dsp_y_mode_z = {9{1'b1}};
    wire [5*18-1:0]     dsp_y_a;
    reg  [1*17-1:0]     dsp_y_b;
    wire [9*47-1:0]     dsp_y_p;

    // Pack the per-copy fat BRAM outputs into the flat DSP "a" buses.
    generate for (z=0; z<((NUM_MULTS/2)+1); z=z+1)
        begin : gen_dsp_xy_a_split
            assign dsp_x_a[18*z+:18] = mac_fat_bram_x_dout[z];
            assign dsp_y_a[18*z+:18] = mac_fat_bram_y_dout[z];
        end
    endgenerate

    always @(posedge clk)
        {dsp_y_ce_b_dly, dsp_x_ce_b_dly} <= {dsp_y_ce_b, dsp_x_ce_b};

    // Pipeline for mode_z: adv4 is computed first and reaches
    // dsp_{x,y}_mode_z four register stages later.
    reg [9-1:0] dsp_xy_mode_z_adv1 = {9{1'b1}};
    reg [9-1:0] dsp_xy_mode_z_adv2 = {9{1'b1}};
    reg [9-1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
    reg [9-1:0] dsp_xy_mode_z_adv4 = {9{1'b1}};

    dsp_array dsp_x
    (
        .clk        (clk),
        .ce_a       (dsp_x_ce_a),
        .ce_b       (dsp_x_ce_b),
        .ce_m       (dsp_x_ce_m),
        .ce_p       (dsp_x_ce_p),
        .ce_mode    (dsp_x_ce_mode),
        .mode_z     (dsp_x_mode_z),
        .a          (dsp_x_a),
        .b          (dsp_x_b),
        .p          (dsp_x_p)
    );

    dsp_array dsp_y
    (
        .clk        (clk),
        .ce_a       (dsp_y_ce_a),
        .ce_b       (dsp_y_ce_b),
        .ce_m       (dsp_y_ce_m),
        .ce_p       (dsp_y_ce_p),
        .ce_mode    (dsp_y_ce_mode),
        .mode_z     (dsp_y_mode_z),
        .a          (dsp_y_a),
        .b          (dsp_y_b),
        .p          (dsp_y_p)
    );

    //
    // FSM State and Next States
    //
    reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
    reg [FSM_STATE_WIDTH-1:0] fsm_state_next;

    // Synchronous reset to IDLE, otherwise follow the combinational next state.
    always @(posedge clk)
        if (rst) fsm_state <= FSM_STATE_IDLE;
        else     fsm_state <= fsm_state_next;

    localparam [7:0] index_last        = 8'd31;
    localparam [7:0] index_last_minus1 = index_last - 1'b1;

    //
    // Column
    //
    reg [4:0] col_index;        // current column index
    reg [4:0] col_index_prev;   // delayed column index value
    reg [4:0] col_index_last;   // index of the very last column
    reg [4:0] col_index_next1;  // precomputed next column index
    //reg [4:0] col_index_next2;  // precomputed next column index after next column index
    reg       col_is_last;      // flag set during the very last column

    always @(posedge clk)
        col_index_prev <= col_index;

    //
    // Address "done" flags. For each phase the *_comb signal is a compare on
    // the slim read address; *_almost/_surely/_tardy are that compare delayed
    // by one, two and three clocks respectively.
    //
    wire mult_square_addr_almost_done_comb;
    reg  mult_square_addr_almost_done_flop;
    reg  mult_square_addr_surely_done_flop;

    wire mult_triangle_addr_almost_done_comb;
    reg  mult_triangle_addr_almost_done_flop;
    reg  mult_triangle_addr_surely_done_flop;
    reg  mult_triangle_addr_tardy_done_flop;

    wire mult_rectangle_addr_almost_done_comb;
    reg  mult_rectangle_addr_almost_done_flop;
    reg  mult_rectangle_addr_surely_done_flop;
    reg  mult_rectangle_addr_tardy_done_flop;

    assign mult_square_addr_almost_done_comb    = mac_slim_bram_xy_addr == index_last_minus1;
    assign mult_triangle_addr_almost_done_comb  = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) &&
                                                  (mac_slim_bram_xy_addr[7:3] == col_index);
    assign mult_rectangle_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;

    // The "almost done" flops are only allowed to set while the matching
    // BUSY state is active; otherwise they are held at 0.
    always @(posedge clk)
        case (fsm_state)
            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY:
                mult_square_addr_almost_done_flop <= mult_square_addr_almost_done_comb;
            default:
                mult_square_addr_almost_done_flop <= 1'b0;
        endcase

    always @(posedge clk)
        mult_square_addr_surely_done_flop <= mult_square_addr_almost_done_flop;

    always @(posedge clk)
        case (fsm_state)
            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
                mult_triangle_addr_almost_done_flop <= mult_triangle_addr_almost_done_comb;
            default:
                mult_triangle_addr_almost_done_flop <= 1'b0;
        endcase

    always @(posedge clk) begin
        mult_triangle_addr_surely_done_flop <= mult_triangle_addr_almost_done_flop;
        mult_triangle_addr_tardy_done_flop  <= mult_triangle_addr_surely_done_flop;
    end

    always @(posedge clk)
        case (fsm_state)
            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
                mult_rectangle_addr_almost_done_flop <= mult_rectangle_addr_almost_done_comb;
            default:
                mult_rectangle_addr_almost_done_flop <= 1'b0;
        endcase

    always @(posedge clk) begin
        mult_rectangle_addr_surely_done_flop <= mult_rectangle_addr_almost_done_flop;
        mult_rectangle_addr_tardy_done_flop  <= mult_rectangle_addr_surely_done_flop;
    end

    //
    // FSM Transition Logic
    //
    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;

    // Slim BRAM read address counter, keyed on the *next* FSM state:
    // cleared in INIT, incremented in TRIG/BUSY, and wrapped when the
    // phase's done flag fires (to 0 for square/triangle, to 1 for rectangle).
    always @(posedge clk)
        case (fsm_state_next)
            //
            FSM_STATE_MULT_SQUARE_COL_0_INIT,
            FSM_STATE_MULT_SQUARE_COL_N_INIT:
                mac_slim_bram_xy_addr <= 8'd0;
            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY:
                mac_slim_bram_xy_addr <= !mult_square_addr_almost_done_flop ?
                    mac_slim_bram_xy_addr + 1'b1 : 8'd0;
            //
            FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            FSM_STATE_MULT_TRIANGLE_COL_N_INIT:
                mac_slim_bram_xy_addr <= 8'd0;
            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
                mac_slim_bram_xy_addr <=
                    (mult_triangle_addr_almost_done_flop || (col_is_last && mult_triangle_addr_surely_done_flop)) ?
                    8'd0 : mac_slim_bram_xy_addr + 1'b1;
            //
            FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
            FSM_STATE_MULT_RECTANGLE_COL_N_INIT:
                mac_slim_bram_xy_addr <= 8'd0;
            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
                mac_slim_bram_xy_addr <=
                    (mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop) ?
                    8'd1 : mac_slim_bram_xy_addr + 1'b1;
            //
            default:
                mac_slim_bram_xy_addr <= 8'dX;
        endcase

    wire [2:0] fat_bram_offset_rom[0:3];

    // NOTE(review): THE SOURCE TEXT IS DAMAGED HERE. Everything between the
    // `z` of the generate-for header and `8'd0)` is missing (it looks as if a
    // '<' ... '>' span was stripped during extraction). The remnant below is
    // kept verbatim and will NOT compile. Several signals declared above
    // (e.g. mac_fat_bram_xy_ena, mac_fat_bram_xy_addr, mac_slim_bram_xy_ena,
    // mac_slim_bram_xy_reg_ena, mac_slim_bram_xy_bank, dsp_x_b, dsp_y_b) have
    // no visible driver anywhere else in this file, so their logic presumably
    // lived in the lost text. Restore this region from version control.
    generate for (z=1; z 8'd0)
            mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_current - 1'b1;
        else
            mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_last;
    end
    endfunction

    //
    // DSP clock-enable chain, derived from the slim BRAM output-register enable
    //
    always @(posedge clk)
        {dsp_y_ce_a, dsp_x_ce_a} <= {2{mac_slim_bram_xy_reg_ena | mac_slim_bram_xy_reg_ena_dly}};

    always @(posedge clk)
        {dsp_y_ce_b, dsp_x_ce_b} <= {2{mac_slim_bram_xy_reg_ena_dly}};

    always @(posedge clk)
        {dsp_y_ce_m, dsp_x_ce_m} <= {dsp_y_ce_b_dly, dsp_x_ce_b_dly};

    always @(posedge clk)
        {dsp_y_ce_p, dsp_x_ce_p} <= {dsp_y_ce_m, dsp_x_ce_m};

    always @(posedge clk)
        {dsp_y_ce_mode, dsp_x_ce_mode} <= {dsp_y_ce_b_dly, dsp_x_ce_b_dly};

    // Block for exactly one clock period.
    task wait_clock_tick;
        begin
            #`CLK_PERIOD_NS;
        end
    endtask

    //
    // Increment Logic (column counter)
    //
    always @(posedge clk)
        case (fsm_state_next)
            // First column of a phase: restart the column bookkeeping.
            FSM_STATE_MULT_SQUARE_COL_0_INIT,
            FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
                col_index       <= 5'd0;
                col_index_last  <= index_last[7:3];
                col_index_next1 <= 5'd1;
                //col_index_next2 <= 5'd2;
                col_is_last     <= 1'b0;
            end
            // Subsequent columns: step to the precomputed next index,
            // wrapping the precomputed value to 0 after the last column.
            FSM_STATE_MULT_SQUARE_COL_N_INIT,
            FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
                col_index       <= col_index_next1;
                col_is_last     <= col_index_next1 == col_index_last;
                col_index_next1 <= col_index_next1 == col_index_last ? 5'd0 : col_index_next1 + 5'd1;
                //col_index_next2 <= col_index_next2 + 1'b1;
            end
            //
        endcase

    // After a column finishes: go to HOLDOFF if it was the last column,
    // otherwise start the next column.
    assign fsm_state_after_mult_square    = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF    : FSM_STATE_MULT_SQUARE_COL_N_INIT;
    assign fsm_state_after_mult_triangle  = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF  : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
    assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;

    // First stage of the mode_z pipeline, chosen per upcoming FSM state.
    always @(posedge clk)
        case (fsm_state_next)
            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            FSM_STATE_MULT_SQUARE_COL_N_TRIG:
                dsp_xy_mode_z_adv4 <= {9{1'b0}};
            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY:
                dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:
                dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
                dsp_xy_mode_z_adv4 <= {9{1'b1}};
            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:
                dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
                dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, mac_slim_bram_xy_addr_dly);
            default:
                dsp_xy_mode_z_adv4 <= {9{1'b1}};
        endcase

    // Remaining mode_z pipeline stages; X and Y arrays get the same value.
    always @(posedge clk) begin
        {dsp_y_mode_z, dsp_x_mode_z} <= {2{dsp_xy_mode_z_adv1}};
        //
        dsp_xy_mode_z_adv1 <= {dsp_xy_mode_z_adv2};
        dsp_xy_mode_z_adv2 <= {dsp_xy_mode_z_adv3};
        dsp_xy_mode_z_adv3 <= {dsp_xy_mode_z_adv4};
    end

    // Returns an all-ones mode_z word, except that when the upper five
    // address bits equal the column index, the bit selected by the lower
    // three address bits is cleared. The 8-bit literals assume NUM_MULTS == 8.
    function [NUM_MULTS:0] calc_mac_mode_z_square;
        input [ 4:0] col_index_value;
        input [ 7:0] mac_slim_bram_xy_addr_value;
        begin
            if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
                case (mac_slim_bram_xy_addr_value[2:0])
                    3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
                    3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
                    3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
                    3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
                    3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
                    3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
                    3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
                    3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
                endcase
            else
                calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
        end
    endfunction

    // Same mapping as calc_mac_mode_z_square, used during the rectangle phase.
    function [NUM_MULTS:0] calc_mac_mode_z_rectangle;
        input [ 4:0] col_index_value;
        input [ 7:0] mac_slim_bram_xy_addr_value;
        begin
            if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
                case (mac_slim_bram_xy_addr_value[2:0])
                    3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110};
                    3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101};
                    3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011};
                    3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111};
                    3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111};
                    3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111};
                    3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111};
                    3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111};
                endcase
            else
                calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}};
        end
    endfunction

    //
    // Recombinator
    //
    reg recomb_x_ena = 1'b0;
    reg recomb_y_ena = 1'b0;

    // Asserted for the clock after ce_a is high while ce_b, ce_m and ce_p are all low.
    always @(posedge clk) begin
        recomb_x_ena <= dsp_x_ce_a && !dsp_x_ce_b && !dsp_x_ce_m && !dsp_x_ce_p;
        recomb_y_ena <= dsp_y_ce_a && !dsp_y_ce_b && !dsp_y_ce_m && !dsp_y_ce_p;
    end

    wire [ 2:0] recomb_fat_bram_xy_bank;
    wire [ 7:0] recomb_fat_bram_xy_addr;
    wire [17:0] recomb_fat_bram_x_dout;
    wire [17:0] recomb_fat_bram_y_dout;
    wire        recomb_fat_bram_xy_dout_valid;

    // NOTE(review): declared 3 bits wide although every other slim bank
    // signal in this file is 2 bits; the recombinator's port width is not
    // visible here -- confirm against modexpng_part_recombinator.
    wire [ 2:0] recomb_slim_bram_xy_bank;
    wire [ 7:0] recomb_slim_bram_xy_addr;
    wire [17:0] recomb_slim_bram_x_dout;
    wire [17:0] recomb_slim_bram_y_dout;
    wire        recomb_slim_bram_xy_dout_valid;

    wire        recomb_rdy;

    modexpng_part_recombinator recomb
    (
        .clk                            (clk),
        .rdy                            (recomb_rdy),
        .fsm_state_next                 (fsm_state_next),
        .index_last                     (index_last),
        .dsp_x_ce_p                     (dsp_x_ce_p),
        .dsp_y_ce_p                     (dsp_y_ce_p),
        .ena_x                          (recomb_x_ena),
        .ena_y                          (recomb_y_ena),
        .dsp_x_p                        (dsp_x_p),
        .dsp_y_p                        (dsp_y_p),
        .col_index                      (col_index),
        .col_index_last                 (col_index_last),
        .slim_bram_xy_addr              (mac_slim_bram_xy_addr),
        .slim_bram_xy_bank              (mac_slim_bram_xy_bank),
        .rcmb_fat_bram_xy_bank          (recomb_fat_bram_xy_bank),
        .rcmb_fat_bram_xy_addr          (recomb_fat_bram_xy_addr),
        .rcmb_fat_bram_x_dout           (recomb_fat_bram_x_dout),
        .rcmb_fat_bram_y_dout           (recomb_fat_bram_y_dout),
        .rcmb_fat_bram_xy_dout_valid    (recomb_fat_bram_xy_dout_valid),
        .rcmb_slim_bram_xy_bank         (recomb_slim_bram_xy_bank),
        .rcmb_slim_bram_xy_addr         (recomb_slim_bram_xy_addr),
        .rcmb_slim_bram_x_dout          (recomb_slim_bram_x_dout),
        .rcmb_slim_bram_y_dout          (recomb_slim_bram_y_dout),
        .rcmb_slim_bram_xy_dout_valid   (recomb_slim_bram_xy_dout_valid)
    );

    //
    // Result capture: mirror every word the recombinator emits (X side only)
    // into flat arrays indexed like the reference vectors.
    //
    reg [17:0] AB_READ[0:63];
    reg [17:0] Q_READ[0:32];
    reg [17:0] M_READ[0:64];

    always @(posedge clk) begin
        //
        if (recomb_fat_bram_xy_dout_valid)
            case (recomb_fat_bram_xy_bank)
                BANK_FAT_ABL: AB_READ[      recomb_fat_bram_xy_addr % 32 ] <= recomb_fat_bram_x_dout;
                BANK_FAT_ABH: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
                BANK_FAT_ML:  M_READ [      recomb_fat_bram_xy_addr % 32 ] <= recomb_fat_bram_x_dout;
                BANK_FAT_MH:  M_READ [32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
                BANK_FAT_EXT: M_READ [64 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
            endcase
        //
        if (recomb_slim_bram_xy_dout_valid)
            case (recomb_slim_bram_xy_bank)
                BANK_SLIM_Q:   Q_READ[recomb_slim_bram_xy_addr] <= recomb_slim_bram_x_dout;
                // Only address 1 of the EXT bank is captured, as the 33rd word of Q.
                BANK_SLIM_EXT: if (recomb_slim_bram_xy_addr == 8'd1)
                                   Q_READ[32] <= recomb_slim_bram_x_dout;
            endcase
        //
    end

    //
    // Write-port arbitration: testbench preload has priority, then the
    // recombinator's output; otherwise the port is idle with X on its inputs.
    //
    always @(posedge clk)
        if (tb_fat_bram_xy_ena) begin
            mgr_fat_bram_xy_ena  <= 1'b1;
            mgr_fat_bram_xy_bank <= tb_fat_bram_xy_bank;
            mgr_fat_bram_xy_addr <= tb_fat_bram_xy_addr;
            mgr_fat_bram_x_din   <= tb_fat_bram_x_din;
            mgr_fat_bram_y_din   <= tb_fat_bram_y_din;
        end else if (recomb_fat_bram_xy_dout_valid) begin
            mgr_fat_bram_xy_ena  <= 1'b1;
            mgr_fat_bram_xy_bank <= recomb_fat_bram_xy_bank;
            mgr_fat_bram_xy_addr <= recomb_fat_bram_xy_addr;
            mgr_fat_bram_x_din   <= recomb_fat_bram_x_dout;
            mgr_fat_bram_y_din   <= recomb_fat_bram_y_dout;
        end else begin
            mgr_fat_bram_xy_ena  <= 1'b0;
            mgr_fat_bram_xy_bank <= 3'bXXX;
            mgr_fat_bram_xy_addr <= 8'hXX;
            mgr_fat_bram_x_din   <= {18{1'bX}};
            mgr_fat_bram_y_din   <= {18{1'bX}};
        end

    always @(posedge clk)
        if (tb_slim_bram_xy_ena) begin
            mgr_slim_bram_xy_ena  <= 1'b1;
            mgr_slim_bram_xy_bank <= tb_slim_bram_xy_bank;
            mgr_slim_bram_xy_addr <= tb_slim_bram_xy_addr;
            mgr_slim_bram_x_din   <= tb_slim_bram_x_din;
            mgr_slim_bram_y_din   <= tb_slim_bram_y_din;
        end else if (recomb_slim_bram_xy_dout_valid) begin
            mgr_slim_bram_xy_ena  <= 1'b1;
            mgr_slim_bram_xy_bank <= recomb_slim_bram_xy_bank;
            mgr_slim_bram_xy_addr <= recomb_slim_bram_xy_addr;
            mgr_slim_bram_x_din   <= recomb_slim_bram_x_dout;
            mgr_slim_bram_y_din   <= recomb_slim_bram_y_dout;
        end else begin
            mgr_slim_bram_xy_ena  <= 1'b0;
            mgr_slim_bram_xy_bank <= {2{1'bX}};     // slim bank select is 2 bits wide
            mgr_slim_bram_xy_addr <= 8'hXX;
            mgr_slim_bram_x_din   <= {18{1'bX}};
            mgr_slim_bram_y_din   <= {18{1'bX}};
        end

    //
    // Verification tasks. Each prints every expected/actual pair, marks the
    // mismatching ones, and ends with a one-line verdict. Comparison uses
    // === so a never-written (X) word counts as a mismatch.
    // All three tasks reuse the module-level loop variable `i`.
    //
    task verify_ab;
        reg verify_ab_ok;
        begin
            verify_ab_ok = 1;
            for (i=0; i<64; i=i+1)
                if (AB_READ[i] === AB[i])
                    $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x", i, AB[i], AB_READ[i]);
                else begin
                    $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x <-- MISMATCH", i, AB[i], AB_READ[i]);
                    verify_ab_ok = 0;
                end
            if (verify_ab_ok) $display("AB is OK.");
            else              $display("AB is WRONG!");
        end
    endtask

    task verify_q;
        reg verify_q_ok;
        begin
            verify_q_ok = 1;
            for (i=0; i<33; i=i+1)
                if (Q_READ[i] === Q[i])
                    $display("Q / Q_READ [%02d] = 0x%05x / 0x%05x", i, Q[i], Q_READ[i]);
                else begin
                    $display("Q / Q_READ [%02d] = 0x%05x / 0x%05x <-- MISMATCH", i, Q[i], Q_READ[i]);
                    verify_q_ok = 0;
                end
            if (verify_q_ok) $display("Q is OK.");
            else             $display("Q is WRONG!");
        end
    endtask

    task verify_m;
        reg verify_m_ok;
        begin
            verify_m_ok = 1;
            for (i=0; i<65; i=i+1)
                if (M_READ[i] === M[i])
                    $display("M / M_READ [%02d] = 0x%05x / 0x%05x", i, M[i], M_READ[i]);
                else begin
                    $display("M / M_READ [%02d] = 0x%05x / 0x%05x <-- MISMATCH", i, M[i], M_READ[i]);
                    verify_m_ok = 0;
                end
            if (verify_m_ok) $display("M is OK.");
            else             $display("M is WRONG!");
        end
    endtask

    //
    // Per-phase "column finished" strobes used by the next-state logic.
    // The triangle phase uses the later (tardy) flag on the last column.
    //
    wire mult_square_addr_done    = mult_square_addr_surely_done_flop;
    wire mult_triangle_addr_done  = !col_is_last ? mult_triangle_addr_surely_done_flop : mult_triangle_addr_tardy_done_flop;
    wire mult_rectangle_addr_done = mult_rectangle_addr_tardy_done_flop;

    //
    // Next-state logic. Each phase runs COL_0 (INIT -> TRIG -> BUSY), then
    // COL_N (INIT -> TRIG -> BUSY) repeatedly until the last column, then a
    // HOLDOFF state that waits for the recombinator's rdy before moving on.
    // Any state without an explicit entry (including FSM_STATE_STOP) returns
    // to IDLE.
    //
    always @* begin
        //
        fsm_state_next = FSM_STATE_IDLE;
        //
        case (fsm_state)
            FSM_STATE_IDLE:                      fsm_state_next = ena                      ? FSM_STATE_MULT_SQUARE_COL_0_INIT    : FSM_STATE_IDLE;

            FSM_STATE_MULT_SQUARE_COL_0_INIT:    fsm_state_next =                            FSM_STATE_MULT_SQUARE_COL_0_TRIG;
            FSM_STATE_MULT_SQUARE_COL_0_TRIG:    fsm_state_next =                            FSM_STATE_MULT_SQUARE_COL_0_BUSY;
            FSM_STATE_MULT_SQUARE_COL_0_BUSY:    fsm_state_next = mult_square_addr_done    ? FSM_STATE_MULT_SQUARE_COL_N_INIT    : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
            FSM_STATE_MULT_SQUARE_COL_N_INIT:    fsm_state_next =                            FSM_STATE_MULT_SQUARE_COL_N_TRIG;
            FSM_STATE_MULT_SQUARE_COL_N_TRIG:    fsm_state_next =                            FSM_STATE_MULT_SQUARE_COL_N_BUSY;
            FSM_STATE_MULT_SQUARE_COL_N_BUSY:    fsm_state_next = mult_square_addr_done    ? fsm_state_after_mult_square         : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
            FSM_STATE_MULT_SQUARE_HOLDOFF:       fsm_state_next = recomb_rdy               ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT  : FSM_STATE_MULT_SQUARE_HOLDOFF;

            FSM_STATE_MULT_TRIANGLE_COL_0_INIT:  fsm_state_next =                            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG;
            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG:  fsm_state_next =                            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:  fsm_state_next = mult_triangle_addr_done  ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT  : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
            FSM_STATE_MULT_TRIANGLE_COL_N_INIT:  fsm_state_next =                            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG;
            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  fsm_state_next =                            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  fsm_state_next = mult_triangle_addr_done  ? fsm_state_after_mult_triangle       : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
            FSM_STATE_MULT_TRIANGLE_HOLDOFF:     fsm_state_next = recomb_rdy               ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF;

            FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next =                            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG;
            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next =                            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = mult_rectangle_addr_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
            FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next =                            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG;
            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next =                            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = mult_rectangle_addr_done ? fsm_state_after_mult_rectangle      : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
            FSM_STATE_MULT_RECTANGLE_HOLDOFF:    fsm_state_next = recomb_rdy               ? FSM_STATE_STOP                      : FSM_STATE_MULT_RECTANGLE_HOLDOFF;

            default:                             fsm_state_next =                            FSM_STATE_IDLE;
        endcase
        //
    end

endmodule