aboutsummaryrefslogtreecommitdiff
path: root/bench
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-01 15:01:43 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-01 15:01:43 +0300
commit29fb6afd018c601a2e0c7376656d5e37beb565d6 (patch)
treedc11ee0c8e5a30113052254be23594da74a8a572 /bench
parentec07464d239f7f6379a682ac57b58b863d3f0374 (diff)
Started working on the pipelined Montgomery modular multiplier. Currently can
do the "square" part of the multiplication, i.e. compute the twice larger intermediate product AB = A * B.
Diffstat (limited to 'bench')
-rw-r--r--bench/tb_mmm_x8_dual.v327
-rw-r--r--bench/tb_square.v716
2 files changed, 1043 insertions, 0 deletions
diff --git a/bench/tb_mmm_x8_dual.v b/bench/tb_mmm_x8_dual.v
new file mode 100644
index 0000000..aa25900
--- /dev/null
+++ b/bench/tb_mmm_x8_dual.v
@@ -0,0 +1,327 @@
+`timescale 1ns / 1ps
+
+module tb_mmm_x8_dual;
+
+
+ //
+ // Headers
+ //
+ `include "../rtl/modexpng_parameters.vh"
+ `include "../rtl/modexpng_parameters_x8.vh"
+
+
+ //
+ // Settings
+ //
+ localparam INDEX_WIDTH = 6;
+
+ wire [INDEX_WIDTH-1:0] index_last = 31; // 512 bits
+
+
+ //
+ // Clock
+ //
+ `define CLK_FREQUENCY_MHZ 100.0
+ `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ)
+ `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS)
+
+ reg clk = 1'b0;
+
+ always begin
+ #`CLK_PERIOD_HALF_NS clk = 1'b1;
+ #`CLK_PERIOD_HALF_NS clk = 1'b0;
+ end
+
+
+ //
+ // Reset
+ //
+ reg rst = 1'b1;
+ wire rst_n = ~rst;
+
+
+ //
+ // Control
+ //
+ reg ena = 1'b0;
+ wire rdy;
+
+ reg mode;
+ reg transfer;
+
+
+ //
+ // Interface
+ //
+
+
+ //
+ // Interface - Data Buses
+ //
+ wire [NUM_MULTS*WORD_WIDTH-1:0] x_din;
+ wire [NUM_MULTS*WORD_WIDTH-1:0] y_din;
+ wire [NUM_MULTS*WORD_WIDTH-1:0] x_dout;
+ wire [NUM_MULTS*WORD_WIDTH-1:0] y_dout;
+
+
+ //
+ // Interface - Address Buses
+ //
+ wire [INDEX_WIDTH-4:0] x_din_addr;
+ wire [INDEX_WIDTH-4:0] y_din_addr;
+ wire [INDEX_WIDTH-4:0] x_dout_addr;
+ wire [INDEX_WIDTH-4:0] y_dout_addr;
+
+
+ //
+ // Interface - Enable Buses
+ //
+ wire [ 1-1:0] x_din_ena;
+ wire [ 1-1:0] y_din_ena;
+ wire [ 1-1:0] x_din_reg_ena;
+ wire [ 1-1:0] y_din_reg_ena;
+ wire [NUM_MULTS-1:0] x_dout_ena;
+ wire [NUM_MULTS-1:0] y_dout_ena;
+
+
+ //
+ // Interface - Bank Buses
+ //
+ wire [3-1:0] x_din_bank;
+ wire [3-1:0] y_din_bank;
+ wire [3-1:0] x_dout_bank;
+ wire [3-1:0] y_dout_bank;
+
+
+ //
+ // Operands
+ //
+ reg [WORD_WIDTH-1:0] T1[0:2**INDEX_WIDTH-1];
+ reg [WORD_WIDTH-1:0] T2[0:2**INDEX_WIDTH-1];
+ reg [WORD_WIDTH-1:0] N[0:2**INDEX_WIDTH-1];
+ reg [WORD_WIDTH-1:0] N_COEFF[0:2**INDEX_WIDTH];
+
+
+ //
+ // Memories
+ //
+ genvar z;
+ generate for (z=0; z<NUM_MULTS; z=z+1)
+ //
+ begin : gen_z_mem
+ //
+ modexpng_mem /*bram_1wo_1ro_readfirst_ce*/ #
+ (
+ .MEM_WIDTH(WORD_WIDTH),
+ .MEM_ADDR_BITS(INDEX_WIDTH) // - clog2(NUM_MULTS) + clog2(NUM_BANKS)
+ )
+ gen_z_mem_x
+ (
+ .clk (clk),
+
+ .a_addr ({x_dout_bank, x_dout_addr}),
+ .a_en (x_dout_ena[z]),
+ .a_wr (x_dout_ena[z]),
+ .a_in (x_dout[z*WORD_WIDTH+:WORD_WIDTH]),
+ .a_out (), // unused
+
+ .b_addr ({x_din_bank, x_din_addr}),
+ .b_en (x_din_ena),
+ .b_reg_en (x_din_reg_ena),
+ .b_out (x_din[z*WORD_WIDTH+:WORD_WIDTH])
+ );
+ //
+ modexpng_mem /*bram_1wo_1ro_readfirst_ce*/ #
+ (
+ .MEM_WIDTH(WORD_WIDTH),
+ .MEM_ADDR_BITS(INDEX_WIDTH) // - clog2(NUM_MULTS) + clog2(NUM_BANKS)
+ )
+ gen_z_mem_y
+ (
+ .clk (clk),
+
+ .a_addr ({y_dout_bank, y_dout_addr}),
+ .a_en (y_dout_ena[z]),
+ .a_wr (y_dout_ena[z]),
+ .a_in (y_dout[z*WORD_WIDTH+:WORD_WIDTH]),
+ .a_out (), // unused
+
+ .b_addr ({y_din_bank, y_din_addr}),
+ .b_en (y_din_ena),
+ .b_reg_en (y_din_reg_ena),
+ .b_out (y_din[z*WORD_WIDTH+:WORD_WIDTH])
+ );
+ //
+ end
+ //
+ endgenerate
+
+
+ // T1 / T2
+ // N / N_COEFF
+ // AB_LSB
+ // AB_MSB
+ // M
+ // Q_LSB
+ // Q_MSB
+ // ?
+
+
+ //
+ // Operands - Values
+ //
+ initial begin
+ //
+ T1[ 0] = 18'h0b27b; T1[ 1] = 18'h0fc7d; T1[ 2] = 18'h0a214; T1[ 3] = 18'h08d2b;
+ T1[ 4] = 18'h1c80c; T1[ 5] = 18'h145f1; T1[ 6] = 18'h00db6; T1[ 7] = 18'h1cf0f;
+ T1[ 8] = 18'h19386; T1[ 9] = 18'h02ad9; T1[10] = 18'h1a8b5; T1[11] = 18'h1479b;
+ T1[12] = 18'h08b5f; T1[13] = 18'h14806; T1[14] = 18'h0e6f7; T1[15] = 18'h0ce9d;
+ T1[16] = 18'h0cbc2; T1[17] = 18'h16ef1; T1[18] = 18'h0e14e; T1[19] = 18'h1796f;
+ T1[20] = 18'h14901; T1[21] = 18'h06666; T1[22] = 18'h0cb9f; T1[23] = 18'h09ab4;
+ T1[24] = 18'h12ffc; T1[25] = 18'h0a86d; T1[26] = 18'h19d35; T1[27] = 18'h0cda9;
+ T1[28] = 18'h16a19; T1[29] = 18'h09a36; T1[30] = 18'h0b176; T1[31] = 18'h0e0dc;
+ //
+ T2[ 0] = 18'h0b21a; T2[ 1] = 18'h13e71; T2[ 2] = 18'h03459; T2[ 3] = 18'h1063f;
+ T2[ 4] = 18'h18cef; T2[ 5] = 18'h1b8a5; T2[ 6] = 18'h082d1; T2[ 7] = 18'h1b1be;
+ T2[ 8] = 18'h18979; T2[ 9] = 18'h1409a; T2[10] = 18'h1713c; T2[11] = 18'h0cda3;
+ T2[12] = 18'h11c7d; T2[13] = 18'h0c943; T2[14] = 18'h12d7c; T2[15] = 18'h1531e;
+ T2[16] = 18'h0a45a; T2[17] = 18'h1c637; T2[18] = 18'h0906a; T2[19] = 18'h1670e;
+ T2[20] = 18'h12f78; T2[21] = 18'h08ce6; T2[22] = 18'h1c5c7; T2[23] = 18'h1292d;
+ T2[24] = 18'h0fc4b; T2[25] = 18'h064fb; T2[26] = 18'h0cc3c; T2[27] = 18'h19b37;
+ T2[28] = 18'h1b721; T2[29] = 18'h0f424; T2[30] = 18'h0f608; T2[31] = 18'h03e9b;
+ //
+ N[ 0] = 18'h00a9d; N[ 1] = 18'h01175; N[ 2] = 18'h0254f; N[ 3] = 18'h0ee38;
+ N[ 4] = 18'h00a6a; N[ 5] = 18'h0c7bd; N[ 6] = 18'h0ddac; N[ 7] = 18'h069fe;
+ N[ 8] = 18'h0e9d6; N[ 9] = 18'h0b6bf; N[10] = 18'h09230; N[11] = 18'h04fc5;
+ N[12] = 18'h05c9f; N[13] = 18'h09502; N[14] = 18'h0cbc5; N[15] = 18'h03109;
+ N[16] = 18'h08029; N[17] = 18'h0b27c; N[18] = 18'h0eeb8; N[19] = 18'h0c191;
+ N[20] = 18'h0ff86; N[21] = 18'h027ab; N[22] = 18'h07d76; N[23] = 18'h0ff1a;
+ N[24] = 18'h02afc; N[25] = 18'h0b25a; N[26] = 18'h0d3c1; N[27] = 18'h05589;
+ N[28] = 18'h09f7c; N[29] = 18'h0ddd6; N[30] = 18'h0b4fc; N[31] = 18'h0e8e7;
+ //
+ N_COEFF[ 0] = 18'h0344b; N_COEFF[ 1] = 18'h0ca66; N_COEFF[ 2] = 18'h0d9e8; N_COEFF[ 3] = 18'h070d5;
+ N_COEFF[ 4] = 18'h0ce4b; N_COEFF[ 5] = 18'h049b2; N_COEFF[ 6] = 18'h0abb3; N_COEFF[ 7] = 18'h0c3b2;
+ N_COEFF[ 8] = 18'h0ad38; N_COEFF[ 9] = 18'h05672; N_COEFF[10] = 18'h0fd47; N_COEFF[11] = 18'h06671;
+ N_COEFF[12] = 18'h00b7f; N_COEFF[13] = 18'h0fa35; N_COEFF[14] = 18'h0d4ac; N_COEFF[15] = 18'h0f1ca;
+ N_COEFF[16] = 18'h08e0a; N_COEFF[17] = 18'h05858; N_COEFF[18] = 18'h02dc6; N_COEFF[19] = 18'h08cfc;
+ N_COEFF[20] = 18'h01941; N_COEFF[21] = 18'h0f855; N_COEFF[22] = 18'h01e43; N_COEFF[23] = 18'h053f0;
+ N_COEFF[24] = 18'h0a479; N_COEFF[25] = 18'h0ae7e; N_COEFF[26] = 18'h05c66; N_COEFF[27] = 18'h02413;
+ N_COEFF[28] = 18'h0b5f8; N_COEFF[29] = 18'h0eb06; N_COEFF[30] = 18'h0de5b; N_COEFF[31] = 18'h0a751;
+ N_COEFF[32] = 18'h0c1ec;
+ //
+ end
+
+
+ //
+ // Load Interface
+ //
+ wire load_phase;
+ wire [ INDEX_WIDTH:0] load_xy_addr;
+ wire load_xy_addr_vld;
+ wire load_xy_req;
+ reg [ WORD_WIDTH-1:0] load_x_din;
+ reg [ WORD_WIDTH-1:0] load_y_din;
+ reg [ WORD_WIDTH-1:0] load_x_pipe;
+ reg [ WORD_WIDTH-1:0] load_y_pipe;
+
+ always @(posedge clk)
+ //
+ if (load_xy_addr_vld) begin
+
+ if (!load_phase) begin
+ load_x_pipe <= T1[load_xy_addr];
+ load_y_pipe <= T2[load_xy_addr];
+ end else begin
+ load_x_pipe <= !load_xy_addr[INDEX_WIDTH] ? N[load_xy_addr] : {WORD_WIDTH{1'bX}};
+ load_y_pipe <= N_COEFF[load_xy_addr];
+ end
+ end
+
+ always @(posedge clk)
+ //
+ if (load_xy_req)
+ {load_y_din, load_x_din} <= {load_y_pipe, load_x_pipe};
+ else
+ {load_y_din, load_x_din} <= {2*WORD_WIDTH{1'bX}};
+
+
+ //
+ // UUT
+ //
+ modexpng_mmm_x8_dual #
+ (
+ .INDEX_WIDTH(INDEX_WIDTH)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .mode (mode),
+ .transfer (transfer),
+
+ .index_last (index_last),
+
+ .x_din (x_din),
+ .y_din (y_din),
+ .x_dout (x_dout),
+ .y_dout (y_dout),
+
+ .x_din_addr (x_din_addr),
+ .y_din_addr (y_din_addr),
+ .x_dout_addr (x_dout_addr),
+ .y_dout_addr (y_dout_addr),
+
+ .x_din_ena (x_din_ena),
+ .y_din_ena (y_din_ena),
+ .x_dout_ena (x_dout_ena),
+ .y_dout_ena (y_dout_ena),
+
+ .x_din_reg_ena (x_din_reg_ena),
+ .y_din_reg_ena (y_din_reg_ena),
+
+ .x_din_bank (x_din_bank),
+ .y_din_bank (y_din_bank),
+ .x_dout_bank (x_dout_bank),
+ .y_dout_bank (y_dout_bank),
+
+ .load_phase (load_phase),
+ .load_xy_addr (load_xy_addr),
+ .load_xy_addr_vld (load_xy_addr_vld),
+ .load_xy_req (load_xy_req),
+ .load_x_din (load_x_din),
+ .load_y_din (load_y_din)
+ );
+
+
+ //
+ // Script
+ //
+ initial begin
+ #(100.0*`CLK_PERIOD_NS) rst = 1'b0;
+ #(100.0*`CLK_PERIOD_NS) ena = 1'b1;
+ transfer = 1'b1;
+ mode = 1'b0;
+ #( 1.0*`CLK_PERIOD_NS) ena = 1'b0;
+ transfer = 1'bX;
+ mode = 1'bX;
+
+ while (!rdy) #`CLK_PERIOD_NS;
+
+ #(100.0*`CLK_PERIOD_NS) ena = 1'b1;
+ transfer = 1'b0;
+ mode = 1'b0;
+ #( 1.0*`CLK_PERIOD_NS) ena = 1'b0;
+ transfer = 1'bX;
+ mode = 1'bX;
+
+ while (!rdy) #`CLK_PERIOD_NS;
+
+ end
+
+
+endmodule
+
diff --git a/bench/tb_square.v b/bench/tb_square.v
new file mode 100644
index 0000000..61e5d8a
--- /dev/null
+++ b/bench/tb_square.v
@@ -0,0 +1,716 @@
+`timescale 1ns / 1ps
+
+module tb_square;
+
+
+ //
+ // Headers
+ //
+ `include "../rtl/modexpng_parameters.vh"
+ `include "../rtl/modexpng_parameters_x8.vh"
+ `include "../rtl/modexpng_mmm_fsm.vh"
+
+
+ //
+ // Clock
+ //
+ `define CLK_FREQUENCY_MHZ 100.0
+ `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ)
+ `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS)
+
+ reg clk = 1'b0;
+
+ always begin
+ #`CLK_PERIOD_HALF_NS clk = 1'b1;
+ #`CLK_PERIOD_HALF_NS clk = 1'b0;
+ end
+
+
+ //
+ // Reset
+ //
+ reg rst = 1'b1;
+
+
+
+ //
+ // T1, T2
+ //
+ reg [17:0] T1[0:31];
+ reg [17:0] T2[0:31];
+ reg [17:0] AB[0:63];
+
+
+ //
+ // Init
+ //
+ initial begin
+ //
+ T1[ 0] = 18'h0f13e; T1[ 1] = 18'h0daf6; T1[ 2] = 18'h0aaa9; T1[ 3] = 18'h0c2c2;
+ T1[ 4] = 18'h0fc5f; T1[ 5] = 18'h12164; T1[ 6] = 18'h14375; T1[ 7] = 18'h15615;
+ T1[ 8] = 18'h0d8e2; T1[ 9] = 18'h0ec15; T1[10] = 18'h17c46; T1[11] = 18'h0c922;
+ T1[12] = 18'h08f00; T1[13] = 18'h152f9; T1[14] = 18'h0b0b6; T1[15] = 18'h0ce87;
+ T1[16] = 18'h178f2; T1[17] = 18'h09efb; T1[18] = 18'h0409d; T1[19] = 18'h11104;
+ T1[20] = 18'h0b4a6; T1[21] = 18'h158a6; T1[22] = 18'h0514e; T1[23] = 18'h0ec55;
+ T1[24] = 18'h11e73; T1[25] = 18'h11ddd; T1[26] = 18'h07bd4; T1[27] = 18'h0638b;
+ T1[28] = 18'h0e805; T1[29] = 18'h11c4f; T1[30] = 18'h0a2eb; T1[31] = 18'h05454;
+ //
+ T2[ 0] = 18'h1a479; T2[ 1] = 18'h102f5; T2[ 2] = 18'h10e72; T2[ 3] = 18'h120b1;
+ T2[ 4] = 18'h169cd; T2[ 5] = 18'h1d0c4; T2[ 6] = 18'h11462; T2[ 7] = 18'h12015;
+ T2[ 8] = 18'h16fca; T2[ 9] = 18'h1044f; T2[10] = 18'h122b4; T2[11] = 18'h10a5a;
+ T2[12] = 18'h12620; T2[13] = 18'h0e01a; T2[14] = 18'h095cd; T2[15] = 18'h1278a;
+ T2[16] = 18'h10763; T2[17] = 18'h09fe7; T2[18] = 18'h0d35c; T2[19] = 18'h10e24;
+ T2[20] = 18'h1527d; T2[21] = 18'h115b3; T2[22] = 18'h05443; T2[23] = 18'h1190a;
+ T2[24] = 18'h0fcc3; T2[25] = 18'h115e2; T2[26] = 18'h0a398; T2[27] = 18'h0608d;
+ T2[28] = 18'h13075; T2[29] = 18'h0d816; T2[30] = 18'h0bb4c; T2[31] = 18'h04e8a;
+ //
+ AB[ 0] = 18'h0be4e; AB[ 1] = 18'h0fed7; AB[ 2] = 18'h09496; AB[ 3] = 18'h07181;
+ AB[ 4] = 18'h0ee73; AB[ 5] = 18'h04692; AB[ 6] = 18'h0141a; AB[ 7] = 18'h0078c;
+ AB[ 8] = 18'h030eb; AB[ 9] = 18'h0217c; AB[10] = 18'h0696f; AB[11] = 18'h0a165;
+ AB[12] = 18'h0b753; AB[13] = 18'h04af9; AB[14] = 18'h0ed7c; AB[15] = 18'h079ce;
+ AB[16] = 18'h0e863; AB[17] = 18'h097df; AB[18] = 18'h07984; AB[19] = 18'h048af;
+ AB[20] = 18'h0197f; AB[21] = 18'h0206a; AB[22] = 18'h027e7; AB[23] = 18'h04b3a;
+ AB[24] = 18'h03312; AB[25] = 18'h03b56; AB[26] = 18'h04487; AB[27] = 18'h0bd6a;
+ AB[28] = 18'h04e4b; AB[29] = 18'h069ca; AB[30] = 18'h0f994; AB[31] = 18'h0dd4e;
+ AB[32] = 18'h1b024; AB[33] = 18'h0127f; AB[34] = 18'h02631; AB[35] = 18'h0186b;
+ AB[36] = 18'h03adb; AB[37] = 18'h05368; AB[38] = 18'h059a5; AB[39] = 18'h002e0;
+ AB[40] = 18'h0b78a; AB[41] = 18'h016f3; AB[42] = 18'h0b58d; AB[43] = 18'h03ddb;
+ AB[44] = 18'h078b0; AB[45] = 18'h0073b; AB[46] = 18'h07337; AB[47] = 18'h0c7b0;
+ AB[48] = 18'h00668; AB[49] = 18'h0106d; AB[50] = 18'h01a44; AB[51] = 18'h05ee3;
+ AB[52] = 18'h0462d; AB[53] = 18'h0fdeb; AB[54] = 18'h05f85; AB[55] = 18'h02af9;
+ AB[56] = 18'h0e1c0; AB[57] = 18'h00989; AB[58] = 18'h01201; AB[59] = 18'h0e194;
+ AB[60] = 18'h07f93; AB[61] = 18'h0e739; AB[62] = 18'h07cf6; AB[63] = 18'h019df;
+ //
+ end
+
+
+ //
+ // BRAMs
+ //
+ reg tb_fat_bram_xy_ena = 1'b0;
+ reg [ 2:0] tb_fat_bram_xy_bank;
+ reg [ 7:0] tb_fat_bram_xy_addr;
+ reg [17:0] tb_fat_bram_x_din;
+ reg [17:0] tb_fat_bram_y_din;
+
+ reg mgr_fat_bram_xy_ena = 1'b0;
+ reg [ 2:0] mgr_fat_bram_xy_bank;
+ reg [ 7:0] mgr_fat_bram_xy_addr;
+ reg [17:0] mgr_fat_bram_x_din;
+ reg [17:0] mgr_fat_bram_y_din;
+
+ reg mac_fat_bram_xy_ena = 1'b0;
+ reg mac_fat_bram_xy_reg_ena = 1'b0;
+ reg [ 2:0] mac_fat_bram_xy_bank;
+ reg [ 7:0] mac_fat_bram_xy_addr[0:3];
+ wire [17:0] mac_fat_bram_x_dout[0:3];
+ wire [17:0] mac_fat_bram_y_dout[0:3];
+
+ reg tb_slim_bram_xy_ena = 1'b0;
+ reg [ 1:0] tb_slim_bram_xy_bank;
+ reg [ 7:0] tb_slim_bram_xy_addr;
+ reg [17:0] tb_slim_bram_x_din;
+ reg [17:0] tb_slim_bram_y_din;
+
+ reg mac_slim_bram_xy_ena = 1'b0;
+ reg mac_slim_bram_xy_reg_ena = 1'b0;
+ reg [ 1:0] mac_slim_bram_xy_bank;
+ reg [ 7:0] mac_slim_bram_xy_addr;
+ reg [ 7:0] mac_slim_bram_xy_addr_dly;
+ wire [17:0] mac_slim_bram_x_dout;
+ wire [17:0] mac_slim_bram_y_dout;
+
+ always @(posedge clk)
+ //
+ mac_slim_bram_xy_addr_dly <= mac_slim_bram_xy_addr;
+
+ reg mac_slim_bram_xy_reg_ena_dly = 1'b0;
+ always @(posedge clk)
+ mac_slim_bram_xy_reg_ena_dly <= mac_slim_bram_xy_reg_ena;
+
+
+
+ genvar z;
+ generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ begin : gen_fat_bram
+ //
+ ip_bram_36k fat_bram_x
+ (
+ .clka (clk),
+ .ena (mgr_fat_bram_xy_ena),
+ .wea (mgr_fat_bram_xy_ena),
+ .addra ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}),
+ .dina (mgr_fat_bram_x_din),
+
+ .clkb (clk),
+ .enb (mac_fat_bram_xy_ena),
+ .regceb (mac_fat_bram_xy_reg_ena),
+ .addrb ({mac_fat_bram_xy_bank, mac_fat_bram_xy_addr[z]}),
+ .doutb (mac_fat_bram_x_dout[z])
+ );
+ //
+ ip_bram_36k fat_bram_y
+ (
+ .clka (clk),
+ .ena (mgr_fat_bram_xy_ena),
+ .wea (mgr_fat_bram_xy_ena),
+ .addra ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}),
+ .dina (mgr_fat_bram_y_din),
+
+ .clkb (clk),
+ .enb (mac_fat_bram_xy_ena),
+ .regceb (mac_fat_bram_xy_reg_ena),
+ .addrb ({mac_fat_bram_xy_bank, mac_fat_bram_xy_addr[z]}),
+ .doutb (mac_fat_bram_y_dout[z])
+ );
+ //
+ end
+ endgenerate
+
+ ip_bram_18k slim_bram_x
+ (
+ .clka (clk),
+ .ena (tb_slim_bram_xy_ena),
+ .wea (tb_slim_bram_xy_ena),
+ .addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}),
+ .dina (tb_slim_bram_x_din),
+
+ .clkb (clk),
+ .enb (mac_slim_bram_xy_ena),
+ .regceb (mac_slim_bram_xy_reg_ena),
+ .addrb ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}),
+ .doutb (mac_slim_bram_x_dout)
+ );
+
+ ip_bram_18k slim_bram_y
+ (
+ .clka (clk),
+ .ena (tb_slim_bram_xy_ena),
+ .wea (tb_slim_bram_xy_ena),
+ .addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}),
+ .dina (tb_slim_bram_y_din),
+
+ .clkb (clk),
+ .enb (mac_slim_bram_xy_ena),
+ .regceb (mac_slim_bram_xy_reg_ena),
+ .addrb ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}),
+ .doutb (mac_slim_bram_y_dout)
+ );
+
+
+
+ //
+ // Enable, Ready
+ //
+ reg ena = 1'b0;
+
+ integer i;
+ initial begin
+
+ for (i=0; i<10; i=i+1)
+ wait_clock_tick;
+
+ rst = 1'b0;
+
+ for (i=0; i<10; i=i+1)
+ wait_clock_tick;
+
+ tb_fat_bram_xy_ena = 1'b1;
+ tb_slim_bram_xy_ena = 1'b1;
+
+ for (i=0; i<32; i=i+1) begin
+ tb_fat_bram_xy_bank = BANK_FAT_T1T2;
+ tb_fat_bram_xy_addr = i[7:0];
+ tb_fat_bram_x_din = T1[i];
+ tb_fat_bram_y_din = T2[i];
+
+ tb_slim_bram_xy_bank = BANK_SLIM_T1T2;
+ tb_slim_bram_xy_addr = i[7:0];
+ tb_slim_bram_x_din = T1[i];
+ tb_slim_bram_y_din = T2[i];
+
+ wait_clock_tick;
+ end
+
+ tb_fat_bram_xy_ena = 1'b0;
+ tb_slim_bram_xy_ena = 1'b0;
+
+ tb_fat_bram_xy_bank = {3{1'bX}};
+ tb_fat_bram_xy_addr = {8{1'bX}};
+ tb_fat_bram_x_din = {18{1'bX}};
+ tb_fat_bram_y_din = {18{1'bX}};
+
+ tb_slim_bram_xy_bank = {2{1'bX}};
+ tb_slim_bram_xy_addr = {8{1'bX}};
+ tb_slim_bram_x_din = {18{1'bX}};
+ tb_slim_bram_y_din = {18{1'bX}};
+
+ for (i=0; i<10; i=i+1)
+ wait_clock_tick;
+
+ ena = 1'b1;
+ wait_clock_tick;
+ ena = 1'b0;
+
+ for (i=0; i<10000; i=i+1)
+ wait_clock_tick;
+
+ verify_ab;
+
+ end
+
+
+ //
+ // DSPs
+ //
+ reg dsp_x_ce_a;
+ reg dsp_x_ce_b;
+ reg dsp_x_ce_b_dly;
+ reg dsp_x_ce_m;
+ reg dsp_x_ce_p;
+ reg dsp_x_ce_mode;
+
+ reg [8 -1:0] dsp_x_mode_z = {8{1'b1}};
+
+ wire [4*18-1:0] dsp_x_a;
+ reg [1*17-1:0] dsp_x_b;
+ wire [8*47-1:0] dsp_x_p;
+
+ reg dsp_y_ce_a;
+ reg dsp_y_ce_b;
+ reg dsp_y_ce_b_dly;
+ reg dsp_y_ce_m;
+ reg dsp_y_ce_p;
+ reg dsp_y_ce_mode;
+
+ reg [8 -1:0] dsp_y_mode_z = {8{1'b1}};
+
+ wire [4*18-1:0] dsp_y_a;
+ reg [1*17-1:0] dsp_y_b;
+ wire [8*47-1:0] dsp_y_p;
+
+ generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ begin : gen_dsp_xy_a_split
+ assign dsp_x_a[18*z+:18] = mac_fat_bram_x_dout[z];
+ assign dsp_y_a[18*z+:18] = mac_fat_bram_y_dout[z];
+ end
+ endgenerate
+
+ always @(posedge clk)
+ //
+ {dsp_y_ce_b_dly, dsp_x_ce_b_dly} <= {dsp_y_ce_b, dsp_x_ce_b};
+
+
+ reg [8 -1:0] dsp_xy_mode_z_adv1 = {8{1'b1}};
+ reg [8 -1:0] dsp_xy_mode_z_adv2 = {8{1'b1}};
+ reg [8 -1:0] dsp_xy_mode_z_adv3 = {8{1'b1}};
+ reg [8 -1:0] dsp_xy_mode_z_adv4 = {8{1'b1}};
+
+ dsp_array dsp_x
+ (
+ .clk (clk),
+
+ .ce_a (dsp_x_ce_a),
+ .ce_b (dsp_x_ce_b),
+ .ce_m (dsp_x_ce_m),
+ .ce_p (dsp_x_ce_p),
+ .ce_mode (dsp_x_ce_mode),
+
+ .mode_z (dsp_x_mode_z),
+
+ .a (dsp_x_a),
+ .b (dsp_x_b),
+ .p (dsp_x_p)
+ );
+
+ dsp_array dsp_y
+ (
+ .clk (clk),
+
+ .ce_a (dsp_y_ce_a),
+ .ce_b (dsp_y_ce_b),
+ .ce_m (dsp_y_ce_m),
+ .ce_p (dsp_y_ce_p),
+ .ce_mode (dsp_y_ce_mode),
+
+ .mode_z (dsp_y_mode_z),
+
+ .a (dsp_y_a),
+ .b (dsp_y_b),
+ .p (dsp_y_p)
+ );
+
+
+ //
+ // FSM State and Next States
+ //
+ reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
+ reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
+
+
+ always @(posedge clk)
+ //
+ if (rst) fsm_state <= FSM_STATE_IDLE;
+ else fsm_state <= fsm_state_next;
+
+
+ localparam [7:0] index_last = 8'd31;
+
+
+ wire mult_square_addr_almost_done_comb;
+ reg mult_square_addr_almost_done_flop;
+
+ wire mult_square_addr_surely_done_comb;
+ reg mult_square_addr_surely_done_flop;
+
+ assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == (index_last - 8'd1);
+ assign mult_square_addr_surely_done_comb = mac_slim_bram_xy_addr == index_last;
+
+ always @(posedge clk)
+ //
+ case (fsm_state)
+
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY:
+ {mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <=
+ {mult_square_addr_surely_done_comb, mult_square_addr_almost_done_comb};
+
+ default:
+ {mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <= 2'b00;
+
+ endcase
+
+
+ //
+ // Column
+ //
+ reg [4:0] col_index;
+ reg [4:0] col_index_prev;
+ reg [4:0] col_index_last;
+
+ always @(posedge clk)
+ //
+ col_index_prev <= col_index;
+
+ //
+ // FSM Transition Logic
+ //
+ wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
+
+
+
+ always @(posedge clk)
+ //
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_addr <= !mult_square_addr_almost_done_flop ? mac_slim_bram_xy_addr + 1'b1 : 8'd0;
+ default: mac_slim_bram_xy_addr <= 8'dX;
+ endcase
+
+ integer j;
+ always @(posedge clk)
+ //
+ for (j=0; j<(NUM_MULTS/2); j=j+1)
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= 1 + 2 * j;
+ FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= 8 * (col_index + 1) + 1 + 2 * j;
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
+ default: mac_fat_bram_xy_addr[j] <= 8'dX;
+ endcase
+
+
+
+ always @(posedge clk)
+ //
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_bank <= BANK_SLIM_T1T2;
+ default: mac_slim_bram_xy_bank <= 2'bXX;
+ endcase
+
+ always @(posedge clk)
+ //
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_bank <= BANK_FAT_T1T2;
+ default: mac_fat_bram_xy_bank <= 3'bXXX;
+ endcase
+
+
+
+ always @(posedge clk)
+ //
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1;
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_ena <= ~mult_square_addr_almost_done_flop;
+ default: mac_slim_bram_xy_ena <= 1'b0;
+ endcase
+
+ always @(posedge clk)
+ //
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1;
+ default: mac_fat_bram_xy_ena <= 1'b0;
+ endcase
+
+
+ always @(posedge clk)
+ //
+ mac_slim_bram_xy_reg_ena <= mac_slim_bram_xy_ena;
+
+ always @(posedge clk)
+ //
+ mac_fat_bram_xy_reg_ena <= mac_fat_bram_xy_ena;
+
+
+ always @(posedge clk)
+ //
+ if (mac_slim_bram_xy_reg_ena_dly)
+ {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]};
+ else
+ {dsp_y_b, dsp_x_b} <= {2{{17{1'bX}}}};
+
+
+ function [7:0] mac_fat_bram_xy_addr_next;
+ input [7:0] mac_fat_bram_xy_addr_current;
+ input [7:0] mac_fat_bram_xy_addr_last;
+ begin
+ if (mac_fat_bram_xy_addr_current > 0)
+ mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_current - 1'b1;
+ else
+ mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_last;
+ end
+ endfunction
+
+
+
+ always @(posedge clk)
+ //
+ {dsp_y_ce_a, dsp_x_ce_a} <= {2{mac_slim_bram_xy_reg_ena | mac_slim_bram_xy_reg_ena_dly}};
+
+ always @(posedge clk)
+ //
+ {dsp_y_ce_b, dsp_x_ce_b} <= {2{mac_slim_bram_xy_reg_ena_dly}};
+
+ always @(posedge clk)
+ //
+ {dsp_y_ce_m, dsp_x_ce_m} <= {dsp_y_ce_b_dly, dsp_x_ce_b_dly};
+
+ always @(posedge clk)
+ //
+ {dsp_y_ce_p, dsp_x_ce_p} <= {dsp_y_ce_m, dsp_x_ce_m};
+
+ always @(posedge clk)
+ //
+ {dsp_y_ce_mode, dsp_x_ce_mode} <= {dsp_y_ce_b_dly, dsp_x_ce_b_dly};
+
+ task wait_clock_tick;
+ begin
+ #`CLK_PERIOD_NS;
+ end
+ endtask
+
+ //
+ // Increment Logic
+ //
+ always @(posedge clk)
+ //
+ case (fsm_state_next)
+ //
+ FSM_STATE_MULT_SQUARE_COL_0_INIT: begin
+ col_index <= 5'd0;
+ col_index_last <= index_last[7:3];
+ end
+ //
+ FSM_STATE_MULT_SQUARE_COL_N_INIT:
+ col_index <= col_index + 1'b1;
+ //
+ endcase
+
+ assign fsm_state_after_mult_square = (col_index == col_index_last) ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
+
+ always @(posedge clk)
+ //
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {8{1'b0}};
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
+ default: dsp_xy_mode_z_adv4 <= {8{1'b1}};
+ endcase
+
+ always @(posedge clk) begin
+ {dsp_y_mode_z, dsp_x_mode_z} <= {2{dsp_xy_mode_z_adv1}};
+ //
+ dsp_xy_mode_z_adv1 <= {dsp_xy_mode_z_adv2};
+ dsp_xy_mode_z_adv2 <= {dsp_xy_mode_z_adv3};
+ dsp_xy_mode_z_adv3 <= {dsp_xy_mode_z_adv4};
+ end
+
+ function [NUM_MULTS-1:0] calc_mac_mode_z_square;
+ input [ 4:0] col_index_value;
+ input [ 7:0] mac_slim_bram_xy_addr_value;
+ begin
+ if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
+ case (mac_slim_bram_xy_addr_value[2:0])
+ 3'b000: calc_mac_mode_z_square = 8'b11111110;
+ 3'b001: calc_mac_mode_z_square = 8'b11111101;
+ 3'b010: calc_mac_mode_z_square = 8'b11111011;
+ 3'b011: calc_mac_mode_z_square = 8'b11110111;
+ 3'b100: calc_mac_mode_z_square = 8'b11101111;
+ 3'b101: calc_mac_mode_z_square = 8'b11011111;
+ 3'b110: calc_mac_mode_z_square = 8'b10111111;
+ 3'b111: calc_mac_mode_z_square = 8'b01111111;
+ endcase
+ else
+ calc_mac_mode_z_square = {NUM_MULTS{1'b1}};
+ end
+ endfunction
+
+ reg recomb_x_ena = 1'b0;
+ reg recomb_y_ena = 1'b0;
+
+ always @(posedge clk) begin
+ //
+ recomb_x_ena <= dsp_x_ce_a && !dsp_x_ce_b && !dsp_x_ce_m && !dsp_x_ce_p;
+ recomb_y_ena <= dsp_y_ce_a && !dsp_y_ce_b && !dsp_y_ce_m && !dsp_y_ce_p;
+ //
+ end
+
+ wire [ 2:0] recomb_fat_bram_xy_bank;
+ wire [ 7:0] recomb_fat_bram_xy_addr;
+ wire [17:0] recomb_fat_bram_x_dout;
+ wire [17:0] recomb_fat_bram_y_dout;
+ wire recomb_fat_bram_xy_dout_valid;
+ wire recomb_rdy;
+
+ modexpng_part_recombinator recomb
+ (
+ .clk (clk),
+ .rdy (recomb_rdy),
+ .fsm_state_next (fsm_state_next),
+ .index_last (index_last),
+ .dsp_x_ce_p (dsp_x_ce_p),
+ .dsp_y_ce_p (dsp_y_ce_p),
+ .ena_x (recomb_x_ena),
+ .ena_y (recomb_y_ena),
+ .dsp_x_p (dsp_x_p),
+ .dsp_y_p (dsp_y_p),
+ .col_index (col_index),
+ .col_index_last (col_index_last),
+ .slim_bram_xy_addr (mac_slim_bram_xy_addr),
+ .fat_bram_xy_bank (recomb_fat_bram_xy_bank),
+ .fat_bram_xy_addr (recomb_fat_bram_xy_addr),
+ .fat_bram_x_dout (recomb_fat_bram_x_dout),
+ .fat_bram_y_dout (recomb_fat_bram_y_dout),
+ .fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid)
+ );
+
+ reg [17:0] AB_READ[0:63];
+
+ always @(posedge clk)
+ //
+ if (recomb_fat_bram_xy_dout_valid)
+ //
+ case (recomb_fat_bram_xy_bank)
+ 3'd1: AB_READ[recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout;
+ 3'd2: AB_READ[32 + recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout;
+ endcase
+
+
+ always @(posedge clk)
+ //
+ if (tb_fat_bram_xy_ena) begin
+ mgr_fat_bram_xy_ena <= 1'b1;
+ mgr_fat_bram_xy_bank <= tb_fat_bram_xy_bank;
+ mgr_fat_bram_xy_addr <= tb_fat_bram_xy_addr;
+ mgr_fat_bram_x_din <= tb_fat_bram_x_din;
+ mgr_fat_bram_y_din <= tb_fat_bram_y_din;
+ end else if (recomb_fat_bram_xy_dout_valid) begin
+ mgr_fat_bram_xy_ena <= 1'b1;
+ mgr_fat_bram_xy_bank <= recomb_fat_bram_xy_bank;
+ mgr_fat_bram_xy_addr <= recomb_fat_bram_xy_addr;
+ mgr_fat_bram_x_din <= recomb_fat_bram_x_dout;
+ mgr_fat_bram_y_din <= recomb_fat_bram_y_dout;
+ end else begin
+ mgr_fat_bram_xy_ena <= 1'b0;
+ mgr_fat_bram_xy_bank <= 3'bXXX;
+ mgr_fat_bram_xy_addr <= 8'hXX;
+ mgr_fat_bram_x_din <= {18{1'bX}};
+ mgr_fat_bram_y_din <= {18{1'bX}};
+ end
+
+
+
+
+
+ task verify_ab;
+ reg verify_ab_ok;
+ begin
+ verify_ab_ok = 1;
+ for (i=0; i<64; i=i+1)
+ if (AB_READ[i] === AB[i])
+ $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x", i, AB[i], AB_READ[i]);
+ else begin
+ $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x <???>", i, AB[i], AB_READ[i]);
+ verify_ab_ok = 0;
+ end
+ if (verify_ab_ok)
+ $display("AB is OK.");
+ else
+ $display("AB is WRONG!");
+ end
+ endtask
+
+
+
+ always @* begin
+ //
+ fsm_state_next = FSM_STATE_IDLE;
+ //
+ case (fsm_state)
+ FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE;
+
+ FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
+
+ FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+
+ FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF;
+
+ default: fsm_state_next = FSM_STATE_IDLE ;
+
+ endcase
+ //
+ end
+
+
+endmodule
+