diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 15:01:43 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 15:01:43 +0300 |
commit | 29fb6afd018c601a2e0c7376656d5e37beb565d6 (patch) | |
tree | dc11ee0c8e5a30113052254be23594da74a8a572 | |
parent | ec07464d239f7f6379a682ac57b58b863d3f0374 (diff) |
Started working on the pipelined Montgomery modular multiplier. Currently it can
do the "square" part of the multiplication, i.e. compute the double-width
intermediate product AB = A * B.
-rw-r--r-- | bench/tb_mmm_x8_dual.v | 327 | ||||
-rw-r--r-- | bench/tb_square.v | 716 | ||||
-rw-r--r-- | rtl/dev/temp.txt | 384 | ||||
-rw-r--r-- | rtl/dsp/dsp_array.v | 111 | ||||
-rw-r--r-- | rtl/dsp/dsp_slice.v | 125 | ||||
-rw-r--r-- | rtl/modexpng_mac.v | 54 | ||||
-rw-r--r-- | rtl/modexpng_mac_array.v | 116 | ||||
-rw-r--r-- | rtl/modexpng_mem.v | 93 | ||||
-rw-r--r-- | rtl/modexpng_mmm_col_index.v | 90 | ||||
-rw-r--r-- | rtl/modexpng_mmm_din_addr.v | 167 | ||||
-rw-r--r-- | rtl/modexpng_mmm_dout_addr.v | 167 | ||||
-rw-r--r-- | rtl/modexpng_mmm_fsm.vh | 24 | ||||
-rw-r--r-- | rtl/modexpng_mmm_pad.v | 153 | ||||
-rw-r--r-- | rtl/modexpng_mmm_transporter.v | 157 | ||||
-rw-r--r-- | rtl/modexpng_mmm_x8_dual.v | 550 | ||||
-rw-r--r-- | rtl/modexpng_parameters.vh | 39 | ||||
-rw-r--r-- | rtl/modexpng_parameters_x8.vh | 1 | ||||
-rw-r--r-- | rtl/modexpng_part_recombinator.v | 623 | ||||
-rw-r--r-- | rtl/modexpng_recombinator_block.v | 35 |
19 files changed, 3932 insertions, 0 deletions
diff --git a/bench/tb_mmm_x8_dual.v b/bench/tb_mmm_x8_dual.v new file mode 100644 index 0000000..aa25900 --- /dev/null +++ b/bench/tb_mmm_x8_dual.v @@ -0,0 +1,327 @@ +`timescale 1ns / 1ps + +module tb_mmm_x8_dual; + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + `include "../rtl/modexpng_parameters_x8.vh" + + + // + // Settings + // + localparam INDEX_WIDTH = 6; + + wire [INDEX_WIDTH-1:0] index_last = 31; // 512 bits + + + // + // Clock + // + `define CLK_FREQUENCY_MHZ 100.0 + `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) + `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + + reg clk = 1'b0; + + always begin + #`CLK_PERIOD_HALF_NS clk = 1'b1; + #`CLK_PERIOD_HALF_NS clk = 1'b0; + end + + + // + // Reset + // + reg rst = 1'b1; + wire rst_n = ~rst; + + + // + // Control + // + reg ena = 1'b0; + wire rdy; + + reg mode; + reg transfer; + + + // + // Interface + // + + + // + // Interface - Data Buses + // + wire [NUM_MULTS*WORD_WIDTH-1:0] x_din; + wire [NUM_MULTS*WORD_WIDTH-1:0] y_din; + wire [NUM_MULTS*WORD_WIDTH-1:0] x_dout; + wire [NUM_MULTS*WORD_WIDTH-1:0] y_dout; + + + // + // Interface - Address Buses + // + wire [INDEX_WIDTH-4:0] x_din_addr; + wire [INDEX_WIDTH-4:0] y_din_addr; + wire [INDEX_WIDTH-4:0] x_dout_addr; + wire [INDEX_WIDTH-4:0] y_dout_addr; + + + // + // Interface - Enable Buses + // + wire [ 1-1:0] x_din_ena; + wire [ 1-1:0] y_din_ena; + wire [ 1-1:0] x_din_reg_ena; + wire [ 1-1:0] y_din_reg_ena; + wire [NUM_MULTS-1:0] x_dout_ena; + wire [NUM_MULTS-1:0] y_dout_ena; + + + // + // Interface - Bank Buses + // + wire [3-1:0] x_din_bank; + wire [3-1:0] y_din_bank; + wire [3-1:0] x_dout_bank; + wire [3-1:0] y_dout_bank; + + + // + // Operands + // + reg [WORD_WIDTH-1:0] T1[0:2**INDEX_WIDTH-1]; + reg [WORD_WIDTH-1:0] T2[0:2**INDEX_WIDTH-1]; + reg [WORD_WIDTH-1:0] N[0:2**INDEX_WIDTH-1]; + reg [WORD_WIDTH-1:0] N_COEFF[0:2**INDEX_WIDTH]; + + + // + // Memories + // + genvar z; + generate for (z=0; z<NUM_MULTS; z=z+1) + // + begin 
: gen_z_mem + // + modexpng_mem /*bram_1wo_1ro_readfirst_ce*/ # + ( + .MEM_WIDTH(WORD_WIDTH), + .MEM_ADDR_BITS(INDEX_WIDTH) // - clog2(NUM_MULTS) + clog2(NUM_BANKS) + ) + gen_z_mem_x + ( + .clk (clk), + + .a_addr ({x_dout_bank, x_dout_addr}), + .a_en (x_dout_ena[z]), + .a_wr (x_dout_ena[z]), + .a_in (x_dout[z*WORD_WIDTH+:WORD_WIDTH]), + .a_out (), // unused + + .b_addr ({x_din_bank, x_din_addr}), + .b_en (x_din_ena), + .b_reg_en (x_din_reg_ena), + .b_out (x_din[z*WORD_WIDTH+:WORD_WIDTH]) + ); + // + modexpng_mem /*bram_1wo_1ro_readfirst_ce*/ # + ( + .MEM_WIDTH(WORD_WIDTH), + .MEM_ADDR_BITS(INDEX_WIDTH) // - clog2(NUM_MULTS) + clog2(NUM_BANKS) + ) + gen_z_mem_y + ( + .clk (clk), + + .a_addr ({y_dout_bank, y_dout_addr}), + .a_en (y_dout_ena[z]), + .a_wr (y_dout_ena[z]), + .a_in (y_dout[z*WORD_WIDTH+:WORD_WIDTH]), + .a_out (), // unused + + .b_addr ({y_din_bank, y_din_addr}), + .b_en (y_din_ena), + .b_reg_en (y_din_reg_ena), + .b_out (y_din[z*WORD_WIDTH+:WORD_WIDTH]) + ); + // + end + // + endgenerate + + + // T1 / T2 + // N / N_COEFF + // AB_LSB + // AB_MSB + // M + // Q_LSB + // Q_MSB + // ? 
+ + + // + // Operands - Values + // + initial begin + // + T1[ 0] = 18'h0b27b; T1[ 1] = 18'h0fc7d; T1[ 2] = 18'h0a214; T1[ 3] = 18'h08d2b; + T1[ 4] = 18'h1c80c; T1[ 5] = 18'h145f1; T1[ 6] = 18'h00db6; T1[ 7] = 18'h1cf0f; + T1[ 8] = 18'h19386; T1[ 9] = 18'h02ad9; T1[10] = 18'h1a8b5; T1[11] = 18'h1479b; + T1[12] = 18'h08b5f; T1[13] = 18'h14806; T1[14] = 18'h0e6f7; T1[15] = 18'h0ce9d; + T1[16] = 18'h0cbc2; T1[17] = 18'h16ef1; T1[18] = 18'h0e14e; T1[19] = 18'h1796f; + T1[20] = 18'h14901; T1[21] = 18'h06666; T1[22] = 18'h0cb9f; T1[23] = 18'h09ab4; + T1[24] = 18'h12ffc; T1[25] = 18'h0a86d; T1[26] = 18'h19d35; T1[27] = 18'h0cda9; + T1[28] = 18'h16a19; T1[29] = 18'h09a36; T1[30] = 18'h0b176; T1[31] = 18'h0e0dc; + // + T2[ 0] = 18'h0b21a; T2[ 1] = 18'h13e71; T2[ 2] = 18'h03459; T2[ 3] = 18'h1063f; + T2[ 4] = 18'h18cef; T2[ 5] = 18'h1b8a5; T2[ 6] = 18'h082d1; T2[ 7] = 18'h1b1be; + T2[ 8] = 18'h18979; T2[ 9] = 18'h1409a; T2[10] = 18'h1713c; T2[11] = 18'h0cda3; + T2[12] = 18'h11c7d; T2[13] = 18'h0c943; T2[14] = 18'h12d7c; T2[15] = 18'h1531e; + T2[16] = 18'h0a45a; T2[17] = 18'h1c637; T2[18] = 18'h0906a; T2[19] = 18'h1670e; + T2[20] = 18'h12f78; T2[21] = 18'h08ce6; T2[22] = 18'h1c5c7; T2[23] = 18'h1292d; + T2[24] = 18'h0fc4b; T2[25] = 18'h064fb; T2[26] = 18'h0cc3c; T2[27] = 18'h19b37; + T2[28] = 18'h1b721; T2[29] = 18'h0f424; T2[30] = 18'h0f608; T2[31] = 18'h03e9b; + // + N[ 0] = 18'h00a9d; N[ 1] = 18'h01175; N[ 2] = 18'h0254f; N[ 3] = 18'h0ee38; + N[ 4] = 18'h00a6a; N[ 5] = 18'h0c7bd; N[ 6] = 18'h0ddac; N[ 7] = 18'h069fe; + N[ 8] = 18'h0e9d6; N[ 9] = 18'h0b6bf; N[10] = 18'h09230; N[11] = 18'h04fc5; + N[12] = 18'h05c9f; N[13] = 18'h09502; N[14] = 18'h0cbc5; N[15] = 18'h03109; + N[16] = 18'h08029; N[17] = 18'h0b27c; N[18] = 18'h0eeb8; N[19] = 18'h0c191; + N[20] = 18'h0ff86; N[21] = 18'h027ab; N[22] = 18'h07d76; N[23] = 18'h0ff1a; + N[24] = 18'h02afc; N[25] = 18'h0b25a; N[26] = 18'h0d3c1; N[27] = 18'h05589; + N[28] = 18'h09f7c; N[29] = 18'h0ddd6; N[30] = 18'h0b4fc; N[31] = 
18'h0e8e7; + // + N_COEFF[ 0] = 18'h0344b; N_COEFF[ 1] = 18'h0ca66; N_COEFF[ 2] = 18'h0d9e8; N_COEFF[ 3] = 18'h070d5; + N_COEFF[ 4] = 18'h0ce4b; N_COEFF[ 5] = 18'h049b2; N_COEFF[ 6] = 18'h0abb3; N_COEFF[ 7] = 18'h0c3b2; + N_COEFF[ 8] = 18'h0ad38; N_COEFF[ 9] = 18'h05672; N_COEFF[10] = 18'h0fd47; N_COEFF[11] = 18'h06671; + N_COEFF[12] = 18'h00b7f; N_COEFF[13] = 18'h0fa35; N_COEFF[14] = 18'h0d4ac; N_COEFF[15] = 18'h0f1ca; + N_COEFF[16] = 18'h08e0a; N_COEFF[17] = 18'h05858; N_COEFF[18] = 18'h02dc6; N_COEFF[19] = 18'h08cfc; + N_COEFF[20] = 18'h01941; N_COEFF[21] = 18'h0f855; N_COEFF[22] = 18'h01e43; N_COEFF[23] = 18'h053f0; + N_COEFF[24] = 18'h0a479; N_COEFF[25] = 18'h0ae7e; N_COEFF[26] = 18'h05c66; N_COEFF[27] = 18'h02413; + N_COEFF[28] = 18'h0b5f8; N_COEFF[29] = 18'h0eb06; N_COEFF[30] = 18'h0de5b; N_COEFF[31] = 18'h0a751; + N_COEFF[32] = 18'h0c1ec; + // + end + + + // + // Load Interface + // + wire load_phase; + wire [ INDEX_WIDTH:0] load_xy_addr; + wire load_xy_addr_vld; + wire load_xy_req; + reg [ WORD_WIDTH-1:0] load_x_din; + reg [ WORD_WIDTH-1:0] load_y_din; + reg [ WORD_WIDTH-1:0] load_x_pipe; + reg [ WORD_WIDTH-1:0] load_y_pipe; + + always @(posedge clk) + // + if (load_xy_addr_vld) begin + + if (!load_phase) begin + load_x_pipe <= T1[load_xy_addr]; + load_y_pipe <= T2[load_xy_addr]; + end else begin + load_x_pipe <= !load_xy_addr[INDEX_WIDTH] ? 
N[load_xy_addr] : {WORD_WIDTH{1'bX}}; + load_y_pipe <= N_COEFF[load_xy_addr]; + end + end + + always @(posedge clk) + // + if (load_xy_req) + {load_y_din, load_x_din} <= {load_y_pipe, load_x_pipe}; + else + {load_y_din, load_x_din} <= {2*WORD_WIDTH{1'bX}}; + + + // + // UUT + // + modexpng_mmm_x8_dual # + ( + .INDEX_WIDTH(INDEX_WIDTH) + ) + uut + ( + .clk (clk), + .rst_n (rst_n), + + .ena (ena), + .rdy (rdy), + + .mode (mode), + .transfer (transfer), + + .index_last (index_last), + + .x_din (x_din), + .y_din (y_din), + .x_dout (x_dout), + .y_dout (y_dout), + + .x_din_addr (x_din_addr), + .y_din_addr (y_din_addr), + .x_dout_addr (x_dout_addr), + .y_dout_addr (y_dout_addr), + + .x_din_ena (x_din_ena), + .y_din_ena (y_din_ena), + .x_dout_ena (x_dout_ena), + .y_dout_ena (y_dout_ena), + + .x_din_reg_ena (x_din_reg_ena), + .y_din_reg_ena (y_din_reg_ena), + + .x_din_bank (x_din_bank), + .y_din_bank (y_din_bank), + .x_dout_bank (x_dout_bank), + .y_dout_bank (y_dout_bank), + + .load_phase (load_phase), + .load_xy_addr (load_xy_addr), + .load_xy_addr_vld (load_xy_addr_vld), + .load_xy_req (load_xy_req), + .load_x_din (load_x_din), + .load_y_din (load_y_din) + ); + + + // + // Script + // + initial begin + #(100.0*`CLK_PERIOD_NS) rst = 1'b0; + #(100.0*`CLK_PERIOD_NS) ena = 1'b1; + transfer = 1'b1; + mode = 1'b0; + #( 1.0*`CLK_PERIOD_NS) ena = 1'b0; + transfer = 1'bX; + mode = 1'bX; + + while (!rdy) #`CLK_PERIOD_NS; + + #(100.0*`CLK_PERIOD_NS) ena = 1'b1; + transfer = 1'b0; + mode = 1'b0; + #( 1.0*`CLK_PERIOD_NS) ena = 1'b0; + transfer = 1'bX; + mode = 1'bX; + + while (!rdy) #`CLK_PERIOD_NS; + + end + + +endmodule + diff --git a/bench/tb_square.v b/bench/tb_square.v new file mode 100644 index 0000000..61e5d8a --- /dev/null +++ b/bench/tb_square.v @@ -0,0 +1,716 @@ +`timescale 1ns / 1ps + +module tb_square; + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + `include "../rtl/modexpng_parameters_x8.vh" + `include "../rtl/modexpng_mmm_fsm.vh" + + + // + // 
Clock + // + `define CLK_FREQUENCY_MHZ 100.0 + `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) + `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + + reg clk = 1'b0; + + always begin + #`CLK_PERIOD_HALF_NS clk = 1'b1; + #`CLK_PERIOD_HALF_NS clk = 1'b0; + end + + + // + // Reset + // + reg rst = 1'b1; + + + + // + // T1, T2 + // + reg [17:0] T1[0:31]; + reg [17:0] T2[0:31]; + reg [17:0] AB[0:63]; + + + // + // Init + // + initial begin + // + T1[ 0] = 18'h0f13e; T1[ 1] = 18'h0daf6; T1[ 2] = 18'h0aaa9; T1[ 3] = 18'h0c2c2; + T1[ 4] = 18'h0fc5f; T1[ 5] = 18'h12164; T1[ 6] = 18'h14375; T1[ 7] = 18'h15615; + T1[ 8] = 18'h0d8e2; T1[ 9] = 18'h0ec15; T1[10] = 18'h17c46; T1[11] = 18'h0c922; + T1[12] = 18'h08f00; T1[13] = 18'h152f9; T1[14] = 18'h0b0b6; T1[15] = 18'h0ce87; + T1[16] = 18'h178f2; T1[17] = 18'h09efb; T1[18] = 18'h0409d; T1[19] = 18'h11104; + T1[20] = 18'h0b4a6; T1[21] = 18'h158a6; T1[22] = 18'h0514e; T1[23] = 18'h0ec55; + T1[24] = 18'h11e73; T1[25] = 18'h11ddd; T1[26] = 18'h07bd4; T1[27] = 18'h0638b; + T1[28] = 18'h0e805; T1[29] = 18'h11c4f; T1[30] = 18'h0a2eb; T1[31] = 18'h05454; + // + T2[ 0] = 18'h1a479; T2[ 1] = 18'h102f5; T2[ 2] = 18'h10e72; T2[ 3] = 18'h120b1; + T2[ 4] = 18'h169cd; T2[ 5] = 18'h1d0c4; T2[ 6] = 18'h11462; T2[ 7] = 18'h12015; + T2[ 8] = 18'h16fca; T2[ 9] = 18'h1044f; T2[10] = 18'h122b4; T2[11] = 18'h10a5a; + T2[12] = 18'h12620; T2[13] = 18'h0e01a; T2[14] = 18'h095cd; T2[15] = 18'h1278a; + T2[16] = 18'h10763; T2[17] = 18'h09fe7; T2[18] = 18'h0d35c; T2[19] = 18'h10e24; + T2[20] = 18'h1527d; T2[21] = 18'h115b3; T2[22] = 18'h05443; T2[23] = 18'h1190a; + T2[24] = 18'h0fcc3; T2[25] = 18'h115e2; T2[26] = 18'h0a398; T2[27] = 18'h0608d; + T2[28] = 18'h13075; T2[29] = 18'h0d816; T2[30] = 18'h0bb4c; T2[31] = 18'h04e8a; + // + AB[ 0] = 18'h0be4e; AB[ 1] = 18'h0fed7; AB[ 2] = 18'h09496; AB[ 3] = 18'h07181; + AB[ 4] = 18'h0ee73; AB[ 5] = 18'h04692; AB[ 6] = 18'h0141a; AB[ 7] = 18'h0078c; + AB[ 8] = 18'h030eb; AB[ 9] = 18'h0217c; AB[10] = 18'h0696f; 
AB[11] = 18'h0a165; + AB[12] = 18'h0b753; AB[13] = 18'h04af9; AB[14] = 18'h0ed7c; AB[15] = 18'h079ce; + AB[16] = 18'h0e863; AB[17] = 18'h097df; AB[18] = 18'h07984; AB[19] = 18'h048af; + AB[20] = 18'h0197f; AB[21] = 18'h0206a; AB[22] = 18'h027e7; AB[23] = 18'h04b3a; + AB[24] = 18'h03312; AB[25] = 18'h03b56; AB[26] = 18'h04487; AB[27] = 18'h0bd6a; + AB[28] = 18'h04e4b; AB[29] = 18'h069ca; AB[30] = 18'h0f994; AB[31] = 18'h0dd4e; + AB[32] = 18'h1b024; AB[33] = 18'h0127f; AB[34] = 18'h02631; AB[35] = 18'h0186b; + AB[36] = 18'h03adb; AB[37] = 18'h05368; AB[38] = 18'h059a5; AB[39] = 18'h002e0; + AB[40] = 18'h0b78a; AB[41] = 18'h016f3; AB[42] = 18'h0b58d; AB[43] = 18'h03ddb; + AB[44] = 18'h078b0; AB[45] = 18'h0073b; AB[46] = 18'h07337; AB[47] = 18'h0c7b0; + AB[48] = 18'h00668; AB[49] = 18'h0106d; AB[50] = 18'h01a44; AB[51] = 18'h05ee3; + AB[52] = 18'h0462d; AB[53] = 18'h0fdeb; AB[54] = 18'h05f85; AB[55] = 18'h02af9; + AB[56] = 18'h0e1c0; AB[57] = 18'h00989; AB[58] = 18'h01201; AB[59] = 18'h0e194; + AB[60] = 18'h07f93; AB[61] = 18'h0e739; AB[62] = 18'h07cf6; AB[63] = 18'h019df; + // + end + + + // + // BRAMs + // + reg tb_fat_bram_xy_ena = 1'b0; + reg [ 2:0] tb_fat_bram_xy_bank; + reg [ 7:0] tb_fat_bram_xy_addr; + reg [17:0] tb_fat_bram_x_din; + reg [17:0] tb_fat_bram_y_din; + + reg mgr_fat_bram_xy_ena = 1'b0; + reg [ 2:0] mgr_fat_bram_xy_bank; + reg [ 7:0] mgr_fat_bram_xy_addr; + reg [17:0] mgr_fat_bram_x_din; + reg [17:0] mgr_fat_bram_y_din; + + reg mac_fat_bram_xy_ena = 1'b0; + reg mac_fat_bram_xy_reg_ena = 1'b0; + reg [ 2:0] mac_fat_bram_xy_bank; + reg [ 7:0] mac_fat_bram_xy_addr[0:3]; + wire [17:0] mac_fat_bram_x_dout[0:3]; + wire [17:0] mac_fat_bram_y_dout[0:3]; + + reg tb_slim_bram_xy_ena = 1'b0; + reg [ 1:0] tb_slim_bram_xy_bank; + reg [ 7:0] tb_slim_bram_xy_addr; + reg [17:0] tb_slim_bram_x_din; + reg [17:0] tb_slim_bram_y_din; + + reg mac_slim_bram_xy_ena = 1'b0; + reg mac_slim_bram_xy_reg_ena = 1'b0; + reg [ 1:0] mac_slim_bram_xy_bank; + reg [ 7:0] 
mac_slim_bram_xy_addr; + reg [ 7:0] mac_slim_bram_xy_addr_dly; + wire [17:0] mac_slim_bram_x_dout; + wire [17:0] mac_slim_bram_y_dout; + + always @(posedge clk) + // + mac_slim_bram_xy_addr_dly <= mac_slim_bram_xy_addr; + + reg mac_slim_bram_xy_reg_ena_dly = 1'b0; + always @(posedge clk) + mac_slim_bram_xy_reg_ena_dly <= mac_slim_bram_xy_reg_ena; + + + + genvar z; + generate for (z=0; z<(NUM_MULTS/2); z=z+1) + begin : gen_fat_bram + // + ip_bram_36k fat_bram_x + ( + .clka (clk), + .ena (mgr_fat_bram_xy_ena), + .wea (mgr_fat_bram_xy_ena), + .addra ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}), + .dina (mgr_fat_bram_x_din), + + .clkb (clk), + .enb (mac_fat_bram_xy_ena), + .regceb (mac_fat_bram_xy_reg_ena), + .addrb ({mac_fat_bram_xy_bank, mac_fat_bram_xy_addr[z]}), + .doutb (mac_fat_bram_x_dout[z]) + ); + // + ip_bram_36k fat_bram_y + ( + .clka (clk), + .ena (mgr_fat_bram_xy_ena), + .wea (mgr_fat_bram_xy_ena), + .addra ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}), + .dina (mgr_fat_bram_y_din), + + .clkb (clk), + .enb (mac_fat_bram_xy_ena), + .regceb (mac_fat_bram_xy_reg_ena), + .addrb ({mac_fat_bram_xy_bank, mac_fat_bram_xy_addr[z]}), + .doutb (mac_fat_bram_y_dout[z]) + ); + // + end + endgenerate + + ip_bram_18k slim_bram_x + ( + .clka (clk), + .ena (tb_slim_bram_xy_ena), + .wea (tb_slim_bram_xy_ena), + .addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}), + .dina (tb_slim_bram_x_din), + + .clkb (clk), + .enb (mac_slim_bram_xy_ena), + .regceb (mac_slim_bram_xy_reg_ena), + .addrb ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}), + .doutb (mac_slim_bram_x_dout) + ); + + ip_bram_18k slim_bram_y + ( + .clka (clk), + .ena (tb_slim_bram_xy_ena), + .wea (tb_slim_bram_xy_ena), + .addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}), + .dina (tb_slim_bram_y_din), + + .clkb (clk), + .enb (mac_slim_bram_xy_ena), + .regceb (mac_slim_bram_xy_reg_ena), + .addrb ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}), + .doutb (mac_slim_bram_y_dout) + ); + + + + // + // Enable, Ready 
+ // + reg ena = 1'b0; + + integer i; + initial begin + + for (i=0; i<10; i=i+1) + wait_clock_tick; + + rst = 1'b0; + + for (i=0; i<10; i=i+1) + wait_clock_tick; + + tb_fat_bram_xy_ena = 1'b1; + tb_slim_bram_xy_ena = 1'b1; + + for (i=0; i<32; i=i+1) begin + tb_fat_bram_xy_bank = BANK_FAT_T1T2; + tb_fat_bram_xy_addr = i[7:0]; + tb_fat_bram_x_din = T1[i]; + tb_fat_bram_y_din = T2[i]; + + tb_slim_bram_xy_bank = BANK_SLIM_T1T2; + tb_slim_bram_xy_addr = i[7:0]; + tb_slim_bram_x_din = T1[i]; + tb_slim_bram_y_din = T2[i]; + + wait_clock_tick; + end + + tb_fat_bram_xy_ena = 1'b0; + tb_slim_bram_xy_ena = 1'b0; + + tb_fat_bram_xy_bank = {3{1'bX}}; + tb_fat_bram_xy_addr = {8{1'bX}}; + tb_fat_bram_x_din = {18{1'bX}}; + tb_fat_bram_y_din = {18{1'bX}}; + + tb_slim_bram_xy_bank = {2{1'bX}}; + tb_slim_bram_xy_addr = {8{1'bX}}; + tb_slim_bram_x_din = {18{1'bX}}; + tb_slim_bram_y_din = {18{1'bX}}; + + for (i=0; i<10; i=i+1) + wait_clock_tick; + + ena = 1'b1; + wait_clock_tick; + ena = 1'b0; + + for (i=0; i<10000; i=i+1) + wait_clock_tick; + + verify_ab; + + end + + + // + // DSPs + // + reg dsp_x_ce_a; + reg dsp_x_ce_b; + reg dsp_x_ce_b_dly; + reg dsp_x_ce_m; + reg dsp_x_ce_p; + reg dsp_x_ce_mode; + + reg [8 -1:0] dsp_x_mode_z = {8{1'b1}}; + + wire [4*18-1:0] dsp_x_a; + reg [1*17-1:0] dsp_x_b; + wire [8*47-1:0] dsp_x_p; + + reg dsp_y_ce_a; + reg dsp_y_ce_b; + reg dsp_y_ce_b_dly; + reg dsp_y_ce_m; + reg dsp_y_ce_p; + reg dsp_y_ce_mode; + + reg [8 -1:0] dsp_y_mode_z = {8{1'b1}}; + + wire [4*18-1:0] dsp_y_a; + reg [1*17-1:0] dsp_y_b; + wire [8*47-1:0] dsp_y_p; + + generate for (z=0; z<(NUM_MULTS/2); z=z+1) + begin : gen_dsp_xy_a_split + assign dsp_x_a[18*z+:18] = mac_fat_bram_x_dout[z]; + assign dsp_y_a[18*z+:18] = mac_fat_bram_y_dout[z]; + end + endgenerate + + always @(posedge clk) + // + {dsp_y_ce_b_dly, dsp_x_ce_b_dly} <= {dsp_y_ce_b, dsp_x_ce_b}; + + + reg [8 -1:0] dsp_xy_mode_z_adv1 = {8{1'b1}}; + reg [8 -1:0] dsp_xy_mode_z_adv2 = {8{1'b1}}; + reg [8 -1:0] dsp_xy_mode_z_adv3 = 
{8{1'b1}}; + reg [8 -1:0] dsp_xy_mode_z_adv4 = {8{1'b1}}; + + dsp_array dsp_x + ( + .clk (clk), + + .ce_a (dsp_x_ce_a), + .ce_b (dsp_x_ce_b), + .ce_m (dsp_x_ce_m), + .ce_p (dsp_x_ce_p), + .ce_mode (dsp_x_ce_mode), + + .mode_z (dsp_x_mode_z), + + .a (dsp_x_a), + .b (dsp_x_b), + .p (dsp_x_p) + ); + + dsp_array dsp_y + ( + .clk (clk), + + .ce_a (dsp_y_ce_a), + .ce_b (dsp_y_ce_b), + .ce_m (dsp_y_ce_m), + .ce_p (dsp_y_ce_p), + .ce_mode (dsp_y_ce_mode), + + .mode_z (dsp_y_mode_z), + + .a (dsp_y_a), + .b (dsp_y_b), + .p (dsp_y_p) + ); + + + // + // FSM State and Next States + // + reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE; + reg [FSM_STATE_WIDTH-1:0] fsm_state_next; + + + always @(posedge clk) + // + if (rst) fsm_state <= FSM_STATE_IDLE; + else fsm_state <= fsm_state_next; + + + localparam [7:0] index_last = 8'd31; + + + wire mult_square_addr_almost_done_comb; + reg mult_square_addr_almost_done_flop; + + wire mult_square_addr_surely_done_comb; + reg mult_square_addr_surely_done_flop; + + assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == (index_last - 8'd1); + assign mult_square_addr_surely_done_comb = mac_slim_bram_xy_addr == index_last; + + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: + {mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <= + {mult_square_addr_surely_done_comb, mult_square_addr_almost_done_comb}; + + default: + {mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <= 2'b00; + + endcase + + + // + // Column + // + reg [4:0] col_index; + reg [4:0] col_index_prev; + reg [4:0] col_index_last; + + always @(posedge clk) + // + col_index_prev <= col_index; + + // + // FSM Transition Logic + // + wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; + + + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0; + 
FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_addr <= !mult_square_addr_almost_done_flop ? mac_slim_bram_xy_addr + 1'b1 : 8'd0; + default: mac_slim_bram_xy_addr <= 8'dX; + endcase + + integer j; + always @(posedge clk) + // + for (j=0; j<(NUM_MULTS/2); j=j+1) + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= 1 + 2 * j; + FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= 8 * (col_index + 1) + 1 + 2 * j; + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last); + default: mac_fat_bram_xy_addr[j] <= 8'dX; + endcase + + + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_bank <= BANK_SLIM_T1T2; + default: mac_slim_bram_xy_bank <= 2'bXX; + endcase + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_bank <= BANK_FAT_T1T2; + default: mac_fat_bram_xy_bank <= 3'bXXX; + endcase + + + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_ena <= ~mult_square_addr_almost_done_flop; + default: mac_slim_bram_xy_ena <= 1'b0; + 
endcase + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1; + default: mac_fat_bram_xy_ena <= 1'b0; + endcase + + + always @(posedge clk) + // + mac_slim_bram_xy_reg_ena <= mac_slim_bram_xy_ena; + + always @(posedge clk) + // + mac_fat_bram_xy_reg_ena <= mac_fat_bram_xy_ena; + + + always @(posedge clk) + // + if (mac_slim_bram_xy_reg_ena_dly) + {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]}; + else + {dsp_y_b, dsp_x_b} <= {2{{17{1'bX}}}}; + + + function [7:0] mac_fat_bram_xy_addr_next; + input [7:0] mac_fat_bram_xy_addr_current; + input [7:0] mac_fat_bram_xy_addr_last; + begin + if (mac_fat_bram_xy_addr_current > 0) + mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_current - 1'b1; + else + mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_last; + end + endfunction + + + + always @(posedge clk) + // + {dsp_y_ce_a, dsp_x_ce_a} <= {2{mac_slim_bram_xy_reg_ena | mac_slim_bram_xy_reg_ena_dly}}; + + always @(posedge clk) + // + {dsp_y_ce_b, dsp_x_ce_b} <= {2{mac_slim_bram_xy_reg_ena_dly}}; + + always @(posedge clk) + // + {dsp_y_ce_m, dsp_x_ce_m} <= {dsp_y_ce_b_dly, dsp_x_ce_b_dly}; + + always @(posedge clk) + // + {dsp_y_ce_p, dsp_x_ce_p} <= {dsp_y_ce_m, dsp_x_ce_m}; + + always @(posedge clk) + // + {dsp_y_ce_mode, dsp_x_ce_mode} <= {dsp_y_ce_b_dly, dsp_x_ce_b_dly}; + + task wait_clock_tick; + begin + #`CLK_PERIOD_NS; + end + endtask + + // + // Increment Logic + // + always @(posedge clk) + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_INIT: begin + col_index <= 5'd0; + col_index_last <= index_last[7:3]; + end + // + FSM_STATE_MULT_SQUARE_COL_N_INIT: + col_index <= col_index + 1'b1; + // + endcase + + assign fsm_state_after_mult_square = (col_index == col_index_last) ? 
FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {8{1'b0}}; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly); + default: dsp_xy_mode_z_adv4 <= {8{1'b1}}; + endcase + + always @(posedge clk) begin + {dsp_y_mode_z, dsp_x_mode_z} <= {2{dsp_xy_mode_z_adv1}}; + // + dsp_xy_mode_z_adv1 <= {dsp_xy_mode_z_adv2}; + dsp_xy_mode_z_adv2 <= {dsp_xy_mode_z_adv3}; + dsp_xy_mode_z_adv3 <= {dsp_xy_mode_z_adv4}; + end + + function [NUM_MULTS-1:0] calc_mac_mode_z_square; + input [ 4:0] col_index_value; + input [ 7:0] mac_slim_bram_xy_addr_value; + begin + if (mac_slim_bram_xy_addr_value[7:3] == col_index_value) + case (mac_slim_bram_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_square = 8'b11111110; + 3'b001: calc_mac_mode_z_square = 8'b11111101; + 3'b010: calc_mac_mode_z_square = 8'b11111011; + 3'b011: calc_mac_mode_z_square = 8'b11110111; + 3'b100: calc_mac_mode_z_square = 8'b11101111; + 3'b101: calc_mac_mode_z_square = 8'b11011111; + 3'b110: calc_mac_mode_z_square = 8'b10111111; + 3'b111: calc_mac_mode_z_square = 8'b01111111; + endcase + else + calc_mac_mode_z_square = {NUM_MULTS{1'b1}}; + end + endfunction + + reg recomb_x_ena = 1'b0; + reg recomb_y_ena = 1'b0; + + always @(posedge clk) begin + // + recomb_x_ena <= dsp_x_ce_a && !dsp_x_ce_b && !dsp_x_ce_m && !dsp_x_ce_p; + recomb_y_ena <= dsp_y_ce_a && !dsp_y_ce_b && !dsp_y_ce_m && !dsp_y_ce_p; + // + end + + wire [ 2:0] recomb_fat_bram_xy_bank; + wire [ 7:0] recomb_fat_bram_xy_addr; + wire [17:0] recomb_fat_bram_x_dout; + wire [17:0] recomb_fat_bram_y_dout; + wire recomb_fat_bram_xy_dout_valid; + wire recomb_rdy; + + modexpng_part_recombinator recomb + ( + .clk (clk), + .rdy (recomb_rdy), + .fsm_state_next (fsm_state_next), + .index_last (index_last), + 
.dsp_x_ce_p (dsp_x_ce_p), + .dsp_y_ce_p (dsp_y_ce_p), + .ena_x (recomb_x_ena), + .ena_y (recomb_y_ena), + .dsp_x_p (dsp_x_p), + .dsp_y_p (dsp_y_p), + .col_index (col_index), + .col_index_last (col_index_last), + .slim_bram_xy_addr (mac_slim_bram_xy_addr), + .fat_bram_xy_bank (recomb_fat_bram_xy_bank), + .fat_bram_xy_addr (recomb_fat_bram_xy_addr), + .fat_bram_x_dout (recomb_fat_bram_x_dout), + .fat_bram_y_dout (recomb_fat_bram_y_dout), + .fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid) + ); + + reg [17:0] AB_READ[0:63]; + + always @(posedge clk) + // + if (recomb_fat_bram_xy_dout_valid) + // + case (recomb_fat_bram_xy_bank) + 3'd1: AB_READ[recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout; + 3'd2: AB_READ[32 + recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout; + endcase + + + always @(posedge clk) + // + if (tb_fat_bram_xy_ena) begin + mgr_fat_bram_xy_ena <= 1'b1; + mgr_fat_bram_xy_bank <= tb_fat_bram_xy_bank; + mgr_fat_bram_xy_addr <= tb_fat_bram_xy_addr; + mgr_fat_bram_x_din <= tb_fat_bram_x_din; + mgr_fat_bram_y_din <= tb_fat_bram_y_din; + end else if (recomb_fat_bram_xy_dout_valid) begin + mgr_fat_bram_xy_ena <= 1'b1; + mgr_fat_bram_xy_bank <= recomb_fat_bram_xy_bank; + mgr_fat_bram_xy_addr <= recomb_fat_bram_xy_addr; + mgr_fat_bram_x_din <= recomb_fat_bram_x_dout; + mgr_fat_bram_y_din <= recomb_fat_bram_y_dout; + end else begin + mgr_fat_bram_xy_ena <= 1'b0; + mgr_fat_bram_xy_bank <= 3'bXXX; + mgr_fat_bram_xy_addr <= 8'hXX; + mgr_fat_bram_x_din <= {18{1'bX}}; + mgr_fat_bram_y_din <= {18{1'bX}}; + end + + + + + + task verify_ab; + reg verify_ab_ok; + begin + verify_ab_ok = 1; + for (i=0; i<64; i=i+1) + if (AB_READ[i] === AB[i]) + $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x", i, AB[i], AB_READ[i]); + else begin + $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x <???>", i, AB[i], AB_READ[i]); + verify_ab_ok = 0; + end + if (verify_ab_ok) + $display("AB is OK."); + else + $display("AB is WRONG!"); + end + endtask + + + + always @* begin + // + 
fsm_state_next = FSM_STATE_IDLE; + // + case (fsm_state) + FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE; + + FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ; + FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ; + FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY; + + FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ; + FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY; + + FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF; + + default: fsm_state_next = FSM_STATE_IDLE ; + + endcase + // + end + + +endmodule + diff --git a/rtl/dev/temp.txt b/rtl/dev/temp.txt new file mode 100644 index 0000000..987bd86 --- /dev/null +++ b/rtl/dev/temp.txt @@ -0,0 +1,384 @@ + // + // Helper Functions + // + /* + function [INDEX_WIDTH-1:0] calc_preset_a_index; + input [INDEX_WIDTH-4:0] col_in; + input integer x_in; + integer index_out; + begin + index_out = col_in * NUM_MULTS + x_in; + calc_preset_a_index = index_out[INDEX_WIDTH-1:0]; + end + endfunction + + function [INDEX_WIDTH-1:0] calc_rotate_a_index; + input [INDEX_WIDTH-1:0] current_index_in; + input [INDEX_WIDTH-1:0] last_index_in; + begin + if (current_index_in > {INDEX_WIDTH{1'b0}}) + calc_rotate_a_index = current_index_in - 1'b1; + else + calc_rotate_a_index = last_index_in; + end + endfunction + */ + + /* + // + // Narrow Counters + // + reg [INDEX_WIDTH-1:0] din_addr_narrow_reg; + reg [INDEX_WIDTH-1:0] din_addr_narrow_dly; + localparam [INDEX_WIDTH-1:0] din_addr_narrow_zero = {INDEX_WIDTH{1'b0}}; + wire [INDEX_WIDTH-1:0] 
din_addr_narrow_next = (din_addr_narrow_reg < index_last) ? + din_addr_narrow_reg + 1'b1 : din_addr_narrow_zero; + wire din_addr_narrow_done = din_addr_narrow_reg == index_last; + + assign din_addr_narrow = din_addr_narrow_reg; + + always @(posedge clk) + // + din_addr_narrow_dly <= din_addr_narrow_reg; + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; + FSM_STATE_MULT_SQUARE_COL_0_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; + FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; + endcase + + + // + // Helper Functions + // + function [NUM_MULTS-1:0] calc_mac_clear_bitmask; + input [2:0] t; + begin + case (t) + 3'd0: calc_mac_clear_bitmask = 8'b00000001; + 3'd1: calc_mac_clear_bitmask = 8'b00000010; + 3'd2: calc_mac_clear_bitmask = 8'b00000100; + 3'd3: calc_mac_clear_bitmask = 8'b00001000; + 3'd4: calc_mac_clear_bitmask = 8'b00010000; + 3'd5: calc_mac_clear_bitmask = 8'b00100000; + 3'd6: calc_mac_clear_bitmask = 8'b01000000; + 3'd7: calc_mac_clear_bitmask = 8'b10000000; + endcase + end + endfunction + + function [NUM_MULTS:0] calc_mac_clear_square; + input [INDEX_WIDTH-4:0] current_col_index; + input [INDEX_WIDTH-1:0] b_addr_prev; + begin + if (b_addr_prev[INDEX_WIDTH-1:3] == current_col_index) + calc_mac_clear_square = {1'b0, calc_mac_clear_bitmask(b_addr_prev[2:0])}; + else + calc_mac_clear_square = {1'b0, {NUM_MULTS{1'b0}}}; + end + endfunction + + + // + // Wide Counters + // + reg [INDEX_WIDTH-1:0] din_addr_wide_reg[0:NUM_MULTS-1]; + + integer xi; + always @(posedge clk) + // + for (xi=0; xi<NUM_MULTS; xi=xi+1) + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(0, xi); + FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(col_index + 1'b1, xi); + // + 
FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_wide_reg[xi] <= calc_rotate_a_index(din_addr_wide_reg[xi], index_last); + // + endcase + + + // + // Enables + // + reg din_ena_narrow_reg = 1'b0; + reg [NUM_MULTS-1:0] din_ena_wide_reg = {NUM_MULTS{1'b0}}; + + assign din_ena_narrow = din_ena_narrow_reg; + assign din_ena_wide = din_ena_wide_reg; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) din_ena_narrow_reg <= 1'b0; + else case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_narrow_reg <= 1'b1; + default: din_ena_narrow_reg <= 1'b0; + endcase + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) din_ena_wide_reg <= {NUM_MULTS{1'b0}}; + else case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_wide_reg <= {NUM_MULTS{1'b1}}; + default: din_ena_wide_reg <= {NUM_MULTS{1'b0}}; + endcase + + + // + // Modes + // + reg [2-1:0] din_mode_wide_reg; + reg [2-1:0] din_mode_narrow_reg; + reg [2-1:0] dout_mode_wide_reg; + reg [2-1:0] dout_mode_narrow_reg; + + assign din_mode_wide = din_mode_wide_reg; + assign din_mode_narrow = din_mode_narrow_reg; + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_wide_reg <= MODEXPNG_MODE_A; + default: din_mode_wide_reg <= 2'bXX; + endcase + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_narrow_reg <= MODEXPNG_MODE_B; + default: din_mode_narrow_reg <= 2'bXX; + endcase + + + // + // MAC Array + // + wire [MODEXPNG_WORD_WIDTH-1:0] 
mac_din_a[0:NUM_MULTS]; + wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_b; + reg [ NUM_MULTS :0] mac_ce; + reg [ NUM_MULTS :0] mac_clr; + wire [ MODEXPNG_MAC_WIDTH-1:0] mac_p[0:NUM_MULTS]; + reg [ NUM_MULTS :0] mac_rdy_lsb; + reg [ NUM_MULTS :0] mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1:0]; + + //reg [ NUM_MULTS :0] mac_ce_dly[MODEXPNG_MAC_LATENCY-1:0]; + //wire [ NUM_MULTS :0] mac_rdy; + + + + + + assign mac_din_b = din_narrow; + + + genvar x; + generate for (x=0; x<=NUM_MULTS; x=x+1) + begin : gen_macs + // + //assign mac_rdy[x] = mac_ce_dly[MODEXPNG_MAC_LATENCY-1][x]; + // + modexpng_mac mac_inst + ( + .clk (clk), + .ce (mac_ce[x]), + .clr (mac_clr[x]), + .a (mac_din_a[x]), + .b (mac_din_b), + .p (mac_p[x]) + ); + // + end + // + endgenerate + + generate for (x=0; x<NUM_MULTS; x=x+1) + begin : gen_mac_din_a + // + assign mac_din_a[x] = din_wide[x*MODEXPNG_WORD_WIDTH+:MODEXPNG_WORD_WIDTH]; + // + end + endgenerate + + generate for (x=0; x<NUM_MULTS; x=x+1) + begin : gen_din_addr_wide + // + assign din_addr_wide[x*INDEX_WIDTH+:INDEX_WIDTH] = din_addr_wide_reg[x]; + // + end + endgenerate + + + // + // MAC Clock Enable Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) mac_ce <= {1'b0, {NUM_MULTS{1'b0}}}; + else case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_ce <= {1'b0, {NUM_MULTS{1'b1}}}; + default: mac_ce <= {1'b0, {NUM_MULTS{1'b0}}}; + endcase + + + // + // MAC Valid Logic + // + integer y; + + always @(posedge clk) + // + for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin + mac_rdy_lsb_dly[0][xi] <= mac_rdy_lsb[xi]; + for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1) + mac_rdy_lsb_dly[y][xi] <= mac_rdy_lsb_dly[y-1][xi]; + end + + always @(posedge clk) begin + // + fsm_state_dly[0] <= fsm_state; + for (y=1; y<=MODEXPNG_MAC_LATENCY; y=y+1) + fsm_state_dly[y] <= fsm_state_dly[y-1]; + end + + */ + + /* + always @(posedge clk) + // + for (xi=0; 
xi<=NUM_MULTS; xi=xi+1) begin + mac_ce_dly[0][xi] <= mac_ce[xi]; + for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1) + mac_ce_dly[y][xi] <= mac_ce_dly[y-1][xi]; + end + */ + /* + always @(posedge clk) + // + for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin + mac_clr_dly[0][xi] <= mac_clr[xi]; + for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1) + mac_clr_dly[y][xi] <= mac_clr_dly[y-1][xi]; + end + */ + + /* + // + // MAC Clear Logic + // + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_clr <= {1'b0, {NUM_MULTS{1'b1}}}; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_clr <= calc_mac_clear_square(col_index, din_addr_narrow_dly); + default: mac_clr <= {1'bX, {NUM_MULTS{1'bX}}}; + endcase + + + // + // MAC Ready Logic + // + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_rdy_lsb <= calc_mac_clear_square(col_index, din_addr_narrow); + default: mac_rdy_lsb <= {1'bX, {NUM_MULTS{1'bX}}}; + endcase + + + + // + // Recombinators + // + reg rcmb_lsb_ce; + reg rcmb_lsb_clr; + reg [MODEXPNG_MAC_WIDTH-1: 0] rcmb_lsb_din; + wire [15: 0] rcmb_lsb_dout; + + modexpng_part_recombinator recomb_lsb + ( + .clk (clk), + .ce (rcmb_lsb_ce), + .clr (rcmb_lsb_clr), + .din (rcmb_lsb_din), + .dout (rcmb_lsb_dout) + ); + + + reg calc_rcmb_lsb_ce; + always @* + // + calc_rcmb_lsb_ce = | mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0]; + + reg [MODEXPNG_MAC_WIDTH-1:0] calc_rcmb_lsb_din; + + always @* + // + casez (mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0]) + 8'b00000001: calc_rcmb_lsb_din = mac_p[0]; + 8'b00000010: calc_rcmb_lsb_din = mac_p[1]; + 8'b00000100: calc_rcmb_lsb_din = mac_p[2]; + 8'b00001000: calc_rcmb_lsb_din = mac_p[3]; + 8'b00010000: calc_rcmb_lsb_din = mac_p[4]; + 8'b00100000: calc_rcmb_lsb_din = mac_p[5]; + 8'b01000000: calc_rcmb_lsb_din = 
mac_p[6]; + 8'b10000000: calc_rcmb_lsb_din = mac_p[7]; + default: calc_rcmb_lsb_din = {MODEXPNG_MAC_WIDTH{1'bX}}; + endcase + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + rcmb_lsb_ce <= 1'b0; + else case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_ce <= calc_rcmb_lsb_ce; + default: rcmb_lsb_ce <= 1'b0; + endcase + + always @(posedge clk) + // + case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) + FSM_STATE_MULT_SQUARE_COL_0_TRIG: rcmb_lsb_clr <= 1'b1; + default: rcmb_lsb_clr <= 1'b0; + endcase + + always @(posedge clk) + // + case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_din <= calc_rcmb_lsb_din; + default: rcmb_lsb_din <= {MODEXPNG_MAC_WIDTH{1'bX}}; + endcase + + + +*/ diff --git a/rtl/dsp/dsp_array.v b/rtl/dsp/dsp_array.v new file mode 100644 index 0000000..178f87f --- /dev/null +++ b/rtl/dsp/dsp_array.v @@ -0,0 +1,111 @@ +module dsp_array +( + input clk, + + input ce_a, + input ce_b, + input ce_m, + input ce_p, + input ce_mode, + + input [8 -1:0] mode_z, + + input [4*18-1:0] a, + input [1*17-1:0] b, + output [8*47-1:0] p +); + + `include "../modexpng_parameters_x8.vh" + + wire [17:0] casc_a[0:3]; + wire [16:0] casc_b[0:3]; + + wire ce_a0 = ce_a; + reg ce_a1 = 1'b0; + reg ce_a2 = 1'b0; + + wire ce_b0 = ce_b; + reg ce_b1 = 1'b0; + + always @(posedge clk) begin + ce_a1 <= ce_a0; + ce_a2 <= ce_a1; + ce_b1 <= ce_b0; + end + + + genvar z; + generate for (z=0; z<(NUM_MULTS/2); z=z+1) + // + begin : DSP48E1 + // + dsp_slice # + ( + .AB_INPUT("DIRECT"), + .B_REG(2) + ) + dsp_direct + ( + .clk (clk), + + .ce_a1 (ce_a0), + .ce_b1 (ce_b0), + .ce_a2 (ce_a1), + .ce_b2 (ce_b1), + .ce_m (ce_m), + .ce_p (ce_p), + .ce_mode (ce_mode), + + .a (a[z*18+:18]), + .b (b), + .p 
(p[47*2*z+:47]), + + .inmode (5'b00000), + .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}), + .alumode (4'b0000), + + .casc_a_in ({18{1'b0}}), + .casc_b_in ({17{1'b0}}), + + .casc_a_out (casc_a[z]), + .casc_b_out (casc_b[z]) + ); + // + dsp_slice # + ( + .AB_INPUT("CASCADE"), + .B_REG(1) + ) + dsp_cascade + ( + .clk (clk), + + .ce_a1 (ce_a1), + .ce_b1 (1'b0), + .ce_a2 (ce_a2), + .ce_b2 (ce_b1), + .ce_m (ce_m), + .ce_p (ce_p), + .ce_mode (ce_mode), + + .a (a[z*18+:18]), + .b (b), + .p (p[47*(2*z+1)+:47]), + + .inmode (5'b00000), + .opmode ({1'b0, mode_z[2*z+1], 1'b0, 2'b01, 2'b01}), + .alumode (4'b0000), + + .casc_a_in (casc_a[z]), + .casc_b_in (casc_b[z]), + + .casc_a_out (), + .casc_b_out () + ); + // + end + // + endgenerate + + +endmodule diff --git a/rtl/dsp/dsp_slice.v b/rtl/dsp/dsp_slice.v new file mode 100644 index 0000000..9f1298b --- /dev/null +++ b/rtl/dsp/dsp_slice.v @@ -0,0 +1,125 @@ +module dsp_slice # +( + parameter AB_INPUT = "DIRECT", // forwarded to DSP48E1 A_INPUT and B_INPUT + parameter B_REG = 2 // forwarded to DSP48E1 BREG +) +( + input clk, + input ce_a1, + input ce_b1, + input ce_a2, + input ce_b2, + input ce_m, + input ce_p, + input ce_mode, + input [17:0] a, + input [16:0] b, + output [46:0] p, + input [ 4:0] inmode, + input [ 6:0] opmode, + input [ 3:0] alumode, + input [17:0] casc_a_in, + input [16:0] casc_b_in, + output [17:0] casc_a_out, + output [16:0] casc_b_out +); + + wire [30-18-1:0] casc_a_dummy; + wire [18-17-1:0] casc_b_dummy; + wire [48-47-1:0] p_dummy; + + DSP48E1 # + ( + .AREG (2), + .BREG (B_REG), + .CREG (0), + .DREG (0), + .ADREG (0), + .MREG (1), + .PREG (1), + .ACASCREG (1), + .BCASCREG (1), + .INMODEREG (0), + .OPMODEREG (1), + .ALUMODEREG (0), + .CARRYINREG (0), + .CARRYINSELREG (0), + + .A_INPUT (AB_INPUT), + .B_INPUT (AB_INPUT), + + .USE_DPORT ("FALSE"), + .USE_MULT ("DYNAMIC"), + .USE_SIMD ("ONE48"), + + .MASK (48'h3fffffffffff), + .PATTERN (48'h000000000000), + .SEL_MASK ("MASK"), + .SEL_PATTERN ("PATTERN"), + + .USE_PATTERN_DETECT ("NO_PATDET"), + .AUTORESET_PATDET ("NO_RESET") + ) + DSP48E1_inst + ( + 
.CLK (clk), + + .CEA1 (ce_a1), + .CEB1 (ce_b1), + .CEA2 (ce_a2), + .CEB2 (ce_b2), + .CEAD (1'b0), + .CEC (1'b0), + .CED (1'b0), + .CEM (ce_m), + .CEP (ce_p), + .CEINMODE (1'b0), + .CECTRL (ce_mode), + .CEALUMODE (1'b0), + .CECARRYIN (1'b0), + + .A ({{(30-18){1'b0}}, a}), + .B ({{(18-17){1'b0}}, b}), + .C ({48{1'b0}}), + .D ({25{1'b0}}), + .P ({p_dummy, p}), + + .INMODE (inmode), + .OPMODE (opmode), + .ALUMODE (alumode), + + .ACIN ({{(30-18){1'b0}}, casc_a_in}), + .BCIN ({{(18-17){1'b0}}, casc_b_in}), + .ACOUT ({casc_a_dummy, casc_a_out}), + .BCOUT ({casc_b_dummy, casc_b_out}), + .PCIN ({48{1'b0}}), + .PCOUT (), + .CARRYCASCIN (1'b0), + .CARRYCASCOUT (), + + .RSTA (1'b0), + .RSTB (1'b0), + .RSTC (1'b0), + .RSTD (1'b0), + .RSTM (1'b0), + .RSTP (1'b0), + .RSTINMODE (1'b0), + .RSTCTRL (1'b0), + .RSTALUMODE (1'b0), + .RSTALLCARRYIN (1'b0), + + .UNDERFLOW (), + .OVERFLOW (), + .PATTERNDETECT (), + .PATTERNBDETECT (), + + .CARRYIN (1'b0), + .CARRYOUT (), + .CARRYINSEL (3'b000), + + .MULTSIGNIN (1'b0), + .MULTSIGNOUT () + ); + + +endmodule diff --git a/rtl/modexpng_mac.v b/rtl/modexpng_mac.v new file mode 100644 index 0000000..9105dab --- /dev/null +++ b/rtl/modexpng_mac.v @@ -0,0 +1,54 @@ +module modexpng_mac +( + clk, + ce, clr, + casc_a, + a_in, b_in, p_out, + a_casc_in, a_casc_out +); + + input clk; + input ce; + input clr; + input casc_a; + input [16:0] a_in; + input [16:0] b_in; + output [46:0] p_out; + input [16:0] a_casc_in; + output [16:0] a_casc_out; + + reg [16:0] a_reg; + reg [16:0] b_reg; + assign a_casc_out = a_reg; + always @(posedge clk) + // + if (ce) {b_reg, a_reg} <= {b_in, casc_a ? 
a_casc_in : a_in}; + + reg ce_dly1; + reg ce_dly2; + always @(posedge clk) + // + {ce_dly2, ce_dly1} <= {ce_dly1, ce}; + + reg clr_dly1; + reg clr_dly2; + always @(posedge clk) begin + // + if (ce) clr_dly1 <= clr; + if (ce_dly1) clr_dly2 <= clr_dly1; + // + end + + reg [33:0] m_reg; + wire [46:0] m_reg_ext = {{13{1'b0}}, m_reg}; + always @(posedge clk) + // + if (ce_dly1) m_reg <= {{17{1'b0}}, a_reg} * {{17{1'b0}}, b_reg}; + + reg [46:0] p_reg; + assign p_out = p_reg; + always @(posedge clk) + // + if (ce_dly2) p_reg <= clr_dly2 ? m_reg_ext : p_reg + m_reg_ext; + +endmodule diff --git a/rtl/modexpng_mac_array.v b/rtl/modexpng_mac_array.v new file mode 100644 index 0000000..067929e --- /dev/null +++ b/rtl/modexpng_mac_array.v @@ -0,0 +1,116 @@ +module modexpng_mac_array +( + clk, + ce, clr, + ce_aux, clr_aux, + casc_a, casc_a_aux, + a_in, b_in, p_out, + a_in_aux, p_out_aux +); + + + // + // Includes + // + `include "modexpng_parameters.vh" + `include "modexpng_parameters_x8.vh" + + + // + // Ports + // + input clk; + input ce; + input [NUM_MULTS -1:0] clr; + input ce_aux; + input clr_aux; + input [NUM_MULTS -2:0] casc_a; + input casc_a_aux; + input [NUM_MULTS * WORD_WIDTH -1:0] a_in; + input [ 1 * WORD_WIDTH -1:0] b_in; + output [NUM_MULTS * MAC_WIDTH -1:0] p_out; + input [ 1 * WORD_WIDTH -1:0] a_in_aux; + output [ 1 * MAC_WIDTH -1:0] p_out_aux; + + + // + // A-Cascade Paths + // + wire [WORD_WIDTH-1:0] a_casc_int[0:NUM_MULTS-2]; + wire [WORD_WIDTH-1:0] a_casc_int_aux; + + + // + // LSB + // + modexpng_mac mac_lsb + ( + .clk (clk), + .ce (ce), + .clr (clr[0]), + .casc_a (1'b0), + .a_in (a_in[0+:WORD_WIDTH]), + .b_in (b_in), + .p_out (p_out[0+:MAC_WIDTH]), + .a_casc_in ({WORD_WIDTH{1'b0}}), + .a_casc_out (a_casc_int[0]) + ); + + + // + // INT + // + genvar z; + generate for (z=1; z<(NUM_MULTS-1); z=z+1) + begin : gen_modexpng_mac_int + modexpng_mac mac_int + ( + .clk (clk), + .ce (ce), + .clr (clr[z]), + .casc_a (casc_a[z-1]), + .a_in 
(a_in[z*WORD_WIDTH+:WORD_WIDTH]), + .b_in (b_in), + .p_out (p_out[z*MAC_WIDTH+:MAC_WIDTH]), + .a_casc_in (a_casc_int[z-1]), + .a_casc_out (a_casc_int[z]) + ); + end + endgenerate + + + // + // MSB + // + modexpng_mac mac_msb + ( + .clk (clk), + .ce (ce), + .clr (clr[NUM_MULTS-1]), + .casc_a (casc_a[NUM_MULTS-2]), + .a_in (a_in[(NUM_MULTS-1)*WORD_WIDTH+:WORD_WIDTH]), + .b_in (b_in), + .p_out (p_out[(NUM_MULTS-1)*MAC_WIDTH+:MAC_WIDTH]), + .a_casc_in (a_casc_int[NUM_MULTS-2]), + .a_casc_out (a_casc_int_aux) + ); + + + // + // AUX + // + modexpng_mac mac_aux + ( + .clk (clk), + .ce (ce_aux), + .clr (clr_aux), + .casc_a (casc_a_aux), + .a_in (a_in_aux), + .b_in (b_in), + .p_out (p_out_aux), + .a_casc_in (a_casc_int_aux), + .a_casc_out () + ); + + +endmodule diff --git a/rtl/modexpng_mem.v b/rtl/modexpng_mem.v new file mode 100644 index 0000000..ca89214 --- /dev/null +++ b/rtl/modexpng_mem.v @@ -0,0 +1,93 @@ +// +// TODO: Add license text! +// + +module modexpng_mem # +( + parameter MEM_WIDTH = 17, + parameter MEM_ADDR_BITS = 6 +) +( + input clk, + + input [MEM_ADDR_BITS-1:0] a_addr, + input a_en, + input a_wr, + input [MEM_WIDTH -1:0] a_in, + output [MEM_WIDTH -1:0] a_out, + + input [MEM_ADDR_BITS-1:0] b_addr, + input b_en, + input b_reg_en, + output [MEM_WIDTH -1:0] b_out +); + + + // + // BRAM + // + (* RAM_STYLE="BLOCK" *) + reg [MEM_WIDTH-1:0] bram[0:(2**MEM_ADDR_BITS)-1]; + + + // + // Initialization for Simulation + // + /* + integer c; + initial begin + for (c=0; c<(2**MEM_ADDR_BITS); c=c+1) + bram[c] = {MEM_WIDTH{1'b0}}; + end + */ + + + + // + // Output Registers + // + reg [MEM_WIDTH-1:0] bram_b; + reg [MEM_WIDTH-1:0] bram_b_reg; + + assign a_out = 32'hDEADCE11; // constant placeholder, no read data on port A; truncated to MEM_WIDTH bits when MEM_WIDTH < 32 + assign b_out = bram_b_reg; + + + // + // Note that when both ports are accessing the same location, conflict can + // potentially arise. See Xilinx UG473 (pages 19-20, "Conflict + // Avoidance") for more information. 
In our configuration to avoid that the + // write port must be coded to operate in READ_FIRST mode. If the write + // port is overwriting the same address the read port is accessing, the + // write port must read the previously stored data (not the data it is + // writing, as that would be WRITE_FIRST mode). + // + + + // + // Write-Only Port A + // + always @(posedge clk) + // + if (a_en) + // + if (a_wr) bram[a_addr] <= a_in; + + + // + // Read-Only Port B + // + always @(posedge clk) + // + if (b_en) + // + bram_b <= bram[b_addr]; + + always @(posedge clk) + // + if (b_reg_en) + // + bram_b_reg <= bram_b; + + +endmodule diff --git a/rtl/modexpng_mmm_col_index.v b/rtl/modexpng_mmm_col_index.v new file mode 100644 index 0000000..b904795 --- /dev/null +++ b/rtl/modexpng_mmm_col_index.v @@ -0,0 +1,90 @@ +module modexpng_mmm_col_index +( + clk, + index_last, + fsm_state_next, + col_index, + col_index_done, + col_index_zero, + col_index_next, + col_index_prev +); + + + // + // Includes + // + //`include "modexpng_parameters.vh" + //`include "modexpng_parameters_x8.vh" + `include "modexpng_mmm_fsm.vh" + + + // + // Parameters + // + parameter INDEX_WIDTH = 6; + + + // + // Ports + // + input clk; + input [ INDEX_WIDTH-1:0] index_last; + input [FSM_STATE_WIDTH-1:0] fsm_state_next; + output [ INDEX_WIDTH-4:0] col_index; + output col_index_done; + output [ INDEX_WIDTH-4:0] col_index_zero; + output [ INDEX_WIDTH-4:0] col_index_next; + output [ INDEX_WIDTH-4:0] col_index_prev; + + + // + // Registers + // + reg [INDEX_WIDTH-4:0] col_index_reg; + reg [INDEX_WIDTH-4:0] col_index_last; + reg [INDEX_WIDTH-4:0] col_index_dly; + + + // + // Mapping + // + assign col_index = col_index_reg; + assign col_index_prev = col_index_dly; + + + // + // Handy Wires + // + assign col_index_done = col_index == col_index_last; + assign col_index_zero = {(INDEX_WIDTH-3){1'b0}}; + assign col_index_next = col_index + 1'b1; + + + // + // Increment Logic + // + always @(posedge clk) + // + case 
(fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_TRIG: begin + col_index_reg <= col_index_zero; + col_index_last <= index_last[INDEX_WIDTH-1:3]; + end + // + FSM_STATE_MULT_SQUARE_COL_N_TRIG: + col_index_reg <= col_index_next; + // + endcase + + + // + // Delay Logic + // + always @(posedge clk) + // + col_index_dly <= col_index; + + +endmodule diff --git a/rtl/modexpng_mmm_din_addr.v b/rtl/modexpng_mmm_din_addr.v new file mode 100644 index 0000000..565c7e0 --- /dev/null +++ b/rtl/modexpng_mmm_din_addr.v @@ -0,0 +1,167 @@ +module modexpng_mmm_din_addr +( + clk, rst_n, + index_last, + fsm_state_next, + col_index_zero, col_index_next, + din_addr, din_bank, din_ena, din_reg_ena, + din_addr_cnt, din_addr_cnt_last, + din_addr_cnt_lower_prev, din_addr_cnt_upper_prev +); + + + // + // Includes + // + `include "modexpng_parameters.vh" + //`include "modexpng_parameters_x8.vh" + `include "modexpng_mmm_fsm.vh" + + + // + // Parameters + // + parameter INDEX_WIDTH = 6; + + + // + // Ports + // + input clk; + input rst_n; + input [ INDEX_WIDTH-1:0] index_last; + input [FSM_STATE_WIDTH-1:0] fsm_state_next; + input [ INDEX_WIDTH-4:0] col_index_zero; + input [ INDEX_WIDTH-4:0] col_index_next; + output [ INDEX_WIDTH-4:0] din_addr; + output [ 3-1:0] din_bank; + output [ 1-1:0] din_ena; + output [ 1-1:0] din_reg_ena; + output [ INDEX_WIDTH-1:0] din_addr_cnt; + output [ INDEX_WIDTH-1:0] din_addr_cnt_last; + output [ 3-1:0] din_addr_cnt_lower_prev; + output [ INDEX_WIDTH-4:0] din_addr_cnt_upper_prev; + + + // + // Address + // + reg [INDEX_WIDTH-1:0] din_addr_reg; + wire [INDEX_WIDTH-1:0] din_addr_zero = {INDEX_WIDTH{1'b0}}; + reg [INDEX_WIDTH-1:0] din_addr_last; + wire [INDEX_WIDTH-1:0] din_addr_prev = (din_addr_reg == din_addr_zero) ? 
din_addr_last : din_addr_reg - 1'b1; + + reg [INDEX_WIDTH-1:0] din_addr_cnt_reg; + wire [INDEX_WIDTH-1:0] din_addr_cnt_zero = {INDEX_WIDTH{1'b0}}; + wire [INDEX_WIDTH-1:0] din_addr_cnt_next = din_addr_cnt_reg + 1'b1; + reg [INDEX_WIDTH-1:0] din_addr_cnt_last_reg; + wire [ 3-1:0] din_addr_cnt_lower = din_addr_cnt_reg[ 3-1:0]; + wire [INDEX_WIDTH-4:0] din_addr_cnt_upper = din_addr_cnt_reg[INDEX_WIDTH-1:3]; + reg [ 3-1:0] din_addr_cnt_lower_dly; + reg [INDEX_WIDTH-4:0] din_addr_cnt_upper_dly; + + reg [ 3-1:0] din_bank_reg; + + + // + // Enables + // + reg din_ena_reg = 1'b0; + reg din_reg_ena_reg = 1'b0; + + always @(posedge clk or negedge rst_n) + // + if (!rst_n) + din_ena_reg <= 1'b0; + else case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: + din_ena_reg <= 1'b1; + // + default: + din_ena_reg <= 1'b0; + // + endcase + + always @(posedge clk or negedge rst_n) + // + if (!rst_n) + din_reg_ena_reg <= 1'b0; + else + din_reg_ena_reg <= din_ena_reg; + + + // + // Address Mapping + // + assign din_addr = din_addr_reg[INDEX_WIDTH-1:3]; + + assign din_addr_cnt = din_addr_cnt_reg; + assign din_addr_cnt_last = din_addr_cnt_last_reg; + assign din_addr_cnt_lower_prev = din_addr_cnt_lower_dly; + assign din_addr_cnt_upper_prev = din_addr_cnt_upper_dly; + + assign din_bank = din_bank_reg; + + + // + // Enable Mapping + // + assign din_ena = din_ena_reg; + assign din_reg_ena = din_reg_ena_reg; + + + // + // Delay + // + always @(posedge clk) begin + din_addr_cnt_lower_dly <= din_addr_cnt_lower; + din_addr_cnt_upper_dly <= din_addr_cnt_upper; + end + + + always @(posedge clk) + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_TRIG: begin + din_addr_reg <= {col_index_zero, {3{1'b0}}}; + din_addr_last <= index_last; + din_addr_cnt_reg <= din_addr_cnt_zero; + din_addr_cnt_last_reg <= index_last; + end + // + FSM_STATE_MULT_SQUARE_COL_N_TRIG: begin + 
din_addr_reg <= {col_index_next, {3{1'b0}}}; + din_addr_cnt_reg <= din_addr_cnt_zero; + end + // + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin + din_addr_reg <= din_addr_prev; + din_addr_cnt_reg <= din_addr_cnt_next; + end + // + //default: + // + endcase + + always @(posedge clk) + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: + din_bank_reg <= BANK_XY_T1T2; // non-blocking: din_bank_reg is a clocked register + // + default: + din_bank_reg <= BANK_XY_ANY; + // + endcase + +endmodule diff --git a/rtl/modexpng_mmm_dout_addr.v b/rtl/modexpng_mmm_dout_addr.v new file mode 100644 index 0000000..3749d82 --- /dev/null +++ b/rtl/modexpng_mmm_dout_addr.v @@ -0,0 +1,167 @@ +module modexpng_mmm_dout_addr +( + clk, rst_n, + //index_last, + fsm_state, + load_xy_addr, + load_addr_zero, + load_nn_coeff_addr_done, + /* + + col_index_zero, col_index_next,*/ + x_dout_addr, y_dout_addr, + x_dout_ena, y_dout_ena, + x_dout_bank, y_dout_bank + +); + + + // + // Includes + // + `include "modexpng_parameters.vh" + `include "modexpng_parameters_x8.vh" + `include "modexpng_mmm_fsm.vh" + + + // + // Parameters + // + parameter INDEX_WIDTH = 6; + + + // + // Ports + // + input clk; + input rst_n; + //input [ INDEX_WIDTH-1:0] index_last; + input [FSM_STATE_WIDTH-1:0] fsm_state; + input [INDEX_WIDTH:0] load_xy_addr; // address + input load_addr_zero; + input load_nn_coeff_addr_done; + //input [ INDEX_WIDTH-4:0] col_index_zero; + //input [ INDEX_WIDTH-4:0] col_index_next; + output [INDEX_WIDTH-4:0] x_dout_addr; + output [INDEX_WIDTH-4:0] y_dout_addr; + + output [NUM_MULTS-1:0] x_dout_ena; + output [NUM_MULTS-1:0] y_dout_ena; + + output [3-1:0] x_dout_bank; + output [3-1:0] y_dout_bank; + + + // + // Registers + // + reg [INDEX_WIDTH-4:0] x_dout_addr_reg; //clog2 + reg [INDEX_WIDTH-4:0] y_dout_addr_reg; //clog2 + + reg [NUM_MULTS-1:0] x_dout_ena_reg = {NUM_MULTS{1'b0}}; + reg 
[NUM_MULTS-1:0] y_dout_ena_reg = {NUM_MULTS{1'b0}}; + + reg [NUM_MULTS-1:0] x_dout_ena_int; + reg [NUM_MULTS-1:0] y_dout_ena_int; + + reg [3-1:0] x_dout_bank_reg; + reg [3-1:0] y_dout_bank_reg; + + + // + // Mapping + // + assign x_dout_addr = x_dout_addr_reg; + assign y_dout_addr = y_dout_addr_reg; + + assign x_dout_ena = x_dout_ena_reg; + assign y_dout_ena = y_dout_ena_reg; + + assign x_dout_bank = x_dout_bank_reg; + assign y_dout_bank = y_dout_bank_reg; + + + always @(posedge clk) + // + case (fsm_state) + // + FSM_STATE_LOAD_T1T2_3: begin + x_dout_addr_reg <= load_xy_addr[INDEX_WIDTH-1:3]; + y_dout_addr_reg <= load_xy_addr[INDEX_WIDTH-1:3]; + end + // + FSM_STATE_LOAD_NN_COEFF_3: begin + x_dout_addr_reg <= !load_nn_coeff_addr_done ? load_xy_addr[INDEX_WIDTH-1:3] : BANK_XY_AUX_ADDR_N_COEFF[INDEX_WIDTH-4:0]; + y_dout_addr_reg <= !load_nn_coeff_addr_done ? load_xy_addr[INDEX_WIDTH-1:3] : BANK_XY_AUX_ADDR_N_COEFF[INDEX_WIDTH-4:0]; + end + // + default: begin + x_dout_addr_reg <= {INDEX_WIDTH-3{1'bX}}; + y_dout_addr_reg <= {INDEX_WIDTH-3{1'bX}}; + end + // + endcase + + wire [NUM_MULTS-1:0] load_xy_ena_init = {{NUM_MULTS-1{1'b0}}, 1'b1}; + + always @(posedge clk) + // + case (fsm_state) + // + FSM_STATE_LOAD_T1T2_2: begin + x_dout_ena_int <= load_addr_zero ? load_xy_ena_init : {x_dout_ena_int[NUM_MULTS-2:0], x_dout_ena_int[NUM_MULTS-1]}; + y_dout_ena_int <= load_addr_zero ? load_xy_ena_init : {y_dout_ena_int[NUM_MULTS-2:0], y_dout_ena_int[NUM_MULTS-1]}; + end + // + FSM_STATE_LOAD_NN_COEFF_2: begin + x_dout_ena_int <= load_addr_zero ? load_xy_ena_init : {x_dout_ena_int[NUM_MULTS-2:0], x_dout_ena_int[NUM_MULTS-1] & ~load_nn_coeff_addr_done}; + y_dout_ena_int <= load_addr_zero ? 
load_xy_ena_init : {y_dout_ena_int[NUM_MULTS-2:0], y_dout_ena_int[NUM_MULTS-1]}; + end + // + endcase + + + always @(posedge clk or negedge rst_n) + // + if (!rst_n) begin + x_dout_ena_reg <= {NUM_MULTS{1'b0}}; + y_dout_ena_reg <= {NUM_MULTS{1'b0}}; + end else case (fsm_state) + // + FSM_STATE_LOAD_T1T2_3, + FSM_STATE_LOAD_NN_COEFF_3: begin + x_dout_ena_reg <= x_dout_ena_int; + y_dout_ena_reg <= y_dout_ena_int; + end + // + default: begin + x_dout_ena_reg <= {NUM_MULTS{1'b0}}; + y_dout_ena_reg <= {NUM_MULTS{1'b0}}; + end + // + endcase + + + always @(posedge clk) + // + case (fsm_state) + // + FSM_STATE_LOAD_T1T2_3: begin + x_dout_bank_reg <= BANK_X_T1; + y_dout_bank_reg <= BANK_Y_T2; + end + // + FSM_STATE_LOAD_NN_COEFF_3: begin + x_dout_bank_reg <= !load_nn_coeff_addr_done ? BANK_X_N : BANK_XY_AUX; + y_dout_bank_reg <= !load_nn_coeff_addr_done ? BANK_Y_N_COEFF : BANK_XY_AUX; + end + // + default: begin + x_dout_bank_reg <= BANK_XY_ANY; + y_dout_bank_reg <= BANK_XY_ANY; + end + // + endcase + + +endmodule diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh new file mode 100644 index 0000000..c237a0b --- /dev/null +++ b/rtl/modexpng_mmm_fsm.vh @@ -0,0 +1,24 @@ +localparam FSM_STATE_WIDTH = 32; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_IDLE = 0; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_1 = 1; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_2 = 2; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_3 = 3; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_1 = 4; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_2 = 5; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_3 = 6; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_INIT = 11; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_INIT = 14; +localparam [FSM_STATE_WIDTH-1:0] 
FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999; +
\ No newline at end of file diff --git a/rtl/modexpng_mmm_pad.v b/rtl/modexpng_mmm_pad.v new file mode 100644 index 0000000..a2a21ff --- /dev/null +++ b/rtl/modexpng_mmm_pad.v @@ -0,0 +1,153 @@ +module modexpng_mmm_pad +( + clk, rst_n, + fsm_state, + load_xy_addr_lsb, + pad_x_rd_addr, pad_y_rd_addr, + pad_x_rd_ena, pad_y_rd_ena, + pad_x_rd_dout, pad_y_rd_dout, + load_x_din, load_y_din +); + + + // + // Includes + // + `include "modexpng_parameters.vh" + //`include "modexpng_parameters_x8.vh" + `include "modexpng_mmm_fsm.vh" + + + // + // Parameters + // + parameter INDEX_WIDTH = 6; + + + // + // Ports + // + input clk; + input rst_n; + input [FSM_STATE_WIDTH-1:0] fsm_state; + + input [INDEX_WIDTH-1:0] load_xy_addr_lsb; + + input [WORD_WIDTH-1:0] load_x_din; + input [WORD_WIDTH-1:0] load_y_din; + + input [INDEX_WIDTH-1:0] pad_x_rd_addr; + input [INDEX_WIDTH-1:0] pad_y_rd_addr; + + input pad_x_rd_ena; + input pad_y_rd_ena; + + output [WORD_WIDTH-1:0] pad_x_rd_dout; + output [WORD_WIDTH-1:0] pad_y_rd_dout; + + + // + // Registers + // + reg [INDEX_WIDTH-1:0] pad_x_wr_addr; + reg [INDEX_WIDTH-1:0] pad_y_wr_addr; + reg pad_x_wr_ena; + reg pad_y_wr_ena; + reg [ WORD_WIDTH-1:0] pad_x_wr_din; + reg [ WORD_WIDTH-1:0] pad_y_wr_din; + + bram_1wo_1ro_readfirst_ce # + ( + .MEM_WIDTH (WORD_WIDTH), + .MEM_ADDR_BITS (INDEX_WIDTH) + ) + pad_x + ( + .clk (clk), + + .a_addr (pad_x_wr_addr), + .a_en (pad_x_wr_ena), + .a_wr (pad_x_wr_ena), + .a_in (pad_x_wr_din), + .a_out (), // unused + + .b_addr (pad_x_rd_addr), + .b_en (pad_x_rd_ena), + .b_out (pad_x_rd_dout) + ); + + bram_1wo_1ro_readfirst_ce # + ( + .MEM_WIDTH (WORD_WIDTH), + .MEM_ADDR_BITS (INDEX_WIDTH) + ) + pad_y + ( + .clk (clk), + + .a_addr (pad_y_wr_addr), + .a_en (pad_y_wr_ena), + .a_wr (pad_y_wr_ena), + .a_in (pad_y_wr_din), + .a_out (), // unused + + .b_addr (pad_y_rd_addr), + .b_en (pad_y_rd_ena), + .b_out (pad_y_rd_dout) + ); + + + always @(posedge clk) + // + case (fsm_state) + // + FSM_STATE_LOAD_T1T2_3: begin + 
pad_x_wr_addr <= load_xy_addr_lsb; + pad_y_wr_addr <= load_xy_addr_lsb; + end + // + default: begin + pad_x_wr_addr <= {INDEX_WIDTH{1'bX}}; + pad_y_wr_addr <= {INDEX_WIDTH{1'bX}}; + end + // + endcase + + always @(posedge clk) + // + case (fsm_state) + // + FSM_STATE_LOAD_T1T2_3: begin + pad_x_wr_din <= load_x_din; + pad_y_wr_din <= load_y_din; + end + // + default: begin + pad_x_wr_din <= load_x_din; + pad_y_wr_din <= load_y_din; + end + // + endcase + + + always @(posedge clk or negedge rst_n) + // + if (!rst_n) begin + pad_x_wr_ena <= 1'b0; + pad_y_wr_ena <= 1'b0; + end else case (fsm_state) + // + FSM_STATE_LOAD_T1T2_3: begin + pad_x_wr_ena <= 1'b1; + pad_y_wr_ena <= 1'b1; + end + // + default: begin + pad_x_wr_ena <= 1'b0; + pad_y_wr_ena <= 1'b0; + end + // + endcase + + +endmodule diff --git a/rtl/modexpng_mmm_transporter.v b/rtl/modexpng_mmm_transporter.v new file mode 100644 index 0000000..a8f309a --- /dev/null +++ b/rtl/modexpng_mmm_transporter.v @@ -0,0 +1,157 @@ +module modexpng_mmm_transporter +( + clk, + ena, + index_last, + fsm_state, + fsm_state_next, + load_phase, + load_xy_addr, + load_xy_addr_vld, + load_xy_req, + load_addr_zero, + load_t1t2_addr_done, + load_nn_coeff_addr_done +); + + + // + // Includes + // + //`include "modexpng_parameters.vh" + //`include "modexpng_parameters_x8.vh" + `include "modexpng_mmm_fsm.vh" + + + // + // Parameters + // + parameter INDEX_WIDTH = 6; + + + // + // Ports + // + input clk; + input ena; + input [ INDEX_WIDTH-1:0] index_last; + input [FSM_STATE_WIDTH-1:0] fsm_state; + input [FSM_STATE_WIDTH-1:0] fsm_state_next; + output load_phase; + output [ INDEX_WIDTH:0] load_xy_addr; + output load_xy_addr_vld; + output load_xy_req; + output load_addr_zero; + output load_t1t2_addr_done; + output load_nn_coeff_addr_done; + + + // + // Load Address Generator + // + reg load_phase_reg; + reg [INDEX_WIDTH:0] load_xy_addr_reg; + reg load_xy_addr_vld_reg; + reg load_xy_req_reg; + + + // + // Mapping + // + assign load_phase = 
load_phase_reg; + assign load_xy_addr = load_xy_addr_reg; + assign load_xy_addr_vld = load_xy_addr_vld_reg; + assign load_xy_req = load_xy_req_reg; + + + // + // Handy Quantities + // + wire [INDEX_WIDTH:0] load_xy_addr_zero = {{INDEX_WIDTH{1'b0}}, 1'b0}; + wire [INDEX_WIDTH:0] load_xy_addr_next = load_xy_addr_reg + 1'b1; + wire [INDEX_WIDTH:0] load_xy_addr_xxx = {{INDEX_WIDTH{1'bX}}, 1'bX}; + + + // + // More Handy Quantities + // + reg [INDEX_WIDTH:0] load_t1t2_addr_last; + reg [INDEX_WIDTH:0] load_nn_coeff_addr_last; + + + // + // Flags + // + assign load_addr_zero = load_xy_addr_reg == load_xy_addr_zero; + assign load_t1t2_addr_done = load_xy_addr_reg == load_t1t2_addr_last; + assign load_nn_coeff_addr_done = load_xy_addr_reg == load_nn_coeff_addr_last; + + + // + // Last Index Latch + // + always @(posedge clk) + // + if (ena && (fsm_state == FSM_STATE_IDLE)) begin + load_t1t2_addr_last <= {1'b0, index_last}; + load_nn_coeff_addr_last <= {1'b0, index_last} + 1'b1; + end + + + // + // Update Load Phase + // + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_LOAD_T1T2_1, + FSM_STATE_LOAD_T1T2_2, + FSM_STATE_LOAD_T1T2_3: load_phase_reg <= 1'b0; + FSM_STATE_LOAD_NN_COEFF_1, + FSM_STATE_LOAD_NN_COEFF_2, + FSM_STATE_LOAD_NN_COEFF_3: load_phase_reg <= 1'b1; + default: load_phase_reg <= 1'bX; + endcase + + + // + // Update Load Address + // + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_LOAD_T1T2_1: load_xy_addr_reg <= (fsm_state == FSM_STATE_LOAD_T1T2_3) ? load_xy_addr_next : load_xy_addr_zero; + FSM_STATE_LOAD_T1T2_2, + FSM_STATE_LOAD_T1T2_3: load_xy_addr_reg <= load_xy_addr_reg; + FSM_STATE_LOAD_NN_COEFF_1: load_xy_addr_reg <= (fsm_state == FSM_STATE_LOAD_NN_COEFF_3) ? 
load_xy_addr_next : load_xy_addr_zero; + FSM_STATE_LOAD_NN_COEFF_2, + FSM_STATE_LOAD_NN_COEFF_3: load_xy_addr_reg <= load_xy_addr_reg; + default load_xy_addr_reg <= load_xy_addr_xxx; + endcase + + + // + // Update Address Valid Flag + // + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_LOAD_T1T2_1, + FSM_STATE_LOAD_NN_COEFF_1: load_xy_addr_vld_reg <= 1'b1; + default load_xy_addr_vld_reg <= 1'b0; + endcase + + + // + // Update Load Request Flag + // + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_LOAD_T1T2_2, + FSM_STATE_LOAD_NN_COEFF_2: load_xy_req_reg <= 1'b1; + default load_xy_req_reg <= 1'b0; + endcase + + +endmodule diff --git a/rtl/modexpng_mmm_x8_dual.v b/rtl/modexpng_mmm_x8_dual.v new file mode 100644 index 0000000..99a37fa --- /dev/null +++ b/rtl/modexpng_mmm_x8_dual.v @@ -0,0 +1,550 @@ +module modexpng_mmm_x8_dual +( + clk, rst_n, + ena, rdy, + mode, transfer, + index_last, + x_din, y_din, x_dout, y_dout, + x_din_addr, y_din_addr, x_dout_addr, y_dout_addr, + x_din_ena, y_din_ena, x_dout_ena, y_dout_ena, x_din_reg_ena, y_din_reg_ena, + x_din_bank, y_din_bank, x_dout_bank, y_dout_bank, + load_phase, load_xy_addr, load_xy_addr_vld, load_xy_req, + load_x_din, load_y_din +); + + + // + // Includes + // + `include "modexpng_parameters.vh" + `include "modexpng_parameters_x8.vh" + `include "modexpng_mmm_fsm.vh" + + + // + // Parameters + // + parameter INDEX_WIDTH = 6; + + + // + // Ports + // + input clk; + input rst_n; + + input ena; + output rdy; + + input mode; // multiply: 0 = T1:T1*T1, T2:T2*T1, 1 = T1:T1*T2, T2:T2*T2 + // load/unload: 0 = load, 1 = unload + input transfer; // 0 = multiply, 1 = load/unload + + input [INDEX_WIDTH-1:0] index_last; + + input [NUM_MULTS*WORD_WIDTH-1:0] x_din; + input [NUM_MULTS*WORD_WIDTH-1:0] y_din; + output [NUM_MULTS*WORD_WIDTH-1:0] x_dout; + output [NUM_MULTS*WORD_WIDTH-1:0] y_dout; + + output [INDEX_WIDTH-4:0] x_din_addr; + output [INDEX_WIDTH-4:0] y_din_addr; + output [INDEX_WIDTH-4:0] 
x_dout_addr; + output [INDEX_WIDTH-4:0] y_dout_addr; + + output [ 1-1:0] x_din_ena; + output [ 1-1:0] y_din_ena; + output [NUM_MULTS-1:0] x_dout_ena; + output [NUM_MULTS-1:0] y_dout_ena; + output [ 1-1:0] x_din_reg_ena; + output [ 1-1:0] y_din_reg_ena; + + output [3-1:0] x_din_bank; + output [3-1:0] y_din_bank; + output [3-1:0] x_dout_bank; + output [3-1:0] y_dout_bank; + + output load_phase; // 0 = T1, T2; 1 = N, N_COEFF + output [ INDEX_WIDTH:0] load_xy_addr; // address + output load_xy_addr_vld; // address valid + output load_xy_req; // data request + + input [WORD_WIDTH-1:0] load_x_din; // data input + input [WORD_WIDTH-1:0] load_y_din; // data input + + + // + // FSM State and Next States + // + reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE; + reg [FSM_STATE_WIDTH-1:0] fsm_state_next; + reg [FSM_STATE_WIDTH-1:0] fsm_state_after_idle; + reg [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; + + + // + // FSM Idle Next State + // + always @* + // + case ({transfer, mode}) + 2'b00, + 2'b01: fsm_state_after_idle = FSM_STATE_MULT_SQUARE_COL_0_TRIG; + 2'b10: fsm_state_after_idle = FSM_STATE_LOAD_T1T2_1; + 2'b11: fsm_state_after_idle = FSM_STATE_IDLE; //unload? 
+ endcase + + + // + // Column Counter + // + wire [ INDEX_WIDTH-4:0] col_index; + wire col_index_done; + wire [ INDEX_WIDTH-4:0] col_index_zero; + wire [ INDEX_WIDTH-4:0] col_index_next; + wire [ INDEX_WIDTH-4:0] col_index_prev; + + modexpng_mmm_col_index # + ( + .INDEX_WIDTH(INDEX_WIDTH) + ) + mmm_col_index + ( + .clk (clk), + .index_last (index_last), + .fsm_state_next (fsm_state_next), + .col_index (col_index), + .col_index_done (col_index_done), + .col_index_zero (col_index_zero), + .col_index_next (col_index_next), + .col_index_prev (col_index_prev) + ); + + + // + // Load Address Generator + // + wire [INDEX_WIDTH-1:0] load_xy_addr_lsb = load_xy_addr[INDEX_WIDTH-1:0]; + wire load_addr_zero; + wire load_t1t2_addr_done; + wire load_nn_coeff_addr_done; + + modexpng_mmm_transporter # + ( + .INDEX_WIDTH(INDEX_WIDTH) + ) + transporter + ( + .clk (clk), + .ena (ena), + .index_last (index_last), + .fsm_state (fsm_state), + .fsm_state_next (fsm_state_next), + .load_phase (load_phase), + .load_xy_addr (load_xy_addr), + .load_xy_addr_vld (load_xy_addr_vld), + .load_xy_req (load_xy_req), + .load_addr_zero (load_addr_zero), + .load_t1t2_addr_done (load_t1t2_addr_done), + .load_nn_coeff_addr_done (load_nn_coeff_addr_done) + ); + + + // + // X, Y Address + // + wire [INDEX_WIDTH-1:0] x_din_addr_cnt; + wire [INDEX_WIDTH-1:0] x_din_addr_cnt_last; + wire [ 3-1:0] x_din_addr_cnt_lower_prev; + wire [INDEX_WIDTH-4:0] x_din_addr_cnt_upper_prev; + + modexpng_mmm_din_addr # + ( + .INDEX_WIDTH(INDEX_WIDTH) + ) + din_addr_x + ( + .clk (clk), + .rst_n (rst_n), + .index_last (index_last), + .fsm_state_next (fsm_state_next), + .col_index_zero (col_index_zero), + .col_index_next (col_index_next), + .din_addr (x_din_addr), + .din_bank (x_din_bank), + .din_ena (x_din_ena), + .din_reg_ena (x_din_reg_ena), + .din_addr_cnt (x_din_addr_cnt), + .din_addr_cnt_last (x_din_addr_cnt_last), + .din_addr_cnt_lower_prev (x_din_addr_cnt_lower_prev), + .din_addr_cnt_upper_prev (x_din_addr_cnt_upper_prev) 
+ ); + + modexpng_mmm_dout_addr # + ( + .INDEX_WIDTH(INDEX_WIDTH) + ) + dout_addr_xy + ( + .clk (clk), + .rst_n (rst_n), + .fsm_state (fsm_state), + .load_xy_addr (load_xy_addr), + .load_addr_zero (load_addr_zero), + .load_nn_coeff_addr_done (load_nn_coeff_addr_done), + .x_dout_addr (x_dout_addr), + .y_dout_addr (y_dout_addr), + .x_dout_ena (x_dout_ena), + .y_dout_ena (y_dout_ena), + .x_dout_bank (x_dout_bank), + .y_dout_bank (y_dout_bank) + ); + + + // + // Helper Memories ("Scratchpad") + // + reg [INDEX_WIDTH-1:0] pad_xy_rd_addr; + reg pad_xy_rd_ena = 1'b0; + wire [ WORD_WIDTH-1:0] pad_x_rd_dout; + wire [ WORD_WIDTH-1:0] pad_y_rd_dout; + + wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_zero = {INDEX_WIDTH{1'b0}}; + wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_next = pad_xy_rd_addr + 1'b1; + + modexpng_mmm_pad pad + ( + .clk (clk), + .rst_n (rst_n), + .fsm_state (fsm_state), + .load_xy_addr_lsb (load_xy_addr_lsb), + .load_x_din (load_x_din), + .load_y_din (load_y_din), + .pad_x_rd_addr (pad_xy_rd_addr), + .pad_y_rd_addr (pad_xy_rd_addr), + .pad_x_rd_ena (pad_xy_rd_ena), + .pad_y_rd_ena (pad_xy_rd_ena), + .pad_x_rd_dout (pad_x_rd_dout), + .pad_y_rd_dout (pad_y_rd_dout) + ); + + + always @(posedge clk or negedge rst_n) + // + if (!rst_n) begin + pad_xy_rd_ena <= 1'b0; + end else case (fsm_state_next) + + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: + pad_xy_rd_ena <= 1'b1; + + default: + pad_xy_rd_ena <= 1'b0; + + endcase + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: + pad_xy_rd_addr <= pad_xy_rd_addr_zero; + + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: + pad_xy_rd_addr <= pad_xy_rd_addr_next; + + default: + pad_xy_rd_addr <= {INDEX_WIDTH{1'bX}}; + + endcase + + + + + // + // Flags + // + + wire mult_square_addr_done = x_din_addr_cnt == x_din_addr_cnt_last; + + always @* + // + 
fsm_state_after_mult_square = col_index_done ? /*FSM_STATE_MULT_TRIANGLE_TRIG*/FSM_STATE_STOP : FSM_STATE_MULT_SQUARE_COL_N_TRIG;; + + + // + // MAC Arrays + // + reg mac_x_ce = 1'b0; + reg mac_x_ce_aux = 1'b0; + reg [NUM_MULTS -1:0] mac_x_clr; + reg mac_x_clr_aux; + reg [NUM_MULTS -2:0] mac_x_casc_a; + reg mac_x_casc_a_aux; + wire [NUM_MULTS * WORD_WIDTH -1:0] mac_x_a; + reg [ 1 * WORD_WIDTH -1:0] mac_x_a_aux; + //wire [ 1 * WORD_WIDTH -1:0] mac_x_a_split[0:NUM_MULTS-1]; + reg [ 1 * WORD_WIDTH -1:0] mac_x_b; + wire [NUM_MULTS * MAC_WIDTH -1:0] mac_x_p; + wire [ 1 * MAC_WIDTH -1:0] mac_x_p_aux; + + reg mac_y_ce = 1'b0; + reg mac_y_ce_aux = 1'b0; + reg [NUM_MULTS -1:0] mac_y_clr; + reg mac_y_clr_aux; + reg [NUM_MULTS -2:0] mac_y_casc_a; + reg mac_y_casc_a_aux; + wire [NUM_MULTS * WORD_WIDTH -1:0] mac_y_a; + reg [ 1 * WORD_WIDTH -1:0] mac_y_a_aux; + //wire [ 1 * WORD_WIDTH -1:0] mac_y_a_split[0:NUM_MULTS-1]; + reg [ 1 * WORD_WIDTH -1:0] mac_y_b; + wire [NUM_MULTS * MAC_WIDTH -1:0] mac_y_p; + wire [ 1 * MAC_WIDTH -1:0] mac_y_p_aux; + + modexpng_mac_array mac_array_x + ( + .clk (clk), + .ce (mac_x_ce), + .ce_aux (mac_x_ce_aux), + .clr (mac_x_clr), + .clr_aux (mac_x_clr_aux), + .casc_a (mac_x_casc_a), + .casc_a_aux (mac_x_casc_a_aux), + .a_in (mac_x_a), + .a_in_aux (mac_x_a_aux), + .b_in (mac_x_b), + .p_out (mac_x_p), + .p_out_aux (mac_x_p_aux) + ); + + modexpng_mac_array mac_array_y + ( + .clk (clk), + .ce (mac_y_ce), + .ce_aux (mac_y_ce_aux), + .clr (mac_y_clr), + .clr_aux (mac_y_clr_aux), + .casc_a (mac_y_casc_a), + .casc_a_aux (mac_y_casc_a_aux), + .a_in (mac_y_a), + .a_in_aux (mac_y_a_aux), + .b_in (mac_y_b), + .p_out (mac_y_p), + .p_out_aux (mac_y_p_aux) + ); + + genvar gen_z; + + generate for (gen_z=0; gen_z<NUM_MULTS; gen_z=gen_z+1) + begin : gen_xy_din + //assign x_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = x_dout_reg[gen_z]; + //assign y_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = y_dout_reg[gen_z]; + //gen_xy_dout + assign mac_x_a[gen_z*WORD_WIDTH+:WORD_WIDTH] = 
x_din[gen_z*WORD_WIDTH+:WORD_WIDTH]; + + //assign x_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = x_dout_reg[gen_z]; + //assign y_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = y_dout_reg[gen_z]; + end + endgenerate + + + // + // MAC Clock Enable Logic + // + reg mac_xy_ce_adv = 1'b0; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) mac_xy_ce_adv <= 1'b0; + else case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_xy_ce_adv <= 1'b1; + default: mac_xy_ce_adv <= 1'b0; + endcase + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) {mac_y_ce, mac_x_ce} <= 2'b00; + else {mac_y_ce, mac_x_ce} <= {2{mac_xy_ce_adv}}; + + + // + // MAC Clear Logic + // + wire [NUM_MULTS-1:0] calc_mac_x_clear_square_value = + calc_mac_clear_square(col_index_prev, x_din_addr_cnt_lower_prev, x_din_addr_cnt_upper_prev); + + reg [NUM_MULTS-1:0] mac_xy_clr_adv; + + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_xy_clr_adv <= {NUM_MULTS{1'b1}}; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_xy_clr_adv <= calc_mac_x_clear_square_value; + default: mac_xy_clr_adv <= {NUM_MULTS{1'bX}}; + endcase + + always @(posedge clk) + // + {mac_y_clr, mac_x_clr} <= {2{mac_xy_clr_adv}}; + + + // + // MAC Cascade Logic + // + reg [NUM_MULTS-2:0] mac_xy_casc_a_adv; + + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'b0}}; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'b1}}; + default: mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'bX}}; + endcase + + always @(posedge clk) + // + {mac_y_casc_a, mac_x_casc_a} <= {2{mac_xy_casc_a_adv}}; + + + + // + // DOUT Mapping + // + generate for (gen_z=0; gen_z<NUM_MULTS; gen_z=gen_z+1) + 
begin : gen_xy_dout + assign x_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = x_dout_reg[gen_z]; + assign y_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = y_dout_reg[gen_z]; + end + endgenerate + + + // + // DOUT + // + reg [WORD_WIDTH-1:0] x_dout_reg[0:NUM_MULTS-1]; + reg [WORD_WIDTH-1:0] y_dout_reg[0:NUM_MULTS-1]; + + + + + integer int_z; + always @(posedge clk) + // + case (fsm_state) + // + FSM_STATE_LOAD_T1T2_3, + FSM_STATE_LOAD_NN_COEFF_3: + for (int_z=0; int_z<NUM_MULTS; int_z=int_z+1) begin + x_dout_reg[int_z] <= load_x_din; + y_dout_reg[int_z] <= load_y_din; + end + // + default: + for (int_z=0; int_z<NUM_MULTS; int_z=int_z+1) begin + x_dout_reg[int_z] <= {WORD_WIDTH{1'bX}}; + y_dout_reg[int_z] <= {WORD_WIDTH{1'bX}}; + end + // + endcase + + + + // + // FSM Process + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; + else fsm_state <= fsm_state_next; + + + // + // FSM Transition Logic + // + always @* begin + // + fsm_state_next = FSM_STATE_IDLE; + // + case (fsm_state) + FSM_STATE_IDLE: fsm_state_next = ena ? fsm_state_after_idle : FSM_STATE_IDLE; + + FSM_STATE_LOAD_T1T2_1: fsm_state_next = FSM_STATE_LOAD_T1T2_2 ; + FSM_STATE_LOAD_T1T2_2: fsm_state_next = FSM_STATE_LOAD_T1T2_3 ; + FSM_STATE_LOAD_T1T2_3: fsm_state_next = load_t1t2_addr_done ? FSM_STATE_LOAD_NN_COEFF_1 : FSM_STATE_LOAD_T1T2_1; + + FSM_STATE_LOAD_NN_COEFF_1: fsm_state_next = FSM_STATE_LOAD_NN_COEFF_2 ; + FSM_STATE_LOAD_NN_COEFF_2: fsm_state_next = FSM_STATE_LOAD_NN_COEFF_3 ; + FSM_STATE_LOAD_NN_COEFF_3: fsm_state_next = load_nn_coeff_addr_done ? FSM_STATE_STOP : FSM_STATE_LOAD_NN_COEFF_1; + + FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ; + FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_done ? 
FSM_STATE_MULT_SQUARE_COL_N_TRIG : FSM_STATE_MULT_SQUARE_COL_0_BUSY; + FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY; + + /* + FSM_STATE_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_TRIANGLE_COL_0_BUSY ; + FSM_STATE_TRIANGLE_COL_0_BUSY: fsm_state_next = din_addr_narrow_done ? FSM_STATE_TRIANGLE_COL_N_TRIG : FSM_STATE_TRIANGLE_COL_0_BUSY; + FSM_STATE_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_TRIANGLE_COL_N_BUSY ; + FSM_STATE_TRIANGLE_COL_N_BUSY: fsm_state_next = din_addr_narrow_done ? fsm_state_after_triangle : FSM_STATE_TRIANGLE_COL_N_BUSY; + + FSM_STATE_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_RECTANGLE_COL_0_BUSY ; + FSM_STATE_RECTANGLE_COL_0_BUSY: fsm_state_next = din_addr_narrow_done ? FSM_STATE_RECTANGLE_COL_N_TRIG : FSM_STATE_RECTANGLE_COL_0_BUSY; + FSM_STATE_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_RECTANGLE_COL_N_BUSY ; + FSM_STATE_RECTANGLE_COL_N_BUSY: fsm_state_next = din_addr_narrow_done ? 
fsm_state_after_rectangle : FSM_STATE_RECTANGLE_COL_N_BUSY; + */ + + FSM_STATE_STOP: fsm_state_next = FSM_STATE_IDLE ; + + endcase + // + end + + + // + // Ready Output + // + reg rdy_reg = 1'b1; + assign rdy = rdy_reg; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) rdy_reg <= 1'b1; + else case (fsm_state) + FSM_STATE_IDLE: if (ena) rdy_reg <= 1'b0; + FSM_STATE_STOP: rdy_reg <= 1'b1; + endcase + + function [ NUM_MULTS-1:0] calc_mac_clear_square; + input [INDEX_WIDTH-4:0] col_index_delayed; + input [ 3-1:0] x_din_addr_cnt_lower_delayed; + input [INDEX_WIDTH-4:0] x_din_addr_cnt_upper_delayed; + begin + if (x_din_addr_cnt_upper_delayed == col_index_delayed) + case (x_din_addr_cnt_lower_delayed) + 3'b000: calc_mac_clear_square = 8'b00000001; + 3'b001: calc_mac_clear_square = 8'b00000010; + 3'b010: calc_mac_clear_square = 8'b00000100; + 3'b011: calc_mac_clear_square = 8'b00001000; + 3'b100: calc_mac_clear_square = 8'b00010000; + 3'b101: calc_mac_clear_square = 8'b00100000; + 3'b110: calc_mac_clear_square = 8'b01000000; + 3'b111: calc_mac_clear_square = 8'b10000000; + endcase + else + calc_mac_clear_square = {NUM_MULTS{1'b0}}; + end + endfunction + + +endmodule diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh new file mode 100644 index 0000000..f846119 --- /dev/null +++ b/rtl/modexpng_parameters.vh @@ -0,0 +1,39 @@ +//localparam WORD_WIDTH = 17; +//localparam MAC_WIDTH = 47; + +//localparam BANK_ADDR_WIDTH = 3; // TODO: Replace everywhere! 
+
+localparam [2:0] BANK_FAT_T1T2   = 3'd0;
+localparam [2:0] BANK_FAT_ABL    = 3'd1;
+localparam [2:0] BANK_FAT_ABH    = 3'd2;
+localparam [2:0] BANK_FAT_Q      = 3'd3;
+localparam [2:0] BANK_FAT_Q_EXT  = 3'd4;
+localparam [2:0] BANK_FAT_ML     = 3'd5;
+localparam [2:0] BANK_FAT_MH     = 3'd6;
+localparam [2:0] BANK_FAT_MH_EXT = 3'd7;
+
+localparam [1:0] BANK_SLIM_T1T2        = 2'd0;
+localparam [1:0] BANK_SLIM_N           = 2'd1;
+localparam [1:0] BANK_SLIM_N_COEFF     = 2'd2;
+localparam [1:0] BANK_SLIM_N_COEFF_EXT = 2'd3;
+
+
+//localparam BANK_Y_T2 = 3'd0;
+//localparam BANK_XY_T1T2 = 3'd0;
+
+//localparam BANK_XY_AB_LSB = 3'd1;
+//localparam BANK_XY_AB_MSB = 3'd2;
+
+//localparam BANK_X_N = 3'd3;
+//localparam BANK_Y_N_COEFF = 3'd3;
+
+//localparam BANK_XY_M = 3'd4;
+
+//localparam BANK_XY_Q_LSB = 3'd5;
+//localparam BANK_XY_Q_MSB = 3'd6;
+
+//localparam BANK_XY_AUX = 3'd7;
+
+//localparam BANK_XY_ANY = 3'bXXX;
+
+//localparam BANK_XY_AUX_ADDR_N_COEFF = 0;
diff --git a/rtl/modexpng_parameters_x8.vh b/rtl/modexpng_parameters_x8.vh
new file mode 100644
index 0000000..8734354
--- /dev/null
+++ b/rtl/modexpng_parameters_x8.vh
@@ -0,0 +1 @@
+localparam NUM_MULTS = 8;
diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v
new file mode 100644
index 0000000..db4774b
--- /dev/null
+++ b/rtl/modexpng_part_recombinator.v
@@ -0,0 +1,623 @@
+module modexpng_part_recombinator
+(
+    clk,
+    rdy,
+    fsm_state_next,
+    index_last,
+    dsp_x_ce_p, dsp_y_ce_p,
+    ena_x, ena_y,
+    dsp_x_p, dsp_y_p,
+    col_index, col_index_last, slim_bram_xy_addr,
+    fat_bram_xy_bank, fat_bram_xy_addr, fat_bram_x_dout, fat_bram_y_dout, fat_bram_xy_dout_valid
+);
+    // Latches the 47-bit DSP products selected during the MULT_SQUARE states, feeds them through two
+    // recombinator blocks (LSB/MSB) and emits words for banks BANK_FAT_ABL/ABH. Only the X channel is wired up here.
+    //
+    // Headers
+    //
+    `include "../rtl/modexpng_mmm_fsm.vh"
+    `include "../rtl/modexpng_parameters.vh"
+    `include "../rtl/modexpng_parameters_x8.vh"
+
+
+    input                        clk;
+    output                       rdy;
+    input  [FSM_STATE_WIDTH-1:0] fsm_state_next;
+    input  [7:0]                 index_last;
+    input                        dsp_x_ce_p;
+    input                        dsp_y_ce_p;
+    input                        ena_x;
+    input                        ena_y;
+    input  [8*47-1:0]            dsp_x_p;
+    input  [8*47-1:0]            dsp_y_p;
+    input  [ 4:0]                col_index;
+    input  [ 4:0]                col_index_last;
+    input  [ 7:0]                slim_bram_xy_addr;
+
+    output [ 2:0]                fat_bram_xy_bank;
+    output [ 7:0]                fat_bram_xy_addr;
+    output [ 17:0]               fat_bram_x_dout;
+    output [ 17:0]               fat_bram_y_dout;
+    output                       fat_bram_xy_dout_valid;
+
+
+    //
+    // Latches
+    //
+    reg [1*47-1:0] dsp_x_p_latch[0:7];
+    reg [1*47-1:0] dsp_y_p_latch[0:7];
+
+
+    //
+    // Mapping
+    //
+    wire [46:0] dsp_x_p_split[0:7];
+    wire [46:0] dsp_y_p_split[0:7];
+
+    genvar z;
+    generate for (z=0; z<NUM_MULTS; z=z+1)
+        begin : gen_dsp_xy_p_split
+            assign dsp_x_p_split[z] = dsp_x_p[47*z+:47];
+            assign dsp_y_p_split[z] = dsp_y_p[47*z+:47];
+        end
+    endgenerate
+
+
+    //
+    // Delays
+    //
+    reg dsp_y_ce_p_dly1 = 1'b0;
+    reg dsp_x_ce_p_dly1 = 1'b0;
+
+    always @(posedge clk) begin
+        //
+        {dsp_y_ce_p_dly1, dsp_x_ce_p_dly1} <= {dsp_y_ce_p, dsp_x_ce_p};
+        //
+    end
+
+
+    //
+    // Registers
+    //
+
+    // valid
+    reg x_valid_lsb = 1'b0;
+    reg y_valid_lsb = 1'b0;
+    reg x_valid_msb = 1'b0;
+    reg y_valid_msb = 1'b0;
+
+    // bitmap
+    reg [7:0] x_bitmap_lsb = {8{1'b0}};
+    reg [7:0] y_bitmap_lsb = {8{1'b0}};
+    reg [7:0] x_bitmap_msb = {8{1'b0}};
+    reg [7:0] y_bitmap_msb = {8{1'b0}};
+
+    // index
+    reg [2:0] x_index_lsb = 3'dX;
+    reg [2:0] y_index_lsb = 3'dX;
+
+    // purge
+    reg x_purge_lsb = 1'b0;
+    reg y_purge_lsb = 1'b0;
+    reg x_purge_msb = 1'b0;
+    reg y_purge_msb = 1'b0;
+
+    // valid - latch
+    reg x_valid_latch_lsb = 1'b0;
+    reg y_valid_latch_lsb = 1'b0;
+
+    // bitmap - latch
+    reg [7:0] x_bitmap_latch_lsb = {8{1'b0}};
+    reg [7:0] y_bitmap_latch_lsb = {8{1'b0}};
+    reg [7:0] x_bitmap_latch_msb = {8{1'b0}};
+    reg [7:0] y_bitmap_latch_msb = {8{1'b0}};
+
+    // index - latch
+    reg [2:0] x_index_latch_lsb = 3'dX;
+    reg [2:0] y_index_latch_lsb = 3'dX;
+
+    // purge - latch
+    reg x_purge_latch_lsb = 1'b0;
+    reg y_purge_latch_lsb = 1'b0;
+    reg x_purge_latch_msb = 1'b0;
+    reg y_purge_latch_msb = 1'b0;
+
+    // advance pipeline: stage [6] is loaded from the calc_square_* functions and shifts down to stage [1]
+    reg       xy_valid_lsb_adv[1:6];
+    reg       xy_valid_msb_adv[1:6];
+    reg [7:0] xy_bitmap_lsb_adv[1:6];
+    reg [7:0] xy_bitmap_msb_adv[1:6];
+    reg [2:0] xy_index_lsb_adv[1:6];
+    reg [2:0] xy_index_msb_adv[1:6];
+    reg       xy_purge_lsb_adv[1:6];
+    reg       xy_purge_msb_adv[1:6];
+
+
+    integer i;
+    initial for (i=1; i<=6; i=i+1) begin // all six stages, so no X enters the pipeline from stage [6]
+        xy_valid_lsb_adv[i] = 1'b0;
+        xy_valid_msb_adv[i] = 1'b0;
+        xy_bitmap_lsb_adv[i] = {8{1'b0}};
+        xy_bitmap_msb_adv[i] = {8{1'b0}};
+        xy_index_lsb_adv[i] = 3'dX;
+        xy_index_msb_adv[i] = 3'dX;
+        xy_purge_lsb_adv[i] = 1'b0;
+        xy_purge_msb_adv[i] = 1'b0;
+    end
+
+    function [0:0] calc_square_valid_lsb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        begin
+            // valid when the upper five address bits match the current column index
+            if (slim_bram_xy_addr_value[7:3] == col_index_value)
+                calc_square_valid_lsb = 1'b1;
+            else
+                calc_square_valid_lsb = 1'b0;
+            //
+        end
+    endfunction
+
+    function [7:0] calc_square_bitmap_lsb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        begin
+            // one-hot decode of the lower three address bits, gated by the column match
+            if (slim_bram_xy_addr_value[7:3] == col_index_value)
+                //
+                case (slim_bram_xy_addr_value[2:0])
+                    3'b000: calc_square_bitmap_lsb = 8'b00000001;
+                    3'b001: calc_square_bitmap_lsb = 8'b00000010;
+                    3'b010: calc_square_bitmap_lsb = 8'b00000100;
+                    3'b011: calc_square_bitmap_lsb = 8'b00001000;
+                    3'b100: calc_square_bitmap_lsb = 8'b00010000;
+                    3'b101: calc_square_bitmap_lsb = 8'b00100000;
+                    3'b110: calc_square_bitmap_lsb = 8'b01000000;
+                    3'b111: calc_square_bitmap_lsb = 8'b10000000;
+                endcase
+                //
+            else
+                calc_square_bitmap_lsb = {8{1'b0}};
+            //
+        end
+    endfunction
+
+    function [2:0] calc_square_index_lsb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        begin
+            // lower three address bits on a column match, don't-care otherwise
+            if (slim_bram_xy_addr_value[7:3] == col_index_value)
+                //
+                case (slim_bram_xy_addr_value[2:0])
+                    3'b000: calc_square_index_lsb = 3'd0;
+                    3'b001: calc_square_index_lsb = 3'd1;
+                    3'b010: calc_square_index_lsb = 3'd2;
+                    3'b011: calc_square_index_lsb = 3'd3;
+                    3'b100: calc_square_index_lsb = 3'd4;
+                    3'b101: calc_square_index_lsb = 3'd5;
+                    3'b110: calc_square_index_lsb = 3'd6;
+                    3'b111: calc_square_index_lsb = 3'd7;
+                endcase
+                //
+            else
+                calc_square_index_lsb = 3'dX;
+            //
+        end
+    endfunction
+
+    function calc_square_purge_lsb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        begin
+            // set only on a column match that is also the last column
+            if (slim_bram_xy_addr_value[7:3] == col_index_value)
+                calc_square_purge_lsb = slim_bram_xy_addr_value[7:3] == col_index_last_value;
+            else
+                calc_square_purge_lsb = 1'b0;
+            //
+        end
+    endfunction
+
+    function calc_square_valid_msb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        input [7:0] index_last_value;
+        begin
+            // valid when the address equals the last word index
+            if (slim_bram_xy_addr_value == index_last_value)
+                calc_square_valid_msb = 1'b1;
+            else
+                calc_square_valid_msb = 1'b0;
+            //
+        end
+    endfunction
+
+    function [7:0] calc_square_bitmap_msb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        input [7:0] index_last_value;
+        begin
+            // at the last word index: bits 6..0 set, bit 7 set unless this is the last column
+            if (slim_bram_xy_addr_value == index_last_value) begin
+                calc_square_bitmap_msb[7] = col_index_value != col_index_last_value;
+                calc_square_bitmap_msb[6:0] = 7'b1111111;
+            end else
+                calc_square_bitmap_msb[7:0] = 8'b00000000;
+            //
+        end
+    endfunction
+
+    function calc_square_purge_msb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        input [7:0] index_last_value;
+        begin
+            // set at the last word index of the last column
+            if (slim_bram_xy_addr_value == index_last_value)
+                calc_square_purge_msb = col_index_value == col_index_last_value;
+            else
+                calc_square_purge_msb = 1'b0;
+            //
+        end
+    endfunction
+
+
+    reg        recomb_lsb_ce = 1'b0;
+    reg [ 2:0] recomb_lsb_ce_purge = 3'b000;
+    wire       recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_purge[0];
+    reg        recomb_lsb_clr;
+    reg        recomb_lsb_vld = 1'b0;
+
+    reg  [46:0] recomb_lsb_din;
+    wire [15:0] recomb_lsb_dout;
+
+    reg        recomb_msb_ce = 1'b0;
+    reg [ 1:0] recomb_msb_ce_purge = 2'b00;
+    wire       recomb_msb_ce_combined = recomb_msb_ce | recomb_msb_ce_purge[0];
+    reg        recomb_msb_clr;
+    reg        recomb_msb_vld = 1'b0;
+
+    always @(posedge clk)
+        //
+        {recomb_msb_vld, recomb_lsb_vld} <= {recomb_msb_ce_combined, recomb_lsb_ce_combined};
+
+    reg  [46:0] recomb_msb_din;
+    wire [15:0] recomb_msb_dout;
+
+    modexpng_recombinator_block recomb_x_lsb
+    (
+        .clk  (clk),
+        .ce   (recomb_lsb_ce_combined),
+        .clr  (recomb_lsb_clr),
+        .din  (recomb_lsb_din),
+        .dout (recomb_lsb_dout)
+    );
+
+    modexpng_recombinator_block recomb_x_msb
+    (
+        .clk  (clk),
+        .ce   (recomb_msb_ce_combined),
+        .clr  (recomb_msb_clr),
+        .din  (recomb_msb_din),
+        .dout (recomb_msb_dout)
+    );
+
+    always @(posedge clk) begin
+        //
+        recomb_lsb_ce <= x_valid_latch_lsb;
+        recomb_msb_ce <= x_bitmap_latch_msb[0];
+        // purge: load a run of ones that shifts out through bit 0, keeping ce asserted for extra cycles
+        if (x_purge_latch_lsb)
+            recomb_lsb_ce_purge <= 3'b111;
+        else
+            recomb_lsb_ce_purge <= {1'b0, recomb_lsb_ce_purge[2:1]};
+        // nonblocking like every other assignment here: a blocking write would race with readers of recomb_msb_ce_combined
+        if (x_purge_latch_msb && x_bitmap_latch_msb[0] && !x_bitmap_latch_msb[1])
+            recomb_msb_ce_purge <= 2'b11;
+        else
+            recomb_msb_ce_purge <= {1'b0, recomb_msb_ce_purge[1]};
+        //
+    end
+
+
+    always @(posedge clk)
+        // clr is armed when both enables are high and released on the first clock enable afterwards
+        if (ena_x & ena_y) begin
+            recomb_lsb_clr <= 1'b1;
+            recomb_msb_clr <= 1'b1;
+        end else begin
+            if (recomb_lsb_ce) recomb_lsb_clr <= 1'b0;
+            if (recomb_msb_ce) recomb_msb_clr <= 1'b0;
+        end
+
+    always @(posedge clk)
+        //
+        if (x_valid_latch_lsb)
+            recomb_lsb_din <= dsp_x_p_latch[x_index_latch_lsb];
+        else
+            recomb_lsb_din <= {47{1'b0}};
+
+    always @(posedge clk)
+        //
+        if (x_bitmap_latch_msb[0])
+            recomb_msb_din <= dsp_x_p_latch[0];
+        else
+            recomb_msb_din <= {47{1'b0}};
+
+
+    always @(posedge clk)
+        //
+        case (fsm_state_next)
+            //
+            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+            FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+            FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
+                //
+                xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, slim_bram_xy_addr);
+                xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr);
+                xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, slim_bram_xy_addr);
+                xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
+                //
+                xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, slim_bram_xy_addr, index_last);
+                xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, index_last);
+                xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, slim_bram_xy_addr, index_last);
+                //
+            end
+            //
+            default: begin
+                //
+                xy_valid_lsb_adv [6] <= 1'b0;
+                xy_bitmap_lsb_adv[6] <= {8{1'b0}};
+                xy_index_lsb_adv [6] <= 3'dX;
+                xy_purge_lsb_adv [6] <= 1'b0;
+                //
+                xy_valid_msb_adv [6] <= 1'b0;
+                xy_bitmap_msb_adv[6] <= {8{1'b0}};
+                xy_purge_msb_adv [6] <= 1'b0;
+                //
+            end
+            //
+        endcase
+
+
+    always @(posedge clk) begin
+        // stage [1] of the advance pipeline drives both the X and Y copies
+        {y_valid_lsb, x_valid_lsb} <= {2{xy_valid_lsb_adv [1]}};
+        {y_bitmap_lsb, x_bitmap_lsb} <= {2{xy_bitmap_lsb_adv[1]}};
+        {y_index_lsb, x_index_lsb} <= {2{xy_index_lsb_adv [1]}};
+        {y_purge_lsb, x_purge_lsb} <= {2{xy_purge_lsb_adv [1]}};
+        //
+        {y_valid_latch_lsb, x_valid_latch_lsb} <= {y_valid_lsb, x_valid_lsb};
+        {y_bitmap_latch_lsb, x_bitmap_latch_lsb} <= {y_bitmap_lsb, x_bitmap_lsb};
+        {y_index_latch_lsb, x_index_latch_lsb} <= {y_index_lsb, x_index_lsb};
+        {y_purge_latch_lsb, x_purge_latch_lsb} <= {y_purge_lsb, x_purge_lsb};
+        //
+        {y_valid_msb, x_valid_msb} <= {2{xy_valid_msb_adv[1]}};
+        {y_bitmap_msb, x_bitmap_msb} <= {2{xy_bitmap_msb_adv[1]}};
+        {y_purge_msb, x_purge_msb} <= {2{xy_purge_msb_adv[1]}};
+        // MSB bitmap latch: loaded while x_valid_msb is high, otherwise shifted right one bit per clock
+        if (x_valid_msb) begin
+            x_bitmap_latch_msb <= x_bitmap_msb;
+            x_purge_latch_msb <= x_purge_msb;
+        end else begin
+            x_bitmap_latch_msb <= {1'b0, x_bitmap_latch_msb[7:1]};
+        end
+        //
+        // shift stages 6 -> 1; the bound must stay at i<6 because the body reads [i+1]
+        for (i=1; i<6; i=i+1) begin
+            xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
+            xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
+            xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
+            xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
+            //
+            xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1];
+            xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1];
+            xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1];
+        end
+        //
+    end
+
+    always @(posedge clk)
+        //
+        if (x_bitmap_latch_msb[1]) // only shift 7 times
+            //
+            for (i=0; i<8; i=i+1)
+                if (i < 7)
+                    dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
+                else
+                    dsp_x_p_latch[i] <= {47{1'bX}};
+        //
+        else if (dsp_x_ce_p_dly1)
+            //
+            for (i=0; i<8; i=i+1)
+                // capture product i when either the LSB bitmap or the (valid) MSB bitmap selects it
+                if (x_bitmap_lsb[i])
+                    dsp_x_p_latch[i] <= dsp_x_p_split[i];
+                else if (x_valid_msb && x_bitmap_msb[i])
+                    dsp_x_p_latch[i] <= dsp_x_p_split[i];
+
+    reg recomb_x_lsb_dout_valid = 1'b0;
+    reg recomb_x_msb_dout_valid = 1'b0;
+
+    always @(posedge clk) begin
+        recomb_x_lsb_dout_valid <= recomb_lsb_ce_combined;
+        recomb_x_msb_dout_valid <= recomb_msb_ce_combined;
+    end
+
+
+
+    reg [ 2:0] fat_bram_xy_bank_reg;
+    reg [ 7:0] fat_bram_xy_addr_reg;
+    reg [ 7:0] fat_bram_xy_cnt_lsb;
+    reg [ 7:0] fat_bram_xy_cnt_msb;
+    reg [17:0] fat_bram_x_dout_reg;
+    reg [17:0] fat_bram_y_dout_reg;
+    reg        fat_bram_xy_dout_valid_reg = 1'b0;
+
+    reg [15:0] recomb_msb_dout_carry_0;
+    reg [15:0] recomb_msb_dout_carry_1;
+
+    reg [15:0] recomb_msb_dout_delay_0;
+    reg [15:0] recomb_msb_dout_delay_1;
+    reg [15:0] recomb_msb_dout_delay_2;
+
+    reg [ 7:0] recomb_msb_cnt_delay_0 = 8'd0;
+    reg [ 7:0] recomb_msb_cnt_delay_1 = 8'd0;
+    reg [ 7:0] recomb_msb_cnt_delay_2 = 8'd0;
+
+    assign fat_bram_xy_bank = fat_bram_xy_bank_reg;
+    assign fat_bram_xy_addr = fat_bram_xy_addr_reg;
+    assign fat_bram_x_dout = fat_bram_x_dout_reg;
+    assign fat_bram_y_dout = fat_bram_y_dout_reg;
+    assign fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg;
+
+    reg rdy_reg = 1'b1;
+    reg rdy_adv = 1'b1;
+
+    assign rdy = rdy_reg;
+
+
+    always @(posedge clk)
+        //
+        if (ena_x & ena_y)
+            rdy_reg <= 1'b0;
+        else
+            rdy_reg <= rdy_adv;
+
+    always @(posedge clk)
+        // output sequencer: dispatches on which recombinator produced a word in the previous cycle
+        if (ena_x & ena_y) begin
+            rdy_adv <= 1'b0;
+            fat_bram_xy_cnt_lsb <= 8'd0;
+            fat_bram_xy_cnt_msb <= 8'd0;
+        end else begin
+            //
+            case ({recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid})
+                //
+                2'b00: begin
+                    // neither block active: drain the delayed MSB words, if any remain
+                    if (recomb_msb_cnt_delay_2 > 8'd0) begin
+                        //
+                        rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0;
+                        //
+                        recomb_msb_dout_delay_0 <= {16{1'bX}}; // register is 16 bits wide
+                        recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
+                        recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
+                        //
+                        recomb_msb_cnt_delay_0 <= 8'd0;
+                        recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
+                        recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
+                        //
+                        fat_bram_xy_bank_reg <= BANK_FAT_ABH;
+                        fat_bram_xy_addr_reg <= recomb_msb_cnt_delay_2;
+                        fat_bram_x_dout_reg <= {2'b00, recomb_msb_dout_delay_2};
+//                      fat_bram_y_dout_reg <= {18{1'bX}};
+                        fat_bram_xy_dout_valid_reg <= 1'b1;
+                        //
+                    end else begin
+                        //
+                        fat_bram_xy_bank_reg <= 3'bXXX;
+                        fat_bram_xy_addr_reg <= 8'hXX;
+                        fat_bram_x_dout_reg <= {18{1'bX}};
+                        fat_bram_y_dout_reg <= {18{1'bX}};
+                        fat_bram_xy_dout_valid_reg <= 1'b0;
+                        //
+                    end
+                    //
+                end
+                //
+                2'b01: begin
+                    // LSB only: write the word to bank ABL at the running LSB counter
+                    fat_bram_xy_bank_reg <= BANK_FAT_ABL;
+                    fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb;
+                    fat_bram_x_dout_reg <= {2'b00, recomb_lsb_dout};
+//                  fat_bram_y_dout_reg
+                    fat_bram_xy_dout_valid_reg <= 1'b1;
+                    //
+                    fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1;
+                    //
+                end
+                //
+                2'b10: begin
+                    // MSB only: the first two words are held back in the carry registers
+                    if (fat_bram_xy_cnt_msb < 8'd2) begin
+                        //
+                        recomb_msb_dout_carry_0 <= recomb_msb_dout;
+                        recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;
+                        //
+                        fat_bram_xy_bank_reg <= 3'bXXX;
+                        fat_bram_xy_addr_reg <= 8'hXX;
+                        fat_bram_x_dout_reg <= {18{1'bX}};
+                        // fat_bram_y_dout_reg
+                        fat_bram_xy_dout_valid_reg <= 1'b0;
+                        //
+                    end else begin
+                        //
+                        fat_bram_xy_bank_reg <= BANK_FAT_ABH;
+                        fat_bram_xy_addr_reg <= fat_bram_xy_cnt_msb;
+                        fat_bram_x_dout_reg <= {2'b00, recomb_msb_dout};
+                        // fat_bram_y_dout_reg
+                        fat_bram_xy_dout_valid_reg <= 1'b1;
+                        //
+                    end
+                    //
+                    fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1;
+                    //
+                end
+                //
+                2'b11: begin
+                    // both active: emit an LSB-side word now and queue the MSB word in the delay line
+                    if (fat_bram_xy_cnt_lsb == index_last) begin
+                        //
+                        fat_bram_xy_bank_reg <= BANK_FAT_ABL;
+                        fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb;
+                        fat_bram_x_dout_reg <= {2'b00, recomb_lsb_dout};
+//                      fat_bram_y_dout_reg <= {18{1'bX}};
+                        fat_bram_xy_dout_valid_reg <= 1'b1;
+                        //
+                        fat_bram_xy_cnt_lsb <= 8'd0;
+                        //
+                    end else begin
+                        //
+                        fat_bram_xy_bank_reg <= BANK_FAT_ABH;
+                        fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb;
+                        fat_bram_x_dout_reg <= {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}};
+//                      fat_bram_y_dout_reg <= {18{1'bX}};
+                        fat_bram_xy_dout_valid_reg <= 1'b1;
+                        //
+                        fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1;
+                        //
+                        recomb_msb_dout_carry_0 <= {16{1'bX}};
+                        recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;
+                        //
+                    end
+                    //
+                    recomb_msb_dout_delay_0 <= recomb_msb_dout;
+                    recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
+                    recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
+                    //
+                    recomb_msb_cnt_delay_0 <= fat_bram_xy_cnt_msb;
+                    recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
+                    recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
+                    //
+                    fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1;
+                    //
+                end
+                //
+            endcase
+            //
+        end
+
+
+
+
+endmodule
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
new file mode 100644
index 0000000..efe0ac5
--- /dev/null
+++ b/rtl/modexpng_recombinator_block.v
@@ -0,0 +1,35 @@
+module modexpng_recombinator_block
+(
+    clk,
+    ce, clr,
+    din, dout
+);
+    // Splits din into slices x = [15:0], y = [31:16], z = [46:32] and, on each ce, adds the previous y and x[17:16] into x; dout = x[15:0].
+    input         clk;
+    input         ce;
+    input         clr;
+    input  [46:0] din;
+    output [15:0] dout;
+
+    reg [14:0] z;
+    reg [16:0] y;
+    reg [17:0] x;
+    //reg [15:0] w;
+
+    //assign dout = w;
+    assign dout = x[15:0];
+
+    wire [14:0] din_z = din[46:32]; // TODO: maybe determine more precise bound here
+    wire [15:0] din_y = din[31:16];
+    wire [15:0] din_x = din[15: 0];
+
+    always @(posedge clk)
+        // clr restarts the accumulation: x and y take only the new slices, with no carry-in from earlier words
+        if (ce) begin
+            z <= din_z;
+            y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z};
+            x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {{16{1'b0}}, x[17:16]};
+            //w <= clr ? {16{1'bX}} : x[15:0];
+        end
+
+endmodule |