From ecf0374b7bbf1c1ea56fea8f1acaeea85c3612d2 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 15:07:56 +0300 Subject: Further work on the Montgomery modular multiplier. Added the third "rectangular" stage of the multiplication process, i.e. computation of how many copies of the modulus N to add to the intermediate product AB to zeroize the lower half: M = Q * N. --- bench/tb_square.v | 362 ++++++++++++++++++++++++++------- rtl/modexpng_mmm_fsm.vh | 11 +- rtl/modexpng_parameters.vh | 26 +-- rtl/modexpng_part_recombinator.v | 425 ++++++++++++++++++++++++++++++++------- 4 files changed, 668 insertions(+), 156 deletions(-) diff --git a/bench/tb_square.v b/bench/tb_square.v index 23831db..d35a5cc 100644 --- a/bench/tb_square.v +++ b/bench/tb_square.v @@ -41,6 +41,8 @@ module tb_square; reg [17:0] AB[0:63]; reg [17:0] N_COEFF[0:32]; reg [17:0] Q[0:32]; + reg [17:0] N[0:31]; + reg [17:0] M[0:64]; // @@ -103,6 +105,33 @@ module tb_square; Q[28] = 18'h0bf39; Q[29] = 18'h0929d; Q[30] = 18'h05273; Q[31] = 18'h0c30a; Q[32] = 18'h0eef3; // + N[ 0] = 18'h03ad9; N[ 1] = 18'h046b4; N[ 2] = 18'h0e181; N[ 3] = 18'h0fac7; + N[ 4] = 18'h0be72; N[ 5] = 18'h029ab; N[ 6] = 18'h07e51; N[ 7] = 18'h037a8; + N[ 8] = 18'h0880c; N[ 9] = 18'h05a7d; N[10] = 18'h043c2; N[11] = 18'h038c9; + N[12] = 18'h01275; N[13] = 18'h0aa0d; N[14] = 18'h0c0c1; N[15] = 18'h0d035; + N[16] = 18'h04082; N[17] = 18'h0543c; N[18] = 18'h0dcb0; N[19] = 18'h0497c; + N[20] = 18'h0b12c; N[21] = 18'h013d4; N[22] = 18'h0b80a; N[23] = 18'h051cf; + N[24] = 18'h0286c; N[25] = 18'h0b600; N[26] = 18'h0d838; N[27] = 18'h0af4b; + N[28] = 18'h08274; N[29] = 18'h06a07; N[30] = 18'h0beea; N[31] = 18'h0f000; + // + M[ 0] = 18'h041b2; M[ 1] = 18'h00128; M[ 2] = 18'h06b69; M[ 3] = 18'h08e7e; + M[ 4] = 18'h0118c; M[ 5] = 18'h0b96d; M[ 6] = 18'h0ebe5; M[ 7] = 18'h0f873; + M[ 8] = 18'h0cf14; M[ 9] = 18'h0de83; M[10] = 18'h09690; M[11] = 18'h05e9a; + M[12] = 18'h048ac; M[13] = 18'h0b506; M[14] = 18'h01283; 
M[15] = 18'h08631; + M[16] = 18'h0179c; M[17] = 18'h06820; M[18] = 18'h0867b; M[19] = 18'h0b750; + M[20] = 18'h0e680; M[21] = 18'h0df95; M[22] = 18'h0d818; M[23] = 18'h0b4c5; + M[24] = 18'h0cced; M[25] = 18'h0c4a9; M[26] = 18'h0bb78; M[27] = 18'h04295; + M[28] = 18'h0b1b4; M[29] = 18'h09635; M[30] = 18'h0066b; M[31] = 18'h022b1; + M[32] = 18'h04fdb; M[33] = 18'h0efc8; M[34] = 18'h00a14; M[35] = 18'h04bef; + M[36] = 18'h006a1; M[37] = 18'h0f1a6; M[38] = 18'h0fc40; M[39] = 18'h0adb5; + M[40] = 18'h06e8f; M[41] = 18'h02c60; M[42] = 18'h083e1; M[43] = 18'h0f862; + M[44] = 18'h0da61; M[45] = 18'h0dd3d; M[46] = 18'h03381; M[47] = 18'h09db0; + M[48] = 18'h05454; M[49] = 18'h07525; M[50] = 18'h0d9c7; M[51] = 18'h0a361; + M[52] = 18'h049e0; M[53] = 18'h0a671; M[54] = 18'h0242e; M[55] = 18'h07cb2; + M[56] = 18'h02021; M[57] = 18'h0bde1; M[58] = 18'h025aa; M[59] = 18'h0c615; + M[60] = 18'h05645; M[61] = 18'h03b46; M[62] = 18'h065d6; M[63] = 18'h0390d; + M[64] = 18'h0e005; + // end @@ -134,6 +163,12 @@ module tb_square; reg [ 7:0] tb_slim_bram_xy_addr; reg [17:0] tb_slim_bram_x_din; reg [17:0] tb_slim_bram_y_din; + + reg mgr_slim_bram_xy_ena = 1'b0; + reg [ 1:0] mgr_slim_bram_xy_bank; + reg [ 7:0] mgr_slim_bram_xy_addr; + reg [17:0] mgr_slim_bram_x_din; + reg [17:0] mgr_slim_bram_y_din; reg mac_slim_bram_xy_ena = 1'b0; reg mac_slim_bram_xy_reg_ena = 1'b0; @@ -195,10 +230,10 @@ module tb_square; ip_bram_18k slim_bram_x ( .clka (clk), - .ena (tb_slim_bram_xy_ena), - .wea (tb_slim_bram_xy_ena), - .addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}), - .dina (tb_slim_bram_x_din), + .ena (mgr_slim_bram_xy_ena), + .wea (mgr_slim_bram_xy_ena), + .addra ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}), + .dina (mgr_slim_bram_x_din), .clkb (clk), .enb (mac_slim_bram_xy_ena), @@ -210,10 +245,10 @@ module tb_square; ip_bram_18k slim_bram_y ( .clka (clk), - .ena (tb_slim_bram_xy_ena), - .wea (tb_slim_bram_xy_ena), - .addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}), - .dina 
(tb_slim_bram_y_din), + .ena (mgr_slim_bram_xy_ena), + .wea (mgr_slim_bram_xy_ena), + .addra ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}), + .dina (mgr_slim_bram_y_din), .clkb (clk), .enb (mac_slim_bram_xy_ena), @@ -266,14 +301,23 @@ module tb_square; wait_clock_tick; end for (i=32; i<33; i=i+1) begin - tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF_EXT; - tb_slim_bram_xy_addr = 0; + tb_slim_bram_xy_bank = BANK_SLIM_EXT; + tb_slim_bram_xy_addr = 0; // ! tb_slim_bram_x_din = N_COEFF[i]; tb_slim_bram_y_din = N_COEFF[i]; wait_clock_tick; end + for (i=0; i<32; i=i+1) begin + tb_fat_bram_xy_bank = BANK_FAT_N; + tb_fat_bram_xy_addr = i[7:0]; + tb_fat_bram_x_din = N[i]; + tb_fat_bram_y_din = N[i]; + + wait_clock_tick; + end + tb_fat_bram_xy_ena = 1'b0; tb_slim_bram_xy_ena = 1'b0; @@ -299,6 +343,7 @@ module tb_square; verify_ab; verify_q; + verify_m; end @@ -418,25 +463,23 @@ module tb_square; wire mult_square_addr_almost_done_comb; reg mult_square_addr_almost_done_flop; - - //wire mult_square_addr_surely_done_comb; reg mult_square_addr_surely_done_flop; - reg mult_triangle_addr_almost_done_comb; - reg mult_triangle_addr_almost_done_flop; - - //wire mult_triangle_addr_surely_done_comb; + wire mult_triangle_addr_almost_done_comb; + reg mult_triangle_addr_almost_done_flop; reg mult_triangle_addr_surely_done_flop; reg mult_triangle_addr_tardy_done_flop; + + wire mult_rectangle_addr_almost_done_comb; + reg mult_rectangle_addr_almost_done_flop; + reg mult_rectangle_addr_surely_done_flop; + reg mult_rectangle_addr_tardy_done_flop; + assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1; + assign mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index); + assign mult_rectangle_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1; - always @* - // - //if (!col_is_last) - mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == 
index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index); - //else - //mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index); @@ -482,6 +525,29 @@ module tb_square; // end + + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: + mult_rectangle_addr_almost_done_flop <= mult_rectangle_addr_almost_done_comb; + //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= + //{mult_triangle_addr_surely_done_comb, mult_triangle_addr_almost_done_comb}; + + default: + mult_rectangle_addr_almost_done_flop <= 1'b0; + //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= 2'b00; + + endcase + + always @(posedge clk) begin + // + mult_rectangle_addr_surely_done_flop <= mult_rectangle_addr_almost_done_flop; + mult_rectangle_addr_tardy_done_flop <= mult_rectangle_addr_surely_done_flop; + // + end // @@ -489,6 +555,7 @@ module tb_square; // wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle; + wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle; always @(posedge clk) @@ -510,6 +577,14 @@ module tb_square; FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_triangle_addr_almost_done_flop || (col_is_last && mult_triangle_addr_surely_done_flop) ? 8'd0 : mac_slim_bram_xy_addr + 1'b1; // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ? 
+ 8'd1 : mac_slim_bram_xy_addr + 1'b1; + // default: mac_slim_bram_xy_addr <= 8'dX; endcase @@ -543,7 +618,14 @@ module tb_square; FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last); + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last); + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]}; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]}; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last); // default: mac_fat_bram_xy_addr[j] <= 8'dX; endcase @@ -564,7 +646,14 @@ module tb_square; FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last); + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last); + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1}; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1}; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last); // default: mac_fat_bram_xy_addr[4] <= 8'dX; endcase @@ -574,19 +663,30 @@ module tb_square; always @(posedge clk) // case (fsm_state_next) + // 
FSM_STATE_MULT_SQUARE_COL_0_INIT, FSM_STATE_MULT_SQUARE_COL_N_INIT, FSM_STATE_MULT_SQUARE_COL_0_TRIG, FSM_STATE_MULT_SQUARE_COL_N_TRIG, FSM_STATE_MULT_SQUARE_COL_0_BUSY, FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_bank <= BANK_SLIM_T1T2; + // FSM_STATE_MULT_TRIANGLE_COL_0_INIT, FSM_STATE_MULT_TRIANGLE_COL_N_INIT, FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= col_is_last && (mult_triangle_addr_almost_done_flop || mult_triangle_addr_surely_done_flop) ? - BANK_SLIM_N_COEFF_EXT : BANK_SLIM_N_COEFF; + BANK_SLIM_EXT : BANK_SLIM_N_COEFF; + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ? + BANK_SLIM_EXT : BANK_SLIM_Q; + // default: mac_slim_bram_xy_bank <= 2'bXX; endcase @@ -605,6 +705,12 @@ module tb_square; FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {BANK_FAT_ABH, BANK_FAT_ABL}; FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_ABL}}; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_N}}; default: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{3'bXXX}}; endcase @@ -625,6 +731,12 @@ module tb_square; FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1; FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_ena <= !col_is_last ? 
~mult_triangle_addr_almost_done_flop : ~mult_triangle_addr_surely_done_flop; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1; + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_ena <= ~mult_rectangle_addr_surely_done_flop; default: mac_slim_bram_xy_ena <= 1'b0; endcase @@ -642,7 +754,13 @@ module tb_square; FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1; + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1; default: mac_fat_bram_xy_ena <= 1'b0; endcase @@ -654,12 +772,30 @@ module tb_square; always @(posedge clk) // mac_fat_bram_xy_reg_ena <= mac_fat_bram_xy_ena; - + + reg ladder_mode = 1'b0; // 0 = X:T1*T2, Y:T2*T2 + // 1 = X:T1*T2, Y:T2*T1 + + reg dsp_swap_xy; + + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_swap_xy <= 1'b1; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_swap_xy <= 1'b0; + endcase + always @(posedge clk) // - if (mac_slim_bram_xy_reg_ena_dly) - {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]}; + if (mac_slim_bram_xy_reg_ena_dly) begin // rewrite + if (!dsp_swap_xy) + {dsp_y_b, dsp_x_b} <= {mac_slim_bram_y_dout[16:0], mac_slim_bram_x_dout[16:0]}; + else begin + if (!ladder_mode) {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]}; + else {dsp_y_b, dsp_x_b} <= {mac_slim_bram_y_dout[16:0], mac_slim_bram_x_dout[16:0]}; + end + end else {dsp_y_b, dsp_x_b} <= {2{{17{1'bX}}}}; @@ -711,7 +847,8 @@ module tb_square; 
case (fsm_state_next) // FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_INIT: begin + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin col_index <= 5'd0; col_index_last <= index_last[7:3]; col_index_next1 <= 5'd1; @@ -721,7 +858,8 @@ module tb_square; end // FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT: begin + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin col_index <= col_index_next1; col_is_last <= col_index_next1 == col_index_last; col_index_next1 <= col_index_next1 == col_index_last ? 5'd0 : col_index_next1 + 5'd1; @@ -730,8 +868,9 @@ module tb_square; // endcase - assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; - assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT; + assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; + assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT; + assign fsm_state_after_mult_rectangle = col_is_last ? 
FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; always @(posedge clk) // @@ -741,9 +880,13 @@ module tb_square; FSM_STATE_MULT_SQUARE_COL_0_BUSY, FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly); FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, mac_slim_bram_xy_addr_dly); default: dsp_xy_mode_z_adv4 <= {9{1'b1}}; endcase @@ -774,26 +917,26 @@ module tb_square; calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; end endfunction - /* - function [NUM_MULTS:0] calc_mac_mode_z_triangle; + + function [NUM_MULTS:0] calc_mac_mode_z_rectangle; input [ 4:0] col_index_value; input [ 7:0] mac_slim_bram_xy_addr_value; begin if (mac_slim_bram_xy_addr_value[7:3] == col_index_value) case (mac_slim_bram_xy_addr_value[2:0]) - 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; - 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101}; - 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011}; - 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111}; - 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111}; - 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111}; - 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111}; - 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111}; + 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_rectangle = {1'b1, 
8'b11110111}; + 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111}; endcase else - calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; + calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; end - endfunction*/ + endfunction reg recomb_x_ena = 1'b0; reg recomb_y_ena = 1'b0; @@ -810,44 +953,66 @@ module tb_square; wire [17:0] recomb_fat_bram_x_dout; wire [17:0] recomb_fat_bram_y_dout; wire recomb_fat_bram_xy_dout_valid; + wire [ 2:0] recomb_slim_bram_xy_bank; + wire [ 7:0] recomb_slim_bram_xy_addr; + wire [17:0] recomb_slim_bram_x_dout; + wire [17:0] recomb_slim_bram_y_dout; + wire recomb_slim_bram_xy_dout_valid; wire recomb_rdy; modexpng_part_recombinator recomb ( - .clk (clk), - .rdy (recomb_rdy), - .fsm_state_next (fsm_state_next), - .index_last (index_last), - .dsp_x_ce_p (dsp_x_ce_p), - .dsp_y_ce_p (dsp_y_ce_p), - .ena_x (recomb_x_ena), - .ena_y (recomb_y_ena), - .dsp_x_p (dsp_x_p), - .dsp_y_p (dsp_y_p), - .col_index (col_index), - .col_index_last (col_index_last), - .slim_bram_xy_addr (mac_slim_bram_xy_addr), - .slim_bram_xy_bank (mac_slim_bram_xy_bank), - .fat_bram_xy_bank (recomb_fat_bram_xy_bank), - .fat_bram_xy_addr (recomb_fat_bram_xy_addr), - .fat_bram_x_dout (recomb_fat_bram_x_dout), - .fat_bram_y_dout (recomb_fat_bram_y_dout), - .fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid) + .clk (clk), + .rdy (recomb_rdy), + .fsm_state_next (fsm_state_next), + .index_last (index_last), + .dsp_x_ce_p (dsp_x_ce_p), + .dsp_y_ce_p (dsp_y_ce_p), + .ena_x (recomb_x_ena), + .ena_y (recomb_y_ena), + .dsp_x_p (dsp_x_p), + .dsp_y_p (dsp_y_p), + .col_index (col_index), + .col_index_last (col_index_last), + .slim_bram_xy_addr (mac_slim_bram_xy_addr), + .slim_bram_xy_bank (mac_slim_bram_xy_bank), + .rcmb_fat_bram_xy_bank (recomb_fat_bram_xy_bank), + .rcmb_fat_bram_xy_addr 
(recomb_fat_bram_xy_addr), + .rcmb_fat_bram_x_dout (recomb_fat_bram_x_dout), + .rcmb_fat_bram_y_dout (recomb_fat_bram_y_dout), + .rcmb_fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid), + .rcmb_slim_bram_xy_bank (recomb_slim_bram_xy_bank), + .rcmb_slim_bram_xy_addr (recomb_slim_bram_xy_addr), + .rcmb_slim_bram_x_dout (recomb_slim_bram_x_dout), + .rcmb_slim_bram_y_dout (recomb_slim_bram_y_dout), + .rcmb_slim_bram_xy_dout_valid (recomb_slim_bram_xy_dout_valid) ); reg [17:0] AB_READ[0:63]; reg [17:0] Q_READ[0:32]; + reg [17:0] M_READ[0:64]; - always @(posedge clk) + always @(posedge clk) begin // if (recomb_fat_bram_xy_dout_valid) // case (recomb_fat_bram_xy_bank) - 3'd1: AB_READ[ (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout; - 3'd2: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout; - 3'd3: Q_READ [ (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout; - 3'd4: Q_READ [32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout; + BANK_FAT_ABL: AB_READ[recomb_fat_bram_xy_addr % 32] <= recomb_fat_bram_x_dout; + BANK_FAT_ABH: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout; + BANK_FAT_ML: M_READ[recomb_fat_bram_xy_addr % 32] <= recomb_fat_bram_x_dout; + BANK_FAT_MH: M_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout; + BANK_FAT_EXT: M_READ[64 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout; endcase + // + if (recomb_slim_bram_xy_dout_valid) + // + case (recomb_slim_bram_xy_bank) + BANK_SLIM_Q: Q_READ[recomb_slim_bram_xy_addr] <= recomb_slim_bram_x_dout; + BANK_SLIM_EXT: if (recomb_slim_bram_xy_addr == 8'd1) + Q_READ[32] <= recomb_slim_bram_x_dout; + endcase + // + end always @(posedge clk) @@ -873,6 +1038,29 @@ module tb_square; end + always @(posedge clk) + // + if (tb_slim_bram_xy_ena) begin + mgr_slim_bram_xy_ena <= 1'b1; + mgr_slim_bram_xy_bank <= tb_slim_bram_xy_bank; + mgr_slim_bram_xy_addr <= tb_slim_bram_xy_addr; + mgr_slim_bram_x_din <= tb_slim_bram_x_din; + 
mgr_slim_bram_y_din <= tb_slim_bram_y_din; + end else if (recomb_slim_bram_xy_dout_valid) begin + mgr_slim_bram_xy_ena <= 1'b1; + mgr_slim_bram_xy_bank <= recomb_slim_bram_xy_bank; + mgr_slim_bram_xy_addr <= recomb_slim_bram_xy_addr; + mgr_slim_bram_x_din <= recomb_slim_bram_x_dout; + mgr_slim_bram_y_din <= recomb_slim_bram_y_dout; + end else begin + mgr_slim_bram_xy_ena <= 1'b0; + mgr_slim_bram_xy_bank <= 3'bXXX; + mgr_slim_bram_xy_addr <= 8'hXX; + mgr_slim_bram_x_din <= {18{1'bX}}; + mgr_slim_bram_y_din <= {18{1'bX}}; + end + + task verify_ab; reg verify_ab_ok; begin @@ -911,9 +1099,29 @@ module tb_square; endtask + task verify_m; + reg verify_m_ok; + begin + verify_m_ok = 1; + for (i=0; i<65; i=i+1) + if (M_READ[i] === M[i]) + $display("M / M_READ [%02d] = 0x%05x / 0x%05x", i, M[i], M_READ[i]); + else begin + $display("M / M_READ [%02d] = 0x%05x / 0x%05x ", i, M[i], M_READ[i]); + verify_m_ok = 0; + end + if (verify_m_ok) + $display("M is OK."); + else + $display("M is WRONG!"); + end + endtask + + wire mult_square_addr_done = mult_square_addr_surely_done_flop; - wire mult_triangle_addr_done = !col_is_last ? mult_triangle_addr_surely_done_flop : mult_triangle_addr_tardy_done_flop; + wire mult_rectangle_addr_done = mult_rectangle_addr_tardy_done_flop; + always @* begin // @@ -940,7 +1148,17 @@ module tb_square; FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ; FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = mult_triangle_addr_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY; - FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = FSM_STATE_MULT_TRIANGLE_HOLDOFF;//recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF; + FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = recomb_rdy ? 
FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF; + + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ; + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = mult_rectangle_addr_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY; + + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ; + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ; + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = mult_rectangle_addr_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY; + + FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF; default: fsm_state_next = FSM_STATE_IDLE ; diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh index 2700a42..3bdae66 100644 --- a/rtl/modexpng_mmm_fsm.vh +++ b/rtl/modexpng_mmm_fsm.vh @@ -30,5 +30,14 @@ localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26; localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37; + localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999; - \ No newline at end of file diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh index f846119..57eef35 100644 --- a/rtl/modexpng_parameters.vh +++ 
b/rtl/modexpng_parameters.vh @@ -3,19 +3,19 @@ //localparam BANK_ADDR_WIDTH = 3; // TODO: Replace everywhere! -localparam [2:0] BANK_FAT_T1T2 = 3'd0; -localparam [2:0] BANK_FAT_ABL = 3'd1; -localparam [2:0] BANK_FAT_ABH = 3'd2; -localparam [2:0] BANK_FAT_Q = 3'd3; -localparam [2:0] BANK_FAT_Q_EXT = 3'd4; -localparam [2:0] BANK_FAT_ML = 3'd5; -localparam [2:0] BANK_FAT_MH = 3'd6; -localparam [2:0] BANK_FAT_MH_EXT = 3'd7; - -localparam [1:0] BANK_SLIM_T1T2 = 2'd0; -localparam [1:0] BANK_SLIM_N = 2'd1; -localparam [1:0] BANK_SLIM_N_COEFF = 2'd2; -localparam [1:0] BANK_SLIM_N_COEFF_EXT = 2'd3; +localparam [2:0] BANK_FAT_T1T2 = 3'd0; +localparam [2:0] BANK_FAT_ABL = 3'd1; +localparam [2:0] BANK_FAT_ABH = 3'd2; +localparam [2:0] BANK_FAT_N = 3'd3; +localparam [2:0] BANK_FAT_ML = 3'd4; +localparam [2:0] BANK_FAT_MH = 3'd5; +localparam [2:0] BANK_FAT_EXT = 3'd6; // 0 -> MH' +localparam [2:0] BANK_FAT_UNUSED = 3'd7; + +localparam [1:0] BANK_SLIM_T1T2 = 2'd0; +localparam [1:0] BANK_SLIM_N_COEFF = 2'd1; +localparam [1:0] BANK_SLIM_Q = 2'd2; +localparam [1:0] BANK_SLIM_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q' //localparam BANK_Y_T2 = 3'd0; diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v index c51e7ef..567ecd5 100644 --- a/rtl/modexpng_part_recombinator.v +++ b/rtl/modexpng_part_recombinator.v @@ -9,7 +9,8 @@ module modexpng_part_recombinator dsp_x_p, dsp_y_p, col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, - fat_bram_xy_bank, fat_bram_xy_addr, fat_bram_x_dout, fat_bram_y_dout, fat_bram_xy_dout_valid + rcmb_fat_bram_xy_bank, rcmb_fat_bram_xy_addr, rcmb_fat_bram_x_dout, rcmb_fat_bram_y_dout, rcmb_fat_bram_xy_dout_valid, + rcmb_slim_bram_xy_bank, rcmb_slim_bram_xy_addr, rcmb_slim_bram_x_dout, rcmb_slim_bram_y_dout, rcmb_slim_bram_xy_dout_valid ); @@ -36,11 +37,17 @@ module modexpng_part_recombinator input [ 7:0] slim_bram_xy_addr; input [ 1:0] slim_bram_xy_bank; - output [ 2:0] fat_bram_xy_bank; - output [ 7:0] fat_bram_xy_addr; - 
output [ 17:0] fat_bram_x_dout; - output [ 17:0] fat_bram_y_dout; - output fat_bram_xy_dout_valid; + output [ 2:0] rcmb_fat_bram_xy_bank; + output [ 7:0] rcmb_fat_bram_xy_addr; + output [ 17:0] rcmb_fat_bram_x_dout; + output [ 17:0] rcmb_fat_bram_y_dout; + output rcmb_fat_bram_xy_dout_valid; + + output [ 2:0] rcmb_slim_bram_xy_bank; + output [ 7:0] rcmb_slim_bram_xy_addr; + output [ 17:0] rcmb_slim_bram_x_dout; + output [ 17:0] rcmb_slim_bram_y_dout; + output rcmb_slim_bram_xy_dout_valid; // @@ -148,10 +155,10 @@ module modexpng_part_recombinator if (ena_x && ena_y) // case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1; - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2; - //FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3; - default: rcmb_mode <= 2'd0; + FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2; + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3; + default: rcmb_mode <= 2'd0; endcase @@ -203,7 +210,7 @@ module modexpng_part_recombinator input [1:0] slim_bram_xy_bank_value; begin // - if (slim_bram_xy_bank_value == BANK_SLIM_N_COEFF_EXT) + if (slim_bram_xy_bank_value == BANK_SLIM_EXT) calc_triangle_aux_lsb = 1'b1; else calc_triangle_aux_lsb = 1'b0; @@ -216,6 +223,21 @@ module modexpng_part_recombinator end endfunction + function calc_rectangle_valid_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + input [1:0] slim_bram_xy_bank_value; + begin + // + if (slim_bram_xy_addr_value[7:3] == col_index_value) + calc_rectangle_valid_lsb = slim_bram_xy_bank_value != BANK_SLIM_EXT; + else + calc_rectangle_valid_lsb = 1'b0; + // + end + endfunction + function [7:0] calc_square_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; @@ -265,6 +287,32 @@ module modexpng_part_recombinator // end endfunction + + function [7:0] calc_rectangle_bitmap_lsb; + input [4:0] col_index_value; + input 
[4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + input [1:0] slim_bram_xy_bank_value; + begin + // + if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT)) + // + case (slim_bram_xy_addr_value[2:0]) + 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001; + 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010; + 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100; + 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000; + 3'b100: calc_rectangle_bitmap_lsb = 8'b00010000; + 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000; + 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000; + 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000; + endcase + // + else + calc_rectangle_bitmap_lsb = {8{1'b0}}; + // + end + endfunction function [2:0] calc_square_index_lsb; input [4:0] col_index_value; @@ -315,6 +363,32 @@ module modexpng_part_recombinator // end endfunction + + function [2:0] calc_rectangle_index_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + input [1:0] slim_bram_xy_bank_value; + begin + // + if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT)) + // + case (slim_bram_xy_addr_value[2:0]) + 3'b000: calc_rectangle_index_lsb = 3'd0; + 3'b001: calc_rectangle_index_lsb = 3'd1; + 3'b010: calc_rectangle_index_lsb = 3'd2; + 3'b011: calc_rectangle_index_lsb = 3'd3; + 3'b100: calc_rectangle_index_lsb = 3'd4; + 3'b101: calc_rectangle_index_lsb = 3'd5; + 3'b110: calc_rectangle_index_lsb = 3'd6; + 3'b111: calc_rectangle_index_lsb = 3'd7; + endcase + // + else + calc_rectangle_index_lsb = 3'dX; + // + end + endfunction function calc_square_purge_lsb; input [4:0] col_index_value; @@ -330,6 +404,20 @@ module modexpng_part_recombinator end endfunction + function calc_rectangle_purge_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + begin + // + if (slim_bram_xy_addr_value[7:3] 
== col_index_value) + calc_rectangle_purge_lsb = slim_bram_xy_addr_value[7:3] == col_index_last_value; + else + calc_rectangle_purge_lsb = 1'b0; + // + end + endfunction + function calc_square_valid_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; @@ -344,6 +432,22 @@ module modexpng_part_recombinator // end endfunction + + function calc_rectangle_valid_msb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + input [1:0] slim_bram_xy_bank_value; + input [7:0] index_last_value; + begin + // + if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT)) + calc_rectangle_valid_msb = 1'b1; + else + calc_rectangle_valid_msb = 1'b0; + // + end + endfunction function [7:0] calc_square_bitmap_msb; input [4:0] col_index_value; @@ -361,6 +465,22 @@ module modexpng_part_recombinator end endfunction + function [7:0] calc_rectangle_bitmap_msb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + input [1:0] slim_bram_xy_bank_value; + input [7:0] index_last_value; + begin + // + if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT)) begin + calc_rectangle_bitmap_msb[7:0] = 8'b11111111; + end else + calc_rectangle_bitmap_msb[7:0] = 8'b00000000; + // + end + endfunction + function calc_square_purge_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; @@ -376,6 +496,22 @@ module modexpng_part_recombinator end endfunction + function calc_rectangle_purge_msb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + input [1:0] slim_bram_xy_bank_value; + input [7:0] index_last_value; + begin + // + if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT)) + calc_rectangle_purge_msb = col_index_value == col_index_last_value; + else + calc_rectangle_purge_msb = 1'b0; + // + end + endfunction + reg recomb_lsb_ce = 
1'b0; reg recomb_lsb_ce_aux; @@ -494,7 +630,24 @@ module modexpng_part_recombinator xy_bitmap_msb_adv[6] <= {8{1'b0}}; xy_purge_msb_adv [6] <= 1'b0; // - end + end + // + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank); + xy_aux_lsb_adv [6] <= 1'b0; + xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank); + xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank); + xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, slim_bram_xy_addr); + // + xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last); + xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last); + xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last); + // + end // default: begin // @@ -586,15 +739,24 @@ module modexpng_part_recombinator end - reg [ 2:0] fat_bram_xy_bank_reg; reg [ 7:0] fat_bram_xy_addr_reg; - reg [ 7:0] fat_bram_xy_cnt_lsb; - reg [ 7:0] fat_bram_xy_cnt_msb; reg [17:0] fat_bram_x_dout_reg; reg [17:0] fat_bram_y_dout_reg; reg fat_bram_xy_dout_valid_reg = 1'b0; + reg [ 2:0] slim_bram_xy_bank_reg; + reg [ 7:0] slim_bram_xy_addr_reg; + reg [17:0] slim_bram_x_dout_reg; + reg [17:0] slim_bram_y_dout_reg; + reg slim_bram_xy_dout_valid_reg = 1'b0; + + reg [ 7:0] bram_xy_cnt_lsb; + reg [ 7:0] bram_xy_cnt_msb; + + reg bram_xy_cnt_lsb_wrapped; + reg bram_xy_cnt_msb_wrapped; + reg [15:0] recomb_msb_dout_carry_0; reg [15:0] recomb_msb_dout_carry_1; @@ -606,11 +768,21 @@ module modexpng_part_recombinator reg [ 7:0] 
recomb_msb_cnt_delay_1 = 8'd0; reg [ 7:0] recomb_msb_cnt_delay_2 = 8'd0; - assign fat_bram_xy_bank = fat_bram_xy_bank_reg; - assign fat_bram_xy_addr = fat_bram_xy_addr_reg; - assign fat_bram_x_dout = fat_bram_x_dout_reg; - assign fat_bram_y_dout = fat_bram_y_dout_reg; - assign fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg; + reg recomb_msb_flag_delay_0; + reg recomb_msb_flag_delay_1; + reg recomb_msb_flag_delay_2; + + assign rcmb_fat_bram_xy_bank = fat_bram_xy_bank_reg; + assign rcmb_fat_bram_xy_addr = fat_bram_xy_addr_reg; + assign rcmb_fat_bram_x_dout = fat_bram_x_dout_reg; + assign rcmb_fat_bram_y_dout = fat_bram_y_dout_reg; + assign rcmb_fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg; + + assign rcmb_slim_bram_xy_bank = slim_bram_xy_bank_reg; + assign rcmb_slim_bram_xy_addr = slim_bram_xy_addr_reg; + assign rcmb_slim_bram_x_dout = slim_bram_x_dout_reg; + assign rcmb_slim_bram_y_dout = slim_bram_y_dout_reg; + assign rcmb_slim_bram_xy_dout_valid = slim_bram_xy_dout_valid_reg; reg rdy_reg = 1'b1; reg rdy_adv = 1'b1; @@ -629,7 +801,9 @@ module modexpng_part_recombinator task advance_recomb_msb_dout_delay; input [15:0] dout; input [ 7:0] cnt; + input flag; begin + // recomb_msb_dout_delay_0 <= dout; recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0; recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1; @@ -637,6 +811,11 @@ module modexpng_part_recombinator recomb_msb_cnt_delay_0 <= cnt; recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0; recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1; + // + recomb_msb_flag_delay_0 <= flag; + recomb_msb_flag_delay_1 <= recomb_msb_flag_delay_0; + recomb_msb_flag_delay_2 <= recomb_msb_flag_delay_1; + // end endtask @@ -659,10 +838,24 @@ module modexpng_part_recombinator fat_bram_xy_addr_reg <= addr; fat_bram_x_dout_reg <= dout_x; fat_bram_y_dout_reg <= dout_y; - fat_bram_xy_dout_valid_reg <= 1'b1; + fat_bram_xy_dout_valid_reg <= valid; end endtask + task _update_slim_bram_regs; + input [ 2:0] bank; + input [ 7:0] addr; + 
input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + slim_bram_xy_bank_reg <= bank; + slim_bram_xy_addr_reg <= addr; + slim_bram_x_dout_reg <= dout_x; + slim_bram_y_dout_reg <= dout_y; + slim_bram_xy_dout_valid_reg <= valid; + end + endtask task set_fat_bram_regs; input [ 2:0] bank; @@ -674,44 +867,73 @@ module modexpng_part_recombinator end endtask + task set_slim_bram_regs; + input [ 2:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_slim_bram_regs(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + task clear_fat_bram_regs; begin _update_fat_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); end endtask + + task clear_slim_bram_regs; + begin + _update_slim_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask - task _set_fat_bram_cnt_lsb; + task _set_bram_cnt_lsb; input [7:0] cnt; + input wrapped; begin - fat_bram_xy_cnt_lsb <= cnt; + bram_xy_cnt_lsb <= cnt; + bram_xy_cnt_lsb_wrapped <= wrapped; end endtask - task _set_fat_bram_cnt_msb; + + task _set_bram_cnt_msb; input [7:0] cnt; + input wrapped; begin - fat_bram_xy_cnt_msb <= cnt; + bram_xy_cnt_msb <= cnt; + bram_xy_cnt_msb_wrapped <= wrapped; end endtask - task inc_fat_bram_cnt_lsb; + task inc_bram_cnt_lsb; begin - _set_fat_bram_cnt_lsb(fat_bram_xy_cnt_lsb + 1'b1); + if (bram_xy_cnt_lsb == index_last) + _set_bram_cnt_lsb(8'd0, 1'b1); + else + _set_bram_cnt_lsb(bram_xy_cnt_lsb + 1'b1, bram_xy_cnt_lsb_wrapped); end endtask - task inc_fat_bram_cnt_msb; + + task inc_bram_cnt_msb; begin - _set_fat_bram_cnt_msb(fat_bram_xy_cnt_msb + 1'b1); + if (bram_xy_cnt_msb == index_last) + _set_bram_cnt_msb(8'd0, 1'b1); + else + _set_bram_cnt_msb(bram_xy_cnt_msb + 1'b1, bram_xy_cnt_msb_wrapped); end endtask - task clr_fat_bram_cnt_lsb; + task clr_bram_cnt_lsb; begin - _set_fat_bram_cnt_lsb(8'd0); + _set_bram_cnt_lsb(8'd0, 1'b0); end endtask - task clr_fat_bram_cnt_msb; + + task clr_bram_cnt_msb; begin - _set_fat_bram_cnt_msb(8'd0); + 
_set_bram_cnt_msb(8'd0, 1'b0); end endtask @@ -724,51 +946,53 @@ module modexpng_part_recombinator always @(posedge clk) // if (ena_x & ena_y) begin - clr_fat_bram_cnt_lsb(); - clr_fat_bram_cnt_msb(); + clr_bram_cnt_lsb(); + clr_bram_cnt_msb(); end else begin // if not ready??? // case (rcmb_mode) 2'd1: recombine_square(); 2'd2: recombine_triangle(); + 2'd3: recombine_rectangle(); endcase // end task recombine_square; + // begin // case (rcmb_xy_dout_valid) // - 2'b01: inc_fat_bram_cnt_lsb(); - 2'b10: inc_fat_bram_cnt_msb(); + 2'b01: inc_bram_cnt_lsb(); + 2'b10: inc_bram_cnt_msb(); 2'b11: begin - if (fat_bram_xy_cnt_lsb == index_last) clr_fat_bram_cnt_lsb(); - else inc_fat_bram_cnt_lsb(); - inc_fat_bram_cnt_msb(); + inc_bram_cnt_lsb(); + inc_bram_cnt_msb(); end // endcase // case (rcmb_xy_dout_valid) // - 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}}); - else clear_fat_bram_regs(); - 2'b01: set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); - 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs(); - else set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}}); - 2'b11: if (fat_bram_xy_cnt_lsb < index_last) set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); - else set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + 2'b00: if (recomb_msb_flag_delay_2) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}}); + else clear_fat_bram_regs(); + 2'b01: set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + 2'b10: if (bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs(); + else set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}}); + 2'b11: if (bram_xy_cnt_lsb_wrapped) 
set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); + else set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + default: clear_fat_bram_regs(); // DEBUG!!! // endcase // case (rcmb_xy_dout_valid) // - 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0); - 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout); - // - 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, fat_bram_xy_cnt_msb); - if (fat_bram_xy_cnt_lsb < index_last) shift_recomb_msb_dout_carry({16{1'bX}}); + 2'b00: if (recomb_msb_flag_delay_2) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0); + 2'b10: if (bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout); +// // + 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1); + if (bram_xy_cnt_lsb_wrapped) shift_recomb_msb_dout_carry({16{1'bX}}); end // endcase @@ -779,50 +1003,111 @@ module modexpng_part_recombinator task recombine_triangle; + // begin // case (rcmb_xy_dout_valid) // - 2'b01: begin inc_fat_bram_cnt_lsb(); - if (fat_bram_xy_cnt_lsb == index_last) inc_fat_bram_cnt_msb(); - end - // + 2'b01: inc_bram_cnt_lsb(); + // endcase // case (rcmb_xy_dout_valid) // - 2'b00: clear_fat_bram_regs(); - 2'b01: if (fat_bram_xy_cnt_msb == 8'd0) set_fat_bram_regs(BANK_FAT_Q, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); - else set_fat_bram_regs(BANK_FAT_Q_EXT, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + 2'b00: clear_slim_bram_regs(); + 2'b01: if (!bram_xy_cnt_lsb_wrapped) set_slim_bram_regs(BANK_SLIM_Q, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + else set_slim_bram_regs(BANK_SLIM_EXT, 8'd1, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + 2'b10: clear_slim_bram_regs(); + 2'b11: clear_slim_bram_regs(); // endcase // end + // endtask + + task recombine_rectangle; + // + begin + // + case 
(rcmb_xy_dout_valid) + // + 2'b01: inc_bram_cnt_lsb(); + 2'b10: inc_bram_cnt_msb(); + 2'b11: begin + inc_bram_cnt_lsb(); + inc_bram_cnt_msb(); + end + // + endcase +// // + case (rcmb_xy_dout_valid) +// // + 2'b00: if (recomb_msb_flag_delay_2) set_fat_bram_regs(BANK_FAT_MH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}}); + else clear_fat_bram_regs(); + 2'b01: set_fat_bram_regs(BANK_FAT_ML, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + 2'b10: if (!bram_xy_cnt_msb_wrapped) begin + if (bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs(); + else set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}}); + end else + set_fat_bram_regs(BANK_FAT_EXT, 8'd0, {2'b00, recomb_msb_dout}, {18{1'bX}}); + + 2'b11: set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); +// // + endcase +// // + case (rcmb_xy_dout_valid) +// // + 2'b00: if (recomb_msb_flag_delay_2) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0); + 2'b10: begin + if ((bram_xy_cnt_msb < 8'd2) && !bram_xy_cnt_msb_wrapped) shift_recomb_msb_dout_carry(recomb_msb_dout); + if (bram_xy_cnt_msb_wrapped) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0); + end +// // + 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1); + shift_recomb_msb_dout_carry({16{1'bX}}); + end +// // + endcase + // + end + // + endtask always @(posedge clk) // if (ena_x & ena_y) begin rdy_adv <= 1'b0; - end else begin + end else if (!rdy_reg) begin // - case ({recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid}) + case (rcmb_mode) + // + 2'd1: case (rcmb_xy_dout_valid) + // + 2'b00: begin + // + if (recomb_msb_flag_delay_2) begin + // + rdy_adv <= ~recomb_msb_flag_delay_1; + // + end + // + end + endcase + // + 2'd2: case (rcmb_xy_dout_valid) + // + 2'b01: rdy_adv <= bram_xy_cnt_lsb_wrapped; // + // + endcase // - 2'b00: begin - // - if (recomb_msb_cnt_delay_2 > 8'd0) begin - // - 
rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0; - // - end - // - end endcase // end + // TODO: add rdy_adv handling for rcmb_mode == 2'd3 (recombine_rectangle) -- only modes 2'd1 and 2'd2 are handled above endmodule -- cgit v1.2.3