diff options
Diffstat (limited to 'rtl/modexpng_part_recombinator.v')
-rw-r--r-- | rtl/modexpng_part_recombinator.v | 623 |
1 files changed, 623 insertions, 0 deletions
// Origin: diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v
//         new file mode 100644, index 0000000..db4774b (@@ -0,0 +1,623 @@)
//
// modexpng_part_recombinator
//
// Captures the eight 47-bit DSP products presented on dsp_x_p, feeds selected
// words through two modexpng_recombinator_block instances (an "LSB" and an
// "MSB" one) and sequences their 16-bit outputs onto the fat_bram_* write
// interface.
//
// What is visible in this file:
//   * Control information (valid / bitmap / index / purge) is computed from
//     col_index, col_index_last, slim_bram_xy_addr and index_last while
//     fsm_state_next is one of the MULT_SQUARE states, then delayed through a
//     six-stage pipeline (the *_adv arrays) plus two further register stages.
//   * Only the X channel is actually recombined. The Y-side control registers
//     are tracked in parallel, but fat_bram_y_dout is only ever driven with X
//     (unknown) values and dsp_y_p is split but never latched.
//   * rdy is deasserted when ena_x & ena_y is seen and is reasserted by the
//     output sequencer once the delayed MSB words have been flushed.
//
// NOTE(review): FSM_STATE_*, BANK_FAT_* and NUM_MULTS come from the included
// headers, which are not part of this file. The arrays below are hard-coded to
// eight entries, so NUM_MULTS is presumably 8 -- confirm against the headers.
//
module modexpng_part_recombinator
(
    clk,
    rdy,
    fsm_state_next,
    index_last,
    dsp_x_ce_p, dsp_y_ce_p,
    ena_x, ena_y,
    dsp_x_p, dsp_y_p,
    col_index, col_index_last, slim_bram_xy_addr,
    fat_bram_xy_bank, fat_bram_xy_addr, fat_bram_x_dout, fat_bram_y_dout, fat_bram_xy_dout_valid
);


    //
    // Headers
    //
    `include "../rtl/modexpng_mmm_fsm.vh"
    `include "../rtl/modexpng_parameters.vh"
    `include "../rtl/modexpng_parameters_x8.vh"


    input                        clk;                   // single clock for all registers
    output                       rdy;                   // low from ena_x & ena_y until the sequencer is done
    input  [FSM_STATE_WIDTH-1:0] fsm_state_next;        // next state of the multiplier FSM
    input  [                7:0] index_last;            // compared against slim_bram_xy_addr and the LSB word counter
    input                        dsp_x_ce_p;            // qualifies dsp_x_p (used one cycle delayed)
    input                        dsp_y_ce_p;            // delayed like dsp_x_ce_p, otherwise unused here
    input                        ena_x;                 // ena_x & ena_y restarts the sequencer
    input                        ena_y;
    input  [           8*47-1:0] dsp_x_p;               // eight packed 47-bit products
    input  [           8*47-1:0] dsp_y_p;               // eight packed 47-bit products (split only)
    input  [                4:0] col_index;
    input  [                4:0] col_index_last;
    input  [                7:0] slim_bram_xy_addr;

    output [                2:0] fat_bram_xy_bank;
    output [                7:0] fat_bram_xy_addr;
    output [               17:0] fat_bram_x_dout;
    output [               17:0] fat_bram_y_dout;
    output                       fat_bram_xy_dout_valid;


    //
    // Latches
    //
    reg [1*47-1:0] dsp_x_p_latch[0:7];
    reg [1*47-1:0] dsp_y_p_latch[0:7];  // declared but never written in this file


    //
    // Mapping
    //
    wire [46:0] dsp_x_p_split[0:7];
    wire [46:0] dsp_y_p_split[0:7];

    genvar z;
    generate for (z=0; z<NUM_MULTS; z=z+1)
        begin : gen_dsp_xy_p_split
            assign dsp_x_p_split[z] = dsp_x_p[47*z+:47];
            assign dsp_y_p_split[z] = dsp_y_p[47*z+:47];
        end
    endgenerate


    //
    // Delays
    //
    reg dsp_y_ce_p_dly1 = 1'b0;
    reg dsp_x_ce_p_dly1 = 1'b0;

    always @(posedge clk) begin
        //
        {dsp_y_ce_p_dly1, dsp_x_ce_p_dly1} <= {dsp_y_ce_p, dsp_x_ce_p};
        //
    end


    //
    // Registers
    //

    // valid
    reg x_valid_lsb = 1'b0;
    reg y_valid_lsb = 1'b0;
    reg x_valid_msb = 1'b0;
    reg y_valid_msb = 1'b0;

    // bitmap
    reg [7:0] x_bitmap_lsb = {8{1'b0}};
    reg [7:0] y_bitmap_lsb = {8{1'b0}};
    reg [7:0] x_bitmap_msb = {8{1'b0}};
    reg [7:0] y_bitmap_msb = {8{1'b0}};

    // index
    reg [2:0] x_index_lsb = 3'dX;
    reg [2:0] y_index_lsb = 3'dX;

    // purge
    reg x_purge_lsb = 1'b0;
    reg y_purge_lsb = 1'b0;
    reg x_purge_msb = 1'b0;
    reg y_purge_msb = 1'b0;

    // valid - latch
    reg x_valid_latch_lsb = 1'b0;
    reg y_valid_latch_lsb = 1'b0;

    // bitmap - latch
    reg [7:0] x_bitmap_latch_lsb = {8{1'b0}};
    reg [7:0] y_bitmap_latch_lsb = {8{1'b0}};
    reg [7:0] x_bitmap_latch_msb = {8{1'b0}};
    reg [7:0] y_bitmap_latch_msb = {8{1'b0}};

    // index - latch
    reg [2:0] x_index_latch_lsb = 3'dX;
    reg [2:0] y_index_latch_lsb = 3'dX;

    // purge - latch
    reg x_purge_latch_lsb = 1'b0;
    reg y_purge_latch_lsb = 1'b0;
    reg x_purge_latch_msb = 1'b0;
    reg y_purge_latch_msb = 1'b0;

    //
    // Six-stage delay pipeline for the control information. Stage 6 is the
    // entry (written from the calc_square_* functions), stage 1 is the exit.
    // xy_index_msb_adv is declared and initialised but never written afterwards.
    //
    reg       xy_valid_lsb_adv[1:6];
    reg       xy_valid_msb_adv[1:6];
    reg [7:0] xy_bitmap_lsb_adv[1:6];
    reg [7:0] xy_bitmap_msb_adv[1:6];
    reg [2:0] xy_index_lsb_adv[1:6];
    reg [2:0] xy_index_msb_adv[1:6];
    reg       xy_purge_lsb_adv[1:6];
    reg       xy_purge_msb_adv[1:6];


    integer i;
    // Initialise every stage, including the entry stage 6, so that no unknown
    // valid/bitmap/purge value is shifted down the pipeline after power-up.
    initial for (i=1; i<=6; i=i+1) begin
        xy_valid_lsb_adv[i]  = 1'b0;
        xy_valid_msb_adv[i]  = 1'b0;
        xy_bitmap_lsb_adv[i] = {8{1'b0}};
        xy_bitmap_msb_adv[i] = {8{1'b0}};
        xy_index_lsb_adv[i]  = 3'dX;
        xy_index_msb_adv[i]  = 3'dX;
        xy_purge_lsb_adv[i]  = 1'b0;
        xy_purge_msb_adv[i]  = 1'b0;
    end

    // Returns 1 when the upper five address bits equal the current column index.
    function [0:0] calc_square_valid_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        begin
            //
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                calc_square_valid_lsb = 1'b1;
            else
                calc_square_valid_lsb = 1'b0;
            //
        end
    endfunction

    // Returns a one-hot bitmap selected by the lower three address bits when the
    // upper five address bits equal the current column index, all zeroes otherwise.
    function [7:0] calc_square_bitmap_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        begin
            //
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                //
                case (slim_bram_xy_addr_value[2:0])
                    3'b000: calc_square_bitmap_lsb = 8'b00000001;
                    3'b001: calc_square_bitmap_lsb = 8'b00000010;
                    3'b010: calc_square_bitmap_lsb = 8'b00000100;
                    3'b011: calc_square_bitmap_lsb = 8'b00001000;
                    3'b100: calc_square_bitmap_lsb = 8'b00010000;
                    3'b101: calc_square_bitmap_lsb = 8'b00100000;
                    3'b110: calc_square_bitmap_lsb = 8'b01000000;
                    3'b111: calc_square_bitmap_lsb = 8'b10000000;
                endcase
                //
            else
                calc_square_bitmap_lsb = {8{1'b0}};
            //
        end
    endfunction

    // Returns the lower three address bits when the upper five address bits
    // equal the current column index, X (don't care) otherwise.
    function [2:0] calc_square_index_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        begin
            //
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                //
                case (slim_bram_xy_addr_value[2:0])
                    3'b000: calc_square_index_lsb = 3'd0;
                    3'b001: calc_square_index_lsb = 3'd1;
                    3'b010: calc_square_index_lsb = 3'd2;
                    3'b011: calc_square_index_lsb = 3'd3;
                    3'b100: calc_square_index_lsb = 3'd4;
                    3'b101: calc_square_index_lsb = 3'd5;
                    3'b110: calc_square_index_lsb = 3'd6;
                    3'b111: calc_square_index_lsb = 3'd7;
                endcase
                //
            else
                calc_square_index_lsb = 3'dX;
            //
        end
    endfunction

    // Returns 1 when the upper five address bits equal both the current and
    // the last column index.
    function calc_square_purge_lsb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        begin
            //
            if (slim_bram_xy_addr_value[7:3] == col_index_value)
                calc_square_purge_lsb = slim_bram_xy_addr_value[7:3] == col_index_last_value;
            else
                calc_square_purge_lsb = 1'b0;
            //
        end
    endfunction

    // Returns 1 when the address equals index_last.
    function calc_square_valid_msb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [7:0] index_last_value;
        begin
            //
            if (slim_bram_xy_addr_value == index_last_value)
                calc_square_valid_msb = 1'b1;
            else
                calc_square_valid_msb = 1'b0;
            //
        end
    endfunction

    // When the address equals index_last: bits [6:0] are set and bit [7] is set
    // only if the current column is not the last one. All zeroes otherwise.
    function [7:0] calc_square_bitmap_msb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [7:0] index_last_value;
        begin
            //
            if (slim_bram_xy_addr_value == index_last_value) begin
                calc_square_bitmap_msb[7]   = col_index_value != col_index_last_value;
                calc_square_bitmap_msb[6:0] = 7'b1111111;
            end else
                calc_square_bitmap_msb[7:0] = 8'b00000000;
            //
        end
    endfunction

    // Returns 1 when the address equals index_last and the current column is
    // the last one.
    function calc_square_purge_msb;
        input [4:0] col_index_value;
        input [4:0] col_index_last_value;
        input [7:0] slim_bram_xy_addr_value;
        input [7:0] index_last_value;
        begin
            //
            if (slim_bram_xy_addr_value == index_last_value)
                calc_square_purge_msb = col_index_value == col_index_last_value;
            else
                calc_square_purge_msb = 1'b0;
            //
        end
    endfunction


    //
    // Recombinator control and data
    //
    reg         recomb_lsb_ce          = 1'b0;
    reg  [ 2:0] recomb_lsb_ce_purge    = 3'b000;
    wire        recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_purge[0];
    reg         recomb_lsb_clr;
    reg         recomb_lsb_vld         = 1'b0;

    reg  [46:0] recomb_lsb_din;
    wire [15:0] recomb_lsb_dout;

    reg         recomb_msb_ce          = 1'b0;
    reg  [ 1:0] recomb_msb_ce_purge    = 2'b00;
    wire        recomb_msb_ce_combined = recomb_msb_ce | recomb_msb_ce_purge[0];
    reg         recomb_msb_clr;
    reg         recomb_msb_vld         = 1'b0;

    always @(posedge clk)
        //
        {recomb_msb_vld, recomb_lsb_vld} <= {recomb_msb_ce_combined, recomb_lsb_ce_combined};

    reg  [46:0] recomb_msb_din;
    wire [15:0] recomb_msb_dout;

    modexpng_recombinator_block recomb_x_lsb
    (
        .clk    (clk),
        .ce     (recomb_lsb_ce_combined),
        .clr    (recomb_lsb_clr),
        .din    (recomb_lsb_din),
        .dout   (recomb_lsb_dout)
    );

    modexpng_recombinator_block recomb_x_msb
    (
        .clk    (clk),
        .ce     (recomb_msb_ce_combined),
        .clr    (recomb_msb_clr),
        .din    (recomb_msb_din),
        .dout   (recomb_msb_dout)
    );

    //
    // Clock enables. The *_ce_purge shift registers keep the recombinators
    // enabled for extra cycles after a purge request (three for LSB, two for MSB).
    //
    always @(posedge clk) begin
        //
        recomb_lsb_ce <= x_valid_latch_lsb;
        recomb_msb_ce <= x_bitmap_latch_msb[0];
        //
        if (x_purge_latch_lsb)
            recomb_lsb_ce_purge <= 3'b111;
        else
            recomb_lsb_ce_purge <= {1'b0, recomb_lsb_ce_purge[2:1]};
        //
        // Non-blocking on purpose: recomb_msb_ce_combined is sampled by other
        // clocked blocks, so a blocking assignment here would create a race.
        if (x_purge_latch_msb && x_bitmap_latch_msb[0] && !x_bitmap_latch_msb[1])
            recomb_msb_ce_purge <= 2'b11;
        else
            recomb_msb_ce_purge <= {1'b0, recomb_msb_ce_purge[1]};
        //
    end


    // Clear flags: set on ena_x & ena_y, dropped once the respective
    // recombinator has been enabled.
    always @(posedge clk)
        //
        if (ena_x & ena_y) begin
            recomb_lsb_clr <= 1'b1;
            recomb_msb_clr <= 1'b1;
        end else begin
            if (recomb_lsb_ce) recomb_lsb_clr <= 1'b0;
            if (recomb_msb_ce) recomb_msb_clr <= 1'b0;
        end

    // LSB recombinator input: the latched product selected by the index.
    always @(posedge clk)
        //
        if (x_valid_latch_lsb)
            recomb_lsb_din <= dsp_x_p_latch[x_index_latch_lsb];
        else
            recomb_lsb_din <= {47{1'b0}};

    // MSB recombinator input: always entry 0 of the latch array, which is
    // shifted down while x_bitmap_latch_msb[1] is set (see below).
    always @(posedge clk)
        //
        if (x_bitmap_latch_msb[0])
            recomb_msb_din <= dsp_x_p_latch[0];
        else
            recomb_msb_din <= {47{1'b0}};


    // Entry stage (6) of the control pipeline.
    always @(posedge clk)
        //
        case (fsm_state_next)
            //
            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
                //
                xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, slim_bram_xy_addr);
                xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr);
                xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, slim_bram_xy_addr);
                xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
                //
                xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, slim_bram_xy_addr, index_last);
                xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, index_last);
                xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, slim_bram_xy_addr, index_last);
                //
            end
            //
            default: begin
                //
                xy_valid_lsb_adv [6] <= 1'b0;
                xy_bitmap_lsb_adv[6] <= {8{1'b0}};
                xy_index_lsb_adv [6] <= 3'dX;
                xy_purge_lsb_adv [6] <= 1'b0;
                //
                xy_valid_msb_adv [6] <= 1'b0;
                xy_bitmap_msb_adv[6] <= {8{1'b0}};
                xy_purge_msb_adv [6] <= 1'b0;
                //
            end
            //
        endcase


    // Pipeline exit (stage 1) -> working registers -> latch registers, plus
    // the stage-to-stage shift of the pipeline itself.
    always @(posedge clk) begin
        //
        {y_valid_lsb,  x_valid_lsb}  <= {2{xy_valid_lsb_adv [1]}};
        {y_bitmap_lsb, x_bitmap_lsb} <= {2{xy_bitmap_lsb_adv[1]}};
        {y_index_lsb,  x_index_lsb}  <= {2{xy_index_lsb_adv [1]}};
        {y_purge_lsb,  x_purge_lsb}  <= {2{xy_purge_lsb_adv [1]}};
        //
        {y_valid_latch_lsb,  x_valid_latch_lsb}  <= {y_valid_lsb,  x_valid_lsb};
        {y_bitmap_latch_lsb, x_bitmap_latch_lsb} <= {y_bitmap_lsb, x_bitmap_lsb};
        {y_index_latch_lsb,  x_index_latch_lsb}  <= {y_index_lsb,  x_index_lsb};
        {y_purge_latch_lsb,  x_purge_latch_lsb}  <= {y_purge_lsb,  x_purge_lsb};
        //
        {y_valid_msb,  x_valid_msb}  <= {2{xy_valid_msb_adv[1]}};
        {y_bitmap_msb, x_bitmap_msb} <= {2{xy_bitmap_msb_adv[1]}};
        {y_purge_msb,  x_purge_msb}  <= {2{xy_purge_msb_adv[1]}};
        //
        // The MSB bitmap latch is loaded when valid and otherwise shifted
        // right, so bit 0 stays set for as many cycles as bits were loaded.
        if (x_valid_msb) begin
            x_bitmap_latch_msb <= x_bitmap_msb;
            x_purge_latch_msb  <= x_purge_msb;
        end else begin
            x_bitmap_latch_msb <= {1'b0, x_bitmap_latch_msb[7:1]};
        end
        //
        //
        for (i=1; i<6; i=i+1) begin
            xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
            xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
            xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
            xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
            //
            xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1];
            xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1];
            xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1];
        end
        //
    end

    // DSP product latches: either shifted down by one entry (MSB read-out in
    // progress) or loaded from the DSP outputs selected by the bitmaps.
    always @(posedge clk)
        //
        if (x_bitmap_latch_msb[1]) begin // only shift 7 times
            //
            for (i=0; i<8; i=i+1)
                if (i < 7)
                    dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
                else
                    dsp_x_p_latch[i] <= {47{1'bX}};
            //
        end else if (dsp_x_ce_p_dly1) begin
            //
            for (i=0; i<8; i=i+1)
                //
                if (x_bitmap_lsb[i])
                    dsp_x_p_latch[i] <= dsp_x_p_split[i];
                else if (x_valid_msb && x_bitmap_msb[i])
                    dsp_x_p_latch[i] <= dsp_x_p_split[i];
            //
        end

    reg recomb_x_lsb_dout_valid = 1'b0;
    reg recomb_x_msb_dout_valid = 1'b0;

    always @(posedge clk) begin
        recomb_x_lsb_dout_valid <= recomb_lsb_ce_combined;
        recomb_x_msb_dout_valid <= recomb_msb_ce_combined;
    end



    //
    // Output sequencer registers
    //
    reg [ 2:0] fat_bram_xy_bank_reg;
    reg [ 7:0] fat_bram_xy_addr_reg;
    reg [ 7:0] fat_bram_xy_cnt_lsb;
    reg [ 7:0] fat_bram_xy_cnt_msb;
    reg [17:0] fat_bram_x_dout_reg;
    reg [17:0] fat_bram_y_dout_reg;
    reg        fat_bram_xy_dout_valid_reg = 1'b0;

    reg [15:0] recomb_msb_dout_carry_0;
    reg [15:0] recomb_msb_dout_carry_1;

    reg [15:0] recomb_msb_dout_delay_0;
    reg [15:0] recomb_msb_dout_delay_1;
    reg [15:0] recomb_msb_dout_delay_2;

    reg [ 7:0] recomb_msb_cnt_delay_0 = 8'd0;
    reg [ 7:0] recomb_msb_cnt_delay_1 = 8'd0;
    reg [ 7:0] recomb_msb_cnt_delay_2 = 8'd0;

    assign fat_bram_xy_bank       = fat_bram_xy_bank_reg;
    assign fat_bram_xy_addr       = fat_bram_xy_addr_reg;
    assign fat_bram_x_dout        = fat_bram_x_dout_reg;
    assign fat_bram_y_dout        = fat_bram_y_dout_reg;
    assign fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg;

    reg rdy_reg = 1'b1;
    reg rdy_adv = 1'b1;

    assign rdy = rdy_reg;


    always @(posedge clk)
        //
        if (ena_x & ena_y)
            rdy_reg <= 1'b0;
        else
            rdy_reg <= rdy_adv;

    //
    // Output sequencer, keyed on which recombinator produced a word this cycle:
    //   00 - neither: flush the delayed MSB words, if any are pending
    //   01 - LSB only: write the LSB word at the LSB counter
    //   10 - MSB only: stash the first two MSB words as carries, write the rest
    //   11 - both: write LSB (plus stashed carry) and queue the MSB word
    //
    always @(posedge clk)
        //
        if (ena_x & ena_y) begin
            rdy_adv             <= 1'b0;
            fat_bram_xy_cnt_lsb <= 8'd0;
            fat_bram_xy_cnt_msb <= 8'd0;
        end else begin
            //
            case ({recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid})
                //
                2'b00: begin
                    //
                    if (recomb_msb_cnt_delay_2 > 8'd0) begin
                        //
                        rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0;
                        //
                        recomb_msb_dout_delay_0 <= {16{1'bX}};
                        recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
                        recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
                        //
                        recomb_msb_cnt_delay_0 <= 8'd0;
                        recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
                        recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
                        //
                        fat_bram_xy_bank_reg       <= BANK_FAT_ABH;
                        fat_bram_xy_addr_reg       <= recomb_msb_cnt_delay_2;
                        fat_bram_x_dout_reg        <= {2'b00, recomb_msb_dout_delay_2};
//                      fat_bram_y_dout_reg        <= {18{1'bX}};
                        fat_bram_xy_dout_valid_reg <= 1'b1;
                        //
                    end else begin
                        //
                        fat_bram_xy_bank_reg       <= 3'bXXX;
                        fat_bram_xy_addr_reg       <= 8'hXX;
                        fat_bram_x_dout_reg        <= {18{1'bX}};
                        fat_bram_y_dout_reg        <= {18{1'bX}};
                        fat_bram_xy_dout_valid_reg <= 1'b0;
                        //
                    end
                    //
                end
                //
                2'b01: begin
                    //
                    fat_bram_xy_bank_reg       <= BANK_FAT_ABL;
                    fat_bram_xy_addr_reg       <= fat_bram_xy_cnt_lsb;
                    fat_bram_x_dout_reg        <= {2'b00, recomb_lsb_dout};
//                  fat_bram_y_dout_reg
                    fat_bram_xy_dout_valid_reg <= 1'b1;
                    //
                    fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1;
                    //
                end
                //
                2'b10: begin
                    //
                    if (fat_bram_xy_cnt_msb < 8'd2) begin
                        //
                        recomb_msb_dout_carry_0 <= recomb_msb_dout;
                        recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;
                        //
                        fat_bram_xy_bank_reg       <= 3'bXXX;
                        fat_bram_xy_addr_reg       <= 8'hXX;
                        fat_bram_x_dout_reg        <= {18{1'bX}};
                        // fat_bram_y_dout_reg
                        fat_bram_xy_dout_valid_reg <= 1'b0;
                        //
                    end else begin
                        //
                        fat_bram_xy_bank_reg       <= BANK_FAT_ABH;
                        fat_bram_xy_addr_reg       <= fat_bram_xy_cnt_msb;
                        fat_bram_x_dout_reg        <= {2'b00, recomb_msb_dout};
                        // fat_bram_y_dout_reg
                        fat_bram_xy_dout_valid_reg <= 1'b1;
                        //
                    end
                    //
                    fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1;
                    //
                end
                //
                2'b11: begin
                    //
                    if (fat_bram_xy_cnt_lsb == index_last) begin
                        //
                        fat_bram_xy_bank_reg       <= BANK_FAT_ABL;
                        fat_bram_xy_addr_reg       <= fat_bram_xy_cnt_lsb;
                        fat_bram_x_dout_reg        <= {2'b00, recomb_lsb_dout};
//                      fat_bram_y_dout_reg        <= {18{1'bX}};
                        fat_bram_xy_dout_valid_reg <= 1'b1;
                        //
                        fat_bram_xy_cnt_lsb <= 8'd0;
                        //
                    end else begin
                        //
                        fat_bram_xy_bank_reg       <= BANK_FAT_ABH;
                        fat_bram_xy_addr_reg       <= fat_bram_xy_cnt_lsb;
                        // 17-bit sum of the LSB word and the stashed MSB carry word
                        fat_bram_x_dout_reg        <= {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}};
//                      fat_bram_y_dout_reg        <= {18{1'bX}};
                        fat_bram_xy_dout_valid_reg <= 1'b1;
                        //
                        fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1;
                        //
                        recomb_msb_dout_carry_0 <= {16{1'bX}};
                        recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;
                        //
                    end
                    //
                    recomb_msb_dout_delay_0 <= recomb_msb_dout;
                    recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
                    recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
                    //
                    recomb_msb_cnt_delay_0 <= fat_bram_xy_cnt_msb;
                    recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
                    recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
                    //
                    fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1;
                    //
                end
                //
            endcase
            //
        end




endmodule