From 71f70252dfc7e41103dde420a721be8aa48486d5 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 16:18:33 +0300 Subject: Redesigned core architecture, unified bank structure. All storage blocks now have eight 4kbit entries and occupy one 36K BRAM tile. --- rtl/modexpng_mmm_dual_x8.v | 951 --------------------------------------------- 1 file changed, 951 deletions(-) delete mode 100644 rtl/modexpng_mmm_dual_x8.v (limited to 'rtl/modexpng_mmm_dual_x8.v') diff --git a/rtl/modexpng_mmm_dual_x8.v b/rtl/modexpng_mmm_dual_x8.v deleted file mode 100644 index 2e4f4e0..0000000 --- a/rtl/modexpng_mmm_dual_x8.v +++ /dev/null @@ -1,951 +0,0 @@ -module modexpng_mmm_dual_x8 -( - clk, rst, - - ena, rdy, - - - ladder_mode, - word_index_last, - word_index_last_minus1, - - rd_wide_xy_ena, - rd_wide_xy_ena_aux, - rd_wide_xy_bank, - rd_wide_xy_bank_aux, - rd_wide_xy_addr, - rd_wide_xy_addr_aux, - rd_wide_x_dout, - rd_wide_y_dout, - rd_wide_x_dout_aux, - rd_wide_y_dout_aux, - - rd_narrow_xy_ena, - rd_narrow_xy_bank, - rd_narrow_xy_addr, - rd_narrow_x_dout, - rd_narrow_y_dout, - - rcmb_wide_xy_bank, - rcmb_wide_xy_addr, - rcmb_wide_x_dout, - rcmb_wide_y_dout, - rcmb_wide_xy_valid, - - rcmb_narrow_xy_bank, - rcmb_narrow_xy_addr, - rcmb_narrow_x_dout, - rcmb_narrow_y_dout, - rcmb_narrow_xy_valid, - - rcmb_xy_bank, - rcmb_xy_addr, - rcmb_x_dout, - rcmb_y_dout, - rcmb_xy_valid, - - rdct_ena -); - - - // - // Headers - // - `include "../rtl_1/modexpng_mmm_fsm_old.vh" - `include "../rtl_1/modexpng_parameters_old.vh" - `include "../rtl_1/modexpng_parameters_x8_old.vh" - - - // - // Ports - // - input clk; - input rst; - - input ena; - output rdy; - - input ladder_mode; - input [7:0] word_index_last; - input [7:0] word_index_last_minus1; - - output rd_wide_xy_ena; - output rd_wide_xy_ena_aux; - output [ 1:0] rd_wide_xy_bank; - output [ 1:0] rd_wide_xy_bank_aux; - output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; - output [ 8-1:0] rd_wide_xy_addr_aux; - input [18*NUM_MULTS/2-1:0] rd_wide_x_dout; - input [18*NUM_MULTS/2-1:0] rd_wide_y_dout; - input [ 18-1:0] rd_wide_x_dout_aux; - input [ 18-1:0] rd_wide_y_dout_aux; - - output rd_narrow_xy_ena; - output [ 1:0] rd_narrow_xy_bank; - output [ 7:0] rd_narrow_xy_addr; - input [18-1:0] rd_narrow_x_dout; - input [18-1:0] rd_narrow_y_dout; - - output [ 1:0] rcmb_wide_xy_bank; - output [ 7:0] rcmb_wide_xy_addr; - output [17:0] rcmb_wide_x_dout; - output [17:0] rcmb_wide_y_dout; - output rcmb_wide_xy_valid; - - output [ 1:0] rcmb_narrow_xy_bank; - output [ 7:0] rcmb_narrow_xy_addr; - output [17:0] rcmb_narrow_x_dout; - output [17:0] rcmb_narrow_y_dout; - output rcmb_narrow_xy_valid; - - output [ 1:0] rcmb_xy_bank; - output [ 7:0] rcmb_xy_addr; - output [17:0] rcmb_x_dout; - output [17:0] rcmb_y_dout; - output rcmb_xy_valid; - - output rdct_ena; - - - // - // FSM Declaration - // - reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE; - reg [FSM_STATE_WIDTH-1:0] fsm_state_next; - - wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; - wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle; - wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle; - - - // - // FSM Process - // - always @(posedge clk) - // - if (rst) fsm_state <= FSM_STATE_IDLE; - else fsm_state <= fsm_state_next; - - - // - // Storage Control Interface - // - reg wide_xy_ena = 1'b0; - reg wide_xy_ena_aux = 1'b0; - reg [ 1:0] wide_xy_bank; - reg [ 1:0] wide_xy_bank_aux; - reg [ 8-1:0] wide_xy_addr[0:3]; - reg [ 8-1:0] wide_xy_addr_aux; - - reg narrow_xy_ena = 1'b0; - reg [ 1:0] narrow_xy_bank; - reg [ 7:0] narrow_xy_addr; - reg [ 7:0] narrow_xy_addr_dly; - - assign rd_wide_xy_ena = wide_xy_ena; - assign rd_wide_xy_ena_aux = wide_xy_ena_aux; - assign rd_wide_xy_bank = wide_xy_bank; - assign rd_wide_xy_bank_aux = wide_xy_bank_aux; - assign rd_wide_xy_addr_aux = wide_xy_addr_aux; - - assign rd_narrow_xy_ena = narrow_xy_ena; - assign rd_narrow_xy_bank = narrow_xy_bank; - assign rd_narrow_xy_addr = narrow_xy_addr; - - genvar z; - generate for (z=0; z<(NUM_MULTS/2); z=z+1) - begin : gen_rd_wide_xy_addr - assign rd_wide_xy_addr[8*z+:8] = wide_xy_addr[z]; - end - endgenerate - - // - // Column Counter - // - reg [4:0] col_index; // current column index - reg [4:0] col_index_prev; // delayed column index value - reg [4:0] col_index_last; // index of the very last column - reg [4:0] col_index_next; // precomputed next column index - reg col_is_last; // flag set during the very last column - - always @(posedge clk) - // - col_index_prev <= col_index; - - // - // Column Counter Increment Logic - // - always @(posedge clk) - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin - col_index <= 5'd0; - col_index_last <= word_index_last[7:3]; - col_index_next <= 5'd1; - col_is_last <= 1'b0; - - end - // - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin - col_index <= col_index_next; - col_is_last <= col_index_next == col_index_last; - col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1; - end - // - endcase - - - // - // Completion Flags - // - wire square_almost_done_comb; - reg square_almost_done_flop = 1'b0; - reg square_surely_done_flop = 1'b0; - - wire triangle_almost_done_comb; - reg triangle_almost_done_flop = 1'b0; - reg triangle_surely_done_flop = 1'b0; - reg triangle_tardy_done_flop = 1'b0; - - wire rectangle_almost_done_comb; - reg rectangle_almost_done_flop = 1'b0; - reg rectangle_surely_done_flop = 1'b0; - reg rectangle_tardy_done_flop = 1'b0; - - assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1; - assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index); - assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1; - - // - // Square Completion Flags - // - always @(posedge clk) begin - // - case (fsm_state) - // - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: - square_almost_done_flop <= square_almost_done_comb; - // - default: - square_almost_done_flop <= 1'b0; - // - endcase - // - square_surely_done_flop <= square_almost_done_flop; - // - end - - // - // Triangle Completion Flags - // - always @(posedge clk) begin - // - case (fsm_state) - // - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: - triangle_almost_done_flop <= triangle_almost_done_comb; - // - default: - triangle_almost_done_flop <= 1'b0; - // - endcase - // - triangle_surely_done_flop <= triangle_almost_done_flop; - triangle_tardy_done_flop <= triangle_surely_done_flop; - // - end - - // - // Rectangle Completion Flags - // - always @(posedge clk) begin - // - case (fsm_state) - // - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: - rectangle_almost_done_flop <= rectangle_almost_done_comb; - // - default: - rectangle_almost_done_flop <= 1'b0; - // - endcase - // - rectangle_surely_done_flop <= rectangle_almost_done_flop; - rectangle_tardy_done_flop <= rectangle_surely_done_flop; - // - end - - - // - // Narrow Storage Control Logic - // - always @(posedge clk) - // - if (rst) narrow_xy_ena <= 1'b0; - else begin - // - // Narrow Address - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT: narrow_xy_addr <= 8'd0; - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0; - // - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? - 8'd0 : narrow_xy_addr + 1'b1; - // - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? - 8'd1 : narrow_xy_addr + 1'b1; - // - default: narrow_xy_addr <= 8'dX; - // - endcase - // - // Narrow Bank - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= BANK_NARROW_T1T2; - // - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? - BANK_NARROW_EXT : BANK_NARROW_N_COEFF; - // - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? - BANK_NARROW_EXT : BANK_NARROW_Q; - // - default: narrow_xy_bank <= 2'bXX; - // - endcase - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG: narrow_xy_ena <= 1'b1; - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop; - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop; - // - default: narrow_xy_ena <= 1'b0; - // - endcase - // - end - - - // - // Wide Storage Control Logic - // - - wire [2:0] wide_offset_rom[0:3]; - - generate for (z=1; z 8'd0) - wide_xy_addr_next = wide_xy_addr_current - 1'b1; - else - wide_xy_addr_next = wide_xy_addr_last; - end - endfunction - - integer j; - always @(posedge clk) - // - if (rst) begin - wide_xy_ena <= 1'b0; - wide_xy_ena_aux <= 1'b0; - end else begin - // - // Wide Address - // - for (j=0; j<(NUM_MULTS/2); j=j+1) - // - case (fsm_state_next) - // - // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! - // - FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; - FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); - // - FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; - FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); - // - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; - FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); - // - default: wide_xy_addr[j] <= 8'dX; - endcase - // - // Wide Aux Address - // - case (fsm_state_next) - // - // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! - // - FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; - FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); - // - FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; - FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); - // - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0}; - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, - FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX; - //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ? - //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4]; - // - default: wide_xy_addr_aux <= 8'dX; - endcase - // - // Wide Bank - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_T1T2; - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_ABL; - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_ABL; - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N; - default: wide_xy_bank <= 3'bXXX; - endcase - // - // Wide Aux Bank - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_T1T2; - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_ABH; - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_ABL; - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, - FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's) - case (rcmb_xy_bank) - BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_ABL; - BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_ABH; - //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX - default: wide_xy_bank_aux <= 3'bXXX; - endcase - else wide_xy_bank_aux <= 3'bXXX; - default: wide_xy_bank_aux <= 3'bXXX; - endcase - // - // Wide Enable - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1; - default: wide_xy_ena <= 1'b0; - endcase - // - // Wide Aux Enable - // - case (fsm_state_next) - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1; - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1; - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, - FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML); - default: wide_xy_ena_aux <= 1'b0; - endcase - // - end - - - // - // Delay Lines - // - always @(posedge clk) - // - narrow_xy_addr_dly <= narrow_xy_addr; - - - // - // DSP Array Logic - // - reg dsp_xy_ce_a = 1'b0; - reg dsp_xy_ce_b = 1'b0; - reg dsp_xy_ce_b_dly = 1'b0; - reg dsp_xy_ce_m = 1'b0; - reg dsp_xy_ce_p = 1'b0; - reg dsp_xy_ce_mode = 1'b0; - - reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}}; - - wire [5*18-1:0] dsp_x_a; - wire [5*18-1:0] dsp_y_a; - - reg [1*16-1:0] dsp_x_b; - reg [1*16-1:0] dsp_y_b; - - reg [ 1:0] dsp_xy_b_carry; - - wire [9*47-1:0] dsp_x_p; - wire [9*47-1:0] dsp_y_p; - - //generate for (z=0; z<(NUM_MULTS/2); z=z+1) - //begin : gen_dsp_xy_a_split - //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z]; - //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z]; - //end - //endgenerate - - assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout}; - assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout}; - - //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux; - //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux; - - always @(posedge clk) - // - dsp_xy_ce_b_dly <= dsp_xy_ce_b; - - - modexpng_dsp_array_block dsp_array_block_x - ( - .clk (clk), - - .ce_a (dsp_xy_ce_a), - .ce_b (dsp_xy_ce_b), - .ce_m (dsp_xy_ce_m), - .ce_p (dsp_xy_ce_p), - .ce_mode (dsp_xy_ce_mode), - - .mode_z (dsp_xy_mode_z), - - .a (dsp_x_a), - .b (dsp_x_b), - .p (dsp_x_p) - ); - - modexpng_dsp_array_block dsp_array_block_y - ( - .clk (clk), - - .ce_a (dsp_xy_ce_a), - .ce_b (dsp_xy_ce_b), - .ce_m (dsp_xy_ce_m), - .ce_p (dsp_xy_ce_p), - .ce_mode (dsp_xy_ce_mode), - - .mode_z (dsp_xy_mode_z), - - .a (dsp_y_a), - .b (dsp_y_b), - .p (dsp_y_p) - ); - - - - - // - // DSP Control Logic - // - reg narrow_xy_ena_dly1 = 1'b0; - reg narrow_xy_ena_dly2 = 1'b0; - - always @(posedge clk) - // - if (rst) begin - // - narrow_xy_ena_dly1 <= 1'b0; - narrow_xy_ena_dly2 <= 1'b0; - // - dsp_xy_ce_a <= 1'b0; - dsp_xy_ce_b <= 1'b0; - dsp_xy_ce_m <= 1'b0; - dsp_xy_ce_p <= 1'b0; - dsp_xy_ce_mode <= 1'b0; - // - end else begin - // - narrow_xy_ena_dly1 <= narrow_xy_ena; - narrow_xy_ena_dly2 <= narrow_xy_ena_dly1; - // - dsp_xy_ce_a <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2; - dsp_xy_ce_b <= narrow_xy_ena_dly2; - dsp_xy_ce_m <= dsp_xy_ce_b_dly; - dsp_xy_ce_p <= dsp_xy_ce_m; - dsp_xy_ce_mode <= dsp_xy_ce_b_dly; - // - end - - // - // DSP Feed Logic - // - reg dsp_merge_xy_b; - - always @(posedge clk) - // - case (fsm_state) - FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_merge_xy_b <= 1'b1; - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0; - endcase - - // - // On-the-fly Carry Recombination - // - wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry}; - wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry}; - wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry; - - always @(posedge clk) - // - if (narrow_xy_ena_dly2) begin // rewrite - // - if (!dsp_merge_xy_b) begin - dsp_x_b <= rd_narrow_x_dout[15:0]; - dsp_y_b <= rd_narrow_y_dout[15:0]; - dsp_xy_b_carry <= 2'b00; - end else begin - dsp_x_b <= rd_narrow_xy_dout_carry_mux[15:0]; - dsp_y_b <= rd_narrow_xy_dout_carry_mux[15:0]; - dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16]; - end - // - end else begin - // - dsp_x_b <= {16{1'bX}}; - dsp_y_b <= {16{1'bX}}; - // - dsp_xy_b_carry <= 2'b00; - // - end - - - reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}}; - reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}}; - reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}}; - reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}}; - - function [NUM_MULTS:0] calc_mac_mode_z_square; - input [ 4:0] col_index_value; - input [ 7:0] narrow_xy_addr_value; - begin - if (narrow_xy_addr_value[7:3] == col_index_value) - case (narrow_xy_addr_value[2:0]) - 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; - 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101}; - 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011}; - 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111}; - 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111}; - 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111}; - 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111}; - 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111}; - endcase - else - calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; - end - endfunction - - function [NUM_MULTS:0] calc_mac_mode_z_rectangle; - input [ 4:0] col_index_value; - input [ 7:0] narrow_xy_addr_value; - begin - if (narrow_xy_addr_value[7:3] == col_index_value) - case (narrow_xy_addr_value[2:0]) - 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110}; - 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101}; - 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011}; - 3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111}; - 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111}; - 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111}; - 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111}; - 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111}; - endcase - else - calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; - end - endfunction - - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly); - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}}; - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly); - default: dsp_xy_mode_z_adv4 <= {9{1'b1}}; - endcase - - always @(posedge clk) begin - dsp_xy_mode_z <= dsp_xy_mode_z_adv1; - // - dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2; - dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3; - dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4; - end - - - - - - // - // Recombinator - // - reg rcmb_ena = 1'b0; - wire rcmb_rdy; - - modexpng_recombinator_block recombinator_block - ( - .clk (clk), - - .ena (rcmb_ena), - .rdy (rcmb_rdy), - - .fsm_state_next (fsm_state_next), - - .word_index_last (word_index_last), - - .dsp_xy_ce_p (dsp_xy_ce_p), - .dsp_x_p (dsp_x_p), - .dsp_y_p (dsp_y_p), - - .col_index (col_index), - .col_index_last (col_index_last), - - .rd_narrow_xy_addr (narrow_xy_addr), - .rd_narrow_xy_bank (narrow_xy_bank), - - .rcmb_wide_xy_bank (rcmb_wide_xy_bank), - .rcmb_wide_xy_addr (rcmb_wide_xy_addr), - .rcmb_wide_x_dout (rcmb_wide_x_dout), - .rcmb_wide_y_dout (rcmb_wide_y_dout), - .rcmb_wide_xy_valid (rcmb_wide_xy_valid), - - .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), - .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), - .rcmb_narrow_x_dout (rcmb_narrow_x_dout), - .rcmb_narrow_y_dout (rcmb_narrow_y_dout), - .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid), - - .rdct_narrow_xy_bank (rcmb_xy_bank), - .rdct_narrow_xy_addr (rcmb_xy_addr), - .rdct_narrow_x_dout (rcmb_x_dout), - .rdct_narrow_y_dout (rcmb_y_dout), - .rdct_narrow_xy_valid (rcmb_xy_valid) - - ); - - - // - // Recombinator Enable Logic - // - always @(posedge clk) - // - if (rst) rcmb_ena <= 1'b0; - else rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p; - - - // - // Handy Completion Flags - // - wire square_done = square_surely_done_flop; - wire triangle_done = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop; - wire rectangle_done = rectangle_tardy_done_flop; - - - // - // FSM Transition Logic - // - assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; - assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT; - assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; - - always @* begin - // - fsm_state_next = FSM_STATE_IDLE; - // - case (fsm_state) - FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE; - - FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ; - FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ; - FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY; - - FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ; - FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ; - FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY; - - FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF; - - FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ; - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ; - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY; - - FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ; - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ; - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY; - - FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF; - - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ; - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ; - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY; - - FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ; - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ; - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY; - - FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF; - - default: fsm_state_next = FSM_STATE_IDLE ; - - endcase - // - end - - - // - // Reductor Control Logic - // - reg rdct_ena_reg = 1'b0; - - assign rdct_ena = rdct_ena_reg; - - always @(posedge clk) // add reset!!! - // - case (fsm_state) - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1; - default: rdct_ena_reg <= 1'b0; - endcase - - - -endmodule -- cgit v1.2.3