From fde62e373fdfcefefb7da10757a3db933160c911 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 15:16:58 +0300 Subject: Major rewrite (different core hierarchy, buses, wrappers, etc). --- rtl/modexpng_mmm_dual_x8.v | 951 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 951 insertions(+) create mode 100644 rtl/modexpng_mmm_dual_x8.v (limited to 'rtl/modexpng_mmm_dual_x8.v') diff --git a/rtl/modexpng_mmm_dual_x8.v b/rtl/modexpng_mmm_dual_x8.v new file mode 100644 index 0000000..2e4f4e0 --- /dev/null +++ b/rtl/modexpng_mmm_dual_x8.v @@ -0,0 +1,951 @@ +module modexpng_mmm_dual_x8 +( + clk, rst, + + ena, rdy, + + + ladder_mode, + word_index_last, + word_index_last_minus1, + + rd_wide_xy_ena, + rd_wide_xy_ena_aux, + rd_wide_xy_bank, + rd_wide_xy_bank_aux, + rd_wide_xy_addr, + rd_wide_xy_addr_aux, + rd_wide_x_dout, + rd_wide_y_dout, + rd_wide_x_dout_aux, + rd_wide_y_dout_aux, + + rd_narrow_xy_ena, + rd_narrow_xy_bank, + rd_narrow_xy_addr, + rd_narrow_x_dout, + rd_narrow_y_dout, + + rcmb_wide_xy_bank, + rcmb_wide_xy_addr, + rcmb_wide_x_dout, + rcmb_wide_y_dout, + rcmb_wide_xy_valid, + + rcmb_narrow_xy_bank, + rcmb_narrow_xy_addr, + rcmb_narrow_x_dout, + rcmb_narrow_y_dout, + rcmb_narrow_xy_valid, + + rcmb_xy_bank, + rcmb_xy_addr, + rcmb_x_dout, + rcmb_y_dout, + rcmb_xy_valid, + + rdct_ena +); + + + // + // Headers + // + `include "../rtl_1/modexpng_mmm_fsm_old.vh" + `include "../rtl_1/modexpng_parameters_old.vh" + `include "../rtl_1/modexpng_parameters_x8_old.vh" + + + // + // Ports + // + input clk; + input rst; + + input ena; + output rdy; + + input ladder_mode; + input [7:0] word_index_last; + input [7:0] word_index_last_minus1; + + output rd_wide_xy_ena; + output rd_wide_xy_ena_aux; + output [ 1:0] rd_wide_xy_bank; + output [ 1:0] rd_wide_xy_bank_aux; + output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; + output [ 8-1:0] rd_wide_xy_addr_aux; + input [18*NUM_MULTS/2-1:0] rd_wide_x_dout; + input [18*NUM_MULTS/2-1:0] rd_wide_y_dout; + input [ 18-1:0] rd_wide_x_dout_aux; + input [ 18-1:0] rd_wide_y_dout_aux; + + output rd_narrow_xy_ena; + output [ 1:0] rd_narrow_xy_bank; + output [ 7:0] rd_narrow_xy_addr; + input [18-1:0] rd_narrow_x_dout; + input [18-1:0] rd_narrow_y_dout; + + output [ 1:0] rcmb_wide_xy_bank; + output [ 7:0] rcmb_wide_xy_addr; + output [17:0] rcmb_wide_x_dout; + output [17:0] rcmb_wide_y_dout; + output rcmb_wide_xy_valid; + + output [ 1:0] rcmb_narrow_xy_bank; + output [ 7:0] rcmb_narrow_xy_addr; + output [17:0] rcmb_narrow_x_dout; + output [17:0] rcmb_narrow_y_dout; + output rcmb_narrow_xy_valid; + + output [ 1:0] rcmb_xy_bank; + output [ 7:0] rcmb_xy_addr; + output [17:0] rcmb_x_dout; + output [17:0] rcmb_y_dout; + output rcmb_xy_valid; + + output rdct_ena; + + + // + // FSM Declaration + // + reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE; + reg [FSM_STATE_WIDTH-1:0] fsm_state_next; + + wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; + wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle; + wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle; + + + // + // FSM Process + // + always @(posedge clk) + // + if (rst) fsm_state <= FSM_STATE_IDLE; + else fsm_state <= fsm_state_next; + + + // + // Storage Control Interface + // + reg wide_xy_ena = 1'b0; + reg wide_xy_ena_aux = 1'b0; + reg [ 1:0] wide_xy_bank; + reg [ 1:0] wide_xy_bank_aux; + reg [ 8-1:0] wide_xy_addr[0:3]; + reg [ 8-1:0] wide_xy_addr_aux; + + reg narrow_xy_ena = 1'b0; + reg [ 1:0] narrow_xy_bank; + reg [ 7:0] narrow_xy_addr; + reg [ 7:0] narrow_xy_addr_dly; + + assign rd_wide_xy_ena = wide_xy_ena; + assign rd_wide_xy_ena_aux = wide_xy_ena_aux; + assign rd_wide_xy_bank = wide_xy_bank; + assign rd_wide_xy_bank_aux = wide_xy_bank_aux; + assign rd_wide_xy_addr_aux = wide_xy_addr_aux; + + assign rd_narrow_xy_ena = narrow_xy_ena; + assign rd_narrow_xy_bank = narrow_xy_bank; + assign rd_narrow_xy_addr = narrow_xy_addr; + + genvar z; + generate for (z=0; z<(NUM_MULTS/2); z=z+1) + begin : gen_rd_wide_xy_addr + assign rd_wide_xy_addr[8*z+:8] = wide_xy_addr[z]; + end + endgenerate + + // + // Column Counter + // + reg [4:0] col_index; // current column index + reg [4:0] col_index_prev; // delayed column index value + reg [4:0] col_index_last; // index of the very last column + reg [4:0] col_index_next; // precomputed next column index + reg col_is_last; // flag set during the very last column + + always @(posedge clk) + // + col_index_prev <= col_index; + + // + // Column Counter Increment Logic + // + always @(posedge clk) + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin + col_index <= 5'd0; + col_index_last <= word_index_last[7:3]; + col_index_next <= 5'd1; + col_is_last <= 1'b0; + + end + // + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin + col_index <= col_index_next; + col_is_last <= col_index_next == col_index_last; + col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1; + end + // + endcase + + + // + // Completion Flags + // + wire square_almost_done_comb; + reg square_almost_done_flop = 1'b0; + reg square_surely_done_flop = 1'b0; + + wire triangle_almost_done_comb; + reg triangle_almost_done_flop = 1'b0; + reg triangle_surely_done_flop = 1'b0; + reg triangle_tardy_done_flop = 1'b0; + + wire rectangle_almost_done_comb; + reg rectangle_almost_done_flop = 1'b0; + reg rectangle_surely_done_flop = 1'b0; + reg rectangle_tardy_done_flop = 1'b0; + + assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1; + assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index); + assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1; + + // + // Square Completion Flags + // + always @(posedge clk) begin + // + case (fsm_state) + // + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: + square_almost_done_flop <= square_almost_done_comb; + // + default: + square_almost_done_flop <= 1'b0; + // + endcase + // + square_surely_done_flop <= square_almost_done_flop; + // + end + + // + // Triangle Completion Flags + // + always @(posedge clk) begin + // + case (fsm_state) + // + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: + triangle_almost_done_flop <= triangle_almost_done_comb; + // + default: + triangle_almost_done_flop <= 1'b0; + // + endcase + // + triangle_surely_done_flop <= triangle_almost_done_flop; + triangle_tardy_done_flop <= triangle_surely_done_flop; + // + end + + // + // Rectangle Completion Flags + // + always @(posedge clk) begin + // + case (fsm_state) + // + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: + rectangle_almost_done_flop <= rectangle_almost_done_comb; + // + default: + rectangle_almost_done_flop <= 1'b0; + // + endcase + // + rectangle_surely_done_flop <= rectangle_almost_done_flop; + rectangle_tardy_done_flop <= rectangle_surely_done_flop; + // + end + + + // + // Narrow Storage Control Logic + // + always @(posedge clk) + // + if (rst) narrow_xy_ena <= 1'b0; + else begin + // + // Narrow Address + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT: narrow_xy_addr <= 8'd0; + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0; + // + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? + 8'd0 : narrow_xy_addr + 1'b1; + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? + 8'd1 : narrow_xy_addr + 1'b1; + // + default: narrow_xy_addr <= 8'dX; + // + endcase + // + // Narrow Bank + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= BANK_NARROW_T1T2; + // + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? + BANK_NARROW_EXT : BANK_NARROW_N_COEFF; + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? + BANK_NARROW_EXT : BANK_NARROW_Q; + // + default: narrow_xy_bank <= 2'bXX; + // + endcase + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop; + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop; + // + default: narrow_xy_ena <= 1'b0; + // + endcase + // + end + + + // + // Wide Storage Control Logic + // + + wire [2:0] wide_offset_rom[0:3]; + + generate for (z=1; z 8'd0) + wide_xy_addr_next = wide_xy_addr_current - 1'b1; + else + wide_xy_addr_next = wide_xy_addr_last; + end + endfunction + + integer j; + always @(posedge clk) + // + if (rst) begin + wide_xy_ena <= 1'b0; + wide_xy_ena_aux <= 1'b0; + end else begin + // + // Wide Address + // + for (j=0; j<(NUM_MULTS/2); j=j+1) + // + case (fsm_state_next) + // + // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // + FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + default: wide_xy_addr[j] <= 8'dX; + endcase + // + // Wide Aux Address + // + case (fsm_state_next) + // + // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // + FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0}; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX; + //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ? + //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4]; + // + default: wide_xy_addr_aux <= 8'dX; + endcase + // + // Wide Bank + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_T1T2; + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_ABL; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_ABL; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N; + default: wide_xy_bank <= 3'bXXX; + endcase + // + // Wide Aux Bank + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_T1T2; + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_ABH; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_ABL; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's) + case (rcmb_xy_bank) + BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_ABL; + BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_ABH; + //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX + default: wide_xy_bank_aux <= 3'bXXX; + endcase + else wide_xy_bank_aux <= 3'bXXX; + default: wide_xy_bank_aux <= 3'bXXX; + endcase + // + // Wide Enable + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1; + default: wide_xy_ena <= 1'b0; + endcase + // + // Wide Aux Enable + // + case (fsm_state_next) + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML); + default: wide_xy_ena_aux <= 1'b0; + endcase + // + end + + + // + // Delay Lines + // + always @(posedge clk) + // + narrow_xy_addr_dly <= narrow_xy_addr; + + + // + // DSP Array Logic + // + reg dsp_xy_ce_a = 1'b0; + reg dsp_xy_ce_b = 1'b0; + reg dsp_xy_ce_b_dly = 1'b0; + reg dsp_xy_ce_m = 1'b0; + reg dsp_xy_ce_p = 1'b0; + reg dsp_xy_ce_mode = 1'b0; + + reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}}; + + wire [5*18-1:0] dsp_x_a; + wire [5*18-1:0] dsp_y_a; + + reg [1*16-1:0] dsp_x_b; + reg [1*16-1:0] dsp_y_b; + + reg [ 1:0] dsp_xy_b_carry; + + wire [9*47-1:0] dsp_x_p; + wire [9*47-1:0] dsp_y_p; + + //generate for (z=0; z<(NUM_MULTS/2); z=z+1) + //begin : gen_dsp_xy_a_split + //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z]; + //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z]; + //end + //endgenerate + + assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout}; + assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout}; + + //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux; + //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux; + + always @(posedge clk) + // + dsp_xy_ce_b_dly <= dsp_xy_ce_b; + + + modexpng_dsp_array_block dsp_array_block_x + ( + .clk (clk), + + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), + + .mode_z (dsp_xy_mode_z), + + .a (dsp_x_a), + .b (dsp_x_b), + .p (dsp_x_p) + ); + + modexpng_dsp_array_block dsp_array_block_y + ( + .clk (clk), + + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), + + .mode_z (dsp_xy_mode_z), + + .a (dsp_y_a), + .b (dsp_y_b), + .p (dsp_y_p) + ); + + + + + // + // DSP Control Logic + // + reg narrow_xy_ena_dly1 = 1'b0; + reg narrow_xy_ena_dly2 = 1'b0; + + always @(posedge clk) + // + if (rst) begin + // + narrow_xy_ena_dly1 <= 1'b0; + narrow_xy_ena_dly2 <= 1'b0; + // + dsp_xy_ce_a <= 1'b0; + dsp_xy_ce_b <= 1'b0; + dsp_xy_ce_m <= 1'b0; + dsp_xy_ce_p <= 1'b0; + dsp_xy_ce_mode <= 1'b0; + // + end else begin + // + narrow_xy_ena_dly1 <= narrow_xy_ena; + narrow_xy_ena_dly2 <= narrow_xy_ena_dly1; + // + dsp_xy_ce_a <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2; + dsp_xy_ce_b <= narrow_xy_ena_dly2; + dsp_xy_ce_m <= dsp_xy_ce_b_dly; + dsp_xy_ce_p <= dsp_xy_ce_m; + dsp_xy_ce_mode <= dsp_xy_ce_b_dly; + // + end + + // + // DSP Feed Logic + // + reg dsp_merge_xy_b; + + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_merge_xy_b <= 1'b1; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0; + endcase + + // + // On-the-fly Carry Recombination + // + wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry; + + always @(posedge clk) + // + if (narrow_xy_ena_dly2) begin // rewrite + // + if (!dsp_merge_xy_b) begin + dsp_x_b <= rd_narrow_x_dout[15:0]; + dsp_y_b <= rd_narrow_y_dout[15:0]; + dsp_xy_b_carry <= 2'b00; + end else begin + dsp_x_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_y_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16]; + end + // + end else begin + // + dsp_x_b <= {16{1'bX}}; + dsp_y_b <= {16{1'bX}}; + // + dsp_xy_b_carry <= 2'b00; + // + end + + + reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}}; + + function [NUM_MULTS:0] calc_mac_mode_z_square; + input [ 4:0] col_index_value; + input [ 7:0] narrow_xy_addr_value; + begin + if (narrow_xy_addr_value[7:3] == col_index_value) + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111}; + endcase + else + calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; + end + endfunction + + function [NUM_MULTS:0] calc_mac_mode_z_rectangle; + input [ 4:0] col_index_value; + input [ 7:0] narrow_xy_addr_value; + begin + if (narrow_xy_addr_value[7:3] == col_index_value) + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111}; + endcase + else + calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; + end + endfunction + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly); + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly); + default: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + endcase + + always @(posedge clk) begin + dsp_xy_mode_z <= dsp_xy_mode_z_adv1; + // + dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2; + dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3; + dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4; + end + + + + + + // + // Recombinator + // + reg rcmb_ena = 1'b0; + wire rcmb_rdy; + + modexpng_recombinator_block recombinator_block + ( + .clk (clk), + + .ena (rcmb_ena), + .rdy (rcmb_rdy), + + .fsm_state_next (fsm_state_next), + + .word_index_last (word_index_last), + + .dsp_xy_ce_p (dsp_xy_ce_p), + .dsp_x_p (dsp_x_p), + .dsp_y_p (dsp_y_p), + + .col_index (col_index), + .col_index_last (col_index_last), + + .rd_narrow_xy_addr (narrow_xy_addr), + .rd_narrow_xy_bank (narrow_xy_bank), + + .rcmb_wide_xy_bank (rcmb_wide_xy_bank), + .rcmb_wide_xy_addr (rcmb_wide_xy_addr), + .rcmb_wide_x_dout (rcmb_wide_x_dout), + .rcmb_wide_y_dout (rcmb_wide_y_dout), + .rcmb_wide_xy_valid (rcmb_wide_xy_valid), + + .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), + .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), + .rcmb_narrow_x_dout (rcmb_narrow_x_dout), + .rcmb_narrow_y_dout (rcmb_narrow_y_dout), + .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid), + + .rdct_narrow_xy_bank (rcmb_xy_bank), + .rdct_narrow_xy_addr (rcmb_xy_addr), + .rdct_narrow_x_dout (rcmb_x_dout), + .rdct_narrow_y_dout (rcmb_y_dout), + .rdct_narrow_xy_valid (rcmb_xy_valid) + + ); + + + // + // Recombinator Enable Logic + // + always @(posedge clk) + // + if (rst) rcmb_ena <= 1'b0; + else rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p; + + + // + // Handy Completion Flags + // + wire square_done = square_surely_done_flop; + wire triangle_done = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop; + wire rectangle_done = rectangle_tardy_done_flop; + + + // + // FSM Transition Logic + // + assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; + assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT; + assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; + + always @* begin + // + fsm_state_next = FSM_STATE_IDLE; + // + case (fsm_state) + FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE; + + FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ; + FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ; + FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY; + + FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ; + FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY; + + FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF; + + FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY; + + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ; + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ; + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY; + + FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF; + + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ; + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY; + + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ; + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ; + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY; + + FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF; + + default: fsm_state_next = FSM_STATE_IDLE ; + + endcase + // + end + + + // + // Reductor Control Logic + // + reg rdct_ena_reg = 1'b0; + + assign rdct_ena = rdct_ena_reg; + + always @(posedge clk) // add reset!!! + // + case (fsm_state) + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1; + default: rdct_ena_reg <= 1'b0; + endcase + + + +endmodule -- cgit v1.2.3