diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 16:18:33 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 16:18:33 +0300 |
commit | 71f70252dfc7e41103dde420a721be8aa48486d5 (patch) | |
tree | 182c413b590d6056b02c5d20818c3385c83610e3 /rtl | |
parent | fde62e373fdfcefefb7da10757a3db933160c911 (diff) |
Redesigned core architecture, unified bank structure. All storage blocks now
have eight 4kbit entries and occupy one 36K BRAM tile.
Diffstat (limited to 'rtl')
25 files changed, 3389 insertions, 1042 deletions
diff --git a/rtl/_modexpng_mmm_dual_x8.v b/rtl/_modexpng_mmm_dual_x8.v new file mode 100644 index 0000000..ffd5ccf --- /dev/null +++ b/rtl/_modexpng_mmm_dual_x8.v @@ -0,0 +1,961 @@ +module modexpng_mmm_dual_x8 +( + clk, rst, + + ena, rdy, + + ladder_mode, + word_index_last, + word_index_last_minus1, + + sel_wide_in, + sel_narrow_in, + sel_wide_out, + sel_narrow_out, + + rd_wide_xy_ena, + rd_wide_xy_ena_aux, + rd_wide_xy_bank, + rd_wide_xy_bank_aux, + rd_wide_xy_addr, + rd_wide_xy_addr_aux, + rd_wide_x_dout, + rd_wide_y_dout, + rd_wide_x_dout_aux, + rd_wide_y_dout_aux, + + rd_narrow_xy_ena, + rd_narrow_xy_bank, + rd_narrow_xy_addr, + rd_narrow_x_dout, + rd_narrow_y_dout, + + rcmb_wide_xy_bank, + rcmb_wide_xy_addr, + rcmb_wide_x_dout, + rcmb_wide_y_dout, + rcmb_wide_xy_valid, + + rcmb_narrow_xy_bank, + rcmb_narrow_xy_addr, + rcmb_narrow_x_dout, + rcmb_narrow_y_dout, + rcmb_narrow_xy_valid, + + rcmb_xy_bank, + rcmb_xy_addr, + rcmb_x_dout, + rcmb_y_dout, + rcmb_xy_valid, + + rdct_ena, rdct_rdy +); + + + // + // Headers + // + `include "../rtl/modexpng_mmm_fsm.vh" + `include "../rtl/modexpng_parameters.vh" + + + // + // Ports + // + input clk; + input rst; + + input ena; + output rdy; + + input ladder_mode; + input [ OP_ADDR_W -1:0] word_index_last; + input [ OP_ADDR_W -1:0] word_index_last_minus1; + + input [ BANK_ADDR_W -1:0] sel_wide_in; + input [ BANK_ADDR_W -1:0] sel_narrow_in; + input [ BANK_ADDR_W -1:0] sel_wide_out; + input [ BANK_ADDR_W -1:0] sel_narrow_out; + + output rd_wide_xy_ena; + output rd_wide_xy_ena_aux; + output [ BANK_ADDR_W -1:0] rd_wide_xy_bank; + output [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + output [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr; + output [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; + input [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout; + input [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout; + input [ WORD_EXT_W -1:0] rd_wide_x_dout_aux; + input [ WORD_EXT_W -1:0] rd_wide_y_dout_aux; + + output rd_narrow_xy_ena; + output [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + input [ WORD_EXT_W -1:0] rd_narrow_x_dout; + input [ WORD_EXT_W -1:0] rd_narrow_y_dout; + + output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_wide_x_dout; + output [ WORD_EXT_W -1:0] rcmb_wide_y_dout; + output rcmb_wide_xy_valid; + + output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout; + output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout; + output rcmb_narrow_xy_valid; + + output [ BANK_ADDR_W -1:0] rcmb_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_x_dout; + output [ WORD_EXT_W -1:0] rcmb_y_dout; + output rcmb_xy_valid; + + output rdct_ena; + input rdct_rdy; + + + // + // FSM Declaration + // + reg [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE; + reg [MMM_FSM_STATE_W-1:0] fsm_state_next; + + wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_square; + wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_triangle; + wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_rectangle; + + + // + // FSM Process + // + always @(posedge clk) + // + if (rst) fsm_state <= MMM_FSM_STATE_IDLE; + else fsm_state <= fsm_state_next; + + + // + // Storage Control Interface + // + reg wide_xy_ena = 1'b0; + reg wide_xy_ena_aux = 1'b0; + reg [ BANK_ADDR_W -1:0] wide_xy_bank; + reg [ BANK_ADDR_W -1:0] wide_xy_bank_aux; + reg [OP_ADDR_W -1:0] wide_xy_addr[0:3]; + reg [OP_ADDR_W -1:0] wide_xy_addr_aux; + + reg narrow_xy_ena = 1'b0; + reg [ BANK_ADDR_W -1:0] narrow_xy_bank; + reg [OP_ADDR_W -1:0] narrow_xy_addr; + reg [OP_ADDR_W -1:0] narrow_xy_addr_dly; + + assign rd_wide_xy_ena = wide_xy_ena; + assign rd_wide_xy_ena_aux = wide_xy_ena_aux; + assign rd_wide_xy_bank = wide_xy_bank; + assign rd_wide_xy_bank_aux = wide_xy_bank_aux; + assign rd_wide_xy_addr_aux = wide_xy_addr_aux; + + assign rd_narrow_xy_ena = narrow_xy_ena; + assign rd_narrow_xy_bank = narrow_xy_bank; + assign rd_narrow_xy_addr = narrow_xy_addr; + + genvar z; + generate for (z=0; z<NUM_MULTS_HALF; z=z+1) + begin : gen_rd_wide_xy_addr + assign rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W] = wide_xy_addr[z]; + end + endgenerate + + // + // Column Counter + // + reg [4:0] col_index; // current column index + reg [4:0] col_index_prev; // delayed column index value + reg [4:0] col_index_last; // index of the very last column + reg [4:0] col_index_next; // precomputed next column index + reg col_is_last; // flag set during the very last column + + always @(posedge clk) + // + col_index_prev <= col_index; + + // + // Column Counter Increment Logic + // + always @(posedge clk) + // + case (fsm_state_next) + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin + col_index <= 5'd0; + col_index_last <= word_index_last[7:3]; + col_index_next <= 5'd1; + col_is_last <= 1'b0; + + end + // + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin + col_index <= col_index_next; + col_is_last <= col_index_next == col_index_last; + col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1; + end + // + endcase + + + // + // Completion Flags + // + wire square_almost_done_comb; + reg square_almost_done_flop = 1'b0; + reg square_surely_done_flop = 1'b0; + + wire triangle_almost_done_comb; + reg triangle_almost_done_flop = 1'b0; + reg triangle_surely_done_flop = 1'b0; + reg triangle_tardy_done_flop = 1'b0; + + wire rectangle_almost_done_comb; + reg rectangle_almost_done_flop = 1'b0; + reg rectangle_surely_done_flop = 1'b0; + reg rectangle_tardy_done_flop = 1'b0; + + assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1; + assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index); + assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1; + + // + // Square Completion Flags + // + always @(posedge clk) begin + // + case (fsm_state) + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: + square_almost_done_flop <= square_almost_done_comb; + // + default: + square_almost_done_flop <= 1'b0; + // + endcase + // + square_surely_done_flop <= square_almost_done_flop; + // + end + + // + // Triangle Completion Flags + // + always @(posedge clk) begin + // + case (fsm_state) + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: + triangle_almost_done_flop <= triangle_almost_done_comb; + // + default: + triangle_almost_done_flop <= 1'b0; + // + endcase + // + triangle_surely_done_flop <= triangle_almost_done_flop; + triangle_tardy_done_flop <= triangle_surely_done_flop; + // + end + + // + // Rectangle Completion Flags + // + always @(posedge clk) begin + // + case (fsm_state) + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: + rectangle_almost_done_flop <= rectangle_almost_done_comb; + // + default: + rectangle_almost_done_flop <= 1'b0; + // + endcase + // + rectangle_surely_done_flop <= rectangle_almost_done_flop; + rectangle_tardy_done_flop <= rectangle_surely_done_flop; + // + end + + + // + // Narrow Storage Control Logic + // + always @(posedge clk) + // + if (rst) narrow_xy_ena <= 1'b0; + else begin + // + // Narrow Address + // + case (fsm_state_next) + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: narrow_xy_addr <= 8'd0; + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0; + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? + 8'd0 : narrow_xy_addr + 1'b1; + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? + 8'd1 : narrow_xy_addr + 1'b1; + // + default: narrow_xy_addr <= 8'dX; + // + endcase + // + // Narrow Bank + // + case (fsm_state_next) + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in; + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? + BANK_NARROW_EXT : BANK_NARROW_COEFF; + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? + BANK_NARROW_EXT : BANK_NARROW_Q; + // + default: narrow_xy_bank <= 2'bXX; + // + endcase + // + case (fsm_state_next) + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop; + // + default: narrow_xy_ena <= 1'b0; + // + endcase + // + end + + + // + // Wide Storage Control Logic + // + + wire [2:0] wide_offset_rom[0:3]; + + generate for (z=1; z<NUM_MULTS; z=z+2) + begin : gen_wide_offset_rom + assign wide_offset_rom[(z-1)/2] = z[2:0]; + end + endgenerate + + function [7:0] wide_xy_addr_next; + input [7:0] wide_xy_addr_current; + input [7:0] wide_xy_addr_last; + begin + if (wide_xy_addr_current > 8'd0) + wide_xy_addr_next = wide_xy_addr_current - 1'b1; + else + wide_xy_addr_next = wide_xy_addr_last; + end + endfunction + + integer j; + always @(posedge clk) + // + if (rst) begin + wide_xy_ena <= 1'b0; + wide_xy_ena_aux <= 1'b0; + end else begin + // + // Wide Address + // + for (j=0; j<(NUM_MULTS/2); j=j+1) + // + case (fsm_state_next) + // + // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + default: wide_xy_addr[j] <= 8'dX; + endcase + // + // Wide Aux Address + // + case (fsm_state_next) + // + // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0}; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX; + //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ? + //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4]; + // + default: wide_xy_addr_aux <= 8'dX; + endcase + // + // Wide Bank + // + case (fsm_state_next) + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L; // ? combine ? + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N; + default: wide_xy_bank <= 3'bXXX; + endcase + // + // Wide Aux Bank + // + case (fsm_state_next) + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's) + case (rcmb_xy_bank) + BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L; + BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H; + //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX + default: wide_xy_bank_aux <= 3'bXXX; + endcase + else wide_xy_bank_aux <= 3'bXXX; + default: wide_xy_bank_aux <= 3'bXXX; + endcase + // + // Wide Enable + // + case (fsm_state_next) + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1; + default: wide_xy_ena <= 1'b0; + endcase + // + // Wide Aux Enable + // + case (fsm_state_next) + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML); + default: wide_xy_ena_aux <= 1'b0; + endcase + // + end + + + // + // Delay Lines + // + always @(posedge clk) + // + narrow_xy_addr_dly <= narrow_xy_addr; + + + // + // DSP Array Logic + // + reg dsp_xy_ce_a = 1'b0; + reg dsp_xy_ce_b = 1'b0; + reg dsp_xy_ce_b_dly = 1'b0; + reg dsp_xy_ce_m = 1'b0; + reg dsp_xy_ce_p = 1'b0; + reg dsp_xy_ce_mode = 1'b0; + + reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}}; + + wire [5*18-1:0] dsp_x_a; + wire [5*18-1:0] dsp_y_a; + + reg [1*16-1:0] dsp_x_b; + reg [1*16-1:0] dsp_y_b; + + reg [ 1:0] dsp_xy_b_carry; + + wire [9*47-1:0] dsp_x_p; + wire [9*47-1:0] dsp_y_p; + + //generate for (z=0; z<(NUM_MULTS/2); z=z+1) + //begin : gen_dsp_xy_a_split + //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z]; + //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z]; + //end + //endgenerate + + assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout}; + assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout}; + + //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux; + //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux; + + always @(posedge clk) + // + dsp_xy_ce_b_dly <= dsp_xy_ce_b; + + + modexpng_dsp_array_block dsp_array_block_x + ( + .clk (clk), + + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), + + .mode_z (dsp_xy_mode_z), + + .a (dsp_x_a), + .b (dsp_x_b), + .p (dsp_x_p) + ); + + modexpng_dsp_array_block dsp_array_block_y + ( + .clk (clk), + + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), + + .mode_z (dsp_xy_mode_z), + + .a (dsp_y_a), + .b (dsp_y_b), + .p (dsp_y_p) + ); + + + + + // + // DSP Control Logic + // + reg narrow_xy_ena_dly1 = 1'b0; + reg narrow_xy_ena_dly2 = 1'b0; + + always @(posedge clk) + // + if (rst) begin + // + narrow_xy_ena_dly1 <= 1'b0; + narrow_xy_ena_dly2 <= 1'b0; + // + dsp_xy_ce_a <= 1'b0; + dsp_xy_ce_b <= 1'b0; + dsp_xy_ce_m <= 1'b0; + dsp_xy_ce_p <= 1'b0; + dsp_xy_ce_mode <= 1'b0; + // + end else begin + // + narrow_xy_ena_dly1 <= narrow_xy_ena; + narrow_xy_ena_dly2 <= narrow_xy_ena_dly1; + // + dsp_xy_ce_a <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2; + dsp_xy_ce_b <= narrow_xy_ena_dly2; + dsp_xy_ce_m <= dsp_xy_ce_b_dly; + dsp_xy_ce_p <= dsp_xy_ce_m; + dsp_xy_ce_mode <= dsp_xy_ce_b_dly; + // + end + + // + // DSP Feed Logic + // + reg dsp_merge_xy_b; + + always @(posedge clk) + // + case (fsm_state) + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_merge_xy_b <= 1'b1; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0; + endcase + + // + // On-the-fly Carry Recombination + // + wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry; + + always @(posedge clk) + // + if (narrow_xy_ena_dly2) begin // rewrite + // + if (!dsp_merge_xy_b) begin + dsp_x_b <= rd_narrow_x_dout[15:0]; + dsp_y_b <= rd_narrow_y_dout[15:0]; + dsp_xy_b_carry <= 2'b00; + end else begin + dsp_x_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_y_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16]; + end + // + end else begin + // + dsp_x_b <= {16{1'bX}}; + dsp_y_b <= {16{1'bX}}; + // + dsp_xy_b_carry <= 2'b00; + // + end + + + reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}}; + + function [NUM_MULTS:0] calc_mac_mode_z_square; + input [ 4:0] col_index_value; + input [ 7:0] narrow_xy_addr_value; + begin + if (narrow_xy_addr_value[7:3] == col_index_value) + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111}; + endcase + else + calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; + end + endfunction + + function [NUM_MULTS:0] calc_mac_mode_z_rectangle; + input [ 4:0] col_index_value; + input [ 7:0] narrow_xy_addr_value; + begin + if (narrow_xy_addr_value[7:3] == col_index_value) + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111}; + endcase + else + calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; + end + endfunction + + always @(posedge clk) + // + case (fsm_state_next) + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly); + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly); + default: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + endcase + + always @(posedge clk) begin + dsp_xy_mode_z <= dsp_xy_mode_z_adv1; + // + dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2; + dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3; + dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4; + end + + + + + + // + // Recombinator + // + reg rcmb_ena = 1'b0; + wire rcmb_rdy; + + modexpng_recombinator_block recombinator_block + ( + .clk (clk), + .rst (rst), + + .ena (rcmb_ena), + .rdy (rcmb_rdy), + + .mmm_fsm_state_next (fsm_state_next), + + .word_index_last (word_index_last), + + .dsp_xy_ce_p (dsp_xy_ce_p), + .dsp_x_p (dsp_x_p), + .dsp_y_p (dsp_y_p), + + .col_index (col_index), + .col_index_last (col_index_last), + + .rd_narrow_xy_addr (narrow_xy_addr), + .rd_narrow_xy_bank (narrow_xy_bank), + + .rcmb_wide_xy_bank (rcmb_wide_xy_bank), + .rcmb_wide_xy_addr (rcmb_wide_xy_addr), + .rcmb_wide_x_dout (rcmb_wide_x_dout), + .rcmb_wide_y_dout (rcmb_wide_y_dout), + .rcmb_wide_xy_valid (rcmb_wide_xy_valid), + + .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), + .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), + .rcmb_narrow_x_dout (rcmb_narrow_x_dout), + .rcmb_narrow_y_dout (rcmb_narrow_y_dout), + .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid), + + .rdct_narrow_xy_bank (rcmb_xy_bank), + .rdct_narrow_xy_addr (rcmb_xy_addr), + .rdct_narrow_x_dout (rcmb_x_dout), + .rdct_narrow_y_dout (rcmb_y_dout), + .rdct_narrow_xy_valid (rcmb_xy_valid) + + ); + + + // + // Recombinator Enable Logic + // + always @(posedge clk) + // + if (rst) rcmb_ena <= 1'b0; + else rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p; + + + // + // Handy Completion Flags + // + wire square_done = square_surely_done_flop; + wire triangle_done = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop; + wire rectangle_done = rectangle_tardy_done_flop; + + + // + // FSM Transition Logic + // + assign fsm_state_after_mult_square = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT; + assign fsm_state_after_mult_triangle = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT; + assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT; + + always @* begin + // + fsm_state_next = MMM_FSM_STATE_IDLE; + // + case (fsm_state) + MMM_FSM_STATE_IDLE: fsm_state_next = ena ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_IDLE; + + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG ; + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY ; + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY; + + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG ; + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY ; + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY; + + MMM_FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF; + + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY; + + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY; + + MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF; + + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY; + + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY; + + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF; + + default: fsm_state_next = MMM_FSM_STATE_IDLE ; + + endcase + // + end + + + // + // Reductor Control Logic + // + reg rdct_ena_reg = 1'b0; + + assign rdct_ena = rdct_ena_reg; + + always @(posedge clk) // add reset!!! + // + case (fsm_state) + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1; + default: rdct_ena_reg <= 1'b0; + endcase + + + +endmodule diff --git a/rtl/_modexpng_mmm_fsm.vh b/rtl/_modexpng_mmm_fsm.vh new file mode 100644 index 0000000..1c2a57b --- /dev/null +++ b/rtl/_modexpng_mmm_fsm.vh @@ -0,0 +1,43 @@ +localparam MMM_FSM_STATE_W = 32; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_IDLE = 0; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_1 = 1; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_2 = 2; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_3 = 3; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_1 = 4; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_2 = 5; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_3 = 6; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT = 11; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT = 14; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_HOLDOFF = 17; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_STOP = 40; diff --git a/rtl/_modexpng_recombinator_block.v b/rtl/_modexpng_recombinator_block.v new file mode 100644 index 0000000..61bf734 --- /dev/null +++ b/rtl/_modexpng_recombinator_block.v @@ -0,0 +1,1225 @@ +module modexpng_recombinator_block +( + clk, rst, + ena, rdy, + mmm_fsm_state_next, + word_index_last, + dsp_xy_ce_p, + dsp_x_p, dsp_y_p, + col_index, col_index_last, + rd_narrow_xy_addr, rd_narrow_xy_bank, + rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid, + rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid, + rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid +); + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + `include "../rtl/modexpng_mmm_fsm.vh" + + + input clk; + input rst; + input ena; + output rdy; + input [ MMM_FSM_STATE_W -1:0] mmm_fsm_state_next; + input [ OP_ADDR_W -1:0] word_index_last; + input dsp_xy_ce_p; + input [(NUM_MULTS+1) * MAC_W -1:0] dsp_x_p; + input [(NUM_MULTS+1) * MAC_W -1:0] dsp_y_p; + input [ COL_INDEX_W -1:0] col_index; + input [ COL_INDEX_W -1:0] col_index_last; + + input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; + input [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + + output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_wide_x_dout; + output [ WORD_EXT_W -1:0] rcmb_wide_y_dout; + output rcmb_wide_xy_valid; + + output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout; + output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout; + output rcmb_narrow_xy_valid; + + output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rdct_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rdct_narrow_x_dout; + output [ WORD_EXT_W -1:0] rdct_narrow_y_dout; + output rdct_narrow_xy_valid; + + + // + // Latches + // + reg [MAC_W-1:0] dsp_x_p_latch[0:NUM_MULTS]; + reg [MAC_W-1:0] dsp_y_p_latch[0:NUM_MULTS]; + + + // + // Mapping + // + wire [MAC_W-1:0] dsp_x_p_split[0:NUM_MULTS]; + wire [MAC_W-1:0] dsp_y_p_split[0:NUM_MULTS]; + + genvar z; + generate for (z=0; z<NUM_MULTS_AUX; z=z+1) + begin : gen_dsp_xy_p_split + assign dsp_x_p_split[z] = dsp_x_p[z*MAC_W +: MAC_W]; + assign dsp_y_p_split[z] = dsp_y_p[z*MAC_W +: MAC_W]; + end + endgenerate + + + // + // Delays + // + reg dsp_xy_ce_p_dly1 = 1'b0; + + always @(posedge clk) + // + if (rst) dsp_xy_ce_p_dly1 <= 1'b0; + else dsp_xy_ce_p_dly1 <= dsp_xy_ce_p; + + + // + // Registers + // + + // valid + reg xy_valid_lsb = 1'b0; + reg xy_aux_lsb = 1'b0; + reg xy_valid_msb = 1'b0; + + // bitmap + reg [NUM_MULTS-1:0] xy_bitmap_lsb = {NUM_MULTS{1'b0}}; + reg [NUM_MULTS-1:0] xy_bitmap_msb = {NUM_MULTS{1'b0}}; + + // index + reg [2:0] xy_index_lsb = 3'dX; + + // purge + reg xy_purge_lsb = 1'b0; + reg xy_purge_msb = 1'b0; + + // valid - latch + reg xy_valid_latch_lsb = 1'b0; + + // aux - latch + reg xy_aux_latch_lsb = 1'b0; + + // bitmap - latch + reg [NUM_MULTS-1:0] xy_bitmap_latch_lsb = MULT_BITMAP_ZEROES; + reg [NUM_MULTS-1:0] xy_bitmap_latch_msb = MULT_BITMAP_ZEROES; + + // index - latch + reg [MAC_INDEX_W-1:0] xy_index_latch_lsb = MAC_INDEX_DONT_CARE; + + // purge - index + reg xy_purge_latch_lsb = 1'b0; + reg xy_purge_latch_msb = 1'b0; + + // + reg xy_valid_lsb_adv[1:6]; + reg xy_valid_msb_adv[1:6]; + reg xy_aux_lsb_adv[1:6]; + reg [NUM_MULTS-1:0] xy_bitmap_lsb_adv[1:6]; + reg [NUM_MULTS-1:0] xy_bitmap_msb_adv[1:6]; + reg [MAC_INDEX_W-1:0] xy_index_lsb_adv[1:6]; + reg [MAC_INDEX_W-1:0] xy_index_msb_adv[1:6]; + reg xy_purge_lsb_adv[1:6]; + reg xy_purge_msb_adv[1:6]; + + reg [1:0] rcmb_mode; + + always @(posedge clk) + // + if (ena) + // + case (mmm_fsm_state_next) + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3; + default: rcmb_mode <= 2'd0; + endcase + + + integer i; + initial for (i=1; i<6; i=i+1) begin + xy_valid_lsb_adv[i] = 1'b0; + xy_valid_msb_adv[i] = 1'b0; + xy_aux_lsb_adv[i] = 1'b0; + xy_bitmap_lsb_adv[i] = {8{1'b0}}; + xy_bitmap_msb_adv[i] = {8{1'b0}}; + xy_index_lsb_adv[i] = 3'dX; + xy_index_msb_adv[i] = 3'dX; + xy_purge_lsb_adv[i] = 1'b0; + xy_purge_msb_adv[i] = 1'b0; + end + + function calc_square_triangle_valid_lsb; + // + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [ OP_ADDR_W-1:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + calc_square_triangle_valid_lsb = 1'b1; + else + calc_square_triangle_valid_lsb = 1'b0; + // + end + endfunction + + function calc_square_valid_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [ OP_ADDR_W-1:0] narrow_xy_addr_value; + begin + calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_triangle_valid_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_rectangle_valid_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT; + else + calc_rectangle_valid_lsb = 1'b0; + // + end + endfunction + + function calc_triangle_aux_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_bank_value == BANK_NARROW_EXT) + calc_triangle_aux_lsb = 1'b1; + else + calc_triangle_aux_lsb = 1'b0; + // + end + endfunction + + function [7:0] calc_square_triangle_bitmap_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + // + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_square_triangle_bitmap_lsb = 8'b00000001; + 3'b001: calc_square_triangle_bitmap_lsb = 8'b00000010; + 3'b010: calc_square_triangle_bitmap_lsb = 8'b00000100; + 3'b011: calc_square_triangle_bitmap_lsb = 8'b00001000; + 3'b100: calc_square_triangle_bitmap_lsb = 8'b00010000; + 3'b101: calc_square_triangle_bitmap_lsb = 8'b00100000; + 3'b110: calc_square_triangle_bitmap_lsb = 8'b01000000; + 3'b111: calc_square_triangle_bitmap_lsb = 8'b10000000; + endcase + // + else + calc_square_triangle_bitmap_lsb = {8{1'b0}}; + // + end + endfunction + + function [7:0] calc_square_bitmap_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [7:0] calc_triangle_bitmap_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [7:0] calc_rectangle_bitmap_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT)) + // + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001; + 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010; + 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100; + 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000; + 3'b100: calc_rectangle_bitmap_lsb = 8'b00010000; + 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000; + 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000; + 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000; + endcase + // + else + calc_rectangle_bitmap_lsb = {8{1'b0}}; + // + end + endfunction + + /* + * These can be simplified (the difference between square/triangle and + * rectangle is that the bank is checked or not). A universal function would + * accept a parameter that tells it whether it should check the bank or not. + * Let's do it later, too early to optimize now, it seems. + * + * + */ + + function [2:0] calc_square_triangle_index_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + // + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_square_triangle_index_lsb = 3'd0; + 3'b001: calc_square_triangle_index_lsb = 3'd1; + 3'b010: calc_square_triangle_index_lsb = 3'd2; + 3'b011: calc_square_triangle_index_lsb = 3'd3; + 3'b100: calc_square_triangle_index_lsb = 3'd4; + 3'b101: calc_square_triangle_index_lsb = 3'd5; + 3'b110: calc_square_triangle_index_lsb = 3'd6; + 3'b111: calc_square_triangle_index_lsb = 3'd7; + endcase + // + else + calc_square_triangle_index_lsb = 3'dX; + // + end + endfunction + + function [2:0] calc_square_index_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [2:0] calc_triangle_index_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [2:0] calc_rectangle_index_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] slim_bram_xy_bank_value; + input [7:0] slim_bram_xy_addr_value; + begin + // + if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_NARROW_EXT)) + // + case (slim_bram_xy_addr_value[2:0]) + 3'b000: calc_rectangle_index_lsb = 3'd0; + 3'b001: calc_rectangle_index_lsb = 3'd1; + 3'b010: calc_rectangle_index_lsb = 3'd2; + 3'b011: calc_rectangle_index_lsb = 3'd3; + 3'b100: calc_rectangle_index_lsb = 3'd4; + 3'b101: calc_rectangle_index_lsb = 3'd5; + 3'b110: calc_rectangle_index_lsb = 3'd6; + 3'b111: calc_rectangle_index_lsb = 3'd7; + endcase + // + else + calc_rectangle_index_lsb = 3'dX; + // + end + endfunction + + function calc_square_rectangle_purge_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + calc_square_rectangle_purge_lsb = narrow_xy_addr_value[7:3] == col_index_last_value; + else + calc_square_rectangle_purge_lsb = 1'b0; + // + end + endfunction + + function calc_square_purge_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_rectangle_purge_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_square_valid_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if (narrow_xy_addr_value == index_last_value) + calc_square_valid_msb = 1'b1; + else + calc_square_valid_msb = 1'b0; + // + end + endfunction + + function calc_rectangle_valid_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) + calc_rectangle_valid_msb = 1'b1; + else + calc_rectangle_valid_msb = 1'b0; + // + end + endfunction + + function [7:0] calc_square_bitmap_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if (narrow_xy_addr_value == index_last_value) begin + calc_square_bitmap_msb[7] = col_index_value != col_index_last_value; + calc_square_bitmap_msb[6:0] = 7'b1111111; + end else + calc_square_bitmap_msb[7:0] = 8'b00000000; + // + end + endfunction + + function [7:0] calc_rectangle_bitmap_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) begin + calc_rectangle_bitmap_msb[7:0] = 8'b11111111; + end else + calc_rectangle_bitmap_msb[7:0] = 8'b00000000; + // + end + endfunction + + function calc_square_purge_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if (narrow_xy_addr_value == index_last_value) + calc_square_purge_msb = col_index_value == col_index_last_value; + else + calc_square_purge_msb = 1'b0; + // + end + endfunction + + function calc_rectangle_purge_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) + calc_rectangle_purge_msb = col_index_value == col_index_last_value; + else + calc_rectangle_purge_msb = 1'b0; + // + end + endfunction + + + reg rcmb_xy_lsb_ce = 1'b0; + reg rcmb_xy_lsb_ce_aux; + reg [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000; + wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0]; + reg rcmb_xy_lsb_clr; + + reg [46:0] rcmb_x_lsb_din; + reg [46:0] rcmb_y_lsb_din; + wire [15:0] rcmb_x_lsb_dout; + wire [15:0] rcmb_y_lsb_dout; + + reg rcmb_xy_msb_ce = 1'b0; + reg [ 1:0] rcmb_xy_msb_ce_purge = 2'b00; + wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0]; + reg rcmb_xy_msb_clr; + + reg [46:0] rcmb_x_msb_din; + reg [46:0] rcmb_y_msb_din; + wire [15:0] rcmb_x_msb_dout; + wire [15:0] rcmb_y_msb_dout; + + modexpng_recombinator_cell recomb_x_lsb + ( + .clk (clk), + .ce (rcmb_xy_lsb_ce_combined), + .clr (rcmb_xy_lsb_clr), + .din (rcmb_x_lsb_din), + .dout (rcmb_x_lsb_dout) + ); + modexpng_recombinator_cell recomb_y_lsb + ( + .clk (clk), + .ce (rcmb_xy_lsb_ce_combined), + .clr (rcmb_xy_lsb_clr), + .din (rcmb_y_lsb_din), + .dout (rcmb_y_lsb_dout) + ); + + modexpng_recombinator_cell recomb_x_msb + ( + .clk (clk), + .ce (rcmb_xy_msb_ce_combined), + .clr (rcmb_xy_msb_clr), + .din (rcmb_x_msb_din), + .dout (rcmb_x_msb_dout) + ); + + modexpng_recombinator_cell recomb_y_msb + ( + .clk (clk), + .ce (rcmb_xy_msb_ce_combined), + .clr (rcmb_xy_msb_clr), + .din (rcmb_y_msb_din), + .dout (rcmb_y_msb_dout) + ); + + always @(posedge clk) begin + // + rcmb_xy_lsb_ce <= xy_valid_latch_lsb; + rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb; + rcmb_xy_msb_ce <= xy_bitmap_latch_msb[0]; + // + if (xy_purge_latch_lsb) + rcmb_xy_lsb_ce_purge <= 3'b111; + else + rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]}; + // + if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1]) + rcmb_xy_msb_ce_purge = 2'b11; + else + rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]}; + // + end + + + always @(posedge clk) + // + if (ena) begin + rcmb_xy_lsb_clr <= 1'b1; + rcmb_xy_msb_clr <= 1'b1; + end else begin + if (rcmb_xy_lsb_ce) rcmb_xy_lsb_clr <= 1'b0; + if (rcmb_xy_msb_ce) rcmb_xy_msb_clr <= 1'b0; + end + + always @(posedge clk) + // + if (xy_valid_latch_lsb) begin + rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb]; + rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb]; + end else if (xy_aux_latch_lsb) begin + rcmb_x_lsb_din <= dsp_x_p_latch[8]; + rcmb_y_lsb_din <= dsp_y_p_latch[8]; + end else begin + rcmb_x_lsb_din <= {47{1'b0}}; + rcmb_y_lsb_din <= {47{1'b0}}; + end + + always @(posedge clk) + // + if (xy_bitmap_latch_msb[0]) begin + rcmb_x_msb_din <= dsp_x_p_latch[0]; + rcmb_y_msb_din <= dsp_y_p_latch[0]; + end else begin + rcmb_x_msb_din <= {47{1'b0}}; + rcmb_y_msb_din <= {47{1'b0}}; + end + + + always @(posedge clk) + // + case (mmm_fsm_state_next) + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_aux_lsb_adv [6] <= 1'b0; + xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + // + xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + // + end + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_aux_lsb_adv [6] <= calc_triangle_aux_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_purge_lsb_adv [6] <= 1'b0; + // + xy_valid_msb_adv [6] <= 1'b0; + xy_bitmap_msb_adv[6] <= {8{1'b0}}; + xy_purge_msb_adv [6] <= 1'b0; + // + end + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_aux_lsb_adv [6] <= 1'b0; + xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + // + xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + // + end + // + default: begin + // + xy_valid_lsb_adv [6] <= 1'b0; + xy_aux_lsb_adv [6] <= 1'b0; + xy_bitmap_lsb_adv[6] <= {8{1'b0}}; + xy_index_lsb_adv [6] <= 3'dX; + xy_purge_lsb_adv [6] <= 1'b0; + // + xy_valid_msb_adv [6] <= 1'b0; + xy_bitmap_msb_adv[6] <= {8{1'b0}}; + xy_purge_msb_adv [6] <= 1'b0; + // + end + // + endcase + + + always @(posedge clk) begin + // + xy_valid_lsb <= xy_valid_lsb_adv [1]; + xy_aux_lsb <= xy_aux_lsb_adv [1]; + xy_bitmap_lsb <= xy_bitmap_lsb_adv[1]; + xy_index_lsb <= xy_index_lsb_adv [1]; + xy_purge_lsb <= xy_purge_lsb_adv [1]; + // + xy_valid_latch_lsb <= xy_valid_lsb; + xy_aux_latch_lsb <= xy_aux_lsb; + xy_bitmap_latch_lsb <= xy_bitmap_lsb; + xy_index_latch_lsb <= xy_index_lsb; + xy_purge_latch_lsb <= xy_purge_lsb; + // + xy_valid_msb <= xy_valid_msb_adv[1]; + xy_bitmap_msb <= xy_bitmap_msb_adv[1]; + xy_purge_msb <= xy_purge_msb_adv[1]; + // + if (xy_valid_msb) begin + xy_bitmap_latch_msb <= xy_bitmap_msb; + xy_purge_latch_msb <= xy_purge_msb; + end else begin + xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[7:1]}; + end + // + // + for (i=1; i<6; i=i+1) begin + xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1]; + xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1]; + xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1]; + xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1]; + xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1]; + // + xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1]; + xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1]; + xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1]; + end + // + end + + always @(posedge clk) + // + if (xy_bitmap_latch_msb[1]) // only shift 7 times + // + for (i=0; i<8; i=i+1) + // + if (i < 7) begin + dsp_x_p_latch[i] <= dsp_x_p_latch[i+1]; + dsp_y_p_latch[i] <= dsp_y_p_latch[i+1]; + end else begin + dsp_x_p_latch[i] <= {47{1'bX}}; + dsp_y_p_latch[i] <= {47{1'bX}}; + end + // + else if (dsp_xy_ce_p_dly1) begin + // + for (i=0; i<8; i=i+1) + // + if (xy_bitmap_lsb[i]) begin + dsp_x_p_latch[i] <= dsp_x_p_split[i]; + dsp_y_p_latch[i] <= dsp_y_p_split[i]; + end else if (xy_valid_msb && xy_bitmap_msb[i]) begin + dsp_x_p_latch[i] <= dsp_x_p_split[i]; + dsp_y_p_latch[i] <= dsp_y_p_split[i]; + end + // + if (xy_aux_lsb) begin + dsp_x_p_latch[8] <= dsp_x_p_split[8]; + dsp_y_p_latch[8] <= dsp_y_p_split[8]; + end + // + end + + reg rcmb_xy_lsb_valid = 1'b0; + reg rcmb_xy_msb_valid = 1'b0; + + always @(posedge clk) + // + if (rst) begin + rcmb_xy_lsb_valid <= 1'b0; + rcmb_xy_msb_valid <= 1'b0; + end else begin + rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined; + rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined; + end + + + reg [BANK_ADDR_W-1:0] wide_xy_bank; + reg [ 7:0] wide_xy_addr; + reg [17:0] wide_x_dout; + reg [17:0] wide_y_dout; + reg wide_xy_valid = 1'b0; + + reg [BANK_ADDR_W-1:0] narrow_xy_bank; + reg [ 7:0] narrow_xy_addr; + reg [17:0] narrow_x_dout; + reg [17:0] narrow_y_dout; + reg narrow_xy_valid = 1'b0; + + reg [BANK_ADDR_W-1:0] rdct_xy_bank; + reg [ 7:0] rdct_xy_addr; + reg [17:0] rdct_x_dout; + reg [17:0] rdct_y_dout; + reg rdct_xy_valid = 1'b0; + + reg [ 7:0] cnt_lsb; + reg [ 7:0] cnt_msb; + + reg cnt_lsb_wrapped; + reg cnt_msb_wrapped; + + reg [31:0] rcmb_xy_msb_carry_0; + reg [31:0] rcmb_xy_msb_carry_1; + + reg [31:0] rcmb_xy_msb_delay_0; + reg [31:0] rcmb_xy_msb_delay_1; + reg [31:0] rcmb_xy_msb_delay_2; + + reg [ 7:0] rcmb_msb_cnt_delay_0 = 8'd0; + reg [ 7:0] rcmb_msb_cnt_delay_1 = 8'd0; + reg [ 7:0] rcmb_msb_cnt_delay_2 = 8'd0; + + reg rcmb_msb_flag_delay_0; + reg rcmb_msb_flag_delay_1; + reg rcmb_msb_flag_delay_2; + + assign rcmb_wide_xy_bank = wide_xy_bank; + assign rcmb_wide_xy_addr = wide_xy_addr; + assign rcmb_wide_x_dout = wide_x_dout; + assign rcmb_wide_y_dout = wide_y_dout; + assign rcmb_wide_xy_valid = wide_xy_valid; + + assign rcmb_narrow_xy_bank = narrow_xy_bank; + assign rcmb_narrow_xy_addr = narrow_xy_addr; + assign rcmb_narrow_x_dout = narrow_x_dout; + assign rcmb_narrow_y_dout = narrow_y_dout; + assign rcmb_narrow_xy_valid = narrow_xy_valid; + + assign rdct_narrow_xy_bank = rdct_xy_bank; + assign rdct_narrow_xy_addr = rdct_xy_addr; + assign rdct_narrow_x_dout = rdct_x_dout; + assign rdct_narrow_y_dout = rdct_y_dout; + assign rdct_narrow_xy_valid = rdct_xy_valid; + + reg rdy_reg = 1'b1; + reg rdy_adv = 1'b1; + + assign rdy = rdy_reg; + + + always @(posedge clk) + // + if (ena) rdy_reg <= 1'b0; + else rdy_reg <= rdy_adv; + + task advance_rcmb_msb_delay; + input [15:0] dout_x; + input [15:0] dout_y; + input [ 7:0] cnt; + input flag; + begin + // + rcmb_xy_msb_delay_0 <= {dout_y, dout_x}; + rcmb_xy_msb_delay_1 <= rcmb_xy_msb_delay_0; + rcmb_xy_msb_delay_2 <= rcmb_xy_msb_delay_1; + // + rcmb_msb_cnt_delay_0 <= cnt; + rcmb_msb_cnt_delay_1 <= rcmb_msb_cnt_delay_0; + rcmb_msb_cnt_delay_2 <= rcmb_msb_cnt_delay_1; + // + rcmb_msb_flag_delay_0 <= flag; + rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0; + rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1; + // + end + endtask + + task shift_rcmb_msb_carry; + input [15:0] dout_x; + input [15:0] dout_y; + begin + rcmb_xy_msb_carry_0 <= {dout_y, dout_x}; + rcmb_xy_msb_carry_1 <= rcmb_xy_msb_carry_0; + end + endtask + + task _update_wide; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + wide_xy_bank <= bank; + wide_xy_addr <= addr; + wide_x_dout <= dout_x; + wide_y_dout <= dout_y; + wide_xy_valid <= valid; + end + endtask + + task _update_narrow; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + narrow_xy_bank <= bank; + narrow_xy_addr <= addr; + narrow_x_dout <= dout_x; + narrow_y_dout <= dout_y; + narrow_xy_valid <= valid; + end + endtask + + task _update_rdct; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + rdct_xy_bank <= bank; + rdct_xy_addr <= addr; + rdct_x_dout <= dout_x; + rdct_y_dout <= dout_y; + rdct_xy_valid <= valid; + end + endtask + + task set_wide; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_wide(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + + task set_narrow; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_narrow(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + + task set_rdct; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_rdct(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + + task clear_wide; + begin + _update_wide(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + task clear_narrow; + begin + _update_narrow(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + task clear_rdct; + begin + _update_rdct(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + task _set_cnt_lsb; + input [7:0] cnt; + input wrapped; + begin + cnt_lsb <= cnt; + cnt_lsb_wrapped <= wrapped; + end + endtask + + task _set_cnt_msb; + input [7:0] cnt; + input wrapped; + begin + cnt_msb <= cnt; + cnt_msb_wrapped <= wrapped; + end + endtask + + task inc_cnt_lsb; + begin + if (cnt_lsb == word_index_last) + _set_cnt_lsb(8'd0, 1'b1); + else + _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped); + end + endtask + + task inc_cnt_both; + begin + inc_cnt_lsb; + inc_cnt_msb; + end + endtask + + task inc_cnt_msb; + begin + if (cnt_msb == word_index_last) + _set_cnt_msb(8'd0, 1'b1); + else + _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped); + end + endtask + + task clr_cnt_lsb; + begin + _set_cnt_lsb(8'd0, 1'b0); + end + endtask + + task clr_cnt_msb; + begin + _set_cnt_msb(8'd0, 1'b0); + end + endtask + + + + wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid}; + + always @(posedge clk) + // + if (ena) begin + clr_cnt_lsb(); + clr_cnt_msb(); + end else if (!rdy) + // + case (rcmb_mode) + 2'd1: recombine_square(); + 2'd2: recombine_triangle(); + 2'd3: recombine_rectangle(); + endcase + + wire [17:0] rcmb_x_lsb_dout_pad = {2'b00, rcmb_x_lsb_dout}; + wire [17:0] rcmb_y_lsb_dout_pad = {2'b00, rcmb_y_lsb_dout}; + + wire [17:0] rcmb_x_msb_dout_pad = {2'b00, rcmb_x_msb_dout}; + wire [17:0] rcmb_y_msb_dout_pad = {2'b00, rcmb_y_msb_dout}; + + wire [17:0] rcmb_x_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[15: 0]}; + wire [17:0] rcmb_y_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[31:16]}; + + wire [17:0] rcmb_x_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_x_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[15: 0]}}; + wire [17:0] rcmb_y_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_y_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[31:16]}}; + + + task recombine_square; + // + begin + // + case (rcmb_xy_valid) + // + 2'b01: inc_cnt_lsb; + 2'b10: inc_cnt_msb; + 2'b11: inc_cnt_both; + // + endcase + // + case (rcmb_xy_valid) + // + 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); + else clear_wide; + // + 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + // + 2'b10: if (cnt_msb < 8'd2) clear_wide; + else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + // + 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); + else set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + // + endcase + // + case (rcmb_xy_valid) + // + 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); + 2'b10: if (cnt_msb < 8'd2) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout); + // + 2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1); + if (cnt_lsb_wrapped) shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}}); + end + // + endcase + // + end + // + endtask + + + task recombine_triangle; + // + begin + // + case (rcmb_xy_valid) + // + 2'b01: inc_cnt_lsb(); + // + endcase + // + case (rcmb_xy_valid) + // + 2'b00: clear_narrow; + 2'b01: if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + else set_narrow(BANK_NARROW_EXT, 8'd1, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b10: clear_narrow; + 2'b11: clear_narrow; + // + endcase + // + end + // + endtask + + + task recombine_rectangle; + // + begin + // + case (rcmb_xy_valid) + // + 2'b01: inc_cnt_lsb; + 2'b10: inc_cnt_msb; + 2'b11: inc_cnt_both; + // + endcase +// // + case (rcmb_xy_valid) +// // + 2'b00: if (rcmb_msb_flag_delay_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); + else clear_rdct; + 2'b01: set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b10: if (!cnt_msb_wrapped) begin + if (cnt_msb < 8'd2) clear_rdct; + else set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + end else set_rdct(BANK_RCMB_EXT, 8'd0, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + + 2'b11: set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); +// // + endcase +// // + case (rcmb_xy_valid) +// // + 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); + 2'b10: begin + if ((cnt_msb < 8'd2) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout); + if (cnt_msb_wrapped) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); + end +// // + 2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1); + shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}}); + end +// // + endcase + // + end + // + endtask + + + always @(posedge clk) + // + if (ena) begin + rdy_adv <= 1'b0; + end else if (!rdy_reg) begin + // + case (rcmb_mode) + // + 2'd1: case (rcmb_xy_valid) + // + 2'b00: begin + // + if (rcmb_msb_flag_delay_2) begin + // + rdy_adv <= ~rcmb_msb_flag_delay_1; + // + end + // + end + // + endcase + // + 2'd2: case (rcmb_xy_valid) + // + 2'b01: rdy_adv <= cnt_lsb_wrapped; // + // + endcase + // + 2'd3: case (rcmb_xy_valid) + // + 2'b00: begin + // + if (rcmb_msb_flag_delay_2) begin + // + rdy_adv <= ~rcmb_msb_flag_delay_1; + // + end + // + end + // + endcase + // + endcase + // + end + + + + // add ready for mode=3 +endmodule diff --git a/rtl/_modexpng_recombinator_cell.v b/rtl/_modexpng_recombinator_cell.v new file mode 100644 index 0000000..b72395e --- /dev/null +++ b/rtl/_modexpng_recombinator_cell.v @@ -0,0 +1,40 @@ +module modexpng_recombinator_cell +( + clk, + ce, clr, + din, dout +); + + // + // Headers + // + `include "modexpng_parameters.vh" + + // + // Ports + // + input clk; + input ce; + input clr; + input [ MAC_W -1:0] din; + output [WORD_W -1:0] dout; + + reg [WORD_W -2:0] z; + reg [WORD_W :0] y; + reg [WORD_W +1:0] x; + + assign dout = x[WORD_W-1:0]; + + wire [WORD_W -2:0] din_z = din[3*WORD_W -2 :2*WORD_W]; // [46:32] + wire [WORD_W -1:0] din_y = din[2*WORD_W -1 : WORD_W]; // [31:16] + wire [WORD_W -1:0] din_x = din[ WORD_W -1 : 0]; // [15: 0] + + always @(posedge clk) + // + if (ce) begin + z <= din_z; + y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z}; + x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {WORD_NULL, x[WORD_EXT_W-1:WORD_W]}; + end + +endmodule diff --git a/rtl/_modexpng_reductor.v b/rtl/_modexpng_reductor.v new file mode 100644 index 0000000..25cf394 --- /dev/null +++ b/rtl/_modexpng_reductor.v @@ -0,0 +1,252 @@ +module modexpng_reductor +( + clk, rst, + ena, rdy, + word_index_last, + rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux, + rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid, + rdct_final_xy_addr, rdct_final_x_dout, rdct_final_y_dout, rdct_final_xy_valid +); + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + + + input clk; + input rst; + // + input ena; + output rdy; + // + input [ OP_ADDR_W -1:0] word_index_last; + // + input [BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; + input [ WORD_EXT_W -1:0] rd_wide_x_dout_aux; + input [ WORD_EXT_W -1:0] rd_wide_y_dout_aux; + // + input [BANK_ADDR_W -1:0] rcmb_final_xy_bank; + input [ OP_ADDR_W -1:0] rcmb_final_xy_addr; + input [ WORD_EXT_W -1:0] rcmb_final_x_dout; + input [ WORD_EXT_W -1:0] rcmb_final_y_dout; + input rcmb_final_xy_valid; + // + output [ OP_ADDR_W -1:0] rdct_final_xy_addr; + output [ WORD_EXT_W -1:0] rdct_final_x_dout; + output [ WORD_EXT_W -1:0] rdct_final_y_dout; + output rdct_final_xy_valid; + + + // + // Ready + // + reg rdy_reg = 1'b1; + reg busy_now = 1'b0; + + assign rdy = rdy_reg; + + always @(posedge clk) + // + if (rst) rdy_reg <= 1'b1; + else begin + if (rdy && ena) rdy_reg <= 1'b0; + if (!rdy && !busy_now) rdy_reg <= 1'b1; + end + + + // + // Pipeline (Delay Match) + // + reg rcmb_xy_valid_dly1 = 1'b0; + reg rcmb_xy_valid_dly2 = 1'b0; + reg rcmb_xy_valid_dly3 = 1'b0; + + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3; + + reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly1; + reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly2; + reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly3; + + reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly1; + reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2; + reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3; + + reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1; + reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2; + reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3; + + always @(posedge clk) + // + if (rst) begin + rcmb_xy_valid_dly1 <= 1'b0; + rcmb_xy_valid_dly2 <= 1'b0; + rcmb_xy_valid_dly3 <= 1'b0; + end else begin + rcmb_xy_valid_dly1 <= rcmb_final_xy_valid; + rcmb_xy_valid_dly2 <= rcmb_xy_valid_dly1; + rcmb_xy_valid_dly3 <= rcmb_xy_valid_dly2; + end + + + always @(posedge clk) begin + // + if (rcmb_final_xy_valid) begin + rcmb_xy_bank_dly1 <= rcmb_final_xy_bank; + rcmb_xy_addr_dly1 <= rcmb_final_xy_addr; + rcmb_x_dout_dly1 <= rcmb_final_x_dout; + rcmb_y_dout_dly1 <= rcmb_final_y_dout; + end + // + if (rcmb_xy_valid_dly1) begin + rcmb_xy_bank_dly2 <= rcmb_xy_bank_dly1; + rcmb_xy_addr_dly2 <= rcmb_xy_addr_dly1; + rcmb_x_dout_dly2 <= rcmb_x_dout_dly1; + rcmb_y_dout_dly2 <= rcmb_y_dout_dly1; + end + // + if (rcmb_xy_valid_dly2) begin + rcmb_xy_bank_dly3 <= rcmb_xy_bank_dly2; + rcmb_xy_addr_dly3 <= rcmb_xy_addr_dly2; + rcmb_x_dout_dly3 <= rcmb_x_dout_dly2; + rcmb_y_dout_dly3 <= rcmb_y_dout_dly2; + end + // + end + + + // + // Carry Logic + // + reg [RDCT_CARRY_W -1:0] rcmb_x_lsb_carry; + reg [WORD_W -1:0] rcmb_x_lsb_dummy; + reg [WORD_EXT_W -1:0] rcmb_x_lsb_dout; + + reg [RDCT_CARRY_W -1:0] rcmb_y_lsb_carry; + reg [WORD_W -1:0] rcmb_y_lsb_dummy; + reg [WORD_EXT_W -1:0] rcmb_y_lsb_dout; + + + // + // Carry Computation + // + always @(posedge clk) + // + if (ena) begin + rcmb_x_lsb_carry <= RDCT_CARRY_ZEROES; + rcmb_y_lsb_carry <= RDCT_CARRY_ZEROES; + end else if (rcmb_xy_valid_dly3) + // + case (rcmb_xy_bank_dly3) + + BANK_RCMB_ML: begin + {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry; + {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry; + end + + BANK_RCMB_MH: + if (rcmb_xy_addr_dly3 == OP_ADDR_ZERO) begin + {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry; + {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry; + end + + endcase + + + // + // Reduction + // + reg [ OP_ADDR_W -1:0] rdct_xy_addr; + reg [WORD_EXT_W -1:0] rdct_x_dout; + reg [WORD_EXT_W -1:0] rdct_y_dout; + reg rdct_xy_valid = 1'b0; + + assign rdct_final_xy_addr = rdct_xy_addr; + assign rdct_final_x_dout = rdct_x_dout; + assign rdct_final_y_dout = rdct_y_dout; + assign rdct_final_xy_valid = rdct_xy_valid; + + task _update_rdct; + input [ OP_ADDR_W -1:0] addr; + input [WORD_EXT_W -1:0] dout_x; + input [WORD_EXT_W -1:0] dout_y; + input valid; + begin + rdct_xy_addr <= addr; + rdct_x_dout <= dout_x; + rdct_y_dout <= dout_y; + rdct_xy_valid <= valid; + end + endtask + + task set_rdct; + input [ OP_ADDR_W -1:0] addr; + input [WORD_EXT_W -1:0] dout_x; + input [WORD_EXT_W -1:0] dout_y; + begin + _update_rdct(addr, dout_x, dout_y, 1'b1); + end + endtask + + task clear_rdct; + begin + _update_rdct(OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); + end + endtask + + + // + // Helper Wires + // + wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux; + wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux; + + wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + {WORD_NULL, rcmb_x_lsb_carry}; + wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + {WORD_NULL, rcmb_y_lsb_carry}; + + + // + // + // + always @(posedge clk) + // + if (rst) clear_rdct; + else begin + // + clear_rdct; + // + if (busy_now && rcmb_xy_valid_dly3) + // + case (rcmb_xy_bank_dly3) + + BANK_RCMB_MH: + if (rcmb_xy_addr_dly3 == OP_ADDR_ONE) + set_rdct(OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry); + else if (rcmb_xy_addr_dly3 > OP_ADDR_ONE) + set_rdct(rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y); + + BANK_RCMB_EXT: + set_rdct(word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3); + + endcase + // + end + + + + // + // Busy + // + always @(posedge clk) + // + if (rst) busy_now <= 1'b0; + else begin + if (rdy && ena) busy_now <= 1'b1; + //if (!rdy && !busy_now) rdy <= 1'b1; + end + + +endmodule diff --git a/rtl/_modexpng_storage_block.v b/rtl/_modexpng_storage_block.v new file mode 100644 index 0000000..d6ef1ee --- /dev/null +++ b/rtl/_modexpng_storage_block.v @@ -0,0 +1,219 @@ +module modexpng_storage_block +( + clk, rst, + + wr_wide_xy_ena, + wr_wide_xy_bank, + wr_wide_xy_addr, + wr_wide_x_din, + wr_wide_y_din, + + wr_narrow_xy_ena, + wr_narrow_xy_bank, + wr_narrow_xy_addr, + wr_narrow_x_din, + wr_narrow_y_din, + + rd_wide_xy_ena, + rd_wide_xy_ena_aux, + rd_wide_xy_bank, + rd_wide_xy_bank_aux, + rd_wide_xy_addr, + rd_wide_xy_addr_aux, + rd_wide_x_dout, + rd_wide_y_dout, + rd_wide_x_dout_aux, + rd_wide_y_dout_aux, + + rd_narrow_xy_ena, + rd_narrow_xy_bank, + rd_narrow_xy_addr, + rd_narrow_x_dout, + rd_narrow_y_dout +); + + + // + // Headers + // + `include "modexpng_parameters.vh" + + + // + // Ports + // + input clk; + input rst; + + input wr_wide_xy_ena; + input [BANK_ADDR_W -1:0] wr_wide_xy_bank; + input [ OP_ADDR_W -1:0] wr_wide_xy_addr; + input [ WORD_EXT_W -1:0] wr_wide_x_din; + input [ WORD_EXT_W -1:0] wr_wide_y_din; + + input wr_narrow_xy_ena; + input [BANK_ADDR_W -1:0] wr_narrow_xy_bank; + input [ OP_ADDR_W -1:0] wr_narrow_xy_addr; + input [ WORD_EXT_W -1:0] wr_narrow_x_din; + input [ WORD_EXT_W -1:0] wr_narrow_y_din; + + input rd_wide_xy_ena; + input rd_wide_xy_ena_aux; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + input [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr; + input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; + output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout; + output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout; + output [ WORD_EXT_W -1:0] rd_wide_x_dout_aux; + output [ WORD_EXT_W -1:0] rd_wide_y_dout_aux; + + input rd_narrow_xy_ena; + input [BANK_ADDR_W -1:0] rd_narrow_xy_bank; + input [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rd_narrow_x_dout; + output [ WORD_EXT_W -1:0] rd_narrow_y_dout; + + + // + // Internal Registers + // + reg rd_wide_xy_reg_ena = 1'b0; + reg rd_wide_xy_reg_ena_aux = 1'b0; + reg rd_narrow_xy_reg_ena = 1'b0; + + always @(posedge clk) begin + // + rd_wide_xy_reg_ena <= rst ? 1'b0 : rd_wide_xy_ena; + rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux; + rd_narrow_xy_reg_ena <= rst ? 1'b0 : rd_narrow_xy_ena; + // + end + + + // + // Helper Signals + // + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset[0:NUM_MULTS_HALF-1]; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset_aux; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset; + + assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr}; + assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux}; + assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr}; + assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr}; + + // + // "Wide" Storage + // + genvar z; + generate for (z=0; z<NUM_MULTS_HALF; z=z+1) + begin : gen_wide_bram + // + assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]}; + // + modexpng_sdp_36k_wrapper wide_bram_x + ( + .clk (clk), + + .ena (wr_wide_xy_ena), + .wea (wr_wide_xy_ena), + .addra (wr_wide_xy_offset), + .dina (wr_wide_x_din), + + .enb (rd_wide_xy_ena), + .regceb (rd_wide_xy_reg_ena), + .addrb (rd_wide_xy_offset[z]), + .doutb (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W]) + ); + // + modexpng_sdp_36k_wrapper wide_bram_y + ( + .clk (clk), + + .ena (wr_wide_xy_ena), + .wea (wr_wide_xy_ena), + .addra (wr_wide_xy_offset), + .dina (wr_wide_y_din), + + .enb (rd_wide_xy_ena), + .regceb (rd_wide_xy_reg_ena), + .addrb (rd_wide_xy_offset[z]), + .doutb (rd_wide_y_dout[z*WORD_EXT_W +: WORD_EXT_W]) + ); + // + end + endgenerate + + + // + // Auxilary Storage + // + modexpng_sdp_36k_wrapper wide_bram_x_aux + ( + .clk (clk), + + .ena (wr_wide_xy_ena), + .wea (wr_wide_xy_ena), + .addra (wr_wide_xy_offset), + .dina (wr_wide_x_din), + + .enb (rd_wide_xy_ena_aux), + .regceb (rd_wide_xy_reg_ena_aux), + .addrb (rd_wide_xy_offset_aux), + .doutb (rd_wide_x_dout_aux) + ); + // + modexpng_sdp_36k_wrapper wide_bram_y_aux + ( + .clk (clk), + + .ena (wr_wide_xy_ena), + .wea (wr_wide_xy_ena), + .addra (wr_wide_xy_offset), + .dina (wr_wide_y_din), + + .enb (rd_wide_xy_ena_aux), + .regceb (rd_wide_xy_reg_ena_aux), + .addrb (rd_wide_xy_offset_aux), + .doutb (rd_wide_y_dout_aux) + ); + + + // + // "Narrow" Storage + // + modexpng_sdp_36k_wrapper narrow_bram_x + ( + .clk (clk), + + .ena (wr_narrow_xy_ena), + .wea (wr_narrow_xy_ena), + .addra (wr_narrow_xy_offset), + .dina (wr_narrow_x_din), + + .enb (rd_narrow_xy_ena), + .regceb (rd_narrow_xy_reg_ena), + .addrb (rd_narrow_xy_offset), + .doutb (rd_narrow_x_dout) + ); + + modexpng_sdp_36k_wrapper narrow_bram_y + ( + .clk (clk), + + .ena (wr_narrow_xy_ena), + .wea (wr_narrow_xy_ena), + .addra (wr_narrow_xy_offset), + .dina (wr_narrow_y_din), + + .enb (rd_narrow_xy_ena), + .regceb (rd_narrow_xy_reg_ena), + .addrb (rd_narrow_xy_offset), + .doutb (rd_narrow_y_dout) + ); + + +endmodule diff --git a/rtl/_modexpng_storage_manager.v b/rtl/_modexpng_storage_manager.v new file mode 100644 index 0000000..958596a --- /dev/null +++ b/rtl/_modexpng_storage_manager.v @@ -0,0 +1,199 @@ +module modexpng_storage_manager +( + clk, rst, + + wr_wide_xy_ena, + wr_wide_xy_bank, + wr_wide_xy_addr, + wr_wide_x_din, + wr_wide_y_din, + + wr_narrow_xy_ena, + wr_narrow_xy_bank, + wr_narrow_xy_addr, + wr_narrow_x_din, + wr_narrow_y_din, + + ext_wide_xy_ena, + ext_wide_xy_bank, + ext_wide_xy_addr, + ext_wide_x_din, + ext_wide_y_din, + + ext_narrow_xy_ena, + ext_narrow_xy_bank, + ext_narrow_xy_addr, + ext_narrow_x_din, + ext_narrow_y_din, + + rcmb_wide_xy_ena, + rcmb_wide_xy_bank, + rcmb_wide_xy_addr, + rcmb_wide_x_din, + rcmb_wide_y_din, + + rcmb_narrow_xy_ena, + rcmb_narrow_xy_bank, + rcmb_narrow_xy_addr, + rcmb_narrow_x_din, + rcmb_narrow_y_din +); + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + + + // + // Ports + // + input clk; + input rst; + + output wr_wide_xy_ena; + output [BANK_ADDR_W -1:0] wr_wide_xy_bank; + output [ OP_ADDR_W -1:0] wr_wide_xy_addr; + output [ WORD_EXT_W -1:0] wr_wide_x_din; + output [ WORD_EXT_W -1:0] wr_wide_y_din; + + output wr_narrow_xy_ena; + output [BANK_ADDR_W -1:0] wr_narrow_xy_bank; + output [ OP_ADDR_W -1:0] wr_narrow_xy_addr; + output [ WORD_EXT_W -1:0] wr_narrow_x_din; + output [ WORD_EXT_W -1:0] wr_narrow_y_din; + + input ext_wide_xy_ena; + input [BANK_ADDR_W -1:0] ext_wide_xy_bank; + input [ OP_ADDR_W -1:0] ext_wide_xy_addr; + input [ WORD_EXT_W -1:0] ext_wide_x_din; + input [ WORD_EXT_W -1:0] ext_wide_y_din; + + input ext_narrow_xy_ena; + input [BANK_ADDR_W -1:0] ext_narrow_xy_bank; + input [ OP_ADDR_W -1:0] ext_narrow_xy_addr; + input [ WORD_EXT_W -1:0] ext_narrow_x_din; + input [ WORD_EXT_W -1:0] ext_narrow_y_din; + + input rcmb_wide_xy_ena; + input [BANK_ADDR_W -1:0] rcmb_wide_xy_bank; + input [ OP_ADDR_W -1:0] rcmb_wide_xy_addr; + input [ WORD_EXT_W -1:0] rcmb_wide_x_din; + input [ WORD_EXT_W -1:0] rcmb_wide_y_din; + + input rcmb_narrow_xy_ena; + input [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; + input [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr; + input [ WORD_EXT_W -1:0] rcmb_narrow_x_din; + input [ WORD_EXT_W -1:0] rcmb_narrow_y_din; + + reg wr_wide_xy_ena_reg = 1'b0; + reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg; + reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_reg; + reg [ WORD_EXT_W -1:0] wr_wide_x_din_reg; + reg [ WORD_EXT_W -1:0] wr_wide_y_din_reg; + + reg wr_narrow_xy_ena_reg = 1'b0; + reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_reg; + reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_reg; + reg [ WORD_EXT_W -1:0] wr_narrow_x_din_reg; + reg [ WORD_EXT_W -1:0] wr_narrow_y_din_reg; + + task _update_wide; + input xy_ena; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; + begin + wr_wide_xy_ena_reg <= xy_ena; + wr_wide_xy_bank_reg <= xy_bank; + wr_wide_xy_addr_reg <= xy_addr; + wr_wide_x_din_reg <= x_din; + wr_wide_y_din_reg <= y_din; + end + endtask + + task _update_narrow; + input xy_ena; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; + begin + wr_narrow_xy_ena_reg <= xy_ena; + wr_narrow_xy_bank_reg <= xy_bank; + wr_narrow_xy_addr_reg <= xy_addr; + wr_narrow_x_din_reg <= x_din; + wr_narrow_y_din_reg <= y_din; + end + endtask + + task enable_wide; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; + begin + _update_wide(1'b1, xy_bank, xy_addr, x_din, y_din); + end + endtask + + task enable_narrow; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; + begin + _update_narrow(1'b1, xy_bank, xy_addr, x_din, y_din); + end + endtask + + task disable_wide; + begin + _update_wide(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE); + end + endtask + + task disable_narrow; + begin + _update_narrow(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE); + end + endtask + + always @(posedge clk) + // + if (rst) disable_wide; + else begin + // + if (ext_wide_xy_ena) enable_wide(ext_wide_xy_bank, ext_wide_xy_addr, ext_wide_x_din, ext_wide_y_din); + else if (rcmb_wide_xy_ena) enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din); + else disable_wide; + // + end + + always @(posedge clk) + // + if (rst) disable_narrow; + else begin + // + if (ext_narrow_xy_ena) enable_narrow(ext_narrow_xy_bank, ext_narrow_xy_addr, ext_narrow_x_din, ext_narrow_y_din); + else if (rcmb_narrow_xy_ena) enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din); + else disable_narrow; + // + end + + assign wr_wide_xy_ena = wr_wide_xy_ena_reg; + assign wr_wide_xy_bank = wr_wide_xy_bank_reg; + assign wr_wide_xy_addr = wr_wide_xy_addr_reg; + assign wr_wide_x_din = wr_wide_x_din_reg; + assign wr_wide_y_din = wr_wide_y_din_reg; + + assign wr_narrow_xy_ena = wr_narrow_xy_ena_reg; + assign wr_narrow_xy_bank = wr_narrow_xy_bank_reg; + assign wr_narrow_xy_addr = wr_narrow_xy_addr_reg; + assign wr_narrow_x_din = wr_narrow_x_din_reg; + assign wr_narrow_y_din = wr_narrow_y_din_reg; + +endmodule diff --git a/rtl/dev/temp.txt b/rtl/dev/temp.txt deleted file mode 100644 index 987bd86..0000000 --- a/rtl/dev/temp.txt +++ /dev/null @@ -1,384 +0,0 @@ - // - // Helper Functions - // - /* - function [INDEX_WIDTH-1:0] calc_preset_a_index; - input [INDEX_WIDTH-4:0] col_in; - input integer x_in; - integer index_out; - begin - index_out = col_in * NUM_MULTS + x_in; - calc_preset_a_index = index_out[INDEX_WIDTH-1:0]; - end - endfunction - - function [INDEX_WIDTH-1:0] calc_rotate_a_index; - input [INDEX_WIDTH-1:0] current_index_in; - input [INDEX_WIDTH-1:0] last_index_in; - begin - if (current_index_in > {INDEX_WIDTH{1'b0}}) - calc_rotate_a_index = current_index_in - 1'b1; - else - calc_rotate_a_index = last_index_in; - end - endfunction - */ - - /* - // - // Narrow Counters - // - reg [INDEX_WIDTH-1:0] din_addr_narrow_reg; - reg [INDEX_WIDTH-1:0] din_addr_narrow_dly; - localparam [INDEX_WIDTH-1:0] din_addr_narrow_zero = {INDEX_WIDTH{1'b0}}; - wire [INDEX_WIDTH-1:0] din_addr_narrow_next = (din_addr_narrow_reg < index_last) ? - din_addr_narrow_reg + 1'b1 : din_addr_narrow_zero; - wire din_addr_narrow_done = din_addr_narrow_reg == index_last; - - assign din_addr_narrow = din_addr_narrow_reg; - - always @(posedge clk) - // - din_addr_narrow_dly <= din_addr_narrow_reg; - - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; - FSM_STATE_MULT_SQUARE_COL_0_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; - FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; - endcase - - - // - // Helper Functions - // - function [NUM_MULTS-1:0] calc_mac_clear_bitmask; - input [2:0] t; - begin - case (t) - 3'd0: calc_mac_clear_bitmask = 8'b00000001; - 3'd1: calc_mac_clear_bitmask = 8'b00000010; - 3'd2: calc_mac_clear_bitmask = 8'b00000100; - 3'd3: calc_mac_clear_bitmask = 8'b00001000; - 3'd4: calc_mac_clear_bitmask = 8'b00010000; - 3'd5: calc_mac_clear_bitmask = 8'b00100000; - 3'd6: calc_mac_clear_bitmask = 8'b01000000; - 3'd7: calc_mac_clear_bitmask = 8'b10000000; - endcase - end - endfunction - - function [NUM_MULTS:0] calc_mac_clear_square; - input [INDEX_WIDTH-4:0] current_col_index; - input [INDEX_WIDTH-1:0] b_addr_prev; - begin - if (b_addr_prev[INDEX_WIDTH-1:3] == current_col_index) - calc_mac_clear_square = {1'b0, calc_mac_clear_bitmask(b_addr_prev[2:0])}; - else - calc_mac_clear_square = {1'b0, {NUM_MULTS{1'b0}}}; - end - endfunction - - - // - // Wide Counters - // - reg [INDEX_WIDTH-1:0] din_addr_wide_reg[0:NUM_MULTS-1]; - - integer xi; - always @(posedge clk) - // - for (xi=0; xi<NUM_MULTS; xi=xi+1) - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(0, xi); - FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(col_index + 1'b1, xi); - // - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_wide_reg[xi] <= calc_rotate_a_index(din_addr_wide_reg[xi], index_last); - // - endcase - - - // - // Enables - // - reg din_ena_narrow_reg = 1'b0; - reg [NUM_MULTS-1:0] din_ena_wide_reg = {NUM_MULTS{1'b0}}; - - assign din_ena_narrow = din_ena_narrow_reg; - assign din_ena_wide = din_ena_wide_reg; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) din_ena_narrow_reg <= 1'b0; - else case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_narrow_reg <= 1'b1; - default: din_ena_narrow_reg <= 1'b0; - endcase - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) din_ena_wide_reg <= {NUM_MULTS{1'b0}}; - else case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_wide_reg <= {NUM_MULTS{1'b1}}; - default: din_ena_wide_reg <= {NUM_MULTS{1'b0}}; - endcase - - - // - // Modes - // - reg [2-1:0] din_mode_wide_reg; - reg [2-1:0] din_mode_narrow_reg; - reg [2-1:0] dout_mode_wide_reg; - reg [2-1:0] dout_mode_narrow_reg; - - assign din_mode_wide = din_mode_wide_reg; - assign din_mode_narrow = din_mode_narrow_reg; - - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_wide_reg <= MODEXPNG_MODE_A; - default: din_mode_wide_reg <= 2'bXX; - endcase - - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_narrow_reg <= MODEXPNG_MODE_B; - default: din_mode_narrow_reg <= 2'bXX; - endcase - - - // - // MAC Array - // - wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_a[0:NUM_MULTS]; - wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_b; - reg [ NUM_MULTS :0] mac_ce; - reg [ NUM_MULTS :0] mac_clr; - wire [ MODEXPNG_MAC_WIDTH-1:0] mac_p[0:NUM_MULTS]; - reg [ NUM_MULTS :0] mac_rdy_lsb; - reg [ NUM_MULTS :0] mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1:0]; - - //reg [ NUM_MULTS :0] mac_ce_dly[MODEXPNG_MAC_LATENCY-1:0]; - //wire [ NUM_MULTS :0] mac_rdy; - - - - - - assign mac_din_b = din_narrow; - - - genvar x; - generate for (x=0; x<=NUM_MULTS; x=x+1) - begin : gen_macs - // - //assign mac_rdy[x] = mac_ce_dly[MODEXPNG_MAC_LATENCY-1][x]; - // - modexpng_mac mac_inst - ( - .clk (clk), - .ce (mac_ce[x]), - .clr (mac_clr[x]), - .a (mac_din_a[x]), - .b (mac_din_b), - .p (mac_p[x]) - ); - // - end - // - endgenerate - - generate for (x=0; x<NUM_MULTS; x=x+1) - begin : gen_mac_din_a - // - assign mac_din_a[x] = din_wide[x*MODEXPNG_WORD_WIDTH+:MODEXPNG_WORD_WIDTH]; - // - end - endgenerate - - generate for (x=0; x<NUM_MULTS; x=x+1) - begin : gen_din_addr_wide - // - assign din_addr_wide[x*INDEX_WIDTH+:INDEX_WIDTH] = din_addr_wide_reg[x]; - // - end - endgenerate - - - // - // MAC Clock Enable Logic - // - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) mac_ce <= {1'b0, {NUM_MULTS{1'b0}}}; - else case (fsm_state) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_ce <= {1'b0, {NUM_MULTS{1'b1}}}; - default: mac_ce <= {1'b0, {NUM_MULTS{1'b0}}}; - endcase - - - // - // MAC Valid Logic - // - integer y; - - always @(posedge clk) - // - for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin - mac_rdy_lsb_dly[0][xi] <= mac_rdy_lsb[xi]; - for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1) - mac_rdy_lsb_dly[y][xi] <= mac_rdy_lsb_dly[y-1][xi]; - end - - always @(posedge clk) begin - // - fsm_state_dly[0] <= fsm_state; - for (y=1; y<=MODEXPNG_MAC_LATENCY; y=y+1) - fsm_state_dly[y] <= fsm_state_dly[y-1]; - end - - */ - - /* - always @(posedge clk) - // - for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin - mac_ce_dly[0][xi] <= mac_ce[xi]; - for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1) - mac_ce_dly[y][xi] <= mac_ce_dly[y-1][xi]; - end - */ - /* - always @(posedge clk) - // - for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin - mac_clr_dly[0][xi] <= mac_clr[xi]; - for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1) - mac_clr_dly[y][xi] <= mac_clr_dly[y-1][xi]; - end - */ - - /* - // - // MAC Clear Logic - // - always @(posedge clk) - // - case (fsm_state) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_clr <= {1'b0, {NUM_MULTS{1'b1}}}; - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_clr <= calc_mac_clear_square(col_index, din_addr_narrow_dly); - default: mac_clr <= {1'bX, {NUM_MULTS{1'bX}}}; - endcase - - - // - // MAC Ready Logic - // - always @(posedge clk) - // - case (fsm_state) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_rdy_lsb <= calc_mac_clear_square(col_index, din_addr_narrow); - default: mac_rdy_lsb <= {1'bX, {NUM_MULTS{1'bX}}}; - endcase - - - - // - // Recombinators - // - reg rcmb_lsb_ce; - reg rcmb_lsb_clr; - reg [MODEXPNG_MAC_WIDTH-1: 0] rcmb_lsb_din; - wire [15: 0] rcmb_lsb_dout; - - modexpng_part_recombinator recomb_lsb - ( - .clk (clk), - .ce (rcmb_lsb_ce), - .clr (rcmb_lsb_clr), - .din (rcmb_lsb_din), - .dout (rcmb_lsb_dout) - ); - - - reg calc_rcmb_lsb_ce; - always @* - // - calc_rcmb_lsb_ce = | mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0]; - - reg [MODEXPNG_MAC_WIDTH-1:0] calc_rcmb_lsb_din; - - always @* - // - casez (mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0]) - 8'b00000001: calc_rcmb_lsb_din = mac_p[0]; - 8'b00000010: calc_rcmb_lsb_din = mac_p[1]; - 8'b00000100: calc_rcmb_lsb_din = mac_p[2]; - 8'b00001000: calc_rcmb_lsb_din = mac_p[3]; - 8'b00010000: calc_rcmb_lsb_din = mac_p[4]; - 8'b00100000: calc_rcmb_lsb_din = mac_p[5]; - 8'b01000000: calc_rcmb_lsb_din = mac_p[6]; - 8'b10000000: calc_rcmb_lsb_din = mac_p[7]; - default: calc_rcmb_lsb_din = {MODEXPNG_MAC_WIDTH{1'bX}}; - endcase - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) - rcmb_lsb_ce <= 1'b0; - else case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_ce <= calc_rcmb_lsb_ce; - default: rcmb_lsb_ce <= 1'b0; - endcase - - always @(posedge clk) - // - case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) - FSM_STATE_MULT_SQUARE_COL_0_TRIG: rcmb_lsb_clr <= 1'b1; - default: rcmb_lsb_clr <= 1'b0; - endcase - - always @(posedge clk) - // - case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_din <= calc_rcmb_lsb_din; - default: rcmb_lsb_din <= {MODEXPNG_MAC_WIDTH{1'bX}}; - endcase - - - -*/ diff --git a/rtl/dsp/dsp_array.v b/rtl/dsp/dsp_array.v deleted file mode 100644 index 2a050d4..0000000 --- a/rtl/dsp/dsp_array.v +++ /dev/null @@ -1,143 +0,0 @@ -module dsp_array -( - input clk, - - input ce_a, - input ce_b, - input ce_m, - input ce_p, - input ce_mode, - - input [9 -1:0] mode_z, - - input [5*18-1:0] a, - input [1*17-1:0] b, - output [9*47-1:0] p -); - - `include "../modexpng_parameters_x8.vh" - - wire [17:0] casc_a[0:3]; - wire [16:0] casc_b[0:3]; - - wire ce_a0 = ce_a; - reg ce_a1 = 1'b0; - reg ce_a2 = 1'b0; - - wire ce_b0 = ce_b; - reg ce_b1 = 1'b0; - - always @(posedge clk) begin - ce_a1 <= ce_a0; - ce_a2 <= ce_a1; - ce_b1 <= ce_b0; - end - - - genvar z; - generate for (z=0; z<(NUM_MULTS/2); z=z+1) - // - begin : gen_DSP48E1 - // - dsp_slice # - ( - .AB_INPUT("DIRECT"), - .B_REG(2) - ) - dsp_direct - ( - .clk (clk), - - .ce_a1 (ce_a0), - .ce_b1 (ce_b0), - .ce_a2 (ce_a1), - .ce_b2 (ce_b1), - .ce_m (ce_m), - .ce_p (ce_p), - .ce_mode (ce_mode), - - .a (a[z*18+:18]), - .b (b), - .p (p[47*2*z+:47]), - - .inmode (5'b00000), - .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), - - .casc_a_in ({18{1'b0}}), - .casc_b_in ({17{1'b0}}), - - .casc_a_out (casc_a[z]), - .casc_b_out (casc_b[z]) - ); - // - dsp_slice # - ( - .AB_INPUT("CASCADE"), - .B_REG(1) - ) - dsp_cascade - ( - .clk (clk), - - .ce_a1 (ce_a1), - .ce_b1 (1'b0), - .ce_a2 (ce_a2), - .ce_b2 (ce_b1), - .ce_m (ce_m), - .ce_p (ce_p), - .ce_mode (ce_mode), - - .a (a[z*18+:18]), - .b (b), - .p (p[47*(2*z+1)+:47]), - - .inmode (5'b00000), - .opmode ({1'b0, mode_z[2*z+1], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), - - .casc_a_in (casc_a[z]), - .casc_b_in (casc_b[z]), - - .casc_a_out (), - .casc_b_out () - ); - // - end - // - endgenerate - - dsp_slice # - ( - .AB_INPUT("DIRECT"), - .B_REG(2) - ) - dsp_aux - ( - .clk (clk), - - .ce_a1 (ce_a0), - .ce_b1 (ce_b0), - .ce_a2 (ce_a1), - .ce_b2 (ce_b1), - .ce_m (ce_m), - .ce_p (ce_p), - .ce_mode (ce_mode), - - .a (a[4*18+:18]), - .b (b), - .p (p[47*2*4+:47]), - - .inmode (5'b00000), - .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), - - .casc_a_in ({18{1'b0}}), - .casc_b_in ({17{1'b0}}), - - .casc_a_out (), - .casc_b_out () - ); - - -endmodule diff --git a/rtl/dsp/dsp_slice.v b/rtl/dsp/dsp_slice.v deleted file mode 100644 index 9f1298b..0000000 --- a/rtl/dsp/dsp_slice.v +++ /dev/null @@ -1,125 +0,0 @@ -module dsp_slice # -( - AB_INPUT = "DIRECT", - B_REG = 2 -) -( - input clk, - input ce_a1, - input ce_b1, - input ce_a2, - input ce_b2, - input ce_m, - input ce_p, - input ce_mode, - input [17:0] a, - input [16:0] b, - output [46:0] p, - input [ 4:0] inmode, - input [ 6:0] opmode, - input [ 3:0] alumode, - input [17:0] casc_a_in, - input [16:0] casc_b_in, - output [17:0] casc_a_out, - output [16:0] casc_b_out -); - - wire [30-18-1:0] casc_a_dummy; - wire [18-17-1:0] casc_b_dummy; - wire [48-47-1:0] p_dummy; - - DSP48E1 # - ( - .AREG (2), - .BREG (B_REG), - .CREG (0), - .DREG (0), - .ADREG (0), - .MREG (1), - .PREG (1), - .ACASCREG (1), - .BCASCREG (1), - .INMODEREG (0), - .OPMODEREG (1), - .ALUMODEREG (0), - .CARRYINREG (0), - .CARRYINSELREG (0), - - .A_INPUT (AB_INPUT), - .B_INPUT (AB_INPUT), - - .USE_DPORT ("FALSE"), - .USE_MULT ("DYNAMIC"), - .USE_SIMD ("ONE48"), - - .MASK (48'h3fffffffffff), - .PATTERN (48'h000000000000), - .SEL_MASK ("MASK"), - .SEL_PATTERN ("PATTERN"), - - .USE_PATTERN_DETECT ("NO_PATDET"), - .AUTORESET_PATDET ("NO_RESET") - ) - DSP48E1_inst - ( - .CLK (clk), - - .CEA1 (ce_a1), - .CEB1 (ce_b1), - .CEA2 (ce_a2), - .CEB2 (ce_b2), - .CEAD (1'b0), - .CEC (1'b0), - .CED (1'b0), - .CEM (ce_m), - .CEP (ce_p), - .CEINMODE (1'b0), - .CECTRL (ce_mode), - .CEALUMODE (1'b0), - .CECARRYIN (1'b0), - - .A ({{(30-18){1'b0}}, a}), - .B ({{(18-17){1'b0}}, b}), - .C ({48{1'b0}}), - .D ({25{1'b0}}), - .P ({p_dummy, p}), - - .INMODE (inmode), - .OPMODE (opmode), - .ALUMODE (alumode), - - .ACIN ({{(30-18){1'b0}}, casc_a_in}), - .BCIN ({{(18-17){1'b0}}, casc_b_in}), - .ACOUT ({casc_a_dummy, casc_a_out}), - .BCOUT ({casc_b_dummy, casc_b_out}), - .PCIN ({48{1'b0}}), - .PCOUT (), - .CARRYCASCIN (1'b0), - .CARRYCASCOUT (), - - .RSTA (1'b0), - .RSTB (1'b0), - .RSTC (1'b0), - .RSTD (1'b0), - .RSTM (1'b0), - .RSTP (1'b0), - .RSTINMODE (1'b0), - .RSTCTRL (1'b0), - .RSTALUMODE (1'b0), - .RSTALLCARRYIN (1'b0), - - .UNDERFLOW (), - .OVERFLOW (), - .PATTERNDETECT (), - .PATTERNBDETECT (), - - .CARRYIN (1'b0), - .CARRYOUT (), - .CARRYINSEL (3'b000), - - .MULTSIGNIN (1'b0), - .MULTSIGNOUT () - ); - - -endmodule diff --git a/rtl/modexpng_dsp48e1.vh b/rtl/modexpng_dsp48e1.vh new file mode 100644 index 0000000..bc3d55c --- /dev/null +++ b/rtl/modexpng_dsp48e1.vh @@ -0,0 +1,8 @@ +localparam DSP48E1_A_W = 30; +localparam DSP48E1_B_W = 18; +localparam DSP48E1_C_W = 48; +localparam DSP48E1_D_W = 25; +localparam DSP48E1_P_W = 48; +localparam DSP48E1_INMODE_W = 5; +localparam DSP48E1_OPMODE_W = 7; +localparam DSP48E1_ALUMODE_W = 4; diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v index 9c4ee93..8ab64f0 100644 --- a/rtl/modexpng_dsp_array_block.v +++ b/rtl/modexpng_dsp_array_block.v @@ -1,24 +1,30 @@ module modexpng_dsp_array_block ( - input clk, - - input ce_a, - input ce_b, - input ce_m, - input ce_p, - input ce_mode, + clk, + ce_a, ce_b, ce_m, ce_p, ce_mode, + mode_z, + a, b, p +); + + `include "modexpng_dsp48e1.vh" + `include "modexpng_parameters.vh" - input [9 -1:0] mode_z, + input clk; - input [5*18-1:0] a, - input [1*16-1:0] b, - output [9*47-1:0] p -); + input ce_a; + input ce_b; + input ce_m; + input ce_p; + input ce_mode; - `include "modexpng_parameters_x8.vh" + input [ NUM_MULTS_AUX -1:0] mode_z; + + input [NUM_MULTS_HALF_AUX * WORD_EXT_W -1:0] a; + input [ WORD_W -1:0] b; + output [NUM_MULTS_AUX * MAC_W -1:0] p; - wire [17:0] casc_a[0:3]; - wire [15:0] casc_b[0:3]; + wire [WORD_EXT_W -1:0] casc_a[0:NUM_MULTS_HALF-1]; + wire [ WORD_W -1:0] casc_b[0:NUM_MULTS_HALF-1]; wire ce_a0 = ce_a; reg ce_a1 = 1'b0; @@ -35,7 +41,7 @@ module modexpng_dsp_array_block genvar z; - generate for (z=0; z<(NUM_MULTS/2); z=z+1) + generate for (z=0; z<NUM_MULTS_HALF; z=z+1) // begin : gen_DSP48E1 // @@ -56,16 +62,16 @@ module modexpng_dsp_array_block .ce_p (ce_p), .ce_mode (ce_mode), - .a (a[z*18+:18]), + .a (a[z*WORD_EXT_W +: WORD_EXT_W]), .b (b), - .p (p[47*2*z+:47]), + .p (p[(2*z)*MAC_W +: MAC_W]), - .inmode (5'b00000), + .inmode ({DSP48E1_INMODE_W{1'b0}}), .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), + .alumode ({DSP48E1_ALUMODE_W{1'b0}}), - .casc_a_in ({18{1'b0}}), - .casc_b_in ({16{1'b0}}), + .casc_a_in (WORD_EXT_NULL), + .casc_b_in (WORD_NULL), .casc_a_out (casc_a[z]), .casc_b_out (casc_b[z]) @@ -88,13 +94,13 @@ module modexpng_dsp_array_block .ce_p (ce_p), .ce_mode (ce_mode), - .a (a[z*18+:18]), + .a (a[z*WORD_EXT_W +: WORD_EXT_W]), .b (b), - .p (p[47*(2*z+1)+:47]), + .p (p[(2*z+1)*MAC_W +: MAC_W]), - .inmode (5'b00000), + .inmode ({DSP48E1_INMODE_W{1'b0}}), .opmode ({1'b0, mode_z[2*z+1], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), + .alumode ({DSP48E1_ALUMODE_W{1'b0}}), .casc_a_in (casc_a[z]), .casc_b_in (casc_b[z]), @@ -124,16 +130,16 @@ module modexpng_dsp_array_block .ce_p (ce_p), .ce_mode (ce_mode), - .a (a[4*18+:18]), + .a (a[NUM_MULTS_HALF*WORD_EXT_W +: WORD_EXT_W]), .b (b), - .p (p[47*2*4+:47]), + .p (p[(2*NUM_MULTS_HALF)*MAC_W +: MAC_W]), - .inmode (5'b00000), - .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), + .inmode ({DSP48E1_INMODE_W{1'b0}}), + .opmode ({1'b0, mode_z[2*NUM_MULTS_HALF], 1'b0, 2'b01, 2'b01}), + .alumode ({DSP48E1_ALUMODE_W{1'b0}}), - .casc_a_in ({18{1'b0}}), - .casc_b_in ({16{1'b0}}), + .casc_a_in (WORD_EXT_NULL), + .casc_b_in (WORD_NULL), .casc_a_out (), .casc_b_out () diff --git a/rtl/modexpng_dsp_slice_wrapper.v b/rtl/modexpng_dsp_slice_wrapper.v index f565eec..3d13570 100644 --- a/rtl/modexpng_dsp_slice_wrapper.v +++ b/rtl/modexpng_dsp_slice_wrapper.v @@ -4,30 +4,41 @@ module modexpng_dsp_slice_wrapper # B_REG = 2 ) ( - input clk, - input ce_a1, - input ce_b1, - input ce_a2, - input ce_b2, - input ce_m, - input ce_p, - input ce_mode, - input [17:0] a, - input [15:0] b, - output [46:0] p, - input [ 4:0] inmode, - input [ 6:0] opmode, - input [ 3:0] alumode, - input [17:0] casc_a_in, - input [15:0] casc_b_in, - output [17:0] casc_a_out, - output [15:0] casc_b_out + clk, + ce_a1, ce_b1, ce_a2, ce_b2, + ce_m, ce_p, ce_mode, + a, b, p, + inmode, opmode, alumode, + casc_a_in, casc_b_in, + casc_a_out, casc_b_out ); - wire [30-18-1:0] casc_a_dummy; - wire [18-16-1:0] casc_b_dummy; - wire [48-47-1:0] p_dummy; + `include "modexpng_parameters.vh" + `include "modexpng_dsp48e1.vh" + + input clk; + input ce_a1; + input ce_b1; + input ce_a2; + input ce_b2; + input ce_m; + input ce_p; + input ce_mode; + input [ WORD_EXT_W -1:0] a; + input [ WORD_W -1:0] b; + output [ MAC_W -1:0] p; + input [ DSP48E1_INMODE_W -1:0] inmode; + input [ DSP48E1_OPMODE_W -1:0] opmode; + input [DSP48E1_ALUMODE_W -1:0] alumode; + input [ WORD_EXT_W -1:0] casc_a_in; + input [ WORD_W -1:0] casc_b_in; + output [ WORD_EXT_W -1:0] casc_a_out; + output [ WORD_W -1:0] casc_b_out; + wire [DSP48E1_A_W - WORD_EXT_W -1:0] casc_a_dummy; + wire [DSP48E1_B_W - WORD_W -1:0] casc_b_dummy; + wire [DSP48E1_P_W - MAC_W -1:0] p_dummy; + DSP48E1 # ( .AREG (2), @@ -52,8 +63,8 @@ module modexpng_dsp_slice_wrapper # .USE_MULT ("DYNAMIC"), .USE_SIMD ("ONE48"), - .MASK (48'h3fffffffffff), - .PATTERN (48'h000000000000), + .MASK ({DSP48E1_P_W{1'b1}}), + .PATTERN ({DSP48E1_P_W{1'b0}}), .SEL_MASK ("MASK"), .SEL_PATTERN ("PATTERN"), @@ -78,21 +89,21 @@ module modexpng_dsp_slice_wrapper # .CEALUMODE (1'b0), .CECARRYIN (1'b0), - .A ({{(30-18){1'b0}}, a}), - .B ({{(18-16){1'b0}}, b}), - .C ({48{1'b0}}), - .D ({25{1'b0}}), + .A ({{(DSP48E1_A_W-WORD_EXT_W){1'b0}}, a}), + .B ({{(DSP48E1_B_W-WORD_W){1'b0}}, b}), + .C ({DSP48E1_C_W{1'b0}}), + .D ({DSP48E1_D_W{1'b0}}), .P ({p_dummy, p}), .INMODE (inmode), .OPMODE (opmode), .ALUMODE (alumode), - .ACIN ({{(30-18){1'b0}}, casc_a_in}), - .BCIN ({{(18-16){1'b0}}, casc_b_in}), + .ACIN ({{(DSP48E1_A_W-WORD_EXT_W){1'b0}}, casc_a_in}), + .BCIN ({{(DSP48E1_B_W-WORD_W){1'b0}}, casc_b_in}), .ACOUT ({casc_a_dummy, casc_a_out}), .BCOUT ({casc_b_dummy, casc_b_out}), - .PCIN ({48{1'b0}}), + .PCIN ({DSP48E1_P_W{1'b0}}), .PCOUT (), .CARRYCASCIN (1'b0), .CARRYCASCOUT (), @@ -121,5 +132,4 @@ module modexpng_dsp_slice_wrapper # .MULTSIGNOUT () ); - endmodule diff --git a/rtl/modexpng_mmm_dual_x8.v b/rtl/modexpng_mmm_dual.v index 2e4f4e0..df0f823 100644 --- a/rtl/modexpng_mmm_dual_x8.v +++ b/rtl/modexpng_mmm_dual.v @@ -1,14 +1,15 @@ -module modexpng_mmm_dual_x8 +module modexpng_mmm_dual ( clk, rst, ena, rdy, - ladder_mode, word_index_last, word_index_last_minus1, + sel_wide_in, sel_narrow_in, + rd_wide_xy_ena, rd_wide_xy_ena_aux, rd_wide_xy_bank, @@ -51,9 +52,10 @@ module modexpng_mmm_dual_x8 // // Headers // + `include "modexpng_parameters.vh" `include "../rtl_1/modexpng_mmm_fsm_old.vh" - `include "../rtl_1/modexpng_parameters_old.vh" - `include "../rtl_1/modexpng_parameters_x8_old.vh" + //`include "../rtl_1/modexpng_parameters_old.vh" + //`include "../rtl_1/modexpng_parameters_x8_old.vh" // @@ -69,10 +71,13 @@ module modexpng_mmm_dual_x8 input [7:0] word_index_last; input [7:0] word_index_last_minus1; + input [BANK_ADDR_W-1:0] sel_wide_in; + input [BANK_ADDR_W-1:0] sel_narrow_in; + output rd_wide_xy_ena; output rd_wide_xy_ena_aux; - output [ 1:0] rd_wide_xy_bank; - output [ 1:0] rd_wide_xy_bank_aux; + output [ BANK_ADDR_W -1:0] rd_wide_xy_bank; + output [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; output [ 8-1:0] rd_wide_xy_addr_aux; input [18*NUM_MULTS/2-1:0] rd_wide_x_dout; @@ -81,24 +86,24 @@ module modexpng_mmm_dual_x8 input [ 18-1:0] rd_wide_y_dout_aux; output rd_narrow_xy_ena; - output [ 1:0] rd_narrow_xy_bank; + output [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; output [ 7:0] rd_narrow_xy_addr; input [18-1:0] rd_narrow_x_dout; input [18-1:0] rd_narrow_y_dout; - output [ 1:0] rcmb_wide_xy_bank; + output [BANK_ADDR_W -1:0] rcmb_wide_xy_bank; output [ 7:0] rcmb_wide_xy_addr; output [17:0] rcmb_wide_x_dout; output [17:0] rcmb_wide_y_dout; output rcmb_wide_xy_valid; - output [ 1:0] rcmb_narrow_xy_bank; + output [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; output [ 7:0] rcmb_narrow_xy_addr; output [17:0] rcmb_narrow_x_dout; output [17:0] rcmb_narrow_y_dout; output rcmb_narrow_xy_valid; - output [ 1:0] rcmb_xy_bank; + output [BANK_ADDR_W -1:0] rcmb_xy_bank; output [ 7:0] rcmb_xy_addr; output [17:0] rcmb_x_dout; output [17:0] rcmb_y_dout; @@ -132,13 +137,13 @@ module modexpng_mmm_dual_x8 // reg wide_xy_ena = 1'b0; reg wide_xy_ena_aux = 1'b0; - reg [ 1:0] wide_xy_bank; - reg [ 1:0] wide_xy_bank_aux; + reg [ BANK_ADDR_W -1:0] wide_xy_bank; + reg [ BANK_ADDR_W -1:0] wide_xy_bank_aux; reg [ 8-1:0] wide_xy_addr[0:3]; reg [ 8-1:0] wide_xy_addr_aux; reg narrow_xy_ena = 1'b0; - reg [ 1:0] narrow_xy_bank; + reg [ BANK_ADDR_W -1:0] narrow_xy_bank; reg [ 7:0] narrow_xy_addr; reg [ 7:0] narrow_xy_addr_dly; @@ -332,7 +337,7 @@ module modexpng_mmm_dual_x8 FSM_STATE_MULT_SQUARE_COL_0_TRIG, FSM_STATE_MULT_SQUARE_COL_N_TRIG, FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= BANK_NARROW_T1T2; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in; // FSM_STATE_MULT_TRIANGLE_COL_0_INIT, FSM_STATE_MULT_TRIANGLE_COL_N_INIT, @@ -340,7 +345,7 @@ module modexpng_mmm_dual_x8 FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? - BANK_NARROW_EXT : BANK_NARROW_N_COEFF; + BANK_NARROW_EXT : BANK_NARROW_COEFF; // FSM_STATE_MULT_RECTANGLE_COL_0_INIT, FSM_STATE_MULT_RECTANGLE_COL_N_INIT, @@ -486,13 +491,13 @@ module modexpng_mmm_dual_x8 FSM_STATE_MULT_SQUARE_COL_0_TRIG, FSM_STATE_MULT_SQUARE_COL_N_TRIG, FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_T1T2; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in; FSM_STATE_MULT_TRIANGLE_COL_0_INIT, FSM_STATE_MULT_TRIANGLE_COL_N_INIT, FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_ABL; + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L; FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_ABL; + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L; FSM_STATE_MULT_RECTANGLE_COL_0_INIT, FSM_STATE_MULT_RECTANGLE_COL_N_INIT, FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, @@ -510,13 +515,13 @@ module modexpng_mmm_dual_x8 FSM_STATE_MULT_SQUARE_COL_0_TRIG, FSM_STATE_MULT_SQUARE_COL_N_TRIG, FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_T1T2; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in; FSM_STATE_MULT_TRIANGLE_COL_0_INIT, FSM_STATE_MULT_TRIANGLE_COL_N_INIT, FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_ABH; + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H; FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_ABL; + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L; FSM_STATE_MULT_RECTANGLE_COL_0_INIT, FSM_STATE_MULT_RECTANGLE_COL_N_INIT, FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, @@ -525,8 +530,8 @@ module modexpng_mmm_dual_x8 FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's) case (rcmb_xy_bank) - BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_ABL; - BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_ABH; + BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L; + BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H; //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX default: wide_xy_bank_aux <= 3'bXXX; endcase diff --git a/rtl/modexpng_mmm_fsm_old.vh b/rtl/modexpng_mmm_fsm_old.vh deleted file mode 100644 index 3bdae66..0000000 --- a/rtl/modexpng_mmm_fsm_old.vh +++ /dev/null @@ -1,43 +0,0 @@ -localparam FSM_STATE_WIDTH = 32; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_IDLE = 0; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_1 = 1; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_2 = 2; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_3 = 3; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_1 = 4; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_2 = 5; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_3 = 6; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_INIT = 11; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_INIT = 14; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999; diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh new file mode 100644 index 0000000..514fc21 --- /dev/null +++ b/rtl/modexpng_parameters.vh @@ -0,0 +1,71 @@ +`include "modexpng_parameters_x8.vh" + +function integer cryptech_clog2; + input integer value; + integer temp_value; + integer result; + // + begin + temp_value = value - 1; + for (result = 0; temp_value > 0; result = result + 1) + temp_value = temp_value >> 1; + cryptech_clog2 = result; + end + // +endfunction + +localparam WORD_W = 16; +localparam WORD_EXT_W = 18; +localparam MAC_W = 47; + +localparam MAX_OP_W = 4096; + +localparam BANK_ADDR_W = 3; +localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W); +localparam COL_INDEX_W = OP_ADDR_W - cryptech_clog2(NUM_MULTS); + +localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS); + +localparam RDCT_CARRY_W = WORD_EXT_W - WORD_W; + +localparam [RDCT_CARRY_W-1:0] RDCT_CARRY_ZEROES = {RDCT_CARRY_W{1'b0}}; + +localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_B = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_C = 3'd2; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_D = 3'd3; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_E = 3'd4; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_N = 3'd5; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_L = 3'd6; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_H = 3'd7; + +localparam [BANK_ADDR_W-1:0] BANK_NARROW_A = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_B = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_C = 3'd2; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_D = 3'd3; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_E = 3'd4; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_COEFF = 3'd5; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_Q = 3'd6; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_EXT = 3'd7; // [0] -> COEFF', [1] -> Q' + +localparam [BANK_ADDR_W-1:0] BANK_RCMB_ML = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_RCMB_MH = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_RCMB_EXT = 3'd2; // [0] -> MH' + +localparam [BANK_ADDR_W-1:0] BANK_DONT_CARE = {BANK_ADDR_W{1'bX}}; + +localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0; +localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1; + +localparam [OP_ADDR_W-1:0] OP_ADDR_ZERO = {OP_ADDR_W{1'b0}}; +localparam [OP_ADDR_W-1:0] OP_ADDR_ONE = {{(OP_ADDR_W-1){1'b0}}, 1'b1}; +localparam [OP_ADDR_W-1:0] OP_ADDR_DONT_CARE = {OP_ADDR_W{1'bX}}; + +localparam [WORD_W-1:0] WORD_NULL = {WORD_W{1'b0}}; +localparam [WORD_EXT_W-1:0] WORD_EXT_NULL = {WORD_EXT_W{1'b0}}; + +localparam [WORD_EXT_W-1:0] WORD_EXT_DONT_CARE = {WORD_EXT_W{1'bX}}; + +localparam [MAC_INDEX_W-1:0] MAC_INDEX_DONT_CARE = {MAC_INDEX_W{1'bX}}; + +localparam [NUM_MULTS-1:0] MULT_BITMAP_ZEROES = {NUM_MULTS{1'b0}};
\ No newline at end of file diff --git a/rtl/modexpng_parameters_old.vh b/rtl/modexpng_parameters_old.vh deleted file mode 100644 index d30b751..0000000 --- a/rtl/modexpng_parameters_old.vh +++ /dev/null @@ -1,40 +0,0 @@ - -//localparam WORD_WIDTH = 17; -//localparam MAC_WIDTH = 47; - -localparam BANK_ADDR_WIDTH = 2; // TODO: Replace everywhere! - -localparam [1:0] BANK_WIDE_T1T2 = 2'd0; -localparam [1:0] BANK_WIDE_ABL = 2'd1; -localparam [1:0] BANK_WIDE_ABH = 2'd2; -localparam [1:0] BANK_WIDE_N = 2'd3; - -localparam [1:0] BANK_RCMB_ML = 2'd0; -localparam [1:0] BANK_RCMB_MH = 2'd1; -localparam [1:0] BANK_RCMB_EXT = 2'd2; // 0 -> MH' - -localparam [1:0] BANK_NARROW_T1T2 = 2'd0; -localparam [1:0] BANK_NARROW_N_COEFF = 2'd1; -localparam [1:0] BANK_NARROW_Q = 2'd2; -localparam [1:0] BANK_NARROW_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q' - - -//localparam BANK_Y_T2 = 3'd0; -//localparam BANK_XY_T1T2 = 3'd0; - -//localparam BANK_XY_AB_LSB = 3'd1; -//localparam BANK_XY_AB_MSB = 3'd2; - -//localparam BANK_X_N = 3'd3; -//localparam BANK_Y_N_COEFF = 3'd3; - -//localparam BANK_XY_M = 3'd4; - -//localparam BANK_XY_Q_LSB = 3'd5; -//localparam BANK_XY_Q_MSB = 3'd6; - -//localparam BANK_XY_AUX = 3'd7; - -//localparam BANK_XY_ANY = 3'bXXX; - -//localparam BANK_XY_AUX_ADDR_N_COEFF = 0; diff --git a/rtl/modexpng_parameters_x8.vh b/rtl/modexpng_parameters_x8.vh new file mode 100644 index 0000000..0dcc3d6 --- /dev/null +++ b/rtl/modexpng_parameters_x8.vh @@ -0,0 +1,4 @@ +localparam NUM_MULTS = 8; +localparam NUM_MULTS_AUX = NUM_MULTS + 1; +localparam NUM_MULTS_HALF = NUM_MULTS / 2; +localparam NUM_MULTS_HALF_AUX = NUM_MULTS_HALF + 1; diff --git a/rtl/modexpng_parameters_x8_old.vh b/rtl/modexpng_parameters_x8_old.vh deleted file mode 100644 index 8734354..0000000 --- a/rtl/modexpng_parameters_x8_old.vh +++ /dev/null @@ -1 +0,0 @@ -localparam NUM_MULTS = 8; diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v index d6b1ad1..de60d1f 100644 --- a/rtl/modexpng_recombinator_block.v +++ b/rtl/modexpng_recombinator_block.v @@ -17,9 +17,9 @@ module modexpng_recombinator_block // // Headers // + `include "modexpng_parameters.vh" `include "../rtl_1/modexpng_mmm_fsm_old.vh" - `include "../rtl_1/modexpng_parameters_old.vh" - `include "../rtl_1/modexpng_parameters_x8_old.vh" + //`include "../rtl_1/modexpng_parameters_x8_old.vh" input clk; @@ -34,22 +34,22 @@ module modexpng_recombinator_block input [ 4:0] col_index; input [ 4:0] col_index_last; + input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; input [ 7:0] rd_narrow_xy_addr; - input [ 1:0] rd_narrow_xy_bank; - output [ 1:0] rcmb_wide_xy_bank; + output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; output [ 7:0] rcmb_wide_xy_addr; output [ 17:0] rcmb_wide_x_dout; output [ 17:0] rcmb_wide_y_dout; output rcmb_wide_xy_valid; - output [ 1:0] rcmb_narrow_xy_bank; + output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; output [ 7:0] rcmb_narrow_xy_addr; output [ 17:0] rcmb_narrow_x_dout; output [ 17:0] rcmb_narrow_y_dout; output rcmb_narrow_xy_valid; - output [ 1:0] rdct_narrow_xy_bank; + output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank; output [ 7:0] rdct_narrow_xy_addr; output [ 17:0] rdct_narrow_x_dout; output [ 17:0] rdct_narrow_y_dout; @@ -167,7 +167,7 @@ module modexpng_recombinator_block function calc_square_triangle_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -182,7 +182,7 @@ module modexpng_recombinator_block function calc_square_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -192,7 +192,7 @@ module modexpng_recombinator_block function calc_triangle_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -202,7 +202,7 @@ module modexpng_recombinator_block function calc_rectangle_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -217,7 +217,7 @@ module modexpng_recombinator_block function calc_triangle_aux_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -232,7 +232,7 @@ module modexpng_recombinator_block function [7:0] calc_square_triangle_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -258,7 +258,7 @@ module modexpng_recombinator_block function [7:0] calc_square_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -268,7 +268,7 @@ module modexpng_recombinator_block function [7:0] calc_triangle_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -278,7 +278,7 @@ module modexpng_recombinator_block function [7:0] calc_rectangle_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -313,7 +313,7 @@ module modexpng_recombinator_block function [2:0] calc_square_triangle_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -339,7 +339,7 @@ module modexpng_recombinator_block function [2:0] calc_square_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -349,7 +349,7 @@ module modexpng_recombinator_block function [2:0] calc_triangle_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -359,7 +359,7 @@ module modexpng_recombinator_block function [2:0] calc_rectangle_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] slim_bram_xy_bank_value; + input [BANK_ADDR_W -1:0] slim_bram_xy_bank_value; input [7:0] slim_bram_xy_addr_value; begin // @@ -385,7 +385,7 @@ module modexpng_recombinator_block function calc_square_rectangle_purge_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -400,7 +400,7 @@ module modexpng_recombinator_block function calc_square_purge_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -410,7 +410,7 @@ module modexpng_recombinator_block function calc_rectangle_purge_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -420,7 +420,7 @@ module modexpng_recombinator_block function calc_square_valid_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -436,7 +436,7 @@ module modexpng_recombinator_block function calc_rectangle_valid_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -452,7 +452,7 @@ module modexpng_recombinator_block function [7:0] calc_square_bitmap_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -469,7 +469,7 @@ module modexpng_recombinator_block function [7:0] calc_rectangle_bitmap_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -485,7 +485,7 @@ module modexpng_recombinator_block function calc_square_purge_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -501,7 +501,7 @@ module modexpng_recombinator_block function calc_rectangle_purge_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -783,19 +783,19 @@ module modexpng_recombinator_block end - reg [ 1:0] wide_xy_bank; + reg [ BANK_ADDR_W -1:0] wide_xy_bank; reg [ 7:0] wide_xy_addr; reg [17:0] wide_x_dout; reg [17:0] wide_y_dout; reg wide_xy_valid = 1'b0; - reg [ 1:0] narrow_xy_bank; + reg [ BANK_ADDR_W -1:0] narrow_xy_bank; reg [ 7:0] narrow_xy_addr; reg [17:0] narrow_x_dout; reg [17:0] narrow_y_dout; reg narrow_xy_valid = 1'b0; - reg [ 1:0] rdct_xy_bank; + reg [ BANK_ADDR_W -1:0] rdct_xy_bank; reg [ 7:0] rdct_xy_addr; reg [17:0] rdct_x_dout; reg [17:0] rdct_y_dout; @@ -883,7 +883,7 @@ module modexpng_recombinator_block endtask task _update_wide; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -898,7 +898,7 @@ module modexpng_recombinator_block endtask task _update_narrow; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -913,7 +913,7 @@ module modexpng_recombinator_block endtask task _update_rdct; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -928,7 +928,7 @@ module modexpng_recombinator_block endtask task set_wide; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -938,7 +938,7 @@ module modexpng_recombinator_block endtask task set_narrow; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -948,7 +948,7 @@ module modexpng_recombinator_block endtask task set_rdct; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -959,19 +959,19 @@ module modexpng_recombinator_block task clear_wide; begin - _update_wide(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + _update_wide(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); end endtask task clear_narrow; begin - _update_narrow(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + _update_narrow(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); end endtask task clear_rdct; begin - _update_rdct(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + _update_rdct(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); end endtask @@ -1074,16 +1074,16 @@ module modexpng_recombinator_block // case (rcmb_xy_valid) // - 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_ABH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); + 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); else clear_wide; // - 2'b01: set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); // 2'b10: if (cnt_msb < 8'd2) clear_wide; - else set_wide(BANK_WIDE_ABH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); // - 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_ABH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); - else set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); + else set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); // endcase // diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v deleted file mode 100644 index 1ecf56a..0000000 --- a/rtl/modexpng_recombinator_cell.v +++ /dev/null @@ -1,35 +0,0 @@ -module modexpng_recombinator_cell -( - clk, - ce, clr, - din, dout -); - - input clk; - input ce; - input clr; - input [46:0] din; - output [15:0] dout; - - reg [14:0] z; - reg [16:0] y; - reg [17:0] x; - //reg [15:0] w; - - //assign dout = w; - assign dout = x[15:0]; - - wire [14:0] din_z = din[46:32]; // TODO: maybe determine more precise bound here - wire [15:0] din_y = din[31:16]; - wire [15:0] din_x = din[15: 0]; - - always @(posedge clk) - // - if (ce) begin - z <= din_z; - y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z}; - x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {{16{1'b0}}, x[17:16]}; - //w <= clr ? {16{1'bX}} : x[15:0]; - end - -endmodule diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v index 0f5e461..aafb38c 100644 --- a/rtl/modexpng_reductor.v +++ b/rtl/modexpng_reductor.v @@ -17,8 +17,9 @@ module modexpng_reductor // // Headers // + `include "modexpng_parameters.vh" //`include "../rtl_1/modexpng_mmm_fsm.vh" - `include "../rtl_1/modexpng_parameters_old.vh" + //`include "../rtl_1/modexpng_parameters_x8.vh" @@ -39,12 +40,12 @@ module modexpng_reductor input [ 7:0] rd_narrow_xy_addr; input [ 1:0] rd_narrow_xy_bank; */ - input [ 1:0] rd_wide_xy_bank_aux; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; input [ 7:0] rd_wide_xy_addr_aux; input [ 17:0] rd_wide_x_dout_aux; input [ 17:0] rd_wide_y_dout_aux; // - input [ 1:0] rcmb_final_xy_bank; + input [ BANK_ADDR_W -1:0] rcmb_final_xy_bank; input [ 7:0] rcmb_final_xy_addr; input [ 17:0] rcmb_final_x_dout; input [ 17:0] rcmb_final_y_dout; @@ -60,7 +61,7 @@ module modexpng_reductor // Ready // reg rdy_reg = 1'b1; - reg busy_now = 1'b0; + wire busy_now; assign rdy = rdy_reg; @@ -81,9 +82,9 @@ module modexpng_reductor reg rcmb_xy_valid_dly2 = 1'b0; reg rcmb_xy_valid_dly3 = 1'b0; - reg [2:0] rcmb_xy_bank_dly1; - reg [2:0] rcmb_xy_bank_dly2; - reg [2:0] rcmb_xy_bank_dly3; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3; reg [7:0] rcmb_xy_addr_dly1; reg [7:0] rcmb_xy_addr_dly2; @@ -236,7 +237,7 @@ module modexpng_reductor // clear_rdct; // - if (busy_now && rcmb_xy_valid_dly3) + if (rcmb_xy_valid_dly3) // case (rcmb_xy_bank_dly3) @@ -258,12 +259,25 @@ module modexpng_reductor // // Busy // + reg busy_next = 1'b0; + reg [2:0] busy_now_shreg = {3{1'b0}}; + + assign busy_now = busy_now_shreg[2]; + + always @(posedge clk) + // + if (rst) busy_now_shreg <= {3{1'b0}}; + else begin + if (rdy && ena) busy_now_shreg <= {3{1'b1}}; + else busy_now_shreg <= {busy_now_shreg[1:0], busy_next}; + end + always @(posedge clk) // - if (rst) busy_now <= 1'b0; + if (rst) busy_next <= 1'b0; else begin - if (rdy && ena) busy_now <= 1'b1; - //if (!rdy && !busy_now) rdy <= 1'b1; + if (rdy && ena) busy_next <= 1'b1; + if (!rdy && rcmb_xy_valid_dly3 && (rcmb_xy_bank_dly3 == BANK_RCMB_EXT)) busy_next <= 1'b0; end diff --git a/rtl/modexpng_sdp_36k_wrapper.v b/rtl/modexpng_sdp_36k_wrapper.v new file mode 100644 index 0000000..0295697 --- /dev/null +++ b/rtl/modexpng_sdp_36k_wrapper.v @@ -0,0 +1,72 @@ +module modexpng_sdp_36k_wrapper +( + clk, + + ena, wea, + addra, dina, + + enb, regceb, + addrb, doutb +); + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + + + // + // Ports + // + input clk; + + input ena; + input wea; + input [BANK_ADDR_W + OP_ADDR_W -1:0] addra; + input [ WORD_EXT_W -1:0] dina; + + input enb; + input regceb; + input [BANK_ADDR_W + OP_ADDR_W -1:0] addrb; + output [ WORD_EXT_W -1:0] doutb; + + // + // BRAM_SDP_MACRO + // + BRAM_SDP_MACRO # + ( + .DEVICE ("7SERIES"), + + .BRAM_SIZE ("36Kb"), + + .WRITE_WIDTH (WORD_EXT_W), + .READ_WIDTH (WORD_EXT_W), + + .DO_REG (1), + .WRITE_MODE ("READ_FIRST"), + + .SRVAL (72'h000000000000000000), + .INIT (72'h000000000000000000), + + .INIT_FILE ("NONE"), + .SIM_COLLISION_CHECK ("NONE") + ) + BRAM_SDP_MACRO_inst + ( + .RST (1'b0), + + .WRCLK (clk), + .WREN (ena), + .WE ({2{wea}}), + .WRADDR (addra), + .DI (dina), + + .RDCLK (clk), + .RDEN (enb), + .REGCE (regceb), + .RDADDR (addrb), + .DO (doutb) + ); + +endmodule diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v index d6f9fb1..be04c7c 100644 --- a/rtl/modexpng_storage_block.v +++ b/rtl/modexpng_storage_block.v @@ -32,49 +32,46 @@ module modexpng_storage_block rd_narrow_y_dout ); - // // Headers // - `include "../rtl_1/modexpng_parameters_x8_old.vh" - + `include "modexpng_parameters.vh" // // Ports // - input clk; - input rst; - - input wr_wide_xy_ena; - input [ 1:0] wr_wide_xy_bank; - input [ 7:0] wr_wide_xy_addr; - input [17:0] wr_wide_x_din; - input [17:0] wr_wide_y_din; + input clk; + input rst; + + input wr_wide_xy_ena; + input [ BANK_ADDR_W -1:0] wr_wide_xy_bank; + input [ OP_ADDR_W -1:0] wr_wide_xy_addr; + input [ WORD_EXT_W -1:0] wr_wide_x_din; + input [ WORD_EXT_W -1:0] wr_wide_y_din; - input wr_narrow_xy_ena; - input [ 1:0] wr_narrow_xy_bank; - input [ 7:0] wr_narrow_xy_addr; - input [17:0] wr_narrow_x_din; - input [17:0] wr_narrow_y_din; - - input rd_wide_xy_ena; - input rd_wide_xy_ena_aux; - input [ 1:0] rd_wide_xy_bank; - input [ 1:0] rd_wide_xy_bank_aux; - input [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; - input [ 8-1:0] rd_wide_xy_addr_aux; - output [18*NUM_MULTS/2-1:0] rd_wide_x_dout; - output [18*NUM_MULTS/2-1:0] rd_wide_y_dout; - output [ 18-1:0] rd_wide_x_dout_aux; - output [ 18-1:0] rd_wide_y_dout_aux; + input wr_narrow_xy_ena; + input [ BANK_ADDR_W -1:0] wr_narrow_xy_bank; + input [ OP_ADDR_W -1:0] wr_narrow_xy_addr; + input [ WORD_EXT_W -1:0] wr_narrow_x_din; + input [ WORD_EXT_W -1:0] wr_narrow_y_din; + + input rd_wide_xy_ena; + input rd_wide_xy_ena_aux; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + input [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr; + input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; + output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout; + output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout; + output [ WORD_EXT_W -1:0] rd_wide_x_dout_aux; + output [ WORD_EXT_W -1:0] rd_wide_y_dout_aux; - input rd_narrow_xy_ena; - input [ 1:0] rd_narrow_xy_bank; - input [ 7:0] rd_narrow_xy_addr; - output [18-1:0] rd_narrow_x_dout; - output [18-1:0] rd_narrow_y_dout; + input rd_narrow_xy_ena; + input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; + input [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rd_narrow_x_dout; + output [ WORD_EXT_W -1:0] rd_narrow_y_dout; - // // Internal Registers // @@ -82,43 +79,44 @@ module modexpng_storage_block reg rd_wide_xy_reg_ena_aux = 1'b0; reg rd_narrow_xy_reg_ena = 1'b0; - always @(posedge clk) begin - // - rd_wide_xy_reg_ena <= rst ? 1'b0 : rd_wide_xy_ena; - rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux; - rd_narrow_xy_reg_ena <= rst ? 1'b0 : rd_narrow_xy_ena; + always @(posedge clk) // - end - + if (rst) begin + rd_wide_xy_reg_ena <= 1'b0; + rd_wide_xy_reg_ena_aux <= 1'b0; + rd_narrow_xy_reg_ena <= 1'b0; + end else begin + rd_wide_xy_reg_ena <= rd_wide_xy_ena; + rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux; + rd_narrow_xy_reg_ena <= rd_narrow_xy_ena; + end // // Helper Signals // - wire [2+8-1:0] wr_wide_xy_offset; - wire [2+8-1:0] rd_wide_xy_offset[0:NUM_MULTS/2-1]; - wire [2+8-1:0] rd_wide_xy_offset_aux; - wire [2+8-1:0] wr_narrow_xy_offset; - wire [2+8-1:0] rd_narrow_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset[0:NUM_MULTS_HALF-1]; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset_aux; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset; - assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr}; assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux}; - assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr}; assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr}; - + assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr}; + assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr}; // // "Wide" Storage // genvar z; - generate for (z=0; z<(NUM_MULTS/2); z=z+1) + generate for (z=0; z<NUM_MULTS_HALF; z=z+1) begin : gen_wide_bram // - assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[8*z+:8]}; + assign rd_wide_xy_offset[z] = {1'b0, rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]}; // - ip_bram_18k wide_bram_x + modexpng_sdp_36k_wrapper wide_bram_x ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_wide_xy_ena), .wea (wr_wide_xy_ena), @@ -128,13 +126,12 @@ module modexpng_storage_block .enb (rd_wide_xy_ena), .regceb (rd_wide_xy_reg_ena), .addrb (rd_wide_xy_offset[z]), - .doutb (rd_wide_x_dout[18*z+:18]) + .doutb (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W]) ); // - ip_bram_18k wide_bram_y + modexpng_sdp_36k_wrapper wide_bram_y ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_wide_xy_ena), .wea (wr_wide_xy_ena), @@ -144,20 +141,18 @@ module modexpng_storage_block .enb (rd_wide_xy_ena), .regceb (rd_wide_xy_reg_ena), .addrb (rd_wide_xy_offset[z]), - .doutb (rd_wide_y_dout[18*z+:18]) + .doutb (rd_wide_y_dout[z*WORD_EXT_W +: WORD_EXT_W]) ); // end endgenerate - // // Auxilary Storage // - ip_bram_18k wide_bram_x_aux + modexpng_sdp_36k_wrapper wide_bram_x_aux ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_wide_xy_ena), .wea (wr_wide_xy_ena), @@ -170,10 +165,9 @@ module modexpng_storage_block .doutb (rd_wide_x_dout_aux) ); // - ip_bram_18k wide_bram_y_aux + modexpng_sdp_36k_wrapper wide_bram_y_aux ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_wide_xy_ena), .wea (wr_wide_xy_ena), @@ -186,14 +180,12 @@ module modexpng_storage_block .doutb (rd_wide_y_dout_aux) ); - // // "Narrow" Storage // - ip_bram_18k narrow_bram_x + modexpng_sdp_36k_wrapper narrow_bram_x ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_narrow_xy_ena), .wea (wr_narrow_xy_ena), @@ -206,10 +198,9 @@ module modexpng_storage_block .doutb (rd_narrow_x_dout) ); - ip_bram_18k narrow_bram_y + modexpng_sdp_36k_wrapper narrow_bram_y ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_narrow_xy_ena), .wea (wr_narrow_xy_ena), @@ -222,5 +213,4 @@ module modexpng_storage_block .doutb (rd_narrow_y_dout) ); - endmodule diff --git a/rtl/modexpng_storage_manager.v b/rtl/modexpng_storage_manager.v index fa1e4a1..e5ac83f 100644 --- a/rtl/modexpng_storage_manager.v +++ b/rtl/modexpng_storage_manager.v @@ -43,70 +43,69 @@ module modexpng_storage_manager // // Headers // - `include "../rtl_1/modexpng_parameters_x8_old.vh" + `include "modexpng_parameters.vh" // // Ports // - input clk; - input rst; + input clk; + input rst; - output wr_wide_xy_ena; - output [ 1:0] wr_wide_xy_bank; - output [ 7:0] wr_wide_xy_addr; - output [17:0] wr_wide_x_din; - output [17:0] wr_wide_y_din; + output wr_wide_xy_ena; + output [BANK_ADDR_W -1:0] wr_wide_xy_bank; + output [ OP_ADDR_W -1:0] wr_wide_xy_addr; + output [ WORD_EXT_W -1:0] wr_wide_x_din; + output [ WORD_EXT_W -1:0] wr_wide_y_din; - output wr_narrow_xy_ena; - output [ 1:0] wr_narrow_xy_bank; - output [ 7:0] wr_narrow_xy_addr; - output [17:0] wr_narrow_x_din; - output [17:0] wr_narrow_y_din; + output wr_narrow_xy_ena; + output [BANK_ADDR_W -1:0] wr_narrow_xy_bank; + output [ OP_ADDR_W -1:0] wr_narrow_xy_addr; + output [ WORD_EXT_W -1:0] wr_narrow_x_din; + output [ WORD_EXT_W -1:0] wr_narrow_y_din; - input ext_wide_xy_ena; - input [ 1:0] ext_wide_xy_bank; - input [ 7:0] ext_wide_xy_addr; - input [17:0] ext_wide_x_din; - input [17:0] ext_wide_y_din; + input ext_wide_xy_ena; + input [BANK_ADDR_W -1:0] ext_wide_xy_bank; + input [ OP_ADDR_W -1:0] ext_wide_xy_addr; + input [ WORD_EXT_W -1:0] ext_wide_x_din; + input [ WORD_EXT_W -1:0] ext_wide_y_din; - input ext_narrow_xy_ena; - input [ 1:0] ext_narrow_xy_bank; - input [ 7:0] ext_narrow_xy_addr; - input [17:0] ext_narrow_x_din; - input [17:0] ext_narrow_y_din; + input ext_narrow_xy_ena; + input [BANK_ADDR_W -1:0] ext_narrow_xy_bank; + input [ OP_ADDR_W -1:0] ext_narrow_xy_addr; + input [ WORD_EXT_W -1:0] ext_narrow_x_din; + input [ WORD_EXT_W -1:0] ext_narrow_y_din; input rcmb_wide_xy_ena; - input [ 1:0] rcmb_wide_xy_bank; + input [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; input [ 7:0] rcmb_wide_xy_addr; input [17:0] rcmb_wide_x_din; input [17:0] rcmb_wide_y_din; input rcmb_narrow_xy_ena; - input [ 1:0] rcmb_narrow_xy_bank; + input [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; input [ 7:0] rcmb_narrow_xy_addr; input [17:0] rcmb_narrow_x_din; input [17:0] rcmb_narrow_y_din; - - reg wr_wide_xy_ena_reg = 1'b0; - reg [ 1:0] wr_wide_xy_bank_reg; - reg [ 7:0] wr_wide_xy_addr_reg; - reg [17:0] wr_wide_x_din_reg; - reg [17:0] wr_wide_y_din_reg; + reg wr_wide_xy_ena_reg = 1'b0; + reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg; + reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_reg; + reg [ WORD_EXT_W -1:0] wr_wide_x_din_reg; + reg [ WORD_EXT_W -1:0] wr_wide_y_din_reg; - reg wr_narrow_xy_ena_reg = 1'b0; - reg [ 1:0] wr_narrow_xy_bank_reg; - reg [ 7:0] wr_narrow_xy_addr_reg; - reg [17:0] wr_narrow_x_din_reg; - reg [17:0] wr_narrow_y_din_reg; + reg wr_narrow_xy_ena_reg = 1'b0; + reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_reg; + reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_reg; + reg [ WORD_EXT_W -1:0] wr_narrow_x_din_reg; + reg [ WORD_EXT_W -1:0] wr_narrow_y_din_reg; task _update_wide; - input xy_ena; - input [ 1:0] xy_bank; - input [ 7:0] xy_addr; - input [17:0] x_din; - input [17:0] y_din; + input xy_ena; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; begin wr_wide_xy_ena_reg <= xy_ena; wr_wide_xy_bank_reg <= xy_bank; @@ -118,10 +117,10 @@ module modexpng_storage_manager task _update_narrow; input xy_ena; - input [ 1:0] xy_bank; - input [ 7:0] xy_addr; - input [17:0] x_din; - input [17:0] y_din; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; begin wr_narrow_xy_ena_reg <= xy_ena; wr_narrow_xy_bank_reg <= xy_bank; @@ -132,20 +131,20 @@ module modexpng_storage_manager endtask task enable_wide; - input [ 1:0] xy_bank; - input [ 7:0] xy_addr; - input [17:0] x_din; - input [17:0] y_din; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; begin _update_wide(1'b1, xy_bank, xy_addr, x_din, y_din); end endtask task enable_narrow; - input [ 1:0] xy_bank; - input [ 7:0] xy_addr; - input [17:0] x_din; - input [17:0] y_din; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; begin _update_narrow(1'b1, xy_bank, xy_addr, x_din, y_din); end @@ -153,13 +152,13 @@ module modexpng_storage_manager task disable_wide; begin - _update_wide(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}); + _update_wide(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE); end endtask task disable_narrow; begin - _update_narrow(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}); + _update_narrow(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE); end endtask |