diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 16:18:33 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 16:18:33 +0300 |
commit | 71f70252dfc7e41103dde420a721be8aa48486d5 (patch) | |
tree | 182c413b590d6056b02c5d20818c3385c83610e3 | |
parent | fde62e373fdfcefefb7da10757a3db933160c911 (diff) |
Redesigned core architecture, unified bank structure. All storage blocks now
have eight 4kbit entries and occupy one 36K BRAM tile.
25 files changed, 3389 insertions, 1042 deletions
diff --git a/rtl/_modexpng_mmm_dual_x8.v b/rtl/_modexpng_mmm_dual_x8.v
new file mode 100644
index 0000000..ffd5ccf
--- /dev/null
+++ b/rtl/_modexpng_mmm_dual_x8.v
@@ -0,0 +1,961 @@
+//
+// modexpng_mmm_dual_x8
+//
+// Sequencer for a three-phase multiplication ("square", "triangle" and
+// "rectangle" in the FSM state names) that processes an X and a Y operand in
+// parallel. The module
+//   * generates read enable/bank/address signals for the "wide" and "narrow"
+//     storage blocks,
+//   * drives the clock enables, B operands and mode_z word of two DSP array
+//     blocks (one for X, one for Y),
+//   * hands the DSP products to a recombinator block and forwards its outputs,
+//   * pulses rdct_ena for one cycle when the rectangle phase starts.
+//
+// NOTE(review): the output `rdy` and the inputs `sel_wide_out`,
+// `sel_narrow_out` and `rdct_rdy` are declared but never referenced in this
+// module body, so `rdy` is left undriven here. Confirm the intended handshake
+// against the instantiating module before relying on it.
+//
+module modexpng_mmm_dual_x8
+(
+    clk, rst,
+
+    ena, rdy,
+
+    ladder_mode,
+    word_index_last,
+    word_index_last_minus1,
+
+    sel_wide_in,
+    sel_narrow_in,
+    sel_wide_out,
+    sel_narrow_out,
+
+    rd_wide_xy_ena,
+    rd_wide_xy_ena_aux,
+    rd_wide_xy_bank,
+    rd_wide_xy_bank_aux,
+    rd_wide_xy_addr,
+    rd_wide_xy_addr_aux,
+    rd_wide_x_dout,
+    rd_wide_y_dout,
+    rd_wide_x_dout_aux,
+    rd_wide_y_dout_aux,
+
+    rd_narrow_xy_ena,
+    rd_narrow_xy_bank,
+    rd_narrow_xy_addr,
+    rd_narrow_x_dout,
+    rd_narrow_y_dout,
+
+    rcmb_wide_xy_bank,
+    rcmb_wide_xy_addr,
+    rcmb_wide_x_dout,
+    rcmb_wide_y_dout,
+    rcmb_wide_xy_valid,
+
+    rcmb_narrow_xy_bank,
+    rcmb_narrow_xy_addr,
+    rcmb_narrow_x_dout,
+    rcmb_narrow_y_dout,
+    rcmb_narrow_xy_valid,
+
+    rcmb_xy_bank,
+    rcmb_xy_addr,
+    rcmb_x_dout,
+    rcmb_y_dout,
+    rcmb_xy_valid,
+
+    rdct_ena, rdct_rdy
+);
+
+
+    //
+    // Headers
+    //
+    `include "../rtl/modexpng_mmm_fsm.vh"
+    `include "../rtl/modexpng_parameters.vh"
+
+
+    //
+    // Ports
+    //
+    input                                     clk;
+    input                                     rst;      // synchronous, active-high
+
+    input                                     ena;      // leaves IDLE when high
+    output                                    rdy;      // NOTE(review): never driven below
+
+    input                                     ladder_mode;            // selects Y (1) or X (0) narrow word for the merged B operand
+    input  [                 OP_ADDR_W -1:0]  word_index_last;
+    input  [                 OP_ADDR_W -1:0]  word_index_last_minus1;
+
+    input  [               BANK_ADDR_W -1:0]  sel_wide_in;
+    input  [               BANK_ADDR_W -1:0]  sel_narrow_in;
+    input  [               BANK_ADDR_W -1:0]  sel_wide_out;           // unused in this module
+    input  [               BANK_ADDR_W -1:0]  sel_narrow_out;         // unused in this module
+
+    output                                    rd_wide_xy_ena;
+    output                                    rd_wide_xy_ena_aux;
+    output [               BANK_ADDR_W -1:0]  rd_wide_xy_bank;
+    output [               BANK_ADDR_W -1:0]  rd_wide_xy_bank_aux;
+    output [NUM_MULTS_HALF * OP_ADDR_W -1:0]  rd_wide_xy_addr;
+    output [                 OP_ADDR_W -1:0]  rd_wide_xy_addr_aux;
+    input  [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout;
+    input  [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout;
+    input  [                 WORD_EXT_W -1:0] rd_wide_x_dout_aux;
+    input  [                 WORD_EXT_W -1:0] rd_wide_y_dout_aux;
+
+    output                                    rd_narrow_xy_ena;
+    output [               BANK_ADDR_W -1:0]  rd_narrow_xy_bank;
+    output [                 OP_ADDR_W -1:0]  rd_narrow_xy_addr;
+    input  [                WORD_EXT_W -1:0]  rd_narrow_x_dout;
+    input  [                WORD_EXT_W -1:0]  rd_narrow_y_dout;
+
+    output [               BANK_ADDR_W -1:0]  rcmb_wide_xy_bank;
+    output [                 OP_ADDR_W -1:0]  rcmb_wide_xy_addr;
+    output [                WORD_EXT_W -1:0]  rcmb_wide_x_dout;
+    output [                WORD_EXT_W -1:0]  rcmb_wide_y_dout;
+    output                                    rcmb_wide_xy_valid;
+
+    output [               BANK_ADDR_W -1:0]  rcmb_narrow_xy_bank;
+    output [                 OP_ADDR_W -1:0]  rcmb_narrow_xy_addr;
+    output [                WORD_EXT_W -1:0]  rcmb_narrow_x_dout;
+    output [                WORD_EXT_W -1:0]  rcmb_narrow_y_dout;
+    output                                    rcmb_narrow_xy_valid;
+
+    output [               BANK_ADDR_W -1:0]  rcmb_xy_bank;
+    output [                 OP_ADDR_W -1:0]  rcmb_xy_addr;
+    output [                WORD_EXT_W -1:0]  rcmb_x_dout;
+    output [                WORD_EXT_W -1:0]  rcmb_y_dout;
+    output                                    rcmb_xy_valid;
+
+    output                                    rdct_ena;
+    input                                     rdct_rdy;               // unused in this module
+
+
+    //
+    // FSM Declaration
+    //
+    reg  [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE;
+    reg  [MMM_FSM_STATE_W-1:0] fsm_state_next;
+
+    // state entered when a COL_N_BUSY state finishes: next column or holdoff
+    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_square;
+    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_triangle;
+    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_rectangle;
+
+
+    //
+    // FSM Process
+    //
+    always @(posedge clk)
+        //
+        if (rst) fsm_state <= MMM_FSM_STATE_IDLE;
+        else     fsm_state <= fsm_state_next;
+
+
+    //
+    // Storage Control Interface
+    //
+    reg                    wide_xy_ena     = 1'b0;
+    reg                    wide_xy_ena_aux = 1'b0;
+    reg  [BANK_ADDR_W-1:0] wide_xy_bank;
+    reg  [BANK_ADDR_W-1:0] wide_xy_bank_aux;
+    reg  [  OP_ADDR_W-1:0] wide_xy_addr[0:3];
+    reg  [  OP_ADDR_W-1:0] wide_xy_addr_aux;
+
+    reg                    narrow_xy_ena = 1'b0;
+    reg  [BANK_ADDR_W-1:0] narrow_xy_bank;
+    reg  [  OP_ADDR_W-1:0] narrow_xy_addr;
+    reg  [  OP_ADDR_W-1:0] narrow_xy_addr_dly;
+
+    assign rd_wide_xy_ena      = wide_xy_ena;
+    assign rd_wide_xy_ena_aux  = wide_xy_ena_aux;
+    assign rd_wide_xy_bank     = wide_xy_bank;
+    assign rd_wide_xy_bank_aux = wide_xy_bank_aux;
+    assign rd_wide_xy_addr_aux = wide_xy_addr_aux;
+
+    assign rd_narrow_xy_ena    = narrow_xy_ena;
+    assign rd_narrow_xy_bank   = narrow_xy_bank;
+    assign rd_narrow_xy_addr   = narrow_xy_addr;
+
+    // flatten the per-multiplier wide address registers into the output bus
+    genvar z;
+    generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
+        begin : gen_rd_wide_xy_addr
+            assign rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W] = wide_xy_addr[z];
+        end
+    endgenerate
+
+    //
+    // Column Counter
+    //
+    reg [4:0] col_index;        // current column index
+    reg [4:0] col_index_prev;   // delayed column index value
+    reg [4:0] col_index_last;   // index of the very last column
+    reg [4:0] col_index_next;   // precomputed next column index
+    reg       col_is_last;      // flag set during the very last column
+
+    always @(posedge clk)
+        //
+        col_index_prev <= col_index;
+
+    //
+    // Column Counter Increment Logic
+    //
+    // Keyed on fsm_state_next, so the counters are already valid in the first
+    // cycle of the corresponding *_INIT state.
+    //
+    always @(posedge clk)
+        //
+        case (fsm_state_next)
+            //
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
+                col_index      <= 5'd0;
+                col_index_last <= word_index_last[7:3];
+                col_index_next <= 5'd1;
+                col_is_last    <= 1'b0;
+
+            end
+            //
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
+                col_index      <= col_index_next;
+                col_is_last    <= col_index_next == col_index_last;
+                col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1;
+            end
+            //
+        endcase
+
+
+    //
+    // Completion Flags
+    //
+    // *_almost_done_comb is a combinational compare on the narrow address,
+    // the *_flop versions are that compare delayed by 1, 2 and 3 clocks.
+    //
+    wire square_almost_done_comb;
+    reg  square_almost_done_flop = 1'b0;
+    reg  square_surely_done_flop = 1'b0;
+
+    wire triangle_almost_done_comb;
+    reg  triangle_almost_done_flop = 1'b0;
+    reg  triangle_surely_done_flop = 1'b0;
+    reg  triangle_tardy_done_flop  = 1'b0;
+
+    wire rectangle_almost_done_comb;
+    reg  rectangle_almost_done_flop = 1'b0;
+    reg  rectangle_surely_done_flop = 1'b0;
+    reg  rectangle_tardy_done_flop  = 1'b0;
+
+    assign square_almost_done_comb    = narrow_xy_addr == word_index_last_minus1;
+    assign triangle_almost_done_comb  = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index);
+    assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
+
+    //
+    // Square Completion Flags
+    //
+    always @(posedge clk) begin
+        //
+        case (fsm_state)
+            //
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:
+                square_almost_done_flop <= square_almost_done_comb;
+            //
+            default:
+                square_almost_done_flop <= 1'b0;
+            //
+        endcase
+        //
+        square_surely_done_flop <= square_almost_done_flop;
+        //
+    end
+
+    //
+    // Triangle Completion Flags
+    //
+    always @(posedge clk) begin
+        //
+        case (fsm_state)
+            //
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
+                triangle_almost_done_flop <= triangle_almost_done_comb;
+            //
+            default:
+                triangle_almost_done_flop <= 1'b0;
+            //
+        endcase
+        //
+        triangle_surely_done_flop <= triangle_almost_done_flop;
+        triangle_tardy_done_flop  <= triangle_surely_done_flop;
+        //
+    end
+
+    //
+    // Rectangle Completion Flags
+    //
+    always @(posedge clk) begin
+        //
+        case (fsm_state)
+            //
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
+                rectangle_almost_done_flop <= rectangle_almost_done_comb;
+            //
+            default:
+                rectangle_almost_done_flop <= 1'b0;
+            //
+        endcase
+        //
+        rectangle_surely_done_flop <= rectangle_almost_done_flop;
+        rectangle_tardy_done_flop  <= rectangle_surely_done_flop;
+        //
+    end
+
+
+    //
+    // Narrow Storage Control Logic
+    //
+    // Only the enable is reset; address and bank are don't-care whenever the
+    // enable is low.
+    //
+    always @(posedge clk)
+        //
+        if (rst) narrow_xy_ena <= 1'b0;
+        else begin
+            //
+            // Narrow Address
+            //
+            case (fsm_state_next)
+                //
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:       narrow_xy_addr <= 8'd0;
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0;
+                //
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:     narrow_xy_addr <= 8'd0;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ?
+                                                                8'd0 : narrow_xy_addr + 1'b1;
+                //
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT:    narrow_xy_addr <= 8'd0;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
+                                                                8'd1 : narrow_xy_addr + 1'b1;
+                //
+                default:                                    narrow_xy_addr <= 8'dX;
+                //
+            endcase
+            //
+            // Narrow Bank
+            //
+            case (fsm_state_next)
+                //
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       narrow_xy_bank <= sel_narrow_in;
+                //
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ?
+                                                                BANK_NARROW_EXT : BANK_NARROW_COEFF;
+                //
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
+                                                                BANK_NARROW_EXT : BANK_NARROW_Q;
+                //
+                // don't-care sized to the register instead of a fixed 2-bit literal
+                default:                                    narrow_xy_bank <= {BANK_ADDR_W{1'bX}};
+                //
+            endcase
+            //
+            // Narrow Enable
+            //
+            case (fsm_state_next)
+                //
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:       narrow_xy_ena <= 1'b1;
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       narrow_xy_ena <= ~square_almost_done_flop;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     narrow_xy_ena <= 1'b1;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:    narrow_xy_ena <= 1'b1;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    narrow_xy_ena <= ~rectangle_surely_done_flop;
+                //
+                default:                                    narrow_xy_ena <= 1'b0;
+                //
+            endcase
+            //
+        end
+
+
+    //
+    // Wide Storage Control Logic
+    //
+
+    // initial in-column offsets of the four wide address registers: 1, 3, 5, 7
+    wire [2:0] wide_offset_rom[0:3];
+
+    generate for (z=1; z<NUM_MULTS; z=z+2)
+        begin : gen_wide_offset_rom
+            assign wide_offset_rom[(z-1)/2] = z[2:0];
+        end
+    endgenerate
+
+    // Decrement the address, wrapping from zero back to wide_xy_addr_last.
+    function [7:0] wide_xy_addr_next;
+        input [7:0] wide_xy_addr_current;
+        input [7:0] wide_xy_addr_last;
+        begin
+            if (wide_xy_addr_current > 8'd0)
+                wide_xy_addr_next = wide_xy_addr_current - 1'b1;
+            else
+                wide_xy_addr_next = wide_xy_addr_last;
+        end
+    endfunction
+
+    integer j;
+    always @(posedge clk)
+        //
+        if (rst) begin
+            wide_xy_ena     <= 1'b0;
+            wide_xy_ena_aux <= 1'b0;
+        end else begin
+            //
+            // Wide Address
+            //
+            for (j=0; j<(NUM_MULTS/2); j=j+1)
+                //
+                case (fsm_state_next)
+                    //
+                    // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+                    //
+                    MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:       wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:       wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                    MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                    MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                    MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+                    //
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:     wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:     wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+                    //
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:    wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT:    wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+                    //
+                    default:                                    wide_xy_addr[j] <= 8'dX;
+                endcase
+            //
+            // Wide Aux Address
+            //
+            case (fsm_state_next)
+                //
+                // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+                //
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:       wide_xy_addr_aux <= {5'd0, 3'd1};
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:       wide_xy_addr_aux <= {5'd0, 3'd1};
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
+                //
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:     wide_xy_addr_aux <= {5'd0, 3'd1};
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:     wide_xy_addr_aux <= {5'd0, 3'd1};
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
+                //
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:    wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0};
+                // during the rectangle phase the aux port follows the recombinator output address
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:       wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX;
+                                                            //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ?
+                                                            //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4];
+                //
+                default:                                    wide_xy_addr_aux <= 8'dX;
+            endcase
+            //
+            // Wide Bank
+            //
+            case (fsm_state_next)
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       wide_xy_bank <= sel_wide_in;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     wide_xy_bank <= BANK_WIDE_L; // ? combine ?
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     wide_xy_bank <= BANK_WIDE_L;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    wide_xy_bank <= BANK_WIDE_N;
+                default:                                    wide_xy_bank <= 3'bXXX;
+            endcase
+            //
+            // Wide Aux Bank
+            //
+            case (fsm_state_next)
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       wide_xy_bank_aux <= sel_wide_in;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     wide_xy_bank_aux <= BANK_WIDE_H;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     wide_xy_bank_aux <= BANK_WIDE_L;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:
+                    if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's)
+                        case (rcmb_xy_bank)
+                            BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
+                            BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
+                            //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX
+                            default:      wide_xy_bank_aux <= 3'bXXX;
+                        endcase
+                    else                  wide_xy_bank_aux <= 3'bXXX;
+                default:                                    wide_xy_bank_aux <= 3'bXXX;
+            endcase
+            //
+            // Wide Enable
+            //
+            case (fsm_state_next)
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    wide_xy_ena <= 1'b1;
+                default:                                    wide_xy_ena <= 1'b0;
+            endcase
+            //
+            // Wide Aux Enable
+            //
+            case (fsm_state_next)
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     wide_xy_ena_aux <= 1'b1;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:    wide_xy_ena_aux <= 1'b0;//1'b1;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:       wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML);
+                default:                                    wide_xy_ena_aux <= 1'b0;
+            endcase
+            //
+        end
+
+
+    //
+    // Delay Lines
+    //
+    always @(posedge clk)
+        //
+        narrow_xy_addr_dly <= narrow_xy_addr;
+
+
+    //
+    // DSP Array Logic
+    //
+    reg dsp_xy_ce_a     = 1'b0;
+    reg dsp_xy_ce_b     = 1'b0;
+    reg dsp_xy_ce_b_dly = 1'b0;
+    reg dsp_xy_ce_m     = 1'b0;
+    reg dsp_xy_ce_p     = 1'b0;
+    reg dsp_xy_ce_mode  = 1'b0;
+
+    reg  [9   -1:0] dsp_xy_mode_z = {9{1'b1}};
+
+    wire [5*18-1:0] dsp_x_a;
+    wire [5*18-1:0] dsp_y_a;
+
+    reg  [1*16-1:0] dsp_x_b;
+    reg  [1*16-1:0] dsp_y_b;
+
+    reg  [     1:0] dsp_xy_b_carry;
+
+    wire [9*47-1:0] dsp_x_p;
+    wire [9*47-1:0] dsp_y_p;
+
+    //generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+        //begin : gen_dsp_xy_a_split
+            //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z];
+            //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z];
+        //end
+    //endgenerate
+
+    // A operand bus: the aux wide word sits above the regular wide words
+    assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout};
+    assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout};
+
+    //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux;
+    //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux;
+
+    always @(posedge clk)
+        //
+        dsp_xy_ce_b_dly <= dsp_xy_ce_b;
+
+
+    modexpng_dsp_array_block dsp_array_block_x
+    (
+        .clk        (clk),
+
+        .ce_a       (dsp_xy_ce_a),
+        .ce_b       (dsp_xy_ce_b),
+        .ce_m       (dsp_xy_ce_m),
+        .ce_p       (dsp_xy_ce_p),
+        .ce_mode    (dsp_xy_ce_mode),
+
+        .mode_z     (dsp_xy_mode_z),
+
+        .a          (dsp_x_a),
+        .b          (dsp_x_b),
+        .p          (dsp_x_p)
+    );
+
+    modexpng_dsp_array_block dsp_array_block_y
+    (
+        .clk        (clk),
+
+        .ce_a       (dsp_xy_ce_a),
+        .ce_b       (dsp_xy_ce_b),
+        .ce_m       (dsp_xy_ce_m),
+        .ce_p       (dsp_xy_ce_p),
+        .ce_mode    (dsp_xy_ce_mode),
+
+        .mode_z     (dsp_xy_mode_z),
+
+        .a          (dsp_y_a),
+        .b          (dsp_y_b),
+        .p          (dsp_y_p)
+    );
+
+
+
+
+    //
+    // DSP Control Logic
+    //
+    // The DSP clock enables are delayed copies of the narrow read enable.
+    //
+    reg narrow_xy_ena_dly1 = 1'b0;
+    reg narrow_xy_ena_dly2 = 1'b0;
+
+    always @(posedge clk)
+        //
+        if (rst) begin
+            //
+            narrow_xy_ena_dly1 <= 1'b0;
+            narrow_xy_ena_dly2 <= 1'b0;
+            //
+            dsp_xy_ce_a    <= 1'b0;
+            dsp_xy_ce_b    <= 1'b0;
+            dsp_xy_ce_m    <= 1'b0;
+            dsp_xy_ce_p    <= 1'b0;
+            dsp_xy_ce_mode <= 1'b0;
+            //
+        end else begin
+            //
+            narrow_xy_ena_dly1 <= narrow_xy_ena;
+            narrow_xy_ena_dly2 <= narrow_xy_ena_dly1;
+            //
+            dsp_xy_ce_a    <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2;
+            dsp_xy_ce_b    <= narrow_xy_ena_dly2;
+            dsp_xy_ce_m    <= dsp_xy_ce_b_dly;
+            dsp_xy_ce_p    <= dsp_xy_ce_m;
+            dsp_xy_ce_mode <= dsp_xy_ce_b_dly;
+            //
+        end
+
+    //
+    // DSP Feed Logic
+    //
+    // dsp_merge_xy_b is set when the square phase starts and cleared when the
+    // triangle phase starts; it keeps its value in every other state.
+    //
+    reg dsp_merge_xy_b;
+
+    always @(posedge clk)
+        //
+        case (fsm_state)
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG:   dsp_merge_xy_b <= 1'b1;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0;
+        endcase
+
+    //
+    // On-the-fly Carry Recombination
+    //
+    wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry};
+    wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry};
+    wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry;
+
+    always @(posedge clk)
+        //
+        if (narrow_xy_ena_dly2) begin // rewrite
+            //
+            if (!dsp_merge_xy_b) begin
+                // independent operands: low 16 bits of each narrow word, no carry
+                dsp_x_b        <= rd_narrow_x_dout[15:0];
+                dsp_y_b        <= rd_narrow_y_dout[15:0];
+                dsp_xy_b_carry <= 2'b00;
+            end else begin
+                // merged operand: both arrays get the same word, the upper two
+                // bits are carried into the next word
+                dsp_x_b        <= rd_narrow_xy_dout_carry_mux[15:0];
+                dsp_y_b        <= rd_narrow_xy_dout_carry_mux[15:0];
+                dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16];
+            end
+            //
+        end else begin
+            //
+            dsp_x_b <= {16{1'bX}};
+            dsp_y_b <= {16{1'bX}};
+            //
+            dsp_xy_b_carry <= 2'b00;
+            //
+        end
+
+
+    // mode_z is computed four clocks ahead and shifted towards the DSP blocks
+    reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}};
+    reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}};
+    reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
+    reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}};
+
+    // All-ones, except that one of the low eight bits (selected by the narrow
+    // address offset) is cleared while the narrow address is in the given column.
+    function  [NUM_MULTS:0] calc_mac_mode_z_square;
+        input [        4:0] col_index_value;
+        input [        7:0] narrow_xy_addr_value;
+        begin
+            if (narrow_xy_addr_value[7:3] == col_index_value)
+                case (narrow_xy_addr_value[2:0])
+                    3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
+                    3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
+                    3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
+                    3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
+                    3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
+                    3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
+                    3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
+                    3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
+                endcase
+            else
+                calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
+        end
+    endfunction
+
+    // Same mapping as calc_mac_mode_z_square, kept as a separate function.
+    function  [NUM_MULTS:0] calc_mac_mode_z_rectangle;
+        input [        4:0] col_index_value;
+        input [        7:0] narrow_xy_addr_value;
+        begin
+            if (narrow_xy_addr_value[7:3] == col_index_value)
+                case (narrow_xy_addr_value[2:0])
+                    3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110};
+                    3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101};
+                    3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011};
+                    3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111};
+                    3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111};
+                    3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111};
+                    3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111};
+                    3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111};
+                endcase
+            else
+                calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}};
+        end
+    endfunction
+
+    always @(posedge clk)
+        //
+        case (fsm_state_next)
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:       dsp_xy_mode_z_adv4 <= {9{1'b0}};
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= {9{1'b1}};
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:    dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);
+            default:                                    dsp_xy_mode_z_adv4 <= {9{1'b1}};
+        endcase
+
+    always @(posedge clk) begin
+        dsp_xy_mode_z <= dsp_xy_mode_z_adv1;
+        //
+        dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2;
+        dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3;
+        dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4;
+    end
+
+
+
+
+
+    //
+    // Recombinator
+    //
+    reg  rcmb_ena = 1'b0;
+    wire rcmb_rdy;
+
+    modexpng_recombinator_block recombinator_block
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+
+        .ena                    (rcmb_ena),
+        .rdy                    (rcmb_rdy),
+
+        .mmm_fsm_state_next     (fsm_state_next),
+
+        .word_index_last        (word_index_last),
+
+        .dsp_xy_ce_p            (dsp_xy_ce_p),
+        .dsp_x_p                (dsp_x_p),
+        .dsp_y_p                (dsp_y_p),
+
+        .col_index              (col_index),
+        .col_index_last         (col_index_last),
+
+        .rd_narrow_xy_addr      (narrow_xy_addr),
+        .rd_narrow_xy_bank      (narrow_xy_bank),
+
+        .rcmb_wide_xy_bank      (rcmb_wide_xy_bank),
+        .rcmb_wide_xy_addr      (rcmb_wide_xy_addr),
+        .rcmb_wide_x_dout       (rcmb_wide_x_dout),
+        .rcmb_wide_y_dout       (rcmb_wide_y_dout),
+        .rcmb_wide_xy_valid     (rcmb_wide_xy_valid),
+
+        .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank),
+        .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr),
+        .rcmb_narrow_x_dout     (rcmb_narrow_x_dout),
+        .rcmb_narrow_y_dout     (rcmb_narrow_y_dout),
+        .rcmb_narrow_xy_valid   (rcmb_narrow_xy_valid),
+
+        .rdct_narrow_xy_bank    (rcmb_xy_bank),
+        .rdct_narrow_xy_addr    (rcmb_xy_addr),
+        .rdct_narrow_x_dout     (rcmb_x_dout),
+        .rdct_narrow_y_dout     (rcmb_y_dout),
+        .rdct_narrow_xy_valid   (rcmb_xy_valid)
+
+    );
+
+
+    //
+    // Recombinator Enable Logic
+    //
+    // One-cycle pulse while ce_a is already high but ce_b/ce_m/ce_p are not.
+    //
+    always @(posedge clk)
+        //
+        if (rst) rcmb_ena <= 1'b0;
+        else     rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p;
+
+
+    //
+    // Handy Completion Flags
+    //
+    wire square_done    = square_surely_done_flop;
+    wire triangle_done  = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop;
+    wire rectangle_done = rectangle_tardy_done_flop;
+
+
+    //
+    // FSM Transition Logic
+    //
+    assign fsm_state_after_mult_square    = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF    : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT;
+    assign fsm_state_after_mult_triangle  = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF  : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
+    assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
+
+    // Each phase runs INIT -> TRIG -> BUSY per column; after the last column
+    // the FSM waits in HOLDOFF for rcmb_rdy. MMM_FSM_STATE_STOP has no case
+    // item of its own and returns to IDLE through the default.
+    always @* begin
+        //
+        fsm_state_next = MMM_FSM_STATE_IDLE;
+        //
+        case (fsm_state)
+            MMM_FSM_STATE_IDLE:                         fsm_state_next = ena            ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT    : MMM_FSM_STATE_IDLE;
+
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:       fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG    ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG:       fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY    ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:       fsm_state_next = square_done    ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT    : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY;
+
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:       fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG    ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:       fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY    ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       fsm_state_next = square_done    ? fsm_state_after_mult_square             : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+
+            MMM_FSM_STATE_MULT_SQUARE_HOLDOFF:          fsm_state_next = rcmb_rdy       ? MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT  : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;
+
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:     fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG  ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG:     fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY  ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:     fsm_state_next = triangle_done  ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT  : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
+
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:     fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG  ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY  ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     fsm_state_next = triangle_done  ? fsm_state_after_mult_triangle           : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
+
+            MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF:        fsm_state_next = rcmb_rdy       ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;
+
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:    fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG:    fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY:    fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
+
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT:    fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:    fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle          : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
+
+            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:       fsm_state_next = rcmb_rdy       ? MMM_FSM_STATE_STOP                      : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;
+
+            default:                                    fsm_state_next =                  MMM_FSM_STATE_IDLE                      ;
+
+        endcase
+        //
+    end
+
+
+    //
+    // Reductor Control Logic
+    //
+    // rdct_ena is a one-cycle pulse issued in the clock after the FSM sits in
+    // MULT_RECTANGLE_COL_0_INIT.
+    //
+    reg rdct_ena_reg = 1'b0;
+
+    assign rdct_ena = rdct_ena_reg;
+
+    always @(posedge clk)
+        //
+        // synchronous reset added so the pulse cannot fire while rst is held,
+        // matching the other enable registers in this module
+        if (rst) rdct_ena_reg <= 1'b0;
+        else case (fsm_state)
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
+            default:                                 rdct_ena_reg <= 1'b0;
+        endcase
+
+
+
+endmodule
diff --git a/rtl/_modexpng_mmm_fsm.vh b/rtl/_modexpng_mmm_fsm.vh
new file mode 100644
index 0000000..1c2a57b
--- /dev/null
+++ b/rtl/_modexpng_mmm_fsm.vh
@@ -0,0 +1,43 @@
+localparam MMM_FSM_STATE_W = 32;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_IDLE = 0;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_1 = 1;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_2 = 2;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_3 = 3;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_1 = 4;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_2 = 5;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_3 = 6;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT = 11;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT = 14;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_HOLDOFF = 17;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_STOP = 40; diff --git a/rtl/_modexpng_recombinator_block.v b/rtl/_modexpng_recombinator_block.v new file mode 100644 index 0000000..61bf734 --- /dev/null +++ b/rtl/_modexpng_recombinator_block.v @@ -0,0 +1,1225 @@ +module modexpng_recombinator_block +( + clk, rst, + ena, rdy, + mmm_fsm_state_next, + word_index_last, + dsp_xy_ce_p, + dsp_x_p, dsp_y_p, + col_index, col_index_last, + rd_narrow_xy_addr, rd_narrow_xy_bank, + rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid, + rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid, + rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid +); + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + `include "../rtl/modexpng_mmm_fsm.vh" + + + input clk; + input rst; + input ena; + output rdy; + input [ MMM_FSM_STATE_W -1:0] mmm_fsm_state_next; + input [ OP_ADDR_W -1:0] word_index_last; + input dsp_xy_ce_p; + input [(NUM_MULTS+1) * MAC_W -1:0] dsp_x_p; + input [(NUM_MULTS+1) * MAC_W -1:0] dsp_y_p; + input [ COL_INDEX_W -1:0] col_index; + input [ COL_INDEX_W -1:0] col_index_last; + + input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; + input [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + + output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_wide_x_dout; + output [ WORD_EXT_W -1:0] 
rcmb_wide_y_dout; + output rcmb_wide_xy_valid; + + output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout; + output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout; + output rcmb_narrow_xy_valid; + + output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rdct_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rdct_narrow_x_dout; + output [ WORD_EXT_W -1:0] rdct_narrow_y_dout; + output rdct_narrow_xy_valid; + + + // + // Latches + // + reg [MAC_W-1:0] dsp_x_p_latch[0:NUM_MULTS]; + reg [MAC_W-1:0] dsp_y_p_latch[0:NUM_MULTS]; + + + // + // Mapping + // + wire [MAC_W-1:0] dsp_x_p_split[0:NUM_MULTS]; + wire [MAC_W-1:0] dsp_y_p_split[0:NUM_MULTS]; + + genvar z; + generate for (z=0; z<NUM_MULTS_AUX; z=z+1) + begin : gen_dsp_xy_p_split + assign dsp_x_p_split[z] = dsp_x_p[z*MAC_W +: MAC_W]; + assign dsp_y_p_split[z] = dsp_y_p[z*MAC_W +: MAC_W]; + end + endgenerate + + + // + // Delays + // + reg dsp_xy_ce_p_dly1 = 1'b0; + + always @(posedge clk) + // + if (rst) dsp_xy_ce_p_dly1 <= 1'b0; + else dsp_xy_ce_p_dly1 <= dsp_xy_ce_p; + + + // + // Registers + // + + // valid + reg xy_valid_lsb = 1'b0; + reg xy_aux_lsb = 1'b0; + reg xy_valid_msb = 1'b0; + + // bitmap + reg [NUM_MULTS-1:0] xy_bitmap_lsb = {NUM_MULTS{1'b0}}; + reg [NUM_MULTS-1:0] xy_bitmap_msb = {NUM_MULTS{1'b0}}; + + // index + reg [2:0] xy_index_lsb = 3'dX; + + // purge + reg xy_purge_lsb = 1'b0; + reg xy_purge_msb = 1'b0; + + // valid - latch + reg xy_valid_latch_lsb = 1'b0; + + // aux - latch + reg xy_aux_latch_lsb = 1'b0; + + // bitmap - latch + reg [NUM_MULTS-1:0] xy_bitmap_latch_lsb = MULT_BITMAP_ZEROES; + reg [NUM_MULTS-1:0] xy_bitmap_latch_msb = MULT_BITMAP_ZEROES; + + // index - latch + reg [MAC_INDEX_W-1:0] xy_index_latch_lsb = MAC_INDEX_DONT_CARE; + + // purge - index + reg xy_purge_latch_lsb = 1'b0; + reg xy_purge_latch_msb = 1'b0; + + // + reg xy_valid_lsb_adv[1:6]; + reg xy_valid_msb_adv[1:6]; + reg 
xy_aux_lsb_adv[1:6]; + reg [NUM_MULTS-1:0] xy_bitmap_lsb_adv[1:6]; + reg [NUM_MULTS-1:0] xy_bitmap_msb_adv[1:6]; + reg [MAC_INDEX_W-1:0] xy_index_lsb_adv[1:6]; + reg [MAC_INDEX_W-1:0] xy_index_msb_adv[1:6]; + reg xy_purge_lsb_adv[1:6]; + reg xy_purge_msb_adv[1:6]; + + reg [1:0] rcmb_mode; + + always @(posedge clk) + // + if (ena) + // + case (mmm_fsm_state_next) + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3; + default: rcmb_mode <= 2'd0; + endcase + + + integer i; + // initialise every stage of the [1:6] delay lines, including stage 6: + // its power-up value is shifted into stage 5 on the first clock edge + initial for (i=1; i<=6; i=i+1) begin + xy_valid_lsb_adv[i] = 1'b0; + xy_valid_msb_adv[i] = 1'b0; + xy_aux_lsb_adv[i] = 1'b0; + xy_bitmap_lsb_adv[i] = {8{1'b0}}; + xy_bitmap_msb_adv[i] = {8{1'b0}}; + xy_index_lsb_adv[i] = 3'dX; + xy_index_msb_adv[i] = 3'dX; + xy_purge_lsb_adv[i] = 1'b0; + xy_purge_msb_adv[i] = 1'b0; + end + + function calc_square_triangle_valid_lsb; + // + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [ OP_ADDR_W-1:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + calc_square_triangle_valid_lsb = 1'b1; + else + calc_square_triangle_valid_lsb = 1'b0; + // + end + endfunction + + function calc_square_valid_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [ OP_ADDR_W-1:0] narrow_xy_addr_value; + begin + calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_triangle_valid_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_triangle_valid_lsb = 
calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_rectangle_valid_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT; + else + calc_rectangle_valid_lsb = 1'b0; + // + end + endfunction + + function calc_triangle_aux_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_bank_value == BANK_NARROW_EXT) + calc_triangle_aux_lsb = 1'b1; + else + calc_triangle_aux_lsb = 1'b0; + // + end + endfunction + + function [7:0] calc_square_triangle_bitmap_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + // + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_square_triangle_bitmap_lsb = 8'b00000001; + 3'b001: calc_square_triangle_bitmap_lsb = 8'b00000010; + 3'b010: calc_square_triangle_bitmap_lsb = 8'b00000100; + 3'b011: calc_square_triangle_bitmap_lsb = 8'b00001000; + 3'b100: calc_square_triangle_bitmap_lsb = 8'b00010000; + 3'b101: calc_square_triangle_bitmap_lsb = 8'b00100000; + 3'b110: calc_square_triangle_bitmap_lsb = 8'b01000000; + 3'b111: calc_square_triangle_bitmap_lsb = 8'b10000000; + endcase + // + else + calc_square_triangle_bitmap_lsb = {8{1'b0}}; + // + end + endfunction + + function [7:0] calc_square_bitmap_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] 
narrow_xy_addr_value; + begin + calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [7:0] calc_triangle_bitmap_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [7:0] calc_rectangle_bitmap_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT)) + // + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001; + 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010; + 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100; + 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000; + 3'b100: calc_rectangle_bitmap_lsb = 8'b00010000; + 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000; + 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000; + 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000; + endcase + // + else + calc_rectangle_bitmap_lsb = {8{1'b0}}; + // + end + endfunction + + /* + * These can be simplified (the difference between square/triangle and + * rectangle is that the bank is checked or not). A universal function would + * accept a parameter that tells it whether it should check the bank or not. + * Let's do it later, too early to optimize now, it seems. 
+ * + * + */ + + function [2:0] calc_square_triangle_index_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + // + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_square_triangle_index_lsb = 3'd0; + 3'b001: calc_square_triangle_index_lsb = 3'd1; + 3'b010: calc_square_triangle_index_lsb = 3'd2; + 3'b011: calc_square_triangle_index_lsb = 3'd3; + 3'b100: calc_square_triangle_index_lsb = 3'd4; + 3'b101: calc_square_triangle_index_lsb = 3'd5; + 3'b110: calc_square_triangle_index_lsb = 3'd6; + 3'b111: calc_square_triangle_index_lsb = 3'd7; + endcase + // + else + calc_square_triangle_index_lsb = 3'dX; + // + end + endfunction + + function [2:0] calc_square_index_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [2:0] calc_triangle_index_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function [2:0] calc_rectangle_index_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] slim_bram_xy_bank_value; + input [7:0] slim_bram_xy_addr_value; + begin + // + if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_NARROW_EXT)) + // + case (slim_bram_xy_addr_value[2:0]) + 3'b000: 
calc_rectangle_index_lsb = 3'd0; + 3'b001: calc_rectangle_index_lsb = 3'd1; + 3'b010: calc_rectangle_index_lsb = 3'd2; + 3'b011: calc_rectangle_index_lsb = 3'd3; + 3'b100: calc_rectangle_index_lsb = 3'd4; + 3'b101: calc_rectangle_index_lsb = 3'd5; + 3'b110: calc_rectangle_index_lsb = 3'd6; + 3'b111: calc_rectangle_index_lsb = 3'd7; + endcase + // + else + calc_rectangle_index_lsb = 3'dX; + // + end + endfunction + + function calc_square_rectangle_purge_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + // + if (narrow_xy_addr_value[7:3] == col_index_value) + calc_square_rectangle_purge_lsb = narrow_xy_addr_value[7:3] == col_index_last_value; + else + calc_square_rectangle_purge_lsb = 1'b0; + // + end + endfunction + + function calc_square_purge_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_rectangle_purge_lsb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + begin + calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); + end + endfunction + + function calc_square_valid_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if (narrow_xy_addr_value == index_last_value) + calc_square_valid_msb = 1'b1; + else + calc_square_valid_msb = 
1'b0; + // + end + endfunction + + function calc_rectangle_valid_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) + calc_rectangle_valid_msb = 1'b1; + else + calc_rectangle_valid_msb = 1'b0; + // + end + endfunction + + function [7:0] calc_square_bitmap_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if (narrow_xy_addr_value == index_last_value) begin + calc_square_bitmap_msb[7] = col_index_value != col_index_last_value; + calc_square_bitmap_msb[6:0] = 7'b1111111; + end else + calc_square_bitmap_msb[7:0] = 8'b00000000; + // + end + endfunction + + function [7:0] calc_rectangle_bitmap_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) begin + calc_rectangle_bitmap_msb[7:0] = 8'b11111111; + end else + calc_rectangle_bitmap_msb[7:0] = 8'b00000000; + // + end + endfunction + + function calc_square_purge_msb; + input [COL_INDEX_W-1:0] col_index_value; + input [COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if (narrow_xy_addr_value == index_last_value) + calc_square_purge_msb = col_index_value == col_index_last_value; + else + calc_square_purge_msb = 1'b0; + // + end + endfunction + + function calc_rectangle_purge_msb; + input [COL_INDEX_W-1:0] col_index_value; + input 
[COL_INDEX_W-1:0] col_index_last_value; + input [BANK_ADDR_W-1:0] narrow_xy_bank_value; + input [7:0] narrow_xy_addr_value; + input [7:0] index_last_value; + begin + // + if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) + calc_rectangle_purge_msb = col_index_value == col_index_last_value; + else + calc_rectangle_purge_msb = 1'b0; + // + end + endfunction + + + reg rcmb_xy_lsb_ce = 1'b0; + reg rcmb_xy_lsb_ce_aux; + reg [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000; + wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0]; + reg rcmb_xy_lsb_clr; + + reg [46:0] rcmb_x_lsb_din; + reg [46:0] rcmb_y_lsb_din; + wire [15:0] rcmb_x_lsb_dout; + wire [15:0] rcmb_y_lsb_dout; + + reg rcmb_xy_msb_ce = 1'b0; + reg [ 1:0] rcmb_xy_msb_ce_purge = 2'b00; + wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0]; + reg rcmb_xy_msb_clr; + + reg [46:0] rcmb_x_msb_din; + reg [46:0] rcmb_y_msb_din; + wire [15:0] rcmb_x_msb_dout; + wire [15:0] rcmb_y_msb_dout; + + modexpng_recombinator_cell recomb_x_lsb + ( + .clk (clk), + .ce (rcmb_xy_lsb_ce_combined), + .clr (rcmb_xy_lsb_clr), + .din (rcmb_x_lsb_din), + .dout (rcmb_x_lsb_dout) + ); + modexpng_recombinator_cell recomb_y_lsb + ( + .clk (clk), + .ce (rcmb_xy_lsb_ce_combined), + .clr (rcmb_xy_lsb_clr), + .din (rcmb_y_lsb_din), + .dout (rcmb_y_lsb_dout) + ); + + modexpng_recombinator_cell recomb_x_msb + ( + .clk (clk), + .ce (rcmb_xy_msb_ce_combined), + .clr (rcmb_xy_msb_clr), + .din (rcmb_x_msb_din), + .dout (rcmb_x_msb_dout) + ); + + modexpng_recombinator_cell recomb_y_msb + ( + .clk (clk), + .ce (rcmb_xy_msb_ce_combined), + .clr (rcmb_xy_msb_clr), + .din (rcmb_y_msb_din), + .dout (rcmb_y_msb_dout) + ); + + always @(posedge clk) begin + // + rcmb_xy_lsb_ce <= xy_valid_latch_lsb; + rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb; + rcmb_xy_msb_ce <= xy_bitmap_latch_msb[0]; + // + if (xy_purge_latch_lsb) + rcmb_xy_lsb_ce_purge <= 3'b111; + else + rcmb_xy_lsb_ce_purge <= 
{1'b0, rcmb_xy_lsb_ce_purge[2:1]}; + // + // reload the MSB purge shift register; must be non-blocking like the shift + // in the else branch, since other clocked blocks sample bit 0 on this edge + if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1]) + rcmb_xy_msb_ce_purge <= 2'b11; + else + rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]}; + // + end + + + always @(posedge clk) + // + if (ena) begin + rcmb_xy_lsb_clr <= 1'b1; + rcmb_xy_msb_clr <= 1'b1; + end else begin + if (rcmb_xy_lsb_ce) rcmb_xy_lsb_clr <= 1'b0; + if (rcmb_xy_msb_ce) rcmb_xy_msb_clr <= 1'b0; + end + + always @(posedge clk) + // + if (xy_valid_latch_lsb) begin + rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb]; + rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb]; + end else if (xy_aux_latch_lsb) begin + rcmb_x_lsb_din <= dsp_x_p_latch[8]; + rcmb_y_lsb_din <= dsp_y_p_latch[8]; + end else begin + rcmb_x_lsb_din <= {47{1'b0}}; + rcmb_y_lsb_din <= {47{1'b0}}; + end + + always @(posedge clk) + // + if (xy_bitmap_latch_msb[0]) begin + rcmb_x_msb_din <= dsp_x_p_latch[0]; + rcmb_y_msb_din <= dsp_y_p_latch[0]; + end else begin + rcmb_x_msb_din <= {47{1'b0}}; + rcmb_y_msb_din <= {47{1'b0}}; + end + + + always @(posedge clk) + // + case (mmm_fsm_state_next) + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_aux_lsb_adv [6] <= 1'b0; + xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + // + xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, 
rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + // + end + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_aux_lsb_adv [6] <= calc_triangle_aux_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_purge_lsb_adv [6] <= 1'b0; + // + xy_valid_msb_adv [6] <= 1'b0; + xy_bitmap_msb_adv[6] <= {8{1'b0}}; + xy_purge_msb_adv [6] <= 1'b0; + // + end + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_aux_lsb_adv [6] <= 1'b0; + xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr); + // + xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + xy_purge_msb_adv [6] <= 
calc_rectangle_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last); + // + end + // + default: begin + // + xy_valid_lsb_adv [6] <= 1'b0; + xy_aux_lsb_adv [6] <= 1'b0; + xy_bitmap_lsb_adv[6] <= {8{1'b0}}; + xy_index_lsb_adv [6] <= 3'dX; + xy_purge_lsb_adv [6] <= 1'b0; + // + xy_valid_msb_adv [6] <= 1'b0; + xy_bitmap_msb_adv[6] <= {8{1'b0}}; + xy_purge_msb_adv [6] <= 1'b0; + // + end + // + endcase + + + always @(posedge clk) begin + // + xy_valid_lsb <= xy_valid_lsb_adv [1]; + xy_aux_lsb <= xy_aux_lsb_adv [1]; + xy_bitmap_lsb <= xy_bitmap_lsb_adv[1]; + xy_index_lsb <= xy_index_lsb_adv [1]; + xy_purge_lsb <= xy_purge_lsb_adv [1]; + // + xy_valid_latch_lsb <= xy_valid_lsb; + xy_aux_latch_lsb <= xy_aux_lsb; + xy_bitmap_latch_lsb <= xy_bitmap_lsb; + xy_index_latch_lsb <= xy_index_lsb; + xy_purge_latch_lsb <= xy_purge_lsb; + // + xy_valid_msb <= xy_valid_msb_adv[1]; + xy_bitmap_msb <= xy_bitmap_msb_adv[1]; + xy_purge_msb <= xy_purge_msb_adv[1]; + // + if (xy_valid_msb) begin + xy_bitmap_latch_msb <= xy_bitmap_msb; + xy_purge_latch_msb <= xy_purge_msb; + end else begin + xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[7:1]}; + end + // + // + for (i=1; i<6; i=i+1) begin + xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1]; + xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1]; + xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1]; + xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1]; + xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1]; + // + xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1]; + xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1]; + xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1]; + end + // + end + + always @(posedge clk) + // + if (xy_bitmap_latch_msb[1]) // only shift 7 times + // + for (i=0; i<8; i=i+1) + // + if (i < 7) begin + dsp_x_p_latch[i] <= dsp_x_p_latch[i+1]; + dsp_y_p_latch[i] <= dsp_y_p_latch[i+1]; + end else begin + dsp_x_p_latch[i] <= {47{1'bX}}; + dsp_y_p_latch[i] <= {47{1'bX}}; + end + // + else if (dsp_xy_ce_p_dly1) begin + // 
+ for (i=0; i<8; i=i+1) + // + if (xy_bitmap_lsb[i]) begin + dsp_x_p_latch[i] <= dsp_x_p_split[i]; + dsp_y_p_latch[i] <= dsp_y_p_split[i]; + end else if (xy_valid_msb && xy_bitmap_msb[i]) begin + dsp_x_p_latch[i] <= dsp_x_p_split[i]; + dsp_y_p_latch[i] <= dsp_y_p_split[i]; + end + // + if (xy_aux_lsb) begin + dsp_x_p_latch[8] <= dsp_x_p_split[8]; + dsp_y_p_latch[8] <= dsp_y_p_split[8]; + end + // + end + + reg rcmb_xy_lsb_valid = 1'b0; + reg rcmb_xy_msb_valid = 1'b0; + + always @(posedge clk) + // + if (rst) begin + rcmb_xy_lsb_valid <= 1'b0; + rcmb_xy_msb_valid <= 1'b0; + end else begin + rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined; + rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined; + end + + + reg [BANK_ADDR_W-1:0] wide_xy_bank; + reg [ 7:0] wide_xy_addr; + reg [17:0] wide_x_dout; + reg [17:0] wide_y_dout; + reg wide_xy_valid = 1'b0; + + reg [BANK_ADDR_W-1:0] narrow_xy_bank; + reg [ 7:0] narrow_xy_addr; + reg [17:0] narrow_x_dout; + reg [17:0] narrow_y_dout; + reg narrow_xy_valid = 1'b0; + + reg [BANK_ADDR_W-1:0] rdct_xy_bank; + reg [ 7:0] rdct_xy_addr; + reg [17:0] rdct_x_dout; + reg [17:0] rdct_y_dout; + reg rdct_xy_valid = 1'b0; + + reg [ 7:0] cnt_lsb; + reg [ 7:0] cnt_msb; + + reg cnt_lsb_wrapped; + reg cnt_msb_wrapped; + + reg [31:0] rcmb_xy_msb_carry_0; + reg [31:0] rcmb_xy_msb_carry_1; + + reg [31:0] rcmb_xy_msb_delay_0; + reg [31:0] rcmb_xy_msb_delay_1; + reg [31:0] rcmb_xy_msb_delay_2; + + reg [ 7:0] rcmb_msb_cnt_delay_0 = 8'd0; + reg [ 7:0] rcmb_msb_cnt_delay_1 = 8'd0; + reg [ 7:0] rcmb_msb_cnt_delay_2 = 8'd0; + + reg rcmb_msb_flag_delay_0; + reg rcmb_msb_flag_delay_1; + reg rcmb_msb_flag_delay_2; + + assign rcmb_wide_xy_bank = wide_xy_bank; + assign rcmb_wide_xy_addr = wide_xy_addr; + assign rcmb_wide_x_dout = wide_x_dout; + assign rcmb_wide_y_dout = wide_y_dout; + assign rcmb_wide_xy_valid = wide_xy_valid; + + assign rcmb_narrow_xy_bank = narrow_xy_bank; + assign rcmb_narrow_xy_addr = narrow_xy_addr; + assign rcmb_narrow_x_dout = narrow_x_dout; + assign 
rcmb_narrow_y_dout = narrow_y_dout; + assign rcmb_narrow_xy_valid = narrow_xy_valid; + + assign rdct_narrow_xy_bank = rdct_xy_bank; + assign rdct_narrow_xy_addr = rdct_xy_addr; + assign rdct_narrow_x_dout = rdct_x_dout; + assign rdct_narrow_y_dout = rdct_y_dout; + assign rdct_narrow_xy_valid = rdct_xy_valid; + + reg rdy_reg = 1'b1; + reg rdy_adv = 1'b1; + + assign rdy = rdy_reg; + + + always @(posedge clk) + // + if (ena) rdy_reg <= 1'b0; + else rdy_reg <= rdy_adv; + + task advance_rcmb_msb_delay; + input [15:0] dout_x; + input [15:0] dout_y; + input [ 7:0] cnt; + input flag; + begin + // + rcmb_xy_msb_delay_0 <= {dout_y, dout_x}; + rcmb_xy_msb_delay_1 <= rcmb_xy_msb_delay_0; + rcmb_xy_msb_delay_2 <= rcmb_xy_msb_delay_1; + // + rcmb_msb_cnt_delay_0 <= cnt; + rcmb_msb_cnt_delay_1 <= rcmb_msb_cnt_delay_0; + rcmb_msb_cnt_delay_2 <= rcmb_msb_cnt_delay_1; + // + rcmb_msb_flag_delay_0 <= flag; + rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0; + rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1; + // + end + endtask + + task shift_rcmb_msb_carry; + input [15:0] dout_x; + input [15:0] dout_y; + begin + rcmb_xy_msb_carry_0 <= {dout_y, dout_x}; + rcmb_xy_msb_carry_1 <= rcmb_xy_msb_carry_0; + end + endtask + + task _update_wide; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + wide_xy_bank <= bank; + wide_xy_addr <= addr; + wide_x_dout <= dout_x; + wide_y_dout <= dout_y; + wide_xy_valid <= valid; + end + endtask + + task _update_narrow; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + narrow_xy_bank <= bank; + narrow_xy_addr <= addr; + narrow_x_dout <= dout_x; + narrow_y_dout <= dout_y; + narrow_xy_valid <= valid; + end + endtask + + task _update_rdct; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + rdct_xy_bank <= bank; + rdct_xy_addr <= addr; + 
rdct_x_dout <= dout_x; + rdct_y_dout <= dout_y; + rdct_xy_valid <= valid; + end + endtask + + task set_wide; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_wide(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + + task set_narrow; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_narrow(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + + task set_rdct; + input [BANK_ADDR_W-1:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_rdct(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + + task clear_wide; + begin + _update_wide(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + task clear_narrow; + begin + _update_narrow(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + task clear_rdct; + begin + _update_rdct(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + task _set_cnt_lsb; + input [7:0] cnt; + input wrapped; + begin + cnt_lsb <= cnt; + cnt_lsb_wrapped <= wrapped; + end + endtask + + task _set_cnt_msb; + input [7:0] cnt; + input wrapped; + begin + cnt_msb <= cnt; + cnt_msb_wrapped <= wrapped; + end + endtask + + task inc_cnt_lsb; + begin + if (cnt_lsb == word_index_last) + _set_cnt_lsb(8'd0, 1'b1); + else + _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped); + end + endtask + + task inc_cnt_both; + begin + inc_cnt_lsb; + inc_cnt_msb; + end + endtask + + task inc_cnt_msb; + begin + if (cnt_msb == word_index_last) + _set_cnt_msb(8'd0, 1'b1); + else + _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped); + end + endtask + + task clr_cnt_lsb; + begin + _set_cnt_lsb(8'd0, 1'b0); + end + endtask + + task clr_cnt_msb; + begin + _set_cnt_msb(8'd0, 1'b0); + end + endtask + + + + wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid}; + + always @(posedge clk) + // + if (ena) begin + clr_cnt_lsb(); + clr_cnt_msb(); + end else 
if (!rdy)
+        //
+        // Dispatch to the recombination routine selected by rcmb_mode
+        // (no action for rcmb_mode == 2'd0).
+        //
+        case (rcmb_mode)
+            2'd1: recombine_square;
+            2'd2: recombine_triangle;
+            2'd3: recombine_rectangle;
+        endcase
+
+    //
+    // 18-bit operands handed to set_wide / set_narrow / set_rdct:
+    // the *_dout and *_delay_2 values are padded with two zero bits.
+    //
+    wire [17:0] rcmb_x_lsb_dout_pad = {2'b00, rcmb_x_lsb_dout};
+    wire [17:0] rcmb_y_lsb_dout_pad = {2'b00, rcmb_y_lsb_dout};
+
+    wire [17:0] rcmb_x_msb_dout_pad = {2'b00, rcmb_x_msb_dout};
+    wire [17:0] rcmb_y_msb_dout_pad = {2'b00, rcmb_y_msb_dout};
+
+    wire [17:0] rcmb_x_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[15: 0]};
+    wire [17:0] rcmb_y_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[31:16]};
+
+    // LSB word plus stored MSB carry word; each operand is widened by one bit
+    // so the carry-out of the addition is kept, then one more zero bit pads to 18.
+    wire [17:0] rcmb_x_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_x_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[15: 0]}};
+    wire [17:0] rcmb_y_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_y_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[31:16]}};
+
+
+    //
+    // recombine_square (rcmb_mode == 2'd1)
+    //
+    // Per clock, keyed on rcmb_xy_valid ({msb, lsb} as used below):
+    //   1) advance the LSB/MSB word counters,
+    //   2) write (or idle) the "wide" output port via set_wide / clear_wide,
+    //   3) update the MSB delay line and MSB carry storage.
+    //
+    task recombine_square;
+        //
+        begin
+            //
+            case (rcmb_xy_valid)
+                //
+                2'b01: inc_cnt_lsb;
+                2'b10: inc_cnt_msb;
+                2'b11: inc_cnt_both;
+                //
+            endcase
+            //
+            case (rcmb_xy_valid)
+                //
+                // nothing valid: drain a previously delayed MSB word, if flagged
+                2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
+                       else                       clear_wide;
+                //
+                2'b01:                            set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                //
+                // first two MSB words are not written here; they go to the carry storage below
+                2'b10: if (cnt_msb < 8'd2)        clear_wide;
+                       else                       set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+                //
+                2'b11: if (cnt_lsb_wrapped)       set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad);
+                       else                       set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                //
+            endcase
+            //
+            case (rcmb_xy_valid)
+                //
+                2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
+                2'b10: if (cnt_msb < 8'd2)        shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
+                //
+                2'b11: begin                      advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
+                             if (cnt_lsb_wrapped) shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}});
+                       end
+                //
+            endcase
+            //
+        end
+        //
+    endtask
+
+
+    //
+    // recombine_triangle (rcmb_mode == 2'd2)
+    //
+    // Only LSB words (rcmb_xy_valid == 2'b01) are consumed: they are written
+    // to BANK_NARROW_Q at cnt_lsb until cnt_lsb_wrapped is set, after which the
+    // word goes to BANK_NARROW_EXT at address 1. All other cases idle the port.
+    //
+    task recombine_triangle;
+        //
+        begin
+            //
+            case (rcmb_xy_valid)
+                //
+                2'b01: inc_cnt_lsb;
+                //
+            endcase
+            //
+            case (rcmb_xy_valid)
+                //
+                2'b00:                       clear_narrow;
+                2'b01: if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                       else                  set_narrow(BANK_NARROW_EXT, 8'd1, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                2'b10:                       clear_narrow;
+                2'b11:                       clear_narrow;
+                //
+            endcase
+            //
+        end
+        //
+    endtask
+
+
+    //
+    // recombine_rectangle (rcmb_mode == 2'd3)
+    //
+    // Same three-step structure as recombine_square, but results are sent
+    // to the reductor port (set_rdct / clear_rdct) using the BANK_RCMB_ML,
+    // BANK_RCMB_MH and BANK_RCMB_EXT bank tags.
+    //
+    task recombine_rectangle;
+        //
+        begin
+            //
+            case (rcmb_xy_valid)
+                //
+                2'b01: inc_cnt_lsb;
+                2'b10: inc_cnt_msb;
+                2'b11: inc_cnt_both;
+                //
+            endcase
+            //
+            case (rcmb_xy_valid)
+                //
+                2'b00: if (rcmb_msb_flag_delay_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
+                       else                       clear_rdct;
+                2'b01:                            set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                2'b10: if (!cnt_msb_wrapped) begin
+                           if (cnt_msb < 8'd2)    clear_rdct;
+                           else                   set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+                       end else                   set_rdct(BANK_RCMB_EXT, 8'd0, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+
+                2'b11:                            set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad);
+                //
+            endcase
+            //
+            case (rcmb_xy_valid)
+                //
+                2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
+                2'b10: begin
+                    if ((cnt_msb < 8'd2) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
+                    if (cnt_msb_wrapped)                      advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
+                end
+                //
+                2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
+                             shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}});
+                       end
+                //
+            endcase
+            //
+        end
+        //
+    endtask
+
+
+    //
+    // rdy_adv: cleared when ena is asserted; while the module is busy
+    // (!rdy_reg) it is updated per mode:
+    //   modes 1 and 3: on an idle cycle (valid == 2'b00) with a delayed MSB
+    //                  word pending, set to ~rcmb_msb_flag_delay_1,
+    //   mode 2:        on every LSB word, set to cnt_lsb_wrapped.
+    //
+    always @(posedge clk)
+        //
+        if (ena) begin
+            rdy_adv <= 1'b0;
+        end else if (!rdy_reg) begin
+            //
+            case (rcmb_mode)
+                //
+                2'd1: case (rcmb_xy_valid)
+                    //
+                    2'b00: begin
+                        //
+                        if (rcmb_msb_flag_delay_2) begin
+                            //
+                            rdy_adv <= ~rcmb_msb_flag_delay_1;
+                            //
+                        end
+                        //
+                    end
+                    //
+                endcase
+                //
+                2'd2: case (rcmb_xy_valid)
+                    //
+                    2'b01: rdy_adv <= cnt_lsb_wrapped; //
+                    //
+                endcase
+                //
+                2'd3: case (rcmb_xy_valid)
+                    //
+                    2'b00: begin
+                        //
+                        if (rcmb_msb_flag_delay_2) begin
+                            //
+                            rdy_adv <= ~rcmb_msb_flag_delay_1;
+                            //
+                        end
+                        //
+                    end
+                    //
+                endcase
+                //
+            endcase
+            //
+        end
+
+
+
+    // add ready for mode=3
+    // NOTE(review): a 2'd3 branch already exists in the rdy_adv process above;
+    //               this TODO may be stale -- confirm before removing.
+endmodule
diff --git a/rtl/_modexpng_recombinator_cell.v b/rtl/_modexpng_recombinator_cell.v
new file mode 100644
index 0000000..b72395e
--- /dev/null
+++ b/rtl/_modexpng_recombinator_cell.v
@@ -0,0 +1,40 @@
+module modexpng_recombinator_cell
+(
+    clk,
+    ce, clr,
+    din, dout
+);
+
+    //
+    // Headers
+    //
+    `include "modexpng_parameters.vh"
+
+    //
+    // Ports
+    //
+    input                clk;
+    input                ce;
+    input                clr;
+    input  [ MAC_W -1:0] din;
+    output [WORD_W -1:0] dout;
+
+    reg [WORD_W -2:0] z;
+    reg [WORD_W   :0] y;
+    reg [WORD_W +1:0] x;
+
+    assign dout = x[WORD_W-1:0];
+
+    wire [WORD_W -2:0] din_z = din[3*WORD_W -2 :2*WORD_W]; // [46:32]
+    wire [WORD_W -1:0] din_y = din[2*WORD_W -1 :  WORD_W]; // [31:16]
+    wire [WORD_W -1:0] din_x = din[  WORD_W -1 :       0]; // [15: 0]
+
+    always @(posedge clk)
+        //
+        if (ce) begin
+            z <= din_z;
+            y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z};
+            x <= clr ?
{2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {WORD_NULL, x[WORD_EXT_W-1:WORD_W]};
+        end
+
+endmodule
diff --git a/rtl/_modexpng_reductor.v b/rtl/_modexpng_reductor.v
new file mode 100644
index 0000000..25cf394
--- /dev/null
+++ b/rtl/_modexpng_reductor.v
@@ -0,0 +1,252 @@
+//
+// modexpng_reductor
+//
+// Takes the recombinator output stream (rcmb_final_*), delays it by three
+// clocks, adds the word presented on rd_wide_*_dout_aux and emits the sum on
+// rdct_final_*. Words tagged BANK_RCMB_ML (and BANK_RCMB_MH word 0) only feed
+// the carry accumulator; BANK_RCMB_MH words 1.. are emitted at address-1;
+// the BANK_RCMB_EXT word is emitted unmodified at word_index_last.
+//
+module modexpng_reductor
+(
+    clk, rst,
+    ena, rdy,
+    word_index_last,
+    rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux,
+    rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid,
+    rdct_final_xy_addr, rdct_final_x_dout, rdct_final_y_dout, rdct_final_xy_valid
+);
+
+    //
+    // Headers
+    //
+    `include "../rtl/modexpng_parameters.vh"
+
+
+    input                      clk;
+    input                      rst;
+    //
+    input                      ena;
+    output                     rdy;
+    //
+    input  [  OP_ADDR_W -1:0]  word_index_last;
+    //
+    // NOTE(review): rd_wide_xy_bank_aux / rd_wide_xy_addr_aux are inputs here
+    //               and are not referenced anywhere in this module.
+    input  [BANK_ADDR_W -1:0]  rd_wide_xy_bank_aux;
+    input  [  OP_ADDR_W -1:0]  rd_wide_xy_addr_aux;
+    input  [ WORD_EXT_W -1:0]  rd_wide_x_dout_aux;
+    input  [ WORD_EXT_W -1:0]  rd_wide_y_dout_aux;
+    //
+    input  [BANK_ADDR_W -1:0]  rcmb_final_xy_bank;
+    input  [  OP_ADDR_W -1:0]  rcmb_final_xy_addr;
+    input  [ WORD_EXT_W -1:0]  rcmb_final_x_dout;
+    input  [ WORD_EXT_W -1:0]  rcmb_final_y_dout;
+    input                      rcmb_final_xy_valid;
+    //
+    output [  OP_ADDR_W -1:0]  rdct_final_xy_addr;
+    output [ WORD_EXT_W -1:0]  rdct_final_x_dout;
+    output [ WORD_EXT_W -1:0]  rdct_final_y_dout;
+    output                     rdct_final_xy_valid;
+
+
+    //
+    // Ready
+    //
+    reg rdy_reg  = 1'b1;
+    reg busy_now = 1'b0;
+
+    assign rdy = rdy_reg;
+
+    // rdy drops when ena is seen while idle, and is restored once busy_now is
+    // low again (see the note on busy_now in the "Busy" section below).
+    always @(posedge clk)
+        //
+        if (rst)                   rdy_reg <= 1'b1;
+        else begin
+            if (rdy && ena)        rdy_reg <= 1'b0;
+            if (!rdy && !busy_now) rdy_reg <= 1'b1;
+        end
+
+
+    //
+    // Pipeline (Delay Match)
+    //
+    // Three-stage delay of the recombinator stream; the delayed word is the
+    // one combined with rd_wide_*_dout_aux below. presumably the three stages
+    // match the latency of the auxiliary read port -- confirm against the
+    // module that drives the aux address.
+    //
+    reg rcmb_xy_valid_dly1 = 1'b0;
+    reg rcmb_xy_valid_dly2 = 1'b0;
+    reg rcmb_xy_valid_dly3 = 1'b0;
+
+    reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1;
+    reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2;
+    reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3;
+
+    reg [  OP_ADDR_W -1:0] rcmb_xy_addr_dly1;
+    reg [  OP_ADDR_W -1:0] rcmb_xy_addr_dly2;
+    reg [  OP_ADDR_W -1:0] rcmb_xy_addr_dly3;
+
+    reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly1;
+    reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2;
+    reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3;
+
+    reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1;
+    reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2;
+    reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3;
+
+    // valid flags: plain shift register with synchronous reset
+    always @(posedge clk)
+        //
+        if (rst) begin
+            rcmb_xy_valid_dly1 <= 1'b0;
+            rcmb_xy_valid_dly2 <= 1'b0;
+            rcmb_xy_valid_dly3 <= 1'b0;
+        end else begin
+            rcmb_xy_valid_dly1 <= rcmb_final_xy_valid;
+            rcmb_xy_valid_dly2 <= rcmb_xy_valid_dly1;
+            rcmb_xy_valid_dly3 <= rcmb_xy_valid_dly2;
+        end
+
+
+    // payload: each stage only captures when the stage feeding it is valid
+    always @(posedge clk) begin
+        //
+        if (rcmb_final_xy_valid) begin
+            rcmb_xy_bank_dly1 <= rcmb_final_xy_bank;
+            rcmb_xy_addr_dly1 <= rcmb_final_xy_addr;
+            rcmb_x_dout_dly1  <= rcmb_final_x_dout;
+            rcmb_y_dout_dly1  <= rcmb_final_y_dout;
+        end
+        //
+        if (rcmb_xy_valid_dly1) begin
+            rcmb_xy_bank_dly2 <= rcmb_xy_bank_dly1;
+            rcmb_xy_addr_dly2 <= rcmb_xy_addr_dly1;
+            rcmb_x_dout_dly2  <= rcmb_x_dout_dly1;
+            rcmb_y_dout_dly2  <= rcmb_y_dout_dly1;
+        end
+        //
+        if (rcmb_xy_valid_dly2) begin
+            rcmb_xy_bank_dly3 <= rcmb_xy_bank_dly2;
+            rcmb_xy_addr_dly3 <= rcmb_xy_addr_dly2;
+            rcmb_x_dout_dly3  <= rcmb_x_dout_dly2;
+            rcmb_y_dout_dly3  <= rcmb_y_dout_dly2;
+        end
+        //
+    end
+
+
+    //
+    // Carry Logic
+    //
+    // *_carry holds the running carry, *_dummy receives the discarded low
+    // WORD_W bits of each partial sum.
+    // NOTE(review): rcmb_x_lsb_dout / rcmb_y_lsb_dout are declared but never
+    //               assigned or read in this module.
+    //
+    reg [RDCT_CARRY_W -1:0] rcmb_x_lsb_carry;
+    reg [WORD_W       -1:0] rcmb_x_lsb_dummy;
+    reg [WORD_EXT_W   -1:0] rcmb_x_lsb_dout;
+
+    reg [RDCT_CARRY_W -1:0] rcmb_y_lsb_carry;
+    reg [WORD_W       -1:0] rcmb_y_lsb_dummy;
+    reg [WORD_EXT_W   -1:0] rcmb_y_lsb_dout;
+
+
+    //
+    // Carry Computation
+    //
+    // The carry is zeroed whenever ena is asserted, then accumulated over all
+    // BANK_RCMB_ML words and over word 0 of BANK_RCMB_MH.
+    //
+    always @(posedge clk)
+        //
+        if (ena) begin
+            rcmb_x_lsb_carry <= RDCT_CARRY_ZEROES;
+            rcmb_y_lsb_carry <= RDCT_CARRY_ZEROES;
+        end else if (rcmb_xy_valid_dly3)
+            //
+            case (rcmb_xy_bank_dly3)
+
+                BANK_RCMB_ML: begin
+                    {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
+                    {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
+                end
+
+                BANK_RCMB_MH:
+                    if (rcmb_xy_addr_dly3 == OP_ADDR_ZERO) begin
+                        {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
+                        {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
+                    end
+
+            endcase
+
+
+    //
+    // Reduction
+    //
+    reg [ OP_ADDR_W -1:0] rdct_xy_addr;
+    reg [WORD_EXT_W -1:0] rdct_x_dout;
+    reg [WORD_EXT_W -1:0] rdct_y_dout;
+    reg                   rdct_xy_valid = 1'b0;
+
+    assign rdct_final_xy_addr  = rdct_xy_addr;
+    assign rdct_final_x_dout   = rdct_x_dout;
+    assign rdct_final_y_dout   = rdct_y_dout;
+    assign rdct_final_xy_valid = rdct_xy_valid;
+
+    // Common register update used by set_rdct / clear_rdct.
+    task _update_rdct;
+        input [ OP_ADDR_W -1:0] addr;
+        input [WORD_EXT_W -1:0] dout_x;
+        input [WORD_EXT_W -1:0] dout_y;
+        input                   valid;
+        begin
+            rdct_xy_addr  <= addr;
+            rdct_x_dout   <= dout_x;
+            rdct_y_dout   <= dout_y;
+            rdct_xy_valid <= valid;
+        end
+    endtask
+
+    // Drive one valid output word.
+    task set_rdct;
+        input [ OP_ADDR_W -1:0] addr;
+        input [WORD_EXT_W -1:0] dout_x;
+        input [WORD_EXT_W -1:0] dout_y;
+        begin
+            _update_rdct(addr, dout_x, dout_y, 1'b1);
+        end
+    endtask
+
+    // Idle the output (valid low, payload set to the *_DONT_CARE constants).
+    task clear_rdct;
+        begin
+            _update_rdct(OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
+        end
+    endtask
+
+
+    //
+    // Helper Wires
+    //
+    wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux;
+    wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux;
+
+    wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + {WORD_NULL, rcmb_x_lsb_carry};
+    wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + {WORD_NULL, rcmb_y_lsb_carry};
+
+
+    //
+    // Output stage
+    //
+    // clear_rdct is issued unconditionally first; a set_rdct call later in the
+    // same clock overrides it (last non-blocking assignment wins).
+    //
+    always @(posedge clk)
+        //
+        if (rst) clear_rdct;
+        else begin
+            //
+            clear_rdct;
+            //
+            if (busy_now && rcmb_xy_valid_dly3)
+                //
+                case (rcmb_xy_bank_dly3)
+
+                    BANK_RCMB_MH:
+                        // word 1 lands at address 0 and absorbs the accumulated carry;
+                        // higher words are shifted down by one address, no carry added
+                        if (rcmb_xy_addr_dly3 == OP_ADDR_ONE)
+                            set_rdct(OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry);
+                        else if (rcmb_xy_addr_dly3 > OP_ADDR_ONE)
+                            set_rdct(rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
+
+                    BANK_RCMB_EXT:
+                        // extra word is passed through unchanged to the last address
+                        set_rdct(word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
+
+                endcase
+            //
+        end
+
+
+
+    //
+    // Busy
+    //
+    // NOTE(review): busy_now is set when an operation starts but is only ever
+    //               cleared by rst, so after the first ena pulse rdy_reg can
+    //               never return to 1 (its release condition needs !busy_now).
+    //               A "done" condition appears to be missing here -- confirm
+    //               what marks the final word before adding one.
+    //
+    always @(posedge clk)
+        //
+        if (rst) busy_now <= 1'b0;
+        else begin
+            if (rdy && ena) busy_now <= 1'b1;
+            //if (!rdy && !busy_now) rdy <= 1'b1;
+        end
+
+
+endmodule
diff --git a/rtl/_modexpng_storage_block.v b/rtl/_modexpng_storage_block.v
new file mode 100644
index 0000000..d6ef1ee
--- /dev/null
+++ b/rtl/_modexpng_storage_block.v
@@ -0,0 +1,219 @@
+module modexpng_storage_block
+(
+    clk, rst,
+
+    wr_wide_xy_ena,
+    wr_wide_xy_bank,
+    wr_wide_xy_addr,
+    wr_wide_x_din,
+    wr_wide_y_din,
+
+    wr_narrow_xy_ena,
+    wr_narrow_xy_bank,
+    wr_narrow_xy_addr,
+    wr_narrow_x_din,
+    wr_narrow_y_din,
+
+    rd_wide_xy_ena,
+    rd_wide_xy_ena_aux,
+    rd_wide_xy_bank,
+    rd_wide_xy_bank_aux,
+    rd_wide_xy_addr,
+    rd_wide_xy_addr_aux,
+    rd_wide_x_dout,
+    rd_wide_y_dout,
+    rd_wide_x_dout_aux,
+    rd_wide_y_dout_aux,
+
+    rd_narrow_xy_ena,
+    rd_narrow_xy_bank,
+    rd_narrow_xy_addr,
+    rd_narrow_x_dout,
+    rd_narrow_y_dout
+);
+
+
+    //
+    // Headers
+    //
+    `include "modexpng_parameters.vh"
+
+
+    //
+    // Ports
+    //
+    input                    clk;
+    input                    rst;
+
+    input                    wr_wide_xy_ena;
+    input [BANK_ADDR_W -1:0] wr_wide_xy_bank;
+    input [  OP_ADDR_W -1:0] wr_wide_xy_addr;
+    input [ WORD_EXT_W -1:0] wr_wide_x_din;
+    input [ WORD_EXT_W -1:0] wr_wide_y_din;
+
+    input                    wr_narrow_xy_ena;
+    input [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
+    input [  OP_ADDR_W -1:0] wr_narrow_xy_addr;
+    input [ WORD_EXT_W -1:0] wr_narrow_x_din;
+    input [ WORD_EXT_W -1:0] wr_narrow_y_din;
+
+    input                                      rd_wide_xy_ena;
+    input                                      rd_wide_xy_ena_aux;
+    input  [                BANK_ADDR_W -1:0]  rd_wide_xy_bank;
+    input  [                BANK_ADDR_W -1:0]  rd_wide_xy_bank_aux;
+    input  [NUM_MULTS_HALF *  OP_ADDR_W -1:0]  rd_wide_xy_addr;
+    input  [                  OP_ADDR_W -1:0]  rd_wide_xy_addr_aux;
+    output [NUM_MULTS_HALF * WORD_EXT_W -1:0]  rd_wide_x_dout;
+    output [NUM_MULTS_HALF * WORD_EXT_W -1:0]  rd_wide_y_dout;
+    output [                 WORD_EXT_W -1:0]  rd_wide_x_dout_aux;
+    output [                 WORD_EXT_W
-1:0] rd_wide_y_dout_aux;
+
+    input                     rd_narrow_xy_ena;
+    input  [BANK_ADDR_W -1:0] rd_narrow_xy_bank;
+    input  [  OP_ADDR_W -1:0] rd_narrow_xy_addr;
+    output [ WORD_EXT_W -1:0] rd_narrow_x_dout;
+    output [ WORD_EXT_W -1:0] rd_narrow_y_dout;
+
+
+    //
+    // Internal Registers
+    //
+    // One-clock delayed copies of the read enables; they drive the .regceb
+    // pin of the BRAM wrappers below while the undelayed enables drive .enb.
+    //
+    reg rd_wide_xy_reg_ena     = 1'b0;
+    reg rd_wide_xy_reg_ena_aux = 1'b0;
+    reg rd_narrow_xy_reg_ena   = 1'b0;
+
+    always @(posedge clk) begin
+        //
+        rd_wide_xy_reg_ena     <= rst ? 1'b0 : rd_wide_xy_ena;
+        rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux;
+        rd_narrow_xy_reg_ena   <= rst ? 1'b0 : rd_narrow_xy_ena;
+        //
+    end
+
+
+    //
+    // Helper Signals
+    //
+    // BRAM addresses are the bank index concatenated above the word address.
+    //
+    wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset;
+    wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset[0:NUM_MULTS_HALF-1];
+    wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset_aux;
+    wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset;
+    wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset;
+
+    assign wr_wide_xy_offset     = {wr_wide_xy_bank,     wr_wide_xy_addr};
+    assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux};
+    assign wr_narrow_xy_offset   = {wr_narrow_xy_bank,   wr_narrow_xy_addr};
+    assign rd_narrow_xy_offset   = {rd_narrow_xy_bank,   rd_narrow_xy_addr};
+
+    //
+    // "Wide" Storage
+    //
+    // NUM_MULTS_HALF x/y BRAM pairs share a single write port (same enable,
+    // address and data), so every pair holds the same contents; each pair is
+    // read at its own address slice of rd_wide_xy_addr.
+    //
+    genvar z;
+    generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
+        begin : gen_wide_bram
+            //
+            assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]};
+            //
+            modexpng_sdp_36k_wrapper wide_bram_x
+            (
+                .clk    (clk),
+
+                .ena    (wr_wide_xy_ena),
+                .wea    (wr_wide_xy_ena),
+                .addra  (wr_wide_xy_offset),
+                .dina   (wr_wide_x_din),
+
+                .enb    (rd_wide_xy_ena),
+                .regceb (rd_wide_xy_reg_ena),
+                .addrb  (rd_wide_xy_offset[z]),
+                .doutb  (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W])
+            );
+            //
+            modexpng_sdp_36k_wrapper wide_bram_y
+            (
+                .clk    (clk),
+
+                .ena    (wr_wide_xy_ena),
+                .wea    (wr_wide_xy_ena),
+                .addra  (wr_wide_xy_offset),
+                .dina   (wr_wide_y_din),
+
+                .enb    (rd_wide_xy_ena),
+                .regceb (rd_wide_xy_reg_ena),
+                .addrb  (rd_wide_xy_offset[z]),
+                .doutb  (rd_wide_y_dout[z*WORD_EXT_W +: WORD_EXT_W])
+            );
+            //
+        end
+    endgenerate
+
+
+    //
+    // Auxiliary Storage
+    //
+    // One more x/y copy of the "wide" contents (same write port), read through
+    // the independent *_aux enable/bank/address.
+    //
+    modexpng_sdp_36k_wrapper wide_bram_x_aux
+    (
+        .clk    (clk),
+
+        .ena    (wr_wide_xy_ena),
+        .wea    (wr_wide_xy_ena),
+        .addra  (wr_wide_xy_offset),
+        .dina   (wr_wide_x_din),
+
+        .enb    (rd_wide_xy_ena_aux),
+        .regceb (rd_wide_xy_reg_ena_aux),
+        .addrb  (rd_wide_xy_offset_aux),
+        .doutb  (rd_wide_x_dout_aux)
+    );
+    //
+    modexpng_sdp_36k_wrapper wide_bram_y_aux
+    (
+        .clk    (clk),
+
+        .ena    (wr_wide_xy_ena),
+        .wea    (wr_wide_xy_ena),
+        .addra  (wr_wide_xy_offset),
+        .dina   (wr_wide_y_din),
+
+        .enb    (rd_wide_xy_ena_aux),
+        .regceb (rd_wide_xy_reg_ena_aux),
+        .addrb  (rd_wide_xy_offset_aux),
+        .doutb  (rd_wide_y_dout_aux)
+    );
+
+
+    //
+    // "Narrow" Storage
+    //
+    // Single x/y BRAM pair with its own write and read ports.
+    //
+    modexpng_sdp_36k_wrapper narrow_bram_x
+    (
+        .clk    (clk),
+
+        .ena    (wr_narrow_xy_ena),
+        .wea    (wr_narrow_xy_ena),
+        .addra  (wr_narrow_xy_offset),
+        .dina   (wr_narrow_x_din),
+
+        .enb    (rd_narrow_xy_ena),
+        .regceb (rd_narrow_xy_reg_ena),
+        .addrb  (rd_narrow_xy_offset),
+        .doutb  (rd_narrow_x_dout)
+    );
+
+    modexpng_sdp_36k_wrapper narrow_bram_y
+    (
+        .clk    (clk),
+
+        .ena    (wr_narrow_xy_ena),
+        .wea    (wr_narrow_xy_ena),
+        .addra  (wr_narrow_xy_offset),
+        .dina   (wr_narrow_y_din),
+
+        .enb    (rd_narrow_xy_ena),
+        .regceb (rd_narrow_xy_reg_ena),
+        .addrb  (rd_narrow_xy_offset),
+        .doutb  (rd_narrow_y_dout)
+    );
+
+
+endmodule
diff --git a/rtl/_modexpng_storage_manager.v b/rtl/_modexpng_storage_manager.v
new file mode 100644
index 0000000..958596a
--- /dev/null
+++ b/rtl/_modexpng_storage_manager.v
@@ -0,0 +1,199 @@
+module modexpng_storage_manager
+(
+    clk, rst,
+
+    wr_wide_xy_ena,
+    wr_wide_xy_bank,
+    wr_wide_xy_addr,
+    wr_wide_x_din,
+    wr_wide_y_din,
+
+    wr_narrow_xy_ena,
+    wr_narrow_xy_bank,
+    wr_narrow_xy_addr,
+    wr_narrow_x_din,
+    wr_narrow_y_din,
+
+    ext_wide_xy_ena,
+    ext_wide_xy_bank,
+    ext_wide_xy_addr,
+    ext_wide_x_din,
+    ext_wide_y_din,
+
+    ext_narrow_xy_ena,
+    ext_narrow_xy_bank, +
ext_narrow_xy_addr,
+    ext_narrow_x_din,
+    ext_narrow_y_din,
+
+    rcmb_wide_xy_ena,
+    rcmb_wide_xy_bank,
+    rcmb_wide_xy_addr,
+    rcmb_wide_x_din,
+    rcmb_wide_y_din,
+
+    rcmb_narrow_xy_ena,
+    rcmb_narrow_xy_bank,
+    rcmb_narrow_xy_addr,
+    rcmb_narrow_x_din,
+    rcmb_narrow_y_din
+);
+
+
+    //
+    // Headers
+    //
+    `include "../rtl/modexpng_parameters.vh"
+
+
+    //
+    // Ports
+    //
+    // wr_*   : registered write port towards the storage block (outputs)
+    // ext_*  : first write source
+    // rcmb_* : second write source, used only when ext_* is not enabled
+    //
+    input                     clk;
+    input                     rst;
+
+    output                    wr_wide_xy_ena;
+    output [BANK_ADDR_W -1:0] wr_wide_xy_bank;
+    output [  OP_ADDR_W -1:0] wr_wide_xy_addr;
+    output [ WORD_EXT_W -1:0] wr_wide_x_din;
+    output [ WORD_EXT_W -1:0] wr_wide_y_din;
+
+    output                    wr_narrow_xy_ena;
+    output [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
+    output [  OP_ADDR_W -1:0] wr_narrow_xy_addr;
+    output [ WORD_EXT_W -1:0] wr_narrow_x_din;
+    output [ WORD_EXT_W -1:0] wr_narrow_y_din;
+
+    input                     ext_wide_xy_ena;
+    input  [BANK_ADDR_W -1:0] ext_wide_xy_bank;
+    input  [  OP_ADDR_W -1:0] ext_wide_xy_addr;
+    input  [ WORD_EXT_W -1:0] ext_wide_x_din;
+    input  [ WORD_EXT_W -1:0] ext_wide_y_din;
+
+    input                     ext_narrow_xy_ena;
+    input  [BANK_ADDR_W -1:0] ext_narrow_xy_bank;
+    input  [  OP_ADDR_W -1:0] ext_narrow_xy_addr;
+    input  [ WORD_EXT_W -1:0] ext_narrow_x_din;
+    input  [ WORD_EXT_W -1:0] ext_narrow_y_din;
+
+    input                     rcmb_wide_xy_ena;
+    input  [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
+    input  [  OP_ADDR_W -1:0] rcmb_wide_xy_addr;
+    input  [ WORD_EXT_W -1:0] rcmb_wide_x_din;
+    input  [ WORD_EXT_W -1:0] rcmb_wide_y_din;
+
+    input                     rcmb_narrow_xy_ena;
+    input  [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
+    input  [  OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
+    input  [ WORD_EXT_W -1:0] rcmb_narrow_x_din;
+    input  [ WORD_EXT_W -1:0] rcmb_narrow_y_din;
+
+    reg                    wr_wide_xy_ena_reg = 1'b0;
+    reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg;
+    reg [  OP_ADDR_W -1:0] wr_wide_xy_addr_reg;
+    reg [ WORD_EXT_W -1:0] wr_wide_x_din_reg;
+    reg [ WORD_EXT_W -1:0] wr_wide_y_din_reg;
+
+    reg                    wr_narrow_xy_ena_reg = 1'b0;
+    reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_reg;
+    reg [  OP_ADDR_W -1:0] wr_narrow_xy_addr_reg;
+    reg [ WORD_EXT_W -1:0] wr_narrow_x_din_reg;
+    reg [ WORD_EXT_W -1:0] wr_narrow_y_din_reg;
+
+    // Load all "wide" write-port registers in one go.
+    task _update_wide;
+        input                    xy_ena;
+        input [BANK_ADDR_W -1:0] xy_bank;
+        input [  OP_ADDR_W -1:0] xy_addr;
+        input [ WORD_EXT_W -1:0] x_din;
+        input [ WORD_EXT_W -1:0] y_din;
+        begin
+            wr_wide_xy_ena_reg  <= xy_ena;
+            wr_wide_xy_bank_reg <= xy_bank;
+            wr_wide_xy_addr_reg <= xy_addr;
+            wr_wide_x_din_reg   <= x_din;
+            wr_wide_y_din_reg   <= y_din;
+        end
+    endtask
+
+    // Load all "narrow" write-port registers in one go.
+    task _update_narrow;
+        input                    xy_ena;
+        input [BANK_ADDR_W -1:0] xy_bank;
+        input [  OP_ADDR_W -1:0] xy_addr;
+        input [ WORD_EXT_W -1:0] x_din;
+        input [ WORD_EXT_W -1:0] y_din;
+        begin
+            wr_narrow_xy_ena_reg  <= xy_ena;
+            wr_narrow_xy_bank_reg <= xy_bank;
+            wr_narrow_xy_addr_reg <= xy_addr;
+            wr_narrow_x_din_reg   <= x_din;
+            wr_narrow_y_din_reg   <= y_din;
+        end
+    endtask
+
+    // Issue a "wide" write (enable high).
+    task enable_wide;
+        input [BANK_ADDR_W -1:0] xy_bank;
+        input [  OP_ADDR_W -1:0] xy_addr;
+        input [ WORD_EXT_W -1:0] x_din;
+        input [ WORD_EXT_W -1:0] y_din;
+        begin
+            _update_wide(1'b1, xy_bank, xy_addr, x_din, y_din);
+        end
+    endtask
+
+    // Issue a "narrow" write (enable high).
+    task enable_narrow;
+        input [BANK_ADDR_W -1:0] xy_bank;
+        input [  OP_ADDR_W -1:0] xy_addr;
+        input [ WORD_EXT_W -1:0] x_din;
+        input [ WORD_EXT_W -1:0] y_din;
+        begin
+            _update_narrow(1'b1, xy_bank, xy_addr, x_din, y_din);
+        end
+    endtask
+
+    // Idle the "wide" write port (enable low, payload set to *_DONT_CARE).
+    task disable_wide;
+        begin
+            _update_wide(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
+        end
+    endtask
+
+    // Idle the "narrow" write port (enable low, payload set to *_DONT_CARE).
+    task disable_narrow;
+        begin
+            _update_narrow(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
+        end
+    endtask
+
+    //
+    // "Wide" write-port mux: ext_* has priority over rcmb_*.
+    // NOTE(review): if both enables are high in the same clock the rcmb word
+    //               is dropped without any indication -- presumably the
+    //               callers never overlap; confirm.
+    //
+    always @(posedge clk)
+        //
+        if (rst) disable_wide;
+        else begin
+            //
+            if      (ext_wide_xy_ena)  enable_wide(ext_wide_xy_bank, ext_wide_xy_addr, ext_wide_x_din, ext_wide_y_din);
+            else if (rcmb_wide_xy_ena) enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din);
+            else                       disable_wide;
+            //
+        end
+
+    //
+    // "Narrow" write-port mux: same priority scheme as the "wide" port.
+    //
+    always @(posedge clk)
+        //
+        if (rst) disable_narrow;
+        else begin
+            //
+            if      (ext_narrow_xy_ena)  enable_narrow(ext_narrow_xy_bank, ext_narrow_xy_addr, ext_narrow_x_din, ext_narrow_y_din);
+            else if (rcmb_narrow_xy_ena) enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din);
+            else                         disable_narrow;
+            //
+        end
+
+    assign wr_wide_xy_ena  = wr_wide_xy_ena_reg;
+    assign wr_wide_xy_bank = wr_wide_xy_bank_reg;
+    assign wr_wide_xy_addr = wr_wide_xy_addr_reg;
+    assign wr_wide_x_din   = wr_wide_x_din_reg;
+    assign wr_wide_y_din   = wr_wide_y_din_reg;
+
+    assign wr_narrow_xy_ena  = wr_narrow_xy_ena_reg;
+    assign wr_narrow_xy_bank = wr_narrow_xy_bank_reg;
+    assign wr_narrow_xy_addr = wr_narrow_xy_addr_reg;
+    assign wr_narrow_x_din   = wr_narrow_x_din_reg;
+    assign wr_narrow_y_din   = wr_narrow_y_din_reg;
+
+endmodule
diff --git a/rtl/dev/temp.txt b/rtl/dev/temp.txt
deleted file mode 100644
index 987bd86..0000000
--- a/rtl/dev/temp.txt
+++ /dev/null
@@ -1,384 +0,0 @@
-    //
-    // Helper Functions
-    //
-    /*
-    function  [INDEX_WIDTH-1:0] calc_preset_a_index;
-        input [INDEX_WIDTH-4:0] col_in;
-        input integer           x_in;
-        integer                 index_out;
-        begin
-            index_out = col_in * NUM_MULTS + x_in;
-            calc_preset_a_index = index_out[INDEX_WIDTH-1:0];
-        end
-    endfunction
-
-    function  [INDEX_WIDTH-1:0] calc_rotate_a_index;
-        input [INDEX_WIDTH-1:0] current_index_in;
-        input [INDEX_WIDTH-1:0] last_index_in;
-        begin
-            if (current_index_in > {INDEX_WIDTH{1'b0}})
-                calc_rotate_a_index = current_index_in - 1'b1;
-            else
-                calc_rotate_a_index = last_index_in;
-        end
-    endfunction
-    */
-
-    /*
-    //
-    // Narrow Counters
-    //
-    reg  [INDEX_WIDTH-1:0] din_addr_narrow_reg;
-    reg  [INDEX_WIDTH-1:0] din_addr_narrow_dly;
-    localparam [INDEX_WIDTH-1:0] din_addr_narrow_zero = {INDEX_WIDTH{1'b0}};
-    wire [INDEX_WIDTH-1:0] din_addr_narrow_next = (din_addr_narrow_reg < index_last) ?
- din_addr_narrow_reg + 1'b1 : din_addr_narrow_zero; - wire din_addr_narrow_done = din_addr_narrow_reg == index_last; - - assign din_addr_narrow = din_addr_narrow_reg; - - always @(posedge clk) - // - din_addr_narrow_dly <= din_addr_narrow_reg; - - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; - FSM_STATE_MULT_SQUARE_COL_0_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; - FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; - endcase - - - // - // Helper Functions - // - function [NUM_MULTS-1:0] calc_mac_clear_bitmask; - input [2:0] t; - begin - case (t) - 3'd0: calc_mac_clear_bitmask = 8'b00000001; - 3'd1: calc_mac_clear_bitmask = 8'b00000010; - 3'd2: calc_mac_clear_bitmask = 8'b00000100; - 3'd3: calc_mac_clear_bitmask = 8'b00001000; - 3'd4: calc_mac_clear_bitmask = 8'b00010000; - 3'd5: calc_mac_clear_bitmask = 8'b00100000; - 3'd6: calc_mac_clear_bitmask = 8'b01000000; - 3'd7: calc_mac_clear_bitmask = 8'b10000000; - endcase - end - endfunction - - function [NUM_MULTS:0] calc_mac_clear_square; - input [INDEX_WIDTH-4:0] current_col_index; - input [INDEX_WIDTH-1:0] b_addr_prev; - begin - if (b_addr_prev[INDEX_WIDTH-1:3] == current_col_index) - calc_mac_clear_square = {1'b0, calc_mac_clear_bitmask(b_addr_prev[2:0])}; - else - calc_mac_clear_square = {1'b0, {NUM_MULTS{1'b0}}}; - end - endfunction - - - // - // Wide Counters - // - reg [INDEX_WIDTH-1:0] din_addr_wide_reg[0:NUM_MULTS-1]; - - integer xi; - always @(posedge clk) - // - for (xi=0; xi<NUM_MULTS; xi=xi+1) - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(0, xi); - FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(col_index + 1'b1, xi); - // - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: 
din_addr_wide_reg[xi] <= calc_rotate_a_index(din_addr_wide_reg[xi], index_last); - // - endcase - - - // - // Enables - // - reg din_ena_narrow_reg = 1'b0; - reg [NUM_MULTS-1:0] din_ena_wide_reg = {NUM_MULTS{1'b0}}; - - assign din_ena_narrow = din_ena_narrow_reg; - assign din_ena_wide = din_ena_wide_reg; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) din_ena_narrow_reg <= 1'b0; - else case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_narrow_reg <= 1'b1; - default: din_ena_narrow_reg <= 1'b0; - endcase - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) din_ena_wide_reg <= {NUM_MULTS{1'b0}}; - else case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_wide_reg <= {NUM_MULTS{1'b1}}; - default: din_ena_wide_reg <= {NUM_MULTS{1'b0}}; - endcase - - - // - // Modes - // - reg [2-1:0] din_mode_wide_reg; - reg [2-1:0] din_mode_narrow_reg; - reg [2-1:0] dout_mode_wide_reg; - reg [2-1:0] dout_mode_narrow_reg; - - assign din_mode_wide = din_mode_wide_reg; - assign din_mode_narrow = din_mode_narrow_reg; - - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_wide_reg <= MODEXPNG_MODE_A; - default: din_mode_wide_reg <= 2'bXX; - endcase - - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_narrow_reg <= MODEXPNG_MODE_B; - default: din_mode_narrow_reg <= 2'bXX; - endcase - - - // - // MAC Array - // - wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_a[0:NUM_MULTS]; - wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_b; - 
reg [ NUM_MULTS :0] mac_ce; - reg [ NUM_MULTS :0] mac_clr; - wire [ MODEXPNG_MAC_WIDTH-1:0] mac_p[0:NUM_MULTS]; - reg [ NUM_MULTS :0] mac_rdy_lsb; - reg [ NUM_MULTS :0] mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1:0]; - - //reg [ NUM_MULTS :0] mac_ce_dly[MODEXPNG_MAC_LATENCY-1:0]; - //wire [ NUM_MULTS :0] mac_rdy; - - - - - - assign mac_din_b = din_narrow; - - - genvar x; - generate for (x=0; x<=NUM_MULTS; x=x+1) - begin : gen_macs - // - //assign mac_rdy[x] = mac_ce_dly[MODEXPNG_MAC_LATENCY-1][x]; - // - modexpng_mac mac_inst - ( - .clk (clk), - .ce (mac_ce[x]), - .clr (mac_clr[x]), - .a (mac_din_a[x]), - .b (mac_din_b), - .p (mac_p[x]) - ); - // - end - // - endgenerate - - generate for (x=0; x<NUM_MULTS; x=x+1) - begin : gen_mac_din_a - // - assign mac_din_a[x] = din_wide[x*MODEXPNG_WORD_WIDTH+:MODEXPNG_WORD_WIDTH]; - // - end - endgenerate - - generate for (x=0; x<NUM_MULTS; x=x+1) - begin : gen_din_addr_wide - // - assign din_addr_wide[x*INDEX_WIDTH+:INDEX_WIDTH] = din_addr_wide_reg[x]; - // - end - endgenerate - - - // - // MAC Clock Enable Logic - // - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) mac_ce <= {1'b0, {NUM_MULTS{1'b0}}}; - else case (fsm_state) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_ce <= {1'b0, {NUM_MULTS{1'b1}}}; - default: mac_ce <= {1'b0, {NUM_MULTS{1'b0}}}; - endcase - - - // - // MAC Valid Logic - // - integer y; - - always @(posedge clk) - // - for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin - mac_rdy_lsb_dly[0][xi] <= mac_rdy_lsb[xi]; - for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1) - mac_rdy_lsb_dly[y][xi] <= mac_rdy_lsb_dly[y-1][xi]; - end - - always @(posedge clk) begin - // - fsm_state_dly[0] <= fsm_state; - for (y=1; y<=MODEXPNG_MAC_LATENCY; y=y+1) - fsm_state_dly[y] <= fsm_state_dly[y-1]; - end - - */ - - /* - always @(posedge clk) - // - for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin - mac_ce_dly[0][xi] <= mac_ce[xi]; - for (y=1; 
y<MODEXPNG_MAC_LATENCY; y=y+1) - mac_ce_dly[y][xi] <= mac_ce_dly[y-1][xi]; - end - */ - /* - always @(posedge clk) - // - for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin - mac_clr_dly[0][xi] <= mac_clr[xi]; - for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1) - mac_clr_dly[y][xi] <= mac_clr_dly[y-1][xi]; - end - */ - - /* - // - // MAC Clear Logic - // - always @(posedge clk) - // - case (fsm_state) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_clr <= {1'b0, {NUM_MULTS{1'b1}}}; - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_clr <= calc_mac_clear_square(col_index, din_addr_narrow_dly); - default: mac_clr <= {1'bX, {NUM_MULTS{1'bX}}}; - endcase - - - // - // MAC Ready Logic - // - always @(posedge clk) - // - case (fsm_state) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_rdy_lsb <= calc_mac_clear_square(col_index, din_addr_narrow); - default: mac_rdy_lsb <= {1'bX, {NUM_MULTS{1'bX}}}; - endcase - - - - // - // Recombinators - // - reg rcmb_lsb_ce; - reg rcmb_lsb_clr; - reg [MODEXPNG_MAC_WIDTH-1: 0] rcmb_lsb_din; - wire [15: 0] rcmb_lsb_dout; - - modexpng_part_recombinator recomb_lsb - ( - .clk (clk), - .ce (rcmb_lsb_ce), - .clr (rcmb_lsb_clr), - .din (rcmb_lsb_din), - .dout (rcmb_lsb_dout) - ); - - - reg calc_rcmb_lsb_ce; - always @* - // - calc_rcmb_lsb_ce = | mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0]; - - reg [MODEXPNG_MAC_WIDTH-1:0] calc_rcmb_lsb_din; - - always @* - // - casez (mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0]) - 8'b00000001: calc_rcmb_lsb_din = mac_p[0]; - 8'b00000010: calc_rcmb_lsb_din = mac_p[1]; - 8'b00000100: calc_rcmb_lsb_din = mac_p[2]; - 8'b00001000: calc_rcmb_lsb_din = mac_p[3]; - 8'b00010000: calc_rcmb_lsb_din = mac_p[4]; - 8'b00100000: calc_rcmb_lsb_din = mac_p[5]; - 8'b01000000: calc_rcmb_lsb_din = mac_p[6]; - 8'b10000000: calc_rcmb_lsb_din = mac_p[7]; - default: 
calc_rcmb_lsb_din = {MODEXPNG_MAC_WIDTH{1'bX}}; - endcase - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) - rcmb_lsb_ce <= 1'b0; - else case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_ce <= calc_rcmb_lsb_ce; - default: rcmb_lsb_ce <= 1'b0; - endcase - - always @(posedge clk) - // - case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) - FSM_STATE_MULT_SQUARE_COL_0_TRIG: rcmb_lsb_clr <= 1'b1; - default: rcmb_lsb_clr <= 1'b0; - endcase - - always @(posedge clk) - // - case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_din <= calc_rcmb_lsb_din; - default: rcmb_lsb_din <= {MODEXPNG_MAC_WIDTH{1'bX}}; - endcase - - - -*/ diff --git a/rtl/dsp/dsp_array.v b/rtl/dsp/dsp_array.v deleted file mode 100644 index 2a050d4..0000000 --- a/rtl/dsp/dsp_array.v +++ /dev/null @@ -1,143 +0,0 @@ -module dsp_array -( - input clk, - - input ce_a, - input ce_b, - input ce_m, - input ce_p, - input ce_mode, - - input [9 -1:0] mode_z, - - input [5*18-1:0] a, - input [1*17-1:0] b, - output [9*47-1:0] p -); - - `include "../modexpng_parameters_x8.vh" - - wire [17:0] casc_a[0:3]; - wire [16:0] casc_b[0:3]; - - wire ce_a0 = ce_a; - reg ce_a1 = 1'b0; - reg ce_a2 = 1'b0; - - wire ce_b0 = ce_b; - reg ce_b1 = 1'b0; - - always @(posedge clk) begin - ce_a1 <= ce_a0; - ce_a2 <= ce_a1; - ce_b1 <= ce_b0; - end - - - genvar z; - generate for (z=0; z<(NUM_MULTS/2); z=z+1) - // - begin : gen_DSP48E1 - // - dsp_slice # - ( - .AB_INPUT("DIRECT"), - .B_REG(2) - ) - dsp_direct - ( - .clk (clk), - - .ce_a1 (ce_a0), - .ce_b1 (ce_b0), - .ce_a2 (ce_a1), - .ce_b2 (ce_b1), - .ce_m (ce_m), - .ce_p (ce_p), - .ce_mode (ce_mode), - - .a (a[z*18+:18]), - .b (b), - .p (p[47*2*z+:47]), - - .inmode (5'b00000), - .opmode ({1'b0, 
mode_z[2*z], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), - - .casc_a_in ({18{1'b0}}), - .casc_b_in ({17{1'b0}}), - - .casc_a_out (casc_a[z]), - .casc_b_out (casc_b[z]) - ); - // - dsp_slice # - ( - .AB_INPUT("CASCADE"), - .B_REG(1) - ) - dsp_cascade - ( - .clk (clk), - - .ce_a1 (ce_a1), - .ce_b1 (1'b0), - .ce_a2 (ce_a2), - .ce_b2 (ce_b1), - .ce_m (ce_m), - .ce_p (ce_p), - .ce_mode (ce_mode), - - .a (a[z*18+:18]), - .b (b), - .p (p[47*(2*z+1)+:47]), - - .inmode (5'b00000), - .opmode ({1'b0, mode_z[2*z+1], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), - - .casc_a_in (casc_a[z]), - .casc_b_in (casc_b[z]), - - .casc_a_out (), - .casc_b_out () - ); - // - end - // - endgenerate - - dsp_slice # - ( - .AB_INPUT("DIRECT"), - .B_REG(2) - ) - dsp_aux - ( - .clk (clk), - - .ce_a1 (ce_a0), - .ce_b1 (ce_b0), - .ce_a2 (ce_a1), - .ce_b2 (ce_b1), - .ce_m (ce_m), - .ce_p (ce_p), - .ce_mode (ce_mode), - - .a (a[4*18+:18]), - .b (b), - .p (p[47*2*4+:47]), - - .inmode (5'b00000), - .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), - - .casc_a_in ({18{1'b0}}), - .casc_b_in ({17{1'b0}}), - - .casc_a_out (), - .casc_b_out () - ); - - -endmodule diff --git a/rtl/dsp/dsp_slice.v b/rtl/dsp/dsp_slice.v deleted file mode 100644 index 9f1298b..0000000 --- a/rtl/dsp/dsp_slice.v +++ /dev/null @@ -1,125 +0,0 @@ -module dsp_slice # -( - AB_INPUT = "DIRECT", - B_REG = 2 -) -( - input clk, - input ce_a1, - input ce_b1, - input ce_a2, - input ce_b2, - input ce_m, - input ce_p, - input ce_mode, - input [17:0] a, - input [16:0] b, - output [46:0] p, - input [ 4:0] inmode, - input [ 6:0] opmode, - input [ 3:0] alumode, - input [17:0] casc_a_in, - input [16:0] casc_b_in, - output [17:0] casc_a_out, - output [16:0] casc_b_out -); - - wire [30-18-1:0] casc_a_dummy; - wire [18-17-1:0] casc_b_dummy; - wire [48-47-1:0] p_dummy; - - DSP48E1 # - ( - .AREG (2), - .BREG (B_REG), - .CREG (0), - .DREG (0), - .ADREG (0), - .MREG (1), - .PREG (1), - .ACASCREG (1), - .BCASCREG (1), - .INMODEREG 
(0), - .OPMODEREG (1), - .ALUMODEREG (0), - .CARRYINREG (0), - .CARRYINSELREG (0), - - .A_INPUT (AB_INPUT), - .B_INPUT (AB_INPUT), - - .USE_DPORT ("FALSE"), - .USE_MULT ("DYNAMIC"), - .USE_SIMD ("ONE48"), - - .MASK (48'h3fffffffffff), - .PATTERN (48'h000000000000), - .SEL_MASK ("MASK"), - .SEL_PATTERN ("PATTERN"), - - .USE_PATTERN_DETECT ("NO_PATDET"), - .AUTORESET_PATDET ("NO_RESET") - ) - DSP48E1_inst - ( - .CLK (clk), - - .CEA1 (ce_a1), - .CEB1 (ce_b1), - .CEA2 (ce_a2), - .CEB2 (ce_b2), - .CEAD (1'b0), - .CEC (1'b0), - .CED (1'b0), - .CEM (ce_m), - .CEP (ce_p), - .CEINMODE (1'b0), - .CECTRL (ce_mode), - .CEALUMODE (1'b0), - .CECARRYIN (1'b0), - - .A ({{(30-18){1'b0}}, a}), - .B ({{(18-17){1'b0}}, b}), - .C ({48{1'b0}}), - .D ({25{1'b0}}), - .P ({p_dummy, p}), - - .INMODE (inmode), - .OPMODE (opmode), - .ALUMODE (alumode), - - .ACIN ({{(30-18){1'b0}}, casc_a_in}), - .BCIN ({{(18-17){1'b0}}, casc_b_in}), - .ACOUT ({casc_a_dummy, casc_a_out}), - .BCOUT ({casc_b_dummy, casc_b_out}), - .PCIN ({48{1'b0}}), - .PCOUT (), - .CARRYCASCIN (1'b0), - .CARRYCASCOUT (), - - .RSTA (1'b0), - .RSTB (1'b0), - .RSTC (1'b0), - .RSTD (1'b0), - .RSTM (1'b0), - .RSTP (1'b0), - .RSTINMODE (1'b0), - .RSTCTRL (1'b0), - .RSTALUMODE (1'b0), - .RSTALLCARRYIN (1'b0), - - .UNDERFLOW (), - .OVERFLOW (), - .PATTERNDETECT (), - .PATTERNBDETECT (), - - .CARRYIN (1'b0), - .CARRYOUT (), - .CARRYINSEL (3'b000), - - .MULTSIGNIN (1'b0), - .MULTSIGNOUT () - ); - - -endmodule diff --git a/rtl/modexpng_dsp48e1.vh b/rtl/modexpng_dsp48e1.vh new file mode 100644 index 0000000..bc3d55c --- /dev/null +++ b/rtl/modexpng_dsp48e1.vh @@ -0,0 +1,8 @@ +localparam DSP48E1_A_W = 30; +localparam DSP48E1_B_W = 18; +localparam DSP48E1_C_W = 48; +localparam DSP48E1_D_W = 25; +localparam DSP48E1_P_W = 48; +localparam DSP48E1_INMODE_W = 5; +localparam DSP48E1_OPMODE_W = 7; +localparam DSP48E1_ALUMODE_W = 4; diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v index 9c4ee93..8ab64f0 100644 --- 
a/rtl/modexpng_dsp_array_block.v +++ b/rtl/modexpng_dsp_array_block.v @@ -1,24 +1,30 @@ module modexpng_dsp_array_block ( - input clk, - - input ce_a, - input ce_b, - input ce_m, - input ce_p, - input ce_mode, + clk, + ce_a, ce_b, ce_m, ce_p, ce_mode, + mode_z, + a, b, p +); + + `include "modexpng_dsp48e1.vh" + `include "modexpng_parameters.vh" - input [9 -1:0] mode_z, + input clk; - input [5*18-1:0] a, - input [1*16-1:0] b, - output [9*47-1:0] p -); + input ce_a; + input ce_b; + input ce_m; + input ce_p; + input ce_mode; - `include "modexpng_parameters_x8.vh" + input [ NUM_MULTS_AUX -1:0] mode_z; + + input [NUM_MULTS_HALF_AUX * WORD_EXT_W -1:0] a; + input [ WORD_W -1:0] b; + output [NUM_MULTS_AUX * MAC_W -1:0] p; - wire [17:0] casc_a[0:3]; - wire [15:0] casc_b[0:3]; + wire [WORD_EXT_W -1:0] casc_a[0:NUM_MULTS_HALF-1]; + wire [ WORD_W -1:0] casc_b[0:NUM_MULTS_HALF-1]; wire ce_a0 = ce_a; reg ce_a1 = 1'b0; @@ -35,7 +41,7 @@ module modexpng_dsp_array_block genvar z; - generate for (z=0; z<(NUM_MULTS/2); z=z+1) + generate for (z=0; z<NUM_MULTS_HALF; z=z+1) // begin : gen_DSP48E1 // @@ -56,16 +62,16 @@ module modexpng_dsp_array_block .ce_p (ce_p), .ce_mode (ce_mode), - .a (a[z*18+:18]), + .a (a[z*WORD_EXT_W +: WORD_EXT_W]), .b (b), - .p (p[47*2*z+:47]), + .p (p[(2*z)*MAC_W +: MAC_W]), - .inmode (5'b00000), + .inmode ({DSP48E1_INMODE_W{1'b0}}), .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), + .alumode ({DSP48E1_ALUMODE_W{1'b0}}), - .casc_a_in ({18{1'b0}}), - .casc_b_in ({16{1'b0}}), + .casc_a_in (WORD_EXT_NULL), + .casc_b_in (WORD_NULL), .casc_a_out (casc_a[z]), .casc_b_out (casc_b[z]) @@ -88,13 +94,13 @@ module modexpng_dsp_array_block .ce_p (ce_p), .ce_mode (ce_mode), - .a (a[z*18+:18]), + .a (a[z*WORD_EXT_W +: WORD_EXT_W]), .b (b), - .p (p[47*(2*z+1)+:47]), + .p (p[(2*z+1)*MAC_W +: MAC_W]), - .inmode (5'b00000), + .inmode ({DSP48E1_INMODE_W{1'b0}}), .opmode ({1'b0, mode_z[2*z+1], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), + .alumode 
({DSP48E1_ALUMODE_W{1'b0}}), .casc_a_in (casc_a[z]), .casc_b_in (casc_b[z]), @@ -124,16 +130,16 @@ module modexpng_dsp_array_block .ce_p (ce_p), .ce_mode (ce_mode), - .a (a[4*18+:18]), + .a (a[NUM_MULTS_HALF*WORD_EXT_W +: WORD_EXT_W]), .b (b), - .p (p[47*2*4+:47]), + .p (p[(2*NUM_MULTS_HALF)*MAC_W +: MAC_W]), - .inmode (5'b00000), - .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}), - .alumode (4'b0000), + .inmode ({DSP48E1_INMODE_W{1'b0}}), + .opmode ({1'b0, mode_z[2*NUM_MULTS_HALF], 1'b0, 2'b01, 2'b01}), + .alumode ({DSP48E1_ALUMODE_W{1'b0}}), - .casc_a_in ({18{1'b0}}), - .casc_b_in ({16{1'b0}}), + .casc_a_in (WORD_EXT_NULL), + .casc_b_in (WORD_NULL), .casc_a_out (), .casc_b_out () diff --git a/rtl/modexpng_dsp_slice_wrapper.v b/rtl/modexpng_dsp_slice_wrapper.v index f565eec..3d13570 100644 --- a/rtl/modexpng_dsp_slice_wrapper.v +++ b/rtl/modexpng_dsp_slice_wrapper.v @@ -4,30 +4,41 @@ module modexpng_dsp_slice_wrapper # B_REG = 2 ) ( - input clk, - input ce_a1, - input ce_b1, - input ce_a2, - input ce_b2, - input ce_m, - input ce_p, - input ce_mode, - input [17:0] a, - input [15:0] b, - output [46:0] p, - input [ 4:0] inmode, - input [ 6:0] opmode, - input [ 3:0] alumode, - input [17:0] casc_a_in, - input [15:0] casc_b_in, - output [17:0] casc_a_out, - output [15:0] casc_b_out + clk, + ce_a1, ce_b1, ce_a2, ce_b2, + ce_m, ce_p, ce_mode, + a, b, p, + inmode, opmode, alumode, + casc_a_in, casc_b_in, + casc_a_out, casc_b_out ); - wire [30-18-1:0] casc_a_dummy; - wire [18-16-1:0] casc_b_dummy; - wire [48-47-1:0] p_dummy; + `include "modexpng_parameters.vh" + `include "modexpng_dsp48e1.vh" + + input clk; + input ce_a1; + input ce_b1; + input ce_a2; + input ce_b2; + input ce_m; + input ce_p; + input ce_mode; + input [ WORD_EXT_W -1:0] a; + input [ WORD_W -1:0] b; + output [ MAC_W -1:0] p; + input [ DSP48E1_INMODE_W -1:0] inmode; + input [ DSP48E1_OPMODE_W -1:0] opmode; + input [DSP48E1_ALUMODE_W -1:0] alumode; + input [ WORD_EXT_W -1:0] casc_a_in; + input [ WORD_W -1:0] 
casc_b_in; + output [ WORD_EXT_W -1:0] casc_a_out; + output [ WORD_W -1:0] casc_b_out; + wire [DSP48E1_A_W - WORD_EXT_W -1:0] casc_a_dummy; + wire [DSP48E1_B_W - WORD_W -1:0] casc_b_dummy; + wire [DSP48E1_P_W - MAC_W -1:0] p_dummy; + DSP48E1 # ( .AREG (2), @@ -52,8 +63,8 @@ module modexpng_dsp_slice_wrapper # .USE_MULT ("DYNAMIC"), .USE_SIMD ("ONE48"), - .MASK (48'h3fffffffffff), - .PATTERN (48'h000000000000), + .MASK ({DSP48E1_P_W{1'b1}}), + .PATTERN ({DSP48E1_P_W{1'b0}}), .SEL_MASK ("MASK"), .SEL_PATTERN ("PATTERN"), @@ -78,21 +89,21 @@ module modexpng_dsp_slice_wrapper # .CEALUMODE (1'b0), .CECARRYIN (1'b0), - .A ({{(30-18){1'b0}}, a}), - .B ({{(18-16){1'b0}}, b}), - .C ({48{1'b0}}), - .D ({25{1'b0}}), + .A ({{(DSP48E1_A_W-WORD_EXT_W){1'b0}}, a}), + .B ({{(DSP48E1_B_W-WORD_W){1'b0}}, b}), + .C ({DSP48E1_C_W{1'b0}}), + .D ({DSP48E1_D_W{1'b0}}), .P ({p_dummy, p}), .INMODE (inmode), .OPMODE (opmode), .ALUMODE (alumode), - .ACIN ({{(30-18){1'b0}}, casc_a_in}), - .BCIN ({{(18-16){1'b0}}, casc_b_in}), + .ACIN ({{(DSP48E1_A_W-WORD_EXT_W){1'b0}}, casc_a_in}), + .BCIN ({{(DSP48E1_B_W-WORD_W){1'b0}}, casc_b_in}), .ACOUT ({casc_a_dummy, casc_a_out}), .BCOUT ({casc_b_dummy, casc_b_out}), - .PCIN ({48{1'b0}}), + .PCIN ({DSP48E1_P_W{1'b0}}), .PCOUT (), .CARRYCASCIN (1'b0), .CARRYCASCOUT (), @@ -121,5 +132,4 @@ module modexpng_dsp_slice_wrapper # .MULTSIGNOUT () ); - endmodule diff --git a/rtl/modexpng_mmm_dual_x8.v b/rtl/modexpng_mmm_dual.v index 2e4f4e0..df0f823 100644 --- a/rtl/modexpng_mmm_dual_x8.v +++ b/rtl/modexpng_mmm_dual.v @@ -1,14 +1,15 @@ -module modexpng_mmm_dual_x8 +module modexpng_mmm_dual ( clk, rst, ena, rdy, - ladder_mode, word_index_last, word_index_last_minus1, + sel_wide_in, sel_narrow_in, + rd_wide_xy_ena, rd_wide_xy_ena_aux, rd_wide_xy_bank, @@ -51,9 +52,10 @@ module modexpng_mmm_dual_x8 // // Headers // + `include "modexpng_parameters.vh" `include "../rtl_1/modexpng_mmm_fsm_old.vh" - `include "../rtl_1/modexpng_parameters_old.vh" - `include 
"../rtl_1/modexpng_parameters_x8_old.vh" + //`include "../rtl_1/modexpng_parameters_old.vh" + //`include "../rtl_1/modexpng_parameters_x8_old.vh" // @@ -69,10 +71,13 @@ module modexpng_mmm_dual_x8 input [7:0] word_index_last; input [7:0] word_index_last_minus1; + input [BANK_ADDR_W-1:0] sel_wide_in; + input [BANK_ADDR_W-1:0] sel_narrow_in; + output rd_wide_xy_ena; output rd_wide_xy_ena_aux; - output [ 1:0] rd_wide_xy_bank; - output [ 1:0] rd_wide_xy_bank_aux; + output [ BANK_ADDR_W -1:0] rd_wide_xy_bank; + output [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; output [ 8-1:0] rd_wide_xy_addr_aux; input [18*NUM_MULTS/2-1:0] rd_wide_x_dout; @@ -81,24 +86,24 @@ module modexpng_mmm_dual_x8 input [ 18-1:0] rd_wide_y_dout_aux; output rd_narrow_xy_ena; - output [ 1:0] rd_narrow_xy_bank; + output [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; output [ 7:0] rd_narrow_xy_addr; input [18-1:0] rd_narrow_x_dout; input [18-1:0] rd_narrow_y_dout; - output [ 1:0] rcmb_wide_xy_bank; + output [BANK_ADDR_W -1:0] rcmb_wide_xy_bank; output [ 7:0] rcmb_wide_xy_addr; output [17:0] rcmb_wide_x_dout; output [17:0] rcmb_wide_y_dout; output rcmb_wide_xy_valid; - output [ 1:0] rcmb_narrow_xy_bank; + output [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; output [ 7:0] rcmb_narrow_xy_addr; output [17:0] rcmb_narrow_x_dout; output [17:0] rcmb_narrow_y_dout; output rcmb_narrow_xy_valid; - output [ 1:0] rcmb_xy_bank; + output [BANK_ADDR_W -1:0] rcmb_xy_bank; output [ 7:0] rcmb_xy_addr; output [17:0] rcmb_x_dout; output [17:0] rcmb_y_dout; @@ -132,13 +137,13 @@ module modexpng_mmm_dual_x8 // reg wide_xy_ena = 1'b0; reg wide_xy_ena_aux = 1'b0; - reg [ 1:0] wide_xy_bank; - reg [ 1:0] wide_xy_bank_aux; + reg [ BANK_ADDR_W -1:0] wide_xy_bank; + reg [ BANK_ADDR_W -1:0] wide_xy_bank_aux; reg [ 8-1:0] wide_xy_addr[0:3]; reg [ 8-1:0] wide_xy_addr_aux; reg narrow_xy_ena = 1'b0; - reg [ 1:0] narrow_xy_bank; + reg [ BANK_ADDR_W -1:0] narrow_xy_bank; reg [ 7:0] narrow_xy_addr; reg [ 7:0] 
narrow_xy_addr_dly; @@ -332,7 +337,7 @@ module modexpng_mmm_dual_x8 FSM_STATE_MULT_SQUARE_COL_0_TRIG, FSM_STATE_MULT_SQUARE_COL_N_TRIG, FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= BANK_NARROW_T1T2; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in; // FSM_STATE_MULT_TRIANGLE_COL_0_INIT, FSM_STATE_MULT_TRIANGLE_COL_N_INIT, @@ -340,7 +345,7 @@ module modexpng_mmm_dual_x8 FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? - BANK_NARROW_EXT : BANK_NARROW_N_COEFF; + BANK_NARROW_EXT : BANK_NARROW_COEFF; // FSM_STATE_MULT_RECTANGLE_COL_0_INIT, FSM_STATE_MULT_RECTANGLE_COL_N_INIT, @@ -486,13 +491,13 @@ module modexpng_mmm_dual_x8 FSM_STATE_MULT_SQUARE_COL_0_TRIG, FSM_STATE_MULT_SQUARE_COL_N_TRIG, FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_T1T2; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in; FSM_STATE_MULT_TRIANGLE_COL_0_INIT, FSM_STATE_MULT_TRIANGLE_COL_N_INIT, FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_ABL; + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L; FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_ABL; + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L; FSM_STATE_MULT_RECTANGLE_COL_0_INIT, FSM_STATE_MULT_RECTANGLE_COL_N_INIT, FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, @@ -510,13 +515,13 @@ module modexpng_mmm_dual_x8 FSM_STATE_MULT_SQUARE_COL_0_TRIG, FSM_STATE_MULT_SQUARE_COL_N_TRIG, FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_T1T2; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in; FSM_STATE_MULT_TRIANGLE_COL_0_INIT, FSM_STATE_MULT_TRIANGLE_COL_N_INIT, 
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_ABH; + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H; FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_ABL; + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L; FSM_STATE_MULT_RECTANGLE_COL_0_INIT, FSM_STATE_MULT_RECTANGLE_COL_N_INIT, FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, @@ -525,8 +530,8 @@ module modexpng_mmm_dual_x8 FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's) case (rcmb_xy_bank) - BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_ABL; - BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_ABH; + BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L; + BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H; //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX default: wide_xy_bank_aux <= 3'bXXX; endcase diff --git a/rtl/modexpng_mmm_fsm_old.vh b/rtl/modexpng_mmm_fsm_old.vh deleted file mode 100644 index 3bdae66..0000000 --- a/rtl/modexpng_mmm_fsm_old.vh +++ /dev/null @@ -1,43 +0,0 @@ -localparam FSM_STATE_WIDTH = 32; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_IDLE = 0; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_1 = 1; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_2 = 2; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_3 = 3; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_1 = 4; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_2 = 5; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_3 = 6; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_INIT = 11; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_INIT = 14; -localparam [FSM_STATE_WIDTH-1:0] 
FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999; diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh new file mode 100644 index 0000000..514fc21 --- /dev/null +++ b/rtl/modexpng_parameters.vh @@ -0,0 +1,71 @@ +`include "modexpng_parameters_x8.vh" + +function integer cryptech_clog2; + input integer value; + integer temp_value; + integer result; + // + begin + temp_value = value - 1; + for (result = 0; temp_value > 0; result = result + 1) + temp_value = temp_value >> 1; + cryptech_clog2 = result; + end + // +endfunction + +localparam WORD_W = 16; +localparam WORD_EXT_W = 18; +localparam MAC_W = 47; + +localparam MAX_OP_W = 4096; + +localparam BANK_ADDR_W = 3; +localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W); 
+localparam COL_INDEX_W = OP_ADDR_W - cryptech_clog2(NUM_MULTS); + +localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS); + +localparam RDCT_CARRY_W = WORD_EXT_W - WORD_W; + +localparam [RDCT_CARRY_W-1:0] RDCT_CARRY_ZEROES = {RDCT_CARRY_W{1'b0}}; + +localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_B = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_C = 3'd2; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_D = 3'd3; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_E = 3'd4; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_N = 3'd5; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_L = 3'd6; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_H = 3'd7; + +localparam [BANK_ADDR_W-1:0] BANK_NARROW_A = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_B = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_C = 3'd2; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_D = 3'd3; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_E = 3'd4; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_COEFF = 3'd5; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_Q = 3'd6; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_EXT = 3'd7; // [0] -> COEFF', [1] -> Q' + +localparam [BANK_ADDR_W-1:0] BANK_RCMB_ML = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_RCMB_MH = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_RCMB_EXT = 3'd2; // [0] -> MH' + +localparam [BANK_ADDR_W-1:0] BANK_DONT_CARE = {BANK_ADDR_W{1'bX}}; + +localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0; +localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1; + +localparam [OP_ADDR_W-1:0] OP_ADDR_ZERO = {OP_ADDR_W{1'b0}}; +localparam [OP_ADDR_W-1:0] OP_ADDR_ONE = {{(OP_ADDR_W-1){1'b0}}, 1'b1}; +localparam [OP_ADDR_W-1:0] OP_ADDR_DONT_CARE = {OP_ADDR_W{1'bX}}; + +localparam [WORD_W-1:0] WORD_NULL = {WORD_W{1'b0}}; +localparam [WORD_EXT_W-1:0] WORD_EXT_NULL = {WORD_EXT_W{1'b0}}; + +localparam [WORD_EXT_W-1:0] WORD_EXT_DONT_CARE = {WORD_EXT_W{1'bX}}; + +localparam [MAC_INDEX_W-1:0] MAC_INDEX_DONT_CARE = {MAC_INDEX_W{1'bX}}; + +localparam [NUM_MULTS-1:0] MULT_BITMAP_ZEROES = {NUM_MULTS{1'b0}};
\ No newline at end of file diff --git a/rtl/modexpng_parameters_old.vh b/rtl/modexpng_parameters_old.vh deleted file mode 100644 index d30b751..0000000 --- a/rtl/modexpng_parameters_old.vh +++ /dev/null @@ -1,40 +0,0 @@ - -//localparam WORD_WIDTH = 17; -//localparam MAC_WIDTH = 47; - -localparam BANK_ADDR_WIDTH = 2; // TODO: Replace everywhere! - -localparam [1:0] BANK_WIDE_T1T2 = 2'd0; -localparam [1:0] BANK_WIDE_ABL = 2'd1; -localparam [1:0] BANK_WIDE_ABH = 2'd2; -localparam [1:0] BANK_WIDE_N = 2'd3; - -localparam [1:0] BANK_RCMB_ML = 2'd0; -localparam [1:0] BANK_RCMB_MH = 2'd1; -localparam [1:0] BANK_RCMB_EXT = 2'd2; // 0 -> MH' - -localparam [1:0] BANK_NARROW_T1T2 = 2'd0; -localparam [1:0] BANK_NARROW_N_COEFF = 2'd1; -localparam [1:0] BANK_NARROW_Q = 2'd2; -localparam [1:0] BANK_NARROW_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q' - - -//localparam BANK_Y_T2 = 3'd0; -//localparam BANK_XY_T1T2 = 3'd0; - -//localparam BANK_XY_AB_LSB = 3'd1; -//localparam BANK_XY_AB_MSB = 3'd2; - -//localparam BANK_X_N = 3'd3; -//localparam BANK_Y_N_COEFF = 3'd3; - -//localparam BANK_XY_M = 3'd4; - -//localparam BANK_XY_Q_LSB = 3'd5; -//localparam BANK_XY_Q_MSB = 3'd6; - -//localparam BANK_XY_AUX = 3'd7; - -//localparam BANK_XY_ANY = 3'bXXX; - -//localparam BANK_XY_AUX_ADDR_N_COEFF = 0; diff --git a/rtl/modexpng_parameters_x8.vh b/rtl/modexpng_parameters_x8.vh new file mode 100644 index 0000000..0dcc3d6 --- /dev/null +++ b/rtl/modexpng_parameters_x8.vh @@ -0,0 +1,4 @@ +localparam NUM_MULTS = 8; +localparam NUM_MULTS_AUX = NUM_MULTS + 1; +localparam NUM_MULTS_HALF = NUM_MULTS / 2; +localparam NUM_MULTS_HALF_AUX = NUM_MULTS_HALF + 1; diff --git a/rtl/modexpng_parameters_x8_old.vh b/rtl/modexpng_parameters_x8_old.vh deleted file mode 100644 index 8734354..0000000 --- a/rtl/modexpng_parameters_x8_old.vh +++ /dev/null @@ -1 +0,0 @@ -localparam NUM_MULTS = 8; diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v index d6b1ad1..de60d1f 100644 --- 
a/rtl/modexpng_recombinator_block.v +++ b/rtl/modexpng_recombinator_block.v @@ -17,9 +17,9 @@ module modexpng_recombinator_block // // Headers // + `include "modexpng_parameters.vh" `include "../rtl_1/modexpng_mmm_fsm_old.vh" - `include "../rtl_1/modexpng_parameters_old.vh" - `include "../rtl_1/modexpng_parameters_x8_old.vh" + //`include "../rtl_1/modexpng_parameters_x8_old.vh" input clk; @@ -34,22 +34,22 @@ module modexpng_recombinator_block input [ 4:0] col_index; input [ 4:0] col_index_last; + input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; input [ 7:0] rd_narrow_xy_addr; - input [ 1:0] rd_narrow_xy_bank; - output [ 1:0] rcmb_wide_xy_bank; + output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; output [ 7:0] rcmb_wide_xy_addr; output [ 17:0] rcmb_wide_x_dout; output [ 17:0] rcmb_wide_y_dout; output rcmb_wide_xy_valid; - output [ 1:0] rcmb_narrow_xy_bank; + output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; output [ 7:0] rcmb_narrow_xy_addr; output [ 17:0] rcmb_narrow_x_dout; output [ 17:0] rcmb_narrow_y_dout; output rcmb_narrow_xy_valid; - output [ 1:0] rdct_narrow_xy_bank; + output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank; output [ 7:0] rdct_narrow_xy_addr; output [ 17:0] rdct_narrow_x_dout; output [ 17:0] rdct_narrow_y_dout; @@ -167,7 +167,7 @@ module modexpng_recombinator_block function calc_square_triangle_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -182,7 +182,7 @@ module modexpng_recombinator_block function calc_square_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -192,7 +192,7 @@ module modexpng_recombinator_block function 
calc_triangle_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -202,7 +202,7 @@ module modexpng_recombinator_block function calc_rectangle_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -217,7 +217,7 @@ module modexpng_recombinator_block function calc_triangle_aux_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -232,7 +232,7 @@ module modexpng_recombinator_block function [7:0] calc_square_triangle_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -258,7 +258,7 @@ module modexpng_recombinator_block function [7:0] calc_square_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -268,7 +268,7 @@ module modexpng_recombinator_block function [7:0] calc_triangle_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, 
col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -278,7 +278,7 @@ module modexpng_recombinator_block function [7:0] calc_rectangle_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -313,7 +313,7 @@ module modexpng_recombinator_block function [2:0] calc_square_triangle_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -339,7 +339,7 @@ module modexpng_recombinator_block function [2:0] calc_square_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -349,7 +349,7 @@ module modexpng_recombinator_block function [2:0] calc_triangle_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -359,7 +359,7 @@ module modexpng_recombinator_block function [2:0] calc_rectangle_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] slim_bram_xy_bank_value; + input [BANK_ADDR_W -1:0] slim_bram_xy_bank_value; input [7:0] slim_bram_xy_addr_value; begin // @@ -385,7 +385,7 @@ module modexpng_recombinator_block function calc_square_rectangle_purge_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input 
[BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -400,7 +400,7 @@ module modexpng_recombinator_block function calc_square_purge_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -410,7 +410,7 @@ module modexpng_recombinator_block function calc_rectangle_purge_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -420,7 +420,7 @@ module modexpng_recombinator_block function calc_square_valid_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -436,7 +436,7 @@ module modexpng_recombinator_block function calc_rectangle_valid_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -452,7 +452,7 @@ module modexpng_recombinator_block function [7:0] calc_square_bitmap_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -469,7 +469,7 @@ module modexpng_recombinator_block function [7:0] calc_rectangle_bitmap_msb; input [4:0] col_index_value; input [4:0] 
col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -485,7 +485,7 @@ module modexpng_recombinator_block function calc_square_purge_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -501,7 +501,7 @@ module modexpng_recombinator_block function calc_rectangle_purge_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -783,19 +783,19 @@ module modexpng_recombinator_block end - reg [ 1:0] wide_xy_bank; + reg [ BANK_ADDR_W -1:0] wide_xy_bank; reg [ 7:0] wide_xy_addr; reg [17:0] wide_x_dout; reg [17:0] wide_y_dout; reg wide_xy_valid = 1'b0; - reg [ 1:0] narrow_xy_bank; + reg [ BANK_ADDR_W -1:0] narrow_xy_bank; reg [ 7:0] narrow_xy_addr; reg [17:0] narrow_x_dout; reg [17:0] narrow_y_dout; reg narrow_xy_valid = 1'b0; - reg [ 1:0] rdct_xy_bank; + reg [ BANK_ADDR_W -1:0] rdct_xy_bank; reg [ 7:0] rdct_xy_addr; reg [17:0] rdct_x_dout; reg [17:0] rdct_y_dout; @@ -883,7 +883,7 @@ module modexpng_recombinator_block endtask task _update_wide; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -898,7 +898,7 @@ module modexpng_recombinator_block endtask task _update_narrow; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -913,7 +913,7 @@ module modexpng_recombinator_block endtask task _update_rdct; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -928,7 +928,7 @@ module modexpng_recombinator_block endtask task 
set_wide; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -938,7 +938,7 @@ module modexpng_recombinator_block endtask task set_narrow; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -948,7 +948,7 @@ module modexpng_recombinator_block endtask task set_rdct; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -959,19 +959,19 @@ module modexpng_recombinator_block task clear_wide; begin - _update_wide(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + _update_wide(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); end endtask task clear_narrow; begin - _update_narrow(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + _update_narrow(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); end endtask task clear_rdct; begin - _update_rdct(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + _update_rdct(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); end endtask @@ -1074,16 +1074,16 @@ module modexpng_recombinator_block // case (rcmb_xy_valid) // - 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_ABH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); + 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); else clear_wide; // - 2'b01: set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); // 2'b10: if (cnt_msb < 8'd2) clear_wide; - else set_wide(BANK_WIDE_ABH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); // - 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_ABH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, 
rcmb_y_lsb_msb_carry_1_pad); - else set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); + else set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); // endcase // diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v deleted file mode 100644 index 1ecf56a..0000000 --- a/rtl/modexpng_recombinator_cell.v +++ /dev/null @@ -1,35 +0,0 @@ -module modexpng_recombinator_cell -( - clk, - ce, clr, - din, dout -); - - input clk; - input ce; - input clr; - input [46:0] din; - output [15:0] dout; - - reg [14:0] z; - reg [16:0] y; - reg [17:0] x; - //reg [15:0] w; - - //assign dout = w; - assign dout = x[15:0]; - - wire [14:0] din_z = din[46:32]; // TODO: maybe determine more precise bound here - wire [15:0] din_y = din[31:16]; - wire [15:0] din_x = din[15: 0]; - - always @(posedge clk) - // - if (ce) begin - z <= din_z; - y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z}; - x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {{16{1'b0}}, x[17:16]}; - //w <= clr ? 
{16{1'bX}} : x[15:0]; - end - -endmodule diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v index 0f5e461..aafb38c 100644 --- a/rtl/modexpng_reductor.v +++ b/rtl/modexpng_reductor.v @@ -17,8 +17,9 @@ module modexpng_reductor // // Headers // + `include "modexpng_parameters.vh" //`include "../rtl_1/modexpng_mmm_fsm.vh" - `include "../rtl_1/modexpng_parameters_old.vh" + //`include "../rtl_1/modexpng_parameters_x8.vh" @@ -39,12 +40,12 @@ module modexpng_reductor input [ 7:0] rd_narrow_xy_addr; input [ 1:0] rd_narrow_xy_bank; */ - input [ 1:0] rd_wide_xy_bank_aux; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; input [ 7:0] rd_wide_xy_addr_aux; input [ 17:0] rd_wide_x_dout_aux; input [ 17:0] rd_wide_y_dout_aux; // - input [ 1:0] rcmb_final_xy_bank; + input [ BANK_ADDR_W -1:0] rcmb_final_xy_bank; input [ 7:0] rcmb_final_xy_addr; input [ 17:0] rcmb_final_x_dout; input [ 17:0] rcmb_final_y_dout; @@ -60,7 +61,7 @@ module modexpng_reductor // Ready // reg rdy_reg = 1'b1; - reg busy_now = 1'b0; + wire busy_now; assign rdy = rdy_reg; @@ -81,9 +82,9 @@ module modexpng_reductor reg rcmb_xy_valid_dly2 = 1'b0; reg rcmb_xy_valid_dly3 = 1'b0; - reg [2:0] rcmb_xy_bank_dly1; - reg [2:0] rcmb_xy_bank_dly2; - reg [2:0] rcmb_xy_bank_dly3; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3; reg [7:0] rcmb_xy_addr_dly1; reg [7:0] rcmb_xy_addr_dly2; @@ -236,7 +237,7 @@ module modexpng_reductor // clear_rdct; // - if (busy_now && rcmb_xy_valid_dly3) + if (rcmb_xy_valid_dly3) // case (rcmb_xy_bank_dly3) @@ -258,12 +259,25 @@ module modexpng_reductor // // Busy // + reg busy_next = 1'b0; + reg [2:0] busy_now_shreg = {3{1'b0}}; + + assign busy_now = busy_now_shreg[2]; + + always @(posedge clk) + // + if (rst) busy_now_shreg <= {3{1'b0}}; + else begin + if (rdy && ena) busy_now_shreg <= {3{1'b1}}; + else busy_now_shreg <= {busy_now_shreg[1:0], busy_next}; + end + always @(posedge clk) // - if (rst) 
busy_now <= 1'b0; + if (rst) busy_next <= 1'b0; else begin - if (rdy && ena) busy_now <= 1'b1; - //if (!rdy && !busy_now) rdy <= 1'b1; + if (rdy && ena) busy_next <= 1'b1; + if (!rdy && rcmb_xy_valid_dly3 && (rcmb_xy_bank_dly3 == BANK_RCMB_EXT)) busy_next <= 1'b0; end diff --git a/rtl/modexpng_sdp_36k_wrapper.v b/rtl/modexpng_sdp_36k_wrapper.v new file mode 100644 index 0000000..0295697 --- /dev/null +++ b/rtl/modexpng_sdp_36k_wrapper.v @@ -0,0 +1,72 @@ +module modexpng_sdp_36k_wrapper +( + clk, + + ena, wea, + addra, dina, + + enb, regceb, + addrb, doutb +); + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + + + // + // Ports + // + input clk; + + input ena; + input wea; + input [BANK_ADDR_W + OP_ADDR_W -1:0] addra; + input [ WORD_EXT_W -1:0] dina; + + input enb; + input regceb; + input [BANK_ADDR_W + OP_ADDR_W -1:0] addrb; + output [ WORD_EXT_W -1:0] doutb; + + // + // BRAM_SDP_MACRO + // + BRAM_SDP_MACRO # + ( + .DEVICE ("7SERIES"), + + .BRAM_SIZE ("36Kb"), + + .WRITE_WIDTH (WORD_EXT_W), + .READ_WIDTH (WORD_EXT_W), + + .DO_REG (1), + .WRITE_MODE ("READ_FIRST"), + + .SRVAL (72'h000000000000000000), + .INIT (72'h000000000000000000), + + .INIT_FILE ("NONE"), + .SIM_COLLISION_CHECK ("NONE") + ) + BRAM_SDP_MACRO_inst + ( + .RST (1'b0), + + .WRCLK (clk), + .WREN (ena), + .WE ({2{wea}}), + .WRADDR (addra), + .DI (dina), + + .RDCLK (clk), + .RDEN (enb), + .REGCE (regceb), + .RDADDR (addrb), + .DO (doutb) + ); + +endmodule diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v index d6f9fb1..be04c7c 100644 --- a/rtl/modexpng_storage_block.v +++ b/rtl/modexpng_storage_block.v @@ -32,49 +32,46 @@ module modexpng_storage_block rd_narrow_y_dout ); - // // Headers // - `include "../rtl_1/modexpng_parameters_x8_old.vh" - + `include "modexpng_parameters.vh" // // Ports // - input clk; - input rst; - - input wr_wide_xy_ena; - input [ 1:0] wr_wide_xy_bank; - input [ 7:0] wr_wide_xy_addr; - input [17:0] wr_wide_x_din; - input [17:0] 
wr_wide_y_din; + input clk; + input rst; + + input wr_wide_xy_ena; + input [ BANK_ADDR_W -1:0] wr_wide_xy_bank; + input [ OP_ADDR_W -1:0] wr_wide_xy_addr; + input [ WORD_EXT_W -1:0] wr_wide_x_din; + input [ WORD_EXT_W -1:0] wr_wide_y_din; - input wr_narrow_xy_ena; - input [ 1:0] wr_narrow_xy_bank; - input [ 7:0] wr_narrow_xy_addr; - input [17:0] wr_narrow_x_din; - input [17:0] wr_narrow_y_din; - - input rd_wide_xy_ena; - input rd_wide_xy_ena_aux; - input [ 1:0] rd_wide_xy_bank; - input [ 1:0] rd_wide_xy_bank_aux; - input [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; - input [ 8-1:0] rd_wide_xy_addr_aux; - output [18*NUM_MULTS/2-1:0] rd_wide_x_dout; - output [18*NUM_MULTS/2-1:0] rd_wide_y_dout; - output [ 18-1:0] rd_wide_x_dout_aux; - output [ 18-1:0] rd_wide_y_dout_aux; + input wr_narrow_xy_ena; + input [ BANK_ADDR_W -1:0] wr_narrow_xy_bank; + input [ OP_ADDR_W -1:0] wr_narrow_xy_addr; + input [ WORD_EXT_W -1:0] wr_narrow_x_din; + input [ WORD_EXT_W -1:0] wr_narrow_y_din; + + input rd_wide_xy_ena; + input rd_wide_xy_ena_aux; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + input [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr; + input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; + output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout; + output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout; + output [ WORD_EXT_W -1:0] rd_wide_x_dout_aux; + output [ WORD_EXT_W -1:0] rd_wide_y_dout_aux; - input rd_narrow_xy_ena; - input [ 1:0] rd_narrow_xy_bank; - input [ 7:0] rd_narrow_xy_addr; - output [18-1:0] rd_narrow_x_dout; - output [18-1:0] rd_narrow_y_dout; + input rd_narrow_xy_ena; + input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; + input [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rd_narrow_x_dout; + output [ WORD_EXT_W -1:0] rd_narrow_y_dout; - // // Internal Registers // @@ -82,43 +79,44 @@ module modexpng_storage_block reg rd_wide_xy_reg_ena_aux = 1'b0; reg rd_narrow_xy_reg_ena = 1'b0; - always @(posedge clk) 
begin - // - rd_wide_xy_reg_ena <= rst ? 1'b0 : rd_wide_xy_ena; - rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux; - rd_narrow_xy_reg_ena <= rst ? 1'b0 : rd_narrow_xy_ena; + always @(posedge clk) // - end - + if (rst) begin + rd_wide_xy_reg_ena <= 1'b0; + rd_wide_xy_reg_ena_aux <= 1'b0; + rd_narrow_xy_reg_ena <= 1'b0; + end else begin + rd_wide_xy_reg_ena <= rd_wide_xy_ena; + rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux; + rd_narrow_xy_reg_ena <= rd_narrow_xy_ena; + end // // Helper Signals // - wire [2+8-1:0] wr_wide_xy_offset; - wire [2+8-1:0] rd_wide_xy_offset[0:NUM_MULTS/2-1]; - wire [2+8-1:0] rd_wide_xy_offset_aux; - wire [2+8-1:0] wr_narrow_xy_offset; - wire [2+8-1:0] rd_narrow_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset[0:NUM_MULTS_HALF-1]; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset_aux; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset; - assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr}; assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux}; - assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr}; assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr}; - + assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr}; + assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr}; // // "Wide" Storage // genvar z; - generate for (z=0; z<(NUM_MULTS/2); z=z+1) + generate for (z=0; z<NUM_MULTS_HALF; z=z+1) begin : gen_wide_bram // - assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[8*z+:8]}; + assign rd_wide_xy_offset[z] = {1'b0, rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]}; // - ip_bram_18k wide_bram_x + modexpng_sdp_36k_wrapper wide_bram_x ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_wide_xy_ena), .wea (wr_wide_xy_ena), @@ -128,13 +126,12 @@ module modexpng_storage_block .enb 
(rd_wide_xy_ena), .regceb (rd_wide_xy_reg_ena), .addrb (rd_wide_xy_offset[z]), - .doutb (rd_wide_x_dout[18*z+:18]) + .doutb (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W]) ); // - ip_bram_18k wide_bram_y + modexpng_sdp_36k_wrapper wide_bram_y ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_wide_xy_ena), .wea (wr_wide_xy_ena), @@ -144,20 +141,18 @@ module modexpng_storage_block .enb (rd_wide_xy_ena), .regceb (rd_wide_xy_reg_ena), .addrb (rd_wide_xy_offset[z]), - .doutb (rd_wide_y_dout[18*z+:18]) + .doutb (rd_wide_y_dout[z*WORD_EXT_W +: WORD_EXT_W]) ); // end endgenerate - // // Auxilary Storage // - ip_bram_18k wide_bram_x_aux + modexpng_sdp_36k_wrapper wide_bram_x_aux ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_wide_xy_ena), .wea (wr_wide_xy_ena), @@ -170,10 +165,9 @@ module modexpng_storage_block .doutb (rd_wide_x_dout_aux) ); // - ip_bram_18k wide_bram_y_aux + modexpng_sdp_36k_wrapper wide_bram_y_aux ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_wide_xy_ena), .wea (wr_wide_xy_ena), @@ -186,14 +180,12 @@ module modexpng_storage_block .doutb (rd_wide_y_dout_aux) ); - // // "Narrow" Storage // - ip_bram_18k narrow_bram_x + modexpng_sdp_36k_wrapper narrow_bram_x ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_narrow_xy_ena), .wea (wr_narrow_xy_ena), @@ -206,10 +198,9 @@ module modexpng_storage_block .doutb (rd_narrow_x_dout) ); - ip_bram_18k narrow_bram_y + modexpng_sdp_36k_wrapper narrow_bram_y ( - .clka (clk), - .clkb (clk), + .clk (clk), .ena (wr_narrow_xy_ena), .wea (wr_narrow_xy_ena), @@ -222,5 +213,4 @@ module modexpng_storage_block .doutb (rd_narrow_y_dout) ); - endmodule diff --git a/rtl/modexpng_storage_manager.v b/rtl/modexpng_storage_manager.v index fa1e4a1..e5ac83f 100644 --- a/rtl/modexpng_storage_manager.v +++ b/rtl/modexpng_storage_manager.v @@ -43,70 +43,69 @@ module modexpng_storage_manager // // Headers // - `include "../rtl_1/modexpng_parameters_x8_old.vh" + `include "modexpng_parameters.vh" // // Ports // - input 
clk; - input rst; + input clk; + input rst; - output wr_wide_xy_ena; - output [ 1:0] wr_wide_xy_bank; - output [ 7:0] wr_wide_xy_addr; - output [17:0] wr_wide_x_din; - output [17:0] wr_wide_y_din; + output wr_wide_xy_ena; + output [BANK_ADDR_W -1:0] wr_wide_xy_bank; + output [ OP_ADDR_W -1:0] wr_wide_xy_addr; + output [ WORD_EXT_W -1:0] wr_wide_x_din; + output [ WORD_EXT_W -1:0] wr_wide_y_din; - output wr_narrow_xy_ena; - output [ 1:0] wr_narrow_xy_bank; - output [ 7:0] wr_narrow_xy_addr; - output [17:0] wr_narrow_x_din; - output [17:0] wr_narrow_y_din; + output wr_narrow_xy_ena; + output [BANK_ADDR_W -1:0] wr_narrow_xy_bank; + output [ OP_ADDR_W -1:0] wr_narrow_xy_addr; + output [ WORD_EXT_W -1:0] wr_narrow_x_din; + output [ WORD_EXT_W -1:0] wr_narrow_y_din; - input ext_wide_xy_ena; - input [ 1:0] ext_wide_xy_bank; - input [ 7:0] ext_wide_xy_addr; - input [17:0] ext_wide_x_din; - input [17:0] ext_wide_y_din; + input ext_wide_xy_ena; + input [BANK_ADDR_W -1:0] ext_wide_xy_bank; + input [ OP_ADDR_W -1:0] ext_wide_xy_addr; + input [ WORD_EXT_W -1:0] ext_wide_x_din; + input [ WORD_EXT_W -1:0] ext_wide_y_din; - input ext_narrow_xy_ena; - input [ 1:0] ext_narrow_xy_bank; - input [ 7:0] ext_narrow_xy_addr; - input [17:0] ext_narrow_x_din; - input [17:0] ext_narrow_y_din; + input ext_narrow_xy_ena; + input [BANK_ADDR_W -1:0] ext_narrow_xy_bank; + input [ OP_ADDR_W -1:0] ext_narrow_xy_addr; + input [ WORD_EXT_W -1:0] ext_narrow_x_din; + input [ WORD_EXT_W -1:0] ext_narrow_y_din; input rcmb_wide_xy_ena; - input [ 1:0] rcmb_wide_xy_bank; + input [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; input [ 7:0] rcmb_wide_xy_addr; input [17:0] rcmb_wide_x_din; input [17:0] rcmb_wide_y_din; input rcmb_narrow_xy_ena; - input [ 1:0] rcmb_narrow_xy_bank; + input [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; input [ 7:0] rcmb_narrow_xy_addr; input [17:0] rcmb_narrow_x_din; input [17:0] rcmb_narrow_y_din; - - reg wr_wide_xy_ena_reg = 1'b0; - reg [ 1:0] wr_wide_xy_bank_reg; - reg [ 7:0] 
wr_wide_xy_addr_reg; - reg [17:0] wr_wide_x_din_reg; - reg [17:0] wr_wide_y_din_reg; + reg wr_wide_xy_ena_reg = 1'b0; + reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg; + reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_reg; + reg [ WORD_EXT_W -1:0] wr_wide_x_din_reg; + reg [ WORD_EXT_W -1:0] wr_wide_y_din_reg; - reg wr_narrow_xy_ena_reg = 1'b0; - reg [ 1:0] wr_narrow_xy_bank_reg; - reg [ 7:0] wr_narrow_xy_addr_reg; - reg [17:0] wr_narrow_x_din_reg; - reg [17:0] wr_narrow_y_din_reg; + reg wr_narrow_xy_ena_reg = 1'b0; + reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_reg; + reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_reg; + reg [ WORD_EXT_W -1:0] wr_narrow_x_din_reg; + reg [ WORD_EXT_W -1:0] wr_narrow_y_din_reg; task _update_wide; - input xy_ena; - input [ 1:0] xy_bank; - input [ 7:0] xy_addr; - input [17:0] x_din; - input [17:0] y_din; + input xy_ena; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; begin wr_wide_xy_ena_reg <= xy_ena; wr_wide_xy_bank_reg <= xy_bank; @@ -118,10 +117,10 @@ module modexpng_storage_manager task _update_narrow; input xy_ena; - input [ 1:0] xy_bank; - input [ 7:0] xy_addr; - input [17:0] x_din; - input [17:0] y_din; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; begin wr_narrow_xy_ena_reg <= xy_ena; wr_narrow_xy_bank_reg <= xy_bank; @@ -132,20 +131,20 @@ module modexpng_storage_manager endtask task enable_wide; - input [ 1:0] xy_bank; - input [ 7:0] xy_addr; - input [17:0] x_din; - input [17:0] y_din; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; + input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; begin _update_wide(1'b1, xy_bank, xy_addr, x_din, y_din); end endtask task enable_narrow; - input [ 1:0] xy_bank; - input [ 7:0] xy_addr; - input [17:0] x_din; - input [17:0] y_din; + input [BANK_ADDR_W -1:0] xy_bank; + input [ OP_ADDR_W -1:0] xy_addr; 
+ input [ WORD_EXT_W -1:0] x_din; + input [ WORD_EXT_W -1:0] y_din; begin _update_narrow(1'b1, xy_bank, xy_addr, x_din, y_din); end @@ -153,13 +152,13 @@ module modexpng_storage_manager task disable_wide; begin - _update_wide(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}); + _update_wide(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE); end endtask task disable_narrow; begin - _update_narrow(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}); + _update_narrow(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE); end endtask |