From 71f70252dfc7e41103dde420a721be8aa48486d5 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 16:18:33 +0300 Subject: Redesigned core architecture, unified bank structure. All storage blocks now have eight 4kbit entries and occupy one 36K BRAM tile. --- rtl/_modexpng_mmm_dual_x8.v | 961 ++++++++++++++++++++++++++++ rtl/_modexpng_mmm_fsm.vh | 43 ++ rtl/_modexpng_recombinator_block.v | 1225 ++++++++++++++++++++++++++++++++++++ rtl/_modexpng_recombinator_cell.v | 40 ++ rtl/_modexpng_reductor.v | 252 ++++++++ rtl/_modexpng_storage_block.v | 219 +++++++ rtl/_modexpng_storage_manager.v | 199 ++++++ rtl/dev/temp.txt | 384 ----------- rtl/dsp/dsp_array.v | 143 ----- rtl/dsp/dsp_slice.v | 125 ---- rtl/modexpng_dsp48e1.vh | 8 + rtl/modexpng_dsp_array_block.v | 72 ++- rtl/modexpng_dsp_slice_wrapper.v | 72 ++- rtl/modexpng_mmm_dual.v | 956 ++++++++++++++++++++++++++++ rtl/modexpng_mmm_dual_x8.v | 951 ---------------------------- rtl/modexpng_mmm_fsm_old.vh | 43 -- rtl/modexpng_parameters.vh | 71 +++ rtl/modexpng_parameters_old.vh | 40 -- rtl/modexpng_parameters_x8.vh | 4 + rtl/modexpng_parameters_x8_old.vh | 1 - rtl/modexpng_recombinator_block.v | 90 +-- rtl/modexpng_recombinator_cell.v | 35 -- rtl/modexpng_reductor.v | 36 +- rtl/modexpng_sdp_36k_wrapper.v | 72 +++ rtl/modexpng_storage_block.v | 136 ++-- rtl/modexpng_storage_manager.v | 109 ++-- 26 files changed, 4317 insertions(+), 1970 deletions(-) create mode 100644 rtl/_modexpng_mmm_dual_x8.v create mode 100644 rtl/_modexpng_mmm_fsm.vh create mode 100644 rtl/_modexpng_recombinator_block.v create mode 100644 rtl/_modexpng_recombinator_cell.v create mode 100644 rtl/_modexpng_reductor.v create mode 100644 rtl/_modexpng_storage_block.v create mode 100644 rtl/_modexpng_storage_manager.v delete mode 100644 rtl/dev/temp.txt delete mode 100644 rtl/dsp/dsp_array.v delete mode 100644 rtl/dsp/dsp_slice.v create mode 100644 rtl/modexpng_dsp48e1.vh create mode 100644 rtl/modexpng_mmm_dual.v delete mode 100644 rtl/modexpng_mmm_dual_x8.v delete mode 100644 rtl/modexpng_mmm_fsm_old.vh create mode 100644 rtl/modexpng_parameters.vh delete mode 100644 rtl/modexpng_parameters_old.vh create mode 100644 rtl/modexpng_parameters_x8.vh delete mode 100644 rtl/modexpng_parameters_x8_old.vh delete mode 100644 rtl/modexpng_recombinator_cell.v create mode 100644 rtl/modexpng_sdp_36k_wrapper.v diff --git a/rtl/_modexpng_mmm_dual_x8.v b/rtl/_modexpng_mmm_dual_x8.v new file mode 100644 index 0000000..ffd5ccf --- /dev/null +++ b/rtl/_modexpng_mmm_dual_x8.v @@ -0,0 +1,961 @@ +module modexpng_mmm_dual_x8 +( + clk, rst, + + ena, rdy, + + ladder_mode, + word_index_last, + word_index_last_minus1, + + sel_wide_in, + sel_narrow_in, + sel_wide_out, + sel_narrow_out, + + rd_wide_xy_ena, + rd_wide_xy_ena_aux, + rd_wide_xy_bank, + rd_wide_xy_bank_aux, + rd_wide_xy_addr, + rd_wide_xy_addr_aux, + rd_wide_x_dout, + rd_wide_y_dout, + rd_wide_x_dout_aux, + rd_wide_y_dout_aux, + + rd_narrow_xy_ena, + rd_narrow_xy_bank, + rd_narrow_xy_addr, + rd_narrow_x_dout, + rd_narrow_y_dout, + + rcmb_wide_xy_bank, + rcmb_wide_xy_addr, + rcmb_wide_x_dout, + rcmb_wide_y_dout, + rcmb_wide_xy_valid, + + rcmb_narrow_xy_bank, + rcmb_narrow_xy_addr, + rcmb_narrow_x_dout, + rcmb_narrow_y_dout, + rcmb_narrow_xy_valid, + + rcmb_xy_bank, + rcmb_xy_addr, + rcmb_x_dout, + rcmb_y_dout, + rcmb_xy_valid, + + rdct_ena, rdct_rdy +); + + + // + // Headers + // + `include "../rtl/modexpng_mmm_fsm.vh" + `include "../rtl/modexpng_parameters.vh" + + + // + // Ports + // + input clk; + input rst; + + input ena; + output rdy; + + input ladder_mode; + input [ OP_ADDR_W -1:0] word_index_last; + input [ OP_ADDR_W -1:0] word_index_last_minus1; + + input [ BANK_ADDR_W -1:0] sel_wide_in; + input [ BANK_ADDR_W -1:0] sel_narrow_in; + input [ BANK_ADDR_W -1:0] sel_wide_out; + input [ BANK_ADDR_W -1:0] sel_narrow_out; + + output rd_wide_xy_ena; + output rd_wide_xy_ena_aux; + output [ BANK_ADDR_W -1:0] rd_wide_xy_bank; + output [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + output [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr; + output [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; + input [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout; + input [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout; + input [ WORD_EXT_W -1:0] rd_wide_x_dout_aux; + input [ WORD_EXT_W -1:0] rd_wide_y_dout_aux; + + output rd_narrow_xy_ena; + output [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + input [ WORD_EXT_W -1:0] rd_narrow_x_dout; + input [ WORD_EXT_W -1:0] rd_narrow_y_dout; + + output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_wide_x_dout; + output [ WORD_EXT_W -1:0] rcmb_wide_y_dout; + output rcmb_wide_xy_valid; + + output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout; + output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout; + output rcmb_narrow_xy_valid; + + output [ BANK_ADDR_W -1:0] rcmb_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_x_dout; + output [ WORD_EXT_W -1:0] rcmb_y_dout; + output rcmb_xy_valid; + + output rdct_ena; + input rdct_rdy; + + + // + // FSM Declaration + // + reg [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE; + reg [MMM_FSM_STATE_W-1:0] fsm_state_next; + + wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_square; + wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_triangle; + wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_rectangle; + + + // + // FSM Process + // + always @(posedge clk) + // + if (rst) fsm_state <= MMM_FSM_STATE_IDLE; + else fsm_state <= fsm_state_next; + + + // + // Storage Control Interface + // + reg wide_xy_ena = 1'b0; + reg wide_xy_ena_aux = 1'b0; + reg [ BANK_ADDR_W -1:0] wide_xy_bank; + reg [ BANK_ADDR_W -1:0] wide_xy_bank_aux; + reg [OP_ADDR_W -1:0] wide_xy_addr[0:3]; + reg [OP_ADDR_W -1:0] wide_xy_addr_aux; + + reg narrow_xy_ena = 1'b0; + reg [ BANK_ADDR_W -1:0] narrow_xy_bank; + reg [OP_ADDR_W -1:0] narrow_xy_addr; + reg [OP_ADDR_W -1:0] narrow_xy_addr_dly; + + assign rd_wide_xy_ena = wide_xy_ena; + assign rd_wide_xy_ena_aux = wide_xy_ena_aux; + assign rd_wide_xy_bank = wide_xy_bank; + assign rd_wide_xy_bank_aux = wide_xy_bank_aux; + assign rd_wide_xy_addr_aux = wide_xy_addr_aux; + + assign rd_narrow_xy_ena = narrow_xy_ena; + assign rd_narrow_xy_bank = narrow_xy_bank; + assign rd_narrow_xy_addr = narrow_xy_addr; + + genvar z; + generate for (z=0; z 8'd0) + wide_xy_addr_next = wide_xy_addr_current - 1'b1; + else + wide_xy_addr_next = wide_xy_addr_last; + end + endfunction + + integer j; + always @(posedge clk) + // + if (rst) begin + wide_xy_ena <= 1'b0; + wide_xy_ena_aux <= 1'b0; + end else begin + // + // Wide Address + // + for (j=0; j<(NUM_MULTS/2); j=j+1) + // + case (fsm_state_next) + // + // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + default: wide_xy_addr[j] <= 8'dX; + endcase + // + // Wide Aux Address + // + case (fsm_state_next) + // + // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0}; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX; + //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ? + //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4]; + // + default: wide_xy_addr_aux <= 8'dX; + endcase + // + // Wide Bank + // + case (fsm_state_next) + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L; // ? combine ? + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N; + default: wide_xy_bank <= 3'bXXX; + endcase + // + // Wide Aux Bank + // + case (fsm_state_next) + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's) + case (rcmb_xy_bank) + BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L; + BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H; + //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX + default: wide_xy_bank_aux <= 3'bXXX; + endcase + else wide_xy_bank_aux <= 3'bXXX; + default: wide_xy_bank_aux <= 3'bXXX; + endcase + // + // Wide Enable + // + case (fsm_state_next) + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1; + default: wide_xy_ena <= 1'b0; + endcase + // + // Wide Aux Enable + // + case (fsm_state_next) + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML); + default: wide_xy_ena_aux <= 1'b0; + endcase + // + end + + + // + // Delay Lines + // + always @(posedge clk) + // + narrow_xy_addr_dly <= narrow_xy_addr; + + + // + // DSP Array Logic + // + reg dsp_xy_ce_a = 1'b0; + reg dsp_xy_ce_b = 1'b0; + reg dsp_xy_ce_b_dly = 1'b0; + reg dsp_xy_ce_m = 1'b0; + reg dsp_xy_ce_p = 1'b0; + reg dsp_xy_ce_mode = 1'b0; + + reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}}; + + wire [5*18-1:0] dsp_x_a; + wire [5*18-1:0] dsp_y_a; + + reg [1*16-1:0] dsp_x_b; + reg [1*16-1:0] dsp_y_b; + + reg [ 1:0] dsp_xy_b_carry; + + wire [9*47-1:0] dsp_x_p; + wire [9*47-1:0] dsp_y_p; + + //generate for (z=0; z<(NUM_MULTS/2); z=z+1) + //begin : gen_dsp_xy_a_split + //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z]; + //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z]; + //end + //endgenerate + + assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout}; + assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout}; + + //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux; + //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux; + + always @(posedge clk) + // + dsp_xy_ce_b_dly <= dsp_xy_ce_b; + + + modexpng_dsp_array_block dsp_array_block_x + ( + .clk (clk), + + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), + + .mode_z (dsp_xy_mode_z), + + .a (dsp_x_a), + .b (dsp_x_b), + .p (dsp_x_p) + ); + + modexpng_dsp_array_block dsp_array_block_y + ( + .clk (clk), + + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), + + .mode_z (dsp_xy_mode_z), + + .a (dsp_y_a), + .b (dsp_y_b), + .p (dsp_y_p) + ); + + + + + // + // DSP Control Logic + // + reg narrow_xy_ena_dly1 = 1'b0; + reg narrow_xy_ena_dly2 = 1'b0; + + always @(posedge clk) + // + if (rst) begin + // + narrow_xy_ena_dly1 <= 1'b0; + narrow_xy_ena_dly2 <= 1'b0; + // + dsp_xy_ce_a <= 1'b0; + dsp_xy_ce_b <= 1'b0; + dsp_xy_ce_m <= 1'b0; + dsp_xy_ce_p <= 1'b0; + dsp_xy_ce_mode <= 1'b0; + // + end else begin + // + narrow_xy_ena_dly1 <= narrow_xy_ena; + narrow_xy_ena_dly2 <= narrow_xy_ena_dly1; + // + dsp_xy_ce_a <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2; + dsp_xy_ce_b <= narrow_xy_ena_dly2; + dsp_xy_ce_m <= dsp_xy_ce_b_dly; + dsp_xy_ce_p <= dsp_xy_ce_m; + dsp_xy_ce_mode <= dsp_xy_ce_b_dly; + // + end + + // + // DSP Feed Logic + // + reg dsp_merge_xy_b; + + always @(posedge clk) + // + case (fsm_state) + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_merge_xy_b <= 1'b1; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0; + endcase + + // + // On-the-fly Carry Recombination + // + wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry; + + always @(posedge clk) + // + if (narrow_xy_ena_dly2) begin // rewrite + // + if (!dsp_merge_xy_b) begin + dsp_x_b <= rd_narrow_x_dout[15:0]; + dsp_y_b <= rd_narrow_y_dout[15:0]; + dsp_xy_b_carry <= 2'b00; + end else begin + dsp_x_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_y_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16]; + end + // + end else begin + // + dsp_x_b <= {16{1'bX}}; + dsp_y_b <= {16{1'bX}}; + // + dsp_xy_b_carry <= 2'b00; + // + end + + + reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}}; + + function [NUM_MULTS:0] calc_mac_mode_z_square; + input [ 4:0] col_index_value; + input [ 7:0] narrow_xy_addr_value; + begin + if (narrow_xy_addr_value[7:3] == col_index_value) + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111}; + endcase + else + calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; + end + endfunction + + function [NUM_MULTS:0] calc_mac_mode_z_rectangle; + input [ 4:0] col_index_value; + input [ 7:0] narrow_xy_addr_value; + begin + if (narrow_xy_addr_value[7:3] == col_index_value) + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111}; + endcase + else + calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; + end + endfunction + + always @(posedge clk) + // + case (fsm_state_next) + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly); + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly); + default: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + endcase + + always @(posedge clk) begin + dsp_xy_mode_z <= dsp_xy_mode_z_adv1; + // + dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2; + dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3; + dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4; + end + + + + + + // + // Recombinator + // + reg rcmb_ena = 1'b0; + wire rcmb_rdy; + + modexpng_recombinator_block recombinator_block + ( + .clk (clk), + .rst (rst), + + .ena (rcmb_ena), + .rdy (rcmb_rdy), + + .mmm_fsm_state_next (fsm_state_next), + + .word_index_last (word_index_last), + + .dsp_xy_ce_p (dsp_xy_ce_p), + .dsp_x_p (dsp_x_p), + .dsp_y_p (dsp_y_p), + + .col_index (col_index), + .col_index_last (col_index_last), + + .rd_narrow_xy_addr (narrow_xy_addr), + .rd_narrow_xy_bank (narrow_xy_bank), + + .rcmb_wide_xy_bank (rcmb_wide_xy_bank), + .rcmb_wide_xy_addr (rcmb_wide_xy_addr), + .rcmb_wide_x_dout (rcmb_wide_x_dout), + .rcmb_wide_y_dout (rcmb_wide_y_dout), + .rcmb_wide_xy_valid (rcmb_wide_xy_valid), + + .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), + .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), + .rcmb_narrow_x_dout (rcmb_narrow_x_dout), + .rcmb_narrow_y_dout (rcmb_narrow_y_dout), + .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid), + + .rdct_narrow_xy_bank (rcmb_xy_bank), + .rdct_narrow_xy_addr (rcmb_xy_addr), + .rdct_narrow_x_dout (rcmb_x_dout), + .rdct_narrow_y_dout (rcmb_y_dout), + .rdct_narrow_xy_valid (rcmb_xy_valid) + + ); + + + // + // Recombinator Enable Logic + // + always @(posedge clk) + // + if (rst) rcmb_ena <= 1'b0; + else rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p; + + + // + // Handy Completion Flags + // + wire square_done = square_surely_done_flop; + wire triangle_done = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop; + wire rectangle_done = rectangle_tardy_done_flop; + + + // + // FSM Transition Logic + // + assign fsm_state_after_mult_square = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT; + assign fsm_state_after_mult_triangle = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT; + assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT; + + always @* begin + // + fsm_state_next = MMM_FSM_STATE_IDLE; + // + case (fsm_state) + MMM_FSM_STATE_IDLE: fsm_state_next = ena ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_IDLE; + + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG ; + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY ; + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY; + + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG ; + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY ; + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY; + + MMM_FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF; + + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY; + + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY; + + MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF; + + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY; + + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY; + + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF; + + default: fsm_state_next = MMM_FSM_STATE_IDLE ; + + endcase + // + end + + + // + // Reductor Control Logic + // + reg rdct_ena_reg = 1'b0; + + assign rdct_ena = rdct_ena_reg; + + always @(posedge clk) // add reset!!! + // + case (fsm_state) + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1; + default: rdct_ena_reg <= 1'b0; + endcase + + + +endmodule diff --git a/rtl/_modexpng_mmm_fsm.vh b/rtl/_modexpng_mmm_fsm.vh new file mode 100644 index 0000000..1c2a57b --- /dev/null +++ b/rtl/_modexpng_mmm_fsm.vh @@ -0,0 +1,43 @@ +localparam MMM_FSM_STATE_W = 32; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_IDLE = 0; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_1 = 1; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_2 = 2; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_3 = 3; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_1 = 4; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_2 = 5; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_3 = 6; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT = 11; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT = 14; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_HOLDOFF = 17; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35; +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37; + +localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_STOP = 40; diff --git a/rtl/_modexpng_recombinator_block.v b/rtl/_modexpng_recombinator_block.v new file mode 100644 index 0000000..61bf734 --- /dev/null +++ b/rtl/_modexpng_recombinator_block.v @@ -0,0 +1,1225 @@ +module modexpng_recombinator_block +( + clk, rst, + ena, rdy, + mmm_fsm_state_next, + word_index_last, + dsp_xy_ce_p, + dsp_x_p, dsp_y_p, + col_index, col_index_last, + rd_narrow_xy_addr, rd_narrow_xy_bank, + rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid, + rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid, + rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid +); + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + `include "../rtl/modexpng_mmm_fsm.vh" + + + input clk; + input rst; + input ena; + output rdy; + input [ MMM_FSM_STATE_W -1:0] mmm_fsm_state_next; + input [ OP_ADDR_W -1:0] word_index_last; + input dsp_xy_ce_p; + input [(NUM_MULTS+1) * MAC_W -1:0] dsp_x_p; + input [(NUM_MULTS+1) * MAC_W -1:0] dsp_y_p; + input [ COL_INDEX_W -1:0] col_index; + input [ COL_INDEX_W -1:0] col_index_last; + + input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; + input [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + + output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_wide_x_dout; + output [ WORD_EXT_W -1:0] rcmb_wide_y_dout; + output rcmb_wide_xy_valid; + + output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout; + output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout; + output rcmb_narrow_xy_valid; + + output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rdct_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rdct_narrow_x_dout; + output [ WORD_EXT_W -1:0] rdct_narrow_y_dout; + output rdct_narrow_xy_valid; + + + // + // Latches + // + reg [MAC_W-1:0] dsp_x_p_latch[0:NUM_MULTS]; + reg [MAC_W-1:0] dsp_y_p_latch[0:NUM_MULTS]; + + + // + // Mapping + // + wire [MAC_W-1:0] dsp_x_p_split[0:NUM_MULTS]; + wire [MAC_W-1:0] dsp_y_p_split[0:NUM_MULTS]; + + genvar z; + generate for (z=0; z OP_ADDR_ONE) + set_rdct(rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y); + + BANK_RCMB_EXT: + set_rdct(word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3); + + endcase + // + end + + + + // + // Busy + // + always @(posedge clk) + // + if (rst) busy_now <= 1'b0; + else begin + if (rdy && ena) busy_now <= 1'b1; + //if (!rdy && !busy_now) rdy <= 1'b1; + end + + +endmodule diff --git a/rtl/_modexpng_storage_block.v b/rtl/_modexpng_storage_block.v new file mode 100644 index 0000000..d6ef1ee --- /dev/null +++ b/rtl/_modexpng_storage_block.v @@ -0,0 +1,219 @@ +module modexpng_storage_block +( + clk, rst, + + wr_wide_xy_ena, + wr_wide_xy_bank, + wr_wide_xy_addr, + wr_wide_x_din, + wr_wide_y_din, + + wr_narrow_xy_ena, + wr_narrow_xy_bank, + wr_narrow_xy_addr, + wr_narrow_x_din, + wr_narrow_y_din, + + rd_wide_xy_ena, + rd_wide_xy_ena_aux, + rd_wide_xy_bank, + rd_wide_xy_bank_aux, + rd_wide_xy_addr, + rd_wide_xy_addr_aux, + rd_wide_x_dout, + rd_wide_y_dout, + rd_wide_x_dout_aux, + rd_wide_y_dout_aux, + + rd_narrow_xy_ena, + rd_narrow_xy_bank, + rd_narrow_xy_addr, + rd_narrow_x_dout, + rd_narrow_y_dout +); + + + // + // Headers + // + `include "modexpng_parameters.vh" + + + // + // Ports + // + input clk; + input rst; + + input wr_wide_xy_ena; + input [BANK_ADDR_W -1:0] wr_wide_xy_bank; + input [ OP_ADDR_W -1:0] wr_wide_xy_addr; + input [ WORD_EXT_W -1:0] wr_wide_x_din; + input [ WORD_EXT_W -1:0] wr_wide_y_din; + + input wr_narrow_xy_ena; + input [BANK_ADDR_W -1:0] wr_narrow_xy_bank; + input [ OP_ADDR_W -1:0] wr_narrow_xy_addr; + input [ WORD_EXT_W -1:0] wr_narrow_x_din; + input [ WORD_EXT_W -1:0] wr_narrow_y_din; + + input rd_wide_xy_ena; + input rd_wide_xy_ena_aux; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + input [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr; + input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; + output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout; + output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout; + output [ WORD_EXT_W -1:0] rd_wide_x_dout_aux; + output [ WORD_EXT_W -1:0] rd_wide_y_dout_aux; + + input rd_narrow_xy_ena; + input [BANK_ADDR_W -1:0] rd_narrow_xy_bank; + input [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rd_narrow_x_dout; + output [ WORD_EXT_W -1:0] rd_narrow_y_dout; + + + // + // Internal Registers + // + reg rd_wide_xy_reg_ena = 1'b0; + reg rd_wide_xy_reg_ena_aux = 1'b0; + reg rd_narrow_xy_reg_ena = 1'b0; + + always @(posedge clk) begin + // + rd_wide_xy_reg_ena <= rst ? 1'b0 : rd_wide_xy_ena; + rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux; + rd_narrow_xy_reg_ena <= rst ? 1'b0 : rd_narrow_xy_ena; + // + end + + + // + // Helper Signals + // + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset[0:NUM_MULTS_HALF-1]; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset_aux; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset; + + assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr}; + assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux}; + assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr}; + assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr}; + + // + // "Wide" Storage + // + genvar z; + generate for (z=0; z {INDEX_WIDTH{1'b0}}) - calc_rotate_a_index = current_index_in - 1'b1; - else - calc_rotate_a_index = last_index_in; - end - endfunction - */ - - /* - // - // Narrow Counters - // - reg [INDEX_WIDTH-1:0] din_addr_narrow_reg; - reg [INDEX_WIDTH-1:0] din_addr_narrow_dly; - localparam [INDEX_WIDTH-1:0] din_addr_narrow_zero = {INDEX_WIDTH{1'b0}}; - wire [INDEX_WIDTH-1:0] din_addr_narrow_next = (din_addr_narrow_reg < index_last) ? - din_addr_narrow_reg + 1'b1 : din_addr_narrow_zero; - wire din_addr_narrow_done = din_addr_narrow_reg == index_last; - - assign din_addr_narrow = din_addr_narrow_reg; - - always @(posedge clk) - // - din_addr_narrow_dly <= din_addr_narrow_reg; - - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; - FSM_STATE_MULT_SQUARE_COL_0_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; - FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; - FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; - endcase - - - // - // Helper Functions - // - function [NUM_MULTS-1:0] calc_mac_clear_bitmask; - input [2:0] t; - begin - case (t) - 3'd0: calc_mac_clear_bitmask = 8'b00000001; - 3'd1: calc_mac_clear_bitmask = 8'b00000010; - 3'd2: calc_mac_clear_bitmask = 8'b00000100; - 3'd3: calc_mac_clear_bitmask = 8'b00001000; - 3'd4: calc_mac_clear_bitmask = 8'b00010000; - 3'd5: calc_mac_clear_bitmask = 8'b00100000; - 3'd6: calc_mac_clear_bitmask = 8'b01000000; - 3'd7: calc_mac_clear_bitmask = 8'b10000000; - endcase - end - endfunction - - function [NUM_MULTS:0] calc_mac_clear_square; - input [INDEX_WIDTH-4:0] current_col_index; - input [INDEX_WIDTH-1:0] b_addr_prev; - begin - if (b_addr_prev[INDEX_WIDTH-1:3] == current_col_index) - calc_mac_clear_square = {1'b0, calc_mac_clear_bitmask(b_addr_prev[2:0])}; - else - calc_mac_clear_square = {1'b0, {NUM_MULTS{1'b0}}}; - end - endfunction - - - // - // Wide Counters - // - reg [INDEX_WIDTH-1:0] din_addr_wide_reg[0:NUM_MULTS-1]; - - integer xi; - always @(posedge clk) - // - for (xi=0; xi 8'd0) + wide_xy_addr_next = wide_xy_addr_current - 1'b1; + else + wide_xy_addr_next = wide_xy_addr_last; + end + endfunction + + integer j; + always @(posedge clk) + // + if (rst) begin + wide_xy_ena <= 1'b0; + wide_xy_ena_aux <= 1'b0; + end else begin + // + // Wide Address + // + for (j=0; j<(NUM_MULTS/2); j=j+1) + // + case (fsm_state_next) + // + // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // + FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + default: wide_xy_addr[j] <= 8'dX; + endcase + // + // Wide Aux Address + // + case (fsm_state_next) + // + // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // + FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0}; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX; + //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ? + //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4]; + // + default: wide_xy_addr_aux <= 8'dX; + endcase + // + // Wide Bank + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in; + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N; + default: wide_xy_bank <= 3'bXXX; + endcase + // + // Wide Aux Bank + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in; + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's) + case (rcmb_xy_bank) + BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L; + BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H; + //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX + default: wide_xy_bank_aux <= 3'bXXX; + endcase + else wide_xy_bank_aux <= 3'bXXX; + default: wide_xy_bank_aux <= 3'bXXX; + endcase + // + // Wide Enable + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1; + default: wide_xy_ena <= 1'b0; + endcase + // + // Wide Aux Enable + // + case (fsm_state_next) + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML); + default: wide_xy_ena_aux <= 1'b0; + endcase + // + end + + + // + // Delay Lines + // + always @(posedge clk) + // + narrow_xy_addr_dly <= narrow_xy_addr; + + + // + // DSP Array Logic + // + reg dsp_xy_ce_a = 1'b0; + reg dsp_xy_ce_b = 1'b0; + reg dsp_xy_ce_b_dly = 1'b0; + reg dsp_xy_ce_m = 1'b0; + reg dsp_xy_ce_p = 1'b0; + reg dsp_xy_ce_mode = 1'b0; + + reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}}; + + wire [5*18-1:0] dsp_x_a; + wire [5*18-1:0] dsp_y_a; + + reg [1*16-1:0] dsp_x_b; + reg [1*16-1:0] dsp_y_b; + + reg [ 1:0] dsp_xy_b_carry; + + wire [9*47-1:0] dsp_x_p; + wire [9*47-1:0] dsp_y_p; + + //generate for (z=0; z<(NUM_MULTS/2); z=z+1) + //begin : gen_dsp_xy_a_split + //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z]; + //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z]; + //end + //endgenerate + + assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout}; + assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout}; + + //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux; + //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux; + + always @(posedge clk) + // + dsp_xy_ce_b_dly <= dsp_xy_ce_b; + + + modexpng_dsp_array_block dsp_array_block_x + ( + .clk (clk), + + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), + + .mode_z (dsp_xy_mode_z), + + .a (dsp_x_a), + .b (dsp_x_b), + .p (dsp_x_p) + ); + + modexpng_dsp_array_block dsp_array_block_y + ( + .clk (clk), + + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), + + .mode_z (dsp_xy_mode_z), + + .a (dsp_y_a), + .b (dsp_y_b), + .p (dsp_y_p) + ); + + + + + // + // DSP Control Logic + // + reg narrow_xy_ena_dly1 = 1'b0; + reg narrow_xy_ena_dly2 = 1'b0; + + always @(posedge clk) + // + if (rst) begin + // + narrow_xy_ena_dly1 <= 1'b0; + narrow_xy_ena_dly2 <= 1'b0; + // + dsp_xy_ce_a <= 1'b0; + dsp_xy_ce_b <= 1'b0; + dsp_xy_ce_m <= 1'b0; + dsp_xy_ce_p <= 1'b0; + dsp_xy_ce_mode <= 1'b0; + // + end else begin + // + narrow_xy_ena_dly1 <= narrow_xy_ena; + narrow_xy_ena_dly2 <= narrow_xy_ena_dly1; + // + dsp_xy_ce_a <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2; + dsp_xy_ce_b <= narrow_xy_ena_dly2; + dsp_xy_ce_m <= dsp_xy_ce_b_dly; + dsp_xy_ce_p <= dsp_xy_ce_m; + dsp_xy_ce_mode <= dsp_xy_ce_b_dly; + // + end + + // + // DSP Feed Logic + // + reg dsp_merge_xy_b; + + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_merge_xy_b <= 1'b1; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0; + endcase + + // + // On-the-fly Carry Recombination + // + wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry; + + always @(posedge clk) + // + if (narrow_xy_ena_dly2) begin // rewrite + // + if (!dsp_merge_xy_b) begin + dsp_x_b <= rd_narrow_x_dout[15:0]; + dsp_y_b <= rd_narrow_y_dout[15:0]; + dsp_xy_b_carry <= 2'b00; + end else begin + dsp_x_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_y_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16]; + end + // + end else begin + // + dsp_x_b <= {16{1'bX}}; + dsp_y_b <= {16{1'bX}}; + // + dsp_xy_b_carry <= 2'b00; + // + end + + + reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}}; + reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}}; + + function [NUM_MULTS:0] calc_mac_mode_z_square; + input [ 4:0] col_index_value; + input [ 7:0] narrow_xy_addr_value; + begin + if (narrow_xy_addr_value[7:3] == col_index_value) + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111}; + endcase + else + calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; + end + endfunction + + function [NUM_MULTS:0] calc_mac_mode_z_rectangle; + input [ 4:0] col_index_value; + input [ 7:0] narrow_xy_addr_value; + begin + if (narrow_xy_addr_value[7:3] == col_index_value) + case (narrow_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111}; + endcase + else + calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; + end + endfunction + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly); + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly); + default: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + endcase + + always @(posedge clk) begin + dsp_xy_mode_z <= dsp_xy_mode_z_adv1; + // + dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2; + dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3; + dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4; + end + + + + + + // + // Recombinator + // + reg rcmb_ena = 1'b0; + wire rcmb_rdy; + + modexpng_recombinator_block recombinator_block + ( + .clk (clk), + + .ena (rcmb_ena), + .rdy (rcmb_rdy), + + .fsm_state_next (fsm_state_next), + + .word_index_last (word_index_last), + + .dsp_xy_ce_p (dsp_xy_ce_p), + .dsp_x_p (dsp_x_p), + .dsp_y_p (dsp_y_p), + + .col_index (col_index), + .col_index_last (col_index_last), + + .rd_narrow_xy_addr (narrow_xy_addr), + .rd_narrow_xy_bank (narrow_xy_bank), + + .rcmb_wide_xy_bank (rcmb_wide_xy_bank), + .rcmb_wide_xy_addr (rcmb_wide_xy_addr), + .rcmb_wide_x_dout (rcmb_wide_x_dout), + .rcmb_wide_y_dout (rcmb_wide_y_dout), + .rcmb_wide_xy_valid (rcmb_wide_xy_valid), + + .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), + .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), + .rcmb_narrow_x_dout (rcmb_narrow_x_dout), + .rcmb_narrow_y_dout (rcmb_narrow_y_dout), + .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid), + + .rdct_narrow_xy_bank (rcmb_xy_bank), + .rdct_narrow_xy_addr (rcmb_xy_addr), + .rdct_narrow_x_dout (rcmb_x_dout), + .rdct_narrow_y_dout (rcmb_y_dout), + .rdct_narrow_xy_valid (rcmb_xy_valid) + + ); + + + // + // Recombinator Enable Logic + // + always @(posedge clk) + // + if (rst) rcmb_ena <= 1'b0; + else rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p; + + + // + // Handy Completion Flags + // + wire square_done = square_surely_done_flop; + wire triangle_done = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop; + wire rectangle_done = rectangle_tardy_done_flop; + + + // + // FSM Transition Logic + // + assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; + assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT; + assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; + + always @* begin + // + fsm_state_next = FSM_STATE_IDLE; + // + case (fsm_state) + FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE; + + FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ; + FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ; + FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY; + + FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ; + FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY; + + FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF; + + FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ; + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY; + + FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ; + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ; + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY; + + FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF; + + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ; + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ; + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY; + + FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ; + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ; + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY; + + FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF; + + default: fsm_state_next = FSM_STATE_IDLE ; + + endcase + // + end + + + // + // Reductor Control Logic + // + reg rdct_ena_reg = 1'b0; + + assign rdct_ena = rdct_ena_reg; + + always @(posedge clk) // add reset!!! + // + case (fsm_state) + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1; + default: rdct_ena_reg <= 1'b0; + endcase + + + +endmodule diff --git a/rtl/modexpng_mmm_dual_x8.v b/rtl/modexpng_mmm_dual_x8.v deleted file mode 100644 index 2e4f4e0..0000000 --- a/rtl/modexpng_mmm_dual_x8.v +++ /dev/null @@ -1,951 +0,0 @@ -module modexpng_mmm_dual_x8 -( - clk, rst, - - ena, rdy, - - - ladder_mode, - word_index_last, - word_index_last_minus1, - - rd_wide_xy_ena, - rd_wide_xy_ena_aux, - rd_wide_xy_bank, - rd_wide_xy_bank_aux, - rd_wide_xy_addr, - rd_wide_xy_addr_aux, - rd_wide_x_dout, - rd_wide_y_dout, - rd_wide_x_dout_aux, - rd_wide_y_dout_aux, - - rd_narrow_xy_ena, - rd_narrow_xy_bank, - rd_narrow_xy_addr, - rd_narrow_x_dout, - rd_narrow_y_dout, - - rcmb_wide_xy_bank, - rcmb_wide_xy_addr, - rcmb_wide_x_dout, - rcmb_wide_y_dout, - rcmb_wide_xy_valid, - - rcmb_narrow_xy_bank, - rcmb_narrow_xy_addr, - rcmb_narrow_x_dout, - rcmb_narrow_y_dout, - rcmb_narrow_xy_valid, - - rcmb_xy_bank, - rcmb_xy_addr, - rcmb_x_dout, - rcmb_y_dout, - rcmb_xy_valid, - - rdct_ena -); - - - // - // Headers - // - `include "../rtl_1/modexpng_mmm_fsm_old.vh" - `include "../rtl_1/modexpng_parameters_old.vh" - `include "../rtl_1/modexpng_parameters_x8_old.vh" - - - // - // Ports - // - input clk; - input rst; - - input ena; - output rdy; - - input ladder_mode; - input [7:0] word_index_last; - input [7:0] word_index_last_minus1; - - output rd_wide_xy_ena; - output rd_wide_xy_ena_aux; - output [ 1:0] rd_wide_xy_bank; - output [ 1:0] rd_wide_xy_bank_aux; - output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; - output [ 8-1:0] rd_wide_xy_addr_aux; - input [18*NUM_MULTS/2-1:0] rd_wide_x_dout; - input [18*NUM_MULTS/2-1:0] rd_wide_y_dout; - input [ 18-1:0] rd_wide_x_dout_aux; - input [ 18-1:0] rd_wide_y_dout_aux; - - output rd_narrow_xy_ena; - output [ 1:0] rd_narrow_xy_bank; - output [ 7:0] rd_narrow_xy_addr; - input [18-1:0] rd_narrow_x_dout; - input [18-1:0] rd_narrow_y_dout; - - output [ 1:0] rcmb_wide_xy_bank; - output [ 7:0] rcmb_wide_xy_addr; - output [17:0] rcmb_wide_x_dout; - output [17:0] rcmb_wide_y_dout; - output rcmb_wide_xy_valid; - - output [ 1:0] rcmb_narrow_xy_bank; - output [ 7:0] rcmb_narrow_xy_addr; - output [17:0] rcmb_narrow_x_dout; - output [17:0] rcmb_narrow_y_dout; - output rcmb_narrow_xy_valid; - - output [ 1:0] rcmb_xy_bank; - output [ 7:0] rcmb_xy_addr; - output [17:0] rcmb_x_dout; - output [17:0] rcmb_y_dout; - output rcmb_xy_valid; - - output rdct_ena; - - - // - // FSM Declaration - // - reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE; - reg [FSM_STATE_WIDTH-1:0] fsm_state_next; - - wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; - wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle; - wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle; - - - // - // FSM Process - // - always @(posedge clk) - // - if (rst) fsm_state <= FSM_STATE_IDLE; - else fsm_state <= fsm_state_next; - - - // - // Storage Control Interface - // - reg wide_xy_ena = 1'b0; - reg wide_xy_ena_aux = 1'b0; - reg [ 1:0] wide_xy_bank; - reg [ 1:0] wide_xy_bank_aux; - reg [ 8-1:0] wide_xy_addr[0:3]; - reg [ 8-1:0] wide_xy_addr_aux; - - reg narrow_xy_ena = 1'b0; - reg [ 1:0] narrow_xy_bank; - reg [ 7:0] narrow_xy_addr; - reg [ 7:0] narrow_xy_addr_dly; - - assign rd_wide_xy_ena = wide_xy_ena; - assign rd_wide_xy_ena_aux = wide_xy_ena_aux; - assign rd_wide_xy_bank = wide_xy_bank; - assign rd_wide_xy_bank_aux = wide_xy_bank_aux; - assign rd_wide_xy_addr_aux = wide_xy_addr_aux; - - assign rd_narrow_xy_ena = narrow_xy_ena; - assign rd_narrow_xy_bank = narrow_xy_bank; - assign rd_narrow_xy_addr = narrow_xy_addr; - - genvar z; - generate for (z=0; z<(NUM_MULTS/2); z=z+1) - begin : gen_rd_wide_xy_addr - assign rd_wide_xy_addr[8*z+:8] = wide_xy_addr[z]; - end - endgenerate - - // - // Column Counter - // - reg [4:0] col_index; // current column index - reg [4:0] col_index_prev; // delayed column index value - reg [4:0] col_index_last; // index of the very last column - reg [4:0] col_index_next; // precomputed next column index - reg col_is_last; // flag set during the very last column - - always @(posedge clk) - // - col_index_prev <= col_index; - - // - // Column Counter Increment Logic - // - always @(posedge clk) - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin - col_index <= 5'd0; - col_index_last <= word_index_last[7:3]; - col_index_next <= 5'd1; - col_is_last <= 1'b0; - - end - // - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin - col_index <= col_index_next; - col_is_last <= col_index_next == col_index_last; - col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1; - end - // - endcase - - - // - // Completion Flags - // - wire square_almost_done_comb; - reg square_almost_done_flop = 1'b0; - reg square_surely_done_flop = 1'b0; - - wire triangle_almost_done_comb; - reg triangle_almost_done_flop = 1'b0; - reg triangle_surely_done_flop = 1'b0; - reg triangle_tardy_done_flop = 1'b0; - - wire rectangle_almost_done_comb; - reg rectangle_almost_done_flop = 1'b0; - reg rectangle_surely_done_flop = 1'b0; - reg rectangle_tardy_done_flop = 1'b0; - - assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1; - assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index); - assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1; - - // - // Square Completion Flags - // - always @(posedge clk) begin - // - case (fsm_state) - // - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: - square_almost_done_flop <= square_almost_done_comb; - // - default: - square_almost_done_flop <= 1'b0; - // - endcase - // - square_surely_done_flop <= square_almost_done_flop; - // - end - - // - // Triangle Completion Flags - // - always @(posedge clk) begin - // - case (fsm_state) - // - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: - triangle_almost_done_flop <= triangle_almost_done_comb; - // - default: - triangle_almost_done_flop <= 1'b0; - // - endcase - // - triangle_surely_done_flop <= triangle_almost_done_flop; - triangle_tardy_done_flop <= triangle_surely_done_flop; - // - end - - // - // Rectangle Completion Flags - // - always @(posedge clk) begin - // - case (fsm_state) - // - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: - rectangle_almost_done_flop <= rectangle_almost_done_comb; - // - default: - rectangle_almost_done_flop <= 1'b0; - // - endcase - // - rectangle_surely_done_flop <= rectangle_almost_done_flop; - rectangle_tardy_done_flop <= rectangle_surely_done_flop; - // - end - - - // - // Narrow Storage Control Logic - // - always @(posedge clk) - // - if (rst) narrow_xy_ena <= 1'b0; - else begin - // - // Narrow Address - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT: narrow_xy_addr <= 8'd0; - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0; - // - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? - 8'd0 : narrow_xy_addr + 1'b1; - // - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? - 8'd1 : narrow_xy_addr + 1'b1; - // - default: narrow_xy_addr <= 8'dX; - // - endcase - // - // Narrow Bank - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= BANK_NARROW_T1T2; - // - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? - BANK_NARROW_EXT : BANK_NARROW_N_COEFF; - // - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? - BANK_NARROW_EXT : BANK_NARROW_Q; - // - default: narrow_xy_bank <= 2'bXX; - // - endcase - // - case (fsm_state_next) - // - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG: narrow_xy_ena <= 1'b1; - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop; - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop; - // - default: narrow_xy_ena <= 1'b0; - // - endcase - // - end - - - // - // Wide Storage Control Logic - // - - wire [2:0] wide_offset_rom[0:3]; - - generate for (z=1; z 8'd0) - wide_xy_addr_next = wide_xy_addr_current - 1'b1; - else - wide_xy_addr_next = wide_xy_addr_last; - end - endfunction - - integer j; - always @(posedge clk) - // - if (rst) begin - wide_xy_ena <= 1'b0; - wide_xy_ena_aux <= 1'b0; - end else begin - // - // Wide Address - // - for (j=0; j<(NUM_MULTS/2); j=j+1) - // - case (fsm_state_next) - // - // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! - // - FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; - FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); - // - FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; - FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); - // - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; - FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); - // - default: wide_xy_addr[j] <= 8'dX; - endcase - // - // Wide Aux Address - // - case (fsm_state_next) - // - // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! - // - FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; - FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); - // - FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; - FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); - // - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0}; - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, - FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX; - //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ? - //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4]; - // - default: wide_xy_addr_aux <= 8'dX; - endcase - // - // Wide Bank - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_T1T2; - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_ABL; - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_ABL; - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N; - default: wide_xy_bank <= 3'bXXX; - endcase - // - // Wide Aux Bank - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_T1T2; - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_ABH; - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_ABL; - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, - FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's) - case (rcmb_xy_bank) - BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_ABL; - BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_ABH; - //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX - default: wide_xy_bank_aux <= 3'bXXX; - endcase - else wide_xy_bank_aux <= 3'bXXX; - default: wide_xy_bank_aux <= 3'bXXX; - endcase - // - // Wide Enable - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_INIT, - FSM_STATE_MULT_SQUARE_COL_N_INIT, - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG, - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1; - default: wide_xy_ena <= 1'b0; - endcase - // - // Wide Aux Enable - // - case (fsm_state_next) - FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - FSM_STATE_MULT_TRIANGLE_COL_N_INIT, - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1; - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1; - FSM_STATE_MULT_RECTANGLE_COL_N_INIT, - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, - FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML); - default: wide_xy_ena_aux <= 1'b0; - endcase - // - end - - - // - // Delay Lines - // - always @(posedge clk) - // - narrow_xy_addr_dly <= narrow_xy_addr; - - - // - // DSP Array Logic - // - reg dsp_xy_ce_a = 1'b0; - reg dsp_xy_ce_b = 1'b0; - reg dsp_xy_ce_b_dly = 1'b0; - reg dsp_xy_ce_m = 1'b0; - reg dsp_xy_ce_p = 1'b0; - reg dsp_xy_ce_mode = 1'b0; - - reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}}; - - wire [5*18-1:0] dsp_x_a; - wire [5*18-1:0] dsp_y_a; - - reg [1*16-1:0] dsp_x_b; - reg [1*16-1:0] dsp_y_b; - - reg [ 1:0] dsp_xy_b_carry; - - wire [9*47-1:0] dsp_x_p; - wire [9*47-1:0] dsp_y_p; - - //generate for (z=0; z<(NUM_MULTS/2); z=z+1) - //begin : gen_dsp_xy_a_split - //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z]; - //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z]; - //end - //endgenerate - - assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout}; - assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout}; - - //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux; - //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux; - - always @(posedge clk) - // - dsp_xy_ce_b_dly <= dsp_xy_ce_b; - - - modexpng_dsp_array_block dsp_array_block_x - ( - .clk (clk), - - .ce_a (dsp_xy_ce_a), - .ce_b (dsp_xy_ce_b), - .ce_m (dsp_xy_ce_m), - .ce_p (dsp_xy_ce_p), - .ce_mode (dsp_xy_ce_mode), - - .mode_z (dsp_xy_mode_z), - - .a (dsp_x_a), - .b (dsp_x_b), - .p (dsp_x_p) - ); - - modexpng_dsp_array_block dsp_array_block_y - ( - .clk (clk), - - .ce_a (dsp_xy_ce_a), - .ce_b (dsp_xy_ce_b), - .ce_m (dsp_xy_ce_m), - .ce_p (dsp_xy_ce_p), - .ce_mode (dsp_xy_ce_mode), - - .mode_z (dsp_xy_mode_z), - - .a (dsp_y_a), - .b (dsp_y_b), - .p (dsp_y_p) - ); - - - - - // - // DSP Control Logic - // - reg narrow_xy_ena_dly1 = 1'b0; - reg narrow_xy_ena_dly2 = 1'b0; - - always @(posedge clk) - // - if (rst) begin - // - narrow_xy_ena_dly1 <= 1'b0; - narrow_xy_ena_dly2 <= 1'b0; - // - dsp_xy_ce_a <= 1'b0; - dsp_xy_ce_b <= 1'b0; - dsp_xy_ce_m <= 1'b0; - dsp_xy_ce_p <= 1'b0; - dsp_xy_ce_mode <= 1'b0; - // - end else begin - // - narrow_xy_ena_dly1 <= narrow_xy_ena; - narrow_xy_ena_dly2 <= narrow_xy_ena_dly1; - // - dsp_xy_ce_a <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2; - dsp_xy_ce_b <= narrow_xy_ena_dly2; - dsp_xy_ce_m <= dsp_xy_ce_b_dly; - dsp_xy_ce_p <= dsp_xy_ce_m; - dsp_xy_ce_mode <= dsp_xy_ce_b_dly; - // - end - - // - // DSP Feed Logic - // - reg dsp_merge_xy_b; - - always @(posedge clk) - // - case (fsm_state) - FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_merge_xy_b <= 1'b1; - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0; - endcase - - // - // On-the-fly Carry Recombination - // - wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry}; - wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry}; - wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry; - - always @(posedge clk) - // - if (narrow_xy_ena_dly2) begin // rewrite - // - if (!dsp_merge_xy_b) begin - dsp_x_b <= rd_narrow_x_dout[15:0]; - dsp_y_b <= rd_narrow_y_dout[15:0]; - dsp_xy_b_carry <= 2'b00; - end else begin - dsp_x_b <= rd_narrow_xy_dout_carry_mux[15:0]; - dsp_y_b <= rd_narrow_xy_dout_carry_mux[15:0]; - dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16]; - end - // - end else begin - // - dsp_x_b <= {16{1'bX}}; - dsp_y_b <= {16{1'bX}}; - // - dsp_xy_b_carry <= 2'b00; - // - end - - - reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}}; - reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}}; - reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}}; - reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}}; - - function [NUM_MULTS:0] calc_mac_mode_z_square; - input [ 4:0] col_index_value; - input [ 7:0] narrow_xy_addr_value; - begin - if (narrow_xy_addr_value[7:3] == col_index_value) - case (narrow_xy_addr_value[2:0]) - 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; - 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101}; - 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011}; - 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111}; - 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111}; - 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111}; - 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111}; - 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111}; - endcase - else - calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; - end - endfunction - - function [NUM_MULTS:0] calc_mac_mode_z_rectangle; - input [ 4:0] col_index_value; - input [ 7:0] narrow_xy_addr_value; - begin - if (narrow_xy_addr_value[7:3] == col_index_value) - case (narrow_xy_addr_value[2:0]) - 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110}; - 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101}; - 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011}; - 3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111}; - 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111}; - 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111}; - 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111}; - 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111}; - endcase - else - calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; - end - endfunction - - always @(posedge clk) - // - case (fsm_state_next) - FSM_STATE_MULT_SQUARE_COL_0_TRIG, - FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; - FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly); - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}}; - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly); - default: dsp_xy_mode_z_adv4 <= {9{1'b1}}; - endcase - - always @(posedge clk) begin - dsp_xy_mode_z <= dsp_xy_mode_z_adv1; - // - dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2; - dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3; - dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4; - end - - - - - - // - // Recombinator - // - reg rcmb_ena = 1'b0; - wire rcmb_rdy; - - modexpng_recombinator_block recombinator_block - ( - .clk (clk), - - .ena (rcmb_ena), - .rdy (rcmb_rdy), - - .fsm_state_next (fsm_state_next), - - .word_index_last (word_index_last), - - .dsp_xy_ce_p (dsp_xy_ce_p), - .dsp_x_p (dsp_x_p), - .dsp_y_p (dsp_y_p), - - .col_index (col_index), - .col_index_last (col_index_last), - - .rd_narrow_xy_addr (narrow_xy_addr), - .rd_narrow_xy_bank (narrow_xy_bank), - - .rcmb_wide_xy_bank (rcmb_wide_xy_bank), - .rcmb_wide_xy_addr (rcmb_wide_xy_addr), - .rcmb_wide_x_dout (rcmb_wide_x_dout), - .rcmb_wide_y_dout (rcmb_wide_y_dout), - .rcmb_wide_xy_valid (rcmb_wide_xy_valid), - - .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), - .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), - .rcmb_narrow_x_dout (rcmb_narrow_x_dout), - .rcmb_narrow_y_dout (rcmb_narrow_y_dout), - .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid), - - .rdct_narrow_xy_bank (rcmb_xy_bank), - .rdct_narrow_xy_addr (rcmb_xy_addr), - .rdct_narrow_x_dout (rcmb_x_dout), - .rdct_narrow_y_dout (rcmb_y_dout), - .rdct_narrow_xy_valid (rcmb_xy_valid) - - ); - - - // - // Recombinator Enable Logic - // - always @(posedge clk) - // - if (rst) rcmb_ena <= 1'b0; - else rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p; - - - // - // Handy Completion Flags - // - wire square_done = square_surely_done_flop; - wire triangle_done = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop; - wire rectangle_done = rectangle_tardy_done_flop; - - - // - // FSM Transition Logic - // - assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; - assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT; - assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; - - always @* begin - // - fsm_state_next = FSM_STATE_IDLE; - // - case (fsm_state) - FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE; - - FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ; - FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ; - FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY; - - FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ; - FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ; - FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY; - - FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF; - - FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ; - FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ; - FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY; - - FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ; - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ; - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY; - - FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF; - - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ; - FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ; - FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY; - - FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ; - FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ; - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY; - - FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF; - - default: fsm_state_next = FSM_STATE_IDLE ; - - endcase - // - end - - - // - // Reductor Control Logic - // - reg rdct_ena_reg = 1'b0; - - assign rdct_ena = rdct_ena_reg; - - always @(posedge clk) // add reset!!! - // - case (fsm_state) - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1; - default: rdct_ena_reg <= 1'b0; - endcase - - - -endmodule diff --git a/rtl/modexpng_mmm_fsm_old.vh b/rtl/modexpng_mmm_fsm_old.vh deleted file mode 100644 index 3bdae66..0000000 --- a/rtl/modexpng_mmm_fsm_old.vh +++ /dev/null @@ -1,43 +0,0 @@ -localparam FSM_STATE_WIDTH = 32; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_IDLE = 0; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_1 = 1; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_2 = 2; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_3 = 3; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_1 = 4; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_2 = 5; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_3 = 6; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_INIT = 11; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_INIT = 14; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35; -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37; - -localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999; diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh new file mode 100644 index 0000000..514fc21 --- /dev/null +++ b/rtl/modexpng_parameters.vh @@ -0,0 +1,71 @@ +`include "modexpng_parameters_x8.vh" + +function integer cryptech_clog2; + input integer value; + integer temp_value; + integer result; + // + begin + temp_value = value - 1; + for (result = 0; temp_value > 0; result = result + 1) + temp_value = temp_value >> 1; + cryptech_clog2 = result; + end + // +endfunction + +localparam WORD_W = 16; +localparam WORD_EXT_W = 18; +localparam MAC_W = 47; + +localparam MAX_OP_W = 4096; + +localparam BANK_ADDR_W = 3; +localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W); +localparam COL_INDEX_W = OP_ADDR_W - cryptech_clog2(NUM_MULTS); + +localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS); + +localparam RDCT_CARRY_W = WORD_EXT_W - WORD_W; + +localparam [RDCT_CARRY_W-1:0] RDCT_CARRY_ZEROES = {RDCT_CARRY_W{1'b0}}; + +localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_B = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_C = 3'd2; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_D = 3'd3; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_E = 3'd4; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_N = 3'd5; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_L = 3'd6; +localparam [BANK_ADDR_W-1:0] BANK_WIDE_H = 3'd7; + +localparam [BANK_ADDR_W-1:0] BANK_NARROW_A = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_B = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_C = 3'd2; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_D = 3'd3; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_E = 3'd4; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_COEFF = 3'd5; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_Q = 3'd6; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_EXT = 3'd7; // [0] -> COEFF', [1] -> Q' + +localparam [BANK_ADDR_W-1:0] BANK_RCMB_ML = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_RCMB_MH = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_RCMB_EXT = 3'd2; // [0] -> MH' + +localparam [BANK_ADDR_W-1:0] BANK_DONT_CARE = {BANK_ADDR_W{1'bX}}; + +localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0; +localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1; + +localparam [OP_ADDR_W-1:0] OP_ADDR_ZERO = {OP_ADDR_W{1'b0}}; +localparam [OP_ADDR_W-1:0] OP_ADDR_ONE = {{(OP_ADDR_W-1){1'b0}}, 1'b1}; +localparam [OP_ADDR_W-1:0] OP_ADDR_DONT_CARE = {OP_ADDR_W{1'bX}}; + +localparam [WORD_W-1:0] WORD_NULL = {WORD_W{1'b0}}; +localparam [WORD_EXT_W-1:0] WORD_EXT_NULL = {WORD_EXT_W{1'b0}}; + +localparam [WORD_EXT_W-1:0] WORD_EXT_DONT_CARE = {WORD_EXT_W{1'bX}}; + +localparam [MAC_INDEX_W-1:0] MAC_INDEX_DONT_CARE = {MAC_INDEX_W{1'bX}}; + +localparam [NUM_MULTS-1:0] MULT_BITMAP_ZEROES = {NUM_MULTS{1'b0}}; \ No newline at end of file diff --git a/rtl/modexpng_parameters_old.vh b/rtl/modexpng_parameters_old.vh deleted file mode 100644 index d30b751..0000000 --- a/rtl/modexpng_parameters_old.vh +++ /dev/null @@ -1,40 +0,0 @@ - -//localparam WORD_WIDTH = 17; -//localparam MAC_WIDTH = 47; - -localparam BANK_ADDR_WIDTH = 2; // TODO: Replace everywhere! - -localparam [1:0] BANK_WIDE_T1T2 = 2'd0; -localparam [1:0] BANK_WIDE_ABL = 2'd1; -localparam [1:0] BANK_WIDE_ABH = 2'd2; -localparam [1:0] BANK_WIDE_N = 2'd3; - -localparam [1:0] BANK_RCMB_ML = 2'd0; -localparam [1:0] BANK_RCMB_MH = 2'd1; -localparam [1:0] BANK_RCMB_EXT = 2'd2; // 0 -> MH' - -localparam [1:0] BANK_NARROW_T1T2 = 2'd0; -localparam [1:0] BANK_NARROW_N_COEFF = 2'd1; -localparam [1:0] BANK_NARROW_Q = 2'd2; -localparam [1:0] BANK_NARROW_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q' - - -//localparam BANK_Y_T2 = 3'd0; -//localparam BANK_XY_T1T2 = 3'd0; - -//localparam BANK_XY_AB_LSB = 3'd1; -//localparam BANK_XY_AB_MSB = 3'd2; - -//localparam BANK_X_N = 3'd3; -//localparam BANK_Y_N_COEFF = 3'd3; - -//localparam BANK_XY_M = 3'd4; - -//localparam BANK_XY_Q_LSB = 3'd5; -//localparam BANK_XY_Q_MSB = 3'd6; - -//localparam BANK_XY_AUX = 3'd7; - -//localparam BANK_XY_ANY = 3'bXXX; - -//localparam BANK_XY_AUX_ADDR_N_COEFF = 0; diff --git a/rtl/modexpng_parameters_x8.vh b/rtl/modexpng_parameters_x8.vh new file mode 100644 index 0000000..0dcc3d6 --- /dev/null +++ b/rtl/modexpng_parameters_x8.vh @@ -0,0 +1,4 @@ +localparam NUM_MULTS = 8; +localparam NUM_MULTS_AUX = NUM_MULTS + 1; +localparam NUM_MULTS_HALF = NUM_MULTS / 2; +localparam NUM_MULTS_HALF_AUX = NUM_MULTS_HALF + 1; diff --git a/rtl/modexpng_parameters_x8_old.vh b/rtl/modexpng_parameters_x8_old.vh deleted file mode 100644 index 8734354..0000000 --- a/rtl/modexpng_parameters_x8_old.vh +++ /dev/null @@ -1 +0,0 @@ -localparam NUM_MULTS = 8; diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v index d6b1ad1..de60d1f 100644 --- a/rtl/modexpng_recombinator_block.v +++ b/rtl/modexpng_recombinator_block.v @@ -17,9 +17,9 @@ module modexpng_recombinator_block // // Headers // + `include "modexpng_parameters.vh" `include "../rtl_1/modexpng_mmm_fsm_old.vh" - `include "../rtl_1/modexpng_parameters_old.vh" - `include "../rtl_1/modexpng_parameters_x8_old.vh" + //`include "../rtl_1/modexpng_parameters_x8_old.vh" input clk; @@ -34,22 +34,22 @@ module modexpng_recombinator_block input [ 4:0] col_index; input [ 4:0] col_index_last; + input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; input [ 7:0] rd_narrow_xy_addr; - input [ 1:0] rd_narrow_xy_bank; - output [ 1:0] rcmb_wide_xy_bank; + output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; output [ 7:0] rcmb_wide_xy_addr; output [ 17:0] rcmb_wide_x_dout; output [ 17:0] rcmb_wide_y_dout; output rcmb_wide_xy_valid; - output [ 1:0] rcmb_narrow_xy_bank; + output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; output [ 7:0] rcmb_narrow_xy_addr; output [ 17:0] rcmb_narrow_x_dout; output [ 17:0] rcmb_narrow_y_dout; output rcmb_narrow_xy_valid; - output [ 1:0] rdct_narrow_xy_bank; + output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank; output [ 7:0] rdct_narrow_xy_addr; output [ 17:0] rdct_narrow_x_dout; output [ 17:0] rdct_narrow_y_dout; @@ -167,7 +167,7 @@ module modexpng_recombinator_block function calc_square_triangle_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -182,7 +182,7 @@ module modexpng_recombinator_block function calc_square_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -192,7 +192,7 @@ module modexpng_recombinator_block function calc_triangle_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -202,7 +202,7 @@ module modexpng_recombinator_block function calc_rectangle_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -217,7 +217,7 @@ module modexpng_recombinator_block function calc_triangle_aux_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -232,7 +232,7 @@ module modexpng_recombinator_block function [7:0] calc_square_triangle_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -258,7 +258,7 @@ module modexpng_recombinator_block function [7:0] calc_square_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -268,7 +268,7 @@ module modexpng_recombinator_block function [7:0] calc_triangle_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -278,7 +278,7 @@ module modexpng_recombinator_block function [7:0] calc_rectangle_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -313,7 +313,7 @@ module modexpng_recombinator_block function [2:0] calc_square_triangle_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -339,7 +339,7 @@ module modexpng_recombinator_block function [2:0] calc_square_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -349,7 +349,7 @@ module modexpng_recombinator_block function [2:0] calc_triangle_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -359,7 +359,7 @@ module modexpng_recombinator_block function [2:0] calc_rectangle_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] slim_bram_xy_bank_value; + input [BANK_ADDR_W -1:0] slim_bram_xy_bank_value; input [7:0] slim_bram_xy_addr_value; begin // @@ -385,7 +385,7 @@ module modexpng_recombinator_block function calc_square_rectangle_purge_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin // @@ -400,7 +400,7 @@ module modexpng_recombinator_block function calc_square_purge_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -410,7 +410,7 @@ module modexpng_recombinator_block function calc_rectangle_purge_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; begin calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); @@ -420,7 +420,7 @@ module modexpng_recombinator_block function calc_square_valid_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -436,7 +436,7 @@ module modexpng_recombinator_block function calc_rectangle_valid_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -452,7 +452,7 @@ module modexpng_recombinator_block function [7:0] calc_square_bitmap_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -469,7 +469,7 @@ module modexpng_recombinator_block function [7:0] calc_rectangle_bitmap_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -485,7 +485,7 @@ module modexpng_recombinator_block function calc_square_purge_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -501,7 +501,7 @@ module modexpng_recombinator_block function calc_rectangle_purge_msb; input [4:0] col_index_value; input [4:0] col_index_last_value; - input [1:0] narrow_xy_bank_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; input [7:0] narrow_xy_addr_value; input [7:0] index_last_value; begin @@ -783,19 +783,19 @@ module modexpng_recombinator_block end - reg [ 1:0] wide_xy_bank; + reg [ BANK_ADDR_W -1:0] wide_xy_bank; reg [ 7:0] wide_xy_addr; reg [17:0] wide_x_dout; reg [17:0] wide_y_dout; reg wide_xy_valid = 1'b0; - reg [ 1:0] narrow_xy_bank; + reg [ BANK_ADDR_W -1:0] narrow_xy_bank; reg [ 7:0] narrow_xy_addr; reg [17:0] narrow_x_dout; reg [17:0] narrow_y_dout; reg narrow_xy_valid = 1'b0; - reg [ 1:0] rdct_xy_bank; + reg [ BANK_ADDR_W -1:0] rdct_xy_bank; reg [ 7:0] rdct_xy_addr; reg [17:0] rdct_x_dout; reg [17:0] rdct_y_dout; @@ -883,7 +883,7 @@ module modexpng_recombinator_block endtask task _update_wide; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -898,7 +898,7 @@ module modexpng_recombinator_block endtask task _update_narrow; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -913,7 +913,7 @@ module modexpng_recombinator_block endtask task _update_rdct; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -928,7 +928,7 @@ module modexpng_recombinator_block endtask task set_wide; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -938,7 +938,7 @@ module modexpng_recombinator_block endtask task set_narrow; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -948,7 +948,7 @@ module modexpng_recombinator_block endtask task set_rdct; - input [ 1:0] bank; + input [ BANK_ADDR_W -1:0] bank; input [ 7:0] addr; input [17:0] dout_x; input [17:0] dout_y; @@ -959,19 +959,19 @@ module modexpng_recombinator_block task clear_wide; begin - _update_wide(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + _update_wide(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); end endtask task clear_narrow; begin - _update_narrow(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + _update_narrow(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); end endtask task clear_rdct; begin - _update_rdct(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + _update_rdct(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); end endtask @@ -1074,16 +1074,16 @@ module modexpng_recombinator_block // case (rcmb_xy_valid) // - 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_ABH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); + 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); else clear_wide; // - 2'b01: set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); // 2'b10: if (cnt_msb < 8'd2) clear_wide; - else set_wide(BANK_WIDE_ABH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); // - 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_ABH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); - else set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); + else set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); // endcase // diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v deleted file mode 100644 index 1ecf56a..0000000 --- a/rtl/modexpng_recombinator_cell.v +++ /dev/null @@ -1,35 +0,0 @@ -module modexpng_recombinator_cell -( - clk, - ce, clr, - din, dout -); - - input clk; - input ce; - input clr; - input [46:0] din; - output [15:0] dout; - - reg [14:0] z; - reg [16:0] y; - reg [17:0] x; - //reg [15:0] w; - - //assign dout = w; - assign dout = x[15:0]; - - wire [14:0] din_z = din[46:32]; // TODO: maybe determine more precise bound here - wire [15:0] din_y = din[31:16]; - wire [15:0] din_x = din[15: 0]; - - always @(posedge clk) - // - if (ce) begin - z <= din_z; - y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z}; - x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {{16{1'b0}}, x[17:16]}; - //w <= clr ? {16{1'bX}} : x[15:0]; - end - -endmodule diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v index 0f5e461..aafb38c 100644 --- a/rtl/modexpng_reductor.v +++ b/rtl/modexpng_reductor.v @@ -17,8 +17,9 @@ module modexpng_reductor // // Headers // + `include "modexpng_parameters.vh" //`include "../rtl_1/modexpng_mmm_fsm.vh" - `include "../rtl_1/modexpng_parameters_old.vh" + //`include "../rtl_1/modexpng_parameters_x8.vh" @@ -39,12 +40,12 @@ module modexpng_reductor input [ 7:0] rd_narrow_xy_addr; input [ 1:0] rd_narrow_xy_bank; */ - input [ 1:0] rd_wide_xy_bank_aux; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; input [ 7:0] rd_wide_xy_addr_aux; input [ 17:0] rd_wide_x_dout_aux; input [ 17:0] rd_wide_y_dout_aux; // - input [ 1:0] rcmb_final_xy_bank; + input [ BANK_ADDR_W -1:0] rcmb_final_xy_bank; input [ 7:0] rcmb_final_xy_addr; input [ 17:0] rcmb_final_x_dout; input [ 17:0] rcmb_final_y_dout; @@ -60,7 +61,7 @@ module modexpng_reductor // Ready // reg rdy_reg = 1'b1; - reg busy_now = 1'b0; + wire busy_now; assign rdy = rdy_reg; @@ -81,9 +82,9 @@ module modexpng_reductor reg rcmb_xy_valid_dly2 = 1'b0; reg rcmb_xy_valid_dly3 = 1'b0; - reg [2:0] rcmb_xy_bank_dly1; - reg [2:0] rcmb_xy_bank_dly2; - reg [2:0] rcmb_xy_bank_dly3; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2; + reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3; reg [7:0] rcmb_xy_addr_dly1; reg [7:0] rcmb_xy_addr_dly2; @@ -236,7 +237,7 @@ module modexpng_reductor // clear_rdct; // - if (busy_now && rcmb_xy_valid_dly3) + if (rcmb_xy_valid_dly3) // case (rcmb_xy_bank_dly3) @@ -258,12 +259,25 @@ module modexpng_reductor // // Busy // + reg busy_next = 1'b0; + reg [2:0] busy_now_shreg = {3{1'b0}}; + + assign busy_now = busy_now_shreg[2]; + + always @(posedge clk) + // + if (rst) busy_now_shreg <= {3{1'b0}}; + else begin + if (rdy && ena) busy_now_shreg <= {3{1'b1}}; + else busy_now_shreg <= {busy_now_shreg[1:0], busy_next}; + end + always @(posedge clk) // - if (rst) busy_now <= 1'b0; + if (rst) busy_next <= 1'b0; else begin - if (rdy && ena) busy_now <= 1'b1; - //if (!rdy && !busy_now) rdy <= 1'b1; + if (rdy && ena) busy_next <= 1'b1; + if (!rdy && rcmb_xy_valid_dly3 && (rcmb_xy_bank_dly3 == BANK_RCMB_EXT)) busy_next <= 1'b0; end diff --git a/rtl/modexpng_sdp_36k_wrapper.v b/rtl/modexpng_sdp_36k_wrapper.v new file mode 100644 index 0000000..0295697 --- /dev/null +++ b/rtl/modexpng_sdp_36k_wrapper.v @@ -0,0 +1,72 @@ +module modexpng_sdp_36k_wrapper +( + clk, + + ena, wea, + addra, dina, + + enb, regceb, + addrb, doutb +); + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + + + // + // Ports + // + input clk; + + input ena; + input wea; + input [BANK_ADDR_W + OP_ADDR_W -1:0] addra; + input [ WORD_EXT_W -1:0] dina; + + input enb; + input regceb; + input [BANK_ADDR_W + OP_ADDR_W -1:0] addrb; + output [ WORD_EXT_W -1:0] doutb; + + // + // BRAM_SDP_MACRO + // + BRAM_SDP_MACRO # + ( + .DEVICE ("7SERIES"), + + .BRAM_SIZE ("36Kb"), + + .WRITE_WIDTH (WORD_EXT_W), + .READ_WIDTH (WORD_EXT_W), + + .DO_REG (1), + .WRITE_MODE ("READ_FIRST"), + + .SRVAL (72'h000000000000000000), + .INIT (72'h000000000000000000), + + .INIT_FILE ("NONE"), + .SIM_COLLISION_CHECK ("NONE") + ) + BRAM_SDP_MACRO_inst + ( + .RST (1'b0), + + .WRCLK (clk), + .WREN (ena), + .WE ({2{wea}}), + .WRADDR (addra), + .DI (dina), + + .RDCLK (clk), + .RDEN (enb), + .REGCE (regceb), + .RDADDR (addrb), + .DO (doutb) + ); + +endmodule diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v index d6f9fb1..be04c7c 100644 --- a/rtl/modexpng_storage_block.v +++ b/rtl/modexpng_storage_block.v @@ -32,49 +32,46 @@ module modexpng_storage_block rd_narrow_y_dout ); - // // Headers // - `include "../rtl_1/modexpng_parameters_x8_old.vh" - + `include "modexpng_parameters.vh" // // Ports // - input clk; - input rst; - - input wr_wide_xy_ena; - input [ 1:0] wr_wide_xy_bank; - input [ 7:0] wr_wide_xy_addr; - input [17:0] wr_wide_x_din; - input [17:0] wr_wide_y_din; + input clk; + input rst; + + input wr_wide_xy_ena; + input [ BANK_ADDR_W -1:0] wr_wide_xy_bank; + input [ OP_ADDR_W -1:0] wr_wide_xy_addr; + input [ WORD_EXT_W -1:0] wr_wide_x_din; + input [ WORD_EXT_W -1:0] wr_wide_y_din; - input wr_narrow_xy_ena; - input [ 1:0] wr_narrow_xy_bank; - input [ 7:0] wr_narrow_xy_addr; - input [17:0] wr_narrow_x_din; - input [17:0] wr_narrow_y_din; - - input rd_wide_xy_ena; - input rd_wide_xy_ena_aux; - input [ 1:0] rd_wide_xy_bank; - input [ 1:0] rd_wide_xy_bank_aux; - input [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; - input [ 8-1:0] rd_wide_xy_addr_aux; - output [18*NUM_MULTS/2-1:0] rd_wide_x_dout; - output [18*NUM_MULTS/2-1:0] rd_wide_y_dout; - output [ 18-1:0] rd_wide_x_dout_aux; - output [ 18-1:0] rd_wide_y_dout_aux; + input wr_narrow_xy_ena; + input [ BANK_ADDR_W -1:0] wr_narrow_xy_bank; + input [ OP_ADDR_W -1:0] wr_narrow_xy_addr; + input [ WORD_EXT_W -1:0] wr_narrow_x_din; + input [ WORD_EXT_W -1:0] wr_narrow_y_din; + + input rd_wide_xy_ena; + input rd_wide_xy_ena_aux; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank; + input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + input [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr; + input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; + output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout; + output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout; + output [ WORD_EXT_W -1:0] rd_wide_x_dout_aux; + output [ WORD_EXT_W -1:0] rd_wide_y_dout_aux; - input rd_narrow_xy_ena; - input [ 1:0] rd_narrow_xy_bank; - input [ 7:0] rd_narrow_xy_addr; - output [18-1:0] rd_narrow_x_dout; - output [18-1:0] rd_narrow_y_dout; + input rd_narrow_xy_ena; + input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; + input [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rd_narrow_x_dout; + output [ WORD_EXT_W -1:0] rd_narrow_y_dout; - // // Internal Registers // @@ -82,43 +79,44 @@ module modexpng_storage_block reg rd_wide_xy_reg_ena_aux = 1'b0; reg rd_narrow_xy_reg_ena = 1'b0; - always @(posedge clk) begin - // - rd_wide_xy_reg_ena <= rst ? 1'b0 : rd_wide_xy_ena; - rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux; - rd_narrow_xy_reg_ena <= rst ? 1'b0 : rd_narrow_xy_ena; + always @(posedge clk) // - end - + if (rst) begin + rd_wide_xy_reg_ena <= 1'b0; + rd_wide_xy_reg_ena_aux <= 1'b0; + rd_narrow_xy_reg_ena <= 1'b0; + end else begin + rd_wide_xy_reg_ena <= rd_wide_xy_ena; + rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux; + rd_narrow_xy_reg_ena <= rd_narrow_xy_ena; + end // // Helper Signals // - wire [2+8-1:0] wr_wide_xy_offset; - wire [2+8-1:0] rd_wide_xy_offset[0:NUM_MULTS/2-1]; - wire [2+8-1:0] rd_wide_xy_offset_aux; - wire [2+8-1:0] wr_narrow_xy_offset; - wire [2+8-1:0] rd_narrow_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset[0:NUM_MULTS_HALF-1]; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset_aux; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset; - assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr}; assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux}; - assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr}; assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr}; - + assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr}; + assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr}; // // "Wide" Storage // genvar z; - generate for (z=0; z<(NUM_MULTS/2); z=z+1) + generate for (z=0; z