diff options
Diffstat (limited to 'rtl/modexpng_mmm_dual.v')
-rw-r--r-- | rtl/modexpng_mmm_dual.v | 733 |
1 files changed, 380 insertions, 353 deletions
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v index 8d8b83d..bb1a55c 100644 --- a/rtl/modexpng_mmm_dual.v +++ b/rtl/modexpng_mmm_dual.v @@ -94,72 +94,73 @@ module modexpng_mmm_dual // // Ports // - input clk; - input rst_n; + input clk; + input rst_n; - input ena; - output rdy; + input ena; + output rdy; - input ladder_mode; - input [7:0] word_index_last; - input [7:0] word_index_last_minus1; - input force_unity_b; - input only_reduce; - input just_multiply; + input ladder_mode; + input [ OP_ADDR_W -1:0] word_index_last; + input [ OP_ADDR_W -1:0] word_index_last_minus1; + input force_unity_b; + input only_reduce; + input just_multiply; - input [BANK_ADDR_W-1:0] sel_wide_in; - input [BANK_ADDR_W-1:0] sel_narrow_in; + input [BANK_ADDR_W -1:0] sel_wide_in; + input [BANK_ADDR_W -1:0] sel_narrow_in; - output rd_wide_xy_ena; - output rd_wide_xy_ena_aux; - output [ BANK_ADDR_W -1:0] rd_wide_xy_bank; - output [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; - output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; - output [ 8-1:0] rd_wide_xy_addr_aux; - input [18*NUM_MULTS/2-1:0] rd_wide_x_din; - input [18*NUM_MULTS/2-1:0] rd_wide_y_din; - input [ 18-1:0] rd_wide_x_din_aux; - input [ 18-1:0] rd_wide_y_din_aux; + output rd_wide_xy_ena; + output rd_wide_xy_ena_aux; + output [BANK_ADDR_W -1:0] rd_wide_xy_bank; + output [BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + + output [ OP_ADDR_W * NUM_MULTS_HALF -1:0] rd_wide_xy_addr; + output [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; + input [ WORD_EXT_W * NUM_MULTS_HALF -1:0] rd_wide_x_din; + input [ WORD_EXT_W * NUM_MULTS_HALF -1:0] rd_wide_y_din; + input [ WORD_EXT_W -1:0] rd_wide_x_din_aux; + input [ WORD_EXT_W -1:0] rd_wide_y_din_aux; - output rd_narrow_xy_ena; - output [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; - output [ 7:0] rd_narrow_xy_addr; - input [18-1:0] rd_narrow_x_din; - input [18-1:0] rd_narrow_y_din; + output rd_narrow_xy_ena; + output [BANK_ADDR_W -1:0] rd_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + input [ WORD_EXT_W -1:0] rd_narrow_x_din; + input [ WORD_EXT_W -1:0] rd_narrow_y_din; - output [BANK_ADDR_W -1:0] rcmb_wide_xy_bank; - output [ 7:0] rcmb_wide_xy_addr; - output [17:0] rcmb_wide_x_dout; - output [17:0] rcmb_wide_y_dout; - output rcmb_wide_xy_valid; + output [BANK_ADDR_W -1:0] rcmb_wide_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_wide_x_dout; + output [ WORD_EXT_W -1:0] rcmb_wide_y_dout; + output rcmb_wide_xy_valid; - output [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; - output [ 7:0] rcmb_narrow_xy_addr; - output [17:0] rcmb_narrow_x_dout; - output [17:0] rcmb_narrow_y_dout; - output rcmb_narrow_xy_valid; + output [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout; + output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout; + output rcmb_narrow_xy_valid; - output [BANK_ADDR_W -1:0] rcmb_xy_bank; - output [ 7:0] rcmb_xy_addr; - output [17:0] rcmb_x_dout; - output [17:0] rcmb_y_dout; - output rcmb_xy_valid; + output [BANK_ADDR_W -1:0] rcmb_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_x_dout; + output [ WORD_EXT_W -1:0] rcmb_y_dout; + output rcmb_xy_valid; - output rdct_ena; - input rdct_rdy; + output rdct_ena; + input rdct_rdy; // // FSM Declaration // - reg [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE; - reg [MMM_FSM_STATE_W-1:0] fsm_state_next; + reg [MMM_FSM_STATE_W -1:0] fsm_state = MMM_FSM_STATE_IDLE; + reg [MMM_FSM_STATE_W -1:0] fsm_state_next; - wire [MMM_FSM_STATE_W-1:0] fsm_state_after_idle; - wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_square; - wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_triangle; - wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_rectangle; - wire [MMM_FSM_STATE_W-1:0] fsm_state_after_square_holdoff; + wire [MMM_FSM_STATE_W -1:0] fsm_state_after_idle; + wire [MMM_FSM_STATE_W -1:0] fsm_state_after_mult_square; + wire [MMM_FSM_STATE_W -1:0] fsm_state_after_mult_triangle; + wire [MMM_FSM_STATE_W -1:0] fsm_state_after_mult_rectangle; + wire [MMM_FSM_STATE_W -1:0] fsm_state_after_square_holdoff; // @@ -174,48 +175,55 @@ module modexpng_mmm_dual // // Storage Control Interface // - reg wide_xy_ena = 1'b0; - reg wide_xy_ena_aux = 1'b0; - reg [ BANK_ADDR_W -1:0] wide_xy_bank; - reg [ BANK_ADDR_W -1:0] wide_xy_bank_aux; - reg [ 8-1:0] wide_xy_addr[0:3]; - reg [ 8-1:0] wide_xy_addr_aux; + reg wide_xy_ena = 1'b0; + reg wide_xy_ena_aux = 1'b0; + reg [BANK_ADDR_W -1:0] wide_xy_bank; + reg [BANK_ADDR_W -1:0] wide_xy_bank_aux; + reg [ OP_ADDR_W -1:0] wide_xy_addr[0:NUM_MULTS_HALF-1]; + reg [ OP_ADDR_W -1:0] wide_xy_addr_aux; + + reg narrow_xy_ena = 1'b0; + reg [BANK_ADDR_W -1:0] narrow_xy_bank; + reg [ OP_ADDR_W -1:0] narrow_xy_addr; + reg [ OP_ADDR_W -1:0] narrow_xy_addr_dly; + wire [ OP_ADDR_W -1:0] narrow_xy_addr_inc = narrow_xy_addr + 1'b1; - reg narrow_xy_ena = 1'b0; - reg [ BANK_ADDR_W -1:0] narrow_xy_bank; - reg [ 7:0] narrow_xy_addr; - reg [ 7:0] narrow_xy_addr_dly; - assign rd_wide_xy_ena = wide_xy_ena; + // + // Outmap Port Mapping + // + assign rd_wide_xy_ena = wide_xy_ena; assign rd_wide_xy_ena_aux = wide_xy_ena_aux; - assign rd_wide_xy_bank = wide_xy_bank; + assign rd_wide_xy_bank = wide_xy_bank; assign rd_wide_xy_bank_aux = wide_xy_bank_aux; assign rd_wide_xy_addr_aux = wide_xy_addr_aux; - assign rd_narrow_xy_ena = narrow_xy_ena; - assign rd_narrow_xy_bank = narrow_xy_bank; - assign rd_narrow_xy_addr = narrow_xy_addr; + assign rd_narrow_xy_ena = narrow_xy_ena; + assign rd_narrow_xy_bank = narrow_xy_bank; + assign rd_narrow_xy_addr = narrow_xy_addr; genvar z; - generate for (z=0; z<(NUM_MULTS/2); z=z+1) + generate for (z=0; z<NUM_MULTS_HALF; z=z+1) begin : gen_rd_wide_xy_addr - assign rd_wide_xy_addr[8*z+:8] = wide_xy_addr[z]; + assign rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W] = wide_xy_addr[z]; end endgenerate + // // Column Counter // - reg [4:0] col_index; // current column index - reg [4:0] col_index_prev; // delayed column index value - reg [4:0] col_index_last; // index of the very last column - reg [4:0] col_index_next; // precomputed next column index - reg col_is_last; // flag set during the very last column + reg [COL_INDEX_W -1:0] col_index; // current column index + reg [COL_INDEX_W -1:0] col_index_prev; // delayed column index value + reg [COL_INDEX_W -1:0] col_index_last; // index of the very last column + reg [COL_INDEX_W -1:0] col_index_next; // precomputed next column index + reg col_is_last; // flag set during the very last column always @(posedge clk) // col_index_prev <= col_index; + // // Column Counter Increment Logic // @@ -226,9 +234,9 @@ module modexpng_mmm_dual MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin - col_index <= 5'd0; - col_index_last <= word_index_last[7:3]; - col_index_next <= 5'd1; + col_index <= COL_INDEX_ZERO; + col_index_last <= word_index_last[OP_ADDR_W-1:MAC_INDEX_W]; + col_index_next <= COL_INDEX_ONE; col_is_last <= 1'b0; end @@ -236,9 +244,9 @@ module modexpng_mmm_dual MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin - col_index <= col_index_next; - col_is_last <= col_index_next == col_index_last; - col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1; + col_index <= col_index_next; + col_is_last <= col_index_next == col_index_last; + col_index_next <= col_index_next == col_index_last ? COL_INDEX_ZERO : col_index_next + 1'b1; end // endcase @@ -261,9 +269,10 @@ module modexpng_mmm_dual reg rectangle_surely_done_flop = 1'b0; reg rectangle_tardy_done_flop = 1'b0; - assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1; - assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index); + assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1; + assign triangle_almost_done_comb = narrow_xy_addr == {col_index, word_index_last_minus1[MAC_INDEX_W-1:0]}; assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1; + // // Square Completion Flags @@ -273,11 +282,9 @@ module modexpng_mmm_dual case (fsm_state) // MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, - MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: - square_almost_done_flop <= square_almost_done_comb; + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: square_almost_done_flop <= square_almost_done_comb; // - default: - square_almost_done_flop <= 1'b0; + default: square_almost_done_flop <= 1'b0; // endcase // @@ -293,11 +300,9 @@ module modexpng_mmm_dual case (fsm_state) // MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: - triangle_almost_done_flop <= triangle_almost_done_comb; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: triangle_almost_done_flop <= triangle_almost_done_comb; // - default: - triangle_almost_done_flop <= 1'b0; + default: triangle_almost_done_flop <= 1'b0; // endcase // @@ -314,11 +319,9 @@ module modexpng_mmm_dual case (fsm_state) // MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: - rectangle_almost_done_flop <= rectangle_almost_done_comb; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: rectangle_almost_done_flop <= rectangle_almost_done_comb; // - default: - rectangle_almost_done_flop <= 1'b0; + default: rectangle_almost_done_flop <= 1'b0; // endcase // @@ -341,29 +344,28 @@ module modexpng_mmm_dual case (fsm_state_next) // MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, - MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: narrow_xy_addr <= 8'd0; + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= OP_ADDR_ZERO; + // MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, - MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0; + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr_inc : OP_ADDR_ZERO; // - MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? - 8'd0 : narrow_xy_addr + 1'b1; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? OP_ADDR_ZERO : narrow_xy_addr_inc; // - MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0; MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? - 8'd1 : narrow_xy_addr + 1'b1; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? OP_ADDR_ONE : narrow_xy_addr_inc; // - default: narrow_xy_addr <= 8'dX; + default: narrow_xy_addr <= OP_ADDR_DNC; // endcase // @@ -376,50 +378,53 @@ module modexpng_mmm_dual MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, - MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in; + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in; // MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? - BANK_NARROW_EXT : BANK_NARROW_COEFF; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? BANK_NARROW_EXT : BANK_NARROW_COEFF; // MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? - BANK_NARROW_EXT : BANK_NARROW_Q; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? BANK_NARROW_EXT : BANK_NARROW_Q; // - default: narrow_xy_bank <= 2'bXX; + default: narrow_xy_bank <= BANK_DNC; // endcase // + // Narrow Enable + // case (fsm_state_next) // MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, - MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: narrow_xy_ena <= 1'b1; - MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, - MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop; + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; - MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1; + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop; + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; + // MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop; // - default: narrow_xy_ena <= 1'b0; + default: narrow_xy_ena <= 1'b0; // endcase // @@ -430,23 +435,19 @@ module modexpng_mmm_dual // Wide Storage Control Logic // - wire [2:0] wide_offset_rom[0:3]; + wire [MAC_INDEX_W-1:0] wide_offset_rom[0:NUM_MULTS_HALF-1]; generate for (z=1; z<NUM_MULTS; z=z+2) begin : gen_wide_offset_rom - assign wide_offset_rom[(z-1)/2] = z[2:0]; + assign wide_offset_rom[(z-1)/2] = z[MAC_INDEX_W-1:0]; end endgenerate - function [7:0] wide_xy_addr_next; - input [7:0] wide_xy_addr_current; - input [7:0] wide_xy_addr_last; - begin - if (wide_xy_addr_current > 8'd0) - wide_xy_addr_next = wide_xy_addr_current - 1'b1; - else - wide_xy_addr_next = wide_xy_addr_last; - end + function [OP_ADDR_W-1:0] wide_xy_addr_next; + input [OP_ADDR_W-1:0] wide_xy_addr_current; + input [OP_ADDR_W-1:0] wide_xy_addr_last; + if (wide_xy_addr_current > OP_ADDR_ZERO) wide_xy_addr_next = wide_xy_addr_current - 1'b1; + else wide_xy_addr_next = wide_xy_addr_last; endfunction integer j; @@ -459,128 +460,143 @@ module modexpng_mmm_dual // // Wide Address // - for (j=0; j<(NUM_MULTS/2); j=j+1) + for (j=0; j<NUM_MULTS_HALF; j=j+1) // case (fsm_state_next) // - // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // another way to code this is to extend the look-up table to 8 entries and just use shifts + // instead of subtractions (this requires further research, let's maybe not do it right now) + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {OP_ADDR_ZERO, wide_offset_rom[j]}; + // + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; // - MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; - MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, - MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); - // - MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); - // - MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]}; - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]}; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last); + // + default: wide_xy_addr[j] <= OP_ADDR_DNC; // - default: wide_xy_addr[j] <= 8'dX; endcase // // Wide Aux Address // case (fsm_state_next) // - // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions! + // there's a potentially more efficient way to code the switch (see above) + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= OP_ADDR_ONE; // - MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; - MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, - MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); - // - MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1}; + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last); + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= OP_ADDR_DNC; // - MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0}; MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX; - //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ? - //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4]; + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : OP_ADDR_DNC; + // + default: wide_xy_addr_aux <= OP_ADDR_DNC; // - default: wide_xy_addr_aux <= 8'dX; endcase // // Wide Bank // case (fsm_state_next) + // MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, - MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in; + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in; + // MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L; + // MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L; + // MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N; - default: wide_xy_bank <= 3'bXXX; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N; + // + default: wide_xy_bank <= BANK_DNC; + // endcase // // Wide Aux Bank // case (fsm_state_next) + // MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, - MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in; + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in; + // MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H; + // MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L; + // MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's) - case (rcmb_xy_bank) - BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L; - BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H; - //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX - default: wide_xy_bank_aux <= 3'bXXX; - endcase - else wide_xy_bank_aux <= 3'bXXX; - default: wide_xy_bank_aux <= 3'bXXX; + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: begin + wide_xy_bank_aux <= BANK_DNC; + if (rcmb_xy_valid) + case (rcmb_xy_bank) + BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L; + BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H; + endcase + end + // + default: wide_xy_bank_aux <= BANK_DNC; + // endcase // // Wide Enable // case (fsm_state_next) + // MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT, MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT, MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, @@ -598,27 +614,34 @@ module modexpng_mmm_dual MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1; - default: wide_xy_ena <= 1'b0; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1; + // + default: wide_xy_ena <= 1'b0; + // endcase // // Wide Aux Enable // case (fsm_state_next) + // MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1; - MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1; + // + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0; + // MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML); - default: wide_xy_ena_aux <= 1'b0; + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid; + // + default: wide_xy_ena_aux <= 1'b0; + // endcase // end @@ -635,79 +658,66 @@ module modexpng_mmm_dual // // DSP Array Logic // - reg dsp_xy_ce_a = 1'b0; - reg dsp_xy_ce_b = 1'b0; + reg dsp_xy_ce_a = 1'b0; + reg dsp_xy_ce_b = 1'b0; reg dsp_xy_ce_b_dly = 1'b0; - reg dsp_xy_ce_m = 1'b0; - reg dsp_xy_ce_p = 1'b0; - reg dsp_xy_ce_mode = 1'b0; + reg dsp_xy_ce_m = 1'b0; + reg dsp_xy_ce_p = 1'b0; + reg dsp_xy_ce_mode = 1'b0; - reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}}; + reg [ NUM_MULTS_AUX -1:0] dsp_xy_mode_z = {NUM_MULTS_AUX{1'b1}}; - wire [5*18-1:0] dsp_x_a; - wire [5*18-1:0] dsp_y_a; + wire [WORD_EXT_W * NUM_MULTS_HALF_AUX -1:0] dsp_x_a; + wire [WORD_EXT_W * NUM_MULTS_HALF_AUX -1:0] dsp_y_a; - reg [1*16-1:0] dsp_x_b; - reg [1*16-1:0] dsp_y_b; + reg [WORD_W -1:0] dsp_x_b; + reg [WORD_W -1:0] dsp_y_b; - reg [ 1:0] dsp_xy_b_carry; + reg [CARRY_W -1:0] dsp_xy_b_carry; - wire [9*47-1:0] dsp_x_p; - wire [9*47-1:0] dsp_y_p; - - //generate for (z=0; z<(NUM_MULTS/2); z=z+1) - //begin : gen_dsp_xy_a_split - //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z]; - //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z]; - //end - //endgenerate + wire [MAC_W * NUM_MULTS_AUX -1:0] dsp_x_p; + wire [MAC_W * NUM_MULTS_AUX -1:0] dsp_y_p; assign dsp_x_a = {rd_wide_x_din_aux, rd_wide_x_din}; assign dsp_y_a = {rd_wide_y_din_aux, rd_wide_y_din}; - - //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux; - //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux; always @(posedge clk) // dsp_xy_ce_b_dly <= dsp_xy_ce_b; - modexpng_dsp_array_block dsp_array_block_x ( - .clk (clk), + .clk (clk), - .ce_a (dsp_xy_ce_a), - .ce_b (dsp_xy_ce_b), - .ce_m (dsp_xy_ce_m), - .ce_p (dsp_xy_ce_p), - .ce_mode (dsp_xy_ce_mode), + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), - .mode_z (dsp_xy_mode_z), + .mode_z (dsp_xy_mode_z), - .a (dsp_x_a), - .b (dsp_x_b), - .p (dsp_x_p) + .a (dsp_x_a), + .b (dsp_x_b), + .p (dsp_x_p) ); modexpng_dsp_array_block dsp_array_block_y ( - .clk (clk), + .clk (clk), - .ce_a (dsp_xy_ce_a), - .ce_b (dsp_xy_ce_b), - .ce_m (dsp_xy_ce_m), - .ce_p (dsp_xy_ce_p), - .ce_mode (dsp_xy_ce_mode), + .ce_a (dsp_xy_ce_a), + .ce_b (dsp_xy_ce_b), + .ce_m (dsp_xy_ce_m), + .ce_p (dsp_xy_ce_p), + .ce_mode (dsp_xy_ce_mode), - .mode_z (dsp_xy_mode_z), + .mode_z (dsp_xy_mode_z), - .a (dsp_y_a), - .b (dsp_y_b), - .p (dsp_y_p) + .a (dsp_y_a), + .b (dsp_y_b), + .p (dsp_y_p) ); - - // @@ -742,6 +752,7 @@ module modexpng_mmm_dual // end + // // DSP Feed Logic // @@ -758,76 +769,87 @@ module modexpng_mmm_dual case (fsm_state) MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_merge_xy_b_first <= 1'b1; - default: dsp_merge_xy_b_first <= 1'b0; + default: dsp_merge_xy_b_first <= 1'b0; endcase // end + // - // On-the-fly Carry Recombination + // On-the-fly Carry Recombination Logic // - wire [17:0] rd_narrow_x_din_carry = rd_narrow_x_din + {{16{1'b0}}, dsp_xy_b_carry}; - wire [17:0] rd_narrow_y_din_carry = rd_narrow_y_din + {{16{1'b0}}, dsp_xy_b_carry}; - wire [17:0] rd_narrow_xy_din_carry_mux = ladder_mode ? rd_narrow_y_din_carry : rd_narrow_x_din_carry; + wire [WORD_EXT_W-1:0] rd_narrow_x_din_carry = rd_narrow_x_din + {WORD_ZERO, dsp_xy_b_carry}; + wire [WORD_EXT_W-1:0] rd_narrow_y_din_carry = rd_narrow_y_din + {WORD_ZERO, dsp_xy_b_carry}; + wire [WORD_EXT_W-1:0] rd_narrow_xy_din_carry_mux = ladder_mode ? rd_narrow_y_din_carry : rd_narrow_x_din_carry; - wire [15:0] rd_narrow_xy_dout_carry_mux_or_unity = !force_unity_b ? - rd_narrow_xy_din_carry_mux[15:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO; + wire [WORD_W-1:0] rd_narrow_xy_dout_carry_mux_or_unity = !force_unity_b ? + rd_narrow_xy_din_carry_mux[WORD_W-1:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO; - always @(posedge clk) + always @(posedge clk) begin // - if (narrow_xy_ena_dly2) begin // rewrite + dsp_x_b <= WORD_DNC; + dsp_y_b <= WORD_DNC; + // + dsp_xy_b_carry <= CARRY_ZERO; + // + if (narrow_xy_ena_dly2) begin // if (!dsp_merge_xy_b) begin - dsp_x_b <= rd_narrow_x_din[15:0]; - dsp_y_b <= rd_narrow_y_din[15:0]; - dsp_xy_b_carry <= 2'b00; + // + dsp_x_b <= rd_narrow_x_din[WORD_W-1:0]; + dsp_y_b <= rd_narrow_y_din[WORD_W-1:0]; + // + dsp_xy_b_carry <= CARRY_ZERO; + // end else begin + // dsp_x_b <= rd_narrow_xy_dout_carry_mux_or_unity; dsp_y_b <= rd_narrow_xy_dout_carry_mux_or_unity; - dsp_xy_b_carry <= rd_narrow_xy_din_carry_mux[17:16]; + // + dsp_xy_b_carry <= rd_narrow_xy_din_carry_mux[WORD_EXT_W-1:WORD_W]; + // end // - end else begin - // - dsp_x_b <= WORD_DNC; - dsp_y_b <= WORD_DNC; - // - dsp_xy_b_carry <= 2'b00; - // end + // + end + + // + // DSP Mode Logic + // + reg [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv1 = {NUM_MULTS_AUX{1'b1}}; + reg [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv2 = {NUM_MULTS_AUX{1'b1}}; + reg [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv3 = {NUM_MULTS_AUX{1'b1}}; + reg [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv4 = {NUM_MULTS_AUX{1'b1}}; - reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}}; - reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}}; - reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}}; - reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}}; - - function [NUM_MULTS:0] calc_mac_mode_z_square; - input [ 4:0] col_index_value; - input [ 7:0] narrow_xy_addr_value; - begin - if (narrow_xy_addr_value[7:3] == col_index_value) - case (narrow_xy_addr_value[2:0]) - 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; - 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101}; - 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011}; - 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111}; - 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111}; - 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111}; - 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111}; - 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111}; - endcase - else - calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; - end + function [NUM_MULTS_AUX -1:0] calc_mac_mode_z_square; + // + input [NUM_MULTS_HALF_AUX-1:0] col_index_value; + input [OP_ADDR_W -1:0] narrow_xy_addr_value; + // + if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) + case (narrow_xy_addr_value[MAC_INDEX_W-1:0]) + 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; + 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101}; + 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011}; + 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111}; + 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111}; + 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111}; + 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111}; + 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111}; + endcase + else calc_mac_mode_z_square = {1'b1, 8'b11111111}; endfunction - function [NUM_MULTS:0] calc_mac_mode_z_rectangle; - input [ 4:0] col_index_value; - input [ 7:0] narrow_xy_addr_value; + function [NUM_MULTS_AUX -1:0] calc_mac_mode_z_rectangle; + // + input [NUM_MULTS_HALF_AUX -1:0] col_index_value; + input [OP_ADDR_W -1:0] narrow_xy_addr_value; + // begin - if (narrow_xy_addr_value[7:3] == col_index_value) - case (narrow_xy_addr_value[2:0]) + if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) + case (narrow_xy_addr_value[MAC_INDEX_W-1:0]) 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110}; 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101}; 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011}; @@ -837,41 +859,45 @@ module modexpng_mmm_dual 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111}; 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111}; endcase - else - calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; + else calc_mac_mode_z_rectangle = {1'b1, 8'b11111111}; end endfunction always @(posedge clk) // case (fsm_state_next) + // MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG, - MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; - MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, - MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly); + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG, MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy - MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {NUM_MULTS_AUX{1'b0}}; + // + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY, + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly); + // + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {NUM_MULTS_AUX{1'b1}}; + // MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly); - default: dsp_xy_mode_z_adv4 <= {9{1'b1}}; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly); + // + default: dsp_xy_mode_z_adv4 <= {NUM_MULTS_AUX{1'b1}}; + // endcase always @(posedge clk) begin + // dsp_xy_mode_z <= dsp_xy_mode_z_adv1; // dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2; dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3; dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4; + // end - - - // // Recombinator // @@ -880,43 +906,43 @@ module modexpng_mmm_dual modexpng_recombinator_block recombinator_block ( - .clk (clk), - .rst_n (rst_n), + .clk (clk), + .rst_n (rst_n), - .ena (rcmb_ena), - .rdy (rcmb_rdy), + .ena (rcmb_ena), + .rdy (rcmb_rdy), - .fsm_state_next (fsm_state_next), + .fsm_state_next (fsm_state_next), - .word_index_last (word_index_last), + .word_index_last (word_index_last), - .dsp_xy_ce_p (dsp_xy_ce_p), - .dsp_x_p (dsp_x_p), - .dsp_y_p (dsp_y_p), + .dsp_xy_ce_p (dsp_xy_ce_p), + .dsp_x_p (dsp_x_p), + .dsp_y_p (dsp_y_p), - .col_index (col_index), - .col_index_last (col_index_last), + .col_index (col_index), + .col_index_last (col_index_last), - .rd_narrow_xy_addr (narrow_xy_addr), - .rd_narrow_xy_bank (narrow_xy_bank), + .rd_narrow_xy_addr (narrow_xy_addr), + .rd_narrow_xy_bank (narrow_xy_bank), - .rcmb_wide_xy_bank (rcmb_wide_xy_bank), - .rcmb_wide_xy_addr (rcmb_wide_xy_addr), - .rcmb_wide_x_dout (rcmb_wide_x_dout), - .rcmb_wide_y_dout (rcmb_wide_y_dout), - .rcmb_wide_xy_valid (rcmb_wide_xy_valid), + .rcmb_wide_xy_bank (rcmb_wide_xy_bank), + .rcmb_wide_xy_addr (rcmb_wide_xy_addr), + .rcmb_wide_x_dout (rcmb_wide_x_dout), + .rcmb_wide_y_dout (rcmb_wide_y_dout), + .rcmb_wide_xy_valid (rcmb_wide_xy_valid), - .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), - .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), - .rcmb_narrow_x_dout (rcmb_narrow_x_dout), - .rcmb_narrow_y_dout (rcmb_narrow_y_dout), - .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid), + .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), + .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), + .rcmb_narrow_x_dout (rcmb_narrow_x_dout), + .rcmb_narrow_y_dout (rcmb_narrow_y_dout), + .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid), - .rdct_narrow_xy_bank (rcmb_xy_bank), - .rdct_narrow_xy_addr (rcmb_xy_addr), - .rdct_narrow_x_dout (rcmb_x_dout), - .rdct_narrow_y_dout (rcmb_y_dout), - .rdct_narrow_xy_valid (rcmb_xy_valid) + .rdct_narrow_xy_bank (rcmb_xy_bank), + .rdct_narrow_xy_addr (rcmb_xy_addr), + .rdct_narrow_x_dout (rcmb_x_dout), + .rdct_narrow_y_dout (rcmb_y_dout), + .rdct_narrow_xy_valid (rcmb_xy_valid) ); @@ -941,11 +967,11 @@ module modexpng_mmm_dual // // FSM Transition Logic // - assign fsm_state_after_idle = !only_reduce ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT; - assign fsm_state_after_mult_square = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT; - assign fsm_state_after_mult_triangle = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT; - assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT; - assign fsm_state_after_square_holdoff = just_multiply ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT; + assign fsm_state_after_idle = !only_reduce ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT ; + assign fsm_state_after_mult_square = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT ; + assign fsm_state_after_mult_triangle = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT ; + assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT ; + assign fsm_state_after_square_holdoff = just_multiply ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT ; always @* begin @@ -953,43 +979,43 @@ module modexpng_mmm_dual fsm_state_next = MMM_FSM_STATE_IDLE; // case (fsm_state) - MMM_FSM_STATE_IDLE: fsm_state_next = ena ? fsm_state_after_idle /*MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT*/ : MMM_FSM_STATE_IDLE; + MMM_FSM_STATE_IDLE: fsm_state_next = ena ? fsm_state_after_idle : MMM_FSM_STATE_IDLE; - MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG ; - MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY ; - MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY; + MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG ; + MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY ; + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY; - MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG ; - MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY ; - MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY; + MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG ; + MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY ; + MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY; - MMM_FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? fsm_state_after_square_holdoff : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF; + MMM_FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? fsm_state_after_square_holdoff : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF; - MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ; - MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ; - MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY; - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ; - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ; - MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ; + MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY; - MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF; + MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF; - MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ; - MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ; MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY; - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ; - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ; - MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ; + MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY; - MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_WAIT_REDUCTOR : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF; + MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_WAIT_REDUCTOR : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF; - MMM_FSM_STATE_WAIT_REDUCTOR: fsm_state_next = rdct_rdy ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_WAIT_REDUCTOR; + MMM_FSM_STATE_WAIT_REDUCTOR: fsm_state_next = rdct_rdy ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_WAIT_REDUCTOR; - MMM_FSM_STATE_STOP: fsm_state_next = MMM_FSM_STATE_IDLE ; + MMM_FSM_STATE_STOP: fsm_state_next = MMM_FSM_STATE_IDLE ; - default: fsm_state_next = MMM_FSM_STATE_IDLE ; + default: fsm_state_next = MMM_FSM_STATE_IDLE ; endcase // @@ -1005,10 +1031,10 @@ module modexpng_mmm_dual always @(posedge clk or negedge rst_n) // - if (!rst_n) rdct_ena_reg <= 1'b0; + if (!rst_n) rdct_ena_reg <= 1'b0; else case (fsm_state) MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1; - default: rdct_ena_reg <= 1'b0; + default: rdct_ena_reg <= 1'b0; endcase @@ -1021,9 +1047,9 @@ module modexpng_mmm_dual always @(posedge clk or negedge rst_n) // - if (!rst_n) rdy_reg <= 1'b1; + if (!rst_n) rdy_reg <= 1'b1; else begin - if (rdy && ena) rdy_reg <= 1'b0; + if (rdy && ena) rdy_reg <= 1'b0; if (!rdy && (fsm_state == MMM_FSM_STATE_STOP)) rdy_reg <= 1'b1; end @@ -1032,11 +1058,12 @@ module modexpng_mmm_dual // Debug // `ifdef MODEXPNG_ENABLE_DEBUG - real load_cyc_mult = 0.0; - always @(posedge clk) - // - if (dsp_xy_ce_m) - load_cyc_mult <= load_cyc_mult + 1.0; + real load_cyc_mult = 0.0; + always @(posedge clk) + // + if (dsp_xy_ce_m) + load_cyc_mult <= load_cyc_mult + 1.0; `endif + endmodule |