//
// modexpng_mmm_x8_dual
//
// The part of the module documented here declares the port interface and the
// FSM state registers, and instantiates three helpers: a column counter
// (mmm_col_index), a load address generator (transporter) and the read
// address generator for the X operand (din_addr_x).
//
// NOTE(review): "mmm" presumably stands for Montgomery modular multiplier and
// "dual" for the paired X/Y datapaths -- confirm against the project docs.
//
module modexpng_mmm_x8_dual
(
    clk, rst_n,
    ena, rdy,
    mode, transfer,
    index_last,
    x_din, y_din, x_dout, y_dout,
    x_din_addr, y_din_addr, x_dout_addr, y_dout_addr,
    x_din_ena, y_din_ena, x_dout_ena, y_dout_ena,
    x_din_reg_ena, y_din_reg_ena,
    x_din_bank, y_din_bank, x_dout_bank, y_dout_bank,
    load_phase, load_xy_addr, load_xy_addr_vld, load_xy_req,
    load_x_din, load_y_din
);


//
// Includes
//
// NUM_MULTS, WORD_WIDTH, FSM_STATE_WIDTH and the FSM_STATE_* names used below
// are not declared in this file, so they are expected to come from these
// headers.
//
`include "modexpng_parameters.vh"
`include "modexpng_parameters_x8.vh"
`include "modexpng_mmm_fsm.vh"


//
// Parameters
//
// INDEX_WIDTH sizes index_last and the internal address counters. The memory
// address ports are INDEX_WIDTH-3 bits wide, each paired with a 3-bit bank
// port; load_xy_addr is INDEX_WIDTH+1 bits wide.
//
parameter INDEX_WIDTH = 6;


//
// Ports
//
input                                   clk;
input                                   rst_n;

input                                   ena;
output                                  rdy;

input                                   mode;           // multiply: 0 = T1:T1*T1, T2:T2*T1, 1 = T1:T1*T2, T2:T2*T2
                                                        // load/unload: 0 = load, 1 = unload
input                                   transfer;       // 0 = multiply, 1 = load/unload

input  [INDEX_WIDTH-1:0]                index_last;

input  [NUM_MULTS*WORD_WIDTH-1:0]       x_din;
input  [NUM_MULTS*WORD_WIDTH-1:0]       y_din;
output [NUM_MULTS*WORD_WIDTH-1:0]       x_dout;
output [NUM_MULTS*WORD_WIDTH-1:0]       y_dout;

output [INDEX_WIDTH-4:0]                x_din_addr;
output [INDEX_WIDTH-4:0]                y_din_addr;
output [INDEX_WIDTH-4:0]                x_dout_addr;
output [INDEX_WIDTH-4:0]                y_dout_addr;

// The read enables are single-bit; the write enables are one bit per
// multiplier (NUM_MULTS bits).
output [ 1-1:0]                         x_din_ena;
output [ 1-1:0]                         y_din_ena;
output [NUM_MULTS-1:0]                  x_dout_ena;
output [NUM_MULTS-1:0]                  y_dout_ena;

output [ 1-1:0]                         x_din_reg_ena;
output [ 1-1:0]                         y_din_reg_ena;

output [3-1:0]                          x_din_bank;
output [3-1:0]                          y_din_bank;
output [3-1:0]                          x_dout_bank;
output [3-1:0]                          y_dout_bank;

output                                  load_phase;         // 0 = T1, T2; 1 = N, N_COEFF
output [ INDEX_WIDTH:0]                 load_xy_addr;       // address
output                                  load_xy_addr_vld;   // address valid
output                                  load_xy_req;        // data request
input  [WORD_WIDTH-1:0]                 load_x_din;         // data input
input  [WORD_WIDTH-1:0]                 load_y_din;         // data input


//
// FSM State and Next States
//
// fsm_state starts in FSM_STATE_IDLE through its declaration initializer.
// fsm_state_after_idle and fsm_state_after_mult_square are combinational
// helpers; the logic that loads them into fsm_state_next is not in this part
// of the file.
//
reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
reg [FSM_STATE_WIDTH-1:0] fsm_state_after_idle;
reg [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;


//
// FSM Idle Next State
//
// Selects the state that follows idle from {transfer, mode}:
//   transfer = 0           -> start a multiplication (mode is ignored here)
//   transfer = 1, mode = 0 -> start a load
//   transfer = 1, mode = 1 -> stay idle
// All four combinations are listed, so no latch is inferred.
//
always @*
    //
    case ({transfer, mode})
        2'b00,
        2'b01:  fsm_state_after_idle = FSM_STATE_MULT_SQUARE_COL_0_TRIG;
        2'b10:  fsm_state_after_idle = FSM_STATE_LOAD_T1T2_1;
        2'b11:  fsm_state_after_idle = FSM_STATE_IDLE; //unload?  NOTE(review): unload has no state of its own here -- confirm intended
    endcase


//
// Column Counter
//
// The counter sees only clk, index_last and fsm_state_next; it has no reset
// port.
//
wire [ INDEX_WIDTH-4:0] col_index;
wire                    col_index_done;
wire [ INDEX_WIDTH-4:0] col_index_zero;
wire [ INDEX_WIDTH-4:0] col_index_next;
wire [ INDEX_WIDTH-4:0] col_index_prev;

modexpng_mmm_col_index #
(
    .INDEX_WIDTH(INDEX_WIDTH)
)
mmm_col_index
(
    .clk                (clk),
    .index_last         (index_last),
    .fsm_state_next     (fsm_state_next),
    .col_index          (col_index),
    .col_index_done     (col_index_done),
    .col_index_zero     (col_index_zero),
    .col_index_next     (col_index_next),
    .col_index_prev     (col_index_prev)
);


//
// Load Address Generator
//
// The transporter instance drives the load_phase, load_xy_addr,
// load_xy_addr_vld and load_xy_req output ports directly, plus three internal
// flags. load_xy_addr_lsb is load_xy_addr with its top bit dropped.
//
wire [INDEX_WIDTH-1:0] load_xy_addr_lsb = load_xy_addr[INDEX_WIDTH-1:0];
wire                   load_addr_zero;
wire                   load_t1t2_addr_done;
wire                   load_nn_coeff_addr_done;

modexpng_mmm_transporter #
(
    .INDEX_WIDTH(INDEX_WIDTH)
)
transporter
(
    .clk                        (clk),
    .ena                        (ena),
    .index_last                 (index_last),
    .fsm_state                  (fsm_state),
    .fsm_state_next             (fsm_state_next),
    .load_phase                 (load_phase),
    .load_xy_addr               (load_xy_addr),
    .load_xy_addr_vld           (load_xy_addr_vld),
    .load_xy_req                (load_xy_req),
    .load_addr_zero             (load_addr_zero),
    .load_t1t2_addr_done        (load_t1t2_addr_done),
    .load_nn_coeff_addr_done    (load_nn_coeff_addr_done)
);


//
// X, Y Address
//
// din_addr_x drives the x_din_addr, x_din_bank, x_din_ena and x_din_reg_ena
// output ports and exposes its internal address counter. The y_din_* ports
// are not driven in this part of the file.
//
wire [INDEX_WIDTH-1:0] x_din_addr_cnt;
wire [INDEX_WIDTH-1:0] x_din_addr_cnt_last;
wire [          3-1:0] x_din_addr_cnt_lower_prev;
wire [INDEX_WIDTH-4:0] x_din_addr_cnt_upper_prev;

modexpng_mmm_din_addr #
(
    .INDEX_WIDTH(INDEX_WIDTH)
)
din_addr_x
(
    .clk                        (clk),
    .rst_n                      (rst_n),
    .index_last                 (index_last),
    .fsm_state_next             (fsm_state_next),
    .col_index_zero             (col_index_zero),
    .col_index_next             (col_index_next),
    .din_addr                   (x_din_addr),
    .din_bank                   (x_din_bank),
    .din_ena                    (x_din_ena),
    .din_reg_ena                (x_din_reg_ena),
    .din_addr_cnt               (x_din_addr_cnt),
    .din_addr_cnt_last          (x_din_addr_cnt_last),
    .din_addr_cnt_lower_prev    (x_din_addr_cnt_lower_prev),
    .din_addr_cnt_upper_prev    (x_din_addr_cnt_upper_prev)
);
//
// X, Y Output Address
//
// A single instance drives the address, enable and bank output ports for both
// X and Y, from the current FSM state and the load address signals.
//
modexpng_mmm_dout_addr #
(
    .INDEX_WIDTH(INDEX_WIDTH)
)
dout_addr_xy
(
    .clk                        (clk),
    .rst_n                      (rst_n),
    .fsm_state                  (fsm_state),
    .load_xy_addr               (load_xy_addr),
    .load_addr_zero             (load_addr_zero),
    .load_nn_coeff_addr_done    (load_nn_coeff_addr_done),
    .x_dout_addr                (x_dout_addr),
    .y_dout_addr                (y_dout_addr),
    .x_dout_ena                 (x_dout_ena),
    .y_dout_ena                 (y_dout_ena),
    .x_dout_bank                (x_dout_bank),
    .y_dout_bank                (y_dout_bank)
);


//
// Helper Memories ("Scratchpad")
//
// The X and Y read ports of the pad share one address register and one
// enable register. The pad also receives load_xy_addr_lsb and the two load
// data words; how it stores them is inside modexpng_mmm_pad and not visible
// here.
//
reg  [INDEX_WIDTH-1:0] pad_xy_rd_addr;
reg                    pad_xy_rd_ena = 1'b0;
wire [ WORD_WIDTH-1:0] pad_x_rd_dout;
wire [ WORD_WIDTH-1:0] pad_y_rd_dout;

// Constant zero and increment-by-one candidates for the read address register.
wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_zero = {INDEX_WIDTH{1'b0}};
wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_next = pad_xy_rd_addr + 1'b1;

modexpng_mmm_pad pad
(
    .clk                (clk),
    .rst_n              (rst_n),
    .fsm_state          (fsm_state),
    .load_xy_addr_lsb   (load_xy_addr_lsb),
    .load_x_din         (load_x_din),
    .load_y_din         (load_y_din),
    .pad_x_rd_addr      (pad_xy_rd_addr),
    .pad_y_rd_addr      (pad_xy_rd_addr),
    .pad_x_rd_ena       (pad_xy_rd_ena),
    .pad_y_rd_ena       (pad_xy_rd_ena),
    .pad_x_rd_dout      (pad_x_rd_dout),
    .pad_y_rd_dout      (pad_y_rd_dout)
);

// Pad read enable: cleared asynchronously while rst_n is low; otherwise set
// whenever the next FSM state is one of the four MULT_SQUARE states, and
// cleared for every other next state.
always @(posedge clk or negedge rst_n)
    //
    if (!rst_n) begin
        pad_xy_rd_ena <= 1'b0;
    end else case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY:   pad_xy_rd_ena <= 1'b1;
        default:                            pad_xy_rd_ena <= 1'b0;
    endcase

// Pad read address (no reset): restarts from zero when a *_TRIG state is
// entered, advances by one on each *_BUSY state, and is assigned X otherwise.
// NOTE(review): the X is presumably a deliberate don't-care, since the read
// enable is low in those states -- confirm.
always @(posedge clk)
    //
    case (fsm_state_next)
        FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        FSM_STATE_MULT_SQUARE_COL_N_TRIG:   pad_xy_rd_addr <= pad_xy_rd_addr_zero;
        FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        FSM_STATE_MULT_SQUARE_COL_N_BUSY:   pad_xy_rd_addr <= pad_xy_rd_addr_next;
        default:                            pad_xy_rd_addr <= {INDEX_WIDTH{1'bX}};
    endcase


//
// Flags
//
// High when the X read address counter equals its last value.
//
wire mult_square_addr_done = x_din_addr_cnt == x_din_addr_cnt_last;

// State that follows the square multiplication phase, chosen by
// col_index_done. The conditional expression below is intentionally left
// open: its two alternatives and the terminating semicolon follow
// immediately after this span in the file.
always @*
    //
    fsm_state_after_mult_square = col_index_done ?
/*FSM_STATE_MULT_TRIANGLE_TRIG*/FSM_STATE_STOP : FSM_STATE_MULT_SQUARE_COL_N_TRIG;; // // MAC Arrays // reg mac_x_ce = 1'b0; reg mac_x_ce_aux = 1'b0; reg [NUM_MULTS -1:0] mac_x_clr; reg mac_x_clr_aux; reg [NUM_MULTS -2:0] mac_x_casc_a; reg mac_x_casc_a_aux; wire [NUM_MULTS * WORD_WIDTH -1:0] mac_x_a; reg [ 1 * WORD_WIDTH -1:0] mac_x_a_aux; //wire [ 1 * WORD_WIDTH -1:0] mac_x_a_split[0:NUM_MULTS-1]; reg [ 1 * WORD_WIDTH -1:0] mac_x_b; wire [NUM_MULTS * MAC_WIDTH -1:0] mac_x_p; wire [ 1 * MAC_WIDTH -1:0] mac_x_p_aux; reg mac_y_ce = 1'b0; reg mac_y_ce_aux = 1'b0; reg [NUM_MULTS -1:0] mac_y_clr; reg mac_y_clr_aux; reg [NUM_MULTS -2:0] mac_y_casc_a; reg mac_y_casc_a_aux; wire [NUM_MULTS * WORD_WIDTH -1:0] mac_y_a; reg [ 1 * WORD_WIDTH -1:0] mac_y_a_aux; //wire [ 1 * WORD_WIDTH -1:0] mac_y_a_split[0:NUM_MULTS-1]; reg [ 1 * WORD_WIDTH -1:0] mac_y_b; wire [NUM_MULTS * MAC_WIDTH -1:0] mac_y_p; wire [ 1 * MAC_WIDTH -1:0] mac_y_p_aux; modexpng_mac_array mac_array_x ( .clk (clk), .ce (mac_x_ce), .ce_aux (mac_x_ce_aux), .clr (mac_x_clr), .clr_aux (mac_x_clr_aux), .casc_a (mac_x_casc_a), .casc_a_aux (mac_x_casc_a_aux), .a_in (mac_x_a), .a_in_aux (mac_x_a_aux), .b_in (mac_x_b), .p_out (mac_x_p), .p_out_aux (mac_x_p_aux) ); modexpng_mac_array mac_array_y ( .clk (clk), .ce (mac_y_ce), .ce_aux (mac_y_ce_aux), .clr (mac_y_clr), .clr_aux (mac_y_clr_aux), .casc_a (mac_y_casc_a), .casc_a_aux (mac_y_casc_a_aux), .a_in (mac_y_a), .a_in_aux (mac_y_a_aux), .b_in (mac_y_b), .p_out (mac_y_p), .p_out_aux (mac_y_p_aux) ); genvar gen_z; generate for (gen_z=0; gen_z