diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 15:16:58 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 15:16:58 +0300 |
commit | fde62e373fdfcefefb7da10757a3db933160c911 (patch) | |
tree | 7f0a5b37be84af4399a7f629ca062a3cbb147f37 /rtl/modexpng_mmm_x8_dual.v | |
parent | 3ea94c872afe6309c43ac7eccf877734c33f5421 (diff) |
Major rewrite (different core hierarchy, buses, wrappers, etc).
Diffstat (limited to 'rtl/modexpng_mmm_x8_dual.v')
-rw-r--r-- | rtl/modexpng_mmm_x8_dual.v | 550 |
1 files changed, 0 insertions, 550 deletions
//
// modexpng_mmm_x8_dual  (originally rtl/modexpng_mmm_x8_dual.v)
//
// Control and datapath glue for a dual (X/Y) multiplier with NUM_MULTS
// parallel MAC lanes. The module:
//   * runs an FSM that either loads operand words (T1/T2, then N/N_COEFF)
//     from the load_* interface, or steps through the "square" multiply
//     phase column by column;
//   * instantiates the column counter, the load address generator
//     ("transporter"), the DIN/DOUT address generators, the scratchpad
//     memories and two MAC arrays (one for X, one for Y);
//   * derives the MAC clock-enable, clear and cascade controls from the FSM
//     state through two register stages.
//
// NOTE(review): several signals are still not driven anywhere in this module
// and look like work in progress -- the y_din_addr / y_din_bank / y_din_ena /
// y_din_reg_ena outputs, mac_x_b / mac_y_b, and the *_aux MAC inputs
// (a_aux, clr_aux, casc_a_aux; ce_aux stays at its initial 0). The scratchpad
// read data and the MAC products are not consumed here either. Confirm the
// intended wiring before relying on the multiply results.
//
module modexpng_mmm_x8_dual
(
    clk, rst_n,
    ena, rdy,
    mode, transfer,
    index_last,
    x_din, y_din, x_dout, y_dout,
    x_din_addr, y_din_addr, x_dout_addr, y_dout_addr,
    x_din_ena, y_din_ena, x_dout_ena, y_dout_ena, x_din_reg_ena, y_din_reg_ena,
    x_din_bank, y_din_bank, x_dout_bank, y_dout_bank,
    load_phase, load_xy_addr, load_xy_addr_vld, load_xy_req,
    load_x_din, load_y_din
);


    //
    // Includes
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_parameters_x8.vh"
    `include "modexpng_mmm_fsm.vh"


    //
    // Parameters
    //
    parameter INDEX_WIDTH = 6;


    //
    // Ports
    //
    input                                   clk;
    input                                   rst_n;      // asynchronous, active-low

    input                                   ena;
    output                                  rdy;

    input                                   mode;       // multiply: 0 = T1:T1*T1, T2:T2*T1, 1 = T1:T1*T2, T2:T2*T2
                                                        // load/unload: 0 = load, 1 = unload
    input                                   transfer;   // 0 = multiply, 1 = load/unload

    input  [INDEX_WIDTH-1:0]                index_last;

    input  [NUM_MULTS*WORD_WIDTH-1:0]       x_din;
    input  [NUM_MULTS*WORD_WIDTH-1:0]       y_din;
    output [NUM_MULTS*WORD_WIDTH-1:0]       x_dout;
    output [NUM_MULTS*WORD_WIDTH-1:0]       y_dout;

    output [INDEX_WIDTH-4:0]                x_din_addr;
    output [INDEX_WIDTH-4:0]                y_din_addr;
    output [INDEX_WIDTH-4:0]                x_dout_addr;
    output [INDEX_WIDTH-4:0]                y_dout_addr;

    output [        1-1:0]                  x_din_ena;
    output [        1-1:0]                  y_din_ena;
    output [NUM_MULTS-1:0]                  x_dout_ena;
    output [NUM_MULTS-1:0]                  y_dout_ena;
    output [        1-1:0]                  x_din_reg_ena;
    output [        1-1:0]                  y_din_reg_ena;

    output [3-1:0]                          x_din_bank;
    output [3-1:0]                          y_din_bank;
    output [3-1:0]                          x_dout_bank;
    output [3-1:0]                          y_dout_bank;

    output                                  load_phase;         // 0 = T1, T2; 1 = N, N_COEFF
    output [ INDEX_WIDTH:0]                 load_xy_addr;       // address
    output                                  load_xy_addr_vld;   // address valid
    output                                  load_xy_req;        // data request

    input  [WORD_WIDTH-1:0]                 load_x_din;         // data input
    input  [WORD_WIDTH-1:0]                 load_y_din;         // data input


    //
    // FSM State and Next States
    //
    reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
    reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
    reg [FSM_STATE_WIDTH-1:0] fsm_state_after_idle;
    reg [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;


    //
    // FSM Idle Next State
    //
    // Chooses where the FSM goes when 'ena' is seen in IDLE, based on
    // {transfer, mode}. The unload combination currently returns to IDLE.
    //
    always @*
        //
        case ({transfer, mode})
            2'b00,
            2'b01:  fsm_state_after_idle = FSM_STATE_MULT_SQUARE_COL_0_TRIG;
            2'b10:  fsm_state_after_idle = FSM_STATE_LOAD_T1T2_1;
            2'b11:  fsm_state_after_idle = FSM_STATE_IDLE; //unload?
        endcase


    //
    // Column Counter
    //
    wire [ INDEX_WIDTH-4:0] col_index;
    wire                    col_index_done;
    wire [ INDEX_WIDTH-4:0] col_index_zero;
    wire [ INDEX_WIDTH-4:0] col_index_next;
    wire [ INDEX_WIDTH-4:0] col_index_prev;

    modexpng_mmm_col_index #
    (
        .INDEX_WIDTH(INDEX_WIDTH)
    )
    mmm_col_index
    (
        .clk                (clk),
        .index_last         (index_last),
        .fsm_state_next     (fsm_state_next),
        .col_index          (col_index),
        .col_index_done     (col_index_done),
        .col_index_zero     (col_index_zero),
        .col_index_next     (col_index_next),
        .col_index_prev     (col_index_prev)
    );


    //
    // Load Address Generator
    //
    wire [INDEX_WIDTH-1:0] load_xy_addr_lsb = load_xy_addr[INDEX_WIDTH-1:0];
    wire                   load_addr_zero;
    wire                   load_t1t2_addr_done;
    wire                   load_nn_coeff_addr_done;

    modexpng_mmm_transporter #
    (
        .INDEX_WIDTH(INDEX_WIDTH)
    )
    transporter
    (
        .clk                        (clk),
        .ena                        (ena),
        .index_last                 (index_last),
        .fsm_state                  (fsm_state),
        .fsm_state_next             (fsm_state_next),
        .load_phase                 (load_phase),
        .load_xy_addr               (load_xy_addr),
        .load_xy_addr_vld           (load_xy_addr_vld),
        .load_xy_req                (load_xy_req),
        .load_addr_zero             (load_addr_zero),
        .load_t1t2_addr_done        (load_t1t2_addr_done),
        .load_nn_coeff_addr_done    (load_nn_coeff_addr_done)
    );


    //
    // X, Y Address
    //
    wire [INDEX_WIDTH-1:0] x_din_addr_cnt;
    wire [INDEX_WIDTH-1:0] x_din_addr_cnt_last;
    wire [          3-1:0] x_din_addr_cnt_lower_prev;
    wire [INDEX_WIDTH-4:0] x_din_addr_cnt_upper_prev;

    modexpng_mmm_din_addr #
    (
        .INDEX_WIDTH(INDEX_WIDTH)
    )
    din_addr_x
    (
        .clk                        (clk),
        .rst_n                      (rst_n),
        .index_last                 (index_last),
        .fsm_state_next             (fsm_state_next),
        .col_index_zero             (col_index_zero),
        .col_index_next             (col_index_next),
        .din_addr                   (x_din_addr),
        .din_bank                   (x_din_bank),
        .din_ena                    (x_din_ena),
        .din_reg_ena                (x_din_reg_ena),
        .din_addr_cnt               (x_din_addr_cnt),
        .din_addr_cnt_last          (x_din_addr_cnt_last),
        .din_addr_cnt_lower_prev    (x_din_addr_cnt_lower_prev),
        .din_addr_cnt_upper_prev    (x_din_addr_cnt_upper_prev)
    );

    modexpng_mmm_dout_addr #
    (
        .INDEX_WIDTH(INDEX_WIDTH)
    )
    dout_addr_xy
    (
        .clk                        (clk),
        .rst_n                      (rst_n),
        .fsm_state                  (fsm_state),
        .load_xy_addr               (load_xy_addr),
        .load_addr_zero             (load_addr_zero),
        .load_nn_coeff_addr_done    (load_nn_coeff_addr_done),
        .x_dout_addr                (x_dout_addr),
        .y_dout_addr                (y_dout_addr),
        .x_dout_ena                 (x_dout_ena),
        .y_dout_ena                 (y_dout_ena),
        .x_dout_bank                (x_dout_bank),
        .y_dout_bank                (y_dout_bank)
    );


    //
    // Helper Memories ("Scratchpad")
    //
    // X and Y share a single read address and read enable.
    //
    reg  [INDEX_WIDTH-1:0] pad_xy_rd_addr;
    reg                    pad_xy_rd_ena = 1'b0;
    wire [ WORD_WIDTH-1:0] pad_x_rd_dout;
    wire [ WORD_WIDTH-1:0] pad_y_rd_dout;

    wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_zero = {INDEX_WIDTH{1'b0}};
    wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_next = pad_xy_rd_addr + 1'b1;

    modexpng_mmm_pad pad
    (
        .clk                (clk),
        .rst_n              (rst_n),
        .fsm_state          (fsm_state),
        .load_xy_addr_lsb   (load_xy_addr_lsb),
        .load_x_din         (load_x_din),
        .load_y_din         (load_y_din),
        .pad_x_rd_addr      (pad_xy_rd_addr),
        .pad_y_rd_addr      (pad_xy_rd_addr),
        .pad_x_rd_ena       (pad_xy_rd_ena),
        .pad_y_rd_ena       (pad_xy_rd_ena),
        .pad_x_rd_dout      (pad_x_rd_dout),
        .pad_y_rd_dout      (pad_y_rd_dout)
    );

    // Read enable: asserted whenever the *next* state is one of the square
    // multiply states, cleared otherwise and on reset.
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            pad_xy_rd_ena <= 1'b0;
        end else case (fsm_state_next)

            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY:
                pad_xy_rd_ena <= 1'b1;

            default:
                pad_xy_rd_ena <= 1'b0;

        endcase

    // Read address: restarts at zero on every *_TRIG state, increments in
    // every *_BUSY state and is deliberately X (don't care) elsewhere.
    always @(posedge clk)
        //
        case (fsm_state_next)
            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            FSM_STATE_MULT_SQUARE_COL_N_TRIG:
                pad_xy_rd_addr <= pad_xy_rd_addr_zero;

            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY:
                pad_xy_rd_addr <= pad_xy_rd_addr_next;

            default:
                pad_xy_rd_addr <= {INDEX_WIDTH{1'bX}};

        endcase


    //
    // Flags
    //
    wire mult_square_addr_done = x_din_addr_cnt == x_din_addr_cnt_last;

    // After the last column the FSM currently goes straight to STOP; the
    // original source had FSM_STATE_MULT_TRIANGLE_TRIG commented out here.
    always @*
        //
        fsm_state_after_mult_square = col_index_done ?
            /*FSM_STATE_MULT_TRIANGLE_TRIG*/FSM_STATE_STOP : FSM_STATE_MULT_SQUARE_COL_N_TRIG;


    //
    // MAC Arrays
    //
    reg                                 mac_x_ce = 1'b0;
    reg                                 mac_x_ce_aux = 1'b0;
    reg  [NUM_MULTS              -1:0]  mac_x_clr;
    reg                                 mac_x_clr_aux;
    reg  [NUM_MULTS              -2:0]  mac_x_casc_a;
    reg                                 mac_x_casc_a_aux;
    wire [NUM_MULTS * WORD_WIDTH -1:0]  mac_x_a;
    reg  [        1 * WORD_WIDTH -1:0]  mac_x_a_aux;
    reg  [        1 * WORD_WIDTH -1:0]  mac_x_b;
    wire [NUM_MULTS * MAC_WIDTH  -1:0]  mac_x_p;
    wire [        1 * MAC_WIDTH  -1:0]  mac_x_p_aux;

    reg                                 mac_y_ce = 1'b0;
    reg                                 mac_y_ce_aux = 1'b0;
    reg  [NUM_MULTS              -1:0]  mac_y_clr;
    reg                                 mac_y_clr_aux;
    reg  [NUM_MULTS              -2:0]  mac_y_casc_a;
    reg                                 mac_y_casc_a_aux;
    wire [NUM_MULTS * WORD_WIDTH -1:0]  mac_y_a;
    reg  [        1 * WORD_WIDTH -1:0]  mac_y_a_aux;
    reg  [        1 * WORD_WIDTH -1:0]  mac_y_b;
    wire [NUM_MULTS * MAC_WIDTH  -1:0]  mac_y_p;
    wire [        1 * MAC_WIDTH  -1:0]  mac_y_p_aux;

    modexpng_mac_array mac_array_x
    (
        .clk        (clk),
        .ce         (mac_x_ce),
        .ce_aux     (mac_x_ce_aux),
        .clr        (mac_x_clr),
        .clr_aux    (mac_x_clr_aux),
        .casc_a     (mac_x_casc_a),
        .casc_a_aux (mac_x_casc_a_aux),
        .a_in       (mac_x_a),
        .a_in_aux   (mac_x_a_aux),
        .b_in       (mac_x_b),
        .p_out      (mac_x_p),
        .p_out_aux  (mac_x_p_aux)
    );

    modexpng_mac_array mac_array_y
    (
        .clk        (clk),
        .ce         (mac_y_ce),
        .ce_aux     (mac_y_ce_aux),
        .clr        (mac_y_clr),
        .clr_aux    (mac_y_clr_aux),
        .casc_a     (mac_y_casc_a),
        .casc_a_aux (mac_y_casc_a_aux),
        .a_in       (mac_y_a),
        .a_in_aux   (mac_y_a_aux),
        .b_in       (mac_y_b),
        .p_out      (mac_y_p),
        .p_out_aux  (mac_y_p_aux)
    );

    //
    // DIN Mapping: one WORD_WIDTH slice of the wide input bus per MAC lane.
    //
    genvar gen_z;

    generate for (gen_z=0; gen_z<NUM_MULTS; gen_z=gen_z+1)
        begin : gen_xy_din
            assign mac_x_a[gen_z*WORD_WIDTH+:WORD_WIDTH] = x_din[gen_z*WORD_WIDTH+:WORD_WIDTH];
            // The original left mac_y_a undriven; it is wired here the same
            // way as the X path. NOTE(review): confirm this is the intended
            // source for the Y MAC array.
            assign mac_y_a[gen_z*WORD_WIDTH+:WORD_WIDTH] = y_din[gen_z*WORD_WIDTH+:WORD_WIDTH];
        end
    endgenerate


    //
    // MAC Clock Enable Logic
    //
    // Two register stages: fsm_state -> mac_xy_ce_adv -> mac_{x,y}_ce.
    //
    reg mac_xy_ce_adv = 1'b0;

    always @(posedge clk or negedge rst_n)
        //
        if (rst_n == 1'b0)      mac_xy_ce_adv <= 1'b0;
        else case (fsm_state)
            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_xy_ce_adv <= 1'b1;
            default:                            mac_xy_ce_adv <= 1'b0;
        endcase

    always @(posedge clk or negedge rst_n)
        //
        if (rst_n == 1'b0)  {mac_y_ce, mac_x_ce} <= 2'b00;
        else                {mac_y_ce, mac_x_ce} <= {2{mac_xy_ce_adv}};


    //
    // MAC Clear Logic
    //
    // All lanes are cleared on *_TRIG; during *_BUSY the per-lane clear mask
    // comes from calc_mac_clear_square(). Same two-stage pipeline as the
    // clock enable.
    //
    wire [NUM_MULTS-1:0] calc_mac_x_clear_square_value =
        calc_mac_clear_square(col_index_prev, x_din_addr_cnt_lower_prev, x_din_addr_cnt_upper_prev);

    reg [NUM_MULTS-1:0] mac_xy_clr_adv;

    always @(posedge clk)
        //
        case (fsm_state)
            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            FSM_STATE_MULT_SQUARE_COL_N_TRIG:   mac_xy_clr_adv <= {NUM_MULTS{1'b1}};
            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_xy_clr_adv <= calc_mac_x_clear_square_value;
            default:                            mac_xy_clr_adv <= {NUM_MULTS{1'bX}};
        endcase

    always @(posedge clk)
        //
        {mac_y_clr, mac_x_clr} <= {2{mac_xy_clr_adv}};


    //
    // MAC Cascade Logic
    //
    // Cascade inputs are disabled on *_TRIG and enabled during *_BUSY.
    //
    reg [NUM_MULTS-2:0] mac_xy_casc_a_adv;

    always @(posedge clk)
        //
        case (fsm_state)
            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            FSM_STATE_MULT_SQUARE_COL_N_TRIG:   mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'b0}};
            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'b1}};
            default:                            mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'bX}};
        endcase

    always @(posedge clk)
        //
        {mac_y_casc_a, mac_x_casc_a} <= {2{mac_xy_casc_a_adv}};


    //
    // DOUT
    //
    // Declared ahead of the mapping generate below: Verilog requires a
    // variable to be declared before it is referenced.
    //
    reg [WORD_WIDTH-1:0] x_dout_reg[0:NUM_MULTS-1];
    reg [WORD_WIDTH-1:0] y_dout_reg[0:NUM_MULTS-1];


    //
    // DOUT Mapping
    //
    generate for (gen_z=0; gen_z<NUM_MULTS; gen_z=gen_z+1)
        begin : gen_xy_dout
            assign x_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = x_dout_reg[gen_z];
            assign y_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = y_dout_reg[gen_z];
        end
    endgenerate


    // In the third step of each load cycle the incoming word is broadcast to
    // every lane's output register; outside those states the registers are
    // deliberately X (don't care).
    integer int_z;
    always @(posedge clk)
        //
        case (fsm_state)
            //
            FSM_STATE_LOAD_T1T2_3,
            FSM_STATE_LOAD_NN_COEFF_3:
                for (int_z=0; int_z<NUM_MULTS; int_z=int_z+1) begin
                    x_dout_reg[int_z] <= load_x_din;
                    y_dout_reg[int_z] <= load_y_din;
                end
            //
            default:
                for (int_z=0; int_z<NUM_MULTS; int_z=int_z+1) begin
                    x_dout_reg[int_z] <= {WORD_WIDTH{1'bX}};
                    y_dout_reg[int_z] <= {WORD_WIDTH{1'bX}};
                end
            //
        endcase


    //
    // FSM Process
    //
    always @(posedge clk or negedge rst_n)
        //
        if (rst_n == 1'b0)  fsm_state <= FSM_STATE_IDLE;
        else                fsm_state <= fsm_state_next;


    //
    // FSM Transition Logic
    //
    always @* begin
        //
        // Default keeps the block purely combinational for any state value
        // that is not listed below.
        fsm_state_next = FSM_STATE_IDLE;
        //
        case (fsm_state)
            FSM_STATE_IDLE:                     fsm_state_next = ena ? fsm_state_after_idle : FSM_STATE_IDLE;

            FSM_STATE_LOAD_T1T2_1:              fsm_state_next = FSM_STATE_LOAD_T1T2_2 ;
            FSM_STATE_LOAD_T1T2_2:              fsm_state_next = FSM_STATE_LOAD_T1T2_3 ;
            FSM_STATE_LOAD_T1T2_3:              fsm_state_next = load_t1t2_addr_done ? FSM_STATE_LOAD_NN_COEFF_1 : FSM_STATE_LOAD_T1T2_1;

            FSM_STATE_LOAD_NN_COEFF_1:          fsm_state_next = FSM_STATE_LOAD_NN_COEFF_2 ;
            FSM_STATE_LOAD_NN_COEFF_2:          fsm_state_next = FSM_STATE_LOAD_NN_COEFF_3 ;
            FSM_STATE_LOAD_NN_COEFF_3:          fsm_state_next = load_nn_coeff_addr_done ? FSM_STATE_STOP : FSM_STATE_LOAD_NN_COEFF_1;

            FSM_STATE_MULT_SQUARE_COL_0_TRIG:   fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
            FSM_STATE_MULT_SQUARE_COL_0_BUSY:   fsm_state_next = mult_square_addr_done ? FSM_STATE_MULT_SQUARE_COL_N_TRIG : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
            FSM_STATE_MULT_SQUARE_COL_N_TRIG:   fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
            FSM_STATE_MULT_SQUARE_COL_N_BUSY:   fsm_state_next = mult_square_addr_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;

            /*
            FSM_STATE_TRIANGLE_COL_0_TRIG:      fsm_state_next = FSM_STATE_TRIANGLE_COL_0_BUSY ;
            FSM_STATE_TRIANGLE_COL_0_BUSY:      fsm_state_next = din_addr_narrow_done ? FSM_STATE_TRIANGLE_COL_N_TRIG : FSM_STATE_TRIANGLE_COL_0_BUSY;
            FSM_STATE_TRIANGLE_COL_N_TRIG:      fsm_state_next = FSM_STATE_TRIANGLE_COL_N_BUSY ;
            FSM_STATE_TRIANGLE_COL_N_BUSY:      fsm_state_next = din_addr_narrow_done ? fsm_state_after_triangle : FSM_STATE_TRIANGLE_COL_N_BUSY;

            FSM_STATE_RECTANGLE_COL_0_TRIG:     fsm_state_next = FSM_STATE_RECTANGLE_COL_0_BUSY ;
            FSM_STATE_RECTANGLE_COL_0_BUSY:     fsm_state_next = din_addr_narrow_done ? FSM_STATE_RECTANGLE_COL_N_TRIG : FSM_STATE_RECTANGLE_COL_0_BUSY;
            FSM_STATE_RECTANGLE_COL_N_TRIG:     fsm_state_next = FSM_STATE_RECTANGLE_COL_N_BUSY ;
            FSM_STATE_RECTANGLE_COL_N_BUSY:     fsm_state_next = din_addr_narrow_done ? fsm_state_after_rectangle : FSM_STATE_RECTANGLE_COL_N_BUSY;
            */

            FSM_STATE_STOP:                     fsm_state_next = FSM_STATE_IDLE ;

        endcase
        //
    end


    //
    // Ready Output
    //
    // Drops when 'ena' is accepted in IDLE, rises again in STOP; holds its
    // value in every other state.
    //
    reg rdy_reg = 1'b1;
    assign rdy = rdy_reg;

    always @(posedge clk or negedge rst_n)
        //
        if (rst_n == 1'b0)      rdy_reg <= 1'b1;
        else case (fsm_state)
            FSM_STATE_IDLE:     if (ena) rdy_reg <= 1'b0;
            FSM_STATE_STOP:     rdy_reg <= 1'b1;
        endcase


    //
    // calc_mac_clear_square
    //
    // Returns the per-lane MAC clear mask for the square phase: when the
    // upper part of the (delayed) DIN address counter equals the (delayed)
    // column index, the lane selected by the lower three counter bits is
    // cleared (one-hot); otherwise no lane is cleared.
    //
    // The one-hot value is built with a shift so that its width follows
    // NUM_MULTS instead of fixed 8-bit literals; for NUM_MULTS = 8 the result
    // is identical to the original lookup table.
    //
    function  [      NUM_MULTS-1:0] calc_mac_clear_square;
        input [    INDEX_WIDTH-4:0] col_index_delayed;
        input [              3-1:0] x_din_addr_cnt_lower_delayed;
        input [    INDEX_WIDTH-4:0] x_din_addr_cnt_upper_delayed;
        begin
            if (x_din_addr_cnt_upper_delayed == col_index_delayed)
                calc_mac_clear_square = {{(NUM_MULTS-1){1'b0}}, 1'b1} << x_din_addr_cnt_lower_delayed;
            else
                calc_mac_clear_square = {NUM_MULTS{1'b0}};
        end
    endfunction


endmodule