aboutsummaryrefslogtreecommitdiff
path: root/rtl/modexpng_mmm_x8_dual.v
diff options
context:
space:
mode:
Diffstat (limited to 'rtl/modexpng_mmm_x8_dual.v')
-rw-r--r-- rtl/modexpng_mmm_x8_dual.v 550
1 files changed, 0 insertions, 550 deletions
diff --git a/rtl/modexpng_mmm_x8_dual.v b/rtl/modexpng_mmm_x8_dual.v
deleted file mode 100644
index 99a37fa..0000000
--- a/rtl/modexpng_mmm_x8_dual.v
+++ /dev/null
@@ -1,550 +0,0 @@
-module modexpng_mmm_x8_dual
-(
- clk, rst_n,
- ena, rdy,
- mode, transfer,
- index_last,
- x_din, y_din, x_dout, y_dout,
- x_din_addr, y_din_addr, x_dout_addr, y_dout_addr,
- x_din_ena, y_din_ena, x_dout_ena, y_dout_ena, x_din_reg_ena, y_din_reg_ena,
- x_din_bank, y_din_bank, x_dout_bank, y_dout_bank,
- load_phase, load_xy_addr, load_xy_addr_vld, load_xy_req,
- load_x_din, load_y_din
-);
- // Overview: an FSM (IDLE -> LOAD_* or MULT_SQUARE_* -> STOP -> IDLE) sequences a column counter,
- // a load address generator, din/dout address generators, a scratchpad and two MAC arrays (X and Y).
- //
- // Includes
- //
- `include "modexpng_parameters.vh"
- `include "modexpng_parameters_x8.vh"
- `include "modexpng_mmm_fsm.vh"
- // NOTE(review): NUM_MULTS, WORD_WIDTH, MAC_WIDTH, FSM_STATE_WIDTH and the FSM_STATE_* names are not declared in this file; presumably they come from the headers above.
-
- //
- // Parameters
- //
- parameter INDEX_WIDTH = 6; // width of index_last; the bank-internal address ports below are INDEX_WIDTH-3 bits wide
-
-
- //
- // Ports
- //
- input clk;
- input rst_n; // active-low reset, used asynchronously (negedge rst_n) by the registers in this module
-
- input ena; // start request; only acted upon while the FSM is in IDLE
- output rdy; // high after reset, cleared when ena is seen in IDLE, set again in STOP
-
- input mode; // multiply: 0 = T1:T1*T1, T2:T2*T1, 1 = T1:T1*T2, T2:T2*T2
- // load/unload: 0 = load, 1 = unload
- input transfer; // 0 = multiply, 1 = load/unload
-
- input [INDEX_WIDTH-1:0] index_last;
-
- input [NUM_MULTS*WORD_WIDTH-1:0] x_din;
- input [NUM_MULTS*WORD_WIDTH-1:0] y_din;
- output [NUM_MULTS*WORD_WIDTH-1:0] x_dout;
- output [NUM_MULTS*WORD_WIDTH-1:0] y_dout;
-
- output [INDEX_WIDTH-4:0] x_din_addr;
- output [INDEX_WIDTH-4:0] y_din_addr;
- output [INDEX_WIDTH-4:0] x_dout_addr;
- output [INDEX_WIDTH-4:0] y_dout_addr;
-
- output [ 1-1:0] x_din_ena;
- output [ 1-1:0] y_din_ena;
- output [NUM_MULTS-1:0] x_dout_ena;
- output [NUM_MULTS-1:0] y_dout_ena;
- output [ 1-1:0] x_din_reg_ena;
- output [ 1-1:0] y_din_reg_ena;
-
- output [3-1:0] x_din_bank;
- output [3-1:0] y_din_bank;
- output [3-1:0] x_dout_bank;
- output [3-1:0] y_dout_bank;
-
- output load_phase; // 0 = T1, T2; 1 = N, N_COEFF
- output [ INDEX_WIDTH:0] load_xy_addr; // address
- output load_xy_addr_vld; // address valid
- output load_xy_req; // data request
-
- input [WORD_WIDTH-1:0] load_x_din; // data input
- input [WORD_WIDTH-1:0] load_y_din; // data input
-
-
- //
- // FSM State and Next States
- //
- reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;   // registered current state
- reg [FSM_STATE_WIDTH-1:0] fsm_state_next;               // combinational next state
- reg [FSM_STATE_WIDTH-1:0] fsm_state_after_idle;         // combinational: target when leaving IDLE
- reg [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;  // combinational: target after an N-th column
-
-
- //
- // FSM Idle Next State: decodes {transfer, mode} into the state entered when IDLE sees ena
- //
- wire [1:0] idle_cmd = {transfer, mode};
- always @* begin
-     case (idle_cmd)
-         2'b00: fsm_state_after_idle = FSM_STATE_MULT_SQUARE_COL_0_TRIG; // multiply, mode 0
-         2'b01: fsm_state_after_idle = FSM_STATE_MULT_SQUARE_COL_0_TRIG; // multiply, mode 1
-         2'b10: fsm_state_after_idle = FSM_STATE_LOAD_T1T2_1;            // load
-         2'b11: fsm_state_after_idle = FSM_STATE_IDLE;                   // unload? (currently stays in IDLE)
-     endcase
- end
-
- //
- // Column Counter
- //
- wire [ INDEX_WIDTH-4:0] col_index; // not read anywhere else in this module
- wire col_index_done; // selects STOP vs. the next column trigger in fsm_state_after_mult_square
- wire [ INDEX_WIDTH-4:0] col_index_zero; // forwarded to the din_addr_x instance
- wire [ INDEX_WIDTH-4:0] col_index_next; // forwarded to the din_addr_x instance
- wire [ INDEX_WIDTH-4:0] col_index_prev; // first argument of calc_mac_clear_square
- // The wires above have no driver in this file other than the instance below (presumably its outputs; the sub-module is not shown here).
- modexpng_mmm_col_index #
- (
- .INDEX_WIDTH(INDEX_WIDTH)
- )
- mmm_col_index
- (
- .clk (clk),
- .index_last (index_last),
- .fsm_state_next (fsm_state_next),
- .col_index (col_index),
- .col_index_done (col_index_done),
- .col_index_zero (col_index_zero),
- .col_index_next (col_index_next),
- .col_index_prev (col_index_prev)
- );
-
-
- //
- // Load Address Generator
- //
- wire [INDEX_WIDTH-1:0] load_xy_addr_lsb = load_xy_addr[INDEX_WIDTH-1:0]; // load address without its top bit; fed to the scratchpad
- wire load_addr_zero; // forwarded to the dout_addr_xy instance
- wire load_t1t2_addr_done; // ends the LOAD_T1T2_* loop in the FSM
- wire load_nn_coeff_addr_done; // ends the LOAD_NN_COEFF_* loop in the FSM; also forwarded to dout_addr_xy
- // The load_* module outputs are connected straight to this instance (presumably driven by it; the sub-module is not shown here).
- modexpng_mmm_transporter #
- (
- .INDEX_WIDTH(INDEX_WIDTH)
- )
- transporter
- (
- .clk (clk),
- .ena (ena),
- .index_last (index_last),
- .fsm_state (fsm_state),
- .fsm_state_next (fsm_state_next),
- .load_phase (load_phase),
- .load_xy_addr (load_xy_addr),
- .load_xy_addr_vld (load_xy_addr_vld),
- .load_xy_req (load_xy_req),
- .load_addr_zero (load_addr_zero),
- .load_t1t2_addr_done (load_t1t2_addr_done),
- .load_nn_coeff_addr_done (load_nn_coeff_addr_done)
- );
-
-
- //
- // X, Y Address
- //
- wire [INDEX_WIDTH-1:0] x_din_addr_cnt; // compared with x_din_addr_cnt_last to form mult_square_addr_done
- wire [INDEX_WIDTH-1:0] x_din_addr_cnt_last;
- wire [ 3-1:0] x_din_addr_cnt_lower_prev; // second argument of calc_mac_clear_square
- wire [INDEX_WIDTH-4:0] x_din_addr_cnt_upper_prev; // third argument of calc_mac_clear_square
- // NOTE(review): only an X-side din address generator is instantiated; y_din_addr, y_din_bank, y_din_ena and y_din_reg_ena are not driven in this file.
- modexpng_mmm_din_addr #
- (
- .INDEX_WIDTH(INDEX_WIDTH)
- )
- din_addr_x
- (
- .clk (clk),
- .rst_n (rst_n),
- .index_last (index_last),
- .fsm_state_next (fsm_state_next),
- .col_index_zero (col_index_zero),
- .col_index_next (col_index_next),
- .din_addr (x_din_addr),
- .din_bank (x_din_bank),
- .din_ena (x_din_ena),
- .din_reg_ena (x_din_reg_ena),
- .din_addr_cnt (x_din_addr_cnt),
- .din_addr_cnt_last (x_din_addr_cnt_last),
- .din_addr_cnt_lower_prev (x_din_addr_cnt_lower_prev),
- .din_addr_cnt_upper_prev (x_din_addr_cnt_upper_prev)
- );
- // A single instance serves the dout address, enable and bank ports of both the X and the Y side.
- modexpng_mmm_dout_addr #
- (
- .INDEX_WIDTH(INDEX_WIDTH)
- )
- dout_addr_xy
- (
- .clk (clk),
- .rst_n (rst_n),
- .fsm_state (fsm_state),
- .load_xy_addr (load_xy_addr),
- .load_addr_zero (load_addr_zero),
- .load_nn_coeff_addr_done (load_nn_coeff_addr_done),
- .x_dout_addr (x_dout_addr),
- .y_dout_addr (y_dout_addr),
- .x_dout_ena (x_dout_ena),
- .y_dout_ena (y_dout_ena),
- .x_dout_bank (x_dout_bank),
- .y_dout_bank (y_dout_bank)
- );
-
-
- //
- // Helper Memories ("Scratchpad")
- //
- reg [INDEX_WIDTH-1:0] pad_xy_rd_addr; // read address shared by the X and Y scratchpad ports
- reg pad_xy_rd_ena = 1'b0; // read enable shared by the X and Y scratchpad ports
- wire [ WORD_WIDTH-1:0] pad_x_rd_dout; // not read anywhere else in this module
- wire [ WORD_WIDTH-1:0] pad_y_rd_dout; // not read anywhere else in this module
-
- wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_zero = {INDEX_WIDTH{1'b0}};
- wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_next = pad_xy_rd_addr + 1'b1; // wraps modulo 2**INDEX_WIDTH
- // NOTE(review): unlike the other sub-modules, this instance gets no INDEX_WIDTH override - confirm the pad's own default matches.
- modexpng_mmm_pad pad
- (
- .clk (clk),
- .rst_n (rst_n),
- .fsm_state (fsm_state),
- .load_xy_addr_lsb (load_xy_addr_lsb),
- .load_x_din (load_x_din),
- .load_y_din (load_y_din),
- .pad_x_rd_addr (pad_xy_rd_addr),
- .pad_y_rd_addr (pad_xy_rd_addr),
- .pad_x_rd_ena (pad_xy_rd_ena),
- .pad_y_rd_ena (pad_xy_rd_ena),
- .pad_x_rd_dout (pad_x_rd_dout),
- .pad_y_rd_dout (pad_y_rd_dout)
- );
-
-
- // Scratchpad read enable. Registered from fsm_state_next: set when the next state is
- // any MULT_SQUARE_* state, cleared for every other next state and by the
- // asynchronous active-low reset.
- always @(posedge clk or negedge rst_n) begin
-     if (rst_n == 1'b0)
-         pad_xy_rd_ena <= 1'b0;
-     else begin
-         pad_xy_rd_ena <= 1'b0; // default, overridden by the case below
-         case (fsm_state_next)
-             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-             FSM_STATE_MULT_SQUARE_COL_N_BUSY:
-                 pad_xy_rd_ena <= 1'b1;
-         endcase
-     end
- end
-
- // Scratchpad read address (no reset). Restarts at zero when the next state is a
- // *_TRIG state, increments when it is a *_BUSY state, and is driven to X
- // (don't-care) for any other next state.
- always @(posedge clk) begin
-     pad_xy_rd_addr <= {INDEX_WIDTH{1'bX}}; // default, overridden by the case below
-     case (fsm_state_next)
-         FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-         FSM_STATE_MULT_SQUARE_COL_N_TRIG:
-             pad_xy_rd_addr <= pad_xy_rd_addr_zero;
-         FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-         FSM_STATE_MULT_SQUARE_COL_N_BUSY:
-             pad_xy_rd_addr <= pad_xy_rd_addr_next;
-     endcase
- end
-
-
-
-
- //
- // Flags
- //
- // mult_square_addr_done: the X din address counter equals its last value (ends the current column).
- wire mult_square_addr_done = x_din_addr_cnt == x_din_addr_cnt_last;
-
- // State entered after an N-th column completes: STOP once the column counter reports done, otherwise
- // the next column trigger. (The original code carried a commented-out FSM_STATE_MULT_TRIANGLE_TRIG in place of STOP.)
- always @* fsm_state_after_mult_square = col_index_done ? FSM_STATE_STOP : FSM_STATE_MULT_SQUARE_COL_N_TRIG;
-
-
- //
- // MAC Arrays
- //
- reg mac_x_ce = 1'b0; // clock enable, registered from mac_xy_ce_adv
- reg mac_x_ce_aux = 1'b0; // never assigned after initialisation in this file
- reg [NUM_MULTS -1:0] mac_x_clr; // per-MAC clear, registered from mac_xy_clr_adv
- reg mac_x_clr_aux; // NOTE(review): never assigned in this file
- reg [NUM_MULTS -2:0] mac_x_casc_a; // cascade select, registered from mac_xy_casc_a_adv
- reg mac_x_casc_a_aux; // NOTE(review): never assigned in this file
- wire [NUM_MULTS * WORD_WIDTH -1:0] mac_x_a; // NUM_MULTS words, one per MAC
- reg [ 1 * WORD_WIDTH -1:0] mac_x_a_aux; // NOTE(review): never assigned in this file
- //wire [ 1 * WORD_WIDTH -1:0] mac_x_a_split[0:NUM_MULTS-1];
- reg [ 1 * WORD_WIDTH -1:0] mac_x_b; // NOTE(review): never assigned in this file
- wire [NUM_MULTS * MAC_WIDTH -1:0] mac_x_p; // not read anywhere else in this module
- wire [ 1 * MAC_WIDTH -1:0] mac_x_p_aux; // not read anywhere else in this module
- // Y side: same set of signals as the X side above.
- reg mac_y_ce = 1'b0;
- reg mac_y_ce_aux = 1'b0;
- reg [NUM_MULTS -1:0] mac_y_clr;
- reg mac_y_clr_aux;
- reg [NUM_MULTS -2:0] mac_y_casc_a;
- reg mac_y_casc_a_aux;
- wire [NUM_MULTS * WORD_WIDTH -1:0] mac_y_a;
- reg [ 1 * WORD_WIDTH -1:0] mac_y_a_aux;
- //wire [ 1 * WORD_WIDTH -1:0] mac_y_a_split[0:NUM_MULTS-1];
- reg [ 1 * WORD_WIDTH -1:0] mac_y_b;
- wire [NUM_MULTS * MAC_WIDTH -1:0] mac_y_p;
- wire [ 1 * MAC_WIDTH -1:0] mac_y_p_aux;
- // Two instances of the same MAC array module, one per side; their port lists are wired identically.
- modexpng_mac_array mac_array_x
- (
- .clk (clk),
- .ce (mac_x_ce),
- .ce_aux (mac_x_ce_aux),
- .clr (mac_x_clr),
- .clr_aux (mac_x_clr_aux),
- .casc_a (mac_x_casc_a),
- .casc_a_aux (mac_x_casc_a_aux),
- .a_in (mac_x_a),
- .a_in_aux (mac_x_a_aux),
- .b_in (mac_x_b),
- .p_out (mac_x_p),
- .p_out_aux (mac_x_p_aux)
- );
-
- modexpng_mac_array mac_array_y
- (
- .clk (clk),
- .ce (mac_y_ce),
- .ce_aux (mac_y_ce_aux),
- .clr (mac_y_clr),
- .clr_aux (mac_y_clr_aux),
- .casc_a (mac_y_casc_a),
- .casc_a_aux (mac_y_casc_a_aux),
- .a_in (mac_y_a),
- .a_in_aux (mac_y_a_aux),
- .b_in (mac_y_b),
- .p_out (mac_y_p),
- .p_out_aux (mac_y_p_aux)
- );
-
- genvar gen_z;
-
- //
- // DIN Mapping: lane gen_z of each MAC "a" operand bus takes the matching WORD_WIDTH slice
- // of the corresponding data input. The Y lane mirrors the X lane so that mac_y_a,
- // which feeds mac_array_y.a_in, is no longer left without a driver.
- //
- generate for (gen_z=0; gen_z<NUM_MULTS; gen_z=gen_z+1)
-     begin : gen_xy_din
-         assign mac_x_a[gen_z*WORD_WIDTH+:WORD_WIDTH] = x_din[gen_z*WORD_WIDTH+:WORD_WIDTH];
-         assign mac_y_a[gen_z*WORD_WIDTH+:WORD_WIDTH] = y_din[gen_z*WORD_WIDTH+:WORD_WIDTH];
-     end
- endgenerate
-
-
- //
- // MAC Clock Enable Logic
- //
- // Pipeline: fsm_state in a MULT_SQUARE_* state -> mac_xy_ce_adv -> mac_x_ce / mac_y_ce,
- // one clock per arrow. All three registers clear on the asynchronous reset.
- reg mac_xy_ce_adv = 1'b0;
-
- always @(posedge clk or negedge rst_n) begin
-     if (!rst_n)
-         {mac_y_ce, mac_x_ce, mac_xy_ce_adv} <= 3'b000;
-     else begin
-         mac_x_ce      <= mac_xy_ce_adv; // second stage samples the previous first-stage value
-         mac_y_ce      <= mac_xy_ce_adv;
-         mac_xy_ce_adv <= 1'b0;          // default, overridden by the case below
-         case (fsm_state)
-             FSM_STATE_MULT_SQUARE_COL_0_TRIG, FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-             FSM_STATE_MULT_SQUARE_COL_0_BUSY, FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_xy_ce_adv <= 1'b1;
-         endcase
-     end
- end
-
-
- //
- // MAC Clear Logic
- //
- wire [NUM_MULTS-1:0] calc_mac_x_clear_square_value =
-     calc_mac_clear_square(col_index_prev, x_din_addr_cnt_lower_prev, x_din_addr_cnt_upper_prev);
-
- reg [NUM_MULTS-1:0] mac_xy_clr_adv;
-
- // First stage: clear every MAC in a *_TRIG state, use the computed mask in a *_BUSY state,
- // X (don't-care) otherwise. Second stage: copy to both arrays one clock later. No reset.
- always @(posedge clk) begin
-     mac_x_clr      <= mac_xy_clr_adv;
-     mac_y_clr      <= mac_xy_clr_adv;
-     mac_xy_clr_adv <= {NUM_MULTS{1'bX}}; // default, overridden by the case below
-     case (fsm_state)
-         FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-         FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_xy_clr_adv <= {NUM_MULTS{1'b1}};
-         FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-         FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_xy_clr_adv <= calc_mac_x_clear_square_value;
-     endcase
- end
-
-
- //
- // MAC Cascade Logic
- //
- reg [NUM_MULTS-2:0] mac_xy_casc_a_adv;
-
- // First stage: all zeros in a *_TRIG state, all ones in a *_BUSY state, X (don't-care)
- // otherwise. Second stage: copy to both arrays one clock later. No reset.
- always @(posedge clk) begin
-     mac_x_casc_a      <= mac_xy_casc_a_adv;
-     mac_y_casc_a      <= mac_xy_casc_a_adv;
-     mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'bX}}; // default, overridden by the case below
-     case (fsm_state)
-         FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-         FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'b0}};
-         FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-         FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_xy_casc_a_adv <= {(NUM_MULTS-1){1'b1}};
-     endcase
- end
-
-
-
- //
- // DOUT registers: one WORD_WIDTH word per lane, declared ahead of the generate block that reads them
- //
- reg [WORD_WIDTH-1:0] x_dout_reg[0:NUM_MULTS-1];
- reg [WORD_WIDTH-1:0] y_dout_reg[0:NUM_MULTS-1];
-
-
- //
- // DOUT Mapping: flatten the per-lane registers onto the packed x_dout / y_dout output buses
- //
- generate for (gen_z=0; gen_z<NUM_MULTS; gen_z=gen_z+1)
-     begin : gen_xy_dout
-         assign x_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = x_dout_reg[gen_z];
-         assign y_dout[gen_z*WORD_WIDTH+:WORD_WIDTH] = y_dout_reg[gen_z];
-     end
- endgenerate
-
-
-
-
- // DOUT register update (no reset). In the third step of either load loop every lane
- // captures the same load_x_din / load_y_din word; in all other states the registers
- // are driven to X (don't-care).
- integer lane;
- always @(posedge clk) begin
-     for (lane=0; lane<NUM_MULTS; lane=lane+1) begin
-         case (fsm_state)
-             FSM_STATE_LOAD_T1T2_3,
-             FSM_STATE_LOAD_NN_COEFF_3: begin
-                 x_dout_reg[lane] <= load_x_din;
-                 y_dout_reg[lane] <= load_y_din;
-             end
-             default: begin
-                 x_dout_reg[lane] <= {WORD_WIDTH{1'bX}};
-                 y_dout_reg[lane] <= {WORD_WIDTH{1'bX}};
-             end
-         endcase
-     end
- end
-
-
-
- //
- // FSM Process: state register with asynchronous active-low reset to IDLE
- //
- always @(posedge clk or negedge rst_n) begin
-     if (!rst_n) fsm_state <= FSM_STATE_IDLE;
-     else        fsm_state <= fsm_state_next;
- end
-
-
- //
- // FSM Transition Logic
- //
- // Purely combinational. IDLE waits for ena; each load loop runs three steps per word
- // until its address-done flag is set; each multiply column is one TRIG cycle followed
- // by BUSY cycles until mult_square_addr_done. Any state not listed falls back to IDLE.
- always @* begin
-     case (fsm_state)
-         FSM_STATE_IDLE:                   fsm_state_next = ena ? fsm_state_after_idle : FSM_STATE_IDLE;
-
-         FSM_STATE_LOAD_T1T2_1:            fsm_state_next = FSM_STATE_LOAD_T1T2_2;
-         FSM_STATE_LOAD_T1T2_2:            fsm_state_next = FSM_STATE_LOAD_T1T2_3;
-         FSM_STATE_LOAD_T1T2_3:            fsm_state_next = load_t1t2_addr_done ? FSM_STATE_LOAD_NN_COEFF_1 : FSM_STATE_LOAD_T1T2_1;
-
-         FSM_STATE_LOAD_NN_COEFF_1:        fsm_state_next = FSM_STATE_LOAD_NN_COEFF_2;
-         FSM_STATE_LOAD_NN_COEFF_2:        fsm_state_next = FSM_STATE_LOAD_NN_COEFF_3;
-         FSM_STATE_LOAD_NN_COEFF_3:        fsm_state_next = load_nn_coeff_addr_done ? FSM_STATE_STOP : FSM_STATE_LOAD_NN_COEFF_1;
-
-         FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY;
-         FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_done ? FSM_STATE_MULT_SQUARE_COL_N_TRIG : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
-         FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY;
-         FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
-
- /*
- FSM_STATE_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_TRIANGLE_COL_0_BUSY ;
- FSM_STATE_TRIANGLE_COL_0_BUSY: fsm_state_next = din_addr_narrow_done ? FSM_STATE_TRIANGLE_COL_N_TRIG : FSM_STATE_TRIANGLE_COL_0_BUSY;
- FSM_STATE_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_TRIANGLE_COL_N_BUSY ;
- FSM_STATE_TRIANGLE_COL_N_BUSY: fsm_state_next = din_addr_narrow_done ? fsm_state_after_triangle : FSM_STATE_TRIANGLE_COL_N_BUSY;
-
- FSM_STATE_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_RECTANGLE_COL_0_BUSY ;
- FSM_STATE_RECTANGLE_COL_0_BUSY: fsm_state_next = din_addr_narrow_done ? FSM_STATE_RECTANGLE_COL_N_TRIG : FSM_STATE_RECTANGLE_COL_0_BUSY;
- FSM_STATE_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_RECTANGLE_COL_N_BUSY ;
- FSM_STATE_RECTANGLE_COL_N_BUSY: fsm_state_next = din_addr_narrow_done ? fsm_state_after_rectangle : FSM_STATE_RECTANGLE_COL_N_BUSY;
- */
-
-         FSM_STATE_STOP:                   fsm_state_next = FSM_STATE_IDLE;
-
-         default:                          fsm_state_next = FSM_STATE_IDLE;
-     endcase
- end
-
-
- //
- // Ready Output
- //
- reg rdy_reg = 1'b1;
- assign rdy = rdy_reg;
-
- // rdy is high after reset, drops on the clock edge at which IDLE sees ena, and rises
- // again on the clock edge at which the FSM is in STOP; it holds its value otherwise.
- always @(posedge clk or negedge rst_n) begin
-     if (!rst_n)                                  rdy_reg <= 1'b1;
-     else if (fsm_state == FSM_STATE_STOP)        rdy_reg <= 1'b1;
-     else if (fsm_state == FSM_STATE_IDLE && ena) rdy_reg <= 1'b0;
- end
-
- //
- // calc_mac_clear_square
- //
- // Returns the NUM_MULTS-bit MAC clear mask used while a column is busy.
- //   col_index_delayed            - column index to compare against
- //   x_din_addr_cnt_upper_delayed - upper part of the din address counter
- //   x_din_addr_cnt_lower_delayed - lower 3 bits of the din address counter
- // When the upper address part equals the column index, the result is one-hot with
- // bit number x_din_addr_cnt_lower_delayed set; otherwise it is all zeros. The one-hot
- // value is built with a shift so its width follows NUM_MULTS instead of a fixed 8 bits.
- function [ NUM_MULTS-1:0] calc_mac_clear_square;
-     input [INDEX_WIDTH-4:0] col_index_delayed;
-     input [ 3-1:0] x_din_addr_cnt_lower_delayed;
-     input [INDEX_WIDTH-4:0] x_din_addr_cnt_upper_delayed;
-     begin
-         if (x_din_addr_cnt_upper_delayed == col_index_delayed)
-             calc_mac_clear_square = {{(NUM_MULTS-1){1'b0}}, 1'b1} << x_din_addr_cnt_lower_delayed;
-         else
-             calc_mac_clear_square = {NUM_MULTS{1'b0}};
-     end
- endfunction
-
-
-endmodule