From 29fb6afd018c601a2e0c7376656d5e37beb565d6 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 15:01:43 +0300 Subject: Started working on the pipelined Montgomery modular multiplier. Currently can do the "square" part of the multiplication, i.e. compute the twice larger intermediate product AB = A * B. --- rtl/modexpng_mmm_x8_dual.v | 550 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 550 insertions(+) create mode 100644 rtl/modexpng_mmm_x8_dual.v (limited to 'rtl/modexpng_mmm_x8_dual.v') diff --git a/rtl/modexpng_mmm_x8_dual.v b/rtl/modexpng_mmm_x8_dual.v new file mode 100644 index 0000000..99a37fa --- /dev/null +++ b/rtl/modexpng_mmm_x8_dual.v @@ -0,0 +1,550 @@ +module modexpng_mmm_x8_dual +( + clk, rst_n, + ena, rdy, + mode, transfer, + index_last, + x_din, y_din, x_dout, y_dout, + x_din_addr, y_din_addr, x_dout_addr, y_dout_addr, + x_din_ena, y_din_ena, x_dout_ena, y_dout_ena, x_din_reg_ena, y_din_reg_ena, + x_din_bank, y_din_bank, x_dout_bank, y_dout_bank, + load_phase, load_xy_addr, load_xy_addr_vld, load_xy_req, + load_x_din, load_y_din +); + + + // + // Includes + // + `include "modexpng_parameters.vh" + `include "modexpng_parameters_x8.vh" + `include "modexpng_mmm_fsm.vh" + + + // + // Parameters + // + parameter INDEX_WIDTH = 6; + + + // + // Ports + // + input clk; + input rst_n; + + input ena; + output rdy; + + input mode; // multiply: 0 = T1:T1*T1, T2:T2*T1, 1 = T1:T1*T2, T2:T2*T2 + // load/unload: 0 = load, 1 = unload + input transfer; // 0 = multiply, 1 = load/unload + + input [INDEX_WIDTH-1:0] index_last; + + input [NUM_MULTS*WORD_WIDTH-1:0] x_din; + input [NUM_MULTS*WORD_WIDTH-1:0] y_din; + output [NUM_MULTS*WORD_WIDTH-1:0] x_dout; + output [NUM_MULTS*WORD_WIDTH-1:0] y_dout; + + output [INDEX_WIDTH-4:0] x_din_addr; + output [INDEX_WIDTH-4:0] y_din_addr; + output [INDEX_WIDTH-4:0] x_dout_addr; + output [INDEX_WIDTH-4:0] y_dout_addr; + + output [ 1-1:0] x_din_ena; + output [ 1-1:0] y_din_ena; + output [NUM_MULTS-1:0] x_dout_ena; + output [NUM_MULTS-1:0] y_dout_ena; + output [ 1-1:0] x_din_reg_ena; + output [ 1-1:0] y_din_reg_ena; + + output [3-1:0] x_din_bank; + output [3-1:0] y_din_bank; + output [3-1:0] x_dout_bank; + output [3-1:0] y_dout_bank; + + output load_phase; // 0 = T1, T2; 1 = N, N_COEFF + output [ INDEX_WIDTH:0] load_xy_addr; // address + output load_xy_addr_vld; // address valid + output load_xy_req; // data request + + input [WORD_WIDTH-1:0] load_x_din; // data input + input [WORD_WIDTH-1:0] load_y_din; // data input + + + // + // FSM State and Next States + // + reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE; + reg [FSM_STATE_WIDTH-1:0] fsm_state_next; + reg [FSM_STATE_WIDTH-1:0] fsm_state_after_idle; + reg [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; + + + // + // FSM Idle Next State + // + always @* + // + case ({transfer, mode}) + 2'b00, + 2'b01: fsm_state_after_idle = FSM_STATE_MULT_SQUARE_COL_0_TRIG; + 2'b10: fsm_state_after_idle = FSM_STATE_LOAD_T1T2_1; + 2'b11: fsm_state_after_idle = FSM_STATE_IDLE; //unload? + endcase + + + // + // Column Counter + // + wire [ INDEX_WIDTH-4:0] col_index; + wire col_index_done; + wire [ INDEX_WIDTH-4:0] col_index_zero; + wire [ INDEX_WIDTH-4:0] col_index_next; + wire [ INDEX_WIDTH-4:0] col_index_prev; + + modexpng_mmm_col_index # + ( + .INDEX_WIDTH(INDEX_WIDTH) + ) + mmm_col_index + ( + .clk (clk), + .index_last (index_last), + .fsm_state_next (fsm_state_next), + .col_index (col_index), + .col_index_done (col_index_done), + .col_index_zero (col_index_zero), + .col_index_next (col_index_next), + .col_index_prev (col_index_prev) + ); + + + // + // Load Address Generator + // + wire [INDEX_WIDTH-1:0] load_xy_addr_lsb = load_xy_addr[INDEX_WIDTH-1:0]; + wire load_addr_zero; + wire load_t1t2_addr_done; + wire load_nn_coeff_addr_done; + + modexpng_mmm_transporter # + ( + .INDEX_WIDTH(INDEX_WIDTH) + ) + transporter + ( + .clk (clk), + .ena (ena), + .index_last (index_last), + .fsm_state (fsm_state), + .fsm_state_next (fsm_state_next), + .load_phase (load_phase), + .load_xy_addr (load_xy_addr), + .load_xy_addr_vld (load_xy_addr_vld), + .load_xy_req (load_xy_req), + .load_addr_zero (load_addr_zero), + .load_t1t2_addr_done (load_t1t2_addr_done), + .load_nn_coeff_addr_done (load_nn_coeff_addr_done) + ); + + + // + // X, Y Address + // + wire [INDEX_WIDTH-1:0] x_din_addr_cnt; + wire [INDEX_WIDTH-1:0] x_din_addr_cnt_last; + wire [ 3-1:0] x_din_addr_cnt_lower_prev; + wire [INDEX_WIDTH-4:0] x_din_addr_cnt_upper_prev; + + modexpng_mmm_din_addr # + ( + .INDEX_WIDTH(INDEX_WIDTH) + ) + din_addr_x + ( + .clk (clk), + .rst_n (rst_n), + .index_last (index_last), + .fsm_state_next (fsm_state_next), + .col_index_zero (col_index_zero), + .col_index_next (col_index_next), + .din_addr (x_din_addr), + .din_bank (x_din_bank), + .din_ena (x_din_ena), + .din_reg_ena (x_din_reg_ena), + .din_addr_cnt (x_din_addr_cnt), + .din_addr_cnt_last (x_din_addr_cnt_last), + .din_addr_cnt_lower_prev (x_din_addr_cnt_lower_prev), + .din_addr_cnt_upper_prev (x_din_addr_cnt_upper_prev) + ); + + modexpng_mmm_dout_addr # + ( + .INDEX_WIDTH(INDEX_WIDTH) + ) + dout_addr_xy + ( + .clk (clk), + .rst_n (rst_n), + .fsm_state (fsm_state), + .load_xy_addr (load_xy_addr), + .load_addr_zero (load_addr_zero), + .load_nn_coeff_addr_done (load_nn_coeff_addr_done), + .x_dout_addr (x_dout_addr), + .y_dout_addr (y_dout_addr), + .x_dout_ena (x_dout_ena), + .y_dout_ena (y_dout_ena), + .x_dout_bank (x_dout_bank), + .y_dout_bank (y_dout_bank) + ); + + + // + // Helper Memories ("Scratchpad") + // + reg [INDEX_WIDTH-1:0] pad_xy_rd_addr; + reg pad_xy_rd_ena = 1'b0; + wire [ WORD_WIDTH-1:0] pad_x_rd_dout; + wire [ WORD_WIDTH-1:0] pad_y_rd_dout; + + wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_zero = {INDEX_WIDTH{1'b0}}; + wire [INDEX_WIDTH-1:0] pad_xy_rd_addr_next = pad_xy_rd_addr + 1'b1; + + modexpng_mmm_pad pad + ( + .clk (clk), + .rst_n (rst_n), + .fsm_state (fsm_state), + .load_xy_addr_lsb (load_xy_addr_lsb), + .load_x_din (load_x_din), + .load_y_din (load_y_din), + .pad_x_rd_addr (pad_xy_rd_addr), + .pad_y_rd_addr (pad_xy_rd_addr), + .pad_x_rd_ena (pad_xy_rd_ena), + .pad_y_rd_ena (pad_xy_rd_ena), + .pad_x_rd_dout (pad_x_rd_dout), + .pad_y_rd_dout (pad_y_rd_dout) + ); + + + always @(posedge clk or negedge rst_n) + // + if (!rst_n) begin + pad_xy_rd_ena <= 1'b0; + end else case (fsm_state_next) + + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: + pad_xy_rd_ena <= 1'b1; + + default: + pad_xy_rd_ena <= 1'b0; + + endcase + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: + pad_xy_rd_addr <= pad_xy_rd_addr_zero; + + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: + pad_xy_rd_addr <= pad_xy_rd_addr_next; + + default: + pad_xy_rd_addr <= {INDEX_WIDTH{1'bX}}; + + endcase + + + + + // + // Flags + // + + wire mult_square_addr_done = x_din_addr_cnt == x_din_addr_cnt_last; + + always @* + // + fsm_state_after_mult_square = col_index_done ? /*FSM_STATE_MULT_TRIANGLE_TRIG*/FSM_STATE_STOP : FSM_STATE_MULT_SQUARE_COL_N_TRIG;; + + + // + // MAC Arrays + // + reg mac_x_ce = 1'b0; + reg mac_x_ce_aux = 1'b0; + reg [NUM_MULTS -1:0] mac_x_clr; + reg mac_x_clr_aux; + reg [NUM_MULTS -2:0] mac_x_casc_a; + reg mac_x_casc_a_aux; + wire [NUM_MULTS * WORD_WIDTH -1:0] mac_x_a; + reg [ 1 * WORD_WIDTH -1:0] mac_x_a_aux; + //wire [ 1 * WORD_WIDTH -1:0] mac_x_a_split[0:NUM_MULTS-1]; + reg [ 1 * WORD_WIDTH -1:0] mac_x_b; + wire [NUM_MULTS * MAC_WIDTH -1:0] mac_x_p; + wire [ 1 * MAC_WIDTH -1:0] mac_x_p_aux; + + reg mac_y_ce = 1'b0; + reg mac_y_ce_aux = 1'b0; + reg [NUM_MULTS -1:0] mac_y_clr; + reg mac_y_clr_aux; + reg [NUM_MULTS -2:0] mac_y_casc_a; + reg mac_y_casc_a_aux; + wire [NUM_MULTS * WORD_WIDTH -1:0] mac_y_a; + reg [ 1 * WORD_WIDTH -1:0] mac_y_a_aux; + //wire [ 1 * WORD_WIDTH -1:0] mac_y_a_split[0:NUM_MULTS-1]; + reg [ 1 * WORD_WIDTH -1:0] mac_y_b; + wire [NUM_MULTS * MAC_WIDTH -1:0] mac_y_p; + wire [ 1 * MAC_WIDTH -1:0] mac_y_p_aux; + + modexpng_mac_array mac_array_x + ( + .clk (clk), + .ce (mac_x_ce), + .ce_aux (mac_x_ce_aux), + .clr (mac_x_clr), + .clr_aux (mac_x_clr_aux), + .casc_a (mac_x_casc_a), + .casc_a_aux (mac_x_casc_a_aux), + .a_in (mac_x_a), + .a_in_aux (mac_x_a_aux), + .b_in (mac_x_b), + .p_out (mac_x_p), + .p_out_aux (mac_x_p_aux) + ); + + modexpng_mac_array mac_array_y + ( + .clk (clk), + .ce (mac_y_ce), + .ce_aux (mac_y_ce_aux), + .clr (mac_y_clr), + .clr_aux (mac_y_clr_aux), + .casc_a (mac_y_casc_a), + .casc_a_aux (mac_y_casc_a_aux), + .a_in (mac_y_a), + .a_in_aux (mac_y_a_aux), + .b_in (mac_y_b), + .p_out (mac_y_p), + .p_out_aux (mac_y_p_aux) + ); + + genvar gen_z; + + generate for (gen_z=0; gen_z