//====================================================================== // // modexpa7_systolic_multiplier_array.v // ----------------------------------------------------------------------------- // Systolic Montgomery multiplier Processing Element Array // // Authors: Pavel Shatov // // Copyright (c) 2017, NORDUnet A/S All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // - Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // - Neither the name of the NORDUnet nor the names of its contributors may // be used to endorse or promote products derived from this software // without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // //====================================================================== module modexpa7_systolic_multiplier_array # ( parameter OPERAND_ADDR_WIDTH = 4, parameter SYSTOLIC_ARRAY_POWER = 2 ) ( input clk, input rst_n, input ena, output rdy, output [OPERAND_ADDR_WIDTH - SYSTOLIC_ARRAY_POWER - 1 : 0] loader_addr_rd, input [32 * (2 ** SYSTOLIC_ARRAY_POWER) - 1 : 0] pe_a_wide, input [32 * (2 ** SYSTOLIC_ARRAY_POWER) - 1 : 0] pe_b_wide, output [ OPERAND_ADDR_WIDTH : 0] p_bram_addr, output [ 32 - 1 : 0] p_bram_in, output p_bram_wr, input [ OPERAND_ADDR_WIDTH - 1 : 0] n_num_words, input [ OPERAND_ADDR_WIDTH : 0] p_num_words ); /* * Include Settings */ `include "pe/modexpa7_primitive_switch.v" `include "modexpa7_settings.v" /* * FSM Declaration */ localparam [ 7: 0] FSM_STATE_IDLE = 8'h00; localparam [ 7: 0] FSM_STATE_MULT_START = 8'h11; localparam [ 7: 0] FSM_STATE_MULT_CRUNCH = 8'h12; localparam [ 7: 0] FSM_STATE_MULT_RELOAD = 8'h13; localparam [ 7: 0] FSM_STATE_MULT_FINAL = 8'h14; localparam [ 7: 0] FSM_STATE_STOP = 8'hFF; /* * FSM State / Next State */ reg [ 7: 0] fsm_state = FSM_STATE_IDLE; reg [ 7: 0] fsm_next_state; /* * Enable Delay and Trigger */ reg ena_dly = 1'b0; // delay enable by one clock cycle always @(posedge clk) ena_dly <= ena; // trigger new operation when enable goes high wire ena_trig = ena && !ena_dly; /* * Ready Flag Logic */ reg rdy_reg = 1'b1; assign rdy = rdy_reg; always @(posedge clk or negedge rst_n) // reset flag if (rst_n == 1'b0) rdy_reg <= 1'b1; else begin // clear flag when operation is started if (fsm_state == FSM_STATE_IDLE) rdy_reg <= ~ena_trig; // set flag after operation is finished if (fsm_state == FSM_STATE_STOP) rdy_reg <= 1'b1; end /* * Parameters Latch */ reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch; reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch; // save number of words in n when new operation starts always @(posedge clk) // if ((fsm_state == FSM_STATE_IDLE) && ena_trig) begin n_num_words_latch <= n_num_words; p_num_words_latch <= p_num_words; end /* * Systolic Array of Processing Elements */ reg [31: 0] pe_a [0:SYSTOLIC_ARRAY_LENGTH-1]; reg [31: 0] pe_b [0:SYSTOLIC_ARRAY_LENGTH-1]; wire [31: 0] pe_t [0:SYSTOLIC_ARRAY_LENGTH-1]; wire [31: 0] pe_c_in [0:SYSTOLIC_ARRAY_LENGTH-1]; wire [31: 0] pe_p [0:SYSTOLIC_ARRAY_LENGTH-1]; wire [31: 0] pe_c_out [0:SYSTOLIC_ARRAY_LENGTH-1]; /* * FIFOs */ reg fifo_c_rst; reg fifo_t_rst; wire fifo_c_wren; wire fifo_c_rden; wire fifo_t_wren; wire fifo_t_rden; wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_c_din; wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_c_dout; wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_t_din; wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_t_dout; modexpa7_simple_fifo # ( .BUS_WIDTH (32 * SYSTOLIC_ARRAY_LENGTH), .DEPTH_BITS (SYSTOLIC_CNTR_WIDTH) ) fifo_c ( .clk (clk), .rst (fifo_c_rst), .wr_en (fifo_c_wren), .d_in (fifo_c_din), .rd_en (fifo_c_rden), .d_out (fifo_c_dout) ); modexpa7_simple_fifo # ( .BUS_WIDTH (32 * SYSTOLIC_ARRAY_LENGTH), .DEPTH_BITS (SYSTOLIC_CNTR_WIDTH) ) fifo_t ( .clk (clk), .rst (fifo_t_rst), .wr_en (fifo_t_wren), .d_in (fifo_t_din), .rd_en (fifo_t_rden), .d_out (fifo_t_dout) ); genvar i; generate for (i=0; i