diff options
Diffstat (limited to 'src/rtl/modexpa7_systolic_multiplier.v')
-rw-r--r-- | src/rtl/modexpa7_systolic_multiplier.v | 152 |
1 files changed, 113 insertions, 39 deletions
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v index a1e141e..9d96f98 100644 --- a/src/rtl/modexpa7_systolic_multiplier.v +++ b/src/rtl/modexpa7_systolic_multiplier.v @@ -143,6 +143,7 @@ module modexpa7_systolic_multiplier # * Parameters Latch */ reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch; + reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch; // save number of words in n when new operation starts always @(posedge clk) @@ -200,20 +201,25 @@ module modexpa7_systolic_multiplier # // loader input - reg [SYSTOLIC_CNTR_WIDTH-1:0] loader_addr[0:SYSTOLIC_ARRAY_LENGTH-1]; - reg loader_wren[0:SYSTOLIC_ARRAY_LENGTH-1]; + reg [SYSTOLIC_CNTR_WIDTH-1:0] loader_addr_wr; + wire [SYSTOLIC_CNTR_WIDTH-1:0] loader_addr_rd; + reg loader_wren; reg [ 32-1:0] loader_din [0:SYSTOLIC_ARRAY_LENGTH-1]; // loader output - wire [ 32-1:0] loader_dout[0:SYSTOLIC_ARRAY_LENGTH-1]; + wire [ 32-1:0] loader_dout[0:SYSTOLIC_ARRAY_LENGTH-1];
+
+ // array_input
+ wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] pe_a_wide;
+ wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] pe_b_wide;
// generate parallelized loader genvar i; generate for (i=0; i<SYSTOLIC_ARRAY_LENGTH; i=i+1) // - begin : gen_bram_1rw_readfirst_loader + begin : gen_bram_1rw_1ro_readfirst_loader // - bram_1rw_readfirst # + bram_1rw_1ro_readfirst # ( .MEM_WIDTH (32), .MEM_ADDR_BITS (SYSTOLIC_CNTR_WIDTH) @@ -221,11 +227,15 @@ module modexpa7_systolic_multiplier # bram_loader ( .clk (clk), - .a_addr (loader_addr[i]), - .a_wr (loader_wren[i]), + .a_addr (loader_addr_wr), + .a_wr (loader_wren), .a_in (loader_din[i]), - .a_out (loader_dout[i]) - ); + .a_out (),
+ .b_addr (loader_addr_rd),
+ .b_out (loader_dout[i]) + );
+ //
+ assign pe_b_wide[32 * (i + 1) - 1 -: 32] = loader_dout[i]; // end // @@ -250,22 +260,40 @@ module modexpa7_systolic_multiplier # // address registers reg [OPERAND_ADDR_WIDTH-1:0] b_addr; + wire [OPERAND_ADDR_WIDTH :0] p_addr_ext_wr; + reg [OPERAND_ADDR_WIDTH :0] p_addr_ext_rd; // handy increment values - wire [OPERAND_ADDR_WIDTH-1:0] b_addr_next = b_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] b_addr_next = b_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH :0] p_addr_ext_rd_next = b_addr + 1'b1; +
+ // write enables
+ wire p_wren;
+
+ // data buses
+ wire [31: 0] p_data_in;
+ wire [31: 0] p_data_out;
// handy stop flags - wire b_addr_done = (b_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire b_addr_done = (b_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire p_addr_ext_rd_done = (p_addr_ext_rd == bram_addr_ext_last) ? 1'b1 : 1'b0; // delayed addresses reg [OPERAND_ADDR_WIDTH-1:0] b_addr_dly;
- + always @(posedge clk) b_addr_dly <= b_addr; // map registers to top-level ports assign b_bram_addr = b_addr; +
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH+1)) + bram_p
+ ( .clk(clk),
+ .a_addr(p_addr_ext_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+ .b_addr(p_addr_ext_rd), .b_out(p_data_out)); +
/* * Loader Data Input @@ -297,17 +325,8 @@ module modexpa7_systolic_multiplier # always @(posedge clk) // case (fsm_next_state) - - FSM_STATE_LOAD_WRITE:
- //
- for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1) - loader_wren[j] <= 1'b1; - - default: - // - for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1) - loader_wren[j] <= 1'b0; - + FSM_STATE_LOAD_WRITE: loader_wren <= 1'b1; + default: loader_wren <= 1'b0; endcase
@@ -317,17 +336,15 @@ module modexpa7_systolic_multiplier # always @(posedge clk) // - case (fsm_state) + case (fsm_state)
- FSM_STATE_LOAD_START: - // - for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1) - loader_addr[j] <= load_syst_cnt_zero; - - FSM_STATE_LOAD_WRITE: - // - for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1) - loader_addr[j] <= !load_syst_cnt_done ? load_syst_cnt_next : load_syst_cnt; + FSM_STATE_LOAD_START:
+ //
+ loader_addr_wr <= load_syst_cnt_zero;
+ + FSM_STATE_LOAD_WRITE:
+ //
+ loader_addr_wr <= !load_syst_cnt_done ? load_syst_cnt_next : load_syst_cnt; endcase @@ -344,12 +361,68 @@ module modexpa7_systolic_multiplier # //
end
+
+ /*
+ * Multiplier Array
+ */
+ reg pe_array_ena;
+ wire pe_array_rdy;
+
+ always @(posedge clk)
+ //
+ case (fsm_next_state)
+ FSM_STATE_MULT_START: pe_array_ena <= 1'b1;
+ default: pe_array_ena <= 1'b0;
+ endcase
+
+ always @(posedge clk)
+ //
+ case (fsm_next_state)
+ FSM_STATE_MULT_START: p_num_words_latch <= {n_num_words_latch, 1'b1};
+ endcase
+
+
+ modexpa7_systolic_multiplier_array # + ( + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH), + .SYSTOLIC_ARRAY_POWER (SYSTOLIC_ARRAY_POWER) + )
+ systolic_pe_array + ( + .clk (clk), + .rst_n (rst_n), + + .ena (pe_array_ena), + .rdy (pe_array_rdy), +
+ .loader_addr_rd (loader_addr_rd),
+
+ .pe_a_wide (),
+ .pe_b_wide (pe_b_wide),
+
+ .p_bram_addr (p_addr_ext_wr), + .p_bram_in (p_data_in), + .p_bram_wr (p_wren),
+
+ + .n_num_words (n_num_words_latch), + .p_num_words (p_num_words_latch) + );
+
+
+
+
+
+
+
+
+
/* * FSM Process - */ + - */ always @(posedge clk or negedge rst_n) // if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; @@ -373,13 +446,14 @@ module modexpa7_systolic_multiplier # else fsm_next_state = FSM_STATE_LOAD_SHIFT; FSM_STATE_LOAD_WRITE: if (load_syst_cnt_done) fsm_next_state = FSM_STATE_LOAD_FINAL; else fsm_next_state = FSM_STATE_LOAD_SHIFT; - FSM_STATE_LOAD_FINAL: fsm_next_state = FSM_STATE_STOP; + FSM_STATE_LOAD_FINAL: fsm_next_state = FSM_STATE_MULT_START; //
- //FSM_STATE_MULT_START: - //FSM_STATE_MULT_CRUNCH: - //FSM_STATE_MULT_FINAL: + FSM_STATE_MULT_START: fsm_next_state = FSM_STATE_MULT_CRUNCH; + FSM_STATE_MULT_CRUNCH: if (pe_array_rdy) fsm_next_state = FSM_STATE_MULT_FINAL;
+ else fsm_next_state = FSM_STATE_MULT_CRUNCH; + FSM_STATE_MULT_FINAL: fsm_next_state = FSM_STATE_STOP; // - FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE; + FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE; // endcase // |