From ce4b5740615d9097986f5149e53e4e053674b674 Mon Sep 17 00:00:00 2001
From: "Pavel V. Shatov (Meister)"
Date: Thu, 20 Jul 2017 03:36:32 +0300
Subject: Converted pe_c_out_mem two-dimensional array into a FIFO.

---
 src/rtl/modexpa7_simple_fifo.v         | 209 +++++++++++++++++++++++++++++++++
 src/rtl/modexpa7_systolic_multiplier.v |  83 +++++++++++--
 2 files changed, 280 insertions(+), 12 deletions(-)
 create mode 100644 src/rtl/modexpa7_simple_fifo.v

diff --git a/src/rtl/modexpa7_simple_fifo.v b/src/rtl/modexpa7_simple_fifo.v
new file mode 100644
index 0000000..84c21a9
--- /dev/null
+++ b/src/rtl/modexpa7_simple_fifo.v
@@ -0,0 +1,209 @@
+`timescale 1ns / 1ps
+// Simple synchronous FIFO: no full/empty flags, pointers wrap silently.
+module modexpa7_simple_fifo #
+    (
+        parameter BUS_WIDTH  = 128,    // width of one FIFO word, in bits
+        parameter DEPTH_BITS = 2       // FIFO holds 2**DEPTH_BITS words
+    )
+    (
+        input                  clk,    // all logic runs on the rising edge
+        input                  rst,    // synchronous, active-high: zeroes both pointers and d_out
+        input                  wr_en,  // store d_in at the write pointer, then advance it
+        input                  rd_en,  // load d_out from the read pointer, then advance it
+        input  [BUS_WIDTH-1:0] d_in,
+        output [BUS_WIDTH-1:0] d_out   // registered: changes only on a clock edge with rd_en or rst
+    );
+
+    //
+    // Locals (pointer constants are sized to match ptr_wr/ptr_rd exactly)
+    //
+    localparam NUM_WORDS = 2 ** DEPTH_BITS;
+
+    localparam [DEPTH_BITS-1:0] PTR_ZERO = {DEPTH_BITS{1'b0}};
+    localparam [DEPTH_BITS-1:0] PTR_LAST = {DEPTH_BITS{1'b1}};    // not referenced in this module
+
+    //
+    // Memory (contents are not cleared by rst, only the pointers are)
+    //
+    reg [BUS_WIDTH-1:0] fifo[0:NUM_WORDS-1];
+
+    //
+    // Pointers (DEPTH_BITS wide, so they wrap to zero after NUM_WORDS-1)
+    //
+    reg [DEPTH_BITS-1:0] ptr_wr;
+    reg [DEPTH_BITS-1:0] ptr_rd;
+
+    //
+    // Output register
+    //
+    reg [BUS_WIDTH-1:0] d_out_reg;
+    assign d_out = d_out_reg;
+
+    //
+    // Write Pointer
+    //
+    always @(posedge clk)
+        // advances on every write; overflow is neither detected nor blocked
+        if (rst)        ptr_wr <= PTR_ZERO;
+        else if (wr_en) ptr_wr <= ptr_wr + 1'b1;
+
+    //
+    // Read Pointer
+    //
+    always @(posedge clk)
+        // advances on every read; underflow is neither detected nor blocked
+        if (rst)        ptr_rd <= PTR_ZERO;
+        else if (rd_en) ptr_rd <= ptr_rd + 1'b1;
+
+    //
+    // Read Logic
+    //
+    always @(posedge clk)
+        // d_out keeps its previous value while rd_en is low
+        if (rst)        d_out_reg <= {BUS_WIDTH{1'b0}};
+        else if (rd_en) d_out_reg <= fifo[ptr_rd];
+
+    //
+    // Write Logic
+    //
+    always @(posedge clk)
+        // writes are ignored while rst is asserted
+        if (!rst && wr_en) fifo[ptr_wr] <= d_in;
+
+// NOTE(review): everything below is commented-out code, apparently kept as a reference for full/empty flag logic -- confirm before removing.
+/*
+generic_dpram #(aw,dw) u0(
+    .rclk( clk ),
+    .rrst( !rst ),
+    .rce( 1'b1 ),
+    .oe( 1'b1 ),
+    .raddr( rp ),
+    .do( dout ),
+    .wclk( clk ),
+    .wrst( !rst ),
+    .wce( 1'b1 ),
+    .we( we ),
+    .waddr( wp ),
+    .di( din )
+    );
+
+////////////////////////////////////////////////////////////////////
+//
+// Misc Logic
+//
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+    if(!rst) wp <= #1 {aw{1'b0}};
+    else
+    if(clr) wp <= #1 {aw{1'b0}};
+    else
+    if(we) wp <= #1 wp_pl1;
+
+assign wp_pl1 = wp + { {aw-1{1'b0}}, 1'b1};
+assign wp_pl2 = wp + { {aw-2{1'b0}}, 2'b10};
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+    if(!rst) rp <= #1 {aw{1'b0}};
+    else
+    if(clr) rp <= #1 {aw{1'b0}};
+    else
+    if(re) rp <= #1 rp_pl1;
+
+assign rp_pl1 = rp + { {aw-1{1'b0}}, 1'b1};
+
+////////////////////////////////////////////////////////////////////
+//
+// Combinatorial Full & Empty Flags
+//
+
+assign empty = ((wp == rp) & !gb);
+assign full = ((wp == rp) & gb);
+
+// Guard Bit ...
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+    if(!rst) gb <= #1 1'b0;
+    else
+    if(clr) gb <= #1 1'b0;
+    else
+    if((wp_pl1 == rp) & we) gb <= #1 1'b1;
+    else
+    if(re) gb <= #1 1'b0;
+
+////////////////////////////////////////////////////////////////////
+//
+// Registered Full & Empty Flags
+//
+
+// Guard Bit ...
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+    if(!rst) gb2 <= #1 1'b0;
+    else
+    if(clr) gb2 <= #1 1'b0;
+    else
+    if((wp_pl2 == rp) & we) gb2 <= #1 1'b1;
+    else
+    if((wp != rp) & re) gb2 <= #1 1'b0;
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+    if(!rst) full_r <= #1 1'b0;
+    else
+    if(clr) full_r <= #1 1'b0;
+    else
+    if(we & ((wp_pl1 == rp) & gb2) & !re) full_r <= #1 1'b1;
+    else
+    if(re & ((wp_pl1 != rp) | !gb2) & !we) full_r <= #1 1'b0;
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+    if(!rst) empty_r <= #1 1'b1;
+    else
+    if(clr) empty_r <= #1 1'b1;
+    else
+    if(we & ((wp != rp_pl1) | gb2) & !re) empty_r <= #1 1'b0;
+    else
+    if(re & ((wp == rp_pl1) & !gb2) & !we) empty_r <= #1 1'b1;
+
+////////////////////////////////////////////////////////////////////
+//
+// Combinatorial Full_n & Empty_n Flags
+//
+
+assign empty_n = cnt < n;
+assign full_n = !(cnt < (max_size-n+1));
+assign level = {2{cnt[aw]}} | cnt[aw-1:aw-2];
+
+// N entries status
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+    if(!rst) cnt <= #1 {aw+1{1'b0}};
+    else
+    if(clr) cnt <= #1 {aw+1{1'b0}};
+    else
+    if( re & !we) cnt <= #1 cnt + { {aw{1'b1}}, 1'b1};
+    else
+    if(!re & we) cnt <= #1 cnt + { {aw{1'b0}}, 1'b1};
+
+////////////////////////////////////////////////////////////////////
+//
+// Registered Full_n & Empty_n Flags
+//
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+    if(!rst) empty_n_r <= #1 1'b1;
+    else
+    if(clr) empty_n_r <= #1 1'b1;
+    else
+    if(we & (cnt >= (n-1) ) & !re) empty_n_r <= #1 1'b0;
+    else
+    if(re & (cnt <= n ) & !we) empty_n_r <= #1 1'b1;
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+    if(!rst) full_n_r <= #1 1'b0;
+    else
+    if(clr) full_n_r <= #1 1'b0;
+    else
+    if(we & (cnt >= (max_size-n) ) & !re) full_n_r <= #1 1'b1;
+    else
+    if(re & (cnt <= (max_size-n+1)) & !we) full_n_r <= #1 1'b0;
+*/
+
+
+endmodule
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index 8cd28ff..382019c 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -600,7 +600,7 @@ module modexpa7_systolic_multiplier #
     reg [31: 0] pe_a [0:SYSTOLIC_ARRAY_LENGTH-1];
     reg [31: 0] pe_b [0:SYSTOLIC_ARRAY_LENGTH-1];
     reg [31: 0] pe_t [0:SYSTOLIC_ARRAY_LENGTH-1];
-    reg [31: 0] pe_c_in [0:SYSTOLIC_ARRAY_LENGTH-1];
+    wire [31: 0] pe_c_in [0:SYSTOLIC_ARRAY_LENGTH-1];
     wire [31: 0] pe_p [0:SYSTOLIC_ARRAY_LENGTH-1];
     wire [31: 0] pe_c_out[0:SYSTOLIC_ARRAY_LENGTH-1];
 
@@ -608,9 +608,49 @@ module modexpa7_systolic_multiplier #
     //
     // These can be turned into a FIFO (maybe later?)...
     //
-    reg [31: 0] pe_c_out_mem[0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
+    //reg [31: 0] pe_c_out_mem[0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
     reg [31: 0] pe_t_mem [0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
 
+    reg fifo_c_rst;
+
+    wire fifo_c_wren;
+    wire fifo_c_rden;
+
+    wire debug_fifo_full;
+    wire debug_fifo_empty;
+
+    wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_c_din;
+    wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_c_dout;
+
+    /**/
+    modexpa7_simple_fifo #
+    (
+        .BUS_WIDTH (32 * SYSTOLIC_ARRAY_LENGTH),
+        .DEPTH_BITS (SYSTOLIC_CNTR_WIDTH)
+    )
+    fifo_c
+    (
+        .clk (clk),
+        .rst (fifo_c_rst),
+        .wr_en (fifo_c_wren),
+        .d_in (fifo_c_din),
+        .rd_en (fifo_c_rden),
+        .d_out (fifo_c_dout)
+    );
+    /**/
+    /*
+    ip_fifo_c fifo_c
+    (
+        .clk (clk),
+        .srst (fifo_c_rst),
+        .wr_en (fifo_c_wren),
+        .din (fifo_c_din),
+        .rd_en (fifo_c_rden),
+        .dout (fifo_c_dout),
+        .full (debug_fifo_full),
+        .empty (debug_fifo_empty)
+    );*/
+
     generate for (i=0; i {1'b0, a_addr}) ? 32'd0 : a_bram_out;
                 pe_b[j] <= loader_dout[j];
                 pe_t[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
-                pe_c_in[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+                //pe_c_in[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
             end else begin
                 pe_a[j] <= 32'hXXXXXXXX;
                 pe_b[j] <= 32'hXXXXXXXX;
                 pe_t[j] <= 32'hXXXXXXXX;
-                pe_c_in[j] <= 32'hXXXXXXXX;
+                //pe_c_in[j] <= 32'hXXXXXXXX;
             end
 
         // if (fsm_state == FSM_STATE_MULT_AB_N_COEFF_CRUNCH)
@@ -883,12 +942,12 @@ module modexpa7_systolic_multiplier #
                 pe_a[j] <= ab_data_out;
                 pe_b[j] <= loader_dout[j];
                 pe_t[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
-                pe_c_in[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+                //pe_c_in[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
             end else begin
                 pe_a[j] <= 32'hXXXXXXXX;
                 pe_b[j] <= 32'hXXXXXXXX;
                 pe_t[j] <= 32'hXXXXXXXX;
-                pe_c_in[j] <= 32'hXXXXXXXX;
+                //pe_c_in[j] <= 32'hXXXXXXXX;
             end
 
         // if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
@@ -899,12 +958,12 @@ module modexpa7_systolic_multiplier #
                 pe_a[j] <= (qn_addr_ext > {1'b0, q_addr}) ? 32'd0 : q_data_out;
                 pe_b[j] <= loader_dout[j];
                 pe_t[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
-                pe_c_in[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+                //pe_c_in[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
             end else begin
                 pe_a[j] <= 32'hXXXXXXXX;
                 pe_b[j] <= 32'hXXXXXXXX;
                 pe_t[j] <= 32'hXXXXXXXX;
-                pe_c_in[j] <= 32'hXXXXXXXX;
+                //pe_c_in[j] <= 32'hXXXXXXXX;
             end
 
         //
-- 
cgit v1.2.3