diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2017-07-20 03:36:32 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2017-07-20 03:36:32 +0300 |
commit | ce4b5740615d9097986f5149e53e4e053674b674 (patch) | |
tree | f9841f46b17d495f3f7e8ee9baf84ff62b3b47df /src | |
parent | c3d75e5cf67db823d506bb74583035cd73f9ed86 (diff) |
Converted pe_c_out_mem two-dimensional array into a FIFO.
Diffstat (limited to 'src')
-rw-r--r-- | src/rtl/modexpa7_simple_fifo.v | 209 | ||||
-rw-r--r-- | src/rtl/modexpa7_systolic_multiplier.v | 83 |
2 files changed, 280 insertions, 12 deletions
diff --git a/src/rtl/modexpa7_simple_fifo.v b/src/rtl/modexpa7_simple_fifo.v
new file mode 100644
index 0000000..84c21a9
--- /dev/null
+++ b/src/rtl/modexpa7_simple_fifo.v
@@ -0,0 +1,96 @@
+`timescale 1ns / 1ps
+
+//
+// modexpa7_simple_fifo
+//
+// Minimal single-clock FIFO with a registered read port.
+//
+// Parameters:
+//   BUS_WIDTH  - width of one FIFO word, in bits
+//   DEPTH_BITS - width of the read/write pointers; capacity is 2**DEPTH_BITS words
+//
+// Ports:
+//   clk   - clock; all registers update on its rising edge
+//   rst   - synchronous, active-high reset: clears both pointers and d_out and
+//           suppresses writes while asserted (memory contents are not cleared)
+//   wr_en - stores d_in at the write pointer and advances the write pointer
+//   rd_en - loads d_out from the read pointer and advances the read pointer
+//   d_in  - word to be written
+//   d_out - registered read data, updated on the clock edge where rd_en is high
+//
+// NOTE: there are no full/empty flags and no overflow/underflow protection,
+// the pointers simply wrap around. The surrounding logic must never write
+// more than 2**DEPTH_BITS unread words and must never read an empty FIFO.
+//
+module modexpa7_simple_fifo #
+    (
+        parameter BUS_WIDTH  = 128,
+        parameter DEPTH_BITS = 2
+    )
+    (
+        input                  clk,
+        input                  rst,
+        input                  wr_en,
+        input                  rd_en,
+        input  [BUS_WIDTH-1:0] d_in,
+        output [BUS_WIDTH-1:0] d_out
+    );
+
+    //
+    // Locals
+    //
+    localparam NUM_WORDS = 2 ** DEPTH_BITS;
+
+    // Pointer reset value. Declared with exactly the pointer width so that
+    // assigning it to ptr_wr/ptr_rd involves no implicit truncation.
+    localparam [DEPTH_BITS-1:0] PTR_ZERO = {DEPTH_BITS{1'b0}};
+
+    //
+    // Memory
+    //
+    reg [BUS_WIDTH-1:0] fifo[0:NUM_WORDS-1];
+
+    //
+    // Pointers (wrap around naturally on overflow)
+    //
+    reg [DEPTH_BITS-1:0] ptr_wr;
+    reg [DEPTH_BITS-1:0] ptr_rd;
+
+    //
+    // Output
+    //
+    reg [BUS_WIDTH-1:0] d_out_reg;
+    assign d_out = d_out_reg;
+
+    //
+    // Write Pointer
+    //
+    always @(posedge clk)
+        //
+        if (rst)        ptr_wr <= PTR_ZERO;
+        else if (wr_en) ptr_wr <= ptr_wr + 1'b1;
+
+    //
+    // Read Pointer
+    //
+    always @(posedge clk)
+        //
+        if (rst)        ptr_rd <= PTR_ZERO;
+        else if (rd_en) ptr_rd <= ptr_rd + 1'b1;
+
+    //
+    // Read Logic (output register is cleared by reset)
+    //
+    always @(posedge clk)
+        //
+        if (rst)        d_out_reg <= {BUS_WIDTH{1'b0}};
+        else if (rd_en) d_out_reg <= fifo[ptr_rd];
+
+    //
+    // Write Logic (writes are ignored while reset is asserted)
+    //
+    always @(posedge clk)
+        //
+        if (!rst && wr_en) fifo[ptr_wr] <= d_in;
+
+endmodule
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index 8cd28ff..382019c 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -600,7 +600,7 @@ module modexpa7_systolic_multiplier #
reg [31: 0] pe_a [0:SYSTOLIC_ARRAY_LENGTH-1];
reg [31: 0] pe_b [0:SYSTOLIC_ARRAY_LENGTH-1];
reg [31: 0] pe_t [0:SYSTOLIC_ARRAY_LENGTH-1];
- reg [31: 0] pe_c_in [0:SYSTOLIC_ARRAY_LENGTH-1];
+ wire [31: 0] pe_c_in [0:SYSTOLIC_ARRAY_LENGTH-1];
wire [31: 0] pe_p [0:SYSTOLIC_ARRAY_LENGTH-1];
wire [31: 0] pe_c_out[0:SYSTOLIC_ARRAY_LENGTH-1];
@@ -608,9 +608,49 @@ module modexpa7_systolic_multiplier # //
// These can be turned into a FIFO (maybe later?)...
//
- reg [31: 0] pe_c_out_mem[0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
+ //reg [31: 0] pe_c_out_mem[0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
reg [31: 0] pe_t_mem [0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
+ reg fifo_c_rst; // reset input of fifo_c, registered and driven from fsm_state
+ // fifo_c write/read strobes (assigned from shreg_now_unloading / shreg_now_loading)
+ wire fifo_c_wren;
+ wire fifo_c_rden;
+ // status flags, only connected by the commented-out ip_fifo_c instance below
+ wire debug_fifo_full;
+ wire debug_fifo_empty;
+ // packed FIFO data buses: one 32-bit slice per processing element
+ wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_c_din;
+ wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_c_dout;
+
+ /* fifo_c: holds the pe_c_out words of all PEs, replacing the former pe_c_out_mem array */
+ modexpa7_simple_fifo #
+ (
+ .BUS_WIDTH (32 * SYSTOLIC_ARRAY_LENGTH), // all PE outputs are stored side by side in one word
+ .DEPTH_BITS (SYSTOLIC_CNTR_WIDTH) // capacity is 2**SYSTOLIC_CNTR_WIDTH words
+ )
+ fifo_c
+ (
+ .clk (clk),
+ .rst (fifo_c_rst),
+ .wr_en (fifo_c_wren),
+ .d_in (fifo_c_din),
+ .rd_en (fifo_c_rden),
+ .d_out (fifo_c_dout)
+ );
+ /**/
+ /* Disabled alternative instance (presumably a generated FIFO core - TODO confirm); adds full/empty flags:
+ ip_fifo_c fifo_c
+ (
+ .clk (clk),
+ .srst (fifo_c_rst),
+ .wr_en (fifo_c_wren),
+ .din (fifo_c_din),
+ .rd_en (fifo_c_rden),
+ .dout (fifo_c_dout),
+ .full (debug_fifo_full),
+ .empty (debug_fifo_empty)
+ );*/
+
generate for (i=0; i<SYSTOLIC_ARRAY_LENGTH; i=i+1)
begin : modexpa7_systolic_pe_multiplier
modexpa7_systolic_pe systolic_pe_inst
@@ -623,10 +663,13 @@ module modexpa7_systolic_multiplier # .p (pe_p[i]),
.c_out (pe_c_out[i])
);
+ assign pe_c_in[i] = fifo_c_dout[32 * (i + 1) - 1 -: 32]; // PE i reads bits [32*i+31 : 32*i] of the FIFO output word
+ assign fifo_c_din[32 * (i + 1) - 1 -: 32] = pe_c_out[i]; // PE i drives the same 32-bit slice of the FIFO input word
end
endgenerate
+
//
@@ -695,7 +738,23 @@ module modexpa7_systolic_multiplier # endcase
-
+ always @(posedge clk)
+ // fifo_c reset control: fifo_c_rst is a register and keeps its value in every state not listed below
+ case (fsm_state)
+ FSM_STATE_MULT_A_B_START,
+ FSM_STATE_MULT_AB_N_COEFF_START,
+ FSM_STATE_MULT_Q_N_START: fifo_c_rst <= 1'b1; // assert reset in any of the three *_START states
+ // release reset once shreg_done_load is seen in one of the *_CRUNCH states
+ FSM_STATE_MULT_A_B_CRUNCH,
+ FSM_STATE_MULT_AB_N_COEFF_CRUNCH,
+ FSM_STATE_MULT_Q_N_CRUNCH: if (shreg_done_load) fifo_c_rst <= 1'b0;
+ endcase
+ // NOTE(review): while fifo_c_rst is high the FIFO output is held at zero; presumably this replaces the explicit 32'd0 carry-in of the old pe_c_in assignment - confirm
+ // fifo_c strobes: write while shreg_now_unloading is high, read while shreg_now_loading is high
+ assign fifo_c_wren = shreg_now_unloading;
+ assign fifo_c_rden = shreg_now_loading;
+
+
always @(posedge clk) begin
@@ -835,9 +894,9 @@ module modexpa7_systolic_multiplier # FSM_STATE_MULT_AB_N_COEFF_CRUNCH,
FSM_STATE_MULT_Q_N_CRUNCH: begin
- if (shreg_now_unloading)
- for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
- pe_c_out_mem[j][syst_cnt_unload] <= pe_c_out[j];
+ //if (shreg_now_unloading)
+ //for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
+ //pe_c_out_mem[j][syst_cnt_unload] <= pe_c_out[j];
if (shreg_now_unloading) begin
@@ -867,12 +926,12 @@ module modexpa7_systolic_multiplier # pe_a[j] <= (ab_addr_ext > {1'b0, a_addr}) ? 32'd0 : a_bram_out;
pe_b[j] <= loader_dout[j];
pe_t[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
- pe_c_in[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+ //pe_c_in[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
pe_t[j] <= 32'hXXXXXXXX;
- pe_c_in[j] <= 32'hXXXXXXXX;
+ //pe_c_in[j] <= 32'hXXXXXXXX;
end
//
if (fsm_state == FSM_STATE_MULT_AB_N_COEFF_CRUNCH)
@@ -883,12 +942,12 @@ module modexpa7_systolic_multiplier # pe_a[j] <= ab_data_out;
pe_b[j] <= loader_dout[j];
pe_t[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
- pe_c_in[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+ //pe_c_in[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
pe_t[j] <= 32'hXXXXXXXX;
- pe_c_in[j] <= 32'hXXXXXXXX;
+ //pe_c_in[j] <= 32'hXXXXXXXX;
end
//
if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
@@ -899,12 +958,12 @@ module modexpa7_systolic_multiplier # pe_a[j] <= (qn_addr_ext > {1'b0, q_addr}) ? 32'd0 : q_data_out;
pe_b[j] <= loader_dout[j];
pe_t[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
- pe_c_in[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+ //pe_c_in[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
pe_t[j] <= 32'hXXXXXXXX;
- pe_c_in[j] <= 32'hXXXXXXXX;
+ //pe_c_in[j] <= 32'hXXXXXXXX;
end
//
|