aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2017-07-20 03:36:32 +0300
committer: Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2017-07-20 03:36:32 +0300
commit: ce4b5740615d9097986f5149e53e4e053674b674 (patch)
tree: f9841f46b17d495f3f7e8ee9baf84ff62b3b47df
parent: c3d75e5cf67db823d506bb74583035cd73f9ed86 (diff)
Converted pe_c_out_mem two-dimensional array into a FIFO.
-rw-r--r-- src/rtl/modexpa7_simple_fifo.v | 209
-rw-r--r-- src/rtl/modexpa7_systolic_multiplier.v | 83
2 files changed, 280 insertions, 12 deletions
diff --git a/src/rtl/modexpa7_simple_fifo.v b/src/rtl/modexpa7_simple_fifo.v
new file mode 100644
index 0000000..84c21a9
--- /dev/null
+++ b/src/rtl/modexpa7_simple_fifo.v
@@ -0,0 +1,209 @@
+`timescale 1ns / 1ps
+
+//
+// modexpa7_simple_fifo
+// Single-clock FIFO with a registered read port. There are no full/empty flags and
+// both pointers wrap silently: overflowing overwrites unread words, reading past the
+// write pointer returns whatever the memory holds.
+//
+module modexpa7_simple_fifo #
+    (
+        parameter BUS_WIDTH  = 128,  // width of one FIFO word, in bits
+        parameter DEPTH_BITS = 2     // pointer width; capacity is 2**DEPTH_BITS words
+    )
+    (
+        input                  clk,    // every register updates on the rising edge
+        input                  rst,    // synchronous, active-high; clears pointers and d_out only
+        input                  wr_en,  // store d_in at the write pointer
+        input                  rd_en,  // load d_out from the read pointer
+        input  [BUS_WIDTH-1:0] d_in,
+        output [BUS_WIDTH-1:0] d_out   // updated on the clock edge where rd_en is high
+    );
+
+    //
+    // Locals
+    //
+    localparam NUM_WORDS = 2 ** DEPTH_BITS;
+    // Pointer constants, sized to match ptr_wr/ptr_rd exactly (no implicit resizing)
+    localparam [DEPTH_BITS-1:0] PTR_ZERO = {DEPTH_BITS{1'b0}};
+    localparam [DEPTH_BITS-1:0] PTR_LAST = {DEPTH_BITS{1'b1}};
+
+    //
+    // Memory (NUM_WORDS entries of BUS_WIDTH bits)
+    //
+    reg [BUS_WIDTH-1:0] fifo[0:NUM_WORDS-1];
+
+    //
+    // Pointers (DEPTH_BITS wide, so incrementing past the last word wraps to zero)
+    //
+    reg [DEPTH_BITS-1:0] ptr_wr;
+    reg [DEPTH_BITS-1:0] ptr_rd;
+
+    //
+    // Output (read data is registered)
+    //
+    reg [BUS_WIDTH-1:0] d_out_reg;
+    assign d_out = d_out_reg;
+
+    //
+    // Write Pointer (cleared by rst, advances once per clock while wr_en is high)
+    //
+    always @(posedge clk)
+        if (rst)        ptr_wr <= PTR_ZERO;
+        else if (wr_en) ptr_wr <= ptr_wr + 1'b1;
+
+    //
+    // Read Pointer (cleared by rst, advances once per clock while rd_en is high)
+    //
+    always @(posedge clk)
+        if (rst)        ptr_rd <= PTR_ZERO;
+        else if (rd_en) ptr_rd <= ptr_rd + 1'b1;
+
+    //
+    // Read Logic (d_out_reg is cleared by rst and refreshed only while rd_en is high)
+    //
+    always @(posedge clk)
+        if (rst)        d_out_reg <= {BUS_WIDTH{1'b0}};
+        else if (rd_en) d_out_reg <= fifo[ptr_rd];
+
+    //
+    // Write Logic (memory is never cleared; writes are ignored while rst is high)
+    //
+    always @(posedge clk)
+        if (!rst && wr_en) fifo[ptr_wr] <= d_in;
+
+/* NOTE(review): disabled draft; it uses signals (wp, rp, gb, cnt, ...) that this module never declares.
+generic_dpram #(aw,dw) u0(
+ .rclk( clk ),
+ .rrst( !rst ),
+ .rce( 1'b1 ),
+ .oe( 1'b1 ),
+ .raddr( rp ),
+ .do( dout ),
+ .wclk( clk ),
+ .wrst( !rst ),
+ .wce( 1'b1 ),
+ .we( we ),
+ .waddr( wp ),
+ .di( din )
+ );
+
+////////////////////////////////////////////////////////////////////
+//
+// Misc Logic
+//
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+ if(!rst) wp <= #1 {aw{1'b0}};
+ else
+ if(clr) wp <= #1 {aw{1'b0}};
+ else
+ if(we) wp <= #1 wp_pl1;
+
+assign wp_pl1 = wp + { {aw-1{1'b0}}, 1'b1};
+assign wp_pl2 = wp + { {aw-2{1'b0}}, 2'b10};
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+ if(!rst) rp <= #1 {aw{1'b0}};
+ else
+ if(clr) rp <= #1 {aw{1'b0}};
+ else
+ if(re) rp <= #1 rp_pl1;
+
+assign rp_pl1 = rp + { {aw-1{1'b0}}, 1'b1};
+
+////////////////////////////////////////////////////////////////////
+//
+// Combinatorial Full & Empty Flags
+//
+
+assign empty = ((wp == rp) & !gb);
+assign full = ((wp == rp) & gb);
+
+// Guard Bit ...
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+ if(!rst) gb <= #1 1'b0;
+ else
+ if(clr) gb <= #1 1'b0;
+ else
+ if((wp_pl1 == rp) & we) gb <= #1 1'b1;
+ else
+ if(re) gb <= #1 1'b0;
+
+////////////////////////////////////////////////////////////////////
+//
+// Registered Full & Empty Flags
+//
+
+// Guard Bit ...
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+ if(!rst) gb2 <= #1 1'b0;
+ else
+ if(clr) gb2 <= #1 1'b0;
+ else
+ if((wp_pl2 == rp) & we) gb2 <= #1 1'b1;
+ else
+ if((wp != rp) & re) gb2 <= #1 1'b0;
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+ if(!rst) full_r <= #1 1'b0;
+ else
+ if(clr) full_r <= #1 1'b0;
+ else
+ if(we & ((wp_pl1 == rp) & gb2) & !re) full_r <= #1 1'b1;
+ else
+ if(re & ((wp_pl1 != rp) | !gb2) & !we) full_r <= #1 1'b0;
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+ if(!rst) empty_r <= #1 1'b1;
+ else
+ if(clr) empty_r <= #1 1'b1;
+ else
+ if(we & ((wp != rp_pl1) | gb2) & !re) empty_r <= #1 1'b0;
+ else
+ if(re & ((wp == rp_pl1) & !gb2) & !we) empty_r <= #1 1'b1;
+
+////////////////////////////////////////////////////////////////////
+//
+// Combinatorial Full_n & Empty_n Flags
+//
+
+assign empty_n = cnt < n;
+assign full_n = !(cnt < (max_size-n+1));
+assign level = {2{cnt[aw]}} | cnt[aw-1:aw-2];
+
+// N entries status
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+ if(!rst) cnt <= #1 {aw+1{1'b0}};
+ else
+ if(clr) cnt <= #1 {aw+1{1'b0}};
+ else
+ if( re & !we) cnt <= #1 cnt + { {aw{1'b1}}, 1'b1};
+ else
+ if(!re & we) cnt <= #1 cnt + { {aw{1'b0}}, 1'b1};
+
+////////////////////////////////////////////////////////////////////
+//
+// Registered Full_n & Empty_n Flags
+//
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+ if(!rst) empty_n_r <= #1 1'b1;
+ else
+ if(clr) empty_n_r <= #1 1'b1;
+ else
+ if(we & (cnt >= (n-1) ) & !re) empty_n_r <= #1 1'b0;
+ else
+ if(re & (cnt <= n ) & !we) empty_n_r <= #1 1'b1;
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+ if(!rst) full_n_r <= #1 1'b0;
+ else
+ if(clr) full_n_r <= #1 1'b0;
+ else
+ if(we & (cnt >= (max_size-n) ) & !re) full_n_r <= #1 1'b1;
+ else
+ if(re & (cnt <= (max_size-n+1)) & !we) full_n_r <= #1 1'b0;
+*/
+
+endmodule
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index 8cd28ff..382019c 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -600,7 +600,7 @@ module modexpa7_systolic_multiplier #
reg [31: 0] pe_a [0:SYSTOLIC_ARRAY_LENGTH-1];
reg [31: 0] pe_b [0:SYSTOLIC_ARRAY_LENGTH-1];
reg [31: 0] pe_t [0:SYSTOLIC_ARRAY_LENGTH-1];
- reg [31: 0] pe_c_in [0:SYSTOLIC_ARRAY_LENGTH-1];
+ wire [31: 0] pe_c_in [0:SYSTOLIC_ARRAY_LENGTH-1];
wire [31: 0] pe_p [0:SYSTOLIC_ARRAY_LENGTH-1];
wire [31: 0] pe_c_out[0:SYSTOLIC_ARRAY_LENGTH-1];
@@ -608,9 +608,49 @@ module modexpa7_systolic_multiplier #
//
// These can be turned into a FIFO (maybe later?)...
//
- reg [31: 0] pe_c_out_mem[0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
+ //reg [31: 0] pe_c_out_mem[0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
reg [31: 0] pe_t_mem [0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
+ reg fifo_c_rst; // drives the rst port of fifo_c; updated by the fsm_state-based always block
+
+ wire fifo_c_wren; // fifo_c write enable, assigned from shreg_now_unloading
+ wire fifo_c_rden; // fifo_c read enable, assigned from shreg_now_loading
+
+ wire debug_fifo_full; // in the visible code only the commented-out ip_fifo_c instance uses this
+ wire debug_fifo_empty; // in the visible code only the commented-out ip_fifo_c instance uses this
+
+ wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_c_din; // packed pe_c_out[]: 32 bits per PE, PE i at [32*(i+1)-1 -: 32]
+ wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_c_dout; // packed pe_c_in[]: same 32-bit-per-PE layout
+
+ /**/
+    // FIFO between the packed pe_c_out bus (fifo_c_din) and the packed pe_c_in bus (fifo_c_dout)
+    modexpa7_simple_fifo #(
+        .DEPTH_BITS (SYSTOLIC_CNTR_WIDTH),
+        .BUS_WIDTH  (32 * SYSTOLIC_ARRAY_LENGTH)
+    ) fifo_c (
+        .clk   (clk),
+        .rst   (fifo_c_rst),
+        // write side
+        .d_in  (fifo_c_din),
+        .wr_en (fifo_c_wren),
+        // read side
+        .d_out (fifo_c_dout),
+        .rd_en (fifo_c_rden)
+    );
+ /**/
+ /*
+ ip_fifo_c fifo_c
+ (
+ .clk (clk),
+ .srst (fifo_c_rst),
+ .wr_en (fifo_c_wren),
+ .din (fifo_c_din),
+ .rd_en (fifo_c_rden),
+ .dout (fifo_c_dout),
+ .full (debug_fifo_full),
+ .empty (debug_fifo_empty)
+ );*/
+
generate for (i=0; i<SYSTOLIC_ARRAY_LENGTH; i=i+1)
begin : modexpa7_systolic_pe_multiplier
modexpa7_systolic_pe systolic_pe_inst
@@ -623,10 +663,13 @@ module modexpa7_systolic_multiplier #
.p (pe_p[i]),
.c_out (pe_c_out[i])
);
+ assign pe_c_in[i] = fifo_c_dout[32 * (i + 1) - 1 -: 32];
+ assign fifo_c_din[32 * (i + 1) - 1 -: 32] = pe_c_out[i];
end
endgenerate
+
//
@@ -695,7 +738,23 @@ module modexpa7_systolic_multiplier #
endcase
-
+    assign fifo_c_rden = shreg_now_loading;    // FIFO read enable follows shreg_now_loading
+    assign fifo_c_wren = shreg_now_unloading;  // FIFO write enable follows shreg_now_unloading
+
+    // fifo_c_rst is set in each *_START state and cleared in the *_CRUNCH states
+    // once shreg_done_load is high; in every other case it keeps its value.
+    always @(posedge clk)
+        if ((fsm_state == FSM_STATE_MULT_A_B_START)        ||
+            (fsm_state == FSM_STATE_MULT_AB_N_COEFF_START) ||
+            (fsm_state == FSM_STATE_MULT_Q_N_START))
+            fifo_c_rst <= 1'b1;
+        else if (shreg_done_load &&
+                ((fsm_state == FSM_STATE_MULT_A_B_CRUNCH)        ||
+                 (fsm_state == FSM_STATE_MULT_AB_N_COEFF_CRUNCH) ||
+                 (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)))
+            fifo_c_rst <= 1'b0;
+
+
always @(posedge clk) begin
@@ -835,9 +894,9 @@ module modexpa7_systolic_multiplier #
FSM_STATE_MULT_AB_N_COEFF_CRUNCH,
FSM_STATE_MULT_Q_N_CRUNCH: begin
- if (shreg_now_unloading)
- for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
- pe_c_out_mem[j][syst_cnt_unload] <= pe_c_out[j];
+ //if (shreg_now_unloading)
+ //for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
+ //pe_c_out_mem[j][syst_cnt_unload] <= pe_c_out[j];
if (shreg_now_unloading) begin
@@ -867,12 +926,12 @@ module modexpa7_systolic_multiplier #
pe_a[j] <= (ab_addr_ext > {1'b0, a_addr}) ? 32'd0 : a_bram_out;
pe_b[j] <= loader_dout[j];
pe_t[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
- pe_c_in[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+ //pe_c_in[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
pe_t[j] <= 32'hXXXXXXXX;
- pe_c_in[j] <= 32'hXXXXXXXX;
+ //pe_c_in[j] <= 32'hXXXXXXXX;
end
//
if (fsm_state == FSM_STATE_MULT_AB_N_COEFF_CRUNCH)
@@ -883,12 +942,12 @@ module modexpa7_systolic_multiplier #
pe_a[j] <= ab_data_out;
pe_b[j] <= loader_dout[j];
pe_t[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
- pe_c_in[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+ //pe_c_in[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
pe_t[j] <= 32'hXXXXXXXX;
- pe_c_in[j] <= 32'hXXXXXXXX;
+ //pe_c_in[j] <= 32'hXXXXXXXX;
end
//
if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
@@ -899,12 +958,12 @@ module modexpa7_systolic_multiplier #
pe_a[j] <= (qn_addr_ext > {1'b0, q_addr}) ? 32'd0 : q_data_out;
pe_b[j] <= loader_dout[j];
pe_t[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
- pe_c_in[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+ //pe_c_in[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
pe_t[j] <= 32'hXXXXXXXX;
- pe_c_in[j] <= 32'hXXXXXXXX;
+ //pe_c_in[j] <= 32'hXXXXXXXX;
end
//