aboutsummaryrefslogtreecommitdiff
path: root/src/rtl
diff options
context:
space:
mode:
Diffstat (limited to 'src/rtl')
-rw-r--r--src/rtl/modexpa7_systolic_multiplier.v93
1 files changed, 81 insertions, 12 deletions
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index 382019c..378dc63 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -41,7 +41,7 @@ module modexpa7_systolic_multiplier #
//
// This sets the address widths of memory buffers. Internal data
// width is 32 bits, so for e.g. 2048-bit operands buffers must store
- // 2048 / 32 = 64 words, and these need 5-bit address bus, because
+ // 2048 / 32 = 64 words, and these need 6-bit address bus, because
// 2 ** 6 = 64.
//
parameter OPERAND_ADDR_WIDTH = 4,
@@ -599,7 +599,7 @@ module modexpa7_systolic_multiplier #
//
reg [31: 0] pe_a [0:SYSTOLIC_ARRAY_LENGTH-1];
reg [31: 0] pe_b [0:SYSTOLIC_ARRAY_LENGTH-1];
- reg [31: 0] pe_t [0:SYSTOLIC_ARRAY_LENGTH-1];
+ wire [31: 0] pe_t [0:SYSTOLIC_ARRAY_LENGTH-1];
wire [31: 0] pe_c_in [0:SYSTOLIC_ARRAY_LENGTH-1];
wire [31: 0] pe_p [0:SYSTOLIC_ARRAY_LENGTH-1];
wire [31: 0] pe_c_out[0:SYSTOLIC_ARRAY_LENGTH-1];
@@ -609,19 +609,23 @@ module modexpa7_systolic_multiplier #
// These can be turned into a FIFO (maybe later?)...
//
//reg [31: 0] pe_c_out_mem[0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
- reg [31: 0] pe_t_mem [0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
+ //reg [31: 0] pe_t_mem [0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
reg fifo_c_rst;
+ reg fifo_t_rst;
wire fifo_c_wren;
wire fifo_c_rden;
- wire debug_fifo_full;
- wire debug_fifo_empty;
-
+ wire fifo_t_wren;
+ wire fifo_t_rden;
+
wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_c_din;
wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_c_dout;
+ wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_t_din;
+ wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] fifo_t_dout;
+
/**/
modexpa7_simple_fifo #
(
@@ -637,6 +641,36 @@ module modexpa7_systolic_multiplier #
.rd_en (fifo_c_rden),
.d_out (fifo_c_dout)
);
+
+ modexpa7_simple_fifo #
+ (
+ .BUS_WIDTH (32 * SYSTOLIC_ARRAY_LENGTH), // one 32-bit word per systolic array element, packed side by side
+ .DEPTH_BITS (SYSTOLIC_CNTR_WIDTH)
+ )
+ fifo_t // FIFO whose output words are sliced into pe_t[i] inside the generate loop
+ (
+ .clk (clk),
+ .rst (fifo_t_rst), // set/cleared by the FSM-state always block for fifo_t_rst
+ .wr_en (fifo_t_wren), // = shreg_now_unloading_dly
+ .d_in (fifo_t_din), // = {pe_p_lsb_masked, pe_p_msb_dly}
+ .rd_en (fifo_t_rden), // = shreg_now_loading
+ .d_out (fifo_t_dout) // slice [32*(i+1)-1 -: 32] drives pe_t[i]
+ );
+
+ /*
+ ip_fifo_t fifo_t
+ (
+ .clk (clk),
+ .srst (fifo_t_rst),
+ .wr_en (fifo_t_wren),
+ .din (fifo_t_din),
+ .rd_en (fifo_t_rden),
+ .dout (fifo_t_dout),
+ .full (),
+ .empty ()
+ );
+ */
+
/**/
/*
ip_fifo_c fifo_c
@@ -664,6 +698,7 @@ module modexpa7_systolic_multiplier #
.c_out (pe_c_out[i])
);
assign pe_c_in[i] = fifo_c_dout[32 * (i + 1) - 1 -: 32];
+ assign pe_t[i] = fifo_t_dout[32 * (i + 1) - 1 -: 32];
assign fifo_c_din[32 * (i + 1) - 1 -: 32] = pe_c_out[i];
end
endgenerate
@@ -749,11 +784,43 @@ module modexpa7_systolic_multiplier #
FSM_STATE_MULT_AB_N_COEFF_CRUNCH,
FSM_STATE_MULT_Q_N_CRUNCH: if (shreg_done_load) fifo_c_rst <= 1'b0;
endcase
+
+ always @(posedge clk)
+ // fifo_t_rst: set in the three *_START states, cleared in the three *_CRUNCH states once shreg_done_load is high; not assigned in any other state.
+ case (fsm_state)
+ FSM_STATE_MULT_A_B_START,
+ FSM_STATE_MULT_AB_N_COEFF_START,
+ FSM_STATE_MULT_Q_N_START: fifo_t_rst <= 1'b1; // assert reset at the start of each multiplication phase
+
+ FSM_STATE_MULT_A_B_CRUNCH,
+ FSM_STATE_MULT_AB_N_COEFF_CRUNCH,
+ FSM_STATE_MULT_Q_N_CRUNCH: if (shreg_done_load) fifo_t_rst <= 1'b0; // same release condition as fifo_c_rst
+ endcase
+
+
+ reg [32 * (SYSTOLIC_ARRAY_LENGTH - 1) - 1 : 0] pe_p_msb_dly; // registered copy of pe_p[1] .. pe_p[SYSTOLIC_ARRAY_LENGTH-1]
+
+ always @(posedge clk)
+ // pe_p[j] is stored in the 32-bit slice whose top bit is 32*j-1, so pe_p[1] occupies bits [31:0]; pe_p[0] is not registered here.
+ for (j=1; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
+ pe_p_msb_dly[32 * j - 1 -: 32] <= pe_p[j];
+
+ wire [31: 0] pe_p_lsb_masked = shreg_now_unloading ? pe_p[0] : 32'd0; // unregistered pe_p[0], forced to zero when shreg_now_unloading is low
+ assign fifo_t_din = {pe_p_lsb_masked, pe_p_msb_dly}; // masked pe_p[0] in the top 32 bits, registered pe_p[1..] words below it
+
+ reg shreg_now_unloading_dly; // shreg_now_unloading delayed by one clock
+ always @(posedge clk)
+ shreg_now_unloading_dly <= shreg_now_unloading;
+
assign fifo_c_wren = shreg_now_unloading;
assign fifo_c_rden = shreg_now_loading;
+ assign fifo_t_wren = shreg_now_unloading_dly; // fifo_t write enable lags fifo_c_wren by one clock, matching the registered pe_p_msb_dly part of fifo_t_din
+
+ assign fifo_t_rden = shreg_now_loading; // same read enable as fifo_c_rden
+
@@ -887,6 +954,7 @@ module modexpa7_systolic_multiplier #
end
endcase
+ /*
always @(posedge clk)
//
case (fsm_state)
@@ -911,6 +979,7 @@ module modexpa7_systolic_multiplier #
end
end
endcase
+ */
//
@@ -925,12 +994,12 @@ module modexpa7_systolic_multiplier #
if (shreg_now_loading) begin
pe_a[j] <= (ab_addr_ext > {1'b0, a_addr}) ? 32'd0 : a_bram_out;
pe_b[j] <= loader_dout[j];
- pe_t[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
+ //pe_t[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
//pe_c_in[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
- pe_t[j] <= 32'hXXXXXXXX;
+ //pe_t[j] <= 32'hXXXXXXXX;
//pe_c_in[j] <= 32'hXXXXXXXX;
end
//
@@ -941,12 +1010,12 @@ module modexpa7_systolic_multiplier #
if (shreg_now_loading) begin
pe_a[j] <= ab_data_out;
pe_b[j] <= loader_dout[j];
- pe_t[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
+ //pe_t[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
//pe_c_in[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
- pe_t[j] <= 32'hXXXXXXXX;
+ //pe_t[j] <= 32'hXXXXXXXX;
//pe_c_in[j] <= 32'hXXXXXXXX;
end
//
@@ -957,12 +1026,12 @@ module modexpa7_systolic_multiplier #
if (shreg_now_loading) begin
pe_a[j] <= (qn_addr_ext > {1'b0, q_addr}) ? 32'd0 : q_data_out;
pe_b[j] <= loader_dout[j];
- pe_t[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
+ //pe_t[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
//pe_c_in[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
- pe_t[j] <= 32'hXXXXXXXX;
+ //pe_t[j] <= 32'hXXXXXXXX;
//pe_c_in[j] <= 32'hXXXXXXXX;
end
//