about | summary | refs | log | tree | commit | diff
path: root/src/rtl
diff options
context:
space:
mode:
Diffstat (limited to 'src/rtl')
-rw-r--r-- src/rtl/modexpa7_systolic_multiplier.v | 25
1 files changed, 15 insertions, 10 deletions
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index 513b5aa..8cd28ff 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -652,6 +652,11 @@ module modexpa7_systolic_multiplier #
always @(posedge clk)
//
case (fsm_state)
+ FSM_STATE_LOAD_N_FINAL: begin
+ shreg_load <= {{SYSTOLIC_NUM_CYCLES-1{1'b0}}, 1'b0};
+ shreg_latency <= {{SYSTOLIC_PE_LATENCY{1'b0}}, 1'b0};
+ shreg_unload <= {{SYSTOLIC_NUM_CYCLES-1{1'b0}}, 1'b0};
+ end
//
FSM_STATE_MULT_A_B_START,
FSM_STATE_MULT_AB_N_COEFF_START,
@@ -832,17 +837,17 @@ module modexpa7_systolic_multiplier #
if (shreg_now_unloading)
for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
- pe_c_out_mem[syst_cnt_unload][j] <= pe_c_out[j];
+ pe_c_out_mem[j][syst_cnt_unload] <= pe_c_out[j];
if (shreg_now_unloading) begin
for (j=1; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
- pe_t_mem[syst_cnt_unload][j-1] <= pe_p[j];
+ pe_t_mem[j-1][syst_cnt_unload] <= pe_p[j];
if (syst_cnt_unload > syst_cnt_zero)
- pe_t_mem[syst_cnt_unload-1'b1][SYSTOLIC_ARRAY_LENGTH-1] <= pe_p[0];
+ pe_t_mem[SYSTOLIC_ARRAY_LENGTH-1][syst_cnt_unload-1'b1] <= pe_p[0];
else
- pe_t_mem[syst_cnt_last][SYSTOLIC_ARRAY_LENGTH-1] <= 32'd0;
+ pe_t_mem[SYSTOLIC_ARRAY_LENGTH-1][syst_cnt_last] <= 32'd0;
end
end
@@ -861,8 +866,8 @@ module modexpa7_systolic_multiplier #
if (shreg_now_loading) begin
pe_a[j] <= (ab_addr_ext > {1'b0, a_addr}) ? 32'd0 : a_bram_out;
pe_b[j] <= loader_dout[j];
- pe_t[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[syst_cnt_load_dly][j];
- pe_c_in[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[syst_cnt_load_dly][j];
+ pe_t[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
+ pe_c_in[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
@@ -877,8 +882,8 @@ module modexpa7_systolic_multiplier #
if (shreg_now_loading) begin
pe_a[j] <= ab_data_out;
pe_b[j] <= loader_dout[j];
- pe_t[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_t_mem[syst_cnt_load_dly][j];
- pe_c_in[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[syst_cnt_load_dly][j];
+ pe_t[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
+ pe_c_in[j] <= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;
@@ -893,8 +898,8 @@ module modexpa7_systolic_multiplier #
if (shreg_now_loading) begin
pe_a[j] <= (qn_addr_ext > {1'b0, q_addr}) ? 32'd0 : q_data_out;
pe_b[j] <= loader_dout[j];
- pe_t[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[syst_cnt_load_dly][j];
- pe_c_in[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[syst_cnt_load_dly][j];
+ pe_t[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
+ pe_c_in[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
end else begin
pe_a[j] <= 32'hXXXXXXXX;
pe_b[j] <= 32'hXXXXXXXX;