about summary refs log tree commit diff
path: root/src/rtl
diff options
context:
space:
mode:
Diffstat (limited to 'src/rtl')
-rw-r--r-- src/rtl/modexpa7_exponentiator.v 344
-rw-r--r-- src/rtl/modexpa7_systolic_multiplier.v 21
-rw-r--r-- src/rtl/modexpa7_systolic_multiplier_array.v 11
-rw-r--r-- src/rtl/pe/modexpa7_primitive_switch.v 2
4 files changed, 246 insertions, 132 deletions
diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v
index b33360a..93c8047 100644
--- a/src/rtl/modexpa7_exponentiator.v
+++ b/src/rtl/modexpa7_exponentiator.v
@@ -58,6 +58,8 @@ module modexpa7_exponentiator #
input ena,
output rdy,
+ input crt,
+
output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr,
output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr,
output [OPERAND_ADDR_WIDTH-1:0] f_bram_addr,
@@ -86,76 +88,120 @@ module modexpa7_exponentiator #
//
// FSM Declaration
//
- localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00;
+ localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00;
+ //
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1;
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2;
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_3 = 8'hA3;
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_4 = 8'hA4;
+
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_1 = 8'hB1;
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_2 = 8'hB2;
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_3 = 8'hB3;
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_4 = 8'hB4;
+
+ localparam [ 7: 0] FSM_STATE_EXP_CALC_1 = 8'hC1;
+ localparam [ 7: 0] FSM_STATE_EXP_CALC_2 = 8'hC2;
+ localparam [ 7: 0] FSM_STATE_EXP_CALC_3 = 8'hC3;
+
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_1 = 8'hD1;
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_2 = 8'hD2;
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_3 = 8'hD3;
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_4 = 8'hD4;
+
+ localparam [ 7: 0] FSM_STATE_EXP_NEXT = 8'hE0;
+
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_1 = 8'hF1;
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_2 = 8'hF2;
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_3 = 8'hF3;
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_4 = 8'hF4;
+ //
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_1 = 8'h11;
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_2 = 8'h12;
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_3 = 8'h13;
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_4 = 8'h14;
+
+ localparam [ 7: 0] FSM_STATE_MUL_CALC_1 = 8'h21;
+ localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22;
+ localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23;
//
- localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1;
- localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2;
- localparam [ 7: 0] FSM_STATE_EXP_INIT_3 = 8'hA3;
- localparam [ 7: 0] FSM_STATE_EXP_INIT_4 = 8'hA4;
-
- localparam [ 7: 0] FSM_STATE_EXP_LOAD_1 = 8'hB1;
- localparam [ 7: 0] FSM_STATE_EXP_LOAD_2 = 8'hB2;
- localparam [ 7: 0] FSM_STATE_EXP_LOAD_3 = 8'hB3;
- localparam [ 7: 0] FSM_STATE_EXP_LOAD_4 = 8'hB4;
-
- localparam [ 7: 0] FSM_STATE_EXP_CALC_1 = 8'hC1;
- localparam [ 7: 0] FSM_STATE_EXP_CALC_2 = 8'hC2;
- localparam [ 7: 0] FSM_STATE_EXP_CALC_3 = 8'hC3;
-
- localparam [ 7: 0] FSM_STATE_EXP_FILL_1 = 8'hD1;
- localparam [ 7: 0] FSM_STATE_EXP_FILL_2 = 8'hD2;
- localparam [ 7: 0] FSM_STATE_EXP_FILL_3 = 8'hD3;
- localparam [ 7: 0] FSM_STATE_EXP_FILL_4 = 8'hD4;
-
- localparam [ 7: 0] FSM_STATE_EXP_NEXT = 8'hE0;
-
- localparam [ 7: 0] FSM_STATE_EXP_SAVE_1 = 8'hF1;
- localparam [ 7: 0] FSM_STATE_EXP_SAVE_2 = 8'hF2;
- localparam [ 7: 0] FSM_STATE_EXP_SAVE_3 = 8'hF3;
- localparam [ 7: 0] FSM_STATE_EXP_SAVE_4 = 8'hF4;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_A_1 = 8'h31;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_A_2 = 8'h32;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_A_3 = 8'h33;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_A_4 = 8'h34;
+
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_A_1 = 8'h41;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_A_2 = 8'h42;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_A_3 = 8'h43;
//
- localparam [ 7: 0] FSM_STATE_MUL_INIT_1 = 8'h11;
- localparam [ 7: 0] FSM_STATE_MUL_INIT_2 = 8'h12;
- localparam [ 7: 0] FSM_STATE_MUL_INIT_3 = 8'h13;
- localparam [ 7: 0] FSM_STATE_MUL_INIT_4 = 8'h14;
-
- localparam [ 7: 0] FSM_STATE_MUL_CALC_1 = 8'h21;
- localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22;
- localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_B_1 = 8'h51;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_B_2 = 8'h52;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_B_3 = 8'h53;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_B_4 = 8'h54;
+
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_B_1 = 8'h61;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_B_2 = 8'h62;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_B_3 = 8'h63;
//
- localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_C_1 = 8'h71;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_C_2 = 8'h72;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_C_3 = 8'h73;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_C_4 = 8'h74;
+
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_C_1 = 8'h81;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_C_2 = 8'h82;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_C_3 = 8'h83;
+ //
+ localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF;
/*
* //
*
- * MUL_INIT: P1 = F
- * P2 = F
- * P3 = F
- * T2 = M
+ * MUL_INIT: P1 <= F
+ * P2 <= F
+ * P3 <= F
+ * T2 <= M
*
- * MUL_CALC: TP = T2 * P3
+ * MUL_CALC: TP = T2 * P3
*
* //
*
- * EXP_INIT: P1 <= TP
- * P2 <= TP
- * P3 <= TP
- * T1 <= 1
- * T2 <= 1
+ * CRT_INIT_A: T2 <= M
+ *
+ * CRT_CALC_A: TP = T2 * P3 ("reduce only")
*
- * EXP_LOAD: T0 <= T1
+ * CRT_INIT_B: P1 <= F
+ * P2 <= F
+ * P3 <= F
+ * T2 <= TP
*
- * EXP_CALC: PP = P1 * P2
- * TP = T2 * P3
+ * CRT_CALC_B: TP = T2 * P3
*
- * EXP_FILL: P1 <= PP
- * P2 <= PP
- * P3 <= PP
- * T1 <= D[i] ? TP : T0
- * T2 <= D[i] ? TP : T0
+ * CRT_INIT_C: T2 <= TP
*
- * EXP_SAVE: R <= T1
+ * CRT_CALC_C: TP = T2 * P3
+ *
+ * //
+ *
+ * EXP_INIT: P1 <= TP
+ * P2 <= TP
+ * P3 <= TP
+ * T1 <= 1
+ * T2 <= 1
+ *
+ * EXP_LOAD: T0 <= T1
+ *
+ * EXP_CALC: PP = P1 * P2
+ * TP = T2 * P3
+ *
+ * EXP_FILL: P1 <= PP
+ * P2 <= PP
+ * P3 <= PP
+ * T1 <= D[i] ? TP : T0
+ * T2 <= D[i] ? TP : T0
+ *
+ * EXP_SAVE: R <= T1
*
* //
*
@@ -225,10 +271,12 @@ module modexpa7_exponentiator #
*/
/* the very first addresses */
- wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}};
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}};
/* the very last addresses */
- wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {m_num_words_latch};
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {m_num_words_latch};
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last_crt = // last address used by the *_done_crt stop flags
+ {m_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1}; // latch shifted left by one with LSB set (2*latch+1); assumes the latch is OPERAND_ADDR_WIDTH bits wide, so its MSB is dropped - TODO confirm
/* address registers */
reg [OPERAND_ADDR_WIDTH-1:0] m_addr;
@@ -261,16 +309,18 @@ module modexpa7_exponentiator #
wire [OPERAND_ADDR_WIDTH-1:0] tp_addr_rd_next = tp_addr_rd + 1'b1;
/* handy stop flags */
- wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire f_addr_done = (f_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire t2_addr_wr_done = (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
- wire p_addr_wr_done = (p_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
- wire pp_addr_rd_done = (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
- wire tp_addr_rd_done = (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
+ wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire m_addr_done_crt = (m_addr == bram_addr_last_crt) ? 1'b1 : 1'b0;
+ wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire f_addr_done = (f_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t2_addr_wr_done = (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t2_addr_wr_done_crt = (t2_addr_wr == bram_addr_last_crt) ? 1'b1 : 1'b0;
+ wire p_addr_wr_done = (p_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire pp_addr_rd_done = (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
+ wire tp_addr_rd_done = (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
/* map registers to top-level ports */
assign m_bram_addr = m_addr;
@@ -392,10 +442,15 @@ module modexpa7_exponentiator #
// m_addr
//
case (fsm_next_state)
- FSM_STATE_MUL_INIT_1: m_addr <= bram_addr_zero;
+ FSM_STATE_MUL_INIT_1: m_addr <= bram_addr_zero;
FSM_STATE_MUL_INIT_2,
FSM_STATE_MUL_INIT_3,
- FSM_STATE_MUL_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr;
+ FSM_STATE_MUL_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr;
+ //
+ FSM_STATE_CRT_INIT_A_1: m_addr <= bram_addr_zero;
+ FSM_STATE_CRT_INIT_A_2,
+ FSM_STATE_CRT_INIT_A_3,
+ FSM_STATE_CRT_INIT_A_4: m_addr <= !m_addr_done_crt ? m_addr_next : m_addr;
endcase
//
// d_addr
@@ -472,7 +527,10 @@ module modexpa7_exponentiator #
//
FSM_STATE_MUL_INIT_3: t2_addr_wr <= bram_addr_zero;
FSM_STATE_MUL_INIT_4: t2_addr_wr <= t2_addr_wr_next;
-
+ //
+ FSM_STATE_CRT_INIT_A_3: t2_addr_wr <= bram_addr_zero;
+ FSM_STATE_CRT_INIT_A_4: t2_addr_wr <= t2_addr_wr_next;
+ //
FSM_STATE_EXP_INIT_3: t2_addr_wr <= bram_addr_zero;
FSM_STATE_EXP_INIT_4: t2_addr_wr <= t2_addr_wr_next;
//
@@ -554,6 +612,8 @@ module modexpa7_exponentiator #
case (fsm_next_state)
FSM_STATE_MUL_INIT_3,
FSM_STATE_MUL_INIT_4,
+ FSM_STATE_CRT_INIT_A_3,
+ FSM_STATE_CRT_INIT_A_4,
FSM_STATE_EXP_INIT_3,
FSM_STATE_EXP_INIT_4,
FSM_STATE_EXP_FILL_3,
@@ -616,15 +676,19 @@ module modexpa7_exponentiator #
//
case (fsm_next_state)
//
- FSM_STATE_MUL_INIT_3,
- FSM_STATE_MUL_INIT_4: t2_data_in <= m_bram_out;
+ FSM_STATE_MUL_INIT_3,
+ FSM_STATE_MUL_INIT_4: t2_data_in <= m_bram_out;
+ //
+ FSM_STATE_CRT_INIT_A_3,
+ FSM_STATE_CRT_INIT_A_4: t2_data_in <= m_bram_out;
+
//
- FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1;
- FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0;
+ FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1;
+ FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0;
//
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
- default: t2_data_in <= 32'dX;
+ FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+ default: t2_data_in <= 32'dX;
endcase
//
end
@@ -634,6 +698,7 @@ module modexpa7_exponentiator #
// Double Multiplier
//
reg mul_ena;
+ reg mul_crt;
wire mul_rdy_pp;
wire mul_rdy_tp;
wire mul_rdy_all = mul_rdy_pp & mul_rdy_tp;
@@ -651,6 +716,8 @@ module modexpa7_exponentiator #
.ena (mul_ena),
.rdy (mul_rdy_pp),
+ .reduce_only (1'b0),
+
.a_bram_addr (p1_addr_rd),
.b_bram_addr (p2_addr_rd),
.n_bram_addr (n1_bram_addr),
@@ -681,6 +748,8 @@ module modexpa7_exponentiator #
.ena (mul_ena),
.rdy (mul_rdy_tp),
+ .reduce_only (mul_crt),
+
.a_bram_addr (t2_addr_rd),
.b_bram_addr (p3_addr_rd),
.n_bram_addr (n2_bram_addr),
@@ -703,8 +772,18 @@ module modexpa7_exponentiator #
//
case (fsm_next_state)
FSM_STATE_MUL_CALC_1,
- FSM_STATE_EXP_CALC_1: mul_ena <= 1'b1;
- default: mul_ena <= 1'b0;
+ FSM_STATE_CRT_CALC_A_1,
+ FSM_STATE_CRT_CALC_B_1,
+ FSM_STATE_CRT_CALC_C_1,
+ FSM_STATE_EXP_CALC_1: mul_ena <= 1'b1;
+ default: mul_ena <= 1'b0;
+ endcase
+
+ always @(posedge clk)
+ // mul_crt is registered from fsm_next_state: it is set only on the edge where the FSM is about to enter CRT_CALC_A_1
+ case (fsm_next_state)
+ FSM_STATE_CRT_CALC_A_1: mul_crt <= 1'b1; // wired to .reduce_only of the multiplier instance whose rdy is mul_rdy_tp
+ default: mul_crt <= 1'b0; // cleared for every other next state, including CRT_CALC_B_1 / CRT_CALC_C_1
+ endcase
@@ -726,53 +805,70 @@ module modexpa7_exponentiator #
//
case (fsm_state)
//
- FSM_STATE_MUL_INIT_1: fsm_next_state = FSM_STATE_MUL_INIT_2;
- FSM_STATE_MUL_INIT_2: fsm_next_state = FSM_STATE_MUL_INIT_3;
- FSM_STATE_MUL_INIT_3: fsm_next_state = FSM_STATE_MUL_INIT_4;
- FSM_STATE_MUL_INIT_4: if (t2_addr_wr_done) fsm_next_state = FSM_STATE_MUL_CALC_1;
- else fsm_next_state = FSM_STATE_MUL_INIT_4;
- //
- FSM_STATE_MUL_CALC_1: fsm_next_state = FSM_STATE_MUL_CALC_2;
- FSM_STATE_MUL_CALC_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_MUL_CALC_3;
- else fsm_next_state = FSM_STATE_MUL_CALC_2;
- FSM_STATE_MUL_CALC_3: fsm_next_state = FSM_STATE_EXP_INIT_1;
- //
- FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_MUL_INIT_1;
- else fsm_next_state = FSM_STATE_EXP_IDLE;
- //
- FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2;
- FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3;
- FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4;
- FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1;
- else fsm_next_state = FSM_STATE_EXP_INIT_4;
- //
- FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2;
- FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3;
- FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4;
- FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1;
- else fsm_next_state = FSM_STATE_EXP_LOAD_4;
- //
- FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2;
- FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3;
- else fsm_next_state = FSM_STATE_EXP_CALC_2;
- FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1;
- //
- FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2;
- FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3;
- FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4;
- FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT;
- else fsm_next_state = FSM_STATE_EXP_FILL_4;
- //
- FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1;
- else fsm_next_state = FSM_STATE_EXP_LOAD_1;
- //
- FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2;
- FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3;
- FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4;
- FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP;
- else fsm_next_state = FSM_STATE_EXP_SAVE_4;
- //
- FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE;
+ //
+ FSM_STATE_MUL_INIT_1: fsm_next_state = FSM_STATE_MUL_INIT_2;
+ FSM_STATE_MUL_INIT_2: fsm_next_state = FSM_STATE_MUL_INIT_3;
+ FSM_STATE_MUL_INIT_3: fsm_next_state = FSM_STATE_MUL_INIT_4;
+ FSM_STATE_MUL_INIT_4: if (t2_addr_wr_done) fsm_next_state = FSM_STATE_MUL_CALC_1;
+ else fsm_next_state = FSM_STATE_MUL_INIT_4;
+ //
+ FSM_STATE_MUL_CALC_1: fsm_next_state = FSM_STATE_MUL_CALC_2;
+ FSM_STATE_MUL_CALC_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_MUL_CALC_3;
+ else fsm_next_state = FSM_STATE_MUL_CALC_2;
+ FSM_STATE_MUL_CALC_3: fsm_next_state = FSM_STATE_EXP_INIT_1;
+ //
+ // CRT_INIT_A: M is copied into T2; the _4 state repeats until t2_addr_wr reaches bram_addr_last_crt
+ FSM_STATE_CRT_INIT_A_1: fsm_next_state = FSM_STATE_CRT_INIT_A_2;
+ FSM_STATE_CRT_INIT_A_2: fsm_next_state = FSM_STATE_CRT_INIT_A_3;
+ FSM_STATE_CRT_INIT_A_3: fsm_next_state = FSM_STATE_CRT_INIT_A_4;
+ FSM_STATE_CRT_INIT_A_4: if (t2_addr_wr_done_crt) fsm_next_state = FSM_STATE_CRT_CALC_A_1;
+ else fsm_next_state = FSM_STATE_CRT_INIT_A_4;
+
+ // CRT_CALC_A: multiplier is started (mul_ena, mul_crt set on entry to _1), then _2 waits for mul_rdy_tp
+ FSM_STATE_CRT_CALC_A_1: fsm_next_state = FSM_STATE_CRT_CALC_A_2;
+ FSM_STATE_CRT_CALC_A_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_CRT_CALC_A_3;
+ else fsm_next_state = FSM_STATE_CRT_CALC_A_2;
+ FSM_STATE_CRT_CALC_A_3: fsm_next_state = FSM_STATE_EXP_INIT_1; // NOTE(review): CRT_INIT_B/C and CRT_CALC_B/C are declared but have no transitions in this hunk - confirm that skipping them is intentional
+ //
+ // IDLE: on ena_trig the crt input picks the CRT_INIT_A path, otherwise the plain MUL_INIT path
+ FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = crt ?
+ FSM_STATE_CRT_INIT_A_1 : FSM_STATE_MUL_INIT_1;
+ else fsm_next_state = FSM_STATE_EXP_IDLE;
+ //
+ //
+ FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2;
+ FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3;
+ FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4;
+ FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1;
+ else fsm_next_state = FSM_STATE_EXP_INIT_4;
+ //
+ FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2;
+ FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3;
+ FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4;
+ FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1;
+ else fsm_next_state = FSM_STATE_EXP_LOAD_4;
+ //
+ FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2;
+ FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3;
+ else fsm_next_state = FSM_STATE_EXP_CALC_2;
+ FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1;
+ //
+ FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2;
+ FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3;
+ FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4;
+ FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT;
+ else fsm_next_state = FSM_STATE_EXP_FILL_4;
+ //
+ FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1;
+ else fsm_next_state = FSM_STATE_EXP_LOAD_1;
+ //
+ FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2;
+ FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3;
+ FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4;
+ FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP;
+ else fsm_next_state = FSM_STATE_EXP_SAVE_4;
+ //
+ FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE;
//
endcase
//
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index 7293998..444693d 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -57,6 +57,8 @@ module modexpa7_systolic_multiplier #
input ena,
output rdy,
+
+ input reduce_only,
output [OPERAND_ADDR_WIDTH-1:0] a_bram_addr,
output [OPERAND_ADDR_WIDTH-1:0] b_bram_addr,
@@ -155,7 +157,8 @@ module modexpa7_systolic_multiplier #
* Parameters Latch
*/
reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch;
- reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch;
+ reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch;
+ reg reduce_only_latch;
// save number of words in n when new operation starts
always @(posedge clk)
@@ -163,7 +166,12 @@ module modexpa7_systolic_multiplier #
if ((fsm_state == FSM_STATE_IDLE) && ena_trig)
n_num_words_latch <= n_num_words;
+ always @(posedge clk)
+ // capture reduce_only under the same condition that latches n_num_words (FSM idle and ena_trig), so it only changes when a new operation starts
+ if ((fsm_state == FSM_STATE_IDLE) && ena_trig)
+ reduce_only_latch <= reduce_only; // combined with mult_phase_ab to drive the .crt input of the systolic array
+
/*
* Multiplication Phase
*/
@@ -174,6 +182,7 @@ module modexpa7_systolic_multiplier #
reg [ 1: 0] mult_phase;
+ wire mult_phase_ab = (mult_phase == MULT_PHASE_A_B) ? 1'b1 : 1'b0;
wire mult_phase_done = (mult_phase == MULT_PHASE_STALL) ? 1'b1 : 1'b0;
always @(posedge clk)
@@ -296,6 +305,7 @@ module modexpa7_systolic_multiplier #
wire [OPERAND_ADDR_WIDTH :0] bram_addr_ext_last = {n_num_words_latch, 1'b1};
// address registers
+ wire [OPERAND_ADDR_WIDTH-1:0] a_addr;
reg [OPERAND_ADDR_WIDTH-1:0] b_addr;
reg [OPERAND_ADDR_WIDTH-1:0] n_addr;
wire [OPERAND_ADDR_WIDTH :0] p_addr_ext_wr;
@@ -570,8 +580,9 @@ module modexpa7_systolic_multiplier #
MULT_PHASE_Q_N: p_num_words_latch <= {n_num_words_latch, 1'b1};
endcase
- assign n_coeff_bram_addr = a_bram_addr;
- assign q_addr_rd = a_bram_addr;
+ assign a_bram_addr = a_addr;
+ assign n_coeff_bram_addr = a_addr;
+ assign q_addr_rd = a_addr;
reg [31: 0] a_data_out;
@@ -597,12 +608,14 @@ module modexpa7_systolic_multiplier #
.ena (pe_array_ena),
.rdy (pe_array_rdy),
+ .crt (reduce_only_latch && mult_phase_ab),
+
.loader_addr_rd (loader_addr_rd),
.pe_a_wide ({SYSTOLIC_ARRAY_LENGTH{a_data_out}}),
.pe_b_wide (pe_b_wide),
- .a_bram_addr (a_bram_addr),
+ .a_bram_addr (a_addr),
.p_bram_addr (p_addr_ext_wr),
.p_bram_in (p_data_in),
diff --git a/src/rtl/modexpa7_systolic_multiplier_array.v b/src/rtl/modexpa7_systolic_multiplier_array.v
index 754203d..3280010 100644
--- a/src/rtl/modexpa7_systolic_multiplier_array.v
+++ b/src/rtl/modexpa7_systolic_multiplier_array.v
@@ -48,6 +48,8 @@ module modexpa7_systolic_multiplier_array #
input ena,
output rdy,
+ input crt,
+
output [OPERAND_ADDR_WIDTH - SYSTOLIC_ARRAY_POWER - 1 : 0] loader_addr_rd,
input [ 32 * (2 ** SYSTOLIC_ARRAY_POWER) - 1 : 0] pe_a_wide,
@@ -385,6 +387,8 @@ module modexpa7_systolic_multiplier_array #
// the very last address
wire [OPERAND_ADDR_WIDTH - 1 : 0] bram_addr_last = n_num_words_latch;
+ wire [OPERAND_ADDR_WIDTH - 1 : 0] bram_addr_last_crt =
+ {n_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1};
wire [OPERAND_ADDR_WIDTH : 0] bram_addr_ext_last = p_num_words_latch;
// registers
@@ -398,8 +402,9 @@ module modexpa7_systolic_multiplier_array #
wire [OPERAND_ADDR_WIDTH : 0] p_addr_next = p_addr + 1'b1;
// handy flags
- wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire p_addr_done = (p_addr == bram_addr_ext_last) ? 1'b1 : 1'b0;
+ wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire a_addr_done_crt = (a_addr == bram_addr_last_crt) ? 1'b1 : 1'b0;
+ wire p_addr_done = (p_addr == bram_addr_ext_last) ? 1'b1 : 1'b0;
// map top-level ports to internal registers
assign a_bram_addr = a_addr;
@@ -452,7 +457,7 @@ module modexpa7_systolic_multiplier_array #
//
// a_addr update: reset to the first address when a multiplication starts,
// then step forward on every reload until the applicable stop flag is set
case (fsm_next_state)
FSM_STATE_MULT_START: a_addr <= bram_addr_zero;
- FSM_STATE_MULT_RELOAD: a_addr <= !a_addr_done ? a_addr_next : a_addr;
+ // the stop flag depends on the mode: a_addr_done_crt (a_addr == bram_addr_last_crt)
+ // when crt is set, a_addr_done (a_addr == bram_addr_last) otherwise
+ // NOTE(review): the committed line ended in a dangling "crt ?" with the assignment
+ // commented out; selecting the flag by crt is the presumed intent - confirm
+ FSM_STATE_MULT_RELOAD: a_addr <= !(crt ? a_addr_done_crt : a_addr_done) ? a_addr_next : a_addr;
endcase
//
end
diff --git a/src/rtl/pe/modexpa7_primitive_switch.v b/src/rtl/pe/modexpa7_primitive_switch.v
index fa958ec..17e8264 100644
--- a/src/rtl/pe/modexpa7_primitive_switch.v
+++ b/src/rtl/pe/modexpa7_primitive_switch.v
@@ -1,4 +1,4 @@
-`define USE_VENDOR_PRIMITIVES
+//`define USE_VENDOR_PRIMITIVES
`ifdef USE_VENDOR_PRIMITIVES