diff options
-rw-r--r-- | src/rtl/modexpa7_exponentiator.v | 344 | ||||
-rw-r--r-- | src/rtl/modexpa7_systolic_multiplier.v | 21 | ||||
-rw-r--r-- | src/rtl/modexpa7_systolic_multiplier_array.v | 11 | ||||
-rw-r--r-- | src/rtl/pe/modexpa7_primitive_switch.v | 2 | ||||
-rw-r--r-- | src/tb/modexp_fpga_model_vectors.v | 80 | ||||
-rw-r--r-- | src/tb/tb_exponentiator.v | 139 |
6 files changed, 461 insertions, 136 deletions
diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v index b33360a..93c8047 100644 --- a/src/rtl/modexpa7_exponentiator.v +++ b/src/rtl/modexpa7_exponentiator.v @@ -58,6 +58,8 @@ module modexpa7_exponentiator # input ena,
output rdy,
+ input crt,
+
output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr,
output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr,
output [OPERAND_ADDR_WIDTH-1:0] f_bram_addr,
@@ -86,76 +88,120 @@ module modexpa7_exponentiator # //
// FSM Declaration
//
- localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00;
+ localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00;
+ //
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1;
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2;
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_3 = 8'hA3;
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_4 = 8'hA4;
+
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_1 = 8'hB1;
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_2 = 8'hB2;
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_3 = 8'hB3;
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_4 = 8'hB4;
+
+ localparam [ 7: 0] FSM_STATE_EXP_CALC_1 = 8'hC1;
+ localparam [ 7: 0] FSM_STATE_EXP_CALC_2 = 8'hC2;
+ localparam [ 7: 0] FSM_STATE_EXP_CALC_3 = 8'hC3;
+
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_1 = 8'hD1;
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_2 = 8'hD2;
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_3 = 8'hD3;
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_4 = 8'hD4;
+
+ localparam [ 7: 0] FSM_STATE_EXP_NEXT = 8'hE0;
+
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_1 = 8'hF1;
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_2 = 8'hF2;
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_3 = 8'hF3;
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_4 = 8'hF4;
+ //
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_1 = 8'h11;
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_2 = 8'h12;
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_3 = 8'h13;
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_4 = 8'h14;
+
+ localparam [ 7: 0] FSM_STATE_MUL_CALC_1 = 8'h21;
+ localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22;
+ localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23;
//
- localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1;
- localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2;
- localparam [ 7: 0] FSM_STATE_EXP_INIT_3 = 8'hA3;
- localparam [ 7: 0] FSM_STATE_EXP_INIT_4 = 8'hA4;
-
- localparam [ 7: 0] FSM_STATE_EXP_LOAD_1 = 8'hB1;
- localparam [ 7: 0] FSM_STATE_EXP_LOAD_2 = 8'hB2;
- localparam [ 7: 0] FSM_STATE_EXP_LOAD_3 = 8'hB3;
- localparam [ 7: 0] FSM_STATE_EXP_LOAD_4 = 8'hB4;
-
- localparam [ 7: 0] FSM_STATE_EXP_CALC_1 = 8'hC1;
- localparam [ 7: 0] FSM_STATE_EXP_CALC_2 = 8'hC2;
- localparam [ 7: 0] FSM_STATE_EXP_CALC_3 = 8'hC3;
-
- localparam [ 7: 0] FSM_STATE_EXP_FILL_1 = 8'hD1;
- localparam [ 7: 0] FSM_STATE_EXP_FILL_2 = 8'hD2;
- localparam [ 7: 0] FSM_STATE_EXP_FILL_3 = 8'hD3;
- localparam [ 7: 0] FSM_STATE_EXP_FILL_4 = 8'hD4;
-
- localparam [ 7: 0] FSM_STATE_EXP_NEXT = 8'hE0;
-
- localparam [ 7: 0] FSM_STATE_EXP_SAVE_1 = 8'hF1;
- localparam [ 7: 0] FSM_STATE_EXP_SAVE_2 = 8'hF2;
- localparam [ 7: 0] FSM_STATE_EXP_SAVE_3 = 8'hF3;
- localparam [ 7: 0] FSM_STATE_EXP_SAVE_4 = 8'hF4;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_A_1 = 8'h31;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_A_2 = 8'h32;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_A_3 = 8'h33;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_A_4 = 8'h34;
+
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_A_1 = 8'h41;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_A_2 = 8'h42;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_A_3 = 8'h43;
//
- localparam [ 7: 0] FSM_STATE_MUL_INIT_1 = 8'h11;
- localparam [ 7: 0] FSM_STATE_MUL_INIT_2 = 8'h12;
- localparam [ 7: 0] FSM_STATE_MUL_INIT_3 = 8'h13;
- localparam [ 7: 0] FSM_STATE_MUL_INIT_4 = 8'h14;
-
- localparam [ 7: 0] FSM_STATE_MUL_CALC_1 = 8'h21;
- localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22;
- localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_B_1 = 8'h51;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_B_2 = 8'h52;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_B_3 = 8'h53;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_B_4 = 8'h54;
+
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_B_1 = 8'h61;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_B_2 = 8'h62;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_B_3 = 8'h63;
//
- localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_C_1 = 8'h71;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_C_2 = 8'h72;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_C_3 = 8'h73;
+ localparam [ 7: 0] FSM_STATE_CRT_INIT_C_4 = 8'h74;
+
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_C_1 = 8'h81;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_C_2 = 8'h82;
+ localparam [ 7: 0] FSM_STATE_CRT_CALC_C_3 = 8'h83;
+ //
+ localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF;
/*
* //
*
- * MUL_INIT: P1 = F
- * P2 = F
- * P3 = F
- * T2 = M
+ * MUL_INIT: P1 <= F
+ * P2 <= F
+ * P3 <= F
+ * T2 <= M
*
- * MUL_CALC: TP = T2 * P3
+ * MUL_CALC: TP = T2 * P3
*
* //
*
- * EXP_INIT: P1 <= TP
- * P2 <= TP
- * P3 <= TP
- * T1 <= 1
- * T2 <= 1
+ * CRT_INIT_A: T2 <= M
+ *
+ * CRT_CALC_A: TP = T2 * P3 ("reduce only")
*
- * EXP_LOAD: T0 <= T1
+ * CRT_INIT_B: P1 <= F
+ * P2 <= F
+ * P3 <= F
+ * T2 <= TP
*
- * EXP_CALC: PP = P1 * P2
- * TP = T2 * P3
+ * CRT_CALC_B: TP = T2 * P3
*
- * EXP_FILL: P1 <= PP
- * P2 <= PP
- * P3 <= PP
- * T1 <= D[i] ? TP : T0
- * T2 <= D[i] ? TP : T0
+ * CRT_INIT_C: T2 <= TP
*
- * EXP_SAVE: R <= T1
+ * CRT_CALC_C: TP = T2 * P3
+ *
+ * //
+ *
+ * EXP_INIT: P1 <= TP
+ * P2 <= TP
+ * P3 <= TP
+ * T1 <= 1
+ * T2 <= 1
+ *
+ * EXP_LOAD: T0 <= T1
+ *
+ * EXP_CALC: PP = P1 * P2
+ * TP = T2 * P3
+ *
+ * EXP_FILL: P1 <= PP
+ * P2 <= PP
+ * P3 <= PP
+ * T1 <= D[i] ? TP : T0
+ * T2 <= D[i] ? TP : T0
+ *
+ * EXP_SAVE: R <= T1
*
* //
*
@@ -225,10 +271,12 @@ module modexpa7_exponentiator # */
/* the very first addresses */
- wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}};
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}};
/* the very last addresses */
- wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {m_num_words_latch};
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {m_num_words_latch};
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last_crt =
+ {m_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1};
/* address registers */
reg [OPERAND_ADDR_WIDTH-1:0] m_addr;
@@ -261,16 +309,18 @@ module modexpa7_exponentiator # wire [OPERAND_ADDR_WIDTH-1:0] tp_addr_rd_next = tp_addr_rd + 1'b1;
/* handy stop flags */
- wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire f_addr_done = (f_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire t2_addr_wr_done = (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
- wire p_addr_wr_done = (p_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
- wire pp_addr_rd_done = (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
- wire tp_addr_rd_done = (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
+ wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire m_addr_done_crt = (m_addr == bram_addr_last_crt) ? 1'b1 : 1'b0;
+ wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire f_addr_done = (f_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t2_addr_wr_done = (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire t2_addr_wr_done_crt = (t2_addr_wr == bram_addr_last_crt) ? 1'b1 : 1'b0;
+ wire p_addr_wr_done = (p_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire pp_addr_rd_done = (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
+ wire tp_addr_rd_done = (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
/* map registers to top-level ports */
assign m_bram_addr = m_addr;
@@ -392,10 +442,15 @@ module modexpa7_exponentiator # // m_addr
//
case (fsm_next_state)
- FSM_STATE_MUL_INIT_1: m_addr <= bram_addr_zero;
+ FSM_STATE_MUL_INIT_1: m_addr <= bram_addr_zero;
FSM_STATE_MUL_INIT_2,
FSM_STATE_MUL_INIT_3,
- FSM_STATE_MUL_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr;
+ FSM_STATE_MUL_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr;
+ //
+ FSM_STATE_CRT_INIT_A_1: m_addr <= bram_addr_zero;
+ FSM_STATE_CRT_INIT_A_2,
+ FSM_STATE_CRT_INIT_A_3,
+ FSM_STATE_CRT_INIT_A_4: m_addr <= !m_addr_done_crt ? m_addr_next : m_addr;
endcase
//
// d_addr
@@ -472,7 +527,10 @@ module modexpa7_exponentiator # //
FSM_STATE_MUL_INIT_3: t2_addr_wr <= bram_addr_zero;
FSM_STATE_MUL_INIT_4: t2_addr_wr <= t2_addr_wr_next;
-
+ //
+ FSM_STATE_CRT_INIT_A_3: t2_addr_wr <= bram_addr_zero;
+ FSM_STATE_CRT_INIT_A_4: t2_addr_wr <= t2_addr_wr_next;
+ //
FSM_STATE_EXP_INIT_3: t2_addr_wr <= bram_addr_zero;
FSM_STATE_EXP_INIT_4: t2_addr_wr <= t2_addr_wr_next;
//
@@ -554,6 +612,8 @@ module modexpa7_exponentiator # case (fsm_next_state)
FSM_STATE_MUL_INIT_3,
FSM_STATE_MUL_INIT_4,
+ FSM_STATE_CRT_INIT_A_3,
+ FSM_STATE_CRT_INIT_A_4,
FSM_STATE_EXP_INIT_3,
FSM_STATE_EXP_INIT_4,
FSM_STATE_EXP_FILL_3,
@@ -616,15 +676,19 @@ module modexpa7_exponentiator # //
case (fsm_next_state)
//
- FSM_STATE_MUL_INIT_3,
- FSM_STATE_MUL_INIT_4: t2_data_in <= m_bram_out;
+ FSM_STATE_MUL_INIT_3,
+ FSM_STATE_MUL_INIT_4: t2_data_in <= m_bram_out;
+ //
+ FSM_STATE_CRT_INIT_A_3,
+ FSM_STATE_CRT_INIT_A_4: t2_data_in <= m_bram_out;
+
//
- FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1;
- FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0;
+ FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1;
+ FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0;
//
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
- default: t2_data_in <= 32'dX;
+ FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+ default: t2_data_in <= 32'dX;
endcase
//
end
@@ -634,6 +698,7 @@ module modexpa7_exponentiator # // Double Multiplier
//
reg mul_ena;
+ reg mul_crt;
wire mul_rdy_pp;
wire mul_rdy_tp;
wire mul_rdy_all = mul_rdy_pp & mul_rdy_tp;
@@ -651,6 +716,8 @@ module modexpa7_exponentiator # .ena (mul_ena),
.rdy (mul_rdy_pp),
+ .reduce_only (1'b0),
+
.a_bram_addr (p1_addr_rd),
.b_bram_addr (p2_addr_rd),
.n_bram_addr (n1_bram_addr),
@@ -681,6 +748,8 @@ module modexpa7_exponentiator # .ena (mul_ena),
.rdy (mul_rdy_tp),
+ .reduce_only (mul_crt),
+
.a_bram_addr (t2_addr_rd),
.b_bram_addr (p3_addr_rd),
.n_bram_addr (n2_bram_addr),
@@ -703,8 +772,18 @@ module modexpa7_exponentiator # //
case (fsm_next_state)
FSM_STATE_MUL_CALC_1,
- FSM_STATE_EXP_CALC_1: mul_ena <= 1'b1;
- default: mul_ena <= 1'b0;
+ FSM_STATE_CRT_CALC_A_1,
+ FSM_STATE_CRT_CALC_B_1,
+ FSM_STATE_CRT_CALC_C_1,
+ FSM_STATE_EXP_CALC_1: mul_ena <= 1'b1;
+ default: mul_ena <= 1'b0;
+ endcase
+
+ always @(posedge clk)
+ // mul_crt is set only on the transition into CRT_CALC_A_1 (where mul_ena is also raised) and cleared on every other one; it drives .reduce_only of the TP multiplier
+ case (fsm_next_state)
+ FSM_STATE_CRT_CALC_A_1: mul_crt <= 1'b1;
+ default: mul_crt <= 1'b0;
+ endcase
@@ -726,53 +805,70 @@ module modexpa7_exponentiator # //
case (fsm_state)
//
- FSM_STATE_MUL_INIT_1: fsm_next_state = FSM_STATE_MUL_INIT_2;
- FSM_STATE_MUL_INIT_2: fsm_next_state = FSM_STATE_MUL_INIT_3;
- FSM_STATE_MUL_INIT_3: fsm_next_state = FSM_STATE_MUL_INIT_4;
- FSM_STATE_MUL_INIT_4: if (t2_addr_wr_done) fsm_next_state = FSM_STATE_MUL_CALC_1;
- else fsm_next_state = FSM_STATE_MUL_INIT_4;
- //
- FSM_STATE_MUL_CALC_1: fsm_next_state = FSM_STATE_MUL_CALC_2;
- FSM_STATE_MUL_CALC_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_MUL_CALC_3;
- else fsm_next_state = FSM_STATE_MUL_CALC_2;
- FSM_STATE_MUL_CALC_3: fsm_next_state = FSM_STATE_EXP_INIT_1;
- //
- FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_MUL_INIT_1;
- else fsm_next_state = FSM_STATE_EXP_IDLE;
- //
- FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2;
- FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3;
- FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4;
- FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1;
- else fsm_next_state = FSM_STATE_EXP_INIT_4;
- //
- FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2;
- FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3;
- FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4;
- FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1;
- else fsm_next_state = FSM_STATE_EXP_LOAD_4;
- //
- FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2;
- FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3;
- else fsm_next_state = FSM_STATE_EXP_CALC_2;
- FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1;
- //
- FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2;
- FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3;
- FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4;
- FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT;
- else fsm_next_state = FSM_STATE_EXP_FILL_4;
- //
- FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1;
- else fsm_next_state = FSM_STATE_EXP_LOAD_1;
- //
- FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2;
- FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3;
- FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4;
- FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP;
- else fsm_next_state = FSM_STATE_EXP_SAVE_4;
- //
- FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE;
+ //
+ FSM_STATE_MUL_INIT_1: fsm_next_state = FSM_STATE_MUL_INIT_2;
+ FSM_STATE_MUL_INIT_2: fsm_next_state = FSM_STATE_MUL_INIT_3;
+ FSM_STATE_MUL_INIT_3: fsm_next_state = FSM_STATE_MUL_INIT_4;
+ FSM_STATE_MUL_INIT_4: if (t2_addr_wr_done) fsm_next_state = FSM_STATE_MUL_CALC_1;
+ else fsm_next_state = FSM_STATE_MUL_INIT_4;
+ //
+ FSM_STATE_MUL_CALC_1: fsm_next_state = FSM_STATE_MUL_CALC_2;
+ FSM_STATE_MUL_CALC_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_MUL_CALC_3;
+ else fsm_next_state = FSM_STATE_MUL_CALC_2;
+ FSM_STATE_MUL_CALC_3: fsm_next_state = FSM_STATE_EXP_INIT_1;
+ //
+ //
+ FSM_STATE_CRT_INIT_A_1: fsm_next_state = FSM_STATE_CRT_INIT_A_2;
+ FSM_STATE_CRT_INIT_A_2: fsm_next_state = FSM_STATE_CRT_INIT_A_3;
+ FSM_STATE_CRT_INIT_A_3: fsm_next_state = FSM_STATE_CRT_INIT_A_4;
+ FSM_STATE_CRT_INIT_A_4: if (t2_addr_wr_done_crt) fsm_next_state = FSM_STATE_CRT_CALC_A_1;
+ else fsm_next_state = FSM_STATE_CRT_INIT_A_4;
+
+ //
+ FSM_STATE_CRT_CALC_A_1: fsm_next_state = FSM_STATE_CRT_CALC_A_2;
+ FSM_STATE_CRT_CALC_A_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_CRT_CALC_A_3;
+ else fsm_next_state = FSM_STATE_CRT_CALC_A_2;
+ FSM_STATE_CRT_CALC_A_3: fsm_next_state = FSM_STATE_EXP_INIT_1;
+ //
+ //
+ FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = crt ?
+ FSM_STATE_CRT_INIT_A_1 : FSM_STATE_MUL_INIT_1;
+ else fsm_next_state = FSM_STATE_EXP_IDLE;
+ //
+ //
+ FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2;
+ FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3;
+ FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4;
+ FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1;
+ else fsm_next_state = FSM_STATE_EXP_INIT_4;
+ //
+ FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2;
+ FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3;
+ FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4;
+ FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1;
+ else fsm_next_state = FSM_STATE_EXP_LOAD_4;
+ //
+ FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2;
+ FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3;
+ else fsm_next_state = FSM_STATE_EXP_CALC_2;
+ FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1;
+ //
+ FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2;
+ FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3;
+ FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4;
+ FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT;
+ else fsm_next_state = FSM_STATE_EXP_FILL_4;
+ //
+ FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1;
+ else fsm_next_state = FSM_STATE_EXP_LOAD_1;
+ //
+ FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2;
+ FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3;
+ FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4;
+ FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP;
+ else fsm_next_state = FSM_STATE_EXP_SAVE_4;
+ //
+ FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE;
//
endcase
//
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v index 7293998..444693d 100644 --- a/src/rtl/modexpa7_systolic_multiplier.v +++ b/src/rtl/modexpa7_systolic_multiplier.v @@ -57,6 +57,8 @@ module modexpa7_systolic_multiplier # input ena, output rdy, +
+ input reduce_only,
output [OPERAND_ADDR_WIDTH-1:0] a_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] b_bram_addr, @@ -155,7 +157,8 @@ module modexpa7_systolic_multiplier # * Parameters Latch */ reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch; - reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch; + reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch;
+ reg reduce_only_latch; // save number of words in n when new operation starts always @(posedge clk) @@ -163,7 +166,12 @@ module modexpa7_systolic_multiplier # if ((fsm_state == FSM_STATE_IDLE) && ena_trig) n_num_words_latch <= n_num_words; + always @(posedge clk)
+ //
+ if ((fsm_state == FSM_STATE_IDLE) && ena_trig)
+ reduce_only_latch <= reduce_only;
+
/*
* Multiplication Phase
*/
@@ -174,6 +182,7 @@ module modexpa7_systolic_multiplier # reg [ 1: 0] mult_phase;
+ wire mult_phase_ab = (mult_phase == MULT_PHASE_A_B) ? 1'b1 : 1'b0;
wire mult_phase_done = (mult_phase == MULT_PHASE_STALL) ? 1'b1 : 1'b0;
always @(posedge clk) @@ -296,6 +305,7 @@ module modexpa7_systolic_multiplier # wire [OPERAND_ADDR_WIDTH :0] bram_addr_ext_last = {n_num_words_latch, 1'b1}; // address registers
+ wire [OPERAND_ADDR_WIDTH-1:0] a_addr;
reg [OPERAND_ADDR_WIDTH-1:0] b_addr; reg [OPERAND_ADDR_WIDTH-1:0] n_addr; wire [OPERAND_ADDR_WIDTH :0] p_addr_ext_wr; @@ -570,8 +580,9 @@ module modexpa7_systolic_multiplier # MULT_PHASE_Q_N: p_num_words_latch <= {n_num_words_latch, 1'b1}; endcase
- assign n_coeff_bram_addr = a_bram_addr;
- assign q_addr_rd = a_bram_addr;
+ assign a_bram_addr = a_addr;
+ assign n_coeff_bram_addr = a_addr;
+ assign q_addr_rd = a_addr;
reg [31: 0] a_data_out;
@@ -597,12 +608,14 @@ module modexpa7_systolic_multiplier # .ena (pe_array_ena), .rdy (pe_array_rdy), + .crt (reduce_only_latch && mult_phase_ab),
+
.loader_addr_rd (loader_addr_rd),
.pe_a_wide ({SYSTOLIC_ARRAY_LENGTH{a_data_out}}),
.pe_b_wide (pe_b_wide),
- .a_bram_addr (a_bram_addr),
+ .a_bram_addr (a_addr),
.p_bram_addr (p_addr_ext_wr), .p_bram_in (p_data_in), diff --git a/src/rtl/modexpa7_systolic_multiplier_array.v b/src/rtl/modexpa7_systolic_multiplier_array.v index 754203d..3280010 100644 --- a/src/rtl/modexpa7_systolic_multiplier_array.v +++ b/src/rtl/modexpa7_systolic_multiplier_array.v @@ -48,6 +48,8 @@ module modexpa7_systolic_multiplier_array # input ena, output rdy, + input crt,
+
output [OPERAND_ADDR_WIDTH - SYSTOLIC_ARRAY_POWER - 1 : 0] loader_addr_rd, input [ 32 * (2 ** SYSTOLIC_ARRAY_POWER) - 1 : 0] pe_a_wide,
@@ -385,6 +387,8 @@ module modexpa7_systolic_multiplier_array # // the very last address
wire [OPERAND_ADDR_WIDTH - 1 : 0] bram_addr_last = n_num_words_latch;
+ wire [OPERAND_ADDR_WIDTH - 1 : 0] bram_addr_last_crt =
+ {n_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1};
wire [OPERAND_ADDR_WIDTH : 0] bram_addr_ext_last = p_num_words_latch;
// registers
@@ -398,8 +402,9 @@ module modexpa7_systolic_multiplier_array # wire [OPERAND_ADDR_WIDTH : 0] p_addr_next = p_addr + 1'b1;
// handy flags
- wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0;
- wire p_addr_done = (p_addr == bram_addr_ext_last) ? 1'b1 : 1'b0;
+ wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire a_addr_done_crt = (a_addr == bram_addr_last_crt) ? 1'b1 : 1'b0;
+ wire p_addr_done = (p_addr == bram_addr_ext_last) ? 1'b1 : 1'b0;
// map top-level ports to internal registers
assign a_bram_addr = a_addr;
@@ -452,7 +457,7 @@ module modexpa7_systolic_multiplier_array # //
case (fsm_next_state)
FSM_STATE_MULT_START: a_addr <= bram_addr_zero;
- FSM_STATE_MULT_RELOAD: a_addr <= !a_addr_done ? a_addr_next : a_addr;
+ FSM_STATE_MULT_RELOAD: a_addr <= (crt ? a_addr_done_crt : a_addr_done) ? a_addr : a_addr_next; // hold at the last word, otherwise advance; crt mode stops at bram_addr_last_crt instead of bram_addr_last
endcase
//
end
diff --git a/src/rtl/pe/modexpa7_primitive_switch.v b/src/rtl/pe/modexpa7_primitive_switch.v
index fa958ec..17e8264 100644
--- a/src/rtl/pe/modexpa7_primitive_switch.v
+++ b/src/rtl/pe/modexpa7_primitive_switch.v
@@ -1,4 +1,4 @@
-`define USE_VENDOR_PRIMITIVES
+//`define USE_VENDOR_PRIMITIVES
`ifdef USE_VENDOR_PRIMITIVES
diff --git a/src/tb/modexp_fpga_model_vectors.v b/src/tb/modexp_fpga_model_vectors.v index d5284c9..c86f7ba 100644 --- a/src/tb/modexp_fpga_model_vectors.v +++ b/src/tb/modexp_fpga_model_vectors.v @@ -40,6 +40,46 @@ localparam [383:0] S_384 = 32'ha76b945b, 32'h49a3f645, 32'h76801499, 32'hb98e6a16, 32'hd2467b6a, 32'h75b7d614, 32'h0fff0fde, 32'hb31d1819}; +localparam [191:0] P_192 = + {32'he9ac4cf6, 32'h03b2d80a, 32'h7f1d091e, 32'h49d5f1a0, + 32'hac2ae4ff, 32'hbf9bf375}; + +localparam [191:0] Q_192 = + {32'hc1468f3e, 32'hc6909231, 32'h5a4d74ba, 32'h477b303f, + 32'h4b2e10d1, 32'h1f44e815}; + +localparam [191:0] P_COEFF_192 = + {32'h8ba8d46c, 32'hb4ed830d, 32'hfbb97c6e, 32'h72d150d3, + 32'h72d21392, 32'h70d2fb23}; + +localparam [191:0] Q_COEFF_192 = + {32'hd863905a, 32'hc1541c8a, 32'h25952b0e, 32'ha62b0348, + 32'h837f149f, 32'hd6cc58c3}; + +localparam [191:0] FACTOR_P_192 = + {32'h886bad59, 32'h9bf7a46e, 32'h482ed232, 32'he55164cf, + 32'hcb46a9e8, 32'he9bd888b}; + +localparam [191:0] FACTOR_Q_192 = + {32'h324b776e, 32'h3734d186, 32'h73dc8796, 32'h9e1aba2c, + 32'h4d5df285, 32'he97656b7}; + +localparam [191:0] DP_192 = + {32'h69b6c286, 32'h95fbc613, 32'h51988034, 32'h8cb0d684, + 32'h9aff38e4, 32'h9ef9ddb5}; + +localparam [191:0] DQ_192 = + {32'h1eda82b7, 32'h84bf4377, 32'h39712ff7, 32'h24be179f, + 32'ha302c190, 32'h80ab6159}; + +localparam [191:0] MP_192 = + {32'h9e163bb5, 32'h35e718cb, 32'hcde52b7b, 32'h5db8552b, + 32'h46a300e0, 32'h34f91e6b}; + +localparam [191:0] MQ_192 = + {32'h7b01a724, 32'h90f0d5f9, 32'h9e237ce5, 32'h6d31fd28, + 32'h4ecb9dad, 32'h58bf366a}; + localparam [511:0] M_512 = {32'h005536b6, 32'h43ea651f, 32'h2fd3c70a, 32'ha83659cb, 32'hd0c1f47b, 32'ha8033730, 32'h29c6b082, 32'h6db48613, @@ -88,3 +128,43 @@ localparam [511:0] S_512 = 32'hfd1e029d, 32'hfe887387, 32'h4312635f, 32'hb2b54b8d, 32'h5d3b379e, 32'h161eaa4f, 32'hedfd932b, 32'h780f0203}; +localparam [255:0] P_256 = + {32'hfedea889, 32'h97cfdb79, 32'hcca87074, 32'he5abcda1, + 32'h3be201c4, 
32'hc416fd15, 32'hf2130931, 32'h61ff5937}; + +localparam [255:0] Q_256 = + {32'hf0889147, 32'h5aa60f93, 32'hb9927d86, 32'h8f795c5c, + 32'h8e98dcf2, 32'had3aad74, 32'h9441583a, 32'h967dce41}; + +localparam [255:0] P_COEFF_256 = + {32'h7af63ffc, 32'h428d9408, 32'h86e79fb9, 32'h018dad77, + 32'h4ff704df, 32'h93effb1e, 32'h265d181a, 32'h47ae5379}; + +localparam [255:0] Q_COEFF_256 = + {32'hd27f8aa0, 32'h9f2b9800, 32'h2dfd2392, 32'h4f868b9d, + 32'h0fc51e1d, 32'h022de65b, 32'ha55f9ad1, 32'h0676be3f}; + +localparam [255:0] FACTOR_P_256 = + {32'h1a5f27a1, 32'h8d16b0cb, 32'h8c2751b8, 32'h106a099c, + 32'ha6efbadd, 32'hcb313a5f, 32'hf530eeb6, 32'hbbc7d8f5}; + +localparam [255:0] FACTOR_Q_256 = + {32'h6794987c, 32'h932203a6, 32'h8c5b1e68, 32'h18d458e6, + 32'h6737f12a, 32'h664d4187, 32'hc4ec03ba, 32'h4bd3d0c2}; + +localparam [255:0] DP_256 = + {32'h2504d437, 32'hfffbe9e5, 32'hfc0aef22, 32'h9b8563bd, + 32'haa83fe3b, 32'hc53b8d91, 32'h15731c5f, 32'hb6db2eeb}; + +localparam [255:0] DQ_256 = + {32'hd3265fba, 32'h2eb65638, 32'h4d106ec7, 32'h000dfe69, + 32'h75f87505, 32'h47d299d0, 32'h1c115cdd, 32'h599ca8c1}; + +localparam [255:0] MP_256 = + {32'h23359955, 32'hcad299b6, 32'h049bb248, 32'h3828b6a5, + 32'h74c85825, 32'h7dd8e109, 32'h07edbda9, 32'h4980c2c9}; + +localparam [255:0] MQ_256 = + {32'h8578120b, 32'h91f4ca9e, 32'h371d3e70, 32'h0005bb89, + 32'hd31ed864, 32'h477bd9cf, 32'h65a1f03b, 32'h606d3bc8}; + diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v index 16be0a5..440fedc 100644 --- a/src/tb/tb_exponentiator.v +++ b/src/tb/tb_exponentiator.v @@ -63,6 +63,8 @@ module tb_exponentiator; reg rst_n;
reg ena;
+ reg crt;
+
reg [ 3: 0] n_num_words;
reg [ 8: 0] d_num_bits;
@@ -170,6 +172,8 @@ module tb_exponentiator; .ena (ena),
.rdy (rdy),
+ .crt (crt),
+
.m_bram_addr (core_m_addr),
.d_bram_addr (core_d_addr),
.f_bram_addr (core_f_addr),
@@ -206,9 +210,14 @@ module tb_exponentiator; #200;
rst_n = 1'b1;
#100;
-
- test_exponent_384(M_384, D_384, FACTOR_384, N_384, N_COEFF_384, S_384);
- test_exponent_512(M_512, D_512, FACTOR_512, N_512, N_COEFF_512, S_512);
+
+ // test "honest" exponentiation
+// test_exponent_384(M_384, D_384, FACTOR_384, N_384, N_COEFF_384, S_384);
+// test_exponent_512(M_512, D_512, FACTOR_512, N_512, N_COEFF_512, S_512);
+
+ // test crt mode
+ test_exponent_192(M_384, DP_192, FACTOR_P_192, P_192, P_COEFF_192, MP_192);
+ //test_exponent_192(M_384, DQ_192, FACTOR_Q_192, Q_192, Q_COEFF_192, MQ_192);
end
@@ -216,7 +225,6 @@ module tb_exponentiator; //
// Test Tasks
//
-
task test_exponent_384;
// input [383:0] m;
@@ -234,6 +242,8 @@ module tb_exponentiator; n_num_words = 4'd11; // set number of words
d_num_bits = 9'd383; // set number of bits
//
+ crt = 0; // disable crt mode
+ //
write_memory_384(m, d, f, n, n_coeff); // fill memory
ena = 1; // start operation @@ -276,6 +286,8 @@ module tb_exponentiator; n_num_words = 4'd15; // set number of words
d_num_bits = 9'd511; // set number of bits
//
+ crt = 0; // disable crt mode
+ //
write_memory_512(m, d, f, n, n_coeff); // fill memory
ena = 1; // start operation @@ -301,6 +313,49 @@ module tb_exponentiator; //
endtask + task test_exponent_192;
// + input [383:0] m;
+ input [191:0] d;
+ input [191:0] f;
+ input [191:0] n;
+ input [191:0] n_coeff;
+ input [191:0] s;
+ reg [191:0] r;
+ // NOTE(review): i is declared below but never referenced in this task
+ integer i;
// + begin
+ // crt-mode test: load a 384-bit m with 192-bit d/f/n/n_coeff, run the core with crt = 1 and compare the 192-bit result r against the expected value s
+ n_num_words = 4'd5; // set number of words (apparently encoded as count-1: the 384-bit test uses 4'd11, the 512-bit test 4'd15)
+ d_num_bits = 9'd191; // set number of bits (apparently encoded as count-1: the 384-bit test uses 9'd383)
+ //
+ crt = 1; // enable crt mode
+ //
+ write_memory_192(m, d, f, n, n_coeff); // fill memory
+ ena = 1; // start operation + #10; //
+ ena = 0; // clear flag
+ while (!rdy) #10; // wait for operation to complete
read_memory_192(r); // get result from memory +
+ $display(" calculated: %x", r); // display result
+ $display(" expected: %x", s); //
+
+ // check calculated value (=== also requires X/Z bits to match, so an uninitialised result fails)
if (r === s) begin + $display(" OK");
$display("SUCCESS: Test passed."); + end else begin
+ $display(" ERROR");
+ $display("FAILURE: Test not passed.");
+ end
+ //
end + //
+ endtask //
// write_memory_384
@@ -409,6 +464,59 @@ module tb_exponentiator; //
+ // write_memory_192
+ // Streams m, d, f, n and n_coeff into the testbench memories one 32-bit word per #10 step, least significant word first, with tb_mdfn_wren held high.
+ task write_memory_192;
+ // m is 384 bits wide while the other four operands are 192 bits wide
+ input [383:0] m;
+ input [191:0] d;
+ input [191:0] f;
+ input [191:0] n;
+ input [191:0] n_coeff;
+ reg [383:0] m_shreg;
+ reg [191:0] f_shreg;
+ reg [191:0] d_shreg;
+ reg [191:0] n_shreg;
+ reg [191:0] n_coeff_shreg;
+ // every shift register is refilled with X from the top, so iterations beyond a 192-bit register's six words write X (the loop runs NUM_WORDS_384 times, presumably 12 - confirm)
begin + //
tb_mdfn_wren = 1; // start filling memories + m_shreg = m; // preload shift register
+ d_shreg = d; // preload shift register
+ f_shreg = f; // preload shift register
+ n_shreg = n; // preload shift register
+ n_coeff_shreg = n_coeff; // preload shift register
+ // + for (w=0; w<NUM_WORDS_384; w=w+1) begin // write all words + tb_mdfn_addr = w[3:0]; // set address + tb_m_data = m_shreg[31:0]; // set data + tb_d_data = d_shreg[31:0]; // set data + tb_f_data = f_shreg[31:0]; // set data + tb_n_data = n_shreg[31:0]; // set data + tb_n_coeff_data = n_coeff_shreg[31:0]; // set data + m_shreg = {{32{1'bX}}, m_shreg[383:32]}; // update shift register + d_shreg = {{32{1'bX}}, d_shreg[191:32]}; // update shift register + f_shreg = {{32{1'bX}}, f_shreg[191:32]}; // update shift register + n_shreg = {{32{1'bX}}, n_shreg[191:32]}; // update shift register + n_coeff_shreg = {{32{1'bX}}, n_coeff_shreg[191:32]}; // update shift register + #10; // wait for 1 clock tick + end + // + tb_mdfn_addr = {4{1'bX}}; // wipe addresses + tb_m_data = {32{1'bX}}; // wipe data + tb_d_data = {32{1'bX}}; // wipe data + tb_f_data = {32{1'bX}}; // wipe data + tb_n_data = {32{1'bX}}; // wipe data + tb_n_coeff_data = {32{1'bX}}; // wipe data + tb_mdfn_wren = 0; // stop filling memory
+ // address and data buses are left at X after the transfer
+ end
+ //
endtask +
+
+ //
// read_memory_384
//
task read_memory_384;
@@ -455,6 +563,29 @@ module tb_exponentiator; //
endtask + //
+ // read_memory_192
+ // Reads NUM_WORDS_384/2 result words through tb_r_addr / tb_r_data and returns them packed into the 192-bit output r.
+ task read_memory_192;
+ //
+ output [191:0] r;
+ reg [191:0] r_shreg;
+ // r_shreg is not initialised; each read shifts it right by 32 bits and places the new word at the top
+ begin
+ // assuming tb_r_data is 32 bits wide and six words are read (TODO confirm), the first word read ends up least significant
+ for (w=0; w<NUM_WORDS_384/2; w=w+1) begin // read result word-by-word + tb_r_addr = w[3:0]; // set address + #10; // wait for 1 clock tick + r_shreg = {tb_r_data, r_shreg[191:32]}; // store data word + end
+ //
+ tb_r_addr = {4{1'bX}}; // wipe address
+ r = r_shreg; // return
+ //
+ end
+ //
endtask +
endmodule
|