From e48040122cddd4374d5600b24807ef8189f1c0c2 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Fri, 11 Aug 2017 01:16:48 +0300 Subject: Work in progress. --- src/rtl/modexpa7_exponentiator.v | 344 +++++++++++++++++---------- src/rtl/modexpa7_systolic_multiplier.v | 21 +- src/rtl/modexpa7_systolic_multiplier_array.v | 11 +- src/rtl/pe/modexpa7_primitive_switch.v | 2 +- src/tb/modexp_fpga_model_vectors.v | 80 +++++++ src/tb/tb_exponentiator.v | 139 ++++++++++- 6 files changed, 461 insertions(+), 136 deletions(-) diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v index b33360a..93c8047 100644 --- a/src/rtl/modexpa7_exponentiator.v +++ b/src/rtl/modexpa7_exponentiator.v @@ -58,6 +58,8 @@ module modexpa7_exponentiator # input ena, output rdy, + input crt, + output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] f_bram_addr, @@ -86,76 +88,120 @@ module modexpa7_exponentiator # // // FSM Declaration // - localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00; + localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00; + // + localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1; + localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2; + localparam [ 7: 0] FSM_STATE_EXP_INIT_3 = 8'hA3; + localparam [ 7: 0] FSM_STATE_EXP_INIT_4 = 8'hA4; + + localparam [ 7: 0] FSM_STATE_EXP_LOAD_1 = 8'hB1; + localparam [ 7: 0] FSM_STATE_EXP_LOAD_2 = 8'hB2; + localparam [ 7: 0] FSM_STATE_EXP_LOAD_3 = 8'hB3; + localparam [ 7: 0] FSM_STATE_EXP_LOAD_4 = 8'hB4; + + localparam [ 7: 0] FSM_STATE_EXP_CALC_1 = 8'hC1; + localparam [ 7: 0] FSM_STATE_EXP_CALC_2 = 8'hC2; + localparam [ 7: 0] FSM_STATE_EXP_CALC_3 = 8'hC3; + + localparam [ 7: 0] FSM_STATE_EXP_FILL_1 = 8'hD1; + localparam [ 7: 0] FSM_STATE_EXP_FILL_2 = 8'hD2; + localparam [ 7: 0] FSM_STATE_EXP_FILL_3 = 8'hD3; + localparam [ 7: 0] FSM_STATE_EXP_FILL_4 = 8'hD4; + + localparam [ 7: 0] FSM_STATE_EXP_NEXT = 8'hE0; + + localparam [ 7: 0] FSM_STATE_EXP_SAVE_1 = 8'hF1; 
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_2 = 8'hF2; + localparam [ 7: 0] FSM_STATE_EXP_SAVE_3 = 8'hF3; + localparam [ 7: 0] FSM_STATE_EXP_SAVE_4 = 8'hF4; + // + localparam [ 7: 0] FSM_STATE_MUL_INIT_1 = 8'h11; + localparam [ 7: 0] FSM_STATE_MUL_INIT_2 = 8'h12; + localparam [ 7: 0] FSM_STATE_MUL_INIT_3 = 8'h13; + localparam [ 7: 0] FSM_STATE_MUL_INIT_4 = 8'h14; + + localparam [ 7: 0] FSM_STATE_MUL_CALC_1 = 8'h21; + localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22; + localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23; // - localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1; - localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2; - localparam [ 7: 0] FSM_STATE_EXP_INIT_3 = 8'hA3; - localparam [ 7: 0] FSM_STATE_EXP_INIT_4 = 8'hA4; - - localparam [ 7: 0] FSM_STATE_EXP_LOAD_1 = 8'hB1; - localparam [ 7: 0] FSM_STATE_EXP_LOAD_2 = 8'hB2; - localparam [ 7: 0] FSM_STATE_EXP_LOAD_3 = 8'hB3; - localparam [ 7: 0] FSM_STATE_EXP_LOAD_4 = 8'hB4; - - localparam [ 7: 0] FSM_STATE_EXP_CALC_1 = 8'hC1; - localparam [ 7: 0] FSM_STATE_EXP_CALC_2 = 8'hC2; - localparam [ 7: 0] FSM_STATE_EXP_CALC_3 = 8'hC3; - - localparam [ 7: 0] FSM_STATE_EXP_FILL_1 = 8'hD1; - localparam [ 7: 0] FSM_STATE_EXP_FILL_2 = 8'hD2; - localparam [ 7: 0] FSM_STATE_EXP_FILL_3 = 8'hD3; - localparam [ 7: 0] FSM_STATE_EXP_FILL_4 = 8'hD4; - - localparam [ 7: 0] FSM_STATE_EXP_NEXT = 8'hE0; - - localparam [ 7: 0] FSM_STATE_EXP_SAVE_1 = 8'hF1; - localparam [ 7: 0] FSM_STATE_EXP_SAVE_2 = 8'hF2; - localparam [ 7: 0] FSM_STATE_EXP_SAVE_3 = 8'hF3; - localparam [ 7: 0] FSM_STATE_EXP_SAVE_4 = 8'hF4; + localparam [ 7: 0] FSM_STATE_CRT_INIT_A_1 = 8'h31; + localparam [ 7: 0] FSM_STATE_CRT_INIT_A_2 = 8'h32; + localparam [ 7: 0] FSM_STATE_CRT_INIT_A_3 = 8'h33; + localparam [ 7: 0] FSM_STATE_CRT_INIT_A_4 = 8'h34; + + localparam [ 7: 0] FSM_STATE_CRT_CALC_A_1 = 8'h41; + localparam [ 7: 0] FSM_STATE_CRT_CALC_A_2 = 8'h42; + localparam [ 7: 0] FSM_STATE_CRT_CALC_A_3 = 8'h43; // - localparam [ 7: 0] FSM_STATE_MUL_INIT_1 = 8'h11; - localparam [ 7: 0] 
FSM_STATE_MUL_INIT_2 = 8'h12; - localparam [ 7: 0] FSM_STATE_MUL_INIT_3 = 8'h13; - localparam [ 7: 0] FSM_STATE_MUL_INIT_4 = 8'h14; - - localparam [ 7: 0] FSM_STATE_MUL_CALC_1 = 8'h21; - localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22; - localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23; + localparam [ 7: 0] FSM_STATE_CRT_INIT_B_1 = 8'h51; + localparam [ 7: 0] FSM_STATE_CRT_INIT_B_2 = 8'h52; + localparam [ 7: 0] FSM_STATE_CRT_INIT_B_3 = 8'h53; + localparam [ 7: 0] FSM_STATE_CRT_INIT_B_4 = 8'h54; + + localparam [ 7: 0] FSM_STATE_CRT_CALC_B_1 = 8'h61; + localparam [ 7: 0] FSM_STATE_CRT_CALC_B_2 = 8'h62; + localparam [ 7: 0] FSM_STATE_CRT_CALC_B_3 = 8'h63; // - localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF; + localparam [ 7: 0] FSM_STATE_CRT_INIT_C_1 = 8'h71; + localparam [ 7: 0] FSM_STATE_CRT_INIT_C_2 = 8'h72; + localparam [ 7: 0] FSM_STATE_CRT_INIT_C_3 = 8'h73; + localparam [ 7: 0] FSM_STATE_CRT_INIT_C_4 = 8'h74; + + localparam [ 7: 0] FSM_STATE_CRT_CALC_C_1 = 8'h81; + localparam [ 7: 0] FSM_STATE_CRT_CALC_C_2 = 8'h82; + localparam [ 7: 0] FSM_STATE_CRT_CALC_C_3 = 8'h83; + // + localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF; /* * // * - * MUL_INIT: P1 = F - * P2 = F - * P3 = F - * T2 = M + * MUL_INIT: P1 <= F + * P2 <= F + * P3 <= F + * T2 <= M * - * MUL_CALC: TP = T2 * P3 + * MUL_CALC: TP = T2 * P3 * * // * - * EXP_INIT: P1 <= TP - * P2 <= TP - * P3 <= TP - * T1 <= 1 - * T2 <= 1 + * CRT_INIT_A: T2 <= M + * + * CRT_CALC_A: TP = T2 * P3 ("reduce only") * - * EXP_LOAD: T0 <= T1 + * CRT_INIT_B: P1 <= F + * P2 <= F + * P3 <= F + * T2 <= TP * - * EXP_CALC: PP = P1 * P2 - * TP = T2 * P3 + * CRT_CALC_B: TP = T2 * P3 * - * EXP_FILL: P1 <= PP - * P2 <= PP - * P3 <= PP - * T1 <= D[i] ? TP : T0 - * T2 <= D[i] ? 
TP : T0 + * CRT_INIT_C: T2 <= TP * - * EXP_SAVE: R <= T1 + * CRT_CALC_C: TP = T2 * P3 + * + * // + * + * EXP_INIT: P1 <= TP + * P2 <= TP + * P3 <= TP + * T1 <= 1 + * T2 <= 1 + * + * EXP_LOAD: T0 <= T1 + * + * EXP_CALC: PP = P1 * P2 + * TP = T2 * P3 + * + * EXP_FILL: P1 <= PP + * P2 <= PP + * P3 <= PP + * T1 <= D[i] ? TP : T0 + * T2 <= D[i] ? TP : T0 + * + * EXP_SAVE: R <= T1 * * // * @@ -225,10 +271,12 @@ module modexpa7_exponentiator # */ /* the very first addresses */ - wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}}; + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}}; /* the very last addresses */ - wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {m_num_words_latch}; + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {m_num_words_latch}; + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last_crt = + {m_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1}; /* address registers */ reg [OPERAND_ADDR_WIDTH-1:0] m_addr; @@ -261,16 +309,18 @@ module modexpa7_exponentiator # wire [OPERAND_ADDR_WIDTH-1:0] tp_addr_rd_next = tp_addr_rd + 1'b1; /* handy stop flags */ - wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire f_addr_done = (f_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire t2_addr_wr_done = (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0; - wire p_addr_wr_done = (p_addr_wr == bram_addr_last) ? 1'b1 : 1'b0; - wire pp_addr_rd_done = (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0; - wire tp_addr_rd_done = (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0; + wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire m_addr_done_crt = (m_addr == bram_addr_last_crt) ? 1'b1 : 1'b0; + wire d_addr_done = (d_addr == bram_addr_last) ? 
1'b1 : 1'b0; + wire f_addr_done = (f_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire t2_addr_wr_done = (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0; + wire t2_addr_wr_done_crt = (t2_addr_wr == bram_addr_last_crt) ? 1'b1 : 1'b0; + wire p_addr_wr_done = (p_addr_wr == bram_addr_last) ? 1'b1 : 1'b0; + wire pp_addr_rd_done = (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0; + wire tp_addr_rd_done = (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0; /* map registers to top-level ports */ assign m_bram_addr = m_addr; @@ -392,10 +442,15 @@ module modexpa7_exponentiator # // m_addr // case (fsm_next_state) - FSM_STATE_MUL_INIT_1: m_addr <= bram_addr_zero; + FSM_STATE_MUL_INIT_1: m_addr <= bram_addr_zero; FSM_STATE_MUL_INIT_2, FSM_STATE_MUL_INIT_3, - FSM_STATE_MUL_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr; + FSM_STATE_MUL_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr; + // + FSM_STATE_CRT_INIT_A_1: m_addr <= bram_addr_zero; + FSM_STATE_CRT_INIT_A_2, + FSM_STATE_CRT_INIT_A_3, + FSM_STATE_CRT_INIT_A_4: m_addr <= !m_addr_done_crt ? 
m_addr_next : m_addr; endcase // // d_addr @@ -472,7 +527,10 @@ module modexpa7_exponentiator # // FSM_STATE_MUL_INIT_3: t2_addr_wr <= bram_addr_zero; FSM_STATE_MUL_INIT_4: t2_addr_wr <= t2_addr_wr_next; - + // + FSM_STATE_CRT_INIT_A_3: t2_addr_wr <= bram_addr_zero; + FSM_STATE_CRT_INIT_A_4: t2_addr_wr <= t2_addr_wr_next; + // FSM_STATE_EXP_INIT_3: t2_addr_wr <= bram_addr_zero; FSM_STATE_EXP_INIT_4: t2_addr_wr <= t2_addr_wr_next; // @@ -554,6 +612,8 @@ module modexpa7_exponentiator # case (fsm_next_state) FSM_STATE_MUL_INIT_3, FSM_STATE_MUL_INIT_4, + FSM_STATE_CRT_INIT_A_3, + FSM_STATE_CRT_INIT_A_4, FSM_STATE_EXP_INIT_3, FSM_STATE_EXP_INIT_4, FSM_STATE_EXP_FILL_3, @@ -616,15 +676,19 @@ module modexpa7_exponentiator # // case (fsm_next_state) // - FSM_STATE_MUL_INIT_3, - FSM_STATE_MUL_INIT_4: t2_data_in <= m_bram_out; + FSM_STATE_MUL_INIT_3, + FSM_STATE_MUL_INIT_4: t2_data_in <= m_bram_out; + // + FSM_STATE_CRT_INIT_A_3, + FSM_STATE_CRT_INIT_A_4: t2_data_in <= m_bram_out; + // - FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1; - FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0; + FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1; + FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0; // FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out; - default: t2_data_in <= 32'dX; + FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? 
tp_data_out : t0_data_out; + default: t2_data_in <= 32'dX; endcase // end @@ -634,6 +698,7 @@ module modexpa7_exponentiator # // Double Multiplier // reg mul_ena; + reg mul_crt; wire mul_rdy_pp; wire mul_rdy_tp; wire mul_rdy_all = mul_rdy_pp & mul_rdy_tp; @@ -651,6 +716,8 @@ module modexpa7_exponentiator # .ena (mul_ena), .rdy (mul_rdy_pp), + .reduce_only (1'b0), + .a_bram_addr (p1_addr_rd), .b_bram_addr (p2_addr_rd), .n_bram_addr (n1_bram_addr), @@ -681,6 +748,8 @@ module modexpa7_exponentiator # .ena (mul_ena), .rdy (mul_rdy_tp), + .reduce_only (mul_crt), + .a_bram_addr (t2_addr_rd), .b_bram_addr (p3_addr_rd), .n_bram_addr (n2_bram_addr), @@ -703,8 +772,18 @@ module modexpa7_exponentiator # // case (fsm_next_state) FSM_STATE_MUL_CALC_1, - FSM_STATE_EXP_CALC_1: mul_ena <= 1'b1; - default: mul_ena <= 1'b0; + FSM_STATE_CRT_CALC_A_1, + FSM_STATE_CRT_CALC_B_1, + FSM_STATE_CRT_CALC_C_1, + FSM_STATE_EXP_CALC_1: mul_ena <= 1'b1; + default: mul_ena <= 1'b0; + endcase + + always @(posedge clk) + // + case (fsm_next_state) + FSM_STATE_CRT_CALC_A_1: mul_crt <= 1'b1; + default: mul_crt <= 1'b0; endcase @@ -726,53 +805,70 @@ module modexpa7_exponentiator # // case (fsm_state) // - FSM_STATE_MUL_INIT_1: fsm_next_state = FSM_STATE_MUL_INIT_2; - FSM_STATE_MUL_INIT_2: fsm_next_state = FSM_STATE_MUL_INIT_3; - FSM_STATE_MUL_INIT_3: fsm_next_state = FSM_STATE_MUL_INIT_4; - FSM_STATE_MUL_INIT_4: if (t2_addr_wr_done) fsm_next_state = FSM_STATE_MUL_CALC_1; - else fsm_next_state = FSM_STATE_MUL_INIT_4; - // - FSM_STATE_MUL_CALC_1: fsm_next_state = FSM_STATE_MUL_CALC_2; - FSM_STATE_MUL_CALC_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_MUL_CALC_3; - else fsm_next_state = FSM_STATE_MUL_CALC_2; - FSM_STATE_MUL_CALC_3: fsm_next_state = FSM_STATE_EXP_INIT_1; - // - FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_MUL_INIT_1; - else fsm_next_state = FSM_STATE_EXP_IDLE; - // - FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2; - FSM_STATE_EXP_INIT_2: fsm_next_state = 
FSM_STATE_EXP_INIT_3; - FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4; - FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1; - else fsm_next_state = FSM_STATE_EXP_INIT_4; - // - FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2; - FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3; - FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4; - FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1; - else fsm_next_state = FSM_STATE_EXP_LOAD_4; - // - FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2; - FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3; - else fsm_next_state = FSM_STATE_EXP_CALC_2; - FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1; - // - FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2; - FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3; - FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4; - FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT; - else fsm_next_state = FSM_STATE_EXP_FILL_4; - // - FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1; - else fsm_next_state = FSM_STATE_EXP_LOAD_1; - // - FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2; - FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3; - FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4; - FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP; - else fsm_next_state = FSM_STATE_EXP_SAVE_4; - // - FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE; + // + FSM_STATE_MUL_INIT_1: fsm_next_state = FSM_STATE_MUL_INIT_2; + FSM_STATE_MUL_INIT_2: fsm_next_state = FSM_STATE_MUL_INIT_3; + FSM_STATE_MUL_INIT_3: fsm_next_state = FSM_STATE_MUL_INIT_4; + FSM_STATE_MUL_INIT_4: if (t2_addr_wr_done) fsm_next_state = FSM_STATE_MUL_CALC_1; + else fsm_next_state = FSM_STATE_MUL_INIT_4; + // + FSM_STATE_MUL_CALC_1: fsm_next_state = 
FSM_STATE_MUL_CALC_2; + FSM_STATE_MUL_CALC_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_MUL_CALC_3; + else fsm_next_state = FSM_STATE_MUL_CALC_2; + FSM_STATE_MUL_CALC_3: fsm_next_state = FSM_STATE_EXP_INIT_1; + // + // + FSM_STATE_CRT_INIT_A_1: fsm_next_state = FSM_STATE_CRT_INIT_A_2; + FSM_STATE_CRT_INIT_A_2: fsm_next_state = FSM_STATE_CRT_INIT_A_3; + FSM_STATE_CRT_INIT_A_3: fsm_next_state = FSM_STATE_CRT_INIT_A_4; + FSM_STATE_CRT_INIT_A_4: if (t2_addr_wr_done_crt) fsm_next_state = FSM_STATE_CRT_CALC_A_1; + else fsm_next_state = FSM_STATE_CRT_INIT_A_4; + + // + FSM_STATE_CRT_CALC_A_1: fsm_next_state = FSM_STATE_CRT_CALC_A_2; + FSM_STATE_CRT_CALC_A_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_CRT_CALC_A_3; + else fsm_next_state = FSM_STATE_CRT_CALC_A_2; + FSM_STATE_CRT_CALC_A_3: fsm_next_state = FSM_STATE_EXP_INIT_1; + // + // + FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = crt ? + FSM_STATE_CRT_INIT_A_1 : FSM_STATE_MUL_INIT_1; + else fsm_next_state = FSM_STATE_EXP_IDLE; + // + // + FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2; + FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3; + FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4; + FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1; + else fsm_next_state = FSM_STATE_EXP_INIT_4; + // + FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2; + FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3; + FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4; + FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1; + else fsm_next_state = FSM_STATE_EXP_LOAD_4; + // + FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2; + FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3; + else fsm_next_state = FSM_STATE_EXP_CALC_2; + FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1; + // + FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2; + FSM_STATE_EXP_FILL_2: 
fsm_next_state = FSM_STATE_EXP_FILL_3; + FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4; + FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT; + else fsm_next_state = FSM_STATE_EXP_FILL_4; + // + FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1; + else fsm_next_state = FSM_STATE_EXP_LOAD_1; + // + FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2; + FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3; + FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4; + FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP; + else fsm_next_state = FSM_STATE_EXP_SAVE_4; + // + FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE; // endcase // diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v index 7293998..444693d 100644 --- a/src/rtl/modexpa7_systolic_multiplier.v +++ b/src/rtl/modexpa7_systolic_multiplier.v @@ -57,6 +57,8 @@ module modexpa7_systolic_multiplier # input ena, output rdy, + + input reduce_only, output [OPERAND_ADDR_WIDTH-1:0] a_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] b_bram_addr, @@ -155,7 +157,8 @@ module modexpa7_systolic_multiplier # * Parameters Latch */ reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch; - reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch; + reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch; + reg reduce_only_latch; // save number of words in n when new operation starts always @(posedge clk) @@ -163,7 +166,12 @@ module modexpa7_systolic_multiplier # if ((fsm_state == FSM_STATE_IDLE) && ena_trig) n_num_words_latch <= n_num_words; + always @(posedge clk) + // + if ((fsm_state == FSM_STATE_IDLE) && ena_trig) + reduce_only_latch <= reduce_only; + /* * Multiplication Phase */ @@ -174,6 +182,7 @@ module modexpa7_systolic_multiplier # reg [ 1: 0] mult_phase; + wire mult_phase_ab = (mult_phase == MULT_PHASE_A_B) ? 1'b1 : 1'b0; wire mult_phase_done = (mult_phase == MULT_PHASE_STALL) ? 
1'b1 : 1'b0; always @(posedge clk) @@ -296,6 +305,7 @@ module modexpa7_systolic_multiplier # wire [OPERAND_ADDR_WIDTH :0] bram_addr_ext_last = {n_num_words_latch, 1'b1}; // address registers + wire [OPERAND_ADDR_WIDTH-1:0] a_addr; reg [OPERAND_ADDR_WIDTH-1:0] b_addr; reg [OPERAND_ADDR_WIDTH-1:0] n_addr; wire [OPERAND_ADDR_WIDTH :0] p_addr_ext_wr; @@ -570,8 +580,9 @@ module modexpa7_systolic_multiplier # MULT_PHASE_Q_N: p_num_words_latch <= {n_num_words_latch, 1'b1}; endcase - assign n_coeff_bram_addr = a_bram_addr; - assign q_addr_rd = a_bram_addr; + assign a_bram_addr = a_addr; + assign n_coeff_bram_addr = a_addr; + assign q_addr_rd = a_addr; reg [31: 0] a_data_out; @@ -597,12 +608,14 @@ module modexpa7_systolic_multiplier # .ena (pe_array_ena), .rdy (pe_array_rdy), + .crt (reduce_only_latch && mult_phase_ab), + .loader_addr_rd (loader_addr_rd), .pe_a_wide ({SYSTOLIC_ARRAY_LENGTH{a_data_out}}), .pe_b_wide (pe_b_wide), - .a_bram_addr (a_bram_addr), + .a_bram_addr (a_addr), .p_bram_addr (p_addr_ext_wr), .p_bram_in (p_data_in), diff --git a/src/rtl/modexpa7_systolic_multiplier_array.v b/src/rtl/modexpa7_systolic_multiplier_array.v index 754203d..3280010 100644 --- a/src/rtl/modexpa7_systolic_multiplier_array.v +++ b/src/rtl/modexpa7_systolic_multiplier_array.v @@ -48,6 +48,8 @@ module modexpa7_systolic_multiplier_array # input ena, output rdy, + input crt, + output [OPERAND_ADDR_WIDTH - SYSTOLIC_ARRAY_POWER - 1 : 0] loader_addr_rd, input [ 32 * (2 ** SYSTOLIC_ARRAY_POWER) - 1 : 0] pe_a_wide, @@ -385,6 +387,8 @@ module modexpa7_systolic_multiplier_array # // the very last address wire [OPERAND_ADDR_WIDTH - 1 : 0] bram_addr_last = n_num_words_latch; + wire [OPERAND_ADDR_WIDTH - 1 : 0] bram_addr_last_crt = + {n_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1}; wire [OPERAND_ADDR_WIDTH : 0] bram_addr_ext_last = p_num_words_latch; // registers @@ -398,8 +402,9 @@ module modexpa7_systolic_multiplier_array # wire [OPERAND_ADDR_WIDTH : 0] p_addr_next = p_addr + 1'b1; // handy 
flags - wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire p_addr_done = (p_addr == bram_addr_ext_last) ? 1'b1 : 1'b0; + wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire a_addr_done_crt = (a_addr == bram_addr_last_crt) ? 1'b1 : 1'b0; + wire p_addr_done = (p_addr == bram_addr_ext_last) ? 1'b1 : 1'b0; // map top-level ports to internal registers assign a_bram_addr = a_addr; @@ -452,7 +457,7 @@ module modexpa7_systolic_multiplier_array # // case (fsm_next_state) FSM_STATE_MULT_START: a_addr <= bram_addr_zero; - FSM_STATE_MULT_RELOAD: a_addr <= !a_addr_done ? a_addr_next : a_addr; + FSM_STATE_MULT_RELOAD: a_addr <= crt ? (!a_addr_done_crt ? a_addr_next : a_addr) : (!a_addr_done ? a_addr_next : a_addr); /* when crt is set, a_addr stops at bram_addr_last_crt instead of bram_addr_last */ endcase // end diff --git a/src/rtl/pe/modexpa7_primitive_switch.v b/src/rtl/pe/modexpa7_primitive_switch.v index fa958ec..17e8264 100644 --- a/src/rtl/pe/modexpa7_primitive_switch.v +++ b/src/rtl/pe/modexpa7_primitive_switch.v @@ -1,4 +1,4 @@ -`define USE_VENDOR_PRIMITIVES +//`define USE_VENDOR_PRIMITIVES `ifdef USE_VENDOR_PRIMITIVES diff --git a/src/tb/modexp_fpga_model_vectors.v b/src/tb/modexp_fpga_model_vectors.v index d5284c9..c86f7ba 100644 --- a/src/tb/modexp_fpga_model_vectors.v +++ b/src/tb/modexp_fpga_model_vectors.v @@ -40,6 +40,46 @@ localparam [383:0] S_384 = 32'ha76b945b, 32'h49a3f645, 32'h76801499, 32'hb98e6a16, 32'hd2467b6a, 32'h75b7d614, 32'h0fff0fde, 32'hb31d1819}; +localparam [191:0] P_192 = + {32'he9ac4cf6, 32'h03b2d80a, 32'h7f1d091e, 32'h49d5f1a0, + 32'hac2ae4ff, 32'hbf9bf375}; + +localparam [191:0] Q_192 = + {32'hc1468f3e, 32'hc6909231, 32'h5a4d74ba, 32'h477b303f, + 32'h4b2e10d1, 32'h1f44e815}; + +localparam [191:0] P_COEFF_192 = + {32'h8ba8d46c, 32'hb4ed830d, 32'hfbb97c6e, 32'h72d150d3, + 32'h72d21392, 32'h70d2fb23}; + +localparam [191:0] Q_COEFF_192 = + {32'hd863905a, 32'hc1541c8a, 32'h25952b0e, 32'ha62b0348, + 32'h837f149f, 32'hd6cc58c3}; + +localparam [191:0] FACTOR_P_192 = + {32'h886bad59, 32'h9bf7a46e, 32'h482ed232, 32'he55164cf, + 32'hcb46a9e8,
32'he9bd888b}; + +localparam [191:0] FACTOR_Q_192 = + {32'h324b776e, 32'h3734d186, 32'h73dc8796, 32'h9e1aba2c, + 32'h4d5df285, 32'he97656b7}; + +localparam [191:0] DP_192 = + {32'h69b6c286, 32'h95fbc613, 32'h51988034, 32'h8cb0d684, + 32'h9aff38e4, 32'h9ef9ddb5}; + +localparam [191:0] DQ_192 = + {32'h1eda82b7, 32'h84bf4377, 32'h39712ff7, 32'h24be179f, + 32'ha302c190, 32'h80ab6159}; + +localparam [191:0] MP_192 = + {32'h9e163bb5, 32'h35e718cb, 32'hcde52b7b, 32'h5db8552b, + 32'h46a300e0, 32'h34f91e6b}; + +localparam [191:0] MQ_192 = + {32'h7b01a724, 32'h90f0d5f9, 32'h9e237ce5, 32'h6d31fd28, + 32'h4ecb9dad, 32'h58bf366a}; + localparam [511:0] M_512 = {32'h005536b6, 32'h43ea651f, 32'h2fd3c70a, 32'ha83659cb, 32'hd0c1f47b, 32'ha8033730, 32'h29c6b082, 32'h6db48613, @@ -88,3 +128,43 @@ localparam [511:0] S_512 = 32'hfd1e029d, 32'hfe887387, 32'h4312635f, 32'hb2b54b8d, 32'h5d3b379e, 32'h161eaa4f, 32'hedfd932b, 32'h780f0203}; +localparam [255:0] P_256 = + {32'hfedea889, 32'h97cfdb79, 32'hcca87074, 32'he5abcda1, + 32'h3be201c4, 32'hc416fd15, 32'hf2130931, 32'h61ff5937}; + +localparam [255:0] Q_256 = + {32'hf0889147, 32'h5aa60f93, 32'hb9927d86, 32'h8f795c5c, + 32'h8e98dcf2, 32'had3aad74, 32'h9441583a, 32'h967dce41}; + +localparam [255:0] P_COEFF_256 = + {32'h7af63ffc, 32'h428d9408, 32'h86e79fb9, 32'h018dad77, + 32'h4ff704df, 32'h93effb1e, 32'h265d181a, 32'h47ae5379}; + +localparam [255:0] Q_COEFF_256 = + {32'hd27f8aa0, 32'h9f2b9800, 32'h2dfd2392, 32'h4f868b9d, + 32'h0fc51e1d, 32'h022de65b, 32'ha55f9ad1, 32'h0676be3f}; + +localparam [255:0] FACTOR_P_256 = + {32'h1a5f27a1, 32'h8d16b0cb, 32'h8c2751b8, 32'h106a099c, + 32'ha6efbadd, 32'hcb313a5f, 32'hf530eeb6, 32'hbbc7d8f5}; + +localparam [255:0] FACTOR_Q_256 = + {32'h6794987c, 32'h932203a6, 32'h8c5b1e68, 32'h18d458e6, + 32'h6737f12a, 32'h664d4187, 32'hc4ec03ba, 32'h4bd3d0c2}; + +localparam [255:0] DP_256 = + {32'h2504d437, 32'hfffbe9e5, 32'hfc0aef22, 32'h9b8563bd, + 32'haa83fe3b, 32'hc53b8d91, 32'h15731c5f, 32'hb6db2eeb}; + 
+localparam [255:0] DQ_256 = + {32'hd3265fba, 32'h2eb65638, 32'h4d106ec7, 32'h000dfe69, + 32'h75f87505, 32'h47d299d0, 32'h1c115cdd, 32'h599ca8c1}; + +localparam [255:0] MP_256 = + {32'h23359955, 32'hcad299b6, 32'h049bb248, 32'h3828b6a5, + 32'h74c85825, 32'h7dd8e109, 32'h07edbda9, 32'h4980c2c9}; + +localparam [255:0] MQ_256 = + {32'h8578120b, 32'h91f4ca9e, 32'h371d3e70, 32'h0005bb89, + 32'hd31ed864, 32'h477bd9cf, 32'h65a1f03b, 32'h606d3bc8}; + diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v index 16be0a5..440fedc 100644 --- a/src/tb/tb_exponentiator.v +++ b/src/tb/tb_exponentiator.v @@ -63,6 +63,8 @@ module tb_exponentiator; reg rst_n; reg ena; + reg crt; + reg [ 3: 0] n_num_words; reg [ 8: 0] d_num_bits; @@ -170,6 +172,8 @@ module tb_exponentiator; .ena (ena), .rdy (rdy), + .crt (crt), + .m_bram_addr (core_m_addr), .d_bram_addr (core_d_addr), .f_bram_addr (core_f_addr), @@ -206,9 +210,14 @@ module tb_exponentiator; #200; rst_n = 1'b1; #100; - - test_exponent_384(M_384, D_384, FACTOR_384, N_384, N_COEFF_384, S_384); - test_exponent_512(M_512, D_512, FACTOR_512, N_512, N_COEFF_512, S_512); + + // test "honest" exponentiation +// test_exponent_384(M_384, D_384, FACTOR_384, N_384, N_COEFF_384, S_384); +// test_exponent_512(M_512, D_512, FACTOR_512, N_512, N_COEFF_512, S_512); + + // test crt mode + test_exponent_192(M_384, DP_192, FACTOR_P_192, P_192, P_COEFF_192, MP_192); + //test_exponent_192(M_384, DQ_192, FACTOR_Q_192, Q_192, Q_COEFF_192, MQ_192); end @@ -216,7 +225,6 @@ module tb_exponentiator; // // Test Tasks // - task test_exponent_384; // input [383:0] m; @@ -234,6 +242,8 @@ module tb_exponentiator; n_num_words = 4'd11; // set number of words d_num_bits = 9'd383; // set number of bits // + crt = 0; // disable crt mode + // write_memory_384(m, d, f, n, n_coeff); // fill memory ena = 1; // start operation @@ -276,6 +286,8 @@ module tb_exponentiator; n_num_words = 4'd15; // set number of words d_num_bits = 9'd511; // set number of bits // + crt 
= 0; // disable crt mode + // write_memory_512(m, d, f, n, n_coeff); // fill memory ena = 1; // start operation @@ -301,6 +313,49 @@ module tb_exponentiator; // endtask + task test_exponent_192; + // + input [383:0] m; + input [191:0] d; + input [191:0] f; + input [191:0] n; + input [191:0] n_coeff; + input [191:0] s; + reg [191:0] r; + // + integer i; + // + begin + // + n_num_words = 4'd5; // set number of words + d_num_bits = 9'd191; // set number of bits + // + crt = 1; // enable crt mode + // + write_memory_192(m, d, f, n, n_coeff); // fill memory + + ena = 1; // start operation + #10; // + ena = 0; // clear flag + + while (!rdy) #10; // wait for operation to complete + read_memory_192(r); // get result from memory + + $display(" calculated: %x", r); // display result + $display(" expected: %x", s); // + + // check calculated value + if (r === s) begin + $display(" OK"); + $display("SUCCESS: Test passed."); + end else begin + $display(" ERROR"); + $display("FAILURE: Test not passed."); + end + // + end + // + endtask // // write_memory_384 @@ -408,6 +463,59 @@ module tb_exponentiator; endtask + // + // write_memory_192 + // + task write_memory_192; + // + input [383:0] m; + input [191:0] d; + input [191:0] f; + input [191:0] n; + input [191:0] n_coeff; + reg [383:0] m_shreg; + reg [191:0] f_shreg; + reg [191:0] d_shreg; + reg [191:0] n_shreg; + reg [191:0] n_coeff_shreg; + // + begin + // + tb_mdfn_wren = 1; // start filling memories + m_shreg = m; // preload shift register + d_shreg = d; // preload shift register + f_shreg = f; // preload shift register + n_shreg = n; // preload shift register + n_coeff_shreg = n_coeff; // preload shift register + // + for (w=0; w