From e48040122cddd4374d5600b24807ef8189f1c0c2 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Fri, 11 Aug 2017 01:16:48 +0300 Subject: Work in progress. --- src/rtl/modexpa7_exponentiator.v | 344 +++++++++++++++++---------- src/rtl/modexpa7_systolic_multiplier.v | 21 +- src/rtl/modexpa7_systolic_multiplier_array.v | 11 +- src/rtl/pe/modexpa7_primitive_switch.v | 2 +- 4 files changed, 246 insertions(+), 132 deletions(-) (limited to 'src/rtl') diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v index b33360a..93c8047 100644 --- a/src/rtl/modexpa7_exponentiator.v +++ b/src/rtl/modexpa7_exponentiator.v @@ -58,6 +58,8 @@ module modexpa7_exponentiator # input ena, output rdy, + input crt, + output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] f_bram_addr, @@ -86,76 +88,120 @@ module modexpa7_exponentiator # // // FSM Declaration // - localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00; + localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00; + // + localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1; + localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2; + localparam [ 7: 0] FSM_STATE_EXP_INIT_3 = 8'hA3; + localparam [ 7: 0] FSM_STATE_EXP_INIT_4 = 8'hA4; + + localparam [ 7: 0] FSM_STATE_EXP_LOAD_1 = 8'hB1; + localparam [ 7: 0] FSM_STATE_EXP_LOAD_2 = 8'hB2; + localparam [ 7: 0] FSM_STATE_EXP_LOAD_3 = 8'hB3; + localparam [ 7: 0] FSM_STATE_EXP_LOAD_4 = 8'hB4; + + localparam [ 7: 0] FSM_STATE_EXP_CALC_1 = 8'hC1; + localparam [ 7: 0] FSM_STATE_EXP_CALC_2 = 8'hC2; + localparam [ 7: 0] FSM_STATE_EXP_CALC_3 = 8'hC3; + + localparam [ 7: 0] FSM_STATE_EXP_FILL_1 = 8'hD1; + localparam [ 7: 0] FSM_STATE_EXP_FILL_2 = 8'hD2; + localparam [ 7: 0] FSM_STATE_EXP_FILL_3 = 8'hD3; + localparam [ 7: 0] FSM_STATE_EXP_FILL_4 = 8'hD4; + + localparam [ 7: 0] FSM_STATE_EXP_NEXT = 8'hE0; + + localparam [ 7: 0] FSM_STATE_EXP_SAVE_1 = 8'hF1; + localparam [ 7: 0] FSM_STATE_EXP_SAVE_2 = 8'hF2; + localparam [ 7: 
0] FSM_STATE_EXP_SAVE_3 = 8'hF3; + localparam [ 7: 0] FSM_STATE_EXP_SAVE_4 = 8'hF4; + // + localparam [ 7: 0] FSM_STATE_MUL_INIT_1 = 8'h11; + localparam [ 7: 0] FSM_STATE_MUL_INIT_2 = 8'h12; + localparam [ 7: 0] FSM_STATE_MUL_INIT_3 = 8'h13; + localparam [ 7: 0] FSM_STATE_MUL_INIT_4 = 8'h14; + + localparam [ 7: 0] FSM_STATE_MUL_CALC_1 = 8'h21; + localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22; + localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23; // - localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1; - localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2; - localparam [ 7: 0] FSM_STATE_EXP_INIT_3 = 8'hA3; - localparam [ 7: 0] FSM_STATE_EXP_INIT_4 = 8'hA4; - - localparam [ 7: 0] FSM_STATE_EXP_LOAD_1 = 8'hB1; - localparam [ 7: 0] FSM_STATE_EXP_LOAD_2 = 8'hB2; - localparam [ 7: 0] FSM_STATE_EXP_LOAD_3 = 8'hB3; - localparam [ 7: 0] FSM_STATE_EXP_LOAD_4 = 8'hB4; - - localparam [ 7: 0] FSM_STATE_EXP_CALC_1 = 8'hC1; - localparam [ 7: 0] FSM_STATE_EXP_CALC_2 = 8'hC2; - localparam [ 7: 0] FSM_STATE_EXP_CALC_3 = 8'hC3; - - localparam [ 7: 0] FSM_STATE_EXP_FILL_1 = 8'hD1; - localparam [ 7: 0] FSM_STATE_EXP_FILL_2 = 8'hD2; - localparam [ 7: 0] FSM_STATE_EXP_FILL_3 = 8'hD3; - localparam [ 7: 0] FSM_STATE_EXP_FILL_4 = 8'hD4; - - localparam [ 7: 0] FSM_STATE_EXP_NEXT = 8'hE0; - - localparam [ 7: 0] FSM_STATE_EXP_SAVE_1 = 8'hF1; - localparam [ 7: 0] FSM_STATE_EXP_SAVE_2 = 8'hF2; - localparam [ 7: 0] FSM_STATE_EXP_SAVE_3 = 8'hF3; - localparam [ 7: 0] FSM_STATE_EXP_SAVE_4 = 8'hF4; + localparam [ 7: 0] FSM_STATE_CRT_INIT_A_1 = 8'h31; + localparam [ 7: 0] FSM_STATE_CRT_INIT_A_2 = 8'h32; + localparam [ 7: 0] FSM_STATE_CRT_INIT_A_3 = 8'h33; + localparam [ 7: 0] FSM_STATE_CRT_INIT_A_4 = 8'h34; + + localparam [ 7: 0] FSM_STATE_CRT_CALC_A_1 = 8'h41; + localparam [ 7: 0] FSM_STATE_CRT_CALC_A_2 = 8'h42; + localparam [ 7: 0] FSM_STATE_CRT_CALC_A_3 = 8'h43; // - localparam [ 7: 0] FSM_STATE_MUL_INIT_1 = 8'h11; - localparam [ 7: 0] FSM_STATE_MUL_INIT_2 = 8'h12; - localparam [ 7: 0] FSM_STATE_MUL_INIT_3 
= 8'h13; - localparam [ 7: 0] FSM_STATE_MUL_INIT_4 = 8'h14; - - localparam [ 7: 0] FSM_STATE_MUL_CALC_1 = 8'h21; - localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22; - localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23; + localparam [ 7: 0] FSM_STATE_CRT_INIT_B_1 = 8'h51; + localparam [ 7: 0] FSM_STATE_CRT_INIT_B_2 = 8'h52; + localparam [ 7: 0] FSM_STATE_CRT_INIT_B_3 = 8'h53; + localparam [ 7: 0] FSM_STATE_CRT_INIT_B_4 = 8'h54; + + localparam [ 7: 0] FSM_STATE_CRT_CALC_B_1 = 8'h61; + localparam [ 7: 0] FSM_STATE_CRT_CALC_B_2 = 8'h62; + localparam [ 7: 0] FSM_STATE_CRT_CALC_B_3 = 8'h63; // - localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF; + localparam [ 7: 0] FSM_STATE_CRT_INIT_C_1 = 8'h71; + localparam [ 7: 0] FSM_STATE_CRT_INIT_C_2 = 8'h72; + localparam [ 7: 0] FSM_STATE_CRT_INIT_C_3 = 8'h73; + localparam [ 7: 0] FSM_STATE_CRT_INIT_C_4 = 8'h74; + + localparam [ 7: 0] FSM_STATE_CRT_CALC_C_1 = 8'h81; + localparam [ 7: 0] FSM_STATE_CRT_CALC_C_2 = 8'h82; + localparam [ 7: 0] FSM_STATE_CRT_CALC_C_3 = 8'h83; + // + localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF; /* * // * - * MUL_INIT: P1 = F - * P2 = F - * P3 = F - * T2 = M + * MUL_INIT: P1 <= F + * P2 <= F + * P3 <= F + * T2 <= M * - * MUL_CALC: TP = T2 * P3 + * MUL_CALC: TP = T2 * P3 * * // * - * EXP_INIT: P1 <= TP - * P2 <= TP - * P3 <= TP - * T1 <= 1 - * T2 <= 1 + * CRT_INIT_A: T2 <= M + * + * CRT_CALC_A: TP = T2 * P3 ("reduce only") * - * EXP_LOAD: T0 <= T1 + * CRT_INIT_B: P1 <= F + * P2 <= F + * P3 <= F + * T2 <= TP * - * EXP_CALC: PP = P1 * P2 - * TP = T2 * P3 + * CRT_CALC_B: TP = T2 * P3 * - * EXP_FILL: P1 <= PP - * P2 <= PP - * P3 <= PP - * T1 <= D[i] ? TP : T0 - * T2 <= D[i] ? TP : T0 + * CRT_INIT_C: T2 <= TP * - * EXP_SAVE: R <= T1 + * CRT_CALC_C: TP = T2 * P3 + * + * // + * + * EXP_INIT: P1 <= TP + * P2 <= TP + * P3 <= TP + * T1 <= 1 + * T2 <= 1 + * + * EXP_LOAD: T0 <= T1 + * + * EXP_CALC: PP = P1 * P2 + * TP = T2 * P3 + * + * EXP_FILL: P1 <= PP + * P2 <= PP + * P3 <= PP + * T1 <= D[i] ? TP : T0 + * T2 <= D[i] ? 
TP : T0 + * + * EXP_SAVE: R <= T1 * * // * @@ -225,10 +271,12 @@ module modexpa7_exponentiator # */ /* the very first addresses */ - wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}}; + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}}; /* the very last addresses */ - wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {m_num_words_latch}; + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {m_num_words_latch}; + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last_crt = + {m_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1}; /* address registers */ reg [OPERAND_ADDR_WIDTH-1:0] m_addr; @@ -261,16 +309,18 @@ module modexpa7_exponentiator # wire [OPERAND_ADDR_WIDTH-1:0] tp_addr_rd_next = tp_addr_rd + 1'b1; /* handy stop flags */ - wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire f_addr_done = (f_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire t2_addr_wr_done = (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0; - wire p_addr_wr_done = (p_addr_wr == bram_addr_last) ? 1'b1 : 1'b0; - wire pp_addr_rd_done = (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0; - wire tp_addr_rd_done = (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0; + wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire m_addr_done_crt = (m_addr == bram_addr_last_crt) ? 1'b1 : 1'b0; + wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire f_addr_done = (f_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire t2_addr_wr_done = (t2_addr_wr == bram_addr_last) ? 
1'b1 : 1'b0; + wire t2_addr_wr_done_crt = (t2_addr_wr == bram_addr_last_crt) ? 1'b1 : 1'b0; + wire p_addr_wr_done = (p_addr_wr == bram_addr_last) ? 1'b1 : 1'b0; + wire pp_addr_rd_done = (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0; + wire tp_addr_rd_done = (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0; /* map registers to top-level ports */ assign m_bram_addr = m_addr; @@ -392,10 +442,15 @@ module modexpa7_exponentiator # // m_addr // case (fsm_next_state) - FSM_STATE_MUL_INIT_1: m_addr <= bram_addr_zero; + FSM_STATE_MUL_INIT_1: m_addr <= bram_addr_zero; FSM_STATE_MUL_INIT_2, FSM_STATE_MUL_INIT_3, - FSM_STATE_MUL_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr; + FSM_STATE_MUL_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr; + // + FSM_STATE_CRT_INIT_A_1: m_addr <= bram_addr_zero; + FSM_STATE_CRT_INIT_A_2, + FSM_STATE_CRT_INIT_A_3, + FSM_STATE_CRT_INIT_A_4: m_addr <= !m_addr_done_crt ? m_addr_next : m_addr; endcase // // d_addr @@ -472,7 +527,10 @@ module modexpa7_exponentiator # // FSM_STATE_MUL_INIT_3: t2_addr_wr <= bram_addr_zero; FSM_STATE_MUL_INIT_4: t2_addr_wr <= t2_addr_wr_next; - + // + FSM_STATE_CRT_INIT_A_3: t2_addr_wr <= bram_addr_zero; + FSM_STATE_CRT_INIT_A_4: t2_addr_wr <= t2_addr_wr_next; + // FSM_STATE_EXP_INIT_3: t2_addr_wr <= bram_addr_zero; FSM_STATE_EXP_INIT_4: t2_addr_wr <= t2_addr_wr_next; // @@ -554,6 +612,8 @@ module modexpa7_exponentiator # case (fsm_next_state) FSM_STATE_MUL_INIT_3, FSM_STATE_MUL_INIT_4, + FSM_STATE_CRT_INIT_A_3, + FSM_STATE_CRT_INIT_A_4, FSM_STATE_EXP_INIT_3, FSM_STATE_EXP_INIT_4, FSM_STATE_EXP_FILL_3, @@ -616,15 +676,19 @@ module modexpa7_exponentiator # // case (fsm_next_state) // - FSM_STATE_MUL_INIT_3, - FSM_STATE_MUL_INIT_4: t2_data_in <= m_bram_out; + FSM_STATE_MUL_INIT_3, + FSM_STATE_MUL_INIT_4: t2_data_in <= m_bram_out; + // + FSM_STATE_CRT_INIT_A_3, + FSM_STATE_CRT_INIT_A_4: t2_data_in <= m_bram_out; + // - FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1; - FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0; + 
FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1; + FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0; // FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out; - default: t2_data_in <= 32'dX; + FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out; + default: t2_data_in <= 32'dX; endcase // end @@ -634,6 +698,7 @@ module modexpa7_exponentiator # // Double Multiplier // reg mul_ena; + reg mul_crt; wire mul_rdy_pp; wire mul_rdy_tp; wire mul_rdy_all = mul_rdy_pp & mul_rdy_tp; @@ -651,6 +716,8 @@ module modexpa7_exponentiator # .ena (mul_ena), .rdy (mul_rdy_pp), + .reduce_only (1'b0), + .a_bram_addr (p1_addr_rd), .b_bram_addr (p2_addr_rd), .n_bram_addr (n1_bram_addr), @@ -681,6 +748,8 @@ module modexpa7_exponentiator # .ena (mul_ena), .rdy (mul_rdy_tp), + .reduce_only (mul_crt), + .a_bram_addr (t2_addr_rd), .b_bram_addr (p3_addr_rd), .n_bram_addr (n2_bram_addr), @@ -703,8 +772,18 @@ module modexpa7_exponentiator # // case (fsm_next_state) FSM_STATE_MUL_CALC_1, - FSM_STATE_EXP_CALC_1: mul_ena <= 1'b1; - default: mul_ena <= 1'b0; + FSM_STATE_CRT_CALC_A_1, + FSM_STATE_CRT_CALC_B_1, + FSM_STATE_CRT_CALC_C_1, + FSM_STATE_EXP_CALC_1: mul_ena <= 1'b1; + default: mul_ena <= 1'b0; + endcase + + always @(posedge clk) + // + case (fsm_next_state) + FSM_STATE_CRT_CALC_A_1: mul_crt <= 1'b1; + default: mul_crt <= 1'b0; endcase @@ -726,53 +805,70 @@ module modexpa7_exponentiator # // case (fsm_state) // - FSM_STATE_MUL_INIT_1: fsm_next_state = FSM_STATE_MUL_INIT_2; - FSM_STATE_MUL_INIT_2: fsm_next_state = FSM_STATE_MUL_INIT_3; - FSM_STATE_MUL_INIT_3: fsm_next_state = FSM_STATE_MUL_INIT_4; - FSM_STATE_MUL_INIT_4: if (t2_addr_wr_done) fsm_next_state = FSM_STATE_MUL_CALC_1; - else fsm_next_state = FSM_STATE_MUL_INIT_4; - // - FSM_STATE_MUL_CALC_1: fsm_next_state = FSM_STATE_MUL_CALC_2; - FSM_STATE_MUL_CALC_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_MUL_CALC_3; - else fsm_next_state = FSM_STATE_MUL_CALC_2; - 
FSM_STATE_MUL_CALC_3: fsm_next_state = FSM_STATE_EXP_INIT_1; - // - FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_MUL_INIT_1; - else fsm_next_state = FSM_STATE_EXP_IDLE; - // - FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2; - FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3; - FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4; - FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1; - else fsm_next_state = FSM_STATE_EXP_INIT_4; - // - FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2; - FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3; - FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4; - FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1; - else fsm_next_state = FSM_STATE_EXP_LOAD_4; - // - FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2; - FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3; - else fsm_next_state = FSM_STATE_EXP_CALC_2; - FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1; - // - FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2; - FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3; - FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4; - FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT; - else fsm_next_state = FSM_STATE_EXP_FILL_4; - // - FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1; - else fsm_next_state = FSM_STATE_EXP_LOAD_1; - // - FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2; - FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3; - FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4; - FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP; - else fsm_next_state = FSM_STATE_EXP_SAVE_4; - // - FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE; + // + FSM_STATE_MUL_INIT_1: fsm_next_state = FSM_STATE_MUL_INIT_2; + 
FSM_STATE_MUL_INIT_2: fsm_next_state = FSM_STATE_MUL_INIT_3; + FSM_STATE_MUL_INIT_3: fsm_next_state = FSM_STATE_MUL_INIT_4; + FSM_STATE_MUL_INIT_4: if (t2_addr_wr_done) fsm_next_state = FSM_STATE_MUL_CALC_1; + else fsm_next_state = FSM_STATE_MUL_INIT_4; + // + FSM_STATE_MUL_CALC_1: fsm_next_state = FSM_STATE_MUL_CALC_2; + FSM_STATE_MUL_CALC_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_MUL_CALC_3; + else fsm_next_state = FSM_STATE_MUL_CALC_2; + FSM_STATE_MUL_CALC_3: fsm_next_state = FSM_STATE_EXP_INIT_1; + // + // + FSM_STATE_CRT_INIT_A_1: fsm_next_state = FSM_STATE_CRT_INIT_A_2; + FSM_STATE_CRT_INIT_A_2: fsm_next_state = FSM_STATE_CRT_INIT_A_3; + FSM_STATE_CRT_INIT_A_3: fsm_next_state = FSM_STATE_CRT_INIT_A_4; + FSM_STATE_CRT_INIT_A_4: if (t2_addr_wr_done_crt) fsm_next_state = FSM_STATE_CRT_CALC_A_1; + else fsm_next_state = FSM_STATE_CRT_INIT_A_4; + + // + FSM_STATE_CRT_CALC_A_1: fsm_next_state = FSM_STATE_CRT_CALC_A_2; + FSM_STATE_CRT_CALC_A_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_CRT_CALC_A_3; + else fsm_next_state = FSM_STATE_CRT_CALC_A_2; + FSM_STATE_CRT_CALC_A_3: fsm_next_state = FSM_STATE_EXP_INIT_1; + // + // + FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = crt ? 
+ FSM_STATE_CRT_INIT_A_1 : FSM_STATE_MUL_INIT_1; + else fsm_next_state = FSM_STATE_EXP_IDLE; + // + // + FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2; + FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3; + FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4; + FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1; + else fsm_next_state = FSM_STATE_EXP_INIT_4; + // + FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2; + FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3; + FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4; + FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1; + else fsm_next_state = FSM_STATE_EXP_LOAD_4; + // + FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2; + FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3; + else fsm_next_state = FSM_STATE_EXP_CALC_2; + FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1; + // + FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2; + FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3; + FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4; + FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT; + else fsm_next_state = FSM_STATE_EXP_FILL_4; + // + FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1; + else fsm_next_state = FSM_STATE_EXP_LOAD_1; + // + FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2; + FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3; + FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4; + FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP; + else fsm_next_state = FSM_STATE_EXP_SAVE_4; + // + FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE; // endcase // diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v index 7293998..444693d 100644 --- 
a/src/rtl/modexpa7_systolic_multiplier.v +++ b/src/rtl/modexpa7_systolic_multiplier.v @@ -57,6 +57,8 @@ module modexpa7_systolic_multiplier # input ena, output rdy, + + input reduce_only, output [OPERAND_ADDR_WIDTH-1:0] a_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] b_bram_addr, @@ -155,7 +157,8 @@ module modexpa7_systolic_multiplier # * Parameters Latch */ reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch; - reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch; + reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch; + reg reduce_only_latch; // save number of words in n when new operation starts always @(posedge clk) @@ -163,7 +166,12 @@ module modexpa7_systolic_multiplier # if ((fsm_state == FSM_STATE_IDLE) && ena_trig) n_num_words_latch <= n_num_words; + always @(posedge clk) + // + if ((fsm_state == FSM_STATE_IDLE) && ena_trig) + reduce_only_latch <= reduce_only; + /* * Multiplication Phase */ @@ -174,6 +182,7 @@ module modexpa7_systolic_multiplier # reg [ 1: 0] mult_phase; + wire mult_phase_ab = (mult_phase == MULT_PHASE_A_B) ? 1'b1 : 1'b0; wire mult_phase_done = (mult_phase == MULT_PHASE_STALL) ? 
1'b1 : 1'b0; always @(posedge clk) @@ -296,6 +305,7 @@ module modexpa7_systolic_multiplier # wire [OPERAND_ADDR_WIDTH :0] bram_addr_ext_last = {n_num_words_latch, 1'b1}; // address registers + wire [OPERAND_ADDR_WIDTH-1:0] a_addr; reg [OPERAND_ADDR_WIDTH-1:0] b_addr; reg [OPERAND_ADDR_WIDTH-1:0] n_addr; wire [OPERAND_ADDR_WIDTH :0] p_addr_ext_wr; @@ -570,8 +580,9 @@ module modexpa7_systolic_multiplier # MULT_PHASE_Q_N: p_num_words_latch <= {n_num_words_latch, 1'b1}; endcase - assign n_coeff_bram_addr = a_bram_addr; - assign q_addr_rd = a_bram_addr; + assign a_bram_addr = a_addr; + assign n_coeff_bram_addr = a_addr; + assign q_addr_rd = a_addr; reg [31: 0] a_data_out; @@ -597,12 +608,14 @@ module modexpa7_systolic_multiplier # .ena (pe_array_ena), .rdy (pe_array_rdy), + .crt (reduce_only_latch && mult_phase_ab), + .loader_addr_rd (loader_addr_rd), .pe_a_wide ({SYSTOLIC_ARRAY_LENGTH{a_data_out}}), .pe_b_wide (pe_b_wide), - .a_bram_addr (a_bram_addr), + .a_bram_addr (a_addr), .p_bram_addr (p_addr_ext_wr), .p_bram_in (p_data_in), diff --git a/src/rtl/modexpa7_systolic_multiplier_array.v b/src/rtl/modexpa7_systolic_multiplier_array.v index 754203d..3280010 100644 --- a/src/rtl/modexpa7_systolic_multiplier_array.v +++ b/src/rtl/modexpa7_systolic_multiplier_array.v @@ -48,6 +48,8 @@ module modexpa7_systolic_multiplier_array # input ena, output rdy, + input crt, + output [OPERAND_ADDR_WIDTH - SYSTOLIC_ARRAY_POWER - 1 : 0] loader_addr_rd, input [ 32 * (2 ** SYSTOLIC_ARRAY_POWER) - 1 : 0] pe_a_wide, @@ -385,6 +387,8 @@ module modexpa7_systolic_multiplier_array # // the very last address wire [OPERAND_ADDR_WIDTH - 1 : 0] bram_addr_last = n_num_words_latch; + wire [OPERAND_ADDR_WIDTH - 1 : 0] bram_addr_last_crt = + {n_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1}; wire [OPERAND_ADDR_WIDTH : 0] bram_addr_ext_last = p_num_words_latch; // registers @@ -398,8 +402,9 @@ module modexpa7_systolic_multiplier_array # wire [OPERAND_ADDR_WIDTH : 0] p_addr_next = p_addr + 1'b1; // handy 
flags - wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0; - wire p_addr_done = (p_addr == bram_addr_ext_last) ? 1'b1 : 1'b0; + wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire a_addr_done_crt = (a_addr == bram_addr_last_crt) ? 1'b1 : 1'b0; + wire p_addr_done = (p_addr == bram_addr_ext_last) ? 1'b1 : 1'b0; // map top-level ports to internal registers assign a_bram_addr = a_addr; @@ -452,7 +457,7 @@ module modexpa7_systolic_multiplier_array # // case (fsm_next_state) FSM_STATE_MULT_START: a_addr <= bram_addr_zero; - FSM_STATE_MULT_RELOAD: a_addr <= !a_addr_done ? a_addr_next : a_addr; + FSM_STATE_MULT_RELOAD: a_addr <= crt ? (!a_addr_done_crt ? a_addr_next : a_addr) : (!a_addr_done ? a_addr_next : a_addr); /* crt selects the bram_addr_last_crt stop flag instead of bram_addr_last */ endcase // end diff --git a/src/rtl/pe/modexpa7_primitive_switch.v b/src/rtl/pe/modexpa7_primitive_switch.v index fa958ec..17e8264 100644 --- a/src/rtl/pe/modexpa7_primitive_switch.v +++ b/src/rtl/pe/modexpa7_primitive_switch.v @@ -1,4 +1,4 @@ -`define USE_VENDOR_PRIMITIVES +//`define USE_VENDOR_PRIMITIVES `ifdef USE_VENDOR_PRIMITIVES -- cgit v1.2.3