From 7486edd118f6d69c4817040e53240baf24628dd7 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Wed, 19 Jul 2017 21:09:48 +0300 Subject: Added pre-multiplication step. Added 512-bit testbench. --- src/rtl/modexpa7_exponentiator.v | 212 ++++++++++++++++++++++++--------------- 1 file changed, 132 insertions(+), 80 deletions(-) (limited to 'src/rtl/modexpa7_exponentiator.v') diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v index e34a7ab..cda6882 100644 --- a/src/rtl/modexpa7_exponentiator.v +++ b/src/rtl/modexpa7_exponentiator.v @@ -60,6 +60,7 @@ module modexpa7_exponentiator # output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] f_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] n1_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] n2_bram_addr, output [OPERAND_ADDR_WIDTH-1:0] n_coeff1_bram_addr, @@ -68,6 +69,7 @@ module modexpa7_exponentiator # input [ 32-1:0] m_bram_out, input [ 32-1:0] d_bram_out, + input [ 32-1:0] f_bram_out, input [ 32-1:0] n1_bram_out, input [ 32-1:0] n2_bram_out, input [ 32-1:0] n_coeff1_bram_out, @@ -84,7 +86,7 @@ module modexpa7_exponentiator # // // FSM Declaration // - localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00; + localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00; // localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1; localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2; @@ -121,7 +123,7 @@ module modexpa7_exponentiator # localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22; localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23; // - localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF; + localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF; /* @@ -209,7 +211,7 @@ module modexpa7_exponentiator # /* save number of words in a and b when new operation starts */ always @(posedge clk) // - if (fsm_next_state == FSM_STATE_EXP_INIT_1) + if ((fsm_state == FSM_STATE_EXP_IDLE) && ena_trig) {m_num_words_latch, d_num_bits_latch} <= {m_num_words, d_num_bits}; @@ -231,6 +233,7 @@ module modexpa7_exponentiator # /* address registers */ reg [OPERAND_ADDR_WIDTH-1:0] m_addr; reg [OPERAND_ADDR_WIDTH-1:0] d_addr; + reg [OPERAND_ADDR_WIDTH-1:0] f_addr; reg [OPERAND_ADDR_WIDTH-1:0] r_addr; reg [OPERAND_ADDR_WIDTH-1:0] t0_addr; reg [OPERAND_ADDR_WIDTH-1:0] t1_addr; @@ -248,6 +251,7 @@ module modexpa7_exponentiator # /* handy increment values */ wire [OPERAND_ADDR_WIDTH-1:0] m_addr_next = m_addr + 1'b1; wire [OPERAND_ADDR_WIDTH-1:0] d_addr_next = d_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] f_addr_next = f_addr + 1'b1; wire [OPERAND_ADDR_WIDTH-1:0] r_addr_next = r_addr + 1'b1; wire [OPERAND_ADDR_WIDTH-1:0] t0_addr_next = t0_addr + 1'b1; wire [OPERAND_ADDR_WIDTH-1:0] t1_addr_next = t1_addr + 1'b1; @@ -259,6 +263,7 @@ module modexpa7_exponentiator # /* handy stop flags */ wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0; wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire f_addr_done = (f_addr == bram_addr_last) ? 1'b1 : 1'b0; wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0; wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0; wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0; @@ -270,6 +275,7 @@ module modexpa7_exponentiator # /* map registers to top-level ports */ assign m_bram_addr = m_addr; assign d_bram_addr = d_addr; + assign f_bram_addr = f_addr; assign r_bram_addr = r_addr; // @@ -386,93 +392,115 @@ module modexpa7_exponentiator # // m_addr // case (fsm_next_state) - FSM_STATE_EXP_INIT_1: m_addr <= bram_addr_zero; - FSM_STATE_EXP_INIT_2, - FSM_STATE_EXP_INIT_3, - FSM_STATE_EXP_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr; + FSM_STATE_MUL_INIT_1: m_addr <= bram_addr_zero; + FSM_STATE_MUL_INIT_2, + FSM_STATE_MUL_INIT_3, + FSM_STATE_MUL_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr; endcase // // d_addr // case (fsm_next_state) - FSM_STATE_EXP_CALC_1: d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5]; + FSM_STATE_EXP_CALC_1: d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5]; + endcase + // + // f_addr + // + case (fsm_next_state) + FSM_STATE_MUL_INIT_1: f_addr <= bram_addr_zero; + FSM_STATE_MUL_INIT_2, + FSM_STATE_MUL_INIT_3, + FSM_STATE_MUL_INIT_4: f_addr <= !f_addr_done ? f_addr_next : f_addr; endcase // // r_addr // case (fsm_next_state) - FSM_STATE_EXP_SAVE_3: r_addr <= bram_addr_zero; - FSM_STATE_EXP_SAVE_4: r_addr <= r_addr_next; + FSM_STATE_EXP_SAVE_3: r_addr <= bram_addr_zero; + FSM_STATE_EXP_SAVE_4: r_addr <= r_addr_next; endcase // // p_addr_wr // case (fsm_next_state) - FSM_STATE_EXP_INIT_3: p_addr_wr <= bram_addr_zero; - FSM_STATE_EXP_INIT_4: p_addr_wr <= p_addr_wr_next; // - FSM_STATE_EXP_FILL_3: p_addr_wr <= bram_addr_zero; - FSM_STATE_EXP_FILL_4: p_addr_wr <= p_addr_wr_next; + FSM_STATE_MUL_INIT_3: p_addr_wr <= bram_addr_zero; + FSM_STATE_MUL_INIT_4: p_addr_wr <= p_addr_wr_next; + // + FSM_STATE_EXP_INIT_3: p_addr_wr <= bram_addr_zero; + FSM_STATE_EXP_INIT_4: p_addr_wr <= p_addr_wr_next; + // + FSM_STATE_EXP_FILL_3: p_addr_wr <= bram_addr_zero; + FSM_STATE_EXP_FILL_4: p_addr_wr <= p_addr_wr_next; endcase // // t0_addr // case (fsm_next_state) - FSM_STATE_EXP_LOAD_3: t0_addr <= bram_addr_zero; - FSM_STATE_EXP_LOAD_4: t0_addr <= t0_addr_next; + FSM_STATE_EXP_LOAD_3: t0_addr <= bram_addr_zero; + FSM_STATE_EXP_LOAD_4: t0_addr <= t0_addr_next; // - FSM_STATE_EXP_FILL_1: t0_addr <= bram_addr_zero; + FSM_STATE_EXP_FILL_1: t0_addr <= bram_addr_zero; FSM_STATE_EXP_FILL_2, FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr; + FSM_STATE_EXP_FILL_4: t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr; endcase // // t1_addr // case (fsm_next_state) - FSM_STATE_EXP_INIT_3: t1_addr <= bram_addr_zero; - FSM_STATE_EXP_INIT_4: t1_addr <= t1_addr_next; + FSM_STATE_EXP_INIT_3: t1_addr <= bram_addr_zero; + FSM_STATE_EXP_INIT_4: t1_addr <= t1_addr_next; // - FSM_STATE_EXP_LOAD_1: t1_addr <= bram_addr_zero; + FSM_STATE_EXP_LOAD_1: t1_addr <= bram_addr_zero; FSM_STATE_EXP_LOAD_2, FSM_STATE_EXP_LOAD_3, - FSM_STATE_EXP_LOAD_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr; + FSM_STATE_EXP_LOAD_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr; // - FSM_STATE_EXP_FILL_3: t1_addr <= bram_addr_zero; - FSM_STATE_EXP_FILL_4: t1_addr <= t1_addr_next; + FSM_STATE_EXP_FILL_3: t1_addr <= bram_addr_zero; + FSM_STATE_EXP_FILL_4: t1_addr <= t1_addr_next; // - FSM_STATE_EXP_SAVE_1: t1_addr <= bram_addr_zero; + FSM_STATE_EXP_SAVE_1: t1_addr <= bram_addr_zero; FSM_STATE_EXP_SAVE_2, FSM_STATE_EXP_SAVE_3, - FSM_STATE_EXP_SAVE_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr; + FSM_STATE_EXP_SAVE_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr; endcase // // t2_addr_wr // case (fsm_next_state) - FSM_STATE_EXP_INIT_3: t2_addr_wr <= bram_addr_zero; - FSM_STATE_EXP_INIT_4: t2_addr_wr <= t2_addr_wr_next; // - FSM_STATE_EXP_FILL_3: t2_addr_wr <= bram_addr_zero; - FSM_STATE_EXP_FILL_4: t2_addr_wr <= t2_addr_wr_next; + FSM_STATE_MUL_INIT_3: t2_addr_wr <= bram_addr_zero; + FSM_STATE_MUL_INIT_4: t2_addr_wr <= t2_addr_wr_next; + + FSM_STATE_EXP_INIT_3: t2_addr_wr <= bram_addr_zero; + FSM_STATE_EXP_INIT_4: t2_addr_wr <= t2_addr_wr_next; + // + FSM_STATE_EXP_FILL_3: t2_addr_wr <= bram_addr_zero; + FSM_STATE_EXP_FILL_4: t2_addr_wr <= t2_addr_wr_next; endcase // // pp_addr_rd // case (fsm_next_state) - FSM_STATE_EXP_FILL_1: pp_addr_rd <= bram_addr_zero; + FSM_STATE_EXP_FILL_1: pp_addr_rd <= bram_addr_zero; FSM_STATE_EXP_FILL_2, FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd; + FSM_STATE_EXP_FILL_4: pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd; endcase // // tp_addr_rd // case (fsm_next_state) - FSM_STATE_EXP_FILL_1: tp_addr_rd <= bram_addr_zero; + FSM_STATE_EXP_INIT_1: tp_addr_rd <= bram_addr_zero; + FSM_STATE_EXP_INIT_2, + FSM_STATE_EXP_INIT_3, + FSM_STATE_EXP_INIT_4: tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd; + // + FSM_STATE_EXP_FILL_1: tp_addr_rd <= bram_addr_zero; FSM_STATE_EXP_FILL_2, FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd; + FSM_STATE_EXP_FILL_4: tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd; endcase // end @@ -487,26 +515,28 @@ module modexpa7_exponentiator # // case (fsm_next_state) FSM_STATE_EXP_SAVE_3, - FSM_STATE_EXP_SAVE_4: r_wren <= 1'b1; - default: r_wren <= 1'b0; + FSM_STATE_EXP_SAVE_4: r_wren <= 1'b1; + default: r_wren <= 1'b0; endcase // // p_wren // case (fsm_next_state) - FSM_STATE_EXP_INIT_3, + FSM_STATE_MUL_INIT_3, + FSM_STATE_MUL_INIT_4, + FSM_STATE_EXP_INIT_3, FSM_STATE_EXP_INIT_4, FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: p_wren <= 1'b1; - default: p_wren <= 1'b0; + FSM_STATE_EXP_FILL_4: p_wren <= 1'b1; + default: p_wren <= 1'b0; endcase // // t0_wren // case (fsm_next_state) FSM_STATE_EXP_LOAD_3, - FSM_STATE_EXP_LOAD_4: t0_wren <= 1'b1; - default: t0_wren <= 1'b0; + FSM_STATE_EXP_LOAD_4: t0_wren <= 1'b1; + default: t0_wren <= 1'b0; endcase // // t1_wren @@ -515,18 +545,20 @@ module modexpa7_exponentiator # FSM_STATE_EXP_INIT_3, FSM_STATE_EXP_INIT_4, FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: t1_wren <= 1'b1; - default: t1_wren <= 1'b0; + FSM_STATE_EXP_FILL_4: t1_wren <= 1'b1; + default: t1_wren <= 1'b0; endcase // // t2_wren // case (fsm_next_state) + FSM_STATE_MUL_INIT_3, + FSM_STATE_MUL_INIT_4, FSM_STATE_EXP_INIT_3, FSM_STATE_EXP_INIT_4, FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: t2_wren <= 1'b1; - default: t2_wren <= 1'b0; + FSM_STATE_EXP_FILL_4: t2_wren <= 1'b1; + default: t2_wren <= 1'b0; endcase // end @@ -541,50 +573,58 @@ module modexpa7_exponentiator # // case (fsm_next_state) FSM_STATE_EXP_SAVE_3, - FSM_STATE_EXP_SAVE_4: r_data_in <= t1_data_out; - default: r_data_in <= 32'dX; + FSM_STATE_EXP_SAVE_4: r_data_in <= t1_data_out; + default: r_data_in <= 32'dX; endcase // // p_data_in // case (fsm_next_state) + // + FSM_STATE_MUL_INIT_3, + FSM_STATE_MUL_INIT_4: p_data_in <= f_bram_out; + // FSM_STATE_EXP_INIT_3, - FSM_STATE_EXP_INIT_4: p_data_in <= m_bram_out; + FSM_STATE_EXP_INIT_4: p_data_in <= tp_data_out; // FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: p_data_in <= pp_data_out; + FSM_STATE_EXP_FILL_4: p_data_in <= pp_data_out; // - default: p_data_in <= 32'dX; + default: p_data_in <= 32'dX; endcase // // t0_data_in // case (fsm_next_state) FSM_STATE_EXP_LOAD_3, - FSM_STATE_EXP_LOAD_4: t0_data_in <= t1_data_out; - default: t0_data_in <= 32'dX; + FSM_STATE_EXP_LOAD_4: t0_data_in <= t1_data_out; + default: t0_data_in <= 32'dX; endcase // // t1_data_in // case (fsm_next_state) - FSM_STATE_EXP_INIT_3: t1_data_in <= 32'd1; - FSM_STATE_EXP_INIT_4: t1_data_in <= 32'd0; + FSM_STATE_EXP_INIT_3: t1_data_in <= 32'd1; + FSM_STATE_EXP_INIT_4: t1_data_in <= 32'd0; // FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: t1_data_in <= flag_update_r ? tp_data_out : t0_data_out; - default: t1_data_in <= 32'dX; + FSM_STATE_EXP_FILL_4: t1_data_in <= flag_update_r ? tp_data_out : t0_data_out; + default: t1_data_in <= 32'dX; endcase // // t2_data_in // case (fsm_next_state) - FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1; - FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0; + // + FSM_STATE_MUL_INIT_3, + FSM_STATE_MUL_INIT_4: t2_data_in <= m_bram_out; + // + FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1; + FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0; // FSM_STATE_EXP_FILL_3, - FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out; - default: t2_data_in <= 32'dX; + FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out; + default: t2_data_in <= 32'dX; endcase // end @@ -661,11 +701,12 @@ module modexpa7_exponentiator # always @(posedge clk) // - mul_ena <= (fsm_next_state == FSM_STATE_EXP_CALC_1) ? 1'b1 : 1'b0; - - - - + case (fsm_next_state) + FSM_STATE_MUL_CALC_1, + FSM_STATE_EXP_CALC_1: mul_ena <= 1'b1; + default: mul_ena <= 1'b0; + endcase + // // FSM Process @@ -685,42 +726,53 @@ module modexpa7_exponentiator # // case (fsm_state) // - FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_EXP_INIT_1; - else fsm_next_state = FSM_STATE_EXP_IDLE; + FSM_STATE_MUL_INIT_1: fsm_next_state = FSM_STATE_MUL_INIT_2; + FSM_STATE_MUL_INIT_2: fsm_next_state = FSM_STATE_MUL_INIT_3; + FSM_STATE_MUL_INIT_3: fsm_next_state = FSM_STATE_MUL_INIT_4; + FSM_STATE_MUL_INIT_4: if (t2_addr_wr_done) fsm_next_state = FSM_STATE_MUL_CALC_1; + else fsm_next_state = FSM_STATE_MUL_INIT_4; + // + FSM_STATE_MUL_CALC_1: fsm_next_state = FSM_STATE_MUL_CALC_2; + FSM_STATE_MUL_CALC_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_MUL_CALC_3; + else fsm_next_state = FSM_STATE_MUL_CALC_2; + FSM_STATE_MUL_CALC_3: fsm_next_state = FSM_STATE_EXP_INIT_1; + // + FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_MUL_INIT_1; + else fsm_next_state = FSM_STATE_EXP_IDLE; // FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2; FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3; FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4; - FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1; - else fsm_next_state = FSM_STATE_EXP_INIT_4; + FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1; + else fsm_next_state = FSM_STATE_EXP_INIT_4; // FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2; FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3; FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4; - FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1; - else fsm_next_state = FSM_STATE_EXP_LOAD_4; + FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1; + else fsm_next_state = FSM_STATE_EXP_LOAD_4; // FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2; - FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3; - else fsm_next_state = FSM_STATE_EXP_CALC_2; + FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3; + else fsm_next_state = FSM_STATE_EXP_CALC_2; FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1; // FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2; FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3; FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4; - FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT; - else fsm_next_state = FSM_STATE_EXP_FILL_4; + FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT; + else fsm_next_state = FSM_STATE_EXP_FILL_4; // - FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1; - else fsm_next_state = FSM_STATE_EXP_LOAD_1; + FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1; + else fsm_next_state = FSM_STATE_EXP_LOAD_1; // FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2; FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3; FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4; - FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP; - else fsm_next_state = FSM_STATE_EXP_SAVE_4; + FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP; + else fsm_next_state = FSM_STATE_EXP_SAVE_4; // - FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE; + FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE; // endcase // -- cgit v1.2.3