From 344ed1b7ab2fba06158a28b3c691cf9d9ee8cb75 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Wed, 19 Jul 2017 15:00:10 +0300 Subject: Finished modular exponentiation module: * works in simulator * passes synthesis without major issues Started adding pre-multiplication logic... --- src/rtl/modexpa7_exponentiator.v | 500 +++++++++++++++++++++++++-------------- src/tb/tb_exponentiator.v | 2 +- 2 files changed, 327 insertions(+), 175 deletions(-) diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v index 1f55cec..e34a7ab 100644 --- a/src/rtl/modexpa7_exponentiator.v +++ b/src/rtl/modexpa7_exponentiator.v @@ -57,26 +57,26 @@ module modexpa7_exponentiator # input ena, output rdy, - - output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] n1_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] n2_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] n_coeff1_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] n_coeff2_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] r_bram_addr, - - input [ 32-1:0] m_bram_out, - input [ 32-1:0] d_bram_out, - input [ 32-1:0] n1_bram_out, - input [ 32-1:0] n2_bram_out, - input [ 32-1:0] n_coeff1_bram_out, - input [ 32-1:0] n_coeff2_bram_out, + + output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] n1_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] n2_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] n_coeff1_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] n_coeff2_bram_addr, + output [OPERAND_ADDR_WIDTH-1:0] r_bram_addr, + + input [ 32-1:0] m_bram_out, + input [ 32-1:0] d_bram_out, + input [ 32-1:0] n1_bram_out, + input [ 32-1:0] n2_bram_out, + input [ 32-1:0] n_coeff1_bram_out, + input [ 32-1:0] n_coeff2_bram_out, output [ 32-1:0] r_bram_in, output r_bram_wr, - input [OPERAND_ADDR_WIDTH-1:0] n_num_words, + input [OPERAND_ADDR_WIDTH-1:0] m_num_words, input [OPERAND_ADDR_WIDTH+4:0] d_num_bits ); @@ -84,35 +84,86 @@ module modexpa7_exponentiator # // // FSM Declaration // - localparam [ 7: 0] FSM_STATE_IDLE = 8'h00; - - localparam [ 7: 0] FSM_STATE_INIT_1 = 8'hA1; - localparam [ 7: 0] FSM_STATE_INIT_2 = 8'hA2; - localparam [ 7: 0] FSM_STATE_INIT_3 = 8'hA3; - localparam [ 7: 0] FSM_STATE_INIT_4 = 8'hA4; - - localparam [ 7: 0] FSM_STATE_LOAD_1 = 8'hB1; - localparam [ 7: 0] FSM_STATE_LOAD_2 = 8'hB2; - localparam [ 7: 0] FSM_STATE_LOAD_3 = 8'hB3; - localparam [ 7: 0] FSM_STATE_LOAD_4 = 8'hB4; - - localparam [ 7: 0] FSM_STATE_CALC_1 = 8'hC1; - localparam [ 7: 0] FSM_STATE_CALC_2 = 8'hC2; - localparam [ 7: 0] FSM_STATE_CALC_3 = 8'hC3; - - localparam [ 7: 0] FSM_STATE_FILL_1 = 8'hD1; - localparam [ 7: 0] FSM_STATE_FILL_2 = 8'hD2; - localparam [ 7: 0] FSM_STATE_FILL_3 = 8'hD3; - localparam [ 7: 0] FSM_STATE_FILL_4 = 8'hD4; + localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00; + // + localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1; + localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2; + localparam [ 7: 0] FSM_STATE_EXP_INIT_3 = 8'hA3; + localparam [ 7: 0] FSM_STATE_EXP_INIT_4 = 8'hA4; + + localparam [ 7: 0] FSM_STATE_EXP_LOAD_1 = 8'hB1; + localparam [ 7: 0] FSM_STATE_EXP_LOAD_2 = 8'hB2; + localparam [ 7: 0] FSM_STATE_EXP_LOAD_3 = 8'hB3; + localparam [ 7: 0] FSM_STATE_EXP_LOAD_4 = 8'hB4; + + localparam [ 7: 0] FSM_STATE_EXP_CALC_1 = 8'hC1; + localparam [ 7: 0] FSM_STATE_EXP_CALC_2 = 8'hC2; + localparam [ 7: 0] FSM_STATE_EXP_CALC_3 = 8'hC3; + + localparam [ 7: 0] FSM_STATE_EXP_FILL_1 = 8'hD1; + localparam [ 7: 0] FSM_STATE_EXP_FILL_2 = 8'hD2; + localparam [ 7: 0] FSM_STATE_EXP_FILL_3 = 8'hD3; + localparam [ 7: 0] FSM_STATE_EXP_FILL_4 = 8'hD4; + + localparam [ 7: 0] FSM_STATE_EXP_NEXT = 8'hE0; + + localparam [ 7: 0] FSM_STATE_EXP_SAVE_1 = 8'hF1; + localparam [ 7: 0] FSM_STATE_EXP_SAVE_2 = 8'hF2; + localparam [ 7: 0] FSM_STATE_EXP_SAVE_3 = 8'hF3; + localparam [ 7: 0] FSM_STATE_EXP_SAVE_4 = 8'hF4; + // + localparam [ 7: 0] FSM_STATE_MUL_INIT_1 = 8'h11; + localparam [ 7: 0] FSM_STATE_MUL_INIT_2 = 8'h12; + localparam [ 7: 0] FSM_STATE_MUL_INIT_3 = 8'h13; + localparam [ 7: 0] FSM_STATE_MUL_INIT_4 = 8'h14; + + localparam [ 7: 0] FSM_STATE_MUL_CALC_1 = 8'h21; + localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22; + localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23; + // + localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF; + + + /* + * // + * + * MUL_INIT: P1 = F + * P2 = F + * P3 = F + * T2 = M + * + * MUL_CALC: TP = T2 * P3 + * + * // + * + * EXP_INIT: P1 <= TP + * P2 <= TP + * P3 <= TP + * T1 <= 1 + * T2 <= 1 + * + * EXP_LOAD: T0 <= T1 + * + * EXP_CALC: PP = P1 * P2 + * TP = T2 * P3 + * + * EXP_FILL: P1 <= PP + * P2 <= PP + * P3 <= PP + * T1 <= D[i] ? TP : T0 + * T2 <= D[i] ? TP : T0 + * + * EXP_SAVE: R <= T1 + * + * // + * + */ - localparam [ 7: 0] FSM_STATE_NEXT = 8'hE0; - - localparam [ 7: 0] FSM_STATE_STOP = 8'hFF; // // FSM State / Next State // - reg [ 7: 0] fsm_state = FSM_STATE_IDLE; + reg [ 7: 0] fsm_state = FSM_STATE_EXP_IDLE; reg [ 7: 0] fsm_next_state; @@ -141,10 +192,10 @@ module modexpa7_exponentiator # else begin /* clear flag when operation is started */ - if (fsm_state == FSM_STATE_IDLE) rdy_reg <= ~ena_trig; + if (fsm_state == FSM_STATE_EXP_IDLE) rdy_reg <= ~ena_trig; /* set flag after operation is finished */ - if (fsm_state == FSM_STATE_STOP) rdy_reg <= 1'b1; + if (fsm_state == FSM_STATE_EXP_STOP) rdy_reg <= 1'b1; end @@ -152,14 +203,14 @@ module modexpa7_exponentiator # // // Parameters Latch // - reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch; + reg [OPERAND_ADDR_WIDTH-1:0] m_num_words_latch; reg [OPERAND_ADDR_WIDTH+4:0] d_num_bits_latch; /* save number of words in a and b when new operation starts */ always @(posedge clk) // - if (fsm_next_state == FSM_STATE_INIT_1) - {n_num_words_latch, d_num_bits_latch} <= {n_num_words, d_num_bits}; + if (fsm_next_state == FSM_STATE_EXP_INIT_1) + {m_num_words_latch, d_num_bits_latch} <= {m_num_words, d_num_bits}; // @@ -175,7 +226,7 @@ module modexpa7_exponentiator # wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}}; /* the very last addresses */ - wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {n_num_words_latch}; + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {m_num_words_latch}; /* address registers */ reg [OPERAND_ADDR_WIDTH-1:0] m_addr; @@ -226,6 +277,7 @@ module modexpa7_exponentiator # // /* memory inputs */ + reg [31: 0] r_data_in; reg [31: 0] t0_data_in; reg [31: 0] t1_data_in; reg [31: 0] t2_data_in; @@ -244,12 +296,17 @@ module modexpa7_exponentiator # wire [31: 0] tp_data_out; /* write enables */ + reg r_wren; reg t0_wren; reg t1_wren; reg t2_wren; reg p_wren; wire pp_wren; wire tp_wren; + + /* map */ + assign r_bram_in = r_data_in; + assign r_bram_wr = r_wren; bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) bram_t0 (.clk(clk), .a_addr(t0_addr), .a_wr(t0_wren), .a_in(t0_data_in), .a_out(t0_data_out)); @@ -257,37 +314,69 @@ module modexpa7_exponentiator # bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) bram_t1 (.clk(clk), .a_addr(t1_addr), .a_wr(t1_wren), .a_in(t1_data_in), .a_out(t1_data_out)); - bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) bram_t2 (.clk(clk), - .a_addr(t2_addr_wr), .a_wr(t2_wren), .a_in(t2_data_in), .a_out(), - .b_addr(t2_addr_rd), .b_out(t2_data_out)); + .a_addr(t2_addr_wr), .a_wr(t2_wren), .a_in(t2_data_in), .a_out(), + .b_addr(t2_addr_rd), .b_out(t2_data_out)); - bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) bram_p1 (.clk(clk), - .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), - .b_addr(p1_addr_rd), .b_out(p1_data_out)); + .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), + .b_addr(p1_addr_rd), .b_out(p1_data_out)); - bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) bram_p2 (.clk(clk), - .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), - .b_addr(p2_addr_rd), .b_out(p2_data_out)); + .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), + .b_addr(p2_addr_rd), .b_out(p2_data_out)); - bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) bram_p3 (.clk(clk), - .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), - .b_addr(p3_addr_rd), .b_out(p3_data_out)); + .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), + .b_addr(p3_addr_rd), .b_out(p3_data_out)); - bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) bram_pp (.clk(clk), - .a_addr(pp_addr_wr), .a_wr(pp_wren), .a_in(pp_data_in), .a_out(), - .b_addr(pp_addr_rd), .b_out(pp_data_out)); + .a_addr(pp_addr_wr), .a_wr(pp_wren), .a_in(pp_data_in), .a_out(), + .b_addr(pp_addr_rd), .b_out(pp_data_out)); - bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) bram_tp (.clk(clk), - .a_addr(tp_addr_wr), .a_wr(tp_wren), .a_in(tp_data_in), .a_out(), - .b_addr(tp_addr_rd), .b_out(tp_data_out)); + .a_addr(tp_addr_wr), .a_wr(tp_wren), .a_in(tp_data_in), .a_out(), + .b_addr(tp_addr_rd), .b_out(tp_data_out)); + // + // Bit Counter + // + reg [OPERAND_ADDR_WIDTH+4:0] bit_cnt; + + wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_zero = {{OPERAND_ADDR_WIDTH{1'b0}}, {5{1'b0}}}; + wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_last = d_num_bits_latch; + wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_next = bit_cnt + 1'b1; + + /* handy flag */ + wire bit_cnt_done = (bit_cnt == bit_cnt_last) ? 1'b1 : 1'b0; + + always @(posedge clk) + // + if (fsm_next_state == FSM_STATE_EXP_LOAD_1) + // + case (fsm_state) + FSM_STATE_EXP_INIT_4: bit_cnt <= bit_cnt_zero; + FSM_STATE_EXP_NEXT: bit_cnt <= !bit_cnt_done ? bit_cnt_next : bit_cnt; + endcase + + + // + // Flags + // + reg flag_update_r; + + always @(posedge clk) + // + if (fsm_next_state == FSM_STATE_EXP_CALC_3) + flag_update_r <= d_bram_out[bit_cnt[4:0]]; + // // Memory Address Control Logic @@ -297,54 +386,93 @@ module modexpa7_exponentiator # // m_addr // case (fsm_next_state) - FSM_STATE_INIT_1: m_addr <= bram_addr_zero; - FSM_STATE_INIT_2, - FSM_STATE_INIT_3, - FSM_STATE_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr; + FSM_STATE_EXP_INIT_1: m_addr <= bram_addr_zero; + FSM_STATE_EXP_INIT_2, + FSM_STATE_EXP_INIT_3, + FSM_STATE_EXP_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr; + endcase + // + // d_addr + // + case (fsm_next_state) + FSM_STATE_EXP_CALC_1: d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5]; + endcase + // + // r_addr + // + case (fsm_next_state) + FSM_STATE_EXP_SAVE_3: r_addr <= bram_addr_zero; + FSM_STATE_EXP_SAVE_4: r_addr <= r_addr_next; endcase // // p_addr_wr // case (fsm_next_state) - FSM_STATE_INIT_3, - FSM_STATE_FILL_3: p_addr_wr <= bram_addr_zero; - FSM_STATE_INIT_4, - FSM_STATE_FILL_4: p_addr_wr <= p_addr_wr_next; + FSM_STATE_EXP_INIT_3: p_addr_wr <= bram_addr_zero; + FSM_STATE_EXP_INIT_4: p_addr_wr <= p_addr_wr_next; + // + FSM_STATE_EXP_FILL_3: p_addr_wr <= bram_addr_zero; + FSM_STATE_EXP_FILL_4: p_addr_wr <= p_addr_wr_next; endcase // // t0_addr // case (fsm_next_state) - FSM_STATE_LOAD_3: t0_addr <= bram_addr_zero; - FSM_STATE_LOAD_4: t0_addr <= t0_addr_next; + FSM_STATE_EXP_LOAD_3: t0_addr <= bram_addr_zero; + FSM_STATE_EXP_LOAD_4: t0_addr <= t0_addr_next; + // + FSM_STATE_EXP_FILL_1: t0_addr <= bram_addr_zero; + FSM_STATE_EXP_FILL_2, + FSM_STATE_EXP_FILL_3, + FSM_STATE_EXP_FILL_4: t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr; endcase // // t1_addr // case (fsm_next_state) - FSM_STATE_INIT_3: t1_addr <= bram_addr_zero; - FSM_STATE_INIT_4: t1_addr <= t1_addr_next; + FSM_STATE_EXP_INIT_3: t1_addr <= bram_addr_zero; + FSM_STATE_EXP_INIT_4: t1_addr <= t1_addr_next; // - FSM_STATE_LOAD_1: t1_addr <= bram_addr_zero; - FSM_STATE_LOAD_2, - FSM_STATE_LOAD_3, - FSM_STATE_LOAD_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr; + FSM_STATE_EXP_LOAD_1: t1_addr <= bram_addr_zero; + FSM_STATE_EXP_LOAD_2, + FSM_STATE_EXP_LOAD_3, + FSM_STATE_EXP_LOAD_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr; + // + FSM_STATE_EXP_FILL_3: t1_addr <= bram_addr_zero; + FSM_STATE_EXP_FILL_4: t1_addr <= t1_addr_next; + // + FSM_STATE_EXP_SAVE_1: t1_addr <= bram_addr_zero; + FSM_STATE_EXP_SAVE_2, + FSM_STATE_EXP_SAVE_3, + FSM_STATE_EXP_SAVE_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr; endcase // // t2_addr_wr // case (fsm_next_state) - FSM_STATE_INIT_3: t2_addr_wr <= bram_addr_zero; - FSM_STATE_INIT_4: t2_addr_wr <= t2_addr_wr_next; + FSM_STATE_EXP_INIT_3: t2_addr_wr <= bram_addr_zero; + FSM_STATE_EXP_INIT_4: t2_addr_wr <= t2_addr_wr_next; + // + FSM_STATE_EXP_FILL_3: t2_addr_wr <= bram_addr_zero; + FSM_STATE_EXP_FILL_4: t2_addr_wr <= t2_addr_wr_next; endcase // // pp_addr_rd // case (fsm_next_state) - FSM_STATE_FILL_1: pp_addr_rd <= bram_addr_zero; - FSM_STATE_FILL_2, - FSM_STATE_FILL_3, - FSM_STATE_FILL_4: pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd; + FSM_STATE_EXP_FILL_1: pp_addr_rd <= bram_addr_zero; + FSM_STATE_EXP_FILL_2, + FSM_STATE_EXP_FILL_3, + FSM_STATE_EXP_FILL_4: pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd; + endcase + // + // tp_addr_rd + // + case (fsm_next_state) + FSM_STATE_EXP_FILL_1: tp_addr_rd <= bram_addr_zero; + FSM_STATE_EXP_FILL_2, + FSM_STATE_EXP_FILL_3, + FSM_STATE_EXP_FILL_4: tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd; endcase // end @@ -354,38 +482,50 @@ module modexpa7_exponentiator # // Memory Write Enable Logic // always @(posedge clk) begin + // + // r_wren + // + case (fsm_next_state) + FSM_STATE_EXP_SAVE_3, + FSM_STATE_EXP_SAVE_4: r_wren <= 1'b1; + default: r_wren <= 1'b0; + endcase // // p_wren // case (fsm_next_state) - FSM_STATE_INIT_3, - FSM_STATE_INIT_4, - FSM_STATE_FILL_3, - FSM_STATE_FILL_4: p_wren <= 1'b1; + FSM_STATE_EXP_INIT_3, + FSM_STATE_EXP_INIT_4, + FSM_STATE_EXP_FILL_3, + FSM_STATE_EXP_FILL_4: p_wren <= 1'b1; default: p_wren <= 1'b0; endcase // // t0_wren // case (fsm_next_state) - FSM_STATE_LOAD_3, - FSM_STATE_LOAD_4: t0_wren <= 1'b1; + FSM_STATE_EXP_LOAD_3, + FSM_STATE_EXP_LOAD_4: t0_wren <= 1'b1; default: t0_wren <= 1'b0; endcase // // t1_wren // case (fsm_next_state) - FSM_STATE_INIT_3, - FSM_STATE_INIT_4: t1_wren <= 1'b1; + FSM_STATE_EXP_INIT_3, + FSM_STATE_EXP_INIT_4, + FSM_STATE_EXP_FILL_3, + FSM_STATE_EXP_FILL_4: t1_wren <= 1'b1; default: t1_wren <= 1'b0; endcase // // t2_wren // case (fsm_next_state) - FSM_STATE_INIT_3, - FSM_STATE_INIT_4: t2_wren <= 1'b1; + FSM_STATE_EXP_INIT_3, + FSM_STATE_EXP_INIT_4, + FSM_STATE_EXP_FILL_3, + FSM_STATE_EXP_FILL_4: t2_wren <= 1'b1; default: t2_wren <= 1'b0; endcase // @@ -396,19 +536,23 @@ module modexpa7_exponentiator # // Memory Input Selector // always @(posedge clk) begin + // + // r_data_in // case (fsm_next_state) - FSM_STATE_INIT_3: {t2_data_in, t1_data_in} <= {2{32'd1}}; - FSM_STATE_INIT_4: {t2_data_in, t1_data_in} <= {2{32'd0}}; - default: {t2_data_in, t1_data_in} <= {2{32'dX}}; - endcase + FSM_STATE_EXP_SAVE_3, + FSM_STATE_EXP_SAVE_4: r_data_in <= t1_data_out; + default: r_data_in <= 32'dX; + endcase + // + // p_data_in // case (fsm_next_state) - FSM_STATE_INIT_3, - FSM_STATE_INIT_4: p_data_in <= m_bram_out; + FSM_STATE_EXP_INIT_3, + FSM_STATE_EXP_INIT_4: p_data_in <= m_bram_out; // - FSM_STATE_FILL_3, - FSM_STATE_FILL_4: p_data_in <= pp_data_out; + FSM_STATE_EXP_FILL_3, + FSM_STATE_EXP_FILL_4: p_data_in <= pp_data_out; // default: p_data_in <= 32'dX; endcase @@ -416,9 +560,31 @@ module modexpa7_exponentiator # // t0_data_in // case (fsm_next_state) - FSM_STATE_LOAD_3, - FSM_STATE_LOAD_4: t0_data_in <= t1_data_out; - default: t0_data_in <= 32'dX; + FSM_STATE_EXP_LOAD_3, + FSM_STATE_EXP_LOAD_4: t0_data_in <= t1_data_out; + default: t0_data_in <= 32'dX; + endcase + // + // t1_data_in + // + case (fsm_next_state) + FSM_STATE_EXP_INIT_3: t1_data_in <= 32'd1; + FSM_STATE_EXP_INIT_4: t1_data_in <= 32'd0; + // + FSM_STATE_EXP_FILL_3, + FSM_STATE_EXP_FILL_4: t1_data_in <= flag_update_r ? tp_data_out : t0_data_out; + default: t1_data_in <= 32'dX; + endcase + // + // t2_data_in + // + case (fsm_next_state) + FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1; + FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0; + // + FSM_STATE_EXP_FILL_3, + FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out; + default: t2_data_in <= 32'dX; endcase // end @@ -445,21 +611,21 @@ module modexpa7_exponentiator # .ena (mul_ena), .rdy (mul_rdy_pp), - .a_bram_addr (p1_addr_rd), - .b_bram_addr (p2_addr_rd), - .n_bram_addr (n1_bram_addr), - .n_coeff_bram_addr (n_coeff1_bram_addr), - .r_bram_addr (pp_addr_wr), + .a_bram_addr (p1_addr_rd), + .b_bram_addr (p2_addr_rd), + .n_bram_addr (n1_bram_addr), + .n_coeff_bram_addr (n_coeff1_bram_addr), + .r_bram_addr (pp_addr_wr), - .a_bram_out (p1_data_out), - .b_bram_out (p2_data_out), - .n_bram_out (n1_bram_out), - .n_coeff_bram_out (n_coeff1_bram_out), + .a_bram_out (p1_data_out), + .b_bram_out (p2_data_out), + .n_bram_out (n1_bram_out), + .n_coeff_bram_out (n_coeff1_bram_out), .r_bram_in (pp_data_in), .r_bram_wr (pp_wren), - .ab_num_words (n_num_words_latch) + .ab_num_words (m_num_words_latch) ); modexpa7_systolic_multiplier # @@ -475,50 +641,30 @@ module modexpa7_exponentiator # .ena (mul_ena), .rdy (mul_rdy_tp), - .a_bram_addr (t2_addr_rd), - .b_bram_addr (p3_addr_rd), - .n_bram_addr (n2_bram_addr), - .n_coeff_bram_addr (n_coeff2_bram_addr), - .r_bram_addr (tp_addr_wr), + .a_bram_addr (t2_addr_rd), + .b_bram_addr (p3_addr_rd), + .n_bram_addr (n2_bram_addr), + .n_coeff_bram_addr (n_coeff2_bram_addr), + .r_bram_addr (tp_addr_wr), - .a_bram_out (t2_data_out), - .b_bram_out (p3_data_out), - .n_bram_out (n2_bram_out), - .n_coeff_bram_out (n_coeff2_bram_out), + .a_bram_out (t2_data_out), + .b_bram_out (p3_data_out), + .n_bram_out (n2_bram_out), + .n_coeff_bram_out (n_coeff2_bram_out), .r_bram_in (tp_data_in), .r_bram_wr (tp_wren), - .ab_num_words (n_num_words_latch) + .ab_num_words (m_num_words_latch) ); always @(posedge clk) // - mul_ena <= (fsm_next_state == FSM_STATE_CALC_1) ? 1'b1 : 1'b0; + mul_ena <= (fsm_next_state == FSM_STATE_EXP_CALC_1) ? 1'b1 : 1'b0; - // - // Bit Counter - // - reg [OPERAND_ADDR_WIDTH+4:0] bit_cnt; - - wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_zero = {{OPERAND_ADDR_WIDTH{1'b0}}, {5{1'b0}}}; - wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_last = d_num_bits_latch; - wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_next = bit_cnt + 1'b1; - - /* handy flag */ - wire bit_cnt_done = (bit_cnt == bit_cnt_last) ? 1'b1 : 1'b0; - always @(posedge clk) - // - if (fsm_next_state == FSM_STATE_LOAD_1) - // - case (fsm_state) - FSM_STATE_INIT_4: bit_cnt <= bit_cnt_zero; - FSM_STATE_NEXT: bit_cnt <= !bit_cnt_done ? bit_cnt_next : bit_cnt; - endcase - // @@ -526,7 +672,7 @@ module modexpa7_exponentiator # // always @(posedge clk or negedge rst_n) // - if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; + if (rst_n == 1'b0) fsm_state <= FSM_STATE_EXP_IDLE; else fsm_state <= fsm_next_state; @@ -535,40 +681,46 @@ module modexpa7_exponentiator # // always @* begin // - fsm_next_state = FSM_STATE_STOP; + fsm_next_state = FSM_STATE_EXP_STOP; // case (fsm_state) // - FSM_STATE_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_INIT_1; - else fsm_next_state = FSM_STATE_IDLE; + FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_EXP_INIT_1; + else fsm_next_state = FSM_STATE_EXP_IDLE; + // + FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2; + FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3; + FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4; + FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1; + else fsm_next_state = FSM_STATE_EXP_INIT_4; // - FSM_STATE_INIT_1: fsm_next_state = FSM_STATE_INIT_2; - FSM_STATE_INIT_2: fsm_next_state = FSM_STATE_INIT_3; - FSM_STATE_INIT_3: fsm_next_state = FSM_STATE_INIT_4; - FSM_STATE_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_LOAD_1; - else fsm_next_state = FSM_STATE_INIT_4; + FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2; + FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3; + FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4; + FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1; + else fsm_next_state = FSM_STATE_EXP_LOAD_4; // - FSM_STATE_LOAD_1: fsm_next_state = FSM_STATE_LOAD_2; - FSM_STATE_LOAD_2: fsm_next_state = FSM_STATE_LOAD_3; - FSM_STATE_LOAD_3: fsm_next_state = FSM_STATE_LOAD_4; - FSM_STATE_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_CALC_1; - else fsm_next_state = FSM_STATE_LOAD_4; + FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2; + FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3; + else fsm_next_state = FSM_STATE_EXP_CALC_2; + FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1; // - FSM_STATE_CALC_1: fsm_next_state = FSM_STATE_CALC_2; - FSM_STATE_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_CALC_3; - else fsm_next_state = FSM_STATE_CALC_2; - FSM_STATE_CALC_3: fsm_next_state = FSM_STATE_FILL_1; + FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2; + FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3; + FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4; + FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT; + else fsm_next_state = FSM_STATE_EXP_FILL_4; // - FSM_STATE_FILL_1: fsm_next_state = FSM_STATE_FILL_2; - FSM_STATE_FILL_2: fsm_next_state = FSM_STATE_FILL_3; - FSM_STATE_FILL_3: fsm_next_state = FSM_STATE_FILL_4; - FSM_STATE_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_NEXT; - else fsm_next_state = FSM_STATE_FILL_4; + FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1; + else fsm_next_state = FSM_STATE_EXP_LOAD_1; // - FSM_STATE_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_STOP; - else fsm_next_state = FSM_STATE_LOAD_1; + FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2; + FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3; + FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4; + FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP; + else fsm_next_state = FSM_STATE_EXP_SAVE_4; // - FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE; + FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE; // endcase // diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v index 8ca9444..3b612c5 100644 --- a/src/tb/tb_exponentiator.v +++ b/src/tb/tb_exponentiator.v @@ -180,7 +180,7 @@ module tb_exponentiator; .r_bram_in (core_r_data_in), .r_bram_wr (core_r_wren), - .n_num_words (n_num_words), + .m_num_words (n_num_words), .d_num_bits (d_num_bits) ); -- cgit v1.2.3