diff options
Diffstat (limited to 'src/rtl')
-rw-r--r-- | src/rtl/modexpa7_exponentiator.v | 500 |
1 files changed, 326 insertions, 174 deletions
diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v index 1f55cec..e34a7ab 100644 --- a/src/rtl/modexpa7_exponentiator.v +++ b/src/rtl/modexpa7_exponentiator.v @@ -57,26 +57,26 @@ module modexpa7_exponentiator # input ena,
output rdy,
-
- output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] n1_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] n2_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] n_coeff1_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] n_coeff2_bram_addr, - output [OPERAND_ADDR_WIDTH-1:0] r_bram_addr, -
- input [ 32-1:0] m_bram_out, - input [ 32-1:0] d_bram_out, - input [ 32-1:0] n1_bram_out, - input [ 32-1:0] n2_bram_out, - input [ 32-1:0] n_coeff1_bram_out, - input [ 32-1:0] n_coeff2_bram_out, +
+ output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] n1_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] n2_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] n_coeff1_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] n_coeff2_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] r_bram_addr,
+
+ input [ 32-1:0] m_bram_out,
+ input [ 32-1:0] d_bram_out,
+ input [ 32-1:0] n1_bram_out,
+ input [ 32-1:0] n2_bram_out,
+ input [ 32-1:0] n_coeff1_bram_out,
+ input [ 32-1:0] n_coeff2_bram_out,
output [ 32-1:0] r_bram_in,
output r_bram_wr,
- input [OPERAND_ADDR_WIDTH-1:0] n_num_words,
+ input [OPERAND_ADDR_WIDTH-1:0] m_num_words,
input [OPERAND_ADDR_WIDTH+4:0] d_num_bits
);
@@ -84,35 +84,86 @@ module modexpa7_exponentiator # //
// FSM Declaration
//
- localparam [ 7: 0] FSM_STATE_IDLE = 8'h00;
-
- localparam [ 7: 0] FSM_STATE_INIT_1 = 8'hA1;
- localparam [ 7: 0] FSM_STATE_INIT_2 = 8'hA2;
- localparam [ 7: 0] FSM_STATE_INIT_3 = 8'hA3;
- localparam [ 7: 0] FSM_STATE_INIT_4 = 8'hA4;
-
- localparam [ 7: 0] FSM_STATE_LOAD_1 = 8'hB1;
- localparam [ 7: 0] FSM_STATE_LOAD_2 = 8'hB2;
- localparam [ 7: 0] FSM_STATE_LOAD_3 = 8'hB3;
- localparam [ 7: 0] FSM_STATE_LOAD_4 = 8'hB4;
-
- localparam [ 7: 0] FSM_STATE_CALC_1 = 8'hC1;
- localparam [ 7: 0] FSM_STATE_CALC_2 = 8'hC2;
- localparam [ 7: 0] FSM_STATE_CALC_3 = 8'hC3;
-
- localparam [ 7: 0] FSM_STATE_FILL_1 = 8'hD1;
- localparam [ 7: 0] FSM_STATE_FILL_2 = 8'hD2;
- localparam [ 7: 0] FSM_STATE_FILL_3 = 8'hD3;
- localparam [ 7: 0] FSM_STATE_FILL_4 = 8'hD4;
+ localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00;
+ //
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1;
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2;
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_3 = 8'hA3;
+ localparam [ 7: 0] FSM_STATE_EXP_INIT_4 = 8'hA4;
+
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_1 = 8'hB1;
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_2 = 8'hB2;
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_3 = 8'hB3;
+ localparam [ 7: 0] FSM_STATE_EXP_LOAD_4 = 8'hB4;
+
+ localparam [ 7: 0] FSM_STATE_EXP_CALC_1 = 8'hC1;
+ localparam [ 7: 0] FSM_STATE_EXP_CALC_2 = 8'hC2;
+ localparam [ 7: 0] FSM_STATE_EXP_CALC_3 = 8'hC3;
+
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_1 = 8'hD1;
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_2 = 8'hD2;
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_3 = 8'hD3;
+ localparam [ 7: 0] FSM_STATE_EXP_FILL_4 = 8'hD4;
+
+ localparam [ 7: 0] FSM_STATE_EXP_NEXT = 8'hE0;
+
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_1 = 8'hF1;
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_2 = 8'hF2;
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_3 = 8'hF3;
+ localparam [ 7: 0] FSM_STATE_EXP_SAVE_4 = 8'hF4;
+ //
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_1 = 8'h11;
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_2 = 8'h12;
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_3 = 8'h13;
+ localparam [ 7: 0] FSM_STATE_MUL_INIT_4 = 8'h14;
+
+ localparam [ 7: 0] FSM_STATE_MUL_CALC_1 = 8'h21;
+ localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22;
+ localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23;
+ //
+ localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF;
+
+
+ /*
+ * //
+ *
+ * MUL_INIT: P1 = F
+ * P2 = F
+ * P3 = F
+ * T2 = M
+ *
+ * MUL_CALC: TP = T2 * P3
+ *
+ * //
+ *
+ * EXP_INIT: P1 <= TP
+ * P2 <= TP
+ * P3 <= TP
+ * T1 <= 1
+ * T2 <= 1
+ *
+ * EXP_LOAD: T0 <= T1
+ *
+ * EXP_CALC: PP = P1 * P2
+ * TP = T2 * P3
+ *
+ * EXP_FILL: P1 <= PP
+ * P2 <= PP
+ * P3 <= PP
+ * T1 <= D[i] ? TP : T0
+ * T2 <= D[i] ? TP : T0
+ *
+ * EXP_SAVE: R <= T1
+ *
+ * //
+ *
+ */
- localparam [ 7: 0] FSM_STATE_NEXT = 8'hE0;
-
- localparam [ 7: 0] FSM_STATE_STOP = 8'hFF;
//
// FSM State / Next State
//
- reg [ 7: 0] fsm_state = FSM_STATE_IDLE;
+ reg [ 7: 0] fsm_state = FSM_STATE_EXP_IDLE;
reg [ 7: 0] fsm_next_state;
@@ -141,10 +192,10 @@ module modexpa7_exponentiator # else begin
/* clear flag when operation is started */
- if (fsm_state == FSM_STATE_IDLE) rdy_reg <= ~ena_trig;
+ if (fsm_state == FSM_STATE_EXP_IDLE) rdy_reg <= ~ena_trig;
/* set flag after operation is finished */
- if (fsm_state == FSM_STATE_STOP) rdy_reg <= 1'b1;
+ if (fsm_state == FSM_STATE_EXP_STOP) rdy_reg <= 1'b1;
end
@@ -152,14 +203,14 @@ module modexpa7_exponentiator # //
// Parameters Latch
//
- reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch;
+ reg [OPERAND_ADDR_WIDTH-1:0] m_num_words_latch;
reg [OPERAND_ADDR_WIDTH+4:0] d_num_bits_latch;
/* latch m_num_words and d_num_bits when a new operation starts */
always @(posedge clk)
//
- if (fsm_next_state == FSM_STATE_INIT_1)
- {n_num_words_latch, d_num_bits_latch} <= {n_num_words, d_num_bits};
+ if (fsm_next_state == FSM_STATE_EXP_INIT_1)
+ {m_num_words_latch, d_num_bits_latch} <= {m_num_words, d_num_bits};
//
@@ -175,7 +226,7 @@ module modexpa7_exponentiator # wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {{OPERAND_ADDR_WIDTH{1'b0}}};
/* the very last addresses */
- wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {n_num_words_latch};
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {m_num_words_latch};
/* address registers */
reg [OPERAND_ADDR_WIDTH-1:0] m_addr;
@@ -226,6 +277,7 @@ module modexpa7_exponentiator # //
/* memory inputs */
+ reg [31: 0] r_data_in;
reg [31: 0] t0_data_in;
reg [31: 0] t1_data_in;
reg [31: 0] t2_data_in;
@@ -244,12 +296,17 @@ module modexpa7_exponentiator # wire [31: 0] tp_data_out;
/* write enables */
+ reg r_wren;
reg t0_wren;
reg t1_wren;
reg t2_wren;
reg p_wren;
wire pp_wren;
wire tp_wren;
+
+ /* map */
+ assign r_bram_in = r_data_in;
+ assign r_bram_wr = r_wren;
bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
bram_t0 (.clk(clk), .a_addr(t0_addr), .a_wr(t0_wren), .a_in(t0_data_in), .a_out(t0_data_out));
@@ -257,37 +314,69 @@ module modexpa7_exponentiator # bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
bram_t1 (.clk(clk), .a_addr(t1_addr), .a_wr(t1_wren), .a_in(t1_data_in), .a_out(t1_data_out));
- bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
bram_t2 (.clk(clk),
- .a_addr(t2_addr_wr), .a_wr(t2_wren), .a_in(t2_data_in), .a_out(), - .b_addr(t2_addr_rd), .b_out(t2_data_out)); + .a_addr(t2_addr_wr), .a_wr(t2_wren), .a_in(t2_data_in), .a_out(),
+ .b_addr(t2_addr_rd), .b_out(t2_data_out));
- bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
bram_p1 (.clk(clk),
- .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), - .b_addr(p1_addr_rd), .b_out(p1_data_out)); + .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+ .b_addr(p1_addr_rd), .b_out(p1_data_out));
- bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
bram_p2 (.clk(clk),
- .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), - .b_addr(p2_addr_rd), .b_out(p2_data_out)); + .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+ .b_addr(p2_addr_rd), .b_out(p2_data_out));
- bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
bram_p3 (.clk(clk),
- .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), - .b_addr(p3_addr_rd), .b_out(p3_data_out)); + .a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+ .b_addr(p3_addr_rd), .b_out(p3_data_out));
- bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
bram_pp (.clk(clk),
- .a_addr(pp_addr_wr), .a_wr(pp_wren), .a_in(pp_data_in), .a_out(), - .b_addr(pp_addr_rd), .b_out(pp_data_out)); + .a_addr(pp_addr_wr), .a_wr(pp_wren), .a_in(pp_data_in), .a_out(),
+ .b_addr(pp_addr_rd), .b_out(pp_data_out));
- bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) + bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
bram_tp (.clk(clk),
- .a_addr(tp_addr_wr), .a_wr(tp_wren), .a_in(tp_data_in), .a_out(), - .b_addr(tp_addr_rd), .b_out(tp_data_out)); + .a_addr(tp_addr_wr), .a_wr(tp_wren), .a_in(tp_data_in), .a_out(),
+ .b_addr(tp_addr_rd), .b_out(tp_data_out));
+ //
+ // Bit Counter
+ //
+ reg [OPERAND_ADDR_WIDTH+4:0] bit_cnt;
+
+ wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_zero = {{OPERAND_ADDR_WIDTH{1'b0}}, {5{1'b0}}};
+ wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_last = d_num_bits_latch;
+ wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_next = bit_cnt + 1'b1;
+
+ /* handy flag */
+ wire bit_cnt_done = (bit_cnt == bit_cnt_last) ? 1'b1 : 1'b0;
+
+ always @(posedge clk)
+ //
+ if (fsm_next_state == FSM_STATE_EXP_LOAD_1)
+ //
+ case (fsm_state)
+ FSM_STATE_EXP_INIT_4: bit_cnt <= bit_cnt_zero;
+ FSM_STATE_EXP_NEXT: bit_cnt <= !bit_cnt_done ? bit_cnt_next : bit_cnt;
+ endcase
+
+
+ //
+ // Flags
+ //
+ reg flag_update_r;
+
+ always @(posedge clk)
+ //
+ if (fsm_next_state == FSM_STATE_EXP_CALC_3)
+ flag_update_r <= d_bram_out[bit_cnt[4:0]];
+
//
// Memory Address Control Logic
@@ -297,54 +386,93 @@ module modexpa7_exponentiator # // m_addr
//
case (fsm_next_state)
- FSM_STATE_INIT_1: m_addr <= bram_addr_zero;
- FSM_STATE_INIT_2,
- FSM_STATE_INIT_3,
- FSM_STATE_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr;
+ FSM_STATE_EXP_INIT_1: m_addr <= bram_addr_zero;
+ FSM_STATE_EXP_INIT_2,
+ FSM_STATE_EXP_INIT_3,
+ FSM_STATE_EXP_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr;
+ endcase
+ //
+ // d_addr
+ //
+ case (fsm_next_state)
+ FSM_STATE_EXP_CALC_1: d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5];
+ endcase
+ //
+ // r_addr
+ //
+ case (fsm_next_state)
+ FSM_STATE_EXP_SAVE_3: r_addr <= bram_addr_zero;
+ FSM_STATE_EXP_SAVE_4: r_addr <= r_addr_next;
endcase
//
// p_addr_wr
//
case (fsm_next_state)
- FSM_STATE_INIT_3,
- FSM_STATE_FILL_3: p_addr_wr <= bram_addr_zero;
- FSM_STATE_INIT_4,
- FSM_STATE_FILL_4: p_addr_wr <= p_addr_wr_next;
+ FSM_STATE_EXP_INIT_3: p_addr_wr <= bram_addr_zero;
+ FSM_STATE_EXP_INIT_4: p_addr_wr <= p_addr_wr_next;
+ //
+ FSM_STATE_EXP_FILL_3: p_addr_wr <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_4: p_addr_wr <= p_addr_wr_next;
endcase
//
// t0_addr
//
case (fsm_next_state)
- FSM_STATE_LOAD_3: t0_addr <= bram_addr_zero;
- FSM_STATE_LOAD_4: t0_addr <= t0_addr_next;
+ FSM_STATE_EXP_LOAD_3: t0_addr <= bram_addr_zero;
+ FSM_STATE_EXP_LOAD_4: t0_addr <= t0_addr_next;
+ //
+ FSM_STATE_EXP_FILL_1: t0_addr <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_2,
+ FSM_STATE_EXP_FILL_3,
+ FSM_STATE_EXP_FILL_4: t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr;
endcase
//
// t1_addr
//
case (fsm_next_state)
- FSM_STATE_INIT_3: t1_addr <= bram_addr_zero;
- FSM_STATE_INIT_4: t1_addr <= t1_addr_next;
+ FSM_STATE_EXP_INIT_3: t1_addr <= bram_addr_zero;
+ FSM_STATE_EXP_INIT_4: t1_addr <= t1_addr_next;
//
- FSM_STATE_LOAD_1: t1_addr <= bram_addr_zero;
- FSM_STATE_LOAD_2,
- FSM_STATE_LOAD_3,
- FSM_STATE_LOAD_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+ FSM_STATE_EXP_LOAD_1: t1_addr <= bram_addr_zero;
+ FSM_STATE_EXP_LOAD_2,
+ FSM_STATE_EXP_LOAD_3,
+ FSM_STATE_EXP_LOAD_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+ //
+ FSM_STATE_EXP_FILL_3: t1_addr <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_4: t1_addr <= t1_addr_next;
+ //
+ FSM_STATE_EXP_SAVE_1: t1_addr <= bram_addr_zero;
+ FSM_STATE_EXP_SAVE_2,
+ FSM_STATE_EXP_SAVE_3,
+ FSM_STATE_EXP_SAVE_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
endcase
//
// t2_addr_wr
//
case (fsm_next_state)
- FSM_STATE_INIT_3: t2_addr_wr <= bram_addr_zero;
- FSM_STATE_INIT_4: t2_addr_wr <= t2_addr_wr_next;
+ FSM_STATE_EXP_INIT_3: t2_addr_wr <= bram_addr_zero;
+ FSM_STATE_EXP_INIT_4: t2_addr_wr <= t2_addr_wr_next;
+ //
+ FSM_STATE_EXP_FILL_3: t2_addr_wr <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_4: t2_addr_wr <= t2_addr_wr_next;
endcase
//
// pp_addr_rd
//
case (fsm_next_state)
- FSM_STATE_FILL_1: pp_addr_rd <= bram_addr_zero;
- FSM_STATE_FILL_2,
- FSM_STATE_FILL_3,
- FSM_STATE_FILL_4: pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
+ FSM_STATE_EXP_FILL_1: pp_addr_rd <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_2,
+ FSM_STATE_EXP_FILL_3,
+ FSM_STATE_EXP_FILL_4: pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
+ endcase
+ //
+ // tp_addr_rd
+ //
+ case (fsm_next_state)
+ FSM_STATE_EXP_FILL_1: tp_addr_rd <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_2,
+ FSM_STATE_EXP_FILL_3,
+ FSM_STATE_EXP_FILL_4: tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
endcase
//
end
@@ -355,37 +483,49 @@ module modexpa7_exponentiator # //
always @(posedge clk) begin
//
+ // r_wren
+ //
+ case (fsm_next_state)
+ FSM_STATE_EXP_SAVE_3,
+ FSM_STATE_EXP_SAVE_4: r_wren <= 1'b1;
+ default: r_wren <= 1'b0;
+ endcase
+ //
// p_wren
//
case (fsm_next_state)
- FSM_STATE_INIT_3,
- FSM_STATE_INIT_4,
- FSM_STATE_FILL_3,
- FSM_STATE_FILL_4: p_wren <= 1'b1;
+ FSM_STATE_EXP_INIT_3,
+ FSM_STATE_EXP_INIT_4,
+ FSM_STATE_EXP_FILL_3,
+ FSM_STATE_EXP_FILL_4: p_wren <= 1'b1;
default: p_wren <= 1'b0;
endcase
//
// t0_wren
//
case (fsm_next_state)
- FSM_STATE_LOAD_3,
- FSM_STATE_LOAD_4: t0_wren <= 1'b1;
+ FSM_STATE_EXP_LOAD_3,
+ FSM_STATE_EXP_LOAD_4: t0_wren <= 1'b1;
default: t0_wren <= 1'b0;
endcase
//
// t1_wren
//
case (fsm_next_state)
- FSM_STATE_INIT_3,
- FSM_STATE_INIT_4: t1_wren <= 1'b1;
+ FSM_STATE_EXP_INIT_3,
+ FSM_STATE_EXP_INIT_4,
+ FSM_STATE_EXP_FILL_3,
+ FSM_STATE_EXP_FILL_4: t1_wren <= 1'b1;
default: t1_wren <= 1'b0;
endcase
//
// t2_wren
//
case (fsm_next_state)
- FSM_STATE_INIT_3,
- FSM_STATE_INIT_4: t2_wren <= 1'b1;
+ FSM_STATE_EXP_INIT_3,
+ FSM_STATE_EXP_INIT_4,
+ FSM_STATE_EXP_FILL_3,
+ FSM_STATE_EXP_FILL_4: t2_wren <= 1'b1;
default: t2_wren <= 1'b0;
endcase
//
@@ -397,18 +537,22 @@ module modexpa7_exponentiator # //
always @(posedge clk) begin
//
+ // r_data_in
+ //
case (fsm_next_state)
- FSM_STATE_INIT_3: {t2_data_in, t1_data_in} <= {2{32'd1}};
- FSM_STATE_INIT_4: {t2_data_in, t1_data_in} <= {2{32'd0}};
- default: {t2_data_in, t1_data_in} <= {2{32'dX}};
- endcase
+ FSM_STATE_EXP_SAVE_3,
+ FSM_STATE_EXP_SAVE_4: r_data_in <= t1_data_out;
+ default: r_data_in <= 32'dX;
+ endcase
+ //
+ // p_data_in
//
case (fsm_next_state)
- FSM_STATE_INIT_3,
- FSM_STATE_INIT_4: p_data_in <= m_bram_out;
+ FSM_STATE_EXP_INIT_3,
+ FSM_STATE_EXP_INIT_4: p_data_in <= m_bram_out;
//
- FSM_STATE_FILL_3,
- FSM_STATE_FILL_4: p_data_in <= pp_data_out;
+ FSM_STATE_EXP_FILL_3,
+ FSM_STATE_EXP_FILL_4: p_data_in <= pp_data_out;
//
default: p_data_in <= 32'dX;
endcase
@@ -416,9 +560,31 @@ module modexpa7_exponentiator # // t0_data_in
//
case (fsm_next_state)
- FSM_STATE_LOAD_3,
- FSM_STATE_LOAD_4: t0_data_in <= t1_data_out;
- default: t0_data_in <= 32'dX;
+ FSM_STATE_EXP_LOAD_3,
+ FSM_STATE_EXP_LOAD_4: t0_data_in <= t1_data_out;
+ default: t0_data_in <= 32'dX;
+ endcase
+ //
+ // t1_data_in
+ //
+ case (fsm_next_state)
+ FSM_STATE_EXP_INIT_3: t1_data_in <= 32'd1;
+ FSM_STATE_EXP_INIT_4: t1_data_in <= 32'd0;
+ //
+ FSM_STATE_EXP_FILL_3,
+ FSM_STATE_EXP_FILL_4: t1_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+ default: t1_data_in <= 32'dX;
+ endcase
+ //
+ // t2_data_in
+ //
+ case (fsm_next_state)
+ FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1;
+ FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0;
+ //
+ FSM_STATE_EXP_FILL_3,
+ FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+ default: t2_data_in <= 32'dX;
endcase
//
end
@@ -445,21 +611,21 @@ module modexpa7_exponentiator # .ena (mul_ena),
.rdy (mul_rdy_pp),
- .a_bram_addr (p1_addr_rd), - .b_bram_addr (p2_addr_rd), - .n_bram_addr (n1_bram_addr), - .n_coeff_bram_addr (n_coeff1_bram_addr), - .r_bram_addr (pp_addr_wr), + .a_bram_addr (p1_addr_rd),
+ .b_bram_addr (p2_addr_rd),
+ .n_bram_addr (n1_bram_addr),
+ .n_coeff_bram_addr (n_coeff1_bram_addr),
+ .r_bram_addr (pp_addr_wr),
- .a_bram_out (p1_data_out), - .b_bram_out (p2_data_out), - .n_bram_out (n1_bram_out), - .n_coeff_bram_out (n_coeff1_bram_out), + .a_bram_out (p1_data_out),
+ .b_bram_out (p2_data_out),
+ .n_bram_out (n1_bram_out),
+ .n_coeff_bram_out (n_coeff1_bram_out),
.r_bram_in (pp_data_in),
.r_bram_wr (pp_wren),
- .ab_num_words (n_num_words_latch)
+ .ab_num_words (m_num_words_latch)
);
modexpa7_systolic_multiplier #
@@ -475,50 +641,30 @@ module modexpa7_exponentiator # .ena (mul_ena),
.rdy (mul_rdy_tp),
- .a_bram_addr (t2_addr_rd), - .b_bram_addr (p3_addr_rd), - .n_bram_addr (n2_bram_addr), - .n_coeff_bram_addr (n_coeff2_bram_addr), - .r_bram_addr (tp_addr_wr), + .a_bram_addr (t2_addr_rd),
+ .b_bram_addr (p3_addr_rd),
+ .n_bram_addr (n2_bram_addr),
+ .n_coeff_bram_addr (n_coeff2_bram_addr),
+ .r_bram_addr (tp_addr_wr),
- .a_bram_out (t2_data_out), - .b_bram_out (p3_data_out), - .n_bram_out (n2_bram_out), - .n_coeff_bram_out (n_coeff2_bram_out), + .a_bram_out (t2_data_out),
+ .b_bram_out (p3_data_out),
+ .n_bram_out (n2_bram_out),
+ .n_coeff_bram_out (n_coeff2_bram_out),
.r_bram_in (tp_data_in),
.r_bram_wr (tp_wren),
- .ab_num_words (n_num_words_latch)
+ .ab_num_words (m_num_words_latch)
);
always @(posedge clk)
//
- mul_ena <= (fsm_next_state == FSM_STATE_CALC_1) ? 1'b1 : 1'b0;
+ mul_ena <= (fsm_next_state == FSM_STATE_EXP_CALC_1) ? 1'b1 : 1'b0;
- //
- // Bit Counter
- //
- reg [OPERAND_ADDR_WIDTH+4:0] bit_cnt;
-
- wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_zero = {{OPERAND_ADDR_WIDTH{1'b0}}, {5{1'b0}}};
- wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_last = d_num_bits_latch;
- wire [OPERAND_ADDR_WIDTH+4:0] bit_cnt_next = bit_cnt + 1'b1;
-
- /* handy flag */
- wire bit_cnt_done = (bit_cnt == bit_cnt_last) ? 1'b1 : 1'b0;
- always @(posedge clk)
- //
- if (fsm_next_state == FSM_STATE_LOAD_1)
- //
- case (fsm_state)
- FSM_STATE_INIT_4: bit_cnt <= bit_cnt_zero;
- FSM_STATE_NEXT: bit_cnt <= !bit_cnt_done ? bit_cnt_next : bit_cnt;
- endcase
-
//
@@ -526,7 +672,7 @@ module modexpa7_exponentiator # //
always @(posedge clk or negedge rst_n)
//
- if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
+ if (rst_n == 1'b0) fsm_state <= FSM_STATE_EXP_IDLE;
else fsm_state <= fsm_next_state;
@@ -535,40 +681,46 @@ module modexpa7_exponentiator # //
always @* begin
//
- fsm_next_state = FSM_STATE_STOP;
+ fsm_next_state = FSM_STATE_EXP_STOP;
//
case (fsm_state)
//
- FSM_STATE_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_INIT_1;
- else fsm_next_state = FSM_STATE_IDLE;
+ FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_EXP_INIT_1;
+ else fsm_next_state = FSM_STATE_EXP_IDLE;
+ //
+ FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2;
+ FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3;
+ FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4;
+ FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1;
+ else fsm_next_state = FSM_STATE_EXP_INIT_4;
//
- FSM_STATE_INIT_1: fsm_next_state = FSM_STATE_INIT_2;
- FSM_STATE_INIT_2: fsm_next_state = FSM_STATE_INIT_3;
- FSM_STATE_INIT_3: fsm_next_state = FSM_STATE_INIT_4;
- FSM_STATE_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_LOAD_1;
- else fsm_next_state = FSM_STATE_INIT_4;
+ FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2;
+ FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3;
+ FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4;
+ FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1;
+ else fsm_next_state = FSM_STATE_EXP_LOAD_4;
//
- FSM_STATE_LOAD_1: fsm_next_state = FSM_STATE_LOAD_2;
- FSM_STATE_LOAD_2: fsm_next_state = FSM_STATE_LOAD_3;
- FSM_STATE_LOAD_3: fsm_next_state = FSM_STATE_LOAD_4;
- FSM_STATE_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_CALC_1;
- else fsm_next_state = FSM_STATE_LOAD_4;
+ FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2;
+ FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3;
+ else fsm_next_state = FSM_STATE_EXP_CALC_2;
+ FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1;
//
- FSM_STATE_CALC_1: fsm_next_state = FSM_STATE_CALC_2;
- FSM_STATE_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_CALC_3;
- else fsm_next_state = FSM_STATE_CALC_2;
- FSM_STATE_CALC_3: fsm_next_state = FSM_STATE_FILL_1;
+ FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2;
+ FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3;
+ FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4;
+ FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT;
+ else fsm_next_state = FSM_STATE_EXP_FILL_4;
//
- FSM_STATE_FILL_1: fsm_next_state = FSM_STATE_FILL_2;
- FSM_STATE_FILL_2: fsm_next_state = FSM_STATE_FILL_3;
- FSM_STATE_FILL_3: fsm_next_state = FSM_STATE_FILL_4;
- FSM_STATE_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_NEXT;
- else fsm_next_state = FSM_STATE_FILL_4;
+ FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1;
+ else fsm_next_state = FSM_STATE_EXP_LOAD_1;
//
- FSM_STATE_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_STOP;
- else fsm_next_state = FSM_STATE_LOAD_1;
+ FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2;
+ FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3;
+ FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4;
+ FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP;
+ else fsm_next_state = FSM_STATE_EXP_SAVE_4;
//
- FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE;
+ FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE;
//
endcase
//
|