aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2017-07-19 21:09:48 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2017-07-19 21:09:48 +0300
commit7486edd118f6d69c4817040e53240baf24628dd7 (patch)
tree2cd39104975f87142985094911db976cb69398fd
parent344ed1b7ab2fba06158a28b3c691cf9d9ee8cb75 (diff)
Added pre-multiplication step.
Added 512-bit testbench.
-rw-r--r--src/rtl/modexpa7_exponentiator.v212
-rw-r--r--src/tb/tb_exponentiator.v151
2 files changed, 227 insertions, 136 deletions
diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v
index e34a7ab..cda6882 100644
--- a/src/rtl/modexpa7_exponentiator.v
+++ b/src/rtl/modexpa7_exponentiator.v
@@ -60,6 +60,7 @@ module modexpa7_exponentiator #
output [OPERAND_ADDR_WIDTH-1:0] m_bram_addr,
output [OPERAND_ADDR_WIDTH-1:0] d_bram_addr,
+ output [OPERAND_ADDR_WIDTH-1:0] f_bram_addr,
output [OPERAND_ADDR_WIDTH-1:0] n1_bram_addr,
output [OPERAND_ADDR_WIDTH-1:0] n2_bram_addr,
output [OPERAND_ADDR_WIDTH-1:0] n_coeff1_bram_addr,
@@ -68,6 +69,7 @@ module modexpa7_exponentiator #
input [ 32-1:0] m_bram_out,
input [ 32-1:0] d_bram_out,
+ input [ 32-1:0] f_bram_out,
input [ 32-1:0] n1_bram_out,
input [ 32-1:0] n2_bram_out,
input [ 32-1:0] n_coeff1_bram_out,
@@ -84,7 +86,7 @@ module modexpa7_exponentiator #
//
// FSM Declaration
//
- localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00;
+ localparam [ 7: 0] FSM_STATE_EXP_IDLE = 8'h00;
//
localparam [ 7: 0] FSM_STATE_EXP_INIT_1 = 8'hA1;
localparam [ 7: 0] FSM_STATE_EXP_INIT_2 = 8'hA2;
@@ -121,7 +123,7 @@ module modexpa7_exponentiator #
localparam [ 7: 0] FSM_STATE_MUL_CALC_2 = 8'h22;
localparam [ 7: 0] FSM_STATE_MUL_CALC_3 = 8'h23;
//
- localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF;
+ localparam [ 7: 0] FSM_STATE_EXP_STOP = 8'hFF;
/*
@@ -209,7 +211,7 @@ module modexpa7_exponentiator #
/* save number of words in a and b when new operation starts */
always @(posedge clk)
//
- if (fsm_next_state == FSM_STATE_EXP_INIT_1)
+ if ((fsm_state == FSM_STATE_EXP_IDLE) && ena_trig)
{m_num_words_latch, d_num_bits_latch} <= {m_num_words, d_num_bits};
@@ -231,6 +233,7 @@ module modexpa7_exponentiator #
/* address registers */
reg [OPERAND_ADDR_WIDTH-1:0] m_addr;
reg [OPERAND_ADDR_WIDTH-1:0] d_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] f_addr;
reg [OPERAND_ADDR_WIDTH-1:0] r_addr;
reg [OPERAND_ADDR_WIDTH-1:0] t0_addr;
reg [OPERAND_ADDR_WIDTH-1:0] t1_addr;
@@ -248,6 +251,7 @@ module modexpa7_exponentiator #
/* handy increment values */
wire [OPERAND_ADDR_WIDTH-1:0] m_addr_next = m_addr + 1'b1;
wire [OPERAND_ADDR_WIDTH-1:0] d_addr_next = d_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] f_addr_next = f_addr + 1'b1;
wire [OPERAND_ADDR_WIDTH-1:0] r_addr_next = r_addr + 1'b1;
wire [OPERAND_ADDR_WIDTH-1:0] t0_addr_next = t0_addr + 1'b1;
wire [OPERAND_ADDR_WIDTH-1:0] t1_addr_next = t1_addr + 1'b1;
@@ -259,6 +263,7 @@ module modexpa7_exponentiator #
/* handy stop flags */
wire m_addr_done = (m_addr == bram_addr_last) ? 1'b1 : 1'b0;
wire d_addr_done = (d_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire f_addr_done = (f_addr == bram_addr_last) ? 1'b1 : 1'b0;
wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0;
wire t0_addr_done = (t0_addr == bram_addr_last) ? 1'b1 : 1'b0;
wire t1_addr_done = (t1_addr == bram_addr_last) ? 1'b1 : 1'b0;
@@ -270,6 +275,7 @@ module modexpa7_exponentiator #
/* map registers to top-level ports */
assign m_bram_addr = m_addr;
assign d_bram_addr = d_addr;
+ assign f_bram_addr = f_addr;
assign r_bram_addr = r_addr;
//
@@ -386,93 +392,115 @@ module modexpa7_exponentiator #
// m_addr
//
case (fsm_next_state)
- FSM_STATE_EXP_INIT_1: m_addr <= bram_addr_zero;
- FSM_STATE_EXP_INIT_2,
- FSM_STATE_EXP_INIT_3,
- FSM_STATE_EXP_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr;
+ FSM_STATE_MUL_INIT_1: m_addr <= bram_addr_zero;
+ FSM_STATE_MUL_INIT_2,
+ FSM_STATE_MUL_INIT_3,
+ FSM_STATE_MUL_INIT_4: m_addr <= !m_addr_done ? m_addr_next : m_addr;
endcase
//
// d_addr
//
case (fsm_next_state)
- FSM_STATE_EXP_CALC_1: d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5];
+ FSM_STATE_EXP_CALC_1: d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5];
+ endcase
+ //
+ // f_addr
+ //
+ case (fsm_next_state)
+ FSM_STATE_MUL_INIT_1: f_addr <= bram_addr_zero;
+ FSM_STATE_MUL_INIT_2,
+ FSM_STATE_MUL_INIT_3,
+ FSM_STATE_MUL_INIT_4: f_addr <= !f_addr_done ? f_addr_next : f_addr;
endcase
//
// r_addr
//
case (fsm_next_state)
- FSM_STATE_EXP_SAVE_3: r_addr <= bram_addr_zero;
- FSM_STATE_EXP_SAVE_4: r_addr <= r_addr_next;
+ FSM_STATE_EXP_SAVE_3: r_addr <= bram_addr_zero;
+ FSM_STATE_EXP_SAVE_4: r_addr <= r_addr_next;
endcase
//
// p_addr_wr
//
case (fsm_next_state)
- FSM_STATE_EXP_INIT_3: p_addr_wr <= bram_addr_zero;
- FSM_STATE_EXP_INIT_4: p_addr_wr <= p_addr_wr_next;
//
- FSM_STATE_EXP_FILL_3: p_addr_wr <= bram_addr_zero;
- FSM_STATE_EXP_FILL_4: p_addr_wr <= p_addr_wr_next;
+ FSM_STATE_MUL_INIT_3: p_addr_wr <= bram_addr_zero;
+ FSM_STATE_MUL_INIT_4: p_addr_wr <= p_addr_wr_next;
+ //
+ FSM_STATE_EXP_INIT_3: p_addr_wr <= bram_addr_zero;
+ FSM_STATE_EXP_INIT_4: p_addr_wr <= p_addr_wr_next;
+ //
+ FSM_STATE_EXP_FILL_3: p_addr_wr <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_4: p_addr_wr <= p_addr_wr_next;
endcase
//
// t0_addr
//
case (fsm_next_state)
- FSM_STATE_EXP_LOAD_3: t0_addr <= bram_addr_zero;
- FSM_STATE_EXP_LOAD_4: t0_addr <= t0_addr_next;
+ FSM_STATE_EXP_LOAD_3: t0_addr <= bram_addr_zero;
+ FSM_STATE_EXP_LOAD_4: t0_addr <= t0_addr_next;
//
- FSM_STATE_EXP_FILL_1: t0_addr <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_1: t0_addr <= bram_addr_zero;
FSM_STATE_EXP_FILL_2,
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr;
+ FSM_STATE_EXP_FILL_4: t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr;
endcase
//
// t1_addr
//
case (fsm_next_state)
- FSM_STATE_EXP_INIT_3: t1_addr <= bram_addr_zero;
- FSM_STATE_EXP_INIT_4: t1_addr <= t1_addr_next;
+ FSM_STATE_EXP_INIT_3: t1_addr <= bram_addr_zero;
+ FSM_STATE_EXP_INIT_4: t1_addr <= t1_addr_next;
//
- FSM_STATE_EXP_LOAD_1: t1_addr <= bram_addr_zero;
+ FSM_STATE_EXP_LOAD_1: t1_addr <= bram_addr_zero;
FSM_STATE_EXP_LOAD_2,
FSM_STATE_EXP_LOAD_3,
- FSM_STATE_EXP_LOAD_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+ FSM_STATE_EXP_LOAD_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
//
- FSM_STATE_EXP_FILL_3: t1_addr <= bram_addr_zero;
- FSM_STATE_EXP_FILL_4: t1_addr <= t1_addr_next;
+ FSM_STATE_EXP_FILL_3: t1_addr <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_4: t1_addr <= t1_addr_next;
//
- FSM_STATE_EXP_SAVE_1: t1_addr <= bram_addr_zero;
+ FSM_STATE_EXP_SAVE_1: t1_addr <= bram_addr_zero;
FSM_STATE_EXP_SAVE_2,
FSM_STATE_EXP_SAVE_3,
- FSM_STATE_EXP_SAVE_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+ FSM_STATE_EXP_SAVE_4: t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
endcase
//
// t2_addr_wr
//
case (fsm_next_state)
- FSM_STATE_EXP_INIT_3: t2_addr_wr <= bram_addr_zero;
- FSM_STATE_EXP_INIT_4: t2_addr_wr <= t2_addr_wr_next;
//
- FSM_STATE_EXP_FILL_3: t2_addr_wr <= bram_addr_zero;
- FSM_STATE_EXP_FILL_4: t2_addr_wr <= t2_addr_wr_next;
+ FSM_STATE_MUL_INIT_3: t2_addr_wr <= bram_addr_zero;
+ FSM_STATE_MUL_INIT_4: t2_addr_wr <= t2_addr_wr_next;
+
+ FSM_STATE_EXP_INIT_3: t2_addr_wr <= bram_addr_zero;
+ FSM_STATE_EXP_INIT_4: t2_addr_wr <= t2_addr_wr_next;
+ //
+ FSM_STATE_EXP_FILL_3: t2_addr_wr <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_4: t2_addr_wr <= t2_addr_wr_next;
endcase
//
// pp_addr_rd
//
case (fsm_next_state)
- FSM_STATE_EXP_FILL_1: pp_addr_rd <= bram_addr_zero;
+ FSM_STATE_EXP_FILL_1: pp_addr_rd <= bram_addr_zero;
FSM_STATE_EXP_FILL_2,
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
+ FSM_STATE_EXP_FILL_4: pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
endcase
//
// tp_addr_rd
//
case (fsm_next_state)
- FSM_STATE_EXP_FILL_1: tp_addr_rd <= bram_addr_zero;
+ FSM_STATE_EXP_INIT_1: tp_addr_rd <= bram_addr_zero;
+ FSM_STATE_EXP_INIT_2,
+ FSM_STATE_EXP_INIT_3,
+ FSM_STATE_EXP_INIT_4: tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
+ //
+ FSM_STATE_EXP_FILL_1: tp_addr_rd <= bram_addr_zero;
FSM_STATE_EXP_FILL_2,
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
+ FSM_STATE_EXP_FILL_4: tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
endcase
//
end
@@ -487,26 +515,28 @@ module modexpa7_exponentiator #
//
case (fsm_next_state)
FSM_STATE_EXP_SAVE_3,
- FSM_STATE_EXP_SAVE_4: r_wren <= 1'b1;
- default: r_wren <= 1'b0;
+ FSM_STATE_EXP_SAVE_4: r_wren <= 1'b1;
+ default: r_wren <= 1'b0;
endcase
//
// p_wren
//
case (fsm_next_state)
- FSM_STATE_EXP_INIT_3,
+ FSM_STATE_MUL_INIT_3,
+ FSM_STATE_MUL_INIT_4,
+ FSM_STATE_EXP_INIT_3,
FSM_STATE_EXP_INIT_4,
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: p_wren <= 1'b1;
- default: p_wren <= 1'b0;
+ FSM_STATE_EXP_FILL_4: p_wren <= 1'b1;
+ default: p_wren <= 1'b0;
endcase
//
// t0_wren
//
case (fsm_next_state)
FSM_STATE_EXP_LOAD_3,
- FSM_STATE_EXP_LOAD_4: t0_wren <= 1'b1;
- default: t0_wren <= 1'b0;
+ FSM_STATE_EXP_LOAD_4: t0_wren <= 1'b1;
+ default: t0_wren <= 1'b0;
endcase
//
// t1_wren
@@ -515,18 +545,20 @@ module modexpa7_exponentiator #
FSM_STATE_EXP_INIT_3,
FSM_STATE_EXP_INIT_4,
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: t1_wren <= 1'b1;
- default: t1_wren <= 1'b0;
+ FSM_STATE_EXP_FILL_4: t1_wren <= 1'b1;
+ default: t1_wren <= 1'b0;
endcase
//
// t2_wren
//
case (fsm_next_state)
+ FSM_STATE_MUL_INIT_3,
+ FSM_STATE_MUL_INIT_4,
FSM_STATE_EXP_INIT_3,
FSM_STATE_EXP_INIT_4,
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: t2_wren <= 1'b1;
- default: t2_wren <= 1'b0;
+ FSM_STATE_EXP_FILL_4: t2_wren <= 1'b1;
+ default: t2_wren <= 1'b0;
endcase
//
end
@@ -541,50 +573,58 @@ module modexpa7_exponentiator #
//
case (fsm_next_state)
FSM_STATE_EXP_SAVE_3,
- FSM_STATE_EXP_SAVE_4: r_data_in <= t1_data_out;
- default: r_data_in <= 32'dX;
+ FSM_STATE_EXP_SAVE_4: r_data_in <= t1_data_out;
+ default: r_data_in <= 32'dX;
endcase
//
// p_data_in
//
case (fsm_next_state)
+ //
+ FSM_STATE_MUL_INIT_3,
+ FSM_STATE_MUL_INIT_4: p_data_in <= f_bram_out;
+ //
FSM_STATE_EXP_INIT_3,
- FSM_STATE_EXP_INIT_4: p_data_in <= m_bram_out;
+ FSM_STATE_EXP_INIT_4: p_data_in <= tp_data_out;
//
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: p_data_in <= pp_data_out;
+ FSM_STATE_EXP_FILL_4: p_data_in <= pp_data_out;
//
- default: p_data_in <= 32'dX;
+ default: p_data_in <= 32'dX;
endcase
//
// t0_data_in
//
case (fsm_next_state)
FSM_STATE_EXP_LOAD_3,
- FSM_STATE_EXP_LOAD_4: t0_data_in <= t1_data_out;
- default: t0_data_in <= 32'dX;
+ FSM_STATE_EXP_LOAD_4: t0_data_in <= t1_data_out;
+ default: t0_data_in <= 32'dX;
endcase
//
// t1_data_in
//
case (fsm_next_state)
- FSM_STATE_EXP_INIT_3: t1_data_in <= 32'd1;
- FSM_STATE_EXP_INIT_4: t1_data_in <= 32'd0;
+ FSM_STATE_EXP_INIT_3: t1_data_in <= 32'd1;
+ FSM_STATE_EXP_INIT_4: t1_data_in <= 32'd0;
//
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: t1_data_in <= flag_update_r ? tp_data_out : t0_data_out;
- default: t1_data_in <= 32'dX;
+ FSM_STATE_EXP_FILL_4: t1_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+ default: t1_data_in <= 32'dX;
endcase
//
// t2_data_in
//
case (fsm_next_state)
- FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1;
- FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0;
+ //
+ FSM_STATE_MUL_INIT_3,
+ FSM_STATE_MUL_INIT_4: t2_data_in <= m_bram_out;
+ //
+ FSM_STATE_EXP_INIT_3: t2_data_in <= 32'd1;
+ FSM_STATE_EXP_INIT_4: t2_data_in <= 32'd0;
//
FSM_STATE_EXP_FILL_3,
- FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
- default: t2_data_in <= 32'dX;
+ FSM_STATE_EXP_FILL_4: t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+ default: t2_data_in <= 32'dX;
endcase
//
end
@@ -661,11 +701,12 @@ module modexpa7_exponentiator #
always @(posedge clk)
//
- mul_ena <= (fsm_next_state == FSM_STATE_EXP_CALC_1) ? 1'b1 : 1'b0;
-
-
-
-
+ case (fsm_next_state)
+ FSM_STATE_MUL_CALC_1,
+ FSM_STATE_EXP_CALC_1: mul_ena <= 1'b1;
+ default: mul_ena <= 1'b0;
+ endcase
+
//
// FSM Process
@@ -685,42 +726,53 @@ module modexpa7_exponentiator #
//
case (fsm_state)
//
- FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_EXP_INIT_1;
- else fsm_next_state = FSM_STATE_EXP_IDLE;
+ FSM_STATE_MUL_INIT_1: fsm_next_state = FSM_STATE_MUL_INIT_2;
+ FSM_STATE_MUL_INIT_2: fsm_next_state = FSM_STATE_MUL_INIT_3;
+ FSM_STATE_MUL_INIT_3: fsm_next_state = FSM_STATE_MUL_INIT_4;
+ FSM_STATE_MUL_INIT_4: if (t2_addr_wr_done) fsm_next_state = FSM_STATE_MUL_CALC_1;
+ else fsm_next_state = FSM_STATE_MUL_INIT_4;
+ //
+ FSM_STATE_MUL_CALC_1: fsm_next_state = FSM_STATE_MUL_CALC_2;
+ FSM_STATE_MUL_CALC_2: if (mul_rdy_tp) fsm_next_state = FSM_STATE_MUL_CALC_3;
+ else fsm_next_state = FSM_STATE_MUL_CALC_2;
+ FSM_STATE_MUL_CALC_3: fsm_next_state = FSM_STATE_EXP_INIT_1;
+ //
+ FSM_STATE_EXP_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_MUL_INIT_1;
+ else fsm_next_state = FSM_STATE_EXP_IDLE;
//
FSM_STATE_EXP_INIT_1: fsm_next_state = FSM_STATE_EXP_INIT_2;
FSM_STATE_EXP_INIT_2: fsm_next_state = FSM_STATE_EXP_INIT_3;
FSM_STATE_EXP_INIT_3: fsm_next_state = FSM_STATE_EXP_INIT_4;
- FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1;
- else fsm_next_state = FSM_STATE_EXP_INIT_4;
+ FSM_STATE_EXP_INIT_4: if (t1_addr_done) fsm_next_state = FSM_STATE_EXP_LOAD_1;
+ else fsm_next_state = FSM_STATE_EXP_INIT_4;
//
FSM_STATE_EXP_LOAD_1: fsm_next_state = FSM_STATE_EXP_LOAD_2;
FSM_STATE_EXP_LOAD_2: fsm_next_state = FSM_STATE_EXP_LOAD_3;
FSM_STATE_EXP_LOAD_3: fsm_next_state = FSM_STATE_EXP_LOAD_4;
- FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1;
- else fsm_next_state = FSM_STATE_EXP_LOAD_4;
+ FSM_STATE_EXP_LOAD_4: if (t0_addr_done) fsm_next_state = FSM_STATE_EXP_CALC_1;
+ else fsm_next_state = FSM_STATE_EXP_LOAD_4;
//
FSM_STATE_EXP_CALC_1: fsm_next_state = FSM_STATE_EXP_CALC_2;
- FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3;
- else fsm_next_state = FSM_STATE_EXP_CALC_2;
+ FSM_STATE_EXP_CALC_2: if (mul_rdy_all) fsm_next_state = FSM_STATE_EXP_CALC_3;
+ else fsm_next_state = FSM_STATE_EXP_CALC_2;
FSM_STATE_EXP_CALC_3: fsm_next_state = FSM_STATE_EXP_FILL_1;
//
FSM_STATE_EXP_FILL_1: fsm_next_state = FSM_STATE_EXP_FILL_2;
FSM_STATE_EXP_FILL_2: fsm_next_state = FSM_STATE_EXP_FILL_3;
FSM_STATE_EXP_FILL_3: fsm_next_state = FSM_STATE_EXP_FILL_4;
- FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT;
- else fsm_next_state = FSM_STATE_EXP_FILL_4;
+ FSM_STATE_EXP_FILL_4: if (p_addr_wr_done) fsm_next_state = FSM_STATE_EXP_NEXT;
+ else fsm_next_state = FSM_STATE_EXP_FILL_4;
//
- FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1;
- else fsm_next_state = FSM_STATE_EXP_LOAD_1;
+ FSM_STATE_EXP_NEXT: if (bit_cnt_done) fsm_next_state = FSM_STATE_EXP_SAVE_1;
+ else fsm_next_state = FSM_STATE_EXP_LOAD_1;
//
FSM_STATE_EXP_SAVE_1: fsm_next_state = FSM_STATE_EXP_SAVE_2;
FSM_STATE_EXP_SAVE_2: fsm_next_state = FSM_STATE_EXP_SAVE_3;
FSM_STATE_EXP_SAVE_3: fsm_next_state = FSM_STATE_EXP_SAVE_4;
- FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP;
- else fsm_next_state = FSM_STATE_EXP_SAVE_4;
+ FSM_STATE_EXP_SAVE_4: if (r_addr_done) fsm_next_state = FSM_STATE_EXP_STOP;
+ else fsm_next_state = FSM_STATE_EXP_SAVE_4;
//
- FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE;
+ FSM_STATE_EXP_STOP: fsm_next_state = FSM_STATE_EXP_IDLE;
//
endcase
//
diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v
index 3b612c5..c854e65 100644
--- a/src/tb/tb_exponentiator.v
+++ b/src/tb/tb_exponentiator.v
@@ -81,6 +81,7 @@ module tb_exponentiator;
//
wire [ 3: 0] core_m_addr;
wire [ 3: 0] core_d_addr;
+ wire [ 3: 0] core_f_addr;
wire [ 3: 0] core_n1_addr;
wire [ 3: 0] core_n2_addr;
wire [ 3: 0] core_n_coeff1_addr;
@@ -89,6 +90,7 @@ module tb_exponentiator;
wire [31: 0] core_m_data;
wire [31: 0] core_d_data;
+ wire [31: 0] core_f_data;
wire [31: 0] core_n1_data;
wire [31: 0] core_n2_data;
wire [31: 0] core_n_coeff1_data;
@@ -97,48 +99,54 @@ module tb_exponentiator;
wire core_r_wren;
- reg [ 3: 0] tb_mdn_addr;
+ reg [ 3: 0] tb_mdfn_addr;
reg [ 3: 0] tb_r_addr;
reg [31:0] tb_m_data;
reg [31:0] tb_d_data;
+ reg [31:0] tb_f_data;
reg [31:0] tb_n_data;
reg [31:0] tb_n_coeff_data;
wire [31:0] tb_r_data;
- reg tb_mdn_wren;
+ reg tb_mdfn_wren;
//
// BRAMs
//
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
bram_m (.clk(clk),
- .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_m_data), .a_out(),
+ .a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_m_data), .a_out(),
.b_addr(core_m_addr), .b_out(core_m_data));
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
bram_d (.clk(clk),
- .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_d_data), .a_out(),
+ .a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_d_data), .a_out(),
.b_addr(core_d_addr), .b_out(core_d_data));
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+ bram_f (.clk(clk),
+ .a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_f_data), .a_out(),
+ .b_addr(core_f_addr), .b_out(core_f_data));
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
bram_n1 (.clk(clk),
- .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_data), .a_out(),
+ .a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_n_data), .a_out(),
.b_addr(core_n1_addr), .b_out(core_n1_data));
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
bram_n2 (.clk(clk),
- .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_data), .a_out(),
+ .a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_n_data), .a_out(),
.b_addr(core_n2_addr), .b_out(core_n2_data));
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
bram_n_coeff1 (.clk(clk),
- .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_coeff_data), .a_out(),
+ .a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_n_coeff_data), .a_out(),
.b_addr(core_n_coeff1_addr), .b_out(core_n_coeff1_data));
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
bram_n_coeff2 (.clk(clk),
- .a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_coeff_data), .a_out(),
+ .a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_n_coeff_data), .a_out(),
.b_addr(core_n_coeff2_addr), .b_out(core_n_coeff2_data));
bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
@@ -164,6 +172,7 @@ module tb_exponentiator;
.m_bram_addr (core_m_addr),
.d_bram_addr (core_d_addr),
+ .f_bram_addr (core_f_addr),
.n1_bram_addr (core_n1_addr),
.n2_bram_addr (core_n2_addr),
.n_coeff1_bram_addr (core_n_coeff1_addr),
@@ -172,6 +181,7 @@ module tb_exponentiator;
.m_bram_out (core_m_data),
.d_bram_out (core_d_data),
+ .f_bram_out (core_f_data),
.n1_bram_out (core_n1_data),
.n2_bram_out (core_n2_data),
.n_coeff1_bram_out (core_n_coeff1_data),
@@ -197,8 +207,8 @@ module tb_exponentiator;
rst_n = 1'b1;
#100;
- test_exponent_384(M_FACTOR_384, D_384, N_384, N_COEFF_384, S_384);
- //test_exponent_512(M_512);
+ test_exponent_384(M_384, D_384, FACTOR_384, N_384, N_COEFF_384, S_384);
+ test_exponent_512(M_512, D_512, FACTOR_512, N_512, N_COEFF_512, S_512);
end
@@ -211,6 +221,7 @@ module tb_exponentiator;
//
input [383:0] m;
input [383:0] d;
+ input [383:0] f;
input [383:0] n;
input [383:0] n_coeff;
input [383:0] s;
@@ -223,7 +234,7 @@ module tb_exponentiator;
n_num_words = 4'd11; // set number of words
d_num_bits = 9'd383; // set number of bits
//
- write_memory_384(m, d, n, n_coeff); // fill memory
+ write_memory_384(m, d, f, n, n_coeff); // fill memory
ena = 1; // start operation
#10; //
@@ -247,40 +258,38 @@ module tb_exponentiator;
end
//
endtask
- /*
- task test_factor_512;
+
+ task test_exponent_512;
//
+ input [511:0] m;
+ input [511:0] d;
+ input [511:0] f;
input [511:0] n;
- reg [511:0] f;
- reg [511:0] factor;
+ input [511:0] n_coeff;
+ input [511:0] s;
+ reg [511:0] r;
+ //
integer i;
//
begin
- //
- calc_factor_512(n, f); // calculate factor on-the-fly
-
- // make sure, that the value matches the one saved in the include file
- if (f !== FACTOR_512) begin
- $display("ERROR: Calculated factor value differs from the one in the test vector!");
- $finish;
- end
-
-
+ //
n_num_words = 4'd15; // set number of words
- write_memory_512(n); // fill memory
+ d_num_bits = 9'd511; // set number of bits
+ //
+ write_memory_512(m, d, f, n, n_coeff); // fill memory
ena = 1; // start operation
#10; //
ena = 0; // clear flag
while (!rdy) #10; // wait for operation to complete
- read_memory_512(factor); // get result from memory
+ read_memory_512(r); // get result from memory
- $display(" calculated: %x", factor); // display result
- $display(" expected: %x", f); //
+ $display(" calculated: %x", r); // display result
+ $display(" expected: %x", s); //
// check calculated value
- if (f === factor) begin
+ if (r === s) begin
$display(" OK");
$display("SUCCESS: Test passed.");
end else begin
@@ -291,7 +300,7 @@ module tb_exponentiator;
end
//
endtask
- */
+
//
// write_memory_384
@@ -300,74 +309,104 @@ module tb_exponentiator;
//
input [383:0] m;
input [383:0] d;
+ input [383:0] f;
input [383:0] n;
input [383:0] n_coeff;
reg [383:0] m_shreg;
+ reg [383:0] f_shreg;
reg [383:0] d_shreg;
reg [383:0] n_shreg;
reg [383:0] n_coeff_shreg;
//
begin
//
- tb_mdn_wren = 1; // start filling memories
+ tb_mdfn_wren = 1; // start filling memories
m_shreg = m; // preload shift register
d_shreg = d; // preload shift register
+ f_shreg = f; // preload shift register
n_shreg = n; // preload shift register
n_coeff_shreg = n_coeff; // preload shift register
//
for (w=0; w<NUM_WORDS_384; w=w+1) begin // write all words
- tb_mdn_addr = w[3:0]; // set address
+ tb_mdfn_addr = w[3:0]; // set address
tb_m_data = m_shreg[31:0]; // set data
tb_d_data = d_shreg[31:0]; // set data
+ tb_f_data = f_shreg[31:0]; // set data
tb_n_data = n_shreg[31:0]; // set data
tb_n_coeff_data = n_coeff_shreg[31:0]; // set data
m_shreg = {{32{1'bX}}, m_shreg[383:32]}; // update shift register
d_shreg = {{32{1'bX}}, d_shreg[383:32]}; // update shift register
+ f_shreg = {{32{1'bX}}, f_shreg[383:32]}; // update shift register
n_shreg = {{32{1'bX}}, n_shreg[383:32]}; // update shift register
n_coeff_shreg = {{32{1'bX}}, n_coeff_shreg[383:32]}; // update shift register
#10; // wait for 1 clock tick
end
//
- tb_mdn_addr = {4{1'bX}}; // wipe addresses
+ tb_mdfn_addr = {4{1'bX}}; // wipe addresses
tb_m_data = {32{1'bX}}; // wipe data
tb_d_data = {32{1'bX}}; // wipe data
+ tb_f_data = {32{1'bX}}; // wipe data
tb_n_data = {32{1'bX}}; // wipe data
tb_n_coeff_data = {32{1'bX}}; // wipe data
- tb_mdn_wren = 0; // stop filling memory
+ tb_mdfn_wren = 0; // stop filling memory
//
end
//
endtask
- /*
+
//
// write_memory_512
//
task write_memory_512;
//
+ input [511:0] m;
+ input [511:0] d;
+ input [511:0] f;
input [511:0] n;
+ input [511:0] n_coeff;
+ reg [511:0] m_shreg;
+ reg [511:0] f_shreg;
+ reg [511:0] d_shreg;
reg [511:0] n_shreg;
+ reg [511:0] n_coeff_shreg;
//
begin
//
- tb_n_wren = 1; // start filling memories
- n_shreg = n; // preload shift register
+ tb_mdfn_wren = 1; // start filling memories
+ m_shreg = m; // preload shift register
+ d_shreg = d; // preload shift register
+ f_shreg = f; // preload shift register
+ n_shreg = n; // preload shift register
+ n_coeff_shreg = n_coeff; // preload shift register
//
- for (w=0; w<NUM_WORDS_512; w=w+1) begin // write all words
- tb_n_addr = w[3:0]; // set address
- tb_n_data = n_shreg[31:0]; // set data
- n_shreg = {{32{1'bX}}, n_shreg[511:32]}; // update shift register
- #10; // wait for 1 clock tick
+ for (w=0; w<NUM_WORDS_512; w=w+1) begin // write all words
+ tb_mdfn_addr = w[3:0]; // set address
+ tb_m_data = m_shreg[31:0]; // set data
+ tb_d_data = d_shreg[31:0]; // set data
+ tb_f_data = f_shreg[31:0]; // set data
+ tb_n_data = n_shreg[31:0]; // set data
+ tb_n_coeff_data = n_coeff_shreg[31:0]; // set data
+ m_shreg = {{32{1'bX}}, m_shreg[511:32]}; // update shift register
+ d_shreg = {{32{1'bX}}, d_shreg[511:32]}; // update shift register
+ f_shreg = {{32{1'bX}}, f_shreg[511:32]}; // update shift register
+ n_shreg = {{32{1'bX}}, n_shreg[511:32]}; // update shift register
+ n_coeff_shreg = {{32{1'bX}}, n_coeff_shreg[511:32]}; // update shift register
+ #10; // wait for 1 clock tick
end
//
- tb_n_addr = {4{1'bX}}; // wipe addresses
- tb_n_data = {32{1'bX}}; // wipe data
- tb_n_wren = 0; // stop filling memory
+ tb_mdfn_addr = {4{1'bX}}; // wipe addresses
+ tb_m_data = {32{1'bX}}; // wipe data
+ tb_d_data = {32{1'bX}}; // wipe data
+ tb_f_data = {32{1'bX}}; // wipe data
+ tb_n_data = {32{1'bX}}; // wipe data
+ tb_n_coeff_data = {32{1'bX}}; // wipe data
+ tb_mdfn_wren = 0; // stop filling memory
//
end
//
endtask
- */
+
//
// read_memory_384
@@ -392,30 +431,30 @@ module tb_exponentiator;
//
endtask
- /*
+
//
// read_memory_512
//
task read_memory_512;
//
- output [511:0] f;
- reg [511:0] f_shreg;
+ output [511:0] r;
+ reg [511:0] r_shreg;
//
begin
//
for (w=0; w<NUM_WORDS_512; w=w+1) begin // read result word-by-word
- tb_f_addr = w[3:0]; // set address
+ tb_r_addr = w[3:0]; // set address
#10; // wait for 1 clock tick
- f_shreg = {tb_f_data, f_shreg[511:32]}; // store data word
+ r_shreg = {tb_r_data, r_shreg[511:32]}; // store data word
end
//
- tb_f_addr = {4{1'bX}}; // wipe address
- f = f_shreg; // return
+ tb_r_addr = {4{1'bX}}; // wipe address
+ r = r_shreg; // return
//
end
//
- endtask
- */
+ endtask
+
endmodule