aboutsummaryrefslogtreecommitdiff
path: root/bench
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-01 15:05:11 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-01 15:05:11 +0300
commit9e9689d7b00ecdcc1c651f5e369e00a53d62df3c (patch)
treef7bdddda835e26aff3642b99e1ee8b2f1a64434d /bench
parent29fb6afd018c601a2e0c7376656d5e37beb565d6 (diff)
Further work on the Montgomery modular multiplier. Can now do the "triangular"
part of multiplication, i.e. compute the "magic" reduction coefficient Q = LSB(AB) * N_COEFF.
Diffstat (limited to 'bench')
-rw-r--r--bench/tb_square.v391
1 files changed, 314 insertions, 77 deletions
diff --git a/bench/tb_square.v b/bench/tb_square.v
index 61e5d8a..23831db 100644
--- a/bench/tb_square.v
+++ b/bench/tb_square.v
@@ -39,6 +39,8 @@ module tb_square;
reg [17:0] T1[0:31];
reg [17:0] T2[0:31];
reg [17:0] AB[0:63];
+ reg [17:0] N_COEFF[0:32];
+ reg [17:0] Q[0:32];
//
@@ -81,6 +83,26 @@ module tb_square;
AB[56] = 18'h0e1c0; AB[57] = 18'h00989; AB[58] = 18'h01201; AB[59] = 18'h0e194;
AB[60] = 18'h07f93; AB[61] = 18'h0e739; AB[62] = 18'h07cf6; AB[63] = 18'h019df;
//
+ N_COEFF[ 0] = 18'h05a97; N_COEFF[ 1] = 18'h0ac69; N_COEFF[ 2] = 18'h0d51e; N_COEFF[ 3] = 18'h07326;
+ N_COEFF[ 4] = 18'h01053; N_COEFF[ 5] = 18'h0f68a; N_COEFF[ 6] = 18'h09c70; N_COEFF[ 7] = 18'h064f7;
+ N_COEFF[ 8] = 18'h01041; N_COEFF[ 9] = 18'h0c2bf; N_COEFF[10] = 18'h0f01f; N_COEFF[11] = 18'h01842;
+ N_COEFF[12] = 18'h0e69a; N_COEFF[13] = 18'h037ea; N_COEFF[14] = 18'h0b4a0; N_COEFF[15] = 18'h0c1ab;
+ N_COEFF[16] = 18'h0bd5b; N_COEFF[17] = 18'h09e5e; N_COEFF[18] = 18'h039bd; N_COEFF[19] = 18'h06430;
+ N_COEFF[20] = 18'h0b460; N_COEFF[21] = 18'h08bd4; N_COEFF[22] = 18'h09fcd; N_COEFF[23] = 18'h05391;
+ N_COEFF[24] = 18'h0fa45; N_COEFF[25] = 18'h08892; N_COEFF[26] = 18'h0732c; N_COEFF[27] = 18'h0baf6;
+ N_COEFF[28] = 18'h067a9; N_COEFF[29] = 18'h0b184; N_COEFF[30] = 18'h02089; N_COEFF[31] = 18'h0297b;
+ N_COEFF[32] = 18'h01810;
+ //
+ Q[ 0] = 18'h0ac02; Q[ 1] = 18'h0a026; Q[ 2] = 18'h06825; Q[ 3] = 18'h08f06;
+ Q[ 4] = 18'h03783; Q[ 5] = 18'h04cb5; Q[ 6] = 18'h0e8ea; Q[ 7] = 18'h083d2;
+ Q[ 8] = 18'h0fec9; Q[ 9] = 18'h066d9; Q[10] = 18'h0edad; Q[11] = 18'h06c12;
+ Q[12] = 18'h0a5fb; Q[13] = 18'h07295; Q[14] = 18'h06a0c; Q[15] = 18'h081a5;
+ Q[16] = 18'h03493; Q[17] = 18'h0a393; Q[18] = 18'h03da6; Q[19] = 18'h0beb1;
+ Q[20] = 18'h0d138; Q[21] = 18'h02815; Q[22] = 18'h0f191; Q[23] = 18'h03617;
+ Q[24] = 18'h08d4f; Q[25] = 18'h0f641; Q[26] = 18'h00e82; Q[27] = 18'h01774;
+ Q[28] = 18'h0bf39; Q[29] = 18'h0929d; Q[30] = 18'h05273; Q[31] = 18'h0c30a;
+ Q[32] = 18'h0eef3;
+ //
end
@@ -102,9 +124,10 @@ module tb_square;
reg mac_fat_bram_xy_ena = 1'b0;
reg mac_fat_bram_xy_reg_ena = 1'b0;
reg [ 2:0] mac_fat_bram_xy_bank;
- reg [ 7:0] mac_fat_bram_xy_addr[0:3];
- wire [17:0] mac_fat_bram_x_dout[0:3];
- wire [17:0] mac_fat_bram_y_dout[0:3];
+ reg [ 2:0] mac_fat_bram_xy_bank_aux;
+ reg [ 7:0] mac_fat_bram_xy_addr[0:4];
+ wire [17:0] mac_fat_bram_x_dout[0:4];
+ wire [17:0] mac_fat_bram_y_dout[0:4];
reg tb_slim_bram_xy_ena = 1'b0;
reg [ 1:0] tb_slim_bram_xy_bank;
@@ -124,14 +147,14 @@ module tb_square;
//
mac_slim_bram_xy_addr_dly <= mac_slim_bram_xy_addr;
- reg mac_slim_bram_xy_reg_ena_dly = 1'b0;
+ reg mac_slim_bram_xy_reg_ena_dly = 1'b0;
always @(posedge clk)
mac_slim_bram_xy_reg_ena_dly <= mac_slim_bram_xy_reg_ena;
genvar z;
- generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ generate for (z=0; z<((NUM_MULTS/2)+1); z=z+1)
begin : gen_fat_bram
//
ip_bram_36k fat_bram_x
@@ -145,7 +168,8 @@ module tb_square;
.clkb (clk),
.enb (mac_fat_bram_xy_ena),
.regceb (mac_fat_bram_xy_reg_ena),
- .addrb ({mac_fat_bram_xy_bank, mac_fat_bram_xy_addr[z]}),
+ .addrb ({(z < (NUM_MULTS/2) ?
+ mac_fat_bram_xy_bank : mac_fat_bram_xy_bank_aux), mac_fat_bram_xy_addr[z]}),
.doutb (mac_fat_bram_x_dout[z])
);
//
@@ -160,7 +184,8 @@ module tb_square;
.clkb (clk),
.enb (mac_fat_bram_xy_ena),
.regceb (mac_fat_bram_xy_reg_ena),
- .addrb ({mac_fat_bram_xy_bank, mac_fat_bram_xy_addr[z]}),
+ .addrb ({z < (NUM_MULTS/2) ?
+ mac_fat_bram_xy_bank : mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_addr[z]}),
.doutb (mac_fat_bram_y_dout[z])
);
//
@@ -232,6 +257,23 @@ module tb_square;
wait_clock_tick;
end
+ for (i=0; i<32; i=i+1) begin
+ tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF;
+ tb_slim_bram_xy_addr = i[7:0];
+ tb_slim_bram_x_din = N_COEFF[i];
+ tb_slim_bram_y_din = N_COEFF[i];
+
+ wait_clock_tick;
+ end
+ for (i=32; i<33; i=i+1) begin
+ tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF_EXT;
+ tb_slim_bram_xy_addr = 0;
+ tb_slim_bram_x_din = N_COEFF[i];
+ tb_slim_bram_y_din = N_COEFF[i];
+
+ wait_clock_tick;
+ end
+
tb_fat_bram_xy_ena = 1'b0;
tb_slim_bram_xy_ena = 1'b0;
@@ -256,6 +298,7 @@ module tb_square;
wait_clock_tick;
verify_ab;
+ verify_q;
end
@@ -270,11 +313,11 @@ module tb_square;
reg dsp_x_ce_p;
reg dsp_x_ce_mode;
- reg [8 -1:0] dsp_x_mode_z = {8{1'b1}};
+ reg [9 -1:0] dsp_x_mode_z = {9{1'b1}};
- wire [4*18-1:0] dsp_x_a;
+ wire [5*18-1:0] dsp_x_a;
reg [1*17-1:0] dsp_x_b;
- wire [8*47-1:0] dsp_x_p;
+ wire [9*47-1:0] dsp_x_p;
reg dsp_y_ce_a;
reg dsp_y_ce_b;
@@ -283,13 +326,13 @@ module tb_square;
reg dsp_y_ce_p;
reg dsp_y_ce_mode;
- reg [8 -1:0] dsp_y_mode_z = {8{1'b1}};
+ reg [9 -1:0] dsp_y_mode_z = {9{1'b1}};
- wire [4*18-1:0] dsp_y_a;
+ wire [5*18-1:0] dsp_y_a;
reg [1*17-1:0] dsp_y_b;
- wire [8*47-1:0] dsp_y_p;
+ wire [9*47-1:0] dsp_y_p;
- generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ generate for (z=0; z<((NUM_MULTS/2)+1); z=z+1)
begin : gen_dsp_xy_a_split
assign dsp_x_a[18*z+:18] = mac_fat_bram_x_dout[z];
assign dsp_y_a[18*z+:18] = mac_fat_bram_y_dout[z];
@@ -301,10 +344,10 @@ module tb_square;
{dsp_y_ce_b_dly, dsp_x_ce_b_dly} <= {dsp_y_ce_b, dsp_x_ce_b};
- reg [8 -1:0] dsp_xy_mode_z_adv1 = {8{1'b1}};
- reg [8 -1:0] dsp_xy_mode_z_adv2 = {8{1'b1}};
- reg [8 -1:0] dsp_xy_mode_z_adv3 = {8{1'b1}};
- reg [8 -1:0] dsp_xy_mode_z_adv4 = {8{1'b1}};
+ reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}};
+ reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}};
+ reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
+ reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}};
dsp_array dsp_x
(
@@ -355,16 +398,47 @@ module tb_square;
localparam [7:0] index_last = 8'd31;
+ localparam [7:0] index_last_minus1 = index_last - 1'b1;
+
+
+ //
+ // Column
+ //
+ reg [4:0] col_index; // current column index
+ reg [4:0] col_index_prev; // delayed column index value
+ reg [4:0] col_index_last; // index of the very last column
+ reg [4:0] col_index_next1; // precomputed next column index
+ //reg [4:0] col_index_next2; // precomputed next column index after next column index
+ reg col_is_last; // flag set during the very last column
+
+ always @(posedge clk)
+ //
+ col_index_prev <= col_index;
wire mult_square_addr_almost_done_comb;
reg mult_square_addr_almost_done_flop;
+
+ //wire mult_square_addr_surely_done_comb;
+ reg mult_square_addr_surely_done_flop;
+
+ reg mult_triangle_addr_almost_done_comb;
+ reg mult_triangle_addr_almost_done_flop;
+
+ //wire mult_triangle_addr_surely_done_comb;
+ reg mult_triangle_addr_surely_done_flop;
+ reg mult_triangle_addr_tardy_done_flop;
- wire mult_square_addr_surely_done_comb;
- reg mult_square_addr_surely_done_flop;
+ assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
+
+ always @*
+ //
+ //if (!col_is_last)
+ mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
+ //else
+ //mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
+
- assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == (index_last - 8'd1);
- assign mult_square_addr_surely_done_comb = mac_slim_bram_xy_addr == index_last;
always @(posedge clk)
//
@@ -372,60 +446,130 @@ module tb_square;
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY:
- {mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <=
- {mult_square_addr_surely_done_comb, mult_square_addr_almost_done_comb};
-
+ mult_square_addr_almost_done_flop <= mult_square_addr_almost_done_comb;
+ //{mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <=
+ //{mult_square_addr_surely_done_comb, mult_square_addr_almost_done_comb};
default:
- {mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <= 2'b00;
+ mult_square_addr_almost_done_flop <= 1'b0;
+ //{mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <= 2'b00;
endcase
-
- //
- // Column
- //
- reg [4:0] col_index;
- reg [4:0] col_index_prev;
- reg [4:0] col_index_last;
+ always @(posedge clk)
+ //
+ mult_square_addr_surely_done_flop <= mult_square_addr_almost_done_flop;
always @(posedge clk)
//
- col_index_prev <= col_index;
+ case (fsm_state)
+
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
+ mult_triangle_addr_almost_done_flop <= mult_triangle_addr_almost_done_comb;
+ //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <=
+ //{mult_triangle_addr_surely_done_comb, mult_triangle_addr_almost_done_comb};
+
+ default:
+ mult_triangle_addr_almost_done_flop <= 1'b0;
+ //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= 2'b00;
+
+ endcase
+
+ always @(posedge clk) begin
+ // two-stage delay chain behind the "almost done" flag
+ mult_triangle_addr_surely_done_flop <= mult_triangle_addr_almost_done_flop; // "almost done" delayed by one clock
+ mult_triangle_addr_tardy_done_flop <= mult_triangle_addr_surely_done_flop; // "almost done" delayed by two clocks
+ //
+ end
+
+
//
// FSM Transition Logic
//
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
+ wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
-
always @(posedge clk)
//
case (fsm_state_next)
+ //
FSM_STATE_MULT_SQUARE_COL_0_INIT,
FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_addr <= !mult_square_addr_almost_done_flop ? mac_slim_bram_xy_addr + 1'b1 : 8'd0;
+ //
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_triangle_addr_almost_done_flop || (col_is_last && mult_triangle_addr_surely_done_flop) ?
+ 8'd0 : mac_slim_bram_xy_addr + 1'b1;
+ //
default: mac_slim_bram_xy_addr <= 8'dX;
endcase
+
+ wire [2:0] fat_bram_offset_rom[0:3]; // constant 3-bit offsets; used as the low address bits loaded in the *_INIT states
+
+ generate for (z=1; z<NUM_MULTS; z=z+2) // visits odd z only: 1, 3, 5, ...
+ begin : gen_fat_bram_offset
+ assign fat_bram_offset_rom[(z-1)/2] = z[2:0]; // entry k holds the low three bits of 2*k+1
+ end
+ endgenerate
+
integer j;
- always @(posedge clk)
+ always @(posedge clk) begin
//
for (j=0; j<(NUM_MULTS/2); j=j+1)
+ //
case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= 1 + 2 * j;
- FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= 8 * (col_index + 1) + 1 + 2 * j;
+ //
+ // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+ //
+ FSM_STATE_MULT_SQUARE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};
+ FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
+ //
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
+ //
default: mac_fat_bram_xy_addr[j] <= 8'dX;
endcase
-
-
+ //
+ case (fsm_state_next)
+ //
+ // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+ //
+ FSM_STATE_MULT_SQUARE_COL_0_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+ FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
+ //
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
+ //
+ default: mac_fat_bram_xy_addr[4] <= 8'dX;
+ endcase
+//
+ end
always @(posedge clk)
//
@@ -436,6 +580,13 @@ module tb_square;
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_bank <= BANK_SLIM_T1T2;
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= col_is_last && (mult_triangle_addr_almost_done_flop || mult_triangle_addr_surely_done_flop) ?
+ BANK_SLIM_N_COEFF_EXT : BANK_SLIM_N_COEFF;
default: mac_slim_bram_xy_bank <= 2'bXX;
endcase
@@ -447,8 +598,14 @@ module tb_square;
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_bank <= BANK_FAT_T1T2;
- default: mac_fat_bram_xy_bank <= 3'bXXX;
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_T1T2}};
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {BANK_FAT_ABH, BANK_FAT_ABL};
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_ABL}};
+ default: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{3'bXXX}};
endcase
@@ -462,7 +619,13 @@ module tb_square;
FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1;
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_ena <= ~mult_square_addr_almost_done_flop;
- default: mac_slim_bram_xy_ena <= 1'b0;
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1;
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_ena <= !col_is_last ? ~mult_triangle_addr_almost_done_flop : ~mult_triangle_addr_surely_done_flop;
+ default: mac_slim_bram_xy_ena <= 1'b0;
endcase
always @(posedge clk)
@@ -473,7 +636,13 @@ module tb_square;
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1;
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1;
default: mac_fat_bram_xy_ena <= 1'b0;
endcase
@@ -486,7 +655,7 @@ module tb_square;
//
mac_fat_bram_xy_reg_ena <= mac_fat_bram_xy_ena;
-
+
always @(posedge clk)
//
if (mac_slim_bram_xy_reg_ena_dly)
@@ -499,7 +668,7 @@ module tb_square;
input [7:0] mac_fat_bram_xy_addr_current;
input [7:0] mac_fat_bram_xy_addr_last;
begin
- if (mac_fat_bram_xy_addr_current > 0)
+ if (mac_fat_bram_xy_addr_current > 8'd0)
mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_current - 1'b1;
else
mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_last;
@@ -541,26 +710,41 @@ module tb_square;
//
case (fsm_state_next)
//
- FSM_STATE_MULT_SQUARE_COL_0_INIT: begin
- col_index <= 5'd0;
- col_index_last <= index_last[7:3];
+ FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT: begin
+ col_index <= 5'd0;
+ col_index_last <= index_last[7:3];
+ col_index_next1 <= 5'd1;
+ //col_index_next2 <= 5'd2;
+ col_is_last <= 1'b0;
+
end
//
- FSM_STATE_MULT_SQUARE_COL_N_INIT:
- col_index <= col_index + 1'b1;
+ FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT: begin
+ col_index <= col_index_next1;
+ col_is_last <= col_index_next1 == col_index_last;
+ col_index_next1 <= col_index_next1 == col_index_last ? 5'd0 : col_index_next1 + 5'd1;
+ //col_index_next2 <= col_index_next2 + 1'b1;
+ end
//
endcase
- assign fsm_state_after_mult_square = (col_index == col_index_last) ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
+ assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
+ assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
always @(posedge clk)
//
case (fsm_state_next)
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {8{1'b0}};
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}};
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
- default: dsp_xy_mode_z_adv4 <= {8{1'b1}};
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}};
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}};
+ default: dsp_xy_mode_z_adv4 <= {9{1'b1}};
endcase
always @(posedge clk) begin
@@ -571,25 +755,45 @@ module tb_square;
dsp_xy_mode_z_adv3 <= {dsp_xy_mode_z_adv4};
end
- function [NUM_MULTS-1:0] calc_mac_mode_z_square;
- input [ 4:0] col_index_value;
- input [ 7:0] mac_slim_bram_xy_addr_value;
+ function [NUM_MULTS:0] calc_mac_mode_z_square; // returns a (NUM_MULTS+1)-bit mask; the result is one bit wider than before this change
+ input [ 4:0] col_index_value; // compared against bits [7:3] of the address argument
+ input [ 7:0] mac_slim_bram_xy_addr_value; // bits [7:3] are matched with col_index_value, bits [2:0] pick the cleared bit
begin
if (mac_slim_bram_xy_addr_value[7:3] == col_index_value) // only a matching upper address part clears a bit
case (mac_slim_bram_xy_addr_value[2:0]) // one-cold pattern: bit number addr[2:0] of the low eight bits is 0
- 3'b000: calc_mac_mode_z_square = 8'b11111110;
- 3'b001: calc_mac_mode_z_square = 8'b11111101;
- 3'b010: calc_mac_mode_z_square = 8'b11111011;
- 3'b011: calc_mac_mode_z_square = 8'b11110111;
- 3'b100: calc_mac_mode_z_square = 8'b11101111;
- 3'b101: calc_mac_mode_z_square = 8'b11011111;
- 3'b110: calc_mac_mode_z_square = 8'b10111111;
- 3'b111: calc_mac_mode_z_square = 8'b01111111;
+ 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110}; // the prepended top bit is 1 in every branch
+ 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
+ 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
+ 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
+ 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
+ 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
+ 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
+ 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
endcase
else
- calc_mac_mode_z_square = {NUM_MULTS{1'b1}};
+ calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; // no match: all bits set; NOTE(review): the 8-bit literals above presume NUM_MULTS == 8 - confirm
end
endfunction
+ /*
+ function [NUM_MULTS:0] calc_mac_mode_z_triangle;
+ input [ 4:0] col_index_value;
+ input [ 7:0] mac_slim_bram_xy_addr_value;
+ begin
+ if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
+ case (mac_slim_bram_xy_addr_value[2:0])
+ 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
+ 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
+ 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
+ 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
+ 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
+ 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
+ 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
+ 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
+ endcase
+ else
+ calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
+ end
+ endfunction*/
reg recomb_x_ena = 1'b0;
reg recomb_y_ena = 1'b0;
@@ -623,6 +827,7 @@ module tb_square;
.col_index (col_index),
.col_index_last (col_index_last),
.slim_bram_xy_addr (mac_slim_bram_xy_addr),
+ .slim_bram_xy_bank (mac_slim_bram_xy_bank),
.fat_bram_xy_bank (recomb_fat_bram_xy_bank),
.fat_bram_xy_addr (recomb_fat_bram_xy_addr),
.fat_bram_x_dout (recomb_fat_bram_x_dout),
@@ -631,14 +836,17 @@ module tb_square;
);
reg [17:0] AB_READ[0:63];
+ reg [17:0] Q_READ[0:32];
always @(posedge clk)
//
if (recomb_fat_bram_xy_dout_valid)
//
case (recomb_fat_bram_xy_bank)
- 3'd1: AB_READ[recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout;
- 3'd2: AB_READ[32 + recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout;
+ 3'd1: AB_READ[ (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+ 3'd2: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+ 3'd3: Q_READ [ (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+ 3'd4: Q_READ [32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
endcase
@@ -663,11 +871,8 @@ module tb_square;
mgr_fat_bram_x_din <= {18{1'bX}};
mgr_fat_bram_y_din <= {18{1'bX}};
end
-
-
-
-
-
+
+
task verify_ab;
reg verify_ab_ok;
begin
@@ -687,6 +892,28 @@ module tb_square;
endtask
+ task verify_q; // testbench check: compares each captured Q_READ word with the expected Q word and prints a verdict
+ reg verify_q_ok; // stays 1 only while every compared word matches
+ begin
+ verify_q_ok = 1;
+ for (i=0; i<33; i=i+1) // 33 words, indices 0..32, the full [0:32] range of Q and Q_READ
+ if (Q_READ[i] === Q[i]) // case equality: a word containing X/Z bits counts as a mismatch
+ $display("Q / Q_READ [%02d] = 0x%05x / 0x%05x", i, Q[i], Q_READ[i]);
+ else begin
+ $display("Q / Q_READ [%02d] = 0x%05x / 0x%05x <???>", i, Q[i], Q_READ[i]);
+ verify_q_ok = 0; // remember the failure but keep printing the remaining words
+ end
+ if (verify_q_ok)
+ $display("Q is OK.");
+ else
+ $display("Q is WRONG!");
+ end
+ endtask
+
+
+ wire mult_square_addr_done = mult_square_addr_surely_done_flop;
+
+ wire mult_triangle_addr_done = !col_is_last ? mult_triangle_addr_surely_done_flop : mult_triangle_addr_tardy_done_flop;
always @* begin
//
@@ -697,13 +924,23 @@ module tb_square;
FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
- FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+
+ FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF;
+
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = mult_triangle_addr_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
+
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = mult_triangle_addr_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
- FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF;
+ FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = FSM_STATE_MULT_TRIANGLE_HOLDOFF;//recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF;
default: fsm_state_next = FSM_STATE_IDLE ;