aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-01 15:07:56 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-01 15:07:56 +0300
commitecf0374b7bbf1c1ea56fea8f1acaeea85c3612d2 (patch)
tree7d73b8875ed8b64a3fd27137659e9fb9ea0217be
parent9e9689d7b00ecdcc1c651f5e369e00a53d62df3c (diff)
Further work on the Montgomery modular multiplier. Added the third
"rectangular" stage of the multiplication process, i.e. computation of how many copies of the modulus N to add to the intermediate product AB to zeroize the lower half: M = Q * N.
-rw-r--r--bench/tb_square.v362
-rw-r--r--rtl/modexpng_mmm_fsm.vh11
-rw-r--r--rtl/modexpng_parameters.vh26
-rw-r--r--rtl/modexpng_part_recombinator.v425
4 files changed, 668 insertions, 156 deletions
diff --git a/bench/tb_square.v b/bench/tb_square.v
index 23831db..d35a5cc 100644
--- a/bench/tb_square.v
+++ b/bench/tb_square.v
@@ -41,6 +41,8 @@ module tb_square;
reg [17:0] AB[0:63];
reg [17:0] N_COEFF[0:32];
reg [17:0] Q[0:32];
+ reg [17:0] N[0:31];
+ reg [17:0] M[0:64];
//
@@ -103,6 +105,33 @@ module tb_square;
Q[28] = 18'h0bf39; Q[29] = 18'h0929d; Q[30] = 18'h05273; Q[31] = 18'h0c30a;
Q[32] = 18'h0eef3;
//
+ N[ 0] = 18'h03ad9; N[ 1] = 18'h046b4; N[ 2] = 18'h0e181; N[ 3] = 18'h0fac7;
+ N[ 4] = 18'h0be72; N[ 5] = 18'h029ab; N[ 6] = 18'h07e51; N[ 7] = 18'h037a8;
+ N[ 8] = 18'h0880c; N[ 9] = 18'h05a7d; N[10] = 18'h043c2; N[11] = 18'h038c9;
+ N[12] = 18'h01275; N[13] = 18'h0aa0d; N[14] = 18'h0c0c1; N[15] = 18'h0d035;
+ N[16] = 18'h04082; N[17] = 18'h0543c; N[18] = 18'h0dcb0; N[19] = 18'h0497c;
+ N[20] = 18'h0b12c; N[21] = 18'h013d4; N[22] = 18'h0b80a; N[23] = 18'h051cf;
+ N[24] = 18'h0286c; N[25] = 18'h0b600; N[26] = 18'h0d838; N[27] = 18'h0af4b;
+ N[28] = 18'h08274; N[29] = 18'h06a07; N[30] = 18'h0beea; N[31] = 18'h0f000;
+ //
+ M[ 0] = 18'h041b2; M[ 1] = 18'h00128; M[ 2] = 18'h06b69; M[ 3] = 18'h08e7e;
+ M[ 4] = 18'h0118c; M[ 5] = 18'h0b96d; M[ 6] = 18'h0ebe5; M[ 7] = 18'h0f873;
+ M[ 8] = 18'h0cf14; M[ 9] = 18'h0de83; M[10] = 18'h09690; M[11] = 18'h05e9a;
+ M[12] = 18'h048ac; M[13] = 18'h0b506; M[14] = 18'h01283; M[15] = 18'h08631;
+ M[16] = 18'h0179c; M[17] = 18'h06820; M[18] = 18'h0867b; M[19] = 18'h0b750;
+ M[20] = 18'h0e680; M[21] = 18'h0df95; M[22] = 18'h0d818; M[23] = 18'h0b4c5;
+ M[24] = 18'h0cced; M[25] = 18'h0c4a9; M[26] = 18'h0bb78; M[27] = 18'h04295;
+ M[28] = 18'h0b1b4; M[29] = 18'h09635; M[30] = 18'h0066b; M[31] = 18'h022b1;
+ M[32] = 18'h04fdb; M[33] = 18'h0efc8; M[34] = 18'h00a14; M[35] = 18'h04bef;
+ M[36] = 18'h006a1; M[37] = 18'h0f1a6; M[38] = 18'h0fc40; M[39] = 18'h0adb5;
+ M[40] = 18'h06e8f; M[41] = 18'h02c60; M[42] = 18'h083e1; M[43] = 18'h0f862;
+ M[44] = 18'h0da61; M[45] = 18'h0dd3d; M[46] = 18'h03381; M[47] = 18'h09db0;
+ M[48] = 18'h05454; M[49] = 18'h07525; M[50] = 18'h0d9c7; M[51] = 18'h0a361;
+ M[52] = 18'h049e0; M[53] = 18'h0a671; M[54] = 18'h0242e; M[55] = 18'h07cb2;
+ M[56] = 18'h02021; M[57] = 18'h0bde1; M[58] = 18'h025aa; M[59] = 18'h0c615;
+ M[60] = 18'h05645; M[61] = 18'h03b46; M[62] = 18'h065d6; M[63] = 18'h0390d;
+ M[64] = 18'h0e005;
+ //
end
@@ -134,6 +163,12 @@ module tb_square;
reg [ 7:0] tb_slim_bram_xy_addr;
reg [17:0] tb_slim_bram_x_din;
reg [17:0] tb_slim_bram_y_din;
+
+ reg mgr_slim_bram_xy_ena = 1'b0;
+ reg [ 1:0] mgr_slim_bram_xy_bank;
+ reg [ 7:0] mgr_slim_bram_xy_addr;
+ reg [17:0] mgr_slim_bram_x_din;
+ reg [17:0] mgr_slim_bram_y_din;
reg mac_slim_bram_xy_ena = 1'b0;
reg mac_slim_bram_xy_reg_ena = 1'b0;
@@ -195,10 +230,10 @@ module tb_square;
ip_bram_18k slim_bram_x
(
.clka (clk),
- .ena (tb_slim_bram_xy_ena),
- .wea (tb_slim_bram_xy_ena),
- .addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}),
- .dina (tb_slim_bram_x_din),
+ .ena (mgr_slim_bram_xy_ena),
+ .wea (mgr_slim_bram_xy_ena),
+ .addra ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
+ .dina (mgr_slim_bram_x_din),
.clkb (clk),
.enb (mac_slim_bram_xy_ena),
@@ -210,10 +245,10 @@ module tb_square;
ip_bram_18k slim_bram_y
(
.clka (clk),
- .ena (tb_slim_bram_xy_ena),
- .wea (tb_slim_bram_xy_ena),
- .addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}),
- .dina (tb_slim_bram_y_din),
+ .ena (mgr_slim_bram_xy_ena),
+ .wea (mgr_slim_bram_xy_ena),
+ .addra ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
+ .dina (mgr_slim_bram_y_din),
.clkb (clk),
.enb (mac_slim_bram_xy_ena),
@@ -266,14 +301,23 @@ module tb_square;
wait_clock_tick;
end
for (i=32; i<33; i=i+1) begin
- tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF_EXT;
- tb_slim_bram_xy_addr = 0;
+ tb_slim_bram_xy_bank = BANK_SLIM_EXT;
+ tb_slim_bram_xy_addr = 0; // !
tb_slim_bram_x_din = N_COEFF[i];
tb_slim_bram_y_din = N_COEFF[i];
wait_clock_tick;
end
+ for (i=0; i<32; i=i+1) begin
+ tb_fat_bram_xy_bank = BANK_FAT_N;
+ tb_fat_bram_xy_addr = i[7:0];
+ tb_fat_bram_x_din = N[i];
+ tb_fat_bram_y_din = N[i];
+
+ wait_clock_tick;
+ end
+
tb_fat_bram_xy_ena = 1'b0;
tb_slim_bram_xy_ena = 1'b0;
@@ -299,6 +343,7 @@ module tb_square;
verify_ab;
verify_q;
+ verify_m;
end
@@ -418,25 +463,23 @@ module tb_square;
wire mult_square_addr_almost_done_comb;
reg mult_square_addr_almost_done_flop;
-
- //wire mult_square_addr_surely_done_comb;
reg mult_square_addr_surely_done_flop;
- reg mult_triangle_addr_almost_done_comb;
- reg mult_triangle_addr_almost_done_flop;
-
- //wire mult_triangle_addr_surely_done_comb;
+ wire mult_triangle_addr_almost_done_comb;
+ reg mult_triangle_addr_almost_done_flop;
reg mult_triangle_addr_surely_done_flop;
reg mult_triangle_addr_tardy_done_flop;
+
+ wire mult_rectangle_addr_almost_done_comb;
+ reg mult_rectangle_addr_almost_done_flop;
+ reg mult_rectangle_addr_surely_done_flop;
+ reg mult_rectangle_addr_tardy_done_flop;
+
assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
+ assign mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
+ assign mult_rectangle_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
- always @*
- //
- //if (!col_is_last)
- mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
- //else
- //mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
@@ -482,6 +525,29 @@ module tb_square;
//
end
+
+ always @(posedge clk)
+ // Registers the rectangle "almost done" comparator: it is sampled only while fsm_state is one of the two RECTANGLE *_BUSY states and is forced to 0 in every other state.
+ case (fsm_state)
+
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
+ mult_rectangle_addr_almost_done_flop <= mult_rectangle_addr_almost_done_comb;
+ //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <=
+ //{mult_triangle_addr_surely_done_comb, mult_triangle_addr_almost_done_comb};
+
+ default:
+ mult_rectangle_addr_almost_done_flop <= 1'b0;
+ //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= 2'b00;
+
+ endcase
+
+ always @(posedge clk) begin
+ // Two-stage delay chain: surely_done is almost_done delayed by one clock, tardy_done is surely_done delayed by one more clock.
+ mult_rectangle_addr_surely_done_flop <= mult_rectangle_addr_almost_done_flop;
+ mult_rectangle_addr_tardy_done_flop <= mult_rectangle_addr_surely_done_flop;
+ // Neither flop is gated by fsm_state here; both simply follow the previous stage on every clock.
+ end
//
@@ -489,6 +555,7 @@ module tb_square;
//
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
+ wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
always @(posedge clk)
@@ -510,6 +577,14 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_triangle_addr_almost_done_flop || (col_is_last && mult_triangle_addr_surely_done_flop) ?
8'd0 : mac_slim_bram_xy_addr + 1'b1;
//
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ?
+ 8'd1 : mac_slim_bram_xy_addr + 1'b1;
+ //
default: mac_slim_bram_xy_addr <= 8'dX;
endcase
@@ -543,7 +618,14 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
//
default: mac_fat_bram_xy_addr[j] <= 8'dX;
endcase
@@ -564,7 +646,14 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
//
default: mac_fat_bram_xy_addr[4] <= 8'dX;
endcase
@@ -574,19 +663,30 @@ module tb_square;
always @(posedge clk)
//
case (fsm_state_next)
+ //
FSM_STATE_MULT_SQUARE_COL_0_INIT,
FSM_STATE_MULT_SQUARE_COL_N_INIT,
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_bank <= BANK_SLIM_T1T2;
+ //
FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= col_is_last && (mult_triangle_addr_almost_done_flop || mult_triangle_addr_surely_done_flop) ?
- BANK_SLIM_N_COEFF_EXT : BANK_SLIM_N_COEFF;
+ BANK_SLIM_EXT : BANK_SLIM_N_COEFF;
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ?
+ BANK_SLIM_EXT : BANK_SLIM_Q;
+ //
default: mac_slim_bram_xy_bank <= 2'bXX;
endcase
@@ -605,6 +705,12 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {BANK_FAT_ABH, BANK_FAT_ABL};
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_ABL}};
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_N}};
default: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{3'bXXX}};
endcase
@@ -625,6 +731,12 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1;
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_ena <= !col_is_last ? ~mult_triangle_addr_almost_done_flop : ~mult_triangle_addr_surely_done_flop;
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1;
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_ena <= ~mult_rectangle_addr_surely_done_flop;
default: mac_slim_bram_xy_ena <= 1'b0;
endcase
@@ -642,7 +754,13 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1;
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1;
default: mac_fat_bram_xy_ena <= 1'b0;
endcase
@@ -654,12 +772,30 @@ module tb_square;
always @(posedge clk)
//
mac_fat_bram_xy_reg_ena <= mac_fat_bram_xy_ena;
-
+
+ reg ladder_mode = 1'b0; // 0 = X:T1*T2, Y:T2*T2
+ // 1 = X:T1*T2, Y:T2*T1
+
+ reg dsp_swap_xy; // no initial value: undefined until the first SQUARE_COL_0_TRIG state is seen
+
+ always @(posedge clk)
+ // Set when the square stage triggers its first column, cleared when the triangle stage triggers its first column; there is no default branch, so the flag holds its value in all other states.
+ case (fsm_state)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_swap_xy <= 1'b1;
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_swap_xy <= 1'b0;
+ endcase
+
always @(posedge clk)
//
- if (mac_slim_bram_xy_reg_ena_dly)
- {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]};
+ if (mac_slim_bram_xy_reg_ena_dly) begin // rewrite
+ if (!dsp_swap_xy)
+ {dsp_y_b, dsp_x_b} <= {mac_slim_bram_y_dout[16:0], mac_slim_bram_x_dout[16:0]};
+ else begin
+ if (!ladder_mode) {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]};
+ else {dsp_y_b, dsp_x_b} <= {mac_slim_bram_y_dout[16:0], mac_slim_bram_x_dout[16:0]};
+ end
+ end
else
{dsp_y_b, dsp_x_b} <= {2{{17{1'bX}}}};
@@ -711,7 +847,8 @@ module tb_square;
case (fsm_state_next)
//
FSM_STATE_MULT_SQUARE_COL_0_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT: begin
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
col_index <= 5'd0;
col_index_last <= index_last[7:3];
col_index_next1 <= 5'd1;
@@ -721,7 +858,8 @@ module tb_square;
end
//
FSM_STATE_MULT_SQUARE_COL_N_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT: begin
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
col_index <= col_index_next1;
col_is_last <= col_index_next1 == col_index_last;
col_index_next1 <= col_index_next1 == col_index_last ? 5'd0 : col_index_next1 + 5'd1;
@@ -730,8 +868,9 @@ module tb_square;
//
endcase
- assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
- assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
+ assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
+ assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
+ assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
always @(posedge clk)
//
@@ -741,9 +880,13 @@ module tb_square;
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}};
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}};
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, mac_slim_bram_xy_addr_dly);
default: dsp_xy_mode_z_adv4 <= {9{1'b1}};
endcase
@@ -774,26 +917,26 @@ module tb_square;
calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
end
endfunction
- /*
- function [NUM_MULTS:0] calc_mac_mode_z_triangle;
+
+ function [NUM_MULTS:0] calc_mac_mode_z_rectangle; // when addr[7:3] == col_index_value: MSB 1 plus an 8-bit mask with a single 0 at bit addr[2:0]; otherwise all ones (the 8-bit literals assume NUM_MULTS == 8 - TODO confirm)
input [ 4:0] col_index_value;
input [ 7:0] mac_slim_bram_xy_addr_value;
begin
if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
case (mac_slim_bram_xy_addr_value[2:0])
- 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
- 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
- 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
- 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
- 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
- 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
- 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
- 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
+ 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110};
+ 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101};
+ 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011};
+ 3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111};
+ 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111};
+ 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111};
+ 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111};
+ 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111};
endcase
else
- calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
+ calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}};
end
- endfunction*/
+ endfunction
reg recomb_x_ena = 1'b0;
reg recomb_y_ena = 1'b0;
@@ -810,44 +953,66 @@ module tb_square;
wire [17:0] recomb_fat_bram_x_dout;
wire [17:0] recomb_fat_bram_y_dout;
wire recomb_fat_bram_xy_dout_valid;
+ wire [ 2:0] recomb_slim_bram_xy_bank;
+ wire [ 7:0] recomb_slim_bram_xy_addr;
+ wire [17:0] recomb_slim_bram_x_dout;
+ wire [17:0] recomb_slim_bram_y_dout;
+ wire recomb_slim_bram_xy_dout_valid;
wire recomb_rdy;
modexpng_part_recombinator recomb
(
- .clk (clk),
- .rdy (recomb_rdy),
- .fsm_state_next (fsm_state_next),
- .index_last (index_last),
- .dsp_x_ce_p (dsp_x_ce_p),
- .dsp_y_ce_p (dsp_y_ce_p),
- .ena_x (recomb_x_ena),
- .ena_y (recomb_y_ena),
- .dsp_x_p (dsp_x_p),
- .dsp_y_p (dsp_y_p),
- .col_index (col_index),
- .col_index_last (col_index_last),
- .slim_bram_xy_addr (mac_slim_bram_xy_addr),
- .slim_bram_xy_bank (mac_slim_bram_xy_bank),
- .fat_bram_xy_bank (recomb_fat_bram_xy_bank),
- .fat_bram_xy_addr (recomb_fat_bram_xy_addr),
- .fat_bram_x_dout (recomb_fat_bram_x_dout),
- .fat_bram_y_dout (recomb_fat_bram_y_dout),
- .fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid)
+ .clk (clk),
+ .rdy (recomb_rdy),
+ .fsm_state_next (fsm_state_next),
+ .index_last (index_last),
+ .dsp_x_ce_p (dsp_x_ce_p),
+ .dsp_y_ce_p (dsp_y_ce_p),
+ .ena_x (recomb_x_ena),
+ .ena_y (recomb_y_ena),
+ .dsp_x_p (dsp_x_p),
+ .dsp_y_p (dsp_y_p),
+ .col_index (col_index),
+ .col_index_last (col_index_last),
+ .slim_bram_xy_addr (mac_slim_bram_xy_addr),
+ .slim_bram_xy_bank (mac_slim_bram_xy_bank),
+ .rcmb_fat_bram_xy_bank (recomb_fat_bram_xy_bank),
+ .rcmb_fat_bram_xy_addr (recomb_fat_bram_xy_addr),
+ .rcmb_fat_bram_x_dout (recomb_fat_bram_x_dout),
+ .rcmb_fat_bram_y_dout (recomb_fat_bram_y_dout),
+ .rcmb_fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid),
+ .rcmb_slim_bram_xy_bank (recomb_slim_bram_xy_bank),
+ .rcmb_slim_bram_xy_addr (recomb_slim_bram_xy_addr),
+ .rcmb_slim_bram_x_dout (recomb_slim_bram_x_dout),
+ .rcmb_slim_bram_y_dout (recomb_slim_bram_y_dout),
+ .rcmb_slim_bram_xy_dout_valid (recomb_slim_bram_xy_dout_valid)
);
reg [17:0] AB_READ[0:63];
reg [17:0] Q_READ[0:32];
+ reg [17:0] M_READ[0:64];
- always @(posedge clk)
+ always @(posedge clk) begin
// Testbench capture of recombinator output: fat-bank words go to AB_READ / M_READ, slim-bank words go to Q_READ; only the x data outputs are recorded.
if (recomb_fat_bram_xy_dout_valid)
// The array slot is selected by the bank; the offset inside the slot is the address modulo 32.
case (recomb_fat_bram_xy_bank)
- 3'd1: AB_READ[ (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
- 3'd2: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
- 3'd3: Q_READ [ (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
- 3'd4: Q_READ [32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+ BANK_FAT_ABL: AB_READ[recomb_fat_bram_xy_addr % 32] <= recomb_fat_bram_x_dout;
+ BANK_FAT_ABH: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+ BANK_FAT_ML: M_READ[recomb_fat_bram_xy_addr % 32] <= recomb_fat_bram_x_dout;
+ BANK_FAT_MH: M_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+ BANK_FAT_EXT: M_READ[64 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout; // NOTE(review): M_READ is [0:64], so this stays in range only when addr % 32 == 0 - confirm the EXT bank is written at address 0 only
endcase
+ // Slim-bank capture: independent of the fat-bank branch above, both can fire in the same clock.
+ if (recomb_slim_bram_xy_dout_valid)
+ // Q words are indexed by the raw address; the extra word Q_READ[32] is taken from address 1 of the EXT bank.
+ case (recomb_slim_bram_xy_bank)
+ BANK_SLIM_Q: Q_READ[recomb_slim_bram_xy_addr] <= recomb_slim_bram_x_dout;
+ BANK_SLIM_EXT: if (recomb_slim_bram_xy_addr == 8'd1)
+ Q_READ[32] <= recomb_slim_bram_x_dout;
+ endcase
+ // Banks not listed in either case statement are ignored.
+ end
always @(posedge clk)
@@ -873,6 +1038,29 @@ module tb_square;
end
+ always @(posedge clk)
+ // Slim BRAM write-port mux: a testbench write has priority, otherwise a valid recombinator word is forwarded, otherwise the port is disabled and its fields are driven to X. All fields are registered.
+ if (tb_slim_bram_xy_ena) begin
+ mgr_slim_bram_xy_ena <= 1'b1;
+ mgr_slim_bram_xy_bank <= tb_slim_bram_xy_bank;
+ mgr_slim_bram_xy_addr <= tb_slim_bram_xy_addr;
+ mgr_slim_bram_x_din <= tb_slim_bram_x_din;
+ mgr_slim_bram_y_din <= tb_slim_bram_y_din;
+ end else if (recomb_slim_bram_xy_dout_valid) begin
+ mgr_slim_bram_xy_ena <= 1'b1;
+ mgr_slim_bram_xy_bank <= recomb_slim_bram_xy_bank[1:0]; // slim bank select is 2 bits wide; take the low bits explicitly instead of relying on implicit truncation
+ mgr_slim_bram_xy_addr <= recomb_slim_bram_xy_addr;
+ mgr_slim_bram_x_din <= recomb_slim_bram_x_dout;
+ mgr_slim_bram_y_din <= recomb_slim_bram_y_dout;
+ end else begin
+ mgr_slim_bram_xy_ena <= 1'b0;
+ mgr_slim_bram_xy_bank <= 2'bXX; // width matches the 2-bit mgr_slim_bram_xy_bank register
+ mgr_slim_bram_xy_addr <= 8'hXX;
+ mgr_slim_bram_x_din <= {18{1'bX}};
+ mgr_slim_bram_y_din <= {18{1'bX}};
+ end
+
+
task verify_ab;
reg verify_ab_ok;
begin
@@ -911,9 +1099,29 @@ module tb_square;
endtask
+ task verify_m; // compares M_READ[0..64] with the reference M[], prints every word and then an overall verdict
+ reg verify_m_ok;
+ begin
+ verify_m_ok = 1;
+ for (i=0; i<65; i=i+1) // i is not declared inside this task; it is a variable from the enclosing scope
+ if (M_READ[i] === M[i]) // case equality: a word containing X/Z in M_READ is reported as a mismatch
+ $display("M / M_READ [%02d] = 0x%05x / 0x%05x", i, M[i], M_READ[i]);
+ else begin
+ $display("M / M_READ [%02d] = 0x%05x / 0x%05x <???>", i, M[i], M_READ[i]);
+ verify_m_ok = 0;
+ end
+ if (verify_m_ok)
+ $display("M is OK.");
+ else
+ $display("M is WRONG!");
+ end
+ endtask
+
+
wire mult_square_addr_done = mult_square_addr_surely_done_flop;
-
wire mult_triangle_addr_done = !col_is_last ? mult_triangle_addr_surely_done_flop : mult_triangle_addr_tardy_done_flop;
+ wire mult_rectangle_addr_done = mult_rectangle_addr_tardy_done_flop;
+
always @* begin
//
@@ -940,7 +1148,17 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = mult_triangle_addr_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
- FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = FSM_STATE_MULT_TRIANGLE_HOLDOFF;//recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF;
+ FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF;
+
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = mult_rectangle_addr_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
+
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = mult_rectangle_addr_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
+
+ FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
default: fsm_state_next = FSM_STATE_IDLE ;
diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh
index 2700a42..3bdae66 100644
--- a/rtl/modexpng_mmm_fsm.vh
+++ b/rtl/modexpng_mmm_fsm.vh
@@ -30,5 +30,14 @@ localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26;
localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31; // rectangle-stage states, first column (codes 28..30 are not assigned in the visible part of this file)
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33;
+
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34; // rectangle-stage states, every later column
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36;
+
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37; // NOTE(review): FSM_STATE_WIDTH is defined elsewhere; it must be at least 6 for these codes - confirm
+
localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999;
- \ No newline at end of file
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index f846119..57eef35 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -3,19 +3,19 @@
//localparam BANK_ADDR_WIDTH = 3; // TODO: Replace everywhere!
-localparam [2:0] BANK_FAT_T1T2 = 3'd0;
-localparam [2:0] BANK_FAT_ABL = 3'd1;
-localparam [2:0] BANK_FAT_ABH = 3'd2;
-localparam [2:0] BANK_FAT_Q = 3'd3;
-localparam [2:0] BANK_FAT_Q_EXT = 3'd4;
-localparam [2:0] BANK_FAT_ML = 3'd5;
-localparam [2:0] BANK_FAT_MH = 3'd6;
-localparam [2:0] BANK_FAT_MH_EXT = 3'd7;
-
-localparam [1:0] BANK_SLIM_T1T2 = 2'd0;
-localparam [1:0] BANK_SLIM_N = 2'd1;
-localparam [1:0] BANK_SLIM_N_COEFF = 2'd2;
-localparam [1:0] BANK_SLIM_N_COEFF_EXT = 2'd3;
+localparam [2:0] BANK_FAT_T1T2 = 3'd0;
+localparam [2:0] BANK_FAT_ABL = 3'd1;
+localparam [2:0] BANK_FAT_ABH = 3'd2;
+localparam [2:0] BANK_FAT_N = 3'd3;
+localparam [2:0] BANK_FAT_ML = 3'd4;
+localparam [2:0] BANK_FAT_MH = 3'd5;
+localparam [2:0] BANK_FAT_EXT = 3'd6; // 0 -> MH'
+localparam [2:0] BANK_FAT_UNUSED = 3'd7;
+
+localparam [1:0] BANK_SLIM_T1T2 = 2'd0;
+localparam [1:0] BANK_SLIM_N_COEFF = 2'd1;
+localparam [1:0] BANK_SLIM_Q = 2'd2;
+localparam [1:0] BANK_SLIM_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q'
//localparam BANK_Y_T2 = 3'd0;
diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v
index c51e7ef..567ecd5 100644
--- a/rtl/modexpng_part_recombinator.v
+++ b/rtl/modexpng_part_recombinator.v
@@ -9,7 +9,8 @@ module modexpng_part_recombinator
dsp_x_p, dsp_y_p,
col_index, col_index_last,
slim_bram_xy_addr, slim_bram_xy_bank,
- fat_bram_xy_bank, fat_bram_xy_addr, fat_bram_x_dout, fat_bram_y_dout, fat_bram_xy_dout_valid
+ rcmb_fat_bram_xy_bank, rcmb_fat_bram_xy_addr, rcmb_fat_bram_x_dout, rcmb_fat_bram_y_dout, rcmb_fat_bram_xy_dout_valid,
+ rcmb_slim_bram_xy_bank, rcmb_slim_bram_xy_addr, rcmb_slim_bram_x_dout, rcmb_slim_bram_y_dout, rcmb_slim_bram_xy_dout_valid
);
@@ -36,11 +37,17 @@ module modexpng_part_recombinator
input [ 7:0] slim_bram_xy_addr;
input [ 1:0] slim_bram_xy_bank;
- output [ 2:0] fat_bram_xy_bank;
- output [ 7:0] fat_bram_xy_addr;
- output [ 17:0] fat_bram_x_dout;
- output [ 17:0] fat_bram_y_dout;
- output fat_bram_xy_dout_valid;
+ output [ 2:0] rcmb_fat_bram_xy_bank;
+ output [ 7:0] rcmb_fat_bram_xy_addr;
+ output [ 17:0] rcmb_fat_bram_x_dout;
+ output [ 17:0] rcmb_fat_bram_y_dout;
+ output rcmb_fat_bram_xy_dout_valid;
+
+ output [ 2:0] rcmb_slim_bram_xy_bank;
+ output [ 7:0] rcmb_slim_bram_xy_addr;
+ output [ 17:0] rcmb_slim_bram_x_dout;
+ output [ 17:0] rcmb_slim_bram_y_dout;
+ output rcmb_slim_bram_xy_dout_valid;
//
@@ -148,10 +155,10 @@ module modexpng_part_recombinator
if (ena_x && ena_y)
//
case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1;
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2;
- //FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
- default: rcmb_mode <= 2'd0;
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1;
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2;
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
+ default: rcmb_mode <= 2'd0;
endcase
@@ -203,7 +210,7 @@ module modexpng_part_recombinator
input [1:0] slim_bram_xy_bank_value;
begin
//
- if (slim_bram_xy_bank_value == BANK_SLIM_N_COEFF_EXT)
+ if (slim_bram_xy_bank_value == BANK_SLIM_EXT)
calc_triangle_aux_lsb = 1'b1;
else
calc_triangle_aux_lsb = 1'b0;
@@ -216,6 +223,21 @@ module modexpng_part_recombinator
end
endfunction
+ function calc_rectangle_valid_lsb; // 1 when addr[7:3] equals col_index_value and the bank is not BANK_SLIM_EXT, otherwise 0
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value; // not read by the body
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ begin
+ // Only the upper five address bits are compared with the column index.
+ if (slim_bram_xy_addr_value[7:3] == col_index_value)
+ calc_rectangle_valid_lsb = slim_bram_xy_bank_value != BANK_SLIM_EXT;
+ else
+ calc_rectangle_valid_lsb = 1'b0;
+ //
+ end
+ endfunction
+
function [7:0] calc_square_bitmap_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
@@ -265,6 +287,32 @@ module modexpng_part_recombinator
//
end
endfunction
+
+ function [7:0] calc_rectangle_bitmap_lsb; // one-hot mask with bit addr[2:0] set when addr[7:3] equals col_index_value and the bank is not BANK_SLIM_EXT, otherwise all zeros
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value; // not read by the body
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ begin
+ // Both conditions must hold for any bit to be set.
+ if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT))
+ // The case covers all eight values of addr[2:0]; there is no default, so X/Z bits in the selector match no branch.
+ case (slim_bram_xy_addr_value[2:0])
+ 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001;
+ 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010;
+ 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100;
+ 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000;
+ 3'b100: calc_rectangle_bitmap_lsb = 8'b00010000;
+ 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000;
+ 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000;
+ 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000;
+ endcase
+ //
+ else
+ calc_rectangle_bitmap_lsb = {8{1'b0}};
+ //
+ end
+ endfunction
function [2:0] calc_square_index_lsb;
input [4:0] col_index_value;
@@ -315,6 +363,32 @@ module modexpng_part_recombinator
//
end
endfunction
+
+ function [2:0] calc_rectangle_index_lsb; // returns addr[2:0] when addr[7:3] equals col_index_value and the bank is not BANK_SLIM_EXT, otherwise 3'dX
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value; // not read by the body
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ begin
+ // Same qualifying condition as the bitmap function for the lsb path.
+ if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT))
+ // Each branch returns the selector value itself; there is no default branch.
+ case (slim_bram_xy_addr_value[2:0])
+ 3'b000: calc_rectangle_index_lsb = 3'd0;
+ 3'b001: calc_rectangle_index_lsb = 3'd1;
+ 3'b010: calc_rectangle_index_lsb = 3'd2;
+ 3'b011: calc_rectangle_index_lsb = 3'd3;
+ 3'b100: calc_rectangle_index_lsb = 3'd4;
+ 3'b101: calc_rectangle_index_lsb = 3'd5;
+ 3'b110: calc_rectangle_index_lsb = 3'd6;
+ 3'b111: calc_rectangle_index_lsb = 3'd7;
+ endcase
+ // The index is deliberately unknown (X) when the condition does not hold.
+ else
+ calc_rectangle_index_lsb = 3'dX;
+ //
+ end
+ endfunction
function calc_square_purge_lsb;
input [4:0] col_index_value;
@@ -330,6 +404,20 @@ module modexpng_part_recombinator
end
endfunction
+ function calc_rectangle_purge_lsb; // 1 when addr[7:3] equals both col_index_value and col_index_last_value, otherwise 0
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [7:0] slim_bram_xy_addr_value;
+ begin
+ // Unlike the other lsb helpers this one takes no bank argument, so the bank is not checked here.
+ if (slim_bram_xy_addr_value[7:3] == col_index_value)
+ calc_rectangle_purge_lsb = slim_bram_xy_addr_value[7:3] == col_index_last_value;
+ else
+ calc_rectangle_purge_lsb = 1'b0;
+ //
+ end
+ endfunction
+
function calc_square_valid_msb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
@@ -344,6 +432,22 @@ module modexpng_part_recombinator
//
end
endfunction
+
+ function calc_rectangle_valid_msb; // 1 only when the address is exactly 1 and the bank is BANK_SLIM_EXT, otherwise 0
+ input [4:0] col_index_value; // not read by the body
+ input [4:0] col_index_last_value; // not read by the body
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ input [7:0] index_last_value; // not read by the body
+ begin
+ // The full 8-bit address is compared here, not just its upper bits.
+ if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
+ calc_rectangle_valid_msb = 1'b1;
+ else
+ calc_rectangle_valid_msb = 1'b0;
+ //
+ end
+ endfunction
function [7:0] calc_square_bitmap_msb;
input [4:0] col_index_value;
@@ -361,6 +465,22 @@ module modexpng_part_recombinator
end
endfunction
+ function [7:0] calc_rectangle_bitmap_msb; // rectangle stage: 8-bit MSB bitmap
+ input [4:0] col_index_value; // not referenced in this function body
+ input [4:0] col_index_last_value; // not referenced in this function body
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ input [7:0] index_last_value; // not referenced in this function body
+ begin
+ // all eight bits are set for address 1 in bank BANK_SLIM_EXT, and all are cleared otherwise
+ if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT)) begin
+ calc_rectangle_bitmap_msb[7:0] = 8'b11111111;
+ end else
+ calc_rectangle_bitmap_msb[7:0] = 8'b00000000;
+ //
+ end
+ endfunction
+
function calc_square_purge_msb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
@@ -376,6 +496,22 @@ module modexpng_part_recombinator
end
endfunction
+ function calc_rectangle_purge_msb; // rectangle stage: 1-bit MSB purge flag
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ input [7:0] index_last_value; // not referenced in this function body
+ begin
+ // for address 1 in bank BANK_SLIM_EXT the flag is 1 when the current column is the last one; otherwise it is 0
+ if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
+ calc_rectangle_purge_msb = col_index_value == col_index_last_value;
+ else
+ calc_rectangle_purge_msb = 1'b0;
+ //
+ end
+ endfunction
+
reg recomb_lsb_ce = 1'b0;
reg recomb_lsb_ce_aux;
@@ -494,7 +630,24 @@ module modexpng_part_recombinator
xy_bitmap_msb_adv[6] <= {8{1'b0}};
xy_purge_msb_adv [6] <= 1'b0;
//
- end
+ end
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
+ //
+ xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+ xy_aux_lsb_adv [6] <= 1'b0;
+ xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+ xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+ xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
+ //
+ xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
+ xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
+ xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
+ //
+ end
//
default: begin
//
@@ -586,15 +739,24 @@ module modexpng_part_recombinator
end
-
reg [ 2:0] fat_bram_xy_bank_reg;
reg [ 7:0] fat_bram_xy_addr_reg;
- reg [ 7:0] fat_bram_xy_cnt_lsb;
- reg [ 7:0] fat_bram_xy_cnt_msb;
reg [17:0] fat_bram_x_dout_reg;
reg [17:0] fat_bram_y_dout_reg;
reg fat_bram_xy_dout_valid_reg = 1'b0;
+ reg [ 2:0] slim_bram_xy_bank_reg;
+ reg [ 7:0] slim_bram_xy_addr_reg;
+ reg [17:0] slim_bram_x_dout_reg;
+ reg [17:0] slim_bram_y_dout_reg;
+ reg slim_bram_xy_dout_valid_reg = 1'b0;
+
+ reg [ 7:0] bram_xy_cnt_lsb;
+ reg [ 7:0] bram_xy_cnt_msb;
+
+ reg bram_xy_cnt_lsb_wrapped;
+ reg bram_xy_cnt_msb_wrapped;
+
reg [15:0] recomb_msb_dout_carry_0;
reg [15:0] recomb_msb_dout_carry_1;
@@ -606,11 +768,21 @@ module modexpng_part_recombinator
reg [ 7:0] recomb_msb_cnt_delay_1 = 8'd0;
reg [ 7:0] recomb_msb_cnt_delay_2 = 8'd0;
- assign fat_bram_xy_bank = fat_bram_xy_bank_reg;
- assign fat_bram_xy_addr = fat_bram_xy_addr_reg;
- assign fat_bram_x_dout = fat_bram_x_dout_reg;
- assign fat_bram_y_dout = fat_bram_y_dout_reg;
- assign fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg;
+ reg recomb_msb_flag_delay_0;
+ reg recomb_msb_flag_delay_1;
+ reg recomb_msb_flag_delay_2;
+
+ assign rcmb_fat_bram_xy_bank = fat_bram_xy_bank_reg;
+ assign rcmb_fat_bram_xy_addr = fat_bram_xy_addr_reg;
+ assign rcmb_fat_bram_x_dout = fat_bram_x_dout_reg;
+ assign rcmb_fat_bram_y_dout = fat_bram_y_dout_reg;
+ assign rcmb_fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg;
+
+ assign rcmb_slim_bram_xy_bank = slim_bram_xy_bank_reg;
+ assign rcmb_slim_bram_xy_addr = slim_bram_xy_addr_reg;
+ assign rcmb_slim_bram_x_dout = slim_bram_x_dout_reg;
+ assign rcmb_slim_bram_y_dout = slim_bram_y_dout_reg;
+ assign rcmb_slim_bram_xy_dout_valid = slim_bram_xy_dout_valid_reg;
reg rdy_reg = 1'b1;
reg rdy_adv = 1'b1;
@@ -629,7 +801,9 @@ module modexpng_part_recombinator
task advance_recomb_msb_dout_delay;
input [15:0] dout;
input [ 7:0] cnt;
+ input flag;
begin
+ //
recomb_msb_dout_delay_0 <= dout;
recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
@@ -637,6 +811,11 @@ module modexpng_part_recombinator
recomb_msb_cnt_delay_0 <= cnt;
recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
+ //
+ recomb_msb_flag_delay_0 <= flag;
+ recomb_msb_flag_delay_1 <= recomb_msb_flag_delay_0;
+ recomb_msb_flag_delay_2 <= recomb_msb_flag_delay_1;
+ //
end
endtask
@@ -659,10 +838,24 @@ module modexpng_part_recombinator
fat_bram_xy_addr_reg <= addr;
fat_bram_x_dout_reg <= dout_x;
fat_bram_y_dout_reg <= dout_y;
- fat_bram_xy_dout_valid_reg <= 1'b1;
+ fat_bram_xy_dout_valid_reg <= valid;
end
endtask
+ task _update_slim_bram_regs; // loads all five slim BRAM output registers in one call (non-blocking assignments)
+ input [ 2:0] bank; // NOTE(review): 3 bits wide here, while the calc_rectangle_* functions take a 2-bit slim bank value - confirm the intended width
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ input valid; // copied into slim_bram_xy_dout_valid_reg
+ begin
+ slim_bram_xy_bank_reg <= bank;
+ slim_bram_xy_addr_reg <= addr;
+ slim_bram_x_dout_reg <= dout_x;
+ slim_bram_y_dout_reg <= dout_y;
+ slim_bram_xy_dout_valid_reg <= valid;
+ end
+ endtask
task set_fat_bram_regs;
input [ 2:0] bank;
@@ -674,44 +867,73 @@ module modexpng_part_recombinator
end
endtask
+ task set_slim_bram_regs; // loads the slim BRAM output registers and marks the output as valid
+ input [ 2:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ begin
+ _update_slim_bram_regs(bank, addr, dout_x, dout_y, 1'b1); // the valid argument is fixed to 1
+ end
+ endtask
+
task clear_fat_bram_regs; // marks the fat BRAM output as not valid
begin
_update_fat_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); // bank, address and both data words are passed as X; valid is 0
end
endtask
+
+ task clear_slim_bram_regs; // marks the slim BRAM output as not valid
+ begin
+ _update_slim_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); // bank, address and both data words are passed as X; valid is 0
+ end
+ endtask
- task _set_fat_bram_cnt_lsb;
+ task _set_bram_cnt_lsb; // loads the LSB word counter together with its wrap flag
input [7:0] cnt;
+ input wrapped; // new value of bram_xy_cnt_lsb_wrapped
begin
- fat_bram_xy_cnt_lsb <= cnt;
+ bram_xy_cnt_lsb <= cnt;
+ bram_xy_cnt_lsb_wrapped <= wrapped;
end
endtask
- task _set_fat_bram_cnt_msb;
+
+ task _set_bram_cnt_msb; // loads the MSB word counter together with its wrap flag
input [7:0] cnt;
+ input wrapped; // new value of bram_xy_cnt_msb_wrapped
begin
- fat_bram_xy_cnt_msb <= cnt;
+ bram_xy_cnt_msb <= cnt;
+ bram_xy_cnt_msb_wrapped <= wrapped;
end
endtask
- task inc_fat_bram_cnt_lsb;
+ task inc_bram_cnt_lsb; // advances the LSB counter, wrapping to 0 after index_last
begin
- _set_fat_bram_cnt_lsb(fat_bram_xy_cnt_lsb + 1'b1);
+ if (bram_xy_cnt_lsb == index_last) // counter has reached its last value
+ _set_bram_cnt_lsb(8'd0, 1'b1); // restart from 0 and set the wrap flag
+ else
+ _set_bram_cnt_lsb(bram_xy_cnt_lsb + 1'b1, bram_xy_cnt_lsb_wrapped); // plain increment, wrap flag keeps its value
end
endtask
- task inc_fat_bram_cnt_msb;
+
+ task inc_bram_cnt_msb; // advances the MSB counter, wrapping to 0 after index_last
begin
- _set_fat_bram_cnt_msb(fat_bram_xy_cnt_msb + 1'b1);
+ if (bram_xy_cnt_msb == index_last) // counter has reached its last value
+ _set_bram_cnt_msb(8'd0, 1'b1); // restart from 0 and set the wrap flag
+ else
+ _set_bram_cnt_msb(bram_xy_cnt_msb + 1'b1, bram_xy_cnt_msb_wrapped); // plain increment, wrap flag keeps its value
end
endtask
- task clr_fat_bram_cnt_lsb;
+ task clr_bram_cnt_lsb; // resets the LSB counter
begin
- _set_fat_bram_cnt_lsb(8'd0);
+ _set_bram_cnt_lsb(8'd0, 1'b0); // counter to 0, wrap flag cleared
end
endtask
- task clr_fat_bram_cnt_msb;
+
+ task clr_bram_cnt_msb; // resets the MSB counter
begin
- _set_fat_bram_cnt_msb(8'd0);
+ _set_bram_cnt_msb(8'd0, 1'b0); // counter to 0, wrap flag cleared
end
endtask
@@ -724,51 +946,53 @@ module modexpng_part_recombinator
always @(posedge clk)
// Clocked process: resets both word counters when ena_x and ena_y are both high, otherwise runs the recombination task selected by rcmb_mode.
if (ena_x & ena_y) begin
- clr_fat_bram_cnt_lsb();
- clr_fat_bram_cnt_msb();
+ clr_bram_cnt_lsb();
+ clr_bram_cnt_msb();
end else begin // if not ready???
// the case has no default arm, so no task is called for an rcmb_mode value other than 1, 2 or 3
case (rcmb_mode)
2'd1: recombine_square();
2'd2: recombine_triangle();
+ 2'd3: recombine_rectangle();
endcase
//
end
task recombine_square;
+ //
begin
//
case (rcmb_xy_dout_valid)
//
- 2'b01: inc_fat_bram_cnt_lsb();
- 2'b10: inc_fat_bram_cnt_msb();
+ 2'b01: inc_bram_cnt_lsb();
+ 2'b10: inc_bram_cnt_msb();
2'b11: begin
- if (fat_bram_xy_cnt_lsb == index_last) clr_fat_bram_cnt_lsb();
- else inc_fat_bram_cnt_lsb();
- inc_fat_bram_cnt_msb();
+ inc_bram_cnt_lsb();
+ inc_bram_cnt_msb();
end
//
endcase
//
case (rcmb_xy_dout_valid)
//
- 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
- else clear_fat_bram_regs();
- 2'b01: set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
- 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs();
- else set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
- 2'b11: if (fat_bram_xy_cnt_lsb < index_last) set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}});
- else set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ 2'b00: if (recomb_msb_flag_delay_2) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
+ else clear_fat_bram_regs();
+ 2'b01: set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ 2'b10: if (bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs();
+ else set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
+ 2'b11: if (bram_xy_cnt_lsb_wrapped) set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}});
+ else set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ default: clear_fat_bram_regs(); // DEBUG!!!
//
endcase
//
case (rcmb_xy_dout_valid)
//
- 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0);
- 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout);
- //
- 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, fat_bram_xy_cnt_msb);
- if (fat_bram_xy_cnt_lsb < index_last) shift_recomb_msb_dout_carry({16{1'bX}});
+ 2'b00: if (recomb_msb_flag_delay_2) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
+ 2'b10: if (bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout);
+// //
+ 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1);
+ if (bram_xy_cnt_lsb_wrapped) shift_recomb_msb_dout_carry({16{1'bX}});
end
//
endcase
@@ -779,50 +1003,111 @@ module modexpng_part_recombinator
task recombine_triangle;
+ // Triangle mode: only rcmb_xy_dout_valid == 2'b01 produces a write, and it goes to the slim BRAM registers.
begin
//
case (rcmb_xy_dout_valid)
//
- 2'b01: begin inc_fat_bram_cnt_lsb();
- if (fat_bram_xy_cnt_lsb == index_last) inc_fat_bram_cnt_msb();
- end
- //
+ 2'b01: inc_bram_cnt_lsb();
+ // the increment is non-blocking (see _set_bram_cnt_lsb), so the case below still reads the pre-increment counter and wrap flag
endcase
//
case (rcmb_xy_dout_valid)
//
- 2'b00: clear_fat_bram_regs();
- 2'b01: if (fat_bram_xy_cnt_msb == 8'd0) set_fat_bram_regs(BANK_FAT_Q, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
- else set_fat_bram_regs(BANK_FAT_Q_EXT, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ 2'b00: clear_slim_bram_regs();
+ 2'b01: if (!bram_xy_cnt_lsb_wrapped) set_slim_bram_regs(BANK_SLIM_Q, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); // before the wrap: BANK_SLIM_Q at the counter address
+ else set_slim_bram_regs(BANK_SLIM_EXT, 8'd1, {2'b00, recomb_lsb_dout}, {18{1'bX}}); // after the wrap: BANK_SLIM_EXT at fixed address 1
+ 2'b10: clear_slim_bram_regs();
+ 2'b11: clear_slim_bram_regs();
//
endcase
//
end
+ //
endtask
+
+ task recombine_rectangle;
+ // Rectangle mode: three case statements on rcmb_xy_dout_valid update the counters, the fat BRAM registers and the MSB delay/carry state.
+ begin
+ // Step 1: advance the word counters (2'b01 -> LSB, 2'b10 -> MSB, 2'b11 -> both).
+ case (rcmb_xy_dout_valid)
+ //
+ 2'b01: inc_bram_cnt_lsb();
+ 2'b10: inc_bram_cnt_msb();
+ 2'b11: begin
+ inc_bram_cnt_lsb();
+ inc_bram_cnt_msb();
+ end
+ // counter updates are non-blocking (see _set_bram_cnt_*), so the cases below still read the pre-increment values
+ endcase
+// // Step 2: choose what is loaded into the fat BRAM output registers; the Y data word is always passed as X.
+ case (rcmb_xy_dout_valid)
+// //
+ 2'b00: if (recomb_msb_flag_delay_2) set_fat_bram_regs(BANK_FAT_MH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}}); // write out the delayed MSB word at its delayed counter value
+ else clear_fat_bram_regs();
+ 2'b01: set_fat_bram_regs(BANK_FAT_ML, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ 2'b10: if (!bram_xy_cnt_msb_wrapped) begin
+ if (bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs(); // no write while the MSB counter is 0 or 1
+ else set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
+ end else
+ set_fat_bram_regs(BANK_FAT_EXT, 8'd0, {2'b00, recomb_msb_dout}, {18{1'bX}}); // after the MSB counter wrapped: BANK_FAT_EXT at fixed address 0
+
+ 2'b11: set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); // LSB word plus recomb_msb_dout_carry_1, each zero-extended by one bit before the addition
+// //
+ endcase
+// // Step 3: maintain the MSB delay line and the MSB carry registers.
+ case (rcmb_xy_dout_valid)
+// //
+ 2'b00: if (recomb_msb_flag_delay_2) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0); // advance the delay line with X data and a cleared flag
+ 2'b10: begin
+ if ((bram_xy_cnt_msb < 8'd2) && !bram_xy_cnt_msb_wrapped) shift_recomb_msb_dout_carry(recomb_msb_dout); // the MSB words not written in step 2 go to the carry registers instead
+ if (bram_xy_cnt_msb_wrapped) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
+ end
+// //
+ 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1); // push the current MSB word, its counter value and a set flag
+ shift_recomb_msb_dout_carry({16{1'bX}});
+ end
+// //
+ endcase
+ //
+ end
+ //
+ endtask
always @(posedge clk)
// Clocked process driving rdy_adv: cleared when ena_x and ena_y are both high, otherwise updated per rcmb_mode while rdy_reg is low.
if (ena_x & ena_y) begin
rdy_adv <= 1'b0;
- end else begin
+ end else if (!rdy_reg) begin
//
- case ({recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid})
+ case (rcmb_mode)
+ // mode 1: while no output word is valid and the flag delay line is active, rdy_adv becomes the inverse of recomb_msb_flag_delay_1
+ 2'd1: case (rcmb_xy_dout_valid)
+ //
+ 2'b00: begin
+ //
+ if (recomb_msb_flag_delay_2) begin
+ //
+ rdy_adv <= ~recomb_msb_flag_delay_1;
+ //
+ end
+ //
+ end
+ endcase
+ // mode 2: on an LSB-only valid word, rdy_adv follows the LSB counter wrap flag
+ 2'd2: case (rcmb_xy_dout_valid)
+ //
+ 2'b01: rdy_adv <= bram_xy_cnt_lsb_wrapped; //
+ // there is no arm for rcmb_mode 2'd3, so this process never changes rdy_adv in that mode
+ endcase
//
- 2'b00: begin
- //
- if (recomb_msb_cnt_delay_2 > 8'd0) begin
- //
- rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0;
- //
- end
- //
- end
endcase
//
end
+ // TODO: add ready (rdy_adv) generation for rcmb_mode == 2'd3 (rectangle recombination)
endmodule