aboutsummaryrefslogtreecommitdiff
path: root/rtl
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-01 15:05:11 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-01 15:05:11 +0300
commit9e9689d7b00ecdcc1c651f5e369e00a53d62df3c (patch)
treef7bdddda835e26aff3642b99e1ee8b2f1a64434d /rtl
parent29fb6afd018c601a2e0c7376656d5e37beb565d6 (diff)
Further work on the Montgomery modular multiplier. Can now do the "triangular"
part of multiplication, i.e. compute the "magic" reduction coefficient Q = LSB(AB) * N_COEFF.
Diffstat (limited to 'rtl')
-rw-r--r--rtl/dsp/dsp_array.v42
-rw-r--r--rtl/modexpng_mmm_fsm.vh10
-rw-r--r--rtl/modexpng_part_recombinator.v455
3 files changed, 377 insertions, 130 deletions
diff --git a/rtl/dsp/dsp_array.v b/rtl/dsp/dsp_array.v
index 178f87f..2a050d4 100644
--- a/rtl/dsp/dsp_array.v
+++ b/rtl/dsp/dsp_array.v
@@ -8,11 +8,11 @@ module dsp_array
input ce_p,
input ce_mode,
- input [8 -1:0] mode_z,
+ input [9 -1:0] mode_z,
- input [4*18-1:0] a,
+ input [5*18-1:0] a,
input [1*17-1:0] b,
- output [8*47-1:0] p
+ output [9*47-1:0] p
);
`include "../modexpng_parameters_x8.vh"
@@ -37,7 +37,7 @@ module dsp_array
genvar z;
generate for (z=0; z<(NUM_MULTS/2); z=z+1)
//
- begin : DSP48E1
+ begin : gen_DSP48E1
//
dsp_slice #
(
@@ -64,7 +64,7 @@ module dsp_array
.opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}),
.alumode (4'b0000),
- .casc_a_in ({17{1'b0}}),
+ .casc_a_in ({18{1'b0}}),
.casc_b_in ({17{1'b0}}),
.casc_a_out (casc_a[z]),
@@ -107,5 +107,37 @@ module dsp_array
//
endgenerate
+ //
+ // Auxiliary DSP slice, instantiated once outside the generate loop above.
+ // It multiplies the fifth 18-bit word of `a` (a[4*18+:18]) by the shared
+ // `b` operand and drives the ninth 47-bit slot of `p` (p[47*2*4+:47],
+ // i.e. p[376+:47]). Its opmode select bit comes from mode_z[8].
+ // The cascade inputs are tied to zero and the cascade outputs are left
+ // unconnected, so this slice is not part of any cascade chain.
+ //
+ dsp_slice #
+ (
+ .AB_INPUT("DIRECT"),
+ .B_REG(2)
+ )
+ dsp_aux
+ (
+ .clk (clk),
+
+ // note: slice ports ce_*1 / ce_*2 are driven by module signals ce_*0 / ce_*1
+ .ce_a1 (ce_a0),
+ .ce_b1 (ce_b0),
+ .ce_a2 (ce_a1),
+ .ce_b2 (ce_b1),
+ .ce_m (ce_m),
+ .ce_p (ce_p),
+ .ce_mode (ce_mode),
+
+ .a (a[4*18+:18]),
+ .b (b),
+ .p (p[47*2*4+:47]),
+
+ .inmode (5'b00000),
+ .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}),
+ .alumode (4'b0000),
+
+ .casc_a_in ({18{1'b0}}),
+ .casc_b_in ({17{1'b0}}),
+
+ .casc_a_out (),
+ .casc_b_out ()
+ );
+
endmodule
diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh
index c237a0b..2700a42 100644
--- a/rtl/modexpng_mmm_fsm.vh
+++ b/rtl/modexpng_mmm_fsm.vh
@@ -19,6 +19,16 @@ localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15;
localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16;
localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17;
+
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23;
+
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27;
+
localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999;
\ No newline at end of file
diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v
index db4774b..c51e7ef 100644
--- a/rtl/modexpng_part_recombinator.v
+++ b/rtl/modexpng_part_recombinator.v
@@ -7,7 +7,8 @@ module modexpng_part_recombinator
dsp_x_ce_p, dsp_y_ce_p,
ena_x, ena_y,
dsp_x_p, dsp_y_p,
- col_index, col_index_last, slim_bram_xy_addr,
+ col_index, col_index_last,
+ slim_bram_xy_addr, slim_bram_xy_bank,
fat_bram_xy_bank, fat_bram_xy_addr, fat_bram_x_dout, fat_bram_y_dout, fat_bram_xy_dout_valid
);
@@ -28,11 +29,12 @@ module modexpng_part_recombinator
input dsp_y_ce_p;
input ena_x;
input ena_y;
- input [8*47-1:0] dsp_x_p;
- input [8*47-1:0] dsp_y_p;
+ input [9*47-1:0] dsp_x_p;
+ input [9*47-1:0] dsp_y_p;
input [ 4:0] col_index;
input [ 4:0] col_index_last;
input [ 7:0] slim_bram_xy_addr;
+ input [ 1:0] slim_bram_xy_bank;
output [ 2:0] fat_bram_xy_bank;
output [ 7:0] fat_bram_xy_addr;
@@ -44,18 +46,18 @@ module modexpng_part_recombinator
//
// Latches
//
- reg [1*47-1:0] dsp_x_p_latch[0:7];
- reg [1*47-1:0] dsp_y_p_latch[0:7];
+ reg [1*47-1:0] dsp_x_p_latch[0:8];
+ reg [1*47-1:0] dsp_y_p_latch[0:8];
//
// Mapping
//
- wire [46:0] dsp_x_p_split[0:7];
- wire [46:0] dsp_y_p_split[0:7];
+ wire [46:0] dsp_x_p_split[0:8];
+ wire [46:0] dsp_y_p_split[0:8];
genvar z;
- generate for (z=0; z<NUM_MULTS; z=z+1)
+ generate for (z=0; z<(NUM_MULTS+1); z=z+1)
begin : gen_dsp_xy_p_split
assign dsp_x_p_split[z] = dsp_x_p[47*z+:47];
assign dsp_y_p_split[z] = dsp_y_p[47*z+:47];
@@ -83,6 +85,8 @@ module modexpng_part_recombinator
// valid
reg x_valid_lsb = 1'b0;
reg y_valid_lsb = 1'b0;
+ reg x_aux_lsb = 1'b0;
+ reg y_aux_lsb = 1'b0;
reg x_valid_msb = 1'b0;
reg y_valid_msb = 1'b0;
@@ -106,6 +110,10 @@ module modexpng_part_recombinator
reg x_valid_latch_lsb = 1'b0;
reg y_valid_latch_lsb = 1'b0;
+ // aux - latch
+ reg x_aux_latch_lsb = 1'b0;
+ reg y_aux_latch_lsb = 1'b0;
+
// bitmap - latch
reg [7:0] x_bitmap_latch_lsb = {8{1'b0}};
reg [7:0] y_bitmap_latch_lsb = {8{1'b0}};
@@ -125,6 +133,7 @@ module modexpng_part_recombinator
//
reg xy_valid_lsb_adv[1:6];
reg xy_valid_msb_adv[1:6];
+ reg xy_aux_lsb_adv[1:6];
reg [7:0] xy_bitmap_lsb_adv[1:6];
reg [7:0] xy_bitmap_msb_adv[1:6];
reg [2:0] xy_index_lsb_adv[1:6];
@@ -132,11 +141,25 @@ module modexpng_part_recombinator
reg xy_purge_lsb_adv[1:6];
reg xy_purge_msb_adv[1:6];
-
+ // Recombination mode selector: 1 = square, 2 = triangle, 0 = none.
+ // It is only updated on clock edges where both ena_x and ena_y are
+ // asserted, and is derived from fsm_state_next; otherwise it holds its
+ // previous value. The dispatch block further down uses it to choose
+ // between recombine_square() and recombine_triangle().
+ reg [1:0] rcmb_mode;
+
+ always @(posedge clk)
+ //
+ if (ena_x && ena_y)
+ //
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1;
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2;
+ //FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
+ default: rcmb_mode <= 2'd0;
+ endcase
+
+
integer i;
initial for (i=1; i<6; i=i+1) begin
xy_valid_lsb_adv[i] = 1'b0;
xy_valid_msb_adv[i] = 1'b0;
+ xy_aux_lsb_adv[i] = 1'b0;
xy_bitmap_lsb_adv[i] = {8{1'b0}};
xy_bitmap_msb_adv[i] = {8{1'b0}};
xy_index_lsb_adv[i] = 3'dX;
@@ -145,7 +168,7 @@ module modexpng_part_recombinator
xy_purge_msb_adv[i] = 1'b0;
end
- function [0:0] calc_square_valid_lsb;
+ function calc_square_valid_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
input [7:0] slim_bram_xy_addr_value;
@@ -159,6 +182,40 @@ module modexpng_part_recombinator
end
endfunction
+ // Returns 1'b1 when the upper five bits of the slim BRAM address
+ // (slim_bram_xy_addr_value[7:3]) equal col_index_value, 1'b0 otherwise.
+ // col_index_last_value is accepted but not used by this function.
+ function calc_triangle_valid_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [7:0] slim_bram_xy_addr_value;
+ begin
+ //
+ if (slim_bram_xy_addr_value[7:3] == col_index_value)
+ calc_triangle_valid_lsb = 1'b1;
+ else
+ calc_triangle_valid_lsb = 1'b0;
+ //
+ end
+ endfunction
+
+ // Returns 1'b1 when the slim BRAM bank equals BANK_SLIM_N_COEFF_EXT,
+ // 1'b0 otherwise. The column index and address inputs are currently
+ // unused; an address-based variant is kept below, commented out.
+ function calc_triangle_aux_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ begin
+ //
+ if (slim_bram_xy_bank_value == BANK_SLIM_N_COEFF_EXT)
+ calc_triangle_aux_lsb = 1'b1;
+ else
+ calc_triangle_aux_lsb = 1'b0;
+ //
+ //if (slim_bram_xy_addr_value[7:3] == col_index_value)
+ //calc_triangle_aux_lsb = 1'b1;
+ //else
+ //calc_triangle_aux_lsb = 1'b0;
+ //
+ end
+ endfunction
+
function [7:0] calc_square_bitmap_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
@@ -183,7 +240,32 @@ module modexpng_part_recombinator
//
end
endfunction
-
+
+ // Returns an 8-bit one-hot bitmap with bit number
+ // slim_bram_xy_addr_value[2:0] set when slim_bram_xy_addr_value[7:3]
+ // equals col_index_value; returns all zeroes otherwise.
+ // col_index_last_value is accepted but not used by this function.
+ function [7:0] calc_triangle_bitmap_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [7:0] slim_bram_xy_addr_value;
+ begin
+ //
+ if (slim_bram_xy_addr_value[7:3] == col_index_value)
+ //
+ case (slim_bram_xy_addr_value[2:0])
+ 3'b000: calc_triangle_bitmap_lsb = 8'b00000001;
+ 3'b001: calc_triangle_bitmap_lsb = 8'b00000010;
+ 3'b010: calc_triangle_bitmap_lsb = 8'b00000100;
+ 3'b011: calc_triangle_bitmap_lsb = 8'b00001000;
+ 3'b100: calc_triangle_bitmap_lsb = 8'b00010000;
+ 3'b101: calc_triangle_bitmap_lsb = 8'b00100000;
+ 3'b110: calc_triangle_bitmap_lsb = 8'b01000000;
+ 3'b111: calc_triangle_bitmap_lsb = 8'b10000000;
+ endcase
+ //
+ else
+ calc_triangle_bitmap_lsb = {8{1'b0}};
+ //
+ end
+ endfunction
+
function [2:0] calc_square_index_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
@@ -208,6 +290,31 @@ module modexpng_part_recombinator
//
end
endfunction
+
+ // Returns the low three address bits (slim_bram_xy_addr_value[2:0]) as
+ // a 3-bit index when slim_bram_xy_addr_value[7:3] equals
+ // col_index_value; returns X (don't care) otherwise.
+ // col_index_last_value is accepted but not used by this function.
+ function [2:0] calc_triangle_index_lsb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [7:0] slim_bram_xy_addr_value;
+ begin
+ //
+ if (slim_bram_xy_addr_value[7:3] == col_index_value)
+ //
+ case (slim_bram_xy_addr_value[2:0])
+ 3'b000: calc_triangle_index_lsb = 3'd0;
+ 3'b001: calc_triangle_index_lsb = 3'd1;
+ 3'b010: calc_triangle_index_lsb = 3'd2;
+ 3'b011: calc_triangle_index_lsb = 3'd3;
+ 3'b100: calc_triangle_index_lsb = 3'd4;
+ 3'b101: calc_triangle_index_lsb = 3'd5;
+ 3'b110: calc_triangle_index_lsb = 3'd6;
+ 3'b111: calc_triangle_index_lsb = 3'd7;
+ endcase
+ //
+ else
+ calc_triangle_index_lsb = 3'dX;
+ //
+ end
+ endfunction
function calc_square_purge_lsb;
input [4:0] col_index_value;
@@ -271,10 +378,10 @@ module modexpng_part_recombinator
reg recomb_lsb_ce = 1'b0;
+ reg recomb_lsb_ce_aux;
reg [ 2:0] recomb_lsb_ce_purge = 3'b000;
- wire recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_purge[0];
+ wire recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_aux | recomb_lsb_ce_purge[0];
reg recomb_lsb_clr;
- reg recomb_lsb_vld = 1'b0;
reg [46:0] recomb_lsb_din;
wire [15:0] recomb_lsb_dout;
@@ -283,12 +390,7 @@ module modexpng_part_recombinator
reg [ 1:0] recomb_msb_ce_purge = 2'b00;
wire recomb_msb_ce_combined = recomb_msb_ce | recomb_msb_ce_purge[0];
reg recomb_msb_clr;
- reg recomb_msb_vld = 1'b0;
- always @(posedge clk)
- //
- {recomb_msb_vld, recomb_lsb_vld} <= {recomb_msb_ce_combined, recomb_lsb_ce_combined};
-
reg [46:0] recomb_msb_din;
wire [15:0] recomb_msb_dout;
@@ -313,6 +415,7 @@ module modexpng_part_recombinator
always @(posedge clk) begin
//
recomb_lsb_ce <= x_valid_latch_lsb;
+ recomb_lsb_ce_aux <= x_aux_latch_lsb;
recomb_msb_ce <= x_bitmap_latch_msb[0];
//
if (x_purge_latch_lsb)
@@ -342,6 +445,8 @@ module modexpng_part_recombinator
//
if (x_valid_latch_lsb)
recomb_lsb_din <= dsp_x_p_latch[x_index_latch_lsb];
+ else if (x_aux_latch_lsb)
+ recomb_lsb_din <= dsp_x_p_latch[8];
else
recomb_lsb_din <= {47{1'b0}};
@@ -363,6 +468,7 @@ module modexpng_part_recombinator
FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
//
xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, slim_bram_xy_addr);
+ xy_aux_lsb_adv [6] <= 1'b0;
xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr);
xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, slim_bram_xy_addr);
xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
@@ -373,9 +479,27 @@ module modexpng_part_recombinator
//
end
//
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
+ //
+ xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr); /// bank
+ xy_aux_lsb_adv [6] <= calc_triangle_aux_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+ xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr); //! bank
+ xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr); // ! bank!!!
+ xy_purge_lsb_adv [6] <= 1'b0;
+ //
+ xy_valid_msb_adv [6] <= 1'b0;
+ xy_bitmap_msb_adv[6] <= {8{1'b0}};
+ xy_purge_msb_adv [6] <= 1'b0;
+ //
+ end
+ //
default: begin
//
xy_valid_lsb_adv [6] <= 1'b0;
+ xy_aux_lsb_adv [6] <= 1'b0;
xy_bitmap_lsb_adv[6] <= {8{1'b0}};
xy_index_lsb_adv [6] <= 3'dX;
xy_purge_lsb_adv [6] <= 1'b0;
@@ -392,11 +516,13 @@ module modexpng_part_recombinator
always @(posedge clk) begin
//
{y_valid_lsb, x_valid_lsb} <= {2{xy_valid_lsb_adv [1]}};
+ {y_aux_lsb, x_aux_lsb} <= {2{xy_aux_lsb_adv [1]}};
{y_bitmap_lsb, x_bitmap_lsb} <= {2{xy_bitmap_lsb_adv[1]}};
{y_index_lsb, x_index_lsb} <= {2{xy_index_lsb_adv [1]}};
{y_purge_lsb, x_purge_lsb} <= {2{xy_purge_lsb_adv [1]}};
//
{y_valid_latch_lsb, x_valid_latch_lsb} <= {y_valid_lsb, x_valid_lsb};
+ {y_aux_latch_lsb, x_aux_latch_lsb} <= {y_aux_lsb, x_aux_lsb};
{y_bitmap_latch_lsb, x_bitmap_latch_lsb} <= {y_bitmap_lsb, x_bitmap_lsb};
{y_index_latch_lsb, x_index_latch_lsb} <= {y_index_lsb, x_index_lsb};
{y_purge_latch_lsb, x_purge_latch_lsb} <= {y_purge_lsb, x_purge_lsb};
@@ -415,6 +541,7 @@ module modexpng_part_recombinator
//
for (i=1; i<6; i=i+1) begin
xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
+ xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1];
xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
@@ -436,7 +563,7 @@ module modexpng_part_recombinator
else
dsp_x_p_latch[i] <= {47{1'bX}};
//
- else if (dsp_x_ce_p_dly1)
+ else if (dsp_x_ce_p_dly1) begin
//
for (i=0; i<8; i=i+1)
//
@@ -444,6 +571,11 @@ module modexpng_part_recombinator
dsp_x_p_latch[i] <= dsp_x_p_split[i];
else if (x_valid_msb && x_bitmap_msb[i])
dsp_x_p_latch[i] <= dsp_x_p_split[i];
+ //
+ if (x_aux_lsb)
+ dsp_x_p_latch[8] <= dsp_x_p_split[8];
+ //
+ end
reg recomb_x_lsb_dout_valid = 1'b0;
reg recomb_x_msb_dout_valid = 1'b0;
@@ -493,12 +625,187 @@ module modexpng_part_recombinator
else
rdy_reg <= rdy_adv;
+
+ // Advances the two parallel three-stage delay lines by one step:
+ // `dout` enters recomb_msb_dout_delay_0 and `cnt` enters
+ // recomb_msb_cnt_delay_0, while stages 0 and 1 move on to stages 1 and 2.
+ // All assignments are nonblocking, so each stage receives the value its
+ // predecessor held before the clock edge.
+ task advance_recomb_msb_dout_delay;
+ input [15:0] dout;
+ input [ 7:0] cnt;
+ begin
+ recomb_msb_dout_delay_0 <= dout;
+ recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
+ recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
+ //
+ recomb_msb_cnt_delay_0 <= cnt;
+ recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
+ recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
+ end
+ endtask
+
+ // Shifts the two-stage carry register chain by one step: `dout` enters
+ // recomb_msb_dout_carry_0 and the previous stage 0 value moves to
+ // recomb_msb_dout_carry_1 (nonblocking, so the old value is used).
+ task shift_recomb_msb_dout_carry;
+ input [15:0] dout;
+ begin
+ recomb_msb_dout_carry_0 <= dout;
+ recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;
+ end
+ endtask
+
+ // Low-level writer for the registered fat BRAM interface. Loads the
+ // bank, address, X/Y data words and the data-valid flag in one step.
+ // bank - value for fat_bram_xy_bank_reg
+ // addr - value for fat_bram_xy_addr_reg
+ // dout_x - value for fat_bram_x_dout_reg
+ // dout_y - value for fat_bram_y_dout_reg
+ // valid - value for fat_bram_xy_dout_valid_reg
+ // All assignments are nonblocking and take effect at the clock edge of
+ // the calling always block.
+ task _update_fat_bram_regs;
+ input [ 2:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ input valid;
+ begin
+ fat_bram_xy_bank_reg <= bank;
+ fat_bram_xy_addr_reg <= addr;
+ fat_bram_x_dout_reg <= dout_x;
+ fat_bram_y_dout_reg <= dout_y;
+ // Drive the flag from the argument. It used to be hard-wired to 1'b1,
+ // which left data-valid asserted (with X bank/addr/data) whenever
+ // clear_fat_bram_regs() passed valid = 1'b0.
+ fat_bram_xy_dout_valid_reg <= valid;
+ end
+ endtask
+
+
+ // Convenience wrapper: forwards bank, address and X/Y data words to
+ // _update_fat_bram_regs() with the `valid` argument set to 1'b1.
+ task set_fat_bram_regs;
+ input [ 2:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ begin
+ _update_fat_bram_regs(bank, addr, dout_x, dout_y, 1'b1);
+ end
+ endtask
+
+ // Convenience wrapper: calls _update_fat_bram_regs() with X (don't care)
+ // bank, address and data words and the `valid` argument set to 1'b0.
+ // NOTE(review): the data-valid register is only deasserted if
+ // _update_fat_bram_regs() actually uses its `valid` argument - confirm.
+ task clear_fat_bram_regs;
+ begin
+ _update_fat_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ end
+ endtask
+
+ // Loads the LSB word counter fat_bram_xy_cnt_lsb with `cnt` (nonblocking).
+ task _set_fat_bram_cnt_lsb;
+ input [7:0] cnt;
+ begin
+ fat_bram_xy_cnt_lsb <= cnt;
+ end
+ endtask
+ // Loads the MSB word counter fat_bram_xy_cnt_msb with `cnt` (nonblocking).
+ task _set_fat_bram_cnt_msb;
+ input [7:0] cnt;
+ begin
+ fat_bram_xy_cnt_msb <= cnt;
+ end
+ endtask
+
+ // Increments the LSB word counter by one; the argument is truncated to
+ // the 8-bit task input, so the counter wraps modulo 256.
+ task inc_fat_bram_cnt_lsb;
+ begin
+ _set_fat_bram_cnt_lsb(fat_bram_xy_cnt_lsb + 1'b1);
+ end
+ endtask
+ // Increments the MSB word counter by one; the argument is truncated to
+ // the 8-bit task input, so the counter wraps modulo 256.
+ task inc_fat_bram_cnt_msb;
+ begin
+ _set_fat_bram_cnt_msb(fat_bram_xy_cnt_msb + 1'b1);
+ end
+ endtask
+
+ // Resets the LSB word counter to zero.
+ task clr_fat_bram_cnt_lsb;
+ begin
+ _set_fat_bram_cnt_lsb(8'd0);
+ end
+ endtask
+ // Resets the MSB word counter to zero.
+ task clr_fat_bram_cnt_msb;
+ begin
+ _set_fat_bram_cnt_msb(8'd0);
+ end
+ endtask
+
+
+
+
+
+ // Combined recombinator output-valid flags: bit 1 = MSB valid, bit 0 = LSB valid
+ // (only the X-channel flags are used here).
+ wire [1:0] rcmb_xy_dout_valid = {recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid};
+
+ // Recombination dispatcher. While ena_x and ena_y are both asserted the
+ // two word counters are cleared; on every other clock edge the task
+ // matching rcmb_mode is run. There is no default branch, so for modes
+ // other than 1 and 2 no register is updated by this block.
+ always @(posedge clk)
+ //
+ if (ena_x & ena_y) begin
+ clr_fat_bram_cnt_lsb();
+ clr_fat_bram_cnt_msb();
+ end else begin // if not ready???
+ //
+ case (rcmb_mode)
+ 2'd1: recombine_square();
+ 2'd2: recombine_triangle();
+ endcase
+ //
+ end
+
+ // Per-clock recombination step for the "square" mode. It consists of three
+ // independent case statements, all keyed on rcmb_xy_dout_valid
+ // ({MSB valid, LSB valid}):
+ // 1) word counter update,
+ // 2) selection of what is written to the fat BRAM output registers,
+ // 3) maintenance of the MSB delay line and carry registers.
+ // Every helper task uses nonblocking assignments, so all three statements
+ // observe the counter/register values from before the clock edge.
+ task recombine_square;
+ begin
+ //
+ // 1) counters: LSB counter wraps to zero when it equals index_last and both
+ // flags are valid; combination 2'b00 leaves the counters unchanged
+ case (rcmb_xy_dout_valid)
+ //
+ 2'b01: inc_fat_bram_cnt_lsb();
+ 2'b10: inc_fat_bram_cnt_msb();
+ 2'b11: begin
+ if (fat_bram_xy_cnt_lsb == index_last) clr_fat_bram_cnt_lsb();
+ else inc_fat_bram_cnt_lsb();
+ inc_fat_bram_cnt_msb();
+ end
+ //
+ endcase
+ //
+ // 2) fat BRAM registers:
+ // 2'b00 - flush a delayed MSB word while the delayed count is non-zero
+ // 2'b01 - store the LSB word in BANK_FAT_ABL
+ // 2'b10 - store the MSB word in BANK_FAT_ABH, except for the first two words
+ // 2'b11 - store LSB word plus carry_1 in BANK_FAT_ABH, or the plain LSB
+ // word in BANK_FAT_ABL once the LSB counter has reached index_last
+ case (rcmb_xy_dout_valid)
+ //
+ 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
+ else clear_fat_bram_regs();
+ 2'b01: set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs();
+ else set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
+ 2'b11: if (fat_bram_xy_cnt_lsb < index_last) set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}});
+ else set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ //
+ endcase
+ //
+ // 3) delay line / carry registers:
+ // 2'b00 - drain the delay line (X data, zero count) while it is non-empty
+ // 2'b10 - capture the first two MSB words into the carry chain
+ // 2'b11 - push the MSB word and its counter into the delay line and,
+ // before index_last, shift X into the carry chain
+ case (rcmb_xy_dout_valid)
+ //
+ 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0);
+ 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout);
+ //
+ 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, fat_bram_xy_cnt_msb);
+ if (fat_bram_xy_cnt_lsb < index_last) shift_recomb_msb_dout_carry({16{1'bX}});
+ end
+ //
+ endcase
+ //
+ end
+ //
+ endtask
+
+
+ // Per-clock recombination step for the "triangle" mode. Only the LSB
+ // output path is handled: the valid combinations 2'b10 and 2'b11 match
+ // no branch in either case statement and therefore update nothing here.
+ // The helper tasks use nonblocking assignments, so both case statements
+ // observe the counter values from before the clock edge.
+ task recombine_triangle;
+ begin
+ //
+ // counters: every valid LSB word increments the LSB counter; the MSB
+ // counter is also incremented when the LSB counter equals index_last
+ case (rcmb_xy_dout_valid)
+ //
+ 2'b01: begin inc_fat_bram_cnt_lsb();
+ if (fat_bram_xy_cnt_lsb == index_last) inc_fat_bram_cnt_msb();
+ end
+ //
+ endcase
+ //
+ // fat BRAM registers: LSB words go to BANK_FAT_Q while the MSB counter
+ // is zero and to BANK_FAT_Q_EXT afterwards; with no valid output the
+ // registers are cleared
+ case (rcmb_xy_dout_valid)
+ //
+ 2'b00: clear_fat_bram_regs();
+ 2'b01: if (fat_bram_xy_cnt_msb == 8'd0) set_fat_bram_regs(BANK_FAT_Q, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ else set_fat_bram_regs(BANK_FAT_Q_EXT, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ //
+ endcase
+ //
+ end
+ endtask
+
+
+
always @(posedge clk)
//
if (ena_x & ena_y) begin
rdy_adv <= 1'b0;
- fat_bram_xy_cnt_lsb <= 8'd0;
- fat_bram_xy_cnt_msb <= 8'd0;
end else begin
//
case ({recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid})
@@ -509,115 +816,13 @@ module modexpng_part_recombinator
//
rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0;
//
- recomb_msb_dout_delay_0 <= {18{1'bX}};
- recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
- recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
- //
- recomb_msb_cnt_delay_0 <= 8'd0;
- recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
- recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
- //
- fat_bram_xy_bank_reg <= BANK_FAT_ABH;
- fat_bram_xy_addr_reg <= recomb_msb_cnt_delay_2;
- fat_bram_x_dout_reg <= recomb_msb_dout_delay_2;
-// fat_bram_y_dout_reg <= {18{1'bX}};
- fat_bram_xy_dout_valid_reg <= 1'b1;
- //
- end else begin
- //
- fat_bram_xy_bank_reg <= 3'bXXX;
- fat_bram_xy_addr_reg <= 8'hXX;
- fat_bram_x_dout_reg <= {18{1'bX}};
- fat_bram_y_dout_reg <= {18{1'bX}};
- fat_bram_xy_dout_valid_reg <= 1'b0;
- //
- end
- //
- end
- //
- 2'b01: begin
- //
- fat_bram_xy_bank_reg <= BANK_FAT_ABL;
- fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb;
- fat_bram_x_dout_reg <= {2'b00, recomb_lsb_dout};
-// fat_bram_y_dout_reg
- fat_bram_xy_dout_valid_reg <= 1'b1;
- //
- fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1;
- //
- end
- //
- 2'b10: begin
- //
- if (fat_bram_xy_cnt_msb < 8'd2) begin
- //
- recomb_msb_dout_carry_0 <= recomb_msb_dout;
- recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;
- //
- fat_bram_xy_bank_reg <= 3'bXXX;
- fat_bram_xy_addr_reg <= 8'hXX;
- fat_bram_x_dout_reg <= {18{1'bX}};
- // fat_bram_y_dout_reg
- fat_bram_xy_dout_valid_reg <= 1'b0;
- //
- end else begin
- //
- fat_bram_xy_bank_reg <= BANK_FAT_ABH;
- fat_bram_xy_addr_reg <= fat_bram_xy_cnt_msb;
- fat_bram_x_dout_reg <= {2'b00, recomb_msb_dout};
- // fat_bram_y_dout_reg
- fat_bram_xy_dout_valid_reg <= 1'b1;
- //
- end
- //
- fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1;
- //
- end
- //
- 2'b11: begin
- //
- if (fat_bram_xy_cnt_lsb == index_last) begin
- //
- fat_bram_xy_bank_reg <= BANK_FAT_ABL;
- fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb;
- fat_bram_x_dout_reg <= {2'b00, recomb_lsb_dout};
-// fat_bram_y_dout_reg <= {18{1'bX}};
- fat_bram_xy_dout_valid_reg <= 1'b1;
- //
- fat_bram_xy_cnt_lsb <= 8'd0;
- //
- end else begin
- //
- fat_bram_xy_bank_reg <= BANK_FAT_ABH;
- fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb;
- fat_bram_x_dout_reg <= {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}};
-// fat_bram_y_dout_reg <= {18{1'bX}};
- fat_bram_xy_dout_valid_reg <= 1'b1;
- //
- fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1;
- //
- recomb_msb_dout_carry_0 <= {16{1'bX}};
- recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;
- //
end
//
- recomb_msb_dout_delay_0 <= recomb_msb_dout;
- recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
- recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
- //
- recomb_msb_cnt_delay_0 <= fat_bram_xy_cnt_msb;
- recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
- recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
- //
- fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1;
- //
end
- //
endcase
//
end
-
-
-
+
+
endmodule