diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 15:05:11 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 15:05:11 +0300 |
commit | 9e9689d7b00ecdcc1c651f5e369e00a53d62df3c (patch) | |
tree | f7bdddda835e26aff3642b99e1ee8b2f1a64434d /rtl | |
parent | 29fb6afd018c601a2e0c7376656d5e37beb565d6 (diff) |
Further work on the Montgomery modular multiplier. Can now do the "triangular"
part of multiplication, i.e. compute the "magic" reduction coefficient
Q = LSB(AB) * N_COEFF.
Diffstat (limited to 'rtl')
-rw-r--r-- | rtl/dsp/dsp_array.v | 42 | ||||
-rw-r--r-- | rtl/modexpng_mmm_fsm.vh | 10 | ||||
-rw-r--r-- | rtl/modexpng_part_recombinator.v | 455 |
3 files changed, 377 insertions, 130 deletions
diff --git a/rtl/dsp/dsp_array.v b/rtl/dsp/dsp_array.v index 178f87f..2a050d4 100644 --- a/rtl/dsp/dsp_array.v +++ b/rtl/dsp/dsp_array.v @@ -8,11 +8,11 @@ module dsp_array input ce_p, input ce_mode, - input [8 -1:0] mode_z, + input [9 -1:0] mode_z, - input [4*18-1:0] a, + input [5*18-1:0] a, input [1*17-1:0] b, - output [8*47-1:0] p + output [9*47-1:0] p ); `include "../modexpng_parameters_x8.vh" @@ -37,7 +37,7 @@ module dsp_array genvar z; generate for (z=0; z<(NUM_MULTS/2); z=z+1) // - begin : DSP48E1 + begin : gen_DSP48E1 // dsp_slice # ( @@ -64,7 +64,7 @@ module dsp_array .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}), .alumode (4'b0000), - .casc_a_in ({17{1'b0}}), + .casc_a_in ({18{1'b0}}), .casc_b_in ({17{1'b0}}), .casc_a_out (casc_a[z]), @@ -107,5 +107,37 @@ module dsp_array // endgenerate + dsp_slice # + ( + .AB_INPUT("DIRECT"), + .B_REG(2) + ) + dsp_aux + ( + .clk (clk), + + .ce_a1 (ce_a0), + .ce_b1 (ce_b0), + .ce_a2 (ce_a1), + .ce_b2 (ce_b1), + .ce_m (ce_m), + .ce_p (ce_p), + .ce_mode (ce_mode), + + .a (a[4*18+:18]), + .b (b), + .p (p[47*2*4+:47]), + + .inmode (5'b00000), + .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}), + .alumode (4'b0000), + + .casc_a_in ({18{1'b0}}), + .casc_b_in ({17{1'b0}}), + + .casc_a_out (), + .casc_b_out () + ); + endmodule diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh index c237a0b..2700a42 100644 --- a/rtl/modexpng_mmm_fsm.vh +++ b/rtl/modexpng_mmm_fsm.vh @@ -19,6 +19,16 @@ localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15; localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16; localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24; +localparam 
[FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27; + localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999;
\ No newline at end of file diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v index db4774b..c51e7ef 100644 --- a/rtl/modexpng_part_recombinator.v +++ b/rtl/modexpng_part_recombinator.v @@ -7,7 +7,8 @@ module modexpng_part_recombinator dsp_x_ce_p, dsp_y_ce_p, ena_x, ena_y, dsp_x_p, dsp_y_p, - col_index, col_index_last, slim_bram_xy_addr, + col_index, col_index_last, + slim_bram_xy_addr, slim_bram_xy_bank, fat_bram_xy_bank, fat_bram_xy_addr, fat_bram_x_dout, fat_bram_y_dout, fat_bram_xy_dout_valid ); @@ -28,11 +29,12 @@ module modexpng_part_recombinator input dsp_y_ce_p; input ena_x; input ena_y; - input [8*47-1:0] dsp_x_p; - input [8*47-1:0] dsp_y_p; + input [9*47-1:0] dsp_x_p; + input [9*47-1:0] dsp_y_p; input [ 4:0] col_index; input [ 4:0] col_index_last; input [ 7:0] slim_bram_xy_addr; + input [ 1:0] slim_bram_xy_bank; output [ 2:0] fat_bram_xy_bank; output [ 7:0] fat_bram_xy_addr; @@ -44,18 +46,18 @@ module modexpng_part_recombinator // // Latches // - reg [1*47-1:0] dsp_x_p_latch[0:7]; - reg [1*47-1:0] dsp_y_p_latch[0:7]; + reg [1*47-1:0] dsp_x_p_latch[0:8]; + reg [1*47-1:0] dsp_y_p_latch[0:8]; // // Mapping // - wire [46:0] dsp_x_p_split[0:7]; - wire [46:0] dsp_y_p_split[0:7]; + wire [46:0] dsp_x_p_split[0:8]; + wire [46:0] dsp_y_p_split[0:8]; genvar z; - generate for (z=0; z<NUM_MULTS; z=z+1) + generate for (z=0; z<(NUM_MULTS+1); z=z+1) begin : gen_dsp_xy_p_split assign dsp_x_p_split[z] = dsp_x_p[47*z+:47]; assign dsp_y_p_split[z] = dsp_y_p[47*z+:47]; @@ -83,6 +85,8 @@ module modexpng_part_recombinator // valid reg x_valid_lsb = 1'b0; reg y_valid_lsb = 1'b0; + reg x_aux_lsb = 1'b0; + reg y_aux_lsb = 1'b0; reg x_valid_msb = 1'b0; reg y_valid_msb = 1'b0; @@ -106,6 +110,10 @@ module modexpng_part_recombinator reg x_valid_latch_lsb = 1'b0; reg y_valid_latch_lsb = 1'b0; + // aux - latch + reg x_aux_latch_lsb = 1'b0; + reg y_aux_latch_lsb = 1'b0; + // bitmap - latch reg [7:0] x_bitmap_latch_lsb = {8{1'b0}}; reg [7:0] 
y_bitmap_latch_lsb = {8{1'b0}}; @@ -125,6 +133,7 @@ module modexpng_part_recombinator // reg xy_valid_lsb_adv[1:6]; reg xy_valid_msb_adv[1:6]; + reg xy_aux_lsb_adv[1:6]; reg [7:0] xy_bitmap_lsb_adv[1:6]; reg [7:0] xy_bitmap_msb_adv[1:6]; reg [2:0] xy_index_lsb_adv[1:6]; @@ -132,11 +141,25 @@ module modexpng_part_recombinator reg xy_purge_lsb_adv[1:6]; reg xy_purge_msb_adv[1:6]; - + reg [1:0] rcmb_mode; + + always @(posedge clk) + // + if (ena_x && ena_y) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2; + //FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3; + default: rcmb_mode <= 2'd0; + endcase + + integer i; initial for (i=1; i<6; i=i+1) begin xy_valid_lsb_adv[i] = 1'b0; xy_valid_msb_adv[i] = 1'b0; + xy_aux_lsb_adv[i] = 1'b0; xy_bitmap_lsb_adv[i] = {8{1'b0}}; xy_bitmap_msb_adv[i] = {8{1'b0}}; xy_index_lsb_adv[i] = 3'dX; @@ -145,7 +168,7 @@ module modexpng_part_recombinator xy_purge_msb_adv[i] = 1'b0; end - function [0:0] calc_square_valid_lsb; + function calc_square_valid_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; input [7:0] slim_bram_xy_addr_value; @@ -159,6 +182,40 @@ module modexpng_part_recombinator end endfunction + function calc_triangle_valid_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + begin + // + if (slim_bram_xy_addr_value[7:3] == col_index_value) + calc_triangle_valid_lsb = 1'b1; + else + calc_triangle_valid_lsb = 1'b0; + // + end + endfunction + + function calc_triangle_aux_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + input [1:0] slim_bram_xy_bank_value; + begin + // + if (slim_bram_xy_bank_value == BANK_SLIM_N_COEFF_EXT) + calc_triangle_aux_lsb = 1'b1; + else + calc_triangle_aux_lsb = 1'b0; + // + //if (slim_bram_xy_addr_value[7:3] == col_index_value) + //calc_triangle_aux_lsb = 1'b1; + //else + 
//calc_triangle_aux_lsb = 1'b0; + // + end + endfunction + function [7:0] calc_square_bitmap_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; @@ -183,7 +240,32 @@ module modexpng_part_recombinator // end endfunction - + + function [7:0] calc_triangle_bitmap_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + begin + // + if (slim_bram_xy_addr_value[7:3] == col_index_value) + // + case (slim_bram_xy_addr_value[2:0]) + 3'b000: calc_triangle_bitmap_lsb = 8'b00000001; + 3'b001: calc_triangle_bitmap_lsb = 8'b00000010; + 3'b010: calc_triangle_bitmap_lsb = 8'b00000100; + 3'b011: calc_triangle_bitmap_lsb = 8'b00001000; + 3'b100: calc_triangle_bitmap_lsb = 8'b00010000; + 3'b101: calc_triangle_bitmap_lsb = 8'b00100000; + 3'b110: calc_triangle_bitmap_lsb = 8'b01000000; + 3'b111: calc_triangle_bitmap_lsb = 8'b10000000; + endcase + // + else + calc_triangle_bitmap_lsb = {8{1'b0}}; + // + end + endfunction + function [2:0] calc_square_index_lsb; input [4:0] col_index_value; input [4:0] col_index_last_value; @@ -208,6 +290,31 @@ module modexpng_part_recombinator // end endfunction + + function [2:0] calc_triangle_index_lsb; + input [4:0] col_index_value; + input [4:0] col_index_last_value; + input [7:0] slim_bram_xy_addr_value; + begin + // + if (slim_bram_xy_addr_value[7:3] == col_index_value) + // + case (slim_bram_xy_addr_value[2:0]) + 3'b000: calc_triangle_index_lsb = 3'd0; + 3'b001: calc_triangle_index_lsb = 3'd1; + 3'b010: calc_triangle_index_lsb = 3'd2; + 3'b011: calc_triangle_index_lsb = 3'd3; + 3'b100: calc_triangle_index_lsb = 3'd4; + 3'b101: calc_triangle_index_lsb = 3'd5; + 3'b110: calc_triangle_index_lsb = 3'd6; + 3'b111: calc_triangle_index_lsb = 3'd7; + endcase + // + else + calc_triangle_index_lsb = 3'dX; + // + end + endfunction function calc_square_purge_lsb; input [4:0] col_index_value; @@ -271,10 +378,10 @@ module modexpng_part_recombinator reg recomb_lsb_ce = 1'b0; + reg 
recomb_lsb_ce_aux; reg [ 2:0] recomb_lsb_ce_purge = 3'b000; - wire recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_purge[0]; + wire recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_aux | recomb_lsb_ce_purge[0]; reg recomb_lsb_clr; - reg recomb_lsb_vld = 1'b0; reg [46:0] recomb_lsb_din; wire [15:0] recomb_lsb_dout; @@ -283,12 +390,7 @@ module modexpng_part_recombinator reg [ 1:0] recomb_msb_ce_purge = 2'b00; wire recomb_msb_ce_combined = recomb_msb_ce | recomb_msb_ce_purge[0]; reg recomb_msb_clr; - reg recomb_msb_vld = 1'b0; - always @(posedge clk) - // - {recomb_msb_vld, recomb_lsb_vld} <= {recomb_msb_ce_combined, recomb_lsb_ce_combined}; - reg [46:0] recomb_msb_din; wire [15:0] recomb_msb_dout; @@ -313,6 +415,7 @@ module modexpng_part_recombinator always @(posedge clk) begin // recomb_lsb_ce <= x_valid_latch_lsb; + recomb_lsb_ce_aux <= x_aux_latch_lsb; recomb_msb_ce <= x_bitmap_latch_msb[0]; // if (x_purge_latch_lsb) @@ -342,6 +445,8 @@ module modexpng_part_recombinator // if (x_valid_latch_lsb) recomb_lsb_din <= dsp_x_p_latch[x_index_latch_lsb]; + else if (x_aux_latch_lsb) + recomb_lsb_din <= dsp_x_p_latch[8]; else recomb_lsb_din <= {47{1'b0}}; @@ -363,6 +468,7 @@ module modexpng_part_recombinator FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin // xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, slim_bram_xy_addr); + xy_aux_lsb_adv [6] <= 1'b0; xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr); xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, slim_bram_xy_addr); xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, slim_bram_xy_addr); @@ -373,9 +479,27 @@ module modexpng_part_recombinator // end // + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin + // + xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, 
slim_bram_xy_addr); /// bank + xy_aux_lsb_adv [6] <= calc_triangle_aux_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank); + xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr); //! bank + xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr); // ! bank!!! + xy_purge_lsb_adv [6] <= 1'b0; + // + xy_valid_msb_adv [6] <= 1'b0; + xy_bitmap_msb_adv[6] <= {8{1'b0}}; + xy_purge_msb_adv [6] <= 1'b0; + // + end + // default: begin // xy_valid_lsb_adv [6] <= 1'b0; + xy_aux_lsb_adv [6] <= 1'b0; xy_bitmap_lsb_adv[6] <= {8{1'b0}}; xy_index_lsb_adv [6] <= 3'dX; xy_purge_lsb_adv [6] <= 1'b0; @@ -392,11 +516,13 @@ module modexpng_part_recombinator always @(posedge clk) begin // {y_valid_lsb, x_valid_lsb} <= {2{xy_valid_lsb_adv [1]}}; + {y_aux_lsb, x_aux_lsb} <= {2{xy_aux_lsb_adv [1]}}; {y_bitmap_lsb, x_bitmap_lsb} <= {2{xy_bitmap_lsb_adv[1]}}; {y_index_lsb, x_index_lsb} <= {2{xy_index_lsb_adv [1]}}; {y_purge_lsb, x_purge_lsb} <= {2{xy_purge_lsb_adv [1]}}; // {y_valid_latch_lsb, x_valid_latch_lsb} <= {y_valid_lsb, x_valid_lsb}; + {y_aux_latch_lsb, x_aux_latch_lsb} <= {y_aux_lsb, x_aux_lsb}; {y_bitmap_latch_lsb, x_bitmap_latch_lsb} <= {y_bitmap_lsb, x_bitmap_lsb}; {y_index_latch_lsb, x_index_latch_lsb} <= {y_index_lsb, x_index_lsb}; {y_purge_latch_lsb, x_purge_latch_lsb} <= {y_purge_lsb, x_purge_lsb}; @@ -415,6 +541,7 @@ module modexpng_part_recombinator // for (i=1; i<6; i=i+1) begin xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1]; + xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1]; xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1]; xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1]; xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1]; @@ -436,7 +563,7 @@ module modexpng_part_recombinator else dsp_x_p_latch[i] <= {47{1'bX}}; // - else if (dsp_x_ce_p_dly1) + else if (dsp_x_ce_p_dly1) begin // for (i=0; i<8; i=i+1) // @@ -444,6 +571,11 @@ module modexpng_part_recombinator dsp_x_p_latch[i] <= 
dsp_x_p_split[i]; else if (x_valid_msb && x_bitmap_msb[i]) dsp_x_p_latch[i] <= dsp_x_p_split[i]; + // + if (x_aux_lsb) + dsp_x_p_latch[8] <= dsp_x_p_split[8]; + // + end reg recomb_x_lsb_dout_valid = 1'b0; reg recomb_x_msb_dout_valid = 1'b0; @@ -493,12 +625,187 @@ module modexpng_part_recombinator else rdy_reg <= rdy_adv; + + task advance_recomb_msb_dout_delay; + input [15:0] dout; + input [ 7:0] cnt; + begin + recomb_msb_dout_delay_0 <= dout; + recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0; + recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1; + // + recomb_msb_cnt_delay_0 <= cnt; + recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0; + recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1; + end + endtask + + task shift_recomb_msb_dout_carry; + input [15:0] dout; + begin + recomb_msb_dout_carry_0 <= dout; + recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0; + end + endtask + + task _update_fat_bram_regs; + input [ 2:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + input valid; + begin + fat_bram_xy_bank_reg <= bank; + fat_bram_xy_addr_reg <= addr; + fat_bram_x_dout_reg <= dout_x; + fat_bram_y_dout_reg <= dout_y; + fat_bram_xy_dout_valid_reg <= 1'b1; + end + endtask + + + task set_fat_bram_regs; + input [ 2:0] bank; + input [ 7:0] addr; + input [17:0] dout_x; + input [17:0] dout_y; + begin + _update_fat_bram_regs(bank, addr, dout_x, dout_y, 1'b1); + end + endtask + + task clear_fat_bram_regs; + begin + _update_fat_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); + end + endtask + + task _set_fat_bram_cnt_lsb; + input [7:0] cnt; + begin + fat_bram_xy_cnt_lsb <= cnt; + end + endtask + task _set_fat_bram_cnt_msb; + input [7:0] cnt; + begin + fat_bram_xy_cnt_msb <= cnt; + end + endtask + + task inc_fat_bram_cnt_lsb; + begin + _set_fat_bram_cnt_lsb(fat_bram_xy_cnt_lsb + 1'b1); + end + endtask + task inc_fat_bram_cnt_msb; + begin + _set_fat_bram_cnt_msb(fat_bram_xy_cnt_msb + 1'b1); + end + endtask + + task clr_fat_bram_cnt_lsb; + begin 
+ _set_fat_bram_cnt_lsb(8'd0); + end + endtask + task clr_fat_bram_cnt_msb; + begin + _set_fat_bram_cnt_msb(8'd0); + end + endtask + + + + + + wire [1:0] rcmb_xy_dout_valid = {recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid}; + + always @(posedge clk) + // + if (ena_x & ena_y) begin + clr_fat_bram_cnt_lsb(); + clr_fat_bram_cnt_msb(); + end else begin // if not ready??? + // + case (rcmb_mode) + 2'd1: recombine_square(); + 2'd2: recombine_triangle(); + endcase + // + end + + task recombine_square; + begin + // + case (rcmb_xy_dout_valid) + // + 2'b01: inc_fat_bram_cnt_lsb(); + 2'b10: inc_fat_bram_cnt_msb(); + 2'b11: begin + if (fat_bram_xy_cnt_lsb == index_last) clr_fat_bram_cnt_lsb(); + else inc_fat_bram_cnt_lsb(); + inc_fat_bram_cnt_msb(); + end + // + endcase + // + case (rcmb_xy_dout_valid) + // + 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}}); + else clear_fat_bram_regs(); + 2'b01: set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs(); + else set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}}); + 2'b11: if (fat_bram_xy_cnt_lsb < index_last) set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); + else set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + // + endcase + // + case (rcmb_xy_dout_valid) + // + 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0); + 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout); + // + 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, fat_bram_xy_cnt_msb); + if (fat_bram_xy_cnt_lsb < index_last) shift_recomb_msb_dout_carry({16{1'bX}}); + end + // + endcase + // + end + // + endtask + + + task 
recombine_triangle; + begin + // + case (rcmb_xy_dout_valid) + // + 2'b01: begin inc_fat_bram_cnt_lsb(); + if (fat_bram_xy_cnt_lsb == index_last) inc_fat_bram_cnt_msb(); + end + // + endcase + // + case (rcmb_xy_dout_valid) + // + 2'b00: clear_fat_bram_regs(); + 2'b01: if (fat_bram_xy_cnt_msb == 8'd0) set_fat_bram_regs(BANK_FAT_Q, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + else set_fat_bram_regs(BANK_FAT_Q_EXT, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + // + endcase + // + end + endtask + + + always @(posedge clk) // if (ena_x & ena_y) begin rdy_adv <= 1'b0; - fat_bram_xy_cnt_lsb <= 8'd0; - fat_bram_xy_cnt_msb <= 8'd0; end else begin // case ({recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid}) @@ -509,115 +816,13 @@ module modexpng_part_recombinator // rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0; // - recomb_msb_dout_delay_0 <= {18{1'bX}}; - recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0; - recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1; - // - recomb_msb_cnt_delay_0 <= 8'd0; - recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0; - recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1; - // - fat_bram_xy_bank_reg <= BANK_FAT_ABH; - fat_bram_xy_addr_reg <= recomb_msb_cnt_delay_2; - fat_bram_x_dout_reg <= recomb_msb_dout_delay_2; -// fat_bram_y_dout_reg <= {18{1'bX}}; - fat_bram_xy_dout_valid_reg <= 1'b1; - // - end else begin - // - fat_bram_xy_bank_reg <= 3'bXXX; - fat_bram_xy_addr_reg <= 8'hXX; - fat_bram_x_dout_reg <= {18{1'bX}}; - fat_bram_y_dout_reg <= {18{1'bX}}; - fat_bram_xy_dout_valid_reg <= 1'b0; - // - end - // - end - // - 2'b01: begin - // - fat_bram_xy_bank_reg <= BANK_FAT_ABL; - fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb; - fat_bram_x_dout_reg <= {2'b00, recomb_lsb_dout}; -// fat_bram_y_dout_reg - fat_bram_xy_dout_valid_reg <= 1'b1; - // - fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1; - // - end - // - 2'b10: begin - // - if (fat_bram_xy_cnt_msb < 8'd2) begin - // - recomb_msb_dout_carry_0 <= recomb_msb_dout; 
- recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0; - // - fat_bram_xy_bank_reg <= 3'bXXX; - fat_bram_xy_addr_reg <= 8'hXX; - fat_bram_x_dout_reg <= {18{1'bX}}; - // fat_bram_y_dout_reg - fat_bram_xy_dout_valid_reg <= 1'b0; - // - end else begin - // - fat_bram_xy_bank_reg <= BANK_FAT_ABH; - fat_bram_xy_addr_reg <= fat_bram_xy_cnt_msb; - fat_bram_x_dout_reg <= {2'b00, recomb_msb_dout}; - // fat_bram_y_dout_reg - fat_bram_xy_dout_valid_reg <= 1'b1; - // - end - // - fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1; - // - end - // - 2'b11: begin - // - if (fat_bram_xy_cnt_lsb == index_last) begin - // - fat_bram_xy_bank_reg <= BANK_FAT_ABL; - fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb; - fat_bram_x_dout_reg <= {2'b00, recomb_lsb_dout}; -// fat_bram_y_dout_reg <= {18{1'bX}}; - fat_bram_xy_dout_valid_reg <= 1'b1; - // - fat_bram_xy_cnt_lsb <= 8'd0; - // - end else begin - // - fat_bram_xy_bank_reg <= BANK_FAT_ABH; - fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb; - fat_bram_x_dout_reg <= {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}; -// fat_bram_y_dout_reg <= {18{1'bX}}; - fat_bram_xy_dout_valid_reg <= 1'b1; - // - fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1; - // - recomb_msb_dout_carry_0 <= {16{1'bX}}; - recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0; - // end // - recomb_msb_dout_delay_0 <= recomb_msb_dout; - recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0; - recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1; - // - recomb_msb_cnt_delay_0 <= fat_bram_xy_cnt_msb; - recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0; - recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1; - // - fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1; - // end - // endcase // end - - - + + endmodule |