From 9e9689d7b00ecdcc1c651f5e369e00a53d62df3c Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 15:05:11 +0300 Subject: Further work on the Montgomery modular multiplier. Can now do the "triangular" part of multiplication, i.e. compute the "magic" reduction coefficient Q = LSB(AB) * N_COEFF. --- rtl/dsp/dsp_array.v | 42 +++- rtl/modexpng_mmm_fsm.vh | 10 + rtl/modexpng_part_recombinator.v | 455 ++++++++++++++++++++++++++++----------- 3 files changed, 377 insertions(+), 130 deletions(-) (limited to 'rtl') diff --git a/rtl/dsp/dsp_array.v b/rtl/dsp/dsp_array.v index 178f87f..2a050d4 100644 --- a/rtl/dsp/dsp_array.v +++ b/rtl/dsp/dsp_array.v @@ -8,11 +8,11 @@ module dsp_array input ce_p, input ce_mode, - input [8 -1:0] mode_z, + input [9 -1:0] mode_z, - input [4*18-1:0] a, + input [5*18-1:0] a, input [1*17-1:0] b, - output [8*47-1:0] p + output [9*47-1:0] p ); `include "../modexpng_parameters_x8.vh" @@ -37,7 +37,7 @@ module dsp_array genvar z; generate for (z=0; z<(NUM_MULTS/2); z=z+1) // - begin : DSP48E1 + begin : gen_DSP48E1 // dsp_slice # ( @@ -64,7 +64,7 @@ module dsp_array .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}), .alumode (4'b0000), - .casc_a_in ({17{1'b0}}), + .casc_a_in ({18{1'b0}}), .casc_b_in ({17{1'b0}}), .casc_a_out (casc_a[z]), @@ -107,5 +107,37 @@ module dsp_array // endgenerate + dsp_slice # + ( + .AB_INPUT("DIRECT"), + .B_REG(2) + ) + dsp_aux + ( + .clk (clk), + + .ce_a1 (ce_a0), + .ce_b1 (ce_b0), + .ce_a2 (ce_a1), + .ce_b2 (ce_b1), + .ce_m (ce_m), + .ce_p (ce_p), + .ce_mode (ce_mode), + + .a (a[4*18+:18]), + .b (b), + .p (p[47*2*4+:47]), + + .inmode (5'b00000), + .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}), + .alumode (4'b0000), + + .casc_a_in ({18{1'b0}}), + .casc_b_in ({17{1'b0}}), + + .casc_a_out (), + .casc_b_out () + ); + endmodule diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh index c237a0b..2700a42 100644 --- a/rtl/modexpng_mmm_fsm.vh +++ b/rtl/modexpng_mmm_fsm.vh @@ -19,6 +19,16 @@ 
localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15; localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16; localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23; + +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26; +localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27; + localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999; \ No newline at end of file diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v index db4774b..c51e7ef 100644 --- a/rtl/modexpng_part_recombinator.v +++ b/rtl/modexpng_part_recombinator.v @@ -7,7 +7,8 @@ module modexpng_part_recombinator dsp_x_ce_p, dsp_y_ce_p, ena_x, ena_y, dsp_x_p, dsp_y_p, - col_index, col_index_last, slim_bram_xy_addr, + col_index, col_index_last, + slim_bram_xy_addr, slim_bram_xy_bank, fat_bram_xy_bank, fat_bram_xy_addr, fat_bram_x_dout, fat_bram_y_dout, fat_bram_xy_dout_valid ); @@ -28,11 +29,12 @@ module modexpng_part_recombinator input dsp_y_ce_p; input ena_x; input ena_y; - input [8*47-1:0] dsp_x_p; - input [8*47-1:0] dsp_y_p; + input [9*47-1:0] dsp_x_p; + input [9*47-1:0] dsp_y_p; input [ 4:0] col_index; input [ 4:0] col_index_last; input [ 7:0] slim_bram_xy_addr; + input [ 1:0] slim_bram_xy_bank; output [ 2:0] fat_bram_xy_bank; output [ 7:0] fat_bram_xy_addr; @@ -44,18 +46,18 @@ module modexpng_part_recombinator // // Latches // - reg [1*47-1:0] dsp_x_p_latch[0:7]; - reg [1*47-1:0] dsp_y_p_latch[0:7]; + reg [1*47-1:0] dsp_x_p_latch[0:8]; + reg [1*47-1:0] dsp_y_p_latch[0:8]; // // Mapping // - wire [46:0] dsp_x_p_split[0:7]; - 
wire [46:0] dsp_y_p_split[0:7]; + wire [46:0] dsp_x_p_split[0:8]; + wire [46:0] dsp_y_p_split[0:8]; genvar z; - generate for (z=0; z 8'd0) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}}); + else clear_fat_bram_regs(); + 2'b01: set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs(); + else set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}}); + 2'b11: if (fat_bram_xy_cnt_lsb < index_last) set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); + else set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + // + endcase + // + case (rcmb_xy_dout_valid) + // + 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0); + 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout); + // + 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, fat_bram_xy_cnt_msb); + if (fat_bram_xy_cnt_lsb < index_last) shift_recomb_msb_dout_carry({16{1'bX}}); + end + // + endcase + // + end + // + endtask + + + task recombine_triangle; + begin + // + case (rcmb_xy_dout_valid) + // + 2'b01: begin inc_fat_bram_cnt_lsb(); + if (fat_bram_xy_cnt_lsb == index_last) inc_fat_bram_cnt_msb(); + end + // + endcase + // + case (rcmb_xy_dout_valid) + // + 2'b00: clear_fat_bram_regs(); + 2'b01: if (fat_bram_xy_cnt_msb == 8'd0) set_fat_bram_regs(BANK_FAT_Q, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + else set_fat_bram_regs(BANK_FAT_Q_EXT, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); + // + endcase + // + end + endtask + + + always @(posedge clk) // if (ena_x & ena_y) begin rdy_adv <= 1'b0; - fat_bram_xy_cnt_lsb <= 8'd0; - fat_bram_xy_cnt_msb <= 8'd0; end else begin // case ({recomb_x_msb_dout_valid, 
recomb_x_lsb_dout_valid}) @@ -509,115 +816,13 @@ module modexpng_part_recombinator // rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0; // - recomb_msb_dout_delay_0 <= {18{1'bX}}; - recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0; - recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1; - // - recomb_msb_cnt_delay_0 <= 8'd0; - recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0; - recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1; - // - fat_bram_xy_bank_reg <= BANK_FAT_ABH; - fat_bram_xy_addr_reg <= recomb_msb_cnt_delay_2; - fat_bram_x_dout_reg <= recomb_msb_dout_delay_2; -// fat_bram_y_dout_reg <= {18{1'bX}}; - fat_bram_xy_dout_valid_reg <= 1'b1; - // - end else begin - // - fat_bram_xy_bank_reg <= 3'bXXX; - fat_bram_xy_addr_reg <= 8'hXX; - fat_bram_x_dout_reg <= {18{1'bX}}; - fat_bram_y_dout_reg <= {18{1'bX}}; - fat_bram_xy_dout_valid_reg <= 1'b0; - // - end - // - end - // - 2'b01: begin - // - fat_bram_xy_bank_reg <= BANK_FAT_ABL; - fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb; - fat_bram_x_dout_reg <= {2'b00, recomb_lsb_dout}; -// fat_bram_y_dout_reg - fat_bram_xy_dout_valid_reg <= 1'b1; - // - fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1; - // - end - // - 2'b10: begin - // - if (fat_bram_xy_cnt_msb < 8'd2) begin - // - recomb_msb_dout_carry_0 <= recomb_msb_dout; - recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0; - // - fat_bram_xy_bank_reg <= 3'bXXX; - fat_bram_xy_addr_reg <= 8'hXX; - fat_bram_x_dout_reg <= {18{1'bX}}; - // fat_bram_y_dout_reg - fat_bram_xy_dout_valid_reg <= 1'b0; - // - end else begin - // - fat_bram_xy_bank_reg <= BANK_FAT_ABH; - fat_bram_xy_addr_reg <= fat_bram_xy_cnt_msb; - fat_bram_x_dout_reg <= {2'b00, recomb_msb_dout}; - // fat_bram_y_dout_reg - fat_bram_xy_dout_valid_reg <= 1'b1; - // - end - // - fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1; - // - end - // - 2'b11: begin - // - if (fat_bram_xy_cnt_lsb == index_last) begin - // - fat_bram_xy_bank_reg <= BANK_FAT_ABL; - fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb; - 
fat_bram_x_dout_reg <= {2'b00, recomb_lsb_dout}; -// fat_bram_y_dout_reg <= {18{1'bX}}; - fat_bram_xy_dout_valid_reg <= 1'b1; - // - fat_bram_xy_cnt_lsb <= 8'd0; - // - end else begin - // - fat_bram_xy_bank_reg <= BANK_FAT_ABH; - fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb; - fat_bram_x_dout_reg <= {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}; -// fat_bram_y_dout_reg <= {18{1'bX}}; - fat_bram_xy_dout_valid_reg <= 1'b1; - // - fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1; - // - recomb_msb_dout_carry_0 <= {16{1'bX}}; - recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0; - // end // - recomb_msb_dout_delay_0 <= recomb_msb_dout; - recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0; - recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1; - // - recomb_msb_cnt_delay_0 <= fat_bram_xy_cnt_msb; - recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0; - recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1; - // - fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1; - // end - // endcase // end - - - + + endmodule -- cgit v1.2.3