From 29fb6afd018c601a2e0c7376656d5e37beb565d6 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 15:01:43 +0300 Subject: Started working on the pipelined Montgomery modular multiplier. Currently can do the "square" part of the multiplication, i.e. compute the twice larger intermediate product AB = A * B. --- rtl/dev/temp.txt | 384 +++++++++++++++++++++++ rtl/dsp/dsp_array.v | 111 +++++++ rtl/dsp/dsp_slice.v | 125 ++++++++ rtl/modexpng_mac.v | 54 ++++ rtl/modexpng_mac_array.v | 116 +++++++ rtl/modexpng_mem.v | 93 ++++++ rtl/modexpng_mmm_col_index.v | 90 ++++++ rtl/modexpng_mmm_din_addr.v | 167 ++++++++++ rtl/modexpng_mmm_dout_addr.v | 167 ++++++++++ rtl/modexpng_mmm_fsm.vh | 24 ++ rtl/modexpng_mmm_pad.v | 153 ++++++++++ rtl/modexpng_mmm_transporter.v | 157 ++++++++++ rtl/modexpng_mmm_x8_dual.v | 550 +++++++++++++++++++++++++++++++++ rtl/modexpng_parameters.vh | 39 +++ rtl/modexpng_parameters_x8.vh | 1 + rtl/modexpng_part_recombinator.v | 623 ++++++++++++++++++++++++++++++++++++++ rtl/modexpng_recombinator_block.v | 35 +++ 17 files changed, 2889 insertions(+) create mode 100644 rtl/dev/temp.txt create mode 100644 rtl/dsp/dsp_array.v create mode 100644 rtl/dsp/dsp_slice.v create mode 100644 rtl/modexpng_mac.v create mode 100644 rtl/modexpng_mac_array.v create mode 100644 rtl/modexpng_mem.v create mode 100644 rtl/modexpng_mmm_col_index.v create mode 100644 rtl/modexpng_mmm_din_addr.v create mode 100644 rtl/modexpng_mmm_dout_addr.v create mode 100644 rtl/modexpng_mmm_fsm.vh create mode 100644 rtl/modexpng_mmm_pad.v create mode 100644 rtl/modexpng_mmm_transporter.v create mode 100644 rtl/modexpng_mmm_x8_dual.v create mode 100644 rtl/modexpng_parameters.vh create mode 100644 rtl/modexpng_parameters_x8.vh create mode 100644 rtl/modexpng_part_recombinator.v create mode 100644 rtl/modexpng_recombinator_block.v (limited to 'rtl') diff --git a/rtl/dev/temp.txt b/rtl/dev/temp.txt new file mode 100644 index 0000000..987bd86 --- /dev/null +++ 
b/rtl/dev/temp.txt @@ -0,0 +1,384 @@ + // + // Helper Functions + // + /* + function [INDEX_WIDTH-1:0] calc_preset_a_index; + input [INDEX_WIDTH-4:0] col_in; + input integer x_in; + integer index_out; + begin + index_out = col_in * NUM_MULTS + x_in; + calc_preset_a_index = index_out[INDEX_WIDTH-1:0]; + end + endfunction + + function [INDEX_WIDTH-1:0] calc_rotate_a_index; + input [INDEX_WIDTH-1:0] current_index_in; + input [INDEX_WIDTH-1:0] last_index_in; + begin + if (current_index_in > {INDEX_WIDTH{1'b0}}) + calc_rotate_a_index = current_index_in - 1'b1; + else + calc_rotate_a_index = last_index_in; + end + endfunction + */ + + /* + // + // Narrow Counters + // + reg [INDEX_WIDTH-1:0] din_addr_narrow_reg; + reg [INDEX_WIDTH-1:0] din_addr_narrow_dly; + localparam [INDEX_WIDTH-1:0] din_addr_narrow_zero = {INDEX_WIDTH{1'b0}}; + wire [INDEX_WIDTH-1:0] din_addr_narrow_next = (din_addr_narrow_reg < index_last) ? + din_addr_narrow_reg + 1'b1 : din_addr_narrow_zero; + wire din_addr_narrow_done = din_addr_narrow_reg == index_last; + + assign din_addr_narrow = din_addr_narrow_reg; + + always @(posedge clk) + // + din_addr_narrow_dly <= din_addr_narrow_reg; + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; + FSM_STATE_MULT_SQUARE_COL_0_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; + FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; + endcase + + + // + // Helper Functions + // + function [NUM_MULTS-1:0] calc_mac_clear_bitmask; + input [2:0] t; + begin + case (t) + 3'd0: calc_mac_clear_bitmask = 8'b00000001; + 3'd1: calc_mac_clear_bitmask = 8'b00000010; + 3'd2: calc_mac_clear_bitmask = 8'b00000100; + 3'd3: calc_mac_clear_bitmask = 8'b00001000; + 3'd4: calc_mac_clear_bitmask = 8'b00010000; + 3'd5: calc_mac_clear_bitmask = 8'b00100000; + 3'd6: calc_mac_clear_bitmask = 8'b01000000; 
+ 3'd7: calc_mac_clear_bitmask = 8'b10000000; + endcase + end + endfunction + + function [NUM_MULTS:0] calc_mac_clear_square; + input [INDEX_WIDTH-4:0] current_col_index; + input [INDEX_WIDTH-1:0] b_addr_prev; + begin + if (b_addr_prev[INDEX_WIDTH-1:3] == current_col_index) + calc_mac_clear_square = {1'b0, calc_mac_clear_bitmask(b_addr_prev[2:0])}; + else + calc_mac_clear_square = {1'b0, {NUM_MULTS{1'b0}}}; + end + endfunction + + + // + // Wide Counters + // + reg [INDEX_WIDTH-1:0] din_addr_wide_reg[0:NUM_MULTS-1]; + + integer xi; + always @(posedge clk) + // + for (xi=0; xi 8'd0) begin + // + rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0; + // + recomb_msb_dout_delay_0 <= {18{1'bX}}; + recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0; + recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1; + // + recomb_msb_cnt_delay_0 <= 8'd0; + recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0; + recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1; + // + fat_bram_xy_bank_reg <= BANK_FAT_ABH; + fat_bram_xy_addr_reg <= recomb_msb_cnt_delay_2; + fat_bram_x_dout_reg <= recomb_msb_dout_delay_2; +// fat_bram_y_dout_reg <= {18{1'bX}}; + fat_bram_xy_dout_valid_reg <= 1'b1; + // + end else begin + // + fat_bram_xy_bank_reg <= 3'bXXX; + fat_bram_xy_addr_reg <= 8'hXX; + fat_bram_x_dout_reg <= {18{1'bX}}; + fat_bram_y_dout_reg <= {18{1'bX}}; + fat_bram_xy_dout_valid_reg <= 1'b0; + // + end + // + end + // + 2'b01: begin + // + fat_bram_xy_bank_reg <= BANK_FAT_ABL; + fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb; + fat_bram_x_dout_reg <= {2'b00, recomb_lsb_dout}; +// fat_bram_y_dout_reg + fat_bram_xy_dout_valid_reg <= 1'b1; + // + fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1; + // + end + // + 2'b10: begin + // + if (fat_bram_xy_cnt_msb < 8'd2) begin + // + recomb_msb_dout_carry_0 <= recomb_msb_dout; + recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0; + // + fat_bram_xy_bank_reg <= 3'bXXX; + fat_bram_xy_addr_reg <= 8'hXX; + fat_bram_x_dout_reg <= {18{1'bX}}; + // fat_bram_y_dout_reg + 
fat_bram_xy_dout_valid_reg <= 1'b0; + // + end else begin + // + fat_bram_xy_bank_reg <= BANK_FAT_ABH; + fat_bram_xy_addr_reg <= fat_bram_xy_cnt_msb; + fat_bram_x_dout_reg <= {2'b00, recomb_msb_dout}; + // fat_bram_y_dout_reg + fat_bram_xy_dout_valid_reg <= 1'b1; + // + end + // + fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1; + // + end + // + 2'b11: begin + // + if (fat_bram_xy_cnt_lsb == index_last) begin + // + fat_bram_xy_bank_reg <= BANK_FAT_ABL; + fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb; + fat_bram_x_dout_reg <= {2'b00, recomb_lsb_dout}; +// fat_bram_y_dout_reg <= {18{1'bX}}; + fat_bram_xy_dout_valid_reg <= 1'b1; + // + fat_bram_xy_cnt_lsb <= 8'd0; + // + end else begin + // + fat_bram_xy_bank_reg <= BANK_FAT_ABH; + fat_bram_xy_addr_reg <= fat_bram_xy_cnt_lsb; + fat_bram_x_dout_reg <= {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}; +// fat_bram_y_dout_reg <= {18{1'bX}}; + fat_bram_xy_dout_valid_reg <= 1'b1; + // + fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1'b1; + // + recomb_msb_dout_carry_0 <= {16{1'bX}}; + recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0; + // + end + // + recomb_msb_dout_delay_0 <= recomb_msb_dout; + recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0; + recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1; + // + recomb_msb_cnt_delay_0 <= fat_bram_xy_cnt_msb; + recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0; + recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1; + // + fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1'b1; + // + end + // + endcase + // + end + + + + +endmodule
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
new file mode 100644
index 0000000..efe0ac5
--- /dev/null
+++ b/rtl/modexpng_recombinator_block.v
@@ -0,0 +1,35 @@
+// Recombination stage built from three chained registers (hi_q, mid_q, lo_q).
+module modexpng_recombinator_block
+(
+    input  wire        clk,  // registers update on the rising edge
+    input  wire        ce,   // clock enable: all registers hold while low
+    input  wire        clr,  // high: mid_q/lo_q load their field without carry-in terms
+    input  wire [46:0] din,  // input word, sliced into three fields below
+    output wire [15:0] dout  // low 16 bits of lo_q
+);
+
+    // Field slices of din.
+    wire [15:0] field_lo  = din[15: 0];
+    wire [15:0] field_mid = din[31:16];
+    wire [14:0] field_hi  = din[46:32]; // TODO: maybe determine more precise bound here
+
+    reg [14:0] hi_q;   // field_hi as captured on the previous enabled clock
+    reg [16:0] mid_q;  // field_mid, plus the old hi_q unless clr
+    reg [17:0] lo_q;   // field_lo, plus the old mid_q and old lo_q[17:16] unless clr
+
+    // Operands are zero-extended so each sum is formed at its register's width.
+    wire [16:0] mid_load = {1'b0, field_mid};
+    wire [17:0] lo_load  = {2'b00, field_lo};
+    wire [16:0] mid_sum  = mid_load + {2'b00, hi_q};
+    wire [17:0] lo_sum   = lo_load + {1'b0, mid_q} + {{16{1'b0}}, lo_q[17:16]};
+
+    // dout comes straight from lo_q; an extra output register (w) was left commented out by the original author.
+    assign dout = lo_q[15:0];
+
+    always @(posedge clk)
+        if (ce) begin
+            hi_q  <= field_hi;
+            mid_q <= clr ? mid_load : mid_sum;
+            lo_q  <= clr ? lo_load  : lo_sum;
+        end
+endmodule
-- cgit v1.2.3