diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 15:01:43 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-01 15:01:43 +0300 |
commit | 29fb6afd018c601a2e0c7376656d5e37beb565d6 (patch) | |
tree | dc11ee0c8e5a30113052254be23594da74a8a572 /rtl/dev | |
parent | ec07464d239f7f6379a682ac57b58b863d3f0374 (diff) |
Started working on the pipelined Montgomery modular multiplier. Currently it can
do the "square" part of the multiplication, i.e. compute the double-width
intermediate product AB = A * B.
Diffstat (limited to 'rtl/dev')
-rw-r--r-- | rtl/dev/temp.txt | 384 |
1 files changed, 384 insertions, 0 deletions
diff --git a/rtl/dev/temp.txt b/rtl/dev/temp.txt new file mode 100644 index 0000000..987bd86 --- /dev/null +++ b/rtl/dev/temp.txt @@ -0,0 +1,384 @@ + // + // Helper Functions + // + /* + function [INDEX_WIDTH-1:0] calc_preset_a_index; + input [INDEX_WIDTH-4:0] col_in; + input integer x_in; + integer index_out; + begin + index_out = col_in * NUM_MULTS + x_in; + calc_preset_a_index = index_out[INDEX_WIDTH-1:0]; + end + endfunction + + function [INDEX_WIDTH-1:0] calc_rotate_a_index; + input [INDEX_WIDTH-1:0] current_index_in; + input [INDEX_WIDTH-1:0] last_index_in; + begin + if (current_index_in > {INDEX_WIDTH{1'b0}}) + calc_rotate_a_index = current_index_in - 1'b1; + else + calc_rotate_a_index = last_index_in; + end + endfunction + */ + + /* + // + // Narrow Counters + // + reg [INDEX_WIDTH-1:0] din_addr_narrow_reg; + reg [INDEX_WIDTH-1:0] din_addr_narrow_dly; + localparam [INDEX_WIDTH-1:0] din_addr_narrow_zero = {INDEX_WIDTH{1'b0}}; + wire [INDEX_WIDTH-1:0] din_addr_narrow_next = (din_addr_narrow_reg < index_last) ? 
+ din_addr_narrow_reg + 1'b1 : din_addr_narrow_zero; + wire din_addr_narrow_done = din_addr_narrow_reg == index_last; + + assign din_addr_narrow = din_addr_narrow_reg; + + always @(posedge clk) + // + din_addr_narrow_dly <= din_addr_narrow_reg; + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; + FSM_STATE_MULT_SQUARE_COL_0_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; + FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_narrow_reg <= din_addr_narrow_next; + endcase + + + // + // Helper Functions + // + function [NUM_MULTS-1:0] calc_mac_clear_bitmask; + input [2:0] t; + begin + case (t) + 3'd0: calc_mac_clear_bitmask = 8'b00000001; + 3'd1: calc_mac_clear_bitmask = 8'b00000010; + 3'd2: calc_mac_clear_bitmask = 8'b00000100; + 3'd3: calc_mac_clear_bitmask = 8'b00001000; + 3'd4: calc_mac_clear_bitmask = 8'b00010000; + 3'd5: calc_mac_clear_bitmask = 8'b00100000; + 3'd6: calc_mac_clear_bitmask = 8'b01000000; + 3'd7: calc_mac_clear_bitmask = 8'b10000000; + endcase + end + endfunction + + function [NUM_MULTS:0] calc_mac_clear_square; + input [INDEX_WIDTH-4:0] current_col_index; + input [INDEX_WIDTH-1:0] b_addr_prev; + begin + if (b_addr_prev[INDEX_WIDTH-1:3] == current_col_index) + calc_mac_clear_square = {1'b0, calc_mac_clear_bitmask(b_addr_prev[2:0])}; + else + calc_mac_clear_square = {1'b0, {NUM_MULTS{1'b0}}}; + end + endfunction + + + // + // Wide Counters + // + reg [INDEX_WIDTH-1:0] din_addr_wide_reg[0:NUM_MULTS-1]; + + integer xi; + always @(posedge clk) + // + for (xi=0; xi<NUM_MULTS; xi=xi+1) + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(0, xi); + FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(col_index + 1'b1, xi); + // + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: 
din_addr_wide_reg[xi] <= calc_rotate_a_index(din_addr_wide_reg[xi], index_last); + // + endcase + + + // + // Enables + // + reg din_ena_narrow_reg = 1'b0; + reg [NUM_MULTS-1:0] din_ena_wide_reg = {NUM_MULTS{1'b0}}; + + assign din_ena_narrow = din_ena_narrow_reg; + assign din_ena_wide = din_ena_wide_reg; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) din_ena_narrow_reg <= 1'b0; + else case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_narrow_reg <= 1'b1; + default: din_ena_narrow_reg <= 1'b0; + endcase + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) din_ena_wide_reg <= {NUM_MULTS{1'b0}}; + else case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_wide_reg <= {NUM_MULTS{1'b1}}; + default: din_ena_wide_reg <= {NUM_MULTS{1'b0}}; + endcase + + + // + // Modes + // + reg [2-1:0] din_mode_wide_reg; + reg [2-1:0] din_mode_narrow_reg; + reg [2-1:0] dout_mode_wide_reg; + reg [2-1:0] dout_mode_narrow_reg; + + assign din_mode_wide = din_mode_wide_reg; + assign din_mode_narrow = din_mode_narrow_reg; + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_wide_reg <= MODEXPNG_MODE_A; + default: din_mode_wide_reg <= 2'bXX; + endcase + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_narrow_reg <= MODEXPNG_MODE_B; + default: din_mode_narrow_reg <= 2'bXX; + endcase + + + // + // MAC Array + // + wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_a[0:NUM_MULTS]; + wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_b; + 
reg [ NUM_MULTS :0] mac_ce; + reg [ NUM_MULTS :0] mac_clr; + wire [ MODEXPNG_MAC_WIDTH-1:0] mac_p[0:NUM_MULTS]; + reg [ NUM_MULTS :0] mac_rdy_lsb; + reg [ NUM_MULTS :0] mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1:0]; + + //reg [ NUM_MULTS :0] mac_ce_dly[MODEXPNG_MAC_LATENCY-1:0]; + //wire [ NUM_MULTS :0] mac_rdy; + + + + + + assign mac_din_b = din_narrow; + + + genvar x; + generate for (x=0; x<=NUM_MULTS; x=x+1) + begin : gen_macs + // + //assign mac_rdy[x] = mac_ce_dly[MODEXPNG_MAC_LATENCY-1][x]; + // + modexpng_mac mac_inst + ( + .clk (clk), + .ce (mac_ce[x]), + .clr (mac_clr[x]), + .a (mac_din_a[x]), + .b (mac_din_b), + .p (mac_p[x]) + ); + // + end + // + endgenerate + + generate for (x=0; x<NUM_MULTS; x=x+1) + begin : gen_mac_din_a + // + assign mac_din_a[x] = din_wide[x*MODEXPNG_WORD_WIDTH+:MODEXPNG_WORD_WIDTH]; + // + end + endgenerate + + generate for (x=0; x<NUM_MULTS; x=x+1) + begin : gen_din_addr_wide + // + assign din_addr_wide[x*INDEX_WIDTH+:INDEX_WIDTH] = din_addr_wide_reg[x]; + // + end + endgenerate + + + // + // MAC Clock Enable Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) mac_ce <= {1'b0, {NUM_MULTS{1'b0}}}; + else case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_ce <= {1'b0, {NUM_MULTS{1'b1}}}; + default: mac_ce <= {1'b0, {NUM_MULTS{1'b0}}}; + endcase + + + // + // MAC Valid Logic + // + integer y; + + always @(posedge clk) + // + for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin + mac_rdy_lsb_dly[0][xi] <= mac_rdy_lsb[xi]; + for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1) + mac_rdy_lsb_dly[y][xi] <= mac_rdy_lsb_dly[y-1][xi]; + end + + always @(posedge clk) begin + // + fsm_state_dly[0] <= fsm_state; + for (y=1; y<=MODEXPNG_MAC_LATENCY; y=y+1) + fsm_state_dly[y] <= fsm_state_dly[y-1]; + end + + */ + + /* + always @(posedge clk) + // + for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin + mac_ce_dly[0][xi] <= mac_ce[xi]; + for (y=1; 
y<MODEXPNG_MAC_LATENCY; y=y+1) + mac_ce_dly[y][xi] <= mac_ce_dly[y-1][xi]; + end + */ + /* + always @(posedge clk) + // + for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin + mac_clr_dly[0][xi] <= mac_clr[xi]; + for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1) + mac_clr_dly[y][xi] <= mac_clr_dly[y-1][xi]; + end + */ + + /* + // + // MAC Clear Logic + // + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_clr <= {1'b0, {NUM_MULTS{1'b1}}}; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_clr <= calc_mac_clear_square(col_index, din_addr_narrow_dly); + default: mac_clr <= {1'bX, {NUM_MULTS{1'bX}}}; + endcase + + + // + // MAC Ready Logic + // + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_rdy_lsb <= calc_mac_clear_square(col_index, din_addr_narrow); + default: mac_rdy_lsb <= {1'bX, {NUM_MULTS{1'bX}}}; + endcase + + + + // + // Recombinators + // + reg rcmb_lsb_ce; + reg rcmb_lsb_clr; + reg [MODEXPNG_MAC_WIDTH-1: 0] rcmb_lsb_din; + wire [15: 0] rcmb_lsb_dout; + + modexpng_part_recombinator recomb_lsb + ( + .clk (clk), + .ce (rcmb_lsb_ce), + .clr (rcmb_lsb_clr), + .din (rcmb_lsb_din), + .dout (rcmb_lsb_dout) + ); + + + reg calc_rcmb_lsb_ce; + always @* + // + calc_rcmb_lsb_ce = | mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0]; + + reg [MODEXPNG_MAC_WIDTH-1:0] calc_rcmb_lsb_din; + + always @* + // + casez (mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0]) + 8'b00000001: calc_rcmb_lsb_din = mac_p[0]; + 8'b00000010: calc_rcmb_lsb_din = mac_p[1]; + 8'b00000100: calc_rcmb_lsb_din = mac_p[2]; + 8'b00001000: calc_rcmb_lsb_din = mac_p[3]; + 8'b00010000: calc_rcmb_lsb_din = mac_p[4]; + 8'b00100000: calc_rcmb_lsb_din = mac_p[5]; + 8'b01000000: calc_rcmb_lsb_din = mac_p[6]; + 8'b10000000: calc_rcmb_lsb_din = mac_p[7]; + default: 
calc_rcmb_lsb_din = {MODEXPNG_MAC_WIDTH{1'bX}}; + endcase + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + rcmb_lsb_ce <= 1'b0; + else case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_ce <= calc_rcmb_lsb_ce; + default: rcmb_lsb_ce <= 1'b0; + endcase + + always @(posedge clk) + // + case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) + FSM_STATE_MULT_SQUARE_COL_0_TRIG: rcmb_lsb_clr <= 1'b1; + default: rcmb_lsb_clr <= 1'b0; + endcase + + always @(posedge clk) + // + case (fsm_state_dly[MODEXPNG_MAC_LATENCY]) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_din <= calc_rcmb_lsb_din; + default: rcmb_lsb_din <= {MODEXPNG_MAC_WIDTH{1'bX}}; + endcase + + + +*/ |