aboutsummaryrefslogtreecommitdiff
path: root/rtl/dev
diff options
context:
space:
mode:
author Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-10-01 15:01:43 +0300
committer Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-10-01 15:01:43 +0300
commit 29fb6afd018c601a2e0c7376656d5e37beb565d6 (patch)
tree dc11ee0c8e5a30113052254be23594da74a8a572 /rtl/dev
parent ec07464d239f7f6379a682ac57b58b863d3f0374 (diff)
Started working on the pipelined Montgomery modular multiplier. Currently it can
do the "square" part of the multiplication, i.e. compute the twice-larger intermediate product AB = A * B.
Diffstat (limited to 'rtl/dev')
-rw-r--r-- rtl/dev/temp.txt 384
1 files changed, 384 insertions, 0 deletions
diff --git a/rtl/dev/temp.txt b/rtl/dev/temp.txt
new file mode 100644
index 0000000..987bd86
--- /dev/null
+++ b/rtl/dev/temp.txt
@@ -0,0 +1,384 @@
+ //
+ // Helper Functions
+ //
+ /*
+ function [INDEX_WIDTH-1:0] calc_preset_a_index; // returns col_in * NUM_MULTS + x_in, truncated to INDEX_WIDTH bits
+ input [INDEX_WIDTH-4:0] col_in; // column operand, INDEX_WIDTH-3 bits wide
+ input integer x_in; // offset added to col_in * NUM_MULTS
+ integer index_out; // full-width intermediate result
+ begin
+ index_out = col_in * NUM_MULTS + x_in;
+ calc_preset_a_index = index_out[INDEX_WIDTH-1:0]; // keep only the low INDEX_WIDTH bits
+ end
+ endfunction
+
+ function [INDEX_WIDTH-1:0] calc_rotate_a_index; // steps the index down by one, wrapping from zero to last_index_in
+ input [INDEX_WIDTH-1:0] current_index_in; // index to step
+ input [INDEX_WIDTH-1:0] last_index_in; // value returned when current_index_in is zero
+ begin
+ if (current_index_in > {INDEX_WIDTH{1'b0}}) // non-zero: decrement
+ calc_rotate_a_index = current_index_in - 1'b1;
+ else // zero: wrap around
+ calc_rotate_a_index = last_index_in;
+ end
+ endfunction
+ */
+
+ /*
+ //
+ // Narrow Counters
+ //
+ reg [INDEX_WIDTH-1:0] din_addr_narrow_reg; // current narrow address, drives din_addr_narrow
+ reg [INDEX_WIDTH-1:0] din_addr_narrow_dly; // din_addr_narrow_reg delayed by one clock
+ localparam [INDEX_WIDTH-1:0] din_addr_narrow_zero = {INDEX_WIDTH{1'b0}};
+ wire [INDEX_WIDTH-1:0] din_addr_narrow_next = (din_addr_narrow_reg < index_last) ? // count up until index_last, then wrap to zero
+ din_addr_narrow_reg + 1'b1 : din_addr_narrow_zero;
+ wire din_addr_narrow_done = din_addr_narrow_reg == index_last; // high while the counter equals index_last; NOTE(review): not referenced in the visible code
+
+ assign din_addr_narrow = din_addr_narrow_reg;
+
+ always @(posedge clk)
+ // one-cycle delayed copy of the narrow address
+ din_addr_narrow_dly <= din_addr_narrow_reg;
+
+ always @(posedge clk)
+ // zeroed when the next state is *_TRIG, advanced when it is *_BUSY; no default, so other next-states hold the value
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero;
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY: din_addr_narrow_reg <= din_addr_narrow_next;
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero;
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_narrow_reg <= din_addr_narrow_next;
+ endcase
+
+
+ //
+ // Helper Functions
+ //
+ function [NUM_MULTS-1:0] calc_mac_clear_bitmask; // returns a one-hot mask with bit number t set
+ input [2:0] t; // bit position, 0..7
+ begin
+ case (t) // NOTE(review): literals are 8 bits wide while the return is NUM_MULTS bits; presumably NUM_MULTS == 8 -- confirm
+ 3'd0: calc_mac_clear_bitmask = 8'b00000001;
+ 3'd1: calc_mac_clear_bitmask = 8'b00000010;
+ 3'd2: calc_mac_clear_bitmask = 8'b00000100;
+ 3'd3: calc_mac_clear_bitmask = 8'b00001000;
+ 3'd4: calc_mac_clear_bitmask = 8'b00010000;
+ 3'd5: calc_mac_clear_bitmask = 8'b00100000;
+ 3'd6: calc_mac_clear_bitmask = 8'b01000000;
+ 3'd7: calc_mac_clear_bitmask = 8'b10000000;
+ endcase
+ end
+ endfunction
+
+ function [NUM_MULTS:0] calc_mac_clear_square; // one-hot mask (top bit always 0) if b_addr_prev[INDEX_WIDTH-1:3] equals current_col_index, else all zeros
+ input [INDEX_WIDTH-4:0] current_col_index; // compared against the upper bits of b_addr_prev
+ input [INDEX_WIDTH-1:0] b_addr_prev; // address; bits [2:0] select which mask bit is set
+ begin
+ if (b_addr_prev[INDEX_WIDTH-1:3] == current_col_index) // upper address bits match the column index
+ calc_mac_clear_square = {1'b0, calc_mac_clear_bitmask(b_addr_prev[2:0])};
+ else // no match: nothing selected
+ calc_mac_clear_square = {1'b0, {NUM_MULTS{1'b0}}};
+ end
+ endfunction
+
+
+ //
+ // Wide Counters
+ //
+ reg [INDEX_WIDTH-1:0] din_addr_wide_reg[0:NUM_MULTS-1]; // NUM_MULTS address registers, indexed 0..NUM_MULTS-1
+
+ integer xi; // loop index; NOTE(review): also used by the for-loop in the MAC Valid Logic always block
+ always @(posedge clk)
+ // update every entry of din_addr_wide_reg on each clock
+ for (xi=0; xi<NUM_MULTS; xi=xi+1)
+ //
+ case (fsm_state_next)
+ // *_TRIG: preset entry xi to column * NUM_MULTS + xi (column 0 for COL_0, col_index + 1 for COL_N)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(0, xi);
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(col_index + 1'b1, xi);
+ // *_BUSY: step each entry down by one, wrapping from zero to index_last
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_wide_reg[xi] <= calc_rotate_a_index(din_addr_wide_reg[xi], index_last);
+ // no default: other next-states hold the current value
+ endcase
+
+
+ //
+ // Enables
+ //
+ reg din_ena_narrow_reg = 1'b0; // drives din_ena_narrow
+ reg [NUM_MULTS-1:0] din_ena_wide_reg = {NUM_MULTS{1'b0}}; // drives din_ena_wide, one bit per wide address
+
+ assign din_ena_narrow = din_ena_narrow_reg;
+ assign din_ena_wide = din_ena_wide_reg;
+
+ always @(posedge clk or negedge rst_n)
+ // asynchronous active-low reset; enable is 1 only while the next state is one of the four MULT_SQUARE states
+ if (rst_n == 1'b0) din_ena_narrow_reg <= 1'b0;
+ else case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_narrow_reg <= 1'b1;
+ default: din_ena_narrow_reg <= 1'b0;
+ endcase
+
+ always @(posedge clk or negedge rst_n)
+ // same decoding as the narrow enable, applied to all NUM_MULTS bits at once
+ if (rst_n == 1'b0) din_ena_wide_reg <= {NUM_MULTS{1'b0}};
+ else case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_wide_reg <= {NUM_MULTS{1'b1}};
+ default: din_ena_wide_reg <= {NUM_MULTS{1'b0}};
+ endcase
+
+
+ //
+ // Modes
+ //
+ reg [2-1:0] din_mode_wide_reg; // drives din_mode_wide
+ reg [2-1:0] din_mode_narrow_reg; // drives din_mode_narrow
+ reg [2-1:0] dout_mode_wide_reg; // NOTE(review): declared but not driven in the visible code
+ reg [2-1:0] dout_mode_narrow_reg; // NOTE(review): declared but not driven in the visible code
+
+ assign din_mode_wide = din_mode_wide_reg;
+ assign din_mode_narrow = din_mode_narrow_reg;
+
+ always @(posedge clk)
+ // wide mode is MODEXPNG_MODE_A while the next state is a MULT_SQUARE state, X otherwise
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_wide_reg <= MODEXPNG_MODE_A;
+ default: din_mode_wide_reg <= 2'bXX;
+ endcase
+
+ always @(posedge clk)
+ // narrow mode is MODEXPNG_MODE_B while the next state is a MULT_SQUARE state, X otherwise
+ case (fsm_state_next)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_narrow_reg <= MODEXPNG_MODE_B;
+ default: din_mode_narrow_reg <= 2'bXX;
+ endcase
+
+
+ //
+ // MAC Array
+ //
+ wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_a[0:NUM_MULTS]; // per-MAC "a" operand, NUM_MULTS+1 entries
+ wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_b; // "b" operand shared by every MAC instance
+ reg [ NUM_MULTS :0] mac_ce; // per-MAC clock enable, NUM_MULTS+1 bits
+ reg [ NUM_MULTS :0] mac_clr; // per-MAC clear, NUM_MULTS+1 bits
+ wire [ MODEXPNG_MAC_WIDTH-1:0] mac_p[0:NUM_MULTS]; // per-MAC "p" output
+ reg [ NUM_MULTS :0] mac_rdy_lsb; // per-MAC ready flag before delay
+ reg [ NUM_MULTS :0] mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1:0]; // mac_rdy_lsb delayed by 1..MODEXPNG_MAC_LATENCY clocks
+
+ //reg [ NUM_MULTS :0] mac_ce_dly[MODEXPNG_MAC_LATENCY-1:0];
+ //wire [ NUM_MULTS :0] mac_rdy;
+
+
+
+
+
+ assign mac_din_b = din_narrow; // the narrow input feeds the shared "b" operand
+
+
+ genvar x; // shared by the generate loops in this section
+ generate for (x=0; x<=NUM_MULTS; x=x+1) // inclusive bound: NUM_MULTS+1 MAC instances
+ begin : gen_macs
+ // disabled: per-MAC ready taken from the last mac_ce_dly stage
+ //assign mac_rdy[x] = mac_ce_dly[MODEXPNG_MAC_LATENCY-1][x];
+ // instance x uses its own ce/clr/a/p and the shared b operand
+ modexpng_mac mac_inst
+ (
+ .clk (clk),
+ .ce (mac_ce[x]),
+ .clr (mac_clr[x]),
+ .a (mac_din_a[x]),
+ .b (mac_din_b),
+ .p (mac_p[x])
+ );
+ //
+ end
+ //
+ endgenerate
+
+ generate for (x=0; x<NUM_MULTS; x=x+1) // NOTE(review): stops at NUM_MULTS-1, so mac_din_a[NUM_MULTS] is not driven in the visible code
+ begin : gen_mac_din_a
+ // slice x of din_wide (MODEXPNG_WORD_WIDTH bits) becomes the "a" operand of MAC x
+ assign mac_din_a[x] = din_wide[x*MODEXPNG_WORD_WIDTH+:MODEXPNG_WORD_WIDTH];
+ //
+ end
+ endgenerate
+
+ generate for (x=0; x<NUM_MULTS; x=x+1) // flatten the address register array onto the din_addr_wide bus
+ begin : gen_din_addr_wide
+ // din_addr_wide_reg[x] occupies bits [x*INDEX_WIDTH +: INDEX_WIDTH] of din_addr_wide
+ assign din_addr_wide[x*INDEX_WIDTH+:INDEX_WIDTH] = din_addr_wide_reg[x];
+ //
+ end
+ endgenerate
+
+
+ //
+ // MAC Clock Enable Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ // asynchronous active-low reset; decodes the current fsm_state (not fsm_state_next); top bit of mac_ce is always written 0
+ if (rst_n == 1'b0) mac_ce <= {1'b0, {NUM_MULTS{1'b0}}};
+ else case (fsm_state)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_ce <= {1'b0, {NUM_MULTS{1'b1}}};
+ default: mac_ce <= {1'b0, {NUM_MULTS{1'b0}}};
+ endcase
+
+
+ //
+ // MAC Valid Logic
+ //
+ integer y; // stage index; NOTE(review): shared by both always blocks below
+
+ always @(posedge clk)
+ // per-bit shift register: stage 0 samples mac_rdy_lsb, stage y samples stage y-1 (MODEXPNG_MAC_LATENCY stages)
+ for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin
+ mac_rdy_lsb_dly[0][xi] <= mac_rdy_lsb[xi];
+ for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1)
+ mac_rdy_lsb_dly[y][xi] <= mac_rdy_lsb_dly[y-1][xi];
+ end
+
+ always @(posedge clk) begin
+ // delay line for fsm_state; inclusive bound gives stages 0..MODEXPNG_MAC_LATENCY (declaration of fsm_state_dly is not visible here)
+ fsm_state_dly[0] <= fsm_state;
+ for (y=1; y<=MODEXPNG_MAC_LATENCY; y=y+1)
+ fsm_state_dly[y] <= fsm_state_dly[y-1];
+ end
+
+ */
+
+ /*
+ always @(posedge clk)
+ // per-bit shift register for mac_ce; NOTE(review): the mac_ce_dly declaration is itself commented out in the visible code
+ for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin
+ mac_ce_dly[0][xi] <= mac_ce[xi];
+ for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1)
+ mac_ce_dly[y][xi] <= mac_ce_dly[y-1][xi];
+ end
+ */
+ /*
+ always @(posedge clk)
+ // per-bit shift register for mac_clr; NOTE(review): no mac_clr_dly declaration appears in the visible code
+ for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin
+ mac_clr_dly[0][xi] <= mac_clr[xi];
+ for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1)
+ mac_clr_dly[y][xi] <= mac_clr_dly[y-1][xi];
+ end
+ */
+
+ /*
+ //
+ // MAC Clear Logic
+ //
+ always @(posedge clk)
+ // *_TRIG: set all NUM_MULTS lower clear bits; *_BUSY: mask computed from col_index and the delayed narrow address; otherwise X
+ case (fsm_state)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_clr <= {1'b0, {NUM_MULTS{1'b1}}};
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_clr <= calc_mac_clear_square(col_index, din_addr_narrow_dly);
+ default: mac_clr <= {1'bX, {NUM_MULTS{1'bX}}};
+ endcase
+
+
+ //
+ // MAC Ready Logic
+ //
+ always @(posedge clk)
+ // in all four MULT_SQUARE states: mask computed from col_index and the undelayed narrow address (din_addr_narrow); otherwise X
+ case (fsm_state)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_rdy_lsb <= calc_mac_clear_square(col_index, din_addr_narrow);
+ default: mac_rdy_lsb <= {1'bX, {NUM_MULTS{1'bX}}};
+ endcase
+
+
+
+ //
+ // Recombinators
+ //
+ reg rcmb_lsb_ce; // recombinator clock enable
+ reg rcmb_lsb_clr; // recombinator clear
+ reg [MODEXPNG_MAC_WIDTH-1: 0] rcmb_lsb_din; // recombinator input, same width as a MAC output
+ wire [15: 0] rcmb_lsb_dout; // 16-bit recombinator output
+
+ modexpng_part_recombinator recomb_lsb // single recombinator instance wired to the rcmb_lsb_* signals
+ (
+ .clk (clk),
+ .ce (rcmb_lsb_ce),
+ .clr (rcmb_lsb_clr),
+ .din (rcmb_lsb_din),
+ .dout (rcmb_lsb_dout)
+ );
+
+
+ reg calc_rcmb_lsb_ce; // combinational: any of the lower NUM_MULTS delayed ready bits is set
+ always @*
+ // OR-reduce the last delay stage, excluding the top bit
+ calc_rcmb_lsb_ce = | mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0];
+
+ reg [MODEXPNG_MAC_WIDTH-1:0] calc_rcmb_lsb_din; // combinational: output of the MAC whose delayed ready bit is set
+
+ always @*
+ // one-hot select of mac_p; patterns have no wildcard bits, so anything not exactly one-hot yields X. NOTE(review): 8-bit patterns presume NUM_MULTS == 8 -- confirm
+ casez (mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0])
+ 8'b00000001: calc_rcmb_lsb_din = mac_p[0];
+ 8'b00000010: calc_rcmb_lsb_din = mac_p[1];
+ 8'b00000100: calc_rcmb_lsb_din = mac_p[2];
+ 8'b00001000: calc_rcmb_lsb_din = mac_p[3];
+ 8'b00010000: calc_rcmb_lsb_din = mac_p[4];
+ 8'b00100000: calc_rcmb_lsb_din = mac_p[5];
+ 8'b01000000: calc_rcmb_lsb_din = mac_p[6];
+ 8'b10000000: calc_rcmb_lsb_din = mac_p[7];
+ default: calc_rcmb_lsb_din = {MODEXPNG_MAC_WIDTH{1'bX}};
+ endcase
+
+ always @(posedge clk or negedge rst_n)
+ // asynchronous active-low reset; registers calc_rcmb_lsb_ce while the last fsm_state_dly stage is a MULT_SQUARE state, else 0
+ if (rst_n == 1'b0)
+ rcmb_lsb_ce <= 1'b0;
+ else case (fsm_state_dly[MODEXPNG_MAC_LATENCY])
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_ce <= calc_rcmb_lsb_ce;
+ default: rcmb_lsb_ce <= 1'b0;
+ endcase
+
+ always @(posedge clk)
+ // clear is asserted only when the last fsm_state_dly stage is COL_0_TRIG; every other state deasserts it
+ case (fsm_state_dly[MODEXPNG_MAC_LATENCY])
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG: rcmb_lsb_clr <= 1'b1;
+ default: rcmb_lsb_clr <= 1'b0;
+ endcase
+
+ always @(posedge clk)
+ // registers calc_rcmb_lsb_din while the last fsm_state_dly stage is a MULT_SQUARE state; X otherwise
+ case (fsm_state_dly[MODEXPNG_MAC_LATENCY])
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_din <= calc_rcmb_lsb_din;
+ default: rcmb_lsb_din <= {MODEXPNG_MAC_WIDTH{1'bX}};
+ endcase
+
+
+
+*/