From 29fb6afd018c601a2e0c7376656d5e37beb565d6 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 15:01:43 +0300 Subject: Started working on the pipelined Montgomery modular multiplier. Currently can do the "square" part of the multiplication, i.e. compute the twice larger intermediate product AB = A * B. --- bench/tb_mmm_x8_dual.v | 327 ++++++++++++++++++++++ bench/tb_square.v | 716 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1043 insertions(+) create mode 100644 bench/tb_mmm_x8_dual.v create mode 100644 bench/tb_square.v (limited to 'bench') diff --git a/bench/tb_mmm_x8_dual.v b/bench/tb_mmm_x8_dual.v new file mode 100644 index 0000000..aa25900 --- /dev/null +++ b/bench/tb_mmm_x8_dual.v @@ -0,0 +1,327 @@ +`timescale 1ns / 1ps + +module tb_mmm_x8_dual; + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + `include "../rtl/modexpng_parameters_x8.vh" + + + // + // Settings + // + localparam INDEX_WIDTH = 6; + + wire [INDEX_WIDTH-1:0] index_last = 31; // 512 bits + + + // + // Clock + // + `define CLK_FREQUENCY_MHZ 100.0 + `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) + `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + + reg clk = 1'b0; + + always begin + #`CLK_PERIOD_HALF_NS clk = 1'b1; + #`CLK_PERIOD_HALF_NS clk = 1'b0; + end + + + // + // Reset + // + reg rst = 1'b1; + wire rst_n = ~rst; + + + // + // Control + // + reg ena = 1'b0; + wire rdy; + + reg mode; + reg transfer; + + + // + // Interface + // + + + // + // Interface - Data Buses + // + wire [NUM_MULTS*WORD_WIDTH-1:0] x_din; + wire [NUM_MULTS*WORD_WIDTH-1:0] y_din; + wire [NUM_MULTS*WORD_WIDTH-1:0] x_dout; + wire [NUM_MULTS*WORD_WIDTH-1:0] y_dout; + + + // + // Interface - Address Buses + // + wire [INDEX_WIDTH-4:0] x_din_addr; + wire [INDEX_WIDTH-4:0] y_din_addr; + wire [INDEX_WIDTH-4:0] x_dout_addr; + wire [INDEX_WIDTH-4:0] y_dout_addr; + + + // + // Interface - Enable Buses + // + wire [ 1-1:0] x_din_ena; + wire [ 1-1:0] y_din_ena; + wire [ 1-1:0] x_din_reg_ena; + wire [ 1-1:0] y_din_reg_ena; + wire [NUM_MULTS-1:0] x_dout_ena; + wire [NUM_MULTS-1:0] y_dout_ena; + + + // + // Interface - Bank Buses + // + wire [3-1:0] x_din_bank; + wire [3-1:0] y_din_bank; + wire [3-1:0] x_dout_bank; + wire [3-1:0] y_dout_bank; + + + // + // Operands + // + reg [WORD_WIDTH-1:0] T1[0:2**INDEX_WIDTH-1]; + reg [WORD_WIDTH-1:0] T2[0:2**INDEX_WIDTH-1]; + reg [WORD_WIDTH-1:0] N[0:2**INDEX_WIDTH-1]; + reg [WORD_WIDTH-1:0] N_COEFF[0:2**INDEX_WIDTH]; + + + // + // Memories + // + genvar z; + generate for (z=0; z 0) + mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_current - 1'b1; + else + mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_last; + end + endfunction + + + + always @(posedge clk) + // + {dsp_y_ce_a, dsp_x_ce_a} <= {2{mac_slim_bram_xy_reg_ena | mac_slim_bram_xy_reg_ena_dly}}; + + always @(posedge clk) + // + {dsp_y_ce_b, dsp_x_ce_b} <= {2{mac_slim_bram_xy_reg_ena_dly}}; + + always @(posedge clk) + // + {dsp_y_ce_m, dsp_x_ce_m} <= {dsp_y_ce_b_dly, dsp_x_ce_b_dly}; + + always @(posedge clk) + // + {dsp_y_ce_p, dsp_x_ce_p} <= {dsp_y_ce_m, dsp_x_ce_m}; + + always @(posedge clk) + // + {dsp_y_ce_mode, dsp_x_ce_mode} <= {dsp_y_ce_b_dly, dsp_x_ce_b_dly}; + + task wait_clock_tick; + begin + #`CLK_PERIOD_NS; + end + endtask + + // + // Increment Logic + // + always @(posedge clk) + // + case (fsm_state_next) + // + FSM_STATE_MULT_SQUARE_COL_0_INIT: begin + col_index <= 5'd0; + col_index_last <= index_last[7:3]; + end + // + FSM_STATE_MULT_SQUARE_COL_N_INIT: + col_index <= col_index + 1'b1; + // + endcase + + assign fsm_state_after_mult_square = (col_index == col_index_last) ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; + + always @(posedge clk) + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {8{1'b0}}; + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly); + default: dsp_xy_mode_z_adv4 <= {8{1'b1}}; + endcase + + always @(posedge clk) begin + {dsp_y_mode_z, dsp_x_mode_z} <= {2{dsp_xy_mode_z_adv1}}; + // + dsp_xy_mode_z_adv1 <= {dsp_xy_mode_z_adv2}; + dsp_xy_mode_z_adv2 <= {dsp_xy_mode_z_adv3}; + dsp_xy_mode_z_adv3 <= {dsp_xy_mode_z_adv4}; + end + + function [NUM_MULTS-1:0] calc_mac_mode_z_square; + input [ 4:0] col_index_value; + input [ 7:0] mac_slim_bram_xy_addr_value; + begin + if (mac_slim_bram_xy_addr_value[7:3] == col_index_value) + case (mac_slim_bram_xy_addr_value[2:0]) + 3'b000: calc_mac_mode_z_square = 8'b11111110; + 3'b001: calc_mac_mode_z_square = 8'b11111101; + 3'b010: calc_mac_mode_z_square = 8'b11111011; + 3'b011: calc_mac_mode_z_square = 8'b11110111; + 3'b100: calc_mac_mode_z_square = 8'b11101111; + 3'b101: calc_mac_mode_z_square = 8'b11011111; + 3'b110: calc_mac_mode_z_square = 8'b10111111; + 3'b111: calc_mac_mode_z_square = 8'b01111111; + endcase + else + calc_mac_mode_z_square = {NUM_MULTS{1'b1}}; + end + endfunction + + reg recomb_x_ena = 1'b0; + reg recomb_y_ena = 1'b0; + + always @(posedge clk) begin + // + recomb_x_ena <= dsp_x_ce_a && !dsp_x_ce_b && !dsp_x_ce_m && !dsp_x_ce_p; + recomb_y_ena <= dsp_y_ce_a && !dsp_y_ce_b && !dsp_y_ce_m && !dsp_y_ce_p; + // + end + + wire [ 2:0] recomb_fat_bram_xy_bank; + wire [ 7:0] recomb_fat_bram_xy_addr; + wire [17:0] recomb_fat_bram_x_dout; + wire [17:0] recomb_fat_bram_y_dout; + wire recomb_fat_bram_xy_dout_valid; + wire recomb_rdy; + + modexpng_part_recombinator recomb + ( + .clk (clk), + .rdy (recomb_rdy), + .fsm_state_next (fsm_state_next), + .index_last (index_last), + .dsp_x_ce_p (dsp_x_ce_p), + .dsp_y_ce_p (dsp_y_ce_p), + .ena_x (recomb_x_ena), + .ena_y (recomb_y_ena), + .dsp_x_p (dsp_x_p), + .dsp_y_p (dsp_y_p), + .col_index (col_index), + .col_index_last (col_index_last), + .slim_bram_xy_addr (mac_slim_bram_xy_addr), + .fat_bram_xy_bank (recomb_fat_bram_xy_bank), + .fat_bram_xy_addr (recomb_fat_bram_xy_addr), + .fat_bram_x_dout (recomb_fat_bram_x_dout), + .fat_bram_y_dout (recomb_fat_bram_y_dout), + .fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid) + ); + + reg [17:0] AB_READ[0:63]; + + always @(posedge clk) + // + if (recomb_fat_bram_xy_dout_valid) + // + case (recomb_fat_bram_xy_bank) + 3'd1: AB_READ[recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout; + 3'd2: AB_READ[32 + recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout; + endcase + + + always @(posedge clk) + // + if (tb_fat_bram_xy_ena) begin + mgr_fat_bram_xy_ena <= 1'b1; + mgr_fat_bram_xy_bank <= tb_fat_bram_xy_bank; + mgr_fat_bram_xy_addr <= tb_fat_bram_xy_addr; + mgr_fat_bram_x_din <= tb_fat_bram_x_din; + mgr_fat_bram_y_din <= tb_fat_bram_y_din; + end else if (recomb_fat_bram_xy_dout_valid) begin + mgr_fat_bram_xy_ena <= 1'b1; + mgr_fat_bram_xy_bank <= recomb_fat_bram_xy_bank; + mgr_fat_bram_xy_addr <= recomb_fat_bram_xy_addr; + mgr_fat_bram_x_din <= recomb_fat_bram_x_dout; + mgr_fat_bram_y_din <= recomb_fat_bram_y_dout; + end else begin + mgr_fat_bram_xy_ena <= 1'b0; + mgr_fat_bram_xy_bank <= 3'bXXX; + mgr_fat_bram_xy_addr <= 8'hXX; + mgr_fat_bram_x_din <= {18{1'bX}}; + mgr_fat_bram_y_din <= {18{1'bX}}; + end + + + + + + task verify_ab; + reg verify_ab_ok; + begin + verify_ab_ok = 1; + for (i=0; i<64; i=i+1) + if (AB_READ[i] === AB[i]) + $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x", i, AB[i], AB_READ[i]); + else begin + $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x ", i, AB[i], AB_READ[i]); + verify_ab_ok = 0; + end + if (verify_ab_ok) + $display("AB is OK."); + else + $display("AB is WRONG!"); + end + endtask + + + + always @* begin + // + fsm_state_next = FSM_STATE_IDLE; + // + case (fsm_state) + FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE; + + FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ; + FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ; + FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY; + + FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ; + FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY; + + FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF; + + default: fsm_state_next = FSM_STATE_IDLE ; + + endcase + // + end + + +endmodule + -- cgit v1.2.3