From 71b75290bf2ade9a4022bad93dc80bfb77f87f40 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Mon, 10 Jul 2017 15:31:25 +0300 Subject: * made separate file for low-level settings * turned crazy triple multiplier array into one array with input mux --- src/rtl/modexpa7_settings.v | 6 + src/rtl/modexpa7_systolic_multiplier.v | 1298 ++++++++++++++++--------------- src/rtl/pe/modexpa7_adder32.v | 2 +- src/rtl/pe/modexpa7_lowlevel_settings.v | 15 - src/rtl/pe/modexpa7_primitive_switch.v | 16 + src/rtl/pe/modexpa7_subtractor32.v | 2 +- src/rtl/pe/modexpa7_systolic_pe.v | 2 +- src/tb/tb_systolic_multiplier.v | 3 +- 8 files changed, 685 insertions(+), 659 deletions(-) create mode 100644 src/rtl/modexpa7_settings.v delete mode 100644 src/rtl/pe/modexpa7_lowlevel_settings.v create mode 100644 src/rtl/pe/modexpa7_primitive_switch.v diff --git a/src/rtl/modexpa7_settings.v b/src/rtl/modexpa7_settings.v new file mode 100644 index 0000000..0ec6978 --- /dev/null +++ b/src/rtl/modexpa7_settings.v @@ -0,0 +1,6 @@ +localparam SYSTOLIC_PE_LATENCY = 4; + +localparam SYSTOLIC_CNTR_WIDTH = OPERAND_ADDR_WIDTH - SYSTOLIC_ARRAY_POWER; +localparam SYSTOLIC_ARRAY_LENGTH = 2 ** SYSTOLIC_ARRAY_POWER; +localparam SYSTOLIC_NUM_CYCLES = 2 ** SYSTOLIC_CNTR_WIDTH; + diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v index cb1c716..56e7be3 100644 --- a/src/rtl/modexpa7_systolic_multiplier.v +++ b/src/rtl/modexpa7_systolic_multiplier.v @@ -40,16 +40,16 @@ module modexpa7_systolic_multiplier # ( // // This sets the address widths of memory buffers. Internal data - // width is 32 bits, so for e.g. 1024-bit operands buffers must store - // 1024 / 32 = 32 words, and these need 5-bit address bus, because - // 2 ** 5 = 32. + // width is 32 bits, so for e.g. 2048-bit operands buffers must store + // 2048 / 32 = 64 words, and these need 6-bit address bus, because + // 2 ** 6 = 64. 
// - parameter OPERAND_ADDR_WIDTH = 5, + parameter OPERAND_ADDR_WIDTH = 4, // - // This sets the width of the systolic cycle counter. TODO: Explain. + // Explain. // - parameter SYSTOLIC_ARRAY_POWER = 3 + parameter SYSTOLIC_ARRAY_POWER = 2 ) ( input clk, @@ -72,801 +72,819 @@ module modexpa7_systolic_multiplier # output [ 32-1:0] r_bram_in, output r_bram_wr, - input [OPERAND_ADDR_WIDTH-1:0] n_num_words + input [OPERAND_ADDR_WIDTH-1:0] ab_num_words ); - + // - // Constants + // Include Settings // - localparam SYSTOLIC_CNTR_WIDTH = OPERAND_ADDR_WIDTH - SYSTOLIC_ARRAY_POWER; - localparam SYSTOLIC_ARRAY_LENGTH = 2 ** SYSTOLIC_ARRAY_POWER; - localparam SYSTOLIC_NUM_CYCLES = 2 ** SYSTOLIC_CNTR_WIDTH; - - localparam SYSTOLIC_PE_LATENCY = 4; - + `include "pe/modexpa7_primitive_switch.v" + `include "modexpa7_settings.v" + // // FSM Declaration // - localparam [ 3: 0] FSM_STATE_IDLE = 4'd0; - localparam [ 3: 0] FSM_STATE_INIT_ZERO_ADDR = 4'd1; - localparam [ 3: 0] FSM_STATE_INIT_NEXT_ADDR = 4'd2; - localparam [ 3: 0] FSM_STATE_INIT_LAST_ADDR = 4'd3; - localparam [ 3: 0] FSM_STATE_PIPE_CRUNCH = 4'd4; - localparam [ 3: 0] FSM_STATE_PIPE_RELOAD = 4'd5; - localparam [ 3: 0] FSM_STATE_SAVE_ZERO_ADDR = 4'd6; - localparam [ 3: 0] FSM_STATE_SAVE_NEXT_ADDR = 4'd7; - localparam [ 3: 0] FSM_STATE_SAVE_LAST_ADDR = 4'd8; - localparam [ 3: 0] FSM_STATE_STOP = 4'd9; - - reg [ 3: 0] fsm_state = FSM_STATE_IDLE; - reg [ 3: 0] fsm_next_state; + localparam [ 7: 0] FSM_STATE_IDLE = 8'h00; - - // - // Enable Delay (Trigger) - // - reg ena_dly = 1'b0; - wire ena_trig = ena && !ena_dly; - always @(posedge clk) ena_dly <= ena; + localparam [ 7: 0] FSM_STATE_LOAD_B_START = 8'h11; + localparam [ 7: 0] FSM_STATE_LOAD_B_SHIFT = 8'h12; + localparam [ 7: 0] FSM_STATE_LOAD_B_WRITE = 8'h13; + localparam [ 7: 0] FSM_STATE_LOAD_B_FINAL = 8'h14; - - // - // Parameters Latch - // - reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch; + localparam [ 7: 0] FSM_STATE_LOAD_N_COEFF_START = 8'h21; + localparam [ 7: 0] 
FSM_STATE_LOAD_N_COEFF_SHIFT = 8'h22; + localparam [ 7: 0] FSM_STATE_LOAD_N_COEFF_WRITE = 8'h23; + localparam [ 7: 0] FSM_STATE_LOAD_N_COEFF_FINAL = 8'h24; - always @(posedge clk) - // - if (fsm_next_state == FSM_STATE_INIT_ZERO_ADDR) - n_num_words_latch <= n_num_words; + localparam [ 7: 0] FSM_STATE_LOAD_N_START = 8'h31; + localparam [ 7: 0] FSM_STATE_LOAD_N_SHIFT = 8'h32; + localparam [ 7: 0] FSM_STATE_LOAD_N_WRITE = 8'h33; + localparam [ 7: 0] FSM_STATE_LOAD_N_FINAL = 8'h34; + localparam [ 7: 0] FSM_STATE_MULT_A_B_START = 8'h41; + localparam [ 7: 0] FSM_STATE_MULT_A_B_CRUNCH = 8'h42; + localparam [ 7: 0] FSM_STATE_MULT_A_B_RELOAD = 8'h43; + localparam [ 7: 0] FSM_STATE_MULT_A_B_FINAL = 8'h44; - // - // Addresses - // - localparam [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}}; - wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = n_num_words_latch; + localparam [ 7: 0] FSM_STATE_MULT_AB_N_COEFF_START = 8'h51; + localparam [ 7: 0] FSM_STATE_MULT_AB_N_COEFF_CRUNCH = 8'h52; + localparam [ 7: 0] FSM_STATE_MULT_AB_N_COEFF_RELOAD = 8'h53; + localparam [ 7: 0] FSM_STATE_MULT_AB_N_COEFF_FINAL = 8'h54; + + localparam [ 7: 0] FSM_STATE_MULT_Q_N_START = 8'h61; + localparam [ 7: 0] FSM_STATE_MULT_Q_N_CRUNCH = 8'h62; + localparam [ 7: 0] FSM_STATE_MULT_Q_N_RELOAD = 8'h63; + localparam [ 7: 0] FSM_STATE_MULT_Q_N_FINAL = 8'h64; + localparam [ 7: 0] FSM_STATE_STOP = 8'hFF; // - // BRAM Addresses + // FSM State / Next State // - reg [OPERAND_ADDR_WIDTH-1:0] b_bram_addr_reg; - reg [OPERAND_ADDR_WIDTH-1:0] a_bram_addr_reg; - reg [OPERAND_ADDR_WIDTH-1:0] n_coeff_bram_addr_reg; - reg [OPERAND_ADDR_WIDTH-1:0] n_bram_addr_reg; - reg [OPERAND_ADDR_WIDTH-1:0] s_bram_addr_reg; - reg [OPERAND_ADDR_WIDTH-1:0] r_bram_addr_reg; + reg [ 7: 0] fsm_state = FSM_STATE_IDLE; + reg [ 7: 0] fsm_next_state; - wire [OPERAND_ADDR_WIDTH-1:0] s_bram_addr = s_bram_addr_reg; - - reg [OPERAND_ADDR_WIDTH-1:0] b_bram_addr_dly; - reg [OPERAND_ADDR_WIDTH-1:0] n_coeff_bram_addr_dly; - reg 
[OPERAND_ADDR_WIDTH-1:0] n_bram_addr_dly; - reg [OPERAND_ADDR_WIDTH-1:0] s_bram_addr_dly; - - wire [OPERAND_ADDR_WIDTH-1:0] b_bram_addr_next = b_bram_addr + 1'b1; - wire [OPERAND_ADDR_WIDTH-1:0] a_bram_addr_next = a_bram_addr + 1'b1; - wire [OPERAND_ADDR_WIDTH-1:0] n_coeff_bram_addr_next = n_coeff_bram_addr + 1'b1; - wire [OPERAND_ADDR_WIDTH-1:0] n_bram_addr_next = n_bram_addr + 1'b1; - wire [OPERAND_ADDR_WIDTH-1:0] s_bram_addr_next = s_bram_addr + 1'b1; + + // + // Enable Delay and Trigger + // + reg ena_dly = 1'b0; - wire b_bram_addr_done = - (b_bram_addr == bram_addr_last) ? 1'b1 : 1'b0; + /* delay enable by one clock cycle */ + always @(posedge clk) ena_dly <= ena; - wire s_bram_addr_done = - (s_bram_addr == bram_addr_last) ? 1'b1 : 1'b0; + /* trigger new operation when enable goes high */ + wire ena_trig = ena && !ena_dly; + - assign b_bram_addr = b_bram_addr_reg; - assign a_bram_addr = a_bram_addr_reg; - assign n_coeff_bram_addr = n_coeff_bram_addr_reg; - assign n_bram_addr = n_bram_addr_reg; - assign r_bram_addr = r_bram_addr_reg; + // + // Ready Flag Logic + // + reg rdy_reg = 1'b1; + assign rdy = rdy_reg; - always @(posedge clk) b_bram_addr_dly <= b_bram_addr; - always @(posedge clk) n_coeff_bram_addr_dly <= n_coeff_bram_addr; - always @(posedge clk) n_bram_addr_dly <= n_bram_addr; - always @(posedge clk) s_bram_addr_dly <= s_bram_addr; + always @(posedge clk or negedge rst_n) + + /* reset flag */ + if (rst_n == 1'b0) rdy_reg <= 1'b1; + else begin + + /* clear flag when operation is started */ + if (fsm_state == FSM_STATE_IDLE) rdy_reg <= ~ena_trig; + + /* set flag after operation is finished */ + if (fsm_state == FSM_STATE_STOP) rdy_reg <= 1'b1; + + end + - always @(posedge clk) // - case (fsm_next_state) - FSM_STATE_INIT_ZERO_ADDR: b_bram_addr_reg <= bram_addr_zero; - FSM_STATE_INIT_NEXT_ADDR: b_bram_addr_reg <= b_bram_addr_next; - endcase - - always @(posedge clk) - case (fsm_next_state) - FSM_STATE_SAVE_ZERO_ADDR: s_bram_addr_reg <= bram_addr_zero; - 
FSM_STATE_SAVE_NEXT_ADDR: s_bram_addr_reg <= s_bram_addr_next; - endcase - - always @(posedge clk) + // Parameters Latch // - case (fsm_next_state) - FSM_STATE_INIT_LAST_ADDR: a_bram_addr_reg <= bram_addr_zero; - FSM_STATE_PIPE_RELOAD: a_bram_addr_reg <= (a_bram_addr < bram_addr_last) ? a_bram_addr_next : a_bram_addr; - endcase + reg [OPERAND_ADDR_WIDTH-1:0] ab_num_words_latch; + /* save number of words in a and b when new operation starts */ always @(posedge clk) // - case (fsm_next_state) - FSM_STATE_INIT_ZERO_ADDR: n_coeff_bram_addr_reg <= bram_addr_zero; - FSM_STATE_INIT_NEXT_ADDR: n_coeff_bram_addr_reg <= n_coeff_bram_addr_next; - endcase - - - - + if (fsm_next_state == FSM_STATE_LOAD_B_START) + ab_num_words_latch <= ab_num_words; + + // - // Latency Compensation TODO: Remove ab maybe? Looks like latency should be consistent for all cycles... + // Systolic Cycle Counters // - wire [SYSTOLIC_PE_LATENCY:0] pe_latency_start = {{SYSTOLIC_PE_LATENCY{1'b0}}, 1'b1}; - - reg [SYSTOLIC_PE_LATENCY:0] pe_latency_ab_lsb; - reg [SYSTOLIC_PE_LATENCY:0] pe_latency_ab_msb; + + /* handy values */ + wire [SYSTOLIC_CNTR_WIDTH-1:0] syst_cnt_zero = {SYSTOLIC_CNTR_WIDTH{1'b0}}; + wire [SYSTOLIC_CNTR_WIDTH-1:0] syst_cnt_last = ab_num_words_latch[OPERAND_ADDR_WIDTH-1:SYSTOLIC_ARRAY_POWER]; - wire [SYSTOLIC_PE_LATENCY:0] pe_latency_ab_lsb_next = - {pe_latency_ab_lsb[SYSTOLIC_PE_LATENCY-1:0], pe_latency_ab_lsb[SYSTOLIC_PE_LATENCY]}; + /* counters */ + reg [SYSTOLIC_CNTR_WIDTH-1:0] syst_cnt_init; + reg [SYSTOLIC_CNTR_WIDTH-1:0] syst_cnt_load; + reg [SYSTOLIC_CNTR_WIDTH-1:0] syst_cnt_unload; + + /* handy increment values */ + wire [SYSTOLIC_CNTR_WIDTH-1:0] syst_cnt_init_next = syst_cnt_init + 1'b1; + wire [SYSTOLIC_CNTR_WIDTH-1:0] syst_cnt_load_next = syst_cnt_load + 1'b1; + wire [SYSTOLIC_CNTR_WIDTH-1:0] syst_cnt_unload_next = syst_cnt_unload + 1'b1; - wire [SYSTOLIC_PE_LATENCY:0] pe_latency_ab_msb_next = - {pe_latency_ab_msb[SYSTOLIC_PE_LATENCY-1:0], 
pe_latency_ab_msb[SYSTOLIC_PE_LATENCY]}; + /* handy stop flags */ + wire syst_cnt_init_done = (syst_cnt_init == syst_cnt_last) ? 1'b1 : 1'b0; + wire syst_cnt_load_done = (syst_cnt_load == syst_cnt_last) ? 1'b1 : 1'b0; + wire syst_cnt_unload_done = (syst_cnt_unload == syst_cnt_last) ? 1'b1 : 1'b0; - wire pe_latency_ab_lsb_done = pe_latency_ab_lsb[SYSTOLIC_PE_LATENCY]; - wire pe_latency_ab_msb_done = pe_latency_ab_msb[SYSTOLIC_PE_LATENCY]; + /* delayed load counter */ + reg [SYSTOLIC_CNTR_WIDTH-1:0] syst_cnt_load_dly; + always @(posedge clk) syst_cnt_load_dly <= syst_cnt_load; - always @(posedge clk) - // - if (fsm_next_state == FSM_STATE_PIPE_CRUNCH) - // - case (fsm_state) - FSM_STATE_INIT_LAST_ADDR, - FSM_STATE_PIPE_RELOAD: pe_latency_ab_lsb <= pe_latency_start; - FSM_STATE_PIPE_CRUNCH: pe_latency_ab_lsb <= pe_latency_ab_lsb_done ? - pe_latency_ab_lsb : pe_latency_ab_lsb_next; - endcase // - // Buffers + // Multiplier Iteration Counter // - integer i, j; - - reg [31: 0] b_buf[SYSTOLIC_NUM_CYCLES-1:0][SYSTOLIC_ARRAY_LENGTH-1:0]; - reg [31: 0] n_coeff_buf[SYSTOLIC_NUM_CYCLES-1:0][SYSTOLIC_ARRAY_LENGTH-1:0]; - reg [31: 0] n_buf[SYSTOLIC_NUM_CYCLES-1:0][SYSTOLIC_ARRAY_LENGTH-1:0]; + + /* handy values */ + wire [SYSTOLIC_ARRAY_POWER-1:0] mult_cnt_zero = {SYSTOLIC_ARRAY_POWER{1'b0}}; + wire [SYSTOLIC_ARRAY_POWER-1:0] mult_cnt_last = {SYSTOLIC_ARRAY_POWER{1'b1}}; - always @(posedge clk) + /* counter */ + reg [SYSTOLIC_ARRAY_POWER-1:0] mult_cnt; + + /* handy increment value and stop flag */ + wire [SYSTOLIC_ARRAY_POWER-1:0] mult_cnt_next = mult_cnt + 1'b1; + wire mult_cnt_done = (mult_cnt == mult_cnt_last) ? 1'b1 : 1'b0; + + // - case (fsm_state) - FSM_STATE_INIT_ZERO_ADDR: - for (i=0; i mult_cnt_zero) mult_cnt_q <= mult_cnt_q_done ? 
mult_cnt_q : mult_cnt_q_next; - endcase + bram_1rw_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (SYSTOLIC_CNTR_WIDTH + 2) + ) + bram_loader + ( + .clk (clk), + .a_addr ({loader_addr_msb[i], loader_addr_lsb[i]}), + .a_wr (loader_wren[i]), + .a_in (loader_din[i]), + .a_out (loader_dout[i]) + ); + // + end + // + endgenerate + - always @(posedge clk) // - if (fsm_next_state == FSM_STATE_PIPE_CRUNCH) - // - case (fsm_state) - FSM_STATE_INIT_LAST_ADDR: mult_cnt_qn <= mult_cnt_zero; - FSM_STATE_PIPE_RELOAD: if (mult_cnt_q > mult_cnt_zero) mult_cnt_qn <= mult_cnt_qn_done ? mult_cnt_qn : mult_cnt_qn_next; - endcase - - always @(posedge clk) + // Block Memory Addresses // - if (fsm_next_state == FSM_STATE_PIPE_CRUNCH) - // - case (fsm_state) - FSM_STATE_INIT_LAST_ADDR: mult_cnt_s <= mult_cnt_zero; - FSM_STATE_PIPE_RELOAD: if (mult_cnt_qn > mult_cnt_zero) mult_cnt_s <= mult_cnt_s_done ? mult_cnt_qn : mult_cnt_s_next; - endcase + /* + * Explain why there are two memory sizes. + * + */ + + /* the very first addresses */ + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_zero = { {OPERAND_ADDR_WIDTH{1'b0}}}; + wire [OPERAND_ADDR_WIDTH :0] bram_addr_ext_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; + + /* the very last addresses */ + wire [OPERAND_ADDR_WIDTH-1:0] bram_addr_last = {ab_num_words_latch}; + wire [OPERAND_ADDR_WIDTH :0] bram_addr_ext_last = {ab_num_words_latch, 1'b1}; + + /* address registers */ + reg [OPERAND_ADDR_WIDTH-1:0] a_addr; + reg [OPERAND_ADDR_WIDTH-1:0] b_addr; + reg [OPERAND_ADDR_WIDTH-1:0] n_coeff_addr; + reg [OPERAND_ADDR_WIDTH-1:0] n_addr; + reg [OPERAND_ADDR_WIDTH :0] ab_addr_ext; + reg [OPERAND_ADDR_WIDTH-1:0] q_addr; + reg [OPERAND_ADDR_WIDTH :0] qn_addr_ext; + /* handy increment values */ + wire [OPERAND_ADDR_WIDTH-1:0] a_addr_next = a_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] b_addr_next = b_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] n_coeff_addr_next = n_coeff_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] n_addr_next = n_addr + 1'b1; + wire 
[OPERAND_ADDR_WIDTH :0] ab_addr_ext_next = ab_addr_ext + 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] q_addr_next = q_addr + 1'b1; + wire [OPERAND_ADDR_WIDTH :0] qn_addr_ext_next = qn_addr_ext + 1'b1; + + /* handy stop flags */ + wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire b_addr_done = (b_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire n_coeff_addr_done = (n_coeff_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire n_addr_done = (n_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire ab_addr_ext_done = (ab_addr_ext == bram_addr_ext_last) ? 1'b1 : 1'b0; + wire q_addr_done = (q_addr == bram_addr_last) ? 1'b1 : 1'b0; + wire qn_addr_ext_done = (qn_addr_ext == bram_addr_ext_last) ? 1'b1 : 1'b0; + + /* delayed B address */ + reg [OPERAND_ADDR_WIDTH-1:0] b_addr_dly; + always @(posedge clk) b_addr_dly <= b_addr; + + reg [OPERAND_ADDR_WIDTH-1:0] n_coeff_addr_dly; + always @(posedge clk) n_coeff_addr_dly <= n_coeff_addr; + + reg [OPERAND_ADDR_WIDTH-1:0] n_addr_dly; + always @(posedge clk) n_addr_dly <= n_addr; + + /* map registers to top-level ports */ + assign a_bram_addr = a_addr; + assign b_bram_addr = b_addr; + assign n_coeff_bram_addr = n_coeff_addr; + assign n_bram_addr = n_addr; + + + // + // Memory Address Control Logic + // always @(posedge clk) begin - syst_cnt_dly[0] <= syst_cnt; - for (i=1; i mult_cnt_zero)) - for (j=0; j mult_cnt_zero)) - for (j=0; j syst_cnt_zero) - t_ab[syst_cnt_latency-1'b1][SYSTOLIC_ARRAY_LENGTH-1'b1] <= mul_ab_p[0]; - for (j=1; j mult_cnt_zero)) begin - if (syst_cnt_latency > syst_cnt_zero) - t_q[syst_cnt_latency-1'b1][SYSTOLIC_ARRAY_LENGTH-1'b1] <= mul_q_p[0]; - for (j=1; j mult_cnt_zero)) begin - if (syst_cnt_latency > syst_cnt_zero) - t_qn[syst_cnt_latency-1'b1][SYSTOLIC_ARRAY_LENGTH-1'b1] <= mul_qn_p[0]; - for (j=1; j mult_cnt_zero) && !mult_cnt_s_done; - - always @(posedge clk) - pe_sub_ce <= pe_latency_ab_lsb_done && (syst_cnt_latency == syst_cnt_zero) && (mult_cnt_qn > mult_cnt_zero); + + + // + // Shift Registers + // + reg 
[SYSTOLIC_NUM_CYCLES-1:0] shreg_load; + reg [SYSTOLIC_PE_LATENCY :0] shreg_latency; + reg [SYSTOLIC_NUM_CYCLES-1:0] shreg_unload; - always @(posedge clk) - // - if (pe_latency_ab_lsb_done && (syst_cnt_latency == syst_cnt_zero) && (mult_cnt_q > mult_cnt_zero) && !mult_cnt_s_done) - pe_add_c_in <= (mult_cnt_qn == mult_cnt_zero) ? 1'b0 : pe_add_c_out; + wire shreg_done_load = shreg_load[syst_cnt_last]; + wire shreg_done_latency = shreg_latency[SYSTOLIC_PE_LATENCY]; + wire shreg_done_unload = shreg_unload[syst_cnt_last]; - always @(posedge clk) - // - if (pe_latency_ab_lsb_done && (syst_cnt_latency == syst_cnt_zero) && (mult_cnt_qn > mult_cnt_zero)) - pe_sub_b_in <= (mult_cnt_s == mult_cnt_zero) ? 1'b0 : pe_sub_b_out; - + reg shreg_now_loading; + reg shreg_now_latency; + reg shreg_now_unloading; - modexpa7_adder32 pe_add_inst - ( - .clk (clk), - .ce (pe_add_ce), - .a (pe_add_a2), - .b (pe_add_b0), - .c_in (pe_add_c_in), - .s (pe_add_s), - .c_out (pe_add_c_out) - ); - - modexpa7_subtractor32 pe_sub_inst - ( - .clk (clk), - .ce (pe_sub_ce), - .a (pe_sub_a0), - .b (pe_sub_b0), - .b_in (pe_sub_b_in), - .d (pe_sub_d), - .b_out (pe_sub_b_out) - ); + reg shreg_done_latency_dly; always @(posedge clk) - // - if ((fsm_state == FSM_STATE_PIPE_CRUNCH) && pe_latency_ab_lsb_done && (syst_cnt_latency == syst_cnt_zero)) begin - pe_add_a0 <= mul_ab_p[0]; - pe_add_a1 <= pe_add_a0; - pe_add_a2 <= pe_add_a1; - end + shreg_done_latency_dly <= shreg_done_latency; always @(posedge clk) // - if ((fsm_state == FSM_STATE_PIPE_CRUNCH) && pe_latency_ab_lsb_done && (syst_cnt_latency == syst_cnt_zero)) - pe_sub_a0 <= pe_add_s; - - always @(posedge clk) - // - if ((fsm_state == FSM_STATE_PIPE_CRUNCH) && pe_latency_ab_lsb_done && (syst_cnt_latency == syst_cnt_zero)) - pe_add_b0 <= mul_qn_p[0]; - - always @(posedge clk) - // - if ((fsm_state == FSM_STATE_PIPE_CRUNCH) && pe_latency_ab_lsb_done && (syst_cnt_latency == syst_cnt_zero)) - pe_sub_b0 <= (mult_cnt_s <= mult_cnt_half) ? 
32'd0 : n_bram_out; - - - always @(posedge clk) - // - case (fsm_next_state) - FSM_STATE_INIT_ZERO_ADDR: n_bram_addr_reg <= bram_addr_zero; - FSM_STATE_INIT_NEXT_ADDR: n_bram_addr_reg <= n_bram_addr_next; - FSM_STATE_PIPE_RELOAD: begin - if (mult_cnt_s == mult_cnt_half) n_bram_addr_reg <= bram_addr_zero; - if (mult_cnt_s > mult_cnt_half) n_bram_addr_reg <= n_bram_addr_next; + case (fsm_state) + // + FSM_STATE_MULT_A_B_START, + FSM_STATE_MULT_AB_N_COEFF_START, + FSM_STATE_MULT_Q_N_START, + FSM_STATE_MULT_A_B_RELOAD, + FSM_STATE_MULT_AB_N_COEFF_RELOAD, + FSM_STATE_MULT_Q_N_RELOAD: begin + shreg_now_loading <= 1'b1; + shreg_now_latency <= 1'b1; + shreg_now_unloading <= 1'b0; + shreg_load <= {{SYSTOLIC_NUM_CYCLES-1{1'b0}}, 1'b1}; + shreg_latency <= {{SYSTOLIC_PE_LATENCY{1'b0}}, 1'b1}; + shreg_unload <= {{SYSTOLIC_NUM_CYCLES-1{1'b0}}, 1'b0}; + end + // + FSM_STATE_MULT_A_B_CRUNCH, + FSM_STATE_MULT_AB_N_COEFF_CRUNCH, + FSM_STATE_MULT_Q_N_CRUNCH: begin + shreg_load <= {shreg_load[SYSTOLIC_NUM_CYCLES-2:0], 1'b0}; + shreg_latency <= {shreg_latency[SYSTOLIC_PE_LATENCY-1:0], 1'b0}; + shreg_unload <= {shreg_unload[SYSTOLIC_NUM_CYCLES-2:0], shreg_latency[SYSTOLIC_PE_LATENCY]}; + + if (shreg_done_load) shreg_now_loading <= 1'b0; + if (shreg_done_latency) shreg_now_latency <= 1'b0; + if (shreg_done_latency) shreg_now_unloading <= 1'b1; + else if (shreg_done_unload) shreg_now_unloading <= 1'b0; + + end + // + default: begin + shreg_now_loading <= 1'b0; + shreg_now_latency <= 1'b0; + shreg_now_unloading <= 1'b0; end + // endcase + + + + always @(posedge clk) begin // - // Ready Flag Logic - // - reg rdy_reg = 1'b1; - assign rdy = rdy_reg; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) rdy_reg <= 1'b1; - else begin - if (fsm_state == FSM_STATE_IDLE) rdy_reg <= ~ena_trig; - if (fsm_state == FSM_STATE_STOP) rdy_reg <= 1'b1; - end - - - // - // - // - always @(posedge clk) - // - if ((fsm_state == FSM_STATE_PIPE_CRUNCH) && pe_latency_ab_lsb_done && 
(syst_cnt_latency == syst_cnt_zero)) - mul_q_a_int <= mul_ab_p[0]; - - always @(posedge clk) + case (fsm_state) + FSM_STATE_MULT_A_B_START: ab_addr_ext <= bram_addr_ext_zero; + FSM_STATE_MULT_AB_N_COEFF_START: q_addr <= bram_addr_zero; + FSM_STATE_MULT_Q_N_START: qn_addr_ext <= bram_addr_ext_zero; + + FSM_STATE_MULT_A_B_RELOAD: ab_addr_ext <= ab_addr_ext_next; + FSM_STATE_MULT_AB_N_COEFF_RELOAD: q_addr <= q_addr_next; + FSM_STATE_MULT_Q_N_RELOAD: qn_addr_ext <= qn_addr_ext_next; + + endcase // - if ((fsm_state == FSM_STATE_PIPE_CRUNCH) && pe_latency_ab_lsb_done && (syst_cnt_latency == syst_cnt_zero)) - mul_qn_a_int <= mul_q_p[0]; - - always @(posedge clk) + case (fsm_next_state) + FSM_STATE_MULT_AB_N_COEFF_START: ab_addr_ext <= bram_addr_ext_zero; + FSM_STATE_MULT_AB_N_COEFF_RELOAD: ab_addr_ext <= ab_addr_ext_next; + endcase // - if (fsm_state == FSM_STATE_PIPE_RELOAD) - mul_q_a <= mul_q_a_int; // TODO: Add masking! Maybe not needed after all?.. + case (fsm_next_state) + FSM_STATE_MULT_Q_N_START: q_addr <= bram_addr_zero; + FSM_STATE_MULT_Q_N_RELOAD: q_addr <= !q_addr_done ? q_addr_next : q_addr; + endcase - always @(posedge clk) - // - if (fsm_state == FSM_STATE_PIPE_RELOAD) - mul_qn_a <= (mult_cnt_qn < mult_cnt_half) ? 
mul_qn_a_int : 32'd0; - - // - // Debug - // - //always @(posedge clk) begin - // - //if ((fsm_state == FSM_STATE_PIPE_CRUNCH) && pe_latency_ab_lsb_done && (syst_cnt_latency == syst_cnt_zero)) - //$display("ab[%2d] = %08x", mult_cnt_ab, mul_ab_p[0]); - // - //if ((fsm_state == FSM_STATE_PIPE_CRUNCH) && pe_latency_ab_lsb_done && (syst_cnt_latency == syst_cnt_zero)) - //$display("q[%2d] = %08x", mult_cnt_q, mul_q_p[0]); - // - //if (fsm_state == FSM_STATE_PIPE_RELOAD) - //$display("s[%2d] = %08x", mult_cnt_qn, pe_add_s); // - //if (fsm_state == FSM_STATE_PIPE_RELOAD) - //$display("d[%2d] = %08x", mult_cnt_s, pe_sub_d); - // - //end - + end - wire [OPERAND_ADDR_WIDTH-1:0] s_bram_addr_rd; - reg [OPERAND_ADDR_WIDTH-1:0] s_bram_addr_wr; - wire [OPERAND_ADDR_WIDTH-1:0] s_bram_addr_wr_next = s_bram_addr_wr + 1'b1; - reg s_bram_en; - - wire [OPERAND_ADDR_WIDTH-1:0] sn_bram_addr_rd; - reg [OPERAND_ADDR_WIDTH-1:0] sn_bram_addr_wr; - wire [OPERAND_ADDR_WIDTH-1:0] sn_bram_addr_wr_next = sn_bram_addr_wr + 1'b1; - reg sn_bram_en; - - assign s_bram_addr_rd = s_bram_addr; - assign sn_bram_addr_rd = s_bram_addr; - - wire [31: 0] s_bram_din; - wire [31: 0] s_bram_dout; - - wire [31: 0] sn_bram_din; - wire [31: 0] sn_bram_dout; - - assign s_bram_din = pe_add_s; - assign sn_bram_din = pe_sub_d; - - always @(posedge clk) + always @(posedge clk) begin // - s_bram_en <= pe_add_ce && (mult_cnt_qn > mult_cnt_half); - - always @(posedge clk) + if (fsm_state == FSM_STATE_MULT_A_B_CRUNCH) begin + ab_wren <= shreg_done_latency_dly; + ab_data_in <= shreg_done_latency_dly ? pe_p[0] : 32'hXXXXXXXX; + end else begin + ab_wren <= 1'b0; + ab_data_in <= 32'hXXXXXXXX; + end // - sn_bram_en <= pe_sub_ce && (mult_cnt_s > mult_cnt_half); - - always @(posedge clk) begin + if (fsm_state == FSM_STATE_MULT_AB_N_COEFF_CRUNCH) begin + q_wren <= shreg_done_latency_dly; + q_data_in <= shreg_done_latency_dly ? 
pe_p[0] : 32'hXXXXXXXX; + end else begin + q_wren <= 1'b0; + q_data_in <= 32'hXXXXXXXX; + end // - if (pe_add_ce && (mult_cnt_qn == mult_cnt_half)) s_bram_addr_wr <= bram_addr_zero; - if (s_bram_en && (s_bram_addr_wr < bram_addr_last)) s_bram_addr_wr <= s_bram_addr_wr_next; - end + if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH) begin + qn_wren <= shreg_done_latency_dly; + qn_data_in <= shreg_done_latency_dly ? pe_p[0] : 32'hXXXXXXXX; + end else begin + qn_wren <= 1'b0; + qn_data_in <= 32'hXXXXXXXX; + end - always @(posedge clk) begin // - if (pe_sub_ce && (mult_cnt_s == mult_cnt_half)) sn_bram_addr_wr <= bram_addr_zero; - if (sn_bram_en && (sn_bram_addr_wr < bram_addr_last)) sn_bram_addr_wr <= sn_bram_addr_wr_next; end - bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) - bram_s (.clk(clk), - .a_addr(s_bram_addr_wr), .a_wr(s_bram_en), .a_in(s_bram_din), .a_out(), - .b_addr(s_bram_addr_rd), .b_out(s_bram_dout)); - - bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH)) - bram_sn (.clk(clk), - .a_addr(sn_bram_addr_wr), .a_wr(sn_bram_en), .a_in(sn_bram_din), .a_out(), - .b_addr(sn_bram_addr_rd), .b_out(sn_bram_dout)); - - - reg r_bram_en; always @(posedge clk) // - case (fsm_state) - FSM_STATE_SAVE_ZERO_ADDR, - FSM_STATE_SAVE_NEXT_ADDR: r_bram_en <= 1'b1; - default: r_bram_en <= 1'b0; + case (fsm_next_state) + FSM_STATE_MULT_A_B_START, + FSM_STATE_MULT_AB_N_COEFF_START, + FSM_STATE_MULT_Q_N_START, + FSM_STATE_MULT_A_B_RELOAD, + FSM_STATE_MULT_AB_N_COEFF_RELOAD, + FSM_STATE_MULT_Q_N_RELOAD: + // + syst_cnt_load <= syst_cnt_zero; + FSM_STATE_MULT_A_B_CRUNCH, + FSM_STATE_MULT_AB_N_COEFF_CRUNCH, + FSM_STATE_MULT_Q_N_CRUNCH: + // + syst_cnt_load <= !syst_cnt_load_done ? 
syst_cnt_load_next : syst_cnt_load; + endcase + + always @(posedge clk) + // + case (fsm_state) + FSM_STATE_MULT_A_B_CRUNCH, + FSM_STATE_MULT_AB_N_COEFF_CRUNCH, + FSM_STATE_MULT_Q_N_CRUNCH: begin - reg r_bram_wr_reg; - - assign r_bram_wr = r_bram_wr_reg; + if (shreg_done_latency) syst_cnt_unload <= syst_cnt_zero; + else if (shreg_now_unloading) + syst_cnt_unload <= !syst_cnt_unload_done ? syst_cnt_unload_next : syst_cnt_unload; + + end + endcase always @(posedge clk) // - r_bram_wr_reg <= r_bram_en; - - - wire r_select_s_over_sn = pe_sub_b_out && !pe_add_c_out; - + case (fsm_state) + FSM_STATE_MULT_A_B_CRUNCH, + FSM_STATE_MULT_AB_N_COEFF_CRUNCH, + FSM_STATE_MULT_Q_N_CRUNCH: begin - reg [31: 0] r_bram_in_reg; - - assign r_bram_in = r_bram_in_reg; + if (shreg_now_unloading) + for (j=0; j syst_cnt_zero) + pe_t_mem[syst_cnt_unload-1'b1][SYSTOLIC_ARRAY_LENGTH-1] <= pe_p[0]; + else + pe_t_mem[syst_cnt_last][SYSTOLIC_ARRAY_LENGTH-1] <= 32'd0; + + end + end + endcase - always @(posedge clk) + // - if (r_bram_en) - r_bram_in_reg <= r_select_s_over_sn ? s_bram_dout : sn_bram_dout; - - always @(posedge clk) + // T and C_IN can be moved to a separate code block + // + always @(posedge clk) begin + // + if (fsm_state == FSM_STATE_MULT_A_B_CRUNCH) + // + for (j=0; j {1'b0, a_addr}) ? 32'd0 : a_bram_out; + pe_b[j] <= loader_dout[j]; + pe_t[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[syst_cnt_load_dly][j]; + pe_c_in[j] <= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[syst_cnt_load_dly][j]; + end else begin + pe_a[j] <= 32'hXXXXXXXX; + pe_b[j] <= 32'hXXXXXXXX; + pe_t[j] <= 32'hXXXXXXXX; + pe_c_in[j] <= 32'hXXXXXXXX; + end + // + if (fsm_state == FSM_STATE_MULT_AB_N_COEFF_CRUNCH) + // + for (j=0; j {1'b0, q_addr}) ? 32'd0 : q_data_out; + pe_b[j] <= loader_dout[j]; + pe_t[j] <= (q_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[syst_cnt_load_dly][j]; + pe_c_in[j] <= (q_addr == bram_addr_zero) ? 
32'd0 : pe_c_out_mem[syst_cnt_load_dly][j]; + end else begin + pe_a[j] <= 32'hXXXXXXXX; + pe_b[j] <= 32'hXXXXXXXX; + pe_t[j] <= 32'hXXXXXXXX; + pe_c_in[j] <= 32'hXXXXXXXX; + end // - if (r_bram_en) - r_bram_addr_reg <= s_bram_addr_dly; - // - // FSM Transition Logic + end + + + // + // FSM Process // always @(posedge clk or negedge rst_n) // if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; else fsm_state <= fsm_next_state; + + // + // FSM Transition Logic + // always @* begin // fsm_next_state = FSM_STATE_STOP; // case (fsm_state) - - FSM_STATE_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_INIT_ZERO_ADDR; + + FSM_STATE_IDLE: if (ena_trig) fsm_next_state = FSM_STATE_LOAD_B_START; else fsm_next_state = FSM_STATE_IDLE; - - FSM_STATE_INIT_ZERO_ADDR: fsm_next_state = FSM_STATE_INIT_NEXT_ADDR; - - FSM_STATE_INIT_NEXT_ADDR: if (b_bram_addr_done) fsm_next_state = FSM_STATE_INIT_LAST_ADDR; - else fsm_next_state = FSM_STATE_INIT_NEXT_ADDR; - - FSM_STATE_INIT_LAST_ADDR: fsm_next_state = FSM_STATE_PIPE_CRUNCH; - - FSM_STATE_PIPE_CRUNCH: if (syst_cnt_done) fsm_next_state = pe_latency_ab_msb_done ? 
- FSM_STATE_PIPE_RELOAD : FSM_STATE_PIPE_CRUNCH; - else fsm_next_state = FSM_STATE_PIPE_CRUNCH; - - FSM_STATE_PIPE_RELOAD: if (mult_cnt_s_done) fsm_next_state = FSM_STATE_SAVE_ZERO_ADDR; - else fsm_next_state = FSM_STATE_PIPE_CRUNCH; - - FSM_STATE_SAVE_ZERO_ADDR: fsm_next_state = FSM_STATE_SAVE_NEXT_ADDR; - - FSM_STATE_SAVE_NEXT_ADDR: if (s_bram_addr_done) fsm_next_state = FSM_STATE_SAVE_LAST_ADDR; - else fsm_next_state = FSM_STATE_SAVE_NEXT_ADDR; - - FSM_STATE_SAVE_LAST_ADDR: fsm_next_state = FSM_STATE_STOP; - + // + FSM_STATE_LOAD_B_START: fsm_next_state = FSM_STATE_LOAD_B_SHIFT; + FSM_STATE_LOAD_B_SHIFT: if (mult_cnt_done) fsm_next_state = FSM_STATE_LOAD_B_WRITE; + else fsm_next_state = FSM_STATE_LOAD_B_SHIFT; + FSM_STATE_LOAD_B_WRITE: if (syst_cnt_init_done) fsm_next_state = FSM_STATE_LOAD_B_FINAL; + else fsm_next_state = FSM_STATE_LOAD_B_SHIFT; + FSM_STATE_LOAD_B_FINAL: fsm_next_state = FSM_STATE_LOAD_N_COEFF_START; + // + FSM_STATE_LOAD_N_COEFF_START: fsm_next_state = FSM_STATE_LOAD_N_COEFF_SHIFT; + FSM_STATE_LOAD_N_COEFF_SHIFT: if (mult_cnt_done) fsm_next_state = FSM_STATE_LOAD_N_COEFF_WRITE; + else fsm_next_state = FSM_STATE_LOAD_N_COEFF_SHIFT; + FSM_STATE_LOAD_N_COEFF_WRITE: if (syst_cnt_init_done) fsm_next_state = FSM_STATE_LOAD_N_COEFF_FINAL; + else fsm_next_state = FSM_STATE_LOAD_N_COEFF_SHIFT; + FSM_STATE_LOAD_N_COEFF_FINAL: fsm_next_state = FSM_STATE_LOAD_N_START; + // + FSM_STATE_LOAD_N_START: fsm_next_state = FSM_STATE_LOAD_N_SHIFT; + FSM_STATE_LOAD_N_SHIFT: if (mult_cnt_done) fsm_next_state = FSM_STATE_LOAD_N_WRITE; + else fsm_next_state = FSM_STATE_LOAD_N_SHIFT; + FSM_STATE_LOAD_N_WRITE: if (syst_cnt_init_done) fsm_next_state = FSM_STATE_LOAD_N_FINAL; + else fsm_next_state = FSM_STATE_LOAD_N_SHIFT; + FSM_STATE_LOAD_N_FINAL: fsm_next_state = FSM_STATE_MULT_A_B_START; + // + FSM_STATE_MULT_A_B_START: fsm_next_state = FSM_STATE_MULT_A_B_CRUNCH; + FSM_STATE_MULT_A_B_CRUNCH: if (shreg_done_unload) fsm_next_state = FSM_STATE_MULT_A_B_RELOAD; + else 
fsm_next_state = FSM_STATE_MULT_A_B_CRUNCH; + FSM_STATE_MULT_A_B_RELOAD: if (ab_addr_ext_done) fsm_next_state = FSM_STATE_MULT_A_B_FINAL; + else fsm_next_state = FSM_STATE_MULT_A_B_CRUNCH; + FSM_STATE_MULT_A_B_FINAL: fsm_next_state = FSM_STATE_MULT_AB_N_COEFF_START; + // + FSM_STATE_MULT_AB_N_COEFF_START: fsm_next_state = FSM_STATE_MULT_AB_N_COEFF_CRUNCH; + FSM_STATE_MULT_AB_N_COEFF_CRUNCH: if (shreg_done_unload) fsm_next_state = FSM_STATE_MULT_AB_N_COEFF_RELOAD; + else fsm_next_state = FSM_STATE_MULT_AB_N_COEFF_CRUNCH; + FSM_STATE_MULT_AB_N_COEFF_RELOAD: if (q_addr_done) fsm_next_state = FSM_STATE_MULT_AB_N_COEFF_FINAL; + else fsm_next_state = FSM_STATE_MULT_AB_N_COEFF_CRUNCH; + FSM_STATE_MULT_AB_N_COEFF_FINAL: fsm_next_state = FSM_STATE_MULT_Q_N_START; + // + FSM_STATE_MULT_Q_N_START: fsm_next_state = FSM_STATE_MULT_Q_N_CRUNCH; + FSM_STATE_MULT_Q_N_CRUNCH: if (shreg_done_unload) fsm_next_state = FSM_STATE_MULT_Q_N_RELOAD; + else fsm_next_state = FSM_STATE_MULT_Q_N_CRUNCH; + FSM_STATE_MULT_Q_N_RELOAD: if (qn_addr_ext_done) fsm_next_state = FSM_STATE_MULT_Q_N_FINAL; + else fsm_next_state = FSM_STATE_MULT_Q_N_CRUNCH; + FSM_STATE_MULT_Q_N_FINAL: fsm_next_state = FSM_STATE_STOP; + // FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE; - + endcase - end + // + end endmodule diff --git a/src/rtl/pe/modexpa7_adder32.v b/src/rtl/pe/modexpa7_adder32.v index ad296b1..04f8a18 100644 --- a/src/rtl/pe/modexpa7_adder32.v +++ b/src/rtl/pe/modexpa7_adder32.v @@ -51,7 +51,7 @@ module modexpa7_adder32 // // Include Primitive Selector // - `include "modexpa7_lowlevel_settings.v" + `include "modexpa7_primitive_switch.v" // diff --git a/src/rtl/pe/modexpa7_lowlevel_settings.v b/src/rtl/pe/modexpa7_lowlevel_settings.v deleted file mode 100644 index 93f5f34..0000000 --- a/src/rtl/pe/modexpa7_lowlevel_settings.v +++ /dev/null @@ -1,15 +0,0 @@ -//`define USE_VENDOR_PRIMITIVES - -`ifdef USE_VENDOR_PRIMITIVES - -`define ADDER32_PRIMITIVE adder32_artix7 -`define SUBTRACTOR32_PRIMITIVE 
subtractor32_artix7 -`define SYSTOLIC_PE_PRIMITIVE systolic_pe_artix7 - -`else - -`define ADDER32_PRIMITIVE adder32_generic -`define SUBTRACTOR32_PRIMITIVE subtractor32_generic -`define SYSTOLIC_PE_PRIMITIVE systolic_pe_generic - -`endif diff --git a/src/rtl/pe/modexpa7_primitive_switch.v b/src/rtl/pe/modexpa7_primitive_switch.v new file mode 100644 index 0000000..d38069b --- /dev/null +++ b/src/rtl/pe/modexpa7_primitive_switch.v @@ -0,0 +1,16 @@ +//`define USE_VENDOR_PRIMITIVES + +`ifdef USE_VENDOR_PRIMITIVES + +`define ADDER32_PRIMITIVE adder32_artix7 +`define SUBTRACTOR32_PRIMITIVE subtractor32_artix7 +`define SYSTOLIC_PE_PRIMITIVE systolic_pe_artix7 + +`else + +`define ADDER32_PRIMITIVE adder32_generic +`define SUBTRACTOR32_PRIMITIVE subtractor32_generic +`define SYSTOLIC_PE_PRIMITIVE systolic_pe_generic + + +`endif diff --git a/src/rtl/pe/modexpa7_subtractor32.v b/src/rtl/pe/modexpa7_subtractor32.v index 75b9c13..a43d670 100644 --- a/src/rtl/pe/modexpa7_subtractor32.v +++ b/src/rtl/pe/modexpa7_subtractor32.v @@ -51,7 +51,7 @@ module modexpa7_subtractor32 // // Include Primitive Selector // - `include "modexpa7_lowlevel_settings.v" + `include "modexpa7_primitive_switch.v" // diff --git a/src/rtl/pe/modexpa7_systolic_pe.v b/src/rtl/pe/modexpa7_systolic_pe.v index 22e6874..b284134 100644 --- a/src/rtl/pe/modexpa7_systolic_pe.v +++ b/src/rtl/pe/modexpa7_systolic_pe.v @@ -51,7 +51,7 @@ module modexpa7_systolic_pe // // Include Primitive Selector // - `include "modexpa7_lowlevel_settings.v" + `include "modexpa7_primitive_switch.v" // diff --git a/src/tb/tb_systolic_multiplier.v b/src/tb/tb_systolic_multiplier.v index 21e319a..9df492e 100644 --- a/src/tb/tb_systolic_multiplier.v +++ b/src/tb/tb_systolic_multiplier.v @@ -176,7 +176,7 @@ module tb_systolic_multiplier; .r_bram_in (core_r_data), .r_bram_wr (core_r_wren), - .n_num_words (n_num_words) + .ab_num_words (n_num_words) ); @@ -273,6 +273,7 @@ module tb_systolic_multiplier; b = ab_modulo; // prepare for next 
round + #1000000; end // final step, display results -- cgit v1.2.3