//====================================================================== // // Copyright (c) 2019, NORDUnet A/S All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // - Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // - Neither the name of the NORDUnet nor the names of its contributors may // be used to endorse or promote products derived from this software // without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
//
//======================================================================

//
// modexpng_mmm_dual
//
// Sequencer and datapath glue for a three-phase multiplication pass that is
// run on two operand lanes ("x" and "y") at once.  The module
//   * walks an FSM through SQUARE, TRIANGLE and RECTANGLE phases, each of
//     which is split into column passes (COL_0, then COL_N repeated),
//   * generates read enables/banks/addresses for the "wide" and "narrow"
//     operand storage,
//   * feeds two modexpng_dsp_array_block instances (one per lane) and
//     produces their clock-enable and mode_z control pipelines,
//   * instantiates modexpng_recombinator_block, which drives all rcmb_*
//     outputs, and
//   * pulses rdct_ena and waits on rdct_rdy before finishing.
//
// NOTE(review): the "mmm" in the name presumably stands for Montgomery
// modular multiplication - confirm against the project documentation.
//
// The state encodings (MMM_FSM_STATE_*), BANK_*, WORD_*, NUM_MULTS and
// BANK_ADDR_W come from the two included headers and are not visible here.
//
module modexpng_mmm_dual
(
    clk, rst_n,
    ena, rdy,
    ladder_mode,
    word_index_last, word_index_last_minus1,
    force_unity_b, only_reduce, just_multiply,
    sel_wide_in, sel_narrow_in,
    rd_wide_xy_ena, rd_wide_xy_ena_aux, rd_wide_xy_bank, rd_wide_xy_bank_aux, rd_wide_xy_addr, rd_wide_xy_addr_aux, rd_wide_x_din, rd_wide_y_din, rd_wide_x_din_aux, rd_wide_y_din_aux,
    rd_narrow_xy_ena, rd_narrow_xy_bank, rd_narrow_xy_addr, rd_narrow_x_din, rd_narrow_y_din,
    rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid,
    rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid,
    rcmb_xy_bank, rcmb_xy_addr, rcmb_x_dout, rcmb_y_dout, rcmb_xy_valid,
    rdct_ena, rdct_rdy
);


    //
    // Headers
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_mmm_dual_fsm.vh"


    //
    // Ports
    //
    input                       clk;                    // clock, all registers are posedge-triggered
    input                       rst_n;                  // asynchronous reset, active low

    input                       ena;                    // leaves IDLE when sampled high
    output                      rdy;                    // cleared when ena is accepted, set again in the STOP state

    input                       ladder_mode;            // 1: merged B operand is taken from the y lane, 0: from the x lane
    input  [7:0]                word_index_last;        // wrap value for wide addresses; bits [7:3] give the last column index
    input  [7:0]                word_index_last_minus1; // compared against the narrow address to detect end of a column pass

    input                       force_unity_b;          // replace the merged B operand with WORD_ONE/WORD_ZERO
    input                       only_reduce;            // skip the SQUARE phase and start directly with TRIANGLE
    input                       just_multiply;          // go to STOP right after the SQUARE phase

    input  [BANK_ADDR_W-1:0]    sel_wide_in;            // wide bank read during the SQUARE phase
    input  [BANK_ADDR_W-1:0]    sel_narrow_in;          // narrow bank read during the SQUARE phase

    // wide storage read port: NUM_MULTS/2 main lanes plus one auxiliary lane
    output                      rd_wide_xy_ena;
    output                      rd_wide_xy_ena_aux;
    output [   BANK_ADDR_W -1:0] rd_wide_xy_bank;
    output [   BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
    output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr;        // one 8-bit address per main lane, lane z in bits [8*z+:8]
    output [ 8-1:0]             rd_wide_xy_addr_aux;
    input  [18*NUM_MULTS/2-1:0] rd_wide_x_din;
    input  [18*NUM_MULTS/2-1:0] rd_wide_y_din;
    input  [ 18-1:0]            rd_wide_x_din_aux;
    input  [ 18-1:0]            rd_wide_y_din_aux;

    // narrow storage read port
    output                      rd_narrow_xy_ena;
    output [   BANK_ADDR_W -1:0] rd_narrow_xy_bank;
    output [ 7:0]               rd_narrow_xy_addr;
    input  [18-1:0]             rd_narrow_x_din;
    input  [18-1:0]             rd_narrow_y_din;

    // outputs below are all driven by the recombinator instance
    output [BANK_ADDR_W -1:0]   rcmb_wide_xy_bank;
    output [ 7:0]               rcmb_wide_xy_addr;
    output [17:0]               rcmb_wide_x_dout;
    output [17:0]               rcmb_wide_y_dout;
    output                      rcmb_wide_xy_valid;

    output [BANK_ADDR_W -1:0]   rcmb_narrow_xy_bank;
    output [ 7:0]               rcmb_narrow_xy_addr;
    output [17:0]               rcmb_narrow_x_dout;
    output [17:0]               rcmb_narrow_y_dout;
    output                      rcmb_narrow_xy_valid;

    // connected to the recombinator's rdct_narrow_* ports; also read back
    // internally to steer the auxiliary wide read port in the RECTANGLE phase
    output [BANK_ADDR_W -1:0]   rcmb_xy_bank;
    output [ 7:0]               rcmb_xy_addr;
    output [17:0]               rcmb_x_dout;
    output [17:0]               rcmb_y_dout;
    output                      rcmb_xy_valid;

    output                      rdct_ena;               // one-cycle pulse following the RECTANGLE_COL_0_INIT state
    input                       rdct_rdy;               // awaited in the WAIT_REDUCTOR state


    //
    // FSM Declaration
    //
    reg  [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE;
    reg  [MMM_FSM_STATE_W-1:0] fsm_state_next;              // combinational, see "FSM Transition Logic"

    // precomputed branch targets used by the transition logic
    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_idle;
    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_square;
    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_triangle;
    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_rectangle;
    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_square_holdoff;


    //
    // FSM Process
    //
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) fsm_state <= MMM_FSM_STATE_IDLE;
        else        fsm_state <= fsm_state_next;


    //
    // Storage Control Interface
    //
    reg                      wide_xy_ena = 1'b0;
    reg                      wide_xy_ena_aux = 1'b0;
    reg  [ BANK_ADDR_W -1:0] wide_xy_bank;
    reg  [ BANK_ADDR_W -1:0] wide_xy_bank_aux;
    reg  [ 8-1:0]            wide_xy_addr[0:3];     // NOTE(review): fixed at 4 entries while loops run to NUM_MULTS/2 - assumes NUM_MULTS == 8
    reg  [ 8-1:0]            wide_xy_addr_aux;

    reg                      narrow_xy_ena = 1'b0;
    reg  [ BANK_ADDR_W -1:0] narrow_xy_bank;
    reg  [ 7:0]              narrow_xy_addr;
    reg  [ 7:0]              narrow_xy_addr_dly;    // narrow_xy_addr delayed by one clock

    assign rd_wide_xy_ena      = wide_xy_ena;
    assign rd_wide_xy_ena_aux  = wide_xy_ena_aux;
    assign rd_wide_xy_bank     = wide_xy_bank;
    assign rd_wide_xy_bank_aux = wide_xy_bank_aux;
    assign rd_wide_xy_addr_aux = wide_xy_addr_aux;

    assign rd_narrow_xy_ena  = narrow_xy_ena;
    assign rd_narrow_xy_bank = narrow_xy_bank;
    assign rd_narrow_xy_addr = narrow_xy_addr;

    // flatten the per-lane address array into the packed output bus
    genvar z;
    generate for (z=0; z<(NUM_MULTS/2); z=z+1)
        begin : gen_rd_wide_xy_addr
            assign rd_wide_xy_addr[8*z+:8] = wide_xy_addr[z];
        end
    endgenerate


    //
    // Column Counter
    //
    reg [4:0] col_index;        // current column index
    reg [4:0] col_index_prev;   // delayed column index value
    reg [4:0] col_index_last;   // index of the very last column
    reg [4:0] col_index_next;   // precomputed next column index
    reg       col_is_last;      // flag set during the very last column

    always @(posedge clk)
        //
        col_index_prev <= col_index;


    //
    // Column Counter Increment Logic
    //
    // Keyed on fsm_state_next, so the counters are already updated when the
    // corresponding *_INIT state becomes the current state.  In every other
    // state the registers hold (no default branch).
    //
    // NOTE(review): col_is_last is forced to 0 on COL_0_INIT and the FSM always
    // moves from COL_0_BUSY to COL_N_INIT, so word_index_last[7:3] == 0 would
    // never raise col_is_last on the first COL_N pass - presumably callers
    // guarantee at least two columns; confirm.
    //
    always @(posedge clk)
        //
        case (fsm_state_next)
            //
            // first column of a phase: restart the counter
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
                col_index      <= 5'd0;
                col_index_last <= word_index_last[7:3];
                col_index_next <= 5'd1;
                col_is_last    <= 1'b0;
            end
            //
            // subsequent columns: advance, flag the last one, wrap afterwards
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
                col_index      <= col_index_next;
                col_is_last    <= col_index_next == col_index_last;
                col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1;
            end
            //
        endcase


    //
    // Completion Flags
    //
    // For each phase: *_almost_done_comb is a combinational compare on the
    // narrow address, *_almost_done_flop registers it (only while in a BUSY
    // state), *_surely_done_flop is one more clock later, and
    // *_tardy_done_flop (triangle/rectangle only) is one more clock after that.
    //
    wire square_almost_done_comb;
    reg  square_almost_done_flop = 1'b0;
    reg  square_surely_done_flop = 1'b0;

    wire triangle_almost_done_comb;
    reg  triangle_almost_done_flop = 1'b0;
    reg  triangle_surely_done_flop = 1'b0;
    reg  triangle_tardy_done_flop = 1'b0;

    wire rectangle_almost_done_comb;
    reg  rectangle_almost_done_flop = 1'b0;
    reg  rectangle_surely_done_flop = 1'b0;
    reg  rectangle_tardy_done_flop = 1'b0;

    // square and rectangle sweep the whole operand; triangle stops inside the
    // current column (upper address bits equal col_index)
    assign square_almost_done_comb    = narrow_xy_addr == word_index_last_minus1;
    assign triangle_almost_done_comb  = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index);
    assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1;


    //
    // Square Completion Flags
    //
    always @(posedge clk) begin
        //
        case (fsm_state)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: square_almost_done_flop <= square_almost_done_comb;
            //
            default: square_almost_done_flop <= 1'b0;
            //
        endcase
        //
        square_surely_done_flop <= square_almost_done_flop;
        //
    end


    //
    // Triangle Completion Flags
    //
    always @(posedge clk) begin
        //
        case (fsm_state)
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: triangle_almost_done_flop <= triangle_almost_done_comb;
            //
            default: triangle_almost_done_flop <= 1'b0;
            //
        endcase
        //
        triangle_surely_done_flop <= triangle_almost_done_flop;
        triangle_tardy_done_flop  <= triangle_surely_done_flop;
        //
    end


    //
    // Rectangle Completion Flags
    //
    always @(posedge clk) begin
        //
        case (fsm_state)
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: rectangle_almost_done_flop <= rectangle_almost_done_comb;
            //
            default: rectangle_almost_done_flop <= 1'b0;
            //
        endcase
        //
        rectangle_surely_done_flop <= rectangle_almost_done_flop;
        rectangle_tardy_done_flop  <= rectangle_surely_done_flop;
        //
    end


    //
    // Narrow Storage Control Logic
    //
    // Only narrow_xy_ena has a reset value; narrow_xy_addr and narrow_xy_bank
    // share the block but are simply not updated while rst_n is low.
    //
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) narrow_xy_ena <= 1'b0;
        else begin
            //
            // Narrow Address
            //
            // Starts at 0 on every *_INIT, then counts up.  The restart value
            // once the done flags fire is 0 for square/triangle and 1 for
            // rectangle.  Outside the multiplication states the address is X.
            //
            case (fsm_state_next)
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: narrow_xy_addr <= 8'd0;
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0;
                //
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? 8'd0 : narrow_xy_addr + 1'b1;
                //
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? 8'd1 : narrow_xy_addr + 1'b1;
                //
                default: narrow_xy_addr <= 8'dX;
                //
            endcase
            //
            // Narrow Bank
            //
            // square: caller-selected bank; triangle: BANK_NARROW_COEFF;
            // rectangle: BANK_NARROW_Q.  Triangle (last column only) and
            // rectangle switch to BANK_NARROW_EXT for the trailing reads once
            // the done flags are set.
            //
            // NOTE(review): the default below is a 2-bit literal, whereas the
            // wide bank registers of the same BANK_ADDR_W width use 3'bXXX -
            // confirm the intended width.
            //
            case (fsm_state_next)
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in;
                //
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? BANK_NARROW_EXT : BANK_NARROW_COEFF;
                //
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? BANK_NARROW_EXT : BANK_NARROW_Q;
                //
                default: narrow_xy_bank <= 2'bXX;
                //
            endcase
            //
            // Narrow Enable
            //
            // Always on during INIT/TRIG; during BUSY it drops once the
            // phase-specific done flag fires (triangle and rectangle keep
            // reading one cycle longer than square, triangle only in the last
            // column).
            //
            case (fsm_state_next)
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:    narrow_xy_ena <= 1'b1;
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    narrow_xy_ena <= ~square_almost_done_flop;
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  narrow_xy_ena <= 1'b1;
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop;
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop;
                //
                default: narrow_xy_ena <= 1'b0;
                //
            endcase
            //
        end


    //
    // Wide Storage Control Logic
    //
    // wide_offset_rom[j] supplies the 3 low address bits each main lane starts
    // from at the beginning of a column pass (see "Wide Address" below).
    //
    wire [2:0] wide_offset_rom[0:3];

    //
    // NOTE(review): the source text is damaged at this point.  Everything
    // between "z" and "8'd0)" on the next line is missing in this copy of the
    // file - that span presumably held the rest of the generate loop that
    // fills wide_offset_rom and the header of function wide_xy_addr_next
    // (its declaration, inputs and the condition of the "if").  The text is
    // kept exactly as found; restore it from the upstream repository.
    //
    // What remains visible of wide_xy_addr_next: one branch returns
    // wide_xy_addr_current - 1, the other returns wide_xy_addr_last.  All
    // call sites pass (<current address>, word_index_last).
    //
    generate for (z=1; z 8'd0)
                wide_xy_addr_next = wide_xy_addr_current - 1'b1;
            else
                wide_xy_addr_next = wide_xy_addr_last;
        end
    endfunction

    integer j;
    always @(posedge clk or negedge rst_n)
        //
        // only the two enables are reset; addresses and banks are not updated
        // while rst_n is low
        //
        if (!rst_n) begin
            wide_xy_ena     <= 1'b0;
            wide_xy_ena_aux <= 1'b0;
        end else begin
            //
            // Wide Address
            //
            // Identical for all three phases: COL_0_INIT loads column 0 plus
            // the per-lane offset, COL_N_INIT loads col_index_next plus the
            // offset, TRIG/BUSY step the address with wide_xy_addr_next().
            //
            for (j=0; j<(NUM_MULTS/2); j=j+1)
                //
                case (fsm_state_next)
                    //
                    // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
                    //
                    MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:    wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
                    MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:    wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
                    MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                    MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                    MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                    MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
                    //
                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:  wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:  wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
                    //
                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
                    //
                    default: wide_xy_addr[j] <= 8'dX;
                endcase
            //
            // Wide Aux Address
            //
            // Square/triangle: restart at address 1 on every *_INIT (column
            // index is not used), then step like the main lanes.  Rectangle:
            // follow the address reported by the recombinator whenever
            // rcmb_xy_valid is set, don't-care otherwise.
            //
            case (fsm_state_next)
                //
                // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:    wide_xy_addr_aux <= {5'd0, 3'd1};
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:    wide_xy_addr_aux <= {5'd0, 3'd1};
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
                //
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:  wide_xy_addr_aux <= {5'd0, 3'd1};
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:  wide_xy_addr_aux <= {5'd0, 3'd1};
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
                //
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0};
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:    wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX; //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ?
                                                                                                                  //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4];
                //
                default: wide_xy_addr_aux <= 8'dX;
            endcase
            //
            // Wide Bank
            //
            // square: caller-selected bank, triangle: BANK_WIDE_L,
            // rectangle: BANK_WIDE_N
            //
            case (fsm_state_next)
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    wide_xy_bank <= sel_wide_in;
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  wide_xy_bank <= BANK_WIDE_L;
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  wide_xy_bank <= BANK_WIDE_L;
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N;
                default:                                 wide_xy_bank <= 3'bXXX;
            endcase
            //
            // Wide Aux Bank
            //
            // square: caller-selected bank; triangle: BANK_WIDE_H during
            // INIT/TRIG and BANK_WIDE_L during BUSY; rectangle: translated
            // from the bank the recombinator reports (ML -> L, MH -> H) while
            // rcmb_xy_valid is set, don't-care otherwise.
            //
            case (fsm_state_next)
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    wide_xy_bank_aux <= sel_wide_in;
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  wide_xy_bank_aux <= BANK_WIDE_H;
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  wide_xy_bank_aux <= BANK_WIDE_L;
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:
                    if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's)
                        case (rcmb_xy_bank)
                            BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
                            BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
                            //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX
                            default:      wide_xy_bank_aux <= 3'bXXX;
                        endcase
                    else wide_xy_bank_aux <= 3'bXXX;
                default: wide_xy_bank_aux <= 3'bXXX;
            endcase
            //
            // Wide Enable
            //
            // on in every INIT/TRIG/BUSY state of all three phases
            //
            case (fsm_state_next)
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1;
                default:                                 wide_xy_ena <= 1'b0;
            endcase
            //
            // Wide Aux Enable
            //
            // The square states are not listed and therefore take the default:
            // the auxiliary port is disabled during the SQUARE phase even
            // though its address and bank are driven above.
            //
            case (fsm_state_next)
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  wide_xy_ena_aux <= 1'b1;
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1;
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:    wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML);
                default:                                 wide_xy_ena_aux <= 1'b0;
            endcase
            //
        end


    //
    // Delay Lines
    //
    always @(posedge clk)
        //
        narrow_xy_addr_dly <= narrow_xy_addr;


    //
    // DSP Array Logic
    //
    // Both lanes share every control signal (clock enables and mode_z); only
    // the A/B operands and the P results are per-lane.
    //
    reg               dsp_xy_ce_a = 1'b0;
    reg               dsp_xy_ce_b = 1'b0;
    reg               dsp_xy_ce_b_dly = 1'b0;   // dsp_xy_ce_b delayed by one clock
    reg               dsp_xy_ce_m = 1'b0;
    reg               dsp_xy_ce_p = 1'b0;
    reg               dsp_xy_ce_mode = 1'b0;
    reg  [9   -1:0]   dsp_xy_mode_z = {9{1'b1}};

    wire [5*18-1:0]   dsp_x_a;                  // {aux word, NUM_MULTS/2 main words}
    wire [5*18-1:0]   dsp_y_a;

    reg  [1*16-1:0]   dsp_x_b;
    reg  [1*16-1:0]   dsp_y_b;
    reg  [   1:0]     dsp_xy_b_carry;           // bits [17:16] of the previous merged narrow word

    wire [9*47-1:0]   dsp_x_p;
    wire [9*47-1:0]   dsp_y_p;

    //generate for (z=0; z<(NUM_MULTS/2); z=z+1)
        //begin : gen_dsp_xy_a_split
            //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z];
            //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z];
        //end
    //endgenerate

    assign dsp_x_a = {rd_wide_x_din_aux, rd_wide_x_din};
    assign dsp_y_a = {rd_wide_y_din_aux, rd_wide_y_din};

    //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux;
    //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux;

    always @(posedge clk)
        //
        dsp_xy_ce_b_dly <= dsp_xy_ce_b;

    modexpng_dsp_array_block dsp_array_block_x
    (
        .clk        (clk),

        .ce_a       (dsp_xy_ce_a),
        .ce_b       (dsp_xy_ce_b),
        .ce_m       (dsp_xy_ce_m),
        .ce_p       (dsp_xy_ce_p),
        .ce_mode    (dsp_xy_ce_mode),

        .mode_z     (dsp_xy_mode_z),

        .a          (dsp_x_a),
        .b          (dsp_x_b),
        .p          (dsp_x_p)
    );

    modexpng_dsp_array_block dsp_array_block_y
    (
        .clk        (clk),

        .ce_a       (dsp_xy_ce_a),
        .ce_b       (dsp_xy_ce_b),
        .ce_m       (dsp_xy_ce_m),
        .ce_p       (dsp_xy_ce_p),
        .ce_mode    (dsp_xy_ce_mode),

        .mode_z     (dsp_xy_mode_z),

        .a          (dsp_y_a),
        .b          (dsp_y_b),
        .p          (dsp_y_p)
    );


    //
    // DSP Control Logic
    //
    // All DSP clock enables are derived from narrow_xy_ena through a chain of
    // registers: ena -> dly1 -> dly2 -> ce_b -> ce_b_dly -> ce_m/ce_mode -> ce_p.
    // ce_a is the OR of dly1 and dly2, so it rises one clock before ce_b.
    //
    reg narrow_xy_ena_dly1 = 1'b0;
    reg narrow_xy_ena_dly2 = 1'b0;

    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            //
            narrow_xy_ena_dly1 <= 1'b0;
            narrow_xy_ena_dly2 <= 1'b0;
            //
            dsp_xy_ce_a    <= 1'b0;
            dsp_xy_ce_b    <= 1'b0;
            dsp_xy_ce_m    <= 1'b0;
            dsp_xy_ce_p    <= 1'b0;
            dsp_xy_ce_mode <= 1'b0;
            //
        end else begin
            //
            narrow_xy_ena_dly1 <= narrow_xy_ena;
            narrow_xy_ena_dly2 <= narrow_xy_ena_dly1;
            //
            dsp_xy_ce_a    <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2;
            dsp_xy_ce_b    <= narrow_xy_ena_dly2;
            dsp_xy_ce_m    <= dsp_xy_ce_b_dly;
            dsp_xy_ce_p    <= dsp_xy_ce_m;
            dsp_xy_ce_mode <= dsp_xy_ce_b_dly;
            //
        end


    //
    // DSP Feed Logic
    //
    // dsp_merge_xy_b:       set when the SQUARE phase starts, cleared when the
    //                       TRIANGLE phase starts, holds in all other states
    //                       (first case has no default on purpose).  It has no
    //                       initial value and no reset.
    // dsp_merge_xy_b_first: high for the clock following a SQUARE *_TRIG state.
    //
    reg dsp_merge_xy_b;
    reg dsp_merge_xy_b_first;

    always @(posedge clk) begin
        //
        case (fsm_state)
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG:   dsp_merge_xy_b <= 1'b1;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0;
        endcase
        //
        case (fsm_state)
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_merge_xy_b_first <= 1'b1;
            default:                              dsp_merge_xy_b_first <= 1'b0;
        endcase
        //
    end


    //
    // On-the-fly Carry Recombination
    //
    // In merge mode the 18-bit narrow word is added to the 2-bit carry kept
    // from the previous word; the low 16 bits go to the DSP B input and the
    // top 2 bits become the next carry.  Both lanes then receive the same B
    // word, picked from the y lane when ladder_mode is set and from the x lane
    // otherwise.  With force_unity_b the B word is WORD_ONE while
    // dsp_merge_xy_b_first is high and WORD_ZERO afterwards; the carry
    // register is still updated from the storage data in that case.
    //
    wire [17:0] rd_narrow_x_din_carry = rd_narrow_x_din + {{16{1'b0}}, dsp_xy_b_carry};
    wire [17:0] rd_narrow_y_din_carry = rd_narrow_y_din + {{16{1'b0}}, dsp_xy_b_carry};

    wire [17:0] rd_narrow_xy_din_carry_mux = ladder_mode ? rd_narrow_y_din_carry : rd_narrow_x_din_carry;

    wire [15:0] rd_narrow_xy_dout_carry_mux_or_unity = !force_unity_b ? rd_narrow_xy_din_carry_mux[15:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO;

    always @(posedge clk)
        //
        if (narrow_xy_ena_dly2) begin // rewrite
            //
            if (!dsp_merge_xy_b) begin
                // plain mode: each lane gets its own word, carry bits are ignored
                dsp_x_b        <= rd_narrow_x_din[15:0];
                dsp_y_b        <= rd_narrow_y_din[15:0];
                dsp_xy_b_carry <= 2'b00;
            end else begin
                // merge mode: shared, carry-corrected word for both lanes
                dsp_x_b        <= rd_narrow_xy_dout_carry_mux_or_unity;
                dsp_y_b        <= rd_narrow_xy_dout_carry_mux_or_unity;
                dsp_xy_b_carry <= rd_narrow_xy_din_carry_mux[17:16];
            end
            //
        end else begin
            //
            // no valid narrow data this cycle: B is don't-care, carry restarts
            //
            dsp_x_b <= WORD_DNC;
            dsp_y_b <= WORD_DNC;
            //
            dsp_xy_b_carry <= 2'b00;
            //
        end


    //
    // DSP mode_z pipeline: the value is computed into *_adv4 and reaches
    // dsp_xy_mode_z four clocks later (adv4 -> adv3 -> adv2 -> adv1 -> mode_z).
    //
    reg [9   -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}};
    reg [9   -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}};
    reg [9   -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
    reg [9   -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}};

    //
    // calc_mac_mode_z_square
    //
    // Returns an all-ones word, except that when the upper five bits of the
    // narrow address equal the column index, the bit selected by the low three
    // address bits is cleared.  The top bit (index 8) is always 1.
    //
    // NOTE(review): the bit patterns are hard-coded as 8 bits although the
    // return range is [NUM_MULTS:0] - this assumes NUM_MULTS == 8.  The meaning
    // of an individual mode_z bit is defined in modexpng_dsp_array_block and
    // cannot be confirmed from this file.
    //
    function  [NUM_MULTS:0] calc_mac_mode_z_square;
        input [        4:0] col_index_value;
        input [        7:0] narrow_xy_addr_value;
        begin
            if (narrow_xy_addr_value[7:3] == col_index_value)
                case (narrow_xy_addr_value[2:0])
                    3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
                    3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
                    3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
                    3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
                    3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
                    3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
                    3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
                    3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
                endcase
            else
                calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
        end
    endfunction

    //
    // calc_mac_mode_z_rectangle
    //
    // Currently computes exactly the same value as calc_mac_mode_z_square
    // (same inputs, same table); kept as a separate function for the
    // RECTANGLE phase.
    //
    function  [NUM_MULTS:0] calc_mac_mode_z_rectangle;
        input [        4:0] col_index_value;
        input [        7:0] narrow_xy_addr_value;
        begin
            if (narrow_xy_addr_value[7:3] == col_index_value)
                case (narrow_xy_addr_value[2:0])
                    3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110};
                    3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101};
                    3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011};
                    3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111};
                    3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111};
                    3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111};
                    3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111};
                    3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111};
                endcase
            else
                calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}};
        end
    endfunction

    //
    // First stage of the mode_z pipeline.  Every *_TRIG state loads all zeroes;
    // BUSY states load the per-address mask (square, rectangle) or all ones
    // (triangle); everything else loads all ones.  The mask is computed from
    // the one-clock-delayed column index and narrow address.
    //
    always @(posedge clk)
        //
        case (fsm_state_next)
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:    dsp_xy_mode_z_adv4 <= {9{1'b0}};
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  dsp_xy_mode_z_adv4 <= {9{1'b1}};
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);
            default:                                 dsp_xy_mode_z_adv4 <= {9{1'b1}};
        endcase

    // remaining stages of the mode_z pipeline
    always @(posedge clk) begin
        dsp_xy_mode_z      <= dsp_xy_mode_z_adv1;
        //
        dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2;
        dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3;
        dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4;
    end


    //
    // Recombinator
    //
    // Consumes the DSP products and drives every rcmb_* output of this module.
    // Note that the module-level rcmb_xy_* outputs are wired to the
    // recombinator ports named rdct_narrow_xy_*.
    //
    reg  rcmb_ena = 1'b0;
    wire rcmb_rdy;

    modexpng_recombinator_block recombinator_block
    (
        .clk                    (clk),
        .rst_n                  (rst_n),

        .ena                    (rcmb_ena),
        .rdy                    (rcmb_rdy),

        .fsm_state_next         (fsm_state_next),

        .word_index_last        (word_index_last),

        .dsp_xy_ce_p            (dsp_xy_ce_p),
        .dsp_x_p                (dsp_x_p),
        .dsp_y_p                (dsp_y_p),

        .col_index              (col_index),
        .col_index_last         (col_index_last),

        .rd_narrow_xy_addr      (narrow_xy_addr),
        .rd_narrow_xy_bank      (narrow_xy_bank),

        .rcmb_wide_xy_bank      (rcmb_wide_xy_bank),
        .rcmb_wide_xy_addr      (rcmb_wide_xy_addr),
        .rcmb_wide_x_dout       (rcmb_wide_x_dout),
        .rcmb_wide_y_dout       (rcmb_wide_y_dout),
        .rcmb_wide_xy_valid     (rcmb_wide_xy_valid),

        .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank),
        .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr),
        .rcmb_narrow_x_dout     (rcmb_narrow_x_dout),
        .rcmb_narrow_y_dout     (rcmb_narrow_y_dout),
        .rcmb_narrow_xy_valid   (rcmb_narrow_xy_valid),

        .rdct_narrow_xy_bank    (rcmb_xy_bank),
        .rdct_narrow_xy_addr    (rcmb_xy_addr),
        .rdct_narrow_x_dout     (rcmb_x_dout),
        .rdct_narrow_y_dout     (rcmb_y_dout),
        .rdct_narrow_xy_valid   (rcmb_xy_valid)
    );


    //
    // Recombinator Enable Logic
    //
    // rcmb_ena is high for the clock after ce_a is asserted while ce_b, ce_m
    // and ce_p are all still low, i.e. when the DSP enable chain starts up
    // from an idle pipeline.
    //
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) rcmb_ena <= 1'b0;
        else        rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p;


    //
    // Handy Completion Flags
    //
    // These pick, per phase, which stage of the done-flag chain ends a column
    // pass: square uses "surely", rectangle uses "tardy", triangle uses
    // "surely" except in the last column where it uses "tardy".
    //
    wire square_done    = square_surely_done_flop;
    wire triangle_done  = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop;
    wire rectangle_done = rectangle_tardy_done_flop;


    //
    // FSM Transition Logic
    //
    // IDLE -> [SQUARE columns -> SQUARE_HOLDOFF] -> TRIANGLE columns ->
    // TRIANGLE_HOLDOFF -> RECTANGLE columns -> RECTANGLE_HOLDOFF ->
    // WAIT_REDUCTOR -> STOP -> IDLE.
    // only_reduce skips the bracketed SQUARE part; just_multiply jumps from
    // SQUARE_HOLDOFF straight to STOP.  Every phase runs COL_0 once, then
    // COL_N repeatedly until col_is_last is set.  HOLDOFF states wait for the
    // recombinator (rcmb_rdy).
    //
    assign fsm_state_after_idle = !only_reduce ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT;

    assign fsm_state_after_mult_square = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT;
    assign fsm_state_after_mult_triangle = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
    assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT;

    assign fsm_state_after_square_holdoff = just_multiply ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT;

    always @* begin
        //
        // default assignment, overridden by the case below
        //
        fsm_state_next = MMM_FSM_STATE_IDLE;
        //
        case (fsm_state)
            MMM_FSM_STATE_IDLE:                       fsm_state_next = ena            ? fsm_state_after_idle /*MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT*/ : MMM_FSM_STATE_IDLE;

            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:     fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG:     fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:     fsm_state_next = square_done    ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY;
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:     fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:     fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:     fsm_state_next = square_done    ? fsm_state_after_mult_square : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
            MMM_FSM_STATE_MULT_SQUARE_HOLDOFF:        fsm_state_next = rcmb_rdy       ? fsm_state_after_square_holdoff : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;

            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG:   fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:   fsm_state_next = triangle_done  ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:   fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   fsm_state_next = triangle_done  ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
            MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF:      fsm_state_next = rcmb_rdy       ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;

            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:  fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG:  fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY:  fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT:  fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:  fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:  fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:     fsm_state_next = rcmb_rdy       ? MMM_FSM_STATE_WAIT_REDUCTOR : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;

            MMM_FSM_STATE_WAIT_REDUCTOR:              fsm_state_next = rdct_rdy       ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_WAIT_REDUCTOR;

            MMM_FSM_STATE_STOP:                       fsm_state_next =                  MMM_FSM_STATE_IDLE ;

            default:                                  fsm_state_next =                  MMM_FSM_STATE_IDLE ;
        endcase
        //
    end


    //
    // Reductor Control Logic
    //
    // rdct_ena is a registered one-cycle pulse: high during the clock that
    // follows the RECTANGLE_COL_0_INIT state, low otherwise.
    //
    reg rdct_ena_reg = 1'b0;

    assign rdct_ena = rdct_ena_reg;

    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) rdct_ena_reg <= 1'b0;
        else case (fsm_state)
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
            default:                                 rdct_ena_reg <= 1'b0;
        endcase


    //
    // Ready Logic
    //
    // rdy is high after reset, drops on the clock edge where ena is sampled
    // while rdy is high, and returns high once the FSM is in the STOP state.
    //
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n)     rdy_reg <= 1'b1;
        else begin
            if (rdy && ena)                                 rdy_reg <= 1'b0;
            if (!rdy && (fsm_state == MMM_FSM_STATE_STOP))  rdy_reg <= 1'b1;
        end


    //
    // Debug
    //
    // Simulation-only counter of the clocks during which dsp_xy_ce_m is high;
    // compiled in only when MODEXPNG_ENABLE_DEBUG is defined.
    //
    `ifdef MODEXPNG_ENABLE_DEBUG

    real load_cyc_mult = 0.0;

    always @(posedge clk)
        //
        if (dsp_xy_ce_m)
            load_cyc_mult <= load_cyc_mult + 1.0;

    `endif


endmodule