//====================================================================== // // Copyright (c) 2019, NORDUnet A/S All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // - Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // - Neither the name of the NORDUnet nor the names of its contributors may // be used to endorse or promote products derived from this software // without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
//
//======================================================================

//
// modexpng_general_worker
//
// Generates read/write enable, bank and address signals for two operand
// port families ("wide" and "narrow"), each duplicated with an _x and a
// _y suffix, as a function of `opcode` and the worker FSM state, and
// feeds an array of DSP add/subtract slices.
//
// NOTE(review): only part of the module is visible in this excerpt; the
// logic driving `rdy` and the FSM transitions is not visible in this
// part of the file.
//
module modexpng_general_worker
(
    clk, rst_n,
    ena, rdy,
    sel_narrow_in, sel_narrow_out,
    sel_wide_in, sel_wide_out,
    opcode,
    word_index_last,
    word_index_last_half,
    wrk_rd_wide_xy_ena_x, wrk_rd_wide_xy_bank_x, wrk_rd_wide_xy_addr_x, wrk_rd_wide_x_din_x, wrk_rd_wide_y_din_x,
    wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x, wrk_rd_narrow_x_din_x, wrk_rd_narrow_y_din_x,
    wrk_rd_wide_xy_ena_y, wrk_rd_wide_xy_bank_y, wrk_rd_wide_xy_addr_y, wrk_rd_wide_x_din_y, wrk_rd_wide_y_din_y,
    wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y, wrk_rd_narrow_x_din_y, wrk_rd_narrow_y_din_y,
    wrk_wr_wide_xy_ena_x, wrk_wr_wide_xy_bank_x, wrk_wr_wide_xy_addr_x, wrk_wr_wide_x_dout_x, wrk_wr_wide_y_dout_x,
    wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x, wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x,
    wrk_wr_wide_xy_ena_y, wrk_wr_wide_xy_bank_y, wrk_wr_wide_xy_addr_y, wrk_wr_wide_x_dout_y, wrk_wr_wide_y_dout_y,
    wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y, wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y
);

    //
    // Headers
    //
    // These includes supply the parameters and macros used below (widths,
    // UOP_OPCODE_*, BANK_*, OP_ADDR_*, DSP48E1_*, `MODEXPNG_DSP_SLICE_ADDSUB).
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_microcode.vh"
    `include "modexpng_dsp48e1.vh"
    `include "modexpng_dsp_slice_primitives.vh"

    //
    // Ports
    //
    input                        clk;       // all registers are clocked on posedge clk
    input                        rst_n;     // active-low asynchronous reset (used by the enable/CE registers)

    input                        ena;
    output                       rdy;

    // Bank selectors used as read sources (*_in) and write destinations (*_out).
    input  [  BANK_ADDR_W -1:0]  sel_narrow_in;
    input  [  BANK_ADDR_W -1:0]  sel_narrow_out;
    input  [  BANK_ADDR_W -1:0]  sel_wide_in;
    input  [  BANK_ADDR_W -1:0]  sel_wide_out;

    input  [ UOP_OPCODE_W -1:0]  opcode;

    // Address values the read counters compare against to raise the
    // "is_last" / "is_last_half" flags.
    input  [    OP_ADDR_W -1:0]  word_index_last;
    input  [    OP_ADDR_W -1:0]  word_index_last_half;

    // Read ports: enable/bank/address out, two data words (x and y) in.
    output                       wrk_rd_wide_xy_ena_x;
    output [  BANK_ADDR_W -1:0]  wrk_rd_wide_xy_bank_x;
    output [    OP_ADDR_W -1:0]  wrk_rd_wide_xy_addr_x;
    input  [   WORD_EXT_W -1:0]  wrk_rd_wide_x_din_x;
    input  [   WORD_EXT_W -1:0]  wrk_rd_wide_y_din_x;

    output                       wrk_rd_narrow_xy_ena_x;
    output [  BANK_ADDR_W -1:0]  wrk_rd_narrow_xy_bank_x;
    output [    OP_ADDR_W -1:0]  wrk_rd_narrow_xy_addr_x;
    input  [   WORD_EXT_W -1:0]  wrk_rd_narrow_x_din_x;
    input  [   WORD_EXT_W -1:0]  wrk_rd_narrow_y_din_x;

    output                       wrk_rd_wide_xy_ena_y;
    output [  BANK_ADDR_W -1:0]  wrk_rd_wide_xy_bank_y;
    output [    OP_ADDR_W -1:0]  wrk_rd_wide_xy_addr_y;
    input  [   WORD_EXT_W -1:0]  wrk_rd_wide_x_din_y;
    input  [   WORD_EXT_W -1:0]  wrk_rd_wide_y_din_y;

    output                       wrk_rd_narrow_xy_ena_y;
    output [  BANK_ADDR_W -1:0]  wrk_rd_narrow_xy_bank_y;
    output [    OP_ADDR_W -1:0]  wrk_rd_narrow_xy_addr_y;
    input  [   WORD_EXT_W -1:0]  wrk_rd_narrow_x_din_y;
    input  [   WORD_EXT_W -1:0]  wrk_rd_narrow_y_din_y;

    // Write ports: enable/bank/address and two data words (x and y) out.
    output                       wrk_wr_wide_xy_ena_x;
    output [  BANK_ADDR_W -1:0]  wrk_wr_wide_xy_bank_x;
    output [    OP_ADDR_W -1:0]  wrk_wr_wide_xy_addr_x;
    output [   WORD_EXT_W -1:0]  wrk_wr_wide_x_dout_x;
    output [   WORD_EXT_W -1:0]  wrk_wr_wide_y_dout_x;

    output                       wrk_wr_narrow_xy_ena_x;
    output [  BANK_ADDR_W -1:0]  wrk_wr_narrow_xy_bank_x;
    output [    OP_ADDR_W -1:0]  wrk_wr_narrow_xy_addr_x;
    output [   WORD_EXT_W -1:0]  wrk_wr_narrow_x_dout_x;
    output [   WORD_EXT_W -1:0]  wrk_wr_narrow_y_dout_x;

    output                       wrk_wr_wide_xy_ena_y;
    output [  BANK_ADDR_W -1:0]  wrk_wr_wide_xy_bank_y;
    output [    OP_ADDR_W -1:0]  wrk_wr_wide_xy_addr_y;
    output [   WORD_EXT_W -1:0]  wrk_wr_wide_x_dout_y;
    output [   WORD_EXT_W -1:0]  wrk_wr_wide_y_dout_y;

    output                       wrk_wr_narrow_xy_ena_y;
    output [  BANK_ADDR_W -1:0]  wrk_wr_narrow_xy_bank_y;
    output [    OP_ADDR_W -1:0]  wrk_wr_narrow_xy_addr_y;
    output [   WORD_EXT_W -1:0]  wrk_wr_narrow_x_dout_y;
    output [   WORD_EXT_W -1:0]  wrk_wr_narrow_y_dout_y;

    //
    // FSM Declaration
    //
    // State encodings. Note that the two BUSY states use 4'hA/4'hB, i.e. the
    // numeric codes do not follow the PRE -> BUSY -> POST listing order.
    //
    localparam [3:0] WRK_FSM_STATE_IDLE          = 4'h0;

    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1  = 4'h1;
    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2  = 4'h2;
    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE3  = 4'h3;
    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE4  = 4'h4;

    localparam [3:0] WRK_FSM_STATE_BUSY1         = 4'hA;
    localparam [3:0] WRK_FSM_STATE_BUSY2         = 4'hB;

    localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5;
    localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
    localparam [3:0] WRK_FSM_STATE_LATENCY_POST3 = 4'h7;
    localparam [3:0] WRK_FSM_STATE_LATENCY_POST4 = 4'h8;

    localparam [3:0] WRK_FSM_STATE_STOP          = 4'hF;

    reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;   // current state
    reg [3:0] wrk_fsm_state_next;                   // next state (driver not visible in this part of the file)

    //
    // Control Signals
    //
    // Internal registers behind the wrk_rd_* / wrk_wr_* output ports.
    // Only the enable bits carry an initial value.
    //
    reg                      rd_wide_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0]   rd_wide_bank_x;
    reg [  OP_ADDR_W -1:0]   rd_wide_addr_x;

    reg                      rd_narrow_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0]   rd_narrow_bank_x;
    reg [  OP_ADDR_W -1:0]   rd_narrow_addr_x;

    reg                      rd_wide_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0]   rd_wide_bank_y;
    reg [  OP_ADDR_W -1:0]   rd_wide_addr_y;

    reg                      rd_narrow_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0]   rd_narrow_bank_y;
    reg [  OP_ADDR_W -1:0]   rd_narrow_addr_y;

    reg                      wr_wide_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0]   wr_wide_xy_bank_x;
    reg [  OP_ADDR_W -1:0]   wr_wide_xy_addr_x;
    reg [ WORD_EXT_W -1:0]   wr_wide_x_dout_x;
    reg [ WORD_EXT_W -1:0]   wr_wide_y_dout_x;

    reg                      wr_narrow_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W -1:0]   wr_narrow_xy_bank_x;
    reg [  OP_ADDR_W -1:0]   wr_narrow_xy_addr_x;
    reg [ WORD_EXT_W -1:0]   wr_narrow_x_dout_x;
    reg [ WORD_EXT_W -1:0]   wr_narrow_y_dout_x;

    reg                      wr_wide_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0]   wr_wide_xy_bank_y;
    reg [  OP_ADDR_W -1:0]   wr_wide_xy_addr_y;
    reg [ WORD_EXT_W -1:0]   wr_wide_x_dout_y;
    reg [ WORD_EXT_W -1:0]   wr_wide_y_dout_y;

    reg                      wr_narrow_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W -1:0]   wr_narrow_xy_bank_y;
    reg [  OP_ADDR_W -1:0]   wr_narrow_xy_addr_y;
    reg [ WORD_EXT_W -1:0]   wr_narrow_x_dout_y;
    reg [ WORD_EXT_W -1:0]   wr_narrow_y_dout_y;

    //
    // Mapping
    //
    // Straight one-to-one wiring of the internal registers onto the ports.
    //
    assign wrk_rd_wide_xy_ena_x    = rd_wide_ena_x;
    assign wrk_rd_wide_xy_bank_x   = rd_wide_bank_x;
    assign wrk_rd_wide_xy_addr_x   = rd_wide_addr_x;

    assign wrk_rd_narrow_xy_ena_x  = rd_narrow_ena_x;
    assign wrk_rd_narrow_xy_bank_x = rd_narrow_bank_x;
    assign wrk_rd_narrow_xy_addr_x = rd_narrow_addr_x;

    assign wrk_rd_wide_xy_ena_y    = rd_wide_ena_y;
    assign wrk_rd_wide_xy_bank_y   = rd_wide_bank_y;
    assign wrk_rd_wide_xy_addr_y   = rd_wide_addr_y;

    assign wrk_rd_narrow_xy_ena_y  = rd_narrow_ena_y;
    assign wrk_rd_narrow_xy_bank_y = rd_narrow_bank_y;
    assign wrk_rd_narrow_xy_addr_y = rd_narrow_addr_y;

    assign wrk_wr_wide_xy_ena_x    = wr_wide_xy_ena_x;
    assign wrk_wr_wide_xy_bank_x   = wr_wide_xy_bank_x;
    assign wrk_wr_wide_xy_addr_x   = wr_wide_xy_addr_x;
    assign wrk_wr_wide_x_dout_x    = wr_wide_x_dout_x;
    assign wrk_wr_wide_y_dout_x    = wr_wide_y_dout_x;

    assign wrk_wr_narrow_xy_ena_x  = wr_narrow_xy_ena_x;
    assign wrk_wr_narrow_xy_bank_x = wr_narrow_xy_bank_x;
    assign wrk_wr_narrow_xy_addr_x = wr_narrow_xy_addr_x;
    assign wrk_wr_narrow_x_dout_x  = wr_narrow_x_dout_x;
    assign wrk_wr_narrow_y_dout_x  = wr_narrow_y_dout_x;

    assign wrk_wr_wide_xy_ena_y    = wr_wide_xy_ena_y;
    assign wrk_wr_wide_xy_bank_y   = wr_wide_xy_bank_y;
    assign wrk_wr_wide_xy_addr_y   = wr_wide_xy_addr_y;
    assign wrk_wr_wide_x_dout_y    = wr_wide_x_dout_y;
    assign wrk_wr_wide_y_dout_y    = wr_wide_y_dout_y;

    assign wrk_wr_narrow_xy_ena_y  = wr_narrow_xy_ena_y;
    assign wrk_wr_narrow_xy_bank_y = wr_narrow_xy_bank_y;
    assign wrk_wr_narrow_xy_addr_y = wr_narrow_xy_addr_y;
    assign wrk_wr_narrow_x_dout_y  = wr_narrow_x_dout_y;
    assign wrk_wr_narrow_y_dout_y  = wr_narrow_y_dout_y;

    //
    // Delays
    //
    // Five-deep shift registers of the read addresses (index 0 is the most
    // recent sample), one-cycle copies of the read data inputs, and a
    // two-stage delay of the narrow read enables.
    //
    reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:4];
    reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:4];

    reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:4];
    reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:4];

    reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly1;
    reg [WORD_EXT_W -1:0] rd_wide_y_din_x_dly1;
    reg [WORD_EXT_W -1:0] rd_wide_x_din_y_dly1;
    reg [WORD_EXT_W -1:0] rd_wide_y_din_y_dly1;

    reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly1;
    reg [WORD_EXT_W -1:0] rd_narrow_y_din_x_dly1;
    reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly1;
    reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly1;

    reg rd_narrow_ena_x_dly1 = 1'b0;
    reg rd_narrow_ena_y_dly1 = 1'b0;
    reg rd_narrow_ena_x_dly2 = 1'b0;
    reg rd_narrow_ena_y_dly2 = 1'b0;

    // Free-running delay registers (no reset, no enable).
    always @(posedge clk) begin
        //
        {rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x};
        {rd_wide_y_din_x_dly1} <= {wrk_rd_wide_y_din_x};
        {rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y};
        {rd_wide_y_din_y_dly1} <= {wrk_rd_wide_y_din_y};
        //
// Continuation of the free-running delay-register always block:
// one-cycle copies of the narrow read data inputs.
        {rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x};
        {rd_narrow_y_din_x_dly1} <= {wrk_rd_narrow_y_din_x};
        {rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y};
        {rd_narrow_y_din_y_dly1} <= {wrk_rd_narrow_y_din_y};
        //
        // Address history: element [0] takes the current read address and
        // every older element shifts up by one, so [k] is the address that
        // was presented k+1 clock edges ago.
        //
        {rd_narrow_addr_x_dly[4], rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0]} <=
            {rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0], rd_narrow_addr_x};
        {rd_narrow_addr_y_dly[4], rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0]} <=
            {rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0], rd_narrow_addr_y};
        //
        {rd_wide_addr_x_dly[4], rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0]} <=
            {rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0], rd_wide_addr_x};
        {rd_wide_addr_y_dly[4], rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0]} <=
            {rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0], rd_wide_addr_y};
        //
        // Two-stage delay of the narrow read enables (dly1 = 1 cycle, dly2 = 2 cycles).
        //
        {rd_narrow_ena_x_dly2, rd_narrow_ena_x_dly1} <= {rd_narrow_ena_x_dly1, rd_narrow_ena_x};
        {rd_narrow_ena_y_dly2, rd_narrow_ena_y_dly1} <= {rd_narrow_ena_y_dly1, rd_narrow_ena_y};
        //
    end

    //
    // Source Read Enable Logic
    //

    // Drive both (x and y) wide read enables to the same value.
    task _update_wide_rd_en;
        input _en;
        {rd_wide_ena_x, rd_wide_ena_y } <= {2{_en}};
    endtask

    // Drive both (x and y) narrow read enables to the same value.
    task _update_narrow_rd_en;
        input _en;
        {rd_narrow_ena_x, rd_narrow_ena_y} <= {2{_en}};
    endtask

    // Convenience wrappers around the two tasks above.
    task enable_wide_rd_en;     _update_wide_rd_en(1'b1);   endtask
    task disable_wide_rd_en;    _update_wide_rd_en(1'b0);   endtask
    task enable_narrow_rd_en;   _update_narrow_rd_en(1'b1); endtask
    task disable_narrow_rd_en;  _update_narrow_rd_en(1'b0); endtask

    // Read enable registers: cleared by reset; otherwise cleared by default
    // every cycle and re-asserted by the opcode/state decode that follows
    // (the last non-blocking assignment in the block wins).
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            //
            disable_wide_rd_en;
            disable_narrow_rd_en;
            //
        end else begin
            //
            disable_wide_rd_en;
            disable_narrow_rd_en;
            //
            case
// Selector of the read-enable decode; the decode is keyed on the NEXT FSM state.
            (opcode)
                //
                // Opcodes that read only from the narrow ports.
                //
                UOP_OPCODE_PROPAGATE_CARRIES,
                UOP_OPCODE_OUTPUT_FROM_NARROW,
                UOP_OPCODE_MODULAR_REDUCE_INIT,
                UOP_OPCODE_MODULAR_SUBTRACT_X:
                    //
                    case (wrk_fsm_state_next)
                        WRK_FSM_STATE_LATENCY_PRE1,
                        WRK_FSM_STATE_LATENCY_PRE3,
                        WRK_FSM_STATE_BUSY1:        enable_narrow_rd_en;
                    endcase
                //
                // Opcodes that read from both wide and narrow ports.
                //
                UOP_OPCODE_COPY_CRT_Y2X,
                UOP_OPCODE_MODULAR_SUBTRACT_Y,
                UOP_OPCODE_MODULAR_SUBTRACT_Z,
                UOP_OPCODE_REGULAR_ADD_UNEVEN:
                    //
                    case (wrk_fsm_state_next)
                        WRK_FSM_STATE_LATENCY_PRE1,
                        WRK_FSM_STATE_LATENCY_PRE3,
                        WRK_FSM_STATE_BUSY1:        begin enable_wide_rd_en; enable_narrow_rd_en; end
                    endcase
                //
                // These two opcodes read in all four PRE states and in both
                // BUSY states, not just in PRE1/PRE3/BUSY1.
                //
                UOP_OPCODE_COPY_LADDERS_X2Y,
                UOP_OPCODE_CROSS_LADDERS_X2Y:
                    //
                    case (wrk_fsm_state_next)
                        WRK_FSM_STATE_LATENCY_PRE1,
                        WRK_FSM_STATE_LATENCY_PRE2,
                        WRK_FSM_STATE_LATENCY_PRE3,
                        WRK_FSM_STATE_LATENCY_PRE4,
                        WRK_FSM_STATE_BUSY1,
                        WRK_FSM_STATE_BUSY2:        begin enable_wide_rd_en; enable_narrow_rd_en; end
                    endcase
                //
                // Reads only from the wide ports.
                //
                UOP_OPCODE_MERGE_LH:
                    //
                    case (wrk_fsm_state_next)
                        WRK_FSM_STATE_LATENCY_PRE1,
                        WRK_FSM_STATE_LATENCY_PRE3,
                        WRK_FSM_STATE_BUSY1:        enable_wide_rd_en;
                    endcase
                //
            endcase
            //
        end

    //
    // Destination Write Enable Logic
    //

    // Drive both (x and y) wide write enables to the same value.
    task _update_wide_wr_en;
        input _en;
        {wr_wide_xy_ena_x, wr_wide_xy_ena_y } <= {2{_en}};
    endtask

    // Drive both (x and y) narrow write enables to the same value.
    task _update_narrow_wr_en;
        input _en;
        {wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{_en}};
    endtask

    // Convenience wrappers around the two tasks above.
    task enable_wide_wr_en;     _update_wide_wr_en(1'b1);   endtask
    task disable_wide_wr_en;    _update_wide_wr_en(1'b0);   endtask
    task enable_narrow_wr_en;   _update_narrow_wr_en(1'b1); endtask
    task disable_narrow_wr_en;  _update_narrow_wr_en(1'b0); endtask

    // Write enable registers: same default-then-override scheme as the read
    // enables, but decoded from the CURRENT FSM state. Each opcode group
    // writes either in BUSY1/POST1/POST3 or in BUSY2/POST2/POST4.
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            //
            disable_wide_wr_en;
            disable_narrow_wr_en;
            //
        end else begin
            //
            disable_wide_wr_en;
            disable_narrow_wr_en;
            //
            case (opcode)
                //
                UOP_OPCODE_PROPAGATE_CARRIES,
                UOP_OPCODE_MODULAR_SUBTRACT_X,
                UOP_OPCODE_REGULAR_ADD_UNEVEN:
                    //
                    case (wrk_fsm_state)
                        WRK_FSM_STATE_BUSY2,
                        WRK_FSM_STATE_LATENCY_POST2,
                        WRK_FSM_STATE_LATENCY_POST4: enable_narrow_wr_en;
                    endcase
                //
                UOP_OPCODE_MERGE_LH:
                    //
                    case (wrk_fsm_state)
                        WRK_FSM_STATE_BUSY1,
                        WRK_FSM_STATE_LATENCY_POST1,
                        WRK_FSM_STATE_LATENCY_POST3: enable_narrow_wr_en;
                    endcase
                //
                UOP_OPCODE_COPY_CRT_Y2X,
                UOP_OPCODE_COPY_LADDERS_X2Y,
                UOP_OPCODE_CROSS_LADDERS_X2Y,
                UOP_OPCODE_MODULAR_SUBTRACT_Z:
                    //
                    case (wrk_fsm_state)
                        WRK_FSM_STATE_BUSY1,
                        WRK_FSM_STATE_LATENCY_POST1,
                        WRK_FSM_STATE_LATENCY_POST3: begin enable_wide_wr_en; enable_narrow_wr_en; end
                    endcase
                //
                UOP_OPCODE_MODULAR_SUBTRACT_Y:
                    //
                    case (wrk_fsm_state)
                        WRK_FSM_STATE_BUSY2,
                        WRK_FSM_STATE_LATENCY_POST2,
                        WRK_FSM_STATE_LATENCY_POST4: enable_wide_wr_en;
                    endcase
                //
                UOP_OPCODE_MODULAR_REDUCE_INIT:
                    //
                    case (wrk_fsm_state)
                        WRK_FSM_STATE_BUSY1,
                        WRK_FSM_STATE_LATENCY_POST1,
                        WRK_FSM_STATE_LATENCY_POST3: enable_wide_wr_en;
                    endcase
                //
            endcase
            //
        end

    //
    // Source Read Address Logic
    //
    // *_addr_next holds the address that will be issued on the next advance;
    // the *_is_last / *_is_last_half flags record whether the corresponding
    // address equals word_index_last / word_index_last_half.
    //
    reg [OP_ADDR_W -1:0] rd_wide_addr_next;
    reg [OP_ADDR_W -1:0] rd_narrow_addr_next;

    reg rd_wide_addr_is_last             = 1'b0;
    reg rd_narrow_addr_is_last           = 1'b0;
    reg rd_wide_addr_is_last_half        = 1'b0;
    reg rd_narrow_addr_is_last_half      = 1'b0;

    reg rd_wide_addr_next_is_last        = 1'b0;
    reg rd_narrow_addr_next_is_last      = 1'b0;
    reg rd_wide_addr_next_is_last_half   = 1'b0;
    reg rd_narrow_addr_next_is_last_half = 1'b0;

    // 4-bit histories of the *_is_last_half flags; bit [0] is the newest sample.
    reg [3:0] rd_wide_addr_is_last_half_dly   = 4'h0;
    reg [3:0] rd_narrow_addr_is_last_half_dly = 4'h0;

    always @(posedge clk) begin
        //
        rd_wide_addr_is_last_half_dly   <= {rd_wide_addr_is_last_half_dly[2:0],   rd_wide_addr_is_last_half};
        rd_narrow_addr_is_last_half_dly <= {rd_narrow_addr_is_last_half_dly[2:0], rd_narrow_addr_is_last_half};
        //
    end

    // Load the same bank/address into the x and y wide read registers and
    // clear both wide "current address" flags.
    task preset_rd_wide_bank_addr;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        begin
            {rd_wide_bank_x, rd_wide_addr_x} <= {bank, addr};
            {rd_wide_bank_y, rd_wide_addr_y} <= {bank, addr};
            rd_wide_addr_is_last             <= 1'b0;
            rd_wide_addr_is_last_half        <= 1'b0;
        end
    endtask

    // Narrow counterpart of preset_rd_wide_bank_addr.
    task preset_rd_narrow_bank_addr;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        begin
            {rd_narrow_bank_x, rd_narrow_addr_x} <= {bank, addr};
            {rd_narrow_bank_y, rd_narrow_addr_y} <= {bank, addr};
            rd_narrow_addr_is_last           <= 1'b0;
// Tail of task preset_rd_narrow_bank_addr: clear the second narrow flag.
            rd_narrow_addr_is_last_half      <= 1'b0;
        end
    endtask

    // Load the wide "next address" register and clear its two flags.
    task preset_rd_wide_addr_next;
        input [OP_ADDR_W -1:0] addr;
        begin
            rd_wide_addr_next              <= addr;
            rd_wide_addr_next_is_last      <= 1'b0;
            rd_wide_addr_next_is_last_half <= 1'b0;
        end
    endtask

    // Load the narrow "next address" register and clear its two flags.
    task preset_rd_narrow_addr_next;
        input [OP_ADDR_W -1:0] addr;
        begin
            rd_narrow_addr_next              <= addr;
            rd_narrow_addr_next_is_last      <= 1'b0;
            rd_narrow_addr_next_is_last_half <= 1'b0;
        end
    endtask

    // The keep_* tasks re-assign a register to itself. They are called
    // after the unconditional preset at the top of the address always block,
    // so that this later non-blocking assignment overrides the default.
    task keep_rd_wide_bank;
        begin
            {rd_wide_bank_x} <= {rd_wide_bank_x};
            {rd_wide_bank_y} <= {rd_wide_bank_y};
        end
    endtask

    // Point both wide read ports at a new bank.
    task switch_rd_wide_bank;
        input [BANK_ADDR_W -1:0] bank;
        begin
            {rd_wide_bank_x} <= {bank};
            {rd_wide_bank_y} <= {bank};
        end
    endtask

    task keep_rd_wide_addr;
        begin
            {rd_wide_addr_x} <= {rd_wide_addr_x};
            {rd_wide_addr_y} <= {rd_wide_addr_y};
        end
    endtask

    // Move the wide read address to rd_wide_addr_next and update the flags
    // that describe the newly issued address.
    task advance_rd_wide_addr;
        begin
            {rd_wide_addr_x}          <= {rd_wide_addr_next};
            {rd_wide_addr_y}          <= {rd_wide_addr_next};
            rd_wide_addr_is_last      <= rd_wide_addr_next == word_index_last;
            rd_wide_addr_is_last_half <= rd_wide_addr_next == word_index_last_half;
        end
    endtask

    task keep_rd_narrow_bank;
        begin
            {rd_narrow_bank_x} <= {rd_narrow_bank_x};
            {rd_narrow_bank_y} <= {rd_narrow_bank_y};
        end
    endtask

    // Point both narrow read ports at a new bank.
    task switch_rd_narrow_bank;
        input [BANK_ADDR_W -1:0] bank;
        begin
            {rd_narrow_bank_x} <= {bank};
            {rd_narrow_bank_y} <= {bank};
        end
    endtask

    task keep_rd_narrow_addr;
        begin
            {rd_narrow_addr_x} <= {rd_narrow_addr_x};
            {rd_narrow_addr_y} <= {rd_narrow_addr_y};
        end
    endtask

    // Narrow counterpart of advance_rd_wide_addr.
    task advance_rd_narrow_addr;
        begin
            {rd_narrow_addr_x}          <= {rd_narrow_addr_next};
            {rd_narrow_addr_y}          <= {rd_narrow_addr_next};
            rd_narrow_addr_is_last      <= rd_narrow_addr_next == word_index_last;
            rd_narrow_addr_is_last_half <= rd_narrow_addr_next == word_index_last_half;
        end
    endtask

    // The flags are computed from the value of *_addr_next BEFORE this
    // cycle's increment, hence the comparison against (limit - 1): after
    // the clock edge they describe the incremented value.
    task update_rd_wide_addr_flags;
        begin
            rd_wide_addr_next_is_last      <= rd_wide_addr_next == (word_index_last - 1'b1);
            rd_wide_addr_next_is_last_half <= rd_wide_addr_next == (word_index_last_half - 1'b1);
        end
    endtask

    task update_rd_narrow_addr_flags;
        begin
            rd_narrow_addr_next_is_last      <= rd_narrow_addr_next == (word_index_last - 1'b1);
            rd_narrow_addr_next_is_last_half <= rd_narrow_addr_next == (word_index_last_half - 1'b1);
        end
    endtask

    // Increment the "next address", wrapping to OP_ADDR_ZERO after the
    // address flagged as last.
    task advance_rd_wide_addr_next;
        begin
            rd_wide_addr_next <= !rd_wide_addr_next_is_last ? rd_wide_addr_next + 1'b1 : OP_ADDR_ZERO;
            update_rd_wide_addr_flags;
        end
    endtask

    task advance_rd_narrow_addr_next;
        begin
            rd_narrow_addr_next <= !rd_narrow_addr_next_is_last ? rd_narrow_addr_next + 1'b1 : OP_ADDR_ZERO;
            update_rd_narrow_addr_flags;
        end
    endtask

    // Same as above, but wrapping after the address flagged as last_half.
    task advance_rd_wide_addr_next_half;
        begin
            rd_wide_addr_next <= !rd_wide_addr_next_is_last_half ? rd_wide_addr_next + 1'b1 : OP_ADDR_ZERO;
            update_rd_wide_addr_flags;
        end
    endtask

    task advance_rd_narrow_addr_next_half;
        begin
            rd_narrow_addr_next <= !rd_narrow_addr_next_is_last_half ? rd_narrow_addr_next + 1'b1 : OP_ADDR_ZERO;
            update_rd_narrow_addr_flags;
        end
    endtask

    // Read bank/address registers. Every cycle they default to the
    // BANK_DNC / OP_ADDR_DNC values (which also clears the is_last flags);
    // the opcode/next-state decode below overrides the default where needed.
    // In the "keep bank" states only the bank is preserved, the address
    // still takes the OP_ADDR_DNC default.
    always @(posedge clk) begin
        //
        preset_rd_wide_bank_addr  (BANK_DNC, OP_ADDR_DNC);
        preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC);
        //
        case (opcode)
            //
            // Narrow-only readers: start at address zero of sel_narrow_in in
            // PRE1, then advance in PRE3/BUSY1.
            //
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_OUTPUT_FROM_NARROW,
            UOP_OPCODE_MODULAR_SUBTRACT_X:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin
                        preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO);
                        preset_rd_narrow_addr_next(OP_ADDR_ONE);
                    end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1: begin
                        keep_rd_narrow_bank;
                        advance_rd_narrow_addr;
                        advance_rd_narrow_addr_next;
                    end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:
                        keep_rd_narrow_bank;
                endcase
            //
            // Wide + narrow readers, both walking the same address sequence.
            //
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_MODULAR_SUBTRACT_Z,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin
                        preset_rd_wide_bank_addr  (sel_wide_in, OP_ADDR_ZERO);
                        preset_rd_wide_addr_next  (OP_ADDR_ONE);
                        preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO);
                        preset_rd_narrow_addr_next(OP_ADDR_ONE);
                    end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1: begin
                        keep_rd_wide_bank;
                        advance_rd_wide_addr;
                        advance_rd_wide_addr_next;
// Continuation of the read-address decode (PRE3/BUSY1 branch of the
// COPY_CRT_Y2X / MODULAR_SUBTRACT_Z / REGULAR_ADD_UNEVEN group).
                        keep_rd_narrow_bank;
                        advance_rd_narrow_addr;
                        advance_rd_narrow_addr_next;
                    end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2: begin
                        keep_rd_wide_bank;
                        keep_rd_narrow_bank;
                    end
                endcase
            //
            // The wide bank is left at BANK_DNC here (this opcode does not
            // assert the wide read enable); the wide address counter still
            // runs, wrapping at the half-way point, and is later used as the
            // wide write address.
            //
            UOP_OPCODE_MODULAR_REDUCE_INIT:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin
                        preset_rd_wide_bank_addr  (BANK_DNC, OP_ADDR_ZERO);
                        preset_rd_wide_addr_next  (OP_ADDR_ONE);
                        preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO);
                        preset_rd_narrow_addr_next(OP_ADDR_ONE);
                    end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1: begin
                        advance_rd_wide_addr;
                        advance_rd_wide_addr_next_half;
                        keep_rd_narrow_bank;
                        advance_rd_narrow_addr;
                        advance_rd_narrow_addr_next;
                    end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:
                        keep_rd_narrow_bank;
                endcase
            //
            // These opcodes alternate the read bank every cycle: the *_in
            // bank on PRE1/PRE3/BUSY1 and the *_out bank on PRE2/PRE4/BUSY2,
            // holding the address during the *_out cycle.
            //
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin
                        preset_rd_wide_bank_addr  (sel_wide_in, OP_ADDR_ZERO);
                        preset_rd_wide_addr_next  (OP_ADDR_ONE);
                        preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO);
                        preset_rd_narrow_addr_next(OP_ADDR_ONE);
                    end
                    WRK_FSM_STATE_LATENCY_PRE2: begin
                        switch_rd_wide_bank  (sel_wide_out);
                        keep_rd_wide_addr;
                        switch_rd_narrow_bank(sel_narrow_out);
                        keep_rd_narrow_addr;
                    end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1: begin
                        advance_rd_wide_addr;
                        advance_rd_wide_addr_next;
                        switch_rd_wide_bank(sel_wide_in);
                        advance_rd_narrow_addr;
                        advance_rd_narrow_addr_next;
                        switch_rd_narrow_bank(sel_narrow_in);
                    end
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2: begin
                        keep_rd_wide_addr;
                        switch_rd_wide_bank  (sel_wide_out);
                        keep_rd_narrow_addr;
                        switch_rd_narrow_bank(sel_narrow_out);
                    end
                endcase
            //
            // Wide side always reads the fixed bank BANK_WIDE_N.
            //
            UOP_OPCODE_MODULAR_SUBTRACT_Y:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin
                        preset_rd_wide_bank_addr  (BANK_WIDE_N, OP_ADDR_ZERO);
                        preset_rd_wide_addr_next  (OP_ADDR_ONE);
                        preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO);
                        preset_rd_narrow_addr_next(OP_ADDR_ONE);
                    end
                    WRK_FSM_STATE_LATENCY_PRE3,
                    WRK_FSM_STATE_BUSY1: begin
                        keep_rd_wide_bank;
                        advance_rd_wide_addr;
                        advance_rd_wide_addr_next;
                        keep_rd_narrow_bank;
                        advance_rd_narrow_addr;
                        advance_rd_narrow_addr_next;
                    end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2: begin
                        keep_rd_wide_bank;
                        keep_rd_narrow_bank;
                    end
                endcase
            //
            // Wide side starts in BANK_WIDE_L with an address counter that
            // wraps at the half-way point; in BUSY1 the bank switches to
            // BANK_WIDE_H once the newest is_last_half history bit is set.
            // The narrow address counter runs over the full range with the
            // bank left at BANK_DNC.
            //
            UOP_OPCODE_MERGE_LH:
                //
                case (wrk_fsm_state_next)
                    WRK_FSM_STATE_LATENCY_PRE1: begin
                        preset_rd_wide_bank_addr  (BANK_WIDE_L, OP_ADDR_ZERO);
                        preset_rd_wide_addr_next  (OP_ADDR_ONE);
                        preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_ZERO);
                        preset_rd_narrow_addr_next(OP_ADDR_ONE);
                    end
                    WRK_FSM_STATE_LATENCY_PRE3: begin
                        keep_rd_wide_bank;
                        advance_rd_wide_addr;
                        advance_rd_wide_addr_next_half;
                        advance_rd_narrow_addr;
                        advance_rd_narrow_addr_next;
                    end
                    WRK_FSM_STATE_BUSY1: begin
                        if (!rd_wide_addr_is_last_half_dly[0]) keep_rd_wide_bank;
                        else                                   switch_rd_wide_bank(BANK_WIDE_H);
                        advance_rd_wide_addr;
                        advance_rd_wide_addr_next_half;
                        advance_rd_narrow_addr;
                        advance_rd_narrow_addr_next;
                    end
                    WRK_FSM_STATE_LATENCY_PRE2,
                    WRK_FSM_STATE_LATENCY_PRE4,
                    WRK_FSM_STATE_BUSY2:
                        keep_rd_wide_bank;
                endcase
            //
        endcase
        //
    end

    //
    // Destination Write Address Logic
    //
    // Two-stage pipeline selecting the wide destination bank for
    // UOP_OPCODE_MODULAR_REDUCE_INIT: BANK_WIDE_L while the delayed narrow
    // read address is at or below word_index_last_half, BANK_WIDE_H after.
    //
    reg                    modular_reduce_init_first_half_x;
    reg                    modular_reduce_init_first_half_y;
    reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_x;
    reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_y;

    always @(posedge clk) begin
        //
        // Read as "flag <= (addr <= limit)": the first "<=" is the
        // non-blocking assignment, the second is a less-or-equal comparison.
        //
        modular_reduce_init_first_half_x <= rd_narrow_addr_x_dly[1] <= word_index_last_half;
        modular_reduce_init_first_half_y <= rd_narrow_addr_y_dly[1] <= word_index_last_half;
        //
        modular_reduce_init_sel_wide_out_x <= modular_reduce_init_first_half_x ? BANK_WIDE_L : BANK_WIDE_H;
        modular_reduce_init_sel_wide_out_y <= modular_reduce_init_first_half_y ?
// Tail of the modular_reduce_init_sel_wide_out_y bank selection.
            BANK_WIDE_L : BANK_WIDE_H;
        //
    end

    // Load the wide write bank/address registers (x and y independently).
    task update_wr_wide_bank_addr;
        input [BANK_ADDR_W -1:0] x_bank;
        input [BANK_ADDR_W -1:0] y_bank;
        input [  OP_ADDR_W -1:0] x_addr;
        input [  OP_ADDR_W -1:0] y_addr;
        begin
            {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {x_bank, x_addr};
            {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {y_bank, y_addr};
        end
    endtask

    // Load the narrow write bank/address registers (x and y independently).
    task update_wr_narrow_bank_addr;
        input [BANK_ADDR_W -1:0] x_bank;
        input [BANK_ADDR_W -1:0] y_bank;
        input [  OP_ADDR_W -1:0] x_addr;
        input [  OP_ADDR_W -1:0] y_addr;
        begin
            {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {x_bank, x_addr};
            {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {y_bank, y_addr};
        end
    endtask

    // Write bank/address registers. They default to BANK_DNC / OP_ADDR_DNC
    // every cycle; in the states where the matching write enable is raised
    // they take the destination bank and a delayed copy of the read address.
    // The opcode groups that write in BUSY2/POST2/POST4 use delay tap [4],
    // those that write in BUSY1/POST1/POST3 use tap [3].
    always @(posedge clk) begin
        //
        update_wr_wide_bank_addr  (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
        update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
        //
        case (opcode)
            //
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_MODULAR_SUBTRACT_X,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY2,
                    WRK_FSM_STATE_LATENCY_POST2,
                    WRK_FSM_STATE_LATENCY_POST4:
                        update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[4], rd_narrow_addr_y_dly[4]);
                endcase
            //
            UOP_OPCODE_MERGE_LH:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3:
                        update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
                endcase
            //
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y,
            UOP_OPCODE_MODULAR_SUBTRACT_Z:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3: begin
                        update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
                        update_wr_wide_bank_addr  (sel_wide_out,   sel_wide_out,   rd_wide_addr_x_dly[3],   rd_wide_addr_y_dly[3]  );
                    end
                endcase
            //
            // Destination bank comes from the registered L/H selection
            // instead of sel_wide_out.
            //
            UOP_OPCODE_MODULAR_REDUCE_INIT:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY1,
                    WRK_FSM_STATE_LATENCY_POST1,
                    WRK_FSM_STATE_LATENCY_POST3:
                        update_wr_wide_bank_addr(modular_reduce_init_sel_wide_out_x, modular_reduce_init_sel_wide_out_y, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);
                endcase
            //
            UOP_OPCODE_MODULAR_SUBTRACT_Y:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY2,
                    WRK_FSM_STATE_LATENCY_POST2,
                    WRK_FSM_STATE_LATENCY_POST4:
                        update_wr_wide_bank_addr(sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[4], rd_wide_addr_y_dly[4]);
                endcase
            //
        endcase
        //
    end

    //
    // UOP_OPCODE_REGULAR_ADD_UNEVEN
    //
    // Sticky flag for this opcode: cleared in PRE4, set in BUSY2 once bit [2]
    // of the wide is_last_half history is high, and otherwise held. It has
    // no initial value and is not touched by other opcodes.
    //
    reg regular_add_uneven_flag;

    always @(posedge clk)
        //
        case (opcode)
            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                case (wrk_fsm_state)
                    WRK_FSM_STATE_LATENCY_PRE4:                             regular_add_uneven_flag <= 1'b0;
                    WRK_FSM_STATE_BUSY2: if (rd_wide_addr_is_last_half_dly[2]) regular_add_uneven_flag <= 1'b1;
                endcase
        endcase

    //
    // DSP Slice Array
    //
    // Naming of the operand registers: dsp_<a>_<b>_<c>, where the trailing
    // _x registers are wired to the .ab inputs and the trailing _y registers
    // to the .c inputs of the slice instance dsp_inst_<a>_<b>.
    //
    reg  [DSP48E1_C_W-1:0] dsp_x_x_x;
    reg  [DSP48E1_C_W-1:0] dsp_y_x_x;
    reg  [DSP48E1_C_W-1:0] dsp_x_y_x;
    reg  [DSP48E1_C_W-1:0] dsp_y_y_x;

    reg  [DSP48E1_C_W-1:0] dsp_x_x_y;
    reg  [DSP48E1_C_W-1:0] dsp_y_x_y;
    reg  [DSP48E1_C_W-1:0] dsp_x_y_y;
    reg  [DSP48E1_C_W-1:0] dsp_y_y_y;

    wire [DSP48E1_P_W-1:0] dsp_x_x_p;
    wire [DSP48E1_P_W-1:0] dsp_y_x_p;
    wire [DSP48E1_P_W-1:0] dsp_x_y_p;
    wire [DSP48E1_P_W-1:0] dsp_y_y_p;

    // Low WORD_W bits of each slice output, prefixed with CARRY_ZERO.
    wire [WORD_EXT_W-1:0] dsp_x_x_p_reduced = {CARRY_ZERO, dsp_x_x_p[WORD_W-1:0]};
    wire [WORD_EXT_W-1:0] dsp_y_x_p_reduced = {CARRY_ZERO, dsp_y_x_p[WORD_W-1:0]};
    wire [WORD_EXT_W-1:0] dsp_x_y_p_reduced = {CARRY_ZERO, dsp_x_y_p[WORD_W-1:0]};
    wire [WORD_EXT_W-1:0] dsp_y_y_p_reduced = {CARRY_ZERO, dsp_y_y_p[WORD_W-1:0]};

    reg dsp_ce_x = 1'b0;
    reg dsp_ce_y = 1'b0;

    reg dsp_ce_x_dly = 1'b0;
    reg dsp_ce_y_dly = 1'b0;

    reg [    DSP48E1_OPMODE_W -1:0] dsp_op_mode_x;
    reg [    DSP48E1_OPMODE_W -1:0] dsp_op_mode_y;

    reg [   DSP48E1_ALUMODE_W -1:0] dsp_alu_mode_x;
    reg [   DSP48E1_ALUMODE_W -1:0] dsp_alu_mode_y;

    reg [DSP48E1_CARRYINSEL_W -1:0] dsp_carry_in_sel_x;
    reg [DSP48E1_CARRYINSEL_W -1:0] dsp_carry_in_sel_y;

    wire dsp_carry_out_x;
    wire dsp_carry_out_y;

    //
    // DSP - CE
    //
    // One-cycle delayed copies of the clock enables (used for the slices'
    // .ce_p inputs).
    //
    always @(posedge clk)
        {dsp_ce_x_dly, dsp_ce_y_dly} <= {dsp_ce_x, dsp_ce_y};
// DSP clock enables: for the four arithmetic opcodes they follow the narrow
// read enables delayed by two cycles; for every other opcode (and in reset)
// they are low.
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n)     {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0};
        else case (opcode)
            //
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_MODULAR_SUBTRACT_X,
            UOP_OPCODE_MODULAR_SUBTRACT_Y,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:  {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2};
            default:                        {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0};
            //
        endcase

    //
    // DSP - OPMODE, ALUMODE, CARRYINSEL
    //
    // The control words default to their *_DNC values every cycle and are
    // overridden only while the matching delayed narrow read enable is high.
    // In each opcode the word at address OP_ADDR_ZERO (as seen on delay tap
    // [1]) is treated differently from the following words.
    //
    always @(posedge clk) begin
        //
        dsp_op_mode_x       <= DSP48E1_OPMODE_DNC;
        dsp_op_mode_y       <= DSP48E1_OPMODE_DNC;
        //
        dsp_alu_mode_x      <= DSP48E1_ALUMODE_DNC;
        dsp_alu_mode_y      <= DSP48E1_ALUMODE_DNC;
        //
        dsp_carry_in_sel_x  <= DSP48E1_CARRYINSEL_DNC;
        dsp_carry_in_sel_y  <= DSP48E1_CARRYINSEL_DNC;
        //
        case (opcode)
            //
            // First word uses the Z0 op-mode, later words the ZP17 op-mode.
            //
            UOP_OPCODE_PROPAGATE_CARRIES: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x <= DSP48E1_OPMODE_Z0_YC_X0;
                    else                                         dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_X0;
                    dsp_alu_mode_x     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                    dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y <= DSP48E1_OPMODE_Z0_YC_X0;
                    else                                         dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_X0;
                    dsp_alu_mode_y     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                    dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
                end
                //
            end
            //
            // Subtraction (Z_MINUS ALU mode); first word takes CARRYIN, later
            // words take CARRYCASCOUT as carry source.
            //
            UOP_OPCODE_MODULAR_SUBTRACT_X: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    dsp_op_mode_x  <= DSP48E1_OPMODE_ZC_Y0_XAB;
                    dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_MINUS_X_AND_Y_AND_CIN;
                    if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
                    else                                         dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    dsp_op_mode_y  <= DSP48E1_OPMODE_ZC_Y0_XAB;
                    dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_MINUS_X_AND_Y_AND_CIN;
                    if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
                    else                                         dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
                end
                //
            end
            //
            // Same structure as MODULAR_SUBTRACT_X but with the Z_PLUS ALU mode.
            //
            UOP_OPCODE_MODULAR_SUBTRACT_Y: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    dsp_op_mode_x  <= DSP48E1_OPMODE_ZC_Y0_XAB;
                    dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                    if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
                    else                                         dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    dsp_op_mode_y  <= DSP48E1_OPMODE_ZC_Y0_XAB;
                    dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                    if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
                    else                                         dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
                end
                //
            end
            //
            // After the first word the op-mode keeps the XAB term only while
            // regular_add_uneven_flag is clear; once the flag is set it
            // switches to the X0 variant.
            //
            UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x <= DSP48E1_OPMODE_Z0_YC_XAB;
                    else begin
                        if (!regular_add_uneven_flag) dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_XAB;
                        else                          dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_X0;
                    end
                    dsp_alu_mode_x     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                    dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y <= DSP48E1_OPMODE_Z0_YC_XAB;
                    else begin
                        if (!regular_add_uneven_flag) dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_XAB;
                        else                          dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_X0;
                    end
                    dsp_alu_mode_y     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
                    dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
                end
                //
            end
            //
        endcase
        //
    end

    //
    // DSP Feed Logic
    //
    // Operand registers default to all-X (simulation don't-care) and are
    // loaded from the read data inputs while the delayed narrow read enable
    // is high. Where a constant bit is spliced in between din[WORD_W-1:0]
    // and the upper din bits, the upper bits land one position higher.
    //
    always @(posedge clk) begin
        //
        dsp_x_x_x <= {DSP48E1_C_W{1'bX}};
        dsp_x_x_y <= {DSP48E1_C_W{1'bX}};
        dsp_y_x_x <= {DSP48E1_C_W{1'bX}};
        dsp_y_x_y <= {DSP48E1_C_W{1'bX}};
        dsp_x_y_x <= {DSP48E1_C_W{1'bX}};
        dsp_x_y_y <= {DSP48E1_C_W{1'bX}};
        dsp_y_y_x <= {DSP48E1_C_W{1'bX}};
        dsp_y_y_y <= {DSP48E1_C_W{1'bX}};
        //
        case (opcode)
            //
            // Only the C operands are loaded (narrow data with a 1'b1
            // inserted above bit WORD_W-1).
            //
            UOP_OPCODE_PROPAGATE_CARRIES: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                end
                //
            end
            //
            // C = narrow "x" word, AB = narrow "y" word, zero-extended.
            // NOTE(review): dsp_x_* and dsp_y_* receive identical operands
            // here (unlike PROPAGATE_CARRIES / REGULAR_ADD_UNEVEN, where
            // dsp_y_* is fed from the *_y_din_* inputs) -- confirm intended.
            //
            UOP_OPCODE_MODULAR_SUBTRACT_X: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                    dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                    dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                end
                //
            end
            //
            // C = narrow "x" word padded with ones, AB = wide "x" word padded
            // with zeroes.
            // NOTE(review): as above, dsp_x_* and dsp_y_* receive identical
            // operands for this opcode -- confirm intended.
            //
            UOP_OPCODE_MODULAR_SUBTRACT_Y: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]};
                    dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]};
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]};
                    dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]};
                end
                //
            end
            //
            // C = narrow word with 1'b1 inserted, AB = wide word with 1'b0
            // inserted at the same bit position.
            //
            UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                //
                if (rd_narrow_ena_x_dly2) begin
                    dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                    dsp_x_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_x  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_x  [WORD_W-1:0]};
                    dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                    dsp_y_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_x  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_x  [WORD_W-1:0]};
                end
                //
                if (rd_narrow_ena_y_dly2) begin
                    dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                    dsp_x_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_y  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_y  [WORD_W-1:0]};
                    dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                    dsp_y_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_y  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_y  [WORD_W-1:0]};
                end
                //
            end
            //
        endcase
        //
    end

    //
    // DSP Slices
    //
    // Instances of the add/sub slice macro. The "_x" pair shares the *_x
    // control signals; only dsp_inst_x_x exports its carry_out. The
    // cascade ports are left unconnected.
    //
    `MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_x_x
    (
        .clk            (clk),
        .ce_abc         (dsp_ce_x),
        .ce_p           (dsp_ce_x_dly),
        .ce_ctrl        (dsp_ce_x),
        .ab             (dsp_x_x_x),
        .c              (dsp_x_x_y),
        .p              (dsp_x_x_p),
        .op_mode        (dsp_op_mode_x),
        .alu_mode       (dsp_alu_mode_x),
        .carry_in_sel   (dsp_carry_in_sel_x),
        .casc_p_in      (),
        .casc_p_out     (),
        .carry_out      (dsp_carry_out_x)
    );

    `MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_y_x
    (
        .clk            (clk),
        .ce_abc         (dsp_ce_x),
        .ce_p           (dsp_ce_x_dly),
        .ce_ctrl        (dsp_ce_x),
        .ab             (dsp_y_x_x),
        .c              (dsp_y_x_y),
        .p              (dsp_y_x_p),
        .op_mode        (dsp_op_mode_x),
        .alu_mode       (dsp_alu_mode_x),
        .carry_in_sel   (dsp_carry_in_sel_x),
        .casc_p_in      (),
        .casc_p_out     (),
        .carry_out      ()
    );

    `MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_x_y
    (
        .clk            (clk),
        .ce_abc         (dsp_ce_y),
        .ce_p           (dsp_ce_y_dly),
        .ce_ctrl        (dsp_ce_y),
        .ab
(dsp_x_y_x), .c (dsp_x_y_y), .p (dsp_x_y_p), .op_mode (dsp_op_mode_y), .alu_mode (dsp_alu_mode_y), .carry_in_sel (dsp_carry_in_sel_y), .casc_p_in (), .casc_p_out (), .carry_out (dsp_carry_out_y) ); `MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_y_y ( .clk (clk), .ce_abc (dsp_ce_y), .ce_p (dsp_ce_y_dly), .ce_ctrl (dsp_ce_y), .ab (dsp_y_y_x), .c (dsp_y_y_y), .p (dsp_y_y_p), .op_mode (dsp_op_mode_y), .alu_mode (dsp_alu_mode_y), .carry_in_sel (dsp_carry_in_sel_y), .casc_p_in (), .casc_p_out (), .carry_out () ); // // UOP_OPCODE_MODULAR_SUBTRACT_X // // // IMPORTANT: DSP48E1 turns out to have a very non-obvious feature: when doing _subtraction_, // the CARRYOUT[3] is _NOT_ equivalent to the borrow flag! See "CARRYOUT/CARRYCASCOUT" // section of Appendix A on pp. 55-56 of UG479 for more details. // reg modular_subtract_x_brw_flag; reg modular_subtract_y_brw_flag; reg [WORD_EXT_W -1:0] modular_subtract_x_mux; reg [WORD_EXT_W -1:0] modular_subtract_y_mux; wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]}; wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]}; always @(posedge clk) // case (opcode) UOP_OPCODE_MODULAR_SUBTRACT_X: case (wrk_fsm_state) WRK_FSM_STATE_LATENCY_POST4: {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y}; endcase endcase always @(posedge clk) // case (opcode) UOP_OPCODE_MODULAR_SUBTRACT_Z: case (wrk_fsm_state) // WRK_FSM_STATE_LATENCY_PRE4, WRK_FSM_STATE_BUSY2, WRK_FSM_STATE_LATENCY_POST2: // begin modular_subtract_x_mux <= !modular_subtract_x_brw_flag ? rd_narrow_x_din_x_dly1 : rd_wide_x_din_x_dly1; modular_subtract_y_mux <= !modular_subtract_y_brw_flag ? 
rd_narrow_x_din_y_dly1 : rd_wide_x_din_y_dly1; end endcase endcase // // FSM Process // always @(posedge clk or negedge rst_n) // if (!rst_n) wrk_fsm_state <= WRK_FSM_STATE_IDLE; else wrk_fsm_state <= wrk_fsm_state_next; // // Busy Exit Logic // reg wrk_fsm_done = 1'b0; always @(posedge clk) begin // wrk_fsm_done <= 1'b0; // case (opcode) // UOP_OPCODE_PROPAGATE_CARRIES, UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_OPCODE_COPY_CRT_Y2X, UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_OPCODE_COPY_LADDERS_X2Y, UOP_OPCODE_CROSS_LADDERS_X2Y, UOP_OPCODE_MODULAR_SUBTRACT_X, UOP_OPCODE_MODULAR_SUBTRACT_Y, UOP_OPCODE_MODULAR_SUBTRACT_Z, UOP_OPCODE_MERGE_LH, UOP_OPCODE_REGULAR_ADD_UNEVEN: // case (wrk_fsm_state) WRK_FSM_STATE_BUSY1: if (rd_narrow_addr_is_last) wrk_fsm_done <= 1'b1; endcase // endcase // end // // FSM Transition Logic // always @* begin // case (wrk_fsm_state) WRK_FSM_STATE_IDLE: wrk_fsm_state_next = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE ; WRK_FSM_STATE_LATENCY_PRE1: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE2 ; WRK_FSM_STATE_LATENCY_PRE2: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE3 ; WRK_FSM_STATE_LATENCY_PRE3: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE4 ; WRK_FSM_STATE_LATENCY_PRE4: wrk_fsm_state_next = WRK_FSM_STATE_BUSY1 ; WRK_FSM_STATE_BUSY1: wrk_fsm_state_next = WRK_FSM_STATE_BUSY2 ; WRK_FSM_STATE_BUSY2: wrk_fsm_state_next = wrk_fsm_done ? 
WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY1 ; WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST2 ; WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST3 ; WRK_FSM_STATE_LATENCY_POST3: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST4 ; WRK_FSM_STATE_LATENCY_POST4: wrk_fsm_state_next = WRK_FSM_STATE_STOP ; WRK_FSM_STATE_STOP: wrk_fsm_state_next = WRK_FSM_STATE_IDLE ; default: wrk_fsm_state_next = WRK_FSM_STATE_IDLE ; endcase // end // // Ready Flag Logic // reg rdy_reg = 1'b1; assign rdy = rdy_reg; always @(posedge clk or negedge rst_n) // if (!rst_n) rdy_reg <= 1'b1; else case (wrk_fsm_state) WRK_FSM_STATE_IDLE: rdy_reg <= ~ena; WRK_FSM_STATE_STOP: rdy_reg <= 1'b1; endcase // // Source to Destination Data Logic // reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly2; reg [WORD_EXT_W -1:0] rd_wide_y_din_x_dly2; reg [WORD_EXT_W -1:0] rd_wide_x_din_y_dly2; reg [WORD_EXT_W -1:0] rd_wide_y_din_y_dly2; reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly2; reg [WORD_EXT_W -1:0] rd_narrow_y_din_x_dly2; reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly2; reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly2; always @(posedge clk) begin {rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2 } <= {rd_wide_x_din_x_dly1, rd_wide_y_din_x_dly1, rd_wide_x_din_y_dly1, rd_wide_y_din_y_dly1 }; {rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2} <= {rd_narrow_x_din_x_dly1, rd_narrow_y_din_x_dly1, rd_narrow_x_din_y_dly1, rd_narrow_y_din_y_dly1}; end task update_wide_dout; input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y; {wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <= { x_x, y_x, x_y, y_y}; endtask task update_narrow_dout; input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y; {wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <= { x_x, y_x, x_y, y_y}; endtask always @(posedge clk) begin // update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, 
WORD_EXT_DNC, WORD_EXT_DNC); update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC); // case (opcode) // UOP_OPCODE_PROPAGATE_CARRIES, UOP_OPCODE_MODULAR_SUBTRACT_X, UOP_OPCODE_REGULAR_ADD_UNEVEN: // case (wrk_fsm_state) // WRK_FSM_STATE_BUSY2, WRK_FSM_STATE_LATENCY_POST2, WRK_FSM_STATE_LATENCY_POST4: // update_narrow_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced); // endcase // UOP_OPCODE_COPY_CRT_Y2X: // case (wrk_fsm_state) // WRK_FSM_STATE_BUSY1, WRK_FSM_STATE_LATENCY_POST1, WRK_FSM_STATE_LATENCY_POST3: // begin update_narrow_dout(rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2); update_wide_dout (rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2); end // endcase // UOP_OPCODE_MODULAR_REDUCE_INIT: // case (wrk_fsm_state) // WRK_FSM_STATE_BUSY1, WRK_FSM_STATE_LATENCY_POST1, WRK_FSM_STATE_LATENCY_POST3: // update_wide_dout(rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2); // endcase // UOP_OPCODE_COPY_LADDERS_X2Y: // case (wrk_fsm_state) // WRK_FSM_STATE_BUSY1, WRK_FSM_STATE_LATENCY_POST1, WRK_FSM_STATE_LATENCY_POST3: // begin update_wide_dout (rd_wide_x_din_x_dly1, rd_wide_x_din_x_dly2, rd_wide_x_din_y_dly1, rd_wide_x_din_y_dly2); update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_x_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_y_dly2); end // endcase // UOP_OPCODE_CROSS_LADDERS_X2Y: // case (wrk_fsm_state) // WRK_FSM_STATE_BUSY1, WRK_FSM_STATE_LATENCY_POST1, WRK_FSM_STATE_LATENCY_POST3: // begin update_wide_dout (rd_wide_x_din_x_dly1, rd_wide_x_din_y_dly2, rd_wide_x_din_y_dly1, rd_wide_x_din_x_dly2); update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_y_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_x_dly2); end // endcase // UOP_OPCODE_MODULAR_SUBTRACT_Y: // case (wrk_fsm_state) // WRK_FSM_STATE_BUSY2, WRK_FSM_STATE_LATENCY_POST2, WRK_FSM_STATE_LATENCY_POST4: // 
update_wide_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced); // endcase // UOP_OPCODE_MODULAR_SUBTRACT_Z: // case (wrk_fsm_state) // WRK_FSM_STATE_BUSY1, WRK_FSM_STATE_LATENCY_POST1, WRK_FSM_STATE_LATENCY_POST3: // begin update_wide_dout (modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced); update_narrow_dout(modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced); end // endcase // UOP_OPCODE_MERGE_LH: // case (wrk_fsm_state) // WRK_FSM_STATE_BUSY1, WRK_FSM_STATE_LATENCY_POST1, WRK_FSM_STATE_LATENCY_POST3: // update_narrow_dout(rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2); // endcase // endcase // end endmodule