//======================================================================
//
// Copyright: 2019, The Commons Conservancy Cryptech Project
// SPDX-License-Identifier: BSD-3-Clause
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================
// modexpng_general_worker
//
// Port list only (non-ANSI style); directions and widths are declared in
// the "Ports" section below. The memory-facing ports follow the scheme
// wrk_<rd|wr>_<wide|narrow>_..._<x|y>: one read and one write interface
// for each of the wide/narrow memories, each in an _x and a _y flavour.
module modexpng_general_worker
(
clk, rst_n,
ena, rdy,
sel_narrow_in, sel_narrow_out,
sel_wide_in, sel_wide_out,
opcode,
word_index_last, word_index_last_half,
wrk_rd_wide_xy_ena_x, wrk_rd_wide_xy_bank_x, wrk_rd_wide_xy_addr_x, wrk_rd_wide_x_din_x, wrk_rd_wide_y_din_x,
wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x, wrk_rd_narrow_x_din_x, wrk_rd_narrow_y_din_x,
wrk_rd_wide_xy_ena_y, wrk_rd_wide_xy_bank_y, wrk_rd_wide_xy_addr_y, wrk_rd_wide_x_din_y, wrk_rd_wide_y_din_y,
wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y, wrk_rd_narrow_x_din_y, wrk_rd_narrow_y_din_y,
wrk_wr_wide_xy_ena_x, wrk_wr_wide_xy_bank_x, wrk_wr_wide_xy_addr_x, wrk_wr_wide_x_dout_x, wrk_wr_wide_y_dout_x,
wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x, wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x,
wrk_wr_wide_xy_ena_y, wrk_wr_wide_xy_bank_y, wrk_wr_wide_xy_addr_y, wrk_wr_wide_x_dout_y, wrk_wr_wide_y_dout_y,
wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y, wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y
);
//
// Headers
//
`include "modexpng_parameters.vh"
`include "modexpng_microcode.vh"
`include "modexpng_dsp48e1.vh"
`include "modexpng_dsp_slice_primitives.vh"
//
// Ports
//
// Clock, and active-low reset. rst_n is used asynchronously by the enable
// processes ("posedge clk or negedge rst_n"); the address/data processes
// are clocked only and are not reset.
input clk;
input rst_n;
// Start/ready handshake.
// NOTE(review): neither signal is used in the part of the module visible
// here; presumably they belong to the FSM logic further down -- confirm.
input ena;
output rdy;
// Bank selectors: the *_in selectors are loaded into the read bank
// registers, the *_out selectors into the write bank registers.
input [ BANK_ADDR_W -1:0] sel_narrow_in;
input [ BANK_ADDR_W -1:0] sel_narrow_out;
input [ BANK_ADDR_W -1:0] sel_wide_in;
input [ BANK_ADDR_W -1:0] sel_wide_out;
// Micro-operation selector; every process below decodes it with
// "case (opcode)" against the UOP_OPCODE_* constants.
input [ UOP_OPCODE_W -1:0] opcode;
// Word indices the read address counters are compared against to detect
// the last word and the last word of the first half, respectively.
input [ OP_ADDR_W -1:0] word_index_last;
input [ OP_ADDR_W -1:0] word_index_last_half;
//
// Read interfaces: enable/bank/address are driven by this module, the
// x_din/y_din words are received from the memories.
//
// wide memory, _x port
output wrk_rd_wide_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x;
input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_x;
input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_x;
// narrow memory, _x port
output wrk_rd_narrow_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x;
// wide memory, _y port
output wrk_rd_wide_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y;
input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_y;
input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_y;
// narrow memory, _y port
output wrk_rd_narrow_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y;
//
// Write interfaces: enable/bank/address and both data words (x_dout,
// y_dout) are all driven by this module.
//
// wide memory, _x port
output wrk_wr_wide_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x;
output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_x;
output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_x;
// narrow memory, _x port
output wrk_wr_narrow_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_x;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_x;
// wide memory, _y port
output wrk_wr_wide_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y;
output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_y;
output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_y;
// narrow memory, _y port
output wrk_wr_narrow_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_y;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_y;
//
// FSM Declaration
//
// State encodings of the worker FSM. The states are listed in what appears
// to be their intended sequence (IDLE, PRE1..PRE4, BUSY1/BUSY2,
// POST1..POST4, STOP), but the numeric codes do not follow that order:
// BUSY1/BUSY2 are 4'hA/4'hB while POST1..POST4 are 4'h5..4'h8.
// NOTE(review): the transition logic is outside the visible part of the
// file, so the actual state sequence should be confirmed there.
localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0;
localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1;
localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2;
localparam [3:0] WRK_FSM_STATE_LATENCY_PRE3 = 4'h3;
localparam [3:0] WRK_FSM_STATE_LATENCY_PRE4 = 4'h4;
localparam [3:0] WRK_FSM_STATE_BUSY1 = 4'hA;
localparam [3:0] WRK_FSM_STATE_BUSY2 = 4'hB;
localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5;
localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
localparam [3:0] WRK_FSM_STATE_LATENCY_POST3 = 4'h7;
localparam [3:0] WRK_FSM_STATE_LATENCY_POST4 = 4'h8;
localparam [3:0] WRK_FSM_STATE_STOP = 4'hF;
// Current state; initialised to IDLE.
reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
// Next state. The read-side processes decode wrk_fsm_state_next while the
// write-side processes decode wrk_fsm_state.
// NOTE(review): it is assigned outside the visible part of the file.
reg [3:0] wrk_fsm_state_next;
//
// Control Signals
//
// Internal registers behind the wrk_* output ports (connected one-to-one
// in the "Mapping" section). Only the enable bits carry an initial value
// of 1'b0; bank, address and data registers are left uninitialised.
//
// read side: enable / bank / address per memory and port
reg rd_wide_ena_x = 1'b0;
reg [BANK_ADDR_W -1:0] rd_wide_bank_x;
reg [ OP_ADDR_W -1:0] rd_wide_addr_x;
reg rd_narrow_ena_x = 1'b0;
reg [BANK_ADDR_W -1:0] rd_narrow_bank_x;
reg [ OP_ADDR_W -1:0] rd_narrow_addr_x;
reg rd_wide_ena_y = 1'b0;
reg [BANK_ADDR_W -1:0] rd_wide_bank_y;
reg [ OP_ADDR_W -1:0] rd_wide_addr_y;
reg rd_narrow_ena_y = 1'b0;
reg [BANK_ADDR_W -1:0] rd_narrow_bank_y;
reg [ OP_ADDR_W -1:0] rd_narrow_addr_y;
// write side: enable / bank / address plus the two data words (x, y)
reg wr_wide_xy_ena_x = 1'b0;
reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_x;
reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_x;
reg [ WORD_EXT_W -1:0] wr_wide_x_dout_x;
reg [ WORD_EXT_W -1:0] wr_wide_y_dout_x;
reg wr_narrow_xy_ena_x = 1'b0;
reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x;
reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_x;
reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_x;
reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_x;
reg wr_wide_xy_ena_y = 1'b0;
reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_y;
reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_y;
reg [ WORD_EXT_W -1:0] wr_wide_x_dout_y;
reg [ WORD_EXT_W -1:0] wr_wide_y_dout_y;
reg wr_narrow_xy_ena_y = 1'b0;
reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y;
reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y;
reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_y;
reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_y;
//
// Mapping
//
// Each wrk_* output port is driven directly by its internal register; no
// logic is inserted. One continuous assignment per memory interface, with
// the port group on the left and the matching registers, in the same
// order and with the same widths, on the right.
//
// read interfaces: {enable, bank, address}
assign {wrk_rd_wide_xy_ena_x,   wrk_rd_wide_xy_bank_x,   wrk_rd_wide_xy_addr_x  } =
       {rd_wide_ena_x,          rd_wide_bank_x,          rd_wide_addr_x         };
assign {wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x} =
       {rd_narrow_ena_x,        rd_narrow_bank_x,        rd_narrow_addr_x       };
assign {wrk_rd_wide_xy_ena_y,   wrk_rd_wide_xy_bank_y,   wrk_rd_wide_xy_addr_y  } =
       {rd_wide_ena_y,          rd_wide_bank_y,          rd_wide_addr_y         };
assign {wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y} =
       {rd_narrow_ena_y,        rd_narrow_bank_y,        rd_narrow_addr_y       };
//
// write interfaces: {enable, bank, address, x data, y data}
assign {wrk_wr_wide_xy_ena_x,   wrk_wr_wide_xy_bank_x,   wrk_wr_wide_xy_addr_x,
        wrk_wr_wide_x_dout_x,   wrk_wr_wide_y_dout_x} =
       {wr_wide_xy_ena_x,       wr_wide_xy_bank_x,       wr_wide_xy_addr_x,
        wr_wide_x_dout_x,       wr_wide_y_dout_x};
assign {wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x,
        wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x} =
       {wr_narrow_xy_ena_x,     wr_narrow_xy_bank_x,     wr_narrow_xy_addr_x,
        wr_narrow_x_dout_x,     wr_narrow_y_dout_x};
assign {wrk_wr_wide_xy_ena_y,   wrk_wr_wide_xy_bank_y,   wrk_wr_wide_xy_addr_y,
        wrk_wr_wide_x_dout_y,   wrk_wr_wide_y_dout_y} =
       {wr_wide_xy_ena_y,       wr_wide_xy_bank_y,       wr_wide_xy_addr_y,
        wr_wide_x_dout_y,       wr_wide_y_dout_y};
assign {wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y,
        wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y} =
       {wr_narrow_xy_ena_y,     wr_narrow_xy_bank_y,     wr_narrow_xy_addr_y,
        wr_narrow_x_dout_y,     wr_narrow_y_dout_y};
//
// Delays
//
// Five-deep delay lines for the four read addresses. Index 0 holds the
// address from one clock ago, index 4 the one from five clocks ago (see
// the shift process right below).
reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:4];
reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:4];
reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:4];
reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:4];
// Read data inputs registered once.
reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly1;
reg [WORD_EXT_W -1:0] rd_wide_y_din_x_dly1;
reg [WORD_EXT_W -1:0] rd_wide_x_din_y_dly1;
reg [WORD_EXT_W -1:0] rd_wide_y_din_y_dly1;
reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly1;
reg [WORD_EXT_W -1:0] rd_narrow_y_din_x_dly1;
reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly1;
reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly1;
// Narrow read enables delayed by one and by two clocks; the *_dly2 copies
// gate the DSP control and feed logic further down.
reg rd_narrow_ena_x_dly1 = 1'b0;
reg rd_narrow_ena_y_dly1 = 1'b0;
reg rd_narrow_ena_x_dly2 = 1'b0;
reg rd_narrow_ena_y_dly2 = 1'b0;
always @(posedge clk) begin
    //
    // Read data words: plain one-cycle registers.
    //
    rd_wide_x_din_x_dly1   <= wrk_rd_wide_x_din_x;
    rd_wide_y_din_x_dly1   <= wrk_rd_wide_y_din_x;
    rd_wide_x_din_y_dly1   <= wrk_rd_wide_x_din_y;
    rd_wide_y_din_y_dly1   <= wrk_rd_wide_y_din_y;
    rd_narrow_x_din_x_dly1 <= wrk_rd_narrow_x_din_x;
    rd_narrow_y_din_x_dly1 <= wrk_rd_narrow_y_din_x;
    rd_narrow_x_din_y_dly1 <= wrk_rd_narrow_x_din_y;
    rd_narrow_y_din_y_dly1 <= wrk_rd_narrow_y_din_y;
    //
    // Narrow read address delay lines: entry [0] takes the live address,
    // every other entry takes its predecessor (nonblocking, so all stages
    // shift simultaneously).
    //
    rd_narrow_addr_x_dly[0] <= rd_narrow_addr_x;
    rd_narrow_addr_x_dly[1] <= rd_narrow_addr_x_dly[0];
    rd_narrow_addr_x_dly[2] <= rd_narrow_addr_x_dly[1];
    rd_narrow_addr_x_dly[3] <= rd_narrow_addr_x_dly[2];
    rd_narrow_addr_x_dly[4] <= rd_narrow_addr_x_dly[3];
    //
    rd_narrow_addr_y_dly[0] <= rd_narrow_addr_y;
    rd_narrow_addr_y_dly[1] <= rd_narrow_addr_y_dly[0];
    rd_narrow_addr_y_dly[2] <= rd_narrow_addr_y_dly[1];
    rd_narrow_addr_y_dly[3] <= rd_narrow_addr_y_dly[2];
    rd_narrow_addr_y_dly[4] <= rd_narrow_addr_y_dly[3];
    //
    // Wide read address delay lines, same structure.
    //
    rd_wide_addr_x_dly[0] <= rd_wide_addr_x;
    rd_wide_addr_x_dly[1] <= rd_wide_addr_x_dly[0];
    rd_wide_addr_x_dly[2] <= rd_wide_addr_x_dly[1];
    rd_wide_addr_x_dly[3] <= rd_wide_addr_x_dly[2];
    rd_wide_addr_x_dly[4] <= rd_wide_addr_x_dly[3];
    //
    rd_wide_addr_y_dly[0] <= rd_wide_addr_y;
    rd_wide_addr_y_dly[1] <= rd_wide_addr_y_dly[0];
    rd_wide_addr_y_dly[2] <= rd_wide_addr_y_dly[1];
    rd_wide_addr_y_dly[3] <= rd_wide_addr_y_dly[2];
    rd_wide_addr_y_dly[4] <= rd_wide_addr_y_dly[3];
    //
    // Narrow read enables, delayed by one and two clocks.
    //
    rd_narrow_ena_x_dly1 <= rd_narrow_ena_x;
    rd_narrow_ena_x_dly2 <= rd_narrow_ena_x_dly1;
    rd_narrow_ena_y_dly1 <= rd_narrow_ena_y;
    rd_narrow_ena_y_dly2 <= rd_narrow_ena_y_dly1;
    //
end
//
// Source Read Enable Logic
//
// Drive both wide read enables (_x and _y) to the same value.
task _update_wide_rd_en;
    input _en;
    begin
        rd_wide_ena_x <= _en;
        rd_wide_ena_y <= _en;
    end
endtask

// Drive both narrow read enables (_x and _y) to the same value.
task _update_narrow_rd_en;
    input _en;
    begin
        rd_narrow_ena_x <= _en;
        rd_narrow_ena_y <= _en;
    end
endtask

// Convenience wrappers: assert / deassert the wide read enables.
task enable_wide_rd_en;
    begin
        _update_wide_rd_en(1'b1);
    end
endtask

task disable_wide_rd_en;
    begin
        _update_wide_rd_en(1'b0);
    end
endtask

// Convenience wrappers: assert / deassert the narrow read enables.
task enable_narrow_rd_en;
    begin
        _update_narrow_rd_en(1'b1);
    end
endtask

task disable_narrow_rd_en;
    begin
        _update_narrow_rd_en(1'b0);
    end
endtask
always @(posedge clk or negedge rst_n)
    //
    // Registered read enables for the wide and narrow memories. They are
    // cleared by the asynchronous reset and, by default, on every clock;
    // the opcode / next-state decode below re-asserts them for the cycles
    // in which a read is wanted. The decode uses wrk_fsm_state_next, not
    // wrk_fsm_state.
    //
    if (!rst_n) begin
        //
        disable_wide_rd_en;
        disable_narrow_rd_en;
        //
    end else begin
        //
        // default: no read this cycle
        //
        disable_wide_rd_en;
        disable_narrow_rd_en;
        //
        case (opcode)
            //
            // narrow only; PRE1, PRE3 and BUSY1
            //
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_OUTPUT_FROM_NARROW,
            UOP_OPCODE_MODULAR_REDUCE_INIT,
            UOP_OPCODE_MODULAR_SUBTRACT_X:
                if ((wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE1) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE3) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_BUSY1))
                    enable_narrow_rd_en;
            //
            // wide and narrow; PRE1, PRE3 and BUSY1
            //
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_MODULAR_SUBTRACT_Y,
            UOP_OPCODE_MODULAR_SUBTRACT_Z,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                if ((wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE1) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE3) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_BUSY1)) begin
                    enable_wide_rd_en;
                    enable_narrow_rd_en;
                end
            //
            // wide and narrow; every PRE state and both BUSY states
            //
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y:
                if ((wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE1) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE2) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE3) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE4) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_BUSY1) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_BUSY2)) begin
                    enable_wide_rd_en;
                    enable_narrow_rd_en;
                end
            //
            // wide only; PRE1, PRE3 and BUSY1
            //
            UOP_OPCODE_MERGE_LH:
                if ((wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE1) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_LATENCY_PRE3) ||
                    (wrk_fsm_state_next == WRK_FSM_STATE_BUSY1))
                    enable_wide_rd_en;
            //
        endcase
        //
    end
//
// Destination Write Enable Logic
//
// Drive both wide write enables (_x and _y) to the same value.
task _update_wide_wr_en;
    input _en;
    begin
        wr_wide_xy_ena_x <= _en;
        wr_wide_xy_ena_y <= _en;
    end
endtask

// Drive both narrow write enables (_x and _y) to the same value.
task _update_narrow_wr_en;
    input _en;
    begin
        wr_narrow_xy_ena_x <= _en;
        wr_narrow_xy_ena_y <= _en;
    end
endtask

// Convenience wrappers: assert / deassert the wide write enables.
task enable_wide_wr_en;
    begin
        _update_wide_wr_en(1'b1);
    end
endtask

task disable_wide_wr_en;
    begin
        _update_wide_wr_en(1'b0);
    end
endtask

// Convenience wrappers: assert / deassert the narrow write enables.
task enable_narrow_wr_en;
    begin
        _update_narrow_wr_en(1'b1);
    end
endtask

task disable_narrow_wr_en;
    begin
        _update_narrow_wr_en(1'b0);
    end
endtask
always @(posedge clk or negedge rst_n)
    //
    // Registered write enables for the wide and narrow memories. They are
    // cleared by the asynchronous reset and, by default, on every clock;
    // the opcode / state decode below re-asserts them. Unlike the read
    // side, the decode uses the current state wrk_fsm_state. Two write
    // patterns occur: BUSY1/POST1/POST3 and BUSY2/POST2/POST4.
    //
    if (!rst_n) begin
        //
        disable_wide_wr_en;
        disable_narrow_wr_en;
        //
    end else begin
        //
        // default: no write this cycle
        //
        disable_wide_wr_en;
        disable_narrow_wr_en;
        //
        case (opcode)
            //
            // narrow only; BUSY2, POST2 and POST4
            //
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_MODULAR_SUBTRACT_X,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                if ((wrk_fsm_state == WRK_FSM_STATE_BUSY2) ||
                    (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST2) ||
                    (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST4))
                    enable_narrow_wr_en;
            //
            // narrow only; BUSY1, POST1 and POST3
            //
            UOP_OPCODE_MERGE_LH:
                if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1) ||
                    (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1) ||
                    (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST3))
                    enable_narrow_wr_en;
            //
            // wide and narrow; BUSY1, POST1 and POST3
            //
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y,
            UOP_OPCODE_MODULAR_SUBTRACT_Z:
                if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1) ||
                    (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1) ||
                    (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST3)) begin
                    enable_wide_wr_en;
                    enable_narrow_wr_en;
                end
            //
            // wide only; BUSY2, POST2 and POST4
            //
            UOP_OPCODE_MODULAR_SUBTRACT_Y:
                if ((wrk_fsm_state == WRK_FSM_STATE_BUSY2) ||
                    (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST2) ||
                    (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST4))
                    enable_wide_wr_en;
            //
            // wide only; BUSY1, POST1 and POST3
            //
            UOP_OPCODE_MODULAR_REDUCE_INIT:
                if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1) ||
                    (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1) ||
                    (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST3))
                    enable_wide_wr_en;
            //
        endcase
        //
    end
//
// Source Read Address Logic
//
// Address that the wide / narrow read address will take on its next
// advance (loaded by the preset_*_addr_next tasks, stepped by the
// advance_*_addr_next* tasks).
reg [OP_ADDR_W -1:0] rd_wide_addr_next;
reg [OP_ADDR_W -1:0] rd_narrow_addr_next;
// Flags for the current read address: set when the address loaded by
// advance_rd_*_addr equals word_index_last / word_index_last_half,
// cleared by the preset_rd_*_bank_addr tasks.
reg rd_wide_addr_is_last = 1'b0;
reg rd_narrow_addr_is_last = 1'b0;
reg rd_wide_addr_is_last_half = 1'b0;
reg rd_narrow_addr_is_last_half = 1'b0;
// Flags for the *_addr_next registers. They are computed from the value of
// *_addr_next before it is stepped, compared against (index - 1); the
// advance_*_addr_next* tasks use them to wrap *_addr_next to OP_ADDR_ZERO.
reg rd_wide_addr_next_is_last = 1'b0;
reg rd_narrow_addr_next_is_last = 1'b0;
reg rd_wide_addr_next_is_last_half = 1'b0;
reg rd_narrow_addr_next_is_last_half = 1'b0;
// Four-stage shift registers of the is_last_half flags; bit 0 is the
// flag delayed by one clock.
reg [3:0] rd_wide_addr_is_last_half_dly = 4'h0;
reg [3:0] rd_narrow_addr_is_last_half_dly = 4'h0;
always @(posedge clk) begin
    //
    // Shift the is_last_half flags through their 4-bit delay registers:
    // bit 0 takes the live flag, bits 3:1 take the previous bits 2:0.
    //
    rd_wide_addr_is_last_half_dly[0]     <= rd_wide_addr_is_last_half;
    rd_wide_addr_is_last_half_dly[3:1]   <= rd_wide_addr_is_last_half_dly[2:0];
    //
    rd_narrow_addr_is_last_half_dly[0]   <= rd_narrow_addr_is_last_half;
    rd_narrow_addr_is_last_half_dly[3:1] <= rd_narrow_addr_is_last_half_dly[2:0];
    //
end
// Load the wide read bank and address (same values for the _x and _y
// ports) and clear both position flags of the wide read address.
task preset_rd_wide_bank_addr;
    input [BANK_ADDR_W -1:0] bank;
    input [  OP_ADDR_W -1:0] addr;
    begin
        rd_wide_bank_x            <= bank;
        rd_wide_addr_x            <= addr;
        rd_wide_bank_y            <= bank;
        rd_wide_addr_y            <= addr;
        rd_wide_addr_is_last      <= 1'b0;
        rd_wide_addr_is_last_half <= 1'b0;
    end
endtask

// Load the narrow read bank and address (same values for the _x and _y
// ports) and clear both position flags of the narrow read address.
task preset_rd_narrow_bank_addr;
    input [BANK_ADDR_W -1:0] bank;
    input [  OP_ADDR_W -1:0] addr;
    begin
        rd_narrow_bank_x            <= bank;
        rd_narrow_addr_x            <= addr;
        rd_narrow_bank_y            <= bank;
        rd_narrow_addr_y            <= addr;
        rd_narrow_addr_is_last      <= 1'b0;
        rd_narrow_addr_is_last_half <= 1'b0;
    end
endtask

// Load the wide "next address" register and clear its two flags.
task preset_rd_wide_addr_next;
    input [OP_ADDR_W -1:0] addr;
    begin
        rd_wide_addr_next_is_last      <= 1'b0;
        rd_wide_addr_next_is_last_half <= 1'b0;
        rd_wide_addr_next              <= addr;
    end
endtask

// Load the narrow "next address" register and clear its two flags.
task preset_rd_narrow_addr_next;
    input [OP_ADDR_W -1:0] addr;
    begin
        rd_narrow_addr_next_is_last      <= 1'b0;
        rd_narrow_addr_next_is_last_half <= 1'b0;
        rd_narrow_addr_next              <= addr;
    end
endtask
// Hold the wide read bank on both ports (re-assigns the current value so
// that an earlier default assignment in the same cycle is overridden).
task keep_rd_wide_bank;
    begin
        rd_wide_bank_x <= rd_wide_bank_x;
        rd_wide_bank_y <= rd_wide_bank_y;
    end
endtask

// Point both wide read ports at the given bank.
task switch_rd_wide_bank;
    input [BANK_ADDR_W -1:0] bank;
    begin
        rd_wide_bank_x <= bank;
        rd_wide_bank_y <= bank;
    end
endtask

// Hold the wide read address on both ports.
task keep_rd_wide_addr;
    begin
        rd_wide_addr_x <= rd_wide_addr_x;
        rd_wide_addr_y <= rd_wide_addr_y;
    end
endtask

// Step the wide read address to rd_wide_addr_next on both ports and
// record whether that address is the last / last-of-first-half word.
task advance_rd_wide_addr;
    begin
        rd_wide_addr_x            <= rd_wide_addr_next;
        rd_wide_addr_y            <= rd_wide_addr_next;
        rd_wide_addr_is_last      <= (rd_wide_addr_next == word_index_last);
        rd_wide_addr_is_last_half <= (rd_wide_addr_next == word_index_last_half);
    end
endtask

// Hold the narrow read bank on both ports.
task keep_rd_narrow_bank;
    begin
        rd_narrow_bank_x <= rd_narrow_bank_x;
        rd_narrow_bank_y <= rd_narrow_bank_y;
    end
endtask

// Point both narrow read ports at the given bank.
task switch_rd_narrow_bank;
    input [BANK_ADDR_W -1:0] bank;
    begin
        rd_narrow_bank_x <= bank;
        rd_narrow_bank_y <= bank;
    end
endtask

// Hold the narrow read address on both ports.
task keep_rd_narrow_addr;
    begin
        rd_narrow_addr_x <= rd_narrow_addr_x;
        rd_narrow_addr_y <= rd_narrow_addr_y;
    end
endtask

// Step the narrow read address to rd_narrow_addr_next on both ports and
// record whether that address is the last / last-of-first-half word.
task advance_rd_narrow_addr;
    begin
        rd_narrow_addr_x            <= rd_narrow_addr_next;
        rd_narrow_addr_y            <= rd_narrow_addr_next;
        rd_narrow_addr_is_last      <= (rd_narrow_addr_next == word_index_last);
        rd_narrow_addr_is_last_half <= (rd_narrow_addr_next == word_index_last_half);
    end
endtask
// Refresh the flags of rd_wide_addr_next. The comparison uses the value
// rd_wide_addr_next holds before it is stepped, hence the "index - 1".
task update_rd_wide_addr_flags;
    begin
        rd_wide_addr_next_is_last_half <= (rd_wide_addr_next == (word_index_last_half - 1'b1));
        rd_wide_addr_next_is_last      <= (rd_wide_addr_next == (word_index_last - 1'b1));
    end
endtask

// Same for rd_narrow_addr_next.
task update_rd_narrow_addr_flags;
    begin
        rd_narrow_addr_next_is_last_half <= (rd_narrow_addr_next == (word_index_last_half - 1'b1));
        rd_narrow_addr_next_is_last      <= (rd_narrow_addr_next == (word_index_last - 1'b1));
    end
endtask

// Step rd_wide_addr_next by one, wrapping to OP_ADDR_ZERO once it has
// reached the last word, and refresh its flags.
task advance_rd_wide_addr_next;
    begin
        update_rd_wide_addr_flags;
        rd_wide_addr_next <= rd_wide_addr_next_is_last ? OP_ADDR_ZERO : rd_wide_addr_next + 1'b1;
    end
endtask

// Step rd_narrow_addr_next by one, wrapping to OP_ADDR_ZERO once it has
// reached the last word, and refresh its flags.
task advance_rd_narrow_addr_next;
    begin
        update_rd_narrow_addr_flags;
        rd_narrow_addr_next <= rd_narrow_addr_next_is_last ? OP_ADDR_ZERO : rd_narrow_addr_next + 1'b1;
    end
endtask

// As advance_rd_wide_addr_next, but wraps at the last word of the first
// half instead of the last word.
task advance_rd_wide_addr_next_half;
    begin
        update_rd_wide_addr_flags;
        rd_wide_addr_next <= rd_wide_addr_next_is_last_half ? OP_ADDR_ZERO : rd_wide_addr_next + 1'b1;
    end
endtask

// As advance_rd_narrow_addr_next, but wraps at the last word of the first
// half instead of the last word.
task advance_rd_narrow_addr_next_half;
    begin
        update_rd_narrow_addr_flags;
        rd_narrow_addr_next <= rd_narrow_addr_next_is_last_half ? OP_ADDR_ZERO : rd_narrow_addr_next + 1'b1;
    end
endtask
// Source read bank/address sequencing (clocked, no reset).
//
// Every cycle the wide and narrow read bank/address are first defaulted to
// BANK_DNC / OP_ADDR_DNC (which also clears the is_last / is_last_half
// flags of the current addresses). The opcode / next-state decode below
// then overrides parts of that default. All assignments are nonblocking
// assignments to the same registers, so the last one executed in a cycle
// wins: the default must stay first and the order of the task calls
// inside each branch must be kept.
//
// Common pattern: PRE1 loads address 0 and next-address 1; PRE3 and BUSY1
// step the address and the next-address; PRE2, PRE4 and BUSY2 only hold
// the bank, so the address falls back to OP_ADDR_DNC in those cycles.
always @(posedge clk) begin
//
preset_rd_wide_bank_addr (BANK_DNC, OP_ADDR_DNC);
preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC);
//
case (opcode)
//
// narrow source only, bank taken from sel_narrow_in
//
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_MODULAR_SUBTRACT_X:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE3,
WRK_FSM_STATE_BUSY1: begin keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: keep_rd_narrow_bank;
endcase
//
// wide and narrow sources, banks taken from sel_wide_in / sel_narrow_in
//
UOP_OPCODE_COPY_CRT_Y2X,
UOP_OPCODE_MODULAR_SUBTRACT_Z,
UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE3,
WRK_FSM_STATE_BUSY1: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next;
keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: begin keep_rd_wide_bank; keep_rd_narrow_bank; end
endcase
//
// narrow source from sel_narrow_in. The wide bank stays BANK_DNC, but the
// wide address counter still runs, wrapping at the half-way index (the
// destination write address logic uses the delayed wide read address for
// this opcode).
//
UOP_OPCODE_MODULAR_REDUCE_INIT:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_DNC, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE3,
WRK_FSM_STATE_BUSY1: begin advance_rd_wide_addr; advance_rd_wide_addr_next_half;
keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: keep_rd_narrow_bank;
endcase
//
// Ladder copy/cross: each address is presented twice, first with the
// *_in banks (PRE1, PRE3, BUSY1), then held with the *_out banks
// (PRE2, PRE4, BUSY2).
//
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE2: begin switch_rd_wide_bank (sel_wide_out); keep_rd_wide_addr;
switch_rd_narrow_bank(sel_narrow_out); keep_rd_narrow_addr; end
WRK_FSM_STATE_LATENCY_PRE3,
WRK_FSM_STATE_BUSY1: begin advance_rd_wide_addr; advance_rd_wide_addr_next; switch_rd_wide_bank(sel_wide_in);
advance_rd_narrow_addr; advance_rd_narrow_addr_next; switch_rd_narrow_bank(sel_narrow_in); end
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: begin keep_rd_wide_addr; switch_rd_wide_bank (sel_wide_out);
keep_rd_narrow_addr; switch_rd_narrow_bank(sel_narrow_out); end
endcase
//
// wide source fixed to BANK_WIDE_N, narrow source from sel_narrow_in
//
UOP_OPCODE_MODULAR_SUBTRACT_Y:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_WIDE_N, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE3,
WRK_FSM_STATE_BUSY1: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next;
keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: begin keep_rd_wide_bank; keep_rd_narrow_bank; end
endcase
//
// Merge: the wide source starts in BANK_WIDE_L with its address wrapping
// at the half-way index; in BUSY1 the bank switches to BANK_WIDE_H when
// the one-clock-delayed is_last_half flag is set. The narrow bank is
// BANK_DNC, but the narrow address counter runs over the full range (the
// destination write address logic uses the delayed narrow read address
// for this opcode).
//
UOP_OPCODE_MERGE_LH:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_WIDE_L, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE3: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next_half;
advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_BUSY1: begin if (!rd_wide_addr_is_last_half_dly[0]) keep_rd_wide_bank;
else switch_rd_wide_bank(BANK_WIDE_H);
advance_rd_wide_addr; advance_rd_wide_addr_next_half;
advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: keep_rd_wide_bank;
endcase
//
endcase
//
end
//
// Destination Write Address Logic
//
// Two-stage pipeline that selects the wide destination bank for
// UOP_OPCODE_MODULAR_REDUCE_INIT from the delayed narrow read address.
reg                    modular_reduce_init_first_half_x, modular_reduce_init_first_half_y;
reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_x, modular_reduce_init_sel_wide_out_y;

always @(posedge clk) begin
    //
    // Stage 1: is the delayed narrow address still within the first half?
    // The outer "<=" is the nonblocking assignment; the parenthesised
    // "<=" is a less-than-or-equal comparison.
    //
    modular_reduce_init_first_half_x <= (rd_narrow_addr_x_dly[1] <= word_index_last_half);
    modular_reduce_init_first_half_y <= (rd_narrow_addr_y_dly[1] <= word_index_last_half);
    //
    // Stage 2: first half goes to BANK_WIDE_L, the rest to BANK_WIDE_H.
    //
    modular_reduce_init_sel_wide_out_x <= modular_reduce_init_first_half_x ? BANK_WIDE_L : BANK_WIDE_H;
    modular_reduce_init_sel_wide_out_y <= modular_reduce_init_first_half_y ? BANK_WIDE_L : BANK_WIDE_H;
    //
end
// Load the wide write bank and address registers; the _x and _y ports get
// independent bank/address values.
task update_wr_wide_bank_addr;
    input [BANK_ADDR_W -1:0] x_bank;
    input [BANK_ADDR_W -1:0] y_bank;
    input [  OP_ADDR_W -1:0] x_addr;
    input [  OP_ADDR_W -1:0] y_addr;
    begin
        wr_wide_xy_bank_x <= x_bank;
        wr_wide_xy_addr_x <= x_addr;
        wr_wide_xy_bank_y <= y_bank;
        wr_wide_xy_addr_y <= y_addr;
    end
endtask

// Load the narrow write bank and address registers; the _x and _y ports
// get independent bank/address values.
task update_wr_narrow_bank_addr;
    input [BANK_ADDR_W -1:0] x_bank;
    input [BANK_ADDR_W -1:0] y_bank;
    input [  OP_ADDR_W -1:0] x_addr;
    input [  OP_ADDR_W -1:0] y_addr;
    begin
        wr_narrow_xy_bank_x <= x_bank;
        wr_narrow_xy_addr_x <= x_addr;
        wr_narrow_xy_bank_y <= y_bank;
        wr_narrow_xy_addr_y <= y_addr;
    end
endtask
always @(posedge clk) begin
    //
    // Destination write bank/address registers (clocked, no reset).
    // Default every cycle: don't-care bank and address on both memories.
    // The decode below overrides the default in the same cycles in which
    // the write enable logic asserts the matching enables. Write addresses
    // are delayed copies of the read addresses: stage [3] for the
    // BUSY1/POST1/POST3 pattern, stage [4] for BUSY2/POST2/POST4.
    //
    update_wr_wide_bank_addr  (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
    update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
    //
    case (opcode)
        //
        // narrow destination, stage [4]
        //
        UOP_OPCODE_PROPAGATE_CARRIES,
        UOP_OPCODE_MODULAR_SUBTRACT_X,
        UOP_OPCODE_REGULAR_ADD_UNEVEN:
            if ((wrk_fsm_state == WRK_FSM_STATE_BUSY2) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST2) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST4))
                update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out,
                                           rd_narrow_addr_x_dly[4], rd_narrow_addr_y_dly[4]);
        //
        // narrow destination, stage [3]
        //
        UOP_OPCODE_MERGE_LH:
            if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST3))
                update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out,
                                           rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
        //
        // narrow and wide destinations, stage [3]
        //
        UOP_OPCODE_COPY_CRT_Y2X,
        UOP_OPCODE_COPY_LADDERS_X2Y,
        UOP_OPCODE_CROSS_LADDERS_X2Y,
        UOP_OPCODE_MODULAR_SUBTRACT_Z:
            if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST3)) begin
                update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out,
                                           rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
                update_wr_wide_bank_addr  (sel_wide_out, sel_wide_out,
                                           rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);
            end
        //
        // wide destination, bank chosen per port by the
        // modular_reduce_init_sel_wide_out_* pipeline, stage [3]
        //
        UOP_OPCODE_MODULAR_REDUCE_INIT:
            if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST3))
                update_wr_wide_bank_addr(modular_reduce_init_sel_wide_out_x, modular_reduce_init_sel_wide_out_y,
                                         rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);
        //
        // wide destination, stage [4]
        //
        UOP_OPCODE_MODULAR_SUBTRACT_Y:
            if ((wrk_fsm_state == WRK_FSM_STATE_BUSY2) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST2) ||
                (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST4))
                update_wr_wide_bank_addr(sel_wide_out, sel_wide_out,
                                         rd_wide_addr_x_dly[4], rd_wide_addr_y_dly[4]);
        //
    endcase
    //
end
//
// UOP_OPCODE_REGULAR_ADD_UNEVEN
//
// Flag used by the DSP OPMODE logic for UOP_OPCODE_REGULAR_ADD_UNEVEN.
// It is only touched while that opcode is active: cleared in LATENCY_PRE4
// and set in BUSY2 once the wide read address has passed its half-way
// index (is_last_half flag delayed by three clocks). It holds its value
// otherwise and has no reset or initial value.
reg regular_add_uneven_flag;

always @(posedge clk)
    //
    if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN) begin
        if (wrk_fsm_state == WRK_FSM_STATE_LATENCY_PRE4)
            regular_add_uneven_flag <= 1'b0;
        else if ((wrk_fsm_state == WRK_FSM_STATE_BUSY2) && rd_wide_addr_is_last_half_dly[2])
            regular_add_uneven_flag <= 1'b1;
    end
//
// DSP Slice Array
//
// Operand registers loaded by the "DSP Feed Logic" process, each
// DSP48E1_C_W bits wide.
// NOTE(review): the DSP instantiations are outside the visible part of the
// file. The first two name fields presumably select the slice (data word
// x/y, then memory port x/y, matching the dsp_*_p outputs below) and the
// last field selects the operand -- confirm against the instances.
reg [DSP48E1_C_W-1:0] dsp_x_x_x;
reg [DSP48E1_C_W-1:0] dsp_y_x_x;
reg [DSP48E1_C_W-1:0] dsp_x_y_x;
reg [DSP48E1_C_W-1:0] dsp_y_y_x;
reg [DSP48E1_C_W-1:0] dsp_x_x_y;
reg [DSP48E1_C_W-1:0] dsp_y_x_y;
reg [DSP48E1_C_W-1:0] dsp_x_y_y;
reg [DSP48E1_C_W-1:0] dsp_y_y_y;
// Result buses, DSP48E1_P_W bits wide; presumably driven by the DSP
// slices' P outputs (not visible here).
wire [DSP48E1_P_W-1:0] dsp_x_x_p;
wire [DSP48E1_P_W-1:0] dsp_y_x_p;
wire [DSP48E1_P_W-1:0] dsp_x_y_p;
wire [DSP48E1_P_W-1:0] dsp_y_y_p;
// Results cut down to memory word format: the low WORD_W bits of the
// result with CARRY_ZERO prepended in the upper bits.
wire [WORD_EXT_W-1:0] dsp_x_x_p_reduced = {CARRY_ZERO, dsp_x_x_p[WORD_W-1:0]};
wire [WORD_EXT_W-1:0] dsp_y_x_p_reduced = {CARRY_ZERO, dsp_y_x_p[WORD_W-1:0]};
wire [WORD_EXT_W-1:0] dsp_x_y_p_reduced = {CARRY_ZERO, dsp_x_y_p[WORD_W-1:0]};
wire [WORD_EXT_W-1:0] dsp_y_y_p_reduced = {CARRY_ZERO, dsp_y_y_p[WORD_W-1:0]};
// Clock enables for the _x / _y slices and their one-clock-delayed copies.
reg dsp_ce_x = 1'b0;
reg dsp_ce_y = 1'b0;
reg dsp_ce_x_dly = 1'b0;
reg dsp_ce_y_dly = 1'b0;
// Per-port DSP control words, rewritten every clock by the
// "DSP - OPMODE, ALUMODE, CARRYINSEL" process.
reg [ DSP48E1_OPMODE_W -1:0] dsp_op_mode_x;
reg [ DSP48E1_OPMODE_W -1:0] dsp_op_mode_y;
reg [ DSP48E1_ALUMODE_W -1:0] dsp_alu_mode_x;
reg [ DSP48E1_ALUMODE_W -1:0] dsp_alu_mode_y;
reg [DSP48E1_CARRYINSEL_W -1:0] dsp_carry_in_sel_x;
reg [DSP48E1_CARRYINSEL_W -1:0] dsp_carry_in_sel_y;
// Carry outputs; presumably driven by the DSP slices (not visible here).
wire dsp_carry_out_x;
wire dsp_carry_out_y;
//
// DSP - CE
//
// One-clock-delayed copies of the DSP clock enables.
always @(posedge clk) begin
    dsp_ce_x_dly <= dsp_ce_x;
    dsp_ce_y_dly <= dsp_ce_y;
end

// DSP clock enables, cleared by the asynchronous reset. For the four
// opcodes that use the DSP slices they follow the narrow read enables
// delayed by two clocks; for every other opcode they are held low.
always @(posedge clk or negedge rst_n)
    //
    if (!rst_n) begin
        dsp_ce_x <= 1'b0;
        dsp_ce_y <= 1'b0;
    end else if ((opcode == UOP_OPCODE_PROPAGATE_CARRIES)  ||
                 (opcode == UOP_OPCODE_MODULAR_SUBTRACT_X) ||
                 (opcode == UOP_OPCODE_MODULAR_SUBTRACT_Y) ||
                 (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN)) begin
        dsp_ce_x <= rd_narrow_ena_x_dly2;
        dsp_ce_y <= rd_narrow_ena_y_dly2;
    end else begin
        dsp_ce_x <= 1'b0;
        dsp_ce_y <= 1'b0;
    end
//
// DSP - OPMODE, ALUMODE, CARRYINSEL
//
// DSP control word generation (clocked, no reset).
//
// All six control registers default to their *_DNC constants every clock.
// For the four DSP-using opcodes they are overridden, independently for
// the _x and _y ports, while the corresponding narrow read enable delayed
// by two clocks (rd_narrow_ena_?_dly2) is set. rd_narrow_addr_?_dly[1] is
// the narrow read address delayed by the same two clocks, so the
// "== OP_ADDR_ZERO" tests pick out the first word of an operand.
// The defaults must stay ahead of the case statement: the later
// nonblocking assignments are the ones that take effect.
// NOTE(review): the notes on the DSP48E1_* constants below go by their
// names only; the definitions are in modexpng_dsp48e1.vh.
always @(posedge clk) begin
//
dsp_op_mode_x <= DSP48E1_OPMODE_DNC;
dsp_op_mode_y <= DSP48E1_OPMODE_DNC;
//
dsp_alu_mode_x <= DSP48E1_ALUMODE_DNC;
dsp_alu_mode_y <= DSP48E1_ALUMODE_DNC;
//
dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_DNC;
dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_DNC;
//
case (opcode)
//
// First word uses the Z0 opmode, all following words the ZP17 opmode
// (presumably Z = 0 versus Z = previous result shifted by 17).
//
UOP_OPCODE_PROPAGATE_CARRIES: begin
//
if (rd_narrow_ena_x_dly2) begin
if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x <= DSP48E1_OPMODE_Z0_YC_X0;
else dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_X0;
dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
end
//
if (rd_narrow_ena_y_dly2) begin
if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y <= DSP48E1_OPMODE_Z0_YC_X0;
else dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_X0;
dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
end
//
end
//
// Fixed opmode, Z_MINUS ALU mode; the carry source is CARRYIN for the
// first word and CARRYCASCOUT for all following words.
//
UOP_OPCODE_MODULAR_SUBTRACT_X: begin
//
if (rd_narrow_ena_x_dly2) begin
dsp_op_mode_x <= DSP48E1_OPMODE_ZC_Y0_XAB;
dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_MINUS_X_AND_Y_AND_CIN;
if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
else dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
end
//
if (rd_narrow_ena_y_dly2) begin
dsp_op_mode_y <= DSP48E1_OPMODE_ZC_Y0_XAB;
dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_MINUS_X_AND_Y_AND_CIN;
if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
else dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
end
//
end
//
// Same as MODULAR_SUBTRACT_X except that the ALU mode is Z_PLUS.
//
UOP_OPCODE_MODULAR_SUBTRACT_Y: begin
//
if (rd_narrow_ena_x_dly2) begin
dsp_op_mode_x <= DSP48E1_OPMODE_ZC_Y0_XAB;
dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
else dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
end
//
if (rd_narrow_ena_y_dly2) begin
dsp_op_mode_y <= DSP48E1_OPMODE_ZC_Y0_XAB;
dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
else dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYCASCOUT;
end
//
end
//
// First word: Z0 opmode with the XAB operand. Following words: ZP17 with
// the XAB operand until regular_add_uneven_flag is set, afterwards ZP17
// with X0 (by name, the XAB operand is no longer added).
//
UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
//
if (rd_narrow_ena_x_dly2) begin
if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x <= DSP48E1_OPMODE_Z0_YC_XAB;
else begin
if (!regular_add_uneven_flag) dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_XAB;
else dsp_op_mode_x <= DSP48E1_OPMODE_ZP17_YC_X0;
end
dsp_alu_mode_x <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
end
//
if (rd_narrow_ena_y_dly2) begin
if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y <= DSP48E1_OPMODE_Z0_YC_XAB;
else begin
if (!regular_add_uneven_flag) dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_XAB;
else dsp_op_mode_y <= DSP48E1_OPMODE_ZP17_YC_X0;
end
dsp_alu_mode_y <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
end
//
end
//
endcase
//
end
//
// DSP Feed Logic
//
// Operand feed for the four add/sub DSP slices.
//
// Register naming: dsp_<slice>_x drives the slice's .ab port and
// dsp_<slice>_y drives its .c port (see the slice instantiations).
// Every register is don't-care by default and is only given a real
// value when the matching read-enable is set and the opcode is one of
// the four arithmetic micro-operations handled here.
always @(posedge clk) begin
    //
    // defaults (don't care)
    //
    dsp_x_x_x <= {DSP48E1_C_W{1'bX}};   dsp_x_x_y <= {DSP48E1_C_W{1'bX}};
    dsp_y_x_x <= {DSP48E1_C_W{1'bX}};   dsp_y_x_y <= {DSP48E1_C_W{1'bX}};
    dsp_x_y_x <= {DSP48E1_C_W{1'bX}};   dsp_x_y_y <= {DSP48E1_C_W{1'bX}};
    dsp_y_y_x <= {DSP48E1_C_W{1'bX}};   dsp_y_y_y <= {DSP48E1_C_W{1'bX}};
    //
    // operands of slices dsp_inst_x_x / dsp_inst_y_x
    //
    if (rd_narrow_ena_x_dly2)
        //
        case (opcode)
            //
            // C operand only: upper carry bits, a constant 1 inserted at bit WORD_W, then the low word
            UOP_OPCODE_PROPAGATE_CARRIES: begin
                dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
            end
            //
            // both slices get the same operands: C = narrow X word, AB = narrow Y word, zero-padded
            UOP_OPCODE_MODULAR_SUBTRACT_X: begin
                dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
            end
            //
            // both slices get the same operands: C = narrow X word padded with ones, AB = wide X word padded with zeroes
            UOP_OPCODE_MODULAR_SUBTRACT_Y: begin
                dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]};
                dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]};
                dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
            end
            //
            // C = narrow word with a 1 inserted at bit WORD_W, AB = wide word with a 0 inserted at bit WORD_W
            UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                dsp_x_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_x  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_x  [WORD_W-1:0]};
                dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                dsp_y_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_x  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_x  [WORD_W-1:0]};
                dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
            end
            //
        endcase
    //
    // operands of slices dsp_inst_x_y / dsp_inst_y_y (same formats as above, "_y" read data)
    //
    if (rd_narrow_ena_y_dly2)
        //
        case (opcode)
            //
            UOP_OPCODE_PROPAGATE_CARRIES: begin
                dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
            end
            //
            UOP_OPCODE_MODULAR_SUBTRACT_X: begin
                dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
            end
            //
            UOP_OPCODE_MODULAR_SUBTRACT_Y: begin
                dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]};
                dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]};
                dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
            end
            //
            UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                dsp_x_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_y  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_y  [WORD_W-1:0]};
                dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                dsp_y_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_y  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_y  [WORD_W-1:0]};
                dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
            end
            //
        endcase
    //
end
//
// DSP Slices
//
// Slice "x_x": operands dsp_x_x_x / dsp_x_x_y, result dsp_x_x_p.
// This slice's carry output is the one exported as dsp_carry_out_x.
`MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_x_x
(
    // clock and clock enables
    .clk            (clk),
    .ce_abc         (dsp_ce_x),
    .ce_ctrl        (dsp_ce_x),
    .ce_p           (dsp_ce_x_dly),
    // mode control (shared with dsp_inst_y_x)
    .op_mode        (dsp_op_mode_x),
    .alu_mode       (dsp_alu_mode_x),
    .carry_in_sel   (dsp_carry_in_sel_x),
    // operands and result
    .ab             (dsp_x_x_x),
    .c              (dsp_x_x_y),
    .p              (dsp_x_x_p),
    .carry_out      (dsp_carry_out_x),
    // cascade ports are left unconnected
    .casc_p_in      (),
    .casc_p_out     ()
);
// Slice "y_x": operands dsp_y_x_x / dsp_y_x_y, result dsp_y_x_p.
// Shares enables and mode control with dsp_inst_x_x; its carry output is unused.
`MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_y_x
(
    // clock and clock enables
    .clk            (clk),
    .ce_abc         (dsp_ce_x),
    .ce_ctrl        (dsp_ce_x),
    .ce_p           (dsp_ce_x_dly),
    // mode control (shared with dsp_inst_x_x)
    .op_mode        (dsp_op_mode_x),
    .alu_mode       (dsp_alu_mode_x),
    .carry_in_sel   (dsp_carry_in_sel_x),
    // operands and result
    .ab             (dsp_y_x_x),
    .c              (dsp_y_x_y),
    .p              (dsp_y_x_p),
    // carry and cascade ports are left unconnected
    .carry_out      (),
    .casc_p_in      (),
    .casc_p_out     ()
);
// Slice "x_y": operands dsp_x_y_x / dsp_x_y_y, result dsp_x_y_p.
// This slice's carry output is the one exported as dsp_carry_out_y.
`MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_x_y
(
    // clock and clock enables
    .clk            (clk),
    .ce_abc         (dsp_ce_y),
    .ce_ctrl        (dsp_ce_y),
    .ce_p           (dsp_ce_y_dly),
    // mode control (shared with dsp_inst_y_y)
    .op_mode        (dsp_op_mode_y),
    .alu_mode       (dsp_alu_mode_y),
    .carry_in_sel   (dsp_carry_in_sel_y),
    // operands and result
    .ab             (dsp_x_y_x),
    .c              (dsp_x_y_y),
    .p              (dsp_x_y_p),
    .carry_out      (dsp_carry_out_y),
    // cascade ports are left unconnected
    .casc_p_in      (),
    .casc_p_out     ()
);
// Slice "y_y": operands dsp_y_y_x / dsp_y_y_y, result dsp_y_y_p.
// Shares enables and mode control with dsp_inst_x_y; its carry output is unused.
`MODEXPNG_DSP_SLICE_ADDSUB dsp_inst_y_y
(
    // clock and clock enables
    .clk            (clk),
    .ce_abc         (dsp_ce_y),
    .ce_ctrl        (dsp_ce_y),
    .ce_p           (dsp_ce_y_dly),
    // mode control (shared with dsp_inst_x_y)
    .op_mode        (dsp_op_mode_y),
    .alu_mode       (dsp_alu_mode_y),
    .carry_in_sel   (dsp_carry_in_sel_y),
    // operands and result
    .ab             (dsp_y_y_x),
    .c              (dsp_y_y_y),
    .p              (dsp_y_y_p),
    // carry and cascade ports are left unconnected
    .carry_out      (),
    .casc_p_in      (),
    .casc_p_out     ()
);
//
// UOP_OPCODE_MODULAR_SUBTRACT_X
//
//
// IMPORTANT: DSP48E1 turns out to have a very non-obvious feature: when doing _subtraction_,
// the CARRYOUT[3] is _NOT_ equivalent to the borrow flag! See "CARRYOUT/CARRYCASCOUT"
// section of Appendix A on pp. 55-56 of UG479 for more details.
//
// Borrow flags, captured from the inverted DSP carry outputs while
// UOP_OPCODE_MODULAR_SUBTRACT_X is executing.
reg modular_subtract_x_brw_flag, modular_subtract_y_brw_flag;
// Words selected by the borrow flags while UOP_OPCODE_MODULAR_SUBTRACT_Z is executing.
reg [WORD_EXT_W-1:0] modular_subtract_x_mux, modular_subtract_y_mux;
// Selected words with CARRY_W zero bits placed above the low WORD_W bits.
wire [WORD_EXT_W-1:0] modular_subtract_x_mux_reduced;
wire [WORD_EXT_W-1:0] modular_subtract_y_mux_reduced;
assign modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]};
assign modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]};
// Capture the borrow flags: during UOP_OPCODE_MODULAR_SUBTRACT_X, in the
// LATENCY_POST4 state, store the inverted carry outputs of the two DSP
// slices. The flags hold their value at all other times.
always @(posedge clk)
    //
    if ((opcode == UOP_OPCODE_MODULAR_SUBTRACT_X) && (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST4))
        {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y};
// Select the word to write back during UOP_OPCODE_MODULAR_SUBTRACT_Z:
// the wide X read data when the borrow flag is set, the narrow X read
// data otherwise. The selection is registered in LATENCY_PRE4, BUSY2 and
// LATENCY_POST2, i.e. in the states that directly precede BUSY1,
// LATENCY_POST1 and LATENCY_POST3 in the worker FSM.
always @(posedge clk)
    //
    if ((opcode == UOP_OPCODE_MODULAR_SUBTRACT_Z) &&
        ((wrk_fsm_state == WRK_FSM_STATE_LATENCY_PRE4)  ||
         (wrk_fsm_state == WRK_FSM_STATE_BUSY2)         ||
         (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST2))) begin
        //
        modular_subtract_x_mux <= modular_subtract_x_brw_flag ? rd_wide_x_din_x_dly1 : rd_narrow_x_din_x_dly1;
        modular_subtract_y_mux <= modular_subtract_y_brw_flag ? rd_wide_x_din_y_dly1 : rd_narrow_x_din_y_dly1;
        //
    end
//
// FSM Process
//
// State register: asynchronous active-low reset to IDLE, otherwise
// load the combinationally computed next state on every clock edge.
always @(posedge clk or negedge rst_n) begin
    //
    if (!rst_n) begin
        wrk_fsm_state <= WRK_FSM_STATE_IDLE;
    end else begin
        wrk_fsm_state <= wrk_fsm_state_next;
    end
    //
end
//
// Busy Exit Logic
//
// One-cycle "done" strobe. It is raised when, for any of the opcodes
// listed below, the FSM is in BUSY1 and rd_narrow_addr_is_last is set.
// The FSM transition logic samples it in BUSY2 to leave the busy loop.
reg wrk_fsm_done = 1'b0;
always @(posedge clk) begin
    //
    // cleared by default, so the strobe lasts a single clock
    //
    wrk_fsm_done <= 1'b0;
    //
    case (opcode)
        //
        UOP_OPCODE_PROPAGATE_CARRIES,
        UOP_OPCODE_OUTPUT_FROM_NARROW,
        UOP_OPCODE_COPY_CRT_Y2X,
        UOP_OPCODE_MODULAR_REDUCE_INIT,
        UOP_OPCODE_COPY_LADDERS_X2Y,
        UOP_OPCODE_CROSS_LADDERS_X2Y,
        UOP_OPCODE_MODULAR_SUBTRACT_X,
        UOP_OPCODE_MODULAR_SUBTRACT_Y,
        UOP_OPCODE_MODULAR_SUBTRACT_Z,
        UOP_OPCODE_MERGE_LH,
        UOP_OPCODE_REGULAR_ADD_UNEVEN:
            //
            if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1) && rd_narrow_addr_is_last)
                wrk_fsm_done <= 1'b1;
        //
    endcase
    //
end
//
// FSM Transition Logic
//
// Next-state function of the worker FSM.
//
//   IDLE -(ena)-> PRE1 -> PRE2 -> PRE3 -> PRE4 -> BUSY1 <-> BUSY2
//   BUSY2 -(wrk_fsm_done)-> POST1 -> POST2 -> POST3 -> POST4 -> STOP -> IDLE
//
always @* begin
    //
    // STOP and any state not listed below return to IDLE
    //
    wrk_fsm_state_next = WRK_FSM_STATE_IDLE;
    //
    case (wrk_fsm_state)
        // wait for the enable strobe
        WRK_FSM_STATE_IDLE:             wrk_fsm_state_next = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE;
        // four unconditional lead-in states
        WRK_FSM_STATE_LATENCY_PRE1:     wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE2;
        WRK_FSM_STATE_LATENCY_PRE2:     wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE3;
        WRK_FSM_STATE_LATENCY_PRE3:     wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE4;
        WRK_FSM_STATE_LATENCY_PRE4:     wrk_fsm_state_next = WRK_FSM_STATE_BUSY1;
        // two-state busy loop, left from BUSY2 once the done strobe is set
        WRK_FSM_STATE_BUSY1:            wrk_fsm_state_next = WRK_FSM_STATE_BUSY2;
        WRK_FSM_STATE_BUSY2:            wrk_fsm_state_next = wrk_fsm_done ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY1;
        // four unconditional lead-out states, then STOP
        WRK_FSM_STATE_LATENCY_POST1:    wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST2;
        WRK_FSM_STATE_LATENCY_POST2:    wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST3;
        WRK_FSM_STATE_LATENCY_POST3:    wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST4;
        WRK_FSM_STATE_LATENCY_POST4:    wrk_fsm_state_next = WRK_FSM_STATE_STOP;
    endcase
    //
end
//
// Ready Flag Logic
//
// Ready flag driving the rdy output. It is high after reset, follows
// the inverse of ena while the FSM is in IDLE, is set again in STOP,
// and holds its value in every other state.
reg rdy_reg = 1'b1;
assign rdy = rdy_reg;
always @(posedge clk or negedge rst_n)
    //
    if (!rst_n)                                     rdy_reg <= 1'b1;
    else if (wrk_fsm_state == WRK_FSM_STATE_IDLE)   rdy_reg <= ~ena;
    else if (wrk_fsm_state == WRK_FSM_STATE_STOP)   rdy_reg <= 1'b1;
//
// Source to Destination Data Logic
//
// Second delay stage of the read data: each *_dly2 register is the
// corresponding *_dly1 signal registered once more on every clock.
reg [WORD_EXT_W-1:0] rd_wide_x_din_x_dly2,   rd_wide_y_din_x_dly2,   rd_wide_x_din_y_dly2,   rd_wide_y_din_y_dly2;
reg [WORD_EXT_W-1:0] rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2;
always @(posedge clk) begin
    //
    // wide read data
    //
    {rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2} <=
    {rd_wide_x_din_x_dly1, rd_wide_y_din_x_dly1, rd_wide_x_din_y_dly1, rd_wide_y_din_y_dly1};
    //
    // narrow read data
    //
    {rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2} <=
    {rd_narrow_x_din_x_dly1, rd_narrow_y_din_x_dly1, rd_narrow_x_din_y_dly1, rd_narrow_y_din_y_dly1};
    //
end
// Schedule a non-blocking update of the four wide write-data registers.
// Arguments map, in order, to wr_wide_x_dout_x, wr_wide_y_dout_x,
// wr_wide_x_dout_y and wr_wide_y_dout_y.
task update_wide_dout;
    input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y;
    begin
        {wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <= {x_x, y_x, x_y, y_y};
    end
endtask
// Schedule a non-blocking update of the four narrow write-data registers.
// Arguments map, in order, to wr_narrow_x_dout_x, wr_narrow_y_dout_x,
// wr_narrow_x_dout_y and wr_narrow_y_dout_y.
task update_narrow_dout;
    input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y;
    begin
        {wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <= {x_x, y_x, x_y, y_y};
    end
endtask
// Write-data generation for the wide and narrow outputs.
//
// Every clock both output groups are first set to WORD_EXT_DNC; the
// opcode/FSM-state cases below then override one or both groups (the
// later non-blocking assignment wins). Task arguments are ordered
// (x_dout_x, y_dout_x, x_dout_y, y_dout_y).
//
// Opcodes that write DSP results update in BUSY2 / LATENCY_POST2 /
// LATENCY_POST4; opcodes that copy read data update in BUSY1 /
// LATENCY_POST1 / LATENCY_POST3.
always @(posedge clk) begin
// defaults: don't-care write data
update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
//
case (opcode)
// DSP results go to the narrow outputs
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_MODULAR_SUBTRACT_X,
UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY2,
WRK_FSM_STATE_LATENCY_POST2,
WRK_FSM_STATE_LATENCY_POST4:
//
update_narrow_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced);
//
endcase
// the "_y" read data (second delay stage) is written to both the "_x" and "_y" outputs, wide and narrow
UOP_OPCODE_COPY_CRT_Y2X:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
begin update_narrow_dout(rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2);
update_wide_dout (rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2); end
//
endcase
// narrow read data (second delay stage) is written to the wide outputs
UOP_OPCODE_MODULAR_REDUCE_INIT:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
update_wide_dout(rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2);
//
endcase
// both X and Y outputs are sourced from the X read data: the X output uses
// the first delay stage, the Y output the second delay stage
UOP_OPCODE_COPY_LADDERS_X2Y:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
begin update_wide_dout (rd_wide_x_din_x_dly1, rd_wide_x_din_x_dly2, rd_wide_x_din_y_dly1, rd_wide_x_din_y_dly2);
update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_x_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_y_dly2); end
//
endcase
// as COPY_LADDERS_X2Y, except that each Y output takes the second-stage
// X read data of the opposite "_x"/"_y" side
UOP_OPCODE_CROSS_LADDERS_X2Y:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
begin update_wide_dout (rd_wide_x_din_x_dly1, rd_wide_x_din_y_dly2, rd_wide_x_din_y_dly1, rd_wide_x_din_x_dly2);
update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_y_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_x_dly2); end
//
endcase
// DSP results go to the wide outputs
UOP_OPCODE_MODULAR_SUBTRACT_Y:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY2,
WRK_FSM_STATE_LATENCY_POST2,
WRK_FSM_STATE_LATENCY_POST4:
//
update_wide_dout(dsp_x_x_p_reduced, dsp_y_x_p_reduced, dsp_x_y_p_reduced, dsp_y_y_p_reduced);
//
endcase
// the borrow-selected words are written to the X and Y outputs of both the wide and narrow groups
UOP_OPCODE_MODULAR_SUBTRACT_Z:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
begin update_wide_dout (modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced);
update_narrow_dout(modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced); end
//
endcase
// wide read data (second delay stage) is written to the narrow outputs
UOP_OPCODE_MERGE_LH:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
update_narrow_dout(rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2);
//
endcase
//
endcase
//
end
endmodule