//======================================================================
//
// Copyright (c) 2019, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================
module modexpng_general_worker
(
clk, rst_n,
ena, rdy,
sel_narrow_in, sel_narrow_out,
sel_wide_in, sel_wide_out,
opcode,
word_index_last, word_index_last_half,
wrk_rd_wide_xy_ena_x, wrk_rd_wide_xy_bank_x, wrk_rd_wide_xy_addr_x, wrk_rd_wide_x_din_x, wrk_rd_wide_y_din_x,
wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x, wrk_rd_narrow_x_din_x, wrk_rd_narrow_y_din_x,
wrk_rd_wide_xy_ena_y, wrk_rd_wide_xy_bank_y, wrk_rd_wide_xy_addr_y, wrk_rd_wide_x_din_y, wrk_rd_wide_y_din_y,
wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y, wrk_rd_narrow_x_din_y, wrk_rd_narrow_y_din_y,
wrk_wr_wide_xy_ena_x, wrk_wr_wide_xy_bank_x, wrk_wr_wide_xy_addr_x, wrk_wr_wide_x_dout_x, wrk_wr_wide_y_dout_x,
wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x, wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x,
wrk_wr_wide_xy_ena_y, wrk_wr_wide_xy_bank_y, wrk_wr_wide_xy_addr_y, wrk_wr_wide_x_dout_y, wrk_wr_wide_y_dout_y,
wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y, wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y
);
//
// Headers
//
`include "modexpng_parameters.vh"
`include "modexpng_microcode.vh"
//
// Ports
//
// Clock and active-low reset. rst_n is used as an asynchronous reset by the
// read/write enable registers and by the FSM state register in this module.
input clk;
input rst_n;
// ena is sampled by the next-state logic while the FSM is in IDLE.
// rdy is an output; the logic that drives it is not in this part of the file.
input ena;
output rdy;
// Bank selectors for the narrow and wide memories. The read/write address
// logic below forwards them to the *_bank_* outputs depending on the opcode.
input [ BANK_ADDR_W -1:0] sel_narrow_in;
input [ BANK_ADDR_W -1:0] sel_narrow_out;
input [ BANK_ADDR_W -1:0] sel_wide_in;
input [ BANK_ADDR_W -1:0] sel_wide_out;
// Micro-operation selector; picks which of the FSM families (one_pass,
// one_pass_meander, two_pass) drives the state register.
input [ UOP_OPCODE_W -1:0] opcode;
// Terminal word indices: the narrow "next" read address is compared against
// word_index_last, the wide "next" read address against word_index_last_half.
input [ OP_ADDR_W -1:0] word_index_last;
input [ OP_ADDR_W -1:0] word_index_last_half;
// Read port, wide memory, _x half: enable/bank/address out, x and y data in.
output wrk_rd_wide_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x;
input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_x;
input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_x;
// Read port, narrow memory, _x half.
output wrk_rd_narrow_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x;
// Read port, wide memory, _y half.
output wrk_rd_wide_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y;
input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_y;
input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_y;
// Read port, narrow memory, _y half.
output wrk_rd_narrow_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y;
// Write port, wide memory, _x half: enable/bank/address and x and y data out.
output wrk_wr_wide_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x;
output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_x;
output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_x;
// Write port, narrow memory, _x half.
output wrk_wr_narrow_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_x;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_x;
// Write port, wide memory, _y half.
output wrk_wr_wide_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y;
output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_y;
output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_y;
// Write port, narrow memory, _y half.
output wrk_wr_narrow_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_y;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_y;
//
// FSM Declaration
//
// State encodings. Three families share the single state register:
//   - unsuffixed states (6'h00..6'h07): one_pass sequence
//   - *_M1 / *_M2 states (6'h10..6'h19): one_pass_meander sequence
//   - *_TP states (6'h20..6'h29): two_pass sequence
// IDLE is common to all families.
localparam [5:0] WRK_FSM_STATE_IDLE = 6'h00;
localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1 = 6'h01;
localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2 = 6'h02;
localparam [5:0] WRK_FSM_STATE_BUSY = 6'h03;
localparam [5:0] WRK_FSM_STATE_LATENCY_POST1 = 6'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
localparam [5:0] WRK_FSM_STATE_LATENCY_POST2 = 6'h06;
localparam [5:0] WRK_FSM_STATE_STOP = 6'h07;
localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 6'h10;
localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 6'h11;
localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 6'h12;
localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 6'h13;
localparam [5:0] WRK_FSM_STATE_BUSY_M1 = 6'h14;
localparam [5:0] WRK_FSM_STATE_BUSY_M2 = 6'h15;
localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 6'h16;
localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 6'h17;
localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 6'h18;
localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 6'h19;
localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_TP = 6'h20;
localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_TP = 6'h21;
localparam [5:0] WRK_FSM_STATE_LATENCY_PRE3_TP = 6'h22;
localparam [5:0] WRK_FSM_STATE_LATENCY_PRE4_TP = 6'h23;
localparam [5:0] WRK_FSM_STATE_BUSY_TP = 6'h24;
localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_TP = 6'h25;
localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_TP = 6'h26;
localparam [5:0] WRK_FSM_STATE_LATENCY_POST3_TP = 6'h27;
localparam [5:0] WRK_FSM_STATE_LATENCY_POST4_TP = 6'h28;
localparam [5:0] WRK_FSM_STATE_HOLDOFF_TP = 6'h29;
// Current state register, plus one combinational next-state value per family.
// The FSM process picks one of the three next-state values based on the opcode.
reg [5:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
reg [5:0] wrk_fsm_state_next_one_pass; // single address space sweep
reg [5:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y)
reg [5:0] wrk_fsm_state_next_two_pass; // two address space sweeps
reg wrk_fsm_two_pass_pass; // 0=first pass, 1=second pass (set in LATENCY_POST4_TP)
reg wrk_fsm_two_pass_pass_dly; // later copy of the pass flag (set in HOLDOFF_TP)
// Address increment, as implemented by the advance_rd_*_addr_next tasks: the
// narrow "next" address wraps to zero after reaching word_index_last, while
// the wide "next" address wraps to zero after reaching word_index_last_half.
// TODO: Comment further on how narrow/wide address increment works (narrow is one long sweep, wide is two twice shorter sweeps)
//
// Control Signals
//
// Read side: one enable/bank/address set per memory (wide, narrow) and per
// half (_x, _y). All enables have an initial value of zero.
reg                     rd_wide_xy_ena_x   = 1'b0, rd_wide_xy_ena_y   = 1'b0;
reg                     rd_narrow_xy_ena_x = 1'b0, rd_narrow_xy_ena_y = 1'b0;
reg [BANK_ADDR_W -1:0]  rd_wide_xy_bank_x,   rd_wide_xy_bank_y;
reg [BANK_ADDR_W -1:0]  rd_narrow_xy_bank_x, rd_narrow_xy_bank_y;
reg [  OP_ADDR_W -1:0]  rd_wide_xy_addr_x,   rd_wide_xy_addr_y;
reg [  OP_ADDR_W -1:0]  rd_narrow_xy_addr_x, rd_narrow_xy_addr_y;
// Write side: same layout, plus the x and y data words for each port.
reg                     wr_wide_xy_ena_x   = 1'b0, wr_wide_xy_ena_y   = 1'b0;
reg                     wr_narrow_xy_ena_x = 1'b0, wr_narrow_xy_ena_y = 1'b0;
reg [BANK_ADDR_W -1:0]  wr_wide_xy_bank_x,   wr_wide_xy_bank_y;
reg [BANK_ADDR_W -1:0]  wr_narrow_xy_bank_x, wr_narrow_xy_bank_y;
reg [  OP_ADDR_W -1:0]  wr_wide_xy_addr_x,   wr_wide_xy_addr_y;
reg [  OP_ADDR_W -1:0]  wr_narrow_xy_addr_x, wr_narrow_xy_addr_y;
reg [ WORD_EXT_W -1:0]  wr_wide_x_dout_x,    wr_wide_y_dout_x;
reg [ WORD_EXT_W -1:0]  wr_wide_x_dout_y,    wr_wide_y_dout_y;
reg [ WORD_EXT_W -1:0]  wr_narrow_x_dout_x,  wr_narrow_y_dout_x;
reg [ WORD_EXT_W -1:0]  wr_narrow_x_dout_y,  wr_narrow_y_dout_y;
//
// Mapping
//
// Every wrk_* output is a direct copy of the internal register of the same
// name without the "wrk_" prefix. Grouped by signal kind.

// enables
assign wrk_rd_wide_xy_ena_x    = rd_wide_xy_ena_x;
assign wrk_rd_wide_xy_ena_y    = rd_wide_xy_ena_y;
assign wrk_rd_narrow_xy_ena_x  = rd_narrow_xy_ena_x;
assign wrk_rd_narrow_xy_ena_y  = rd_narrow_xy_ena_y;
assign wrk_wr_wide_xy_ena_x    = wr_wide_xy_ena_x;
assign wrk_wr_wide_xy_ena_y    = wr_wide_xy_ena_y;
assign wrk_wr_narrow_xy_ena_x  = wr_narrow_xy_ena_x;
assign wrk_wr_narrow_xy_ena_y  = wr_narrow_xy_ena_y;

// bank selects
assign wrk_rd_wide_xy_bank_x   = rd_wide_xy_bank_x;
assign wrk_rd_wide_xy_bank_y   = rd_wide_xy_bank_y;
assign wrk_rd_narrow_xy_bank_x = rd_narrow_xy_bank_x;
assign wrk_rd_narrow_xy_bank_y = rd_narrow_xy_bank_y;
assign wrk_wr_wide_xy_bank_x   = wr_wide_xy_bank_x;
assign wrk_wr_wide_xy_bank_y   = wr_wide_xy_bank_y;
assign wrk_wr_narrow_xy_bank_x = wr_narrow_xy_bank_x;
assign wrk_wr_narrow_xy_bank_y = wr_narrow_xy_bank_y;

// word addresses
assign wrk_rd_wide_xy_addr_x   = rd_wide_xy_addr_x;
assign wrk_rd_wide_xy_addr_y   = rd_wide_xy_addr_y;
assign wrk_rd_narrow_xy_addr_x = rd_narrow_xy_addr_x;
assign wrk_rd_narrow_xy_addr_y = rd_narrow_xy_addr_y;
assign wrk_wr_wide_xy_addr_x   = wr_wide_xy_addr_x;
assign wrk_wr_wide_xy_addr_y   = wr_wide_xy_addr_y;
assign wrk_wr_narrow_xy_addr_x = wr_narrow_xy_addr_x;
assign wrk_wr_narrow_xy_addr_y = wr_narrow_xy_addr_y;

// write data
assign wrk_wr_wide_x_dout_x    = wr_wide_x_dout_x;
assign wrk_wr_wide_y_dout_x    = wr_wide_y_dout_x;
assign wrk_wr_wide_x_dout_y    = wr_wide_x_dout_y;
assign wrk_wr_wide_y_dout_y    = wr_wide_y_dout_y;
assign wrk_wr_narrow_x_dout_x  = wr_narrow_x_dout_x;
assign wrk_wr_narrow_y_dout_x  = wr_narrow_y_dout_x;
assign wrk_wr_narrow_x_dout_y  = wr_narrow_x_dout_y;
assign wrk_wr_narrow_y_dout_y  = wr_narrow_y_dout_y;
//
// Delays
//
// Four-stage history of every read address.
reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1,   rd_wide_xy_addr_x_dly2,   rd_wide_xy_addr_x_dly3,   rd_wide_xy_addr_x_dly4;
reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1,   rd_wide_xy_addr_y_dly2,   rd_wide_xy_addr_y_dly3,   rd_wide_xy_addr_y_dly4;
reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly4;
reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly4;
// Three-stage history of the wide x read data (a fourth stage is not declared).
reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly3;
reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly3;
// Narrow read data history: three stages for the x word, two for the y word.
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly3;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly1, wrk_rd_narrow_y_din_x_dly2;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly3;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly1, wrk_rd_narrow_y_din_y_dly2;
// Free-running shift registers (no reset, no enable): each *_dlyN register
// takes the value its predecessor held on the previous clock edge.
always @(posedge clk) begin
    // wide read address, _x half
    rd_wide_xy_addr_x_dly1 <= rd_wide_xy_addr_x;
    rd_wide_xy_addr_x_dly2 <= rd_wide_xy_addr_x_dly1;
    rd_wide_xy_addr_x_dly3 <= rd_wide_xy_addr_x_dly2;
    rd_wide_xy_addr_x_dly4 <= rd_wide_xy_addr_x_dly3;
    // wide read address, _y half
    rd_wide_xy_addr_y_dly1 <= rd_wide_xy_addr_y;
    rd_wide_xy_addr_y_dly2 <= rd_wide_xy_addr_y_dly1;
    rd_wide_xy_addr_y_dly3 <= rd_wide_xy_addr_y_dly2;
    rd_wide_xy_addr_y_dly4 <= rd_wide_xy_addr_y_dly3;
    // narrow read address, _x half
    rd_narrow_xy_addr_x_dly1 <= rd_narrow_xy_addr_x;
    rd_narrow_xy_addr_x_dly2 <= rd_narrow_xy_addr_x_dly1;
    rd_narrow_xy_addr_x_dly3 <= rd_narrow_xy_addr_x_dly2;
    rd_narrow_xy_addr_x_dly4 <= rd_narrow_xy_addr_x_dly3;
    // narrow read address, _y half
    rd_narrow_xy_addr_y_dly1 <= rd_narrow_xy_addr_y;
    rd_narrow_xy_addr_y_dly2 <= rd_narrow_xy_addr_y_dly1;
    rd_narrow_xy_addr_y_dly3 <= rd_narrow_xy_addr_y_dly2;
    rd_narrow_xy_addr_y_dly4 <= rd_narrow_xy_addr_y_dly3;
    // wide x read data (three stages)
    wrk_rd_wide_x_din_x_dly1 <= wrk_rd_wide_x_din_x;
    wrk_rd_wide_x_din_x_dly2 <= wrk_rd_wide_x_din_x_dly1;
    wrk_rd_wide_x_din_x_dly3 <= wrk_rd_wide_x_din_x_dly2;
    wrk_rd_wide_x_din_y_dly1 <= wrk_rd_wide_x_din_y;
    wrk_rd_wide_x_din_y_dly2 <= wrk_rd_wide_x_din_y_dly1;
    wrk_rd_wide_x_din_y_dly3 <= wrk_rd_wide_x_din_y_dly2;
    // narrow read data, _x half (x word: three stages, y word: two stages)
    wrk_rd_narrow_x_din_x_dly1 <= wrk_rd_narrow_x_din_x;
    wrk_rd_narrow_x_din_x_dly2 <= wrk_rd_narrow_x_din_x_dly1;
    wrk_rd_narrow_x_din_x_dly3 <= wrk_rd_narrow_x_din_x_dly2;
    wrk_rd_narrow_y_din_x_dly1 <= wrk_rd_narrow_y_din_x;
    wrk_rd_narrow_y_din_x_dly2 <= wrk_rd_narrow_y_din_x_dly1;
    // narrow read data, _y half (x word: three stages, y word: two stages)
    wrk_rd_narrow_x_din_y_dly1 <= wrk_rd_narrow_x_din_y;
    wrk_rd_narrow_x_din_y_dly2 <= wrk_rd_narrow_x_din_y_dly1;
    wrk_rd_narrow_x_din_y_dly3 <= wrk_rd_narrow_x_din_y_dly2;
    wrk_rd_narrow_y_din_y_dly1 <= wrk_rd_narrow_y_din_y;
    wrk_rd_narrow_y_din_y_dly2 <= wrk_rd_narrow_y_din_y_dly1;
end
//
// Source Read Enable Logic
//
// Set both halves (_x, _y) of the wide read enable to _en.
task _update_wide_xy_rd_en;
    input _en;
    begin
        rd_wide_xy_ena_x <= _en;
        rd_wide_xy_ena_y <= _en;
    end
endtask

// Set both halves (_x, _y) of the narrow read enable to _en.
task _update_narrow_xy_rd_en;
    input _en;
    begin
        rd_narrow_xy_ena_x <= _en;
        rd_narrow_xy_ena_y <= _en;
    end
endtask

// Argument-less wrappers used by the read enable process.
task enable_wide_xy_rd_en;
    begin
        _update_wide_xy_rd_en(1'b1);
    end
endtask

task disable_wide_xy_rd_en;
    begin
        _update_wide_xy_rd_en(1'b0);
    end
endtask

task enable_narrow_xy_rd_en;
    begin
        _update_narrow_xy_rd_en(1'b1);
    end
endtask

task disable_narrow_xy_rd_en;
    begin
        _update_narrow_xy_rd_en(1'b0);
    end
endtask
// Registered read enables for the wide and narrow memories.
//
// Both enables are cleared on reset and are also cleared by default on every
// clock edge; a later enable_* call in the same evaluation overrides that
// default (the last non-blocking assignment wins). The decision is based on
// the *next* state of each FSM family, so an enable is registered on the same
// edge on which the state register would enter that state. The inner case on
// opcode limits each family to the opcodes that belong to it.
always @(posedge clk or negedge rst_n)
//
if (!rst_n) begin
//
disable_wide_xy_rd_en;
disable_narrow_xy_rd_en;
//
end else begin
//
// default: no reads
disable_wide_xy_rd_en;
disable_narrow_xy_rd_en;
//
// one_pass: read while entering the pre-latency and busy states
//
case (wrk_fsm_state_next_one_pass)
//
WRK_FSM_STATE_LATENCY_PRE1,
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_BUSY:
//
case (opcode)
//
// these opcodes read the narrow memory only
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_MODULAR_REDUCE_INIT:
//
enable_narrow_xy_rd_en;
//
// reads both memories
UOP_OPCODE_COPY_CRT_Y2X: begin
//
enable_wide_xy_rd_en;
enable_narrow_xy_rd_en;
//
end
//
// reads the wide memory only
UOP_OPCODE_MERGE_LH:
//
enable_wide_xy_rd_en;
//
endcase
//
endcase
//
// one_pass_meander: read in both the _M1 and _M2 phases of pre-latency and busy
//
case (wrk_fsm_state_next_one_pass_meander)
//
WRK_FSM_STATE_LATENCY_PRE1_M1,
WRK_FSM_STATE_LATENCY_PRE1_M2,
WRK_FSM_STATE_LATENCY_PRE2_M1,
WRK_FSM_STATE_LATENCY_PRE2_M2,
WRK_FSM_STATE_BUSY_M1,
WRK_FSM_STATE_BUSY_M2:
//
case (opcode)
//
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y: begin
//
enable_wide_xy_rd_en;
enable_narrow_xy_rd_en;
//
end
//
UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
enable_narrow_xy_rd_en;
//
endcase
//
endcase
//
// two_pass: the first pass reads wide and narrow, the second pass narrow only
//
case (wrk_fsm_state_next_two_pass)
//
WRK_FSM_STATE_LATENCY_PRE1_TP,
WRK_FSM_STATE_LATENCY_PRE2_TP,
WRK_FSM_STATE_LATENCY_PRE3_TP,
WRK_FSM_STATE_LATENCY_PRE4_TP,
WRK_FSM_STATE_BUSY_TP:
//
case (opcode)
UOP_OPCODE_MODULAR_SUBTRACT:
//
if (!wrk_fsm_two_pass_pass) begin
enable_wide_xy_rd_en;
enable_narrow_xy_rd_en;
end else
enable_narrow_xy_rd_en;
//
endcase
//
endcase
//
end
//
// Destination Write Enable Logic
//
// Set both halves (_x, _y) of the wide write enable to _en.
task _update_wide_xy_wr_en;
    input _en;
    begin
        wr_wide_xy_ena_x <= _en;
        wr_wide_xy_ena_y <= _en;
    end
endtask

// Set both halves (_x, _y) of the narrow write enable to _en.
task _update_narrow_xy_wr_en;
    input _en;
    begin
        wr_narrow_xy_ena_x <= _en;
        wr_narrow_xy_ena_y <= _en;
    end
endtask

// Argument-less wrappers used by the write enable process.
task enable_wide_xy_wr_en;
    begin
        _update_wide_xy_wr_en(1'b1);
    end
endtask

task disable_wide_xy_wr_en;
    begin
        _update_wide_xy_wr_en(1'b0);
    end
endtask

task enable_narrow_xy_wr_en;
    begin
        _update_narrow_xy_wr_en(1'b1);
    end
endtask

task disable_narrow_xy_wr_en;
    begin
        _update_narrow_xy_wr_en(1'b0);
    end
endtask
// Registered write enables for the wide and narrow memories.
//
// Both enables are cleared on reset and by default on every clock edge; a
// later enable_* call overrides the default. Unlike the read enables, the
// decision uses the *current* FSM state: writes are active in the busy and
// post-latency states, whereas reads are active in the pre-latency and busy
// states. UOP_OPCODE_OUTPUT_FROM_NARROW has no entry here, so it never
// asserts a write enable in this block.
always @(posedge clk or negedge rst_n)
//
if (!rst_n) begin
//
disable_wide_xy_wr_en;
disable_narrow_xy_wr_en;
//
end else begin
//
// default: no writes
disable_wide_xy_wr_en;
disable_narrow_xy_wr_en;
//
// one_pass
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST2:
//
case (opcode)
//
// results go to the narrow memory only
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_MERGE_LH:
//
enable_narrow_xy_wr_en;
//
// results go to both memories
UOP_OPCODE_COPY_CRT_Y2X: begin
//
enable_wide_xy_wr_en;
enable_narrow_xy_wr_en;
//
end
//
// results go to the wide memory only
UOP_OPCODE_MODULAR_REDUCE_INIT:
//
enable_wide_xy_wr_en;
//
endcase
//
endcase
//
// one_pass_meander: writes happen only in the _M2 phase
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY_M2,
WRK_FSM_STATE_LATENCY_POST1_M2,
WRK_FSM_STATE_LATENCY_POST2_M2:
//
case (opcode)
//
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y: begin
//
enable_wide_xy_wr_en;
enable_narrow_xy_wr_en;
//
end
//
UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
enable_narrow_xy_wr_en;
//
endcase
//
endcase
//
// two_pass: the first pass writes narrow only, the second pass writes wide and narrow
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY_TP,
WRK_FSM_STATE_LATENCY_POST1_TP,
WRK_FSM_STATE_LATENCY_POST2_TP,
WRK_FSM_STATE_LATENCY_POST3_TP,
WRK_FSM_STATE_LATENCY_POST4_TP:
//
case (opcode)
//
UOP_OPCODE_MODULAR_SUBTRACT:
//
if (!wrk_fsm_two_pass_pass)
enable_narrow_xy_wr_en;
else begin
enable_wide_xy_wr_en;
enable_narrow_xy_wr_en;
end
//
endcase
//
endcase
//
end
//
// Source to Destination Data Logic
//
// Selects the data written to the wide and narrow memories.
//
// The write data is set to WORD_EXT_DNC by default on every clock edge and is
// overridden in the same states and for the same opcodes in which the write
// enable process asserts an enable. The update_wide_dout / update_narrow_dout
// tasks and the *_w_cry_reduced, regadd_* and modsub_* operands are declared
// outside this part of the file.
// NOTE(review): the four task arguments presumably map to
// (x_dout_x, y_dout_x, x_dout_y, y_dout_y) - confirm against the task bodies.
always @(posedge clk) begin
//
// default: don't-care data
update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
//
// one_pass
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST2:
//
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES:
//
update_narrow_dout(rd_narrow_x_din_x_w_cry_reduced,
rd_narrow_y_din_x_w_cry_reduced,
rd_narrow_x_din_y_w_cry_reduced,
rd_narrow_y_din_y_w_cry_reduced);
//
// all four arguments come from the *_din_y read data, none from *_din_x
UOP_OPCODE_COPY_CRT_Y2X: begin
//
update_wide_dout(wrk_rd_wide_x_din_y,
wrk_rd_wide_y_din_y,
wrk_rd_wide_x_din_y,
wrk_rd_wide_y_din_y);
//
update_narrow_dout(wrk_rd_narrow_x_din_y,
wrk_rd_narrow_y_din_y,
wrk_rd_narrow_x_din_y,
wrk_rd_narrow_y_din_y);
//
end
//
// narrow read data is forwarded to the wide write data
UOP_OPCODE_MODULAR_REDUCE_INIT:
//
update_wide_dout(wrk_rd_narrow_x_din_x,
wrk_rd_narrow_y_din_x,
wrk_rd_narrow_x_din_y,
wrk_rd_narrow_y_din_y);
//
// wide read data is forwarded to the narrow write data
UOP_OPCODE_MERGE_LH:
//
update_narrow_dout(wrk_rd_wide_x_din_x,
wrk_rd_wide_y_din_x,
wrk_rd_wide_x_din_y,
wrk_rd_wide_y_din_y);
//
endcase
//
endcase
//
// one_pass_meander: data is produced only in the _M2 phase
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY_M2,
WRK_FSM_STATE_LATENCY_POST1_M2,
WRK_FSM_STATE_LATENCY_POST2_M2:
//
case (opcode)
//
// first/third arguments use the 3-cycle-delayed x read data, second/fourth
// arguments the 2-cycle-delayed x read data of the same half
UOP_OPCODE_COPY_LADDERS_X2Y: begin
//
update_wide_dout(wrk_rd_wide_x_din_x_dly3,
wrk_rd_wide_x_din_x_dly2,
wrk_rd_wide_x_din_y_dly3,
wrk_rd_wide_x_din_y_dly2);
//
update_narrow_dout(wrk_rd_narrow_x_din_x_dly3,
wrk_rd_narrow_x_din_x_dly2,
wrk_rd_narrow_x_din_y_dly3,
wrk_rd_narrow_x_din_y_dly2);
//
end
//
// same as COPY_LADDERS_X2Y except that the second and fourth arguments take
// their 2-cycle-delayed data from the opposite half (_y and _x swapped)
UOP_OPCODE_CROSS_LADDERS_X2Y: begin
//
update_wide_dout(wrk_rd_wide_x_din_x_dly3,
wrk_rd_wide_x_din_y_dly2,
wrk_rd_wide_x_din_y_dly3,
wrk_rd_wide_x_din_x_dly2);
//
update_narrow_dout(wrk_rd_narrow_x_din_x_dly3,
wrk_rd_narrow_x_din_y_dly2,
wrk_rd_narrow_x_din_y_dly3,
wrk_rd_narrow_x_din_x_dly2);
//
end
//
UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
//
update_narrow_dout(regadd_x_x_trunc,
regadd_y_x_trunc,
regadd_x_y_trunc,
regadd_y_y_trunc);
//
end
//
endcase
//
endcase
//
// two_pass
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY_TP,
WRK_FSM_STATE_LATENCY_POST1_TP,
WRK_FSM_STATE_LATENCY_POST2_TP,
WRK_FSM_STATE_LATENCY_POST3_TP,
WRK_FSM_STATE_LATENCY_POST4_TP:
//
case (opcode)
//
UOP_OPCODE_MODULAR_SUBTRACT:
//
// first pass: narrow gets the *_ab_dly_trunc / *_abn_trunc values;
// second pass: wide and narrow both get the modsub_*_mux value
if (!wrk_fsm_two_pass_pass)
update_narrow_dout(modsub_x_ab_dly_trunc, modsub_x_abn_trunc, modsub_y_ab_dly_trunc, modsub_y_abn_trunc);
else begin
update_wide_dout (modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux);
update_narrow_dout(modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux);
end
//
endcase
//
endcase
//
end
//
// Source Read Address Logic
//
// "Next" read address counters, shared by the _x and _y halves.
reg [OP_ADDR_W -1:0] rd_wide_xy_addr_xy_next, rd_narrow_xy_addr_xy_next;

// Sticky flag (and two delayed copies) recording that the wide counter has
// reached its terminal value.
reg rd_wide_xy_addr_xy_next_last_seen;
reg rd_wide_xy_addr_xy_next_last_seen_dly1, rd_wide_xy_addr_xy_next_last_seen_dly2;

// Terminal-count detection: the wide counter ends at word_index_last_half,
// the narrow counter at word_index_last.
wire rd_wide_xy_addr_xy_next_is_last;
wire rd_narrow_xy_addr_xy_next_is_last;
assign rd_wide_xy_addr_xy_next_is_last   = (rd_wide_xy_addr_xy_next   == word_index_last_half);
assign rd_narrow_xy_addr_xy_next_is_last = (rd_narrow_xy_addr_xy_next == word_index_last);
// Load the same bank and address into both halves of the wide read port.
task update_rd_wide_bank_addr;
    input [BANK_ADDR_W -1:0] bank;
    input [  OP_ADDR_W -1:0] addr;
    begin
        rd_wide_xy_bank_x <= bank;
        rd_wide_xy_addr_x <= addr;
        rd_wide_xy_bank_y <= bank;
        rd_wide_xy_addr_y <= addr;
    end
endtask

// Change only the wide read bank. The address is deliberately re-assigned to
// itself so that it overrides any earlier assignment made in the same clocked
// block and the current address is kept.
task update_rd_wide_bank;
    input [BANK_ADDR_W -1:0] bank;
    begin
        rd_wide_xy_bank_x <= bank;
        rd_wide_xy_addr_x <= rd_wide_xy_addr_x;
        rd_wide_xy_bank_y <= bank;
        rd_wide_xy_addr_y <= rd_wide_xy_addr_y;
    end
endtask

// Load the same bank and address into both halves of the narrow read port.
task update_rd_narrow_bank_addr;
    input [BANK_ADDR_W -1:0] bank;
    input [  OP_ADDR_W -1:0] addr;
    begin
        rd_narrow_xy_bank_x <= bank;
        rd_narrow_xy_addr_x <= addr;
        rd_narrow_xy_bank_y <= bank;
        rd_narrow_xy_addr_y <= addr;
    end
endtask

// Change only the narrow read bank; the address is held (see update_rd_wide_bank).
task update_rd_narrow_bank;
    input [BANK_ADDR_W -1:0] bank;
    begin
        rd_narrow_xy_bank_x <= bank;
        rd_narrow_xy_addr_x <= rd_narrow_xy_addr_x;
        rd_narrow_xy_bank_y <= bank;
        rd_narrow_xy_addr_y <= rd_narrow_xy_addr_y;
    end
endtask

// Preload the wide "next" address counter.
task update_rd_wide_addr_next;
    input [OP_ADDR_W -1:0] addr;
    begin
        rd_wide_xy_addr_xy_next <= addr;
    end
endtask

// Preload the narrow "next" address counter.
task update_rd_narrow_addr_next;
    input [OP_ADDR_W -1:0] addr;
    begin
        rd_narrow_xy_addr_xy_next <= addr;
    end
endtask

// Step the wide counter: wrap to zero after the terminal value, else increment.
task advance_rd_wide_addr_next;
    begin
        rd_wide_xy_addr_xy_next <= rd_wide_xy_addr_xy_next_is_last ? OP_ADDR_ZERO
                                                                   : rd_wide_xy_addr_xy_next + 1'b1;
    end
endtask

// Step the narrow counter: wrap to zero after the terminal value, else increment.
task advance_rd_narrow_addr_next;
    begin
        rd_narrow_xy_addr_xy_next <= rd_narrow_xy_addr_xy_next_is_last ? OP_ADDR_ZERO
                                                                       : rd_narrow_xy_addr_xy_next + 1'b1;
    end
endtask
// Tracks whether the wide "next" address has already reached its terminal
// value during the current operation. Only MERGE_LH and REGULAR_ADD_UNEVEN
// touch the flag; for every other opcode/state it keeps its value.
always @(posedge clk) begin
    case (opcode)

        UOP_OPCODE_MERGE_LH: begin
            case (wrk_fsm_state_next_one_pass)
                // start of the operation: clear the flag
                WRK_FSM_STATE_LATENCY_PRE1: begin
                    rd_wide_xy_addr_xy_next_last_seen <= 1'b0;
                end
                // set once the terminal value is first observed, then stay set
                WRK_FSM_STATE_BUSY: begin
                    if (rd_wide_xy_addr_xy_next_is_last && !rd_wide_xy_addr_xy_next_last_seen)
                        rd_wide_xy_addr_xy_next_last_seen <= 1'b1;
                end
            endcase
        end

        UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
            case (wrk_fsm_state_next_one_pass_meander)
                // start of the operation: clear the flag and both delayed copies
                WRK_FSM_STATE_LATENCY_PRE1_M1: begin
                    rd_wide_xy_addr_xy_next_last_seen_dly2 <= 1'b0;
                    rd_wide_xy_addr_xy_next_last_seen_dly1 <= 1'b0;
                    rd_wide_xy_addr_xy_next_last_seen      <= 1'b0;
                end
                // the delayed copies advance only when BUSY_M1 is the next state
                WRK_FSM_STATE_BUSY_M1: begin
                    rd_wide_xy_addr_xy_next_last_seen_dly2 <= rd_wide_xy_addr_xy_next_last_seen_dly1;
                    rd_wide_xy_addr_xy_next_last_seen_dly1 <= rd_wide_xy_addr_xy_next_last_seen;
                    if (rd_wide_xy_addr_xy_next_is_last && !rd_wide_xy_addr_xy_next_last_seen)
                        rd_wide_xy_addr_xy_next_last_seen <= 1'b1;
                end
            endcase
        end

    endcase
end
// Generates the bank and address of the wide and narrow read ports.
//
// Both ports are set to don't-care values by default on every clock edge;
// the case statements below override that default for the active opcode (the
// last non-blocking assignment wins). Like the read enables, the decision is
// based on the next state of each FSM family. In the first pre-latency state
// address zero is issued and the "next" counters are preloaded with one; in
// the following states the counters supply the address and are advanced.
always @(posedge clk) begin
//
// default: don't-care bank/address
update_rd_wide_bank_addr (BANK_DNC, OP_ADDR_DNC);
update_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC);
//
// one_pass
//
case (wrk_fsm_state_next_one_pass)
//
// first read: address zero, counters preloaded with one
WRK_FSM_STATE_LATENCY_PRE1:
//
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_COPY_CRT_Y2X,
UOP_OPCODE_MODULAR_REDUCE_INIT: begin
//
update_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
//
end
//
// MERGE_LH always starts reading the wide memory from BANK_WIDE_L
UOP_OPCODE_MERGE_LH: begin
update_rd_wide_bank_addr (BANK_WIDE_L, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
end
//
endcase
//
// subsequent reads: addresses come from the "next" counters
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_BUSY:
//
case (opcode)
//
// for these opcodes the wide port is addressed with the *narrow* counter,
// so both ports sweep the same address sequence
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_COPY_CRT_Y2X: begin
//
update_rd_wide_bank_addr (sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
//
end
//
// here each port follows its own counter
UOP_OPCODE_MODULAR_REDUCE_INIT: begin
//
update_rd_wide_bank_addr (sel_wide_in, rd_wide_xy_addr_xy_next ); advance_rd_wide_addr_next ;
update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
//
end
//
// wide reads come from BANK_WIDE_L until the wide counter has been seen at
// its terminal value, and from BANK_WIDE_H afterwards
UOP_OPCODE_MERGE_LH: begin
//
if (!rd_wide_xy_addr_xy_next_last_seen) update_rd_wide_bank_addr (BANK_WIDE_L, rd_wide_xy_addr_xy_next );
else update_rd_wide_bank_addr (BANK_WIDE_H, rd_wide_xy_addr_xy_next );
advance_rd_wide_addr_next ;
update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
//
end
//
endcase
//
endcase
//
// one_pass_meander: _M1 phases issue a new address on the first bank,
// _M2 phases keep that address and only switch to the second bank
//
case (wrk_fsm_state_next_one_pass_meander)
//
WRK_FSM_STATE_LATENCY_PRE1_M1:
case (opcode)
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y: begin
update_rd_wide_bank_addr (sel_wide_out, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
end
// NOTE(review): both ports, including the narrow one, use sel_wide_in as the
// bank here and sel_narrow_in in the _M2 phase - presumably the two selector
// inputs carry the two operand banks for this opcode; confirm with the caller.
UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
update_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
update_rd_narrow_bank_addr(sel_wide_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
end
endcase
//
WRK_FSM_STATE_LATENCY_PRE2_M1,
WRK_FSM_STATE_BUSY_M1:
case (opcode)
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y: begin
update_rd_wide_bank_addr (sel_wide_out, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
end
UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
update_rd_wide_bank_addr (sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
update_rd_narrow_bank_addr(sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
end
endcase
//
// second phase: same address, other bank; the counters are not advanced
WRK_FSM_STATE_LATENCY_PRE1_M2,
WRK_FSM_STATE_LATENCY_PRE2_M2,
WRK_FSM_STATE_BUSY_M2:
case (opcode)
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y: begin
update_rd_wide_bank (sel_wide_in );
update_rd_narrow_bank(sel_narrow_in);
end
UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
update_rd_wide_bank (sel_narrow_in);
update_rd_narrow_bank(sel_narrow_in);
end
endcase
//
endcase
//
// two_pass: the first pass reads wide bank BANK_WIDE_N and narrow bank
// sel_narrow_in; the second pass reads only narrow bank sel_narrow_out (the
// wide port keeps the don't-care default)
//
case (wrk_fsm_state_next_two_pass)
//
WRK_FSM_STATE_LATENCY_PRE1_TP:
//
case (opcode)
//
UOP_OPCODE_MODULAR_SUBTRACT:
//
if (!wrk_fsm_two_pass_pass) begin
update_rd_wide_bank_addr (BANK_WIDE_N, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
end else begin
update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
end
//
endcase
//
WRK_FSM_STATE_LATENCY_PRE2_TP,
WRK_FSM_STATE_LATENCY_PRE3_TP,
WRK_FSM_STATE_LATENCY_PRE4_TP,
WRK_FSM_STATE_BUSY_TP:
//
case (opcode)
//
UOP_OPCODE_MODULAR_SUBTRACT:
//
if (!wrk_fsm_two_pass_pass) begin
update_rd_wide_bank_addr (BANK_WIDE_N, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
end else begin
update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
end
//
endcase
//
endcase
//
end
//
// Destination Write Address Logic
//
// MODULAR_REDUCE_INIT destination bank: while the 2-cycle-delayed narrow read
// address is at or below word_index_last_half the word goes to BANK_WIDE_L,
// above that to BANK_WIDE_H. Evaluated separately for the _x and _y halves.
wire uop_modular_reduce_init_feed_lsb_x;
wire uop_modular_reduce_init_feed_lsb_y;
assign uop_modular_reduce_init_feed_lsb_x = (rd_narrow_xy_addr_x_dly2 <= word_index_last_half);
assign uop_modular_reduce_init_feed_lsb_y = (rd_narrow_xy_addr_y_dly2 <= word_index_last_half);

wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_x;
wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_y;
assign uop_modular_reduce_init_bank_x = uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H;
assign uop_modular_reduce_init_bank_y = uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H;

// Load bank and address of the wide write port, independently per half.
task update_wr_wide_bank_addr;
    input [BANK_ADDR_W -1:0] x_bank;
    input [BANK_ADDR_W -1:0] y_bank;
    input [  OP_ADDR_W -1:0] x_addr;
    input [  OP_ADDR_W -1:0] y_addr;
    begin
        wr_wide_xy_bank_x <= x_bank;
        wr_wide_xy_addr_x <= x_addr;
        wr_wide_xy_bank_y <= y_bank;
        wr_wide_xy_addr_y <= y_addr;
    end
endtask

// Load bank and address of the narrow write port, independently per half.
task update_wr_narrow_bank_addr;
    input [BANK_ADDR_W -1:0] x_bank;
    input [BANK_ADDR_W -1:0] y_bank;
    input [  OP_ADDR_W -1:0] x_addr;
    input [  OP_ADDR_W -1:0] y_addr;
    begin
        wr_narrow_xy_bank_x <= x_bank;
        wr_narrow_xy_addr_x <= x_addr;
        wr_narrow_xy_bank_y <= y_bank;
        wr_narrow_xy_addr_y <= y_addr;
    end
endtask
// Generates the bank and address of the wide and narrow write ports.
//
// Both ports are set to don't-care values by default on every clock edge and
// overridden in the busy/post-latency states of the active FSM family. Write
// addresses are delayed copies of the read addresses: the one_pass opcodes use
// the 2-cycle-delayed read address, the meander and two_pass opcodes the
// 4-cycle-delayed one.
always @(posedge clk) begin
//
// default: don't-care bank/address
update_wr_wide_bank_addr (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
//
// one_pass
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST2:
//
case (opcode)
//
// both ports are addressed with the delayed *narrow* read address; note that
// for PROPAGATE_CARRIES the write enable process only enables the narrow port
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_COPY_CRT_Y2X: begin
update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
end
//
// destination bank is BANK_WIDE_L or BANK_WIDE_H (see uop_modular_reduce_init_bank_*),
// destination address is the delayed *wide* read address
UOP_OPCODE_MODULAR_REDUCE_INIT:
update_wr_wide_bank_addr(uop_modular_reduce_init_bank_x, uop_modular_reduce_init_bank_y, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_y_dly2);
//
UOP_OPCODE_MERGE_LH:
update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
//
endcase
//
endcase
//
// one_pass_meander: addresses are produced only in the _M2 phase
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY_M2,
WRK_FSM_STATE_LATENCY_POST1_M2,
WRK_FSM_STATE_LATENCY_POST2_M2:
//
case (opcode)
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y: begin
update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
end
UOP_OPCODE_REGULAR_ADD_UNEVEN:
update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
endcase
//
endcase
//
// two_pass: the first pass addresses the narrow port only, the second pass both ports
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY_TP,
WRK_FSM_STATE_LATENCY_POST1_TP,
WRK_FSM_STATE_LATENCY_POST2_TP,
WRK_FSM_STATE_LATENCY_POST3_TP,
WRK_FSM_STATE_LATENCY_POST4_TP:
//
case (opcode)
//
UOP_OPCODE_MODULAR_SUBTRACT:
//
if (!wrk_fsm_two_pass_pass) begin
update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
end else begin
update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
end
//
endcase
//
endcase
//
end
//
// FSM Process
//
// FSM state register with asynchronous active-low reset.
// The opcode selects which family's next-state value is loaded; an opcode
// that belongs to no family sends the FSM to IDLE.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        wrk_fsm_state <= WRK_FSM_STATE_IDLE;
    end else begin
        // fallback for opcodes not listed below
        wrk_fsm_state <= WRK_FSM_STATE_IDLE;
        case (opcode)
            // single-sweep opcodes
            UOP_OPCODE_PROPAGATE_CARRIES:   wrk_fsm_state <= wrk_fsm_state_next_one_pass;
            UOP_OPCODE_OUTPUT_FROM_NARROW:  wrk_fsm_state <= wrk_fsm_state_next_one_pass;
            UOP_OPCODE_COPY_CRT_Y2X:        wrk_fsm_state <= wrk_fsm_state_next_one_pass;
            UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
            UOP_OPCODE_MERGE_LH:            wrk_fsm_state <= wrk_fsm_state_next_one_pass;
            // single-sweep opcodes with alternating _M1/_M2 phases
            UOP_OPCODE_COPY_LADDERS_X2Y:    wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
            UOP_OPCODE_CROSS_LADDERS_X2Y:   wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
            UOP_OPCODE_REGULAR_ADD_UNEVEN:  wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
            // double-sweep opcode
            UOP_OPCODE_MODULAR_SUBTRACT:    wrk_fsm_state <= wrk_fsm_state_next_two_pass;
        endcase
    end
end
//
// Busy Exit Logic
//
// One "done" flag per FSM family; each is consumed by that family's
// next-state logic to leave its busy state.
reg wrk_fsm_done_one_pass         = 1'b0;
reg wrk_fsm_done_one_pass_meander = 1'b0;
reg wrk_fsm_done_two_pass         = 1'b0;

always @(posedge clk) begin
    // All flags are cleared by default on every clock edge.
    {wrk_fsm_done_one_pass, wrk_fsm_done_one_pass_meander, wrk_fsm_done_two_pass} <= 3'b000;

    case (opcode)

        // one_pass family: done when the narrow counter sits at its terminal value in BUSY
        UOP_OPCODE_PROPAGATE_CARRIES,
        UOP_OPCODE_OUTPUT_FROM_NARROW,
        UOP_OPCODE_COPY_CRT_Y2X,
        UOP_OPCODE_MODULAR_REDUCE_INIT,
        UOP_OPCODE_MERGE_LH: begin
            case (wrk_fsm_state)
                WRK_FSM_STATE_BUSY: begin
                    if (rd_narrow_xy_addr_xy_next_is_last)
                        wrk_fsm_done_one_pass <= 1'b1;
                end
            endcase
        end

        // meander family: the terminal value is checked in BUSY_M2; in BUSY_M1
        // the flag is held (this overrides the default clear above)
        UOP_OPCODE_COPY_LADDERS_X2Y,
        UOP_OPCODE_CROSS_LADDERS_X2Y,
        UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
            case (wrk_fsm_state)
                WRK_FSM_STATE_BUSY_M1: begin
                    wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander;
                end
                WRK_FSM_STATE_BUSY_M2: begin
                    if (rd_narrow_xy_addr_xy_next_is_last)
                        wrk_fsm_done_one_pass_meander <= 1'b1;
                end
            endcase
        end

        // two_pass family: done when the narrow counter sits at its terminal value in BUSY_TP
        UOP_OPCODE_MODULAR_SUBTRACT: begin
            case (wrk_fsm_state)
                WRK_FSM_STATE_BUSY_TP: begin
                    if (rd_narrow_xy_addr_xy_next_is_last)
                        wrk_fsm_done_two_pass <= 1'b1;
                end
            endcase
        end

    endcase
end
//
// FSM Helper Logic
//
// Pass tracking for the two-pass sequence.
// Both flags are cleared when an operation is started from IDLE.
// wrk_fsm_two_pass_pass is set in LATENCY_POST4_TP, and its delayed
// companion is set one state later, in HOLDOFF_TP.
always @(posedge clk) begin
    if (wrk_fsm_state == WRK_FSM_STATE_IDLE) begin
        if (ena) begin
            wrk_fsm_two_pass_pass     <= 1'b0;
            wrk_fsm_two_pass_pass_dly <= 1'b0;
        end
    end else if (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST4_TP) begin
        wrk_fsm_two_pass_pass <= 1'b1;
    end else if (wrk_fsm_state == WRK_FSM_STATE_HOLDOFF_TP) begin
        wrk_fsm_two_pass_pass_dly <= 1'b1;
    end
end
//
// FSM Transition Logic
//
// Next-state function for the one-pass sequence:
// IDLE -> PRE1 -> PRE2 -> BUSY (until done) -> POST1 -> POST2 -> STOP -> IDLE.
always @* begin
    // STOP, and any state that does not belong to this sequence, returns to IDLE
    wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE;

    case (wrk_fsm_state)
        WRK_FSM_STATE_IDLE:
            wrk_fsm_state_next_one_pass = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE;
        WRK_FSM_STATE_LATENCY_PRE1:
            wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_PRE2;
        WRK_FSM_STATE_LATENCY_PRE2:
            wrk_fsm_state_next_one_pass = WRK_FSM_STATE_BUSY;
        WRK_FSM_STATE_BUSY:
            wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY;
        WRK_FSM_STATE_LATENCY_POST1:
            wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_POST2;
        WRK_FSM_STATE_LATENCY_POST2:
            wrk_fsm_state_next_one_pass = WRK_FSM_STATE_STOP;
    endcase
end
// Next-state function for the one-pass "meander" sequence. Every step of the
// plain one-pass sequence is split into an _M1 and an _M2 half-step; the BUSY
// pair loops M1 -> M2 -> M1 until the done flag is seen in BUSY_M2.
always @* begin
    // STOP, and any state that does not belong to this sequence, returns to IDLE
    wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE;

    case (wrk_fsm_state)
        WRK_FSM_STATE_IDLE:
            wrk_fsm_state_next_one_pass_meander = ena ? WRK_FSM_STATE_LATENCY_PRE1_M1 : WRK_FSM_STATE_IDLE;

        // pre-latency half-steps
        WRK_FSM_STATE_LATENCY_PRE1_M1:
            wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE1_M2;
        WRK_FSM_STATE_LATENCY_PRE1_M2:
            wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M1;
        WRK_FSM_STATE_LATENCY_PRE2_M1:
            wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M2;
        WRK_FSM_STATE_LATENCY_PRE2_M2:
            wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M1;

        // busy loop
        WRK_FSM_STATE_BUSY_M1:
            wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M2;
        WRK_FSM_STATE_BUSY_M2:
            wrk_fsm_state_next_one_pass_meander = wrk_fsm_done_one_pass_meander ? WRK_FSM_STATE_LATENCY_POST1_M1 : WRK_FSM_STATE_BUSY_M1;

        // post-latency half-steps
        WRK_FSM_STATE_LATENCY_POST1_M1:
            wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST1_M2;
        WRK_FSM_STATE_LATENCY_POST1_M2:
            wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M1;
        WRK_FSM_STATE_LATENCY_POST2_M1:
            wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M2;
        WRK_FSM_STATE_LATENCY_POST2_M2:
            wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_STOP;
    endcase
end
// Next-state function for the two-pass sequence:
// IDLE -> PRE1..PRE4 -> BUSY_TP (until done) -> POST1..POST4 -> HOLDOFF_TP.
// From HOLDOFF_TP the sequence restarts at PRE1_TP unless
// wrk_fsm_two_pass_pass_dly is already set, in which case it ends via STOP.
always @* begin
    // STOP, and any state that does not belong to this sequence, returns to IDLE
    wrk_fsm_state_next_two_pass = WRK_FSM_STATE_IDLE;

    case (wrk_fsm_state)
        WRK_FSM_STATE_IDLE:
            wrk_fsm_state_next_two_pass = ena ? WRK_FSM_STATE_LATENCY_PRE1_TP : WRK_FSM_STATE_IDLE;
        WRK_FSM_STATE_LATENCY_PRE1_TP:
            wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE2_TP;
        WRK_FSM_STATE_LATENCY_PRE2_TP:
            wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE3_TP;
        WRK_FSM_STATE_LATENCY_PRE3_TP:
            wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE4_TP;
        WRK_FSM_STATE_LATENCY_PRE4_TP:
            wrk_fsm_state_next_two_pass = WRK_FSM_STATE_BUSY_TP;
        WRK_FSM_STATE_BUSY_TP:
            wrk_fsm_state_next_two_pass = wrk_fsm_done_two_pass ? WRK_FSM_STATE_LATENCY_POST1_TP : WRK_FSM_STATE_BUSY_TP;
        WRK_FSM_STATE_LATENCY_POST1_TP:
            wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST2_TP;
        WRK_FSM_STATE_LATENCY_POST2_TP:
            wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST3_TP;
        WRK_FSM_STATE_LATENCY_POST3_TP:
            wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST4_TP;
        WRK_FSM_STATE_LATENCY_POST4_TP:
            wrk_fsm_state_next_two_pass = WRK_FSM_STATE_HOLDOFF_TP;
        WRK_FSM_STATE_HOLDOFF_TP:
            wrk_fsm_state_next_two_pass = wrk_fsm_two_pass_pass_dly ? WRK_FSM_STATE_STOP : WRK_FSM_STATE_LATENCY_PRE1_TP;
    endcase
end
//
// Ready Logic
//
// Registered "ready" output. It is high after reset, follows ~ena while the FSM
// sits in IDLE (so it drops on the clock that accepts a request), is raised
// again in STOP, and keeps its value in every other state.
reg rdy_reg = 1'b1;

assign rdy = rdy_reg;

always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        rdy_reg <= 1'b1;
    end else if (wrk_fsm_state == WRK_FSM_STATE_IDLE) begin
        rdy_reg <= ~ena;
    end else if (wrk_fsm_state == WRK_FSM_STATE_STOP) begin
        rdy_reg <= 1'b1;
    end
end
//
// UOP_OPCODE_PROPAGATE_CARRIES
//
// Carry registers, one per narrow read-data input.
reg [CARRY_W-1:0] rd_narrow_x_din_x_cry_r,
                  rd_narrow_y_din_x_cry_r,
                  rd_narrow_x_din_y_cry_r,
                  rd_narrow_y_din_y_cry_r;

// Narrow read data plus the stored carry (carry zero-extended by WORD_W bits).
wire [WORD_EXT_W-1:0] rd_narrow_x_din_x_w_cry;
wire [WORD_EXT_W-1:0] rd_narrow_y_din_x_w_cry;
wire [WORD_EXT_W-1:0] rd_narrow_x_din_y_w_cry;
wire [WORD_EXT_W-1:0] rd_narrow_y_din_y_w_cry;

assign rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
assign rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
assign rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
assign rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};

// Bits above WORD_W of each sum: the carry handed to the next word.
wire [CARRY_W-1:0] rd_narrow_x_din_x_w_cry_msb;
wire [CARRY_W-1:0] rd_narrow_y_din_x_w_cry_msb;
wire [CARRY_W-1:0] rd_narrow_x_din_y_w_cry_msb;
wire [CARRY_W-1:0] rd_narrow_y_din_y_w_cry_msb;

assign rd_narrow_x_din_x_w_cry_msb = rd_narrow_x_din_x_w_cry[WORD_EXT_W-1:WORD_W];
assign rd_narrow_y_din_x_w_cry_msb = rd_narrow_y_din_x_w_cry[WORD_EXT_W-1:WORD_W];
assign rd_narrow_x_din_y_w_cry_msb = rd_narrow_x_din_y_w_cry[WORD_EXT_W-1:WORD_W];
assign rd_narrow_y_din_y_w_cry_msb = rd_narrow_y_din_y_w_cry[WORD_EXT_W-1:WORD_W];

// Lower WORD_W bits of each sum, with the carry field replaced by zeroes.
wire [WORD_EXT_W-1:0] rd_narrow_x_din_x_w_cry_reduced;
wire [WORD_EXT_W-1:0] rd_narrow_y_din_x_w_cry_reduced;
wire [WORD_EXT_W-1:0] rd_narrow_x_din_y_w_cry_reduced;
wire [WORD_EXT_W-1:0] rd_narrow_y_din_y_w_cry_reduced;

assign rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W-1:0]};
assign rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W-1:0]};
assign rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W-1:0]};
assign rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W-1:0]};
// Schedules a non-blocking load of the four wide write-data registers.
// Arguments map, in order, onto wr_wide_x_dout_x, wr_wide_y_dout_x,
// wr_wide_x_dout_y and wr_wide_y_dout_y.
task update_wide_dout;
    input [WORD_EXT_W-1:0] x_x;
    input [WORD_EXT_W-1:0] y_x;
    input [WORD_EXT_W-1:0] x_y;
    input [WORD_EXT_W-1:0] y_y;
    begin
        {wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <= {x_x, y_x, x_y, y_y};
    end
endtask
// Schedules a non-blocking load of the four narrow write-data registers.
// Arguments map, in order, onto wr_narrow_x_dout_x, wr_narrow_y_dout_x,
// wr_narrow_x_dout_y and wr_narrow_y_dout_y.
task update_narrow_dout;
    input [WORD_EXT_W-1:0] x_x;
    input [WORD_EXT_W-1:0] y_x;
    input [WORD_EXT_W-1:0] x_y;
    input [WORD_EXT_W-1:0] y_y;
    begin
        {wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <= {x_x, y_x, x_y, y_y};
    end
endtask
// Schedules a non-blocking load of the four carry registers.
// Arguments map, in order, onto rd_narrow_x_din_x_cry_r, rd_narrow_y_din_x_cry_r,
// rd_narrow_x_din_y_cry_r and rd_narrow_y_din_y_cry_r.
task update_narrow_carries;
    input [CARRY_W-1:0] x_x_cry;
    input [CARRY_W-1:0] y_x_cry;
    input [CARRY_W-1:0] x_y_cry;
    input [CARRY_W-1:0] y_y_cry;
    begin
        rd_narrow_x_din_x_cry_r <= x_x_cry;
        rd_narrow_y_din_x_cry_r <= y_x_cry;
        rd_narrow_x_din_y_cry_r <= x_y_cry;
        rd_narrow_y_din_y_cry_r <= y_y_cry;
    end
endtask
// Carry register control for UOP_OPCODE_PROPAGATE_CARRIES.
// The carries are zeroed in LATENCY_PRE2 and then reloaded from the upper bits
// of the current sums in BUSY and LATENCY_POST1. They hold their value in all
// other states and whenever a different opcode is active.
always @(posedge clk) begin
    if (opcode == UOP_OPCODE_PROPAGATE_CARRIES) begin
        if (wrk_fsm_state == WRK_FSM_STATE_LATENCY_PRE2) begin
            update_narrow_carries(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO);
        end else if ((wrk_fsm_state == WRK_FSM_STATE_BUSY) ||
                     (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1)) begin
            update_narrow_carries(rd_narrow_x_din_x_w_cry_msb,
                                  rd_narrow_y_din_x_w_cry_msb,
                                  rd_narrow_x_din_y_w_cry_msb,
                                  rd_narrow_y_din_y_w_cry_msb);
        end
    end
end
//
// UOP_OPCODE_MODULAR_SUBTRACT
//
// (WORD_W+1)-bit working registers: "ab" is the running difference, "ab_dly"
// its one-clock-delayed copy, "abn" the running sum of "ab" and the wide word.
// Bit WORD_W of "ab" / "abn" is fed back as the borrow / carry of the next word.
reg [WORD_W:0] modsub_x_ab,     modsub_y_ab;
reg [WORD_W:0] modsub_x_ab_dly, modsub_y_ab_dly;
reg [WORD_W:0] modsub_x_abn,    modsub_y_abn;

// When set, the fed-back borrow / carry is forced to zero.
reg modsub_x_ab_mask_now,  modsub_y_ab_mask_now;
reg modsub_x_abn_mask_now, modsub_y_abn_mask_now;

// Borrow captured at the end of the first pass; selects the mux outputs below.
reg modsub_x_borrow_r, modsub_y_borrow_r;

// Fed-back borrow (ab) and carry (abn) after masking.
wire modsub_x_ab_masked  = modsub_x_ab_mask_now  ? 1'b0 : modsub_x_ab[WORD_W];
wire modsub_y_ab_masked  = modsub_y_ab_mask_now  ? 1'b0 : modsub_y_ab[WORD_W];
wire modsub_x_abn_masked = modsub_x_abn_mask_now ? 1'b0 : modsub_x_abn[WORD_W];
wire modsub_y_abn_masked = modsub_y_abn_mask_now ? 1'b0 : modsub_y_abn[WORD_W];

// Lower WORD_W bits of the narrow read data, zero-extended by one bit.
wire [WORD_W:0] modsub_x_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
wire [WORD_W:0] modsub_y_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
wire [WORD_W:0] modsub_x_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
wire [WORD_W:0] modsub_y_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_y_din_y[WORD_W-1:0]};

// Lower WORD_W bits of the delayed wide read data, zero-extended by one bit.
wire [WORD_W:0] modsub_x_wide_x_lsb_pad = {1'b0, wrk_rd_wide_x_din_x_dly1[WORD_W-1:0]};
wire [WORD_W:0] modsub_x_wide_y_lsb_pad = {1'b0, wrk_rd_wide_x_din_y_dly1[WORD_W-1:0]};

// Results with the top bit dropped, widened to WORD_EXT_W with a zero carry field.
wire [WORD_EXT_W-1:0] modsub_x_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_x_ab_dly[WORD_W-1:0]};
wire [WORD_EXT_W-1:0] modsub_y_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_y_ab_dly[WORD_W-1:0]};
wire [WORD_EXT_W-1:0] modsub_x_abn_trunc    = {{CARRY_W{1'b0}}, modsub_x_abn[WORD_W-1:0]};
wire [WORD_EXT_W-1:0] modsub_y_abn_trunc    = {{CARRY_W{1'b0}}, modsub_y_abn[WORD_W-1:0]};

// Borrow set -> wrk_rd_narrow_y_din_*_dly2, borrow clear -> wrk_rd_narrow_x_din_*_dly2.
wire [WORD_EXT_W-1:0] modsub_x_mux = modsub_x_borrow_r ? wrk_rd_narrow_y_din_x_dly2 : wrk_rd_narrow_x_din_x_dly2;
wire [WORD_EXT_W-1:0] modsub_y_mux = modsub_y_borrow_r ? wrk_rd_narrow_y_din_y_dly2 : wrk_rd_narrow_x_din_y_dly2;

// Lower WORD_W bits of the running difference, zero-extended by one bit.
wire [WORD_W:0] modsub_x_ab_lsb_pad = {1'b0, modsub_x_ab[WORD_W-1:0]};
wire [WORD_W:0] modsub_y_ab_lsb_pad = {1'b0, modsub_y_ab[WORD_W-1:0]};
// One word of the subtraction: narrow-X word minus narrow-Y word minus the
// (maskable) borrow out of the previous word. The new borrow lands in bit WORD_W.
task update_modsub_ab;
    begin
        modsub_x_ab <= (modsub_x_narrow_x_lsb_pad - modsub_y_narrow_x_lsb_pad) - modsub_x_ab_masked;
        modsub_y_ab <= (modsub_x_narrow_y_lsb_pad - modsub_y_narrow_y_lsb_pad) - modsub_y_ab_masked;
    end
endtask
// One word of the add-back: previous difference word plus the delayed wide
// word plus the (maskable) carry out of the previous word. The new carry lands
// in bit WORD_W.
task update_modsub_abn;
    begin
        modsub_x_abn <= (modsub_x_ab_lsb_pad + modsub_x_wide_x_lsb_pad) + modsub_x_abn_masked;
        modsub_y_abn <= (modsub_y_ab_lsb_pad + modsub_x_wide_y_lsb_pad) + modsub_y_abn_masked;
    end
endtask
// Capture the final borrow of the subtraction. This happens only in
// LATENCY_POST4_TP of the first pass (wrk_fsm_two_pass_pass still clear),
// taking bit WORD_W of the delayed difference registers.
always @(posedge clk) begin
    if ((opcode == UOP_OPCODE_MODULAR_SUBTRACT) &&
        (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST4_TP) &&
        !wrk_fsm_two_pass_pass) begin
        modsub_x_borrow_r <= modsub_x_ab_dly[WORD_W];
        modsub_y_borrow_r <= modsub_y_ab_dly[WORD_W];
    end
end
// One-clock delay line for the running difference registers.
always @(posedge clk)
    {modsub_x_ab_dly, modsub_y_ab_dly} <= {modsub_x_ab, modsub_y_ab};
// Datapath scheduling for UOP_OPCODE_MODULAR_SUBTRACT.
// Outside their active windows all four result registers are driven to
// don't-care values. The difference ("ab") is computed from LATENCY_PRE3_TP
// through LATENCY_POST2_TP; the add-back ("abn") runs one state later, from
// LATENCY_PRE4_TP through LATENCY_POST3_TP.
always @(posedge clk) begin
    // don't-care defaults
    modsub_x_ab  <= {1'bX, WORD_DNC};
    modsub_y_ab  <= {1'bX, WORD_DNC};
    modsub_x_abn <= {1'bX, WORD_DNC};
    modsub_y_abn <= {1'bX, WORD_DNC};

    if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) begin
        // window of the subtraction stage
        case (wrk_fsm_state)
            WRK_FSM_STATE_LATENCY_PRE3_TP,
            WRK_FSM_STATE_LATENCY_PRE4_TP,
            WRK_FSM_STATE_BUSY_TP,
            WRK_FSM_STATE_LATENCY_POST1_TP,
            WRK_FSM_STATE_LATENCY_POST2_TP:
                update_modsub_ab;
        endcase

        // window of the add-back stage
        case (wrk_fsm_state)
            WRK_FSM_STATE_LATENCY_PRE4_TP,
            WRK_FSM_STATE_BUSY_TP,
            WRK_FSM_STATE_LATENCY_POST1_TP,
            WRK_FSM_STATE_LATENCY_POST2_TP,
            WRK_FSM_STATE_LATENCY_POST3_TP:
                update_modsub_abn;
        endcase
    end
end
// Borrow / carry mask generation for UOP_OPCODE_MODULAR_SUBTRACT.
// The masks are low by default. The "ab" masks are raised in LATENCY_PRE2_TP
// and the "abn" masks in LATENCY_PRE3_TP; being registered, each is seen by
// the datapath in the following state.
always @(posedge clk) begin
    // defaults: nothing masked
    {modsub_x_ab_mask_now,  modsub_y_ab_mask_now}  <= 2'b00;
    {modsub_x_abn_mask_now, modsub_y_abn_mask_now} <= 2'b00;

    if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) begin
        if (wrk_fsm_state == WRK_FSM_STATE_LATENCY_PRE2_TP) begin
            {modsub_x_ab_mask_now, modsub_y_ab_mask_now} <= 2'b11;
        end else if (wrk_fsm_state == WRK_FSM_STATE_LATENCY_PRE3_TP) begin
            {modsub_x_abn_mask_now, modsub_y_abn_mask_now} <= 2'b11;
        end
    end
end
//
// UOP_OPCODE_REGULAR_ADD_UNEVEN
//
// Sum registers of the four adders, (WORD_W+1) bits wide.
// Bit WORD_W of each sum is later copied into the matching *_cry register.
reg [WORD_W:0] regadd_x_x;
reg [WORD_W:0] regadd_y_x;
reg [WORD_W:0] regadd_x_y;
reg [WORD_W:0] regadd_y_y;
// Registered one-bit carry added into the next sum of each adder.
reg regadd_x_x_cry;
reg regadd_y_x_cry;
reg regadd_x_y_cry;
reg regadd_y_y_cry;
// Lower WORD_W bits of each sum, widened to WORD_EXT_W with a zero carry field.
wire [WORD_EXT_W-1:0] regadd_x_x_trunc = {{CARRY_W{1'b0}}, regadd_x_x[WORD_W-1:0]};
wire [WORD_EXT_W-1:0] regadd_y_x_trunc = {{CARRY_W{1'b0}}, regadd_y_x[WORD_W-1:0]};
wire [WORD_EXT_W-1:0] regadd_x_y_trunc = {{CARRY_W{1'b0}}, regadd_x_y[WORD_W-1:0]};
wire [WORD_EXT_W-1:0] regadd_y_y_trunc = {{CARRY_W{1'b0}}, regadd_y_y[WORD_W-1:0]};
// NOTE(review): the four commented-out "masked" wires below look like leftovers
// of an earlier mask-based carry scheme; nothing in the visible code uses them.
//wire regadd_x_x_masked = regadd_xy_ab_x_mask_now ? 1'b0 : regadd_x_x[WORD_W];
//wire regadd_y_x_masked = regadd_xy_ab_x_mask_now ? 1'b0 : regadd_y_x[WORD_W];
//wire regadd_x_y_masked = regadd_xy_ab_y_mask_now ? 1'b0 : regadd_x_y[WORD_W];
//wire regadd_y_y_masked = regadd_xy_ab_y_mask_now ? 1'b0 : regadd_y_y[WORD_W];
/**/
// Registered adder operands ("a" and "b" per adder), zero-extended by one bit.
// NOTE(review): the trailing commented-out initialisers name the _dly2/_dly1 taps,
// while the always block that loads these registers uses _dly1 and the undelayed
// inputs, so the trailing comments appear to be stale.
reg [WORD_W:0] regadd_x_x_a_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_x_dly2[WORD_W-1:0]};
reg [WORD_W:0] regadd_x_x_b_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_x_dly1[WORD_W-1:0]};
reg [WORD_W:0] regadd_y_x_a_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_x_dly2[WORD_W-1:0]};
reg [WORD_W:0] regadd_y_x_b_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_x_dly1[WORD_W-1:0]};
reg [WORD_W:0] regadd_x_y_a_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_y_dly2[WORD_W-1:0]};
reg [WORD_W:0] regadd_x_y_b_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_y_dly1[WORD_W-1:0]};
reg [WORD_W:0] regadd_y_y_a_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_y_dly2[WORD_W-1:0]};
reg [WORD_W:0] regadd_y_y_b_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_y_dly1[WORD_W-1:0]};
/**/
//WRK_FSM_STATE_BUSY_M1,
//WRK_FSM_STATE_LATENCY_POST1_M1,
//WRK_FSM_STATE_LATENCY_POST2_M1:
// Operand capture for UOP_OPCODE_REGULAR_ADD_UNEVEN.
// In the M2 half-steps LATENCY_PRE2_M2, BUSY_M2 and LATENCY_POST1_M2 the "b"
// operand takes the current narrow word and the "a" operand takes the
// one-clock-delayed narrow word, or zero once
// rd_wide_xy_addr_xy_next_last_seen_dly2 is set. In every other cycle all
// eight operand registers are driven to don't-care values.
always @(posedge clk) begin
    // don't-care defaults
    regadd_x_x_a_lsb_pad <= {1'bX, WORD_DNC};
    regadd_x_x_b_lsb_pad <= {1'bX, WORD_DNC};
    regadd_y_x_a_lsb_pad <= {1'bX, WORD_DNC};
    regadd_y_x_b_lsb_pad <= {1'bX, WORD_DNC};
    regadd_x_y_a_lsb_pad <= {1'bX, WORD_DNC};
    regadd_x_y_b_lsb_pad <= {1'bX, WORD_DNC};
    regadd_y_y_a_lsb_pad <= {1'bX, WORD_DNC};
    regadd_y_y_b_lsb_pad <= {1'bX, WORD_DNC};

    if ((opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN) &&
        ((wrk_fsm_state == WRK_FSM_STATE_LATENCY_PRE2_M2) ||
         (wrk_fsm_state == WRK_FSM_STATE_BUSY_M2) ||
         (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1_M2))) begin
        // "a" operands: delayed word, replaced by zero after the last address was seen
        regadd_x_x_a_lsb_pad <= {1'b0, rd_wide_xy_addr_xy_next_last_seen_dly2 ? WORD_ZERO : wrk_rd_narrow_x_din_x_dly1[WORD_W-1:0]};
        regadd_y_x_a_lsb_pad <= {1'b0, rd_wide_xy_addr_xy_next_last_seen_dly2 ? WORD_ZERO : wrk_rd_narrow_y_din_x_dly1[WORD_W-1:0]};
        regadd_x_y_a_lsb_pad <= {1'b0, rd_wide_xy_addr_xy_next_last_seen_dly2 ? WORD_ZERO : wrk_rd_narrow_x_din_y_dly1[WORD_W-1:0]};
        regadd_y_y_a_lsb_pad <= {1'b0, rd_wide_xy_addr_xy_next_last_seen_dly2 ? WORD_ZERO : wrk_rd_narrow_y_din_y_dly1[WORD_W-1:0]};
        // "b" operands: current word
        regadd_x_x_b_lsb_pad <= {1'b0, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
        regadd_y_x_b_lsb_pad <= {1'b0, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
        regadd_x_y_b_lsb_pad <= {1'b0, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
        regadd_y_y_b_lsb_pad <= {1'b0, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
    end
end
// Adder stage for UOP_OPCODE_REGULAR_ADD_UNEVEN.
// In the M1 half-steps BUSY_M1, LATENCY_POST1_M1 and LATENCY_POST2_M1 each sum
// register takes a + b + carry; in every other cycle the sums are driven to
// don't-care values.
always @(posedge clk) begin
    // don't-care defaults
    regadd_x_x <= {1'bX, WORD_DNC};
    regadd_y_x <= {1'bX, WORD_DNC};
    regadd_x_y <= {1'bX, WORD_DNC};
    regadd_y_y <= {1'bX, WORD_DNC};

    if ((opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN) &&
        ((wrk_fsm_state == WRK_FSM_STATE_BUSY_M1) ||
         (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1_M1) ||
         (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST2_M1))) begin
        regadd_x_x <= regadd_x_x_a_lsb_pad + regadd_x_x_b_lsb_pad + regadd_x_x_cry;
        regadd_y_x <= regadd_y_x_a_lsb_pad + regadd_y_x_b_lsb_pad + regadd_y_x_cry;
        regadd_x_y <= regadd_x_y_a_lsb_pad + regadd_x_y_b_lsb_pad + regadd_x_y_cry;
        regadd_y_y <= regadd_y_y_a_lsb_pad + regadd_y_y_b_lsb_pad + regadd_y_y_cry;
    end
end
// Carry stage for UOP_OPCODE_REGULAR_ADD_UNEVEN.
// The carries are zeroed in LATENCY_PRE2_M2 and reloaded from bit WORD_W of the
// sums in BUSY_M2 and LATENCY_POST1_M2; in every other cycle they are driven
// to don't-care values.
always @(posedge clk) begin
    // don't-care defaults
    {regadd_x_x_cry, regadd_y_x_cry, regadd_x_y_cry, regadd_y_y_cry} <= 4'bXXXX;

    if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN) begin
        if (wrk_fsm_state == WRK_FSM_STATE_LATENCY_PRE2_M2) begin
            // the first word is added without an incoming carry
            {regadd_x_x_cry, regadd_y_x_cry, regadd_x_y_cry, regadd_y_y_cry} <= 4'b0000;
        end else if ((wrk_fsm_state == WRK_FSM_STATE_BUSY_M2) ||
                     (wrk_fsm_state == WRK_FSM_STATE_LATENCY_POST1_M2)) begin
            {regadd_x_x_cry, regadd_y_x_cry, regadd_x_y_cry, regadd_y_y_cry} <=
                {regadd_x_x[WORD_W], regadd_y_x[WORD_W], regadd_x_y[WORD_W], regadd_y_y[WORD_W]};
        end
    end
end
endmodule