//======================================================================
//
// Copyright (c) 2019, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================
module modexpng_general_worker
(
clk, rst_n,
ena, rdy,
sel_narrow_in, sel_narrow_out,
sel_wide_in, sel_wide_out,
opcode,
word_index_last, word_index_last_half,
wrk_rd_wide_xy_ena_x, wrk_rd_wide_xy_bank_x, wrk_rd_wide_xy_addr_x, wrk_rd_wide_x_din_x, wrk_rd_wide_y_din_x,
wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x, wrk_rd_narrow_x_din_x, wrk_rd_narrow_y_din_x,
wrk_rd_wide_xy_ena_y, wrk_rd_wide_xy_bank_y, wrk_rd_wide_xy_addr_y, wrk_rd_wide_x_din_y, wrk_rd_wide_y_din_y,
wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y, wrk_rd_narrow_x_din_y, wrk_rd_narrow_y_din_y,
wrk_wr_wide_xy_ena_x, wrk_wr_wide_xy_bank_x, wrk_wr_wide_xy_addr_x, wrk_wr_wide_x_dout_x, wrk_wr_wide_y_dout_x,
wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x, wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x,
wrk_wr_wide_xy_ena_y, wrk_wr_wide_xy_bank_y, wrk_wr_wide_xy_addr_y, wrk_wr_wide_x_dout_y, wrk_wr_wide_y_dout_y,
wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y, wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y
);
//
// Headers
//
`include "modexpng_parameters.vh"
`include "modexpng_microcode.vh"
//
// Ports
//
// Clock: every sequential block visible in this module triggers on its rising edge.
input clk;
// Active-low asynchronous reset; it clears the read and write enable registers.
input rst_n;
// NOTE(review): presumably the start strobe and ready flag of the worker FSM -
// their use is not visible in this part of the file, confirm against the FSM logic.
input ena;
output rdy;
// Bank selectors. The *_in banks are used as read sources; the *_out banks are used
// as write destinations and, for the ladder opcodes, as the alternate read bank.
input [ BANK_ADDR_W -1:0] sel_narrow_in;
input [ BANK_ADDR_W -1:0] sel_narrow_out;
input [ BANK_ADDR_W -1:0] sel_wide_in;
input [ BANK_ADDR_W -1:0] sel_wide_out;
// Micro-operation selector; compared against the UOP_OPCODE_* constants.
input [ UOP_OPCODE_W -1:0] opcode;
// Word indices the read address counters are compared with (wrap point and
// "last" / "last half" flags).
input [ OP_ADDR_W -1:0] word_index_last;
input [ OP_ADDR_W -1:0] word_index_last_half;
// Read port, wide, "_x" side: enable/bank/address out, two data words in.
output wrk_rd_wide_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x;
input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_x;
input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_x;
// Read port, narrow, "_x" side.
output wrk_rd_narrow_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x;
// Read port, wide, "_y" side.
output wrk_rd_wide_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y;
input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_y;
input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_y;
// Read port, narrow, "_y" side.
output wrk_rd_narrow_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y;
input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y;
// Write port, wide, "_x" side: enable/bank/address and two data words out.
output wrk_wr_wide_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x;
output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_x;
output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_x;
// Write port, narrow, "_x" side.
output wrk_wr_narrow_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x;
output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_x;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_x;
// Write port, wide, "_y" side.
output wrk_wr_wide_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y;
output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_y;
output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_y;
// Write port, narrow, "_y" side.
output wrk_wr_narrow_xy_ena_y;
output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y;
output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_y;
output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_y;
//
// FSM Declaration
//
// Worker FSM state encoding (4 bits). The numeric values are not in
// declaration order: the two BUSY states are 10/11, the POST states 5..8.
localparam [3:0]
    WRK_FSM_STATE_IDLE          = 4'd0,
    WRK_FSM_STATE_LATENCY_PRE1  = 4'd1,
    WRK_FSM_STATE_LATENCY_PRE2  = 4'd2,
    WRK_FSM_STATE_LATENCY_PRE3  = 4'd3,
    WRK_FSM_STATE_LATENCY_PRE4  = 4'd4,
    WRK_FSM_STATE_BUSY1         = 4'd10,
    WRK_FSM_STATE_BUSY2         = 4'd11,
    WRK_FSM_STATE_LATENCY_POST1 = 4'd5,
    WRK_FSM_STATE_LATENCY_POST2 = 4'd6,
    WRK_FSM_STATE_LATENCY_POST3 = 4'd7,
    WRK_FSM_STATE_LATENCY_POST4 = 4'd8,
    WRK_FSM_STATE_STOP          = 4'd15;

// Current state (initialised to IDLE) and the state to be entered next.
reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
reg [3:0] wrk_fsm_state_next;
//
// Control Signals
//
// Read side: enables start low, banks and addresses have no initial value.
reg rd_wide_ena_x   = 1'b0, rd_wide_ena_y   = 1'b0;
reg rd_narrow_ena_x = 1'b0, rd_narrow_ena_y = 1'b0;
reg [BANK_ADDR_W -1:0] rd_wide_bank_x,   rd_wide_bank_y;
reg [BANK_ADDR_W -1:0] rd_narrow_bank_x, rd_narrow_bank_y;
reg [  OP_ADDR_W -1:0] rd_wide_addr_x,   rd_wide_addr_y;
reg [  OP_ADDR_W -1:0] rd_narrow_addr_x, rd_narrow_addr_y;

// Write side: enables start low; bank, address and data words follow.
reg wr_wide_xy_ena_x   = 1'b0, wr_wide_xy_ena_y   = 1'b0;
reg wr_narrow_xy_ena_x = 1'b0, wr_narrow_xy_ena_y = 1'b0;
reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_x,   wr_wide_xy_bank_y;
reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x, wr_narrow_xy_bank_y;
reg [  OP_ADDR_W -1:0] wr_wide_xy_addr_x,   wr_wide_xy_addr_y;
reg [  OP_ADDR_W -1:0] wr_narrow_xy_addr_x, wr_narrow_xy_addr_y;
reg [ WORD_EXT_W -1:0] wr_wide_x_dout_x,   wr_wide_y_dout_x;
reg [ WORD_EXT_W -1:0] wr_wide_x_dout_y,   wr_wide_y_dout_y;
reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_x, wr_narrow_y_dout_x;
reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_y, wr_narrow_y_dout_y;
//
// Mapping
//
// Each output port is a plain copy of the internal register of the same role.
// Ports are grouped per memory port; the left and right concatenations list
// the fields in the same order with matching widths.

// Read ports: {enable, bank, address}
assign {wrk_rd_wide_xy_ena_x,   wrk_rd_wide_xy_bank_x,   wrk_rd_wide_xy_addr_x}   = {rd_wide_ena_x,   rd_wide_bank_x,   rd_wide_addr_x};
assign {wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x} = {rd_narrow_ena_x, rd_narrow_bank_x, rd_narrow_addr_x};
assign {wrk_rd_wide_xy_ena_y,   wrk_rd_wide_xy_bank_y,   wrk_rd_wide_xy_addr_y}   = {rd_wide_ena_y,   rd_wide_bank_y,   rd_wide_addr_y};
assign {wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y} = {rd_narrow_ena_y, rd_narrow_bank_y, rd_narrow_addr_y};

// Write ports: {enable, bank, address} and {x data, y data}
assign {wrk_wr_wide_xy_ena_x,   wrk_wr_wide_xy_bank_x,   wrk_wr_wide_xy_addr_x}   = {wr_wide_xy_ena_x,   wr_wide_xy_bank_x,   wr_wide_xy_addr_x};
assign {wrk_wr_wide_x_dout_x,   wrk_wr_wide_y_dout_x}                             = {wr_wide_x_dout_x,   wr_wide_y_dout_x};
assign {wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x} = {wr_narrow_xy_ena_x, wr_narrow_xy_bank_x, wr_narrow_xy_addr_x};
assign {wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x}                           = {wr_narrow_x_dout_x, wr_narrow_y_dout_x};
assign {wrk_wr_wide_xy_ena_y,   wrk_wr_wide_xy_bank_y,   wrk_wr_wide_xy_addr_y}   = {wr_wide_xy_ena_y,   wr_wide_xy_bank_y,   wr_wide_xy_addr_y};
assign {wrk_wr_wide_x_dout_y,   wrk_wr_wide_y_dout_y}                             = {wr_wide_x_dout_y,   wr_wide_y_dout_y};
assign {wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y} = {wr_narrow_xy_ena_y, wr_narrow_xy_bank_y, wr_narrow_xy_addr_y};
assign {wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y}                           = {wr_narrow_x_dout_y, wr_narrow_y_dout_y};
//
// Delays
//
// Four-stage history of every read address (index 0 is the most recent).
reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:3];
reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:3];
reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:3];
reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:3];

// One-cycle registered copies of the read data inputs.
reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly1,   rd_wide_y_din_x_dly1;
reg [WORD_EXT_W -1:0] rd_wide_x_din_y_dly1,   rd_wide_y_din_y_dly1;
reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly1, rd_narrow_y_din_x_dly1;
reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly1, rd_narrow_y_din_y_dly1;

always @(posedge clk) begin
    // register the incoming read data
    rd_wide_x_din_x_dly1   <= wrk_rd_wide_x_din_x;
    rd_wide_y_din_x_dly1   <= wrk_rd_wide_y_din_x;
    rd_wide_x_din_y_dly1   <= wrk_rd_wide_x_din_y;
    rd_wide_y_din_y_dly1   <= wrk_rd_wide_y_din_y;
    rd_narrow_x_din_x_dly1 <= wrk_rd_narrow_x_din_x;
    rd_narrow_y_din_x_dly1 <= wrk_rd_narrow_y_din_x;
    rd_narrow_x_din_y_dly1 <= wrk_rd_narrow_x_din_y;
    rd_narrow_y_din_y_dly1 <= wrk_rd_narrow_y_din_y;

    // shift the address histories: stage 0 samples the live address,
    // every later stage takes over the previous stage's value
    rd_narrow_addr_x_dly[0] <= rd_narrow_addr_x;
    rd_narrow_addr_x_dly[1] <= rd_narrow_addr_x_dly[0];
    rd_narrow_addr_x_dly[2] <= rd_narrow_addr_x_dly[1];
    rd_narrow_addr_x_dly[3] <= rd_narrow_addr_x_dly[2];

    rd_narrow_addr_y_dly[0] <= rd_narrow_addr_y;
    rd_narrow_addr_y_dly[1] <= rd_narrow_addr_y_dly[0];
    rd_narrow_addr_y_dly[2] <= rd_narrow_addr_y_dly[1];
    rd_narrow_addr_y_dly[3] <= rd_narrow_addr_y_dly[2];

    rd_wide_addr_x_dly[0] <= rd_wide_addr_x;
    rd_wide_addr_x_dly[1] <= rd_wide_addr_x_dly[0];
    rd_wide_addr_x_dly[2] <= rd_wide_addr_x_dly[1];
    rd_wide_addr_x_dly[3] <= rd_wide_addr_x_dly[2];

    rd_wide_addr_y_dly[0] <= rd_wide_addr_y;
    rd_wide_addr_y_dly[1] <= rd_wide_addr_y_dly[0];
    rd_wide_addr_y_dly[2] <= rd_wide_addr_y_dly[1];
    rd_wide_addr_y_dly[3] <= rd_wide_addr_y_dly[2];
end
//
// Source Read Enable Logic
//
// Helpers that drive the X and Y read enables of one memory type together.
task _update_wide_rd_en;
    input _en;
    begin
        rd_wide_ena_x <= _en;
        rd_wide_ena_y <= _en;
    end
endtask

task _update_narrow_rd_en;
    input _en;
    begin
        rd_narrow_ena_x <= _en;
        rd_narrow_ena_y <= _en;
    end
endtask

task enable_wide_rd_en;
    begin
        rd_wide_ena_x <= 1'b1;
        rd_wide_ena_y <= 1'b1;
    end
endtask

task disable_wide_rd_en;
    begin
        rd_wide_ena_x <= 1'b0;
        rd_wide_ena_y <= 1'b0;
    end
endtask

task enable_narrow_rd_en;
    begin
        rd_narrow_ena_x <= 1'b1;
        rd_narrow_ena_y <= 1'b1;
    end
endtask

task disable_narrow_rd_en;
    begin
        rd_narrow_ena_x <= 1'b0;
        rd_narrow_ena_y <= 1'b0;
    end
endtask

// Read enables are low by default and raised for one cycle depending on
// the state the FSM is about to enter and on the active opcode.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        disable_wide_rd_en;
        disable_narrow_rd_en;
    end else begin
        // default, overridden below by a later nonblocking assignment
        disable_wide_rd_en;
        disable_narrow_rd_en;

        case (wrk_fsm_state_next)

            // states in which every reading opcode issues a read
            WRK_FSM_STATE_LATENCY_PRE1,
            WRK_FSM_STATE_LATENCY_PRE3,
            WRK_FSM_STATE_BUSY1:
                case (opcode)
                    // narrow memory only
                    UOP_OPCODE_PROPAGATE_CARRIES,
                    UOP_OPCODE_OUTPUT_FROM_NARROW,
                    UOP_OPCODE_MODULAR_REDUCE_INIT,
                    UOP_OPCODE_MODULAR_SUBTRACT_X:
                        enable_narrow_rd_en;
                    // wide and narrow memories
                    UOP_OPCODE_COPY_CRT_Y2X,
                    UOP_OPCODE_MODULAR_SUBTRACT_Y,
                    UOP_OPCODE_MODULAR_SUBTRACT_Z,
                    UOP_OPCODE_REGULAR_ADD_UNEVEN,
                    UOP_OPCODE_COPY_LADDERS_X2Y,
                    UOP_OPCODE_CROSS_LADDERS_X2Y:
                        begin
                            enable_wide_rd_en;
                            enable_narrow_rd_en;
                        end
                    // wide memory only
                    UOP_OPCODE_MERGE_LH:
                        enable_wide_rd_en;
                endcase

            // in-between states: only the ladder opcodes read here as well
            WRK_FSM_STATE_LATENCY_PRE2,
            WRK_FSM_STATE_LATENCY_PRE4,
            WRK_FSM_STATE_BUSY2:
                case (opcode)
                    UOP_OPCODE_COPY_LADDERS_X2Y,
                    UOP_OPCODE_CROSS_LADDERS_X2Y:
                        begin
                            enable_wide_rd_en;
                            enable_narrow_rd_en;
                        end
                endcase

        endcase
    end
end
//
// Destination Write Enable Logic
//
// Helpers that drive the X and Y write enables of one memory type together.
task _update_wide_wr_en;
    input _en;
    begin
        wr_wide_xy_ena_x <= _en;
        wr_wide_xy_ena_y <= _en;
    end
endtask

task _update_narrow_wr_en;
    input _en;
    begin
        wr_narrow_xy_ena_x <= _en;
        wr_narrow_xy_ena_y <= _en;
    end
endtask

task enable_wide_wr_en;
    begin
        wr_wide_xy_ena_x <= 1'b1;
        wr_wide_xy_ena_y <= 1'b1;
    end
endtask

task disable_wide_wr_en;
    begin
        wr_wide_xy_ena_x <= 1'b0;
        wr_wide_xy_ena_y <= 1'b0;
    end
endtask

task enable_narrow_wr_en;
    begin
        wr_narrow_xy_ena_x <= 1'b1;
        wr_narrow_xy_ena_y <= 1'b1;
    end
endtask

task disable_narrow_wr_en;
    begin
        wr_narrow_xy_ena_x <= 1'b0;
        wr_narrow_xy_ena_y <= 1'b0;
    end
endtask

// Write enables are low by default and raised for one cycle while the FSM
// is in BUSY1, LATENCY_POST1 or LATENCY_POST3, depending on the opcode.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        disable_wide_wr_en;
        disable_narrow_wr_en;
    end else begin
        // default, overridden below by a later nonblocking assignment
        disable_wide_wr_en;
        disable_narrow_wr_en;

        case (wrk_fsm_state)
            WRK_FSM_STATE_BUSY1,
            WRK_FSM_STATE_LATENCY_POST1,
            WRK_FSM_STATE_LATENCY_POST3:
                case (opcode)
                    // result goes to narrow memory only
                    UOP_OPCODE_PROPAGATE_CARRIES,
                    UOP_OPCODE_MODULAR_SUBTRACT_X,
                    UOP_OPCODE_MERGE_LH,
                    UOP_OPCODE_REGULAR_ADD_UNEVEN:
                        enable_narrow_wr_en;
                    // result goes to both memories
                    UOP_OPCODE_COPY_CRT_Y2X,
                    UOP_OPCODE_COPY_LADDERS_X2Y,
                    UOP_OPCODE_CROSS_LADDERS_X2Y,
                    UOP_OPCODE_MODULAR_SUBTRACT_Z:
                        begin
                            enable_wide_wr_en;
                            enable_narrow_wr_en;
                        end
                    // result goes to wide memory only
                    UOP_OPCODE_MODULAR_REDUCE_INIT,
                    UOP_OPCODE_MODULAR_SUBTRACT_Y:
                        enable_wide_wr_en;
                endcase
        endcase
    end
end
//
// Source Read Address Logic
//
// Address that will be issued on the next advance, per memory type.
reg [OP_ADDR_W -1:0] rd_wide_addr_next;
reg [OP_ADDR_W -1:0] rd_narrow_addr_next;

// Flags describing the current read address ...
reg rd_wide_addr_is_last        = 1'b0, rd_narrow_addr_is_last        = 1'b0;
reg rd_wide_addr_is_last_half   = 1'b0, rd_narrow_addr_is_last_half   = 1'b0;
// ... and the queued next address.
reg rd_wide_addr_next_is_last      = 1'b0, rd_narrow_addr_next_is_last      = 1'b0;
reg rd_wide_addr_next_is_last_half = 1'b0, rd_narrow_addr_next_is_last_half = 1'b0;

// 4-deep history of the "last half" flags; bit 0 is the most recent sample.
reg [3:0] rd_wide_addr_is_last_half_dly   = 4'h0;
reg [3:0] rd_narrow_addr_is_last_half_dly = 4'h0;

always @(posedge clk) begin
    rd_wide_addr_is_last_half_dly[0]     <= rd_wide_addr_is_last_half;
    rd_wide_addr_is_last_half_dly[3:1]   <= rd_wide_addr_is_last_half_dly[2:0];
    rd_narrow_addr_is_last_half_dly[0]   <= rd_narrow_addr_is_last_half;
    rd_narrow_addr_is_last_half_dly[3:1] <= rd_narrow_addr_is_last_half_dly[2:0];
end
// ---- presets: load bank/address (or the queued address) and clear flags ----

// Load the same wide bank and address on both X and Y, clear the position flags.
task preset_rd_wide_bank_addr;
    input [BANK_ADDR_W -1:0] bank;
    input [  OP_ADDR_W -1:0] addr;
    begin
        rd_wide_bank_x            <= bank;
        rd_wide_bank_y            <= bank;
        rd_wide_addr_x            <= addr;
        rd_wide_addr_y            <= addr;
        rd_wide_addr_is_last      <= 1'b0;
        rd_wide_addr_is_last_half <= 1'b0;
    end
endtask

// Narrow counterpart of preset_rd_wide_bank_addr.
task preset_rd_narrow_bank_addr;
    input [BANK_ADDR_W -1:0] bank;
    input [  OP_ADDR_W -1:0] addr;
    begin
        rd_narrow_bank_x            <= bank;
        rd_narrow_bank_y            <= bank;
        rd_narrow_addr_x            <= addr;
        rd_narrow_addr_y            <= addr;
        rd_narrow_addr_is_last      <= 1'b0;
        rd_narrow_addr_is_last_half <= 1'b0;
    end
endtask

// Load the queued wide address and clear its flags.
task preset_rd_wide_addr_next;
    input [OP_ADDR_W -1:0] addr;
    begin
        rd_wide_addr_next              <= addr;
        rd_wide_addr_next_is_last      <= 1'b0;
        rd_wide_addr_next_is_last_half <= 1'b0;
    end
endtask

// Load the queued narrow address and clear its flags.
task preset_rd_narrow_addr_next;
    input [OP_ADDR_W -1:0] addr;
    begin
        rd_narrow_addr_next              <= addr;
        rd_narrow_addr_next_is_last      <= 1'b0;
        rd_narrow_addr_next_is_last_half <= 1'b0;
    end
endtask

// ---- wide bank / address ----

// Hold the wide bank registers at their current value.
task keep_rd_wide_bank;
    begin
        rd_wide_bank_x <= rd_wide_bank_x;
        rd_wide_bank_y <= rd_wide_bank_y;
    end
endtask

// Point both wide bank registers at the given bank.
task switch_rd_wide_bank;
    input [BANK_ADDR_W -1:0] bank;
    begin
        rd_wide_bank_x <= bank;
        rd_wide_bank_y <= bank;
    end
endtask

// Hold the wide address registers at their current value.
task keep_rd_wide_addr;
    begin
        rd_wide_addr_x <= rd_wide_addr_x;
        rd_wide_addr_y <= rd_wide_addr_y;
    end
endtask

// Move the wide address to the queued one and recompute the position flags.
task advance_rd_wide_addr;
    begin
        rd_wide_addr_x            <= rd_wide_addr_next;
        rd_wide_addr_y            <= rd_wide_addr_next;
        rd_wide_addr_is_last      <= (rd_wide_addr_next == word_index_last);
        rd_wide_addr_is_last_half <= (rd_wide_addr_next == word_index_last_half);
    end
endtask

// ---- narrow bank / address ----

// Hold the narrow bank registers at their current value.
task keep_rd_narrow_bank;
    begin
        rd_narrow_bank_x <= rd_narrow_bank_x;
        rd_narrow_bank_y <= rd_narrow_bank_y;
    end
endtask

// Point both narrow bank registers at the given bank.
task switch_rd_narrow_bank;
    input [BANK_ADDR_W -1:0] bank;
    begin
        rd_narrow_bank_x <= bank;
        rd_narrow_bank_y <= bank;
    end
endtask

// Hold the narrow address registers at their current value.
task keep_rd_narrow_addr;
    begin
        rd_narrow_addr_x <= rd_narrow_addr_x;
        rd_narrow_addr_y <= rd_narrow_addr_y;
    end
endtask

// Move the narrow address to the queued one and recompute the position flags.
task advance_rd_narrow_addr;
    begin
        rd_narrow_addr_x            <= rd_narrow_addr_next;
        rd_narrow_addr_y            <= rd_narrow_addr_next;
        rd_narrow_addr_is_last      <= (rd_narrow_addr_next == word_index_last);
        rd_narrow_addr_is_last_half <= (rd_narrow_addr_next == word_index_last_half);
    end
endtask

// ---- queued-address flags ----
// The comparison uses the *current* queued address against (limit - 1):
// it is evaluated in the same cycle in which the queued address is stepped.

task update_rd_wide_addr_flags;
    begin
        rd_wide_addr_next_is_last      <= (rd_wide_addr_next == (word_index_last      - 1'b1));
        rd_wide_addr_next_is_last_half <= (rd_wide_addr_next == (word_index_last_half - 1'b1));
    end
endtask

task update_rd_narrow_addr_flags;
    begin
        rd_narrow_addr_next_is_last      <= (rd_narrow_addr_next == (word_index_last      - 1'b1));
        rd_narrow_addr_next_is_last_half <= (rd_narrow_addr_next == (word_index_last_half - 1'b1));
    end
endtask

// ---- queued-address stepping ----

// Step the queued wide address, wrapping to zero after word_index_last.
task advance_rd_wide_addr_next;
    begin
        rd_wide_addr_next <= rd_wide_addr_next_is_last ? OP_ADDR_ZERO : rd_wide_addr_next + 1'b1;
        update_rd_wide_addr_flags;
    end
endtask

// Step the queued narrow address, wrapping to zero after word_index_last.
task advance_rd_narrow_addr_next;
    begin
        rd_narrow_addr_next <= rd_narrow_addr_next_is_last ? OP_ADDR_ZERO : rd_narrow_addr_next + 1'b1;
        update_rd_narrow_addr_flags;
    end
endtask

// Step the queued wide address, wrapping to zero after word_index_last_half.
task advance_rd_wide_addr_next_half;
    begin
        rd_wide_addr_next <= rd_wide_addr_next_is_last_half ? OP_ADDR_ZERO : rd_wide_addr_next + 1'b1;
        update_rd_wide_addr_flags;
    end
endtask

// Step the queued narrow address, wrapping to zero after word_index_last_half.
task advance_rd_narrow_addr_next_half;
    begin
        rd_narrow_addr_next <= rd_narrow_addr_next_is_last_half ? OP_ADDR_ZERO : rd_narrow_addr_next + 1'b1;
        update_rd_narrow_addr_flags;
    end
endtask
// Drives the wide and narrow read bank/address registers.
// Every cycle both are first preset to the don't-care bank/address (this also
// clears the is_last / is_last_half flags). The opcode / next-state cases
// below then override that default: the later nonblocking assignment to the
// same register wins. Anything a branch does not touch keeps the default.
always @(posedge clk) begin
//
preset_rd_wide_bank_addr (BANK_DNC, OP_ADDR_DNC);
preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC);
//
case (opcode)
//
// Narrow-only opcodes: start at word zero of sel_narrow_in, then step the
// address on PRE3/BUSY1 and hold only the bank on PRE2/PRE4/BUSY2.
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_MODULAR_SUBTRACT_X:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE3,
WRK_FSM_STATE_BUSY1: begin keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: keep_rd_narrow_bank;
endcase
//
// Same stepping pattern, applied to both memories in lock-step
// (wide from sel_wide_in, narrow from sel_narrow_in).
UOP_OPCODE_COPY_CRT_Y2X,
UOP_OPCODE_MODULAR_SUBTRACT_Z,
UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE3,
WRK_FSM_STATE_BUSY1: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next;
keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: begin keep_rd_wide_bank; keep_rd_narrow_bank; end
endcase
//
// The wide bank stays don't-care; only the wide address counter runs, and it
// wraps after word_index_last_half. The narrow counter wraps after
// word_index_last.
UOP_OPCODE_MODULAR_REDUCE_INIT:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_DNC, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE3,
WRK_FSM_STATE_BUSY1: begin advance_rd_wide_addr; advance_rd_wide_addr_next_half;
keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: keep_rd_narrow_bank;
endcase
//
// Ladder opcodes read every cycle: the *_in bank on PRE1/PRE3/BUSY1 and the
// *_out bank at the same address on PRE2/PRE4/BUSY2.
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE2: begin switch_rd_wide_bank (sel_wide_out); keep_rd_wide_addr;
switch_rd_narrow_bank(sel_narrow_out); keep_rd_narrow_addr; end
WRK_FSM_STATE_LATENCY_PRE3,
WRK_FSM_STATE_BUSY1: begin advance_rd_wide_addr; advance_rd_wide_addr_next; switch_rd_wide_bank(sel_wide_in);
advance_rd_narrow_addr; advance_rd_narrow_addr_next; switch_rd_narrow_bank(sel_narrow_in); end
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: begin keep_rd_wide_addr; switch_rd_wide_bank (sel_wide_out);
keep_rd_narrow_addr; switch_rd_narrow_bank(sel_narrow_out); end
endcase
//
// Like the lock-step group above, but the wide source is fixed to BANK_WIDE_N.
UOP_OPCODE_MODULAR_SUBTRACT_Y:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_WIDE_N, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE3,
WRK_FSM_STATE_BUSY1: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next;
keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: begin keep_rd_wide_bank; keep_rd_narrow_bank; end
endcase
//
// Wide reads start in BANK_WIDE_L with a counter that wraps after
// word_index_last_half. In BUSY1 the bank switches to BANK_WIDE_H once the
// one-cycle-delayed "last half" flag is set. The narrow bank stays don't-care;
// only the narrow address counter runs (it wraps after word_index_last).
UOP_OPCODE_MERGE_LH:
//
case (wrk_fsm_state_next)
WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_WIDE_L, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE);
preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end
WRK_FSM_STATE_LATENCY_PRE3: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next_half;
advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_BUSY1: begin if (!rd_wide_addr_is_last_half_dly[0]) keep_rd_wide_bank;
else switch_rd_wide_bank(BANK_WIDE_H);
advance_rd_wide_addr; advance_rd_wide_addr_next_half;
advance_rd_narrow_addr; advance_rd_narrow_addr_next; end
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_LATENCY_PRE4,
WRK_FSM_STATE_BUSY2: keep_rd_wide_bank;
endcase
//
endcase
//
end
//
// Destination Write Address Logic
//
// Two-stage pipeline that picks the wide destination bank for
// UOP_OPCODE_MODULAR_REDUCE_INIT from the delayed narrow read address.
reg                    modular_reduce_init_first_half_x, modular_reduce_init_first_half_y;
reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_x;
reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_y;

always @(posedge clk) begin
    // stage 1: the outer "<=" is the nonblocking assignment, the inner
    // "<=" is a less-than-or-equal comparison (parenthesised for clarity)
    modular_reduce_init_first_half_x <= (rd_narrow_addr_x_dly[1] <= word_index_last_half);
    modular_reduce_init_first_half_y <= (rd_narrow_addr_y_dly[1] <= word_index_last_half);

    // stage 2: addresses up to and including word_index_last_half select
    // BANK_WIDE_L, the remaining ones select BANK_WIDE_H
    modular_reduce_init_sel_wide_out_x <= modular_reduce_init_first_half_x ? BANK_WIDE_L : BANK_WIDE_H;
    modular_reduce_init_sel_wide_out_y <= modular_reduce_init_first_half_y ? BANK_WIDE_L : BANK_WIDE_H;
end
// Load the wide write bank/address registers; X and Y are set independently.
task update_wr_wide_bank_addr;
    input [BANK_ADDR_W -1:0] x_bank;
    input [BANK_ADDR_W -1:0] y_bank;
    input [  OP_ADDR_W -1:0] x_addr;
    input [  OP_ADDR_W -1:0] y_addr;
    begin
        wr_wide_xy_bank_x <= x_bank;
        wr_wide_xy_addr_x <= x_addr;
        wr_wide_xy_bank_y <= y_bank;
        wr_wide_xy_addr_y <= y_addr;
    end
endtask

// Load the narrow write bank/address registers; X and Y are set independently.
task update_wr_narrow_bank_addr;
    input [BANK_ADDR_W -1:0] x_bank;
    input [BANK_ADDR_W -1:0] y_bank;
    input [  OP_ADDR_W -1:0] x_addr;
    input [  OP_ADDR_W -1:0] y_addr;
    begin
        wr_narrow_xy_bank_x <= x_bank;
        wr_narrow_xy_addr_x <= x_addr;
        wr_narrow_xy_bank_y <= y_bank;
        wr_narrow_xy_addr_y <= y_addr;
    end
endtask
// Write bank/address generation. The write address is the read address
// taken from the last stage of the address history (index 3); the bank is
// the selected output bank. Outside the writing states everything is set
// to the don't-care values.
always @(posedge clk) begin
    // default, overridden below by a later nonblocking assignment
    update_wr_wide_bank_addr  (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
    update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);

    case (wrk_fsm_state)
        WRK_FSM_STATE_BUSY1,
        WRK_FSM_STATE_LATENCY_POST1,
        WRK_FSM_STATE_LATENCY_POST3:
            case (opcode)

                // narrow destination only
                UOP_OPCODE_PROPAGATE_CARRIES,
                UOP_OPCODE_MODULAR_SUBTRACT_X,
                UOP_OPCODE_MERGE_LH,
                UOP_OPCODE_REGULAR_ADD_UNEVEN:
                    update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out,
                                               rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);

                // narrow and wide destinations
                UOP_OPCODE_COPY_CRT_Y2X,
                UOP_OPCODE_COPY_LADDERS_X2Y,
                UOP_OPCODE_CROSS_LADDERS_X2Y,
                UOP_OPCODE_MODULAR_SUBTRACT_Z:
                    begin
                        update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out,
                                                   rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]);
                        update_wr_wide_bank_addr  (sel_wide_out, sel_wide_out,
                                                   rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);
                    end

                // wide destination, bank chosen per half by the reduce-init pipeline
                UOP_OPCODE_MODULAR_REDUCE_INIT:
                    update_wr_wide_bank_addr(modular_reduce_init_sel_wide_out_x, modular_reduce_init_sel_wide_out_y,
                                             rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);

                // wide destination only
                UOP_OPCODE_MODULAR_SUBTRACT_Y:
                    update_wr_wide_bank_addr(sel_wide_out, sel_wide_out,
                                             rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]);

            endcase
    endcase
end
//
// UOP_OPCODE_PROPAGATE_CARRIES
//
// Carry registers, one per data lane (x/y word of the X/Y side).
reg [CARRY_W -1:0] propagate_carries_x_x_cry_r, propagate_carries_y_x_cry_r;
reg [CARRY_W -1:0] propagate_carries_x_y_cry_r, propagate_carries_y_y_cry_r;

// Combinational sum: delayed narrow word plus the zero-extended carry register.
wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_x_cry_r};
wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_x_cry_r};
wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_y_cry_r};
wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_y_cry_r};

// Registered copy of the sum.
reg [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_r, propagate_carries_y_x_w_cry_r;
reg [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_r, propagate_carries_y_y_w_cry_r;

// Upper CARRY_W bits of the registered sum: the carry into the next word.
wire [CARRY_W -1:0] propagate_carries_x_x_w_cry_msb = propagate_carries_x_x_w_cry_r[WORD_EXT_W -1:WORD_W];
wire [CARRY_W -1:0] propagate_carries_y_x_w_cry_msb = propagate_carries_y_x_w_cry_r[WORD_EXT_W -1:WORD_W];
wire [CARRY_W -1:0] propagate_carries_x_y_w_cry_msb = propagate_carries_x_y_w_cry_r[WORD_EXT_W -1:WORD_W];
wire [CARRY_W -1:0] propagate_carries_y_y_w_cry_msb = propagate_carries_y_y_w_cry_r[WORD_EXT_W -1:WORD_W];

// Lower WORD_W bits of the registered sum.
wire [WORD_W -1:0] propagate_carries_x_x_w_cry_lsb = propagate_carries_x_x_w_cry_r[WORD_W -1:0];
wire [WORD_W -1:0] propagate_carries_y_x_w_cry_lsb = propagate_carries_y_x_w_cry_r[WORD_W -1:0];
wire [WORD_W -1:0] propagate_carries_x_y_w_cry_lsb = propagate_carries_x_y_w_cry_r[WORD_W -1:0];
wire [WORD_W -1:0] propagate_carries_y_y_w_cry_lsb = propagate_carries_y_y_w_cry_r[WORD_W -1:0];

// Lower bits zero-extended back to the full extended word width.
wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_x_w_cry_lsb};
wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_x_w_cry_lsb};
wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_y_w_cry_lsb};
wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_y_w_cry_lsb};

// Load all four carry registers.
task _propagate_carries_update_cry;
    input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry;
    begin
        propagate_carries_x_x_cry_r <= x_x_cry;
        propagate_carries_y_x_cry_r <= y_x_cry;
        propagate_carries_x_y_cry_r <= x_y_cry;
        propagate_carries_y_y_cry_r <= y_y_cry;
    end
endtask

// Zero the carry registers.
task propagate_carries_clear_cry;
    begin
        _propagate_carries_update_cry(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO);
    end
endtask

// Take the carries out of the registered sums.
task propagate_carries_store_cry;
    begin
        _propagate_carries_update_cry(propagate_carries_x_x_w_cry_msb,
                                      propagate_carries_y_x_w_cry_msb,
                                      propagate_carries_x_y_w_cry_msb,
                                      propagate_carries_y_y_w_cry_msb);
    end
endtask

// Load all four registered sums.
task _propagate_carries_update_sum_w_cry;
    input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry;
    begin
        propagate_carries_x_x_w_cry_r <= x_x_sum_w_cry;
        propagate_carries_y_x_w_cry_r <= y_x_sum_w_cry;
        propagate_carries_x_y_w_cry_r <= x_y_sum_w_cry;
        propagate_carries_y_y_w_cry_r <= y_y_sum_w_cry;
    end
endtask

// Register the current combinational sums.
task propagate_carries_store_sum_w_cry;
    begin
        _propagate_carries_update_sum_w_cry(propagate_carries_x_x_w_cry,
                                            propagate_carries_y_x_w_cry,
                                            propagate_carries_x_y_w_cry,
                                            propagate_carries_y_y_w_cry);
    end
endtask

// Sequencing: clear the carry once in PRE3, then alternate between
// registering the sum (PRE4/BUSY2/POST2) and extracting its carry
// (BUSY1/POST1). Only active for UOP_OPCODE_PROPAGATE_CARRIES.
always @(posedge clk) begin
    if (opcode == UOP_OPCODE_PROPAGATE_CARRIES) begin
        case (wrk_fsm_state)
            WRK_FSM_STATE_LATENCY_PRE3:
                propagate_carries_clear_cry;

            WRK_FSM_STATE_BUSY1,
            WRK_FSM_STATE_LATENCY_POST1:
                propagate_carries_store_cry;

            WRK_FSM_STATE_LATENCY_PRE4,
            WRK_FSM_STATE_BUSY2,
            WRK_FSM_STATE_LATENCY_POST2:
                propagate_carries_store_sum_w_cry;
        endcase
    end
end
//
// UOP_OPCODE_MODULAR_SUBTRACT_X
// UOP_OPCODE_MODULAR_SUBTRACT_Y
//
// Single-bit borrow (subtract) and carry (add-back) registers, one per side.
reg modular_subtract_x_brw_r;
reg modular_subtract_y_brw_r;
reg modular_subtract_x_cry_r;
reg modular_subtract_y_cry_r;

// Word-wise difference with borrow: narrow x word - narrow y word - borrow.
// The extra bit WORD_W of the result carries the borrow out.
wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r};
wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r};

// Word-wise sum with carry: narrow x word + wide x word + carry.
// FIX: the Y side previously added modular_subtract_y_brw_r (the borrow
// register) here. The carry chain of the Y side must use its own carry
// register, exactly like the X side does; otherwise the stored carry is
// never consumed and a stale borrow is added to every word instead.
wire [WORD_W:0] modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r};
wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_cry_r};

// Registered copies of the difference and the sum.
reg [WORD_W:0] modular_subtract_x_w_brw_r;
reg [WORD_W:0] modular_subtract_y_w_brw_r;
reg [WORD_W:0] modular_subtract_x_w_cry_r;
reg [WORD_W:0] modular_subtract_y_w_cry_r;

// Top bit of the registered results: borrow / carry into the next word.
wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W];
wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W];
wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W];
wire modular_subtract_y_w_cry_msb = modular_subtract_y_w_cry_r[WORD_W];

// Lower WORD_W bits of the registered results.
wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0];
wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0];
wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0];
wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0];

// Lower bits zero-extended to the full extended word width.
wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb};
wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb};
wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb};
wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb};

// Result selector registers (loaded for UOP_OPCODE_MODULAR_SUBTRACT_Z) and
// their zero-extended lower words.
reg [WORD_EXT_W -1:0] modular_subtract_x_mux;
reg [WORD_EXT_W -1:0] modular_subtract_y_mux;
wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]};
wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]};
// Load both borrow registers.
task _modular_subtract_update_brw;
    input x_brw, y_brw;
    begin
        modular_subtract_x_brw_r <= x_brw;
        modular_subtract_y_brw_r <= y_brw;
    end
endtask

// Load both carry registers.
task _modular_subtract_update_cry;
    input x_cry, y_cry;
    begin
        modular_subtract_x_cry_r <= x_cry;
        modular_subtract_y_cry_r <= y_cry;
    end
endtask

// Zero the borrows.
task modular_subtract_clear_brw;
    begin
        _modular_subtract_update_brw(1'b0, 1'b0);
    end
endtask

// Take the borrows out of the registered differences.
task modular_subtract_store_brw;
    begin
        _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb);
    end
endtask

// Zero the carries.
task modular_subtract_clear_cry;
    begin
        _modular_subtract_update_cry(1'b0, 1'b0);
    end
endtask

// Take the carries out of the registered sums.
task modular_subtract_store_cry;
    begin
        _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb);
    end
endtask

// Load both registered differences.
task _modular_subtract_update_diff_w_brw;
    input [WORD_W:0] x_diff_w_brw, y_diff_w_brw;
    begin
        modular_subtract_x_w_brw_r <= x_diff_w_brw;
        modular_subtract_y_w_brw_r <= y_diff_w_brw;
    end
endtask

// Load both registered sums.
task _modular_subtract_update_sum_w_cry;
    input [WORD_W:0] x_sum_w_cry, y_sum_w_cry;
    begin
        modular_subtract_x_w_cry_r <= x_sum_w_cry;
        modular_subtract_y_w_cry_r <= y_sum_w_cry;
    end
endtask

// Register the current combinational differences.
task modular_subtract_store_diff_w_brw;
    begin
        _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw);
    end
endtask

// Register the current combinational sums.
task modular_subtract_store_sum_w_cry;
    begin
        _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry);
    end
endtask
// Sequencing of the three modular-subtract micro-operations.
always @(posedge clk) begin
    case (opcode)

        // X: word-wise subtraction with a borrow chain
        UOP_OPCODE_MODULAR_SUBTRACT_X:
            case (wrk_fsm_state)
                WRK_FSM_STATE_LATENCY_PRE3:
                    modular_subtract_clear_brw;

                // POST3 is included on purpose: the borrow out of the very
                // last word has to be captured as well
                WRK_FSM_STATE_BUSY1,
                WRK_FSM_STATE_LATENCY_POST1,
                WRK_FSM_STATE_LATENCY_POST3:
                    modular_subtract_store_brw;

                WRK_FSM_STATE_LATENCY_PRE4,
                WRK_FSM_STATE_BUSY2,
                WRK_FSM_STATE_LATENCY_POST2:
                    modular_subtract_store_diff_w_brw;
            endcase

        // Y: word-wise addition with a carry chain
        UOP_OPCODE_MODULAR_SUBTRACT_Y:
            case (wrk_fsm_state)
                WRK_FSM_STATE_LATENCY_PRE3:
                    modular_subtract_clear_cry;

                WRK_FSM_STATE_BUSY1,
                WRK_FSM_STATE_LATENCY_POST1:
                    modular_subtract_store_cry;

                WRK_FSM_STATE_LATENCY_PRE4,
                WRK_FSM_STATE_BUSY2,
                WRK_FSM_STATE_LATENCY_POST2:
                    modular_subtract_store_sum_w_cry;
            endcase

        // Z: per side, pick the wide word when the stored borrow is set,
        // the narrow word otherwise
        UOP_OPCODE_MODULAR_SUBTRACT_Z:
            case (wrk_fsm_state)
                WRK_FSM_STATE_LATENCY_PRE4,
                WRK_FSM_STATE_BUSY2,
                WRK_FSM_STATE_LATENCY_POST2:
                    begin
                        modular_subtract_x_mux <= modular_subtract_x_brw_r ? rd_wide_x_din_x_dly1 : rd_narrow_x_din_x_dly1;
                        modular_subtract_y_mux <= modular_subtract_y_brw_r ? rd_wide_x_din_y_dly1 : rd_narrow_x_din_y_dly1;
                    end
            endcase

    endcase
end
//
// UOP_OPCODE_REGULAR_ADD_UNEVEN
//
// Carry registers, one per bank/channel combination.
reg [CARRY_W -1:0] regular_add_uneven_x_x_cry_r;
reg [CARRY_W -1:0] regular_add_uneven_y_x_cry_r;
reg [CARRY_W -1:0] regular_add_uneven_x_y_cry_r;
reg [CARRY_W -1:0] regular_add_uneven_y_y_cry_r;
// "msb" sums: delayed narrow read word plus the zero-extended carry register.
wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_msb_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_x_cry_r};
wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_msb_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_x_cry_r};
wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_msb_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_y_cry_r};
wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_msb_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_y_cry_r};
// "lsb" sums: the "msb" sum above plus the delayed wide read word
// (i.e. narrow + carry + wide).
wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_lsb_w_cry = regular_add_uneven_x_x_msb_w_cry + rd_wide_x_din_x_dly1;
wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_lsb_w_cry = regular_add_uneven_y_x_msb_w_cry + rd_wide_y_din_x_dly1;
wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_lsb_w_cry = regular_add_uneven_x_y_msb_w_cry + rd_wide_x_din_y_dly1;
wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_lsb_w_cry = regular_add_uneven_y_y_msb_w_cry + rd_wide_y_din_y_dly1;
// Registered sums (loaded from either the "lsb" or the "msb" variant).
reg [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_r;
reg [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_r;
reg [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_r;
reg [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_r;
// Upper part of each registered sum (bits WORD_EXT_W-1..WORD_W): fed back as the next carry.
wire [CARRY_W -1:0] regular_add_uneven_x_x_w_cry_msb = regular_add_uneven_x_x_w_cry_r[WORD_EXT_W -1:WORD_W];
wire [CARRY_W -1:0] regular_add_uneven_y_x_w_cry_msb = regular_add_uneven_y_x_w_cry_r[WORD_EXT_W -1:WORD_W];
wire [CARRY_W -1:0] regular_add_uneven_x_y_w_cry_msb = regular_add_uneven_x_y_w_cry_r[WORD_EXT_W -1:WORD_W];
wire [CARRY_W -1:0] regular_add_uneven_y_y_w_cry_msb = regular_add_uneven_y_y_w_cry_r[WORD_EXT_W -1:WORD_W];
// Lower WORD_W bits of each registered sum.
wire [WORD_W -1:0] regular_add_uneven_x_x_w_cry_lsb = regular_add_uneven_x_x_w_cry_r[WORD_W -1:0];
wire [WORD_W -1:0] regular_add_uneven_y_x_w_cry_lsb = regular_add_uneven_y_x_w_cry_r[WORD_W -1:0];
wire [WORD_W -1:0] regular_add_uneven_x_y_w_cry_lsb = regular_add_uneven_x_y_w_cry_r[WORD_W -1:0];
wire [WORD_W -1:0] regular_add_uneven_y_y_w_cry_lsb = regular_add_uneven_y_y_w_cry_r[WORD_W -1:0];
// Lower words with CARRY_W zero bits prepended; these are what gets written out.
wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_x_w_cry_lsb};
wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_x_w_cry_lsb};
wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_y_w_cry_lsb};
wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_y_w_cry_lsb};
// Selects, during BUSY2, whether the "lsb" (1) or the "msb" (0) sum is registered.
reg regular_add_uneven_store_lsb_now;
// Helper: schedules a non-blocking load of all four REGULAR_ADD_UNEVEN carry
// registers. Inputs are the new CARRY_W-bit values, in the order
// x_x, y_x, x_y, y_y (matching the register name infixes).
task _regular_add_uneven_update_cry;
    input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry;
    begin
        regular_add_uneven_x_x_cry_r <= x_x_cry;
        regular_add_uneven_y_x_cry_r <= y_x_cry;
        regular_add_uneven_x_y_cry_r <= x_y_cry;
        regular_add_uneven_y_y_cry_r <= y_y_cry;
    end
endtask
// Load CARRY_ZERO into all four carry registers.
task regular_add_uneven_clear_cry; _regular_add_uneven_update_cry( CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); endtask
// Load each carry register from the upper (carry) part of its registered sum.
task regular_add_uneven_store_cry; _regular_add_uneven_update_cry(regular_add_uneven_x_x_w_cry_msb, regular_add_uneven_y_x_w_cry_msb, regular_add_uneven_x_y_w_cry_msb, regular_add_uneven_y_y_w_cry_msb); endtask
// Helper: schedules a non-blocking load of all four REGULAR_ADD_UNEVEN sum
// registers. Inputs are the new WORD_EXT_W-bit values, in the order
// x_x, y_x, x_y, y_y (matching the register name infixes).
task _regular_add_uneven_update_sum_w_cry;
    input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry;
    begin
        regular_add_uneven_x_x_w_cry_r <= x_x_sum_w_cry;
        regular_add_uneven_y_x_w_cry_r <= y_x_sum_w_cry;
        regular_add_uneven_x_y_w_cry_r <= x_y_sum_w_cry;
        regular_add_uneven_y_y_w_cry_r <= y_y_sum_w_cry;
    end
endtask
// Register the "lsb" sums (narrow + carry + wide).
task regular_add_uneven_store_sum_lsb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_lsb_w_cry, regular_add_uneven_y_x_lsb_w_cry, regular_add_uneven_x_y_lsb_w_cry, regular_add_uneven_y_y_lsb_w_cry); endtask
// Register the "msb" sums (narrow + carry only).
task regular_add_uneven_store_sum_msb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_msb_w_cry, regular_add_uneven_y_x_msb_w_cry, regular_add_uneven_x_y_msb_w_cry, regular_add_uneven_y_y_msb_w_cry); endtask
// "Store lsb" selector: set in PRE3, then cleared in the first BUSY1 cycle
// in which rd_wide_addr_is_last_half_dly[3] is asserted. It holds its value
// in every other situation.
always @(posedge clk) begin
    if (wrk_fsm_state == WRK_FSM_STATE_LATENCY_PRE3) begin
        regular_add_uneven_store_lsb_now <= 1'b1;
    end else if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1) && rd_wide_addr_is_last_half_dly[3]) begin
        regular_add_uneven_store_lsb_now <= 1'b0;
    end
end
// REGULAR_ADD_UNEVEN register schedule (driven by FSM state only, not opcode):
//   PRE3          - clear the carries
//   BUSY1, POST1  - reload the carries from the registered sums
//   PRE4          - register the "lsb" sums
//   BUSY2         - register the "lsb" sums while the store_lsb_now flag is
//                   set, the "msb" sums afterwards
//   POST2         - register the "msb" sums
always @(posedge clk) begin
    case (wrk_fsm_state)

        WRK_FSM_STATE_LATENCY_PRE3: begin
            regular_add_uneven_clear_cry;
        end

        WRK_FSM_STATE_BUSY1,
        WRK_FSM_STATE_LATENCY_POST1: begin
            regular_add_uneven_store_cry;
        end

        WRK_FSM_STATE_LATENCY_PRE4: begin
            regular_add_uneven_store_sum_lsb_w_cry;
        end

        WRK_FSM_STATE_BUSY2: begin
            if (regular_add_uneven_store_lsb_now) begin
                regular_add_uneven_store_sum_lsb_w_cry;
            end else begin
                regular_add_uneven_store_sum_msb_w_cry;
            end
        end

        WRK_FSM_STATE_LATENCY_POST2: begin
            regular_add_uneven_store_sum_msb_w_cry;
        end

        default: ;

    endcase
end
//
// FSM Process
//
// State register: asynchronous active-low reset to IDLE, otherwise takes the
// combinationally computed next state on every rising clock edge.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        wrk_fsm_state <= WRK_FSM_STATE_IDLE;
    end else begin
        wrk_fsm_state <= wrk_fsm_state_next;
    end
end
//
// Busy Exit Logic
//
// Single-cycle "done" strobe. It is raised for the cycle following a BUSY1
// state in which rd_narrow_addr_is_last is asserted, provided the current
// opcode is one of those listed below; in every other cycle it is low.
reg wrk_fsm_done = 1'b0;
always @(posedge clk) begin
    // default: de-assert, overridden below when the exit condition is met
    wrk_fsm_done <= 1'b0;

    if ((wrk_fsm_state == WRK_FSM_STATE_BUSY1) && rd_narrow_addr_is_last) begin
        case (opcode)
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_OUTPUT_FROM_NARROW,
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_MODULAR_REDUCE_INIT,
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y,
            UOP_OPCODE_MODULAR_SUBTRACT_X,
            UOP_OPCODE_MODULAR_SUBTRACT_Y,
            UOP_OPCODE_MODULAR_SUBTRACT_Z,
            UOP_OPCODE_MERGE_LH,
            UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                wrk_fsm_done <= 1'b1;
            end
            default: ;
        endcase
    end
end
//
// FSM Transition Logic
//
// Next-state function.
//
//   IDLE --ena--> PRE1 -> PRE2 -> PRE3 -> PRE4 -> BUSY1 <-> BUSY2
//   BUSY2 --wrk_fsm_done--> POST1 -> POST2 -> POST3 -> POST4 -> STOP -> IDLE
//
// Any state value not listed falls back to IDLE via the initial assignment.
always @* begin
    wrk_fsm_state_next = WRK_FSM_STATE_IDLE;

    case (wrk_fsm_state)
        // wait for the enable pulse
        WRK_FSM_STATE_IDLE:          wrk_fsm_state_next = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE;

        // fixed-length lead-in
        WRK_FSM_STATE_LATENCY_PRE1:  wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE2;
        WRK_FSM_STATE_LATENCY_PRE2:  wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE3;
        WRK_FSM_STATE_LATENCY_PRE3:  wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE4;
        WRK_FSM_STATE_LATENCY_PRE4:  wrk_fsm_state_next = WRK_FSM_STATE_BUSY1;

        // two-cycle busy loop, left from BUSY2 once the done strobe is set
        WRK_FSM_STATE_BUSY1:         wrk_fsm_state_next = WRK_FSM_STATE_BUSY2;
        WRK_FSM_STATE_BUSY2:         wrk_fsm_state_next = wrk_fsm_done ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY1;

        // fixed-length lead-out
        WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST2;
        WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST3;
        WRK_FSM_STATE_LATENCY_POST3: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST4;
        WRK_FSM_STATE_LATENCY_POST4: wrk_fsm_state_next = WRK_FSM_STATE_STOP;

        WRK_FSM_STATE_STOP:          wrk_fsm_state_next = WRK_FSM_STATE_IDLE;
    endcase
end
//
// Ready Flag Logic
//
// Ready flag, exported on the rdy port. It is high after reset, follows the
// inverse of ena while the FSM sits in IDLE (so it drops when an operation is
// started), is set again in STOP, and holds its value in all other states.
reg rdy_reg = 1'b1;
assign rdy = rdy_reg;
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        rdy_reg <= 1'b1;
    end else begin
        case (wrk_fsm_state)
            WRK_FSM_STATE_IDLE: rdy_reg <= ~ena;
            WRK_FSM_STATE_STOP: rdy_reg <= 1'b1;
            default: ;
        endcase
    end
end
//
// Source to Destination Data Logic
//
// Second delay stage for the wide and narrow read data: every clock cycle
// each *_dly2 register captures the corresponding *_dly1 value.
// NOTE(review): the concatenated assignments map one-to-one only if the
// *_dly1 signals are also WORD_EXT_W bits wide - confirm against their
// declarations.
reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly2;
reg [WORD_EXT_W -1:0] rd_wide_y_din_x_dly2;
reg [WORD_EXT_W -1:0] rd_wide_x_din_y_dly2;
reg [WORD_EXT_W -1:0] rd_wide_y_din_y_dly2;
reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly2;
reg [WORD_EXT_W -1:0] rd_narrow_y_din_x_dly2;
reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly2;
reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly2;
always @(posedge clk) begin
{rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2 } <= {rd_wide_x_din_x_dly1, rd_wide_y_din_x_dly1, rd_wide_x_din_y_dly1, rd_wide_y_din_y_dly1 };
{rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2} <= {rd_narrow_x_din_x_dly1, rd_narrow_y_din_x_dly1, rd_narrow_x_din_y_dly1, rd_narrow_y_din_y_dly1};
end
// Schedules a non-blocking update of the four wide write-data outputs.
// Argument order: x_x -> wr_wide_x_dout_x, y_x -> wr_wide_y_dout_x,
//                 x_y -> wr_wide_x_dout_y, y_y -> wr_wide_y_dout_y.
// NOTE(review): presumes every output is WORD_EXT_W bits wide - confirm.
task update_wide_dout;
input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y;
{wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <=
{ x_x, y_x, x_y, y_y};
endtask
// Schedules a non-blocking update of the four narrow write-data outputs.
// Argument order: x_x -> wr_narrow_x_dout_x, y_x -> wr_narrow_y_dout_x,
//                 x_y -> wr_narrow_x_dout_y, y_y -> wr_narrow_y_dout_y.
// NOTE(review): presumes every output is WORD_EXT_W bits wide - confirm.
task update_narrow_dout;
input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y;
{wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <=
{ x_x, y_x, x_y, y_y};
endtask
// Write-data multiplexer.
//
// Every clock cycle both output groups are first given the WORD_EXT_DNC
// value (presumably a "don't care" filler - confirm against its definition).
// For the listed opcodes, in states BUSY1 / LATENCY_POST1 / LATENCY_POST3,
// a later non-blocking assignment in the same block overrides that default
// with real data. The order of statements therefore matters.
//
// Argument order of update_{wide,narrow}_dout is
//   (x bank / x channel, y bank / x channel, x bank / y channel, y bank / y channel).
always @(posedge clk) begin
//
// defaults, overridden by the opcode-specific arms below
update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
//
case (opcode)
//
// narrow outputs <- zero-padded carry propagation results
UOP_OPCODE_PROPAGATE_CARRIES:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
update_narrow_dout(propagate_carries_x_x_w_cry_reduced, propagate_carries_y_x_w_cry_reduced, propagate_carries_x_y_w_cry_reduced, propagate_carries_y_y_w_cry_reduced);
//
endcase
//
// both the *_dout_x and *_dout_y outputs (wide and narrow) <- the *_din_y read data
UOP_OPCODE_COPY_CRT_Y2X:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
begin update_narrow_dout(rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2);
update_wide_dout (rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2); end
//
endcase
//
// wide outputs <- narrow read data (same bank / channel positions)
UOP_OPCODE_MODULAR_REDUCE_INIT:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
update_wide_dout(rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2);
//
endcase
//
// x-bank and y-bank outputs both <- x-bank read data, channel for channel.
// NOTE(review): the x-bank outputs take the _dly1 values while the y-bank
// outputs take the _dly2 values, unlike the other copy opcodes which use
// _dly2 throughout - presumably intentional, confirm.
UOP_OPCODE_COPY_LADDERS_X2Y:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
begin update_wide_dout (rd_wide_x_din_x_dly1, rd_wide_x_din_x_dly2, rd_wide_x_din_y_dly1, rd_wide_x_din_y_dly2);
update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_x_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_y_dly2); end
//
endcase
//
// x-bank outputs <- x-bank read data, channel for channel;
// y-bank outputs <- x-bank read data with the x and y channels swapped.
// NOTE(review): same _dly1 / _dly2 mix as COPY_LADDERS_X2Y - confirm.
UOP_OPCODE_CROSS_LADDERS_X2Y:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
begin update_wide_dout (rd_wide_x_din_x_dly1, rd_wide_x_din_y_dly2, rd_wide_x_din_y_dly1, rd_wide_x_din_x_dly2);
update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_y_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_x_dly2); end
//
endcase
//
// narrow outputs <- zero-padded differences; the same x (resp. y) channel
// value is written to both banks
UOP_OPCODE_MODULAR_SUBTRACT_X:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
update_narrow_dout(modular_subtract_x_w_brw_reduced, modular_subtract_x_w_brw_reduced, modular_subtract_y_w_brw_reduced, modular_subtract_y_w_brw_reduced);
//
endcase
//
// wide outputs <- zero-padded sums; the same x (resp. y) channel value is
// written to both banks
UOP_OPCODE_MODULAR_SUBTRACT_Y:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
update_wide_dout(modular_subtract_x_w_cry_reduced, modular_subtract_x_w_cry_reduced, modular_subtract_y_w_cry_reduced, modular_subtract_y_w_cry_reduced);
//
endcase
//
// wide and narrow outputs, both banks <- zero-padded selected (mux) words
UOP_OPCODE_MODULAR_SUBTRACT_Z:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
begin update_wide_dout (modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced);
update_narrow_dout(modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced); end
//
endcase
//
// narrow outputs <- wide read data (same bank / channel positions)
UOP_OPCODE_MERGE_LH:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
update_narrow_dout(rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2);
//
endcase
//
// narrow outputs <- zero-padded low words of the registered sums
UOP_OPCODE_REGULAR_ADD_UNEVEN:
//
case (wrk_fsm_state)
//
WRK_FSM_STATE_BUSY1,
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST3:
//
update_narrow_dout(regular_add_uneven_x_x_w_cry_reduced, regular_add_uneven_y_x_w_cry_reduced, regular_add_uneven_x_y_w_cry_reduced, regular_add_uneven_y_y_w_cry_reduced);
//
endcase
endcase
//
end
endmodule