//======================================================================
//
// Copyright (c) 2019, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================
//
// modexpng_reductor
//
// Consumes the rcmb_final_* word stream (bank, address, X/Y data, valid),
// delays it through a short register pipeline and feeds the delayed X/Y
// words, together with the registered rd_wide_*_din_aux words, into two
// LSB -> MSB chains of DSP slices (one chain for X, one for Y). The slices
// are instantiated through the MODEXPNG_DSP_SLICE_ADDSUB macro, which is
// defined outside this file. The low WORD_EXT_W bits of the MSB slice
// outputs are written to both the rdct_wide_* and rdct_narrow_* ports.
//
// Handshake: an operation is started on a clock edge where rdy and ena are
// both high; rdy then goes low and returns high once the internal busy
// shift register has drained.
//
module modexpng_reductor
(
clk, rst_n,
ena, rdy,
word_index_last,
sel_wide_out, sel_narrow_out,
rd_wide_x_din_aux, rd_wide_y_din_aux,
rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_din, rcmb_final_y_din, rcmb_final_xy_valid,
rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_dout, rdct_wide_y_dout, rdct_wide_xy_valid,
rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
);
//
// Headers
//
`include "modexpng_parameters.vh"
`include "modexpng_dsp48e1.vh"
`include "modexpng_dsp_slice_primitives.vh"
//
// Ports
//
// clk: every register in this module is clocked on its rising edge.
// rst_n: asynchronous, active-low reset for the control/valid registers.
input clk;
input rst_n;
// ena is only acted upon while rdy is high; rdy is high when idle.
input ena;
output rdy;
// word_index_last: used as the output address for results that originate
// from bank BANK_RCMB_EXT.
input [ OP_ADDR_W -1:0] word_index_last;
// sel_*_out: bank values driven onto rdct_wide_xy_bank / rdct_narrow_xy_bank
// whenever an output word is produced.
input [BANK_ADDR_W -1:0] sel_wide_out;
input [BANK_ADDR_W -1:0] sel_narrow_out;
// rd_wide_*_din_aux: registered once, then used as the C operand of the
// DSP slices.
input [ WORD_EXT_W -1:0] rd_wide_x_din_aux;
input [ WORD_EXT_W -1:0] rd_wide_y_din_aux;
//
// rcmb_final_*: incoming word stream (bank, address, X/Y data, valid).
input [BANK_ADDR_W -1:0] rcmb_final_xy_bank;
input [ OP_ADDR_W -1:0] rcmb_final_xy_addr;
input [ WORD_EXT_W -1:0] rcmb_final_x_din;
input [ WORD_EXT_W -1:0] rcmb_final_y_din;
input rcmb_final_xy_valid;
// rdct_wide_*: registered result port; X and Y share bank/address/valid.
output [BANK_ADDR_W -1:0] rdct_wide_xy_bank;
output [ OP_ADDR_W -1:0] rdct_wide_xy_addr;
output [ WORD_EXT_W -1:0] rdct_wide_x_dout;
output [ WORD_EXT_W -1:0] rdct_wide_y_dout;
output rdct_wide_xy_valid;
// rdct_narrow_*: second registered result port; it receives the same data
// words as the wide port but its own bank (sel_narrow_out).
output [BANK_ADDR_W -1:0] rdct_narrow_xy_bank;
output [ OP_ADDR_W -1:0] rdct_narrow_xy_addr;
output [ WORD_EXT_W -1:0] rdct_narrow_x_dout;
output [ WORD_EXT_W -1:0] rdct_narrow_y_dout;
output rdct_narrow_xy_valid;
//
// Output Registers
//
// Wide result port: backing registers, then the port drivers.
// Only the valid flag carries an initial value.
reg                      wide_xy_valid = 1'b0;
reg [BANK_ADDR_W -1:0]   wide_xy_bank;
reg [  OP_ADDR_W -1:0]   wide_xy_addr;
reg [ WORD_EXT_W -1:0]   wide_x_dout;
reg [ WORD_EXT_W -1:0]   wide_y_dout;

assign rdct_wide_xy_valid = wide_xy_valid;
assign rdct_wide_xy_bank  = wide_xy_bank;
assign rdct_wide_xy_addr  = wide_xy_addr;
assign rdct_wide_x_dout   = wide_x_dout;
assign rdct_wide_y_dout   = wide_y_dout;

// Narrow result port: same arrangement as the wide port.
reg                      narrow_xy_valid = 1'b0;
reg [BANK_ADDR_W -1:0]   narrow_xy_bank;
reg [  OP_ADDR_W -1:0]   narrow_xy_addr;
reg [ WORD_EXT_W -1:0]   narrow_x_dout;
reg [ WORD_EXT_W -1:0]   narrow_y_dout;

assign rdct_narrow_xy_valid = narrow_xy_valid;
assign rdct_narrow_xy_bank  = narrow_xy_bank;
assign rdct_narrow_xy_addr  = narrow_xy_addr;
assign rdct_narrow_x_dout   = narrow_x_dout;
assign rdct_narrow_y_dout   = narrow_y_dout;
//
// Helper Tasks
//
// Schedule non-blocking updates of all five wide-port output registers.
// Arguments, in order: bank, address, X word, Y word, valid flag.
task _update_rdct_wide;
    input [BANK_ADDR_W -1:0] new_bank;
    input [  OP_ADDR_W -1:0] new_addr;
    input [ WORD_EXT_W -1:0] new_x;
    input [ WORD_EXT_W -1:0] new_y;
    input                    new_valid;
    begin
        wide_xy_valid <= new_valid;
        wide_xy_bank  <= new_bank;
        wide_xy_addr  <= new_addr;
        wide_x_dout   <= new_x;
        wide_y_dout   <= new_y;
    end
endtask
// Schedule non-blocking updates of all five narrow-port output registers.
// Arguments, in order: bank, address, X word, Y word, valid flag.
task _update_rdct_narrow;
    input [BANK_ADDR_W -1:0] new_bank;
    input [  OP_ADDR_W -1:0] new_addr;
    input [ WORD_EXT_W -1:0] new_x;
    input [ WORD_EXT_W -1:0] new_y;
    input                    new_valid;
    begin
        narrow_xy_valid <= new_valid;
        narrow_xy_bank  <= new_bank;
        narrow_xy_addr  <= new_addr;
        narrow_x_dout   <= new_x;
        narrow_y_dout   <= new_y;
    end
endtask
// Present a word on the wide port: forwards bank, address and the X/Y data
// to _update_rdct_wide with the valid flag forced high.
task set_rdct_wide;
    input [BANK_ADDR_W -1:0] dst_bank;
    input [  OP_ADDR_W -1:0] dst_addr;
    input [ WORD_EXT_W -1:0] word_x;
    input [ WORD_EXT_W -1:0] word_y;
    begin
        _update_rdct_wide(dst_bank, dst_addr, word_x, word_y, 1'b1);
    end
endtask
// Present a word on the narrow port: forwards bank, address and the X/Y
// data to _update_rdct_narrow with the valid flag forced high.
task set_rdct_narrow;
    input [BANK_ADDR_W -1:0] dst_bank;
    input [  OP_ADDR_W -1:0] dst_addr;
    input [ WORD_EXT_W -1:0] word_x;
    input [ WORD_EXT_W -1:0] word_y;
    begin
        _update_rdct_narrow(dst_bank, dst_addr, word_x, word_y, 1'b1);
    end
endtask
// Idle the wide port: valid goes low, and bank/address/data are loaded with
// the *_DNC constants (presumably "do not care" fillers; they are defined
// outside this file).
task clear_rdct_wide;
    begin
        _update_rdct_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
    end
endtask
// Idle the narrow port: valid goes low, and bank/address/data are loaded
// with the *_DNC constants (presumably "do not care" fillers; they are
// defined outside this file).
task clear_rdct_narrow;
    begin
        _update_rdct_narrow(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
    end
endtask
//
// Pipeline rd_wide_*
//
// One unconditional register stage on each auxiliary read word (no reset,
// no enable). The registered copies feed the C operands of the DSP slices.
reg [WORD_EXT_W -1:0] rd_wide_x_din_aux_pipe;
reg [WORD_EXT_W -1:0] rd_wide_y_din_aux_pipe;
always @(posedge clk) begin
    rd_wide_x_din_aux_pipe <= rd_wide_x_din_aux;
    rd_wide_y_din_aux_pipe <= rd_wide_y_din_aux;
end
//
// Counter
//
integer i;
//
// Delay lines for the rcmb_final_* stream.
//
// valid, bank and address are kept for six stages; the X/Y data words are
// kept for four stages only, since stage 4 is the last data tap that is
// read in this module.
//
reg                      rcmb_xy_valid_dly [1:6];
reg [BANK_ADDR_W -1:0]   rcmb_xy_bank_dly  [1:6];
reg [  OP_ADDR_W -1:0]   rcmb_xy_addr_dly  [1:6];
reg [ WORD_EXT_W -1:0]   rcmb_x_dout_dly   [1:4];
reg [ WORD_EXT_W -1:0]   rcmb_y_dout_dly   [1:4];

// Power-up value of the valid chain.
initial begin
    for (i=1; i<=6; i=i+1)
        rcmb_xy_valid_dly[i] = 1'b0;
end

// Valid chain: the only part of the delay line with an asynchronous reset.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        for (i=1; i<=6; i=i+1)
            rcmb_xy_valid_dly[i] <= 1'b0;
    end else begin
        rcmb_xy_valid_dly[1] <= rcmb_final_xy_valid;
        for (i=2; i<=6; i=i+1)
            rcmb_xy_valid_dly[i] <= rcmb_xy_valid_dly[i-1];
    end
end

// Bank, address and data chains: plain shift registers without reset.
always @(posedge clk) begin
    // stage 1 samples the inputs
    rcmb_xy_bank_dly[1] <= rcmb_final_xy_bank;
    rcmb_xy_addr_dly[1] <= rcmb_final_xy_addr;
    rcmb_x_dout_dly[1]  <= rcmb_final_x_din;
    rcmb_y_dout_dly[1]  <= rcmb_final_y_din;
    // stages 2..6 of bank/address
    for (i=2; i<=6; i=i+1) begin
        rcmb_xy_bank_dly[i] <= rcmb_xy_bank_dly[i-1];
        rcmb_xy_addr_dly[i] <= rcmb_xy_addr_dly[i-1];
    end
    // stages 2..4 of the data words
    for (i=2; i<=4; i=i+1) begin
        rcmb_x_dout_dly[i] <= rcmb_x_dout_dly[i-1];
        rcmb_y_dout_dly[i] <= rcmb_y_dout_dly[i-1];
    end
end
//
// Internal Busy Flag Logic
//
// busy_next goes high when an operation is accepted (rdy && ena) and low
// again when a valid BANK_RCMB_EXT word is seen at delay stage 4.
// busy_now is the top tap of a 5-bit shift register that is preloaded with
// all ones on start and otherwise shifts busy_next in, so a change of
// busy_next reaches busy_now five clock edges later.
reg        busy_next      = 1'b0;
reg  [4:0] busy_now_shreg = 5'b00000;
wire       busy_now       = busy_now_shreg[4];

always @(posedge clk or negedge rst_n) begin
    if (!rst_n)
        busy_now_shreg <= 5'b00000;
    else if (rdy && ena)
        busy_now_shreg <= 5'b11111;
    else
        busy_now_shreg <= {busy_now_shreg[3:0], busy_next};
end

// The set and clear conditions cannot both hold (one needs rdy, the other
// needs !rdy), hence the priority chain.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n)
        busy_next <= 1'b0;
    else if (rdy && ena)
        busy_next <= 1'b1;
    else if (!rdy && rcmb_xy_valid_dly[4] && (rcmb_xy_bank_dly[4] == BANK_RCMB_EXT))
        busy_next <= 1'b0;
end
//
// Ready Flag Logic
//
// rdy is high at power-up and after reset. It drops when an operation is
// accepted (rdy && ena) and rises again once busy_now is low while rdy is
// low. The two conditions are mutually exclusive on rdy.
reg rdy_reg = 1'b1;
assign rdy = rdy_reg;

always @(posedge clk or negedge rst_n) begin
    if (!rst_n)
        rdy_reg <= 1'b1;
    else if (rdy && ena)
        rdy_reg <= 1'b0;
    else if (!rdy && !busy_now)
        rdy_reg <= 1'b1;
end
//
// Pipelined Flags
//
// Registered address comparisons. Each flag is computed from delay stage
// N-1 of the address, so it lines up in time with stage N (the number in
// the flag's name).
reg rcmb_xy_addr_dly3_is_zero;
reg rcmb_xy_addr_dly3_is_one;
reg rcmb_xy_addr_dly3_gt_one;
reg rcmb_xy_addr_dly5_is_one;
reg rcmb_xy_addr_dly5_gt_one;
reg rcmb_xy_addr_dly6_is_zero;

always @(posedge clk) begin
    // aligned with stage 3
    rcmb_xy_addr_dly3_is_zero <= (rcmb_xy_addr_dly[2] == OP_ADDR_ZERO);
    rcmb_xy_addr_dly3_is_one  <= (rcmb_xy_addr_dly[2] == OP_ADDR_ONE);
    rcmb_xy_addr_dly3_gt_one  <= (rcmb_xy_addr_dly[2] >  OP_ADDR_ONE);
    // aligned with stage 5
    rcmb_xy_addr_dly5_is_one  <= (rcmb_xy_addr_dly[4] == OP_ADDR_ONE);
    rcmb_xy_addr_dly5_gt_one  <= (rcmb_xy_addr_dly[4] >  OP_ADDR_ONE);
    // aligned with stage 6
    rcmb_xy_addr_dly6_is_zero <= (rcmb_xy_addr_dly[5] == OP_ADDR_ZERO);
end
//
// LSB Math
//
// LSB stage: one DSP slice for X and one for Y.
//
// lsb_ce drives the ce_abc and ce_ctrl pins of both slices; lsb_ce_dly
// (lsb_ce registered once) drives ce_p.
reg lsb_ce = 1'b0;
reg lsb_ce_dly = 1'b0;
reg [DSP48E1_OPMODE_W -1:0] lsb_opmode;
// The p outputs of the LSB slices are not read anywhere else in this
// module; only the cascade outputs are used.
wire [DSP48E1_P_W -1:0] lsb_px;
wire [DSP48E1_P_W -1:0] lsb_py;
// Operand construction: the stage-4 data word (ab operand) and the
// registered auxiliary word (c operand) are each split at bit WORD_W and a
// constant bit is spliced in between the two parts -- 1'b1 for the ab
// operand, 1'b0 for the c operand -- giving WORD_EXT_W+1 bits that are then
// zero-padded to DSP48E1_C_W.
// NOTE(review): the arithmetic purpose of the spliced-in bit is not evident
// from this file -- confirm against the design notes.
wire [DSP48E1_C_W -1:0] lsb_ax = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rcmb_x_dout_dly[4][WORD_EXT_W-1:WORD_W], 1'b1, rcmb_x_dout_dly[4][WORD_W-1:0]};
wire [DSP48E1_C_W -1:0] lsb_ay = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rcmb_y_dout_dly[4][WORD_EXT_W-1:WORD_W], 1'b1, rcmb_y_dout_dly[4][WORD_W-1:0]};
wire [DSP48E1_C_W -1:0] lsb_bx = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_wide_x_din_aux_pipe[WORD_EXT_W-1:WORD_W], 1'b0, rd_wide_x_din_aux_pipe[WORD_W-1:0]};
wire [DSP48E1_C_W -1:0] lsb_by = {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, rd_wide_y_din_aux_pipe[WORD_EXT_W-1:WORD_W], 1'b0, rd_wide_y_din_aux_pipe[WORD_W-1:0]};
// Cascade links from each LSB slice to the matching MSB slice.
wire [DSP48E1_P_W -1:0] lsb2msb_px_casc;
wire [DSP48E1_P_W -1:0] lsb2msb_py_casc;
// X slice. casc_p_in and carry_out are left unconnected.
`MODEXPNG_DSP_SLICE_ADDSUB dsp_lsb_x
(
.clk (clk),
.ce_abc (lsb_ce),
.ce_p (lsb_ce_dly),
.ce_ctrl (lsb_ce),
.ab (lsb_ax),
.c (lsb_bx),
.p (lsb_px),
.op_mode (lsb_opmode),
.alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
.carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN),
.casc_p_in (),
.casc_p_out (lsb2msb_px_casc),
.carry_out ()
);
// Y slice. Wired identically to the X slice, with the Y operands.
`MODEXPNG_DSP_SLICE_ADDSUB dsp_lsb_y
(
.clk (clk),
.ce_abc (lsb_ce),
.ce_p (lsb_ce_dly),
.ce_ctrl (lsb_ce),
.ab (lsb_ay),
.c (lsb_by),
.p (lsb_py),
.op_mode (lsb_opmode),
.alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
.carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN),
.casc_p_in (),
.casc_p_out (lsb2msb_py_casc),
.carry_out ()
);
// LSB clock enable: a one-cycle pulse for every valid stage-3 word from
// bank BANK_RCMB_ML, and for the address-zero word of bank BANK_RCMB_MH.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        lsb_ce <= 1'b0;
    end else begin
        // default: disabled unless one of the cases below overrides it
        lsb_ce <= 1'b0;
        if (rcmb_xy_valid_dly[3]) begin
            case (rcmb_xy_bank_dly[3])
                BANK_RCMB_ML: begin
                    lsb_ce <= 1'b1;
                end
                BANK_RCMB_MH: begin
                    if (rcmb_xy_addr_dly3_is_zero)
                        lsb_ce <= 1'b1;
                end
                default: ;
            endcase
        end
    end
end
// LSB opmode selection, decided from the stage-3 bank and address flags.
// There is no default assignment (a default to DSP48E1_OPMODE_DNC was left
// disabled in the original), so lsb_opmode holds its previous value whenever
// none of the cases below applies.
always @(posedge clk) begin
    if (rcmb_xy_valid_dly[3]) begin
        case (rcmb_xy_bank_dly[3])
            BANK_RCMB_ML: begin
                if (rcmb_xy_addr_dly3_is_zero)
                    lsb_opmode <= DSP48E1_OPMODE_Z0_YC_XAB;
                else
                    lsb_opmode <= DSP48E1_OPMODE_ZP17_YC_XAB;
            end
            BANK_RCMB_MH: begin
                if (rcmb_xy_addr_dly3_is_zero)
                    lsb_opmode <= DSP48E1_OPMODE_ZP17_YC_XAB;
            end
            default: ;
        endcase
    end
end
// lsb_ce delayed by one clock; it drives the ce_p pins of the LSB slices.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        lsb_ce_dly <= 1'b0;
    end else begin
        lsb_ce_dly <= lsb_ce;
    end
end
//
// MSB Math
//
// MSB stage: one DSP slice for X and one for Y, each fed by the cascade
// output of the matching LSB slice.
//
// msb_ce drives ce_abc and ce_ctrl; msb_ce_dly1 (one clock later) drives
// ce_p; msb_ce_dly2 (two clocks later) qualifies the output register update.
reg msb_ce = 1'b0;
reg msb_ce_dly1 = 1'b0;
reg msb_ce_dly2 = 1'b0;
reg [DSP48E1_OPMODE_W -1:0] msb_opmode;
// The low WORD_EXT_W bits of msb_px/msb_py are the words written to the
// rdct_* output ports.
wire [DSP48E1_P_W -1:0] msb_px;
wire [DSP48E1_P_W -1:0] msb_py;
// Operands: the two slices [WORD_EXT_W-1:WORD_W] and [WORD_W-1:0] are
// concatenated back to back with nothing in between, so each operand is
// simply the full word zero-extended to DSP48E1_C_W (no spliced-in bit,
// unlike the LSB stage).
wire [DSP48E1_C_W -1:0] msb_ax = {{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rcmb_x_dout_dly[4][WORD_EXT_W-1:WORD_W], rcmb_x_dout_dly[4][WORD_W-1:0]};
wire [DSP48E1_C_W -1:0] msb_ay = {{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rcmb_y_dout_dly[4][WORD_EXT_W-1:WORD_W], rcmb_y_dout_dly[4][WORD_W-1:0]};
wire [DSP48E1_C_W -1:0] msb_bx = {{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_wide_x_din_aux_pipe[WORD_EXT_W-1:WORD_W], rd_wide_x_din_aux_pipe[WORD_W-1:0]};
wire [DSP48E1_C_W -1:0] msb_by = {{(DSP48E1_C_W-WORD_EXT_W){1'b0}}, rd_wide_y_din_aux_pipe[WORD_EXT_W-1:WORD_W], rd_wide_y_din_aux_pipe[WORD_W-1:0]};
// X slice. casc_p_in comes from dsp_lsb_x; casc_p_out and carry_out are
// left unconnected.
`MODEXPNG_DSP_SLICE_ADDSUB dsp_msb_x
(
.clk (clk),
.ce_abc (msb_ce),
.ce_p (msb_ce_dly1),
.ce_ctrl (msb_ce),
.ab (msb_ax),
.c (msb_bx),
.p (msb_px),
.op_mode (msb_opmode),
.alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
.carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN),
.casc_p_in (lsb2msb_px_casc),
.casc_p_out (),
.carry_out ()
);
// Y slice. Wired identically, with the Y operands and the Y cascade link.
`MODEXPNG_DSP_SLICE_ADDSUB dsp_msb_y
(
.clk (clk),
.ce_abc (msb_ce),
.ce_p (msb_ce_dly1),
.ce_ctrl (msb_ce),
.ab (msb_ay),
.c (msb_by),
.p (msb_py),
.op_mode (msb_opmode),
.alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
.carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN),
.casc_p_in (lsb2msb_py_casc),
.casc_p_out (),
.carry_out ()
);
// MSB clock enable: a one-cycle pulse for every valid stage-3 word from
// bank BANK_RCMB_MH whose address is not zero, and for every valid word
// from bank BANK_RCMB_EXT.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        msb_ce <= 1'b0;
    end else begin
        // default: disabled unless one of the cases below overrides it
        msb_ce <= 1'b0;
        if (rcmb_xy_valid_dly[3]) begin
            case (rcmb_xy_bank_dly[3])
                BANK_RCMB_MH: begin
                    if (!rcmb_xy_addr_dly3_is_zero)
                        msb_ce <= 1'b1;
                end
                BANK_RCMB_EXT: begin
                    msb_ce <= 1'b1;
                end
                default: ;
            endcase
        end
    end
end
// MSB opmode selection. Unlike the LSB stage, this one falls back to
// DSP48E1_OPMODE_DNC on every cycle in which no case below applies.
always @(posedge clk) begin
    msb_opmode <= DSP48E1_OPMODE_DNC;
    if (rcmb_xy_valid_dly[3]) begin
        case (rcmb_xy_bank_dly[3])
            BANK_RCMB_MH: begin
                // address 1 uses the cascade-input opmode; higher addresses
                // use the Z0 opmode
                if (rcmb_xy_addr_dly3_is_one)
                    msb_opmode <= DSP48E1_OPMODE_ZPCIN17_YC_XAB;
                else if (rcmb_xy_addr_dly3_gt_one)
                    msb_opmode <= DSP48E1_OPMODE_Z0_YC_XAB;
            end
            BANK_RCMB_EXT: begin
                msb_opmode <= DSP48E1_OPMODE_Z0_Y0_XAB;
            end
            default: ;
        endcase
    end
end
// Two-stage delay of msb_ce: dly1 drives the ce_p pins of the MSB slices,
// dly2 gates the output register update.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        msb_ce_dly1 <= 1'b0;
        msb_ce_dly2 <= 1'b0;
    end else begin
        msb_ce_dly1 <= msb_ce;
        msb_ce_dly2 <= msb_ce_dly1;
    end
end
//
// Output Logic
//
// Destination address for the next output word, prepared one clock ahead of
// the output register update. The wide and narrow ports always receive the
// same address:
//   BANK_RCMB_MH,  address == 1 : OP_ADDR_ZERO
//   BANK_RCMB_MH,  address  > 1 : stage-5 address minus one
//   BANK_RCMB_EXT               : word_index_last
// In every other situation both registers keep their previous value.
reg [OP_ADDR_W -1:0] wide_xy_addr_next;
reg [OP_ADDR_W -1:0] narrow_xy_addr_next;

always @(posedge clk) begin
    if (msb_ce_dly1) begin
        case (rcmb_xy_bank_dly[5])
            BANK_RCMB_MH: begin
                if (rcmb_xy_addr_dly5_is_one)
                    {wide_xy_addr_next, narrow_xy_addr_next} <= {OP_ADDR_ZERO, OP_ADDR_ZERO};
                else if (rcmb_xy_addr_dly5_gt_one)
                    {wide_xy_addr_next, narrow_xy_addr_next} <= {rcmb_xy_addr_dly[5] - 1'b1, rcmb_xy_addr_dly[5] - 1'b1};
            end
            BANK_RCMB_EXT: begin
                {wide_xy_addr_next, narrow_xy_addr_next} <= {word_index_last, word_index_last};
            end
            default: ;
        endcase
    end
end
// Output register update. Both ports are cleared on reset and by default on
// every clock. When msb_ce_dly2 is high, the low WORD_EXT_W bits of the MSB
// slice outputs are written to both ports for:
//   - BANK_RCMB_MH words whose address is not zero,
//   - all BANK_RCMB_EXT words.
// The ports differ only in the bank they present (sel_wide_out versus
// sel_narrow_out).
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        clear_rdct_narrow;
        clear_rdct_wide;
    end else begin
        // default: no valid output this cycle
        clear_rdct_narrow;
        clear_rdct_wide;
        if (msb_ce_dly2) begin
            case (rcmb_xy_bank_dly[6])
                BANK_RCMB_MH: begin
                    if (!rcmb_xy_addr_dly6_is_zero) begin
                        set_rdct_narrow(sel_narrow_out, narrow_xy_addr_next, msb_px[WORD_EXT_W-1:0], msb_py[WORD_EXT_W-1:0]);
                        set_rdct_wide  (sel_wide_out,   wide_xy_addr_next,   msb_px[WORD_EXT_W-1:0], msb_py[WORD_EXT_W-1:0]);
                    end
                end
                BANK_RCMB_EXT: begin
                    set_rdct_narrow(sel_narrow_out, narrow_xy_addr_next, msb_px[WORD_EXT_W-1:0], msb_py[WORD_EXT_W-1:0]);
                    set_rdct_wide  (sel_wide_out,   wide_xy_addr_next,   msb_px[WORD_EXT_W-1:0], msb_py[WORD_EXT_W-1:0]);
                end
                default: ;
            endcase
        end
    end
end
endmodule