//
// modexpng_reductor
//
// Takes the word stream presented on the rcmb_final_* ports, delays it by
// three clock cycles and adds the rd_wide_{x,y}_din_aux operands to it.
// The results are driven, registered, on both the rdct_wide_* and the
// rdct_narrow_* output ports (same address and data on both, bank taken from
// sel_wide_out / sel_narrow_out respectively).
//
// Per coordinate (x and y) a 2-bit carry is accumulated from the words of
// bank BANK_RCMB_ML and from word 0 of bank BANK_RCMB_MH; that carry is added
// to word 1 of BANK_RCMB_MH, which is emitted at output address 0. Later
// BANK_RCMB_MH words are emitted at (address - 1) without the carry, and a
// BANK_RCMB_EXT word is passed through unchanged at address word_index_last.
//
// ena / rdy form a start / ready handshake.
//
module modexpng_reductor
(
clk, rst,
ena, rdy,
word_index_last,
sel_wide_out, sel_narrow_out,
rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_din_aux, rd_wide_y_din_aux,
rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_din, rcmb_final_y_din, rcmb_final_xy_valid,
rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_dout, rdct_wide_y_dout, rdct_wide_xy_valid,
rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
);
//
// Headers
//
`include "modexpng_parameters.vh"
//`include "../rtl_1/modexpng_mmm_fsm.vh"
//`include "../rtl_1/modexpng_parameters_x8.vh"
// Clock; every register in this module updates on its rising edge.
input clk;
// Synchronous reset (only ever sampled inside posedge-clk processes).
input rst;
// Start request. Sampled high while rdy is high it drops rdy and arms the
// busy tracker; whenever it is high it also clears the two carry registers.
input ena;
// High while idle; goes low after a start and returns high once the busy
// tracker has drained.
output rdy;
/*
input [FSM_STATE_WIDTH-1:0] fsm_state_next;*/
// Output address used for the word that arrives in bank BANK_RCMB_EXT.
input [7:0] word_index_last;/*
input dsp_xy_ce_p;
*/
// Bank values driven on rdct_wide_xy_bank / rdct_narrow_xy_bank whenever a
// word is emitted.
input [2:0] sel_wide_out;
input [2:0] sel_narrow_out;
/*
input [9*47-1:0] dsp_x_p;
input [9*47-1:0] dsp_y_p;
input [ 4:0] col_index;
input [ 4:0] col_index_last;
*//*
input [ 7:0] rd_narrow_xy_addr;
input [ 1:0] rd_narrow_xy_bank;
*/
// Auxiliary wide-memory read port. Only the two data words are used: they
// are added to the recombinator data coming out of the 3-stage delay line
// (the aux data itself is not delayed here). The bank and address inputs
// are not read by any logic in this module.
input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
input [ 7:0] rd_wide_xy_addr_aux;
input [ 17:0] rd_wide_x_din_aux;
input [ 17:0] rd_wide_y_din_aux;
//
// Incoming word stream; bank, address and data are captured only in cycles
// where rcmb_final_xy_valid is high.
input [ BANK_ADDR_W -1:0] rcmb_final_xy_bank;
input [ 7:0] rcmb_final_xy_addr;
input [ 17:0] rcmb_final_x_din;
input [ 17:0] rcmb_final_y_din;
input rcmb_final_xy_valid;
// Registered "wide" result port. The valid flag defaults to 0 every cycle
// and is set only in a cycle where a word is emitted; bank/address/data are
// assigned X whenever valid is 0.
output [ 2:0] rdct_wide_xy_bank;
output [ 7:0] rdct_wide_xy_addr;
output [ 17:0] rdct_wide_x_dout;
output [ 17:0] rdct_wide_y_dout;
output rdct_wide_xy_valid;
// Registered "narrow" result port; carries the same address and data as the
// wide port, with the bank taken from sel_narrow_out.
output [ 2:0] rdct_narrow_xy_bank;
output [ 7:0] rdct_narrow_xy_addr;
output [ 17:0] rdct_narrow_x_dout;
output [ 17:0] rdct_narrow_y_dout;
output rdct_narrow_xy_valid;
//
// Ready
//
// rdy_reg backs the rdy output; it powers up, and resets, to "ready".
reg  rdy_reg = 1'b1;
// Busy indication, driven further down in this module.
wire busy_now;

assign rdy = rdy_reg;

// The start and finish conditions can never be true in the same cycle
// (one needs rdy high, the other needs it low), so a priority chain is used.
always @(posedge clk) begin
    if (rst)
        rdy_reg <= 1'b1;
    else if (rdy && ena)
        rdy_reg <= 1'b0;        // start accepted: drop ready
    else if (!rdy && !busy_now)
        rdy_reg <= 1'b1;        // busy tracker drained: ready again
end
//
// Pipeline (Delay Match)
//
// Three-stage delay line for the rcmb_final_* stream.
// The valid flags have a reset; the payload registers do not.
reg rcmb_xy_valid_dly1 = 1'b0;
reg rcmb_xy_valid_dly2 = 1'b0;
reg rcmb_xy_valid_dly3 = 1'b0;

reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1, rcmb_xy_bank_dly2, rcmb_xy_bank_dly3;
reg [7:0]              rcmb_xy_addr_dly1, rcmb_xy_addr_dly2, rcmb_xy_addr_dly3;
reg [17:0]             rcmb_x_dout_dly1,  rcmb_x_dout_dly2,  rcmb_x_dout_dly3;
reg [17:0]             rcmb_y_dout_dly1,  rcmb_y_dout_dly2,  rcmb_y_dout_dly3;

// Valid flags: a plain 3-deep shift register, cleared by rst.
always @(posedge clk) begin
    if (rst)
        {rcmb_xy_valid_dly3, rcmb_xy_valid_dly2, rcmb_xy_valid_dly1} <= 3'b000;
    else
        {rcmb_xy_valid_dly3, rcmb_xy_valid_dly2, rcmb_xy_valid_dly1} <=
            {rcmb_xy_valid_dly2, rcmb_xy_valid_dly1, rcmb_final_xy_valid};
end

// Payload: each stage captures only when the stage feeding it holds a valid
// word, otherwise it keeps its previous contents.
always @(posedge clk) begin
    // stage 1 <- module inputs
    if (rcmb_final_xy_valid)
        {rcmb_xy_bank_dly1, rcmb_xy_addr_dly1, rcmb_x_dout_dly1, rcmb_y_dout_dly1} <=
            {rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_din, rcmb_final_y_din};

    // stage 2 <- stage 1
    if (rcmb_xy_valid_dly1)
        {rcmb_xy_bank_dly2, rcmb_xy_addr_dly2, rcmb_x_dout_dly2, rcmb_y_dout_dly2} <=
            {rcmb_xy_bank_dly1, rcmb_xy_addr_dly1, rcmb_x_dout_dly1, rcmb_y_dout_dly1};

    // stage 3 <- stage 2
    if (rcmb_xy_valid_dly2)
        {rcmb_xy_bank_dly3, rcmb_xy_addr_dly3, rcmb_x_dout_dly3, rcmb_y_dout_dly3} <=
            {rcmb_xy_bank_dly2, rcmb_xy_addr_dly2, rcmb_x_dout_dly2, rcmb_y_dout_dly2};
end
// Running 2-bit carries (one per coordinate). The *_dummy registers receive
// the low 16 bits of each sum and are never read.
reg [ 1:0] rcmb_x_lsb_carry;
reg [ 1:0] rcmb_y_lsb_carry;
reg [15:0] rcmb_x_lsb_dummy;
reg [15:0] rcmb_y_lsb_dummy;
// NOTE(review): the two *_lsb_dout registers are not referenced by any logic
// visible in this module.
reg [17:0] rcmb_x_lsb_dout;
reg [17:0] rcmb_y_lsb_dout;
//reg [17:0] reductor_fat_bram_x_msb_dout;
//reg reductor_fat_bram_x_msb_dout_valid = 1'b0;
//reg [ 7:0] reductor_fat_bram_x_msb_addr;
//
// Carry Computation
//
// 18-bit sums of the delayed word, the auxiliary word and the running carry;
// bits [17:16] become the next carry.
wire [17:0] rcmb_x_lsb_sum = rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry;
wire [17:0] rcmb_y_lsb_sum = rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry;

always @(posedge clk) begin
    if (ena) begin
        // carries are cleared whenever ena is high (not qualified by rdy)
        rcmb_x_lsb_carry <= 2'b00;
        rcmb_y_lsb_carry <= 2'b00;
    end else if (rcmb_xy_valid_dly3) begin
        case (rcmb_xy_bank_dly3)
            // every word of the ML bank contributes to the carry
            BANK_RCMB_ML: begin
                {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_lsb_sum;
                {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_lsb_sum;
            end
            // of the MH bank only word 0 contributes
            BANK_RCMB_MH: begin
                if (rcmb_xy_addr_dly3 == 8'd0) begin
                    {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_lsb_sum;
                    {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_lsb_sum;
                end
            end
        endcase
    end
end
//
// Reduction
//
// Output registers, each paired with the port it drives.

// -- wide port --
reg [ 2:0] wide_xy_bank;
assign rdct_wide_xy_bank = wide_xy_bank;

reg [ 7:0] wide_xy_addr;
assign rdct_wide_xy_addr = wide_xy_addr;

reg [17:0] wide_x_dout;
assign rdct_wide_x_dout = wide_x_dout;

reg [17:0] wide_y_dout;
assign rdct_wide_y_dout = wide_y_dout;

reg wide_xy_valid = 1'b0;
assign rdct_wide_xy_valid = wide_xy_valid;

// -- narrow port --
reg [ 2:0] narrow_xy_bank;
assign rdct_narrow_xy_bank = narrow_xy_bank;

reg [ 7:0] narrow_xy_addr;
assign rdct_narrow_xy_addr = narrow_xy_addr;

reg [17:0] narrow_x_dout;
assign rdct_narrow_x_dout = narrow_x_dout;

reg [17:0] narrow_y_dout;
assign rdct_narrow_y_dout = narrow_y_dout;

reg narrow_xy_valid = 1'b0;
assign rdct_narrow_xy_valid = narrow_xy_valid;
// Loads all five wide-output registers in one call. The assignments are
// non-blocking, so when the task is invoked more than once in the same clock
// edge the last invocation wins.
task _update_rdct_wide;
    input [ 2:0] new_bank;
    input [ 7:0] new_addr;
    input [17:0] new_x;
    input [17:0] new_y;
    input        new_valid;
    begin
        {wide_xy_bank, wide_xy_addr} <= {new_bank, new_addr};
        {wide_x_dout,  wide_y_dout}  <= {new_x, new_y};
        wide_xy_valid                <= new_valid;
    end
endtask
// Loads all five narrow-output registers in one call. The assignments are
// non-blocking, so when the task is invoked more than once in the same clock
// edge the last invocation wins.
task _update_rdct_narrow;
    input [ 2:0] new_bank;
    input [ 7:0] new_addr;
    input [17:0] new_x;
    input [17:0] new_y;
    input        new_valid;
    begin
        {narrow_xy_bank, narrow_xy_addr} <= {new_bank, new_addr};
        {narrow_x_dout,  narrow_y_dout}  <= {new_x, new_y};
        narrow_xy_valid                  <= new_valid;
    end
endtask
// Presents one word on the wide output: bank, address and x/y data as given,
// with the valid flag asserted.
task set_rdct_wide;
    input [ 2:0] out_bank;
    input [ 7:0] out_addr;
    input [17:0] out_x;
    input [17:0] out_y;
    begin
        _update_rdct_wide(out_bank, out_addr, out_x, out_y, 1'b1);
    end
endtask
// Presents one word on the narrow output: bank, address and x/y data as
// given, with the valid flag asserted.
task set_rdct_narrow;
    input [ 2:0] out_bank;
    input [ 7:0] out_addr;
    input [17:0] out_x;
    input [17:0] out_y;
    begin
        _update_rdct_narrow(out_bank, out_addr, out_x, out_y, 1'b1);
    end
endtask
// Marks the wide output as idle: valid goes low, while bank, address and
// data are assigned X (unknown).
task clear_rdct_wide;
    begin
        _update_rdct_wide({3{1'bX}}, {8{1'bX}}, {18{1'bX}}, {18{1'bX}}, 1'b0);
    end
endtask
// Marks the narrow output as idle: valid goes low, while bank, address and
// data are assigned X (unknown).
task clear_rdct_narrow;
    begin
        _update_rdct_narrow({3{1'bX}}, {8{1'bX}}, {18{1'bX}}, {18{1'bX}}, 1'b0);
    end
endtask
//
//
//
// Delayed recombinator word plus auxiliary word, truncated to 18 bits ...
wire [17:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_din_aux;
wire [17:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_din_aux;
// ... and the same sums with the zero-extended 2-bit running carry added.
wire [17:0] sum_rdct_x_carry = sum_rdct_x + {{16{1'b0}}, rcmb_x_lsb_carry};
wire [17:0] sum_rdct_y_carry = sum_rdct_y + {{16{1'b0}}, rcmb_y_lsb_carry};

// Output stage. Both outputs are invalidated by default on every clock edge;
// a later set_* call in the same edge overrides that default because all the
// assignments involved are non-blocking.
always @(posedge clk) begin
    clear_rdct_wide;
    clear_rdct_narrow;

    if (rst) begin
        // in reset nothing overrides the default clear
    end else if (rcmb_xy_valid_dly3) begin
        case (rcmb_xy_bank_dly3)

            BANK_RCMB_MH: begin
                if (rcmb_xy_addr_dly3 == 8'd1) begin
                    // word 1 absorbs the accumulated carry and lands at address 0
                    set_rdct_wide  (sel_wide_out,   8'd0, sum_rdct_x_carry, sum_rdct_y_carry);
                    set_rdct_narrow(sel_narrow_out, 8'd0, sum_rdct_x_carry, sum_rdct_y_carry);
                end else if (rcmb_xy_addr_dly3 > 8'd1) begin
                    // later words are shifted down by one address, no carry
                    set_rdct_wide  (sel_wide_out,   rcmb_xy_addr_dly3 - 8'd1, sum_rdct_x, sum_rdct_y);
                    set_rdct_narrow(sel_narrow_out, rcmb_xy_addr_dly3 - 8'd1, sum_rdct_x, sum_rdct_y);
                end
                // word 0 produces no output here
            end

            BANK_RCMB_EXT: begin
                // passed through unmodified at address word_index_last
                set_rdct_wide  (sel_wide_out,   word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
                set_rdct_narrow(sel_narrow_out, word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
            end

        endcase
    end
end
//
// Busy
//
// busy_next is raised when a start is accepted and lowered when the
// BANK_RCMB_EXT word leaves the delay line. It is then fed through a 3-bit
// shift register whose top bit is busy_now.
reg       busy_next      = 1'b0;
reg [2:0] busy_now_shreg = 3'b000;

assign busy_now = busy_now_shreg[2];

// Shift register: preset to all ones on an accepted start, otherwise shift
// busy_next in at the bottom.
always @(posedge clk) begin
    if (rst)
        busy_now_shreg <= 3'b000;
    else if (rdy && ena)
        busy_now_shreg <= 3'b111;
    else
        busy_now_shreg <= {busy_now_shreg[1:0], busy_next};
end

// The set and clear conditions are mutually exclusive (one requires rdy
// high, the other requires it low), so a priority chain is used.
always @(posedge clk) begin
    if (rst)
        busy_next <= 1'b0;
    else if (rdy && ena)
        busy_next <= 1'b1;
    else if (!rdy && rcmb_xy_valid_dly3 && (rcmb_xy_bank_dly3 == BANK_RCMB_EXT))
        busy_next <= 1'b0;
end
endmodule