//======================================================================
//
// Copyright: 2019, The Commons Conservancy Cryptech Project
// SPDX-License-Identifier: BSD-3-Clause
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================
// Control module that sequences the square, triangle and rectangle
// multiplication phases, generates read addresses for the wide and narrow
// operand storage, feeds two DSP array blocks (X and Y channels) and hands
// their products to a recombinator block.
// NOTE(review): "mmm" presumably stands for Montgomery modular
// multiplication -- confirm against the project documentation.
module modexpng_mmm_dual
(
// clock and asynchronous active-low reset
clk, rst_n,
// start request / ready handshake
ena, rdy,
// operation modifiers and operand geometry
ladder_mode,
word_index_last,
word_index_last_minus1,
force_unity_b,
only_reduce,
just_multiply,
// storage banks read during the square phase
sel_wide_in, sel_narrow_in,
// wide storage read port (main and auxiliary)
rd_wide_xy_ena,
rd_wide_xy_ena_aux,
rd_wide_xy_bank,
rd_wide_xy_bank_aux,
rd_wide_xy_addr,
rd_wide_xy_addr_aux,
rd_wide_x_din,
rd_wide_y_din,
rd_wide_x_din_aux,
rd_wide_y_din_aux,
// narrow storage read port
rd_narrow_xy_ena,
rd_narrow_xy_bank,
rd_narrow_xy_addr,
rd_narrow_x_din,
rd_narrow_y_din,
// recombinator outputs (all driven by the recombinator_block instance)
rcmb_wide_xy_bank,
rcmb_wide_xy_addr,
rcmb_wide_x_dout,
rcmb_wide_y_dout,
rcmb_wide_xy_valid,
rcmb_narrow_xy_bank,
rcmb_narrow_xy_addr,
rcmb_narrow_x_dout,
rcmb_narrow_y_dout,
rcmb_narrow_xy_valid,
rcmb_xy_bank,
rcmb_xy_addr,
rcmb_x_dout,
rcmb_y_dout,
rcmb_xy_valid,
// reductor handshake
rdct_ena, rdct_rdy
);
//
// Headers
//
`include "modexpng_parameters.vh"
`include "modexpng_mmm_dual_fsm.vh"
//
// Ports
//
// Clock; all processes in this module are clocked on its rising edge.
input clk;
// Asynchronous reset, active low (used as "negedge rst_n" / "!rst_n").
input rst_n;
// Start request: leaves the IDLE state and clears rdy.
input ena;
// Ready flag: high after reset, cleared when ena is accepted, set again
// once the FSM reaches the STOP state.
output rdy;
// Selects which narrow read data (Y when set, X otherwise) is used as the
// common B operand while the merged-operand feed is active.
input ladder_mode;
// Wrap value for the decrementing wide addresses; its bits above
// MAC_INDEX_W are latched as the index of the last column.
input [ OP_ADDR_W -1:0] word_index_last;
// Compared against the narrow address to produce the "almost done" flags.
input [ OP_ADDR_W -1:0] word_index_last_minus1;
// When set, the merged B operand is WORD_ONE for the first word of a
// square column and WORD_ZERO for the following words.
input force_unity_b;
// When set, the FSM skips the square phase and starts with the triangle phase.
input only_reduce;
// When set, the FSM goes to STOP right after the square phase.
input just_multiply;
// Wide / narrow storage banks that are read during the square phase.
input [BANK_ADDR_W -1:0] sel_wide_in;
input [BANK_ADDR_W -1:0] sel_narrow_in;
// Wide storage read port: enable, bank and NUM_MULTS_HALF packed addresses,
// plus a separate auxiliary enable/bank/address.
output rd_wide_xy_ena;
output rd_wide_xy_ena_aux;
output [BANK_ADDR_W -1:0] rd_wide_xy_bank;
output [BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
output [ OP_ADDR_W * NUM_MULTS_HALF -1:0] rd_wide_xy_addr;
output [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
// Wide storage read data; concatenated with the auxiliary word to form the
// A operand of the DSP array blocks.
input [ WORD_EXT_W * NUM_MULTS_HALF -1:0] rd_wide_x_din;
input [ WORD_EXT_W * NUM_MULTS_HALF -1:0] rd_wide_y_din;
input [ WORD_EXT_W -1:0] rd_wide_x_din_aux;
input [ WORD_EXT_W -1:0] rd_wide_y_din_aux;
// Narrow storage read port; the read data feeds the B operand of the DSP
// array blocks.
output rd_narrow_xy_ena;
output [BANK_ADDR_W -1:0] rd_narrow_xy_bank;
output [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
input [ WORD_EXT_W -1:0] rd_narrow_x_din;
input [ WORD_EXT_W -1:0] rd_narrow_y_din;
// Recombinator outputs towards the wide storage.
output [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr;
output [ WORD_EXT_W -1:0] rcmb_wide_x_dout;
output [ WORD_EXT_W -1:0] rcmb_wide_y_dout;
output rcmb_wide_xy_valid;
// Recombinator outputs towards the narrow storage.
output [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout;
output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout;
output rcmb_narrow_xy_valid;
// Recombinator outputs connected to its rdct_narrow_* ports; bank, address
// and valid are also read back in this module to drive the auxiliary wide
// read port during the rectangle phase.
output [BANK_ADDR_W -1:0] rcmb_xy_bank;
output [ OP_ADDR_W -1:0] rcmb_xy_addr;
output [ WORD_EXT_W -1:0] rcmb_x_dout;
output [ WORD_EXT_W -1:0] rcmb_y_dout;
output rcmb_xy_valid;
// Reductor enable: asserted for the cycle after the FSM is in the
// RECTANGLE_COL_0_INIT state.
output rdct_ena;
// Reductor ready: awaited in the WAIT_REDUCTOR state before STOP.
input rdct_rdy;
//
// FSM Declaration
//
// Current state (registered) and combinationally computed next state.
reg  [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE;
reg  [MMM_FSM_STATE_W-1:0] fsm_state_next;

// Pre-decoded branch targets used by the transition logic.
wire [MMM_FSM_STATE_W-1:0] fsm_state_after_idle,
                           fsm_state_after_mult_square,
                           fsm_state_after_mult_triangle,
                           fsm_state_after_mult_rectangle,
                           fsm_state_after_square_holdoff;

//
// FSM Process
//
// State register: asynchronously forced to IDLE while rst_n is low,
// otherwise it follows fsm_state_next on every rising clock edge.
always @(posedge clk or negedge rst_n) begin
    if (~rst_n) begin
        fsm_state <= MMM_FSM_STATE_IDLE;
    end else begin
        fsm_state <= fsm_state_next;
    end
end
//
// Storage Control Interface
//
// Registers behind the wide/narrow storage read ports. Only the enables
// carry an initial value; banks and addresses are written by the storage
// control processes before they are used.
reg wide_xy_ena = 1'b0;
reg wide_xy_ena_aux = 1'b0;
reg [BANK_ADDR_W -1:0] wide_xy_bank;
reg [BANK_ADDR_W -1:0] wide_xy_bank_aux;
// One wide address per multiplier pair (NUM_MULTS_HALF entries).
reg [ OP_ADDR_W -1:0] wide_xy_addr[0:NUM_MULTS_HALF-1];
reg [ OP_ADDR_W -1:0] wide_xy_addr_aux;
reg narrow_xy_ena = 1'b0;
reg [BANK_ADDR_W -1:0] narrow_xy_bank;
reg [ OP_ADDR_W -1:0] narrow_xy_addr;
// narrow_xy_addr delayed by one clock (used by the DSP mode logic).
reg [ OP_ADDR_W -1:0] narrow_xy_addr_dly;
// Pre-computed narrow address + 1.
wire [ OP_ADDR_W -1:0] narrow_xy_addr_inc = narrow_xy_addr + 1'b1;
//
// Output Port Mapping
//
assign rd_wide_xy_ena = wide_xy_ena;
assign rd_wide_xy_ena_aux = wide_xy_ena_aux;
assign rd_wide_xy_bank = wide_xy_bank;
assign rd_wide_xy_bank_aux = wide_xy_bank_aux;
assign rd_wide_xy_addr_aux = wide_xy_addr_aux;
assign rd_narrow_xy_ena = narrow_xy_ena;
assign rd_narrow_xy_bank = narrow_xy_bank;
assign rd_narrow_xy_addr = narrow_xy_addr;
// Pack the per-multiplier address array into the flat output bus; entry z
// occupies bits [z*OP_ADDR_W +: OP_ADDR_W].
// Note: genvar z is also used by a later generate loop in this module.
genvar z;
generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
begin : gen_rd_wide_xy_addr
assign rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W] = wide_xy_addr[z];
end
endgenerate
//
// Column Counter
//
reg [COL_INDEX_W-1:0] col_index;       // column currently being processed
reg [COL_INDEX_W-1:0] col_index_prev;  // col_index delayed by one clock
reg [COL_INDEX_W-1:0] col_index_last;  // index of the final column
reg [COL_INDEX_W-1:0] col_index_next;  // column that will follow col_index
reg                   col_is_last;     // set when a COL_N_INIT state loads the final column

//
// Column Counter Logic
//
// The counter is (re)started whenever the FSM is about to enter one of the
// COL_0_INIT states and advanced whenever it is about to enter one of the
// COL_N_INIT states; in all other states it holds its value.
// NOTE(review): column 0 is never flagged as last and the FSM always moves
// from COL_0_BUSY to COL_N_INIT, so at least two columns appear to be
// assumed -- confirm against the supported operand sizes.
always @(posedge clk) begin

    // one-cycle delayed copy of the column index
    col_index_prev <= col_index;

    case (fsm_state_next)

        // start of a phase: rewind to column 0 and latch the last column index
        MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
            col_is_last    <= 1'b0;
            col_index      <= COL_INDEX_ZERO;
            col_index_next <= COL_INDEX_ONE;
            col_index_last <= word_index_last[OP_ADDR_W-1:MAC_INDEX_W];
        end

        // next column: step forward, wrapping the look-ahead after the last one
        MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
            col_is_last    <= (col_index_next == col_index_last);
            col_index      <= col_index_next;
            col_index_next <= (col_index_next == col_index_last) ? COL_INDEX_ZERO
                                                                 : col_index_next + 1'b1;
        end

        // every other state: hold
        default: ;

    endcase

end
//
// Completion Flags
//
// Combinational "almost done" conditions: the narrow read address has
// reached the second-to-last word of the current pass.
wire square_almost_done_comb    = (narrow_xy_addr == word_index_last_minus1);
wire triangle_almost_done_comb  = (narrow_xy_addr == {col_index, word_index_last_minus1[MAC_INDEX_W-1:0]});
wire rectangle_almost_done_comb = (narrow_xy_addr == word_index_last_minus1);

// Registered versions: "almost" is the comb flag sampled in the BUSY states,
// "surely" is "almost" delayed by one clock, "tardy" is "surely" delayed by
// one more clock.
reg square_almost_done_flop    = 1'b0;
reg square_surely_done_flop    = 1'b0;
reg triangle_almost_done_flop  = 1'b0;
reg triangle_surely_done_flop  = 1'b0;
reg triangle_tardy_done_flop   = 1'b0;
reg rectangle_almost_done_flop = 1'b0;
reg rectangle_surely_done_flop = 1'b0;
reg rectangle_tardy_done_flop  = 1'b0;

//
// Square Completion Flags
//
always @(posedge clk) begin
    // delay chain
    square_surely_done_flop <= square_almost_done_flop;
    // cleared by default, sampled only while a square column is busy
    square_almost_done_flop <= 1'b0;
    case (fsm_state)
        MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:
            square_almost_done_flop <= square_almost_done_comb;
        default: ;
    endcase
end

//
// Triangle Completion Flags
//
always @(posedge clk) begin
    // delay chain
    triangle_tardy_done_flop  <= triangle_surely_done_flop;
    triangle_surely_done_flop <= triangle_almost_done_flop;
    // cleared by default, sampled only while a triangle column is busy
    triangle_almost_done_flop <= 1'b0;
    case (fsm_state)
        MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
            triangle_almost_done_flop <= triangle_almost_done_comb;
        default: ;
    endcase
end

//
// Rectangle Completion Flags
//
always @(posedge clk) begin
    // delay chain
    rectangle_tardy_done_flop  <= rectangle_surely_done_flop;
    rectangle_surely_done_flop <= rectangle_almost_done_flop;
    // cleared by default, sampled only while a rectangle column is busy
    rectangle_almost_done_flop <= 1'b0;
    case (fsm_state)
        MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
            rectangle_almost_done_flop <= rectangle_almost_done_comb;
        default: ;
    endcase
end
//
// Narrow Storage Control Logic
//
// Drives the narrow storage read port (address, bank, enable). All three
// case statements are keyed on fsm_state_next, so the registers already hold
// the values for a state in the cycle the FSM enters it.
// Only narrow_xy_ena is cleared by the reset; address and bank are simply
// not updated while rst_n is low.
always @(posedge clk or negedge rst_n)
//
if (!rst_n) narrow_xy_ena <= 1'b0;
else begin
//
// Narrow Address
//
// Every INIT state restarts the address at zero; TRIG/BUSY states count
// upwards until the completion flags of the active phase fire.
case (fsm_state_next)
//
MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= OP_ADDR_ZERO;
//
// square: increment, return to zero once "almost done" is flagged
MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr_inc : OP_ADDR_ZERO;
//
// triangle: return to zero on "almost done", and also on "surely done"
// while the last column is being processed
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? OP_ADDR_ZERO : narrow_xy_addr_inc;
//
// rectangle: address becomes OP_ADDR_ONE (not zero) once either done flag is set
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? OP_ADDR_ONE : narrow_xy_addr_inc;
//
default: narrow_xy_addr <= OP_ADDR_DNC;
//
endcase
//
// Narrow Bank
//
case (fsm_state_next)
//
// square: bank is chosen by the caller via sel_narrow_in
MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in;
//
// triangle: BANK_NARROW_COEFF, switching to BANK_NARROW_EXT at the end of
// the last column
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? BANK_NARROW_EXT : BANK_NARROW_COEFF;
//
// rectangle: BANK_NARROW_Q, switching to BANK_NARROW_EXT at the end of
// every column
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? BANK_NARROW_EXT : BANK_NARROW_Q;
//
default: narrow_xy_bank <= BANK_DNC;
//
endcase
//
// Narrow Enable
//
// Always enabled in INIT/TRIG states; in BUSY states the enable is dropped
// by the completion flag of the active phase.
case (fsm_state_next)
//
MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
//
MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop;
//
// the last triangle column keeps reading for one extra cycle
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop;
//
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop;
//
default: narrow_xy_ena <= 1'b0;
//
endcase
//
end
//
// Wide Storage Control Logic
//
// Constant table of starting word offsets for the wide addresses: entry k
// holds the odd value 2*k+1, truncated to MAC_INDEX_W bits (the loop visits
// z = 1, 3, 5, ... and stores z at index (z-1)/2).
// Relies on genvar z declared earlier in this module.
wire [MAC_INDEX_W-1:0] wide_offset_rom[0:NUM_MULTS_HALF-1];
generate for (z=1; z<NUM_MULTS; z=z+2)
begin : gen_wide_offset_rom
assign wide_offset_rom[(z-1)/2] = z[MAC_INDEX_W-1:0];
end
endgenerate
// Returns the successor of a wide storage address: addresses count
// downwards and wrap from zero back to wide_xy_addr_last.
//   wide_xy_addr_current - address presented in the current cycle
//   wide_xy_addr_last    - value to reload after address zero
function [OP_ADDR_W-1:0] wide_xy_addr_next;
    input [OP_ADDR_W-1:0] wide_xy_addr_current;
    input [OP_ADDR_W-1:0] wide_xy_addr_last;
    begin
        // assume the wrap-around case, then override it for non-zero addresses
        wide_xy_addr_next = wide_xy_addr_last;
        if (wide_xy_addr_current > OP_ADDR_ZERO)
            wide_xy_addr_next = wide_xy_addr_current - 1'b1;
    end
endfunction
// Drives the wide storage read port (main and auxiliary address, bank and
// enable). Every case statement is keyed on fsm_state_next, so the registers
// already hold the values for a state in the cycle the FSM enters it.
// Only the two enables are cleared by the reset; addresses and banks are
// simply not updated while rst_n is low.
integer j;
always @(posedge clk or negedge rst_n)
    //
    if (!rst_n) begin
        wide_xy_ena     <= 1'b0;
        wide_xy_ena_aux <= 1'b0;
    end else begin
        //
        // Wide Address
        //
        // Each of the NUM_MULTS_HALF addresses starts at {column, odd offset}
        // and then counts downwards, wrapping from zero to word_index_last.
        //
        for (j=0; j<NUM_MULTS_HALF; j=j+1) begin
            //
            case (fsm_state_next)
                //
                // another way to code this is to extend the look-up table to 8 entries and just use shifts
                // instead of subtractions (this requires further research, let's maybe not do it right now)
                //
                // column 0: the upper part is a column-index-sized zero, so that the
                // concatenation is exactly OP_ADDR_W bits wide (same form as COL_N below)
                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:    wide_xy_addr[j] <= {COL_INDEX_ZERO, wide_offset_rom[j]};
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT:    wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
                //
                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
                //
                default:                                    wide_xy_addr[j] <= OP_ADDR_DNC;
                //
            endcase
            //
        end
        //
        // Wide Aux Address
        //
        // Square/triangle: starts at OP_ADDR_ONE and counts down like the main
        // addresses. Rectangle: follows the address produced by the recombinator
        // whenever rcmb_xy_valid is set.
        //
        case (fsm_state_next)
            //
            // there's a potentially more efficient way to code the switch (see above)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:     wide_xy_addr_aux <= OP_ADDR_ONE;
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:    wide_xy_addr_aux <= OP_ADDR_DNC;
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:       wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : OP_ADDR_DNC;
            //
            default:                                    wide_xy_addr_aux <= OP_ADDR_DNC;
            //
        endcase
        //
        // Wide Bank
        //
        // Square: caller-selected bank; triangle: BANK_WIDE_L; rectangle: BANK_WIDE_N.
        //
        case (fsm_state_next)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       wide_xy_bank <= sel_wide_in;
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     wide_xy_bank <= BANK_WIDE_L;
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     wide_xy_bank <= BANK_WIDE_L;
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    wide_xy_bank <= BANK_WIDE_N;
            //
            default:                                    wide_xy_bank <= BANK_DNC;
            //
        endcase
        //
        // Wide Aux Bank
        //
        // Triangle: BANK_WIDE_H during INIT/TRIG, BANK_WIDE_L during BUSY.
        // Rectangle: translated from the bank reported by the recombinator.
        //
        case (fsm_state_next)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       wide_xy_bank_aux <= sel_wide_in;
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     wide_xy_bank_aux <= BANK_WIDE_H;
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     wide_xy_bank_aux <= BANK_WIDE_L;
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: begin
                // don't-care unless the recombinator reports a recognised bank
                wide_xy_bank_aux <= BANK_DNC;
                if (rcmb_xy_valid)
                    case (rcmb_xy_bank)
                        BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
                        BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
                    endcase
            end
            //
            default:                                    wide_xy_bank_aux <= BANK_DNC;
            //
        endcase
        //
        // Wide Enable
        //
        // Active in every INIT/TRIG/BUSY state of all three phases.
        //
        case (fsm_state_next)
            //
            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:    wide_xy_ena <= 1'b1;
            //
            default:                                    wide_xy_ena <= 1'b0;
            //
        endcase
        //
        // Wide Aux Enable
        //
        // Triangle: always on. Rectangle: follows rcmb_xy_valid (off in COL_0_INIT).
        // The square states fall into the default branch, i.e. the enable is off.
        //
        case (fsm_state_next)
            //
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     wide_xy_ena_aux <= 1'b1;
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:    wide_xy_ena_aux <= 1'b0;
            //
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:       wide_xy_ena_aux <= rcmb_xy_valid;
            //
            default:                                    wide_xy_ena_aux <= 1'b0;
            //
        endcase
        //
    end
//
// Delay Lines
//
// One-clock delayed copy of the narrow read address.
always @(posedge clk) begin
    narrow_xy_addr_dly <= narrow_xy_addr;
end
//
// DSP Array Logic
//
// Clock enables shared by both DSP array blocks.
reg dsp_xy_ce_a = 1'b0;
reg dsp_xy_ce_b = 1'b0;
reg dsp_xy_ce_b_dly = 1'b0;
reg dsp_xy_ce_m = 1'b0;
reg dsp_xy_ce_p = 1'b0;
reg dsp_xy_ce_mode = 1'b0;
// Per-multiplier mode_z bits, all ones after power-up.
reg [ NUM_MULTS_AUX -1:0] dsp_xy_mode_z = {NUM_MULTS_AUX{1'b1}};
// A operands: wide storage read data, with the auxiliary word in the top slot.
wire [WORD_EXT_W * NUM_MULTS_HALF_AUX -1:0] dsp_x_a;
wire [WORD_EXT_W * NUM_MULTS_HALF_AUX -1:0] dsp_y_a;
// B operands and the carry taken from the extended bits of the narrow word.
reg [WORD_W -1:0] dsp_x_b;
reg [WORD_W -1:0] dsp_y_b;
reg [CARRY_W -1:0] dsp_xy_b_carry;
// Product outputs, MAC_W bits per multiplier.
wire [MAC_W * NUM_MULTS_AUX -1:0] dsp_x_p;
wire [MAC_W * NUM_MULTS_AUX -1:0] dsp_y_p;
assign dsp_x_a = {rd_wide_x_din_aux, rd_wide_x_din};
assign dsp_y_a = {rd_wide_y_din_aux, rd_wide_y_din};
// dsp_xy_ce_b delayed by one clock; source of ce_m and ce_mode.
always @(posedge clk)
//
dsp_xy_ce_b_dly <= dsp_xy_ce_b;
// X channel DSP array; shares all control signals with the Y channel.
modexpng_dsp_array_block dsp_array_block_x
(
.clk (clk),
.ce_a (dsp_xy_ce_a),
.ce_b (dsp_xy_ce_b),
.ce_m (dsp_xy_ce_m),
.ce_p (dsp_xy_ce_p),
.ce_mode (dsp_xy_ce_mode),
.mode_z (dsp_xy_mode_z),
.a (dsp_x_a),
.b (dsp_x_b),
.p (dsp_x_p)
);
// Y channel DSP array.
modexpng_dsp_array_block dsp_array_block_y
(
.clk (clk),
.ce_a (dsp_xy_ce_a),
.ce_b (dsp_xy_ce_b),
.ce_m (dsp_xy_ce_m),
.ce_p (dsp_xy_ce_p),
.ce_mode (dsp_xy_ce_mode),
.mode_z (dsp_xy_mode_z),
.a (dsp_y_a),
.b (dsp_y_b),
.p (dsp_y_p)
);
//
// DSP Control Logic
//
// Narrow read enable delayed by one and two clocks.
reg narrow_xy_ena_dly1 = 1'b0;
reg narrow_xy_ena_dly2 = 1'b0;

// Derives the DSP clock enables from the delayed narrow read enable:
// ce_a covers both delay taps, ce_b follows the second tap, ce_m and
// ce_mode follow the delayed ce_b, and ce_p follows ce_m.
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        dsp_xy_ce_mode     <= 1'b0;
        dsp_xy_ce_p        <= 1'b0;
        dsp_xy_ce_m        <= 1'b0;
        dsp_xy_ce_b        <= 1'b0;
        dsp_xy_ce_a        <= 1'b0;
        narrow_xy_ena_dly2 <= 1'b0;
        narrow_xy_ena_dly1 <= 1'b0;
    end else begin
        // enable delay chain
        narrow_xy_ena_dly2 <= narrow_xy_ena_dly1;
        narrow_xy_ena_dly1 <= narrow_xy_ena;
        // clock enables, listed from the output stage back to the input stage
        dsp_xy_ce_mode     <= dsp_xy_ce_b_dly;
        dsp_xy_ce_p        <= dsp_xy_ce_m;
        dsp_xy_ce_m        <= dsp_xy_ce_b_dly;
        dsp_xy_ce_b        <= narrow_xy_ena_dly2;
        dsp_xy_ce_a        <= narrow_xy_ena_dly2 | narrow_xy_ena_dly1;
    end
end
//
// DSP Feed Logic
//
// dsp_merge_xy_b:       set when the square phase starts (both channels then
//                       share one carry-corrected B operand), cleared when
//                       the triangle phase starts; held otherwise.
// dsp_merge_xy_b_first: high for the cycle after either square TRIG state,
//                       low in every other cycle.
reg dsp_merge_xy_b;
reg dsp_merge_xy_b_first;

always @(posedge clk) begin
    // the "first" marker is a one-cycle pulse, so clear it by default
    dsp_merge_xy_b_first <= 1'b0;

    case (fsm_state)
        MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: begin
            dsp_merge_xy_b       <= 1'b1;
            dsp_merge_xy_b_first <= 1'b1;
        end
        MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: begin
            dsp_merge_xy_b_first <= 1'b1;
        end
        MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: begin
            dsp_merge_xy_b       <= 1'b0;
        end
        default: ;
    endcase
end
//
// On-the-fly Carry Recombination Logic
//
// Narrow read data with the carry from the previous word added in.
wire [WORD_EXT_W-1:0] rd_narrow_x_din_carry = rd_narrow_x_din + {WORD_ZERO, dsp_xy_b_carry};
wire [WORD_EXT_W-1:0] rd_narrow_y_din_carry = rd_narrow_y_din + {WORD_ZERO, dsp_xy_b_carry};

// Channel selection for the merged operand: Y in ladder mode, X otherwise.
wire [WORD_EXT_W-1:0] rd_narrow_xy_din_carry_mux =
    ladder_mode ? rd_narrow_y_din_carry : rd_narrow_x_din_carry;

// Merged B operand: either the selected word, or, when force_unity_b is set,
// the constant one (WORD_ONE on the first word, WORD_ZERO afterwards).
wire [WORD_W-1:0] rd_narrow_xy_dout_carry_mux_or_unity =
    force_unity_b ? (dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO)
                  : rd_narrow_xy_din_carry_mux[WORD_W-1:0];

// B operand and carry registers, loaded while the twice-delayed narrow
// enable is active and parked at don't-care / zero carry otherwise.
always @(posedge clk) begin
    if (narrow_xy_ena_dly2) begin
        if (!dsp_merge_xy_b) begin
            // separate operands: each channel takes its own word, no carry
            dsp_xy_b_carry <= CARRY_ZERO;
            dsp_x_b        <= rd_narrow_x_din[WORD_W-1:0];
            dsp_y_b        <= rd_narrow_y_din[WORD_W-1:0];
        end else begin
            // merged operand: both channels take the same word and the
            // extended bits are kept as the carry into the next word
            dsp_xy_b_carry <= rd_narrow_xy_din_carry_mux[WORD_EXT_W-1:WORD_W];
            dsp_x_b        <= rd_narrow_xy_dout_carry_mux_or_unity;
            dsp_y_b        <= rd_narrow_xy_dout_carry_mux_or_unity;
        end
    end else begin
        dsp_xy_b_carry <= CARRY_ZERO;
        dsp_x_b        <= WORD_DNC;
        dsp_y_b        <= WORD_DNC;
    end
end
//
// DSP Mode Logic
//
// Four-stage look-ahead pipeline for dsp_xy_mode_z: adv4 is computed first
// and reaches dsp_xy_mode_z four clocks later (adv4 -> adv3 -> adv2 -> adv1).
// All stages power up as all ones.
reg [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv1 = {NUM_MULTS_AUX{1'b1}};
reg [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv2 = {NUM_MULTS_AUX{1'b1}};
reg [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv3 = {NUM_MULTS_AUX{1'b1}};
reg [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv4 = {NUM_MULTS_AUX{1'b1}};
// Computes the mode_z mask for the square phase.
//   col_index_value      - column index to match (COL_INDEX_W bits, the same
//                          width as the address slice it is compared with)
//   narrow_xy_addr_value - narrow address being processed
// Returns all ones, except that when the upper address bits equal the column
// index, the bit selected by the lower MAC_INDEX_W address bits is cleared.
function [NUM_MULTS_AUX -1:0] calc_mac_mode_z_square;
    //
    input [COL_INDEX_W -1:0] col_index_value;
    input [OP_ADDR_W   -1:0] narrow_xy_addr_value;
    //
    begin
        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
            // one-hot at the word position inside the column, inverted
            calc_mac_mode_z_square = ~({{NUM_MULTS_AUX-1{1'b0}}, 1'b1} << narrow_xy_addr_value[MAC_INDEX_W-1:0]);
        else
            calc_mac_mode_z_square = {NUM_MULTS_AUX{1'b1}};
    end
endfunction
// Computes the mode_z mask for the rectangle phase.
//   col_index_value      - column index to match (COL_INDEX_W bits, the same
//                          width as the address slice it is compared with)
//   narrow_xy_addr_value - narrow address being processed
// Returns all ones, except that when the upper address bits equal the column
// index, the bit selected by the lower MAC_INDEX_W address bits is cleared.
function [NUM_MULTS_AUX -1:0] calc_mac_mode_z_rectangle;
    //
    input [COL_INDEX_W -1:0] col_index_value;
    input [OP_ADDR_W   -1:0] narrow_xy_addr_value;
    //
    begin
        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
            // one-hot at the word position inside the column, inverted
            calc_mac_mode_z_rectangle = ~({{NUM_MULTS_AUX-1{1'b0}}, 1'b1} << narrow_xy_addr_value[MAC_INDEX_W-1:0]);
        else
            calc_mac_mode_z_rectangle = {NUM_MULTS_AUX{1'b1}};
    end
endfunction
// First stage of the mode_z look-ahead pipeline. The mask is all ones unless
// the upcoming state is a TRIG state (all zeros) or a square/rectangle BUSY
// state (mask computed from the delayed column index and narrow address).
always @(posedge clk) begin

    // default: all ones (also covers the triangle BUSY states)
    dsp_xy_mode_z_adv4 <= {NUM_MULTS_AUX{1'b1}};

    case (fsm_state_next)

        MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
        MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
        MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:
            dsp_xy_mode_z_adv4 <= {NUM_MULTS_AUX{1'b0}};

        MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:
            dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);

        MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
        MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
            dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);

        default: ;

    endcase

end
// Shifts the mode_z look-ahead pipeline by one stage per clock:
// adv4 -> adv3 -> adv2 -> adv1 -> dsp_xy_mode_z.
always @(posedge clk) begin
    dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4;
    dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3;
    dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2;
    dsp_xy_mode_z      <= dsp_xy_mode_z_adv1;
end
//
// Recombinator
//
// Recombinator start pulse (generated by the enable logic in this module)
// and its ready flag, which the FSM waits for in the HOLDOFF states.
reg rcmb_ena = 1'b0;
wire rcmb_rdy;
// The recombinator receives the DSP products together with the FSM and
// addressing context, and drives all rcmb_* module outputs directly.
// Its rdct_narrow_* ports are wired to the rcmb_xy_* module outputs.
modexpng_recombinator_block recombinator_block
(
.clk (clk),
.rst_n (rst_n),
.ena (rcmb_ena),
.rdy (rcmb_rdy),
.fsm_state_next (fsm_state_next),
.word_index_last (word_index_last),
.dsp_xy_ce_p (dsp_xy_ce_p),
.dsp_x_p (dsp_x_p),
.dsp_y_p (dsp_y_p),
.col_index (col_index),
.col_index_last (col_index_last),
.rd_narrow_xy_addr (narrow_xy_addr),
.rd_narrow_xy_bank (narrow_xy_bank),
.rcmb_wide_xy_bank (rcmb_wide_xy_bank),
.rcmb_wide_xy_addr (rcmb_wide_xy_addr),
.rcmb_wide_x_dout (rcmb_wide_x_dout),
.rcmb_wide_y_dout (rcmb_wide_y_dout),
.rcmb_wide_xy_valid (rcmb_wide_xy_valid),
.rcmb_narrow_xy_bank (rcmb_narrow_xy_bank),
.rcmb_narrow_xy_addr (rcmb_narrow_xy_addr),
.rcmb_narrow_x_dout (rcmb_narrow_x_dout),
.rcmb_narrow_y_dout (rcmb_narrow_y_dout),
.rcmb_narrow_xy_valid (rcmb_narrow_xy_valid),
.rdct_narrow_xy_bank (rcmb_xy_bank),
.rdct_narrow_xy_addr (rcmb_xy_addr),
.rdct_narrow_x_dout (rcmb_x_dout),
.rdct_narrow_y_dout (rcmb_y_dout),
.rdct_narrow_xy_valid (rcmb_xy_valid)
);
//
// Recombinator Enable Logic
//
// Starts the recombinator in the cycle after ce_a is the only active DSP
// clock enable (ce_b, ce_m and ce_p all low).
always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        rcmb_ena <= 1'b0;
    end else begin
        rcmb_ena <= dsp_xy_ce_a & ~(dsp_xy_ce_b | dsp_xy_ce_m | dsp_xy_ce_p);
    end
end
//
// Handy Completion Flags
//
// Per-phase "column finished" conditions used by the BUSY states.
// The triangle phase waits one extra cycle in its last column; the rectangle
// phase always uses the latest ("tardy") flag.
wire square_done = square_surely_done_flop;
wire triangle_done = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop;
wire rectangle_done = rectangle_tardy_done_flop;
//
// FSM Transition Logic
//
// Branch targets:
//  - after IDLE: square phase, or straight to the triangle phase when only_reduce is set
//  - after a COL_N_BUSY state: HOLDOFF once the last column is done, else the next column
//  - after the square HOLDOFF: STOP when just_multiply is set, else the triangle phase
assign fsm_state_after_idle = !only_reduce ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT ;
assign fsm_state_after_mult_square = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT ;
assign fsm_state_after_mult_triangle = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT ;
assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT ;
assign fsm_state_after_square_holdoff = just_multiply ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT ;
// Next-state function. Each phase runs INIT -> TRIG -> BUSY for column 0,
// repeats INIT -> TRIG -> BUSY for the remaining columns, then waits in
// HOLDOFF until the recombinator is ready. After the rectangle phase the FSM
// additionally waits for the reductor before passing through STOP to IDLE.
// Note that COL_0_BUSY always continues with COL_N_INIT, i.e. column 0 is
// never treated as the last column.
always @* begin
//
// default keeps the block purely combinational for unlisted state codes
fsm_state_next = MMM_FSM_STATE_IDLE;
//
case (fsm_state)
MMM_FSM_STATE_IDLE: fsm_state_next = ena ? fsm_state_after_idle : MMM_FSM_STATE_IDLE;
MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY;
MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
MMM_FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? fsm_state_after_square_holdoff : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_WAIT_REDUCTOR : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;
MMM_FSM_STATE_WAIT_REDUCTOR: fsm_state_next = rdct_rdy ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_WAIT_REDUCTOR;
MMM_FSM_STATE_STOP: fsm_state_next = MMM_FSM_STATE_IDLE ;
default: fsm_state_next = MMM_FSM_STATE_IDLE ;
endcase
//
end
//
// Reductor Control Logic
//
// Reductor start pulse: high for exactly the cycle that follows the FSM
// being in the RECTANGLE_COL_0_INIT state, low otherwise.
reg rdct_ena_reg = 1'b0;
assign rdct_ena = rdct_ena_reg;

always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        rdct_ena_reg <= 1'b0;
    end else begin
        // low by default, overridden while the rectangle phase is starting
        rdct_ena_reg <= 1'b0;
        case (fsm_state)
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
            default: ;
        endcase
    end
end
//
// Ready Logic
//
// Ready flag: high after reset, dropped when a start request is accepted
// (rdy && ena) and raised again once the FSM reaches the STOP state.
reg rdy_reg = 1'b1;
assign rdy = rdy_reg;

always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        rdy_reg <= 1'b1;
    end else begin
        // completion is tested first so that it keeps priority over a start request
        if (!rdy && (fsm_state == MMM_FSM_STATE_STOP)) begin
            rdy_reg <= 1'b1;
        end else if (rdy && ena) begin
            rdy_reg <= 1'b0;
        end
    end
end
//
// Debug
//
`ifdef MODEXPNG_ENABLE_DEBUG
// Debug-only counter of the clock cycles in which dsp_xy_ce_m is active.
real load_cyc_mult = 0.0;
always @(posedge clk) begin
    if (dsp_xy_ce_m) begin
        load_cyc_mult <= load_cyc_mult + 1.0;
    end
end
`endif
endmodule