aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-03 16:40:25 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-03 16:40:25 +0300
commitaffada8d5da7426d22035360c3674ab3b3311ab5 (patch)
treea3db075dc03033db45e3ad5279badf2da48b4566
parent0b4b42da734c1164b65a334351274f946b2d4dcb (diff)
Reworked storage architecture (moved I/O memory to a separate module, since there's
only one instance of input/output values, while storage manager has dual storage space for P and Q multipliers). Started working on microcoded layer, added input operation and modular multiplication.
-rw-r--r--rtl/modexpng_core_top.v1009
-rw-r--r--rtl/modexpng_io_block.v158
-rw-r--r--rtl/modexpng_io_manager.v527
-rw-r--r--rtl/modexpng_microcode.vh54
-rw-r--r--rtl/modexpng_parameters.vh37
-rw-r--r--rtl/modexpng_recombinator_block.v6
-rw-r--r--rtl/modexpng_reductor.v139
-rw-r--r--rtl/modexpng_sdp_36k_x16_x32_wrapper.v6
-rw-r--r--rtl/modexpng_sdp_36k_x18_wrapper.v67
-rw-r--r--rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v (renamed from rtl/modexpng_sdp_36k_wrapper.v)2
-rw-r--r--rtl/modexpng_storage_block.v139
-rw-r--r--rtl/modexpng_storage_manager.v56
-rw-r--r--rtl/modexpng_tdp_36k_x16_x32_wrapper.v18
-rw-r--r--rtl/modexpng_uop_rom.v37
14 files changed, 2035 insertions, 220 deletions
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index e834d37..eb6826c 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -3,7 +3,9 @@ module modexpng_core_top
clk, clk_bus,
rst,
next, valid,
- word_index_last,
+ crt_mode,
+ word_index_last_n,
+ word_index_last_pq,
bus_cs,
bus_we,
bus_addr,
@@ -16,26 +18,1011 @@ module modexpng_core_top
// Headers
//
`include "modexpng_parameters.vh"
+ `include "modexpng_microcode.vh"
//
// Ports
//
- input clk;
- input clk_bus;
+ input clk;
+ input clk_bus;
- input rst;
+ input rst;
- input next;
- output valid;
+ input next;
+ output valid;
+
+ input crt_mode;
+
+ input [ OP_ADDR_W -1:0] word_index_last_n;
+ input [ OP_ADDR_W -1:0] word_index_last_pq;
+
+ input bus_cs;
+ input bus_we;
+ input [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
+ input [ BUS_DATA_W -1:0] bus_data_wr;
+ output [ BUS_DATA_W -1:0] bus_data_rd;
+
+
+ //
+ // UOP_FSM: micro-operation sequencer state encoding and state registers
+ //
+ localparam [1:0] UOP_FSM_STATE_IDLE = 2'b00; // waiting for `next`
+ localparam [1:0] UOP_FSM_STATE_FETCH = 2'b01; // uop_addr was updated on entry; always followed by DECODE
+ localparam [1:0] UOP_FSM_STATE_DECODE = 2'b10; // uop_data fields are latched; a STOP opcode returns to IDLE, anything else goes to BUSY
+ localparam [1:0] UOP_FSM_STATE_BUSY = 2'b11; // held until uop_exit_from_busy is asserted, then back to FETCH
+ // All four 2-bit codes are in use, so there is no spare encoding.
+ reg [1:0] uop_fsm_state = UOP_FSM_STATE_IDLE; // current state (registered; forced to IDLE while rst is high)
+ reg [1:0] uop_fsm_state_next; // next state (combinational, assigned in an always @* block)
+
+
+ //
+ // UOP ROM: microcode store and field decode. Fields are sliced from uop_data starting at the MSB, in this order: opcode, crt, npq, aux, ladder, sel_wide_in, sel_narrow_in, sel_wide_out, sel_narrow_out.
+ //
+ reg [UOP_ADDR_W -1:0] uop_addr;
+ wire [UOP_W -1:0] uop_data;
+ wire [UOP_OPCODE_W -1:0] uop_data_opcode = uop_data[UOP_W -1-: UOP_OPCODE_W];
+ wire [UOP_CRT_W -1:0] uop_data_crt = uop_data[UOP_W -UOP_OPCODE_W -1-: UOP_CRT_W ];
+ wire [UOP_NPQ_W -1:0] uop_data_npq = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -1-: UOP_NPQ_W ];
+ wire [UOP_AUX_W -1:0] uop_data_aux = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -1-: UOP_AUX_W ];
+ wire [UOP_LADDER_W -1:0] uop_data_ladder = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -1-: UOP_LADDER_W];
+ wire [BANK_ADDR_W -1:0] uop_data_sel_wide_in = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1-: BANK_ADDR_W ];
+ wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_in = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1*BANK_ADDR_W -1-: BANK_ADDR_W ];
+ wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ];
+ wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ];
+ // Opcode class flags consumed by the sequencer, the trigger logic and the completion detector.
+ wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP;
+ wire uop_opcode_is_io = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) ||
+ (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ||
+ (uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW) ;
+ wire uop_opcode_is_mmm = uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY;
+ // Program entry point is selected by crt_mode; uop_addr_next is the current address plus one (wraps at UOP_ADDR_W bits).
+ wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT;
+ wire [UOP_ADDR_W -1:0] uop_addr_next = uop_addr + 1'b1;
+ // The ROM takes clk, so uop_data presumably lags uop_addr; the latency is not visible here - TODO confirm in modexpng_uop_rom.
+ modexpng_uop_rom uop_rom
+ (
+ .clk (clk),
+ .addr (uop_addr),
+ .data (uop_data)
+ );
+
+
+ //
+ // UOP ROM Address Logic
+ //
+ // uop_addr changes only on the clock edge that enters FETCH; it has no reset and holds its value otherwise.
+ always @(posedge clk)
+ // entering FETCH from IDLE loads the program entry point, entering it from any other state steps to the next micro-operation
+ if (uop_fsm_state_next == UOP_FSM_STATE_FETCH)
+ uop_addr <= (uop_fsm_state == UOP_FSM_STATE_IDLE) ? uop_addr_offset : uop_addr_next;
+
+
+ //
+ // Storage Interfaces (X, Y)
+ //
+ wire wr_wide_xy_ena_x;
+ wire [BANK_ADDR_W -1:0] wr_wide_xy_bank_x;
+ wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_x;
+ wire [ WORD_EXT_W -1:0] wr_wide_x_din_x;
+ wire [ WORD_EXT_W -1:0] wr_wide_y_din_x;
+
+ wire wr_narrow_xy_ena_x;
+ wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x;
+ wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_x;
+ wire [ WORD_EXT_W -1:0] wr_narrow_x_din_x;
+ wire [ WORD_EXT_W -1:0] wr_narrow_y_din_x;
+
+ wire rd_wide_xy_ena_x;
+ wire rd_wide_xy_ena_aux_x;
+ wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_x;
+ wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x;
+ wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_x;
+ wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_x;
+ wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout_x;
+ wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout_x;
+ wire [ WORD_EXT_W -1:0] rd_wide_x_dout_aux_x;
+ wire [ WORD_EXT_W -1:0] rd_wide_y_dout_aux_x;
+
+ wire rd_narrow_xy_ena_x;
+ wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_x;
+ wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x;
+ wire [ WORD_EXT_W -1:0] rd_narrow_x_dout_x;
+ wire [ WORD_EXT_W -1:0] rd_narrow_y_dout_x;
+
+ wire ext_wide_xy_ena_x;
+ wire [BANK_ADDR_W -1:0] ext_wide_xy_bank_x;
+ wire [ OP_ADDR_W -1:0] ext_wide_xy_addr_x;
+ wire [ WORD_EXT_W -1:0] ext_wide_x_din_x;
+ wire [ WORD_EXT_W -1:0] ext_wide_y_din_x;
+
+ wire ext_narrow_xy_ena_x;
+ wire [BANK_ADDR_W -1:0] ext_narrow_xy_bank_x;
+ wire [ OP_ADDR_W -1:0] ext_narrow_xy_addr_x;
+ wire [ WORD_EXT_W -1:0] ext_narrow_x_din_x;
+ wire [ WORD_EXT_W -1:0] ext_narrow_y_din_x;
+
+ wire wr_wide_xy_ena_y;
+ wire [BANK_ADDR_W -1:0] wr_wide_xy_bank_y;
+ wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_y;
+ wire [ WORD_EXT_W -1:0] wr_wide_x_din_y;
+ wire [ WORD_EXT_W -1:0] wr_wide_y_din_y;
+
+ wire wr_narrow_xy_ena_y;
+ wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y;
+ wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y;
+ wire [ WORD_EXT_W -1:0] wr_narrow_x_din_y;
+ wire [ WORD_EXT_W -1:0] wr_narrow_y_din_y;
+
+ wire rd_wide_xy_ena_y;
+ wire rd_wide_xy_ena_aux_y;
+ wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_y;
+ wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_y;
+ wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_y;
+ wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_y;
+ wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout_y;
+ wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout_y;
+ wire [ WORD_EXT_W -1:0] rd_wide_x_dout_aux_y;
+ wire [ WORD_EXT_W -1:0] rd_wide_y_dout_aux_y;
+
+ wire rd_narrow_xy_ena_y;
+ wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_y;
+ wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y;
+ wire [ WORD_EXT_W -1:0] rd_narrow_x_dout_y;
+ wire [ WORD_EXT_W -1:0] rd_narrow_y_dout_y;
+
+ wire ext_wide_xy_ena_y;
+ wire [BANK_ADDR_W -1:0] ext_wide_xy_bank_y;
+ wire [ OP_ADDR_W -1:0] ext_wide_xy_addr_y;
+ wire [ WORD_EXT_W -1:0] ext_wide_x_din_y;
+ wire [ WORD_EXT_W -1:0] ext_wide_y_din_y;
+
+ wire ext_narrow_xy_ena_y;
+ wire [BANK_ADDR_W -1:0] ext_narrow_xy_bank_y;
+ wire [ OP_ADDR_W -1:0] ext_narrow_xy_addr_y;
+ wire [ WORD_EXT_W -1:0] ext_narrow_x_din_y;
+ wire [ WORD_EXT_W -1:0] ext_narrow_y_din_y;
+
+
+ //
+ // Recombinator Interfaces (X, Y)
+ //
+ wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_x;
+ wire [ OP_ADDR_W -1:0] rcmb_wide_xy_addr_x;
+ wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout_x;
+ wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout_x;
+ wire rcmb_wide_xy_valid_x;
+
+ wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_x;
+ wire [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr_x;
+ wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout_x;
+ wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout_x;
+ wire rcmb_narrow_xy_valid_x;
+
+ wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_x;
+ wire [ OP_ADDR_W -1:0] rcmb_final_xy_addr_x;
+ wire [ WORD_EXT_W -1:0] rcmb_final_x_dout_x;
+ wire [ WORD_EXT_W -1:0] rcmb_final_y_dout_x;
+ wire rcmb_final_xy_valid_x;
- input bus_cs;
- input bus_we;
- input [4 * (BANK_ADDR_W + BUS_OP_ADDR_W) -1:0] bus_addr;
- input [ BUS_DATA_W -1:0] bus_data_wr;
- output [ BUS_DATA_W -1:0] bus_data_rd;
+ wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_y;
+ wire [ OP_ADDR_W -1:0] rcmb_wide_xy_addr_y;
+ wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout_y;
+ wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout_y;
+ wire rcmb_wide_xy_valid_y;
+
+ wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_y;
+ wire [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr_y;
+ wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout_y;
+ wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout_y;
+ wire rcmb_narrow_xy_valid_y;
+
+ wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_y;
+ wire [ OP_ADDR_W -1:0] rcmb_final_xy_addr_y;
+ wire [ WORD_EXT_W -1:0] rcmb_final_x_dout_y;
+ wire [ WORD_EXT_W -1:0] rcmb_final_y_dout_y;
+ wire rcmb_final_xy_valid_y;
+
+
+ //
+ // Reductor Interfaces (X, Y)
+ //
+ wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_x;
+ wire [ OP_ADDR_W -1:0] rdct_wide_xy_addr_x;
+ wire [ WORD_EXT_W -1:0] rdct_wide_x_dout_x;
+ wire [ WORD_EXT_W -1:0] rdct_wide_y_dout_x;
+ wire rdct_wide_xy_valid_x;
+
+ wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_x;
+ wire [ OP_ADDR_W -1:0] rdct_narrow_xy_addr_x;
+ wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout_x;
+ wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout_x;
+ wire rdct_narrow_xy_valid_x;
+
+ wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_y;
+ wire [ OP_ADDR_W -1:0] rdct_wide_xy_addr_y;
+ wire [ WORD_EXT_W -1:0] rdct_wide_x_dout_y;
+ wire [ WORD_EXT_W -1:0] rdct_wide_y_dout_y;
+ wire rdct_wide_xy_valid_y;
+
+ wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_y;
+ wire [ OP_ADDR_W -1:0] rdct_narrow_xy_addr_y;
+ wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout_y;
+ wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout_y;
+ wire rdct_narrow_xy_valid_y;
+
+
+ //
+ // Storage Blocks (X, Y)
+ //
+ modexpng_storage_block storage_block_x
+ (
+ .clk (clk),
+ .rst (rst),
+ // wide write port; nets are shared with storage_manager_x
+ .wr_wide_xy_ena (wr_wide_xy_ena_x),
+ .wr_wide_xy_bank (wr_wide_xy_bank_x),
+ .wr_wide_xy_addr (wr_wide_xy_addr_x),
+ .wr_wide_x_din (wr_wide_x_din_x),
+ .wr_wide_y_din (wr_wide_y_din_x),
+ // narrow write port; nets are shared with storage_manager_x
+ .wr_narrow_xy_ena (wr_narrow_xy_ena_x),
+ .wr_narrow_xy_bank (wr_narrow_xy_bank_x),
+ .wr_narrow_xy_addr (wr_narrow_xy_addr_x),
+ .wr_narrow_x_din (wr_narrow_x_din_x),
+ .wr_narrow_y_din (wr_narrow_y_din_x),
+ // wide read ports; nets are shared with mmm_x. The *_aux bank/addr/dout nets are additionally connected to reductor_x - NOTE(review): confirm only one of mmm_x / reductor_x drives the aux bank/addr nets.
+ .rd_wide_xy_ena (rd_wide_xy_ena_x),
+ .rd_wide_xy_ena_aux (rd_wide_xy_ena_aux_x),
+ .rd_wide_xy_bank (rd_wide_xy_bank_x),
+ .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x),
+ .rd_wide_xy_addr (rd_wide_xy_addr_x), // NUM_MULTS_HALF addresses packed into one vector
+ .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x),
+ .rd_wide_x_dout (rd_wide_x_dout_x), // NUM_MULTS_HALF words packed into one vector
+ .rd_wide_y_dout (rd_wide_y_dout_x), // NUM_MULTS_HALF words packed into one vector
+ .rd_wide_x_dout_aux (rd_wide_x_dout_aux_x),
+ .rd_wide_y_dout_aux (rd_wide_y_dout_aux_x),
+ // narrow read port; nets are shared with mmm_x
+ .rd_narrow_xy_ena (rd_narrow_xy_ena_x),
+ .rd_narrow_xy_bank (rd_narrow_xy_bank_x),
+ .rd_narrow_xy_addr (rd_narrow_xy_addr_x),
+ .rd_narrow_x_dout (rd_narrow_x_dout_x),
+ .rd_narrow_y_dout (rd_narrow_y_dout_x)
+ );
+
+ modexpng_storage_block storage_block_y
+ (
+ .clk (clk),
+ .rst (rst),
+ // wide write port; nets are shared with storage_manager_y
+ .wr_wide_xy_ena (wr_wide_xy_ena_y),
+ .wr_wide_xy_bank (wr_wide_xy_bank_y),
+ .wr_wide_xy_addr (wr_wide_xy_addr_y),
+ .wr_wide_x_din (wr_wide_x_din_y),
+ .wr_wide_y_din (wr_wide_y_din_y),
+ // narrow write port; nets are shared with storage_manager_y
+ .wr_narrow_xy_ena (wr_narrow_xy_ena_y),
+ .wr_narrow_xy_bank (wr_narrow_xy_bank_y),
+ .wr_narrow_xy_addr (wr_narrow_xy_addr_y),
+ .wr_narrow_x_din (wr_narrow_x_din_y),
+ .wr_narrow_y_din (wr_narrow_y_din_y),
+ // wide read ports; nets are shared with mmm_y. The *_aux bank/addr/dout nets are additionally connected to reductor_y - NOTE(review): confirm only one of mmm_y / reductor_y drives the aux bank/addr nets.
+ .rd_wide_xy_ena (rd_wide_xy_ena_y),
+ .rd_wide_xy_ena_aux (rd_wide_xy_ena_aux_y),
+ .rd_wide_xy_bank (rd_wide_xy_bank_y),
+ .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y),
+ .rd_wide_xy_addr (rd_wide_xy_addr_y), // NUM_MULTS_HALF addresses packed into one vector
+ .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y),
+ .rd_wide_x_dout (rd_wide_x_dout_y), // NUM_MULTS_HALF words packed into one vector
+ .rd_wide_y_dout (rd_wide_y_dout_y), // NUM_MULTS_HALF words packed into one vector
+ .rd_wide_x_dout_aux (rd_wide_x_dout_aux_y),
+ .rd_wide_y_dout_aux (rd_wide_y_dout_aux_y),
+ // narrow read port; nets are shared with mmm_y
+ .rd_narrow_xy_ena (rd_narrow_xy_ena_y),
+ .rd_narrow_xy_bank (rd_narrow_xy_bank_y),
+ .rd_narrow_xy_addr (rd_narrow_xy_addr_y),
+ .rd_narrow_x_dout (rd_narrow_x_dout_y),
+ .rd_narrow_y_dout (rd_narrow_y_dout_y)
+ );
+
+
+ //
+ // Storage Managers (X, Y)
+ //
+ modexpng_storage_manager storage_manager_x
+ (
+ .clk (clk),
+ .rst (rst),
+ // wide write port; nets are shared with storage_block_x
+ .wr_wide_xy_ena (wr_wide_xy_ena_x),
+ .wr_wide_xy_bank (wr_wide_xy_bank_x),
+ .wr_wide_xy_addr (wr_wide_xy_addr_x),
+ .wr_wide_x_din (wr_wide_x_din_x),
+ .wr_wide_y_din (wr_wide_y_din_x),
+ // narrow write port; nets are shared with storage_block_x
+ .wr_narrow_xy_ena (wr_narrow_xy_ena_x),
+ .wr_narrow_xy_bank (wr_narrow_xy_bank_x),
+ .wr_narrow_xy_addr (wr_narrow_xy_addr_x),
+ .wr_narrow_x_din (wr_narrow_x_din_x),
+ .wr_narrow_y_din (wr_narrow_y_din_x),
+ // external wide stream; nets are shared with io_manager (its *_x ports)
+ .ext_wide_xy_ena (ext_wide_xy_ena_x),
+ .ext_wide_xy_bank (ext_wide_xy_bank_x),
+ .ext_wide_xy_addr (ext_wide_xy_addr_x),
+ .ext_wide_x_din (ext_wide_x_din_x),
+ .ext_wide_y_din (ext_wide_y_din_x),
+ // external narrow stream; nets are shared with io_manager (its *_x ports)
+ .ext_narrow_xy_ena (ext_narrow_xy_ena_x),
+ .ext_narrow_xy_bank (ext_narrow_xy_bank_x),
+ .ext_narrow_xy_addr (ext_narrow_xy_addr_x),
+ .ext_narrow_x_din (ext_narrow_x_din_x),
+ .ext_narrow_y_din (ext_narrow_y_din_x),
+ // wide recombinator stream; nets are shared with mmm_x (this side's *_ena port is fed by the *_valid net)
+ .rcmb_wide_xy_bank (rcmb_wide_xy_bank_x),
+ .rcmb_wide_xy_addr (rcmb_wide_xy_addr_x),
+ .rcmb_wide_x_din (rcmb_wide_x_dout_x),
+ .rcmb_wide_y_din (rcmb_wide_y_dout_x),
+ .rcmb_wide_xy_ena (rcmb_wide_xy_valid_x),
+ // narrow recombinator stream; nets are shared with mmm_x (this side's *_ena port is fed by the *_valid net)
+ .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_x),
+ .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_x),
+ .rcmb_narrow_x_din (rcmb_narrow_x_dout_x),
+ .rcmb_narrow_y_din (rcmb_narrow_y_dout_x),
+ .rcmb_narrow_xy_ena (rcmb_narrow_xy_valid_x),
+ // wide reductor stream; nets are shared with reductor_x
+ .rdct_wide_xy_bank (rdct_wide_xy_bank_x),
+ .rdct_wide_xy_addr (rdct_wide_xy_addr_x),
+ .rdct_wide_x_din (rdct_wide_x_dout_x), // TODO: maybe just rename to {x|y}_x, since that's an
+ .rdct_wide_y_din (rdct_wide_y_dout_x), // internal signal??
+ .rdct_wide_xy_valid (rdct_wide_xy_valid_x),
+ // narrow reductor stream; nets are shared with reductor_x
+ .rdct_narrow_xy_bank (rdct_narrow_xy_bank_x),
+ .rdct_narrow_xy_addr (rdct_narrow_xy_addr_x),
+ .rdct_narrow_x_din (rdct_narrow_x_dout_x),
+ .rdct_narrow_y_din (rdct_narrow_y_dout_x),
+ .rdct_narrow_xy_valid (rdct_narrow_xy_valid_x)
+ );
+
+ modexpng_storage_manager storage_manager_y
+ (
+ .clk (clk),
+ .rst (rst),
+ // wide write port; nets are shared with storage_block_y
+ .wr_wide_xy_ena (wr_wide_xy_ena_y),
+ .wr_wide_xy_bank (wr_wide_xy_bank_y),
+ .wr_wide_xy_addr (wr_wide_xy_addr_y),
+ .wr_wide_x_din (wr_wide_x_din_y),
+ .wr_wide_y_din (wr_wide_y_din_y),
+ // narrow write port; nets are shared with storage_block_y
+ .wr_narrow_xy_ena (wr_narrow_xy_ena_y),
+ .wr_narrow_xy_bank (wr_narrow_xy_bank_y),
+ .wr_narrow_xy_addr (wr_narrow_xy_addr_y),
+ .wr_narrow_x_din (wr_narrow_x_din_y),
+ .wr_narrow_y_din (wr_narrow_y_din_y),
+ // external wide stream; nets are shared with io_manager (its *_y ports)
+ .ext_wide_xy_ena (ext_wide_xy_ena_y),
+ .ext_wide_xy_bank (ext_wide_xy_bank_y),
+ .ext_wide_xy_addr (ext_wide_xy_addr_y),
+ .ext_wide_x_din (ext_wide_x_din_y),
+ .ext_wide_y_din (ext_wide_y_din_y),
+ // external narrow stream; nets are shared with io_manager (its *_y ports)
+ .ext_narrow_xy_ena (ext_narrow_xy_ena_y),
+ .ext_narrow_xy_bank (ext_narrow_xy_bank_y),
+ .ext_narrow_xy_addr (ext_narrow_xy_addr_y),
+ .ext_narrow_x_din (ext_narrow_x_din_y),
+ .ext_narrow_y_din (ext_narrow_y_din_y),
+ // wide recombinator stream; nets are shared with mmm_y (this side's *_ena port is fed by the *_valid net)
+ .rcmb_wide_xy_bank (rcmb_wide_xy_bank_y),
+ .rcmb_wide_xy_addr (rcmb_wide_xy_addr_y),
+ .rcmb_wide_x_din (rcmb_wide_x_dout_y),
+ .rcmb_wide_y_din (rcmb_wide_y_dout_y),
+ .rcmb_wide_xy_ena (rcmb_wide_xy_valid_y),
+ // narrow recombinator stream; nets are shared with mmm_y (this side's *_ena port is fed by the *_valid net)
+ .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_y),
+ .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_y),
+ .rcmb_narrow_x_din (rcmb_narrow_x_dout_y),
+ .rcmb_narrow_y_din (rcmb_narrow_y_dout_y),
+ .rcmb_narrow_xy_ena (rcmb_narrow_xy_valid_y),
+ // wide reductor stream; nets are shared with reductor_y
+ .rdct_wide_xy_bank (rdct_wide_xy_bank_y),
+ .rdct_wide_xy_addr (rdct_wide_xy_addr_y),
+ .rdct_wide_x_din (rdct_wide_x_dout_y),
+ .rdct_wide_y_din (rdct_wide_y_dout_y),
+ .rdct_wide_xy_valid (rdct_wide_xy_valid_y),
+ // narrow reductor stream; nets are shared with reductor_y
+ .rdct_narrow_xy_bank (rdct_narrow_xy_bank_y),
+ .rdct_narrow_xy_addr (rdct_narrow_xy_addr_y),
+ .rdct_narrow_x_din (rdct_narrow_x_dout_y),
+ .rdct_narrow_y_din (rdct_narrow_y_dout_y),
+ .rdct_narrow_xy_valid (rdct_narrow_xy_valid_y)
+
+ );
+
+
+ //
+ // IO Block
+ //
+ wire io_in_1_en;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr;
+ wire [ WORD_W -1:0] io_in_1_dout;
+ wire io_in_2_en;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr;
+ wire [ WORD_W -1:0] io_in_2_dout;
+ wire io_out_en;
+ wire io_out_we;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr;
+ wire [ WORD_W -1:0] io_out_din;
+
+ // TODO: Separate reset for clock domains (core/bus)???
+
+ modexpng_io_block io_block
+ (
+ .clk (clk),
+ .clk_bus (clk_bus),
+ // the block receives two clocks (clk, clk_bus) but only this single rst
+ .rst (rst),
+ // external bus interface, passed straight through from this module's ports
+ .bus_cs (bus_cs),
+ .bus_we (bus_we),
+ .bus_addr (bus_addr),
+ .bus_data_wr (bus_data_wr),
+ .bus_data_rd (bus_data_rd),
+ // in_1 port (enable / address / data); nets are shared with io_manager
+ .in_1_en (io_in_1_en),
+ .in_1_addr (io_in_1_addr),
+ .in_1_dout (io_in_1_dout),
+ // in_2 port (enable / address / data); nets are shared with io_manager
+ .in_2_en (io_in_2_en),
+ .in_2_addr (io_in_2_addr),
+ .in_2_dout (io_in_2_dout),
+ // out port (enable / write enable / address / data); nets are shared with io_manager
+ .out_en (io_out_en),
+ .out_we (io_out_we),
+ .out_addr (io_out_addr),
+ .out_din (io_out_din)
+ );
+
+
+ //
+ // IO Manager
+ //
+ reg io_mgr_ena = 1'b0; // start strobe: high for the cycle after DECODE when the opcode is an I/O opcode
+ wire io_mgr_rdy; // ready flag from io_manager, sampled by the completion detector
+ reg [UOP_CRT_W -1:0] io_mgr_sel_crt; // the registers below are loaded from the micro-operation while in DECODE
+ reg [UOP_AUX_W -1:0] io_mgr_sel_aux;
+ reg [BANK_ADDR_W -1:0] io_mgr_sel_in;
+ reg [BANK_ADDR_W -1:0] io_mgr_sel_out;
+ reg [OP_ADDR_W -1:0] io_mgr_word_index_last; // word_index_last_n or word_index_last_pq, chosen by the npq field
+ reg [UOP_OPCODE_W -1:0] io_mgr_opcode; // copy of the decoded opcode (updated for every opcode, not only I/O ones)
+ // Single I/O manager instance serving both the X and the Y storage managers.
+ modexpng_io_manager io_manager
+ (
+ .clk (clk),
+ .rst (rst),
+ // ena / rdy handshake with the micro-operation sequencer
+ .ena (io_mgr_ena),
+ .rdy (io_mgr_rdy),
+ // selectors latched from the micro-operation
+ .sel_crt (io_mgr_sel_crt),
+ .sel_aux (io_mgr_sel_aux),
+ .sel_in (io_mgr_sel_in),
+ .sel_out (io_mgr_sel_out),
+ // which I/O operation to perform
+ .opcode (io_mgr_opcode),
+ // index of the last operand word
+ .word_index_last (io_mgr_word_index_last),
+ // external wide stream; nets are shared with storage_manager_x
+ .ext_wide_xy_ena_x (ext_wide_xy_ena_x),
+ .ext_wide_xy_bank_x (ext_wide_xy_bank_x),
+ .ext_wide_xy_addr_x (ext_wide_xy_addr_x),
+ .ext_wide_x_din_x (ext_wide_x_din_x),
+ .ext_wide_y_din_x (ext_wide_y_din_x),
+ // external narrow stream; nets are shared with storage_manager_x
+ .ext_narrow_xy_ena_x (ext_narrow_xy_ena_x),
+ .ext_narrow_xy_bank_x (ext_narrow_xy_bank_x),
+ .ext_narrow_xy_addr_x (ext_narrow_xy_addr_x),
+ .ext_narrow_x_din_x (ext_narrow_x_din_x),
+ .ext_narrow_y_din_x (ext_narrow_y_din_x),
+ // external wide stream; nets are shared with storage_manager_y
+ .ext_wide_xy_ena_y (ext_wide_xy_ena_y),
+ .ext_wide_xy_bank_y (ext_wide_xy_bank_y),
+ .ext_wide_xy_addr_y (ext_wide_xy_addr_y),
+ .ext_wide_x_din_y (ext_wide_x_din_y),
+ .ext_wide_y_din_y (ext_wide_y_din_y),
+ // external narrow stream; nets are shared with storage_manager_y
+ .ext_narrow_xy_ena_y (ext_narrow_xy_ena_y),
+ .ext_narrow_xy_bank_y (ext_narrow_xy_bank_y),
+ .ext_narrow_xy_addr_y (ext_narrow_xy_addr_y),
+ .ext_narrow_x_din_y (ext_narrow_x_din_y),
+ .ext_narrow_y_din_y (ext_narrow_y_din_y),
+ // in_1 port of io_block
+ .io_in_1_en (io_in_1_en),
+ .io_in_1_addr (io_in_1_addr),
+ .io_in_1_dout (io_in_1_dout),
+ // in_2 port of io_block
+ .io_in_2_en (io_in_2_en),
+ .io_in_2_addr (io_in_2_addr),
+ .io_in_2_dout (io_in_2_dout),
+ // out port of io_block
+ .io_out_en (io_out_en),
+ .io_out_we (io_out_we),
+ .io_out_addr (io_out_addr),
+ .io_out_din (io_out_din)
+ );
+
+
+ //
+ // Multipliers (X, Y)
+ //
+ reg mmm_ena_x = 1'b0; // start strobes: both are driven from the same expression in the trigger logic
+ reg mmm_ena_y = 1'b0;
+ wire mmm_ena = mmm_ena_x & mmm_ena_y; // high only while both strobes are high
+ // ready flags from the two multipliers
+ wire mmm_rdy_x;
+ wire mmm_rdy_y;
+ wire mmm_rdy = mmm_rdy_x & mmm_rdy_y; // a multiply micro-operation is complete only when both multipliers are ready
+ // index of the last operand word, loaded in DECODE
+ reg [OP_ADDR_W -1:0] mmm_word_index_last_x;
+ reg [OP_ADDR_W -1:0] mmm_word_index_last_y;
+ // the same index minus one, pre-computed outside the multipliers
+ reg [OP_ADDR_W -1:0] mmm_word_index_last_minus1_x;
+ reg [OP_ADDR_W -1:0] mmm_word_index_last_minus1_y;
+ // ladder mode per multiplier, set from the micro-operation's ladder field
+ reg mmm_ladder_mode_x;
+ reg mmm_ladder_mode_y;
+ // source bank selects; X and Y always receive the same value
+ reg [BANK_ADDR_W -1:0] mmm_sel_wide_in_x;
+ reg [BANK_ADDR_W -1:0] mmm_sel_wide_in_y;
+ reg [BANK_ADDR_W -1:0] mmm_sel_narrow_in_x;
+ reg [BANK_ADDR_W -1:0] mmm_sel_narrow_in_y;
+ // handshake between each multiplier (rdct_ena / rdct_rdy ports) and its reductor (ena / rdy ports)
+ wire rdct_ena_x;
+ wire rdct_ena_y;
+ wire rdct_rdy_x;
+ wire rdct_rdy_y;
+
+ modexpng_mmm_dual mmm_x
+ (
+ .clk (clk),
+ .rst (rst),
+ // ena / rdy handshake with the micro-operation sequencer
+ .ena (mmm_ena_x),
+ .rdy (mmm_rdy_x),
+ // ladder mode and operand length, latched in DECODE for UOP_OPCODE_MODULAR_MULTIPLY
+ .ladder_mode (mmm_ladder_mode_x),
+ .word_index_last (mmm_word_index_last_x),
+ .word_index_last_minus1 (mmm_word_index_last_minus1_x),
+ // source bank selects
+ .sel_wide_in (mmm_sel_wide_in_x),
+ .sel_narrow_in (mmm_sel_narrow_in_x),
+ // wide read ports of storage_block_x (the *_aux bank/addr/dout nets are also connected to reductor_x)
+ .rd_wide_xy_ena (rd_wide_xy_ena_x),
+ .rd_wide_xy_ena_aux (rd_wide_xy_ena_aux_x),
+ .rd_wide_xy_bank (rd_wide_xy_bank_x),
+ .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x),
+ .rd_wide_xy_addr (rd_wide_xy_addr_x),
+ .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x),
+ .rd_wide_x_dout (rd_wide_x_dout_x),
+ .rd_wide_y_dout (rd_wide_y_dout_x),
+ .rd_wide_x_dout_aux (rd_wide_x_dout_aux_x),
+ .rd_wide_y_dout_aux (rd_wide_y_dout_aux_x),
+ // narrow read port of storage_block_x
+ .rd_narrow_xy_ena (rd_narrow_xy_ena_x),
+ .rd_narrow_xy_bank (rd_narrow_xy_bank_x),
+ .rd_narrow_xy_addr (rd_narrow_xy_addr_x),
+ .rd_narrow_x_dout (rd_narrow_x_dout_x),
+ .rd_narrow_y_dout (rd_narrow_y_dout_x),
+ // wide recombinator stream; nets are shared with storage_manager_x
+ .rcmb_wide_xy_bank (rcmb_wide_xy_bank_x),
+ .rcmb_wide_xy_addr (rcmb_wide_xy_addr_x),
+ .rcmb_wide_x_dout (rcmb_wide_x_dout_x),
+ .rcmb_wide_y_dout (rcmb_wide_y_dout_x),
+ .rcmb_wide_xy_valid (rcmb_wide_xy_valid_x),
+ // narrow recombinator stream; nets are shared with storage_manager_x
+ .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_x),
+ .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_x),
+ .rcmb_narrow_x_dout (rcmb_narrow_x_dout_x),
+ .rcmb_narrow_y_dout (rcmb_narrow_y_dout_x),
+ .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid_x),
+ // final recombinator stream; nets are shared with reductor_x
+ .rcmb_xy_bank (rcmb_final_xy_bank_x),
+ .rcmb_xy_addr (rcmb_final_xy_addr_x),
+ .rcmb_x_dout (rcmb_final_x_dout_x),
+ .rcmb_y_dout (rcmb_final_y_dout_x),
+ .rcmb_xy_valid (rcmb_final_xy_valid_x),
+ // reductor handshake; these nets are reductor_x's ena / rdy
+ .rdct_ena (rdct_ena_x),
+ .rdct_rdy (rdct_rdy_x)
+ );
+
+ modexpng_mmm_dual mmm_y
+ (
+ .clk (clk),
+ .rst (rst),
+ // ena / rdy handshake with the micro-operation sequencer
+ .ena (mmm_ena_y),
+ .rdy (mmm_rdy_y),
+ // ladder mode and operand length, latched in DECODE for UOP_OPCODE_MODULAR_MULTIPLY
+ .ladder_mode (mmm_ladder_mode_y),
+ .word_index_last (mmm_word_index_last_y),
+ .word_index_last_minus1 (mmm_word_index_last_minus1_y),
+ // source bank selects
+ .sel_wide_in (mmm_sel_wide_in_y),
+ .sel_narrow_in (mmm_sel_narrow_in_y),
+ // wide read ports of storage_block_y (the *_aux bank/addr/dout nets are also connected to reductor_y)
+ .rd_wide_xy_ena (rd_wide_xy_ena_y),
+ .rd_wide_xy_ena_aux (rd_wide_xy_ena_aux_y),
+ .rd_wide_xy_bank (rd_wide_xy_bank_y),
+ .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y),
+ .rd_wide_xy_addr (rd_wide_xy_addr_y),
+ .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y),
+ .rd_wide_x_dout (rd_wide_x_dout_y),
+ .rd_wide_y_dout (rd_wide_y_dout_y),
+ .rd_wide_x_dout_aux (rd_wide_x_dout_aux_y),
+ .rd_wide_y_dout_aux (rd_wide_y_dout_aux_y),
+ // narrow read port of storage_block_y
+ .rd_narrow_xy_ena (rd_narrow_xy_ena_y),
+ .rd_narrow_xy_bank (rd_narrow_xy_bank_y),
+ .rd_narrow_xy_addr (rd_narrow_xy_addr_y),
+ .rd_narrow_x_dout (rd_narrow_x_dout_y),
+ .rd_narrow_y_dout (rd_narrow_y_dout_y),
+ // wide recombinator stream; nets are shared with storage_manager_y
+ .rcmb_wide_xy_bank (rcmb_wide_xy_bank_y),
+ .rcmb_wide_xy_addr (rcmb_wide_xy_addr_y),
+ .rcmb_wide_x_dout (rcmb_wide_x_dout_y),
+ .rcmb_wide_y_dout (rcmb_wide_y_dout_y),
+ .rcmb_wide_xy_valid (rcmb_wide_xy_valid_y),
+ // narrow recombinator stream; nets are shared with storage_manager_y
+ .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_y),
+ .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_y),
+ .rcmb_narrow_x_dout (rcmb_narrow_x_dout_y),
+ .rcmb_narrow_y_dout (rcmb_narrow_y_dout_y),
+ .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid_y),
+ // final recombinator stream; nets are shared with reductor_y
+ .rcmb_xy_bank (rcmb_final_xy_bank_y),
+ .rcmb_xy_addr (rcmb_final_xy_addr_y),
+ .rcmb_x_dout (rcmb_final_x_dout_y),
+ .rcmb_y_dout (rcmb_final_y_dout_y),
+ .rcmb_xy_valid (rcmb_final_xy_valid_y),
+ // reductor handshake; these nets are reductor_y's ena / rdy
+ .rdct_ena (rdct_ena_y),
+ .rdct_rdy (rdct_rdy_y)
+ );
+
+ //
+ // Reductors (X, Y)
+ //
+ reg [ OP_ADDR_W -1:0] rdct_word_index_last_x;
+ reg [ OP_ADDR_W -1:0] rdct_word_index_last_y;
+
+ reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_x;
+ reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_x;
+
+ reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_y;
+ reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_y;
+
+ modexpng_reductor reductor_x
+ (
+ .clk (clk),
+ .rst (rst),
+ // ena / rdy are wired to mmm_x's rdct_ena / rdct_rdy ports, not to the micro-operation sequencer
+ .ena (rdct_ena_x),
+ .rdy (rdct_rdy_x),
+ // operand length, latched in DECODE for UOP_OPCODE_MODULAR_MULTIPLY
+ .word_index_last (rdct_word_index_last_x),
+ // destination bank selects, latched in DECODE for UOP_OPCODE_MODULAR_MULTIPLY
+ .sel_wide_out (rdct_sel_wide_out_x),
+ .sel_narrow_out (rdct_sel_narrow_out_x),
+ // auxiliary wide read nets; the same nets are connected to mmm_x and storage_block_x
+ .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x),
+ .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x),
+ .rd_wide_x_dout_aux (rd_wide_x_dout_aux_x),
+ .rd_wide_y_dout_aux (rd_wide_y_dout_aux_x),
+ // final recombinator stream; nets are shared with mmm_x
+ .rcmb_final_xy_bank (rcmb_final_xy_bank_x),
+ .rcmb_final_xy_addr (rcmb_final_xy_addr_x),
+ .rcmb_final_x_dout (rcmb_final_x_dout_x),
+ .rcmb_final_y_dout (rcmb_final_y_dout_x),
+ .rcmb_final_xy_valid (rcmb_final_xy_valid_x),
+ // wide result stream; nets are shared with storage_manager_x
+ .rdct_wide_xy_bank (rdct_wide_xy_bank_x),
+ .rdct_wide_xy_addr (rdct_wide_xy_addr_x),
+ .rdct_wide_x_dout (rdct_wide_x_dout_x),
+ .rdct_wide_y_dout (rdct_wide_y_dout_x),
+ .rdct_wide_xy_valid (rdct_wide_xy_valid_x),
+ // narrow result stream; nets are shared with storage_manager_x
+ .rdct_narrow_xy_bank (rdct_narrow_xy_bank_x),
+ .rdct_narrow_xy_addr (rdct_narrow_xy_addr_x),
+ .rdct_narrow_x_dout (rdct_narrow_x_dout_x),
+ .rdct_narrow_y_dout (rdct_narrow_y_dout_x),
+ .rdct_narrow_xy_valid (rdct_narrow_xy_valid_x)
+ );
+
+ modexpng_reductor reductor_y
+ (
+ .clk (clk),
+ .rst (rst),
+ // ena / rdy are wired to mmm_y's rdct_ena / rdct_rdy ports, not to the micro-operation sequencer
+ .ena (rdct_ena_y),
+ .rdy (rdct_rdy_y),
+ // operand length, latched in DECODE for UOP_OPCODE_MODULAR_MULTIPLY
+ .word_index_last (rdct_word_index_last_y),
+ // destination bank selects, latched in DECODE for UOP_OPCODE_MODULAR_MULTIPLY
+ .sel_wide_out (rdct_sel_wide_out_y),
+ .sel_narrow_out (rdct_sel_narrow_out_y),
+ // auxiliary wide read nets; the same nets are connected to mmm_y and storage_block_y
+ .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y),
+ .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y),
+ .rd_wide_x_dout_aux (rd_wide_x_dout_aux_y),
+ .rd_wide_y_dout_aux (rd_wide_y_dout_aux_y),
+ // final recombinator stream; nets are shared with mmm_y
+ .rcmb_final_xy_bank (rcmb_final_xy_bank_y),
+ .rcmb_final_xy_addr (rcmb_final_xy_addr_y),
+ .rcmb_final_x_dout (rcmb_final_x_dout_y),
+ .rcmb_final_y_dout (rcmb_final_y_dout_y),
+ .rcmb_final_xy_valid (rcmb_final_xy_valid_y),
+ // wide result stream; nets are shared with storage_manager_y
+ .rdct_wide_xy_bank (rdct_wide_xy_bank_y),
+ .rdct_wide_xy_addr (rdct_wide_xy_addr_y),
+ .rdct_wide_x_dout (rdct_wide_x_dout_y),
+ .rdct_wide_y_dout (rdct_wide_y_dout_y),
+ .rdct_wide_xy_valid (rdct_wide_xy_valid_y),
+ // narrow result stream; nets are shared with storage_manager_y
+ .rdct_narrow_xy_bank (rdct_narrow_xy_bank_y),
+ .rdct_narrow_xy_addr (rdct_narrow_xy_addr_y),
+ .rdct_narrow_x_dout (rdct_narrow_x_dout_y),
+ .rdct_narrow_y_dout (rdct_narrow_y_dout_y),
+ .rdct_narrow_xy_valid (rdct_narrow_xy_valid_y)
+ );
+
+
+ //
+ // uOP Completion Detector
+ //
+ reg uop_exit_from_busy;
+
+ // Combinational "micro-operation finished" flag for the opcode currently on uop_data.
+ // A unit counts as finished once its start strobe is low again and it reports ready.
+ // The multiplier check has priority over the I/O check; any other opcode never reports completion.
+ // Completion of modular addition / subtraction is not hooked up yet:
+ //if (uop_data_opcode_is_add) uop_exit_from_busy = ~mod_add_ena & mod_add_rdy;
+ //if (uop_data_opcode_is_sub) uop_exit_from_busy = ~mod_sub_ena & mod_sub_rdy;
+ always @*
+     if (uop_opcode_is_mmm)     uop_exit_from_busy = ~mmm_ena & mmm_rdy;
+     else if (uop_opcode_is_io) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
+     else                       uop_exit_from_busy = 1'b0;
+
+
+ //
+ // uOP Trigger Logic
+ //
+ always @(posedge clk) begin
+     // The three start strobes are handled as one packed vector {io, mmm_x, mmm_y}.
+     // Outside DECODE they are all driven low, so each strobe is a pulse that
+     // follows the DECODE cycle of a matching opcode; rst clears them synchronously.
+     if (rst)
+         {io_mgr_ena, mmm_ena_x, mmm_ena_y} <= 3'b000;
+     else
+         {io_mgr_ena, mmm_ena_x, mmm_ena_y} <= (uop_fsm_state == UOP_FSM_STATE_DECODE)
+             ? {uop_opcode_is_io, {2{uop_opcode_is_mmm}}}
+             : 3'b000;
+ end
+
+ //
+ // Parameters
+ //
+ always @(posedge clk)
+ // Latches per-unit parameters from the decoded micro-operation; only registers named in the matching branch change, all others keep their previous value (no reset).
+ if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
+ // the opcode copy is refreshed for every decoded opcode, not only for I/O opcodes
+ io_mgr_opcode <= uop_data_opcode;
+ // NOTE(review): UOP_OPCODE_OUTPUT_FROM_NARROW is classed as I/O by uop_opcode_is_io (so io_mgr_ena pulses for it) but has no branch below, leaving io_mgr_sel_* stale - presumably output is not implemented yet, confirm.
+ case (uop_data_opcode)
+ //
+ UOP_OPCODE_INPUT_TO_WIDE: begin
+ io_mgr_sel_crt <= uop_data_crt;
+ io_mgr_sel_aux <= uop_data_aux;
+ io_mgr_sel_in <= uop_data_sel_narrow_in; // NOTE(review): source select comes from the narrow_in field even for the wide destination - presumably intentional, confirm against the microcode
+ io_mgr_sel_out <= uop_data_sel_wide_out;
+ end
+ //
+ UOP_OPCODE_INPUT_TO_NARROW: begin
+ io_mgr_sel_crt <= uop_data_crt;
+ io_mgr_sel_aux <= uop_data_aux;
+ io_mgr_sel_in <= uop_data_sel_narrow_in;
+ io_mgr_sel_out <= uop_data_sel_narrow_out;
+ end
+ //
+ UOP_OPCODE_MODULAR_MULTIPLY: begin
+ // ladder field -> per-multiplier ladder mode {x, y}
+ case (uop_data_ladder)
+ UOP_LADDER_00: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b00;
+ UOP_LADDER_11: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b11;
+ UOP_LADDER_D: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX; // assigns unknown (X): no real value is provided for this mode here
+ UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX; // assigns unknown (X): no real value is provided for this mode here
+ endcase
+ // bank selects: the X and Y halves always receive identical values
+ {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }};
+ {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }};
+ {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }};
+ {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}};
+
+ //
+ end
+ // no default branch: other opcodes load nothing besides io_mgr_opcode
+ endcase
+ //
+ end
+
+ //
+ // Length
+ //
+ wire [OP_ADDR_W -1:0] word_index_last_n_minus1 = word_index_last_n - 1'b1; // unsigned OP_ADDR_W-bit subtraction: wraps around if the input is 0
+ wire [OP_ADDR_W -1:0] word_index_last_pq_minus1 = word_index_last_pq - 1'b1; // unsigned OP_ADDR_W-bit subtraction: wraps around if the input is 0
+ // the npq field picks between the "n" length and the "pq" length
+ wire uop_npq_is_n = uop_data_npq == UOP_NPQ_N;
+ // Latches the operand length for the unit addressed by the decoded opcode; registers have no reset and hold their value otherwise.
+ always @(posedge clk)
+ //
+ if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
+ // NOTE(review): UOP_OPCODE_OUTPUT_FROM_NARROW is not listed below although uop_opcode_is_io treats it as I/O, so io_mgr_word_index_last stays stale for it - presumably output is not implemented yet, confirm.
+ case (uop_data_opcode)
+ //
+ UOP_OPCODE_INPUT_TO_WIDE,
+ UOP_OPCODE_INPUT_TO_NARROW: io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+ // multiplier and reductor lengths: the X and Y halves always receive identical values
+ UOP_OPCODE_MODULAR_MULTIPLY: begin
+ {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }};
+ {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{uop_npq_is_n ? word_index_last_n_minus1 : word_index_last_pq_minus1}};
+ {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }};
+ end
+ //
+ endcase
+ //
+ end
+
+
+
+ //
+ // FSM Process
+ //
+ always @(posedge clk) begin
+     // State register of the micro-operation sequencer:
+     // synchronous reset to IDLE, otherwise take the computed next state.
+     if (rst) begin
+         uop_fsm_state <= UOP_FSM_STATE_IDLE;
+     end else begin
+         uop_fsm_state <= uop_fsm_state_next;
+     end
+ end
+
+
+ //
+ // FSM Transition Logic
+ //
+ always @* begin
+ //
+ // Next-state function of the micro-operation sequencer:
+ //   IDLE   -> FETCH when `next` is high, else stays in IDLE
+ //   FETCH  -> DECODE unconditionally
+ //   DECODE -> IDLE on a STOP opcode, else BUSY
+ //   BUSY   -> FETCH once uop_exit_from_busy is high, else stays in BUSY
+ //
+ // The default item only matters when uop_fsm_state carries X/Z bits (no
+ // item matches then): instead of silently holding the previous next-state
+ // value like a latch, the sequencer falls back to IDLE, which is the same
+ // state rst forces. Synthesis is unaffected, since all four 2-bit
+ // encodings already have an explicit item.
+ //
+ case (uop_fsm_state)
+ UOP_FSM_STATE_IDLE: uop_fsm_state_next = next ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_IDLE;
+ UOP_FSM_STATE_FETCH: uop_fsm_state_next = UOP_FSM_STATE_DECODE ;
+ UOP_FSM_STATE_DECODE: uop_fsm_state_next = uop_opcode_is_stop ? UOP_FSM_STATE_IDLE : UOP_FSM_STATE_BUSY;
+ UOP_FSM_STATE_BUSY: uop_fsm_state_next = uop_exit_from_busy ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_BUSY;
+ default: uop_fsm_state_next = UOP_FSM_STATE_IDLE;
+ endcase
+ //
+ end
+
+
+ //
+ // Ready Flag Logic
+ //
+ reg valid_reg = 1'b1;
+ assign valid = valid_reg;
+
+ // `valid` starts (and resets) high, drops when `next` is seen in IDLE and
+ // comes back once a STOP opcode is decoded; FETCH and BUSY leave it untouched.
+ always @(posedge clk) begin
+     if (rst)
+         valid_reg <= 1'b1;
+     else if (uop_fsm_state == UOP_FSM_STATE_IDLE)
+         valid_reg <= ~next;
+     else if (uop_fsm_state == UOP_FSM_STATE_DECODE)
+         valid_reg <= uop_opcode_is_stop;
+ end
+
+
+
+ //
+ // BEGIN DEBUG
+ //
+ integer i;
+ always @(posedge clk)
+ //
+ if ((uop_fsm_state == UOP_FSM_STATE_DECODE) && uop_opcode_is_stop) begin
+ //
+ $display("STOP - BANKS DUMP FOLLOWS");
+ //
+ // X.X
+ //
+ $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
+ $write("X.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[0*256+i]); $write("\n");
+ $write("X.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[1*256+i]); $write("\n");
+ $write("X.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[2*256+i]); $write("\n");
+ $write("X.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[3*256+i]); $write("\n");
+ $write("X.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[4*256+i]); $write("\n");
+ $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[5*256+i]); $write("\n");
+ $write("X.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[6*256+i]); $write("\n");
+ $write("X.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
+ $write("X.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[0*256+i]); $write("\n");
+ $write("X.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[1*256+i]); $write("\n");
+ $write("X.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[2*256+i]); $write("\n");
+ $write("X.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[3*256+i]); $write("\n");
+ $write("X.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[4*256+i]); $write("\n");
+ $write("X.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[5*256+i]); $write("\n");
+ $write("X.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[6*256+i]); $write("\n");
+ $write("X.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[7*256+i]); $write("\n");
+ //
+ // X.Y
+ //
+ $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
+ $write("X.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[0*256+i]); $write("\n");
+ $write("X.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[1*256+i]); $write("\n");
+ $write("X.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[2*256+i]); $write("\n");
+ $write("X.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[3*256+i]); $write("\n");
+ $write("X.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[4*256+i]); $write("\n");
+ $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[5*256+i]); $write("\n");
+ $write("X.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[6*256+i]); $write("\n");
+ $write("X.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
+ $write("X.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[0*256+i]); $write("\n");
+ $write("X.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[1*256+i]); $write("\n");
+ $write("X.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[2*256+i]); $write("\n");
+ $write("X.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[3*256+i]); $write("\n");
+ $write("X.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[4*256+i]); $write("\n");
+ $write("X.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[5*256+i]); $write("\n");
+ $write("X.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[6*256+i]); $write("\n");
+ $write("X.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[7*256+i]); $write("\n");
+ //
+ // Y.X
+ //
+ $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
+ $write("Y.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[0*256+i]); $write("\n");
+ $write("Y.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[1*256+i]); $write("\n");
+ $write("Y.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[2*256+i]); $write("\n");
+ $write("Y.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[3*256+i]); $write("\n");
+ $write("Y.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[4*256+i]); $write("\n");
+ $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[5*256+i]); $write("\n");
+ $write("Y.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[6*256+i]); $write("\n");
+ $write("Y.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
+ $write("Y.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[0*256+i]); $write("\n");
+ $write("Y.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[1*256+i]); $write("\n");
+ $write("Y.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[2*256+i]); $write("\n");
+ $write("Y.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[3*256+i]); $write("\n");
+ $write("Y.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[4*256+i]); $write("\n");
+ $write("Y.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[5*256+i]); $write("\n");
+ $write("Y.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[6*256+i]); $write("\n");
+ $write("Y.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[7*256+i]); $write("\n");
+ //
+ // Y.Y
+ //
+ $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
+ $write("Y.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[0*256+i]); $write("\n");
+ $write("Y.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[1*256+i]); $write("\n");
+ $write("Y.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[2*256+i]); $write("\n");
+ $write("Y.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[3*256+i]); $write("\n");
+ $write("Y.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[4*256+i]); $write("\n");
+ $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[5*256+i]); $write("\n");
+ $write("Y.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[6*256+i]); $write("\n");
+ $write("Y.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
+ $write("Y.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[0*256+i]); $write("\n");
+ $write("Y.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[1*256+i]); $write("\n");
+ $write("Y.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[2*256+i]); $write("\n");
+ $write("Y.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[3*256+i]); $write("\n");
+ $write("Y.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[4*256+i]); $write("\n");
+ $write("Y.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[5*256+i]); $write("\n");
+ $write("Y.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[6*256+i]); $write("\n");
+ $write("Y.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[7*256+i]); $write("\n"); //
+ end
+
+ //
+ // END DEBUG
+ //
endmodule
diff --git a/rtl/modexpng_io_block.v b/rtl/modexpng_io_block.v
new file mode 100644
index 0000000..68d13c4
--- /dev/null
+++ b/rtl/modexpng_io_block.v
@@ -0,0 +1,158 @@
+//
+// I/O memory block.
+//
+// Wraps three block memories that sit between the system bus (clk_bus) and
+// the core (clk):
+//   INPUT_1 - bus side read-write, core side read-only
+//   INPUT_2 - bus side write-only, core side read-only
+//   OUTPUT  - core side write-only, bus side read-only
+//
+// The two upper bits of bus_addr select the memory (0 = INPUT_1,
+// 1 = INPUT_2, 2 = OUTPUT); the remaining bits address a word inside it.
+// Bus reads from selections 1 and 3 return the filler 32'hDEADC0DE.
+//
+module modexpng_io_block
+(
+    clk, clk_bus, rst,
+
+    bus_cs,
+    bus_we,
+    bus_addr,
+    bus_data_wr,
+    bus_data_rd,
+
+    in_1_en,
+    in_1_addr,
+    in_1_dout,
+
+    in_2_en,
+    in_2_addr,
+    in_2_dout,
+
+    out_en,
+    out_we,
+    out_addr,
+    out_din
+);
+
+    //
+    // Headers
+    //
+    `include "modexpng_parameters.vh"
+
+
+    //
+    // Ports
+    //
+    input                                         clk;          // core clock
+    input                                         clk_bus;      // bus clock
+    input                                         rst;          // synchronous reset (clk domain)
+
+    input                                         bus_cs;       // bus access strobe
+    input                                         bus_we;       // bus write (1) / read (0)
+    input  [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;     // {memory select, bank, word}
+    input  [                    BUS_DATA_W -1:0]  bus_data_wr;
+    output [                    BUS_DATA_W -1:0]  bus_data_rd;
+
+    input                                         in_1_en;      // core read port of INPUT_1
+    input  [        BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr;
+    output [                        WORD_W -1:0]  in_1_dout;
+
+    input                                         in_2_en;      // core read port of INPUT_2
+    input  [        BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr;
+    output [                        WORD_W -1:0]  in_2_dout;
+
+    input                                         out_en;       // core write port of OUTPUT
+    input                                         out_we;
+    input  [        BANK_ADDR_W + OP_ADDR_W -1:0] out_addr;
+    input  [                        WORD_W -1:0]  out_din;
+
+
+    //
+    // Internal Registers
+    //
+    // One-cycle delayed copies of the core-side read enables; they drive the
+    // regceb inputs of the two input memories.
+    //
+    reg in_1_reg_en = 1'b0;
+    reg in_2_reg_en = 1'b0;
+
+    always @(posedge clk)
+        //
+        if (rst) begin
+            in_1_reg_en <= 1'b0;
+            in_2_reg_en <= 1'b0;
+        end else begin
+            in_1_reg_en <= in_1_en;
+            in_2_reg_en <= in_2_en;
+        end
+
+
+    //
+    // INPUT, OUTPUT Storage Buffers
+    //
+    wire [                          2 -1:0] bus_addr_msb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W +: 2];
+    wire [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr_lsb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W -1:0];
+    reg  [                          2 -1:0] bus_addr_msb_dly;
+    wire [                 BUS_DATA_W -1:0] bus_data_rd_input_1;
+    wire [                 BUS_DATA_W -1:0] bus_data_rd_output;
+
+    // Per-memory bus write strobes. They are qualified by bus_we, not by the
+    // data word: a strobe derived from bus_data_wr would be unable to store
+    // an all-zero word and would modify memory during a bus read whenever
+    // the write data lines happen to be non-zero.
+    wire bus_data_wr_input_1 = bus_we && (bus_addr_msb == 2'd0);
+    wire bus_data_wr_input_2 = bus_we && (bus_addr_msb == 2'd1);
+
+    /* INPUT_1 */
+    modexpng_tdp_36k_x16_x32_wrapper bram_input_1
+    (
+        .clk        (clk),                  // core clock
+        .clk_bus    (clk_bus),              // bus clock
+
+        .ena        (bus_cs),               // bus side read-write
+        .wea        (bus_data_wr_input_1),  //
+        .addra      (bus_addr_lsb),         //
+        .dina       (bus_data_wr),          //
+        .douta      (bus_data_rd_input_1),  //
+
+        .enb        (in_1_en),              // core side read-only
+        .regceb     (in_1_reg_en),          //
+        .addrb      (in_1_addr),            //
+        .doutb      (in_1_dout)             //
+    );
+
+
+    /* INPUT_2 */
+    modexpng_sdp_36k_x16_x32_wrapper bram_input_2
+    (
+        .clk        (clk),                  // core clock
+        .clk_bus    (clk_bus),              // bus clock
+
+        .ena        (bus_cs),               // bus side write-only
+        .wea        (bus_data_wr_input_2),  //
+        .addra      (bus_addr_lsb),         //
+        .dina       (bus_data_wr),          //
+
+        .enb        (in_2_en),              // core side read-only
+        .regceb     (in_2_reg_en),          //
+        .addrb      (in_2_addr),            //
+        .doutb      (in_2_dout)             //
+    );
+
+
+    /* OUTPUT */
+    modexpng_sdp_36k_x32_x16_wrapper bram_output
+    (
+        .clk        (clk),                  // core clock
+        .clk_bus    (clk_bus),              // bus clock
+
+        .ena        (out_en),               // core side write-only
+        .wea        (out_we),               //
+        .addra      (out_addr),             //
+        .dina       (out_din),              //
+
+        .enb        (bus_cs),               // bus side read-only
+        .addrb      (bus_addr_lsb),         //
+        .doutb      (bus_data_rd_output)    //
+    );
+
+
+    //
+    // Bus Read Multiplexer
+    //
+    // The memory select bits are delayed by one clk_bus cycle before they
+    // steer the read data mux.
+    //
+    reg [BUS_DATA_W -1:0] bus_data_rd_mux;
+    assign bus_data_rd = bus_data_rd_mux;
+
+    always @(posedge clk_bus)
+        bus_addr_msb_dly <= bus_addr_msb;
+
+    always @(*)
+        //
+        case (bus_addr_msb_dly)
+            //
+            2'd0: bus_data_rd_mux = bus_data_rd_input_1;
+            2'd1: bus_data_rd_mux = 32'hDEADC0DE;         // INPUT_2 has no bus read port
+            2'd2: bus_data_rd_mux = bus_data_rd_output;
+            2'd3: bus_data_rd_mux = 32'hDEADC0DE;         // no memory at this selection
+            //
+        endcase
+
+endmodule
+
diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v
new file mode 100644
index 0000000..81f582f
--- /dev/null
+++ b/rtl/modexpng_io_manager.v
@@ -0,0 +1,527 @@
+module modexpng_io_manager
+(
+ clk,
+ rst,
+
+ ena,
+ rdy,
+
+ sel_crt,
+ sel_aux,
+ sel_in,
+ sel_out,
+
+ opcode,
+
+ word_index_last,
+
+ ext_wide_xy_ena_x,
+ ext_wide_xy_bank_x,
+ ext_wide_xy_addr_x,
+ ext_wide_x_din_x,
+ ext_wide_y_din_x,
+
+ ext_narrow_xy_ena_x,
+ ext_narrow_xy_bank_x,
+ ext_narrow_xy_addr_x,
+ ext_narrow_x_din_x,
+ ext_narrow_y_din_x,
+
+ ext_wide_xy_ena_y,
+ ext_wide_xy_bank_y,
+ ext_wide_xy_addr_y,
+ ext_wide_x_din_y,
+ ext_wide_y_din_y,
+
+ ext_narrow_xy_ena_y,
+ ext_narrow_xy_bank_y,
+ ext_narrow_xy_addr_y,
+ ext_narrow_x_din_y,
+ ext_narrow_y_din_y,
+
+ io_in_1_en,
+ io_in_1_addr,
+ io_in_1_dout,
+
+ io_in_2_en,
+ io_in_2_addr,
+ io_in_2_dout,
+
+ io_out_en,
+ io_out_we,
+ io_out_addr,
+ io_out_din
+);
+
+ //
+ // Headers
+ //
+ `include "modexpng_parameters.vh"
+ `include "modexpng_microcode.vh"
+
+
+ //
+ // Ports
+ //
+ input clk;
+ input rst;
+
+ input ena;
+ output rdy;
+
+ input [ UOP_CRT_W -1:0] sel_crt;
+ input [ UOP_AUX_W -1:0] sel_aux;
+ input [ BANK_ADDR_W -1:0] sel_in;
+ input [ BANK_ADDR_W -1:0] sel_out;
+
+ input [ UOP_OPCODE_W -1:0] opcode;
+
+ input [ OP_ADDR_W -1:0] word_index_last;
+
+ output ext_wide_xy_ena_x;
+ output [ BANK_ADDR_W -1:0] ext_wide_xy_bank_x;
+ output [ OP_ADDR_W -1:0] ext_wide_xy_addr_x;
+ output [ WORD_EXT_W -1:0] ext_wide_x_din_x;
+ output [ WORD_EXT_W -1:0] ext_wide_y_din_x;
+
+ output ext_narrow_xy_ena_x;
+ output [ BANK_ADDR_W -1:0] ext_narrow_xy_bank_x;
+ output [ OP_ADDR_W -1:0] ext_narrow_xy_addr_x;
+ output [ WORD_EXT_W -1:0] ext_narrow_x_din_x;
+ output [ WORD_EXT_W -1:0] ext_narrow_y_din_x;
+
+ output ext_wide_xy_ena_y;
+ output [ BANK_ADDR_W -1:0] ext_wide_xy_bank_y;
+ output [ OP_ADDR_W -1:0] ext_wide_xy_addr_y;
+ output [ WORD_EXT_W -1:0] ext_wide_x_din_y;
+ output [ WORD_EXT_W -1:0] ext_wide_y_din_y;
+
+ output ext_narrow_xy_ena_y;
+ output [ BANK_ADDR_W -1:0] ext_narrow_xy_bank_y;
+ output [ OP_ADDR_W -1:0] ext_narrow_xy_addr_y;
+ output [ WORD_EXT_W -1:0] ext_narrow_x_din_y;
+ output [ WORD_EXT_W -1:0] ext_narrow_y_din_y;
+
+ output io_in_1_en;
+ output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr;
+ input [ WORD_W -1:0] io_in_1_dout;
+
+ output io_in_2_en;
+ output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr;
+ input [ WORD_W -1:0] io_in_2_dout;
+
+ output io_out_en;
+ output io_out_we;
+ output [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr;
+ output [ WORD_W -1:0] io_out_din;
+
+
+ //
+ // FSM Declaration
+ //
+ localparam [2:0] IO_FSM_STATE_IDLE = 3'b000;
+ localparam [2:0] IO_FSM_STATE_LATENCY_PRE1 = 3'b001;
+ localparam [2:0] IO_FSM_STATE_LATENCY_PRE2 = 3'b010;
+ localparam [2:0] IO_FSM_STATE_BUSY = 3'b011;
+ localparam [2:0] IO_FSM_STATE_EXTRA = 3'b100;
+ localparam [2:0] IO_FSM_STATE_LATENCY_POST1 = 3'b101;
+ localparam [2:0] IO_FSM_STATE_LATENCY_POST2 = 3'b110;
+
+ reg [2:0] io_fsm_state = IO_FSM_STATE_IDLE;
+ reg [2:0] io_fsm_state_next;
+
+
+ //
+ // Control Signals
+ //
+ reg in_1_en = 1'b0;
+ reg [BANK_ADDR_W -1:0] in_1_addr_bank;
+ reg [ OP_ADDR_W -1:0] in_1_addr_op;
+
+ reg in_2_en = 1'b0;
+ reg [BANK_ADDR_W -1:0] in_2_addr_bank;
+ reg [ OP_ADDR_W -1:0] in_2_addr_op;
+
+ reg out_en = 1'b0;
+ reg out_we;
+ reg [BANK_ADDR_W -1:0] out_addr_bank;
+ reg [ OP_ADDR_W -1:0] out_addr_op;
+ reg [ WORD_W -1:0] out_din;
+
+
+ //
+ // Control Signals
+ //
+ reg wide_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] wide_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] wide_xy_addr_x;
+ reg [ WORD_EXT_W -1:0] wide_x_din_x;
+ reg [ WORD_EXT_W -1:0] wide_y_din_x;
+
+ reg narrow_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] narrow_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] narrow_xy_addr_x;
+ reg [ WORD_EXT_W -1:0] narrow_x_din_x;
+ reg [ WORD_EXT_W -1:0] narrow_y_din_x;
+
+ reg wide_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] wide_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] wide_xy_addr_y;
+ reg [ WORD_EXT_W -1:0] wide_x_din_y;
+ reg [ WORD_EXT_W -1:0] wide_y_din_y;
+
+ reg narrow_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] narrow_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] narrow_xy_addr_y;
+ reg [ WORD_EXT_W -1:0] narrow_x_din_y;
+ reg [ WORD_EXT_W -1:0] narrow_y_din_y;
+
+
+ //
+ // Mapping
+ //
+ assign io_in_1_en = in_1_en;
+ assign io_in_1_addr = {in_1_addr_bank, in_1_addr_op};
+
+ assign io_in_2_en = in_2_en;
+ assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op};
+
+ assign io_out_en = out_en;
+ assign io_out_we = out_we;
+ assign io_out_addr = {out_addr_bank, out_addr_op};
+ assign io_out_din = out_din;
+
+
+ //
+ // Mapping
+ //
+ assign ext_wide_xy_ena_x = wide_xy_ena_x;
+ assign ext_wide_xy_bank_x = wide_xy_bank_x;
+ assign ext_wide_xy_addr_x = wide_xy_addr_x;
+ assign ext_wide_x_din_x = wide_x_din_x;
+ assign ext_wide_y_din_x = wide_y_din_x;
+
+ assign ext_narrow_xy_ena_x = narrow_xy_ena_x;
+ assign ext_narrow_xy_bank_x = narrow_xy_bank_x;
+ assign ext_narrow_xy_addr_x = narrow_xy_addr_x;
+ assign ext_narrow_x_din_x = narrow_x_din_x;
+ assign ext_narrow_y_din_x = narrow_y_din_x;
+
+ assign ext_wide_xy_ena_y = wide_xy_ena_y;
+ assign ext_wide_xy_bank_y = wide_xy_bank_y;
+ assign ext_wide_xy_addr_y = wide_xy_addr_y;
+ assign ext_wide_x_din_y = wide_x_din_y;
+ assign ext_wide_y_din_y = wide_y_din_y;
+
+ assign ext_narrow_xy_ena_y = narrow_xy_ena_y;
+ assign ext_narrow_xy_bank_y = narrow_xy_bank_y;
+ assign ext_narrow_xy_addr_y = narrow_xy_addr_y;
+ assign ext_narrow_x_din_y = narrow_x_din_y;
+ assign ext_narrow_y_din_y = narrow_y_din_y;
+
+
+ //
+ // Delays
+ //
+ reg [ OP_ADDR_W -1:0] in_1_addr_op_dly1;
+ reg [ OP_ADDR_W -1:0] in_1_addr_op_dly2;
+ reg [ OP_ADDR_W -1:0] in_2_addr_op_dly1;
+ reg [ OP_ADDR_W -1:0] in_2_addr_op_dly2;
+
+ // Two-stage delay line on the word address of each input port; the
+ // second stage (*_dly2) is what the storage-side address mux consumes.
+ always @(posedge clk) begin
+     in_1_addr_op_dly1 <= in_1_addr_op;
+     in_1_addr_op_dly2 <= in_1_addr_op_dly1;
+     in_2_addr_op_dly1 <= in_2_addr_op;
+     in_2_addr_op_dly2 <= in_2_addr_op_dly1;
+ end
+
+
+ //
+ // Handy Wires
+ //
+ wire opcode_is_input = (opcode == UOP_OPCODE_INPUT_TO_WIDE) || (opcode == UOP_OPCODE_INPUT_TO_NARROW);
+
+ wire opcode_is_wide = (opcode == UOP_OPCODE_INPUT_TO_WIDE );
+ wire opcode_is_narrow = (opcode == UOP_OPCODE_INPUT_TO_NARROW);
+
+ wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF);
+
+ wire sel_crt_is_x = sel_crt == UOP_CRT_X;
+ wire sel_crt_is_y = sel_crt == UOP_CRT_Y;
+
+ wire sel_aux_is_1 = sel_aux == UOP_AUX_1;
+ wire sel_aux_is_2 = sel_aux == UOP_AUX_2;
+
+ wire in_1_addr_op_is_last;
+ wire in_2_addr_op_is_last;
+
+ wire in_1_addr_next_op_is_last;
+ wire in_2_addr_next_op_is_last;
+
+
+ //
+ // Enable Logic
+ //
+ // Read enables of the two input memories, decided from the NEXT FSM state.
+ // NOTE(review): out_en is cleared on reset here but is not assigned in any
+ // other branch of this process.
+ always @(posedge clk)
+ //
+ if (rst) begin
+ in_1_en <= 1'b0;
+ in_2_en <= 1'b0;
+ out_en <= 1'b0;
+ end else case (io_fsm_state_next)
+ //
+ // while heading into PRE1, PRE2 or BUSY: enable the port picked by sel_aux
+ IO_FSM_STATE_LATENCY_PRE1,
+ IO_FSM_STATE_LATENCY_PRE2,
+ IO_FSM_STATE_BUSY: begin
+ in_1_en <= opcode_is_input && sel_aux_is_1;
+ in_2_en <= opcode_is_input && sel_aux_is_2;
+ end
+ //
+ // EXTRA: only port 1 may stay enabled, and only when sel_in selects
+ // the bank flagged by sel_in_needs_extra
+ IO_FSM_STATE_EXTRA: begin
+ in_1_en <= opcode_is_input && sel_aux_is_1 && sel_in_needs_extra;
+ in_2_en <= 1'b0;
+ end
+ //
+ // every other next state: both input ports disabled
+ default: begin
+ in_1_en <= 1'b0;
+ in_2_en <= 1'b0;
+ end
+ //
+ endcase
+
+ //
+ // Enable Logic
+ //
+ // Enables towards the wide/narrow storage of the X and Y halves, decided
+ // from the CURRENT FSM state. The opcode picks wide vs. narrow, sel_crt
+ // picks the X vs. Y half.
+ always @(posedge clk)
+ //
+ if (rst) begin
+ //
+ wide_xy_ena_x <= 1'b0;
+ wide_xy_ena_y <= 1'b0;
+ narrow_xy_ena_x <= 1'b0;
+ narrow_xy_ena_y <= 1'b0;
+ //
+ end else case (io_fsm_state)
+ //
+ // BUSY, EXTRA and POST1: enable the storage selected by opcode and sel_crt
+ IO_FSM_STATE_BUSY,
+ IO_FSM_STATE_EXTRA,
+ IO_FSM_STATE_LATENCY_POST1: begin
+ wide_xy_ena_x <= opcode_is_wide && sel_crt_is_x;
+ wide_xy_ena_y <= opcode_is_wide && sel_crt_is_y;
+ narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x;
+ narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y;
+ end
+ //
+ // POST2: only the narrow storage is enabled, and only when
+ // sel_in_needs_extra is set
+ IO_FSM_STATE_LATENCY_POST2: begin
+ wide_xy_ena_x <= 1'b0;
+ wide_xy_ena_y <= 1'b0;
+ narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra;
+ narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra;
+ end
+ //
+ // any other state: all storage enables low
+ default: begin
+ wide_xy_ena_x <= 1'b0;
+ wide_xy_ena_y <= 1'b0;
+ narrow_xy_ena_x <= 1'b0;
+ narrow_xy_ena_y <= 1'b0;
+ end
+ //
+ endcase
+
+
+ //
+ // Data Logic
+ //
+ wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_dout : io_in_2_dout};
+
+ // Data words towards the storage blocks. Every output defaults to
+ // WORD_EXT_DNC each cycle; the selected pair (x and y data of one storage)
+ // is then overridden with two copies of io_in_dout_mux.
+ always @(posedge clk) begin
+ //
+ wide_x_din_x <= WORD_EXT_DNC;
+ wide_y_din_x <= WORD_EXT_DNC;
+ wide_x_din_y <= WORD_EXT_DNC;
+ wide_y_din_y <= WORD_EXT_DNC;
+ narrow_x_din_x <= WORD_EXT_DNC;
+ narrow_y_din_x <= WORD_EXT_DNC;
+ narrow_x_din_y <= WORD_EXT_DNC;
+ narrow_y_din_y <= WORD_EXT_DNC;
+ //
+ case (io_fsm_state)
+ //
+ // same state set and selection terms as the storage enable logic
+ IO_FSM_STATE_BUSY,
+ IO_FSM_STATE_EXTRA,
+ IO_FSM_STATE_LATENCY_POST1: begin
+ if (opcode_is_wide && sel_crt_is_x) {wide_x_din_x, wide_y_din_x} <= {2{io_in_dout_mux}};
+ if (opcode_is_wide && sel_crt_is_y) {wide_x_din_y, wide_y_din_y} <= {2{io_in_dout_mux}};
+ if (opcode_is_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
+ if (opcode_is_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+ end
+ //
+ // POST2: narrow storage only, gated by sel_in_needs_extra
+ IO_FSM_STATE_LATENCY_POST2: begin
+ if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
+ if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+ end
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Address Logic
+ //
+ wire [OP_ADDR_W -1:0] in_addr_op_dly2_mux =
+ sel_aux_is_1 ? in_1_addr_op_dly2 : in_2_addr_op_dly2;
+
+ // Bank and word address towards the storage blocks. Every output defaults
+ // to {BANK_DNC, OP_ADDR_DNC} each cycle; the selected storage is then
+ // pointed at bank sel_out with the twice-delayed input word address.
+ always @(posedge clk) begin
+ //
+ {wide_xy_bank_x, wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC};
+ {wide_xy_bank_y, wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
+ {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
+ {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+ //
+ case (io_fsm_state)
+ //
+ // regular words: destination is {sel_out, delayed input word address}
+ IO_FSM_STATE_BUSY,
+ IO_FSM_STATE_EXTRA,
+ IO_FSM_STATE_LATENCY_POST1: begin
+ if (opcode_is_wide && sel_crt_is_x) {wide_xy_bank_x, wide_xy_addr_x } <= {sel_out, in_addr_op_dly2_mux};
+ if (opcode_is_wide && sel_crt_is_y) {wide_xy_bank_y, wide_xy_addr_y } <= {sel_out, in_addr_op_dly2_mux};
+ if (opcode_is_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux};
+ if (opcode_is_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux};
+ end
+ //
+ // POST2: the extra word goes to the fixed location
+ // {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF} of the narrow storage
+ IO_FSM_STATE_LATENCY_POST2: begin
+ if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
+ if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
+ end
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Address Logic
+ //
+ reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next;
+ reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next;
+ reg [BANK_ADDR_W + OP_ADDR_W -1:0] out_addr_next;
+
+ wire [OP_ADDR_W -1:0] in_1_addr_next_op = in_1_addr_next[OP_ADDR_W -1:0];
+ wire [OP_ADDR_W -1:0] in_2_addr_next_op = in_2_addr_next[OP_ADDR_W -1:0];
+ wire [OP_ADDR_W -1:0] out_addr_next_op = out_addr_next [OP_ADDR_W -1:0];
+
+ assign in_1_addr_op_is_last = in_1_addr_op == word_index_last;
+ assign in_2_addr_op_is_last = in_2_addr_op == word_index_last;
+
+ assign in_1_addr_next_op_is_last = in_1_addr_next_op == word_index_last;
+ assign in_2_addr_next_op_is_last = in_2_addr_next_op == word_index_last;
+
+ // Address counters of the two input ports and the output port, stepped
+ // according to the NEXT FSM state:
+ //   PRE1       - load word 0 of the selected banks, lookahead = word 1
+ //   PRE2, BUSY - take the lookahead value, advance the lookahead by one
+ //   EXTRA      - advance input port 1 only, and only for an input opcode
+ //                on AUX_1 whose sel_in needs the extra word
+ // Any other next state leaves all registers unchanged.
+ always @(posedge clk) begin
+     case (io_fsm_state_next)
+
+         IO_FSM_STATE_LATENCY_PRE1: begin
+             in_1_addr_bank <= sel_in;
+             in_1_addr_op   <= OP_ADDR_ZERO;
+             in_2_addr_bank <= sel_in;
+             in_2_addr_op   <= OP_ADDR_ZERO;
+             out_addr_bank  <= sel_out;
+             out_addr_op    <= OP_ADDR_ZERO;
+
+             in_1_addr_next <= {sel_in,  OP_ADDR_ONE};
+             in_2_addr_next <= {sel_in,  OP_ADDR_ONE};
+             out_addr_next  <= {sel_out, OP_ADDR_ONE};
+         end
+
+         IO_FSM_STATE_LATENCY_PRE2,
+         IO_FSM_STATE_BUSY: begin
+             in_1_addr_bank <= in_1_addr_next[OP_ADDR_W +: BANK_ADDR_W];
+             in_1_addr_op   <= in_1_addr_next_op;
+             in_2_addr_bank <= in_2_addr_next[OP_ADDR_W +: BANK_ADDR_W];
+             in_2_addr_op   <= in_2_addr_next_op;
+             out_addr_bank  <= out_addr_next[OP_ADDR_W +: BANK_ADDR_W];
+             out_addr_op    <= out_addr_next_op;
+
+             in_1_addr_next <= in_1_addr_next + 1'b1;
+             in_2_addr_next <= in_2_addr_next + 1'b1;
+             out_addr_next  <= out_addr_next  + 1'b1;
+         end
+
+         IO_FSM_STATE_EXTRA: begin
+             if (opcode_is_input && sel_aux_is_1 && sel_in_needs_extra) begin
+                 in_1_addr_bank <= in_1_addr_next[OP_ADDR_W +: BANK_ADDR_W];
+                 in_1_addr_op   <= in_1_addr_next_op;
+                 in_1_addr_next <= in_1_addr_next + 1'b1;
+             end
+         end
+
+     endcase
+ end
+
+
+
+ //
+ // FSM Process
+ //
+ // State register: synchronous reset to IDLE, otherwise loads the
+ // combinational next-state value on every clock edge.
+ always @(posedge clk)
+ //
+ if (rst) io_fsm_state <= IO_FSM_STATE_IDLE;
+ else io_fsm_state <= io_fsm_state_next;
+
+
+ //
+ // Busy Exit Logic
+ //
+ reg io_fsm_done = 1'b0;
+
+ // Registered BUSY-exit flag. It is set for one cycle when, during BUSY
+ // with an input opcode, the lookahead word address of the port selected
+ // by sel_aux equals word_index_last; it is cleared in every other cycle.
+ always @(posedge clk)
+     if ((io_fsm_state == IO_FSM_STATE_BUSY) && opcode_is_input &&
+         ((sel_aux_is_1 && in_1_addr_next_op_is_last) ||
+          (sel_aux_is_2 && in_2_addr_next_op_is_last)))
+         io_fsm_done <= 1'b1;
+     else
+         io_fsm_done <= 1'b0;
+
+
+ //
+ // FSM Transition Logic
+ //
+ // Next-state function (purely combinational).
+ // Sequence: IDLE -(ena)-> PRE1 -> PRE2 -> BUSY -(io_fsm_done)-> EXTRA
+ //           -> POST1 -> POST2 -> IDLE.
+ always @* begin
+     //
+     case (io_fsm_state)
+         IO_FSM_STATE_IDLE:          io_fsm_state_next = ena         ? IO_FSM_STATE_LATENCY_PRE1 : IO_FSM_STATE_IDLE;
+         IO_FSM_STATE_LATENCY_PRE1:  io_fsm_state_next =               IO_FSM_STATE_LATENCY_PRE2;
+         IO_FSM_STATE_LATENCY_PRE2:  io_fsm_state_next =               IO_FSM_STATE_BUSY;
+         IO_FSM_STATE_BUSY:          io_fsm_state_next = io_fsm_done ? IO_FSM_STATE_EXTRA        : IO_FSM_STATE_BUSY;
+         IO_FSM_STATE_EXTRA:         io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST1;
+         IO_FSM_STATE_LATENCY_POST1: io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST2;
+         IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next =               IO_FSM_STATE_IDLE;
+         // Only seven of the eight 3-bit encodings are named states. The
+         // default keeps this process latch-free and sends the unused
+         // encoding back to IDLE instead of holding a stale next state.
+         default:                    io_fsm_state_next =               IO_FSM_STATE_IDLE;
+     endcase
+     //
+ end
+
+
+ //
+ // Ready Logic
+ //
+ reg rdy_reg = 1'b1;
+
+ assign rdy = rdy_reg;
+
+ // Ready flag: high after reset, dropped in IDLE on the cycle ena is seen,
+ // raised again in POST2; held in every other state.
+ always @(posedge clk) begin
+     if (rst)
+         rdy_reg <= 1'b1;
+     else if (io_fsm_state == IO_FSM_STATE_IDLE)
+         rdy_reg <= ~ena;
+     else if (io_fsm_state == IO_FSM_STATE_LATENCY_POST2)
+         rdy_reg <= 1'b1;
+ end
+
+
+endmodule
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
new file mode 100644
index 0000000..af21391
--- /dev/null
+++ b/rtl/modexpng_microcode.vh
@@ -0,0 +1,54 @@
+// Microcode word definitions. This header relies on BANK_ADDR_W and
+// BANK_DNC being declared before it is included.
+
+// Field widths, in bits
+localparam UOP_OPCODE_W = 4;
+localparam UOP_CRT_W    = 1;
+localparam UOP_NPQ_W    = 1;
+localparam UOP_AUX_W    = 1;
+// The ladder field is two bits wide: the layout below places it at [13:12]
+// and every UOP_LADDER_* encoding is a 2-bit value. With a width of 1 those
+// constants would be truncated and would no longer be distinct.
+localparam UOP_LADDER_W = 2;
+localparam UOP_SEL_W    = 4 * BANK_ADDR_W;
+
+localparam UOP_ADDR_W = 6; // 64 instructions
+
+// Start addresses of the two microprograms
+localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_USING_CRT   = 6'd0;
+localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_WITHOUT_CRT = 6'd31;
+
+
+// Total microcode word width and field layout (MSB first):
+//                 4              1           1           1           2              4*3=12
+localparam UOP_W = UOP_OPCODE_W + UOP_CRT_W + UOP_NPQ_W + UOP_AUX_W + UOP_LADDER_W + UOP_SEL_W;
+//                 [20:17]        [16]        [15]        [14]        [13:12]        [11:9][8:6][5:3][2:0]
+
+// OPCODE
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 4'd0;
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE   = 4'd1;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2;
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3;
+
+//localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 4'd0;
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 4'd8;
+
+// CRT
+localparam [UOP_CRT_W -1:0] UOP_CRT_X   = 1'b0;
+localparam [UOP_CRT_W -1:0] UOP_CRT_Y   = 1'b1;
+localparam [UOP_CRT_W -1:0] UOP_CRT_DNC = 1'bX;
+
+// NPQ
+localparam [UOP_NPQ_W -1:0] UOP_NPQ_N   = 1'b0;
+localparam [UOP_NPQ_W -1:0] UOP_NPQ_PQ  = 1'b1;
+localparam [UOP_NPQ_W -1:0] UOP_NPQ_DNC = 1'bX;
+
+// AUX
+localparam [UOP_AUX_W -1:0] UOP_AUX_1   = 1'b0;
+localparam [UOP_AUX_W -1:0] UOP_AUX_2   = 1'b1;
+localparam [UOP_AUX_W -1:0] UOP_AUX_DNC = 1'bX;
+
+// LADDER
+localparam [UOP_LADDER_W -1:0] UOP_LADDER_00  = 2'b00;
+localparam [UOP_LADDER_W -1:0] UOP_LADDER_11  = 2'b11;
+localparam [UOP_LADDER_W -1:0] UOP_LADDER_D   = 2'b01;
+localparam [UOP_LADDER_W -1:0] UOP_LADDER_PQ  = 2'b10;
+localparam [UOP_LADDER_W -1:0] UOP_LADDER_DNC = 2'bXX;
+
+
+// SEL
+localparam [UOP_SEL_W -1:0] UOP_SEL_DNC_ALL = {4{BANK_DNC}};
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index 6d63735..25fafb4 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -14,14 +14,16 @@ function integer cryptech_clog2;
//
endfunction
+localparam MAX_OP_W = 4096;
+
localparam WORD_W = 16;
localparam WORD_EXT_W = 18;
localparam MAC_W = 47;
-localparam BUS_DATA_W = 32;
+localparam BUS_DATA_W = 32;
localparam BUS_OP_ADDR_W = cryptech_clog2(MAX_OP_W / BUS_DATA_W);
-localparam MAX_OP_W = 4096;
+
localparam BANK_ADDR_W = 3;
localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W);
@@ -55,20 +57,41 @@ localparam [BANK_ADDR_W-1:0] BANK_RCMB_ML = 3'd0;
localparam [BANK_ADDR_W-1:0] BANK_RCMB_MH = 3'd1;
localparam [BANK_ADDR_W-1:0] BANK_RCMB_EXT = 3'd2; // [0] -> MH'
-localparam [BANK_ADDR_W-1:0] BANK_DONT_CARE = {BANK_ADDR_W{1'bX}};
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_M = 3'd0;
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_N = 3'd1;
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_N_FACTOR = 3'd2;
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_N_COEFF = 3'd3;/*
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_EXT = 3'd4; // [0] -> N_COEFF'*/
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_X = 3'd5;
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_Y = 3'd6;
+
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_D = 3'd0;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_P = 3'd1;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_P_FACTOR = 3'd2;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_P_COEFF = 3'd3;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_Q = 3'd4;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_Q_FACTOR = 3'd5;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_Q_COEFF = 3'd6;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_QINV = 3'd7;
+
+localparam [BANK_ADDR_W-1:0] BANK_OUT_S = 3'd0;
+localparam [BANK_ADDR_W-1:0] BANK_OUT_XM = 3'd1;
+localparam [BANK_ADDR_W-1:0] BANK_OUT_YM = 3'd2;
+
+localparam [BANK_ADDR_W-1:0] BANK_DNC = {BANK_ADDR_W{1'bX}};
localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0;
localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1;
localparam [OP_ADDR_W-1:0] OP_ADDR_ZERO = {OP_ADDR_W{1'b0}};
localparam [OP_ADDR_W-1:0] OP_ADDR_ONE = {{(OP_ADDR_W-1){1'b0}}, 1'b1};
-localparam [OP_ADDR_W-1:0] OP_ADDR_DONT_CARE = {OP_ADDR_W{1'bX}};
+localparam [OP_ADDR_W-1:0] OP_ADDR_DNC = {OP_ADDR_W{1'bX}};
localparam [WORD_W-1:0] WORD_NULL = {WORD_W{1'b0}};
localparam [WORD_EXT_W-1:0] WORD_EXT_NULL = {WORD_EXT_W{1'b0}};
-localparam [WORD_EXT_W-1:0] WORD_EXT_DONT_CARE = {WORD_EXT_W{1'bX}};
+localparam [WORD_EXT_W-1:0] WORD_EXT_DNC = {WORD_EXT_W{1'bX}};
-localparam [MAC_INDEX_W-1:0] MAC_INDEX_DONT_CARE = {MAC_INDEX_W{1'bX}};
+localparam [MAC_INDEX_W-1:0] MAC_INDEX_DNC = {MAC_INDEX_W{1'bX}};
-localparam [NUM_MULTS-1:0] MULT_BITMAP_ZEROES = {NUM_MULTS{1'b0}}; \ No newline at end of file
+localparam [NUM_MULTS-1:0] MULT_BITMAP_ZEROES = {NUM_MULTS{1'b0}};
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index fe4ffb9..f698c18 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -959,19 +959,19 @@ module modexpng_recombinator_block
task clear_wide;
begin
- _update_wide(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
+ _update_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
end
endtask
task clear_narrow;
begin
- _update_narrow(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
+ _update_narrow(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
end
endtask
task clear_rdct;
begin
- _update_rdct(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
+ _update_rdct(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
end
endtask
diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v
index aafb38c..a37333e 100644
--- a/rtl/modexpng_reductor.v
+++ b/rtl/modexpng_reductor.v
@@ -2,18 +2,14 @@ module modexpng_reductor
(
clk, rst,
ena, rdy,
- //fsm_state_next,
word_index_last,
- //dsp_xy_ce_p,
- //dsp_x_p, dsp_y_p,
- //col_index, col_index_last,
+ sel_wide_out, sel_narrow_out,
rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux,
- //rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid,
- rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid,
- rdct_final_xy_addr, rdct_final_x_dout, rdct_final_y_dout, rdct_final_xy_valid
+ rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid,
+ rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_dout, rdct_wide_y_dout, rdct_wide_xy_valid,
+ rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
);
-
//
// Headers
//
@@ -31,7 +27,10 @@ module modexpng_reductor
input [FSM_STATE_WIDTH-1:0] fsm_state_next;*/
input [7:0] word_index_last;/*
input dsp_xy_ce_p;
- *//*
+ */
+ input [2:0] sel_wide_out;
+ input [2:0] sel_narrow_out;
+ /*
input [9*47-1:0] dsp_x_p;
input [9*47-1:0] dsp_y_p;
input [ 4:0] col_index;
@@ -51,11 +50,17 @@ module modexpng_reductor
input [ 17:0] rcmb_final_y_dout;
input rcmb_final_xy_valid;
- output [ 7:0] rdct_final_xy_addr;
- output [ 17:0] rdct_final_x_dout;
- output [ 17:0] rdct_final_y_dout;
- output rdct_final_xy_valid;
+ output [ 2:0] rdct_wide_xy_bank;
+ output [ 7:0] rdct_wide_xy_addr;
+ output [ 17:0] rdct_wide_x_dout;
+ output [ 17:0] rdct_wide_y_dout;
+ output rdct_wide_xy_valid;
+ output [ 2:0] rdct_narrow_xy_bank;
+ output [ 7:0] rdct_narrow_xy_addr;
+ output [ 17:0] rdct_narrow_x_dout;
+ output [ 17:0] rdct_narrow_y_dout;
+ output rdct_narrow_xy_valid;
//
// Ready
@@ -178,41 +183,89 @@ module modexpng_reductor
//
// Reduction
//
- reg [ 7:0] rdct_xy_addr;
- reg [ 17:0] rdct_x_dout;
- reg [ 17:0] rdct_y_dout;
- reg rdct_xy_valid = 1'b0;
+ reg [ 2:0] wide_xy_bank;
+ reg [ 7:0] wide_xy_addr;
+ reg [ 17:0] wide_x_dout;
+ reg [ 17:0] wide_y_dout;
+ reg wide_xy_valid = 1'b0;
- assign rdct_final_xy_addr = rdct_xy_addr;
- assign rdct_final_x_dout = rdct_x_dout;
- assign rdct_final_y_dout = rdct_y_dout;
- assign rdct_final_xy_valid = rdct_xy_valid;
+ reg [ 2:0] narrow_xy_bank;
+ reg [ 7:0] narrow_xy_addr;
+ reg [ 17:0] narrow_x_dout;
+ reg [ 17:0] narrow_y_dout;
+ reg narrow_xy_valid = 1'b0;
- task _update_rdct;
+ assign rdct_wide_xy_bank = wide_xy_bank;
+ assign rdct_wide_xy_addr = wide_xy_addr;
+ assign rdct_wide_x_dout = wide_x_dout;
+ assign rdct_wide_y_dout = wide_y_dout;
+ assign rdct_wide_xy_valid = wide_xy_valid;
+
+ assign rdct_narrow_xy_bank = narrow_xy_bank;
+ assign rdct_narrow_xy_addr = narrow_xy_addr;
+ assign rdct_narrow_x_dout = narrow_x_dout;
+ assign rdct_narrow_y_dout = narrow_y_dout;
+ assign rdct_narrow_xy_valid = narrow_xy_valid;
+
+ task _update_rdct_wide;
+ input [ 2:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
input valid;
begin
- rdct_xy_addr <= addr;
- rdct_x_dout <= dout_x;
- rdct_y_dout <= dout_y;
- rdct_xy_valid <= valid;
+ wide_xy_bank <= bank;
+ wide_xy_addr <= addr;
+ wide_x_dout <= dout_x;
+ wide_y_dout <= dout_y;
+ wide_xy_valid <= valid;
end
endtask
- task set_rdct;
+ task _update_rdct_narrow;
+ input [ 2:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
+ input valid;
begin
- _update_rdct(addr, dout_x, dout_y, 1'b1);
+ narrow_xy_bank <= bank;
+ narrow_xy_addr <= addr;
+ narrow_x_dout <= dout_x;
+ narrow_y_dout <= dout_y;
+ narrow_xy_valid <= valid;
end
endtask
- task clear_rdct;
+ task set_rdct_wide;
+ input [ 2:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
begin
- _update_rdct(8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ _update_rdct_wide(bank, addr, dout_x, dout_y, 1'b1);
+ end
+ endtask
+
+ task set_rdct_narrow;
+ input [ 2:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ begin
+ _update_rdct_narrow(bank, addr, dout_x, dout_y, 1'b1);
+ end
+ endtask
+
+ task clear_rdct_wide;
+ begin
+ _update_rdct_wide(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ end
+ endtask
+
+ task clear_rdct_narrow;
+ begin
+ _update_rdct_narrow(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
end
endtask
@@ -232,23 +285,31 @@ module modexpng_reductor
//
always @(posedge clk)
//
- if (rst) clear_rdct;
- else begin
+ if (rst) begin
+ clear_rdct_wide;
+ clear_rdct_narrow;
+ end else begin
//
- clear_rdct;
+ clear_rdct_wide;
+ clear_rdct_narrow;
//
if (rcmb_xy_valid_dly3)
//
case (rcmb_xy_bank_dly3)
BANK_RCMB_MH:
- if (rcmb_xy_addr_dly3 == 8'd1)
- set_rdct(8'd0, sum_rdct_x_carry, sum_rdct_y_carry);
- else if (rcmb_xy_addr_dly3 > 8'd1)
- set_rdct(rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
+ if (rcmb_xy_addr_dly3 == 8'd1) begin
+ set_rdct_wide (sel_wide_out, 8'd0, sum_rdct_x_carry, sum_rdct_y_carry);
+ set_rdct_narrow(sel_narrow_out, 8'd0, sum_rdct_x_carry, sum_rdct_y_carry);
+ end else if (rcmb_xy_addr_dly3 > 8'd1) begin
+ set_rdct_wide (sel_wide_out, rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
+ set_rdct_narrow(sel_narrow_out, rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
+ end
- BANK_RCMB_EXT:
- set_rdct(word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
+ BANK_RCMB_EXT: begin
+ set_rdct_wide (sel_wide_out, word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
+ set_rdct_narrow(sel_narrow_out, word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
+ end
endcase
//
diff --git a/rtl/modexpng_sdp_36k_x16_x32_wrapper.v b/rtl/modexpng_sdp_36k_x16_x32_wrapper.v
index 69c5383..4c6fe46 100644
--- a/rtl/modexpng_sdp_36k_x16_x32_wrapper.v
+++ b/rtl/modexpng_sdp_36k_x16_x32_wrapper.v
@@ -25,7 +25,7 @@ module modexpng_sdp_36k_x16_x32_wrapper
input ena;
input wea;
input [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addra;
- input [ BUD_DATA_W -1:0] dina;
+ input [ BUS_DATA_W -1:0] dina;
input enb;
input regceb;
@@ -42,10 +42,10 @@ module modexpng_sdp_36k_x16_x32_wrapper
.BRAM_SIZE ("36Kb"),
- .WRITE_WIDTH (BUD_DATA_W),
+ .WRITE_WIDTH (BUS_DATA_W),
.READ_WIDTH (WORD_W),
- .DO_REG (0),
+ .DO_REG (1),
.WRITE_MODE ("READ_FIRST"),
.SRVAL (72'h000000000000000000),
diff --git a/rtl/modexpng_sdp_36k_x18_wrapper.v b/rtl/modexpng_sdp_36k_x18_wrapper.v
new file mode 100644
index 0000000..ded9425
--- /dev/null
+++ b/rtl/modexpng_sdp_36k_x18_wrapper.v
@@ -0,0 +1,67 @@
+module modexpng_sdp_36k_x18_wrapper // behavioral simple dual-port RAM: write-only port A, read-only port B
+(
+ clk, // single clock shared by both ports
+
+ ena, wea, // port A: enable and write enable
+ addra, dina, // port A: write address and write data
+
+ enb, regceb, // port B: read enable and output register clock enable
+ addrb, doutb // port B: read address and registered read data
+);
+ // Port A stores dina at mem[addra] on a rising clk edge when ena and wea are both high.
+ // Port B reads through two register stages (gated by enb, then by regceb) before driving doutb.
+ //
+ // Headers
+ //
+ `include "modexpng_parameters.vh"
+
+ // BANK_ADDR_W, OP_ADDR_W and WORD_EXT_W are not declared in this module; presumably they come from the header above
+ //
+ // Ports
+ //
+ input clk;
+
+ input ena; // port A enable; a write needs ena and wea together
+ input wea; // port A write enable
+ input [BANK_ADDR_W + OP_ADDR_W -1:0] addra; // write address, used directly as the index into mem
+ input [ WORD_EXT_W -1:0] dina; // word to store
+
+ input enb; // loads the first read stage from mem[addrb]
+ input regceb; // loads the second read stage from the first one
+ input [BANK_ADDR_W + OP_ADDR_W -1:0] addrb; // read address, used directly as the index into mem
+ output [ WORD_EXT_W -1:0] doutb; // contents of the second read stage
+
+ //
+ // Memory
+ // 2**(BANK_ADDR_W+OP_ADDR_W) words of WORD_EXT_W bits each; no initial contents are given here
+ reg [WORD_EXT_W -1:0] mem[0:2**(BANK_ADDR_W+OP_ADDR_W)-1];
+
+ //
+ // Write Port
+ // Synchronous write; the nonblocking store means a same-edge read of this address still sees the old word
+ always @(posedge clk)
+ //
+ if (ena && wea)
+ mem[addra] <= dina;
+
+ //
+ // Read Port
+ // Two-stage read pipeline; neither stage has a reset or an initial value, and each holds its value while its enable is low
+ reg [WORD_EXT_W -1:0] doutb_reg1;
+ reg [WORD_EXT_W -1:0] doutb_reg2;
+
+ assign doutb = doutb_reg2;
+
+ always @(posedge clk)
+ // stage 1: capture the addressed word while enb is high
+ if (enb)
+ doutb_reg1 <= mem[addrb];
+
+ always @(posedge clk)
+ // stage 2: forward stage 1 to the output while regceb is high
+ if (regceb)
+ doutb_reg2 <= doutb_reg1;
+
+
+
+endmodule
diff --git a/rtl/modexpng_sdp_36k_wrapper.v b/rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v
index 433ee55..b9e40ae 100644
--- a/rtl/modexpng_sdp_36k_wrapper.v
+++ b/rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v
@@ -1,4 +1,4 @@
-module modexpng_sdp_36k_wrapper
+module modexpng_sdp_36k_x18_wrapper
(
clk,
diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v
index d5b9b24..f1d5ae2 100644
--- a/rtl/modexpng_storage_block.v
+++ b/rtl/modexpng_storage_block.v
@@ -29,26 +29,7 @@ module modexpng_storage_block
rd_narrow_xy_bank,
rd_narrow_xy_addr,
rd_narrow_x_dout,
- rd_narrow_y_dout,
-
- bus_cs,
- bus_we,
- bus_addr,
- bus_data_wr,
- bus_data_rd,
-
- in_1_en,
- in_1_addr,
- in_1_dout,
-
- in_2_en,
- in_2_addr,
- in_2_dout,
-
- out_en,
- out_we,
- out_addr,
- out_din
+ rd_narrow_y_dout
);
//
@@ -93,25 +74,6 @@ module modexpng_storage_block
output [ WORD_EXT_W -1:0] rd_narrow_x_dout;
output [ WORD_EXT_W -1:0] rd_narrow_y_dout;
- input bus_cs;
- input bus_we;
- input [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
- input [ BUS_DATA_W -1:0] bus_data_wr;
- output [ BUS_DATA_W -1:0] bus_data_rd;
-
- input in_1_en;
- input [ BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr;
- output [ WORD_W -1:0] in_1_dout;
-
- input in_2_en;
- input [ BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr;
- output [ WORD_W -1:0] in_2_dout;
-
- input out_en;
- input out_we;
- input [ BANK_ADDR_W + OP_ADDR_W -1:0] out_addr;
- input [ WORD_W -1:0] out_din;
-
//
// Internal Registers
@@ -119,8 +81,6 @@ module modexpng_storage_block
reg rd_wide_xy_reg_ena = 1'b0;
reg rd_wide_xy_reg_ena_aux = 1'b0;
reg rd_narrow_xy_reg_ena = 1'b0;
- reg in_1_reg_en = 1'b0;
- reg in_2_reg_en = 1'b0;
always @(posedge clk)
//
@@ -128,14 +88,10 @@ module modexpng_storage_block
rd_wide_xy_reg_ena <= 1'b0;
rd_wide_xy_reg_ena_aux <= 1'b0;
rd_narrow_xy_reg_ena <= 1'b0;
- in_1_reg_en <= 1'b0;
- in_2_reg_en <= 1'b0;
end else begin
rd_wide_xy_reg_ena <= rd_wide_xy_ena;
rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux;
rd_narrow_xy_reg_ena <= rd_narrow_xy_ena;
- in_1_reg_en <= in_1_en;
- in_2_reg_en <= in_2_en;
end
//
@@ -161,7 +117,7 @@ module modexpng_storage_block
//
assign rd_wide_xy_offset[z] = {1'b0, rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]};
//
- modexpng_sdp_36k_wrapper wide_bram_x
+ modexpng_sdp_36k_x18_wrapper wide_bram_x
(
.clk (clk),
@@ -176,7 +132,7 @@ module modexpng_storage_block
.doutb (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W])
);
//
- modexpng_sdp_36k_wrapper wide_bram_y
+ modexpng_sdp_36k_x18_wrapper wide_bram_y
(
.clk (clk),
@@ -197,7 +153,7 @@ module modexpng_storage_block
//
// Auxilary Storage
//
- modexpng_sdp_36k_wrapper wide_bram_x_aux
+ modexpng_sdp_36k_x18_wrapper wide_bram_x_aux
(
.clk (clk),
@@ -212,7 +168,7 @@ module modexpng_storage_block
.doutb (rd_wide_x_dout_aux)
);
//
- modexpng_sdp_36k_wrapper wide_bram_y_aux
+ modexpng_sdp_36k_x18_wrapper wide_bram_y_aux
(
.clk (clk),
@@ -230,7 +186,7 @@ module modexpng_storage_block
//
// "Narrow" Storage
//
- modexpng_sdp_36k_wrapper narrow_bram_x
+ modexpng_sdp_36k_x18_wrapper narrow_bram_x
(
.clk (clk),
@@ -245,7 +201,7 @@ module modexpng_storage_block
.doutb (rd_narrow_x_dout)
);
- modexpng_sdp_36k_wrapper narrow_bram_y
+ modexpng_sdp_36k_x18_wrapper narrow_bram_y
(
.clk (clk),
@@ -260,87 +216,6 @@ module modexpng_storage_block
.doutb (rd_narrow_y_dout)
);
- //
- // INPUT, OUTPUT Storage Buffers
- //
- wire [ 2 -1:0] bus_addr_msb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W +: 2];
- wire [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr_lsb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W -1:0];
- reg [ 2 -1:0] bus_addr_msb_dly;
- wire [ BUS_DATA_W -1:0] bus_data_rd_input_1;
- wire [ BUS_DATA_W -1:0] bus_data_rd_output;
-
- wire bus_data_wr_input_1 = bus_data_wr && (bus_addr_msb == 2'd0);
- wire bus_data_wr_input_2 = bus_data_wr && (bus_addr_msb == 2'd1);
-
- /* INPUT_1 */
- modexpng_sdp_36k_x16_x32_wrapper bram_input_1
- (
- .clk (clk), // core clock
- .clk_bus (clk_bus), // bus clock
-
- .ena (bus_cs), // bus side read-write
- .wea (bus_data_wr_input_1), //
- .addra (bus_addr_lsb), //
- .dina (bus_data_wr), //
- .douta (bus_data_rd_input_1), //
-
- .enb (in_1_en), // core side read-only
- .regceb (in_1_reg_en), //
- .addrb (in_1_addr), //
- .doutb (in_1_dout) //
- );
-
-
- /* INPUT_2 */
- modexpng_sdp_36k_x16_x32_wrapper bram_input_2
- (
- .clk (clk), // core clock
- .clk_bus (clk_bus), // bus clock
-
- .ena (bus_cs), // bus side write-only
- .wea (bus_data_wr_input_2), //
- .addra (bus_addr_lsb), //
- .dina (bus_data_wr), //
-
- .enb (in_2_en), // core side read-only
- .regceb (in_2_reg_en), //
- .addrb (in_2_addr), //
- .doutb (in_2_dout) //
- );
-
-
- /* OUTPUT */
- modexpng_sdp_36k_x32_x16_wrapper bram_output
- (
- .clk (clk), // core clock
- .clk_bus (clk_bus), // bus clock
-
- .ena (out_en), // core side write-only
- .wea (out_we), //
- .addra (out_addr), //
- .dina (out_din), //
-
- .enb (bus_cs), // bus side read-only
- .addrb (bus_addr_lsb), //
- .doutb (bus_data_rd_output) //
- );
-
- reg [31: 0] bus_data_rd_mux;
- assign bus_data_rd = bus_data_rd_mux;
-
- always @(posedge clk_bus)
- bus_addr_msb_dly <= bus_addr_msb;
-
- always @(*)
- //
- case (bus_addr_msb_dly)
- //
- 2'd0: bus_data_rd_mux = bus_data_rd_input_1;
- 2'd1: bus_data_rd_mux = 32'hDEADC0DE;
- 2'd2: bus_data_rd_mux = bus_data_rd_output;
- 2'd3: bus_data_rd_mux = 32'hDEADC0DE;
- //
- endcase
endmodule
diff --git a/rtl/modexpng_storage_manager.v b/rtl/modexpng_storage_manager.v
index e5ac83f..6b34bed 100644
--- a/rtl/modexpng_storage_manager.v
+++ b/rtl/modexpng_storage_manager.v
@@ -36,7 +36,19 @@ module modexpng_storage_manager
rcmb_narrow_xy_bank,
rcmb_narrow_xy_addr,
rcmb_narrow_x_din,
- rcmb_narrow_y_din
+ rcmb_narrow_y_din,
+
+ rdct_wide_xy_bank,
+ rdct_wide_xy_addr,
+ rdct_wide_x_din,
+ rdct_wide_y_din,
+ rdct_wide_xy_valid,
+
+ rdct_narrow_xy_bank,
+ rdct_narrow_xy_addr,
+ rdct_narrow_x_din,
+ rdct_narrow_y_din,
+ rdct_narrow_xy_valid
);
@@ -76,18 +88,30 @@ module modexpng_storage_manager
input [ WORD_EXT_W -1:0] ext_narrow_x_din;
input [ WORD_EXT_W -1:0] ext_narrow_y_din;
- input rcmb_wide_xy_ena;
- input [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
+ input rcmb_wide_xy_ena;
+ input [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
input [ 7:0] rcmb_wide_xy_addr;
input [17:0] rcmb_wide_x_din;
input [17:0] rcmb_wide_y_din;
- input rcmb_narrow_xy_ena;
- input [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
+ input rcmb_narrow_xy_ena;
+ input [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
input [ 7:0] rcmb_narrow_xy_addr;
input [17:0] rcmb_narrow_x_din;
input [17:0] rcmb_narrow_y_din;
+ input [ 2:0] rdct_wide_xy_bank;
+ input [ 7:0] rdct_wide_xy_addr;
+ input [ 17:0] rdct_wide_x_din;
+ input [ 17:0] rdct_wide_y_din;
+ input rdct_wide_xy_valid;
+
+ input [ 2:0] rdct_narrow_xy_bank;
+ input [ 7:0] rdct_narrow_xy_addr;
+ input [ 17:0] rdct_narrow_x_din;
+ input [ 17:0] rdct_narrow_y_din;
+ input rdct_narrow_xy_valid;
+
reg wr_wide_xy_ena_reg = 1'b0;
reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg;
reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_reg;
@@ -152,35 +176,37 @@ module modexpng_storage_manager
task disable_wide;
begin
- _update_wide(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
+ _update_wide(1'b0, BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
end
endtask
task disable_narrow;
begin
- _update_narrow(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
+ _update_narrow(1'b0, BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
end
endtask
always @(posedge clk)
//
- if (rst) disable_wide;
+ if (rst) disable_wide;
else begin
//
- if (ext_wide_xy_ena) enable_wide(ext_wide_xy_bank, ext_wide_xy_addr, ext_wide_x_din, ext_wide_y_din);
- else if (rcmb_wide_xy_ena) enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din);
- else disable_wide;
+ if (ext_wide_xy_ena) enable_wide(ext_wide_xy_bank, ext_wide_xy_addr, ext_wide_x_din, ext_wide_y_din);
+ else if (rcmb_wide_xy_ena) enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din);
+ else if (rdct_wide_xy_valid) enable_wide(rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_din, rdct_wide_y_din);
+ else disable_wide;
//
end
always @(posedge clk)
//
- if (rst) disable_narrow;
+ if (rst) disable_narrow;
else begin
//
- if (ext_narrow_xy_ena) enable_narrow(ext_narrow_xy_bank, ext_narrow_xy_addr, ext_narrow_x_din, ext_narrow_y_din);
- else if (rcmb_narrow_xy_ena) enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din);
- else disable_narrow;
+ if (ext_narrow_xy_ena) enable_narrow(ext_narrow_xy_bank, ext_narrow_xy_addr, ext_narrow_x_din, ext_narrow_y_din);
+ else if (rcmb_narrow_xy_ena) enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din);
+ else if (rdct_narrow_xy_valid) enable_narrow(rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_din, rdct_narrow_y_din);
+ else disable_narrow;
//
end
diff --git a/rtl/modexpng_tdp_36k_x16_x32_wrapper.v b/rtl/modexpng_tdp_36k_x16_x32_wrapper.v
index 37a5cbc..40930f3 100644
--- a/rtl/modexpng_tdp_36k_x16_x32_wrapper.v
+++ b/rtl/modexpng_tdp_36k_x16_x32_wrapper.v
@@ -1,4 +1,4 @@
-module modexpng_sdp_36k_x16_x32_wrapper
+module modexpng_tdp_36k_x16_x32_wrapper
(
clk, clk_bus,
@@ -25,8 +25,8 @@ module modexpng_sdp_36k_x16_x32_wrapper
input ena;
input wea;
input [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addra;
- input [ BUD_DATA_W -1:0] dina;
- output [ BUD_DATA_W -1:0] douta;
+ input [ BUS_DATA_W -1:0] dina;
+ output [ BUS_DATA_W -1:0] douta;
input enb;
input regceb;
@@ -42,8 +42,8 @@ module modexpng_sdp_36k_x16_x32_wrapper
.DEVICE ("7SERIES"),
.BRAM_SIZE ("36Kb"),
- .WRITE_WIDTH_A (BUD_DATA_W),
- .READ_WIDTH_A (BUD_DATA_W),
+ .WRITE_WIDTH_A (BUS_DATA_W),
+ .READ_WIDTH_A (BUS_DATA_W),
.WRITE_WIDTH_B (WORD_W),
.READ_WIDTH_B (WORD_W),
@@ -61,7 +61,7 @@ module modexpng_sdp_36k_x16_x32_wrapper
.INIT_B (36'h000000000),
.INIT_FILE ("NONE"),
- .SIM_COLLISION_CHECK ("NONE"),
+ .SIM_COLLISION_CHECK ("NONE")
)
BRAM_TDP_MACRO_inst
(
@@ -72,9 +72,9 @@ module modexpng_sdp_36k_x16_x32_wrapper
.ENA (ena),
.REGCEA (1'b0),
.WEA ({4{wea}}),
- .ADDRA (),
- .DIA (),
- .DOA (),
+ .ADDRA (addra),
+ .DIA (dina),
+ .DOA (douta),
.CLKB (clk),
.ENB (enb),
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
new file mode 100644
index 0000000..d0b6253
--- /dev/null
+++ b/rtl/modexpng_uop_rom.v
@@ -0,0 +1,37 @@
+module modexpng_uop_rom // synchronous micro-operation ROM: registered lookup of data by addr
+(
+ clk,
+ addr,
+ data
+);
+ // UOP_ADDR_W, UOP_W and the UOP_*/BANK_* constants are not declared here; presumably they come from the two headers below
+ `include "modexpng_parameters.vh"
+ `include "modexpng_microcode.vh"
+ // data is rewritten on every rising clk edge (no enable, no reset), so it reflects the addr sampled at that edge
+ input wire clk; // clock for the registered output
+ input wire [UOP_ADDR_W -1:0] addr; // index of the micro-operation to fetch
+ output reg [UOP_W -1:0] data; // micro-operation word selected by addr
+ // NOTE(review): the four trailing bank fields look like source-wide, source-narrow, destination-wide, destination-narrow - confirm in modexpng_microcode.vh
+ always @(posedge clk)
+ // NOTE(review): the case labels are 6-bit literals, which assumes UOP_ADDR_W == 6 - confirm against the header
+ case (addr) // entries 0..5: INPUT_TO_WIDE opcodes, issued as UOP_CRT_X / UOP_CRT_Y pairs
+ 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC };
+ 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC };
+ 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC };
+ 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC };
+ 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC };
+ 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC };
+ // entries 6..11: INPUT_TO_NARROW opcodes, again as UOP_CRT_X / UOP_CRT_Y pairs
+ 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF};
+ 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF};
+ 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A };
+ 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A };
+ 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E };
+ 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E };
+ // entry 12: one MODULAR_MULTIPLY opcode naming the WIDE_A/NARROW_A and WIDE_B/NARROW_B banks
+ 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B };
+ // every address not listed above yields the STOP opcode
+ default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL};
+ endcase
+
+endmodule