aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-10-03 16:47:39 +0300
committer Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-10-03 16:47:39 +0300
commit e340b1489b08905e3d8acd17686e178028de7922 (patch)
tree 8804029e5fcad42a52745fe7951ac4d203ba3c35
parent 8ee5a19240722f397d55f57a426992350f8019a3 (diff)
Added more micro-operations and a "general worker" module. The worker is basically
a block memory data mover, but it can also do some supporting operations required for the Garner's formula part of the exponentiation (CRT recombination).
-rw-r--r-- rtl/modexpng_core_top.v 895
-rw-r--r-- rtl/modexpng_dsp_array_block.v 8
-rw-r--r-- rtl/modexpng_general_worker.v 679
-rw-r--r-- rtl/modexpng_io_block.v 10
-rw-r--r-- rtl/modexpng_io_manager.v 347
-rw-r--r-- rtl/modexpng_microcode.vh 21
-rw-r--r-- rtl/modexpng_mmm_dual.v 43
-rw-r--r-- rtl/modexpng_parameters.vh 4
-rw-r--r-- rtl/modexpng_reductor.v 28
-rw-r--r-- rtl/modexpng_storage_block.v 126
-rw-r--r-- rtl/modexpng_storage_manager.v 167
-rw-r--r-- rtl/modexpng_uop_rom.v 61
12 files changed, 1734 insertions, 655 deletions
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index e117e5d..6b194dc 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -71,11 +71,13 @@ module modexpng_core_top
wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ];
wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ];
- wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP;
- wire uop_opcode_is_io = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) ||
- (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ||
- (uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW) ;
- wire uop_opcode_is_mmm = uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY;
+ wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ;
+ wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) ||
+ (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ;
+ wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ;
+ wire uop_opcode_is_mmm = uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ;
+ wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
+ (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ;
wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT;
wire [UOP_ADDR_W -1:0] uop_addr_next = uop_addr + 1'b1;
@@ -101,87 +103,135 @@ module modexpng_core_top
//
// Storage Interfaces (X, Y)
//
- wire wr_wide_xy_ena_x;
- wire [BANK_ADDR_W -1:0] wr_wide_xy_bank_x;
- wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_x;
- wire [ WORD_EXT_W -1:0] wr_wide_x_din_x;
- wire [ WORD_EXT_W -1:0] wr_wide_y_din_x;
-
- wire wr_narrow_xy_ena_x;
- wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x;
- wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_x;
- wire [ WORD_EXT_W -1:0] wr_narrow_x_din_x;
- wire [ WORD_EXT_W -1:0] wr_narrow_y_din_x;
-
- wire rd_wide_xy_ena_x;
- wire rd_wide_xy_ena_aux_x;
- wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_x;
- wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x;
- wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_x;
- wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_x;
- wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout_x;
- wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout_x;
- wire [ WORD_EXT_W -1:0] rd_wide_x_dout_aux_x;
- wire [ WORD_EXT_W -1:0] rd_wide_y_dout_aux_x;
-
- wire rd_narrow_xy_ena_x;
- wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_x;
- wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x;
- wire [ WORD_EXT_W -1:0] rd_narrow_x_dout_x;
- wire [ WORD_EXT_W -1:0] rd_narrow_y_dout_x;
-
- wire ext_wide_xy_ena_x;
- wire [BANK_ADDR_W -1:0] ext_wide_xy_bank_x;
- wire [ OP_ADDR_W -1:0] ext_wide_xy_addr_x;
- wire [ WORD_EXT_W -1:0] ext_wide_x_din_x;
- wire [ WORD_EXT_W -1:0] ext_wide_y_din_x;
-
- wire ext_narrow_xy_ena_x;
- wire [BANK_ADDR_W -1:0] ext_narrow_xy_bank_x;
- wire [ OP_ADDR_W -1:0] ext_narrow_xy_addr_x;
- wire [ WORD_EXT_W -1:0] ext_narrow_x_din_x;
- wire [ WORD_EXT_W -1:0] ext_narrow_y_din_x;
-
- wire wr_wide_xy_ena_y;
- wire [BANK_ADDR_W -1:0] wr_wide_xy_bank_y;
- wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_y;
- wire [ WORD_EXT_W -1:0] wr_wide_x_din_y;
- wire [ WORD_EXT_W -1:0] wr_wide_y_din_y;
-
- wire wr_narrow_xy_ena_y;
- wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y;
- wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y;
- wire [ WORD_EXT_W -1:0] wr_narrow_x_din_y;
- wire [ WORD_EXT_W -1:0] wr_narrow_y_din_y;
-
- wire rd_wide_xy_ena_y;
- wire rd_wide_xy_ena_aux_y;
- wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_y;
- wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_y;
- wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_y;
- wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_y;
- wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout_y;
- wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout_y;
- wire [ WORD_EXT_W -1:0] rd_wide_x_dout_aux_y;
- wire [ WORD_EXT_W -1:0] rd_wide_y_dout_aux_y;
-
- wire rd_narrow_xy_ena_y;
- wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_y;
- wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y;
- wire [ WORD_EXT_W -1:0] rd_narrow_x_dout_y;
- wire [ WORD_EXT_W -1:0] rd_narrow_y_dout_y;
-
- wire ext_wide_xy_ena_y;
- wire [BANK_ADDR_W -1:0] ext_wide_xy_bank_y;
- wire [ OP_ADDR_W -1:0] ext_wide_xy_addr_y;
- wire [ WORD_EXT_W -1:0] ext_wide_x_din_y;
- wire [ WORD_EXT_W -1:0] ext_wide_y_din_y;
-
- wire ext_narrow_xy_ena_y;
- wire [BANK_ADDR_W -1:0] ext_narrow_xy_bank_y;
- wire [ OP_ADDR_W -1:0] ext_narrow_xy_addr_y;
- wire [ WORD_EXT_W -1:0] ext_narrow_x_din_y;
- wire [ WORD_EXT_W -1:0] ext_narrow_y_din_y;
+ wire wr_wide_xy_ena_x; // \ \
+ wire [ BANK_ADDR_W -1:0] wr_wide_xy_bank_x; // | WIDE | WR
+ wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_x; // | |
+ wire [ WORD_EXT_W -1:0] wr_wide_x_data_x; // | |
+ wire [ WORD_EXT_W -1:0] wr_wide_y_data_x; // / |
+ // |
+ wire wr_narrow_xy_ena_x; // \ |
+ wire [ BANK_ADDR_W -1:0] wr_narrow_xy_bank_x; // | NARROW |
+ wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_x; // | |
+ wire [ WORD_EXT_W -1:0] wr_narrow_x_data_x; // | |
+ wire [ WORD_EXT_W -1:0] wr_narrow_y_data_x; // / /
+ //
+ wire rd_wide_xy_ena_x; // \ \
+ wire rd_wide_xy_ena_aux_x; // | WIDE | RD
+ wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_x; // | |
+ wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x; // | |
+ wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_x; // | |
+ wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_x; // | |
+ wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_data_x; // | |
+ wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_data_x; // | |
+ wire [ WORD_EXT_W -1:0] rd_wide_x_data_aux_x; // | |
+ wire [ WORD_EXT_W -1:0] rd_wide_y_data_aux_x; // / |
+ // |
+ wire rd_narrow_xy_ena_x; // \ |
+ wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_x; // | NARROW |
+ wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x; // | |
+ wire [ WORD_EXT_W -1:0] rd_narrow_x_data_x; // | |
+ wire [ WORD_EXT_W -1:0] rd_narrow_y_data_x; // / /
+ //
+ wire wrk_rd_wide_xy_ena_x; // \ \
+ wire [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x; // | WIDE | WRK
+ wire [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x; // | |
+ wire [ WORD_EXT_W -1:0] wrk_rd_wide_x_data_x; // | |
+ wire [ WORD_EXT_W -1:0] wrk_rd_wide_y_data_x; // / |
+ // |
+ wire wrk_rd_narrow_xy_ena_x; // \ |
+ wire [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x; // | NARROW |
+ wire [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x; // | |
+ wire [ WORD_EXT_W -1:0] wrk_rd_narrow_x_data_x; // | |
+ wire [ WORD_EXT_W -1:0] wrk_rd_narrow_y_data_x; // / /
+
+ wire wrk_wr_wide_xy_ena_x; // \ \
+ wire [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x; // | WIDE | WRK
+ wire [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x; // | |
+ wire [ WORD_EXT_W -1:0] wrk_wr_wide_x_data_x; // | |
+ wire [ WORD_EXT_W -1:0] wrk_wr_wide_y_data_x; // / |
+ // |
+ wire wrk_wr_narrow_xy_ena_x; // \ |
+ wire [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x; // | NARROW |
+ wire [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x; // | |
+ wire [ WORD_EXT_W -1:0] wrk_wr_narrow_x_data_x; // | |
+ wire [ WORD_EXT_W -1:0] wrk_wr_narrow_y_data_x; // / /
+ //
+ wire io_wide_xy_ena_x; // \ \
+ wire [ BANK_ADDR_W -1:0] io_wide_xy_bank_x; // | WIDE | IO
+ wire [ OP_ADDR_W -1:0] io_wide_xy_addr_x; // | |
+ wire [ WORD_EXT_W -1:0] io_wide_x_data_x; // | |
+ wire [ WORD_EXT_W -1:0] io_wide_y_data_x; // / |
+ // |
+ wire io_narrow_xy_ena_x; // \ |
+ wire [ BANK_ADDR_W -1:0] io_narrow_xy_bank_x; // | NARROW |
+ wire [ OP_ADDR_W -1:0] io_narrow_xy_addr_x; // | |
+ wire [ WORD_EXT_W -1:0] io_narrow_x_data_x; // | |
+ wire [ WORD_EXT_W -1:0] io_narrow_y_data_x; // / /
+ //
+ wire wr_wide_xy_ena_y; // \
+ wire [ BANK_ADDR_W -1:0] wr_wide_xy_bank_y; //
+ wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_y; //
+ wire [ WORD_EXT_W -1:0] wr_wide_x_data_y; //
+ wire [ WORD_EXT_W -1:0] wr_wide_y_data_y; //
+ //
+ wire wr_narrow_xy_ena_y; //
+ wire [ BANK_ADDR_W -1:0] wr_narrow_xy_bank_y; //
+ wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y; //
+ wire [ WORD_EXT_W -1:0] wr_narrow_x_data_y; //
+ wire [ WORD_EXT_W -1:0] wr_narrow_y_data_y; //
+ //
+ wire rd_wide_xy_ena_y; //
+ wire rd_wide_xy_ena_aux_y; //
+ wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_y; //
+ wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_y; //
+ wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_y; //
+ wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_y; //
+ wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_data_y; //
+ wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_data_y; //
+ wire [ WORD_EXT_W -1:0] rd_wide_x_data_aux_y; //
+ wire [ WORD_EXT_W -1:0] rd_wide_y_data_aux_y; //
+ //
+ wire rd_narrow_xy_ena_y; //
+ wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_y; //
+ wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y; //
+ wire [ WORD_EXT_W -1:0] rd_narrow_x_data_y; //
+ wire [ WORD_EXT_W -1:0] rd_narrow_y_data_y; //
+ //
+ wire wrk_rd_wide_xy_ena_y; //
+ wire [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y; //
+ wire [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y; //
+ wire [ WORD_EXT_W -1:0] wrk_rd_wide_x_data_y; //
+ wire [ WORD_EXT_W -1:0] wrk_rd_wide_y_data_y; //
+ //
+ wire wrk_rd_narrow_xy_ena_y; //
+ wire [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y; //
+ wire [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y; //
+ wire [ WORD_EXT_W -1:0] wrk_rd_narrow_x_data_y; //
+ wire [ WORD_EXT_W -1:0] wrk_rd_narrow_y_data_y; //
+
+ wire wrk_wr_wide_xy_ena_y; //
+ wire [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y; //
+ wire [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y; //
+ wire [ WORD_EXT_W -1:0] wrk_wr_wide_x_data_y; //
+ wire [ WORD_EXT_W -1:0] wrk_wr_wide_y_data_y; //
+ //
+ wire wrk_wr_narrow_xy_ena_y; //
+ wire [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y; //
+ wire [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y; //
+ wire [ WORD_EXT_W -1:0] wrk_wr_narrow_x_data_y; //
+ wire [ WORD_EXT_W -1:0] wrk_wr_narrow_y_data_y; //
+ //
+ wire io_wide_xy_ena_y; //
+ wire [ BANK_ADDR_W -1:0] io_wide_xy_bank_y; //
+ wire [ OP_ADDR_W -1:0] io_wide_xy_addr_y; //
+ wire [ WORD_EXT_W -1:0] io_wide_x_data_y; //
+ wire [ WORD_EXT_W -1:0] io_wide_y_data_y; //
+ //
+ wire io_narrow_xy_ena_y; //
+ wire [ BANK_ADDR_W -1:0] io_narrow_xy_bank_y; //
+ wire [ OP_ADDR_W -1:0] io_narrow_xy_addr_y; //
+ wire [ WORD_EXT_W -1:0] io_narrow_x_data_y; //
+ wire [ WORD_EXT_W -1:0] io_narrow_y_data_y; //
//
@@ -189,38 +239,38 @@ module modexpng_core_top
//
wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_x;
wire [ OP_ADDR_W -1:0] rcmb_wide_xy_addr_x;
- wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout_x;
- wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout_x;
+ wire [ WORD_EXT_W -1:0] rcmb_wide_x_data_x;
+ wire [ WORD_EXT_W -1:0] rcmb_wide_y_data_x;
wire rcmb_wide_xy_valid_x;
wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_x;
wire [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr_x;
- wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout_x;
- wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout_x;
+ wire [ WORD_EXT_W -1:0] rcmb_narrow_x_data_x;
+ wire [ WORD_EXT_W -1:0] rcmb_narrow_y_data_x;
wire rcmb_narrow_xy_valid_x;
wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_x;
wire [ OP_ADDR_W -1:0] rcmb_final_xy_addr_x;
- wire [ WORD_EXT_W -1:0] rcmb_final_x_dout_x;
- wire [ WORD_EXT_W -1:0] rcmb_final_y_dout_x;
+ wire [ WORD_EXT_W -1:0] rcmb_final_x_data_x;
+ wire [ WORD_EXT_W -1:0] rcmb_final_y_data_x;
wire rcmb_final_xy_valid_x;
wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_y;
wire [ OP_ADDR_W -1:0] rcmb_wide_xy_addr_y;
- wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout_y;
- wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout_y;
+ wire [ WORD_EXT_W -1:0] rcmb_wide_x_data_y;
+ wire [ WORD_EXT_W -1:0] rcmb_wide_y_data_y;
wire rcmb_wide_xy_valid_y;
wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_y;
wire [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr_y;
- wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout_y;
- wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout_y;
+ wire [ WORD_EXT_W -1:0] rcmb_narrow_x_data_y;
+ wire [ WORD_EXT_W -1:0] rcmb_narrow_y_data_y;
wire rcmb_narrow_xy_valid_y;
wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_y;
wire [ OP_ADDR_W -1:0] rcmb_final_xy_addr_y;
- wire [ WORD_EXT_W -1:0] rcmb_final_x_dout_y;
- wire [ WORD_EXT_W -1:0] rcmb_final_y_dout_y;
+ wire [ WORD_EXT_W -1:0] rcmb_final_x_data_y;
+ wire [ WORD_EXT_W -1:0] rcmb_final_y_data_y;
wire rcmb_final_xy_valid_y;
@@ -229,26 +279,26 @@ module modexpng_core_top
//
wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_x;
wire [ OP_ADDR_W -1:0] rdct_wide_xy_addr_x;
- wire [ WORD_EXT_W -1:0] rdct_wide_x_dout_x;
- wire [ WORD_EXT_W -1:0] rdct_wide_y_dout_x;
+ wire [ WORD_EXT_W -1:0] rdct_wide_x_data_x;
+ wire [ WORD_EXT_W -1:0] rdct_wide_y_data_x;
wire rdct_wide_xy_valid_x;
wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_x;
wire [ OP_ADDR_W -1:0] rdct_narrow_xy_addr_x;
- wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout_x;
- wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout_x;
+ wire [ WORD_EXT_W -1:0] rdct_narrow_x_data_x;
+ wire [ WORD_EXT_W -1:0] rdct_narrow_y_data_x;
wire rdct_narrow_xy_valid_x;
wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_y;
wire [ OP_ADDR_W -1:0] rdct_wide_xy_addr_y;
- wire [ WORD_EXT_W -1:0] rdct_wide_x_dout_y;
- wire [ WORD_EXT_W -1:0] rdct_wide_y_dout_y;
+ wire [ WORD_EXT_W -1:0] rdct_wide_x_data_y;
+ wire [ WORD_EXT_W -1:0] rdct_wide_y_data_y;
wire rdct_wide_xy_valid_y;
wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_y;
wire [ OP_ADDR_W -1:0] rdct_narrow_xy_addr_y;
- wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout_y;
- wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout_y;
+ wire [ WORD_EXT_W -1:0] rdct_narrow_x_data_y;
+ wire [ WORD_EXT_W -1:0] rdct_narrow_y_data_y;
wire rdct_narrow_xy_valid_y;
@@ -263,14 +313,14 @@ module modexpng_core_top
.wr_wide_xy_ena (wr_wide_xy_ena_x),
.wr_wide_xy_bank (wr_wide_xy_bank_x),
.wr_wide_xy_addr (wr_wide_xy_addr_x),
- .wr_wide_x_din (wr_wide_x_din_x),
- .wr_wide_y_din (wr_wide_y_din_x),
+ .wr_wide_x_din (wr_wide_x_data_x),
+ .wr_wide_y_din (wr_wide_y_data_x),
.wr_narrow_xy_ena (wr_narrow_xy_ena_x),
.wr_narrow_xy_bank (wr_narrow_xy_bank_x),
.wr_narrow_xy_addr (wr_narrow_xy_addr_x),
- .wr_narrow_x_din (wr_narrow_x_din_x),
- .wr_narrow_y_din (wr_narrow_y_din_x),
+ .wr_narrow_x_din (wr_narrow_x_data_x),
+ .wr_narrow_y_din (wr_narrow_y_data_x),
.rd_wide_xy_ena (rd_wide_xy_ena_x),
.rd_wide_xy_ena_aux (rd_wide_xy_ena_aux_x),
@@ -278,16 +328,28 @@ module modexpng_core_top
.rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x),
.rd_wide_xy_addr (rd_wide_xy_addr_x),
.rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x),
- .rd_wide_x_dout (rd_wide_x_dout_x),
- .rd_wide_y_dout (rd_wide_y_dout_x),
- .rd_wide_x_dout_aux (rd_wide_x_dout_aux_x),
- .rd_wide_y_dout_aux (rd_wide_y_dout_aux_x),
+ .rd_wide_x_dout (rd_wide_x_data_x),
+ .rd_wide_y_dout (rd_wide_y_data_x),
+ .rd_wide_x_dout_aux (rd_wide_x_data_aux_x),
+ .rd_wide_y_dout_aux (rd_wide_y_data_aux_x),
.rd_narrow_xy_ena (rd_narrow_xy_ena_x),
.rd_narrow_xy_bank (rd_narrow_xy_bank_x),
.rd_narrow_xy_addr (rd_narrow_xy_addr_x),
- .rd_narrow_x_dout (rd_narrow_x_dout_x),
- .rd_narrow_y_dout (rd_narrow_y_dout_x)
+ .rd_narrow_x_dout (rd_narrow_x_data_x),
+ .rd_narrow_y_dout (rd_narrow_y_data_x),
+
+ .wrk_wide_xy_ena (wrk_rd_wide_xy_ena_x),
+ .wrk_wide_xy_bank (wrk_rd_wide_xy_bank_x),
+ .wrk_wide_xy_addr (wrk_rd_wide_xy_addr_x),
+ .wrk_wide_x_dout (wrk_rd_wide_x_data_x),
+ .wrk_wide_y_dout (wrk_rd_wide_y_data_x),
+
+ .wrk_narrow_xy_ena (wrk_rd_narrow_xy_ena_x),
+ .wrk_narrow_xy_bank (wrk_rd_narrow_xy_bank_x),
+ .wrk_narrow_xy_addr (wrk_rd_narrow_xy_addr_x),
+ .wrk_narrow_x_dout (wrk_rd_narrow_x_data_x),
+ .wrk_narrow_y_dout (wrk_rd_narrow_y_data_x)
);
modexpng_storage_block storage_block_y
@@ -298,14 +360,14 @@ module modexpng_core_top
.wr_wide_xy_ena (wr_wide_xy_ena_y),
.wr_wide_xy_bank (wr_wide_xy_bank_y),
.wr_wide_xy_addr (wr_wide_xy_addr_y),
- .wr_wide_x_din (wr_wide_x_din_y),
- .wr_wide_y_din (wr_wide_y_din_y),
+ .wr_wide_x_din (wr_wide_x_data_y),
+ .wr_wide_y_din (wr_wide_y_data_y),
.wr_narrow_xy_ena (wr_narrow_xy_ena_y),
.wr_narrow_xy_bank (wr_narrow_xy_bank_y),
.wr_narrow_xy_addr (wr_narrow_xy_addr_y),
- .wr_narrow_x_din (wr_narrow_x_din_y),
- .wr_narrow_y_din (wr_narrow_y_din_y),
+ .wr_narrow_x_din (wr_narrow_x_data_y),
+ .wr_narrow_y_din (wr_narrow_y_data_y),
.rd_wide_xy_ena (rd_wide_xy_ena_y),
.rd_wide_xy_ena_aux (rd_wide_xy_ena_aux_y),
@@ -313,16 +375,29 @@ module modexpng_core_top
.rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y),
.rd_wide_xy_addr (rd_wide_xy_addr_y),
.rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y),
- .rd_wide_x_dout (rd_wide_x_dout_y),
- .rd_wide_y_dout (rd_wide_y_dout_y),
- .rd_wide_x_dout_aux (rd_wide_x_dout_aux_y),
- .rd_wide_y_dout_aux (rd_wide_y_dout_aux_y),
+ .rd_wide_x_dout (rd_wide_x_data_y),
+ .rd_wide_y_dout (rd_wide_y_data_y),
+ .rd_wide_x_dout_aux (rd_wide_x_data_aux_y),
+ .rd_wide_y_dout_aux (rd_wide_y_data_aux_y),
.rd_narrow_xy_ena (rd_narrow_xy_ena_y),
.rd_narrow_xy_bank (rd_narrow_xy_bank_y),
.rd_narrow_xy_addr (rd_narrow_xy_addr_y),
- .rd_narrow_x_dout (rd_narrow_x_dout_y),
- .rd_narrow_y_dout (rd_narrow_y_dout_y)
+ .rd_narrow_x_dout (rd_narrow_x_data_y),
+ .rd_narrow_y_dout (rd_narrow_y_data_y),
+
+ .wrk_wide_xy_ena (wrk_rd_wide_xy_ena_y),
+ .wrk_wide_xy_bank (wrk_rd_wide_xy_bank_y),
+ .wrk_wide_xy_addr (wrk_rd_wide_xy_addr_y),
+ .wrk_wide_x_dout (wrk_rd_wide_x_data_y),
+ .wrk_wide_y_dout (wrk_rd_wide_y_data_y),
+
+ .wrk_narrow_xy_ena (wrk_rd_narrow_xy_ena_y),
+ .wrk_narrow_xy_bank (wrk_rd_narrow_xy_bank_y),
+ .wrk_narrow_xy_addr (wrk_rd_narrow_xy_addr_y),
+ .wrk_narrow_x_dout (wrk_rd_narrow_x_data_y),
+ .wrk_narrow_y_dout (wrk_rd_narrow_y_data_y)
+
);
@@ -337,50 +412,62 @@ module modexpng_core_top
.wr_wide_xy_ena (wr_wide_xy_ena_x),
.wr_wide_xy_bank (wr_wide_xy_bank_x),
.wr_wide_xy_addr (wr_wide_xy_addr_x),
- .wr_wide_x_din (wr_wide_x_din_x),
- .wr_wide_y_din (wr_wide_y_din_x),
+ .wr_wide_x_dout (wr_wide_x_data_x),
+ .wr_wide_y_dout (wr_wide_y_data_x),
.wr_narrow_xy_ena (wr_narrow_xy_ena_x),
.wr_narrow_xy_bank (wr_narrow_xy_bank_x),
.wr_narrow_xy_addr (wr_narrow_xy_addr_x),
- .wr_narrow_x_din (wr_narrow_x_din_x),
- .wr_narrow_y_din (wr_narrow_y_din_x),
+ .wr_narrow_x_dout (wr_narrow_x_data_x),
+ .wr_narrow_y_dout (wr_narrow_y_data_x),
- .ext_wide_xy_ena (ext_wide_xy_ena_x),
- .ext_wide_xy_bank (ext_wide_xy_bank_x),
- .ext_wide_xy_addr (ext_wide_xy_addr_x),
- .ext_wide_x_din (ext_wide_x_din_x),
- .ext_wide_y_din (ext_wide_y_din_x),
+ .io_wide_xy_ena (io_wide_xy_ena_x),
+ .io_wide_xy_bank (io_wide_xy_bank_x),
+ .io_wide_xy_addr (io_wide_xy_addr_x),
+ .io_wide_x_din (io_wide_x_data_x),
+ .io_wide_y_din (io_wide_y_data_x),
- .ext_narrow_xy_ena (ext_narrow_xy_ena_x),
- .ext_narrow_xy_bank (ext_narrow_xy_bank_x),
- .ext_narrow_xy_addr (ext_narrow_xy_addr_x),
- .ext_narrow_x_din (ext_narrow_x_din_x),
- .ext_narrow_y_din (ext_narrow_y_din_x),
+ .io_narrow_xy_ena (io_narrow_xy_ena_x),
+ .io_narrow_xy_bank (io_narrow_xy_bank_x),
+ .io_narrow_xy_addr (io_narrow_xy_addr_x),
+ .io_narrow_x_din (io_narrow_x_data_x),
+ .io_narrow_y_din (io_narrow_y_data_x),
.rcmb_wide_xy_bank (rcmb_wide_xy_bank_x),
.rcmb_wide_xy_addr (rcmb_wide_xy_addr_x),
- .rcmb_wide_x_din (rcmb_wide_x_dout_x),
- .rcmb_wide_y_din (rcmb_wide_y_dout_x),
+ .rcmb_wide_x_din (rcmb_wide_x_data_x),
+ .rcmb_wide_y_din (rcmb_wide_y_data_x),
.rcmb_wide_xy_ena (rcmb_wide_xy_valid_x),
.rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_x),
.rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_x),
- .rcmb_narrow_x_din (rcmb_narrow_x_dout_x),
- .rcmb_narrow_y_din (rcmb_narrow_y_dout_x),
+ .rcmb_narrow_x_din (rcmb_narrow_x_data_x),
+ .rcmb_narrow_y_din (rcmb_narrow_y_data_x),
.rcmb_narrow_xy_ena (rcmb_narrow_xy_valid_x),
.rdct_wide_xy_bank (rdct_wide_xy_bank_x),
.rdct_wide_xy_addr (rdct_wide_xy_addr_x),
- .rdct_wide_x_din (rdct_wide_x_dout_x), // TODO: maybe just rename to {x|y}_x, since that's an
- .rdct_wide_y_din (rdct_wide_y_dout_x), // internal signal??
+ .rdct_wide_x_din (rdct_wide_x_data_x),
+ .rdct_wide_y_din (rdct_wide_y_data_x),
.rdct_wide_xy_valid (rdct_wide_xy_valid_x),
.rdct_narrow_xy_bank (rdct_narrow_xy_bank_x),
.rdct_narrow_xy_addr (rdct_narrow_xy_addr_x),
- .rdct_narrow_x_din (rdct_narrow_x_dout_x),
- .rdct_narrow_y_din (rdct_narrow_y_dout_x),
- .rdct_narrow_xy_valid (rdct_narrow_xy_valid_x)
+ .rdct_narrow_x_din (rdct_narrow_x_data_x),
+ .rdct_narrow_y_din (rdct_narrow_y_data_x),
+ .rdct_narrow_xy_valid (rdct_narrow_xy_valid_x),
+
+ .wrk_wide_xy_ena (wrk_wr_wide_xy_ena_x),
+ .wrk_wide_xy_bank (wrk_wr_wide_xy_bank_x),
+ .wrk_wide_xy_addr (wrk_wr_wide_xy_addr_x),
+ .wrk_wide_x_din (wrk_wr_wide_x_data_x),
+ .wrk_wide_y_din (wrk_wr_wide_y_data_x),
+
+ .wrk_narrow_xy_ena (wrk_wr_narrow_xy_ena_x),
+ .wrk_narrow_xy_bank (wrk_wr_narrow_xy_bank_x),
+ .wrk_narrow_xy_addr (wrk_wr_narrow_xy_addr_x),
+ .wrk_narrow_x_din (wrk_wr_narrow_x_data_x),
+ .wrk_narrow_y_din (wrk_wr_narrow_y_data_x)
);
modexpng_storage_manager storage_manager_y
@@ -391,51 +478,62 @@ module modexpng_core_top
.wr_wide_xy_ena (wr_wide_xy_ena_y),
.wr_wide_xy_bank (wr_wide_xy_bank_y),
.wr_wide_xy_addr (wr_wide_xy_addr_y),
- .wr_wide_x_din (wr_wide_x_din_y),
- .wr_wide_y_din (wr_wide_y_din_y),
+ .wr_wide_x_dout (wr_wide_x_data_y),
+ .wr_wide_y_dout (wr_wide_y_data_y),
.wr_narrow_xy_ena (wr_narrow_xy_ena_y),
.wr_narrow_xy_bank (wr_narrow_xy_bank_y),
.wr_narrow_xy_addr (wr_narrow_xy_addr_y),
- .wr_narrow_x_din (wr_narrow_x_din_y),
- .wr_narrow_y_din (wr_narrow_y_din_y),
+ .wr_narrow_x_dout (wr_narrow_x_data_y),
+ .wr_narrow_y_dout (wr_narrow_y_data_y),
- .ext_wide_xy_ena (ext_wide_xy_ena_y),
- .ext_wide_xy_bank (ext_wide_xy_bank_y),
- .ext_wide_xy_addr (ext_wide_xy_addr_y),
- .ext_wide_x_din (ext_wide_x_din_y),
- .ext_wide_y_din (ext_wide_y_din_y),
+ .io_wide_xy_ena (io_wide_xy_ena_y),
+ .io_wide_xy_bank (io_wide_xy_bank_y),
+ .io_wide_xy_addr (io_wide_xy_addr_y),
+ .io_wide_x_din (io_wide_x_data_y),
+ .io_wide_y_din (io_wide_y_data_y),
- .ext_narrow_xy_ena (ext_narrow_xy_ena_y),
- .ext_narrow_xy_bank (ext_narrow_xy_bank_y),
- .ext_narrow_xy_addr (ext_narrow_xy_addr_y),
- .ext_narrow_x_din (ext_narrow_x_din_y),
- .ext_narrow_y_din (ext_narrow_y_din_y),
+ .io_narrow_xy_ena (io_narrow_xy_ena_y),
+ .io_narrow_xy_bank (io_narrow_xy_bank_y),
+ .io_narrow_xy_addr (io_narrow_xy_addr_y),
+ .io_narrow_x_din (io_narrow_x_data_y),
+ .io_narrow_y_din (io_narrow_y_data_y),
.rcmb_wide_xy_bank (rcmb_wide_xy_bank_y),
.rcmb_wide_xy_addr (rcmb_wide_xy_addr_y),
- .rcmb_wide_x_din (rcmb_wide_x_dout_y),
- .rcmb_wide_y_din (rcmb_wide_y_dout_y),
+ .rcmb_wide_x_din (rcmb_wide_x_data_y),
+ .rcmb_wide_y_din (rcmb_wide_y_data_y),
.rcmb_wide_xy_ena (rcmb_wide_xy_valid_y),
.rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_y),
.rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_y),
- .rcmb_narrow_x_din (rcmb_narrow_x_dout_y),
- .rcmb_narrow_y_din (rcmb_narrow_y_dout_y),
+ .rcmb_narrow_x_din (rcmb_narrow_x_data_y),
+ .rcmb_narrow_y_din (rcmb_narrow_y_data_y),
.rcmb_narrow_xy_ena (rcmb_narrow_xy_valid_y),
.rdct_wide_xy_bank (rdct_wide_xy_bank_y),
.rdct_wide_xy_addr (rdct_wide_xy_addr_y),
- .rdct_wide_x_din (rdct_wide_x_dout_y),
- .rdct_wide_y_din (rdct_wide_y_dout_y),
+ .rdct_wide_x_din (rdct_wide_x_data_y),
+ .rdct_wide_y_din (rdct_wide_y_data_y),
.rdct_wide_xy_valid (rdct_wide_xy_valid_y),
.rdct_narrow_xy_bank (rdct_narrow_xy_bank_y),
.rdct_narrow_xy_addr (rdct_narrow_xy_addr_y),
- .rdct_narrow_x_din (rdct_narrow_x_dout_y),
- .rdct_narrow_y_din (rdct_narrow_y_dout_y),
- .rdct_narrow_xy_valid (rdct_narrow_xy_valid_y)
-
+ .rdct_narrow_x_din (rdct_narrow_x_data_y),
+ .rdct_narrow_y_din (rdct_narrow_y_data_y),
+ .rdct_narrow_xy_valid (rdct_narrow_xy_valid_y),
+
+ .wrk_wide_xy_ena (wrk_wr_wide_xy_ena_y),
+ .wrk_wide_xy_bank (wrk_wr_wide_xy_bank_y),
+ .wrk_wide_xy_addr (wrk_wr_wide_xy_addr_y),
+ .wrk_wide_x_din (wrk_wr_wide_x_data_y),
+ .wrk_wide_y_din (wrk_wr_wide_y_data_y),
+
+ .wrk_narrow_xy_ena (wrk_wr_narrow_xy_ena_y),
+ .wrk_narrow_xy_bank (wrk_wr_narrow_xy_bank_y),
+ .wrk_narrow_xy_addr (wrk_wr_narrow_xy_addr_y),
+ .wrk_narrow_x_din (wrk_wr_narrow_x_data_y),
+ .wrk_narrow_y_din (wrk_wr_narrow_y_data_y)
);
@@ -444,16 +542,16 @@ module modexpng_core_top
//
wire io_in_1_en;
wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr;
- wire [ WORD_W -1:0] io_in_1_dout;
+ wire [ WORD_W -1:0] io_in_1_data;
wire io_in_2_en;
wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr;
- wire [ WORD_W -1:0] io_in_2_dout;
+ wire [ WORD_W -1:0] io_in_2_data;
wire io_out_en;
wire io_out_we;
wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr;
- wire [ WORD_W -1:0] io_out_din;
+ wire [ WORD_W -1:0] io_out_data;
// TODO: Separate reset for clock domains (core/bus)???
@@ -472,16 +570,16 @@ module modexpng_core_top
.in_1_en (io_in_1_en),
.in_1_addr (io_in_1_addr),
- .in_1_dout (io_in_1_dout),
+ .in_1_dout (io_in_1_data),
.in_2_en (io_in_2_en),
.in_2_addr (io_in_2_addr),
- .in_2_dout (io_in_2_dout),
+ .in_2_dout (io_in_2_data),
.out_en (io_out_en),
.out_we (io_out_we),
.out_addr (io_out_addr),
- .out_din (io_out_din)
+ .out_din (io_out_data)
);
@@ -497,59 +595,65 @@ module modexpng_core_top
reg [OP_ADDR_W -1:0] io_mgr_word_index_last;
reg [UOP_OPCODE_W -1:0] io_mgr_opcode;
+ wire [WORD_W -1:0] wrk_rd_narrow_x_data_x_trunc = wrk_rd_narrow_x_data_x[WORD_W-1:0];
+ wire [WORD_W -1:0] wrk_rd_narrow_x_data_y_trunc = wrk_rd_narrow_x_data_y[WORD_W-1:0];
+
modexpng_io_manager io_manager
(
- .clk (clk),
- .rst (rst),
+ .clk (clk),
+ .rst (rst),
- .ena (io_mgr_ena),
- .rdy (io_mgr_rdy),
+ .ena (io_mgr_ena),
+ .rdy (io_mgr_rdy),
- .sel_crt (io_mgr_sel_crt),
- .sel_aux (io_mgr_sel_aux),
- .sel_in (io_mgr_sel_in),
- .sel_out (io_mgr_sel_out),
+ .sel_crt (io_mgr_sel_crt),
+ .sel_aux (io_mgr_sel_aux),
+ .sel_in (io_mgr_sel_in),
+ .sel_out (io_mgr_sel_out),
- .opcode (io_mgr_opcode),
+ .opcode (io_mgr_opcode),
- .word_index_last (io_mgr_word_index_last),
+ .word_index_last (io_mgr_word_index_last),
- .ext_wide_xy_ena_x (ext_wide_xy_ena_x),
- .ext_wide_xy_bank_x (ext_wide_xy_bank_x),
- .ext_wide_xy_addr_x (ext_wide_xy_addr_x),
- .ext_wide_x_din_x (ext_wide_x_din_x),
- .ext_wide_y_din_x (ext_wide_y_din_x),
-
- .ext_narrow_xy_ena_x (ext_narrow_xy_ena_x),
- .ext_narrow_xy_bank_x (ext_narrow_xy_bank_x),
- .ext_narrow_xy_addr_x (ext_narrow_xy_addr_x),
- .ext_narrow_x_din_x (ext_narrow_x_din_x),
- .ext_narrow_y_din_x (ext_narrow_y_din_x),
-
- .ext_wide_xy_ena_y (ext_wide_xy_ena_y),
- .ext_wide_xy_bank_y (ext_wide_xy_bank_y),
- .ext_wide_xy_addr_y (ext_wide_xy_addr_y),
- .ext_wide_x_din_y (ext_wide_x_din_y),
- .ext_wide_y_din_y (ext_wide_y_din_y),
-
- .ext_narrow_xy_ena_y (ext_narrow_xy_ena_y),
- .ext_narrow_xy_bank_y (ext_narrow_xy_bank_y),
- .ext_narrow_xy_addr_y (ext_narrow_xy_addr_y),
- .ext_narrow_x_din_y (ext_narrow_x_din_y),
- .ext_narrow_y_din_y (ext_narrow_y_din_y),
+ .io_wide_xy_ena_x (io_wide_xy_ena_x),
+ .io_wide_xy_bank_x (io_wide_xy_bank_x),
+ .io_wide_xy_addr_x (io_wide_xy_addr_x),
+ .io_wide_x_din_x (io_wide_x_data_x),
+ .io_wide_y_din_x (io_wide_y_data_x),
+
+ .io_narrow_xy_ena_x (io_narrow_xy_ena_x),
+ .io_narrow_xy_bank_x (io_narrow_xy_bank_x),
+ .io_narrow_xy_addr_x (io_narrow_xy_addr_x),
+ .io_narrow_x_din_x (io_narrow_x_data_x),
+ .io_narrow_y_din_x (io_narrow_y_data_x),
+
+ .io_wide_xy_ena_y (io_wide_xy_ena_y),
+ .io_wide_xy_bank_y (io_wide_xy_bank_y),
+ .io_wide_xy_addr_y (io_wide_xy_addr_y),
+ .io_wide_x_din_y (io_wide_x_data_y),
+ .io_wide_y_din_y (io_wide_y_data_y),
+
+ .io_narrow_xy_ena_y (io_narrow_xy_ena_y),
+ .io_narrow_xy_bank_y (io_narrow_xy_bank_y),
+ .io_narrow_xy_addr_y (io_narrow_xy_addr_y),
+ .io_narrow_x_din_y (io_narrow_x_data_y),
+ .io_narrow_y_din_y (io_narrow_y_data_y),
- .io_in_1_en (io_in_1_en),
- .io_in_1_addr (io_in_1_addr),
- .io_in_1_dout (io_in_1_dout),
+ .io_in_1_en (io_in_1_en),
+ .io_in_1_addr (io_in_1_addr),
+ .io_in_1_din (io_in_1_data),
- .io_in_2_en (io_in_2_en),
- .io_in_2_addr (io_in_2_addr),
- .io_in_2_dout (io_in_2_dout),
+ .io_in_2_en (io_in_2_en),
+ .io_in_2_addr (io_in_2_addr),
+ .io_in_2_din (io_in_2_data),
- .io_out_en (io_out_en),
- .io_out_we (io_out_we),
- .io_out_addr (io_out_addr),
- .io_out_din (io_out_din)
+ .io_out_en (io_out_en),
+ .io_out_we (io_out_we),
+ .io_out_addr (io_out_addr),
+ .io_out_dout (io_out_data),
+
+ .wrk_narrow_x_din_x_trunc (wrk_rd_narrow_x_data_x_trunc),
+ .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc)
);
@@ -608,33 +712,33 @@ module modexpng_core_top
.rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x),
.rd_wide_xy_addr (rd_wide_xy_addr_x),
.rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x),
- .rd_wide_x_dout (rd_wide_x_dout_x),
- .rd_wide_y_dout (rd_wide_y_dout_x),
- .rd_wide_x_dout_aux (rd_wide_x_dout_aux_x),
- .rd_wide_y_dout_aux (rd_wide_y_dout_aux_x),
+ .rd_wide_x_din (rd_wide_x_data_x),
+ .rd_wide_y_din (rd_wide_y_data_x),
+ .rd_wide_x_din_aux (rd_wide_x_data_aux_x),
+ .rd_wide_y_din_aux (rd_wide_y_data_aux_x),
.rd_narrow_xy_ena (rd_narrow_xy_ena_x),
.rd_narrow_xy_bank (rd_narrow_xy_bank_x),
.rd_narrow_xy_addr (rd_narrow_xy_addr_x),
- .rd_narrow_x_dout (rd_narrow_x_dout_x),
- .rd_narrow_y_dout (rd_narrow_y_dout_x),
+ .rd_narrow_x_din (rd_narrow_x_data_x),
+ .rd_narrow_y_din (rd_narrow_y_data_x),
.rcmb_wide_xy_bank (rcmb_wide_xy_bank_x),
.rcmb_wide_xy_addr (rcmb_wide_xy_addr_x),
- .rcmb_wide_x_dout (rcmb_wide_x_dout_x),
- .rcmb_wide_y_dout (rcmb_wide_y_dout_x),
+ .rcmb_wide_x_dout (rcmb_wide_x_data_x),
+ .rcmb_wide_y_dout (rcmb_wide_y_data_x),
.rcmb_wide_xy_valid (rcmb_wide_xy_valid_x),
.rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_x),
.rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_x),
- .rcmb_narrow_x_dout (rcmb_narrow_x_dout_x),
- .rcmb_narrow_y_dout (rcmb_narrow_y_dout_x),
+ .rcmb_narrow_x_dout (rcmb_narrow_x_data_x),
+ .rcmb_narrow_y_dout (rcmb_narrow_y_data_x),
.rcmb_narrow_xy_valid (rcmb_narrow_xy_valid_x),
.rcmb_xy_bank (rcmb_final_xy_bank_x),
.rcmb_xy_addr (rcmb_final_xy_addr_x),
- .rcmb_x_dout (rcmb_final_x_dout_x),
- .rcmb_y_dout (rcmb_final_y_dout_x),
+ .rcmb_x_dout (rcmb_final_x_data_x),
+ .rcmb_y_dout (rcmb_final_y_data_x),
.rcmb_xy_valid (rcmb_final_xy_valid_x),
.rdct_ena (rdct_ena_x),
@@ -663,33 +767,33 @@ module modexpng_core_top
.rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y),
.rd_wide_xy_addr (rd_wide_xy_addr_y),
.rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y),
- .rd_wide_x_dout (rd_wide_x_dout_y),
- .rd_wide_y_dout (rd_wide_y_dout_y),
- .rd_wide_x_dout_aux (rd_wide_x_dout_aux_y),
- .rd_wide_y_dout_aux (rd_wide_y_dout_aux_y),
+ .rd_wide_x_din (rd_wide_x_data_y),
+ .rd_wide_y_din (rd_wide_y_data_y),
+ .rd_wide_x_din_aux (rd_wide_x_data_aux_y),
+ .rd_wide_y_din_aux (rd_wide_y_data_aux_y),
.rd_narrow_xy_ena (rd_narrow_xy_ena_y),
.rd_narrow_xy_bank (rd_narrow_xy_bank_y),
.rd_narrow_xy_addr (rd_narrow_xy_addr_y),
- .rd_narrow_x_dout (rd_narrow_x_dout_y),
- .rd_narrow_y_dout (rd_narrow_y_dout_y),
+ .rd_narrow_x_din (rd_narrow_x_data_y),
+ .rd_narrow_y_din (rd_narrow_y_data_y),
.rcmb_wide_xy_bank (rcmb_wide_xy_bank_y),
.rcmb_wide_xy_addr (rcmb_wide_xy_addr_y),
- .rcmb_wide_x_dout (rcmb_wide_x_dout_y),
- .rcmb_wide_y_dout (rcmb_wide_y_dout_y),
+ .rcmb_wide_x_dout (rcmb_wide_x_data_y),
+ .rcmb_wide_y_dout (rcmb_wide_y_data_y),
.rcmb_wide_xy_valid (rcmb_wide_xy_valid_y),
.rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_y),
.rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_y),
- .rcmb_narrow_x_dout (rcmb_narrow_x_dout_y),
- .rcmb_narrow_y_dout (rcmb_narrow_y_dout_y),
+ .rcmb_narrow_x_dout (rcmb_narrow_x_data_y),
+ .rcmb_narrow_y_dout (rcmb_narrow_y_data_y),
.rcmb_narrow_xy_valid (rcmb_narrow_xy_valid_y),
.rcmb_xy_bank (rcmb_final_xy_bank_y),
.rcmb_xy_addr (rcmb_final_xy_addr_y),
- .rcmb_x_dout (rcmb_final_x_dout_y),
- .rcmb_y_dout (rcmb_final_y_dout_y),
+ .rcmb_x_dout (rcmb_final_x_data_y),
+ .rcmb_y_dout (rcmb_final_y_data_y),
.rcmb_xy_valid (rcmb_final_xy_valid_y),
.rdct_ena (rdct_ena_y),
@@ -723,25 +827,25 @@ module modexpng_core_top
.rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x),
.rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x),
- .rd_wide_x_dout_aux (rd_wide_x_dout_aux_x),
- .rd_wide_y_dout_aux (rd_wide_y_dout_aux_x),
+ .rd_wide_x_din_aux (rd_wide_x_data_aux_x),
+ .rd_wide_y_din_aux (rd_wide_y_data_aux_x),
.rcmb_final_xy_bank (rcmb_final_xy_bank_x),
.rcmb_final_xy_addr (rcmb_final_xy_addr_x),
- .rcmb_final_x_dout (rcmb_final_x_dout_x),
- .rcmb_final_y_dout (rcmb_final_y_dout_x),
+ .rcmb_final_x_din (rcmb_final_x_data_x),
+ .rcmb_final_y_din (rcmb_final_y_data_x),
.rcmb_final_xy_valid (rcmb_final_xy_valid_x),
.rdct_wide_xy_bank (rdct_wide_xy_bank_x),
.rdct_wide_xy_addr (rdct_wide_xy_addr_x),
- .rdct_wide_x_dout (rdct_wide_x_dout_x),
- .rdct_wide_y_dout (rdct_wide_y_dout_x),
+ .rdct_wide_x_dout (rdct_wide_x_data_x),
+ .rdct_wide_y_dout (rdct_wide_y_data_x),
.rdct_wide_xy_valid (rdct_wide_xy_valid_x),
.rdct_narrow_xy_bank (rdct_narrow_xy_bank_x),
.rdct_narrow_xy_addr (rdct_narrow_xy_addr_x),
- .rdct_narrow_x_dout (rdct_narrow_x_dout_x),
- .rdct_narrow_y_dout (rdct_narrow_y_dout_x),
+ .rdct_narrow_x_dout (rdct_narrow_x_data_x),
+ .rdct_narrow_y_dout (rdct_narrow_y_data_x),
.rdct_narrow_xy_valid (rdct_narrow_xy_valid_x)
);
@@ -760,30 +864,110 @@ module modexpng_core_top
.rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y),
.rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y),
- .rd_wide_x_dout_aux (rd_wide_x_dout_aux_y),
- .rd_wide_y_dout_aux (rd_wide_y_dout_aux_y),
+ .rd_wide_x_din_aux (rd_wide_x_data_aux_y),
+ .rd_wide_y_din_aux (rd_wide_y_data_aux_y),
.rcmb_final_xy_bank (rcmb_final_xy_bank_y),
.rcmb_final_xy_addr (rcmb_final_xy_addr_y),
- .rcmb_final_x_dout (rcmb_final_x_dout_y),
- .rcmb_final_y_dout (rcmb_final_y_dout_y),
+ .rcmb_final_x_din (rcmb_final_x_data_y),
+ .rcmb_final_y_din (rcmb_final_y_data_y),
.rcmb_final_xy_valid (rcmb_final_xy_valid_y),
.rdct_wide_xy_bank (rdct_wide_xy_bank_y),
.rdct_wide_xy_addr (rdct_wide_xy_addr_y),
- .rdct_wide_x_dout (rdct_wide_x_dout_y),
- .rdct_wide_y_dout (rdct_wide_y_dout_y),
+ .rdct_wide_x_dout (rdct_wide_x_data_y),
+ .rdct_wide_y_dout (rdct_wide_y_data_y),
.rdct_wide_xy_valid (rdct_wide_xy_valid_y),
.rdct_narrow_xy_bank (rdct_narrow_xy_bank_y),
.rdct_narrow_xy_addr (rdct_narrow_xy_addr_y),
- .rdct_narrow_x_dout (rdct_narrow_x_dout_y),
- .rdct_narrow_y_dout (rdct_narrow_y_dout_y),
+ .rdct_narrow_x_dout (rdct_narrow_x_data_y),
+ .rdct_narrow_y_dout (rdct_narrow_y_data_y),
.rdct_narrow_xy_valid (rdct_narrow_xy_valid_y)
);
//
+ // General Worker
+ //
+ reg wrk_ena = 1'b0; // start request to the general worker; set only while the uOP FSM is in DECODE
+ wire wrk_rdy; // driven by the worker's rdy output, sampled by the uOP completion detector
+ // Operands below are latched from the current uOP while the FSM is in DECODE
+ reg [ BANK_ADDR_W -1:0] wrk_sel_wide_in;
+ reg [ BANK_ADDR_W -1:0] wrk_sel_wide_out;
+ reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_in;
+ reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_out;
+ reg [ OP_ADDR_W -1:0] wrk_word_index_last; // word_index_last_n or word_index_last_pq, picked by uop_npq_is_n
+ reg [UOP_OPCODE_W -1:0] wrk_opcode; // copy of uop_data_opcode
+
+ modexpng_general_worker general_worker
+ (
+ .clk (clk),
+ .rst (rst),
+ // Start request and ready flag
+ .ena (wrk_ena),
+ .rdy (wrk_rdy),
+ // Bank selectors latched from the decoded uOP
+ .sel_narrow_in (wrk_sel_narrow_in),
+ .sel_narrow_out (wrk_sel_narrow_out),
+ .sel_wide_in (wrk_sel_wide_in),
+ .sel_wide_out (wrk_sel_wide_out),
+ // Copy of the uOP opcode; presumably selects the worker operation (confirm in modexpng_general_worker.v)
+ .opcode (wrk_opcode),
+ // word_index_last_n or word_index_last_pq, chosen at decode
+ .word_index_last (wrk_word_index_last),
+ // Read ports: ena/bank/addr are worker outputs, *_din are worker inputs carrying the data read back
+ .wrk_rd_wide_xy_ena_x (wrk_rd_wide_xy_ena_x),
+ .wrk_rd_wide_xy_bank_x (wrk_rd_wide_xy_bank_x),
+ .wrk_rd_wide_xy_addr_x (wrk_rd_wide_xy_addr_x),
+ .wrk_rd_wide_x_din_x (wrk_rd_wide_x_data_x),
+ .wrk_rd_wide_y_din_x (wrk_rd_wide_y_data_x),
+
+ .wrk_rd_narrow_xy_ena_x (wrk_rd_narrow_xy_ena_x),
+ .wrk_rd_narrow_xy_bank_x (wrk_rd_narrow_xy_bank_x),
+ .wrk_rd_narrow_xy_addr_x (wrk_rd_narrow_xy_addr_x),
+ .wrk_rd_narrow_x_din_x (wrk_rd_narrow_x_data_x),
+ .wrk_rd_narrow_y_din_x (wrk_rd_narrow_y_data_x),
+
+ .wrk_rd_wide_xy_ena_y (wrk_rd_wide_xy_ena_y),
+ .wrk_rd_wide_xy_bank_y (wrk_rd_wide_xy_bank_y),
+ .wrk_rd_wide_xy_addr_y (wrk_rd_wide_xy_addr_y),
+ .wrk_rd_wide_x_din_y (wrk_rd_wide_x_data_y),
+ .wrk_rd_wide_y_din_y (wrk_rd_wide_y_data_y),
+
+ .wrk_rd_narrow_xy_ena_y (wrk_rd_narrow_xy_ena_y),
+ .wrk_rd_narrow_xy_bank_y (wrk_rd_narrow_xy_bank_y),
+ .wrk_rd_narrow_xy_addr_y (wrk_rd_narrow_xy_addr_y),
+ .wrk_rd_narrow_x_din_y (wrk_rd_narrow_x_data_y),
+ .wrk_rd_narrow_y_din_y (wrk_rd_narrow_y_data_y),
+ // Write ports: ena/bank/addr and the *_dout data are all worker outputs
+ .wrk_wr_wide_xy_ena_x (wrk_wr_wide_xy_ena_x),
+ .wrk_wr_wide_xy_bank_x (wrk_wr_wide_xy_bank_x),
+ .wrk_wr_wide_xy_addr_x (wrk_wr_wide_xy_addr_x),
+ .wrk_wr_wide_x_dout_x (wrk_wr_wide_x_data_x),
+ .wrk_wr_wide_y_dout_x (wrk_wr_wide_y_data_x),
+
+ .wrk_wr_narrow_xy_ena_x (wrk_wr_narrow_xy_ena_x),
+ .wrk_wr_narrow_xy_bank_x (wrk_wr_narrow_xy_bank_x),
+ .wrk_wr_narrow_xy_addr_x (wrk_wr_narrow_xy_addr_x),
+ .wrk_wr_narrow_x_dout_x (wrk_wr_narrow_x_data_x),
+ .wrk_wr_narrow_y_dout_x (wrk_wr_narrow_y_data_x),
+
+ .wrk_wr_wide_xy_ena_y (wrk_wr_wide_xy_ena_y),
+ .wrk_wr_wide_xy_bank_y (wrk_wr_wide_xy_bank_y),
+ .wrk_wr_wide_xy_addr_y (wrk_wr_wide_xy_addr_y),
+ .wrk_wr_wide_x_dout_y (wrk_wr_wide_x_data_y),
+ .wrk_wr_wide_y_dout_y (wrk_wr_wide_y_data_y),
+
+ .wrk_wr_narrow_xy_ena_y (wrk_wr_narrow_xy_ena_y),
+ .wrk_wr_narrow_xy_bank_y (wrk_wr_narrow_xy_bank_y),
+ .wrk_wr_narrow_xy_addr_y (wrk_wr_narrow_xy_addr_y),
+ .wrk_wr_narrow_x_dout_y (wrk_wr_narrow_x_data_y),
+ .wrk_wr_narrow_y_dout_y (wrk_wr_narrow_y_data_y)
+ );
+
+
+ //
// uOP Completion Detector
//
reg uop_exit_from_busy;
@@ -792,10 +976,10 @@ module modexpng_core_top
//
uop_exit_from_busy = 0;
//
- if (uop_opcode_is_io) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
- if (uop_opcode_is_mmm) uop_exit_from_busy = ~mmm_ena & mmm_rdy;
- //if (uop_data_opcode_is_add) uop_exit_from_busy = ~mod_add_ena & mod_add_rdy;
- //if (uop_data_opcode_is_sub) uop_exit_from_busy = ~mod_sub_ena & mod_sub_rdy;
+ if (uop_opcode_is_in) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; // input uOPs only start the I/O manager
+ if (uop_opcode_is_out) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~wrk_ena & wrk_rdy); // output uOPs start the I/O manager AND the worker (never the multiplier), so wait for both of those
+ if (uop_opcode_is_mmm) uop_exit_from_busy = ~mmm_ena & mmm_rdy ;
+ if (uop_opcode_is_wrk) uop_exit_from_busy = ~wrk_ena & wrk_rdy ; // worker-only uOPs
//
end
@@ -809,10 +993,12 @@ module modexpng_core_top
io_mgr_ena <= 1'b0;
mmm_ena_x <= 1'b0;
mmm_ena_y <= 1'b0;
+ wrk_ena <= 1'b0;
end else begin
- io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_io : 1'b0;
- mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
- mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
+ io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in || uop_opcode_is_out) : 1'b0; // I/O manager is started for both input and output uOPs
+ mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
+ mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
+ wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || uop_opcode_is_out) : 1'b0; // worker is started for its own uOPs and, together with the I/O manager, for output uOPs
end
//
@@ -825,6 +1011,7 @@ module modexpng_core_top
if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
//
io_mgr_opcode <= uop_data_opcode;
+ wrk_opcode <= uop_data_opcode;
//
case (uop_data_opcode)
//
@@ -842,6 +1029,15 @@ module modexpng_core_top
io_mgr_sel_out <= uop_data_sel_narrow_out;
end
//
+ UOP_OPCODE_OUTPUT_FROM_NARROW: begin // output uOP: programs the I/O manager and the general worker
+ io_mgr_sel_crt <= uop_data_crt;
+ io_mgr_sel_aux <= UOP_AUX_DNC; // aux selector is not taken from the uOP for this opcode
+ io_mgr_sel_in <= BANK_DNC; // input bank is not taken from the uOP for this opcode
+ io_mgr_sel_out <= uop_data_sel_narrow_out;
+ // only the narrow source bank is forwarded to the worker; its other selectors keep their previous values
+ wrk_sel_narrow_in <= uop_data_sel_narrow_in;
+ end
+ //
UOP_OPCODE_MODULAR_MULTIPLY: begin
//
case (uop_data_ladder)
@@ -856,10 +1052,21 @@ module modexpng_core_top
{mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }};
{rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }};
{rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out }};
-
//
end
//
+ UOP_OPCODE_PROPAGATE_CARRIES: begin // worker uOP that uses only the narrow in/out bank selectors
+ wrk_sel_narrow_in <= uop_data_sel_narrow_in;
+ wrk_sel_narrow_out <= uop_data_sel_narrow_out;
+ end
+ //
+ UOP_OPCODE_COPY_CRT_Y2X: begin // worker uOP that latches all four bank selectors (wide and narrow, in and out)
+ wrk_sel_wide_in <= uop_data_sel_wide_in;
+ wrk_sel_wide_out <= uop_data_sel_wide_out;
+ wrk_sel_narrow_in <= uop_data_sel_narrow_in;
+ wrk_sel_narrow_out <= uop_data_sel_narrow_out;
+ end
+ //
endcase
//
end
@@ -887,6 +1094,9 @@ module modexpng_core_top
{rdct_word_index_last_x, rdct_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }};
end
//
+ UOP_OPCODE_PROPAGATE_CARRIES: // NOTE(review): no other worker opcode loads wrk_word_index_last in this case statement - confirm the worker does not need it for them
+ wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; // non-blocking, consistent with the other registers loaded in this case statement
+ //
endcase
//
end
@@ -945,87 +1155,88 @@ module modexpng_core_top
//
// X.X
//
- $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
- $write("X.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[0*256+i]); $write("\n");
- $write("X.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[1*256+i]); $write("\n");
- $write("X.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[2*256+i]); $write("\n");
- $write("X.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[3*256+i]); $write("\n");
- $write("X.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[4*256+i]); $write("\n");
- $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[5*256+i]); $write("\n");
- $write("X.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[6*256+i]); $write("\n");
- $write("X.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[7*256+i]); $write("\n");
- $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
- $write("X.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[0*256+i]); $write("\n");
- $write("X.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[1*256+i]); $write("\n");
- $write("X.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[2*256+i]); $write("\n");
- $write("X.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[3*256+i]); $write("\n");
- $write("X.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[4*256+i]); $write("\n");
- $write("X.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[5*256+i]); $write("\n");
- $write("X.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[6*256+i]); $write("\n");
- $write("X.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
+ $write("X.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[0*256+i]); $write("\n");
+ $write("X.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[1*256+i]); $write("\n");
+ $write("X.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[2*256+i]); $write("\n");
+ $write("X.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[3*256+i]); $write("\n");
+ $write("X.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[4*256+i]); $write("\n");
+ $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[5*256+i]); $write("\n");
+ $write("X.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[6*256+i]); $write("\n");
+ $write("X.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
+ $write("X.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[0*256+i]); $write("\n");
+ $write("X.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[1*256+i]); $write("\n");
+ $write("X.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[2*256+i]); $write("\n");
+ $write("X.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[3*256+i]); $write("\n");
+ $write("X.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[4*256+i]); $write("\n");
+ $write("X.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[5*256+i]); $write("\n");
+ $write("X.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[6*256+i]); $write("\n");
+ $write("X.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[7*256+i]); $write("\n");
//
// X.Y
//
- $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
- $write("X.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[0*256+i]); $write("\n");
- $write("X.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[1*256+i]); $write("\n");
- $write("X.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[2*256+i]); $write("\n");
- $write("X.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[3*256+i]); $write("\n");
- $write("X.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[4*256+i]); $write("\n");
- $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[5*256+i]); $write("\n");
- $write("X.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[6*256+i]); $write("\n");
- $write("X.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[7*256+i]); $write("\n");
- $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
- $write("X.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[0*256+i]); $write("\n");
- $write("X.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[1*256+i]); $write("\n");
- $write("X.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[2*256+i]); $write("\n");
- $write("X.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[3*256+i]); $write("\n");
- $write("X.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[4*256+i]); $write("\n");
- $write("X.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[5*256+i]); $write("\n");
- $write("X.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[6*256+i]); $write("\n");
- $write("X.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
+ $write("X.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[0*256+i]); $write("\n");
+ $write("X.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[1*256+i]); $write("\n");
+ $write("X.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[2*256+i]); $write("\n");
+ $write("X.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[3*256+i]); $write("\n");
+ $write("X.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[4*256+i]); $write("\n");
+ $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[5*256+i]); $write("\n");
+ $write("X.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[6*256+i]); $write("\n");
+ $write("X.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
+ $write("X.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[0*256+i]); $write("\n");
+ $write("X.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[1*256+i]); $write("\n");
+ $write("X.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[2*256+i]); $write("\n");
+ $write("X.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[3*256+i]); $write("\n");
+ $write("X.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[4*256+i]); $write("\n");
+ $write("X.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[5*256+i]); $write("\n");
+ $write("X.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[6*256+i]); $write("\n");
+ $write("X.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[7*256+i]); $write("\n");
//
// Y.X
//
- $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
- $write("Y.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[0*256+i]); $write("\n");
- $write("Y.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[1*256+i]); $write("\n");
- $write("Y.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[2*256+i]); $write("\n");
- $write("Y.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[3*256+i]); $write("\n");
- $write("Y.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[4*256+i]); $write("\n");
- $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[5*256+i]); $write("\n");
- $write("Y.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[6*256+i]); $write("\n");
- $write("Y.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[7*256+i]); $write("\n");
- $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
- $write("Y.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[0*256+i]); $write("\n");
- $write("Y.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[1*256+i]); $write("\n");
- $write("Y.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[2*256+i]); $write("\n");
- $write("Y.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[3*256+i]); $write("\n");
- $write("Y.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[4*256+i]); $write("\n");
- $write("Y.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[5*256+i]); $write("\n");
- $write("Y.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[6*256+i]); $write("\n");
- $write("Y.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
+ $write("Y.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[0*256+i]); $write("\n");
+ $write("Y.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[1*256+i]); $write("\n");
+ $write("Y.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[2*256+i]); $write("\n");
+ $write("Y.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[3*256+i]); $write("\n");
+ $write("Y.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[4*256+i]); $write("\n");
+ $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[5*256+i]); $write("\n");
+ $write("Y.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[6*256+i]); $write("\n");
+ $write("Y.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
+ $write("Y.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[0*256+i]); $write("\n");
+ $write("Y.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[1*256+i]); $write("\n");
+ $write("Y.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[2*256+i]); $write("\n");
+ $write("Y.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[3*256+i]); $write("\n");
+ $write("Y.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[4*256+i]); $write("\n");
+ $write("Y.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[5*256+i]); $write("\n");
+ $write("Y.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[6*256+i]); $write("\n");
+ $write("Y.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[7*256+i]); $write("\n");
//
// Y.Y
//
- $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
- $write("Y.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[0*256+i]); $write("\n");
- $write("Y.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[1*256+i]); $write("\n");
- $write("Y.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[2*256+i]); $write("\n");
- $write("Y.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[3*256+i]); $write("\n");
- $write("Y.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[4*256+i]); $write("\n");
- $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[5*256+i]); $write("\n");
- $write("Y.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[6*256+i]); $write("\n");
- $write("Y.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[7*256+i]); $write("\n");
- $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
- $write("Y.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[0*256+i]); $write("\n");
- $write("Y.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[1*256+i]); $write("\n");
- $write("Y.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[2*256+i]); $write("\n");
- $write("Y.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[3*256+i]); $write("\n");
- $write("Y.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[4*256+i]); $write("\n");
- $write("Y.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[5*256+i]); $write("\n");
- $write("Y.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[6*256+i]); $write("\n");
- $write("Y.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[7*256+i]); $write("\n"); //
+ $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n");
+ $write("Y.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[0*256+i]); $write("\n");
+ $write("Y.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[1*256+i]); $write("\n");
+ $write("Y.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[2*256+i]); $write("\n");
+ $write("Y.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[3*256+i]); $write("\n");
+ $write("Y.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[4*256+i]); $write("\n");
+ $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[5*256+i]); $write("\n");
+ $write("Y.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[6*256+i]); $write("\n");
+ $write("Y.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[7*256+i]); $write("\n");
+ $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n");
+ $write("Y.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[0*256+i]); $write("\n");
+ $write("Y.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[1*256+i]); $write("\n");
+ $write("Y.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[2*256+i]); $write("\n");
+ $write("Y.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[3*256+i]); $write("\n");
+ $write("Y.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[4*256+i]); $write("\n");
+ $write("Y.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[5*256+i]); $write("\n");
+ $write("Y.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[6*256+i]); $write("\n");
+ $write("Y.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[7*256+i]); $write("\n");
+ //
end
//
diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v
index 8ab64f0..8c4e844 100644
--- a/rtl/modexpng_dsp_array_block.v
+++ b/rtl/modexpng_dsp_array_block.v
@@ -70,8 +70,8 @@ module modexpng_dsp_array_block
.opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}),
.alumode ({DSP48E1_ALUMODE_W{1'b0}}),
- .casc_a_in (WORD_EXT_NULL),
- .casc_b_in (WORD_NULL),
+ .casc_a_in (WORD_EXT_ZERO),
+ .casc_b_in (WORD_ZERO),
.casc_a_out (casc_a[z]),
.casc_b_out (casc_b[z])
@@ -138,8 +138,8 @@ module modexpng_dsp_array_block
.opmode ({1'b0, mode_z[2*NUM_MULTS_HALF], 1'b0, 2'b01, 2'b01}),
.alumode ({DSP48E1_ALUMODE_W{1'b0}}),
- .casc_a_in (WORD_EXT_NULL),
- .casc_b_in (WORD_NULL),
+ .casc_a_in (WORD_EXT_ZERO),
+ .casc_b_in (WORD_ZERO),
.casc_a_out (),
.casc_b_out ()
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
new file mode 100644
index 0000000..c35f0b3
--- /dev/null
+++ b/rtl/modexpng_general_worker.v
@@ -0,0 +1,679 @@
+module modexpng_general_worker
+(
+ clk,
+ rst,
+
+ ena,
+ rdy,
+
+ sel_narrow_in,
+ sel_narrow_out,
+ sel_wide_in,
+ sel_wide_out,
+
+ opcode,
+
+ word_index_last,
+
+ wrk_rd_wide_xy_ena_x,
+ wrk_rd_wide_xy_bank_x,
+ wrk_rd_wide_xy_addr_x,
+ wrk_rd_wide_x_din_x,
+ wrk_rd_wide_y_din_x,
+
+ wrk_rd_narrow_xy_ena_x,
+ wrk_rd_narrow_xy_bank_x,
+ wrk_rd_narrow_xy_addr_x,
+ wrk_rd_narrow_x_din_x,
+ wrk_rd_narrow_y_din_x,
+
+ wrk_rd_wide_xy_ena_y,
+ wrk_rd_wide_xy_bank_y,
+ wrk_rd_wide_xy_addr_y,
+ wrk_rd_wide_x_din_y,
+ wrk_rd_wide_y_din_y,
+
+ wrk_rd_narrow_xy_ena_y,
+ wrk_rd_narrow_xy_bank_y,
+ wrk_rd_narrow_xy_addr_y,
+ wrk_rd_narrow_x_din_y,
+ wrk_rd_narrow_y_din_y,
+
+ wrk_wr_wide_xy_ena_x,
+ wrk_wr_wide_xy_bank_x,
+ wrk_wr_wide_xy_addr_x,
+ wrk_wr_wide_x_dout_x,
+ wrk_wr_wide_y_dout_x,
+
+ wrk_wr_narrow_xy_ena_x,
+ wrk_wr_narrow_xy_bank_x,
+ wrk_wr_narrow_xy_addr_x,
+ wrk_wr_narrow_x_dout_x,
+ wrk_wr_narrow_y_dout_x,
+
+ wrk_wr_wide_xy_ena_y,
+ wrk_wr_wide_xy_bank_y,
+ wrk_wr_wide_xy_addr_y,
+ wrk_wr_wide_x_dout_y,
+ wrk_wr_wide_y_dout_y,
+
+ wrk_wr_narrow_xy_ena_y,
+ wrk_wr_narrow_xy_bank_y,
+ wrk_wr_narrow_xy_addr_y,
+ wrk_wr_narrow_x_dout_y,
+ wrk_wr_narrow_y_dout_y
+);
+
+ //
+ // Headers
+ //
+ `include "modexpng_parameters.vh"
+ `include "modexpng_microcode.vh"
+
+
+ //
+ // Ports
+ //
+ input clk;
+ input rst;
+
+ input ena;
+ output rdy;
+
+ input [ BANK_ADDR_W -1:0] sel_narrow_in;
+ input [ BANK_ADDR_W -1:0] sel_narrow_out;
+ input [ BANK_ADDR_W -1:0] sel_wide_in;
+ input [ BANK_ADDR_W -1:0] sel_wide_out;
+
+ input [ UOP_OPCODE_W -1:0] opcode;
+
+ input [ OP_ADDR_W -1:0] word_index_last;
+
+ output wrk_rd_wide_xy_ena_x;
+ output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x;
+ output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x;
+ input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_x;
+ input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_x;
+
+ output wrk_rd_narrow_xy_ena_x;
+ output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x;
+ output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x;
+ input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x;
+ input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x;
+
+ output wrk_rd_wide_xy_ena_y;
+ output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y;
+ output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y;
+ input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_y;
+ input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_y;
+
+ output wrk_rd_narrow_xy_ena_y;
+ output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y;
+ output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y;
+ input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y;
+ input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y;
+
+ output wrk_wr_wide_xy_ena_x;
+ output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x;
+ output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x;
+ output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_x;
+ output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_x;
+
+ output wrk_wr_narrow_xy_ena_x;
+ output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x;
+ output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x;
+ output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_x;
+ output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_x;
+
+ output wrk_wr_wide_xy_ena_y;
+ output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y;
+ output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y;
+ output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_y;
+ output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_y;
+
+ output wrk_wr_narrow_xy_ena_y;
+ output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y;
+ output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y;
+ output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_y;
+ output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_y;
+
+
+ //
+ // FSM Declaration
+ //
+ localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0;
+ localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1;
+ localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2;
+ localparam [3:0] WRK_FSM_STATE_BUSY = 4'h3;
+ localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
+ localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
+ localparam [3:0] WRK_FSM_STATE_STOP = 4'h7;
+
+ reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
+ reg [3:0] wrk_fsm_state_next_one_pass; // single address space sweep
+
+
+ //
+ // Control Signals
+ // Internal registers behind the wrk_rd_* / wrk_wr_* output ports; only the enable flags have an initial value (0).
+ reg rd_wide_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_x;
+
+ reg rd_narrow_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x;
+
+ reg rd_wide_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_y;
+
+ reg rd_narrow_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y;
+
+ reg wr_wide_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_x;
+ reg [ WORD_EXT_W -1:0] wr_wide_x_dout_x;
+ reg [ WORD_EXT_W -1:0] wr_wide_y_dout_x;
+
+ reg wr_narrow_xy_ena_x = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x;
+ reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_x;
+ reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_x;
+ reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_x;
+
+ reg wr_wide_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_y;
+ reg [ WORD_EXT_W -1:0] wr_wide_x_dout_y;
+ reg [ WORD_EXT_W -1:0] wr_wide_y_dout_y;
+
+ reg wr_narrow_xy_ena_y = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y;
+ reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y;
+ reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_y;
+ reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_y;
+
+ //
+ // Mapping
+ // Each wrk_* output port is driven directly by the internal register of the same name (no extra logic).
+ assign wrk_rd_wide_xy_ena_x = rd_wide_xy_ena_x;
+ assign wrk_rd_wide_xy_bank_x = rd_wide_xy_bank_x;
+ assign wrk_rd_wide_xy_addr_x = rd_wide_xy_addr_x;
+
+ assign wrk_rd_narrow_xy_ena_x = rd_narrow_xy_ena_x;
+ assign wrk_rd_narrow_xy_bank_x = rd_narrow_xy_bank_x;
+ assign wrk_rd_narrow_xy_addr_x = rd_narrow_xy_addr_x;
+
+ assign wrk_rd_wide_xy_ena_y = rd_wide_xy_ena_y;
+ assign wrk_rd_wide_xy_bank_y = rd_wide_xy_bank_y;
+ assign wrk_rd_wide_xy_addr_y = rd_wide_xy_addr_y;
+
+ assign wrk_rd_narrow_xy_ena_y = rd_narrow_xy_ena_y;
+ assign wrk_rd_narrow_xy_bank_y = rd_narrow_xy_bank_y;
+ assign wrk_rd_narrow_xy_addr_y = rd_narrow_xy_addr_y;
+
+ assign wrk_wr_wide_xy_ena_x = wr_wide_xy_ena_x;
+ assign wrk_wr_wide_xy_bank_x = wr_wide_xy_bank_x;
+ assign wrk_wr_wide_xy_addr_x = wr_wide_xy_addr_x;
+ assign wrk_wr_wide_x_dout_x = wr_wide_x_dout_x;
+ assign wrk_wr_wide_y_dout_x = wr_wide_y_dout_x;
+
+ assign wrk_wr_narrow_xy_ena_x = wr_narrow_xy_ena_x;
+ assign wrk_wr_narrow_xy_bank_x = wr_narrow_xy_bank_x;
+ assign wrk_wr_narrow_xy_addr_x = wr_narrow_xy_addr_x;
+ assign wrk_wr_narrow_x_dout_x = wr_narrow_x_dout_x;
+ assign wrk_wr_narrow_y_dout_x = wr_narrow_y_dout_x;
+
+ assign wrk_wr_wide_xy_ena_y = wr_wide_xy_ena_y;
+ assign wrk_wr_wide_xy_bank_y = wr_wide_xy_bank_y;
+ assign wrk_wr_wide_xy_addr_y = wr_wide_xy_addr_y;
+ assign wrk_wr_wide_x_dout_y = wr_wide_x_dout_y;
+ assign wrk_wr_wide_y_dout_y = wr_wide_y_dout_y;
+
+ assign wrk_wr_narrow_xy_ena_y = wr_narrow_xy_ena_y;
+ assign wrk_wr_narrow_xy_bank_y = wr_narrow_xy_bank_y;
+ assign wrk_wr_narrow_xy_addr_y = wr_narrow_xy_addr_y;
+ assign wrk_wr_narrow_x_dout_y = wr_narrow_x_dout_y;
+ assign wrk_wr_narrow_y_dout_y = wr_narrow_y_dout_y;
+
+ //
+ // Delays
+ // Two-stage delay lines for the narrow read addresses: _dly1 lags by one clock, _dly2 by two clocks.
+ //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
+ //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
+ //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
+ //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
+
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2;
+
+ always @(posedge clk) begin
+ // wide delay lines are disabled: the write address logic uses the narrow _dly2 values for wide writes too
+ //{rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
+ //{rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
+ // shift every clock, no reset and no enable
+ {rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
+ //
+ end
+
+
+ //
+ // Handy Wires
+ // Declared early, assigned in the read address section: high when the pending read address equals word_index_last.
+ wire rd_narrow_xy_addr_x_next_is_last;
+ wire rd_narrow_xy_addr_y_next_is_last;
+
+
+ //
+ // Read Enable Logic
+ // Helper tasks always drive the X and Y enables of a memory type together.
+
+ task _update_wide_xy_rd_en; input _en; {rd_wide_xy_ena_x, rd_wide_xy_ena_y } <= {2{_en}}; endtask
+ task _update_narrow_xy_rd_en; input _en; {rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{_en}}; endtask
+
+ task enable_wide_xy_rd_en; _update_wide_xy_rd_en(1'b1); endtask
+ task disable_wide_xy_rd_en; _update_wide_xy_rd_en(1'b0); endtask
+
+ task enable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b1); endtask
+ task disable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b0); endtask
+
+ always @(posedge clk)
+ //
+ if (rst) begin
+ // synchronous reset: all read enables off
+ disable_wide_xy_rd_en;
+ disable_narrow_xy_rd_en;
+ /*
+ rd_wide_xy_ena_x <= 1'b0;
+ rd_wide_xy_ena_y <= 1'b0;
+ rd_narrow_xy_ena_x <= 1'b0;
+ rd_narrow_xy_ena_y <= 1'b0;
+ */
+ end else begin
+ // default every clock: enables off; a later non-blocking assignment below overrides this
+ disable_wide_xy_rd_en;
+ disable_narrow_xy_rd_en;
+ //
+ //rd_wide_xy_ena_x <= 1'b0;
+ //rd_wide_xy_ena_y <= 1'b0;
+ //rd_narrow_xy_ena_x <= 1'b0;
+ //rd_narrow_xy_ena_y <= 1'b0;
+ //
+ case (opcode)
+ // these two opcodes read the narrow memories only
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW:
+ // keyed on the NEXT state, so the registered enable is high while the FSM is in PRE1/PRE2/BUSY
+ case (wrk_fsm_state_next_one_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1,
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY:
+ //
+ enable_narrow_xy_rd_en;
+ //{rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{1'b1}};
+ //
+ //
+ endcase
+ //
+ // the copy opcode reads both the narrow and the wide memories
+ UOP_OPCODE_COPY_CRT_Y2X:
+ //
+ case (wrk_fsm_state_next_one_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1,
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY: begin
+ //
+ enable_narrow_xy_rd_en;
+ enable_wide_xy_rd_en;
+ //
+ end
+ //
+ endcase
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Write Enable Logic
+ // Helper tasks always drive the X and Y enables of a memory type together.
+
+ task _update_wide_xy_wr_en; input _en; {wr_wide_xy_ena_x, wr_wide_xy_ena_y } <= {2{_en}}; endtask
+ task _update_narrow_xy_wr_en; input _en; {wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{_en}}; endtask
+
+ task enable_wide_xy_wr_en; _update_wide_xy_wr_en(1'b1); endtask
+ task disable_wide_xy_wr_en; _update_wide_xy_wr_en(1'b0); endtask
+
+ task enable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b1); endtask
+ task disable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b0); endtask
+
+ always @(posedge clk)
+ //
+ if (rst) begin
+ // synchronous reset: all write enables off
+ disable_wide_xy_wr_en;
+ disable_narrow_xy_wr_en;
+ //wr_wide_xy_ena_x <= 1'b0;
+ //wr_wide_xy_ena_y <= 1'b0;
+ //wr_narrow_xy_ena_x <= 1'b0;
+ //wr_narrow_xy_ena_y <= 1'b0;
+ //
+ end else begin
+ // default every clock: enables off; a later non-blocking assignment below overrides this
+ disable_wide_xy_wr_en;
+ disable_narrow_xy_wr_en;
+ //
+ //wr_wide_xy_ena_x <= 1'b0;
+ //wr_wide_xy_ena_y <= 1'b0;
+ //wr_narrow_xy_ena_x <= 1'b0;
+ //wr_narrow_xy_ena_y <= 1'b0;
+ //
+ case (opcode)
+ // carry propagation writes back to the narrow memories only
+ UOP_OPCODE_PROPAGATE_CARRIES:
+ // keyed on the CURRENT state (unlike the read enables), same states as the write data/address logic
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2:
+ //
+ enable_narrow_xy_wr_en;
+ //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+ //
+ //
+ endcase
+ // the copy opcode writes both the wide and the narrow memories
+ UOP_OPCODE_COPY_CRT_Y2X:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin
+ //
+ enable_wide_xy_wr_en;
+ enable_narrow_xy_wr_en;
+ //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+ //
+ end
+ //
+ endcase
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Data Logic
+ // Write data generation: carry propagation over narrow words, or a straight copy of the Y-side read data.
+ reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r; // carry kept from the previously processed word
+ reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r;
+ reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
+ reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r;
+
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r}; // incoming word + stored carry (assumes WORD_EXT_W == WORD_W + CARRY_W - TODO confirm)
+ wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
+ wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};
+
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]}; // low WORD_W bits of the sum, upper bits zeroed
+ wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]};
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]};
+ wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]};
+
+ always @(posedge clk) begin
+ // default every clock: WORD_EXT_DNC (defined elsewhere, presumably a don't-care filler) unless overridden below
+ wr_wide_x_dout_x <= WORD_EXT_DNC;
+ wr_wide_y_dout_x <= WORD_EXT_DNC;
+ wr_wide_x_dout_y <= WORD_EXT_DNC;
+ wr_wide_y_dout_y <= WORD_EXT_DNC;
+ wr_narrow_x_dout_x <= WORD_EXT_DNC;
+ wr_narrow_y_dout_x <= WORD_EXT_DNC;
+ wr_narrow_x_dout_y <= WORD_EXT_DNC;
+ wr_narrow_y_dout_y <= WORD_EXT_DNC;
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_PROPAGATE_CARRIES:
+ //
+ case (wrk_fsm_state)
+ // clear the stored carries right before the first word is processed in BUSY
+ WRK_FSM_STATE_LATENCY_PRE2: begin
+ rd_narrow_x_din_x_cry_r <= CARRY_ZERO;
+ rd_narrow_y_din_x_cry_r <= CARRY_ZERO;
+ rd_narrow_x_din_y_cry_r <= CARRY_ZERO;
+ rd_narrow_y_din_y_cry_r <= CARRY_ZERO;
+ end
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin // TODO: post2 doesn't need update of carry, since that's the last word
+ // bits above WORD_W of the sum become the carry into the next word
+ rd_narrow_x_din_x_cry_r <= rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W];
+ rd_narrow_y_din_x_cry_r <= rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W];
+ rd_narrow_x_din_y_cry_r <= rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+ rd_narrow_y_din_y_cry_r <= rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+ // the carry-free low part is what gets written back
+ wr_narrow_x_dout_x <= rd_narrow_x_din_x_w_cry_reduced;
+ wr_narrow_y_dout_x <= rd_narrow_y_din_x_w_cry_reduced;
+ wr_narrow_x_dout_y <= rd_narrow_x_din_y_w_cry_reduced;
+ wr_narrow_y_dout_y <= rd_narrow_y_din_y_w_cry_reduced;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_CRT_Y2X:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin
+ // wide: the *_din_y read data drives both the _x and the _y write data
+ wr_wide_x_dout_x <= wrk_rd_wide_x_din_y;
+ wr_wide_y_dout_x <= wrk_rd_wide_y_din_y;
+ wr_wide_x_dout_y <= wrk_rd_wide_x_din_y;
+ wr_wide_y_dout_y <= wrk_rd_wide_y_din_y;
+ // narrow: same pattern, source is always the *_din_y read data
+ wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_y;
+ wr_narrow_y_dout_x <= wrk_rd_narrow_y_din_y;
+ wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y;
+ wr_narrow_y_dout_y <= wrk_rd_narrow_y_din_y;
+ //
+ end
+ //
+ endcase
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Write Address Logic
+ // Write addresses are the narrow read addresses delayed by two clocks; banks come from sel_wide_out / sel_narrow_out.
+ always @(posedge clk) begin
+ // default every clock: BANK_DNC / OP_ADDR_DNC unless overridden below
+ {wr_wide_xy_bank_x, wr_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC};
+ {wr_wide_xy_bank_y, wr_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
+ {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
+ {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+ //
+ case (opcode)
+ // both opcodes get wide and narrow addresses here; the write enable logic decides which memories are written
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_COPY_CRT_Y2X:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin
+ // wide writes reuse the delayed narrow address (there is no separate wide delay line)
+ {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly2};
+ {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly2};
+ //
+ {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly2};
+ {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly2};
+ //
+ end
+ //
+ endcase
+ //
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Read Address Logic
+ // One counter pair (rd_narrow_xy_addr_*_next) generates the read addresses for both narrow and wide memories.
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next;
+
+ assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
+ assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
+
+ always @(posedge clk) begin
+ // default every clock: BANK_DNC / OP_ADDR_DNC; the *_next counters have no default and hold their value
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ??
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_COPY_CRT_Y2X:
+ // keyed on the NEXT state, same as the read enables
+ case (wrk_fsm_state_next_one_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1: begin
+ // start of a pass: address zero in the selected input banks
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
+ // preload the counters with the following address
+ rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY: begin
+ // issue the pending address on all four read ports
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_narrow_xy_addr_x_next};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_narrow_xy_addr_y_next};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
+ // advance the counters by one word
+ rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+ rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+ //
+ end
+ //
+ endcase
+ //
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // FSM Process
+ // State register: follows the one-pass next-state function for the three handled opcodes, otherwise stays in IDLE.
+ always @(posedge clk)
+ // synchronous reset to IDLE
+ if (rst) wrk_fsm_state <= WRK_FSM_STATE_IDLE;
+ else case (opcode)
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_COPY_CRT_Y2X: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+ default: wrk_fsm_state <= WRK_FSM_STATE_IDLE; // any other opcode forces IDLE
+ endcase
+
+
+ //
+ // Busy Exit Logic
+ //
+ // wrk_fsm_done_one_pass is a registered flag that tells the transition logic to
+ // leave BUSY. It is set for a clock when, for one of the one-pass opcodes, the FSM
+ // is in BUSY and either pending read address (X or Y) equals word_index_last.
+ // In every other situation it is cleared.
+ //
+ reg wrk_fsm_done_one_pass = 1'b0;
+
+ always @(posedge clk)
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_COPY_CRT_Y2X:
+ //
+ if ((wrk_fsm_state == WRK_FSM_STATE_BUSY) &&
+ (rd_narrow_xy_addr_x_next_is_last || rd_narrow_xy_addr_y_next_is_last)) // TODO: Check, whether both are necessary...
+ wrk_fsm_done_one_pass <= 1'b1;
+ else
+ wrk_fsm_done_one_pass <= 1'b0;
+ //
+ default:
+ //
+ wrk_fsm_done_one_pass <= 1'b0;
+ //
+ endcase
+
+
+ //
+ // FSM Transition Logic
+ //
+ // Combinational next-state function for one sweep over the address space:
+ // IDLE -(ena)-> LATENCY_PRE1 -> LATENCY_PRE2 -> BUSY -(wrk_fsm_done_one_pass)-> LATENCY_POST1 -> LATENCY_POST2 -> STOP -> IDLE
+ //
+ // The state register is 4 bits wide but only 7 encodings are used. The default
+ // branch keeps this block purely combinational (no inferred latch) and sends any
+ // unused encoding (4'h4, 4'h8..4'hF) back to IDLE.
+ //
+ always @* begin
+ //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE ;
+ WRK_FSM_STATE_LATENCY_PRE1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_PRE2 ;
+ WRK_FSM_STATE_LATENCY_PRE2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_BUSY ;
+ WRK_FSM_STATE_BUSY: wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ;
+ WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_POST2 ;
+ WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_STOP ;
+ WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ default: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ endcase
+ //
+ end
+
+
+ //
+ // Ready Logic
+ // rdy drops when ena is sampled in IDLE and is set again in STOP; in all other states it holds its value.
+ reg rdy_reg = 1'b1;
+
+ assign rdy = rdy_reg;
+
+ always @(posedge clk)
+ // synchronous reset: ready
+ if (rst) rdy_reg <= 1'b1;
+ else case (wrk_fsm_state)
+ WRK_FSM_STATE_IDLE: rdy_reg <= ~ena;
+ WRK_FSM_STATE_STOP: rdy_reg <= 1'b1;
+ endcase
+
+
+endmodule
diff --git a/rtl/modexpng_io_block.v b/rtl/modexpng_io_block.v
index 68d13c4..d7dd72e 100644
--- a/rtl/modexpng_io_block.v
+++ b/rtl/modexpng_io_block.v
@@ -84,13 +84,17 @@ module modexpng_io_block
wire bus_data_wr_input_1 = bus_data_wr && (bus_addr_msb == 2'd0);
wire bus_data_wr_input_2 = bus_data_wr && (bus_addr_msb == 2'd1);
+ wire bus_cs_input_1 = bus_cs && (bus_addr_msb == 2'b00);
+ wire bus_cs_input_2 = bus_cs && (bus_addr_msb == 2'b01);
+ wire bus_cs_output = bus_cs && (bus_addr_msb == 2'b10);
+
/* INPUT_1 */
modexpng_tdp_36k_x16_x32_wrapper bram_input_1
(
.clk (clk), // core clock
.clk_bus (clk_bus), // bus clock
- .ena (bus_cs), // bus side read-write
+ .ena (bus_cs_input_1), // bus side read-write
.wea (bus_data_wr_input_1), //
.addra (bus_addr_lsb), //
.dina (bus_data_wr), //
@@ -109,7 +113,7 @@ module modexpng_io_block
.clk (clk), // core clock
.clk_bus (clk_bus), // bus clock
- .ena (bus_cs), // bus side write-only
+ .ena (bus_cs_input_2), // bus side write-only
.wea (bus_data_wr_input_2), //
.addra (bus_addr_lsb), //
.dina (bus_data_wr), //
@@ -132,7 +136,7 @@ module modexpng_io_block
.addra (out_addr), //
.dina (out_din), //
- .enb (bus_cs), // bus side read-only
+ .enb (bus_cs_output), // bus side read-only
.addrb (bus_addr_lsb), //
.doutb (bus_data_rd_output) //
);
diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v
index 81f582f..dfbd676 100644
--- a/rtl/modexpng_io_manager.v
+++ b/rtl/modexpng_io_manager.v
@@ -15,42 +15,45 @@ module modexpng_io_manager
word_index_last,
- ext_wide_xy_ena_x,
- ext_wide_xy_bank_x,
- ext_wide_xy_addr_x,
- ext_wide_x_din_x,
- ext_wide_y_din_x,
-
- ext_narrow_xy_ena_x,
- ext_narrow_xy_bank_x,
- ext_narrow_xy_addr_x,
- ext_narrow_x_din_x,
- ext_narrow_y_din_x,
-
- ext_wide_xy_ena_y,
- ext_wide_xy_bank_y,
- ext_wide_xy_addr_y,
- ext_wide_x_din_y,
- ext_wide_y_din_y,
-
- ext_narrow_xy_ena_y,
- ext_narrow_xy_bank_y,
- ext_narrow_xy_addr_y,
- ext_narrow_x_din_y,
- ext_narrow_y_din_y,
+ io_wide_xy_ena_x,
+ io_wide_xy_bank_x,
+ io_wide_xy_addr_x,
+ io_wide_x_din_x,
+ io_wide_y_din_x,
+
+ io_narrow_xy_ena_x,
+ io_narrow_xy_bank_x,
+ io_narrow_xy_addr_x,
+ io_narrow_x_din_x,
+ io_narrow_y_din_x,
+
+ io_wide_xy_ena_y,
+ io_wide_xy_bank_y,
+ io_wide_xy_addr_y,
+ io_wide_x_din_y,
+ io_wide_y_din_y,
+
+ io_narrow_xy_ena_y,
+ io_narrow_xy_bank_y,
+ io_narrow_xy_addr_y,
+ io_narrow_x_din_y,
+ io_narrow_y_din_y,
io_in_1_en,
io_in_1_addr,
- io_in_1_dout,
+ io_in_1_din,
io_in_2_en,
io_in_2_addr,
- io_in_2_dout,
+ io_in_2_din,
io_out_en,
io_out_we,
io_out_addr,
- io_out_din
+ io_out_dout,
+
+ wrk_narrow_x_din_x_trunc,
+ wrk_narrow_x_din_y_trunc
);
//
@@ -78,42 +81,45 @@ module modexpng_io_manager
input [ OP_ADDR_W -1:0] word_index_last;
- output ext_wide_xy_ena_x;
- output [ BANK_ADDR_W -1:0] ext_wide_xy_bank_x;
- output [ OP_ADDR_W -1:0] ext_wide_xy_addr_x;
- output [ WORD_EXT_W -1:0] ext_wide_x_din_x;
- output [ WORD_EXT_W -1:0] ext_wide_y_din_x;
-
- output ext_narrow_xy_ena_x;
- output [ BANK_ADDR_W -1:0] ext_narrow_xy_bank_x;
- output [ OP_ADDR_W -1:0] ext_narrow_xy_addr_x;
- output [ WORD_EXT_W -1:0] ext_narrow_x_din_x;
- output [ WORD_EXT_W -1:0] ext_narrow_y_din_x;
-
- output ext_wide_xy_ena_y;
- output [ BANK_ADDR_W -1:0] ext_wide_xy_bank_y;
- output [ OP_ADDR_W -1:0] ext_wide_xy_addr_y;
- output [ WORD_EXT_W -1:0] ext_wide_x_din_y;
- output [ WORD_EXT_W -1:0] ext_wide_y_din_y;
-
- output ext_narrow_xy_ena_y;
- output [ BANK_ADDR_W -1:0] ext_narrow_xy_bank_y;
- output [ OP_ADDR_W -1:0] ext_narrow_xy_addr_y;
- output [ WORD_EXT_W -1:0] ext_narrow_x_din_y;
- output [ WORD_EXT_W -1:0] ext_narrow_y_din_y;
+ output io_wide_xy_ena_x;
+ output [ BANK_ADDR_W -1:0] io_wide_xy_bank_x;
+ output [ OP_ADDR_W -1:0] io_wide_xy_addr_x;
+ output [ WORD_EXT_W -1:0] io_wide_x_din_x;
+ output [ WORD_EXT_W -1:0] io_wide_y_din_x;
+
+ output io_narrow_xy_ena_x;
+ output [ BANK_ADDR_W -1:0] io_narrow_xy_bank_x;
+ output [ OP_ADDR_W -1:0] io_narrow_xy_addr_x;
+ output [ WORD_EXT_W -1:0] io_narrow_x_din_x;
+ output [ WORD_EXT_W -1:0] io_narrow_y_din_x;
+
+ output io_wide_xy_ena_y;
+ output [ BANK_ADDR_W -1:0] io_wide_xy_bank_y;
+ output [ OP_ADDR_W -1:0] io_wide_xy_addr_y;
+ output [ WORD_EXT_W -1:0] io_wide_x_din_y;
+ output [ WORD_EXT_W -1:0] io_wide_y_din_y;
+
+ output io_narrow_xy_ena_y;
+ output [ BANK_ADDR_W -1:0] io_narrow_xy_bank_y;
+ output [ OP_ADDR_W -1:0] io_narrow_xy_addr_y;
+ output [ WORD_EXT_W -1:0] io_narrow_x_din_y;
+ output [ WORD_EXT_W -1:0] io_narrow_y_din_y;
output io_in_1_en;
output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr;
- input [ WORD_W -1:0] io_in_1_dout;
+ input [ WORD_W -1:0] io_in_1_din;
output io_in_2_en;
output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr;
- input [ WORD_W -1:0] io_in_2_dout;
+ input [ WORD_W -1:0] io_in_2_din;
output io_out_en;
output io_out_we;
output [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr;
- output [ WORD_W -1:0] io_out_din;
+ output [ WORD_W -1:0] io_out_dout;
+
+ output [ WORD_W -1:0] wrk_narrow_x_din_x_trunc;
+ output [ WORD_W -1:0] wrk_narrow_x_din_y_trunc;
//
@@ -126,6 +132,7 @@ module modexpng_io_manager
localparam [2:0] IO_FSM_STATE_EXTRA = 3'b100;
localparam [2:0] IO_FSM_STATE_LATENCY_POST1 = 3'b101;
localparam [2:0] IO_FSM_STATE_LATENCY_POST2 = 3'b110;
+ localparam [2:0] IO_FSM_STATE_STOP = 3'b111;
reg [2:0] io_fsm_state = IO_FSM_STATE_IDLE;
reg [2:0] io_fsm_state_next;
@@ -143,10 +150,11 @@ module modexpng_io_manager
reg [ OP_ADDR_W -1:0] in_2_addr_op;
reg out_en = 1'b0;
- reg out_we;
reg [BANK_ADDR_W -1:0] out_addr_bank;
reg [ OP_ADDR_W -1:0] out_addr_op;
- reg [ WORD_W -1:0] out_din;
+ reg [ WORD_W -1:0] out_dout;
+
+ reg [ OP_ADDR_W -1:0] dummy_addr_op;
//
@@ -187,51 +195,54 @@ module modexpng_io_manager
assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op};
assign io_out_en = out_en;
- assign io_out_we = out_we;
+ assign io_out_we = io_out_en; // we can only write!
assign io_out_addr = {out_addr_bank, out_addr_op};
- assign io_out_din = out_din;
+ assign io_out_dout = out_dout;
//
// Mapping
//
- assign ext_wide_xy_ena_x = wide_xy_ena_x;
- assign ext_wide_xy_bank_x = wide_xy_bank_x;
- assign ext_wide_xy_addr_x = wide_xy_addr_x;
- assign ext_wide_x_din_x = wide_x_din_x;
- assign ext_wide_y_din_x = wide_y_din_x;
+ assign io_wide_xy_ena_x = wide_xy_ena_x;
+ assign io_wide_xy_bank_x = wide_xy_bank_x;
+ assign io_wide_xy_addr_x = wide_xy_addr_x;
+ assign io_wide_x_din_x = wide_x_din_x;
+ assign io_wide_y_din_x = wide_y_din_x;
- assign ext_narrow_xy_ena_x = narrow_xy_ena_x;
- assign ext_narrow_xy_bank_x = narrow_xy_bank_x;
- assign ext_narrow_xy_addr_x = narrow_xy_addr_x;
- assign ext_narrow_x_din_x = narrow_x_din_x;
- assign ext_narrow_y_din_x = narrow_y_din_x;
+ assign io_narrow_xy_ena_x = narrow_xy_ena_x;
+ assign io_narrow_xy_bank_x = narrow_xy_bank_x;
+ assign io_narrow_xy_addr_x = narrow_xy_addr_x;
+ assign io_narrow_x_din_x = narrow_x_din_x;
+ assign io_narrow_y_din_x = narrow_y_din_x;
- assign ext_wide_xy_ena_y = wide_xy_ena_y;
- assign ext_wide_xy_bank_y = wide_xy_bank_y;
- assign ext_wide_xy_addr_y = wide_xy_addr_y;
- assign ext_wide_x_din_y = wide_x_din_y;
- assign ext_wide_y_din_y = wide_y_din_y;
+ assign io_wide_xy_ena_y = wide_xy_ena_y;
+ assign io_wide_xy_bank_y = wide_xy_bank_y;
+ assign io_wide_xy_addr_y = wide_xy_addr_y;
+ assign io_wide_x_din_y = wide_x_din_y;
+ assign io_wide_y_din_y = wide_y_din_y;
- assign ext_narrow_xy_ena_y = narrow_xy_ena_y;
- assign ext_narrow_xy_bank_y = narrow_xy_bank_y;
- assign ext_narrow_xy_addr_y = narrow_xy_addr_y;
- assign ext_narrow_x_din_y = narrow_x_din_y;
- assign ext_narrow_y_din_y = narrow_y_din_y;
+ assign io_narrow_xy_ena_y = narrow_xy_ena_y;
+ assign io_narrow_xy_bank_y = narrow_xy_bank_y;
+ assign io_narrow_xy_addr_y = narrow_xy_addr_y;
+ assign io_narrow_x_din_y = narrow_x_din_y;
+ assign io_narrow_y_din_y = narrow_y_din_y;
//
// Delays
//
- reg [ OP_ADDR_W -1:0] in_1_addr_op_dly1;
- reg [ OP_ADDR_W -1:0] in_1_addr_op_dly2;
- reg [ OP_ADDR_W -1:0] in_2_addr_op_dly1;
- reg [ OP_ADDR_W -1:0] in_2_addr_op_dly2;
+ reg [OP_ADDR_W -1:0] in_1_addr_op_dly1;
+ reg [OP_ADDR_W -1:0] in_1_addr_op_dly2;
+ reg [OP_ADDR_W -1:0] in_2_addr_op_dly1;
+ reg [OP_ADDR_W -1:0] in_2_addr_op_dly2;
+ reg [OP_ADDR_W -1:0] dummy_addr_op_dly1;
+ reg [OP_ADDR_W -1:0] dummy_addr_op_dly2;
always @(posedge clk) begin
//
- {in_1_addr_op_dly2, in_1_addr_op_dly1} <= {in_1_addr_op_dly1, in_1_addr_op};
- {in_2_addr_op_dly2, in_2_addr_op_dly1} <= {in_2_addr_op_dly1, in_2_addr_op};
+ {in_1_addr_op_dly2, in_1_addr_op_dly1} <= {in_1_addr_op_dly1, in_1_addr_op};
+ {in_2_addr_op_dly2, in_2_addr_op_dly1} <= {in_2_addr_op_dly1, in_2_addr_op};
+ {dummy_addr_op_dly2, dummy_addr_op_dly1} <= {dummy_addr_op_dly1, dummy_addr_op};
//
end
@@ -241,10 +252,14 @@ module modexpng_io_manager
//
wire opcode_is_input = (opcode == UOP_OPCODE_INPUT_TO_WIDE) || (opcode == UOP_OPCODE_INPUT_TO_NARROW);
- wire opcode_is_wide = (opcode == UOP_OPCODE_INPUT_TO_WIDE );
- wire opcode_is_narrow = (opcode == UOP_OPCODE_INPUT_TO_NARROW);
+ wire opcode_is_output = opcode == UOP_OPCODE_OUTPUT_FROM_NARROW;
+
+ wire opcode_is_input_wide = opcode == UOP_OPCODE_INPUT_TO_WIDE;
+ wire opcode_is_input_narrow = opcode == UOP_OPCODE_INPUT_TO_NARROW;
- wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF);
+ wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF) ||
+ (sel_in == BANK_IN_2_P_COEFF) ||
+ (sel_in == BANK_IN_2_Q_COEFF) ;
wire sel_crt_is_x = sel_crt == UOP_CRT_X;
wire sel_crt_is_y = sel_crt == UOP_CRT_Y;
@@ -252,22 +267,18 @@ module modexpng_io_manager
wire sel_aux_is_1 = sel_aux == UOP_AUX_1;
wire sel_aux_is_2 = sel_aux == UOP_AUX_2;
- wire in_1_addr_op_is_last;
- wire in_2_addr_op_is_last;
-
- wire in_1_addr_next_op_is_last;
- wire in_2_addr_next_op_is_last;
-
+ wire in_1_addr_op_next_is_last;
+ wire in_2_addr_op_next_is_last;
+ wire dummy_addr_op_next_is_last;
//
- // Enable Logic
+ // Source Enable Logic
//
always @(posedge clk)
//
if (rst) begin
in_1_en <= 1'b0;
in_2_en <= 1'b0;
- out_en <= 1'b0;
end else case (io_fsm_state_next)
//
IO_FSM_STATE_LATENCY_PRE1,
@@ -279,7 +290,7 @@ module modexpng_io_manager
//
IO_FSM_STATE_EXTRA: begin
in_1_en <= opcode_is_input && sel_aux_is_1 && sel_in_needs_extra;
- in_2_en <= 1'b0;
+ in_2_en <= opcode_is_input && sel_aux_is_2 && sel_in_needs_extra;
end
//
default: begin
@@ -290,7 +301,7 @@ module modexpng_io_manager
endcase
//
- // Enable Logic
+ // Destination Enable Logic
//
always @(posedge clk)
//
@@ -301,38 +312,52 @@ module modexpng_io_manager
narrow_xy_ena_x <= 1'b0;
narrow_xy_ena_y <= 1'b0;
//
+ out_en <= 1'b0;
+ //
end else case (io_fsm_state)
//
IO_FSM_STATE_BUSY,
IO_FSM_STATE_EXTRA,
IO_FSM_STATE_LATENCY_POST1: begin
- wide_xy_ena_x <= opcode_is_wide && sel_crt_is_x;
- wide_xy_ena_y <= opcode_is_wide && sel_crt_is_y;
- narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x;
- narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y;
+ //
+ wide_xy_ena_x <= opcode_is_input_wide && sel_crt_is_x;
+ wide_xy_ena_y <= opcode_is_input_wide && sel_crt_is_y;
+ narrow_xy_ena_x <= opcode_is_input_narrow && sel_crt_is_x;
+ narrow_xy_ena_y <= opcode_is_input_narrow && sel_crt_is_y;
+ //
+ out_en <= opcode_is_output;
+ //
end
//
IO_FSM_STATE_LATENCY_POST2: begin
+ //
wide_xy_ena_x <= 1'b0;
wide_xy_ena_y <= 1'b0;
- narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra;
- narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra;
+ narrow_xy_ena_x <= opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra;
+ narrow_xy_ena_y <= opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra;
+ //
+ out_en <= opcode_is_output;
+ //
end
//
default: begin
+ //
wide_xy_ena_x <= 1'b0;
wide_xy_ena_y <= 1'b0;
narrow_xy_ena_x <= 1'b0;
narrow_xy_ena_y <= 1'b0;
+ //
+ out_en <= 1'b0;
+ //
end
//
endcase
//
- // Data Logic
+ // Output Data Logic
//
- wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_dout : io_in_2_dout};
+ wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_din : io_in_2_din};
always @(posedge clk) begin
//
@@ -345,20 +370,30 @@ module modexpng_io_manager
narrow_x_din_y <= WORD_EXT_DNC;
narrow_y_din_y <= WORD_EXT_DNC;
//
+ out_dout <= WORD_DNC;
+ //
case (io_fsm_state)
//
IO_FSM_STATE_BUSY,
IO_FSM_STATE_EXTRA,
IO_FSM_STATE_LATENCY_POST1: begin
- if (opcode_is_wide && sel_crt_is_x) {wide_x_din_x, wide_y_din_x} <= {2{io_in_dout_mux}};
- if (opcode_is_wide && sel_crt_is_y) {wide_x_din_y, wide_y_din_y} <= {2{io_in_dout_mux}};
- if (opcode_is_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
- if (opcode_is_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+ //
+ if (opcode_is_input_wide && sel_crt_is_x) {wide_x_din_x, wide_y_din_x} <= {2{io_in_dout_mux}}; // TODO: Make external ports smaller (WORD_W, not WORD_EXT_W)??
+ if (opcode_is_input_wide && sel_crt_is_y) {wide_x_din_y, wide_y_din_y} <= {2{io_in_dout_mux}};
+ if (opcode_is_input_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
+ if (opcode_is_input_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+ //
+ if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_trunc : wrk_narrow_x_din_y_trunc;
+ //
end
//
IO_FSM_STATE_LATENCY_POST2: begin
- if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
- if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+ //
+ if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
+ if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+ //
+ if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_trunc : wrk_narrow_x_din_y_trunc;
+ //
end
//
endcase
@@ -367,7 +402,7 @@ module modexpng_io_manager
//
- // Address Logic
+ // Destination Address Logic
//
wire [OP_ADDR_W -1:0] in_addr_op_dly2_mux =
sel_aux_is_1 ? in_1_addr_op_dly2 : in_2_addr_op_dly2;
@@ -378,21 +413,24 @@ module modexpng_io_manager
{wide_xy_bank_y, wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
{narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
{narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+ {out_addr_bank, out_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
//
case (io_fsm_state)
//
IO_FSM_STATE_BUSY,
IO_FSM_STATE_EXTRA,
IO_FSM_STATE_LATENCY_POST1: begin
- if (opcode_is_wide && sel_crt_is_x) {wide_xy_bank_x, wide_xy_addr_x } <= {sel_out, in_addr_op_dly2_mux};
- if (opcode_is_wide && sel_crt_is_y) {wide_xy_bank_y, wide_xy_addr_y } <= {sel_out, in_addr_op_dly2_mux};
- if (opcode_is_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux};
- if (opcode_is_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux};
+ if (opcode_is_input_wide && sel_crt_is_x) {wide_xy_bank_x, wide_xy_addr_x } <= {sel_out, in_addr_op_dly2_mux};
+ if (opcode_is_input_wide && sel_crt_is_y) {wide_xy_bank_y, wide_xy_addr_y } <= {sel_out, in_addr_op_dly2_mux};
+ if (opcode_is_input_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux};
+ if (opcode_is_input_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux};
+ if (opcode_is_output ) {out_addr_bank, out_addr_op} <= {sel_out, dummy_addr_op_dly2};
end
//
IO_FSM_STATE_LATENCY_POST2: begin
- if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
- if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
+ if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF };
+ if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF };
+ if (opcode_is_output ) {out_addr_bank, out_addr_op } <= {sel_out, dummy_addr_op_dly2};
end
//
endcase
@@ -401,21 +439,19 @@ module modexpng_io_manager
//
- // Address Logic
+ // Source Address Logic
//
reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next;
reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next;
- reg [BANK_ADDR_W + OP_ADDR_W -1:0] out_addr_next;
+ reg [ OP_ADDR_W -1:0] dummy_addr_next;
- wire [OP_ADDR_W -1:0] in_1_addr_next_op = in_1_addr_next[OP_ADDR_W -1:0];
- wire [OP_ADDR_W -1:0] in_2_addr_next_op = in_2_addr_next[OP_ADDR_W -1:0];
- wire [OP_ADDR_W -1:0] out_addr_next_op = out_addr_next [OP_ADDR_W -1:0];
+ wire [OP_ADDR_W -1:0] in_1_addr_op_next = in_1_addr_next[OP_ADDR_W -1:0];
+ wire [OP_ADDR_W -1:0] in_2_addr_op_next = in_2_addr_next[OP_ADDR_W -1:0];
+ wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next;
- assign in_1_addr_op_is_last = in_1_addr_op == word_index_last;
- assign in_2_addr_op_is_last = in_2_addr_op == word_index_last;
-
- assign in_1_addr_next_op_is_last = in_1_addr_next_op == word_index_last;
- assign in_2_addr_next_op_is_last = in_2_addr_next_op == word_index_last;
+ assign in_1_addr_op_next_is_last = in_1_addr_op_next == word_index_last;
+ assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last;
+ assign dummy_addr_op_next_is_last = dummy_addr_op_next == word_index_last;
always @(posedge clk)
//
@@ -423,36 +459,42 @@ module modexpng_io_manager
//
IO_FSM_STATE_LATENCY_PRE1: begin
//
- {in_1_addr_bank, in_1_addr_op} <= {sel_in, OP_ADDR_ZERO};
- {in_2_addr_bank, in_2_addr_op} <= {sel_in, OP_ADDR_ZERO};
- {out_addr_bank, out_addr_op } <= {sel_out, OP_ADDR_ZERO};
+ {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO};
+ {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO};
+ { dummy_addr_op} <= { OP_ADDR_ZERO};
//
- in_1_addr_next <= {sel_in, OP_ADDR_ONE};
- in_2_addr_next <= {sel_in, OP_ADDR_ONE};
- out_addr_next <= {sel_out, OP_ADDR_ONE};
+ in_1_addr_next <= {sel_in, OP_ADDR_ONE};
+ in_2_addr_next <= {sel_in, OP_ADDR_ONE};
+ dummy_addr_next <= { OP_ADDR_ONE};
//
end
//
IO_FSM_STATE_LATENCY_PRE2,
IO_FSM_STATE_BUSY: begin
//
- {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
- {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next;
- {out_addr_bank, out_addr_op } <= out_addr_next;
+ {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
+ {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
+ { dummy_addr_op} <= dummy_addr_next;
//
- in_1_addr_next <= in_1_addr_next + 1'b1;
- in_2_addr_next <= in_2_addr_next + 1'b1;
- out_addr_next <= out_addr_next + 1'b1;
+ in_1_addr_next <= in_1_addr_next + 1'b1;
+ in_2_addr_next <= in_2_addr_next + 1'b1;
+ dummy_addr_next <= dummy_addr_next + 1'b1;
//
end
//
IO_FSM_STATE_EXTRA:
//
- if (opcode_is_input && sel_aux_is_1 && (sel_in == BANK_IN_1_N_COEFF)) begin
+ if (opcode_is_input && sel_in_needs_extra) begin
//
- {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
+ if (sel_aux_is_1) begin
+ {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
+ in_1_addr_next <= in_1_addr_next + 1'b1;
+ end
//
- in_1_addr_next <= in_1_addr_next + 1'b1;
+ if (sel_aux_is_2) begin
+ {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next;
+ in_2_addr_next <= in_2_addr_next + 1'b1;
+ end
//
end
//
@@ -481,28 +523,33 @@ module modexpng_io_manager
if (io_fsm_state == IO_FSM_STATE_BUSY) begin
//
if (opcode_is_input) begin
- if (sel_aux_is_1 && in_1_addr_next_op_is_last) io_fsm_done <= 1'b1;
- if (sel_aux_is_2 && in_2_addr_next_op_is_last) io_fsm_done <= 1'b1;
+ if (sel_aux_is_1 && in_1_addr_op_next_is_last) io_fsm_done <= 1'b1;
+ if (sel_aux_is_2 && in_2_addr_op_next_is_last) io_fsm_done <= 1'b1;
+ end else if (opcode_is_output) begin
+ if (dummy_addr_op_next_is_last) io_fsm_done <= 1'b1;
end
-
+ //
end
//
end
-
+
//
// FSM Transition Logic
//
+ wire [2:0] io_fsm_state_after_busy = opcode_is_input ? IO_FSM_STATE_EXTRA : IO_FSM_STATE_LATENCY_POST1;
+
always @* begin
//
case (io_fsm_state)
IO_FSM_STATE_IDLE: io_fsm_state_next = ena ? IO_FSM_STATE_LATENCY_PRE1 : IO_FSM_STATE_IDLE ;
IO_FSM_STATE_LATENCY_PRE1: io_fsm_state_next = IO_FSM_STATE_LATENCY_PRE2 ;
IO_FSM_STATE_LATENCY_PRE2: io_fsm_state_next = IO_FSM_STATE_BUSY ;
- IO_FSM_STATE_BUSY: io_fsm_state_next = io_fsm_done ? IO_FSM_STATE_EXTRA : IO_FSM_STATE_BUSY ;
+ IO_FSM_STATE_BUSY: io_fsm_state_next = io_fsm_done ? io_fsm_state_after_busy : IO_FSM_STATE_BUSY ;
IO_FSM_STATE_EXTRA: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST1 ;
IO_FSM_STATE_LATENCY_POST1: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST2 ;
- IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next = IO_FSM_STATE_IDLE ;
+ IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next = IO_FSM_STATE_STOP ;
+ IO_FSM_STATE_STOP: io_fsm_state_next = IO_FSM_STATE_IDLE ;
endcase
//
end
@@ -517,10 +564,10 @@ module modexpng_io_manager
always @(posedge clk)
//
- if (rst) rdy_reg <= 1'b1;
+ if (rst) rdy_reg <= 1'b1;
else case (io_fsm_state)
- IO_FSM_STATE_IDLE: rdy_reg <= ~ena;
- IO_FSM_STATE_LATENCY_POST2: rdy_reg <= 1'b1;
+ IO_FSM_STATE_IDLE: rdy_reg <= ~ena;
+ IO_FSM_STATE_STOP: rdy_reg <= 1'b1;
endcase
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
index 1465c48..2e591e7 100644
--- a/rtl/modexpng_microcode.vh
+++ b/rtl/modexpng_microcode.vh
@@ -32,8 +32,19 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2;
*/
localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3;
+/* CRT tells from which of the dual MMM to read
+ * NPQ specifies the width of the operand
+ * AUX is don't care
+ * LADDER is don't care
+ * source and destination WIDE are don't care
+ */
-//localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 4'd0;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 4'd4;
+/* CRT is don't care
+ * NPQ specifies the width of the operand
+ * AUX is don't care
+ * LADDER is don't care
+ */
localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 4'd8;
/* CRT is don't care
@@ -42,6 +53,14 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 4'd8;
* LADDER specifies Montgomery ladder mode
*/
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 4'd11;
+/* CRT is don't care
+ * NPQ specifies the width of the operand
+ * AUX is don't care
+ * LADDER is don't care
+ * source and destination WIDE are don't care
+ */
+
// CRT
localparam [UOP_CRT_W -1:0] UOP_CRT_X = 1'b0;
localparam [UOP_CRT_W -1:0] UOP_CRT_Y = 1'b1;
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index b9b41e8..14f1b47 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -17,16 +17,16 @@ module modexpng_mmm_dual
rd_wide_xy_bank_aux,
rd_wide_xy_addr,
rd_wide_xy_addr_aux,
- rd_wide_x_dout,
- rd_wide_y_dout,
- rd_wide_x_dout_aux,
- rd_wide_y_dout_aux,
+ rd_wide_x_din,
+ rd_wide_y_din,
+ rd_wide_x_din_aux,
+ rd_wide_y_din_aux,
rd_narrow_xy_ena,
rd_narrow_xy_bank,
rd_narrow_xy_addr,
- rd_narrow_x_dout,
- rd_narrow_y_dout,
+ rd_narrow_x_din,
+ rd_narrow_y_din,
rcmb_wide_xy_bank,
rcmb_wide_xy_addr,
@@ -82,16 +82,16 @@ module modexpng_mmm_dual
output [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr;
output [ 8-1:0] rd_wide_xy_addr_aux;
- input [18*NUM_MULTS/2-1:0] rd_wide_x_dout;
- input [18*NUM_MULTS/2-1:0] rd_wide_y_dout;
- input [ 18-1:0] rd_wide_x_dout_aux;
- input [ 18-1:0] rd_wide_y_dout_aux;
+ input [18*NUM_MULTS/2-1:0] rd_wide_x_din;
+ input [18*NUM_MULTS/2-1:0] rd_wide_y_din;
+ input [ 18-1:0] rd_wide_x_din_aux;
+ input [ 18-1:0] rd_wide_y_din_aux;
output rd_narrow_xy_ena;
output [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
output [ 7:0] rd_narrow_xy_addr;
- input [18-1:0] rd_narrow_x_dout;
- input [18-1:0] rd_narrow_y_dout;
+ input [18-1:0] rd_narrow_x_din;
+ input [18-1:0] rd_narrow_y_din;
output [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
output [ 7:0] rcmb_wide_xy_addr;
@@ -626,8 +626,8 @@ module modexpng_mmm_dual
//end
//endgenerate
- assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout};
- assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout};
+ assign dsp_x_a = {rd_wide_x_din_aux, rd_wide_x_din};
+ assign dsp_y_a = {rd_wide_y_din_aux, rd_wide_y_din};
//assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux;
//assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux;
@@ -730,25 +730,25 @@ module modexpng_mmm_dual
//
// On-the-fly Carry Recombination
//
- wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry};
- wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry};
- wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry;
+ wire [17:0] rd_narrow_x_din_carry = rd_narrow_x_din + {{16{1'b0}}, dsp_xy_b_carry};
+ wire [17:0] rd_narrow_y_din_carry = rd_narrow_y_din + {{16{1'b0}}, dsp_xy_b_carry};
+ wire [17:0] rd_narrow_xy_din_carry_mux = ladder_mode ? rd_narrow_y_din_carry : rd_narrow_x_din_carry;
wire [15:0] rd_narrow_xy_dout_carry_mux_or_unity = !force_unity_b ?
- rd_narrow_xy_dout_carry_mux[15:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO;
+ rd_narrow_xy_din_carry_mux[15:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO;
always @(posedge clk)
//
if (narrow_xy_ena_dly2) begin // rewrite
//
if (!dsp_merge_xy_b) begin
- dsp_x_b <= rd_narrow_x_dout[15:0];
- dsp_y_b <= rd_narrow_y_dout[15:0];
+ dsp_x_b <= rd_narrow_x_din[15:0];
+ dsp_y_b <= rd_narrow_y_din[15:0];
dsp_xy_b_carry <= 2'b00;
end else begin
dsp_x_b <= rd_narrow_xy_dout_carry_mux_or_unity;
dsp_y_b <= rd_narrow_xy_dout_carry_mux_or_unity;
- dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16];
+ dsp_xy_b_carry <= rd_narrow_xy_din_carry_mux[17:16];
end
//
end else begin
@@ -845,6 +845,7 @@ module modexpng_mmm_dual
modexpng_recombinator_block recombinator_block
(
.clk (clk),
+ .rst (rst),
.ena (rcmb_ena),
.rdy (rcmb_rdy),
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index e610e47..6e6c3ca 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -31,9 +31,9 @@ localparam COL_INDEX_W = OP_ADDR_W - cryptech_clog2(NUM_MULTS);
localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS);
-localparam RDCT_CARRY_W = WORD_EXT_W - WORD_W;
+localparam CARRY_W = WORD_EXT_W - WORD_W;
-localparam [RDCT_CARRY_W-1:0] RDCT_CARRY_ZEROES = {RDCT_CARRY_W{1'b0}};
+localparam [CARRY_W-1:0] CARRY_ZERO = {CARRY_W{1'b0}};
localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0;
localparam [BANK_ADDR_W-1:0] BANK_WIDE_B = 3'd1;
diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v
index a37333e..c100b8b 100644
--- a/rtl/modexpng_reductor.v
+++ b/rtl/modexpng_reductor.v
@@ -4,8 +4,8 @@ module modexpng_reductor
ena, rdy,
word_index_last,
sel_wide_out, sel_narrow_out,
- rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux,
- rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid,
+ rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_din_aux, rd_wide_y_din_aux,
+ rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_din, rcmb_final_y_din, rcmb_final_xy_valid,
rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_dout, rdct_wide_y_dout, rdct_wide_xy_valid,
rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
);
@@ -41,13 +41,13 @@ module modexpng_reductor
*/
input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
input [ 7:0] rd_wide_xy_addr_aux;
- input [ 17:0] rd_wide_x_dout_aux;
- input [ 17:0] rd_wide_y_dout_aux;
+ input [ 17:0] rd_wide_x_din_aux;
+ input [ 17:0] rd_wide_y_din_aux;
//
input [ BANK_ADDR_W -1:0] rcmb_final_xy_bank;
input [ 7:0] rcmb_final_xy_addr;
- input [ 17:0] rcmb_final_x_dout;
- input [ 17:0] rcmb_final_y_dout;
+ input [ 17:0] rcmb_final_x_din;
+ input [ 17:0] rcmb_final_y_din;
input rcmb_final_xy_valid;
output [ 2:0] rdct_wide_xy_bank;
@@ -121,8 +121,8 @@ module modexpng_reductor
if (rcmb_final_xy_valid) begin
rcmb_xy_bank_dly1 <= rcmb_final_xy_bank;
rcmb_xy_addr_dly1 <= rcmb_final_xy_addr;
- rcmb_x_dout_dly1 <= rcmb_final_x_dout;
- rcmb_y_dout_dly1 <= rcmb_final_y_dout;
+ rcmb_x_dout_dly1 <= rcmb_final_x_din;
+ rcmb_y_dout_dly1 <= rcmb_final_y_din;
end
//
if (rcmb_xy_valid_dly1) begin
@@ -167,14 +167,14 @@ module modexpng_reductor
case (rcmb_xy_bank_dly3)
BANK_RCMB_ML: begin
- {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
- {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
+ {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry;
+ {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry;
end
BANK_RCMB_MH:
if (rcmb_xy_addr_dly3 == 8'd0) begin
- {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
- {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
+ {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry;
+ {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry;
end
endcase
@@ -273,8 +273,8 @@ module modexpng_reductor
//
//
//
- wire [17:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux;
- wire [17:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux;
+ wire [17:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_din_aux;
+ wire [17:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_din_aux;
wire [17:0] sum_rdct_x_carry = sum_rdct_x + {16'h0000, rcmb_x_lsb_carry};
wire [17:0] sum_rdct_y_carry = sum_rdct_y + {16'h0000, rcmb_y_lsb_carry};
diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v
index f1d5ae2..19601ef 100644
--- a/rtl/modexpng_storage_block.v
+++ b/rtl/modexpng_storage_block.v
@@ -1,6 +1,6 @@
module modexpng_storage_block
(
- clk, clk_bus, rst,
+ clk, rst,
wr_wide_xy_ena,
wr_wide_xy_bank,
@@ -29,7 +29,19 @@ module modexpng_storage_block
rd_narrow_xy_bank,
rd_narrow_xy_addr,
rd_narrow_x_dout,
- rd_narrow_y_dout
+ rd_narrow_y_dout,
+
+ wrk_wide_xy_ena,
+ wrk_wide_xy_bank,
+ wrk_wide_xy_addr,
+ wrk_wide_x_dout,
+ wrk_wide_y_dout,
+
+ wrk_narrow_xy_ena,
+ wrk_narrow_xy_bank,
+ wrk_narrow_xy_addr,
+ wrk_narrow_x_dout,
+ wrk_narrow_y_dout
);
//
@@ -42,7 +54,6 @@ module modexpng_storage_block
// Ports
//
input clk;
- input clk_bus;
input rst;
input wr_wide_xy_ena;
@@ -74,6 +85,18 @@ module modexpng_storage_block
output [ WORD_EXT_W -1:0] rd_narrow_x_dout;
output [ WORD_EXT_W -1:0] rd_narrow_y_dout;
+ input wrk_wide_xy_ena;
+ input [ BANK_ADDR_W -1:0] wrk_wide_xy_bank;
+ input [ OP_ADDR_W -1:0] wrk_wide_xy_addr;
+ output [ WORD_EXT_W -1:0] wrk_wide_x_dout;
+ output [ WORD_EXT_W -1:0] wrk_wide_y_dout;
+
+ input wrk_narrow_xy_ena;
+ input [ BANK_ADDR_W -1:0] wrk_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] wrk_narrow_xy_addr;
+ output [ WORD_EXT_W -1:0] wrk_narrow_x_dout;
+ output [ WORD_EXT_W -1:0] wrk_narrow_y_dout;
+
//
// Internal Registers
@@ -81,6 +104,8 @@ module modexpng_storage_block
reg rd_wide_xy_reg_ena = 1'b0;
reg rd_wide_xy_reg_ena_aux = 1'b0;
reg rd_narrow_xy_reg_ena = 1'b0;
+ reg wrk_wide_xy_reg_ena = 1'b0;
+ reg wrk_narrow_xy_reg_ena = 1'b0;
always @(posedge clk)
//
@@ -88,10 +113,14 @@ module modexpng_storage_block
rd_wide_xy_reg_ena <= 1'b0;
rd_wide_xy_reg_ena_aux <= 1'b0;
rd_narrow_xy_reg_ena <= 1'b0;
+ wrk_wide_xy_reg_ena <= 1'b0;
+ wrk_narrow_xy_reg_ena <= 1'b0;
end else begin
rd_wide_xy_reg_ena <= rd_wide_xy_ena;
rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux;
rd_narrow_xy_reg_ena <= rd_narrow_xy_ena;
+ wrk_wide_xy_reg_ena <= wrk_wide_xy_ena;
+ wrk_narrow_xy_reg_ena <= wrk_narrow_xy_ena;
end
//
@@ -102,22 +131,26 @@ module modexpng_storage_block
wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset;
wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset;
wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] wrk_wide_xy_offset;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] wrk_narrow_xy_offset;
assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux};
- assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr};
- assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr};
- assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr};
+ assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr };
+ assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr };
+ assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr };
+ assign wrk_wide_xy_offset = {wrk_wide_xy_bank, wrk_wide_xy_addr };
+ assign wrk_narrow_xy_offset = {wrk_narrow_xy_bank, wrk_narrow_xy_addr };
//
// "Wide" Storage
//
genvar z;
generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
- begin : gen_wide_bram
+ begin : gen_wide
//
assign rd_wide_xy_offset[z] = {1'b0, rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]};
//
- modexpng_sdp_36k_x18_wrapper wide_bram_x
+ modexpng_sdp_36k_x18_wrapper wide_x
(
.clk (clk),
@@ -132,7 +165,7 @@ module modexpng_storage_block
.doutb (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W])
);
//
- modexpng_sdp_36k_x18_wrapper wide_bram_y
+ modexpng_sdp_36k_x18_wrapper wide_y
(
.clk (clk),
@@ -151,9 +184,42 @@ module modexpng_storage_block
endgenerate
//
- // Auxilary Storage
+ // Worker "Wide" Storage
+ //
+ modexpng_sdp_36k_x18_wrapper wrk_wide_x
+ (
+ .clk (clk),
+
+ .ena (wr_wide_xy_ena),
+ .wea (wr_wide_xy_ena),
+ .addra (wr_wide_xy_offset),
+ .dina (wr_wide_x_din),
+
+ .enb (wrk_wide_xy_ena),
+ .regceb (wrk_wide_xy_reg_ena),
+ .addrb (wrk_wide_xy_offset),
+ .doutb (wrk_wide_x_dout)
+ );
//
- modexpng_sdp_36k_x18_wrapper wide_bram_x_aux
+ modexpng_sdp_36k_x18_wrapper wrk_wide_y
+ (
+ .clk (clk),
+
+ .ena (wr_wide_xy_ena),
+ .wea (wr_wide_xy_ena),
+ .addra (wr_wide_xy_offset),
+ .dina (wr_wide_y_din),
+
+ .enb (wrk_wide_xy_ena),
+ .regceb (wrk_wide_xy_reg_ena),
+ .addrb (wrk_wide_xy_offset),
+ .doutb (wrk_wide_y_dout)
+ );
+
+ //
+ // Auxiliary "Wide" Storage
+ //
+ modexpng_sdp_36k_x18_wrapper wide_x_aux
(
.clk (clk),
@@ -168,7 +234,7 @@ module modexpng_storage_block
.doutb (rd_wide_x_dout_aux)
);
//
- modexpng_sdp_36k_x18_wrapper wide_bram_y_aux
+ modexpng_sdp_36k_x18_wrapper wide_y_aux
(
.clk (clk),
@@ -186,7 +252,7 @@ module modexpng_storage_block
//
// "Narrow" Storage
//
- modexpng_sdp_36k_x18_wrapper narrow_bram_x
+ modexpng_sdp_36k_x18_wrapper narrow_x
(
.clk (clk),
@@ -201,7 +267,7 @@ module modexpng_storage_block
.doutb (rd_narrow_x_dout)
);
- modexpng_sdp_36k_x18_wrapper narrow_bram_y
+ modexpng_sdp_36k_x18_wrapper narrow_y
(
.clk (clk),
@@ -215,7 +281,39 @@ module modexpng_storage_block
.addrb (rd_narrow_xy_offset),
.doutb (rd_narrow_y_dout)
);
+
+ //
+ // Worker "Narrow" Storage
+ //
+ modexpng_sdp_36k_x18_wrapper wrk_narrow_x
+ (
+ .clk (clk),
+
+ .ena (wr_narrow_xy_ena),
+ .wea (wr_narrow_xy_ena),
+ .addra (wr_narrow_xy_offset),
+ .dina (wr_narrow_x_din),
+
+ .enb (wrk_narrow_xy_ena),
+ .regceb (wrk_narrow_xy_reg_ena),
+ .addrb (wrk_narrow_xy_offset),
+ .doutb (wrk_narrow_x_dout)
+ );
+
+ modexpng_sdp_36k_x18_wrapper wrk_narrow_y
+ (
+ .clk (clk),
+ .ena (wr_narrow_xy_ena),
+ .wea (wr_narrow_xy_ena),
+ .addra (wr_narrow_xy_offset),
+ .dina (wr_narrow_y_din),
+
+ .enb (wrk_narrow_xy_ena),
+ .regceb (wrk_narrow_xy_reg_ena),
+ .addrb (wrk_narrow_xy_offset),
+ .doutb (wrk_narrow_y_dout)
+ );
endmodule
diff --git a/rtl/modexpng_storage_manager.v b/rtl/modexpng_storage_manager.v
index 6b34bed..c39e07a 100644
--- a/rtl/modexpng_storage_manager.v
+++ b/rtl/modexpng_storage_manager.v
@@ -2,53 +2,20 @@ module modexpng_storage_manager
(
clk, rst,
- wr_wide_xy_ena,
- wr_wide_xy_bank,
- wr_wide_xy_addr,
- wr_wide_x_din,
- wr_wide_y_din,
+ wr_wide_xy_ena, wr_wide_xy_bank, wr_wide_xy_addr, wr_wide_x_dout, wr_wide_y_dout,
+ wr_narrow_xy_ena, wr_narrow_xy_bank, wr_narrow_xy_addr, wr_narrow_x_dout, wr_narrow_y_dout,
- wr_narrow_xy_ena,
- wr_narrow_xy_bank,
- wr_narrow_xy_addr,
- wr_narrow_x_din,
- wr_narrow_y_din,
-
- ext_wide_xy_ena,
- ext_wide_xy_bank,
- ext_wide_xy_addr,
- ext_wide_x_din,
- ext_wide_y_din,
-
- ext_narrow_xy_ena,
- ext_narrow_xy_bank,
- ext_narrow_xy_addr,
- ext_narrow_x_din,
- ext_narrow_y_din,
-
- rcmb_wide_xy_ena,
- rcmb_wide_xy_bank,
- rcmb_wide_xy_addr,
- rcmb_wide_x_din,
- rcmb_wide_y_din,
-
- rcmb_narrow_xy_ena,
- rcmb_narrow_xy_bank,
- rcmb_narrow_xy_addr,
- rcmb_narrow_x_din,
- rcmb_narrow_y_din,
-
- rdct_wide_xy_bank,
- rdct_wide_xy_addr,
- rdct_wide_x_din,
- rdct_wide_y_din,
- rdct_wide_xy_valid,
-
- rdct_narrow_xy_bank,
- rdct_narrow_xy_addr,
- rdct_narrow_x_din,
- rdct_narrow_y_din,
- rdct_narrow_xy_valid
+ io_narrow_xy_ena, io_narrow_xy_bank, io_narrow_xy_addr, io_narrow_x_din, io_narrow_y_din,
+ io_wide_xy_ena, io_wide_xy_bank, io_wide_xy_addr, io_wide_x_din, io_wide_y_din,
+
+ rcmb_wide_xy_ena, rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din,
+ rcmb_narrow_xy_ena, rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din,
+
+ rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_din, rdct_wide_y_din, rdct_wide_xy_valid,
+ rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_din, rdct_narrow_y_din, rdct_narrow_xy_valid,
+
+ wrk_wide_xy_ena, wrk_wide_xy_bank, wrk_wide_xy_addr, wrk_wide_x_din, wrk_wide_y_din,
+ wrk_narrow_xy_ena, wrk_narrow_xy_bank, wrk_narrow_xy_addr, wrk_narrow_x_din, wrk_narrow_y_din
);
@@ -67,51 +34,67 @@ module modexpng_storage_manager
output wr_wide_xy_ena;
output [BANK_ADDR_W -1:0] wr_wide_xy_bank;
output [ OP_ADDR_W -1:0] wr_wide_xy_addr;
- output [ WORD_EXT_W -1:0] wr_wide_x_din;
- output [ WORD_EXT_W -1:0] wr_wide_y_din;
+ output [ WORD_EXT_W -1:0] wr_wide_x_dout;
+ output [ WORD_EXT_W -1:0] wr_wide_y_dout;
output wr_narrow_xy_ena;
output [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
output [ OP_ADDR_W -1:0] wr_narrow_xy_addr;
- output [ WORD_EXT_W -1:0] wr_narrow_x_din;
- output [ WORD_EXT_W -1:0] wr_narrow_y_din;
+ output [ WORD_EXT_W -1:0] wr_narrow_x_dout;
+ output [ WORD_EXT_W -1:0] wr_narrow_y_dout;
- input ext_wide_xy_ena;
- input [BANK_ADDR_W -1:0] ext_wide_xy_bank;
- input [ OP_ADDR_W -1:0] ext_wide_xy_addr;
- input [ WORD_EXT_W -1:0] ext_wide_x_din;
- input [ WORD_EXT_W -1:0] ext_wide_y_din;
-
- input ext_narrow_xy_ena;
- input [BANK_ADDR_W -1:0] ext_narrow_xy_bank;
- input [ OP_ADDR_W -1:0] ext_narrow_xy_addr;
- input [ WORD_EXT_W -1:0] ext_narrow_x_din;
- input [ WORD_EXT_W -1:0] ext_narrow_y_din;
+ input io_wide_xy_ena;
+ input [BANK_ADDR_W -1:0] io_wide_xy_bank;
+ input [ OP_ADDR_W -1:0] io_wide_xy_addr;
+ input [ WORD_EXT_W -1:0] io_wide_x_din;
+ input [ WORD_EXT_W -1:0] io_wide_y_din;
+
+ input io_narrow_xy_ena;
+ input [BANK_ADDR_W -1:0] io_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] io_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] io_narrow_x_din;
+ input [ WORD_EXT_W -1:0] io_narrow_y_din;
input rcmb_wide_xy_ena;
input [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
- input [ 7:0] rcmb_wide_xy_addr;
- input [17:0] rcmb_wide_x_din;
- input [17:0] rcmb_wide_y_din;
+ input [ OP_ADDR_W -1:0] rcmb_wide_xy_addr;
+ input [ WORD_EXT_W -1:0] rcmb_wide_x_din;
+ input [ WORD_EXT_W -1:0] rcmb_wide_y_din;
input rcmb_narrow_xy_ena;
input [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
- input [ 7:0] rcmb_narrow_xy_addr;
- input [17:0] rcmb_narrow_x_din;
- input [17:0] rcmb_narrow_y_din;
-
- input [ 2:0] rdct_wide_xy_bank;
- input [ 7:0] rdct_wide_xy_addr;
- input [ 17:0] rdct_wide_x_din;
- input [ 17:0] rdct_wide_y_din;
- input rdct_wide_xy_valid;
-
- input [ 2:0] rdct_narrow_xy_bank;
- input [ 7:0] rdct_narrow_xy_addr;
- input [ 17:0] rdct_narrow_x_din;
- input [ 17:0] rdct_narrow_y_din;
- input rdct_narrow_xy_valid;
+ input [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] rcmb_narrow_x_din;
+ input [ WORD_EXT_W -1:0] rcmb_narrow_y_din;
+
+ input [BANK_ADDR_W -1:0] rdct_wide_xy_bank;
+ input [ OP_ADDR_W -1:0] rdct_wide_xy_addr;
+ input [ WORD_EXT_W -1:0] rdct_wide_x_din;
+ input [ WORD_EXT_W -1:0] rdct_wide_y_din;
+ input rdct_wide_xy_valid;
+
+ input [BANK_ADDR_W -1:0] rdct_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] rdct_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] rdct_narrow_x_din;
+ input [ WORD_EXT_W -1:0] rdct_narrow_y_din;
+ input rdct_narrow_xy_valid;
+ input wrk_wide_xy_ena;
+ input [BANK_ADDR_W -1:0] wrk_wide_xy_bank;
+ input [ OP_ADDR_W -1:0] wrk_wide_xy_addr;
+ input [ WORD_EXT_W -1:0] wrk_wide_x_din;
+ input [ WORD_EXT_W -1:0] wrk_wide_y_din;
+
+ input wrk_narrow_xy_ena;
+ input [BANK_ADDR_W -1:0] wrk_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] wrk_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] wrk_narrow_x_din;
+ input [ WORD_EXT_W -1:0] wrk_narrow_y_din;
+
+
+ //
+ // Output Registers
+ //
reg wr_wide_xy_ena_reg = 1'b0;
reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg;
reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_reg;
@@ -186,40 +169,54 @@ module modexpng_storage_manager
end
endtask
+
+ //
+ // Write Arbiter
+ //
always @(posedge clk)
//
if (rst) disable_wide;
else begin
//
- if (ext_wide_xy_ena) enable_wide(ext_wide_xy_bank, ext_wide_xy_addr, ext_wide_x_din, ext_wide_y_din);
+ if (io_wide_xy_ena) enable_wide(io_wide_xy_bank, io_wide_xy_addr, io_wide_x_din, io_wide_y_din);
else if (rcmb_wide_xy_ena) enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din);
else if (rdct_wide_xy_valid) enable_wide(rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_din, rdct_wide_y_din);
+ else if (wrk_wide_xy_ena) enable_wide(wrk_wide_xy_bank, wrk_wide_xy_addr, wrk_wide_x_din, wrk_wide_y_din);
else disable_wide;
//
end
+
+ //
+ // Narrow Write Arbiter
+ //
always @(posedge clk)
//
if (rst) disable_narrow;
else begin
//
- if (ext_narrow_xy_ena) enable_narrow(ext_narrow_xy_bank, ext_narrow_xy_addr, ext_narrow_x_din, ext_narrow_y_din);
+ if (io_narrow_xy_ena) enable_narrow(io_narrow_xy_bank, io_narrow_xy_addr, io_narrow_x_din, io_narrow_y_din);
else if (rcmb_narrow_xy_ena) enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din);
else if (rdct_narrow_xy_valid) enable_narrow(rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_din, rdct_narrow_y_din);
+ else if (wrk_narrow_xy_ena) enable_narrow(wrk_narrow_xy_bank, wrk_narrow_xy_addr, wrk_narrow_x_din, wrk_narrow_y_din);
else disable_narrow;
//
end
+
+ //
+ // Port Mapping
+ //
assign wr_wide_xy_ena = wr_wide_xy_ena_reg;
assign wr_wide_xy_bank = wr_wide_xy_bank_reg;
assign wr_wide_xy_addr = wr_wide_xy_addr_reg;
- assign wr_wide_x_din = wr_wide_x_din_reg;
- assign wr_wide_y_din = wr_wide_y_din_reg;
+ assign wr_wide_x_dout = wr_wide_x_din_reg;
+ assign wr_wide_y_dout = wr_wide_y_din_reg;
assign wr_narrow_xy_ena = wr_narrow_xy_ena_reg;
assign wr_narrow_xy_bank = wr_narrow_xy_bank_reg;
assign wr_narrow_xy_addr = wr_narrow_xy_addr_reg;
- assign wr_narrow_x_din = wr_narrow_x_din_reg;
- assign wr_narrow_y_din = wr_narrow_y_din_reg;
+ assign wr_narrow_x_dout = wr_narrow_x_din_reg;
+ assign wr_narrow_y_dout = wr_narrow_y_din_reg;
endmodule
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
index 73b3142..016b1b0 100644
--- a/rtl/modexpng_uop_rom.v
+++ b/rtl/modexpng_uop_rom.v
@@ -15,25 +15,48 @@ module modexpng_uop_rom
// Micro-operation ROM: on every rising edge of clk, 'data' is loaded with the
// micro-operation word stored at 'addr'; any address without an explicit entry
// reads back as UOP_OPCODE_STOP.
always @(posedge clk)
// Word layout: {opcode, CRT, NPQ, AUX, LADDER, four bank selectors}. The last two
// selectors are the wide and narrow output banks; the first two are presumably
// the wide and narrow input banks (TODO confirm against the decoder in core_top).
case (addr)
- 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC };
- 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC };
- 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC };
- 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC };
- 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC };
- 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC };
-
- 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF};
- 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF};
- 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A };
- 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A };
- 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E };
- 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E };
-
- 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B };
- 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C };
- 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D };
-
- default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL};
+ 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // 00..05: INPUT_TO_WIDE, input banks IN_1_{N,X,Y,M} -> wide banks N, A, E
+ 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
+ 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; //
+ 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; //
+ 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
+ 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
+ // 06..11: INPUT_TO_NARROW, input banks IN_1_{N_COEFF,N_FACTOR,M} -> narrow banks COEFF, A, E
+ 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
+ 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
+ // 12..14: three MODULAR_MULTIPLY steps chained through banks A -> B -> C -> D (step 14 uses UOP_AUX_2 and BANK_DNC in the second selector)
+ 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; //
+ 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
+ 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
+ // 15: PROPAGATE_CARRIES with narrow bank D in both narrow selectors
+ 6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; //
+ // 16..17: OUTPUT_FROM_NARROW, narrow bank D -> BANK_OUT_XM (CRT_X) and BANK_OUT_YM (CRT_Y)
+ 6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; //
+ 6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; //
+ // 18: MODULAR_MULTIPLY selecting wide bank E and narrow bank B, output banks wide C / narrow C
+ 6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
+ // 19: PROPAGATE_CARRIES with narrow bank C in both narrow selectors
+ 6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; //
+ // 20: COPY_CRT_Y2X with wide/narrow banks C in all four selectors
+ 6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; //
+ // 21..25: INPUT_TO_WIDE with UOP_NPQ_PQ / UOP_AUX_2, input banks IN_2_* -> wide banks N, A, E (QINV has a CRT_X entry only)
+ 6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; //
+ 6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; //
+ 6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; //
+ 6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; //
+ 6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; //
+ // 26..30: INPUT_TO_NARROW with UOP_NPQ_PQ / UOP_AUX_2, input banks IN_2_* -> narrow banks COEFF, A, E (QINV has a CRT_X entry only)
+ 6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd29: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd30: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; //
+ // every other address: STOP with all remaining fields set to don't-care
+ default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
endcase
endmodule