From e340b1489b08905e3d8acd17686e178028de7922 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Thu, 3 Oct 2019 16:47:39 +0300 Subject: Added more micro-operations, also added "general worker" module. The worker is basically a block memory data mover, but it can also do some supporting operations required for the Garner's formula part of the exponentiation. --- rtl/modexpng_core_top.v | 895 +++++++++++++++++++++++++---------------- rtl/modexpng_dsp_array_block.v | 8 +- rtl/modexpng_general_worker.v | 679 +++++++++++++++++++++++++++++++ rtl/modexpng_io_block.v | 10 +- rtl/modexpng_io_manager.v | 347 +++++++++------- rtl/modexpng_microcode.vh | 21 +- rtl/modexpng_mmm_dual.v | 43 +- rtl/modexpng_parameters.vh | 4 +- rtl/modexpng_reductor.v | 28 +- rtl/modexpng_storage_block.v | 126 +++++- rtl/modexpng_storage_manager.v | 167 ++++---- rtl/modexpng_uop_rom.v | 61 ++- 12 files changed, 1734 insertions(+), 655 deletions(-) create mode 100644 rtl/modexpng_general_worker.v (limited to 'rtl') diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v index e117e5d..6b194dc 100644 --- a/rtl/modexpng_core_top.v +++ b/rtl/modexpng_core_top.v @@ -71,11 +71,13 @@ module modexpng_core_top wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ]; wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ]; - wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP; - wire uop_opcode_is_io = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) || - (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) || - (uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW) ; - wire uop_opcode_is_mmm = uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY; + wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ; + wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) || + 
(uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ; + wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ; + wire uop_opcode_is_mmm = uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ; + wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) || + (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ; wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT; wire [UOP_ADDR_W -1:0] uop_addr_next = uop_addr + 1'b1; @@ -101,87 +103,135 @@ module modexpng_core_top // // Storage Interfaces (X, Y) // - wire wr_wide_xy_ena_x; - wire [BANK_ADDR_W -1:0] wr_wide_xy_bank_x; - wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_x; - wire [ WORD_EXT_W -1:0] wr_wide_x_din_x; - wire [ WORD_EXT_W -1:0] wr_wide_y_din_x; - - wire wr_narrow_xy_ena_x; - wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x; - wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_x; - wire [ WORD_EXT_W -1:0] wr_narrow_x_din_x; - wire [ WORD_EXT_W -1:0] wr_narrow_y_din_x; - - wire rd_wide_xy_ena_x; - wire rd_wide_xy_ena_aux_x; - wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_x; - wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x; - wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_x; - wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_x; - wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout_x; - wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout_x; - wire [ WORD_EXT_W -1:0] rd_wide_x_dout_aux_x; - wire [ WORD_EXT_W -1:0] rd_wide_y_dout_aux_x; - - wire rd_narrow_xy_ena_x; - wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_x; - wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x; - wire [ WORD_EXT_W -1:0] rd_narrow_x_dout_x; - wire [ WORD_EXT_W -1:0] rd_narrow_y_dout_x; - - wire ext_wide_xy_ena_x; - wire [BANK_ADDR_W -1:0] ext_wide_xy_bank_x; - wire [ OP_ADDR_W -1:0] ext_wide_xy_addr_x; - wire [ WORD_EXT_W -1:0] ext_wide_x_din_x; - wire [ WORD_EXT_W -1:0] ext_wide_y_din_x; - - wire ext_narrow_xy_ena_x; - wire [BANK_ADDR_W -1:0] ext_narrow_xy_bank_x; - wire [ OP_ADDR_W -1:0] 
ext_narrow_xy_addr_x; - wire [ WORD_EXT_W -1:0] ext_narrow_x_din_x; - wire [ WORD_EXT_W -1:0] ext_narrow_y_din_x; - - wire wr_wide_xy_ena_y; - wire [BANK_ADDR_W -1:0] wr_wide_xy_bank_y; - wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_y; - wire [ WORD_EXT_W -1:0] wr_wide_x_din_y; - wire [ WORD_EXT_W -1:0] wr_wide_y_din_y; - - wire wr_narrow_xy_ena_y; - wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y; - wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y; - wire [ WORD_EXT_W -1:0] wr_narrow_x_din_y; - wire [ WORD_EXT_W -1:0] wr_narrow_y_din_y; - - wire rd_wide_xy_ena_y; - wire rd_wide_xy_ena_aux_y; - wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_y; - wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_y; - wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_y; - wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_y; - wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout_y; - wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout_y; - wire [ WORD_EXT_W -1:0] rd_wide_x_dout_aux_y; - wire [ WORD_EXT_W -1:0] rd_wide_y_dout_aux_y; - - wire rd_narrow_xy_ena_y; - wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_y; - wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y; - wire [ WORD_EXT_W -1:0] rd_narrow_x_dout_y; - wire [ WORD_EXT_W -1:0] rd_narrow_y_dout_y; - - wire ext_wide_xy_ena_y; - wire [BANK_ADDR_W -1:0] ext_wide_xy_bank_y; - wire [ OP_ADDR_W -1:0] ext_wide_xy_addr_y; - wire [ WORD_EXT_W -1:0] ext_wide_x_din_y; - wire [ WORD_EXT_W -1:0] ext_wide_y_din_y; - - wire ext_narrow_xy_ena_y; - wire [BANK_ADDR_W -1:0] ext_narrow_xy_bank_y; - wire [ OP_ADDR_W -1:0] ext_narrow_xy_addr_y; - wire [ WORD_EXT_W -1:0] ext_narrow_x_din_y; - wire [ WORD_EXT_W -1:0] ext_narrow_y_din_y; + wire wr_wide_xy_ena_x; // \ \ + wire [ BANK_ADDR_W -1:0] wr_wide_xy_bank_x; // | WIDE | WR + wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_x; // | | + wire [ WORD_EXT_W -1:0] wr_wide_x_data_x; // | | + wire [ WORD_EXT_W -1:0] wr_wide_y_data_x; // / | + // | + wire wr_narrow_xy_ena_x; // \ | + wire [ BANK_ADDR_W -1:0] wr_narrow_xy_bank_x; // | NARROW | + wire 
[ OP_ADDR_W -1:0] wr_narrow_xy_addr_x; // | | + wire [ WORD_EXT_W -1:0] wr_narrow_x_data_x; // | | + wire [ WORD_EXT_W -1:0] wr_narrow_y_data_x; // / / + // + wire rd_wide_xy_ena_x; // \ \ + wire rd_wide_xy_ena_aux_x; // | WIDE | RD + wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_x; // | | + wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x; // | | + wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_x; // | | + wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_x; // | | + wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_data_x; // | | + wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_data_x; // | | + wire [ WORD_EXT_W -1:0] rd_wide_x_data_aux_x; // | | + wire [ WORD_EXT_W -1:0] rd_wide_y_data_aux_x; // / | + // | + wire rd_narrow_xy_ena_x; // \ | + wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_x; // | NARROW | + wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x; // | | + wire [ WORD_EXT_W -1:0] rd_narrow_x_data_x; // | | + wire [ WORD_EXT_W -1:0] rd_narrow_y_data_x; // / / + // + wire wrk_rd_wide_xy_ena_x; // \ \ + wire [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x; // | WIDE | WRK + wire [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x; // | | + wire [ WORD_EXT_W -1:0] wrk_rd_wide_x_data_x; // | | + wire [ WORD_EXT_W -1:0] wrk_rd_wide_y_data_x; // / | + // | + wire wrk_rd_narrow_xy_ena_x; // \ | + wire [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x; // | NARROW | + wire [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x; // | | + wire [ WORD_EXT_W -1:0] wrk_rd_narrow_x_data_x; // | | + wire [ WORD_EXT_W -1:0] wrk_rd_narrow_y_data_x; // / / + + wire wrk_wr_wide_xy_ena_x; // \ \ + wire [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x; // | WIDE | WRK + wire [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x; // | | + wire [ WORD_EXT_W -1:0] wrk_wr_wide_x_data_x; // | | + wire [ WORD_EXT_W -1:0] wrk_wr_wide_y_data_x; // / | + // | + wire wrk_wr_narrow_xy_ena_x; // \ | + wire [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x; // | NARROW | + wire [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x; // | | + wire [ WORD_EXT_W -1:0] 
wrk_wr_narrow_x_data_x; // | | + wire [ WORD_EXT_W -1:0] wrk_wr_narrow_y_data_x; // / / + // + wire io_wide_xy_ena_x; // \ \ + wire [ BANK_ADDR_W -1:0] io_wide_xy_bank_x; // | WIDE | IO + wire [ OP_ADDR_W -1:0] io_wide_xy_addr_x; // | | + wire [ WORD_EXT_W -1:0] io_wide_x_data_x; // | | + wire [ WORD_EXT_W -1:0] io_wide_y_data_x; // / | + // | + wire io_narrow_xy_ena_x; // \ | + wire [ BANK_ADDR_W -1:0] io_narrow_xy_bank_x; // | NARROW | + wire [ OP_ADDR_W -1:0] io_narrow_xy_addr_x; // | | + wire [ WORD_EXT_W -1:0] io_narrow_x_data_x; // | | + wire [ WORD_EXT_W -1:0] io_narrow_y_data_x; // / / + // + wire wr_wide_xy_ena_y; // \ + wire [ BANK_ADDR_W -1:0] wr_wide_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wr_wide_x_data_y; // + wire [ WORD_EXT_W -1:0] wr_wide_y_data_y; // + // + wire wr_narrow_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wr_narrow_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wr_narrow_x_data_y; // + wire [ WORD_EXT_W -1:0] wr_narrow_y_data_y; // + // + wire rd_wide_xy_ena_y; // + wire rd_wide_xy_ena_aux_y; // + wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_y; // + wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_y; // + wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_y; // + wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_y; // + wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_data_y; // + wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_data_y; // + wire [ WORD_EXT_W -1:0] rd_wide_x_data_aux_y; // + wire [ WORD_EXT_W -1:0] rd_wide_y_data_aux_y; // + // + wire rd_narrow_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_y; // + wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y; // + wire [ WORD_EXT_W -1:0] rd_narrow_x_data_y; // + wire [ WORD_EXT_W -1:0] rd_narrow_y_data_y; // + // + wire wrk_rd_wide_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wrk_rd_wide_x_data_y; // + wire 
[ WORD_EXT_W -1:0] wrk_rd_wide_y_data_y; // + // + wire wrk_rd_narrow_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wrk_rd_narrow_x_data_y; // + wire [ WORD_EXT_W -1:0] wrk_rd_narrow_y_data_y; // + + wire wrk_wr_wide_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wrk_wr_wide_x_data_y; // + wire [ WORD_EXT_W -1:0] wrk_wr_wide_y_data_y; // + // + wire wrk_wr_narrow_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wrk_wr_narrow_x_data_y; // + wire [ WORD_EXT_W -1:0] wrk_wr_narrow_y_data_y; // + // + wire io_wide_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] io_wide_xy_bank_y; // + wire [ OP_ADDR_W -1:0] io_wide_xy_addr_y; // + wire [ WORD_EXT_W -1:0] io_wide_x_data_y; // + wire [ WORD_EXT_W -1:0] io_wide_y_data_y; // + // + wire io_narrow_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] io_narrow_xy_bank_y; // + wire [ OP_ADDR_W -1:0] io_narrow_xy_addr_y; // + wire [ WORD_EXT_W -1:0] io_narrow_x_data_y; // + wire [ WORD_EXT_W -1:0] io_narrow_y_data_y; // // @@ -189,38 +239,38 @@ module modexpng_core_top // wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_x; wire [ OP_ADDR_W -1:0] rcmb_wide_xy_addr_x; - wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout_x; - wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout_x; + wire [ WORD_EXT_W -1:0] rcmb_wide_x_data_x; + wire [ WORD_EXT_W -1:0] rcmb_wide_y_data_x; wire rcmb_wide_xy_valid_x; wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_x; wire [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr_x; - wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout_x; - wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout_x; + wire [ WORD_EXT_W -1:0] rcmb_narrow_x_data_x; + wire [ WORD_EXT_W -1:0] rcmb_narrow_y_data_x; wire rcmb_narrow_xy_valid_x; wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_x; wire [ OP_ADDR_W -1:0] 
rcmb_final_xy_addr_x; - wire [ WORD_EXT_W -1:0] rcmb_final_x_dout_x; - wire [ WORD_EXT_W -1:0] rcmb_final_y_dout_x; + wire [ WORD_EXT_W -1:0] rcmb_final_x_data_x; + wire [ WORD_EXT_W -1:0] rcmb_final_y_data_x; wire rcmb_final_xy_valid_x; wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_y; wire [ OP_ADDR_W -1:0] rcmb_wide_xy_addr_y; - wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout_y; - wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout_y; + wire [ WORD_EXT_W -1:0] rcmb_wide_x_data_y; + wire [ WORD_EXT_W -1:0] rcmb_wide_y_data_y; wire rcmb_wide_xy_valid_y; wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_y; wire [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr_y; - wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout_y; - wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout_y; + wire [ WORD_EXT_W -1:0] rcmb_narrow_x_data_y; + wire [ WORD_EXT_W -1:0] rcmb_narrow_y_data_y; wire rcmb_narrow_xy_valid_y; wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_y; wire [ OP_ADDR_W -1:0] rcmb_final_xy_addr_y; - wire [ WORD_EXT_W -1:0] rcmb_final_x_dout_y; - wire [ WORD_EXT_W -1:0] rcmb_final_y_dout_y; + wire [ WORD_EXT_W -1:0] rcmb_final_x_data_y; + wire [ WORD_EXT_W -1:0] rcmb_final_y_data_y; wire rcmb_final_xy_valid_y; @@ -229,26 +279,26 @@ module modexpng_core_top // wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_x; wire [ OP_ADDR_W -1:0] rdct_wide_xy_addr_x; - wire [ WORD_EXT_W -1:0] rdct_wide_x_dout_x; - wire [ WORD_EXT_W -1:0] rdct_wide_y_dout_x; + wire [ WORD_EXT_W -1:0] rdct_wide_x_data_x; + wire [ WORD_EXT_W -1:0] rdct_wide_y_data_x; wire rdct_wide_xy_valid_x; wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_x; wire [ OP_ADDR_W -1:0] rdct_narrow_xy_addr_x; - wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout_x; - wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout_x; + wire [ WORD_EXT_W -1:0] rdct_narrow_x_data_x; + wire [ WORD_EXT_W -1:0] rdct_narrow_y_data_x; wire rdct_narrow_xy_valid_x; wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_y; wire [ OP_ADDR_W -1:0] rdct_wide_xy_addr_y; - wire [ WORD_EXT_W -1:0] rdct_wide_x_dout_y; - wire [ WORD_EXT_W -1:0] 
rdct_wide_y_dout_y; + wire [ WORD_EXT_W -1:0] rdct_wide_x_data_y; + wire [ WORD_EXT_W -1:0] rdct_wide_y_data_y; wire rdct_wide_xy_valid_y; wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_y; wire [ OP_ADDR_W -1:0] rdct_narrow_xy_addr_y; - wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout_y; - wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout_y; + wire [ WORD_EXT_W -1:0] rdct_narrow_x_data_y; + wire [ WORD_EXT_W -1:0] rdct_narrow_y_data_y; wire rdct_narrow_xy_valid_y; @@ -263,14 +313,14 @@ module modexpng_core_top .wr_wide_xy_ena (wr_wide_xy_ena_x), .wr_wide_xy_bank (wr_wide_xy_bank_x), .wr_wide_xy_addr (wr_wide_xy_addr_x), - .wr_wide_x_din (wr_wide_x_din_x), - .wr_wide_y_din (wr_wide_y_din_x), + .wr_wide_x_din (wr_wide_x_data_x), + .wr_wide_y_din (wr_wide_y_data_x), .wr_narrow_xy_ena (wr_narrow_xy_ena_x), .wr_narrow_xy_bank (wr_narrow_xy_bank_x), .wr_narrow_xy_addr (wr_narrow_xy_addr_x), - .wr_narrow_x_din (wr_narrow_x_din_x), - .wr_narrow_y_din (wr_narrow_y_din_x), + .wr_narrow_x_din (wr_narrow_x_data_x), + .wr_narrow_y_din (wr_narrow_y_data_x), .rd_wide_xy_ena (rd_wide_xy_ena_x), .rd_wide_xy_ena_aux (rd_wide_xy_ena_aux_x), @@ -278,16 +328,28 @@ module modexpng_core_top .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x), .rd_wide_xy_addr (rd_wide_xy_addr_x), .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x), - .rd_wide_x_dout (rd_wide_x_dout_x), - .rd_wide_y_dout (rd_wide_y_dout_x), - .rd_wide_x_dout_aux (rd_wide_x_dout_aux_x), - .rd_wide_y_dout_aux (rd_wide_y_dout_aux_x), + .rd_wide_x_dout (rd_wide_x_data_x), + .rd_wide_y_dout (rd_wide_y_data_x), + .rd_wide_x_dout_aux (rd_wide_x_data_aux_x), + .rd_wide_y_dout_aux (rd_wide_y_data_aux_x), .rd_narrow_xy_ena (rd_narrow_xy_ena_x), .rd_narrow_xy_bank (rd_narrow_xy_bank_x), .rd_narrow_xy_addr (rd_narrow_xy_addr_x), - .rd_narrow_x_dout (rd_narrow_x_dout_x), - .rd_narrow_y_dout (rd_narrow_y_dout_x) + .rd_narrow_x_dout (rd_narrow_x_data_x), + .rd_narrow_y_dout (rd_narrow_y_data_x), + + .wrk_wide_xy_ena (wrk_rd_wide_xy_ena_x), + .wrk_wide_xy_bank 
(wrk_rd_wide_xy_bank_x), + .wrk_wide_xy_addr (wrk_rd_wide_xy_addr_x), + .wrk_wide_x_dout (wrk_rd_wide_x_data_x), + .wrk_wide_y_dout (wrk_rd_wide_y_data_x), + + .wrk_narrow_xy_ena (wrk_rd_narrow_xy_ena_x), + .wrk_narrow_xy_bank (wrk_rd_narrow_xy_bank_x), + .wrk_narrow_xy_addr (wrk_rd_narrow_xy_addr_x), + .wrk_narrow_x_dout (wrk_rd_narrow_x_data_x), + .wrk_narrow_y_dout (wrk_rd_narrow_y_data_x) ); modexpng_storage_block storage_block_y @@ -298,14 +360,14 @@ module modexpng_core_top .wr_wide_xy_ena (wr_wide_xy_ena_y), .wr_wide_xy_bank (wr_wide_xy_bank_y), .wr_wide_xy_addr (wr_wide_xy_addr_y), - .wr_wide_x_din (wr_wide_x_din_y), - .wr_wide_y_din (wr_wide_y_din_y), + .wr_wide_x_din (wr_wide_x_data_y), + .wr_wide_y_din (wr_wide_y_data_y), .wr_narrow_xy_ena (wr_narrow_xy_ena_y), .wr_narrow_xy_bank (wr_narrow_xy_bank_y), .wr_narrow_xy_addr (wr_narrow_xy_addr_y), - .wr_narrow_x_din (wr_narrow_x_din_y), - .wr_narrow_y_din (wr_narrow_y_din_y), + .wr_narrow_x_din (wr_narrow_x_data_y), + .wr_narrow_y_din (wr_narrow_y_data_y), .rd_wide_xy_ena (rd_wide_xy_ena_y), .rd_wide_xy_ena_aux (rd_wide_xy_ena_aux_y), @@ -313,16 +375,29 @@ module modexpng_core_top .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y), .rd_wide_xy_addr (rd_wide_xy_addr_y), .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y), - .rd_wide_x_dout (rd_wide_x_dout_y), - .rd_wide_y_dout (rd_wide_y_dout_y), - .rd_wide_x_dout_aux (rd_wide_x_dout_aux_y), - .rd_wide_y_dout_aux (rd_wide_y_dout_aux_y), + .rd_wide_x_dout (rd_wide_x_data_y), + .rd_wide_y_dout (rd_wide_y_data_y), + .rd_wide_x_dout_aux (rd_wide_x_data_aux_y), + .rd_wide_y_dout_aux (rd_wide_y_data_aux_y), .rd_narrow_xy_ena (rd_narrow_xy_ena_y), .rd_narrow_xy_bank (rd_narrow_xy_bank_y), .rd_narrow_xy_addr (rd_narrow_xy_addr_y), - .rd_narrow_x_dout (rd_narrow_x_dout_y), - .rd_narrow_y_dout (rd_narrow_y_dout_y) + .rd_narrow_x_dout (rd_narrow_x_data_y), + .rd_narrow_y_dout (rd_narrow_y_data_y), + + .wrk_wide_xy_ena (wrk_rd_wide_xy_ena_y), + .wrk_wide_xy_bank 
(wrk_rd_wide_xy_bank_y), + .wrk_wide_xy_addr (wrk_rd_wide_xy_addr_y), + .wrk_wide_x_dout (wrk_rd_wide_x_data_y), + .wrk_wide_y_dout (wrk_rd_wide_y_data_y), + + .wrk_narrow_xy_ena (wrk_rd_narrow_xy_ena_y), + .wrk_narrow_xy_bank (wrk_rd_narrow_xy_bank_y), + .wrk_narrow_xy_addr (wrk_rd_narrow_xy_addr_y), + .wrk_narrow_x_dout (wrk_rd_narrow_x_data_y), + .wrk_narrow_y_dout (wrk_rd_narrow_y_data_y) + ); @@ -337,50 +412,62 @@ module modexpng_core_top .wr_wide_xy_ena (wr_wide_xy_ena_x), .wr_wide_xy_bank (wr_wide_xy_bank_x), .wr_wide_xy_addr (wr_wide_xy_addr_x), - .wr_wide_x_din (wr_wide_x_din_x), - .wr_wide_y_din (wr_wide_y_din_x), + .wr_wide_x_dout (wr_wide_x_data_x), + .wr_wide_y_dout (wr_wide_y_data_x), .wr_narrow_xy_ena (wr_narrow_xy_ena_x), .wr_narrow_xy_bank (wr_narrow_xy_bank_x), .wr_narrow_xy_addr (wr_narrow_xy_addr_x), - .wr_narrow_x_din (wr_narrow_x_din_x), - .wr_narrow_y_din (wr_narrow_y_din_x), + .wr_narrow_x_dout (wr_narrow_x_data_x), + .wr_narrow_y_dout (wr_narrow_y_data_x), - .ext_wide_xy_ena (ext_wide_xy_ena_x), - .ext_wide_xy_bank (ext_wide_xy_bank_x), - .ext_wide_xy_addr (ext_wide_xy_addr_x), - .ext_wide_x_din (ext_wide_x_din_x), - .ext_wide_y_din (ext_wide_y_din_x), + .io_wide_xy_ena (io_wide_xy_ena_x), + .io_wide_xy_bank (io_wide_xy_bank_x), + .io_wide_xy_addr (io_wide_xy_addr_x), + .io_wide_x_din (io_wide_x_data_x), + .io_wide_y_din (io_wide_y_data_x), - .ext_narrow_xy_ena (ext_narrow_xy_ena_x), - .ext_narrow_xy_bank (ext_narrow_xy_bank_x), - .ext_narrow_xy_addr (ext_narrow_xy_addr_x), - .ext_narrow_x_din (ext_narrow_x_din_x), - .ext_narrow_y_din (ext_narrow_y_din_x), + .io_narrow_xy_ena (io_narrow_xy_ena_x), + .io_narrow_xy_bank (io_narrow_xy_bank_x), + .io_narrow_xy_addr (io_narrow_xy_addr_x), + .io_narrow_x_din (io_narrow_x_data_x), + .io_narrow_y_din (io_narrow_y_data_x), .rcmb_wide_xy_bank (rcmb_wide_xy_bank_x), .rcmb_wide_xy_addr (rcmb_wide_xy_addr_x), - .rcmb_wide_x_din (rcmb_wide_x_dout_x), - .rcmb_wide_y_din (rcmb_wide_y_dout_x), + 
.rcmb_wide_x_din (rcmb_wide_x_data_x), + .rcmb_wide_y_din (rcmb_wide_y_data_x), .rcmb_wide_xy_ena (rcmb_wide_xy_valid_x), .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_x), .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_x), - .rcmb_narrow_x_din (rcmb_narrow_x_dout_x), - .rcmb_narrow_y_din (rcmb_narrow_y_dout_x), + .rcmb_narrow_x_din (rcmb_narrow_x_data_x), + .rcmb_narrow_y_din (rcmb_narrow_y_data_x), .rcmb_narrow_xy_ena (rcmb_narrow_xy_valid_x), .rdct_wide_xy_bank (rdct_wide_xy_bank_x), .rdct_wide_xy_addr (rdct_wide_xy_addr_x), - .rdct_wide_x_din (rdct_wide_x_dout_x), // TODO: maybe just rename to {x|y}_x, since that's an - .rdct_wide_y_din (rdct_wide_y_dout_x), // internal signal?? + .rdct_wide_x_din (rdct_wide_x_data_x), + .rdct_wide_y_din (rdct_wide_y_data_x), .rdct_wide_xy_valid (rdct_wide_xy_valid_x), .rdct_narrow_xy_bank (rdct_narrow_xy_bank_x), .rdct_narrow_xy_addr (rdct_narrow_xy_addr_x), - .rdct_narrow_x_din (rdct_narrow_x_dout_x), - .rdct_narrow_y_din (rdct_narrow_y_dout_x), - .rdct_narrow_xy_valid (rdct_narrow_xy_valid_x) + .rdct_narrow_x_din (rdct_narrow_x_data_x), + .rdct_narrow_y_din (rdct_narrow_y_data_x), + .rdct_narrow_xy_valid (rdct_narrow_xy_valid_x), + + .wrk_wide_xy_ena (wrk_wr_wide_xy_ena_x), + .wrk_wide_xy_bank (wrk_wr_wide_xy_bank_x), + .wrk_wide_xy_addr (wrk_wr_wide_xy_addr_x), + .wrk_wide_x_din (wrk_wr_wide_x_data_x), + .wrk_wide_y_din (wrk_wr_wide_y_data_x), + + .wrk_narrow_xy_ena (wrk_wr_narrow_xy_ena_x), + .wrk_narrow_xy_bank (wrk_wr_narrow_xy_bank_x), + .wrk_narrow_xy_addr (wrk_wr_narrow_xy_addr_x), + .wrk_narrow_x_din (wrk_wr_narrow_x_data_x), + .wrk_narrow_y_din (wrk_wr_narrow_y_data_x) ); modexpng_storage_manager storage_manager_y @@ -391,51 +478,62 @@ module modexpng_core_top .wr_wide_xy_ena (wr_wide_xy_ena_y), .wr_wide_xy_bank (wr_wide_xy_bank_y), .wr_wide_xy_addr (wr_wide_xy_addr_y), - .wr_wide_x_din (wr_wide_x_din_y), - .wr_wide_y_din (wr_wide_y_din_y), + .wr_wide_x_dout (wr_wide_x_data_y), + .wr_wide_y_dout (wr_wide_y_data_y), 
.wr_narrow_xy_ena (wr_narrow_xy_ena_y), .wr_narrow_xy_bank (wr_narrow_xy_bank_y), .wr_narrow_xy_addr (wr_narrow_xy_addr_y), - .wr_narrow_x_din (wr_narrow_x_din_y), - .wr_narrow_y_din (wr_narrow_y_din_y), + .wr_narrow_x_dout (wr_narrow_x_data_y), + .wr_narrow_y_dout (wr_narrow_y_data_y), - .ext_wide_xy_ena (ext_wide_xy_ena_y), - .ext_wide_xy_bank (ext_wide_xy_bank_y), - .ext_wide_xy_addr (ext_wide_xy_addr_y), - .ext_wide_x_din (ext_wide_x_din_y), - .ext_wide_y_din (ext_wide_y_din_y), + .io_wide_xy_ena (io_wide_xy_ena_y), + .io_wide_xy_bank (io_wide_xy_bank_y), + .io_wide_xy_addr (io_wide_xy_addr_y), + .io_wide_x_din (io_wide_x_data_y), + .io_wide_y_din (io_wide_y_data_y), - .ext_narrow_xy_ena (ext_narrow_xy_ena_y), - .ext_narrow_xy_bank (ext_narrow_xy_bank_y), - .ext_narrow_xy_addr (ext_narrow_xy_addr_y), - .ext_narrow_x_din (ext_narrow_x_din_y), - .ext_narrow_y_din (ext_narrow_y_din_y), + .io_narrow_xy_ena (io_narrow_xy_ena_y), + .io_narrow_xy_bank (io_narrow_xy_bank_y), + .io_narrow_xy_addr (io_narrow_xy_addr_y), + .io_narrow_x_din (io_narrow_x_data_y), + .io_narrow_y_din (io_narrow_y_data_y), .rcmb_wide_xy_bank (rcmb_wide_xy_bank_y), .rcmb_wide_xy_addr (rcmb_wide_xy_addr_y), - .rcmb_wide_x_din (rcmb_wide_x_dout_y), - .rcmb_wide_y_din (rcmb_wide_y_dout_y), + .rcmb_wide_x_din (rcmb_wide_x_data_y), + .rcmb_wide_y_din (rcmb_wide_y_data_y), .rcmb_wide_xy_ena (rcmb_wide_xy_valid_y), .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_y), .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_y), - .rcmb_narrow_x_din (rcmb_narrow_x_dout_y), - .rcmb_narrow_y_din (rcmb_narrow_y_dout_y), + .rcmb_narrow_x_din (rcmb_narrow_x_data_y), + .rcmb_narrow_y_din (rcmb_narrow_y_data_y), .rcmb_narrow_xy_ena (rcmb_narrow_xy_valid_y), .rdct_wide_xy_bank (rdct_wide_xy_bank_y), .rdct_wide_xy_addr (rdct_wide_xy_addr_y), - .rdct_wide_x_din (rdct_wide_x_dout_y), - .rdct_wide_y_din (rdct_wide_y_dout_y), + .rdct_wide_x_din (rdct_wide_x_data_y), + .rdct_wide_y_din (rdct_wide_y_data_y), .rdct_wide_xy_valid 
(rdct_wide_xy_valid_y), .rdct_narrow_xy_bank (rdct_narrow_xy_bank_y), .rdct_narrow_xy_addr (rdct_narrow_xy_addr_y), - .rdct_narrow_x_din (rdct_narrow_x_dout_y), - .rdct_narrow_y_din (rdct_narrow_y_dout_y), - .rdct_narrow_xy_valid (rdct_narrow_xy_valid_y) - + .rdct_narrow_x_din (rdct_narrow_x_data_y), + .rdct_narrow_y_din (rdct_narrow_y_data_y), + .rdct_narrow_xy_valid (rdct_narrow_xy_valid_y), + + .wrk_wide_xy_ena (wrk_wr_wide_xy_ena_y), + .wrk_wide_xy_bank (wrk_wr_wide_xy_bank_y), + .wrk_wide_xy_addr (wrk_wr_wide_xy_addr_y), + .wrk_wide_x_din (wrk_wr_wide_x_data_y), + .wrk_wide_y_din (wrk_wr_wide_y_data_y), + + .wrk_narrow_xy_ena (wrk_wr_narrow_xy_ena_y), + .wrk_narrow_xy_bank (wrk_wr_narrow_xy_bank_y), + .wrk_narrow_xy_addr (wrk_wr_narrow_xy_addr_y), + .wrk_narrow_x_din (wrk_wr_narrow_x_data_y), + .wrk_narrow_y_din (wrk_wr_narrow_y_data_y) ); @@ -444,16 +542,16 @@ module modexpng_core_top // wire io_in_1_en; wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr; - wire [ WORD_W -1:0] io_in_1_dout; + wire [ WORD_W -1:0] io_in_1_data; wire io_in_2_en; wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr; - wire [ WORD_W -1:0] io_in_2_dout; + wire [ WORD_W -1:0] io_in_2_data; wire io_out_en; wire io_out_we; wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr; - wire [ WORD_W -1:0] io_out_din; + wire [ WORD_W -1:0] io_out_data; // TODO: Separate reset for clock domains (core/bus)??? 
@@ -472,16 +570,16 @@ module modexpng_core_top .in_1_en (io_in_1_en), .in_1_addr (io_in_1_addr), - .in_1_dout (io_in_1_dout), + .in_1_dout (io_in_1_data), .in_2_en (io_in_2_en), .in_2_addr (io_in_2_addr), - .in_2_dout (io_in_2_dout), + .in_2_dout (io_in_2_data), .out_en (io_out_en), .out_we (io_out_we), .out_addr (io_out_addr), - .out_din (io_out_din) + .out_din (io_out_data) ); @@ -497,59 +595,65 @@ module modexpng_core_top reg [OP_ADDR_W -1:0] io_mgr_word_index_last; reg [UOP_OPCODE_W -1:0] io_mgr_opcode; + wire [WORD_W -1:0] wrk_rd_narrow_x_data_x_trunc = wrk_rd_narrow_x_data_x[WORD_W-1:0]; + wire [WORD_W -1:0] wrk_rd_narrow_x_data_y_trunc = wrk_rd_narrow_x_data_y[WORD_W-1:0]; + modexpng_io_manager io_manager ( - .clk (clk), - .rst (rst), + .clk (clk), + .rst (rst), - .ena (io_mgr_ena), - .rdy (io_mgr_rdy), + .ena (io_mgr_ena), + .rdy (io_mgr_rdy), - .sel_crt (io_mgr_sel_crt), - .sel_aux (io_mgr_sel_aux), - .sel_in (io_mgr_sel_in), - .sel_out (io_mgr_sel_out), + .sel_crt (io_mgr_sel_crt), + .sel_aux (io_mgr_sel_aux), + .sel_in (io_mgr_sel_in), + .sel_out (io_mgr_sel_out), - .opcode (io_mgr_opcode), + .opcode (io_mgr_opcode), - .word_index_last (io_mgr_word_index_last), + .word_index_last (io_mgr_word_index_last), - .ext_wide_xy_ena_x (ext_wide_xy_ena_x), - .ext_wide_xy_bank_x (ext_wide_xy_bank_x), - .ext_wide_xy_addr_x (ext_wide_xy_addr_x), - .ext_wide_x_din_x (ext_wide_x_din_x), - .ext_wide_y_din_x (ext_wide_y_din_x), - - .ext_narrow_xy_ena_x (ext_narrow_xy_ena_x), - .ext_narrow_xy_bank_x (ext_narrow_xy_bank_x), - .ext_narrow_xy_addr_x (ext_narrow_xy_addr_x), - .ext_narrow_x_din_x (ext_narrow_x_din_x), - .ext_narrow_y_din_x (ext_narrow_y_din_x), - - .ext_wide_xy_ena_y (ext_wide_xy_ena_y), - .ext_wide_xy_bank_y (ext_wide_xy_bank_y), - .ext_wide_xy_addr_y (ext_wide_xy_addr_y), - .ext_wide_x_din_y (ext_wide_x_din_y), - .ext_wide_y_din_y (ext_wide_y_din_y), - - .ext_narrow_xy_ena_y (ext_narrow_xy_ena_y), - .ext_narrow_xy_bank_y (ext_narrow_xy_bank_y), - 
.ext_narrow_xy_addr_y (ext_narrow_xy_addr_y), - .ext_narrow_x_din_y (ext_narrow_x_din_y), - .ext_narrow_y_din_y (ext_narrow_y_din_y), + .io_wide_xy_ena_x (io_wide_xy_ena_x), + .io_wide_xy_bank_x (io_wide_xy_bank_x), + .io_wide_xy_addr_x (io_wide_xy_addr_x), + .io_wide_x_din_x (io_wide_x_data_x), + .io_wide_y_din_x (io_wide_y_data_x), + + .io_narrow_xy_ena_x (io_narrow_xy_ena_x), + .io_narrow_xy_bank_x (io_narrow_xy_bank_x), + .io_narrow_xy_addr_x (io_narrow_xy_addr_x), + .io_narrow_x_din_x (io_narrow_x_data_x), + .io_narrow_y_din_x (io_narrow_y_data_x), + + .io_wide_xy_ena_y (io_wide_xy_ena_y), + .io_wide_xy_bank_y (io_wide_xy_bank_y), + .io_wide_xy_addr_y (io_wide_xy_addr_y), + .io_wide_x_din_y (io_wide_x_data_y), + .io_wide_y_din_y (io_wide_y_data_y), + + .io_narrow_xy_ena_y (io_narrow_xy_ena_y), + .io_narrow_xy_bank_y (io_narrow_xy_bank_y), + .io_narrow_xy_addr_y (io_narrow_xy_addr_y), + .io_narrow_x_din_y (io_narrow_x_data_y), + .io_narrow_y_din_y (io_narrow_y_data_y), - .io_in_1_en (io_in_1_en), - .io_in_1_addr (io_in_1_addr), - .io_in_1_dout (io_in_1_dout), + .io_in_1_en (io_in_1_en), + .io_in_1_addr (io_in_1_addr), + .io_in_1_din (io_in_1_data), - .io_in_2_en (io_in_2_en), - .io_in_2_addr (io_in_2_addr), - .io_in_2_dout (io_in_2_dout), + .io_in_2_en (io_in_2_en), + .io_in_2_addr (io_in_2_addr), + .io_in_2_din (io_in_2_data), - .io_out_en (io_out_en), - .io_out_we (io_out_we), - .io_out_addr (io_out_addr), - .io_out_din (io_out_din) + .io_out_en (io_out_en), + .io_out_we (io_out_we), + .io_out_addr (io_out_addr), + .io_out_dout (io_out_data), + + .wrk_narrow_x_din_x_trunc (wrk_rd_narrow_x_data_x_trunc), + .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc) ); @@ -608,33 +712,33 @@ module modexpng_core_top .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x), .rd_wide_xy_addr (rd_wide_xy_addr_x), .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x), - .rd_wide_x_dout (rd_wide_x_dout_x), - .rd_wide_y_dout (rd_wide_y_dout_x), - .rd_wide_x_dout_aux (rd_wide_x_dout_aux_x), - 
.rd_wide_y_dout_aux (rd_wide_y_dout_aux_x), + .rd_wide_x_din (rd_wide_x_data_x), + .rd_wide_y_din (rd_wide_y_data_x), + .rd_wide_x_din_aux (rd_wide_x_data_aux_x), + .rd_wide_y_din_aux (rd_wide_y_data_aux_x), .rd_narrow_xy_ena (rd_narrow_xy_ena_x), .rd_narrow_xy_bank (rd_narrow_xy_bank_x), .rd_narrow_xy_addr (rd_narrow_xy_addr_x), - .rd_narrow_x_dout (rd_narrow_x_dout_x), - .rd_narrow_y_dout (rd_narrow_y_dout_x), + .rd_narrow_x_din (rd_narrow_x_data_x), + .rd_narrow_y_din (rd_narrow_y_data_x), .rcmb_wide_xy_bank (rcmb_wide_xy_bank_x), .rcmb_wide_xy_addr (rcmb_wide_xy_addr_x), - .rcmb_wide_x_dout (rcmb_wide_x_dout_x), - .rcmb_wide_y_dout (rcmb_wide_y_dout_x), + .rcmb_wide_x_dout (rcmb_wide_x_data_x), + .rcmb_wide_y_dout (rcmb_wide_y_data_x), .rcmb_wide_xy_valid (rcmb_wide_xy_valid_x), .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_x), .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_x), - .rcmb_narrow_x_dout (rcmb_narrow_x_dout_x), - .rcmb_narrow_y_dout (rcmb_narrow_y_dout_x), + .rcmb_narrow_x_dout (rcmb_narrow_x_data_x), + .rcmb_narrow_y_dout (rcmb_narrow_y_data_x), .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid_x), .rcmb_xy_bank (rcmb_final_xy_bank_x), .rcmb_xy_addr (rcmb_final_xy_addr_x), - .rcmb_x_dout (rcmb_final_x_dout_x), - .rcmb_y_dout (rcmb_final_y_dout_x), + .rcmb_x_dout (rcmb_final_x_data_x), + .rcmb_y_dout (rcmb_final_y_data_x), .rcmb_xy_valid (rcmb_final_xy_valid_x), .rdct_ena (rdct_ena_x), @@ -663,33 +767,33 @@ module modexpng_core_top .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y), .rd_wide_xy_addr (rd_wide_xy_addr_y), .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y), - .rd_wide_x_dout (rd_wide_x_dout_y), - .rd_wide_y_dout (rd_wide_y_dout_y), - .rd_wide_x_dout_aux (rd_wide_x_dout_aux_y), - .rd_wide_y_dout_aux (rd_wide_y_dout_aux_y), + .rd_wide_x_din (rd_wide_x_data_y), + .rd_wide_y_din (rd_wide_y_data_y), + .rd_wide_x_din_aux (rd_wide_x_data_aux_y), + .rd_wide_y_din_aux (rd_wide_y_data_aux_y), .rd_narrow_xy_ena (rd_narrow_xy_ena_y), .rd_narrow_xy_bank 
(rd_narrow_xy_bank_y), .rd_narrow_xy_addr (rd_narrow_xy_addr_y), - .rd_narrow_x_dout (rd_narrow_x_dout_y), - .rd_narrow_y_dout (rd_narrow_y_dout_y), + .rd_narrow_x_din (rd_narrow_x_data_y), + .rd_narrow_y_din (rd_narrow_y_data_y), .rcmb_wide_xy_bank (rcmb_wide_xy_bank_y), .rcmb_wide_xy_addr (rcmb_wide_xy_addr_y), - .rcmb_wide_x_dout (rcmb_wide_x_dout_y), - .rcmb_wide_y_dout (rcmb_wide_y_dout_y), + .rcmb_wide_x_dout (rcmb_wide_x_data_y), + .rcmb_wide_y_dout (rcmb_wide_y_data_y), .rcmb_wide_xy_valid (rcmb_wide_xy_valid_y), .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank_y), .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr_y), - .rcmb_narrow_x_dout (rcmb_narrow_x_dout_y), - .rcmb_narrow_y_dout (rcmb_narrow_y_dout_y), + .rcmb_narrow_x_dout (rcmb_narrow_x_data_y), + .rcmb_narrow_y_dout (rcmb_narrow_y_data_y), .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid_y), .rcmb_xy_bank (rcmb_final_xy_bank_y), .rcmb_xy_addr (rcmb_final_xy_addr_y), - .rcmb_x_dout (rcmb_final_x_dout_y), - .rcmb_y_dout (rcmb_final_y_dout_y), + .rcmb_x_dout (rcmb_final_x_data_y), + .rcmb_y_dout (rcmb_final_y_data_y), .rcmb_xy_valid (rcmb_final_xy_valid_y), .rdct_ena (rdct_ena_y), @@ -723,25 +827,25 @@ module modexpng_core_top .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x), .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x), - .rd_wide_x_dout_aux (rd_wide_x_dout_aux_x), - .rd_wide_y_dout_aux (rd_wide_y_dout_aux_x), + .rd_wide_x_din_aux (rd_wide_x_data_aux_x), + .rd_wide_y_din_aux (rd_wide_y_data_aux_x), .rcmb_final_xy_bank (rcmb_final_xy_bank_x), .rcmb_final_xy_addr (rcmb_final_xy_addr_x), - .rcmb_final_x_dout (rcmb_final_x_dout_x), - .rcmb_final_y_dout (rcmb_final_y_dout_x), + .rcmb_final_x_din (rcmb_final_x_data_x), + .rcmb_final_y_din (rcmb_final_y_data_x), .rcmb_final_xy_valid (rcmb_final_xy_valid_x), .rdct_wide_xy_bank (rdct_wide_xy_bank_x), .rdct_wide_xy_addr (rdct_wide_xy_addr_x), - .rdct_wide_x_dout (rdct_wide_x_dout_x), - .rdct_wide_y_dout (rdct_wide_y_dout_x), + .rdct_wide_x_dout (rdct_wide_x_data_x), + 
.rdct_wide_y_dout (rdct_wide_y_data_x), .rdct_wide_xy_valid (rdct_wide_xy_valid_x), .rdct_narrow_xy_bank (rdct_narrow_xy_bank_x), .rdct_narrow_xy_addr (rdct_narrow_xy_addr_x), - .rdct_narrow_x_dout (rdct_narrow_x_dout_x), - .rdct_narrow_y_dout (rdct_narrow_y_dout_x), + .rdct_narrow_x_dout (rdct_narrow_x_data_x), + .rdct_narrow_y_dout (rdct_narrow_y_data_x), .rdct_narrow_xy_valid (rdct_narrow_xy_valid_x) ); @@ -760,29 +864,109 @@ module modexpng_core_top .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y), .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y), - .rd_wide_x_dout_aux (rd_wide_x_dout_aux_y), - .rd_wide_y_dout_aux (rd_wide_y_dout_aux_y), + .rd_wide_x_din_aux (rd_wide_x_data_aux_y), + .rd_wide_y_din_aux (rd_wide_y_data_aux_y), .rcmb_final_xy_bank (rcmb_final_xy_bank_y), .rcmb_final_xy_addr (rcmb_final_xy_addr_y), - .rcmb_final_x_dout (rcmb_final_x_dout_y), - .rcmb_final_y_dout (rcmb_final_y_dout_y), + .rcmb_final_x_din (rcmb_final_x_data_y), + .rcmb_final_y_din (rcmb_final_y_data_y), .rcmb_final_xy_valid (rcmb_final_xy_valid_y), .rdct_wide_xy_bank (rdct_wide_xy_bank_y), .rdct_wide_xy_addr (rdct_wide_xy_addr_y), - .rdct_wide_x_dout (rdct_wide_x_dout_y), - .rdct_wide_y_dout (rdct_wide_y_dout_y), + .rdct_wide_x_dout (rdct_wide_x_data_y), + .rdct_wide_y_dout (rdct_wide_y_data_y), .rdct_wide_xy_valid (rdct_wide_xy_valid_y), .rdct_narrow_xy_bank (rdct_narrow_xy_bank_y), .rdct_narrow_xy_addr (rdct_narrow_xy_addr_y), - .rdct_narrow_x_dout (rdct_narrow_x_dout_y), - .rdct_narrow_y_dout (rdct_narrow_y_dout_y), + .rdct_narrow_x_dout (rdct_narrow_x_data_y), + .rdct_narrow_y_dout (rdct_narrow_y_data_y), .rdct_narrow_xy_valid (rdct_narrow_xy_valid_y) ); + // + // General Worker + // + reg wrk_ena = 1'b0; + wire wrk_rdy; + + reg [ BANK_ADDR_W -1:0] wrk_sel_wide_in; + reg [ BANK_ADDR_W -1:0] wrk_sel_wide_out; + reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_in; + reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_out; + reg [ OP_ADDR_W -1:0] wrk_word_index_last; + reg [UOP_OPCODE_W -1:0] wrk_opcode; + + 
modexpng_general_worker general_worker + ( + .clk (clk), + .rst (rst), + + .ena (wrk_ena), + .rdy (wrk_rdy), + + .sel_narrow_in (wrk_sel_narrow_in), + .sel_narrow_out (wrk_sel_narrow_out), + .sel_wide_in (wrk_sel_wide_in), + .sel_wide_out (wrk_sel_wide_out), + + .opcode (wrk_opcode), + + .word_index_last (wrk_word_index_last), + + .wrk_rd_wide_xy_ena_x (wrk_rd_wide_xy_ena_x), + .wrk_rd_wide_xy_bank_x (wrk_rd_wide_xy_bank_x), + .wrk_rd_wide_xy_addr_x (wrk_rd_wide_xy_addr_x), + .wrk_rd_wide_x_din_x (wrk_rd_wide_x_data_x), + .wrk_rd_wide_y_din_x (wrk_rd_wide_y_data_x), + + .wrk_rd_narrow_xy_ena_x (wrk_rd_narrow_xy_ena_x), + .wrk_rd_narrow_xy_bank_x (wrk_rd_narrow_xy_bank_x), + .wrk_rd_narrow_xy_addr_x (wrk_rd_narrow_xy_addr_x), + .wrk_rd_narrow_x_din_x (wrk_rd_narrow_x_data_x), + .wrk_rd_narrow_y_din_x (wrk_rd_narrow_y_data_x), + + .wrk_rd_wide_xy_ena_y (wrk_rd_wide_xy_ena_y), + .wrk_rd_wide_xy_bank_y (wrk_rd_wide_xy_bank_y), + .wrk_rd_wide_xy_addr_y (wrk_rd_wide_xy_addr_y), + .wrk_rd_wide_x_din_y (wrk_rd_wide_x_data_y), + .wrk_rd_wide_y_din_y (wrk_rd_wide_y_data_y), + + .wrk_rd_narrow_xy_ena_y (wrk_rd_narrow_xy_ena_y), + .wrk_rd_narrow_xy_bank_y (wrk_rd_narrow_xy_bank_y), + .wrk_rd_narrow_xy_addr_y (wrk_rd_narrow_xy_addr_y), + .wrk_rd_narrow_x_din_y (wrk_rd_narrow_x_data_y), + .wrk_rd_narrow_y_din_y (wrk_rd_narrow_y_data_y), + + .wrk_wr_wide_xy_ena_x (wrk_wr_wide_xy_ena_x), + .wrk_wr_wide_xy_bank_x (wrk_wr_wide_xy_bank_x), + .wrk_wr_wide_xy_addr_x (wrk_wr_wide_xy_addr_x), + .wrk_wr_wide_x_dout_x (wrk_wr_wide_x_data_x), + .wrk_wr_wide_y_dout_x (wrk_wr_wide_y_data_x), + + .wrk_wr_narrow_xy_ena_x (wrk_wr_narrow_xy_ena_x), + .wrk_wr_narrow_xy_bank_x (wrk_wr_narrow_xy_bank_x), + .wrk_wr_narrow_xy_addr_x (wrk_wr_narrow_xy_addr_x), + .wrk_wr_narrow_x_dout_x (wrk_wr_narrow_x_data_x), + .wrk_wr_narrow_y_dout_x (wrk_wr_narrow_y_data_x), + + .wrk_wr_wide_xy_ena_y (wrk_wr_wide_xy_ena_y), + .wrk_wr_wide_xy_bank_y (wrk_wr_wide_xy_bank_y), + .wrk_wr_wide_xy_addr_y 
(wrk_wr_wide_xy_addr_y), + .wrk_wr_wide_x_dout_y (wrk_wr_wide_x_data_y), + .wrk_wr_wide_y_dout_y (wrk_wr_wide_y_data_y), + + .wrk_wr_narrow_xy_ena_y (wrk_wr_narrow_xy_ena_y), + .wrk_wr_narrow_xy_bank_y (wrk_wr_narrow_xy_bank_y), + .wrk_wr_narrow_xy_addr_y (wrk_wr_narrow_xy_addr_y), + .wrk_wr_narrow_x_dout_y (wrk_wr_narrow_x_data_y), + .wrk_wr_narrow_y_dout_y (wrk_wr_narrow_y_data_y) + ); + + // // uOP Completion Detector // @@ -792,10 +976,10 @@ module modexpng_core_top // uop_exit_from_busy = 0; // - if (uop_opcode_is_io) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; - if (uop_opcode_is_mmm) uop_exit_from_busy = ~mmm_ena & mmm_rdy; - //if (uop_data_opcode_is_add) uop_exit_from_busy = ~mod_add_ena & mod_add_rdy; - //if (uop_data_opcode_is_sub) uop_exit_from_busy = ~mod_sub_ena & mod_sub_rdy; + if (uop_opcode_is_in) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; + if (uop_opcode_is_out) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~mmm_ena & mmm_rdy); /* NOTE(review): the enable logic below raises wrk_ena (not mmm_ena_x/y) for uop_opcode_is_out, yet completion here is gated on mmm_ena/mmm_rdy rather than wrk_ena/wrk_rdy -- confirm this is intended */ + if (uop_opcode_is_mmm) uop_exit_from_busy = ~mmm_ena & mmm_rdy ; + if (uop_opcode_is_wrk) uop_exit_from_busy = ~wrk_ena & wrk_rdy ; // end @@ -809,10 +993,12 @@ module modexpng_core_top io_mgr_ena <= 1'b0; mmm_ena_x <= 1'b0; mmm_ena_y <= 1'b0; + wrk_ena <= 1'b0; end else begin - io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_io : 1'b0; - mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; - mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; + io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in || uop_opcode_is_out) : 1'b0; + mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; + mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; + wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ?
(uop_opcode_is_wrk || uop_opcode_is_out) : 1'b0; end // @@ -825,6 +1011,7 @@ module modexpng_core_top if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin // io_mgr_opcode <= uop_data_opcode; + wrk_opcode <= uop_data_opcode; // case (uop_data_opcode) // @@ -842,6 +1029,15 @@ module modexpng_core_top io_mgr_sel_out <= uop_data_sel_narrow_out; end // + UOP_OPCODE_OUTPUT_FROM_NARROW: begin + io_mgr_sel_crt <= uop_data_crt; + io_mgr_sel_aux <= UOP_AUX_DNC; + io_mgr_sel_in <= BANK_DNC; + io_mgr_sel_out <= uop_data_sel_narrow_out; + // + wrk_sel_narrow_in <= uop_data_sel_narrow_in; + end + // UOP_OPCODE_MODULAR_MULTIPLY: begin // case (uop_data_ladder) @@ -856,10 +1052,21 @@ module modexpng_core_top {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }}; {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }}; {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out }}; - // end // + UOP_OPCODE_PROPAGATE_CARRIES: begin + wrk_sel_narrow_in <= uop_data_sel_narrow_in; + wrk_sel_narrow_out <= uop_data_sel_narrow_out; + end + // + UOP_OPCODE_COPY_CRT_Y2X: begin + wrk_sel_wide_in <= uop_data_sel_wide_in; + wrk_sel_wide_out <= uop_data_sel_wide_out; + wrk_sel_narrow_in <= uop_data_sel_narrow_in; + wrk_sel_narrow_out <= uop_data_sel_narrow_out; + end + // endcase // end @@ -887,6 +1094,9 @@ module modexpng_core_top {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }}; end // + UOP_OPCODE_PROPAGATE_CARRIES: + /* non-blocking, like the other register updates in this case statement; wrk_word_index_last drives the general worker's word_index_last port */ wrk_word_index_last <= uop_npq_is_n ?
word_index_last_n : word_index_last_pq; + // endcase // end @@ -945,87 +1155,88 @@ module modexpng_core_top // // X.X // - $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); - $write("X.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[0*256+i]); $write("\n"); - $write("X.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[1*256+i]); $write("\n"); - $write("X.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[2*256+i]); $write("\n"); - $write("X.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[3*256+i]); $write("\n"); - $write("X.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[4*256+i]); $write("\n"); - $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[5*256+i]); $write("\n"); - $write("X.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[6*256+i]); $write("\n"); - $write("X.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[7*256+i]); $write("\n"); - $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); - $write("X.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[0*256+i]); $write("\n"); - $write("X.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[1*256+i]); $write("\n"); - $write("X.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[2*256+i]); $write("\n"); - $write("X.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[3*256+i]); $write("\n"); - $write("X.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[4*256+i]); 
$write("\n"); - $write("X.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[5*256+i]); $write("\n"); - $write("X.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[6*256+i]); $write("\n"); - $write("X.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[7*256+i]); $write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); + $write("X.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[0*256+i]); $write("\n"); + $write("X.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[1*256+i]); $write("\n"); + $write("X.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[2*256+i]); $write("\n"); + $write("X.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[3*256+i]); $write("\n"); + $write("X.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[4*256+i]); $write("\n"); + $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[5*256+i]); $write("\n"); + $write("X.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[6*256+i]); $write("\n"); + $write("X.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[7*256+i]); $write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); + $write("X.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[0*256+i]); $write("\n"); + $write("X.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[1*256+i]); $write("\n"); + $write("X.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[2*256+i]); $write("\n"); + $write("X.X.WIDE.D: "); for 
(i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[3*256+i]); $write("\n"); + $write("X.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[4*256+i]); $write("\n"); + $write("X.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[5*256+i]); $write("\n"); + $write("X.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[6*256+i]); $write("\n"); + $write("X.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[7*256+i]); $write("\n"); // // X.Y // - $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); - $write("X.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[0*256+i]); $write("\n"); - $write("X.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[1*256+i]); $write("\n"); - $write("X.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[2*256+i]); $write("\n"); - $write("X.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[3*256+i]); $write("\n"); - $write("X.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[4*256+i]); $write("\n"); - $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[5*256+i]); $write("\n"); - $write("X.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[6*256+i]); $write("\n"); - $write("X.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[7*256+i]); $write("\n"); - $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); - $write("X.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[0*256+i]); $write("\n"); - $write("X.Y.WIDE.B: "); for (i=0; i<64; 
i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[1*256+i]); $write("\n"); - $write("X.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[2*256+i]); $write("\n"); - $write("X.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[3*256+i]); $write("\n"); - $write("X.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[4*256+i]); $write("\n"); - $write("X.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[5*256+i]); $write("\n"); - $write("X.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[6*256+i]); $write("\n"); - $write("X.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[7*256+i]); $write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); + $write("X.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[0*256+i]); $write("\n"); + $write("X.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[1*256+i]); $write("\n"); + $write("X.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[2*256+i]); $write("\n"); + $write("X.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[3*256+i]); $write("\n"); + $write("X.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[4*256+i]); $write("\n"); + $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[5*256+i]); $write("\n"); + $write("X.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[6*256+i]); $write("\n"); + $write("X.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[7*256+i]); 
$write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); + $write("X.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[0*256+i]); $write("\n"); + $write("X.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[1*256+i]); $write("\n"); + $write("X.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[2*256+i]); $write("\n"); + $write("X.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[3*256+i]); $write("\n"); + $write("X.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[4*256+i]); $write("\n"); + $write("X.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[5*256+i]); $write("\n"); + $write("X.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[6*256+i]); $write("\n"); + $write("X.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[7*256+i]); $write("\n"); // // Y.X // - $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); - $write("Y.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[0*256+i]); $write("\n"); - $write("Y.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[1*256+i]); $write("\n"); - $write("Y.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[2*256+i]); $write("\n"); - $write("Y.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[3*256+i]); $write("\n"); - $write("Y.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[4*256+i]); $write("\n"); - $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[5*256+i]); $write("\n"); - 
$write("Y.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[6*256+i]); $write("\n"); - $write("Y.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[7*256+i]); $write("\n"); - $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); - $write("Y.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[0*256+i]); $write("\n"); - $write("Y.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[1*256+i]); $write("\n"); - $write("Y.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[2*256+i]); $write("\n"); - $write("Y.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[3*256+i]); $write("\n"); - $write("Y.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[4*256+i]); $write("\n"); - $write("Y.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[5*256+i]); $write("\n"); - $write("Y.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[6*256+i]); $write("\n"); - $write("Y.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[7*256+i]); $write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); + $write("Y.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[0*256+i]); $write("\n"); + $write("Y.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[1*256+i]); $write("\n"); + $write("Y.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[2*256+i]); $write("\n"); + $write("Y.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", 
storage_block_y.narrow_x.mem[3*256+i]); $write("\n"); + $write("Y.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[4*256+i]); $write("\n"); + $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[5*256+i]); $write("\n"); + $write("Y.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[6*256+i]); $write("\n"); + $write("Y.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[7*256+i]); $write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); + $write("Y.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[0*256+i]); $write("\n"); + $write("Y.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[1*256+i]); $write("\n"); + $write("Y.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[2*256+i]); $write("\n"); + $write("Y.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[3*256+i]); $write("\n"); + $write("Y.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[4*256+i]); $write("\n"); + $write("Y.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[5*256+i]); $write("\n"); + $write("Y.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[6*256+i]); $write("\n"); + $write("Y.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[7*256+i]); $write("\n"); // // Y.Y // - $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); - $write("Y.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[0*256+i]); $write("\n"); - $write("Y.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", 
storage_block_y.narrow_bram_y.mem[1*256+i]); $write("\n"); - $write("Y.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[2*256+i]); $write("\n"); - $write("Y.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[3*256+i]); $write("\n"); - $write("Y.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[4*256+i]); $write("\n"); - $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[5*256+i]); $write("\n"); - $write("Y.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[6*256+i]); $write("\n"); - $write("Y.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[7*256+i]); $write("\n"); - $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); - $write("Y.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[0*256+i]); $write("\n"); - $write("Y.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[1*256+i]); $write("\n"); - $write("Y.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[2*256+i]); $write("\n"); - $write("Y.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[3*256+i]); $write("\n"); - $write("Y.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[4*256+i]); $write("\n"); - $write("Y.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[5*256+i]); $write("\n"); - $write("Y.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[6*256+i]); $write("\n"); - $write("Y.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", 
storage_block_y.gen_wide_bram[0].wide_bram_y.mem[7*256+i]); $write("\n"); // + $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); + $write("Y.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[0*256+i]); $write("\n"); + $write("Y.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[1*256+i]); $write("\n"); + $write("Y.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[2*256+i]); $write("\n"); + $write("Y.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[3*256+i]); $write("\n"); + $write("Y.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[4*256+i]); $write("\n"); + $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[5*256+i]); $write("\n"); + $write("Y.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[6*256+i]); $write("\n"); + $write("Y.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[7*256+i]); $write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); + $write("Y.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[0*256+i]); $write("\n"); + $write("Y.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[1*256+i]); $write("\n"); + $write("Y.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[2*256+i]); $write("\n"); + $write("Y.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[3*256+i]); $write("\n"); + $write("Y.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[4*256+i]); $write("\n"); + $write("Y.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[5*256+i]); $write("\n"); 
+ $write("Y.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[6*256+i]); $write("\n"); + $write("Y.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[7*256+i]); $write("\n"); + // end // diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v index 8ab64f0..8c4e844 100644 --- a/rtl/modexpng_dsp_array_block.v +++ b/rtl/modexpng_dsp_array_block.v @@ -70,8 +70,8 @@ module modexpng_dsp_array_block .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}), .alumode ({DSP48E1_ALUMODE_W{1'b0}}), - .casc_a_in (WORD_EXT_NULL), - .casc_b_in (WORD_NULL), + .casc_a_in (WORD_EXT_ZERO), + .casc_b_in (WORD_ZERO), .casc_a_out (casc_a[z]), .casc_b_out (casc_b[z]) @@ -138,8 +138,8 @@ module modexpng_dsp_array_block .opmode ({1'b0, mode_z[2*NUM_MULTS_HALF], 1'b0, 2'b01, 2'b01}), .alumode ({DSP48E1_ALUMODE_W{1'b0}}), - .casc_a_in (WORD_EXT_NULL), - .casc_b_in (WORD_NULL), + .casc_a_in (WORD_EXT_ZERO), + .casc_b_in (WORD_ZERO), .casc_a_out (), .casc_b_out () diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v new file mode 100644 index 0000000..c35f0b3 --- /dev/null +++ b/rtl/modexpng_general_worker.v @@ -0,0 +1,679 @@ +module modexpng_general_worker +( + clk, + rst, + + ena, + rdy, + + sel_narrow_in, + sel_narrow_out, + sel_wide_in, + sel_wide_out, + + opcode, + + word_index_last, + + wrk_rd_wide_xy_ena_x, + wrk_rd_wide_xy_bank_x, + wrk_rd_wide_xy_addr_x, + wrk_rd_wide_x_din_x, + wrk_rd_wide_y_din_x, + + wrk_rd_narrow_xy_ena_x, + wrk_rd_narrow_xy_bank_x, + wrk_rd_narrow_xy_addr_x, + wrk_rd_narrow_x_din_x, + wrk_rd_narrow_y_din_x, + + wrk_rd_wide_xy_ena_y, + wrk_rd_wide_xy_bank_y, + wrk_rd_wide_xy_addr_y, + wrk_rd_wide_x_din_y, + wrk_rd_wide_y_din_y, + + wrk_rd_narrow_xy_ena_y, + wrk_rd_narrow_xy_bank_y, + wrk_rd_narrow_xy_addr_y, + wrk_rd_narrow_x_din_y, + wrk_rd_narrow_y_din_y, + + wrk_wr_wide_xy_ena_x, + wrk_wr_wide_xy_bank_x, + wrk_wr_wide_xy_addr_x, + 
wrk_wr_wide_x_dout_x, + wrk_wr_wide_y_dout_x, + + wrk_wr_narrow_xy_ena_x, + wrk_wr_narrow_xy_bank_x, + wrk_wr_narrow_xy_addr_x, + wrk_wr_narrow_x_dout_x, + wrk_wr_narrow_y_dout_x, + + wrk_wr_wide_xy_ena_y, + wrk_wr_wide_xy_bank_y, + wrk_wr_wide_xy_addr_y, + wrk_wr_wide_x_dout_y, + wrk_wr_wide_y_dout_y, + + wrk_wr_narrow_xy_ena_y, + wrk_wr_narrow_xy_bank_y, + wrk_wr_narrow_xy_addr_y, + wrk_wr_narrow_x_dout_y, + wrk_wr_narrow_y_dout_y +); + + // + // Headers + // + `include "modexpng_parameters.vh" + `include "modexpng_microcode.vh" + + + // + // Ports + // + input clk; + input rst; + + input ena; + output rdy; + + input [ BANK_ADDR_W -1:0] sel_narrow_in; + input [ BANK_ADDR_W -1:0] sel_narrow_out; + input [ BANK_ADDR_W -1:0] sel_wide_in; + input [ BANK_ADDR_W -1:0] sel_wide_out; + + input [ UOP_OPCODE_W -1:0] opcode; + + input [ OP_ADDR_W -1:0] word_index_last; + + output wrk_rd_wide_xy_ena_x; + output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x; + output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x; + input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_x; + input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_x; + + output wrk_rd_narrow_xy_ena_x; + output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x; + output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x; + input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x; + input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x; + + output wrk_rd_wide_xy_ena_y; + output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y; + output [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y; + input [ WORD_EXT_W -1:0] wrk_rd_wide_x_din_y; + input [ WORD_EXT_W -1:0] wrk_rd_wide_y_din_y; + + output wrk_rd_narrow_xy_ena_y; + output [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y; + output [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y; + input [ WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y; + input [ WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y; + + output wrk_wr_wide_xy_ena_x; + output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x; + output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x; + output [ WORD_EXT_W -1:0] 
wrk_wr_wide_x_dout_x; + output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_x; + + output wrk_wr_narrow_xy_ena_x; + output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x; + output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x; + output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_x; + output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_x; + + output wrk_wr_wide_xy_ena_y; + output [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y; + output [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y; + output [ WORD_EXT_W -1:0] wrk_wr_wide_x_dout_y; + output [ WORD_EXT_W -1:0] wrk_wr_wide_y_dout_y; + + output wrk_wr_narrow_xy_ena_y; + output [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y; + output [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y; + output [ WORD_EXT_W -1:0] wrk_wr_narrow_x_dout_y; + output [ WORD_EXT_W -1:0] wrk_wr_narrow_y_dout_y; + + + // + // FSM Declaration + // + localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0; + localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1; + localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2; + localparam [3:0] WRK_FSM_STATE_BUSY = 4'h3; + localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug! 
+ localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6; + localparam [3:0] WRK_FSM_STATE_STOP = 4'h7; + + reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE; + reg [3:0] wrk_fsm_state_next_one_pass; // single address space sweep + + + // + // Control Signals + // + reg rd_wide_xy_ena_x = 1'b0; + reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_x; + reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_x; + + reg rd_narrow_xy_ena_x = 1'b0; + reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_x; + reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x; + + reg rd_wide_xy_ena_y = 1'b0; + reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_y; + reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_y; + + reg rd_narrow_xy_ena_y = 1'b0; + reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_y; + reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y; + + reg wr_wide_xy_ena_x = 1'b0; + reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_x; + reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_x; + reg [ WORD_EXT_W -1:0] wr_wide_x_dout_x; + reg [ WORD_EXT_W -1:0] wr_wide_y_dout_x; + + reg wr_narrow_xy_ena_x = 1'b0; + reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x; + reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_x; + reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_x; + reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_x; + + reg wr_wide_xy_ena_y = 1'b0; + reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_y; + reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_y; + reg [ WORD_EXT_W -1:0] wr_wide_x_dout_y; + reg [ WORD_EXT_W -1:0] wr_wide_y_dout_y; + + reg wr_narrow_xy_ena_y = 1'b0; + reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y; + reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y; + reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_y; + reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_y; + + + // + // Mapping + // + assign wrk_rd_wide_xy_ena_x = rd_wide_xy_ena_x; + assign wrk_rd_wide_xy_bank_x = rd_wide_xy_bank_x; + assign wrk_rd_wide_xy_addr_x = rd_wide_xy_addr_x; + + assign wrk_rd_narrow_xy_ena_x = rd_narrow_xy_ena_x; + assign wrk_rd_narrow_xy_bank_x = rd_narrow_xy_bank_x; + assign wrk_rd_narrow_xy_addr_x = rd_narrow_xy_addr_x; + + assign wrk_rd_wide_xy_ena_y = rd_wide_xy_ena_y; + 
assign wrk_rd_wide_xy_bank_y = rd_wide_xy_bank_y; + assign wrk_rd_wide_xy_addr_y = rd_wide_xy_addr_y; + + assign wrk_rd_narrow_xy_ena_y = rd_narrow_xy_ena_y; + assign wrk_rd_narrow_xy_bank_y = rd_narrow_xy_bank_y; + assign wrk_rd_narrow_xy_addr_y = rd_narrow_xy_addr_y; + + assign wrk_wr_wide_xy_ena_x = wr_wide_xy_ena_x; + assign wrk_wr_wide_xy_bank_x = wr_wide_xy_bank_x; + assign wrk_wr_wide_xy_addr_x = wr_wide_xy_addr_x; + assign wrk_wr_wide_x_dout_x = wr_wide_x_dout_x; + assign wrk_wr_wide_y_dout_x = wr_wide_y_dout_x; + + assign wrk_wr_narrow_xy_ena_x = wr_narrow_xy_ena_x; + assign wrk_wr_narrow_xy_bank_x = wr_narrow_xy_bank_x; + assign wrk_wr_narrow_xy_addr_x = wr_narrow_xy_addr_x; + assign wrk_wr_narrow_x_dout_x = wr_narrow_x_dout_x; + assign wrk_wr_narrow_y_dout_x = wr_narrow_y_dout_x; + + assign wrk_wr_wide_xy_ena_y = wr_wide_xy_ena_y; + assign wrk_wr_wide_xy_bank_y = wr_wide_xy_bank_y; + assign wrk_wr_wide_xy_addr_y = wr_wide_xy_addr_y; + assign wrk_wr_wide_x_dout_y = wr_wide_x_dout_y; + assign wrk_wr_wide_y_dout_y = wr_wide_y_dout_y; + + assign wrk_wr_narrow_xy_ena_y = wr_narrow_xy_ena_y; + assign wrk_wr_narrow_xy_bank_y = wr_narrow_xy_bank_y; + assign wrk_wr_narrow_xy_addr_y = wr_narrow_xy_addr_y; + assign wrk_wr_narrow_x_dout_y = wr_narrow_x_dout_y; + assign wrk_wr_narrow_y_dout_y = wr_narrow_y_dout_y; + + + // + // Delays + // + //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1; + //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2; + //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1; + //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2; + + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2; + + always @(posedge clk) begin + // + //{rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x}; + //{rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y}; + // + 
{rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x}; + {rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y}; + // + end + + + // + // Handy Wires + // + wire rd_narrow_xy_addr_x_next_is_last; + wire rd_narrow_xy_addr_y_next_is_last; + + + // + // Read Enable Logic + // + + task _update_wide_xy_rd_en; input _en; {rd_wide_xy_ena_x, rd_wide_xy_ena_y } <= {2{_en}}; endtask + task _update_narrow_xy_rd_en; input _en; {rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{_en}}; endtask + + task enable_wide_xy_rd_en; _update_wide_xy_rd_en(1'b1); endtask + task disable_wide_xy_rd_en; _update_wide_xy_rd_en(1'b0); endtask + + task enable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b1); endtask + task disable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b0); endtask + + always @(posedge clk) + // + if (rst) begin + // + disable_wide_xy_rd_en; + disable_narrow_xy_rd_en; + /* + rd_wide_xy_ena_x <= 1'b0; + rd_wide_xy_ena_y <= 1'b0; + rd_narrow_xy_ena_x <= 1'b0; + rd_narrow_xy_ena_y <= 1'b0; + */ + end else begin + // + disable_wide_xy_rd_en; + disable_narrow_xy_rd_en; + // + //rd_wide_xy_ena_x <= 1'b0; + //rd_wide_xy_ena_y <= 1'b0; + //rd_narrow_xy_ena_x <= 1'b0; + //rd_narrow_xy_ena_y <= 1'b0; + // + case (opcode) + // + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_OUTPUT_FROM_NARROW: + // + case (wrk_fsm_state_next_one_pass) + // + WRK_FSM_STATE_LATENCY_PRE1, + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_BUSY: + // + enable_narrow_xy_rd_en; + //{rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{1'b1}}; + // + // + endcase + // + // + UOP_OPCODE_COPY_CRT_Y2X: + // + case (wrk_fsm_state_next_one_pass) + // + WRK_FSM_STATE_LATENCY_PRE1, + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_BUSY: begin + // + enable_narrow_xy_rd_en; + enable_wide_xy_rd_en; + // + end + // + endcase + // + endcase + // + end + + + // + // Write Enable Logic + // + + task _update_wide_xy_wr_en; input _en; 
{wr_wide_xy_ena_x, wr_wide_xy_ena_y } <= {2{_en}}; endtask + task _update_narrow_xy_wr_en; input _en; {wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{_en}}; endtask + + task enable_wide_xy_wr_en; _update_wide_xy_wr_en(1'b1); endtask + task disable_wide_xy_wr_en; _update_wide_xy_wr_en(1'b0); endtask + + task enable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b1); endtask + task disable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b0); endtask + + always @(posedge clk) + // + if (rst) begin + // + disable_wide_xy_wr_en; + disable_narrow_xy_wr_en; + //wr_wide_xy_ena_x <= 1'b0; + //wr_wide_xy_ena_y <= 1'b0; + //wr_narrow_xy_ena_x <= 1'b0; + //wr_narrow_xy_ena_y <= 1'b0; + // + end else begin + // + disable_wide_xy_wr_en; + disable_narrow_xy_wr_en; + // + //wr_wide_xy_ena_x <= 1'b0; + //wr_wide_xy_ena_y <= 1'b0; + //wr_narrow_xy_ena_x <= 1'b0; + //wr_narrow_xy_ena_y <= 1'b0; + // + case (opcode) + // + UOP_OPCODE_PROPAGATE_CARRIES: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: + // + enable_narrow_xy_wr_en; + //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}}; + // + // + endcase + // + UOP_OPCODE_COPY_CRT_Y2X: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: begin + // + enable_wide_xy_wr_en; + enable_narrow_xy_wr_en; + //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}}; + // + end + // + endcase + // + endcase + // + end + + + // + // Data Logic + // + reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r; + reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r; + reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r; + reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r; + + wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r}; + wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r}; + wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = 
wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r}; + wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r}; + + wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]}; + wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]}; + wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]}; + wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]}; + + always @(posedge clk) begin + // + wr_wide_x_dout_x <= WORD_EXT_DNC; + wr_wide_y_dout_x <= WORD_EXT_DNC; + wr_wide_x_dout_y <= WORD_EXT_DNC; + wr_wide_y_dout_y <= WORD_EXT_DNC; + wr_narrow_x_dout_x <= WORD_EXT_DNC; + wr_narrow_y_dout_x <= WORD_EXT_DNC; + wr_narrow_x_dout_y <= WORD_EXT_DNC; + wr_narrow_y_dout_y <= WORD_EXT_DNC; + // + case (opcode) + // + UOP_OPCODE_PROPAGATE_CARRIES: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_LATENCY_PRE2: begin + rd_narrow_x_din_x_cry_r <= CARRY_ZERO; + rd_narrow_y_din_x_cry_r <= CARRY_ZERO; + rd_narrow_x_din_y_cry_r <= CARRY_ZERO; + rd_narrow_y_din_y_cry_r <= CARRY_ZERO; + end + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: begin // TODO: post2 doesn't need update of carry, since that's the last word + // + rd_narrow_x_din_x_cry_r <= rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W]; + rd_narrow_y_din_x_cry_r <= rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W]; + rd_narrow_x_din_y_cry_r <= rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W]; + rd_narrow_y_din_y_cry_r <= rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W]; + // + wr_narrow_x_dout_x <= rd_narrow_x_din_x_w_cry_reduced; + wr_narrow_y_dout_x <= rd_narrow_y_din_x_w_cry_reduced; + wr_narrow_x_dout_y <= rd_narrow_x_din_y_w_cry_reduced; + wr_narrow_y_dout_y <= rd_narrow_y_din_y_w_cry_reduced; + // + 
end + // + endcase + // + UOP_OPCODE_COPY_CRT_Y2X: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: begin + // + wr_wide_x_dout_x <= wrk_rd_wide_x_din_y; + wr_wide_y_dout_x <= wrk_rd_wide_y_din_y; + wr_wide_x_dout_y <= wrk_rd_wide_x_din_y; + wr_wide_y_dout_y <= wrk_rd_wide_y_din_y; + // + wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_y; + wr_narrow_y_dout_x <= wrk_rd_narrow_y_din_y; + wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y; + wr_narrow_y_dout_y <= wrk_rd_narrow_y_din_y; + // + end + // + endcase + // + endcase + // + end + + + // + // Write Address Logic + // + always @(posedge clk) begin + // + {wr_wide_xy_bank_x, wr_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; + {wr_wide_xy_bank_y, wr_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC}; + {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC}; + {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC}; + // + case (opcode) + // + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_COPY_CRT_Y2X: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: begin + // + {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly2}; + {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly2}; + // + {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly2}; + {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly2}; + // + end + // + endcase + // + // + endcase + // + end + + + // + // Read Address Logic + // + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next; + + assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last; + assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last; + + always @(posedge clk) begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x } <= {BANK_DNC, 
OP_ADDR_DNC}; // TODO: Add same default path for io_manager ?? + {rd_wide_xy_bank_y, rd_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC}; + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC}; + // + case (opcode) + // + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_OUTPUT_FROM_NARROW, + UOP_OPCODE_COPY_CRT_Y2X: + // + case (wrk_fsm_state_next_one_pass) + // + WRK_FSM_STATE_LATENCY_PRE1: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO}; + // + rd_narrow_xy_addr_x_next <= OP_ADDR_ONE; + rd_narrow_xy_addr_y_next <= OP_ADDR_ONE; + // + end + // + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_BUSY: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_narrow_xy_addr_x_next}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_narrow_xy_addr_y_next}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next}; + // + rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1; + rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1; + // + end + // + endcase + // + // + endcase + // + end + + + // + // FSM Process + // + always @(posedge clk) + // + if (rst) wrk_fsm_state <= WRK_FSM_STATE_IDLE; + else case (opcode) + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_OUTPUT_FROM_NARROW, + UOP_OPCODE_COPY_CRT_Y2X: wrk_fsm_state <= wrk_fsm_state_next_one_pass; + default: wrk_fsm_state <= WRK_FSM_STATE_IDLE; + endcase + + + // + // Busy Exit Logic + // + reg wrk_fsm_done_one_pass = 1'b0; + + always @(posedge clk) begin + // + wrk_fsm_done_one_pass <= 1'b0; + // + case (opcode) + 
// + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_OUTPUT_FROM_NARROW, + UOP_OPCODE_COPY_CRT_Y2X: begin + // + if (wrk_fsm_state == WRK_FSM_STATE_BUSY) begin + // + if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass <= 1'b1; // TODO: Check, whether both are necessary... + if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass <= 1'b1; + // + end + // + end + // + endcase + // + end + + + // + // FSM Transition Logic + // + // Combinational next-state decode for the one-pass (single address space sweep) FSM. + // wrk_fsm_state is 4 bits wide, but only seven encodings are assigned (4'h4 is skipped on purpose). + // The default arm sends every unused encoding (4'h4, 4'h8..4'hF) back to IDLE, so the block stays + // purely combinational (no inferred latch). + // + always @* begin + // + case (wrk_fsm_state) + WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE ; + WRK_FSM_STATE_LATENCY_PRE1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_PRE2 ; + WRK_FSM_STATE_LATENCY_PRE2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_BUSY ; + WRK_FSM_STATE_BUSY: wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ; + WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_POST2 ; + WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_STOP ; + WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ; + default: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ; + endcase + // + end + + + // + // Ready Logic + // + reg rdy_reg = 1'b1; + + assign rdy = rdy_reg; + + always @(posedge clk) + // + if (rst) rdy_reg <= 1'b1; + else case (wrk_fsm_state) + WRK_FSM_STATE_IDLE: rdy_reg <= ~ena; + WRK_FSM_STATE_STOP: rdy_reg <= 1'b1; + endcase + + +endmodule diff --git a/rtl/modexpng_io_block.v b/rtl/modexpng_io_block.v index 68d13c4..d7dd72e 100644 --- a/rtl/modexpng_io_block.v +++ b/rtl/modexpng_io_block.v @@ -84,13 +84,17 @@ module modexpng_io_block wire bus_data_wr_input_1 = bus_data_wr && (bus_addr_msb == 2'd0); wire bus_data_wr_input_2 = bus_data_wr && (bus_addr_msb == 2'd1); + wire bus_cs_input_1 = bus_cs && (bus_addr_msb == 2'b00); + wire bus_cs_input_2 = bus_cs && (bus_addr_msb == 2'b01); + wire bus_cs_output = bus_cs && (bus_addr_msb == 2'b10); + /* INPUT_1 */ modexpng_tdp_36k_x16_x32_wrapper bram_input_1 ( .clk
(clk), // core clock .clk_bus (clk_bus), // bus clock - .ena (bus_cs), // bus side read-write + .ena (bus_cs_input_1), // bus side read-write .wea (bus_data_wr_input_1), // .addra (bus_addr_lsb), // .dina (bus_data_wr), // @@ -109,7 +113,7 @@ module modexpng_io_block .clk (clk), // core clock .clk_bus (clk_bus), // bus clock - .ena (bus_cs), // bus side write-only + .ena (bus_cs_input_2), // bus side write-only .wea (bus_data_wr_input_2), // .addra (bus_addr_lsb), // .dina (bus_data_wr), // @@ -132,7 +136,7 @@ module modexpng_io_block .addra (out_addr), // .dina (out_din), // - .enb (bus_cs), // bus side read-only + .enb (bus_cs_output), // bus side read-only .addrb (bus_addr_lsb), // .doutb (bus_data_rd_output) // ); diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v index 81f582f..dfbd676 100644 --- a/rtl/modexpng_io_manager.v +++ b/rtl/modexpng_io_manager.v @@ -15,42 +15,45 @@ module modexpng_io_manager word_index_last, - ext_wide_xy_ena_x, - ext_wide_xy_bank_x, - ext_wide_xy_addr_x, - ext_wide_x_din_x, - ext_wide_y_din_x, - - ext_narrow_xy_ena_x, - ext_narrow_xy_bank_x, - ext_narrow_xy_addr_x, - ext_narrow_x_din_x, - ext_narrow_y_din_x, - - ext_wide_xy_ena_y, - ext_wide_xy_bank_y, - ext_wide_xy_addr_y, - ext_wide_x_din_y, - ext_wide_y_din_y, - - ext_narrow_xy_ena_y, - ext_narrow_xy_bank_y, - ext_narrow_xy_addr_y, - ext_narrow_x_din_y, - ext_narrow_y_din_y, + io_wide_xy_ena_x, + io_wide_xy_bank_x, + io_wide_xy_addr_x, + io_wide_x_din_x, + io_wide_y_din_x, + + io_narrow_xy_ena_x, + io_narrow_xy_bank_x, + io_narrow_xy_addr_x, + io_narrow_x_din_x, + io_narrow_y_din_x, + + io_wide_xy_ena_y, + io_wide_xy_bank_y, + io_wide_xy_addr_y, + io_wide_x_din_y, + io_wide_y_din_y, + + io_narrow_xy_ena_y, + io_narrow_xy_bank_y, + io_narrow_xy_addr_y, + io_narrow_x_din_y, + io_narrow_y_din_y, io_in_1_en, io_in_1_addr, - io_in_1_dout, + io_in_1_din, io_in_2_en, io_in_2_addr, - io_in_2_dout, + io_in_2_din, io_out_en, io_out_we, io_out_addr, - io_out_din + 
io_out_dout, + + wrk_narrow_x_din_x_trunc, + wrk_narrow_x_din_y_trunc ); // @@ -78,42 +81,45 @@ module modexpng_io_manager input [ OP_ADDR_W -1:0] word_index_last; - output ext_wide_xy_ena_x; - output [ BANK_ADDR_W -1:0] ext_wide_xy_bank_x; - output [ OP_ADDR_W -1:0] ext_wide_xy_addr_x; - output [ WORD_EXT_W -1:0] ext_wide_x_din_x; - output [ WORD_EXT_W -1:0] ext_wide_y_din_x; - - output ext_narrow_xy_ena_x; - output [ BANK_ADDR_W -1:0] ext_narrow_xy_bank_x; - output [ OP_ADDR_W -1:0] ext_narrow_xy_addr_x; - output [ WORD_EXT_W -1:0] ext_narrow_x_din_x; - output [ WORD_EXT_W -1:0] ext_narrow_y_din_x; - - output ext_wide_xy_ena_y; - output [ BANK_ADDR_W -1:0] ext_wide_xy_bank_y; - output [ OP_ADDR_W -1:0] ext_wide_xy_addr_y; - output [ WORD_EXT_W -1:0] ext_wide_x_din_y; - output [ WORD_EXT_W -1:0] ext_wide_y_din_y; - - output ext_narrow_xy_ena_y; - output [ BANK_ADDR_W -1:0] ext_narrow_xy_bank_y; - output [ OP_ADDR_W -1:0] ext_narrow_xy_addr_y; - output [ WORD_EXT_W -1:0] ext_narrow_x_din_y; - output [ WORD_EXT_W -1:0] ext_narrow_y_din_y; + output io_wide_xy_ena_x; + output [ BANK_ADDR_W -1:0] io_wide_xy_bank_x; + output [ OP_ADDR_W -1:0] io_wide_xy_addr_x; + output [ WORD_EXT_W -1:0] io_wide_x_din_x; + output [ WORD_EXT_W -1:0] io_wide_y_din_x; + + output io_narrow_xy_ena_x; + output [ BANK_ADDR_W -1:0] io_narrow_xy_bank_x; + output [ OP_ADDR_W -1:0] io_narrow_xy_addr_x; + output [ WORD_EXT_W -1:0] io_narrow_x_din_x; + output [ WORD_EXT_W -1:0] io_narrow_y_din_x; + + output io_wide_xy_ena_y; + output [ BANK_ADDR_W -1:0] io_wide_xy_bank_y; + output [ OP_ADDR_W -1:0] io_wide_xy_addr_y; + output [ WORD_EXT_W -1:0] io_wide_x_din_y; + output [ WORD_EXT_W -1:0] io_wide_y_din_y; + + output io_narrow_xy_ena_y; + output [ BANK_ADDR_W -1:0] io_narrow_xy_bank_y; + output [ OP_ADDR_W -1:0] io_narrow_xy_addr_y; + output [ WORD_EXT_W -1:0] io_narrow_x_din_y; + output [ WORD_EXT_W -1:0] io_narrow_y_din_y; output io_in_1_en; output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr; - 
input [ WORD_W -1:0] io_in_1_dout; + input [ WORD_W -1:0] io_in_1_din; output io_in_2_en; output [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr; - input [ WORD_W -1:0] io_in_2_dout; + input [ WORD_W -1:0] io_in_2_din; output io_out_en; output io_out_we; output [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr; - output [ WORD_W -1:0] io_out_din; + output [ WORD_W -1:0] io_out_dout; + + output [ WORD_W -1:0] wrk_narrow_x_din_x_trunc; + output [ WORD_W -1:0] wrk_narrow_x_din_y_trunc; // @@ -126,6 +132,7 @@ module modexpng_io_manager localparam [2:0] IO_FSM_STATE_EXTRA = 3'b100; localparam [2:0] IO_FSM_STATE_LATENCY_POST1 = 3'b101; localparam [2:0] IO_FSM_STATE_LATENCY_POST2 = 3'b110; + localparam [2:0] IO_FSM_STATE_STOP = 3'b111; reg [2:0] io_fsm_state = IO_FSM_STATE_IDLE; reg [2:0] io_fsm_state_next; @@ -143,10 +150,11 @@ module modexpng_io_manager reg [ OP_ADDR_W -1:0] in_2_addr_op; reg out_en = 1'b0; - reg out_we; reg [BANK_ADDR_W -1:0] out_addr_bank; reg [ OP_ADDR_W -1:0] out_addr_op; - reg [ WORD_W -1:0] out_din; + reg [ WORD_W -1:0] out_dout; + + reg [ OP_ADDR_W -1:0] dummy_addr_op; // @@ -187,51 +195,54 @@ module modexpng_io_manager assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op}; assign io_out_en = out_en; - assign io_out_we = out_we; + assign io_out_we = io_out_en; // we can only write! 
assign io_out_addr = {out_addr_bank, out_addr_op}; - assign io_out_din = out_din; + assign io_out_dout = out_dout; // // Mapping // - assign ext_wide_xy_ena_x = wide_xy_ena_x; - assign ext_wide_xy_bank_x = wide_xy_bank_x; - assign ext_wide_xy_addr_x = wide_xy_addr_x; - assign ext_wide_x_din_x = wide_x_din_x; - assign ext_wide_y_din_x = wide_y_din_x; + assign io_wide_xy_ena_x = wide_xy_ena_x; + assign io_wide_xy_bank_x = wide_xy_bank_x; + assign io_wide_xy_addr_x = wide_xy_addr_x; + assign io_wide_x_din_x = wide_x_din_x; + assign io_wide_y_din_x = wide_y_din_x; - assign ext_narrow_xy_ena_x = narrow_xy_ena_x; - assign ext_narrow_xy_bank_x = narrow_xy_bank_x; - assign ext_narrow_xy_addr_x = narrow_xy_addr_x; - assign ext_narrow_x_din_x = narrow_x_din_x; - assign ext_narrow_y_din_x = narrow_y_din_x; + assign io_narrow_xy_ena_x = narrow_xy_ena_x; + assign io_narrow_xy_bank_x = narrow_xy_bank_x; + assign io_narrow_xy_addr_x = narrow_xy_addr_x; + assign io_narrow_x_din_x = narrow_x_din_x; + assign io_narrow_y_din_x = narrow_y_din_x; - assign ext_wide_xy_ena_y = wide_xy_ena_y; - assign ext_wide_xy_bank_y = wide_xy_bank_y; - assign ext_wide_xy_addr_y = wide_xy_addr_y; - assign ext_wide_x_din_y = wide_x_din_y; - assign ext_wide_y_din_y = wide_y_din_y; + assign io_wide_xy_ena_y = wide_xy_ena_y; + assign io_wide_xy_bank_y = wide_xy_bank_y; + assign io_wide_xy_addr_y = wide_xy_addr_y; + assign io_wide_x_din_y = wide_x_din_y; + assign io_wide_y_din_y = wide_y_din_y; - assign ext_narrow_xy_ena_y = narrow_xy_ena_y; - assign ext_narrow_xy_bank_y = narrow_xy_bank_y; - assign ext_narrow_xy_addr_y = narrow_xy_addr_y; - assign ext_narrow_x_din_y = narrow_x_din_y; - assign ext_narrow_y_din_y = narrow_y_din_y; + assign io_narrow_xy_ena_y = narrow_xy_ena_y; + assign io_narrow_xy_bank_y = narrow_xy_bank_y; + assign io_narrow_xy_addr_y = narrow_xy_addr_y; + assign io_narrow_x_din_y = narrow_x_din_y; + assign io_narrow_y_din_y = narrow_y_din_y; // // Delays // - reg [ OP_ADDR_W -1:0] 
in_1_addr_op_dly1; - reg [ OP_ADDR_W -1:0] in_1_addr_op_dly2; - reg [ OP_ADDR_W -1:0] in_2_addr_op_dly1; - reg [ OP_ADDR_W -1:0] in_2_addr_op_dly2; + reg [OP_ADDR_W -1:0] in_1_addr_op_dly1; + reg [OP_ADDR_W -1:0] in_1_addr_op_dly2; + reg [OP_ADDR_W -1:0] in_2_addr_op_dly1; + reg [OP_ADDR_W -1:0] in_2_addr_op_dly2; + reg [OP_ADDR_W -1:0] dummy_addr_op_dly1; + reg [OP_ADDR_W -1:0] dummy_addr_op_dly2; always @(posedge clk) begin // - {in_1_addr_op_dly2, in_1_addr_op_dly1} <= {in_1_addr_op_dly1, in_1_addr_op}; - {in_2_addr_op_dly2, in_2_addr_op_dly1} <= {in_2_addr_op_dly1, in_2_addr_op}; + {in_1_addr_op_dly2, in_1_addr_op_dly1} <= {in_1_addr_op_dly1, in_1_addr_op}; + {in_2_addr_op_dly2, in_2_addr_op_dly1} <= {in_2_addr_op_dly1, in_2_addr_op}; + {dummy_addr_op_dly2, dummy_addr_op_dly1} <= {dummy_addr_op_dly1, dummy_addr_op}; // end @@ -241,10 +252,14 @@ module modexpng_io_manager // wire opcode_is_input = (opcode == UOP_OPCODE_INPUT_TO_WIDE) || (opcode == UOP_OPCODE_INPUT_TO_NARROW); - wire opcode_is_wide = (opcode == UOP_OPCODE_INPUT_TO_WIDE ); - wire opcode_is_narrow = (opcode == UOP_OPCODE_INPUT_TO_NARROW); + wire opcode_is_output = opcode == UOP_OPCODE_OUTPUT_FROM_NARROW; + + wire opcode_is_input_wide = opcode == UOP_OPCODE_INPUT_TO_WIDE; + wire opcode_is_input_narrow = opcode == UOP_OPCODE_INPUT_TO_NARROW; - wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF); + wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF) || + (sel_in == BANK_IN_2_P_COEFF) || + (sel_in == BANK_IN_2_Q_COEFF) ; wire sel_crt_is_x = sel_crt == UOP_CRT_X; wire sel_crt_is_y = sel_crt == UOP_CRT_Y; @@ -252,22 +267,18 @@ module modexpng_io_manager wire sel_aux_is_1 = sel_aux == UOP_AUX_1; wire sel_aux_is_2 = sel_aux == UOP_AUX_2; - wire in_1_addr_op_is_last; - wire in_2_addr_op_is_last; - - wire in_1_addr_next_op_is_last; - wire in_2_addr_next_op_is_last; - + wire in_1_addr_op_next_is_last; + wire in_2_addr_op_next_is_last; + wire dummy_addr_op_next_is_last; // - // Enable Logic + // Source 
Enable Logic // always @(posedge clk) // if (rst) begin in_1_en <= 1'b0; in_2_en <= 1'b0; - out_en <= 1'b0; end else case (io_fsm_state_next) // IO_FSM_STATE_LATENCY_PRE1, @@ -279,7 +290,7 @@ module modexpng_io_manager // IO_FSM_STATE_EXTRA: begin in_1_en <= opcode_is_input && sel_aux_is_1 && sel_in_needs_extra; - in_2_en <= 1'b0; + in_2_en <= opcode_is_input && sel_aux_is_2 && sel_in_needs_extra; end // default: begin @@ -290,7 +301,7 @@ module modexpng_io_manager endcase // - // Enable Logic + // Destination Enable Logic // always @(posedge clk) // @@ -301,38 +312,52 @@ module modexpng_io_manager narrow_xy_ena_x <= 1'b0; narrow_xy_ena_y <= 1'b0; // + out_en <= 1'b0; + // end else case (io_fsm_state) // IO_FSM_STATE_BUSY, IO_FSM_STATE_EXTRA, IO_FSM_STATE_LATENCY_POST1: begin - wide_xy_ena_x <= opcode_is_wide && sel_crt_is_x; - wide_xy_ena_y <= opcode_is_wide && sel_crt_is_y; - narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x; - narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y; + // + wide_xy_ena_x <= opcode_is_input_wide && sel_crt_is_x; + wide_xy_ena_y <= opcode_is_input_wide && sel_crt_is_y; + narrow_xy_ena_x <= opcode_is_input_narrow && sel_crt_is_x; + narrow_xy_ena_y <= opcode_is_input_narrow && sel_crt_is_y; + // + out_en <= opcode_is_output; + // end // IO_FSM_STATE_LATENCY_POST2: begin + // wide_xy_ena_x <= 1'b0; wide_xy_ena_y <= 1'b0; - narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra; - narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra; + narrow_xy_ena_x <= opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra; + narrow_xy_ena_y <= opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra; + // + out_en <= opcode_is_output; + // end // default: begin + // wide_xy_ena_x <= 1'b0; wide_xy_ena_y <= 1'b0; narrow_xy_ena_x <= 1'b0; narrow_xy_ena_y <= 1'b0; + // + out_en <= 1'b0; + // end // endcase // - // Data Logic + // Output Data Logic // - wire [WORD_EXT_W -1:0] io_in_dout_mux = 
{{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_dout : io_in_2_dout}; + wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_din : io_in_2_din}; always @(posedge clk) begin // @@ -345,20 +370,30 @@ module modexpng_io_manager narrow_x_din_y <= WORD_EXT_DNC; narrow_y_din_y <= WORD_EXT_DNC; // + out_dout <= WORD_DNC; + // case (io_fsm_state) // IO_FSM_STATE_BUSY, IO_FSM_STATE_EXTRA, IO_FSM_STATE_LATENCY_POST1: begin - if (opcode_is_wide && sel_crt_is_x) {wide_x_din_x, wide_y_din_x} <= {2{io_in_dout_mux}}; - if (opcode_is_wide && sel_crt_is_y) {wide_x_din_y, wide_y_din_y} <= {2{io_in_dout_mux}}; - if (opcode_is_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}}; - if (opcode_is_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}}; + // + if (opcode_is_input_wide && sel_crt_is_x) {wide_x_din_x, wide_y_din_x} <= {2{io_in_dout_mux}}; // TODO: Make external ports smaller (WORD_W, not WORD_EXT_W)?? + if (opcode_is_input_wide && sel_crt_is_y) {wide_x_din_y, wide_y_din_y} <= {2{io_in_dout_mux}}; + if (opcode_is_input_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}}; + if (opcode_is_input_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}}; + // + if (opcode_is_output) out_dout <= sel_crt_is_x ? 
wrk_narrow_x_din_x_trunc : wrk_narrow_x_din_y_trunc; + // end // IO_FSM_STATE_LATENCY_POST2: begin - if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}}; - if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}}; + // + if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}}; + if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}}; + // + if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_trunc : wrk_narrow_x_din_y_trunc; + // end // endcase @@ -367,7 +402,7 @@ module modexpng_io_manager // - // Address Logic + // Destination Address Logic // wire [OP_ADDR_W -1:0] in_addr_op_dly2_mux = sel_aux_is_1 ? in_1_addr_op_dly2 : in_2_addr_op_dly2; @@ -378,21 +413,24 @@ module modexpng_io_manager {wide_xy_bank_y, wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC}; {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC}; {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC}; + {out_addr_bank, out_addr_op } <= {BANK_DNC, OP_ADDR_DNC}; // case (io_fsm_state) // IO_FSM_STATE_BUSY, IO_FSM_STATE_EXTRA, IO_FSM_STATE_LATENCY_POST1: begin - if (opcode_is_wide && sel_crt_is_x) {wide_xy_bank_x, wide_xy_addr_x } <= {sel_out, in_addr_op_dly2_mux}; - if (opcode_is_wide && sel_crt_is_y) {wide_xy_bank_y, wide_xy_addr_y } <= {sel_out, in_addr_op_dly2_mux}; - if (opcode_is_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux}; - if (opcode_is_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux}; + if (opcode_is_input_wide && sel_crt_is_x) {wide_xy_bank_x, wide_xy_addr_x } <= {sel_out, in_addr_op_dly2_mux}; + if (opcode_is_input_wide && sel_crt_is_y) {wide_xy_bank_y, wide_xy_addr_y } <= {sel_out, in_addr_op_dly2_mux}; + if 
(opcode_is_input_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux}; + if (opcode_is_input_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux}; + if (opcode_is_output ) {out_addr_bank, out_addr_op} <= {sel_out, dummy_addr_op_dly2}; end // IO_FSM_STATE_LATENCY_POST2: begin - if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF}; - if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF}; + if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF }; + if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF }; + if (opcode_is_output ) {out_addr_bank, out_addr_op } <= {sel_out, dummy_addr_op_dly2}; end // endcase @@ -401,21 +439,19 @@ module modexpng_io_manager // - // Address Logic + // Source Address Logic // reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next; reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next; - reg [BANK_ADDR_W + OP_ADDR_W -1:0] out_addr_next; + reg [ OP_ADDR_W -1:0] dummy_addr_next; - wire [OP_ADDR_W -1:0] in_1_addr_next_op = in_1_addr_next[OP_ADDR_W -1:0]; - wire [OP_ADDR_W -1:0] in_2_addr_next_op = in_2_addr_next[OP_ADDR_W -1:0]; - wire [OP_ADDR_W -1:0] out_addr_next_op = out_addr_next [OP_ADDR_W -1:0]; + wire [OP_ADDR_W -1:0] in_1_addr_op_next = in_1_addr_next[OP_ADDR_W -1:0]; + wire [OP_ADDR_W -1:0] in_2_addr_op_next = in_2_addr_next[OP_ADDR_W -1:0]; + wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next; - assign in_1_addr_op_is_last = in_1_addr_op == word_index_last; - assign in_2_addr_op_is_last = in_2_addr_op == word_index_last; - - assign in_1_addr_next_op_is_last = in_1_addr_next_op == word_index_last; - assign 
in_2_addr_next_op_is_last = in_2_addr_next_op == word_index_last; + assign in_1_addr_op_next_is_last = in_1_addr_op_next == word_index_last; + assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last; + assign dummy_addr_op_next_is_last = dummy_addr_op_next == word_index_last; always @(posedge clk) // @@ -423,36 +459,42 @@ module modexpng_io_manager // IO_FSM_STATE_LATENCY_PRE1: begin // - {in_1_addr_bank, in_1_addr_op} <= {sel_in, OP_ADDR_ZERO}; - {in_2_addr_bank, in_2_addr_op} <= {sel_in, OP_ADDR_ZERO}; - {out_addr_bank, out_addr_op } <= {sel_out, OP_ADDR_ZERO}; + {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO}; + {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO}; + { dummy_addr_op} <= { OP_ADDR_ZERO}; // - in_1_addr_next <= {sel_in, OP_ADDR_ONE}; - in_2_addr_next <= {sel_in, OP_ADDR_ONE}; - out_addr_next <= {sel_out, OP_ADDR_ONE}; + in_1_addr_next <= {sel_in, OP_ADDR_ONE}; + in_2_addr_next <= {sel_in, OP_ADDR_ONE}; + dummy_addr_next <= { OP_ADDR_ONE}; // end // IO_FSM_STATE_LATENCY_PRE2, IO_FSM_STATE_BUSY: begin // - {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next; - {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next; - {out_addr_bank, out_addr_op } <= out_addr_next; + {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next; + {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next; + { dummy_addr_op} <= dummy_addr_next; // - in_1_addr_next <= in_1_addr_next + 1'b1; - in_2_addr_next <= in_2_addr_next + 1'b1; - out_addr_next <= out_addr_next + 1'b1; + in_1_addr_next <= in_1_addr_next + 1'b1; + in_2_addr_next <= in_2_addr_next + 1'b1; + dummy_addr_next <= dummy_addr_next + 1'b1; // end // IO_FSM_STATE_EXTRA: // - if (opcode_is_input && sel_aux_is_1 && (sel_in == BANK_IN_1_N_COEFF)) begin + if (opcode_is_input && sel_in_needs_extra) begin // - {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next; + if (sel_aux_is_1) begin + {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next; + in_1_addr_next <= in_1_addr_next + 1'b1; + end // - in_1_addr_next <= 
in_1_addr_next + 1'b1; + if (sel_aux_is_2) begin + {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next; + in_2_addr_next <= in_2_addr_next + 1'b1; + end // end // @@ -481,28 +523,33 @@ module modexpng_io_manager if (io_fsm_state == IO_FSM_STATE_BUSY) begin // if (opcode_is_input) begin - if (sel_aux_is_1 && in_1_addr_next_op_is_last) io_fsm_done <= 1'b1; - if (sel_aux_is_2 && in_2_addr_next_op_is_last) io_fsm_done <= 1'b1; + if (sel_aux_is_1 && in_1_addr_op_next_is_last) io_fsm_done <= 1'b1; + if (sel_aux_is_2 && in_2_addr_op_next_is_last) io_fsm_done <= 1'b1; + end else if (opcode_is_output) begin + if (dummy_addr_op_next_is_last) io_fsm_done <= 1'b1; end - + // end // end - + // // FSM Transition Logic // + wire [2:0] io_fsm_state_after_busy = opcode_is_input ? IO_FSM_STATE_EXTRA : IO_FSM_STATE_LATENCY_POST1; + always @* begin // case (io_fsm_state) IO_FSM_STATE_IDLE: io_fsm_state_next = ena ? IO_FSM_STATE_LATENCY_PRE1 : IO_FSM_STATE_IDLE ; IO_FSM_STATE_LATENCY_PRE1: io_fsm_state_next = IO_FSM_STATE_LATENCY_PRE2 ; IO_FSM_STATE_LATENCY_PRE2: io_fsm_state_next = IO_FSM_STATE_BUSY ; - IO_FSM_STATE_BUSY: io_fsm_state_next = io_fsm_done ? IO_FSM_STATE_EXTRA : IO_FSM_STATE_BUSY ; + IO_FSM_STATE_BUSY: io_fsm_state_next = io_fsm_done ? 
io_fsm_state_after_busy : IO_FSM_STATE_BUSY ; IO_FSM_STATE_EXTRA: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST1 ; IO_FSM_STATE_LATENCY_POST1: io_fsm_state_next = IO_FSM_STATE_LATENCY_POST2 ; - IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next = IO_FSM_STATE_IDLE ; + IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next = IO_FSM_STATE_STOP ; + IO_FSM_STATE_STOP: io_fsm_state_next = IO_FSM_STATE_IDLE ; endcase // end @@ -517,10 +564,10 @@ module modexpng_io_manager always @(posedge clk) // - if (rst) rdy_reg <= 1'b1; + if (rst) rdy_reg <= 1'b1; else case (io_fsm_state) - IO_FSM_STATE_IDLE: rdy_reg <= ~ena; - IO_FSM_STATE_LATENCY_POST2: rdy_reg <= 1'b1; + IO_FSM_STATE_IDLE: rdy_reg <= ~ena; + IO_FSM_STATE_STOP: rdy_reg <= 1'b1; endcase diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh index 1465c48..2e591e7 100644 --- a/rtl/modexpng_microcode.vh +++ b/rtl/modexpng_microcode.vh @@ -32,8 +32,19 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2; */ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3; +/* CRT tells from which of the dual MMM to read + * NPQ specifies the width of the operand + * AUX is don't care + * LADDER is don't care + * source and destination WIDE are don't care + */ -//localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 4'd0; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 4'd4; +/* CRT is don't care + * NPQ specifies the width of the operand + * AUX is don't care + * LADDER is don't care + */ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 4'd8; /* CRT is don't care @@ -42,6 +53,14 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 4'd8; * LADDER specifies Montgomery ladder mode */ +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 4'd11; +/* CRT is don't care + * NPQ specifies the width of the operand + * AUX is don't care + * LADDER is don't care + * source and destination WIDE are don't care + */ + // CRT localparam [UOP_CRT_W -1:0] UOP_CRT_X = 1'b0; 
localparam [UOP_CRT_W -1:0] UOP_CRT_Y = 1'b1; diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v index b9b41e8..14f1b47 100644 --- a/rtl/modexpng_mmm_dual.v +++ b/rtl/modexpng_mmm_dual.v @@ -17,16 +17,16 @@ module modexpng_mmm_dual rd_wide_xy_bank_aux, rd_wide_xy_addr, rd_wide_xy_addr_aux, - rd_wide_x_dout, - rd_wide_y_dout, - rd_wide_x_dout_aux, - rd_wide_y_dout_aux, + rd_wide_x_din, + rd_wide_y_din, + rd_wide_x_din_aux, + rd_wide_y_din_aux, rd_narrow_xy_ena, rd_narrow_xy_bank, rd_narrow_xy_addr, - rd_narrow_x_dout, - rd_narrow_y_dout, + rd_narrow_x_din, + rd_narrow_y_din, rcmb_wide_xy_bank, rcmb_wide_xy_addr, @@ -82,16 +82,16 @@ module modexpng_mmm_dual output [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr; output [ 8-1:0] rd_wide_xy_addr_aux; - input [18*NUM_MULTS/2-1:0] rd_wide_x_dout; - input [18*NUM_MULTS/2-1:0] rd_wide_y_dout; - input [ 18-1:0] rd_wide_x_dout_aux; - input [ 18-1:0] rd_wide_y_dout_aux; + input [18*NUM_MULTS/2-1:0] rd_wide_x_din; + input [18*NUM_MULTS/2-1:0] rd_wide_y_din; + input [ 18-1:0] rd_wide_x_din_aux; + input [ 18-1:0] rd_wide_y_din_aux; output rd_narrow_xy_ena; output [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; output [ 7:0] rd_narrow_xy_addr; - input [18-1:0] rd_narrow_x_dout; - input [18-1:0] rd_narrow_y_dout; + input [18-1:0] rd_narrow_x_din; + input [18-1:0] rd_narrow_y_din; output [BANK_ADDR_W -1:0] rcmb_wide_xy_bank; output [ 7:0] rcmb_wide_xy_addr; @@ -626,8 +626,8 @@ module modexpng_mmm_dual //end //endgenerate - assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout}; - assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout}; + assign dsp_x_a = {rd_wide_x_din_aux, rd_wide_x_din}; + assign dsp_y_a = {rd_wide_y_din_aux, rd_wide_y_din}; //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux; //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux; @@ -730,25 +730,25 @@ module modexpng_mmm_dual // // On-the-fly Carry Recombination // - wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, 
dsp_xy_b_carry}; - wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry}; - wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry; + wire [17:0] rd_narrow_x_din_carry = rd_narrow_x_din + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_y_din_carry = rd_narrow_y_din + {{16{1'b0}}, dsp_xy_b_carry}; + wire [17:0] rd_narrow_xy_din_carry_mux = ladder_mode ? rd_narrow_y_din_carry : rd_narrow_x_din_carry; wire [15:0] rd_narrow_xy_dout_carry_mux_or_unity = !force_unity_b ? - rd_narrow_xy_dout_carry_mux[15:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO; + rd_narrow_xy_din_carry_mux[15:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO; always @(posedge clk) // if (narrow_xy_ena_dly2) begin // rewrite // if (!dsp_merge_xy_b) begin - dsp_x_b <= rd_narrow_x_dout[15:0]; - dsp_y_b <= rd_narrow_y_dout[15:0]; + dsp_x_b <= rd_narrow_x_din[15:0]; + dsp_y_b <= rd_narrow_y_din[15:0]; dsp_xy_b_carry <= 2'b00; end else begin dsp_x_b <= rd_narrow_xy_dout_carry_mux_or_unity; dsp_y_b <= rd_narrow_xy_dout_carry_mux_or_unity; - dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16]; + dsp_xy_b_carry <= rd_narrow_xy_din_carry_mux[17:16]; end // end else begin @@ -845,6 +845,7 @@ module modexpng_mmm_dual modexpng_recombinator_block recombinator_block ( .clk (clk), + .rst (rst), .ena (rcmb_ena), .rdy (rcmb_rdy), diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh index e610e47..6e6c3ca 100644 --- a/rtl/modexpng_parameters.vh +++ b/rtl/modexpng_parameters.vh @@ -31,9 +31,9 @@ localparam COL_INDEX_W = OP_ADDR_W - cryptech_clog2(NUM_MULTS); localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS); -localparam RDCT_CARRY_W = WORD_EXT_W - WORD_W; +localparam CARRY_W = WORD_EXT_W - WORD_W; -localparam [RDCT_CARRY_W-1:0] RDCT_CARRY_ZEROES = {RDCT_CARRY_W{1'b0}}; +localparam [CARRY_W-1:0] CARRY_ZERO = {CARRY_W{1'b0}}; localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0; localparam [BANK_ADDR_W-1:0] BANK_WIDE_B = 
3'd1; diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v index a37333e..c100b8b 100644 --- a/rtl/modexpng_reductor.v +++ b/rtl/modexpng_reductor.v @@ -4,8 +4,8 @@ module modexpng_reductor ena, rdy, word_index_last, sel_wide_out, sel_narrow_out, - rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux, - rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid, + rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_din_aux, rd_wide_y_din_aux, + rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_din, rcmb_final_y_din, rcmb_final_xy_valid, rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_dout, rdct_wide_y_dout, rdct_wide_xy_valid, rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid ); @@ -41,13 +41,13 @@ module modexpng_reductor */ input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; input [ 7:0] rd_wide_xy_addr_aux; - input [ 17:0] rd_wide_x_dout_aux; - input [ 17:0] rd_wide_y_dout_aux; + input [ 17:0] rd_wide_x_din_aux; + input [ 17:0] rd_wide_y_din_aux; // input [ BANK_ADDR_W -1:0] rcmb_final_xy_bank; input [ 7:0] rcmb_final_xy_addr; - input [ 17:0] rcmb_final_x_dout; - input [ 17:0] rcmb_final_y_dout; + input [ 17:0] rcmb_final_x_din; + input [ 17:0] rcmb_final_y_din; input rcmb_final_xy_valid; output [ 2:0] rdct_wide_xy_bank; @@ -121,8 +121,8 @@ module modexpng_reductor if (rcmb_final_xy_valid) begin rcmb_xy_bank_dly1 <= rcmb_final_xy_bank; rcmb_xy_addr_dly1 <= rcmb_final_xy_addr; - rcmb_x_dout_dly1 <= rcmb_final_x_dout; - rcmb_y_dout_dly1 <= rcmb_final_y_dout; + rcmb_x_dout_dly1 <= rcmb_final_x_din; + rcmb_y_dout_dly1 <= rcmb_final_y_din; end // if (rcmb_xy_valid_dly1) begin @@ -167,14 +167,14 @@ module modexpng_reductor case (rcmb_xy_bank_dly3) BANK_RCMB_ML: begin - {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry; - {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + 
rd_wide_y_dout_aux + rcmb_y_lsb_carry; + {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry; + {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry; end BANK_RCMB_MH: if (rcmb_xy_addr_dly3 == 8'd0) begin - {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry; - {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry; + {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry; + {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry; end endcase @@ -273,8 +273,8 @@ module modexpng_reductor // // // - wire [17:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux; - wire [17:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux; + wire [17:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_din_aux; + wire [17:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_din_aux; wire [17:0] sum_rdct_x_carry = sum_rdct_x + {16'h0000, rcmb_x_lsb_carry}; wire [17:0] sum_rdct_y_carry = sum_rdct_y + {16'h0000, rcmb_y_lsb_carry}; diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v index f1d5ae2..19601ef 100644 --- a/rtl/modexpng_storage_block.v +++ b/rtl/modexpng_storage_block.v @@ -1,6 +1,6 @@ module modexpng_storage_block ( - clk, clk_bus, rst, + clk, rst, wr_wide_xy_ena, wr_wide_xy_bank, @@ -29,7 +29,19 @@ module modexpng_storage_block rd_narrow_xy_bank, rd_narrow_xy_addr, rd_narrow_x_dout, - rd_narrow_y_dout + rd_narrow_y_dout, + + wrk_wide_xy_ena, + wrk_wide_xy_bank, + wrk_wide_xy_addr, + wrk_wide_x_dout, + wrk_wide_y_dout, + + wrk_narrow_xy_ena, + wrk_narrow_xy_bank, + wrk_narrow_xy_addr, + wrk_narrow_x_dout, + wrk_narrow_y_dout ); // @@ -42,7 +54,6 @@ module modexpng_storage_block // Ports // input clk; - input clk_bus; input rst; input wr_wide_xy_ena; @@ -74,6 +85,18 @@ module modexpng_storage_block output [ WORD_EXT_W 
-1:0] rd_narrow_x_dout; output [ WORD_EXT_W -1:0] rd_narrow_y_dout; + input wrk_wide_xy_ena; + input [ BANK_ADDR_W -1:0] wrk_wide_xy_bank; + input [ OP_ADDR_W -1:0] wrk_wide_xy_addr; + output [ WORD_EXT_W -1:0] wrk_wide_x_dout; + output [ WORD_EXT_W -1:0] wrk_wide_y_dout; + + input wrk_narrow_xy_ena; + input [ BANK_ADDR_W -1:0] wrk_narrow_xy_bank; + input [ OP_ADDR_W -1:0] wrk_narrow_xy_addr; + output [ WORD_EXT_W -1:0] wrk_narrow_x_dout; + output [ WORD_EXT_W -1:0] wrk_narrow_y_dout; + // // Internal Registers @@ -81,6 +104,8 @@ module modexpng_storage_block reg rd_wide_xy_reg_ena = 1'b0; reg rd_wide_xy_reg_ena_aux = 1'b0; reg rd_narrow_xy_reg_ena = 1'b0; + reg wrk_wide_xy_reg_ena = 1'b0; + reg wrk_narrow_xy_reg_ena = 1'b0; always @(posedge clk) // @@ -88,10 +113,14 @@ module modexpng_storage_block rd_wide_xy_reg_ena <= 1'b0; rd_wide_xy_reg_ena_aux <= 1'b0; rd_narrow_xy_reg_ena <= 1'b0; + wrk_wide_xy_reg_ena <= 1'b0; + wrk_narrow_xy_reg_ena <= 1'b0; end else begin rd_wide_xy_reg_ena <= rd_wide_xy_ena; rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux; rd_narrow_xy_reg_ena <= rd_narrow_xy_ena; + wrk_wide_xy_reg_ena <= wrk_wide_xy_ena; + wrk_narrow_xy_reg_ena <= wrk_narrow_xy_ena; end // @@ -102,22 +131,26 @@ module modexpng_storage_block wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset; wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset; wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wrk_wide_xy_offset; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] wrk_narrow_xy_offset; assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux}; - assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr}; - assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr}; - assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr}; + assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr }; + assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr }; + assign wr_narrow_xy_offset = 
{wr_narrow_xy_bank, wr_narrow_xy_addr }; + assign wrk_wide_xy_offset = {wrk_wide_xy_bank, wrk_wide_xy_addr }; + assign wrk_narrow_xy_offset = {wrk_narrow_xy_bank, wrk_narrow_xy_addr }; // // "Wide" Storage // genvar z; generate for (z=0; z