module modexpng_core_top
(
    clk, clk_bus,
    rst,
    next, valid,
    crt_mode,
    word_index_last_n, word_index_last_pq,
    bit_index_last_n, bit_index_last_pq,
    bus_cs, bus_we, bus_addr, bus_data_wr, bus_data_rd
);

//
// Headers (all upper-case widths, opcodes and bank names used below are
// expected to come from these two files)
//
`include "modexpng_parameters.vh"
`include "modexpng_microcode.vh"

//
// Ports
//
input                                         clk;      // core clock: every always block in view uses it
input                                         clk_bus;  // only forwarded to the IO block
input                                         rst;      // synchronous reset (sampled inside posedge clk blocks)

input                                         next;     // leaves the IDLE state of the uOP FSM
output                                        valid;    // not driven in this part of the file

input                                         crt_mode; // selects the microcode entry point and the ladder length

input  [                     OP_ADDR_W -1:0]  word_index_last_n;
input  [                     OP_ADDR_W -1:0]  word_index_last_pq;
input  [                   BIT_INDEX_W -1:0]  bit_index_last_n;
input  [                   BIT_INDEX_W -1:0]  bit_index_last_pq;

input                                         bus_cs;
input                                         bus_we;
input  [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
input  [                    BUS_DATA_W -1:0]  bus_data_wr;
output [                    BUS_DATA_W -1:0]  bus_data_rd;

//
// UOP_FSM: IDLE -> FETCH -> DECODE -> BUSY state encoding
//
localparam [1:0] UOP_FSM_STATE_IDLE   = 2'b00;
localparam [1:0] UOP_FSM_STATE_FETCH  = 2'b01;
localparam [1:0] UOP_FSM_STATE_DECODE = 2'b10;
localparam [1:0] UOP_FSM_STATE_BUSY   = 2'b11;

reg [1:0] uop_fsm_state = UOP_FSM_STATE_IDLE;
reg [1:0] uop_fsm_state_next;

//
// UOP ROM
//
reg  [UOP_ADDR_W -1:0] uop_addr;
wire [UOP_W      -1:0] uop_data;

// The micro-operation word is sliced MSB-first into:
//   opcode | crt | npq | aux | ladder | sel_wide_in | sel_narrow_in | sel_wide_out | sel_narrow_out
wire [UOP_OPCODE_W -1:0] uop_data_opcode =
    uop_data[UOP_W -1-: UOP_OPCODE_W];
wire [UOP_CRT_W -1:0] uop_data_crt =
    uop_data[UOP_W -UOP_OPCODE_W -1-: UOP_CRT_W];
wire [UOP_NPQ_W -1:0] uop_data_npq =
    uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -1-: UOP_NPQ_W];
wire [UOP_AUX_W -1:0] uop_data_aux =
    uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -1-: UOP_AUX_W];
wire [UOP_LADDER_W -1:0] uop_data_ladder =
    uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -1-: UOP_LADDER_W];
wire [BANK_ADDR_W -1:0] uop_data_sel_wide_in =
    uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1-: BANK_ADDR_W];
wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_in =
    uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1*BANK_ADDR_W -1-: BANK_ADDR_W];
wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out =
    uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W];
wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out =
    uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W];

// Opcode classes: each class is served by one execution unit (see the
// completion detector and trigger logic further down in the module).
wire uop_opcode_is_stop   =  uop_data_opcode == UOP_OPCODE_STOP;

wire uop_opcode_is_in     = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE       ) ||
                            (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW     );

wire uop_opcode_is_out    =  uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW;

wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT         ) ||
                            (uop_data_opcode == UOP_OPCODE_LADDER_STEP         );

wire uop_opcode_is_mmm    = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY    ) ||
                            (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC );

wire uop_opcode_is_wrk    = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES   ) ||
                            (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X        ) ||
                            (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
                            (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y    ) ||
                            (uop_data_opcode == UOP_OPCODE_CROSS_LADDERS_X2Y   ) ||
                            (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT    );

// Driven further down, once the IO manager's ladder_done flag exists.
wire uop_loop_now;

// Entry point into the microcode depends on whether CRT is used.
wire [UOP_ADDR_W -1:0] uop_addr_offset =
    crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT;

// Normally advance by one; when looping, step back by one micro-operation.
wire [UOP_ADDR_W -1:0] uop_addr_next =
    uop_loop_now ? uop_addr - 1'b1: uop_addr + 1'b1;

modexpng_uop_rom uop_rom
(
    .clk  (clk),
    .addr (uop_addr),
    .data (uop_data)
);

//
// UOP ROM Address Logic
//
// The address only changes when the FSM is about to enter FETCH: coming from
// IDLE it is loaded with the entry offset, otherwise with the next address.
//
always @(posedge clk)
    if (uop_fsm_state_next == UOP_FSM_STATE_FETCH)
        uop_addr <= (uop_fsm_state == UOP_FSM_STATE_IDLE) ? uop_addr_offset : uop_addr_next;

//
// Storage Interfaces (X, Y)
//
// Every net exists twice: the *_x copy belongs to the X half of the datapath,
// the *_y copy to the Y half.
//

// WR, WIDE: storage manager -> storage block
wire                                    wr_wide_xy_ena_x,         wr_wide_xy_ena_y;
wire [                BANK_ADDR_W -1:0] wr_wide_xy_bank_x,        wr_wide_xy_bank_y;
wire [                  OP_ADDR_W -1:0] wr_wide_xy_addr_x,        wr_wide_xy_addr_y;
wire [                 WORD_EXT_W -1:0] wr_wide_x_data_x,         wr_wide_x_data_y;
wire [                 WORD_EXT_W -1:0] wr_wide_y_data_x,         wr_wide_y_data_y;

// WR, NARROW
wire                                    wr_narrow_xy_ena_x,       wr_narrow_xy_ena_y;
wire [                BANK_ADDR_W -1:0] wr_narrow_xy_bank_x,      wr_narrow_xy_bank_y;
wire [                  OP_ADDR_W -1:0] wr_narrow_xy_addr_x,      wr_narrow_xy_addr_y;
wire [                 WORD_EXT_W -1:0] wr_narrow_x_data_x,       wr_narrow_x_data_y;
wire [                 WORD_EXT_W -1:0] wr_narrow_y_data_x,       wr_narrow_y_data_y;

// RD, WIDE: multi-word main port plus a single-word auxiliary port
wire                                    rd_wide_xy_ena_x,         rd_wide_xy_ena_y;
wire                                    rd_wide_xy_ena_aux_x,     rd_wide_xy_ena_aux_y;
wire [                BANK_ADDR_W -1:0] rd_wide_xy_bank_x,        rd_wide_xy_bank_y;
wire [                BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x,    rd_wide_xy_bank_aux_y;
wire [NUM_MULTS_HALF *  OP_ADDR_W -1:0] rd_wide_xy_addr_x,        rd_wide_xy_addr_y;
wire [                  OP_ADDR_W -1:0] rd_wide_xy_addr_aux_x,    rd_wide_xy_addr_aux_y;
wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_data_x,         rd_wide_x_data_y;
wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_data_x,         rd_wide_y_data_y;
wire [                 WORD_EXT_W -1:0] rd_wide_x_data_aux_x,     rd_wide_x_data_aux_y;
wire [                 WORD_EXT_W -1:0] rd_wide_y_data_aux_x,     rd_wide_y_data_aux_y;

// RD, NARROW
wire                                    rd_narrow_xy_ena_x,       rd_narrow_xy_ena_y;
wire [                BANK_ADDR_W -1:0] rd_narrow_xy_bank_x,      rd_narrow_xy_bank_y;
wire [                  OP_ADDR_W -1:0] rd_narrow_xy_addr_x,      rd_narrow_xy_addr_y;
wire [                 WORD_EXT_W -1:0] rd_narrow_x_data_x,       rd_narrow_x_data_y;
wire [                 WORD_EXT_W -1:0] rd_narrow_y_data_x,       rd_narrow_y_data_y;

// WRK read side, WIDE
wire                                    wrk_rd_wide_xy_ena_x,     wrk_rd_wide_xy_ena_y;
wire [                BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x,    wrk_rd_wide_xy_bank_y;
wire [                  OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x,    wrk_rd_wide_xy_addr_y;
wire [                 WORD_EXT_W -1:0] wrk_rd_wide_x_data_x,     wrk_rd_wide_x_data_y;
wire [                 WORD_EXT_W -1:0] wrk_rd_wide_y_data_x,     wrk_rd_wide_y_data_y;

// WRK read side, NARROW
wire                                    wrk_rd_narrow_xy_ena_x,   wrk_rd_narrow_xy_ena_y;
wire [                BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x,  wrk_rd_narrow_xy_bank_y;
wire [                  OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x,  wrk_rd_narrow_xy_addr_y;
wire [                 WORD_EXT_W -1:0] wrk_rd_narrow_x_data_x,   wrk_rd_narrow_x_data_y;
wire [                 WORD_EXT_W -1:0] wrk_rd_narrow_y_data_x,   wrk_rd_narrow_y_data_y;

// WRK write side, WIDE
wire                                    wrk_wr_wide_xy_ena_x,     wrk_wr_wide_xy_ena_y;
wire [                BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x,    wrk_wr_wide_xy_bank_y;
wire [                  OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x,    wrk_wr_wide_xy_addr_y;
wire [                 WORD_EXT_W -1:0] wrk_wr_wide_x_data_x,     wrk_wr_wide_x_data_y;
wire [                 WORD_EXT_W -1:0] wrk_wr_wide_y_data_x,     wrk_wr_wide_y_data_y;

// WRK write side, NARROW
wire                                    wrk_wr_narrow_xy_ena_x,   wrk_wr_narrow_xy_ena_y;
wire [                BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x,  wrk_wr_narrow_xy_bank_y;
wire [                  OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x,  wrk_wr_narrow_xy_addr_y;
wire [                 WORD_EXT_W -1:0] wrk_wr_narrow_x_data_x,   wrk_wr_narrow_x_data_y;
wire [                 WORD_EXT_W -1:0] wrk_wr_narrow_y_data_x,   wrk_wr_narrow_y_data_y;

// IO, WIDE
wire                                    io_wide_xy_ena_x,         io_wide_xy_ena_y;
wire [                BANK_ADDR_W -1:0] io_wide_xy_bank_x,        io_wide_xy_bank_y;
wire [                  OP_ADDR_W -1:0] io_wide_xy_addr_x,        io_wide_xy_addr_y;
wire [                 WORD_EXT_W -1:0] io_wide_x_data_x,         io_wide_x_data_y;
wire [                 WORD_EXT_W -1:0] io_wide_y_data_x,         io_wide_y_data_y;

// IO, NARROW
wire                                    io_narrow_xy_ena_x,       io_narrow_xy_ena_y;
wire [                BANK_ADDR_W -1:0] io_narrow_xy_bank_x,      io_narrow_xy_bank_y;
wire [                  OP_ADDR_W -1:0] io_narrow_xy_addr_x,      io_narrow_xy_addr_y;
wire [                 WORD_EXT_W -1:0] io_narrow_x_data_x,       io_narrow_x_data_y;
wire [                 WORD_EXT_W -1:0] io_narrow_y_data_x,       io_narrow_y_data_y;

//
// Recombinator Interfaces (X, Y)
//
wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_x,     rcmb_wide_xy_bank_y;
wire [  OP_ADDR_W -1:0] rcmb_wide_xy_addr_x,     rcmb_wide_xy_addr_y;
wire [ WORD_EXT_W -1:0] rcmb_wide_x_data_x,      rcmb_wide_x_data_y;
wire [ WORD_EXT_W -1:0] rcmb_wide_y_data_x,      rcmb_wide_y_data_y;
wire                    rcmb_wide_xy_valid_x,    rcmb_wide_xy_valid_y;

wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_x,   rcmb_narrow_xy_bank_y;
wire [  OP_ADDR_W -1:0] rcmb_narrow_xy_addr_x,   rcmb_narrow_xy_addr_y;
wire [ WORD_EXT_W -1:0] rcmb_narrow_x_data_x,    rcmb_narrow_x_data_y;
wire [ WORD_EXT_W -1:0] rcmb_narrow_y_data_x,    rcmb_narrow_y_data_y;
wire                    rcmb_narrow_xy_valid_x,  rcmb_narrow_xy_valid_y;

wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_x,    rcmb_final_xy_bank_y;
wire [  OP_ADDR_W -1:0] rcmb_final_xy_addr_x,    rcmb_final_xy_addr_y;
wire [ WORD_EXT_W -1:0] rcmb_final_x_data_x,     rcmb_final_x_data_y;
wire [ WORD_EXT_W -1:0] rcmb_final_y_data_x,     rcmb_final_y_data_y;
wire                    rcmb_final_xy_valid_x,   rcmb_final_xy_valid_y;

//
// Reductor Interfaces (X, Y)
//
wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_x,     rdct_wide_xy_bank_y;
wire [  OP_ADDR_W -1:0] rdct_wide_xy_addr_x,     rdct_wide_xy_addr_y;
wire [ WORD_EXT_W -1:0] rdct_wide_x_data_x,      rdct_wide_x_data_y;
wire [ WORD_EXT_W -1:0] rdct_wide_y_data_x,      rdct_wide_y_data_y;
wire                    rdct_wide_xy_valid_x,    rdct_wide_xy_valid_y;

wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_x,   rdct_narrow_xy_bank_y;
wire [  OP_ADDR_W -1:0] rdct_narrow_xy_addr_x,   rdct_narrow_xy_addr_y;
wire [ WORD_EXT_W -1:0] rdct_narrow_x_data_x,    rdct_narrow_x_data_y;
wire [ WORD_EXT_W -1:0] rdct_narrow_y_data_x,    rdct_narrow_y_data_y;
wire                    rdct_narrow_xy_valid_x,  rdct_narrow_xy_valid_y;

//
// Storage Blocks (X, Y)
//
// Each block takes the WR nets on its write side and serves the RD nets and
// the worker's read nets (wrk_rd_*) on its read side.
//
modexpng_storage_block storage_block_x
(
    .clk                 (clk),
    .rst                 (rst),

    .wr_wide_xy_ena      (wr_wide_xy_ena_x),
    .wr_wide_xy_bank     (wr_wide_xy_bank_x),
    .wr_wide_xy_addr     (wr_wide_xy_addr_x),
    .wr_wide_x_din       (wr_wide_x_data_x),
    .wr_wide_y_din       (wr_wide_y_data_x),

    .wr_narrow_xy_ena    (wr_narrow_xy_ena_x),
    .wr_narrow_xy_bank   (wr_narrow_xy_bank_x),
    .wr_narrow_xy_addr   (wr_narrow_xy_addr_x),
    .wr_narrow_x_din     (wr_narrow_x_data_x),
    .wr_narrow_y_din     (wr_narrow_y_data_x),

    .rd_wide_xy_ena      (rd_wide_xy_ena_x),
    .rd_wide_xy_ena_aux  (rd_wide_xy_ena_aux_x),
    .rd_wide_xy_bank     (rd_wide_xy_bank_x),
    .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x),
    .rd_wide_xy_addr     (rd_wide_xy_addr_x),
    .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x),
    .rd_wide_x_dout      (rd_wide_x_data_x),
    .rd_wide_y_dout      (rd_wide_y_data_x),
    .rd_wide_x_dout_aux  (rd_wide_x_data_aux_x),
    .rd_wide_y_dout_aux  (rd_wide_y_data_aux_x),

    .rd_narrow_xy_ena    (rd_narrow_xy_ena_x),
    .rd_narrow_xy_bank   (rd_narrow_xy_bank_x),
    .rd_narrow_xy_addr   (rd_narrow_xy_addr_x),
    .rd_narrow_x_dout    (rd_narrow_x_data_x),
    .rd_narrow_y_dout    (rd_narrow_y_data_x),

    .wrk_wide_xy_ena     (wrk_rd_wide_xy_ena_x),
    .wrk_wide_xy_bank    (wrk_rd_wide_xy_bank_x),
    .wrk_wide_xy_addr    (wrk_rd_wide_xy_addr_x),
    .wrk_wide_x_dout     (wrk_rd_wide_x_data_x),
    .wrk_wide_y_dout     (wrk_rd_wide_y_data_x),

    .wrk_narrow_xy_ena   (wrk_rd_narrow_xy_ena_x),
    .wrk_narrow_xy_bank  (wrk_rd_narrow_xy_bank_x),
    .wrk_narrow_xy_addr  (wrk_rd_narrow_xy_addr_x),
    .wrk_narrow_x_dout   (wrk_rd_narrow_x_data_x),
    .wrk_narrow_y_dout   (wrk_rd_narrow_y_data_x)
);

modexpng_storage_block storage_block_y
(
    .clk                 (clk),
    .rst                 (rst),

    .wr_wide_xy_ena      (wr_wide_xy_ena_y),
    .wr_wide_xy_bank     (wr_wide_xy_bank_y),
    .wr_wide_xy_addr     (wr_wide_xy_addr_y),
    .wr_wide_x_din       (wr_wide_x_data_y),
    .wr_wide_y_din       (wr_wide_y_data_y),

    .wr_narrow_xy_ena    (wr_narrow_xy_ena_y),
    .wr_narrow_xy_bank   (wr_narrow_xy_bank_y),
    .wr_narrow_xy_addr   (wr_narrow_xy_addr_y),
    .wr_narrow_x_din     (wr_narrow_x_data_y),
    .wr_narrow_y_din     (wr_narrow_y_data_y),

    .rd_wide_xy_ena      (rd_wide_xy_ena_y),
    .rd_wide_xy_ena_aux  (rd_wide_xy_ena_aux_y),
    .rd_wide_xy_bank     (rd_wide_xy_bank_y),
    .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y),
    .rd_wide_xy_addr     (rd_wide_xy_addr_y),
    .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y),
    .rd_wide_x_dout      (rd_wide_x_data_y),
    .rd_wide_y_dout      (rd_wide_y_data_y),
    .rd_wide_x_dout_aux  (rd_wide_x_data_aux_y),
    .rd_wide_y_dout_aux  (rd_wide_y_data_aux_y),

    .rd_narrow_xy_ena    (rd_narrow_xy_ena_y),
    .rd_narrow_xy_bank   (rd_narrow_xy_bank_y),
    .rd_narrow_xy_addr   (rd_narrow_xy_addr_y),
    .rd_narrow_x_dout    (rd_narrow_x_data_y),
    .rd_narrow_y_dout    (rd_narrow_y_data_y),

    .wrk_wide_xy_ena     (wrk_rd_wide_xy_ena_y),
    .wrk_wide_xy_bank    (wrk_rd_wide_xy_bank_y),
    .wrk_wide_xy_addr    (wrk_rd_wide_xy_addr_y),
    .wrk_wide_x_dout     (wrk_rd_wide_x_data_y),
    .wrk_wide_y_dout     (wrk_rd_wide_y_data_y),

    .wrk_narrow_xy_ena   (wrk_rd_narrow_xy_ena_y),
    .wrk_narrow_xy_bank  (wrk_rd_narrow_xy_bank_y),
    .wrk_narrow_xy_addr  (wrk_rd_narrow_xy_addr_y),
    .wrk_narrow_x_dout   (wrk_rd_narrow_x_data_y),
    .wrk_narrow_y_dout   (wrk_rd_narrow_y_data_y)
);

//
// Storage Managers (X, Y)
//
// Each manager collects the IO, recombinator, reductor and worker-write nets
// and produces the WR nets consumed by the matching storage block.
//
modexpng_storage_manager storage_manager_x
(
    .clk                  (clk),
    .rst                  (rst),

    .wr_wide_xy_ena       (wr_wide_xy_ena_x),
    .wr_wide_xy_bank      (wr_wide_xy_bank_x),
    .wr_wide_xy_addr      (wr_wide_xy_addr_x),
    .wr_wide_x_dout       (wr_wide_x_data_x),
    .wr_wide_y_dout       (wr_wide_y_data_x),

    .wr_narrow_xy_ena     (wr_narrow_xy_ena_x),
    .wr_narrow_xy_bank    (wr_narrow_xy_bank_x),
    .wr_narrow_xy_addr    (wr_narrow_xy_addr_x),
    .wr_narrow_x_dout     (wr_narrow_x_data_x),
    .wr_narrow_y_dout     (wr_narrow_y_data_x),

    .io_wide_xy_ena       (io_wide_xy_ena_x),
    .io_wide_xy_bank      (io_wide_xy_bank_x),
    .io_wide_xy_addr      (io_wide_xy_addr_x),
    .io_wide_x_din        (io_wide_x_data_x),
    .io_wide_y_din        (io_wide_y_data_x),

    .io_narrow_xy_ena     (io_narrow_xy_ena_x),
    .io_narrow_xy_bank    (io_narrow_xy_bank_x),
    .io_narrow_xy_addr    (io_narrow_xy_addr_x),
    .io_narrow_x_din      (io_narrow_x_data_x),
    .io_narrow_y_din      (io_narrow_y_data_x),

    .rcmb_wide_xy_bank    (rcmb_wide_xy_bank_x),
    .rcmb_wide_xy_addr    (rcmb_wide_xy_addr_x),
    .rcmb_wide_x_din      (rcmb_wide_x_data_x),
    .rcmb_wide_y_din      (rcmb_wide_y_data_x),
    .rcmb_wide_xy_ena     (rcmb_wide_xy_valid_x),

    .rcmb_narrow_xy_bank  (rcmb_narrow_xy_bank_x),
    .rcmb_narrow_xy_addr  (rcmb_narrow_xy_addr_x),
    .rcmb_narrow_x_din    (rcmb_narrow_x_data_x),
    .rcmb_narrow_y_din    (rcmb_narrow_y_data_x),
    .rcmb_narrow_xy_ena   (rcmb_narrow_xy_valid_x),

    .rdct_wide_xy_bank    (rdct_wide_xy_bank_x),
    .rdct_wide_xy_addr    (rdct_wide_xy_addr_x),
    .rdct_wide_x_din      (rdct_wide_x_data_x),
    .rdct_wide_y_din      (rdct_wide_y_data_x),
    .rdct_wide_xy_valid   (rdct_wide_xy_valid_x),

    .rdct_narrow_xy_bank  (rdct_narrow_xy_bank_x),
    .rdct_narrow_xy_addr  (rdct_narrow_xy_addr_x),
    .rdct_narrow_x_din    (rdct_narrow_x_data_x),
    .rdct_narrow_y_din    (rdct_narrow_y_data_x),
    .rdct_narrow_xy_valid (rdct_narrow_xy_valid_x),

    .wrk_wide_xy_ena      (wrk_wr_wide_xy_ena_x),
    .wrk_wide_xy_bank     (wrk_wr_wide_xy_bank_x),
    .wrk_wide_xy_addr     (wrk_wr_wide_xy_addr_x),
    .wrk_wide_x_din       (wrk_wr_wide_x_data_x),
    .wrk_wide_y_din       (wrk_wr_wide_y_data_x),

    .wrk_narrow_xy_ena    (wrk_wr_narrow_xy_ena_x),
    .wrk_narrow_xy_bank   (wrk_wr_narrow_xy_bank_x),
    .wrk_narrow_xy_addr   (wrk_wr_narrow_xy_addr_x),
    .wrk_narrow_x_din     (wrk_wr_narrow_x_data_x),
    .wrk_narrow_y_din     (wrk_wr_narrow_y_data_x)
);

modexpng_storage_manager storage_manager_y
(
    .clk                  (clk),
    .rst                  (rst),

    .wr_wide_xy_ena       (wr_wide_xy_ena_y),
    .wr_wide_xy_bank      (wr_wide_xy_bank_y),
    .wr_wide_xy_addr      (wr_wide_xy_addr_y),
    .wr_wide_x_dout       (wr_wide_x_data_y),
    .wr_wide_y_dout       (wr_wide_y_data_y),

    .wr_narrow_xy_ena     (wr_narrow_xy_ena_y),
    .wr_narrow_xy_bank    (wr_narrow_xy_bank_y),
    .wr_narrow_xy_addr    (wr_narrow_xy_addr_y),
    .wr_narrow_x_dout     (wr_narrow_x_data_y),
    .wr_narrow_y_dout     (wr_narrow_y_data_y),

    .io_wide_xy_ena       (io_wide_xy_ena_y),
    .io_wide_xy_bank      (io_wide_xy_bank_y),
    .io_wide_xy_addr      (io_wide_xy_addr_y),
    .io_wide_x_din        (io_wide_x_data_y),
    .io_wide_y_din        (io_wide_y_data_y),

    .io_narrow_xy_ena     (io_narrow_xy_ena_y),
    .io_narrow_xy_bank    (io_narrow_xy_bank_y),
    .io_narrow_xy_addr    (io_narrow_xy_addr_y),
    .io_narrow_x_din      (io_narrow_x_data_y),
    .io_narrow_y_din      (io_narrow_y_data_y),

    .rcmb_wide_xy_bank    (rcmb_wide_xy_bank_y),
    .rcmb_wide_xy_addr    (rcmb_wide_xy_addr_y),
    .rcmb_wide_x_din      (rcmb_wide_x_data_y),
    .rcmb_wide_y_din      (rcmb_wide_y_data_y),
    .rcmb_wide_xy_ena     (rcmb_wide_xy_valid_y),

    .rcmb_narrow_xy_bank  (rcmb_narrow_xy_bank_y),
    .rcmb_narrow_xy_addr  (rcmb_narrow_xy_addr_y),
    .rcmb_narrow_x_din    (rcmb_narrow_x_data_y),
    .rcmb_narrow_y_din    (rcmb_narrow_y_data_y),
    .rcmb_narrow_xy_ena   (rcmb_narrow_xy_valid_y),

    .rdct_wide_xy_bank    (rdct_wide_xy_bank_y),
    .rdct_wide_xy_addr    (rdct_wide_xy_addr_y),
    .rdct_wide_x_din      (rdct_wide_x_data_y),
    .rdct_wide_y_din      (rdct_wide_y_data_y),
    .rdct_wide_xy_valid   (rdct_wide_xy_valid_y),

    .rdct_narrow_xy_bank  (rdct_narrow_xy_bank_y),
    .rdct_narrow_xy_addr  (rdct_narrow_xy_addr_y),
    .rdct_narrow_x_din    (rdct_narrow_x_data_y),
    .rdct_narrow_y_din    (rdct_narrow_y_data_y),
    .rdct_narrow_xy_valid (rdct_narrow_xy_valid_y),

    .wrk_wide_xy_ena      (wrk_wr_wide_xy_ena_y),
    .wrk_wide_xy_bank     (wrk_wr_wide_xy_bank_y),
    .wrk_wide_xy_addr     (wrk_wr_wide_xy_addr_y),
    .wrk_wide_x_din       (wrk_wr_wide_x_data_y),
    .wrk_wide_y_din       (wrk_wr_wide_y_data_y),

    .wrk_narrow_xy_ena    (wrk_wr_narrow_xy_ena_y),
    .wrk_narrow_xy_bank   (wrk_wr_narrow_xy_bank_y),
    .wrk_narrow_xy_addr   (wrk_wr_narrow_xy_addr_y),
    .wrk_narrow_x_din     (wrk_wr_narrow_x_data_y),
    .wrk_narrow_y_din     (wrk_wr_narrow_y_data_y)
);

//
// IO Block
//
// Two input-side ports and one output-side port between the IO block and
// the IO manager.
//
wire                              io_in_1_en,    io_in_2_en;
wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr,  io_in_2_addr;
wire [                WORD_W -1:0] io_in_1_data,  io_in_2_data;

wire                              io_out_en;
wire                              io_out_we;
wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr;
wire [                WORD_W -1:0] io_out_data;

// TODO: Separate reset for clock domains (core/bus)???
modexpng_io_block io_block
(
    .clk         (clk),
    .clk_bus     (clk_bus),
    .rst         (rst),

    // external bus side
    .bus_cs      (bus_cs),
    .bus_we      (bus_we),
    .bus_addr    (bus_addr),
    .bus_data_wr (bus_data_wr),
    .bus_data_rd (bus_data_rd),

    // core side, towards the IO manager
    .in_1_en     (io_in_1_en),
    .in_1_addr   (io_in_1_addr),
    .in_1_dout   (io_in_1_data),

    .in_2_en     (io_in_2_en),
    .in_2_addr   (io_in_2_addr),
    .in_2_dout   (io_in_2_data),

    .out_en      (io_out_en),
    .out_we      (io_out_we),
    .out_addr    (io_out_addr),
    .out_din     (io_out_data)
);

//
// IO Manager
//
reg                      io_mgr_ena = 1'b0;     // one-cycle start strobe (see trigger logic)
wire                     io_mgr_rdy;

// Operation parameters, latched while the uOP FSM is in DECODE.
reg  [UOP_CRT_W    -1:0] io_mgr_sel_crt;
reg  [UOP_AUX_W    -1:0] io_mgr_sel_aux;
reg  [BANK_ADDR_W  -1:0] io_mgr_sel_in;
reg  [BANK_ADDR_W  -1:0] io_mgr_sel_out;
reg  [OP_ADDR_W    -1:0] io_mgr_word_index_last;
reg  [UOP_OPCODE_W -1:0] io_mgr_opcode;

reg  [BIT_INDEX_W  -1:0] io_mgr_ladder_steps;
wire                     io_mgr_ladder_d;
wire                     io_mgr_ladder_p;
wire                     io_mgr_ladder_q;
wire                     io_mgr_ladder_done;

// Keep looping on a LADDER_STEP micro-operation until the IO manager
// reports that the ladder is done.
assign uop_loop_now = (uop_data_opcode == UOP_OPCODE_LADDER_STEP) && !io_mgr_ladder_done;

// The IO manager only receives the low WORD_W bits of the worker's narrow X
// read data.
wire [WORD_W -1:0] wrk_rd_narrow_x_data_x_trunc = wrk_rd_narrow_x_data_x[WORD_W-1:0];
wire [WORD_W -1:0] wrk_rd_narrow_x_data_y_trunc = wrk_rd_narrow_x_data_y[WORD_W-1:0];

modexpng_io_manager io_manager
(
    .clk                      (clk),
    .rst                      (rst),

    .ena                      (io_mgr_ena),
    .rdy                      (io_mgr_rdy),

    .sel_crt                  (io_mgr_sel_crt),
    .sel_aux                  (io_mgr_sel_aux),
    .sel_in                   (io_mgr_sel_in),
    .sel_out                  (io_mgr_sel_out),

    .opcode                   (io_mgr_opcode),

    .word_index_last          (io_mgr_word_index_last),

    .io_wide_xy_ena_x         (io_wide_xy_ena_x),
    .io_wide_xy_bank_x        (io_wide_xy_bank_x),
    .io_wide_xy_addr_x        (io_wide_xy_addr_x),
    .io_wide_x_din_x          (io_wide_x_data_x),
    .io_wide_y_din_x          (io_wide_y_data_x),

    .io_narrow_xy_ena_x       (io_narrow_xy_ena_x),
    .io_narrow_xy_bank_x      (io_narrow_xy_bank_x),
    .io_narrow_xy_addr_x      (io_narrow_xy_addr_x),
    .io_narrow_x_din_x        (io_narrow_x_data_x),
    .io_narrow_y_din_x        (io_narrow_y_data_x),

    .io_wide_xy_ena_y         (io_wide_xy_ena_y),
    .io_wide_xy_bank_y        (io_wide_xy_bank_y),
    .io_wide_xy_addr_y        (io_wide_xy_addr_y),
    .io_wide_x_din_y          (io_wide_x_data_y),
    .io_wide_y_din_y          (io_wide_y_data_y),

    .io_narrow_xy_ena_y       (io_narrow_xy_ena_y),
    .io_narrow_xy_bank_y      (io_narrow_xy_bank_y),
    .io_narrow_xy_addr_y      (io_narrow_xy_addr_y),
    .io_narrow_x_din_y        (io_narrow_x_data_y),
    .io_narrow_y_din_y        (io_narrow_y_data_y),

    .io_in_1_en               (io_in_1_en),
    .io_in_1_addr             (io_in_1_addr),
    .io_in_1_din              (io_in_1_data),

    .io_in_2_en               (io_in_2_en),
    .io_in_2_addr             (io_in_2_addr),
    .io_in_2_din              (io_in_2_data),

    .io_out_en                (io_out_en),
    .io_out_we                (io_out_we),
    .io_out_addr              (io_out_addr),
    .io_out_dout              (io_out_data),

    .wrk_narrow_x_din_x_trunc (wrk_rd_narrow_x_data_x_trunc),
    .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc),

    .ladder_steps             (io_mgr_ladder_steps),
    .ladder_d                 (io_mgr_ladder_d),
    .ladder_p                 (io_mgr_ladder_p),
    .ladder_q                 (io_mgr_ladder_q),
    .ladder_done              (io_mgr_ladder_done)
);

//
// Multipliers (X, Y)
//
// Both multipliers are started together and the pair counts as ready only
// when both report ready.
//
reg  mmm_ena_x = 1'b0;
reg  mmm_ena_y = 1'b0;
wire mmm_ena   = mmm_ena_x & mmm_ena_y;

wire mmm_rdy_x;
wire mmm_rdy_y;
wire mmm_rdy   = mmm_rdy_x & mmm_rdy_y;

reg  [OP_ADDR_W   -1:0] mmm_word_index_last_x,         mmm_word_index_last_y;
reg  [OP_ADDR_W   -1:0] mmm_word_index_last_minus1_x,  mmm_word_index_last_minus1_y;
reg                     mmm_ladder_mode_x,             mmm_ladder_mode_y;
reg  [BANK_ADDR_W -1:0] mmm_sel_wide_in_x,             mmm_sel_wide_in_y;
reg  [BANK_ADDR_W -1:0] mmm_sel_narrow_in_x,           mmm_sel_narrow_in_y;
reg                     mmm_force_unity_b_x,           mmm_force_unity_b_y;
reg                     mmm_only_reduce_x,             mmm_only_reduce_y;

// Handshake between each multiplier and its reductor.
wire rdct_ena_x, rdct_ena_y;
wire rdct_rdy_x, rdct_rdy_y;

modexpng_mmm_dual mmm_x
(
    .clk                    (clk),
    .rst                    (rst),

    .ena                    (mmm_ena_x),
    .rdy                    (mmm_rdy_x),

    .ladder_mode            (mmm_ladder_mode_x),
    .word_index_last        (mmm_word_index_last_x),
    .word_index_last_minus1 (mmm_word_index_last_minus1_x),
    .force_unity_b          (mmm_force_unity_b_x),
    .only_reduce            (mmm_only_reduce_x),

    .sel_wide_in            (mmm_sel_wide_in_x),
    .sel_narrow_in          (mmm_sel_narrow_in_x),

    .rd_wide_xy_ena         (rd_wide_xy_ena_x),
    .rd_wide_xy_ena_aux     (rd_wide_xy_ena_aux_x),
    .rd_wide_xy_bank        (rd_wide_xy_bank_x),
    .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux_x),
    .rd_wide_xy_addr        (rd_wide_xy_addr_x),
    .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux_x),
    .rd_wide_x_din          (rd_wide_x_data_x),
    .rd_wide_y_din          (rd_wide_y_data_x),
    .rd_wide_x_din_aux      (rd_wide_x_data_aux_x),
    .rd_wide_y_din_aux      (rd_wide_y_data_aux_x),

    .rd_narrow_xy_ena       (rd_narrow_xy_ena_x),
    .rd_narrow_xy_bank      (rd_narrow_xy_bank_x),
    .rd_narrow_xy_addr      (rd_narrow_xy_addr_x),
    .rd_narrow_x_din        (rd_narrow_x_data_x),
    .rd_narrow_y_din        (rd_narrow_y_data_x),

    .rcmb_wide_xy_bank      (rcmb_wide_xy_bank_x),
    .rcmb_wide_xy_addr      (rcmb_wide_xy_addr_x),
    .rcmb_wide_x_dout       (rcmb_wide_x_data_x),
    .rcmb_wide_y_dout       (rcmb_wide_y_data_x),
    .rcmb_wide_xy_valid     (rcmb_wide_xy_valid_x),

    .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank_x),
    .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr_x),
    .rcmb_narrow_x_dout     (rcmb_narrow_x_data_x),
    .rcmb_narrow_y_dout     (rcmb_narrow_y_data_x),
    .rcmb_narrow_xy_valid   (rcmb_narrow_xy_valid_x),

    .rcmb_xy_bank           (rcmb_final_xy_bank_x),
    .rcmb_xy_addr           (rcmb_final_xy_addr_x),
    .rcmb_x_dout            (rcmb_final_x_data_x),
    .rcmb_y_dout            (rcmb_final_y_data_x),
    .rcmb_xy_valid          (rcmb_final_xy_valid_x),

    .rdct_ena               (rdct_ena_x),
    .rdct_rdy               (rdct_rdy_x)
);

modexpng_mmm_dual mmm_y
(
    .clk                    (clk),
    .rst                    (rst),

    .ena                    (mmm_ena_y),
    .rdy                    (mmm_rdy_y),

    .ladder_mode            (mmm_ladder_mode_y),
    .word_index_last        (mmm_word_index_last_y),
    .word_index_last_minus1 (mmm_word_index_last_minus1_y),
    .force_unity_b          (mmm_force_unity_b_y),
    .only_reduce            (mmm_only_reduce_y),

    .sel_wide_in            (mmm_sel_wide_in_y),
    .sel_narrow_in          (mmm_sel_narrow_in_y),

    .rd_wide_xy_ena         (rd_wide_xy_ena_y),
    .rd_wide_xy_ena_aux     (rd_wide_xy_ena_aux_y),
    .rd_wide_xy_bank        (rd_wide_xy_bank_y),
    .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux_y),
    .rd_wide_xy_addr        (rd_wide_xy_addr_y),
    .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux_y),
    .rd_wide_x_din          (rd_wide_x_data_y),
    .rd_wide_y_din          (rd_wide_y_data_y),
    .rd_wide_x_din_aux      (rd_wide_x_data_aux_y),
    .rd_wide_y_din_aux      (rd_wide_y_data_aux_y),

    .rd_narrow_xy_ena       (rd_narrow_xy_ena_y),
    .rd_narrow_xy_bank      (rd_narrow_xy_bank_y),
    .rd_narrow_xy_addr      (rd_narrow_xy_addr_y),
    .rd_narrow_x_din        (rd_narrow_x_data_y),
    .rd_narrow_y_din        (rd_narrow_y_data_y),

    .rcmb_wide_xy_bank      (rcmb_wide_xy_bank_y),
    .rcmb_wide_xy_addr      (rcmb_wide_xy_addr_y),
    .rcmb_wide_x_dout       (rcmb_wide_x_data_y),
    .rcmb_wide_y_dout       (rcmb_wide_y_data_y),
    .rcmb_wide_xy_valid     (rcmb_wide_xy_valid_y),

    .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank_y),
    .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr_y),
    .rcmb_narrow_x_dout     (rcmb_narrow_x_data_y),
    .rcmb_narrow_y_dout     (rcmb_narrow_y_data_y),
    .rcmb_narrow_xy_valid   (rcmb_narrow_xy_valid_y),

    .rcmb_xy_bank           (rcmb_final_xy_bank_y),
    .rcmb_xy_addr           (rcmb_final_xy_addr_y),
    .rcmb_x_dout            (rcmb_final_x_data_y),
    .rcmb_y_dout            (rcmb_final_y_data_y),
    .rcmb_xy_valid          (rcmb_final_xy_valid_y),

    .rdct_ena               (rdct_ena_y),
    .rdct_rdy               (rdct_rdy_y)
);

//
// Reductors (X, Y)
//
reg [  OP_ADDR_W -1:0] rdct_word_index_last_x;
reg [  OP_ADDR_W -1:0] rdct_word_index_last_y;

reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_x;
reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_x;
reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_y;
reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_y;

modexpng_reductor reductor_x
(
    .clk                  (clk),
    .rst                  (rst),

    .ena                  (rdct_ena_x),
    .rdy                  (rdct_rdy_x),

    .word_index_last      (rdct_word_index_last_x),

    .sel_wide_out         (rdct_sel_wide_out_x),
    .sel_narrow_out       (rdct_sel_narrow_out_x),

    .rd_wide_xy_addr_aux  (rd_wide_xy_addr_aux_x),
    .rd_wide_xy_bank_aux  (rd_wide_xy_bank_aux_x),
    .rd_wide_x_din_aux    (rd_wide_x_data_aux_x),
    .rd_wide_y_din_aux    (rd_wide_y_data_aux_x),

    .rcmb_final_xy_bank   (rcmb_final_xy_bank_x),
    .rcmb_final_xy_addr   (rcmb_final_xy_addr_x),
    .rcmb_final_x_din     (rcmb_final_x_data_x),
    .rcmb_final_y_din     (rcmb_final_y_data_x),
    .rcmb_final_xy_valid  (rcmb_final_xy_valid_x),

    .rdct_wide_xy_bank    (rdct_wide_xy_bank_x),
    .rdct_wide_xy_addr    (rdct_wide_xy_addr_x),
    .rdct_wide_x_dout     (rdct_wide_x_data_x),
    .rdct_wide_y_dout     (rdct_wide_y_data_x),
    .rdct_wide_xy_valid   (rdct_wide_xy_valid_x),

    .rdct_narrow_xy_bank  (rdct_narrow_xy_bank_x),
    .rdct_narrow_xy_addr  (rdct_narrow_xy_addr_x),
    .rdct_narrow_x_dout   (rdct_narrow_x_data_x),
    .rdct_narrow_y_dout   (rdct_narrow_y_data_x),
    .rdct_narrow_xy_valid (rdct_narrow_xy_valid_x)
);

modexpng_reductor reductor_y
(
    .clk                  (clk),
    .rst                  (rst),

    .ena                  (rdct_ena_y),
    .rdy                  (rdct_rdy_y),

    .word_index_last      (rdct_word_index_last_y),

    .sel_wide_out         (rdct_sel_wide_out_y),
    .sel_narrow_out       (rdct_sel_narrow_out_y),

    .rd_wide_xy_addr_aux  (rd_wide_xy_addr_aux_y),
    .rd_wide_xy_bank_aux  (rd_wide_xy_bank_aux_y),
    .rd_wide_x_din_aux    (rd_wide_x_data_aux_y),
    .rd_wide_y_din_aux    (rd_wide_y_data_aux_y),

    .rcmb_final_xy_bank   (rcmb_final_xy_bank_y),
    .rcmb_final_xy_addr   (rcmb_final_xy_addr_y),
    .rcmb_final_x_din     (rcmb_final_x_data_y),
    .rcmb_final_y_din     (rcmb_final_y_data_y),
    .rcmb_final_xy_valid  (rcmb_final_xy_valid_y),

    .rdct_wide_xy_bank    (rdct_wide_xy_bank_y),
    .rdct_wide_xy_addr    (rdct_wide_xy_addr_y),
    .rdct_wide_x_dout     (rdct_wide_x_data_y),
    .rdct_wide_y_dout     (rdct_wide_y_data_y),
    .rdct_wide_xy_valid   (rdct_wide_xy_valid_y),

    .rdct_narrow_xy_bank  (rdct_narrow_xy_bank_y),
    .rdct_narrow_xy_addr  (rdct_narrow_xy_addr_y),
    .rdct_narrow_x_dout   (rdct_narrow_x_data_y),
    .rdct_narrow_y_dout   (rdct_narrow_y_data_y),
    .rdct_narrow_xy_valid (rdct_narrow_xy_valid_y)
);

//
// General Worker
//
reg                      wrk_ena = 1'b0;        // one-cycle start strobe (see trigger logic)
wire                     wrk_rdy;

reg  [ BANK_ADDR_W -1:0] wrk_sel_wide_in;
reg  [ BANK_ADDR_W -1:0] wrk_sel_wide_out;
reg  [ BANK_ADDR_W -1:0] wrk_sel_narrow_in;
reg  [ BANK_ADDR_W -1:0] wrk_sel_narrow_out;

reg  [   OP_ADDR_W -1:0] wrk_word_index_last;
reg  [   OP_ADDR_W -1:0] wrk_word_index_last_half;

reg  [UOP_OPCODE_W -1:0] wrk_opcode;

modexpng_general_worker general_worker
(
    .clk                     (clk),
    .rst                     (rst),

    .ena                     (wrk_ena),
    .rdy                     (wrk_rdy),

    .sel_narrow_in           (wrk_sel_narrow_in),
    .sel_narrow_out          (wrk_sel_narrow_out),
    .sel_wide_in             (wrk_sel_wide_in),
    .sel_wide_out            (wrk_sel_wide_out),

    .opcode                  (wrk_opcode),

    .word_index_last         (wrk_word_index_last),
    .word_index_last_half    (wrk_word_index_last_half),

    // read side, X
    .wrk_rd_wide_xy_ena_x    (wrk_rd_wide_xy_ena_x),
    .wrk_rd_wide_xy_bank_x   (wrk_rd_wide_xy_bank_x),
    .wrk_rd_wide_xy_addr_x   (wrk_rd_wide_xy_addr_x),
    .wrk_rd_wide_x_din_x     (wrk_rd_wide_x_data_x),
    .wrk_rd_wide_y_din_x     (wrk_rd_wide_y_data_x),

    .wrk_rd_narrow_xy_ena_x  (wrk_rd_narrow_xy_ena_x),
    .wrk_rd_narrow_xy_bank_x (wrk_rd_narrow_xy_bank_x),
    .wrk_rd_narrow_xy_addr_x (wrk_rd_narrow_xy_addr_x),
    .wrk_rd_narrow_x_din_x   (wrk_rd_narrow_x_data_x),
    .wrk_rd_narrow_y_din_x   (wrk_rd_narrow_y_data_x),

    // read side, Y
    .wrk_rd_wide_xy_ena_y    (wrk_rd_wide_xy_ena_y),
    .wrk_rd_wide_xy_bank_y   (wrk_rd_wide_xy_bank_y),
    .wrk_rd_wide_xy_addr_y   (wrk_rd_wide_xy_addr_y),
    .wrk_rd_wide_x_din_y     (wrk_rd_wide_x_data_y),
    .wrk_rd_wide_y_din_y     (wrk_rd_wide_y_data_y),

    .wrk_rd_narrow_xy_ena_y  (wrk_rd_narrow_xy_ena_y),
    .wrk_rd_narrow_xy_bank_y (wrk_rd_narrow_xy_bank_y),
    .wrk_rd_narrow_xy_addr_y (wrk_rd_narrow_xy_addr_y),
    .wrk_rd_narrow_x_din_y   (wrk_rd_narrow_x_data_y),
    .wrk_rd_narrow_y_din_y   (wrk_rd_narrow_y_data_y),

    // write side, X
    .wrk_wr_wide_xy_ena_x    (wrk_wr_wide_xy_ena_x),
    .wrk_wr_wide_xy_bank_x   (wrk_wr_wide_xy_bank_x),
    .wrk_wr_wide_xy_addr_x   (wrk_wr_wide_xy_addr_x),
    .wrk_wr_wide_x_dout_x    (wrk_wr_wide_x_data_x),
    .wrk_wr_wide_y_dout_x    (wrk_wr_wide_y_data_x),

    .wrk_wr_narrow_xy_ena_x  (wrk_wr_narrow_xy_ena_x),
    .wrk_wr_narrow_xy_bank_x (wrk_wr_narrow_xy_bank_x),
    .wrk_wr_narrow_xy_addr_x (wrk_wr_narrow_xy_addr_x),
    .wrk_wr_narrow_x_dout_x  (wrk_wr_narrow_x_data_x),
    .wrk_wr_narrow_y_dout_x  (wrk_wr_narrow_y_data_x),

    // write side, Y
    .wrk_wr_wide_xy_ena_y    (wrk_wr_wide_xy_ena_y),
    .wrk_wr_wide_xy_bank_y   (wrk_wr_wide_xy_bank_y),
    .wrk_wr_wide_xy_addr_y   (wrk_wr_wide_xy_addr_y),
    .wrk_wr_wide_x_dout_y    (wrk_wr_wide_x_data_y),
    .wrk_wr_wide_y_dout_y    (wrk_wr_wide_y_data_y),

    .wrk_wr_narrow_xy_ena_y  (wrk_wr_narrow_xy_ena_y),
    .wrk_wr_narrow_xy_bank_y (wrk_wr_narrow_xy_bank_y),
    .wrk_wr_narrow_xy_addr_y (wrk_wr_narrow_xy_addr_y),
    .wrk_wr_narrow_x_dout_y  (wrk_wr_narrow_x_data_y),
    .wrk_wr_narrow_y_dout_y  (wrk_wr_narrow_y_data_y)
);

//
// uOP Completion Detector
//
// A unit counts as finished when its enable strobe is low and its ready flag
// is high. The chain is ordered so that, should more than one opcode class
// flag ever be set, the same class wins as with a sequence of independent
// "if" statements listed in/out/mmm/wrk/ladder (the last one taking effect).
//
reg uop_exit_from_busy;

always @* begin
    if      (uop_opcode_is_ladder) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
    else if (uop_opcode_is_wrk   ) uop_exit_from_busy = ~wrk_ena & wrk_rdy;
    else if (uop_opcode_is_mmm   ) uop_exit_from_busy = ~mmm_ena & mmm_rdy;
    else if (uop_opcode_is_out   ) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~wrk_ena & wrk_rdy);
    else if (uop_opcode_is_in    ) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
    else                           uop_exit_from_busy = 1'b0;
end

//
// uOP Trigger Logic
//
// The enables are only ever high in the cycle after DECODE, and only for the
// unit(s) that serve the decoded opcode class. An output operation starts
// both the IO manager and the worker.
//
always @(posedge clk)
    if (rst) begin
        io_mgr_ena <= 1'b0;
        mmm_ena_x  <= 1'b0;
        mmm_ena_y  <= 1'b0;
        wrk_ena    <= 1'b0;
    end else begin
        io_mgr_ena <= (uop_fsm_state == UOP_FSM_STATE_DECODE) &&
                      (uop_opcode_is_in || uop_opcode_is_out || uop_opcode_is_ladder);
        mmm_ena_x  <= (uop_fsm_state == UOP_FSM_STATE_DECODE) && uop_opcode_is_mmm;
        mmm_ena_y  <= (uop_fsm_state == UOP_FSM_STATE_DECODE) && uop_opcode_is_mmm;
        wrk_ena    <= (uop_fsm_state == UOP_FSM_STATE_DECODE) &&
                      (uop_opcode_is_wrk || uop_opcode_is_out);
    end

//
// Parameters
//
wire uop_aux_is_1 = uop_data_aux == UOP_AUX_1;

// TODO: Add reset to default don't care values.
// Parameter latch: while the uOP FSM is in DECODE, copy the bank selectors
// and mode flags of the current micro-operation into the registers of the
// unit that is going to execute it. Registers not mentioned for an opcode
// keep their previous value.
always @(posedge clk)
    //
    if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
        //
        // both opcode registers are refreshed on every decode
        io_mgr_opcode <= uop_data_opcode;
        wrk_opcode    <= uop_data_opcode;
        //
        case (uop_data_opcode)
            //
            UOP_OPCODE_INPUT_TO_WIDE: begin
                io_mgr_sel_crt <= uop_data_crt;
                io_mgr_sel_aux <= uop_data_aux;
                // NOTE(review): the source bank is taken from the *narrow_in*
                // field although the destination is the *wide_out* field.
                // Confirm against the microcode ROM that INPUT_TO_WIDE really
                // encodes its source there and not in the wide_in field.
                io_mgr_sel_in  <= uop_data_sel_narrow_in;
                io_mgr_sel_out <= uop_data_sel_wide_out;
            end
            //
            UOP_OPCODE_INPUT_TO_NARROW: begin
                io_mgr_sel_crt <= uop_data_crt;
                io_mgr_sel_aux <= uop_data_aux;
                io_mgr_sel_in  <= uop_data_sel_narrow_in;
                io_mgr_sel_out <= uop_data_sel_narrow_out;
            end
            //
            UOP_OPCODE_OUTPUT_FROM_NARROW: begin
                io_mgr_sel_crt    <= uop_data_crt;
                io_mgr_sel_aux    <= UOP_AUX_DNC;
                io_mgr_sel_in     <= BANK_DNC;
                io_mgr_sel_out    <= uop_data_sel_narrow_out;
                // the worker reads the narrow bank on behalf of the IO manager
                wrk_sel_narrow_in <= uop_data_sel_narrow_in;
            end
            //
            UOP_OPCODE_MODULAR_MULTIPLY: begin
                //
                case (uop_data_ladder)
                    UOP_LADDER_00: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b00;
                    UOP_LADDER_11: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b11;
                    // Both halves follow the IO manager's ladder_d flag. This
                    // used to load 2'bXX, leaving the ladder mode undefined
                    // and io_mgr_ladder_d unused.
                    UOP_LADDER_D:  {mmm_ladder_mode_x, mmm_ladder_mode_y} <= {2{io_mgr_ladder_d}};
                    UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= {io_mgr_ladder_p, io_mgr_ladder_q};
                endcase
                //
                {mmm_only_reduce_x,     mmm_only_reduce_y    } <= {2{1'b0}};
                // force_unity_b is set for every AUX value other than UOP_AUX_1
                {mmm_force_unity_b_x,   mmm_force_unity_b_y  } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}};
                {mmm_sel_wide_in_x,     mmm_sel_wide_in_y    } <= {2{uop_data_sel_wide_in   }};
                {mmm_sel_narrow_in_x,   mmm_sel_narrow_in_y  } <= {2{uop_data_sel_narrow_in }};
                {rdct_sel_wide_out_x,   rdct_sel_wide_out_y  } <= {2{uop_data_sel_wide_out  }};
                {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}};
                //
            end
            //
            UOP_OPCODE_MODULAR_REDUCE_PROC: begin
                //
                // ladder mode and input banks are explicitly don't-care here
                {mmm_ladder_mode_x,     mmm_ladder_mode_y    } <= {2{1'bX}};
                //
                {mmm_only_reduce_x,     mmm_only_reduce_y    } <= {2{1'b1}};
                {mmm_force_unity_b_x,   mmm_force_unity_b_y  } <= {2{1'b0}};
                {mmm_sel_wide_in_x,     mmm_sel_wide_in_y    } <= {2{BANK_DNC}};
                {mmm_sel_narrow_in_x,   mmm_sel_narrow_in_y  } <= {2{BANK_DNC}};
                {rdct_sel_wide_out_x,   rdct_sel_wide_out_y  } <= {2{uop_data_sel_wide_out  }};
                {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}};
                //
            end
            //
            UOP_OPCODE_PROPAGATE_CARRIES: begin
                wrk_sel_narrow_in  <= uop_data_sel_narrow_in;
                wrk_sel_narrow_out <= uop_data_sel_narrow_out;
            end
            //
            UOP_OPCODE_MODULAR_SUBTRACT: begin
                wrk_sel_wide_out   <= uop_data_sel_wide_out;
                wrk_sel_narrow_in  <= uop_data_sel_narrow_in;
                wrk_sel_narrow_out <= uop_data_sel_narrow_out;
            end
            //
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y: begin
                wrk_sel_wide_in    <= uop_data_sel_wide_in;
                wrk_sel_wide_out   <= uop_data_sel_wide_out;
                wrk_sel_narrow_in  <= uop_data_sel_narrow_in;
                wrk_sel_narrow_out <= uop_data_sel_narrow_out;
            end
            //
            UOP_OPCODE_MODULAR_REDUCE_INIT: begin
                wrk_sel_narrow_in  <= uop_data_sel_narrow_in;
            end
            //
        endcase
        //
    end

//
// Lengths
//
wire [OP_ADDR_W -1:0] word_index_last_n_minus1  = word_index_last_n  - 1'b1;
wire [OP_ADDR_W -1:0] word_index_last_pq_minus1 = word_index_last_pq - 1'b1;

// The NPQ field of the micro-operation picks the N length or the P/Q length.
wire uop_npq_is_n = uop_data_npq == UOP_NPQ_N;

// Length latch: in DECODE, load the last-word index (and for the ladder the
// number of steps) of the unit that executes the current micro-operation.
always @(posedge clk)
    //
    if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
        //
        case (uop_data_opcode)
            //
            UOP_OPCODE_INPUT_TO_WIDE,
            UOP_OPCODE_INPUT_TO_NARROW,
            UOP_OPCODE_OUTPUT_FROM_NARROW:
                //
                io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
            //
            UOP_OPCODE_MODULAR_MULTIPLY: begin
                {mmm_word_index_last_x,        mmm_word_index_last_y       } <= {2{uop_npq_is_n ? word_index_last_n        : word_index_last_pq       }};
                {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{uop_npq_is_n ? word_index_last_n_minus1 : word_index_last_pq_minus1}};
                {rdct_word_index_last_x,       rdct_word_index_last_y      } <= {2{uop_npq_is_n ? word_index_last_n        : word_index_last_pq       }};
            end
            //
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y:
                wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
            //
            UOP_OPCODE_MODULAR_REDUCE_INIT: begin
                // full length from N, half length from P/Q, regardless of NPQ
                wrk_word_index_last      <= word_index_last_n;
                wrk_word_index_last_half <= word_index_last_pq;
            end
            //
            UOP_OPCODE_MODULAR_REDUCE_PROC: begin
                // always uses the P/Q length, regardless of NPQ
                {mmm_word_index_last_x,        mmm_word_index_last_y       } <= {2{word_index_last_pq       }};
                {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{word_index_last_pq_minus1}};
                {rdct_word_index_last_x,       rdct_word_index_last_y      } <= {2{word_index_last_pq       }};
            end
            //
            UOP_OPCODE_MODULAR_SUBTRACT: begin
                wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
            end
            //
            // LADDER_INIT and LADDER_STEP load exactly the same values, so
            // they share one branch.
            UOP_OPCODE_LADDER_INIT,
            UOP_OPCODE_LADDER_STEP: begin
                io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
                io_mgr_ladder_steps    <= crt_mode ? bit_index_last_pq : bit_index_last_n;
            end
            //
        endcase
        //
    end

//
// FSM Process
//
// State register with synchronous reset to IDLE.
//
always @(posedge clk)
    //
    if (rst) uop_fsm_state <= UOP_FSM_STATE_IDLE;
    else     uop_fsm_state <= uop_fsm_state_next;

//
// FSM Transition Logic
//
always @* begin
    //
    case (uop_fsm_state)
        UOP_FSM_STATE_IDLE:   uop_fsm_state_next = next ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_IDLE;
        UOP_FSM_STATE_FETCH:  uop_fsm_state_next = UOP_FSM_STATE_DECODE ;
        UOP_FSM_STATE_DECODE: uop_fsm_state_next = uop_opcode_is_stop ?
UOP_FSM_STATE_IDLE : UOP_FSM_STATE_BUSY; UOP_FSM_STATE_BUSY: uop_fsm_state_next = uop_exit_from_busy ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_BUSY; endcase // end // // Ready Flag Logic // reg valid_reg = 1'b1; assign valid = valid_reg; always @(posedge clk) // if (rst) valid_reg <= 1'b1; else case (uop_fsm_state) UOP_FSM_STATE_IDLE: valid_reg <= ~next; UOP_FSM_STATE_DECODE: valid_reg <= uop_opcode_is_stop; endcase // // BEGIN DEBUG // integer i; always @(posedge clk) // if ((uop_fsm_state == UOP_FSM_STATE_DECODE) && uop_opcode_is_stop) begin // $display("STOP - BANKS DUMP FOLLOWS"); // // X.X // $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); $write("X.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[0*256+i]); $write("\n"); $write("X.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[1*256+i]); $write("\n"); $write("X.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[2*256+i]); $write("\n"); $write("X.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[3*256+i]); $write("\n"); $write("X.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[4*256+i]); $write("\n"); $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[5*256+i]); $write("\n"); $write("X.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[6*256+i]); $write("\n"); $write("X.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[7*256+i]); $write("\n"); $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); $write("X.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[0*256+i]); $write("\n"); $write("X.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[1*256+i]); $write("\n"); $write("X.X.WIDE.C: "); 
for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[2*256+i]); $write("\n"); $write("X.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[3*256+i]); $write("\n"); $write("X.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[4*256+i]); $write("\n"); $write("X.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[5*256+i]); $write("\n"); $write("X.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[6*256+i]); $write("\n"); $write("X.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[7*256+i]); $write("\n"); // // X.Y // $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); $write("X.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[0*256+i]); $write("\n"); $write("X.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[1*256+i]); $write("\n"); $write("X.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[2*256+i]); $write("\n"); $write("X.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[3*256+i]); $write("\n"); $write("X.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[4*256+i]); $write("\n"); $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[5*256+i]); $write("\n"); $write("X.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[6*256+i]); $write("\n"); $write("X.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[7*256+i]); $write("\n"); $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); $write("X.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[0*256+i]); 
$write("\n"); $write("X.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[1*256+i]); $write("\n"); $write("X.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[2*256+i]); $write("\n"); $write("X.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[3*256+i]); $write("\n"); $write("X.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[4*256+i]); $write("\n"); $write("X.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[5*256+i]); $write("\n"); $write("X.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[6*256+i]); $write("\n"); $write("X.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[7*256+i]); $write("\n"); // // Y.X // $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); $write("Y.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[0*256+i]); $write("\n"); $write("Y.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[1*256+i]); $write("\n"); $write("Y.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[2*256+i]); $write("\n"); $write("Y.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[3*256+i]); $write("\n"); $write("Y.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[4*256+i]); $write("\n"); $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[5*256+i]); $write("\n"); $write("Y.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[6*256+i]); $write("\n"); $write("Y.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[7*256+i]); $write("\n"); $write(" "); for (i=0; 
i<64; i=i+1) $write(" ------ "); $write("\n"); $write("Y.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[0*256+i]); $write("\n"); $write("Y.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[1*256+i]); $write("\n"); $write("Y.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[2*256+i]); $write("\n"); $write("Y.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[3*256+i]); $write("\n"); $write("Y.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[4*256+i]); $write("\n"); $write("Y.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[5*256+i]); $write("\n"); $write("Y.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[6*256+i]); $write("\n"); $write("Y.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[7*256+i]); $write("\n"); // // Y.Y // $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); $write("Y.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[0*256+i]); $write("\n"); $write("Y.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[1*256+i]); $write("\n"); $write("Y.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[2*256+i]); $write("\n"); $write("Y.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[3*256+i]); $write("\n"); $write("Y.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[4*256+i]); $write("\n"); $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[5*256+i]); $write("\n"); $write("Y.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", 
storage_block_y.narrow_y.mem[6*256+i]); $write("\n"); $write("Y.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[7*256+i]); $write("\n"); $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); $write("Y.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[0*256+i]); $write("\n"); $write("Y.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[1*256+i]); $write("\n"); $write("Y.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[2*256+i]); $write("\n"); $write("Y.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[3*256+i]); $write("\n"); $write("Y.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[4*256+i]); $write("\n"); $write("Y.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[5*256+i]); $write("\n"); $write("Y.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[6*256+i]); $write("\n"); $write("Y.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[7*256+i]); $write("\n"); // end // // END DEBUG // endmodule