//
// General worker: executes one micro-operation (selected by `opcode`) by
// sweeping addresses on its read ports and driving its write ports.
// Ports come in two parallel sets, suffixed _x and _y, and each set has
// separate "wide" and "narrow" read and write port groups.
//
module modexpng_general_worker
(
    clk, rst_n,
    ena, rdy,
    sel_narrow_in, sel_narrow_out,
    sel_wide_in, sel_wide_out,
    opcode,
    word_index_last, word_index_last_half,
    wrk_rd_wide_xy_ena_x, wrk_rd_wide_xy_bank_x, wrk_rd_wide_xy_addr_x, wrk_rd_wide_x_din_x, wrk_rd_wide_y_din_x,
    wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x, wrk_rd_narrow_x_din_x, wrk_rd_narrow_y_din_x,
    wrk_rd_wide_xy_ena_y, wrk_rd_wide_xy_bank_y, wrk_rd_wide_xy_addr_y, wrk_rd_wide_x_din_y, wrk_rd_wide_y_din_y,
    wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y, wrk_rd_narrow_x_din_y, wrk_rd_narrow_y_din_y,
    wrk_wr_wide_xy_ena_x, wrk_wr_wide_xy_bank_x, wrk_wr_wide_xy_addr_x, wrk_wr_wide_x_dout_x, wrk_wr_wide_y_dout_x,
    wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x, wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x,
    wrk_wr_wide_xy_ena_y, wrk_wr_wide_xy_bank_y, wrk_wr_wide_xy_addr_y, wrk_wr_wide_x_dout_y, wrk_wr_wide_y_dout_y,
    wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y, wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y
);

    //
    // Headers
    //
    // The width parameters (BANK_ADDR_W, OP_ADDR_W, WORD_EXT_W, ...) and the
    // UOP_OPCODE_* constants used below are not declared in this file;
    // presumably they come from these two headers.
    //
    `include "modexpng_parameters.vh"
    `include "modexpng_microcode.vh"

    //
    // Ports
    //
    input                         clk;      // clock; every sequential process is posedge-triggered
    input                         rst_n;    // active-low reset, used asynchronously (negedge) by the enable, FSM and ready processes

    input                         ena;      // start request; only examined while the FSM is in IDLE
    output                        rdy;      // driven from rdy_reg: cleared when ena is seen in IDLE, set again in STOP

    // Bank selects accompanying the micro-operation. Which one is used for
    // reading and which for writing depends on the opcode (see the address logic).
    input  [   BANK_ADDR_W -1:0]  sel_narrow_in;
    input  [   BANK_ADDR_W -1:0]  sel_narrow_out;
    input  [   BANK_ADDR_W -1:0]  sel_wide_in;
    input  [   BANK_ADDR_W -1:0]  sel_wide_out;

    input  [  UOP_OPCODE_W -1:0]  opcode;   // selects the operation and thereby the FSM flavour (one_pass / meander / two_pass)

    input  [     OP_ADDR_W -1:0]  word_index_last;       // narrow read address counter wraps after reaching this index
    input  [     OP_ADDR_W -1:0]  word_index_last_half;  // wide read address counter wraps after reaching this index

    // Read port, wide, _x set: enable/bank/address out, two data words (x and y) in
    output                        wrk_rd_wide_xy_ena_x;
    output [   BANK_ADDR_W -1:0]  wrk_rd_wide_xy_bank_x;
    output [     OP_ADDR_W -1:0]  wrk_rd_wide_xy_addr_x;
    input  [    WORD_EXT_W -1:0]  wrk_rd_wide_x_din_x;
    input  [    WORD_EXT_W -1:0]  wrk_rd_wide_y_din_x;

    // Read port, narrow, _x set
    output                        wrk_rd_narrow_xy_ena_x;
    output [   BANK_ADDR_W -1:0]  wrk_rd_narrow_xy_bank_x;
    output [     OP_ADDR_W -1:0]  wrk_rd_narrow_xy_addr_x;
    input  [    WORD_EXT_W -1:0]  wrk_rd_narrow_x_din_x;
    input  [    WORD_EXT_W -1:0]  wrk_rd_narrow_y_din_x;

    // Read port, wide, _y set
    output                        wrk_rd_wide_xy_ena_y;
    output [   BANK_ADDR_W -1:0]  wrk_rd_wide_xy_bank_y;
    output [     OP_ADDR_W -1:0]  wrk_rd_wide_xy_addr_y;
    input  [    WORD_EXT_W -1:0]  wrk_rd_wide_x_din_y;
    input  [    WORD_EXT_W -1:0]  wrk_rd_wide_y_din_y;

    // Read port, narrow, _y set
    output                        wrk_rd_narrow_xy_ena_y;
    output [   BANK_ADDR_W -1:0]  wrk_rd_narrow_xy_bank_y;
    output [     OP_ADDR_W -1:0]  wrk_rd_narrow_xy_addr_y;
    input  [    WORD_EXT_W -1:0]  wrk_rd_narrow_x_din_y;
    input  [    WORD_EXT_W -1:0]  wrk_rd_narrow_y_din_y;

    // Write port, wide, _x set: enable/bank/address and two data words (x and y) out
    output                        wrk_wr_wide_xy_ena_x;
    output [   BANK_ADDR_W -1:0]  wrk_wr_wide_xy_bank_x;
    output [     OP_ADDR_W -1:0]  wrk_wr_wide_xy_addr_x;
    output [    WORD_EXT_W -1:0]  wrk_wr_wide_x_dout_x;
    output [    WORD_EXT_W -1:0]  wrk_wr_wide_y_dout_x;

    // Write port, narrow, _x set
    output                        wrk_wr_narrow_xy_ena_x;
    output [   BANK_ADDR_W -1:0]  wrk_wr_narrow_xy_bank_x;
    output [     OP_ADDR_W -1:0]  wrk_wr_narrow_xy_addr_x;
    output [    WORD_EXT_W -1:0]  wrk_wr_narrow_x_dout_x;
    output [    WORD_EXT_W -1:0]  wrk_wr_narrow_y_dout_x;

    // Write port, wide, _y set
    output                        wrk_wr_wide_xy_ena_y;
    output [   BANK_ADDR_W -1:0]  wrk_wr_wide_xy_bank_y;
    output [     OP_ADDR_W -1:0]  wrk_wr_wide_xy_addr_y;
    output [    WORD_EXT_W -1:0]  wrk_wr_wide_x_dout_y;
    output [    WORD_EXT_W -1:0]  wrk_wr_wide_y_dout_y;

    // Write port, narrow, _y set
    output                        wrk_wr_narrow_xy_ena_y;
    output [   BANK_ADDR_W -1:0]  wrk_wr_narrow_xy_bank_y;
    output [     OP_ADDR_W -1:0]  wrk_wr_narrow_xy_addr_y;
    output [    WORD_EXT_W -1:0]  wrk_wr_narrow_x_dout_y;
    output [    WORD_EXT_W -1:0]  wrk_wr_narrow_y_dout_y;

    //
    // FSM Declaration
    //
    // State encodings of the "one_pass" flavour: IDLE, two read-latency
    // states, BUSY, then two write-latency states. The list continues
    // directly after this span.
    //
    localparam [5:0] WRK_FSM_STATE_IDLE             = 6'h00;

    localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1     = 6'h01;
    localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2     = 6'h02;
    localparam [5:0] WRK_FSM_STATE_BUSY             = 6'h03;
    localparam [5:0] WRK_FSM_STATE_LATENCY_POST1    = 6'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
    // Remaining one_pass states.
    localparam [5:0] WRK_FSM_STATE_LATENCY_POST2    = 6'h06;
    localparam [5:0] WRK_FSM_STATE_STOP             = 6'h07;

    // "one_pass_meander" states (6'h1x): every step exists as an _M1/_M2 pair.
    localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M1  = 6'h10;
    localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M2  = 6'h11;
    localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M1  = 6'h12;
    localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M2  = 6'h13;
    localparam [5:0] WRK_FSM_STATE_BUSY_M1          = 6'h14;
    localparam [5:0] WRK_FSM_STATE_BUSY_M2          = 6'h15;
    localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 6'h16;
    localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 6'h17;
    localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 6'h18;
    localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 6'h19;

    // "two_pass" states (6'h2x): four pre- and four post-latency states plus
    // a hold-off state between the two passes.
    localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_TP  = 6'h20;
    localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_TP  = 6'h21;
    localparam [5:0] WRK_FSM_STATE_LATENCY_PRE3_TP  = 6'h22;
    localparam [5:0] WRK_FSM_STATE_LATENCY_PRE4_TP  = 6'h23;
    localparam [5:0] WRK_FSM_STATE_BUSY_TP          = 6'h24;
    localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_TP = 6'h25;
    localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_TP = 6'h26;
    localparam [5:0] WRK_FSM_STATE_LATENCY_POST3_TP = 6'h27;
    localparam [5:0] WRK_FSM_STATE_LATENCY_POST4_TP = 6'h28;
    localparam [5:0] WRK_FSM_STATE_HOLDOFF_TP       = 6'h29;

    // One shared state register; three candidate next states are computed in
    // parallel and the FSM process picks one of them according to `opcode`.
    reg [5:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
    reg [5:0] wrk_fsm_state_next_one_pass;         // single address space sweep
    reg [5:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y)
    reg [5:0] wrk_fsm_state_next_two_pass;         // two address space sweeps

    reg       wrk_fsm_two_pass_pass;               // 0=first pass, 1=second pass
    reg       wrk_fsm_two_pass_pass_dly;           // 0=first pass, 1=second pass; set one state later (in HOLDOFF_TP) than the flag above

    // TODO: Comment on how narrow/wide address increment works (narrow is one long sweep, wide is two twice shorter sweeps)

    //
    // Control Signals
    //
    // Registered versions of every output port. Only the enables carry an
    // initial value; banks, addresses and data are don't-care until driven.
    //
    reg                      rd_wide_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W   -1:0] rd_wide_xy_bank_x;
    reg [  OP_ADDR_W   -1:0] rd_wide_xy_addr_x;

    reg                      rd_narrow_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W   -1:0] rd_narrow_xy_bank_x;
    reg [  OP_ADDR_W   -1:0] rd_narrow_xy_addr_x;

    reg                      rd_wide_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W   -1:0] rd_wide_xy_bank_y;
    reg [  OP_ADDR_W   -1:0] rd_wide_xy_addr_y;

    reg                      rd_narrow_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W   -1:0] rd_narrow_xy_bank_y;
    reg [  OP_ADDR_W   -1:0] rd_narrow_xy_addr_y;

    reg                      wr_wide_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W   -1:0] wr_wide_xy_bank_x;
    reg [  OP_ADDR_W   -1:0] wr_wide_xy_addr_x;
    reg [ WORD_EXT_W   -1:0] wr_wide_x_dout_x;
    reg [ WORD_EXT_W   -1:0] wr_wide_y_dout_x;

    reg                      wr_narrow_xy_ena_x = 1'b0;
    reg [BANK_ADDR_W   -1:0] wr_narrow_xy_bank_x;
    reg [  OP_ADDR_W   -1:0] wr_narrow_xy_addr_x;
    reg [ WORD_EXT_W   -1:0] wr_narrow_x_dout_x;
    reg [ WORD_EXT_W   -1:0] wr_narrow_y_dout_x;

    reg                      wr_wide_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W   -1:0] wr_wide_xy_bank_y;
    reg [  OP_ADDR_W   -1:0] wr_wide_xy_addr_y;
    reg [ WORD_EXT_W   -1:0] wr_wide_x_dout_y;
    reg [ WORD_EXT_W   -1:0] wr_wide_y_dout_y;

    reg                      wr_narrow_xy_ena_y = 1'b0;
    reg [BANK_ADDR_W   -1:0] wr_narrow_xy_bank_y;
    reg [  OP_ADDR_W   -1:0] wr_narrow_xy_addr_y;
    reg [ WORD_EXT_W   -1:0] wr_narrow_x_dout_y;
    reg [ WORD_EXT_W   -1:0] wr_narrow_y_dout_y;

    //
    // Mapping
    //
    // Plain one-to-one wiring of the internal registers onto the wrk_* ports.
    //
    assign wrk_rd_wide_xy_ena_x    = rd_wide_xy_ena_x;
    assign wrk_rd_wide_xy_bank_x   = rd_wide_xy_bank_x;
    assign wrk_rd_wide_xy_addr_x   = rd_wide_xy_addr_x;

    assign wrk_rd_narrow_xy_ena_x  = rd_narrow_xy_ena_x;
    assign wrk_rd_narrow_xy_bank_x = rd_narrow_xy_bank_x;
    assign wrk_rd_narrow_xy_addr_x = rd_narrow_xy_addr_x;

    assign wrk_rd_wide_xy_ena_y    = rd_wide_xy_ena_y;
    assign wrk_rd_wide_xy_bank_y   = rd_wide_xy_bank_y;
    assign wrk_rd_wide_xy_addr_y   = rd_wide_xy_addr_y;

    assign wrk_rd_narrow_xy_ena_y  = rd_narrow_xy_ena_y;
    assign wrk_rd_narrow_xy_bank_y = rd_narrow_xy_bank_y;
    assign wrk_rd_narrow_xy_addr_y = rd_narrow_xy_addr_y;

    assign wrk_wr_wide_xy_ena_x    = wr_wide_xy_ena_x;
    assign wrk_wr_wide_xy_bank_x   = wr_wide_xy_bank_x;
    assign wrk_wr_wide_xy_addr_x   = wr_wide_xy_addr_x;
    assign wrk_wr_wide_x_dout_x    = wr_wide_x_dout_x;
    assign wrk_wr_wide_y_dout_x    = wr_wide_y_dout_x;

    assign wrk_wr_narrow_xy_ena_x  = wr_narrow_xy_ena_x;
    assign wrk_wr_narrow_xy_bank_x = wr_narrow_xy_bank_x;
    assign wrk_wr_narrow_xy_addr_x = wr_narrow_xy_addr_x;
    assign wrk_wr_narrow_x_dout_x  = wr_narrow_x_dout_x;
    assign wrk_wr_narrow_y_dout_x  = wr_narrow_y_dout_x;

    assign wrk_wr_wide_xy_ena_y    = wr_wide_xy_ena_y;
    assign wrk_wr_wide_xy_bank_y   = wr_wide_xy_bank_y;
    assign wrk_wr_wide_xy_addr_y   = wr_wide_xy_addr_y;
    assign wrk_wr_wide_x_dout_y    = wr_wide_x_dout_y;
    assign wrk_wr_wide_y_dout_y    = wr_wide_y_dout_y;

    assign wrk_wr_narrow_xy_ena_y  = wr_narrow_xy_ena_y;
    assign wrk_wr_narrow_xy_bank_y = wr_narrow_xy_bank_y;
    assign wrk_wr_narrow_xy_addr_y = wr_narrow_xy_addr_y;
    assign wrk_wr_narrow_x_dout_y  = wr_narrow_x_dout_y;
    assign wrk_wr_narrow_y_dout_y  = wr_narrow_y_dout_y;

    //
    // Delays
    //
    // Shift-register taps: <name>_dlyN is <name> as it was N clock edges ago.
    // Read addresses are delayed by up to four clocks, read data by up to
    // three (narrow y data by two only).
    //
    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly3;
    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly4;

    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly3;
    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly4;

    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly3;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly4;

    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly3;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly4;

    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1;
    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly2;
    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly3;
    //reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly4;

    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1;
    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly2;
    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly3;
    //reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly4;

    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1;
    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2;
    // The declaration opened here is completed by the identifier that
    // immediately follows this span.
    reg [WORD_EXT_W -1:0]
    // Identifier completing the `reg [WORD_EXT_W -1:0]` declaration that is
    // opened just before this span.
    wrk_rd_narrow_x_din_x_dly3;

    reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly1;
    reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly2;

    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1;
    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2;
    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3;

    reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly1;
    reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly2;

    // Free-running delay lines (no reset, no enable). Each concatenated
    // nonblocking assignment shifts the chain by one stage per clock:
    // dly1 <= live signal, dly2 <= dly1, and so on.
    always @(posedge clk) begin
        //
        // read addresses: four stages each
        {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
        {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
        //
        {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
        {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
        //
        // wide x read data: three stages (the fourth stage is commented out)
        {/*wrk_rd_wide_x_din_x_dly4,*/ wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {/*wrk_rd_wide_x_din_x_dly3,*/ wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x};
        {/*wrk_rd_wide_x_din_y_dly4,*/ wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {/*wrk_rd_wide_x_din_y_dly3,*/ wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y};
        //
        // narrow read data: three stages for the x word, two for the y word
        {wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x};
        {wrk_rd_narrow_y_din_x_dly2, wrk_rd_narrow_y_din_x_dly1} <= {wrk_rd_narrow_y_din_x_dly1, wrk_rd_narrow_y_din_x};
        {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y};
        {wrk_rd_narrow_y_din_y_dly2, wrk_rd_narrow_y_din_y_dly1} <= {wrk_rd_narrow_y_din_y_dly1, wrk_rd_narrow_y_din_y};
        //
    end

    //
    // Source Read Enable Logic
    //
    // Helper tasks: the _x and _y read enables of a port group are always
    // driven to the same value.
    //
    task _update_wide_xy_rd_en;   input _en; {rd_wide_xy_ena_x,   rd_wide_xy_ena_y  } <= {2{_en}}; endtask
    task _update_narrow_xy_rd_en; input _en; {rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{_en}}; endtask

    task enable_wide_xy_rd_en;    _update_wide_xy_rd_en  (1'b1); endtask
    task disable_wide_xy_rd_en;   _update_wide_xy_rd_en  (1'b0); endtask

    task enable_narrow_xy_rd_en;  _update_narrow_xy_rd_en(1'b1); endtask
    task disable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b0); endtask

    // Read enables. Both groups are first scheduled to 0 and then re-scheduled
    // to 1 by whichever case branch matches; with nonblocking assignments the
    // last one scheduled wins, so statement order matters here.
    // The decode uses the *next* state of each FSM flavour, not the current
    // state register.
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            //
            disable_wide_xy_rd_en;
            disable_narrow_xy_rd_en;
            //
        end else begin
            //
            // default: nothing is read
            disable_wide_xy_rd_en;
            disable_narrow_xy_rd_en;
            //
            // one_pass
            //
            case (wrk_fsm_state_next_one_pass)
                //
                WRK_FSM_STATE_LATENCY_PRE1,
                WRK_FSM_STATE_LATENCY_PRE2,
                WRK_FSM_STATE_BUSY:
                    //
                    case (opcode)
                        //
                        UOP_OPCODE_PROPAGATE_CARRIES,
                        UOP_OPCODE_OUTPUT_FROM_NARROW,
                        UOP_OPCODE_MODULAR_REDUCE_INIT:
                            //
                            enable_narrow_xy_rd_en;
                        //
                        UOP_OPCODE_COPY_CRT_Y2X: begin
                            //
                            enable_wide_xy_rd_en;
                            enable_narrow_xy_rd_en;
                            //
                        end
                        //
                        UOP_OPCODE_MERGE_LH:
                            //
                            enable_wide_xy_rd_en;
                        //
                    endcase
                //
            endcase
            //
            // one_pass_meander
            //
            case (wrk_fsm_state_next_one_pass_meander)
                //
                WRK_FSM_STATE_LATENCY_PRE1_M1,
                WRK_FSM_STATE_LATENCY_PRE1_M2,
                WRK_FSM_STATE_LATENCY_PRE2_M1,
                WRK_FSM_STATE_LATENCY_PRE2_M2,
                WRK_FSM_STATE_BUSY_M1,
                WRK_FSM_STATE_BUSY_M2:
                    //
                    case (opcode)
                        //
                        UOP_OPCODE_COPY_LADDERS_X2Y,
                        UOP_OPCODE_CROSS_LADDERS_X2Y: begin
                            //
                            enable_wide_xy_rd_en;
                            enable_narrow_xy_rd_en;
                            //
                        end
                        //
                        UOP_OPCODE_REGULAR_ADD_UNEVEN:
                            //
                            enable_narrow_xy_rd_en;
                        //
                    endcase
                //
            endcase
            //
            // two_pass
            //
            case (wrk_fsm_state_next_two_pass)
                //
                WRK_FSM_STATE_LATENCY_PRE1_TP,
                WRK_FSM_STATE_LATENCY_PRE2_TP,
                WRK_FSM_STATE_LATENCY_PRE3_TP,
                WRK_FSM_STATE_LATENCY_PRE4_TP,
                WRK_FSM_STATE_BUSY_TP:
                    //
                    case (opcode)
                        UOP_OPCODE_MODULAR_SUBTRACT:
                            //
                            // first pass reads wide and narrow, second pass narrow only
                            if (!wrk_fsm_two_pass_pass) begin
                                enable_wide_xy_rd_en;
                                enable_narrow_xy_rd_en;
                            end else
                                enable_narrow_xy_rd_en;
                        //
                    endcase
                //
            endcase
            //
        end

    //
    // Destination Write Enable Logic
    //
    // Same structure as the read-enable helpers, for the write enables.
    //
    task _update_wide_xy_wr_en;   input _en; {wr_wide_xy_ena_x,   wr_wide_xy_ena_y  } <= {2{_en}}; endtask
    task _update_narrow_xy_wr_en; input _en; {wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{_en}}; endtask

    task enable_wide_xy_wr_en;    _update_wide_xy_wr_en  (1'b1); endtask
    task disable_wide_xy_wr_en;   _update_wide_xy_wr_en  (1'b0); endtask

    task enable_narrow_xy_wr_en;  _update_narrow_xy_wr_en(1'b1); endtask
    task disable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b0); endtask

    // Write enables. Default-to-0 then override, like the read enables, but
    // decoded from the *current* state: writes are active in BUSY and the
    // POST latency states (for meander only in the _M2 half of each pair).
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) begin
            //
            disable_wide_xy_wr_en;
            disable_narrow_xy_wr_en;
            //
        end else begin
            //
            // default: nothing is written
            disable_wide_xy_wr_en;
            disable_narrow_xy_wr_en;
            //
            // one_pass
            //
            case (wrk_fsm_state)
                //
                WRK_FSM_STATE_BUSY,
                WRK_FSM_STATE_LATENCY_POST1,
                WRK_FSM_STATE_LATENCY_POST2:
                    //
                    case (opcode)
                        //
                        UOP_OPCODE_PROPAGATE_CARRIES,
                        UOP_OPCODE_MERGE_LH:
                            //
                            enable_narrow_xy_wr_en;
                        //
                        UOP_OPCODE_COPY_CRT_Y2X: begin
                            //
                            enable_wide_xy_wr_en;
                            enable_narrow_xy_wr_en;
                            //
                        end
                        //
                        UOP_OPCODE_MODULAR_REDUCE_INIT:
                            //
                            enable_wide_xy_wr_en;
                        //
                    endcase
                //
            endcase
            //
            // one_pass_meander
            //
            case (wrk_fsm_state)
                //
                WRK_FSM_STATE_BUSY_M2,
                WRK_FSM_STATE_LATENCY_POST1_M2,
                WRK_FSM_STATE_LATENCY_POST2_M2:
                    //
                    case (opcode)
                        //
                        UOP_OPCODE_COPY_LADDERS_X2Y,
                        UOP_OPCODE_CROSS_LADDERS_X2Y: begin
                            //
                            enable_wide_xy_wr_en;
                            enable_narrow_xy_wr_en;
                            //
                        end
                        //
                        UOP_OPCODE_REGULAR_ADD_UNEVEN:
                            //
                            enable_narrow_xy_wr_en;
                        //
                    endcase
                //
            endcase
            //
            // two_pass
            //
            case (wrk_fsm_state)
                //
                WRK_FSM_STATE_BUSY_TP,
                WRK_FSM_STATE_LATENCY_POST1_TP,
                WRK_FSM_STATE_LATENCY_POST2_TP,
                WRK_FSM_STATE_LATENCY_POST3_TP,
                WRK_FSM_STATE_LATENCY_POST4_TP:
                    //
                    case (opcode)
                        //
                        UOP_OPCODE_MODULAR_SUBTRACT:
                            //
                            // first pass writes narrow only, second pass wide and narrow;
                            // the else-branch is closed right after this span
                            if (!wrk_fsm_two_pass_pass)
                                enable_narrow_xy_wr_en;
                            else begin
                                enable_wide_xy_wr_en;
                                // Closing part of the destination write-enable process
                                // (second-pass branch of UOP_OPCODE_MODULAR_SUBTRACT),
                                // which begins before this span.
                                enable_narrow_xy_wr_en;
                            end
                        //
                    endcase
                //
            endcase
            //
        end

    //
    // Source to Destination Data Logic
    //
    // Write data registers. Every clock both port groups default to
    // WORD_EXT_DNC and are then overridden by the branch matching the current
    // state and opcode (last nonblocking assignment wins). The state windows
    // are the same as in the write-enable logic. Argument order of the
    // update_*_dout tasks is (x word of _x, y word of _x, x word of _y, y word of _y).
    //
    always @(posedge clk) begin
        //
        update_wide_dout  (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
        update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
        //
        // one_pass
        //
        case (wrk_fsm_state)
            //
            WRK_FSM_STATE_BUSY,
            WRK_FSM_STATE_LATENCY_POST1,
            WRK_FSM_STATE_LATENCY_POST2:
                //
                case (opcode)
                    //
                    // narrow word plus incoming carry, with the carry bits stripped
                    UOP_OPCODE_PROPAGATE_CARRIES:
                        //
                        update_narrow_dout(rd_narrow_x_din_x_w_cry_reduced, rd_narrow_y_din_x_w_cry_reduced, rd_narrow_x_din_y_w_cry_reduced, rd_narrow_y_din_y_w_cry_reduced);
                    //
                    // data read on the _y ports is written to both the _x and _y ports
                    UOP_OPCODE_COPY_CRT_Y2X: begin
                        //
                        update_wide_dout(wrk_rd_wide_x_din_y, wrk_rd_wide_y_din_y, wrk_rd_wide_x_din_y, wrk_rd_wide_y_din_y);
                        //
                        update_narrow_dout(wrk_rd_narrow_x_din_y, wrk_rd_narrow_y_din_y, wrk_rd_narrow_x_din_y, wrk_rd_narrow_y_din_y);
                        //
                    end
                    //
                    // narrow read data goes to the wide write port
                    UOP_OPCODE_MODULAR_REDUCE_INIT:
                        //
                        update_wide_dout(wrk_rd_narrow_x_din_x, wrk_rd_narrow_y_din_x, wrk_rd_narrow_x_din_y, wrk_rd_narrow_y_din_y);
                    //
                    // wide read data goes to the narrow write port
                    UOP_OPCODE_MERGE_LH:
                        //
                        update_narrow_dout(wrk_rd_wide_x_din_x, wrk_rd_wide_y_din_x, wrk_rd_wide_x_din_y, wrk_rd_wide_y_din_y);
                    //
                endcase
            //
        endcase
        //
        // one_pass_meander
        //
        case (wrk_fsm_state)
            //
            WRK_FSM_STATE_BUSY_M2,
            WRK_FSM_STATE_LATENCY_POST1_M2,
            WRK_FSM_STATE_LATENCY_POST2_M2:
                //
                case (opcode)
                    //
                    // Both output words come from the *x* read word, taken at two
                    // different delays (dly3 -> x output, dly2 -> y output).
                    UOP_OPCODE_COPY_LADDERS_X2Y: begin
                        //
                        update_wide_dout(wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2);
                        //
                        update_narrow_dout(wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2);
                        //
                    end
                    //
                    // Like COPY_LADDERS_X2Y, but the dly2 operands of the _x and _y
                    // sets are swapped.
                    UOP_OPCODE_CROSS_LADDERS_X2Y: begin
                        //
                        update_wide_dout(wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_x_dly2);
                        //
                        update_narrow_dout(wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_x_dly2);
                        //
                    end
                    //
                    // regadd_*_trunc are not defined in the visible part of the
                    // file; presumably the truncated adder outputs -- confirm.
                    UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                        //
                        update_narrow_dout(regadd_x_x_trunc, regadd_y_x_trunc, regadd_x_y_trunc, regadd_y_y_trunc);
                        //
                    end
                    //
                endcase
            //
        endcase
        //
        // two_pass
        //
        case (wrk_fsm_state)
            //
            WRK_FSM_STATE_BUSY_TP,
            WRK_FSM_STATE_LATENCY_POST1_TP,
            WRK_FSM_STATE_LATENCY_POST2_TP,
            WRK_FSM_STATE_LATENCY_POST3_TP,
            WRK_FSM_STATE_LATENCY_POST4_TP:
                //
                case (opcode)
                    //
                    // modsub_* signals are defined outside the visible part of the
                    // file. First pass stores two intermediate values per set into
                    // the narrow port; second pass stores modsub_*_mux into both
                    // words of both ports.
                    UOP_OPCODE_MODULAR_SUBTRACT:
                        //
                        if (!wrk_fsm_two_pass_pass)
                            update_narrow_dout(modsub_x_ab_dly_trunc, modsub_x_abn_trunc, modsub_y_ab_dly_trunc, modsub_y_abn_trunc);
                        else begin
                            update_wide_dout  (modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux);
                            update_narrow_dout(modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux);
                        end
                    //
                endcase
            //
        endcase
        //
    end

    //
    // Source Read Address Logic
    //
    // *_addr_xy_next hold the address that will be presented on the following
    // step; the same counter value is used for the _x and _y sets.
    //
    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_xy_next;
    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_xy_next;

    // Sticky flag (plus two delayed copies): set once the wide counter has
    // reached word_index_last_half during the current operation.
    reg rd_wide_xy_addr_xy_next_last_seen;
    reg rd_wide_xy_addr_xy_next_last_seen_dly1;
    reg rd_wide_xy_addr_xy_next_last_seen_dly2;

    // Wide counter wraps at word_index_last_half, narrow counter at word_index_last.
    wire rd_wide_xy_addr_xy_next_is_last   = rd_wide_xy_addr_xy_next   == word_index_last_half;
    wire rd_narrow_xy_addr_xy_next_is_last = rd_narrow_xy_addr_xy_next == word_index_last;

    // Set bank and address of both wide read ports (_x and _y get the same values).
    task update_rd_wide_bank_addr;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        begin
            {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {bank, addr};
            {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {bank, addr};
        end
    endtask

    // Change only the wide read bank. The address is explicitly re-assigned to
    // itself so that it overrides the don't-care default scheduled at the top
    // of the address process and therefore holds its value.
    task update_rd_wide_bank;
        input [BANK_ADDR_W -1:0] bank;
        begin
            {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {bank, rd_wide_xy_addr_x};
            {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {bank, rd_wide_xy_addr_y};
        end
    endtask

    // Set bank and address of both narrow read ports.
    task update_rd_narrow_bank_addr;
        input [BANK_ADDR_W -1:0] bank;
        input [  OP_ADDR_W -1:0] addr;
        begin
            {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {bank, addr};
            {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {bank, addr};
        end
    endtask

    // Change only the narrow read bank; the address holds (see update_rd_wide_bank).
    task update_rd_narrow_bank;
        input [BANK_ADDR_W -1:0] bank;
        begin
            {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {bank, rd_narrow_xy_addr_x};
            {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {bank, rd_narrow_xy_addr_y};
        end
    endtask

    // Load the "next address" counters with an explicit value.
    task update_rd_wide_addr_next;
        input [OP_ADDR_W -1:0] addr;
        rd_wide_xy_addr_xy_next <= addr;
    endtask

    task update_rd_narrow_addr_next;
        input [OP_ADDR_W -1:0] addr;
        rd_narrow_xy_addr_xy_next <= addr;
    endtask

    // Increment the "next address" counters, wrapping to OP_ADDR_ZERO after
    // the respective last index.
    task advance_rd_wide_addr_next;
        rd_wide_xy_addr_xy_next <= !rd_wide_xy_addr_xy_next_is_last ? rd_wide_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO;
    endtask

    task advance_rd_narrow_addr_next;
        rd_narrow_xy_addr_xy_next <= !rd_narrow_xy_addr_xy_next_is_last ? rd_narrow_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO;
    endtask

    // Tracks whether the wide counter has already hit its last index.
    // Maintained only for MERGE_LH and REGULAR_ADD_UNEVEN; cleared when the
    // respective FSM is about to enter its first PRE state, set (and kept)
    // once the last index is observed while heading into BUSY / BUSY_M1.
    always @(posedge clk)
        //
        case (opcode)

            UOP_OPCODE_MERGE_LH:

                case (wrk_fsm_state_next_one_pass)
                    WRK_FSM_STATE_LATENCY_PRE1:
                        rd_wide_xy_addr_xy_next_last_seen <= 1'b0;
                    WRK_FSM_STATE_BUSY:
                        if (!rd_wide_xy_addr_xy_next_last_seen && rd_wide_xy_addr_xy_next_is_last)
                            rd_wide_xy_addr_xy_next_last_seen <= 1'b1;
                endcase

            UOP_OPCODE_REGULAR_ADD_UNEVEN:

                case (wrk_fsm_state_next_one_pass_meander)
                    WRK_FSM_STATE_LATENCY_PRE1_M1: begin
                        rd_wide_xy_addr_xy_next_last_seen      <= 1'b0;
                        rd_wide_xy_addr_xy_next_last_seen_dly1 <= 1'b0;
                        rd_wide_xy_addr_xy_next_last_seen_dly2 <= 1'b0;
                    end
                    // the delayed copies advance only on BUSY_M1 steps,
                    // i.e. once per M1/M2 pair
                    WRK_FSM_STATE_BUSY_M1: begin
                        if (!rd_wide_xy_addr_xy_next_last_seen && rd_wide_xy_addr_xy_next_is_last)
                            rd_wide_xy_addr_xy_next_last_seen <= 1'b1;
                        rd_wide_xy_addr_xy_next_last_seen_dly1 <= rd_wide_xy_addr_xy_next_last_seen;
                        rd_wide_xy_addr_xy_next_last_seen_dly2 <= rd_wide_xy_addr_xy_next_last_seen_dly1;
                    end
                endcase

        endcase

    // Read bank/address generation. Bank and address default to don't-care
    // each clock and are overridden below; the decode uses the next state of
    // each FSM flavour. This process continues past the end of this span.
    always @(posedge clk) begin
        //
        update_rd_wide_bank_addr  (BANK_DNC, OP_ADDR_DNC);
        update_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC);
        //
        // one_pass
        //
        case (wrk_fsm_state_next_one_pass)
            //
            // first step: present address zero and preload the counters with one
            WRK_FSM_STATE_LATENCY_PRE1:
                //
                case (opcode)
                    //
                    UOP_OPCODE_PROPAGATE_CARRIES,
                    UOP_OPCODE_OUTPUT_FROM_NARROW,
                    UOP_OPCODE_COPY_CRT_Y2X,
                    UOP_OPCODE_MODULAR_REDUCE_INIT: begin
                        //
                        update_rd_wide_bank_addr  (sel_wide_in,   OP_ADDR_ZERO); update_rd_wide_addr_next  (OP_ADDR_ONE);
                        update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
                        //
                    end
                    // (continuation of the read-address process, one_pass flavour,
                    //  opcode decode for the LATENCY_PRE1 step)
                    //
                    // MERGE_LH starts reading the wide side from BANK_WIDE_L
                    UOP_OPCODE_MERGE_LH: begin
                        update_rd_wide_bank_addr  (BANK_WIDE_L,   OP_ADDR_ZERO); update_rd_wide_addr_next  (OP_ADDR_ONE);
                        update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
                    end
                    //
                endcase
            //
            // following steps: present the counter value, then advance the counter
            WRK_FSM_STATE_LATENCY_PRE2,
            WRK_FSM_STATE_BUSY:
                //
                case (opcode)
                    //
                    // NOTE: the wide port is addressed with the *narrow* counter here,
                    // i.e. it sweeps the full range instead of wrapping at the half index
                    UOP_OPCODE_PROPAGATE_CARRIES,
                    UOP_OPCODE_OUTPUT_FROM_NARROW,
                    UOP_OPCODE_COPY_CRT_Y2X: begin
                        //
                        update_rd_wide_bank_addr  (sel_wide_in,   rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next  ;
                        update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
                        //
                    end
                    //
                    // each port follows its own counter
                    UOP_OPCODE_MODULAR_REDUCE_INIT: begin
                        //
                        update_rd_wide_bank_addr  (sel_wide_in,   rd_wide_xy_addr_xy_next  ); advance_rd_wide_addr_next  ;
                        update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
                        //
                    end
                    //
                    // wide side: BANK_WIDE_L until the wide counter has wrapped once,
                    // BANK_WIDE_H afterwards
                    UOP_OPCODE_MERGE_LH: begin
                        //
                        if (!rd_wide_xy_addr_xy_next_last_seen)
                            update_rd_wide_bank_addr  (BANK_WIDE_L,   rd_wide_xy_addr_xy_next  );
                        else
                            update_rd_wide_bank_addr  (BANK_WIDE_H,   rd_wide_xy_addr_xy_next  );
                        advance_rd_wide_addr_next  ;
                        update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
                        //
                    end
                    //
                endcase
            //
        endcase
        //
        // one_pass_meander
        //
        // _M1 steps present a new address (and advance the counters); _M2 steps
        // keep the address and only switch the bank.
        //
        case (wrk_fsm_state_next_one_pass_meander)
            //
            WRK_FSM_STATE_LATENCY_PRE1_M1:
                case (opcode)
                    UOP_OPCODE_COPY_LADDERS_X2Y,
                    UOP_OPCODE_CROSS_LADDERS_X2Y: begin
                        update_rd_wide_bank_addr  (sel_wide_out,   OP_ADDR_ZERO); update_rd_wide_addr_next  (OP_ADDR_ONE);
                        update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
                    end
                    // NOTE: here the *narrow* port is given the sel_wide_in bank select
                    UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                        update_rd_wide_bank_addr  (sel_wide_in, OP_ADDR_ZERO); update_rd_wide_addr_next  (OP_ADDR_ONE);
                        update_rd_narrow_bank_addr(sel_wide_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
                    end
                endcase
            //
            WRK_FSM_STATE_LATENCY_PRE2_M1,
            WRK_FSM_STATE_BUSY_M1:
                case (opcode)
                    UOP_OPCODE_COPY_LADDERS_X2Y,
                    UOP_OPCODE_CROSS_LADDERS_X2Y: begin
                        update_rd_wide_bank_addr  (sel_wide_out,   rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next  ;
                        update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
                    end
                    UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                        update_rd_wide_bank_addr  (sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next  ;
                        update_rd_narrow_bank_addr(sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
                    end
                endcase
            //
            WRK_FSM_STATE_LATENCY_PRE1_M2,
            WRK_FSM_STATE_LATENCY_PRE2_M2,
            WRK_FSM_STATE_BUSY_M2:
                case (opcode)
                    UOP_OPCODE_COPY_LADDERS_X2Y,
                    UOP_OPCODE_CROSS_LADDERS_X2Y: begin
                        update_rd_wide_bank  (sel_wide_in  );
                        update_rd_narrow_bank(sel_narrow_in);
                    end
                    UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
                        update_rd_wide_bank  (sel_narrow_in);
                        update_rd_narrow_bank(sel_narrow_in);
                    end
                endcase
            //
        endcase
        //
        // two_pass
        //
        // First pass: wide port reads BANK_WIDE_N, narrow port reads sel_narrow_in.
        // Second pass: only the narrow port is driven, reading sel_narrow_out.
        //
        case (wrk_fsm_state_next_two_pass)
            //
            WRK_FSM_STATE_LATENCY_PRE1_TP:
                //
                case (opcode)
                    //
                    UOP_OPCODE_MODULAR_SUBTRACT:
                        //
                        if (!wrk_fsm_two_pass_pass) begin
                            update_rd_wide_bank_addr  (BANK_WIDE_N,   OP_ADDR_ZERO); update_rd_wide_addr_next  (OP_ADDR_ONE);
                            update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
                        end else begin
                            update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
                        end
                    //
                endcase
            //
            WRK_FSM_STATE_LATENCY_PRE2_TP,
            WRK_FSM_STATE_LATENCY_PRE3_TP,
            WRK_FSM_STATE_LATENCY_PRE4_TP,
            WRK_FSM_STATE_BUSY_TP:
                //
                case (opcode)
                    //
                    UOP_OPCODE_MODULAR_SUBTRACT:
                        //
                        if (!wrk_fsm_two_pass_pass) begin
                            update_rd_wide_bank_addr  (BANK_WIDE_N,   rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next  ;
                            update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
                        end else begin
                            update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
                        end
                    //
                endcase
            //
        endcase
        //
    end

    //
    // Destination Write Address Logic
    //
    // For MODULAR_REDUCE_INIT the destination wide bank depends on the
    // (2-clock delayed) narrow read address: addresses up to and including
    // word_index_last_half go to BANK_WIDE_L, the rest to BANK_WIDE_H.
    // Note that `<=` in the two wires below is a less-or-equal comparison.
    //
    wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half;
    wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half;

    wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_x = uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H;
    wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_y = uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H;

    // Set bank and address of the wide write ports; unlike the read helpers,
    // the _x and _y sets take independent arguments.
    task update_wr_wide_bank_addr;
        input [BANK_ADDR_W -1:0] x_bank;
        input [BANK_ADDR_W -1:0] y_bank;
        input [  OP_ADDR_W -1:0] x_addr;
        input [  OP_ADDR_W -1:0] y_addr;
        begin
            {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {x_bank, x_addr};
            {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {y_bank, y_addr};
        end
    endtask

    // Set bank and address of the narrow write ports.
    task update_wr_narrow_bank_addr;
        input [BANK_ADDR_W -1:0] x_bank;
        input [BANK_ADDR_W -1:0] y_bank;
        input [  OP_ADDR_W -1:0] x_addr;
        input [  OP_ADDR_W -1:0] y_addr;
        begin
            {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {x_bank, x_addr};
            {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {y_bank, y_addr};
        end
    endtask

    // Write bank/address generation: default don't-care, then override.
    // Write addresses are delayed copies of the read addresses: 2 clocks for
    // one_pass operations, 4 clocks for meander and two_pass operations.
    always @(posedge clk) begin
        //
        update_wr_wide_bank_addr  (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
        update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
        //
        // one_pass
        //
        case (wrk_fsm_state)
            //
            WRK_FSM_STATE_BUSY,
            WRK_FSM_STATE_LATENCY_POST1,
            WRK_FSM_STATE_LATENCY_POST2:
                //
                case (opcode)
                    //
                    // both ports follow the delayed *narrow* read address
                    UOP_OPCODE_PROPAGATE_CARRIES,
                    UOP_OPCODE_COPY_CRT_Y2X: begin
                        update_wr_wide_bank_addr  (sel_wide_out,   sel_wide_out,   rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
                        update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
                    end
                    //
                    // bank chosen by the L/H split above, address from the wide counter
                    UOP_OPCODE_MODULAR_REDUCE_INIT:
                        update_wr_wide_bank_addr(uop_modular_reduce_init_bank_x, uop_modular_reduce_init_bank_y, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_y_dly2);
                    //
                    UOP_OPCODE_MERGE_LH:
                        update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
                    //
                endcase
            //
        endcase
        //
        // one_pass_meander
        //
        case (wrk_fsm_state)
            //
            WRK_FSM_STATE_BUSY_M2,
            WRK_FSM_STATE_LATENCY_POST1_M2,
            WRK_FSM_STATE_LATENCY_POST2_M2:
                //
                case (opcode)
                    UOP_OPCODE_COPY_LADDERS_X2Y,
                    UOP_OPCODE_CROSS_LADDERS_X2Y: begin
                        update_wr_wide_bank_addr  (sel_wide_out,   sel_wide_out,   rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
                        update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
                    end
                    UOP_OPCODE_REGULAR_ADD_UNEVEN:
                        update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
                endcase
            //
        endcase
        //
        // two_pass
        //
        case (wrk_fsm_state)
            //
            WRK_FSM_STATE_BUSY_TP,
            WRK_FSM_STATE_LATENCY_POST1_TP,
            WRK_FSM_STATE_LATENCY_POST2_TP,
            WRK_FSM_STATE_LATENCY_POST3_TP,
            WRK_FSM_STATE_LATENCY_POST4_TP:
                //
                case (opcode)
                    //
                    UOP_OPCODE_MODULAR_SUBTRACT:
                        //
                        if (!wrk_fsm_two_pass_pass) begin
                            update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
                        end else begin
                            update_wr_wide_bank_addr  (sel_wide_out,   sel_wide_out,   rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
                            update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
                        end
                    //
                endcase
            //
        endcase
        //
    end

    //
    // FSM Process
    //
    // The opcode decides which of the three precomputed next states is loaded
    // into the shared state register. Any opcode not listed forces IDLE.
    //
    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) wrk_fsm_state <= WRK_FSM_STATE_IDLE;
        else case (opcode)
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_OUTPUT_FROM_NARROW,
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_MODULAR_REDUCE_INIT,
            UOP_OPCODE_MERGE_LH:            wrk_fsm_state <= wrk_fsm_state_next_one_pass;
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:  wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
            UOP_OPCODE_MODULAR_SUBTRACT:    wrk_fsm_state <= wrk_fsm_state_next_two_pass;
            default:                        wrk_fsm_state <= WRK_FSM_STATE_IDLE;
        endcase

    //
    // Busy Exit Logic
    //
    // One "done" flag per FSM flavour; the transition logic uses them to
    // leave the BUSY state(s).
    //
    reg wrk_fsm_done_one_pass         = 1'b0;
    reg wrk_fsm_done_one_pass_meander = 1'b0;
    reg wrk_fsm_done_two_pass         = 1'b0;

    // Flags default to 0 every clock and are overridden further down; this
    // process continues past the end of this span.
    always @(posedge clk) begin
        //
        wrk_fsm_done_one_pass         <= 1'b0;
        wrk_fsm_done_one_pass_meander <= 1'b0;
        // (continuation of the busy-exit process: last of the three default
        //  assignments, followed by the per-opcode overrides)
        wrk_fsm_done_two_pass         <= 1'b0;
        //
        case (opcode)
            //
            // one_pass: flag is raised while in BUSY once the narrow counter
            // sits at the last index
            UOP_OPCODE_PROPAGATE_CARRIES,
            UOP_OPCODE_OUTPUT_FROM_NARROW,
            UOP_OPCODE_COPY_CRT_Y2X,
            UOP_OPCODE_MODULAR_REDUCE_INIT,
            UOP_OPCODE_MERGE_LH:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY:
                        if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_one_pass <= 1'b1;
                endcase
            //
            // meander: the flag is raised in BUSY_M2 but tested in the *next*
            // BUSY_M2 (one M1 step later), so BUSY_M1 must hold it instead of
            // letting the default above clear it
            UOP_OPCODE_COPY_LADDERS_X2Y,
            UOP_OPCODE_CROSS_LADDERS_X2Y,
            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY_M2:
                        if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1;
                    WRK_FSM_STATE_BUSY_M1:
                        wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander;
                endcase
            //
            UOP_OPCODE_MODULAR_SUBTRACT:
                //
                case (wrk_fsm_state)
                    WRK_FSM_STATE_BUSY_TP:
                        if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_two_pass <= 1'b1;
                endcase
            //
            //
        endcase
        //
    end

    //
    // FSM Helper Logic
    //
    // Pass bookkeeping for the two_pass flavour. Both flags are cleared when
    // an operation is started from IDLE. `pass` is set in the last POST state
    // of a pass; `pass_dly` is set in HOLDOFF_TP. Because the transition logic
    // samples `pass_dly` in HOLDOFF_TP before it is updated, the first visit
    // to HOLDOFF_TP loops back for the second pass and the second visit exits.
    //
    always @(posedge clk)
        //
        case (wrk_fsm_state)
            WRK_FSM_STATE_IDLE:             if (ena) {wrk_fsm_two_pass_pass, wrk_fsm_two_pass_pass_dly} <= {1'b0, 1'b0};
            WRK_FSM_STATE_LATENCY_POST4_TP: wrk_fsm_two_pass_pass     <= 1'b1;
            WRK_FSM_STATE_HOLDOFF_TP:       wrk_fsm_two_pass_pass_dly <= 1'b1;
        endcase

    //
    // FSM Transition Logic
    //
    // Three purely combinational next-state decoders, all looking at the same
    // state register. A decoder that sees a state belonging to another
    // flavour falls into `default` and yields IDLE.
    //

    // one_pass: IDLE -> PRE1 -> PRE2 -> BUSY (until done) -> POST1 -> POST2 -> STOP -> IDLE
    always @* begin
        //
        case (wrk_fsm_state)
            WRK_FSM_STATE_IDLE:          wrk_fsm_state_next_one_pass = ena                   ? WRK_FSM_STATE_LATENCY_PRE1  : WRK_FSM_STATE_IDLE ;
            WRK_FSM_STATE_LATENCY_PRE1:  wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_LATENCY_PRE2                        ;
            WRK_FSM_STATE_LATENCY_PRE2:  wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_BUSY                                ;
            WRK_FSM_STATE_BUSY:          wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ;
            WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_LATENCY_POST2                       ;
            WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_STOP                                ;
            WRK_FSM_STATE_STOP:          wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_IDLE                                ;
            default:                     wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_IDLE                                ;
        endcase
        //
    end

    // one_pass_meander: same shape, but every step is an M1/M2 pair; BUSY
    // alternates BUSY_M1 <-> BUSY_M2 and can only be left from BUSY_M2.
    always @* begin
        //
        case (wrk_fsm_state)
            WRK_FSM_STATE_IDLE:             wrk_fsm_state_next_one_pass_meander = ena                           ? WRK_FSM_STATE_LATENCY_PRE1_M1  : WRK_FSM_STATE_IDLE    ;
            //
            WRK_FSM_STATE_LATENCY_PRE1_M1:  wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_PRE1_M2                           ;
            WRK_FSM_STATE_LATENCY_PRE1_M2:  wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_PRE2_M1                           ;
            WRK_FSM_STATE_LATENCY_PRE2_M1:  wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_PRE2_M2                           ;
            WRK_FSM_STATE_LATENCY_PRE2_M2:  wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_BUSY_M1                                   ;
            WRK_FSM_STATE_BUSY_M1:          wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_BUSY_M2                                   ;
            WRK_FSM_STATE_BUSY_M2:          wrk_fsm_state_next_one_pass_meander = wrk_fsm_done_one_pass_meander ? WRK_FSM_STATE_LATENCY_POST1_M1 : WRK_FSM_STATE_BUSY_M1 ;
            WRK_FSM_STATE_LATENCY_POST1_M1: wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_POST1_M2                          ;
            WRK_FSM_STATE_LATENCY_POST1_M2: wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_POST2_M1                          ;
            WRK_FSM_STATE_LATENCY_POST2_M1: wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_POST2_M2                          ;
            WRK_FSM_STATE_LATENCY_POST2_M2: wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_STOP                                      ;
            //
            WRK_FSM_STATE_STOP:             wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_IDLE                                      ;
            //
            default:                        wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_IDLE                                      ;
        endcase
        //
    end

    // two_pass: PRE1..PRE4 -> BUSY_TP (until done) -> POST1..POST4 -> HOLDOFF_TP,
    // then either back to PRE1_TP for the second pass or on to STOP.
    always @* begin
        //
        case (wrk_fsm_state)
            WRK_FSM_STATE_IDLE:             wrk_fsm_state_next_two_pass = ena                       ? WRK_FSM_STATE_LATENCY_PRE1_TP  : WRK_FSM_STATE_IDLE;
            WRK_FSM_STATE_LATENCY_PRE1_TP:  wrk_fsm_state_next_two_pass =                             WRK_FSM_STATE_LATENCY_PRE2_TP                       ;
            WRK_FSM_STATE_LATENCY_PRE2_TP:  wrk_fsm_state_next_two_pass =                             WRK_FSM_STATE_LATENCY_PRE3_TP                       ;
            WRK_FSM_STATE_LATENCY_PRE3_TP:  wrk_fsm_state_next_two_pass =                             WRK_FSM_STATE_LATENCY_PRE4_TP                       ;
            WRK_FSM_STATE_LATENCY_PRE4_TP:  wrk_fsm_state_next_two_pass =                             WRK_FSM_STATE_BUSY_TP                               ;
            WRK_FSM_STATE_BUSY_TP:          wrk_fsm_state_next_two_pass = wrk_fsm_done_two_pass     ? WRK_FSM_STATE_LATENCY_POST1_TP : WRK_FSM_STATE_BUSY_TP;
            WRK_FSM_STATE_LATENCY_POST1_TP: wrk_fsm_state_next_two_pass =                             WRK_FSM_STATE_LATENCY_POST2_TP                      ;
            WRK_FSM_STATE_LATENCY_POST2_TP: wrk_fsm_state_next_two_pass =                             WRK_FSM_STATE_LATENCY_POST3_TP                      ;
            WRK_FSM_STATE_LATENCY_POST3_TP: wrk_fsm_state_next_two_pass =                             WRK_FSM_STATE_LATENCY_POST4_TP                      ;
            WRK_FSM_STATE_LATENCY_POST4_TP: wrk_fsm_state_next_two_pass =                             WRK_FSM_STATE_HOLDOFF_TP                            ;
            WRK_FSM_STATE_HOLDOFF_TP:       wrk_fsm_state_next_two_pass = wrk_fsm_two_pass_pass_dly ? WRK_FSM_STATE_STOP             : WRK_FSM_STATE_LATENCY_PRE1_TP;
            WRK_FSM_STATE_STOP:             wrk_fsm_state_next_two_pass =                             WRK_FSM_STATE_IDLE                                  ;
            default:                        wrk_fsm_state_next_two_pass =                             WRK_FSM_STATE_IDLE                                  ;
        endcase
        //
    end

    //
    // Ready Logic
    //
    // rdy is high out of reset, drops on the clock after ena is seen in IDLE,
    // and is raised again when the FSM passes through STOP. In all other
    // states the register keeps its value (no default branch).
    //
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    always @(posedge clk or negedge rst_n)
        //
        if (!rst_n) rdy_reg <= 1'b1;
        else case (wrk_fsm_state)
            WRK_FSM_STATE_IDLE: rdy_reg <= ~ena;
            WRK_FSM_STATE_STOP: rdy_reg <= 1'b1;
        endcase

    //
    // UOP_OPCODE_PROPAGATE_CARRIES
    //
    // Carry registers, one per narrow read word: the carry taken out of the
    // previous word, added into the current one.
    //
    reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r;
    reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r;
    reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
    reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r;

    // Read word plus zero-extended carry.
    // NOTE(review): the zero-extension assumes WORD_EXT_W == WORD_W + CARRY_W
    // -- confirm against modexpng_parameters.vh.
    wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
    wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
    wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
    wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};

    // Bits above WORD_W of the sum: the carry handed to the next word.
    wire [CARRY_W -1:0] rd_narrow_x_din_x_w_cry_msb = rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W];
    wire [CARRY_W -1:0] rd_narrow_y_din_x_w_cry_msb = rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W];
    wire [CARRY_W -1:0] rd_narrow_x_din_y_w_cry_msb = rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W];
    wire [CARRY_W -1:0] rd_narrow_y_din_y_w_cry_msb = rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W];

    // Low WORD_W bits of the sum with the carry field zeroed: the word that is written back.
    wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]};
    wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]};
    wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]};
    wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]};

    // Load the four wide write data registers.
    // Argument order: x word of _x, y word of _x, x word of _y, y word of _y.
    task update_wide_dout;
        input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y;
        {wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <= { x_x, y_x, x_y, y_y };
    endtask

    // Load the four narrow write data registers (same argument order).
    task update_narrow_dout;
        input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y;
        {wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <= { x_x, y_x, x_y, y_y };
    endtask

    // Load the four carry registers (same argument order).
    task update_narrow_carries;
        input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry;
        {rd_narrow_x_din_x_cry_r, rd_narrow_y_din_x_cry_r, rd_narrow_x_din_y_cry_r, rd_narrow_y_din_y_cry_r} <= { x_x_cry, y_x_cry, x_y_cry, y_y_cry };
    endtask

    // Carry chain sequencing, active only for PROPAGATE_CARRIES: carries are
    // cleared in LATENCY_PRE2 and captured from the current sums in BUSY and
    // LATENCY_POST1; in every other state they hold.
    always @(posedge clk)
        //
        if (opcode == UOP_OPCODE_PROPAGATE_CARRIES)
            //
            case (wrk_fsm_state)
                //
                WRK_FSM_STATE_LATENCY_PRE2:
                    //
                    update_narrow_carries(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO);
                //
                WRK_FSM_STATE_BUSY,
                WRK_FSM_STATE_LATENCY_POST1:
                    //
                    update_narrow_carries(rd_narrow_x_din_x_w_cry_msb, rd_narrow_y_din_x_w_cry_msb, rd_narrow_x_din_y_w_cry_msb, rd_narrow_y_din_y_w_cry_msb);
                //
            endcase

    //
    // UOP_OPCODE_MODULAR_SUBTRACT
    //
    // Registers of the modular subtractor; each is one bit wider than a word
    // (WORD_W+1 bits), and bit WORD_W is what the *_masked wires below select.
    // The logic driving them lies beyond the end of this span.
    //
    reg [WORD_W:0] modsub_x_ab;
    reg [WORD_W:0] modsub_y_ab;

    reg [WORD_W:0] modsub_x_ab_dly;
    reg [WORD_W:0] modsub_y_ab_dly;

    reg [WORD_W:0] modsub_x_abn;
    reg [WORD_W:0] modsub_y_abn;

    reg modsub_x_ab_mask_now;
    reg modsub_y_ab_mask_now;

    reg modsub_x_abn_mask_now;
    reg modsub_y_abn_mask_now;

    reg modsub_x_borrow_r;
    reg modsub_y_borrow_r;

    // Top bit of modsub_*_ab, forced to 0 while the corresponding mask_now
    // flag is set. The second expression is completed after this span.
    wire modsub_x_ab_masked = modsub_x_ab_mask_now ? 1'b0 : modsub_x_ab[WORD_W];
    wire modsub_y_ab_masked = modsub_y_ab_mask_now ?
1'b0 : modsub_y_ab[WORD_W]; wire modsub_x_abn_masked = modsub_x_abn_mask_now ? 1'b0 : modsub_x_abn[WORD_W]; wire modsub_y_abn_masked = modsub_y_abn_mask_now ? 1'b0 : modsub_y_abn[WORD_W]; wire [WORD_W:0] modsub_x_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; wire [WORD_W:0] modsub_y_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; wire [WORD_W:0] modsub_x_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; wire [WORD_W:0] modsub_y_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; wire [WORD_W:0] modsub_x_wide_x_lsb_pad = {1'b0, wrk_rd_wide_x_din_x_dly1[WORD_W-1:0]}; wire [WORD_W:0] modsub_x_wide_y_lsb_pad = {1'b0, wrk_rd_wide_x_din_y_dly1[WORD_W-1:0]}; wire [WORD_EXT_W -1:0] modsub_x_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_x_ab_dly[WORD_W-1:0]}; wire [WORD_EXT_W -1:0] modsub_y_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_y_ab_dly[WORD_W-1:0]}; wire [WORD_EXT_W -1:0] modsub_x_abn_trunc = {{CARRY_W{1'b0}}, modsub_x_abn[WORD_W-1:0]}; wire [WORD_EXT_W -1:0] modsub_y_abn_trunc = {{CARRY_W{1'b0}}, modsub_y_abn[WORD_W-1:0]}; wire [WORD_EXT_W -1:0] modsub_x_mux = !modsub_x_borrow_r ? wrk_rd_narrow_x_din_x_dly2 : wrk_rd_narrow_y_din_x_dly2; wire [WORD_EXT_W -1:0] modsub_y_mux = !modsub_y_borrow_r ? 
wrk_rd_narrow_x_din_y_dly2 : wrk_rd_narrow_y_din_y_dly2; wire [WORD_W:0] modsub_x_ab_lsb_pad = {1'b0, modsub_x_ab[WORD_W-1:0]}; wire [WORD_W:0] modsub_y_ab_lsb_pad = {1'b0, modsub_y_ab[WORD_W-1:0]}; task update_modsub_ab; begin modsub_x_ab <= modsub_x_narrow_x_lsb_pad - modsub_y_narrow_x_lsb_pad - modsub_x_ab_masked; modsub_y_ab <= modsub_x_narrow_y_lsb_pad - modsub_y_narrow_y_lsb_pad - modsub_y_ab_masked; end endtask task update_modsub_abn; begin modsub_x_abn <= modsub_x_ab_lsb_pad + modsub_x_wide_x_lsb_pad + modsub_x_abn_masked; modsub_y_abn <= modsub_y_ab_lsb_pad + modsub_x_wide_y_lsb_pad + modsub_y_abn_masked; end endtask always @(posedge clk) // if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) // case (wrk_fsm_state) WRK_FSM_STATE_LATENCY_POST4_TP: if (!wrk_fsm_two_pass_pass) {modsub_x_borrow_r, modsub_y_borrow_r} <= {modsub_x_ab_dly[WORD_W], modsub_y_ab_dly[WORD_W]}; endcase always @(posedge clk) begin modsub_x_ab_dly <= modsub_x_ab; modsub_y_ab_dly <= modsub_y_ab; end always @(posedge clk) begin // modsub_x_ab <= {1'bX, WORD_DNC}; modsub_y_ab <= {1'bX, WORD_DNC}; // modsub_x_abn <= {1'bX, WORD_DNC}; modsub_y_abn <= {1'bX, WORD_DNC}; // if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) // case (wrk_fsm_state) // WRK_FSM_STATE_LATENCY_PRE3_TP: update_modsub_ab; WRK_FSM_STATE_LATENCY_PRE4_TP, WRK_FSM_STATE_BUSY_TP, WRK_FSM_STATE_LATENCY_POST1_TP, WRK_FSM_STATE_LATENCY_POST2_TP: begin update_modsub_ab; update_modsub_abn; end // WRK_FSM_STATE_LATENCY_POST3_TP: // update_modsub_abn; // endcase // end always @(posedge clk) begin // modsub_x_ab_mask_now <= 1'b0; modsub_y_ab_mask_now <= 1'b0; // modsub_x_abn_mask_now <= 1'b0; modsub_y_abn_mask_now <= 1'b0; // if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) // case (wrk_fsm_state) // WRK_FSM_STATE_LATENCY_PRE2_TP: begin modsub_x_ab_mask_now <= 1'b1; modsub_y_ab_mask_now <= 1'b1; end // WRK_FSM_STATE_LATENCY_PRE3_TP: begin modsub_x_abn_mask_now <= 1'b1; modsub_y_abn_mask_now <= 1'b1; end // endcase // end // // UOP_OPCODE_ADD_UNEVEN // 
//
    // Running sums, one per narrow read port (X/Y bank of the X/Y channel).
    // Each is one bit wider than a word; bit WORD_W is the carry out.
    //
    reg [WORD_W:0] regadd_x_x;
    reg [WORD_W:0] regadd_y_x;
    reg [WORD_W:0] regadd_x_y;
    reg [WORD_W:0] regadd_y_y;

    // carry kept from the previous word of each sum
    reg            regadd_x_x_cry;
    reg            regadd_y_x_cry;
    reg            regadd_x_y_cry;
    reg            regadd_y_y_cry;

    // sums without the carry bit, zero-extended to the extended word width
    wire [WORD_EXT_W-1:0] regadd_x_x_trunc = {{CARRY_W{1'b0}}, regadd_x_x[WORD_W-1:0]};
    wire [WORD_EXT_W-1:0] regadd_y_x_trunc = {{CARRY_W{1'b0}}, regadd_y_x[WORD_W-1:0]};
    wire [WORD_EXT_W-1:0] regadd_x_y_trunc = {{CARRY_W{1'b0}}, regadd_x_y[WORD_W-1:0]};
    wire [WORD_EXT_W-1:0] regadd_y_y_trunc = {{CARRY_W{1'b0}}, regadd_y_y[WORD_W-1:0]};

    // registered adder operands: "a" comes from the one-cycle delayed read
    // data, "b" from the current read data
    reg [WORD_W:0] regadd_x_x_a_lsb_pad;
    reg [WORD_W:0] regadd_x_x_b_lsb_pad;
    reg [WORD_W:0] regadd_y_x_a_lsb_pad;
    reg [WORD_W:0] regadd_y_x_b_lsb_pad;
    reg [WORD_W:0] regadd_x_y_a_lsb_pad;
    reg [WORD_W:0] regadd_x_y_b_lsb_pad;
    reg [WORD_W:0] regadd_y_y_a_lsb_pad;
    reg [WORD_W:0] regadd_y_y_b_lsb_pad;

    //
    // Operand capture. Operands are loaded in the *_M2 states listed below and
    // are don't-care otherwise. Once rd_wide_xy_addr_xy_next_last_seen_dly2 is
    // set, the "a" operand is replaced by WORD_ZERO.
    //
    always @(posedge clk) begin
        //
        regadd_x_x_a_lsb_pad <= {1'bX, WORD_DNC};
        regadd_x_x_b_lsb_pad <= {1'bX, WORD_DNC};
        regadd_y_x_a_lsb_pad <= {1'bX, WORD_DNC};
        regadd_y_x_b_lsb_pad <= {1'bX, WORD_DNC};
        regadd_x_y_a_lsb_pad <= {1'bX, WORD_DNC};
        regadd_x_y_b_lsb_pad <= {1'bX, WORD_DNC};
        regadd_y_y_a_lsb_pad <= {1'bX, WORD_DNC};
        regadd_y_y_b_lsb_pad <= {1'bX, WORD_DNC};
        //
        if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN)
            //
            case (wrk_fsm_state)
                //
                WRK_FSM_STATE_LATENCY_PRE2_M2,
                WRK_FSM_STATE_BUSY_M2,
                WRK_FSM_STATE_LATENCY_POST1_M2:
                    begin
                        // X bank, X channel
                        regadd_x_x_a_lsb_pad <= {1'b0, rd_wide_xy_addr_xy_next_last_seen_dly2 ? WORD_ZERO : wrk_rd_narrow_x_din_x_dly1[WORD_W-1:0]};
                        regadd_x_x_b_lsb_pad <= {1'b0, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                        // Y bank, X channel
                        regadd_y_x_a_lsb_pad <= {1'b0, rd_wide_xy_addr_xy_next_last_seen_dly2 ? WORD_ZERO : wrk_rd_narrow_y_din_x_dly1[WORD_W-1:0]};
                        regadd_y_x_b_lsb_pad <= {1'b0, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
                        // X bank, Y channel
                        regadd_x_y_a_lsb_pad <= {1'b0, rd_wide_xy_addr_xy_next_last_seen_dly2 ? WORD_ZERO : wrk_rd_narrow_x_din_y_dly1[WORD_W-1:0]};
                        regadd_x_y_b_lsb_pad <= {1'b0, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                        // Y bank, Y channel
                        regadd_y_y_a_lsb_pad <= {1'b0, rd_wide_xy_addr_xy_next_last_seen_dly2 ? WORD_ZERO : wrk_rd_narrow_y_din_y_dly1[WORD_W-1:0]};
                        regadd_y_y_b_lsb_pad <= {1'b0, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
                    end
                //
            endcase
        //
    end

    //
    // Addition. The captured operands and the stored carry are summed in the
    // *_M1 states listed below; in every other state the sums are don't-care.
    //
    always @(posedge clk) begin
        //
        regadd_x_x <= {1'bX, WORD_DNC};
        regadd_y_x <= {1'bX, WORD_DNC};
        regadd_x_y <= {1'bX, WORD_DNC};
        regadd_y_y <= {1'bX, WORD_DNC};
        //
        if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN)
            //
            case (wrk_fsm_state)
                //
                WRK_FSM_STATE_BUSY_M1,
                WRK_FSM_STATE_LATENCY_POST1_M1,
                WRK_FSM_STATE_LATENCY_POST2_M1:
                    begin
                        regadd_x_x <= regadd_x_x_a_lsb_pad + regadd_x_x_b_lsb_pad + regadd_x_x_cry;
                        regadd_y_x <= regadd_y_x_a_lsb_pad + regadd_y_x_b_lsb_pad + regadd_y_x_cry;
                        regadd_x_y <= regadd_x_y_a_lsb_pad + regadd_x_y_b_lsb_pad + regadd_x_y_cry;
                        regadd_y_y <= regadd_y_y_a_lsb_pad + regadd_y_y_b_lsb_pad + regadd_y_y_cry;
                    end
                //
            endcase
        //
    end

    //
    // Carry chain. The carries are cleared in LATENCY_PRE2_M2 and reloaded
    // from bit WORD_W of the sums in BUSY_M2 and LATENCY_POST1_M2; in every
    // other state they are don't-care.
    //
    always @(posedge clk) begin
        //
        {regadd_x_x_cry, regadd_y_x_cry, regadd_x_y_cry, regadd_y_y_cry} <= {4{1'bX}};
        //
        if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN)
            //
            case (wrk_fsm_state)
                //
                WRK_FSM_STATE_LATENCY_PRE2_M2:
                    {regadd_x_x_cry, regadd_y_x_cry, regadd_x_y_cry, regadd_y_y_cry} <= {4{1'b0}};
                //
                WRK_FSM_STATE_BUSY_M2,
                WRK_FSM_STATE_LATENCY_POST1_M2:
                    {regadd_x_x_cry, regadd_y_x_cry, regadd_x_y_cry, regadd_y_y_cry} <=
                        {regadd_x_x[WORD_W], regadd_y_x[WORD_W], regadd_x_y[WORD_W], regadd_y_y[WORD_W]};
                //
            endcase
        //
    end

endmodule