diff options
Diffstat (limited to 'rtl/modexpng_general_worker.v')
-rw-r--r-- | rtl/modexpng_general_worker.v | 1896 |
1 files changed, 838 insertions, 1058 deletions
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v index eadd284..0620bd6 100644 --- a/rtl/modexpng_general_worker.v +++ b/rtl/modexpng_general_worker.v @@ -127,67 +127,46 @@ module modexpng_general_worker // // FSM Declaration // - localparam [5:0] WRK_FSM_STATE_IDLE = 6'h00; - - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1 = 6'h01; - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2 = 6'h02; - localparam [5:0] WRK_FSM_STATE_BUSY = 6'h03; - localparam [5:0] WRK_FSM_STATE_LATENCY_POST1 = 6'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug! - localparam [5:0] WRK_FSM_STATE_LATENCY_POST2 = 6'h06; - - localparam [5:0] WRK_FSM_STATE_STOP = 6'h07; - - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 6'h10; - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 6'h11; - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 6'h12; - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 6'h13; - localparam [5:0] WRK_FSM_STATE_BUSY_M1 = 6'h14; - localparam [5:0] WRK_FSM_STATE_BUSY_M2 = 6'h15; - localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 6'h16; - localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 6'h17; - localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 6'h18; - localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 6'h19; - - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_TP = 6'h20; - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_TP = 6'h21; - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE3_TP = 6'h22; - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE4_TP = 6'h23; - localparam [5:0] WRK_FSM_STATE_BUSY_TP = 6'h24; - localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_TP = 6'h25; - localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_TP = 6'h26; - localparam [5:0] WRK_FSM_STATE_LATENCY_POST3_TP = 6'h27; - localparam [5:0] WRK_FSM_STATE_LATENCY_POST4_TP = 6'h28; - localparam [5:0] WRK_FSM_STATE_HOLDOFF_TP = 6'h29; - - reg [5:0] wrk_fsm_state = WRK_FSM_STATE_IDLE; - reg [5:0] wrk_fsm_state_next_one_pass; // single address space sweep - reg [5:0] 
wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y) - reg [5:0] wrk_fsm_state_next_two_pass; // two address space sweeps - reg wrk_fsm_two_pass_pass; // 0=first pass, 1=second pass - reg wrk_fsm_two_pass_pass_dly; // 0=first pass, 1=second pass - - - // TODO: Comment on how narrow/wide address increment works (narrow is one long sweep, wide is two twice shorter sweeps) + + localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0; + + localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1; + localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2; + localparam [3:0] WRK_FSM_STATE_LATENCY_PRE3 = 4'h3; + localparam [3:0] WRK_FSM_STATE_LATENCY_PRE4 = 4'h4; + + localparam [3:0] WRK_FSM_STATE_BUSY1 = 4'hA; + localparam [3:0] WRK_FSM_STATE_BUSY2 = 4'hB; + localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; + localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6; + localparam [3:0] WRK_FSM_STATE_LATENCY_POST3 = 4'h7; + localparam [3:0] WRK_FSM_STATE_LATENCY_POST4 = 4'h8; + + localparam [3:0] WRK_FSM_STATE_STOP = 4'hF; + + reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE; + reg [3:0] wrk_fsm_state_next; + // // Control Signals // - reg rd_wide_xy_ena_x = 1'b0; - reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_x; - reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_x; + reg rd_wide_ena_x = 1'b0; + reg [BANK_ADDR_W -1:0] rd_wide_bank_x; + reg [ OP_ADDR_W -1:0] rd_wide_addr_x; - reg rd_narrow_xy_ena_x = 1'b0; - reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_x; - reg [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x; + reg rd_narrow_ena_x = 1'b0; + reg [BANK_ADDR_W -1:0] rd_narrow_bank_x; + reg [ OP_ADDR_W -1:0] rd_narrow_addr_x; - reg rd_wide_xy_ena_y = 1'b0; - reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_y; - reg [ OP_ADDR_W -1:0] rd_wide_xy_addr_y; + reg rd_wide_ena_y = 1'b0; + reg [BANK_ADDR_W -1:0] rd_wide_bank_y; + reg [ OP_ADDR_W -1:0] rd_wide_addr_y; - reg rd_narrow_xy_ena_y = 1'b0; - reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_y; - reg [ OP_ADDR_W -1:0] 
rd_narrow_xy_addr_y; + reg rd_narrow_ena_y = 1'b0; + reg [BANK_ADDR_W -1:0] rd_narrow_bank_y; + reg [ OP_ADDR_W -1:0] rd_narrow_addr_y; reg wr_wide_xy_ena_x = 1'b0; reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_x; @@ -217,21 +196,21 @@ module modexpng_general_worker // // Mapping // - assign wrk_rd_wide_xy_ena_x = rd_wide_xy_ena_x; - assign wrk_rd_wide_xy_bank_x = rd_wide_xy_bank_x; - assign wrk_rd_wide_xy_addr_x = rd_wide_xy_addr_x; + assign wrk_rd_wide_xy_ena_x = rd_wide_ena_x; + assign wrk_rd_wide_xy_bank_x = rd_wide_bank_x; + assign wrk_rd_wide_xy_addr_x = rd_wide_addr_x; - assign wrk_rd_narrow_xy_ena_x = rd_narrow_xy_ena_x; - assign wrk_rd_narrow_xy_bank_x = rd_narrow_xy_bank_x; - assign wrk_rd_narrow_xy_addr_x = rd_narrow_xy_addr_x; + assign wrk_rd_narrow_xy_ena_x = rd_narrow_ena_x; + assign wrk_rd_narrow_xy_bank_x = rd_narrow_bank_x; + assign wrk_rd_narrow_xy_addr_x = rd_narrow_addr_x; - assign wrk_rd_wide_xy_ena_y = rd_wide_xy_ena_y; - assign wrk_rd_wide_xy_bank_y = rd_wide_xy_bank_y; - assign wrk_rd_wide_xy_addr_y = rd_wide_xy_addr_y; + assign wrk_rd_wide_xy_ena_y = rd_wide_ena_y; + assign wrk_rd_wide_xy_bank_y = rd_wide_bank_y; + assign wrk_rd_wide_xy_addr_y = rd_wide_addr_y; - assign wrk_rd_narrow_xy_ena_y = rd_narrow_xy_ena_y; - assign wrk_rd_narrow_xy_bank_y = rd_narrow_xy_bank_y; - assign wrk_rd_narrow_xy_addr_y = rd_narrow_xy_addr_y; + assign wrk_rd_narrow_xy_ena_y = rd_narrow_ena_y; + assign wrk_rd_narrow_xy_bank_y = rd_narrow_bank_y; + assign wrk_rd_narrow_xy_addr_y = rd_narrow_addr_y; assign wrk_wr_wide_xy_ena_x = wr_wide_xy_ena_x; assign wrk_wr_wide_xy_bank_x = wr_wide_xy_bank_x; @@ -260,172 +239,111 @@ module modexpng_general_worker // // Delays - // - reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1; - reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2; - reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly3; - reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly4; - reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1; - reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2; - reg [OP_ADDR_W -1:0] 
rd_wide_xy_addr_y_dly3; - reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly4; - - reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1; - reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2; - reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly3; - reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly4; - reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1; - reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2; - reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly3; - reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly4; - - reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1; - reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly2; - reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly3; - //reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly4; - - reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1; - reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly2; - reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly3; - //reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly4; - - reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1; - reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2; - reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly3; - reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly1; - reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly2; - - reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1; - reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2; - reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3; - reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly1; - reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly2; + // + reg [OP_ADDR_W -1:0] rd_narrow_addr_x_dly[0:3]; + reg [OP_ADDR_W -1:0] rd_narrow_addr_y_dly[0:3]; + + reg [OP_ADDR_W -1:0] rd_wide_addr_x_dly[0:3]; + reg [OP_ADDR_W -1:0] rd_wide_addr_y_dly[0:3]; + + reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly1; + reg [WORD_EXT_W -1:0] rd_wide_y_din_x_dly1; + reg [WORD_EXT_W -1:0] rd_wide_x_din_y_dly1; + reg [WORD_EXT_W -1:0] rd_wide_y_din_y_dly1; + reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly1; + reg [WORD_EXT_W -1:0] rd_narrow_y_din_x_dly1; + reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly1; + reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly1; always @(posedge clk) 
begin // - {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x}; - {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y}; + {rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x}; + {rd_wide_y_din_x_dly1} <= {wrk_rd_wide_y_din_x}; + {rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y}; + {rd_wide_y_din_y_dly1} <= {wrk_rd_wide_y_din_y}; // - {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x}; - {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y}; + {rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x}; + {rd_narrow_y_din_x_dly1} <= {wrk_rd_narrow_y_din_x}; + {rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y}; + {rd_narrow_y_din_y_dly1} <= {wrk_rd_narrow_y_din_y}; // - {/*wrk_rd_wide_x_din_x_dly4,*/ wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {/*wrk_rd_wide_x_din_x_dly3,*/ wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x}; - {/*wrk_rd_wide_x_din_y_dly4,*/ wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {/*wrk_rd_wide_x_din_y_dly3,*/ wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y}; + {rd_narrow_addr_x_dly[3], rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0]} <= {rd_narrow_addr_x_dly[2], rd_narrow_addr_x_dly[1], rd_narrow_addr_x_dly[0], rd_narrow_addr_x}; + {rd_narrow_addr_y_dly[3], rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0]} <= 
{rd_narrow_addr_y_dly[2], rd_narrow_addr_y_dly[1], rd_narrow_addr_y_dly[0], rd_narrow_addr_y}; // - {wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x}; - {wrk_rd_narrow_y_din_x_dly2, wrk_rd_narrow_y_din_x_dly1} <= {wrk_rd_narrow_y_din_x_dly1, wrk_rd_narrow_y_din_x}; - {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y}; - {wrk_rd_narrow_y_din_y_dly2, wrk_rd_narrow_y_din_y_dly1} <= {wrk_rd_narrow_y_din_y_dly1, wrk_rd_narrow_y_din_y}; + {rd_wide_addr_x_dly[3], rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0]} <= {rd_wide_addr_x_dly[2], rd_wide_addr_x_dly[1], rd_wide_addr_x_dly[0], rd_wide_addr_x}; + {rd_wide_addr_y_dly[3], rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0]} <= {rd_wide_addr_y_dly[2], rd_wide_addr_y_dly[1], rd_wide_addr_y_dly[0], rd_wide_addr_y}; // end - - + + // // Source Read Enable Logic // + task _update_wide_rd_en; input _en; {rd_wide_ena_x, rd_wide_ena_y } <= {2{_en}}; endtask + task _update_narrow_rd_en; input _en; {rd_narrow_ena_x, rd_narrow_ena_y} <= {2{_en}}; endtask - task _update_wide_xy_rd_en; input _en; {rd_wide_xy_ena_x, rd_wide_xy_ena_y } <= {2{_en}}; endtask - task _update_narrow_xy_rd_en; input _en; {rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{_en}}; endtask + task enable_wide_rd_en; _update_wide_rd_en(1'b1); endtask + task disable_wide_rd_en; _update_wide_rd_en(1'b0); endtask - task enable_wide_xy_rd_en; _update_wide_xy_rd_en(1'b1); endtask - task disable_wide_xy_rd_en; _update_wide_xy_rd_en(1'b0); endtask - - task enable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b1); endtask - task disable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b0); endtask + task enable_narrow_rd_en; _update_narrow_rd_en(1'b1); endtask + task disable_narrow_rd_en; _update_narrow_rd_en(1'b0); 
endtask always @(posedge clk or negedge rst_n) // if (!rst_n) begin // - disable_wide_xy_rd_en; - disable_narrow_xy_rd_en; + disable_wide_rd_en; + disable_narrow_rd_en; // end else begin // - disable_wide_xy_rd_en; - disable_narrow_xy_rd_en; - // - // one_pass + disable_wide_rd_en; + disable_narrow_rd_en; // - case (wrk_fsm_state_next_one_pass) + case (opcode) // - WRK_FSM_STATE_LATENCY_PRE1, - WRK_FSM_STATE_LATENCY_PRE2, - WRK_FSM_STATE_BUSY: + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_OUTPUT_FROM_NARROW, + UOP_OPCODE_MODULAR_REDUCE_INIT, + UOP_OPCODE_MODULAR_SUBTRACT_X: // - case (opcode) - // - UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_OUTPUT_FROM_NARROW, - UOP_OPCODE_MODULAR_REDUCE_INIT: - // - enable_narrow_xy_rd_en; - // - UOP_OPCODE_COPY_CRT_Y2X: begin - // - enable_wide_xy_rd_en; - enable_narrow_xy_rd_en; - // - end - // - UOP_OPCODE_MERGE_LH: - // - enable_wide_xy_rd_en; - // + case (wrk_fsm_state_next) + WRK_FSM_STATE_LATENCY_PRE1, + WRK_FSM_STATE_LATENCY_PRE3, + WRK_FSM_STATE_BUSY1: enable_narrow_rd_en; endcase // - endcase - // - // one_pass_meander - // - case (wrk_fsm_state_next_one_pass_meander) - // - WRK_FSM_STATE_LATENCY_PRE1_M1, - WRK_FSM_STATE_LATENCY_PRE1_M2, - WRK_FSM_STATE_LATENCY_PRE2_M1, - WRK_FSM_STATE_LATENCY_PRE2_M2, - WRK_FSM_STATE_BUSY_M1, - WRK_FSM_STATE_BUSY_M2: + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_MODULAR_SUBTRACT_Y, + UOP_OPCODE_MODULAR_SUBTRACT_Z, + UOP_OPCODE_REGULAR_ADD_UNEVEN: // - case (opcode) - // - UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y: begin - // - enable_wide_xy_rd_en; - enable_narrow_xy_rd_en; - // - end - // - UOP_OPCODE_REGULAR_ADD_UNEVEN: - // - enable_narrow_xy_rd_en; - // + case (wrk_fsm_state_next) + WRK_FSM_STATE_LATENCY_PRE1, + WRK_FSM_STATE_LATENCY_PRE3, + WRK_FSM_STATE_BUSY1: begin enable_wide_rd_en; enable_narrow_rd_en; end endcase // - endcase - // - // two_pass - // - case (wrk_fsm_state_next_two_pass) - // - WRK_FSM_STATE_LATENCY_PRE1_TP, - WRK_FSM_STATE_LATENCY_PRE2_TP, - 
WRK_FSM_STATE_LATENCY_PRE3_TP, - WRK_FSM_STATE_LATENCY_PRE4_TP, - WRK_FSM_STATE_BUSY_TP: + UOP_OPCODE_COPY_LADDERS_X2Y, + UOP_OPCODE_CROSS_LADDERS_X2Y: // - case (opcode) - UOP_OPCODE_MODULAR_SUBTRACT: - // - if (!wrk_fsm_two_pass_pass) begin - enable_wide_xy_rd_en; - enable_narrow_xy_rd_en; - end else - enable_narrow_xy_rd_en; - // + case (wrk_fsm_state_next) + WRK_FSM_STATE_LATENCY_PRE1, + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_LATENCY_PRE3, + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_BUSY2: begin enable_wide_rd_en; enable_narrow_rd_en; end endcase // + UOP_OPCODE_MERGE_LH: + // + case (wrk_fsm_state_next) + WRK_FSM_STATE_LATENCY_PRE1, + WRK_FSM_STATE_LATENCY_PRE3, + WRK_FSM_STATE_BUSY1: enable_wide_rd_en; + endcase + // endcase // end @@ -435,490 +353,330 @@ module modexpng_general_worker // Destination Write Enable Logic // - task _update_wide_xy_wr_en; input _en; {wr_wide_xy_ena_x, wr_wide_xy_ena_y } <= {2{_en}}; endtask - task _update_narrow_xy_wr_en; input _en; {wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{_en}}; endtask + task _update_wide_wr_en; input _en; {wr_wide_xy_ena_x, wr_wide_xy_ena_y } <= {2{_en}}; endtask + task _update_narrow_wr_en; input _en; {wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{_en}}; endtask - task enable_wide_xy_wr_en; _update_wide_xy_wr_en(1'b1); endtask - task disable_wide_xy_wr_en; _update_wide_xy_wr_en(1'b0); endtask + task enable_wide_wr_en; _update_wide_wr_en(1'b1); endtask + task disable_wide_wr_en; _update_wide_wr_en(1'b0); endtask - task enable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b1); endtask - task disable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b0); endtask + task enable_narrow_wr_en; _update_narrow_wr_en(1'b1); endtask + task disable_narrow_wr_en; _update_narrow_wr_en(1'b0); endtask always @(posedge clk or negedge rst_n) // if (!rst_n) begin // - disable_wide_xy_wr_en; - disable_narrow_xy_wr_en; + disable_wide_wr_en; + disable_narrow_wr_en; // end else begin // - 
disable_wide_xy_wr_en; - disable_narrow_xy_wr_en; + disable_wide_wr_en; + disable_narrow_wr_en; // - // one_pass - // - case (wrk_fsm_state) + case (opcode) // - WRK_FSM_STATE_BUSY, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST2: + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_MODULAR_SUBTRACT_X, + UOP_OPCODE_MERGE_LH, + UOP_OPCODE_REGULAR_ADD_UNEVEN: // - case (opcode) - // - UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_MERGE_LH: - // - enable_narrow_xy_wr_en; - // - UOP_OPCODE_COPY_CRT_Y2X: begin - // - enable_wide_xy_wr_en; - enable_narrow_xy_wr_en; - // - end - // - UOP_OPCODE_MODULAR_REDUCE_INIT: - // - enable_wide_xy_wr_en; - // + case (wrk_fsm_state) + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: enable_narrow_wr_en; endcase // - endcase - // - // one_pass_meander - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_BUSY_M2, - WRK_FSM_STATE_LATENCY_POST1_M2, - WRK_FSM_STATE_LATENCY_POST2_M2: + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_COPY_LADDERS_X2Y, + UOP_OPCODE_CROSS_LADDERS_X2Y, + UOP_OPCODE_MODULAR_SUBTRACT_Z: // - case (opcode) - // - UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y: begin - // - enable_wide_xy_wr_en; - enable_narrow_xy_wr_en; - // - end - // - UOP_OPCODE_REGULAR_ADD_UNEVEN: - // - enable_narrow_xy_wr_en; - // + case (wrk_fsm_state) + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: begin enable_wide_wr_en; enable_narrow_wr_en; end endcase // - endcase - // - // two_pass - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_BUSY_TP, - WRK_FSM_STATE_LATENCY_POST1_TP, - WRK_FSM_STATE_LATENCY_POST2_TP, - WRK_FSM_STATE_LATENCY_POST3_TP, - WRK_FSM_STATE_LATENCY_POST4_TP: + UOP_OPCODE_MODULAR_REDUCE_INIT, + UOP_OPCODE_MODULAR_SUBTRACT_Y: // - case (opcode) - // - UOP_OPCODE_MODULAR_SUBTRACT: - // - if (!wrk_fsm_two_pass_pass) - enable_narrow_xy_wr_en; - else begin - enable_wide_xy_wr_en; - enable_narrow_xy_wr_en; - end - // + case (wrk_fsm_state) + 
WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: enable_wide_wr_en; endcase - // + // endcase // end - + // - // Source to Destination Data Logic + // Source Read Address Logic // + reg [OP_ADDR_W -1:0] rd_wide_addr_next; + reg [OP_ADDR_W -1:0] rd_narrow_addr_next; + + reg rd_wide_addr_is_last = 1'b0; + reg rd_narrow_addr_is_last = 1'b0; + reg rd_wide_addr_is_last_half = 1'b0; + reg rd_narrow_addr_is_last_half = 1'b0; + + reg rd_wide_addr_next_is_last = 1'b0; + reg rd_narrow_addr_next_is_last = 1'b0; + + reg rd_wide_addr_next_is_last_half = 1'b0; + reg rd_narrow_addr_next_is_last_half = 1'b0; + + reg [3:0] rd_wide_addr_is_last_half_dly = 4'h0; + reg [3:0] rd_narrow_addr_is_last_half_dly = 4'h0; + always @(posedge clk) begin // - update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC); - update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC); - // - // one_pass - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_BUSY, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST2: - // - case (opcode) - // - UOP_OPCODE_PROPAGATE_CARRIES: - // - update_narrow_dout(rd_narrow_x_din_x_w_cry_reduced, - rd_narrow_y_din_x_w_cry_reduced, - rd_narrow_x_din_y_w_cry_reduced, - rd_narrow_y_din_y_w_cry_reduced); - // - UOP_OPCODE_COPY_CRT_Y2X: begin - // - update_wide_dout(wrk_rd_wide_x_din_y, - wrk_rd_wide_y_din_y, - wrk_rd_wide_x_din_y, - wrk_rd_wide_y_din_y); - // - update_narrow_dout(wrk_rd_narrow_x_din_y, - wrk_rd_narrow_y_din_y, - wrk_rd_narrow_x_din_y, - wrk_rd_narrow_y_din_y); - // - end - // - UOP_OPCODE_MODULAR_REDUCE_INIT: - // - update_wide_dout(wrk_rd_narrow_x_din_x, - wrk_rd_narrow_y_din_x, - wrk_rd_narrow_x_din_y, - wrk_rd_narrow_y_din_y); - // - UOP_OPCODE_MERGE_LH: - // - update_narrow_dout(wrk_rd_wide_x_din_x, - wrk_rd_wide_y_din_x, - wrk_rd_wide_x_din_y, - wrk_rd_wide_y_din_y); - // - endcase - // - endcase - // - // one_pass_meander - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_BUSY_M2, - 
WRK_FSM_STATE_LATENCY_POST1_M2, - WRK_FSM_STATE_LATENCY_POST2_M2: - // - case (opcode) - // - UOP_OPCODE_COPY_LADDERS_X2Y: begin - // - update_wide_dout(wrk_rd_wide_x_din_x_dly3, - wrk_rd_wide_x_din_x_dly2, - wrk_rd_wide_x_din_y_dly3, - wrk_rd_wide_x_din_y_dly2); - // - update_narrow_dout(wrk_rd_narrow_x_din_x_dly3, - wrk_rd_narrow_x_din_x_dly2, - wrk_rd_narrow_x_din_y_dly3, - wrk_rd_narrow_x_din_y_dly2); - // - end - // - UOP_OPCODE_CROSS_LADDERS_X2Y: begin - // - update_wide_dout(wrk_rd_wide_x_din_x_dly3, - wrk_rd_wide_x_din_y_dly2, - wrk_rd_wide_x_din_y_dly3, - wrk_rd_wide_x_din_x_dly2); - // - update_narrow_dout(wrk_rd_narrow_x_din_x_dly3, - wrk_rd_narrow_x_din_y_dly2, - wrk_rd_narrow_x_din_y_dly3, - wrk_rd_narrow_x_din_x_dly2); - // - end - // - UOP_OPCODE_REGULAR_ADD_UNEVEN: begin - // - update_narrow_dout(regadd_x_x_trunc, - regadd_y_x_trunc, - regadd_x_y_trunc, - regadd_y_y_trunc); - // - end - // - endcase - // - endcase - // - // two_pass - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_BUSY_TP, - WRK_FSM_STATE_LATENCY_POST1_TP, - WRK_FSM_STATE_LATENCY_POST2_TP, - WRK_FSM_STATE_LATENCY_POST3_TP, - WRK_FSM_STATE_LATENCY_POST4_TP: - // - case (opcode) - // - UOP_OPCODE_MODULAR_SUBTRACT: - // - if (!wrk_fsm_two_pass_pass) - update_narrow_dout(modsub_x_ab_dly_trunc, modsub_x_abn_trunc, modsub_y_ab_dly_trunc, modsub_y_abn_trunc); - else begin - update_wide_dout (modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux); - update_narrow_dout(modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux); - end - // - endcase - // - endcase + rd_wide_addr_is_last_half_dly <= {rd_wide_addr_is_last_half_dly[2:0], rd_wide_addr_is_last_half}; + rd_narrow_addr_is_last_half_dly <= {rd_narrow_addr_is_last_half_dly[2:0], rd_narrow_addr_is_last_half}; // end - - // - // Source Read Address Logic - // - - reg [OP_ADDR_W -1:0] rd_wide_xy_addr_xy_next; - reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_xy_next; - - reg rd_wide_xy_addr_xy_next_last_seen; - reg 
rd_wide_xy_addr_xy_next_last_seen_dly1; - reg rd_wide_xy_addr_xy_next_last_seen_dly2; - - wire rd_wide_xy_addr_xy_next_is_last = rd_wide_xy_addr_xy_next == word_index_last_half; - wire rd_narrow_xy_addr_xy_next_is_last = rd_narrow_xy_addr_xy_next == word_index_last; + task preset_rd_wide_bank_addr; + input [BANK_ADDR_W -1:0] bank; + input [ OP_ADDR_W -1:0] addr; + begin + {rd_wide_bank_x, rd_wide_addr_x} <= {bank, addr}; + {rd_wide_bank_y, rd_wide_addr_y} <= {bank, addr}; + rd_wide_addr_is_last <= 1'b0; + rd_wide_addr_is_last_half <= 1'b0; + end + endtask - task update_rd_wide_bank_addr; + task preset_rd_narrow_bank_addr; input [BANK_ADDR_W -1:0] bank; input [ OP_ADDR_W -1:0] addr; begin - {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {bank, addr}; - {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {bank, addr}; + {rd_narrow_bank_x, rd_narrow_addr_x} <= {bank, addr}; + {rd_narrow_bank_y, rd_narrow_addr_y} <= {bank, addr}; + rd_narrow_addr_is_last <= 1'b0; + rd_narrow_addr_is_last_half <= 1'b0; + end + endtask + + task preset_rd_wide_addr_next; + input [OP_ADDR_W -1:0] addr; + begin + rd_wide_addr_next <= addr; + rd_wide_addr_next_is_last <= 1'b0; + rd_wide_addr_next_is_last_half <= 1'b0; end endtask - task update_rd_wide_bank; - input [BANK_ADDR_W -1:0] bank; + task preset_rd_narrow_addr_next; + input [OP_ADDR_W -1:0] addr; begin - {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {bank, rd_wide_xy_addr_x}; - {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {bank, rd_wide_xy_addr_y}; + rd_narrow_addr_next <= addr; + rd_narrow_addr_next_is_last <= 1'b0; + rd_narrow_addr_next_is_last_half <= 1'b0; end endtask - task update_rd_narrow_bank_addr; + task keep_rd_wide_bank; + begin + {rd_wide_bank_x} <= {rd_wide_bank_x}; + {rd_wide_bank_y} <= {rd_wide_bank_y}; + end + endtask + + task switch_rd_wide_bank; input [BANK_ADDR_W -1:0] bank; - input [ OP_ADDR_W -1:0] addr; begin - {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {bank, addr}; - {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {bank, addr}; + 
{rd_wide_bank_x} <= {bank}; + {rd_wide_bank_y} <= {bank}; + end + endtask + + task keep_rd_wide_addr; + begin + {rd_wide_addr_x} <= {rd_wide_addr_x}; + {rd_wide_addr_y} <= {rd_wide_addr_y}; end endtask - task update_rd_narrow_bank; + task advance_rd_wide_addr; + begin + {rd_wide_addr_x} <= {rd_wide_addr_next}; + {rd_wide_addr_y} <= {rd_wide_addr_next}; + rd_wide_addr_is_last <= rd_wide_addr_next == word_index_last; + rd_wide_addr_is_last_half <= rd_wide_addr_next == word_index_last_half; + end + endtask + + task keep_rd_narrow_bank; + begin + {rd_narrow_bank_x} <= {rd_narrow_bank_x}; + {rd_narrow_bank_y} <= {rd_narrow_bank_y}; + end + endtask + + task switch_rd_narrow_bank; input [BANK_ADDR_W -1:0] bank; begin - {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {bank, rd_narrow_xy_addr_x}; - {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {bank, rd_narrow_xy_addr_y}; + {rd_narrow_bank_x} <= {bank}; + {rd_narrow_bank_y} <= {bank}; end endtask - task update_rd_wide_addr_next; - input [OP_ADDR_W -1:0] addr; - rd_wide_xy_addr_xy_next <= addr; + task keep_rd_narrow_addr; + begin + {rd_narrow_addr_x} <= {rd_narrow_addr_x}; + {rd_narrow_addr_y} <= {rd_narrow_addr_y}; + end + endtask + + task advance_rd_narrow_addr; + begin + {rd_narrow_addr_x} <= {rd_narrow_addr_next}; + {rd_narrow_addr_y} <= {rd_narrow_addr_next}; + rd_narrow_addr_is_last <= rd_narrow_addr_next == word_index_last; + rd_narrow_addr_is_last_half <= rd_narrow_addr_next == word_index_last_half; + end + endtask + + task update_rd_wide_addr_flags; + begin + rd_wide_addr_next_is_last <= rd_wide_addr_next == (word_index_last - 1'b1); + rd_wide_addr_next_is_last_half <= rd_wide_addr_next == (word_index_last_half - 1'b1); + end endtask - task update_rd_narrow_addr_next; - input [OP_ADDR_W -1:0] addr; - rd_narrow_xy_addr_xy_next <= addr; + task update_rd_narrow_addr_flags; + begin + rd_narrow_addr_next_is_last <= rd_narrow_addr_next == (word_index_last - 1'b1); + rd_narrow_addr_next_is_last_half <= rd_narrow_addr_next == 
(word_index_last_half - 1'b1); + end endtask task advance_rd_wide_addr_next; - rd_wide_xy_addr_xy_next <= !rd_wide_xy_addr_xy_next_is_last ? rd_wide_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO; + begin + rd_wide_addr_next <= !rd_wide_addr_next_is_last ? rd_wide_addr_next + 1'b1 : OP_ADDR_ZERO; + update_rd_wide_addr_flags; + end endtask task advance_rd_narrow_addr_next; - rd_narrow_xy_addr_xy_next <= !rd_narrow_xy_addr_xy_next_is_last ? rd_narrow_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO; + begin + rd_narrow_addr_next <= !rd_narrow_addr_next_is_last ? rd_narrow_addr_next + 1'b1 : OP_ADDR_ZERO; + update_rd_narrow_addr_flags; + end + endtask + + task advance_rd_wide_addr_next_half; + begin + rd_wide_addr_next <= !rd_wide_addr_next_is_last_half ? rd_wide_addr_next + 1'b1 : OP_ADDR_ZERO; + update_rd_wide_addr_flags; + end + endtask + + task advance_rd_narrow_addr_next_half; + begin + rd_narrow_addr_next <= !rd_narrow_addr_next_is_last_half ? rd_narrow_addr_next + 1'b1 : OP_ADDR_ZERO; + update_rd_narrow_addr_flags; + end endtask - - always @(posedge clk) - // - case (opcode) - UOP_OPCODE_MERGE_LH: - case (wrk_fsm_state_next_one_pass) - WRK_FSM_STATE_LATENCY_PRE1: - rd_wide_xy_addr_xy_next_last_seen <= 1'b0; - WRK_FSM_STATE_BUSY: - if (!rd_wide_xy_addr_xy_next_last_seen && rd_wide_xy_addr_xy_next_is_last) - rd_wide_xy_addr_xy_next_last_seen <= 1'b1; - endcase - UOP_OPCODE_REGULAR_ADD_UNEVEN: - case (wrk_fsm_state_next_one_pass_meander) - WRK_FSM_STATE_LATENCY_PRE1_M1: begin - rd_wide_xy_addr_xy_next_last_seen <= 1'b0; - rd_wide_xy_addr_xy_next_last_seen_dly1 <= 1'b0; - rd_wide_xy_addr_xy_next_last_seen_dly2 <= 1'b0; - end - WRK_FSM_STATE_BUSY_M1: begin - if (!rd_wide_xy_addr_xy_next_last_seen && rd_wide_xy_addr_xy_next_is_last) - rd_wide_xy_addr_xy_next_last_seen <= 1'b1; - rd_wide_xy_addr_xy_next_last_seen_dly1 <= rd_wide_xy_addr_xy_next_last_seen; - rd_wide_xy_addr_xy_next_last_seen_dly2 <= rd_wide_xy_addr_xy_next_last_seen_dly1; - end - endcase - endcase always @(posedge clk) 
begin // - update_rd_wide_bank_addr (BANK_DNC, OP_ADDR_DNC); - update_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC); + preset_rd_wide_bank_addr (BANK_DNC, OP_ADDR_DNC); + preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC); // - // one_pass - // - case (wrk_fsm_state_next_one_pass) + case (opcode) // - WRK_FSM_STATE_LATENCY_PRE1: - // - case (opcode) - // - UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_OUTPUT_FROM_NARROW, - UOP_OPCODE_COPY_CRT_Y2X, - UOP_OPCODE_MODULAR_REDUCE_INIT: begin - // - update_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE); - update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); - // - end - // - UOP_OPCODE_MERGE_LH: begin - update_rd_wide_bank_addr (BANK_WIDE_L, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE); - update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); - end - // - endcase - // - WRK_FSM_STATE_LATENCY_PRE2, - WRK_FSM_STATE_BUSY: + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_OUTPUT_FROM_NARROW, + UOP_OPCODE_MODULAR_SUBTRACT_X: // - case (opcode) - // - UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_OUTPUT_FROM_NARROW, - UOP_OPCODE_COPY_CRT_Y2X: begin - // - update_rd_wide_bank_addr (sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ; - update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; - // - end - // - UOP_OPCODE_MODULAR_REDUCE_INIT: begin - // - update_rd_wide_bank_addr (sel_wide_in, rd_wide_xy_addr_xy_next ); advance_rd_wide_addr_next ; - update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; - // - end - // - UOP_OPCODE_MERGE_LH: begin - // - if (!rd_wide_xy_addr_xy_next_last_seen) update_rd_wide_bank_addr (BANK_WIDE_L, rd_wide_xy_addr_xy_next ); - else update_rd_wide_bank_addr (BANK_WIDE_H, rd_wide_xy_addr_xy_next ); - advance_rd_wide_addr_next ; - update_rd_narrow_bank_addr(sel_narrow_in, 
rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; - // - end - // + case (wrk_fsm_state_next) + WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end + WRK_FSM_STATE_LATENCY_PRE3, + WRK_FSM_STATE_BUSY1: begin keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY2: keep_rd_narrow_bank; endcase // - endcase - // - // one_pass_meander - // - case (wrk_fsm_state_next_one_pass_meander) - // - WRK_FSM_STATE_LATENCY_PRE1_M1: - case (opcode) - UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y: begin - update_rd_wide_bank_addr (sel_wide_out, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE); - update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); - end - UOP_OPCODE_REGULAR_ADD_UNEVEN: begin - update_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE); - update_rd_narrow_bank_addr(sel_wide_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); - end + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_MODULAR_SUBTRACT_Z, + UOP_OPCODE_REGULAR_ADD_UNEVEN: + // + case (wrk_fsm_state_next) + WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE); + preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end + WRK_FSM_STATE_LATENCY_PRE3, + WRK_FSM_STATE_BUSY1: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next; + keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY2: begin keep_rd_wide_bank; keep_rd_narrow_bank; end endcase // - WRK_FSM_STATE_LATENCY_PRE2_M1, - WRK_FSM_STATE_BUSY_M1: - case (opcode) - UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y: begin - 
update_rd_wide_bank_addr (sel_wide_out, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ; - update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; - end - UOP_OPCODE_REGULAR_ADD_UNEVEN: begin - update_rd_wide_bank_addr (sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ; - update_rd_narrow_bank_addr(sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; - end + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state_next) + WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_DNC, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE); + preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end + WRK_FSM_STATE_LATENCY_PRE3, + WRK_FSM_STATE_BUSY1: begin advance_rd_wide_addr; advance_rd_wide_addr_next_half; + keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY2: keep_rd_narrow_bank; endcase // - WRK_FSM_STATE_LATENCY_PRE1_M2, - WRK_FSM_STATE_LATENCY_PRE2_M2, - WRK_FSM_STATE_BUSY_M2: - case (opcode) - UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y: begin - update_rd_wide_bank (sel_wide_in ); - update_rd_narrow_bank(sel_narrow_in); - end - UOP_OPCODE_REGULAR_ADD_UNEVEN: begin - update_rd_wide_bank (sel_narrow_in); - update_rd_narrow_bank(sel_narrow_in); - end + UOP_OPCODE_COPY_LADDERS_X2Y, + UOP_OPCODE_CROSS_LADDERS_X2Y: + // + case (wrk_fsm_state_next) + WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE); + preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end + WRK_FSM_STATE_LATENCY_PRE2: begin switch_rd_wide_bank (sel_wide_out); keep_rd_wide_addr; + switch_rd_narrow_bank(sel_narrow_out); keep_rd_narrow_addr; end + WRK_FSM_STATE_LATENCY_PRE3, + WRK_FSM_STATE_BUSY1: begin 
advance_rd_wide_addr; advance_rd_wide_addr_next; switch_rd_wide_bank(sel_wide_in); + advance_rd_narrow_addr; advance_rd_narrow_addr_next; switch_rd_narrow_bank(sel_narrow_in); end + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY2: begin keep_rd_wide_addr; switch_rd_wide_bank (sel_wide_out); + keep_rd_narrow_addr; switch_rd_narrow_bank(sel_narrow_out); end endcase // - endcase - // - // two_pass - // - case (wrk_fsm_state_next_two_pass) - // - WRK_FSM_STATE_LATENCY_PRE1_TP: + UOP_OPCODE_MODULAR_SUBTRACT_Y: // - case (opcode) - // - UOP_OPCODE_MODULAR_SUBTRACT: - // - if (!wrk_fsm_two_pass_pass) begin - update_rd_wide_bank_addr (BANK_WIDE_N, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE); - update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); - end else begin - update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); - end - // + case (wrk_fsm_state_next) + WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_WIDE_N, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE); + preset_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end + WRK_FSM_STATE_LATENCY_PRE3, + WRK_FSM_STATE_BUSY1: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next; + keep_rd_narrow_bank; advance_rd_narrow_addr; advance_rd_narrow_addr_next; end + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY2: begin keep_rd_wide_bank; keep_rd_narrow_bank; end endcase + // + UOP_OPCODE_MERGE_LH: // - WRK_FSM_STATE_LATENCY_PRE2_TP, - WRK_FSM_STATE_LATENCY_PRE3_TP, - WRK_FSM_STATE_LATENCY_PRE4_TP, - WRK_FSM_STATE_BUSY_TP: - // - case (opcode) - // - UOP_OPCODE_MODULAR_SUBTRACT: - // - if (!wrk_fsm_two_pass_pass) begin - update_rd_wide_bank_addr (BANK_WIDE_N, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ; - update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; - 
end else begin - update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; - end - // + case (wrk_fsm_state_next) + WRK_FSM_STATE_LATENCY_PRE1: begin preset_rd_wide_bank_addr (BANK_WIDE_L, OP_ADDR_ZERO); preset_rd_wide_addr_next (OP_ADDR_ONE); + preset_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_ZERO); preset_rd_narrow_addr_next(OP_ADDR_ONE); end + WRK_FSM_STATE_LATENCY_PRE3: begin keep_rd_wide_bank; advance_rd_wide_addr; advance_rd_wide_addr_next_half; + advance_rd_narrow_addr; advance_rd_narrow_addr_next; end + WRK_FSM_STATE_BUSY1: begin if (!rd_wide_addr_is_last_half_dly[0]) keep_rd_wide_bank; + else switch_rd_wide_bank(BANK_WIDE_H); + advance_rd_wide_addr; advance_rd_wide_addr_next_half; + advance_rd_narrow_addr; advance_rd_narrow_addr_next; end + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY2: keep_rd_wide_bank; endcase - // + // endcase // end @@ -927,13 +685,21 @@ module modexpng_general_worker // // Destination Write Address Logic // - - wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half; - wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half; - - wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_x = uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H; - wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_y = uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H; + reg modular_reduce_init_first_half_x; + reg modular_reduce_init_first_half_y; + reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_x; + reg [BANK_ADDR_W -1:0] modular_reduce_init_sel_wide_out_y; + always @(posedge clk) begin + // + modular_reduce_init_first_half_x <= rd_narrow_addr_x_dly[1] <= word_index_last_half; + modular_reduce_init_first_half_y <= rd_narrow_addr_y_dly[1] <= word_index_last_half; + // + modular_reduce_init_sel_wide_out_x <= modular_reduce_init_first_half_x ? 
BANK_WIDE_L : BANK_WIDE_H; + modular_reduce_init_sel_wide_out_y <= modular_reduce_init_first_half_y ? BANK_WIDE_L : BANK_WIDE_H; + // + end + task update_wr_wide_bank_addr; input [BANK_ADDR_W -1:0] x_bank; input [BANK_ADDR_W -1:0] y_bank; @@ -955,120 +721,351 @@ module modexpng_general_worker {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {y_bank, y_addr}; end endtask - + always @(posedge clk) begin // update_wr_wide_bank_addr (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC); update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC); // - // one_pass - // - case (wrk_fsm_state) + case (opcode) // - WRK_FSM_STATE_BUSY, - WRK_FSM_STATE_LATENCY_POST1, - WRK_FSM_STATE_LATENCY_POST2: + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_MODULAR_SUBTRACT_X, + UOP_OPCODE_MERGE_LH, + UOP_OPCODE_REGULAR_ADD_UNEVEN: // - case (opcode) - // - UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_COPY_CRT_Y2X: begin - update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2); - update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2); - end - // - UOP_OPCODE_MODULAR_REDUCE_INIT: - update_wr_wide_bank_addr(uop_modular_reduce_init_bank_x, uop_modular_reduce_init_bank_y, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_y_dly2); - // - UOP_OPCODE_MERGE_LH: - update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2); - // + case (wrk_fsm_state) + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]); endcase + // + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_COPY_LADDERS_X2Y, + UOP_OPCODE_CROSS_LADDERS_X2Y, + UOP_OPCODE_MODULAR_SUBTRACT_Z: // - endcase - // - // one_pass_meander - // - case (wrk_fsm_state) + case (wrk_fsm_state) + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: 
begin update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[3], rd_narrow_addr_y_dly[3]); + update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3] ); end + endcase // - WRK_FSM_STATE_BUSY_M2, - WRK_FSM_STATE_LATENCY_POST1_M2, - WRK_FSM_STATE_LATENCY_POST2_M2: - // - case (opcode) - UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y: begin - update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); - update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); - end - UOP_OPCODE_REGULAR_ADD_UNEVEN: - update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state) + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: update_wr_wide_bank_addr(modular_reduce_init_sel_wide_out_x, modular_reduce_init_sel_wide_out_y, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]); endcase + // + UOP_OPCODE_MODULAR_SUBTRACT_Y: // + case (wrk_fsm_state) + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: update_wr_wide_bank_addr(sel_wide_out, sel_wide_out, rd_wide_addr_x_dly[3], rd_wide_addr_y_dly[3]); + endcase + // endcase // - // two_pass + end + + + // + // UOP_OPCODE_PROPAGATE_CARRIES + // + reg [CARRY_W -1:0] propagate_carries_x_x_cry_r; + reg [CARRY_W -1:0] propagate_carries_y_x_cry_r; + reg [CARRY_W -1:0] propagate_carries_x_y_cry_r; + reg [CARRY_W -1:0] propagate_carries_y_y_cry_r; + + wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_x_x_cry_r}; + wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_x_cry_r}; + wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, 
propagate_carries_x_y_cry_r}; + wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, propagate_carries_y_y_cry_r}; + + reg [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_r; + reg [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_r; + reg [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_r; + reg [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_r; + + wire [CARRY_W -1:0] propagate_carries_x_x_w_cry_msb = propagate_carries_x_x_w_cry_r[WORD_EXT_W -1:WORD_W]; + wire [CARRY_W -1:0] propagate_carries_y_x_w_cry_msb = propagate_carries_y_x_w_cry_r[WORD_EXT_W -1:WORD_W]; + wire [CARRY_W -1:0] propagate_carries_x_y_w_cry_msb = propagate_carries_x_y_w_cry_r[WORD_EXT_W -1:WORD_W]; + wire [CARRY_W -1:0] propagate_carries_y_y_w_cry_msb = propagate_carries_y_y_w_cry_r[WORD_EXT_W -1:WORD_W]; + + wire [WORD_W -1:0] propagate_carries_x_x_w_cry_lsb = propagate_carries_x_x_w_cry_r[WORD_W -1:0]; + wire [WORD_W -1:0] propagate_carries_y_x_w_cry_lsb = propagate_carries_y_x_w_cry_r[WORD_W -1:0]; + wire [WORD_W -1:0] propagate_carries_x_y_w_cry_lsb = propagate_carries_x_y_w_cry_r[WORD_W -1:0]; + wire [WORD_W -1:0] propagate_carries_y_y_w_cry_lsb = propagate_carries_y_y_w_cry_r[WORD_W -1:0]; + + wire [WORD_EXT_W -1:0] propagate_carries_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_x_w_cry_lsb}; + wire [WORD_EXT_W -1:0] propagate_carries_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_x_w_cry_lsb}; + wire [WORD_EXT_W -1:0] propagate_carries_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_x_y_w_cry_lsb}; + wire [WORD_EXT_W -1:0] propagate_carries_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, propagate_carries_y_y_w_cry_lsb}; + + task _propagate_carries_update_cry; + input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry; + { propagate_carries_x_x_cry_r, propagate_carries_y_x_cry_r, propagate_carries_x_y_cry_r, propagate_carries_y_y_cry_r} <= + { x_x_cry, y_x_cry, x_y_cry, y_y_cry}; + endtask + + task propagate_carries_clear_cry; 
_propagate_carries_update_cry( CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); endtask + task propagate_carries_store_cry; _propagate_carries_update_cry(propagate_carries_x_x_w_cry_msb, propagate_carries_y_x_w_cry_msb, propagate_carries_x_y_w_cry_msb, propagate_carries_y_y_w_cry_msb); endtask + + task _propagate_carries_update_sum_w_cry; + input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry; + { propagate_carries_x_x_w_cry_r, propagate_carries_y_x_w_cry_r, propagate_carries_x_y_w_cry_r, propagate_carries_y_y_w_cry_r} <= + { x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry}; + endtask + + task propagate_carries_store_sum_w_cry; _propagate_carries_update_sum_w_cry(propagate_carries_x_x_w_cry, propagate_carries_y_x_w_cry, propagate_carries_x_y_w_cry, propagate_carries_y_y_w_cry); endtask + + always @(posedge clk) // - case (wrk_fsm_state) + if (opcode == UOP_OPCODE_PROPAGATE_CARRIES) // - WRK_FSM_STATE_BUSY_TP, - WRK_FSM_STATE_LATENCY_POST1_TP, - WRK_FSM_STATE_LATENCY_POST2_TP, - WRK_FSM_STATE_LATENCY_POST3_TP, - WRK_FSM_STATE_LATENCY_POST4_TP: + case (wrk_fsm_state) // - case (opcode) + WRK_FSM_STATE_LATENCY_PRE3: propagate_carries_clear_cry; + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1: propagate_carries_store_cry; + // + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2: propagate_carries_store_sum_w_cry; + // + endcase + + + // + // UOP_OPCODE_MODULAR_SUBTRACT_X + // UOP_OPCODE_MODULAR_SUBTRACT_Y + // + reg modular_subtract_x_brw_r; + reg modular_subtract_y_brw_r; + + reg modular_subtract_x_cry_r; + reg modular_subtract_y_cry_r; + + wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r}; + wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r}; + + wire [WORD_W:0] 
modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r}; + /* Y lane carry-in is the Y carry register (cleared/stored during MODULAR_SUBTRACT_Y), mirroring the X lane; the borrow flag must not be fed into the adder */ wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_cry_r}; + + reg [WORD_W:0] modular_subtract_x_w_brw_r; + reg [WORD_W:0] modular_subtract_y_w_brw_r; + + reg [WORD_W:0] modular_subtract_x_w_cry_r; + reg [WORD_W:0] modular_subtract_y_w_cry_r; + + wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W]; + wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W]; + + wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W]; + wire modular_subtract_y_w_cry_msb = modular_subtract_y_w_cry_r[WORD_W]; + + wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0]; + wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0]; + + wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0]; + wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0]; + + wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb}; + wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb}; + + wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb}; + wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb}; + + reg [WORD_EXT_W -1:0] modular_subtract_x_mux; + reg [WORD_EXT_W -1:0] modular_subtract_y_mux; + + wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]}; + wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]}; + + task _modular_subtract_update_brw; + input x_brw, y_brw; + 
{modular_subtract_x_brw_r, modular_subtract_y_brw_r} <= {x_brw, y_brw}; + endtask + + task _modular_subtract_update_cry; + input x_cry, y_cry; + {modular_subtract_x_cry_r, modular_subtract_y_cry_r} <= {x_cry, y_cry}; + endtask + + task modular_subtract_clear_brw; _modular_subtract_update_brw( 1'b0, 1'b0); endtask + task modular_subtract_store_brw; _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb); endtask + + task modular_subtract_clear_cry; _modular_subtract_update_cry( 1'b0, 1'b0); endtask + task modular_subtract_store_cry; _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb); endtask + + task _modular_subtract_update_diff_w_brw; + input [WORD_W:0] x_diff_w_brw, y_diff_w_brw; + {modular_subtract_x_w_brw_r, modular_subtract_y_w_brw_r} <= {x_diff_w_brw, y_diff_w_brw}; + endtask + + task _modular_subtract_update_sum_w_cry; + input [WORD_W:0] x_sum_w_cry, y_sum_w_cry; + {modular_subtract_x_w_cry_r, modular_subtract_y_w_cry_r} <= {x_sum_w_cry, y_sum_w_cry}; + endtask + + task modular_subtract_store_diff_w_brw; _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw); endtask + + task modular_subtract_store_sum_w_cry; _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry); endtask + + always @(posedge clk) + // + case (opcode) + // + UOP_OPCODE_MODULAR_SUBTRACT_X: + // + case (wrk_fsm_state) // - UOP_OPCODE_MODULAR_SUBTRACT: - // - if (!wrk_fsm_two_pass_pass) begin - update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); - end else begin - update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); - update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); - end + WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_brw; + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + 
WRK_FSM_STATE_LATENCY_POST3: modular_subtract_store_brw; // we need the very last borrow here too! + // + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_diff_w_brw; + // + endcase + // + UOP_OPCODE_MODULAR_SUBTRACT_Y: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_LATENCY_PRE3: modular_subtract_clear_cry; + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1: modular_subtract_store_cry; + // + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_sum_w_cry; + // + endcase + // + UOP_OPCODE_MODULAR_SUBTRACT_Z: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_LATENCY_PRE4, + WRK_FSM_STATE_BUSY2, + WRK_FSM_STATE_LATENCY_POST2: // - endcase + begin modular_subtract_x_mux <= !modular_subtract_x_brw_r ? rd_narrow_x_din_x_dly1 : rd_wide_x_din_x_dly1; + modular_subtract_y_mux <= !modular_subtract_y_brw_r ? rd_narrow_x_din_y_dly1 : rd_wide_x_din_y_dly1; end + // + endcase + // + endcase + + + // + // UOP_OPCODE_REGULAR_ADD_UNEVEN + // + reg [CARRY_W -1:0] regular_add_uneven_x_x_cry_r; + reg [CARRY_W -1:0] regular_add_uneven_y_x_cry_r; + reg [CARRY_W -1:0] regular_add_uneven_x_y_cry_r; + reg [CARRY_W -1:0] regular_add_uneven_y_y_cry_r; + + wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_msb_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_x_cry_r}; + wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_msb_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_x_cry_r}; + wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_msb_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_y_cry_r}; + wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_msb_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_y_cry_r}; + + wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_lsb_w_cry = regular_add_uneven_x_x_msb_w_cry + rd_wide_x_din_x_dly1; + wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_lsb_w_cry = regular_add_uneven_y_x_msb_w_cry 
+ rd_wide_y_din_x_dly1; + wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_lsb_w_cry = regular_add_uneven_x_y_msb_w_cry + rd_wide_x_din_y_dly1; + wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_lsb_w_cry = regular_add_uneven_y_y_msb_w_cry + rd_wide_y_din_y_dly1; + + reg [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_r; + reg [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_r; + reg [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_r; + reg [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_r; + + wire [CARRY_W -1:0] regular_add_uneven_x_x_w_cry_msb = regular_add_uneven_x_x_w_cry_r[WORD_EXT_W -1:WORD_W]; + wire [CARRY_W -1:0] regular_add_uneven_y_x_w_cry_msb = regular_add_uneven_y_x_w_cry_r[WORD_EXT_W -1:WORD_W]; + wire [CARRY_W -1:0] regular_add_uneven_x_y_w_cry_msb = regular_add_uneven_x_y_w_cry_r[WORD_EXT_W -1:WORD_W]; + wire [CARRY_W -1:0] regular_add_uneven_y_y_w_cry_msb = regular_add_uneven_y_y_w_cry_r[WORD_EXT_W -1:WORD_W]; + + wire [WORD_W -1:0] regular_add_uneven_x_x_w_cry_lsb = regular_add_uneven_x_x_w_cry_r[WORD_W -1:0]; + wire [WORD_W -1:0] regular_add_uneven_y_x_w_cry_lsb = regular_add_uneven_y_x_w_cry_r[WORD_W -1:0]; + wire [WORD_W -1:0] regular_add_uneven_x_y_w_cry_lsb = regular_add_uneven_x_y_w_cry_r[WORD_W -1:0]; + wire [WORD_W -1:0] regular_add_uneven_y_y_w_cry_lsb = regular_add_uneven_y_y_w_cry_r[WORD_W -1:0]; + + wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_x_w_cry_lsb}; + wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_x_w_cry_lsb}; + wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_y_w_cry_lsb}; + wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_y_w_cry_lsb}; + + reg regular_add_uneven_store_lsb_now; + + task _regular_add_uneven_update_cry; + input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry; + { regular_add_uneven_x_x_cry_r, 
regular_add_uneven_y_x_cry_r, regular_add_uneven_x_y_cry_r, regular_add_uneven_y_y_cry_r} <= + { x_x_cry, y_x_cry, x_y_cry, y_y_cry}; + endtask + + task regular_add_uneven_clear_cry; _regular_add_uneven_update_cry( CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); endtask + task regular_add_uneven_store_cry; _regular_add_uneven_update_cry(regular_add_uneven_x_x_w_cry_msb, regular_add_uneven_y_x_w_cry_msb, regular_add_uneven_x_y_w_cry_msb, regular_add_uneven_y_y_w_cry_msb); endtask + + task _regular_add_uneven_update_sum_w_cry; + input [WORD_EXT_W-1:0] x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry; + { regular_add_uneven_x_x_w_cry_r, regular_add_uneven_y_x_w_cry_r, regular_add_uneven_x_y_w_cry_r, regular_add_uneven_y_y_w_cry_r} <= + { x_x_sum_w_cry, y_x_sum_w_cry, x_y_sum_w_cry, y_y_sum_w_cry}; + endtask + + task regular_add_uneven_store_sum_lsb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_lsb_w_cry, regular_add_uneven_y_x_lsb_w_cry, regular_add_uneven_x_y_lsb_w_cry, regular_add_uneven_y_y_lsb_w_cry); endtask + + task regular_add_uneven_store_sum_msb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_msb_w_cry, regular_add_uneven_y_x_msb_w_cry, regular_add_uneven_x_y_msb_w_cry, regular_add_uneven_y_y_msb_w_cry); endtask + + always @(posedge clk) + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_LATENCY_PRE3: regular_add_uneven_store_lsb_now <= 1'b1; + WRK_FSM_STATE_BUSY1: if (rd_wide_addr_is_last_half_dly[3]) regular_add_uneven_store_lsb_now <= 1'b0; // endcase + + always @(posedge clk) // - end + case (wrk_fsm_state) + // + WRK_FSM_STATE_LATENCY_PRE3: regular_add_uneven_clear_cry; + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1: regular_add_uneven_store_cry; + // + WRK_FSM_STATE_LATENCY_PRE4: regular_add_uneven_store_sum_lsb_w_cry; + WRK_FSM_STATE_BUSY2: if (regular_add_uneven_store_lsb_now) regular_add_uneven_store_sum_lsb_w_cry; + else regular_add_uneven_store_sum_msb_w_cry; + WRK_FSM_STATE_LATENCY_POST2: 
regular_add_uneven_store_sum_msb_w_cry; + // + endcase // // FSM Process // - always @(posedge clk or negedge rst_n) // if (!rst_n) wrk_fsm_state <= WRK_FSM_STATE_IDLE; - else case (opcode) - UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_OUTPUT_FROM_NARROW, - UOP_OPCODE_COPY_CRT_Y2X, - UOP_OPCODE_MODULAR_REDUCE_INIT, - UOP_OPCODE_MERGE_LH: wrk_fsm_state <= wrk_fsm_state_next_one_pass; - UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y, - UOP_OPCODE_REGULAR_ADD_UNEVEN: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander; - UOP_OPCODE_MODULAR_SUBTRACT: wrk_fsm_state <= wrk_fsm_state_next_two_pass; - default: wrk_fsm_state <= WRK_FSM_STATE_IDLE; - endcase - - + else wrk_fsm_state <= wrk_fsm_state_next; + + // // Busy Exit Logic - // - - reg wrk_fsm_done_one_pass = 1'b0; - reg wrk_fsm_done_one_pass_meander = 1'b0; - reg wrk_fsm_done_two_pass = 1'b0; + // + reg wrk_fsm_done = 1'b0; always @(posedge clk) begin // - wrk_fsm_done_one_pass <= 1'b0; - wrk_fsm_done_one_pass_meander <= 1'b0; - wrk_fsm_done_two_pass <= 1'b0; + wrk_fsm_done <= 1'b0; // case (opcode) // @@ -1076,47 +1073,22 @@ module modexpng_general_worker UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_OPCODE_COPY_CRT_Y2X, UOP_OPCODE_MODULAR_REDUCE_INIT, - UOP_OPCODE_MERGE_LH: - // - case (wrk_fsm_state) - WRK_FSM_STATE_BUSY: - if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_one_pass <= 1'b1; - endcase - // UOP_OPCODE_COPY_LADDERS_X2Y, UOP_OPCODE_CROSS_LADDERS_X2Y, + UOP_OPCODE_MODULAR_SUBTRACT_X, + UOP_OPCODE_MODULAR_SUBTRACT_Y, + UOP_OPCODE_MODULAR_SUBTRACT_Z, + UOP_OPCODE_MERGE_LH, UOP_OPCODE_REGULAR_ADD_UNEVEN: // case (wrk_fsm_state) - WRK_FSM_STATE_BUSY_M2: - if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; - WRK_FSM_STATE_BUSY_M1: - wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander; + WRK_FSM_STATE_BUSY1: + if (rd_narrow_addr_is_last) wrk_fsm_done <= 1'b1; endcase - // - UOP_OPCODE_MODULAR_SUBTRACT: - // - case (wrk_fsm_state) - WRK_FSM_STATE_BUSY_TP: - if 
(rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_two_pass <= 1'b1; - endcase - // // endcase // end - - - // - // FSM Helper Logic - // - always @(posedge clk) - // - case (wrk_fsm_state) - WRK_FSM_STATE_IDLE: if (ena) {wrk_fsm_two_pass_pass, wrk_fsm_two_pass_pass_dly} <= {1'b0, 1'b0}; - WRK_FSM_STATE_LATENCY_POST4_TP: wrk_fsm_two_pass_pass <= 1'b1; - WRK_FSM_STATE_HOLDOFF_TP: wrk_fsm_two_pass_pass_dly <= 1'b1; - endcase // @@ -1125,64 +1097,26 @@ module modexpng_general_worker always @* begin // case (wrk_fsm_state) - WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE ; - WRK_FSM_STATE_LATENCY_PRE1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_PRE2 ; - WRK_FSM_STATE_LATENCY_PRE2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_BUSY ; - WRK_FSM_STATE_BUSY: wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ; - WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_POST2 ; - WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_STOP ; - WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ; - default: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ; - endcase - // - end - - always @* begin - // - case (wrk_fsm_state) - WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass_meander = ena ? 
WRK_FSM_STATE_LATENCY_PRE1_M1 : WRK_FSM_STATE_IDLE ; - // - WRK_FSM_STATE_LATENCY_PRE1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE1_M2 ; - WRK_FSM_STATE_LATENCY_PRE1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M1 ; - WRK_FSM_STATE_LATENCY_PRE2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M2 ; - WRK_FSM_STATE_LATENCY_PRE2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M1 ; - WRK_FSM_STATE_BUSY_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M2 ; - WRK_FSM_STATE_BUSY_M2: wrk_fsm_state_next_one_pass_meander = wrk_fsm_done_one_pass_meander ? WRK_FSM_STATE_LATENCY_POST1_M1 : WRK_FSM_STATE_BUSY_M1 ; - WRK_FSM_STATE_LATENCY_POST1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST1_M2 ; - WRK_FSM_STATE_LATENCY_POST1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M1 ; - WRK_FSM_STATE_LATENCY_POST2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M2 ; - WRK_FSM_STATE_LATENCY_POST2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_STOP ; - // - WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ; - // - default: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ; + WRK_FSM_STATE_IDLE: wrk_fsm_state_next = ena ? WRK_FSM_STATE_LATENCY_PRE1 : WRK_FSM_STATE_IDLE ; + WRK_FSM_STATE_LATENCY_PRE1: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE2 ; + WRK_FSM_STATE_LATENCY_PRE2: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE3 ; + WRK_FSM_STATE_LATENCY_PRE3: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_PRE4 ; + WRK_FSM_STATE_LATENCY_PRE4: wrk_fsm_state_next = WRK_FSM_STATE_BUSY1 ; + WRK_FSM_STATE_BUSY1: wrk_fsm_state_next = WRK_FSM_STATE_BUSY2 ; + WRK_FSM_STATE_BUSY2: wrk_fsm_state_next = wrk_fsm_done ? 
WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY1 ; + WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST2 ; + WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST3 ; + WRK_FSM_STATE_LATENCY_POST3: wrk_fsm_state_next = WRK_FSM_STATE_LATENCY_POST4 ; + WRK_FSM_STATE_LATENCY_POST4: wrk_fsm_state_next = WRK_FSM_STATE_STOP ; + WRK_FSM_STATE_STOP: wrk_fsm_state_next = WRK_FSM_STATE_IDLE ; + default: wrk_fsm_state_next = WRK_FSM_STATE_IDLE ; endcase // end - - always @* begin - // - case (wrk_fsm_state) - WRK_FSM_STATE_IDLE: wrk_fsm_state_next_two_pass = ena ? WRK_FSM_STATE_LATENCY_PRE1_TP : WRK_FSM_STATE_IDLE; - WRK_FSM_STATE_LATENCY_PRE1_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE2_TP ; - WRK_FSM_STATE_LATENCY_PRE2_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE3_TP ; - WRK_FSM_STATE_LATENCY_PRE3_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE4_TP ; - WRK_FSM_STATE_LATENCY_PRE4_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_BUSY_TP ; - WRK_FSM_STATE_BUSY_TP: wrk_fsm_state_next_two_pass = wrk_fsm_done_two_pass ? WRK_FSM_STATE_LATENCY_POST1_TP : WRK_FSM_STATE_BUSY_TP; - WRK_FSM_STATE_LATENCY_POST1_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST2_TP ; - WRK_FSM_STATE_LATENCY_POST2_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST3_TP ; - WRK_FSM_STATE_LATENCY_POST3_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST4_TP ; - WRK_FSM_STATE_LATENCY_POST4_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_HOLDOFF_TP ; - WRK_FSM_STATE_HOLDOFF_TP: wrk_fsm_state_next_two_pass = wrk_fsm_two_pass_pass_dly ? 
WRK_FSM_STATE_STOP : WRK_FSM_STATE_LATENCY_PRE1_TP; - WRK_FSM_STATE_STOP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_IDLE ; - default: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_IDLE ; - endcase - // - end - - + + // - // Ready Logic + // Ready Flag Logic // reg rdy_reg = 1'b1; @@ -1198,321 +1132,167 @@ module modexpng_general_worker // - // UOP_OPCODE_PROPAGATE_CARRIES + // Source to Destination Data Logic // - reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r; - reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r; - reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r; - reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r; - - wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r}; - wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r}; - wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r}; - wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r}; - - wire [CARRY_W -1:0] rd_narrow_x_din_x_w_cry_msb = rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] rd_narrow_y_din_x_w_cry_msb = rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] rd_narrow_x_din_y_w_cry_msb = rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] rd_narrow_y_din_y_w_cry_msb = rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W]; - - wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]}; - wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]}; - wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]}; - wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]}; - + reg [WORD_EXT_W -1:0] rd_wide_x_din_x_dly2; + reg 
[WORD_EXT_W -1:0] rd_wide_y_din_x_dly2; + reg [WORD_EXT_W -1:0] rd_wide_x_din_y_dly2; + reg [WORD_EXT_W -1:0] rd_wide_y_din_y_dly2; + reg [WORD_EXT_W -1:0] rd_narrow_x_din_x_dly2; + reg [WORD_EXT_W -1:0] rd_narrow_y_din_x_dly2; + reg [WORD_EXT_W -1:0] rd_narrow_x_din_y_dly2; + reg [WORD_EXT_W -1:0] rd_narrow_y_din_y_dly2; + + always @(posedge clk) begin + {rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2 } <= {rd_wide_x_din_x_dly1, rd_wide_y_din_x_dly1, rd_wide_x_din_y_dly1, rd_wide_y_din_y_dly1 }; + {rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2} <= {rd_narrow_x_din_x_dly1, rd_narrow_y_din_x_dly1, rd_narrow_x_din_y_dly1, rd_narrow_y_din_y_dly1}; + end + task update_wide_dout; input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y; {wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <= - { x_x, y_x, x_y, y_y }; + { x_x, y_x, x_y, y_y}; endtask task update_narrow_dout; input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y; {wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <= - { x_x, y_x, x_y, y_y }; - endtask - - task update_narrow_carries; - input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry; - {rd_narrow_x_din_x_cry_r, rd_narrow_y_din_x_cry_r, rd_narrow_x_din_y_cry_r, rd_narrow_y_din_y_cry_r} <= - { x_x_cry, y_x_cry, x_y_cry, y_y_cry }; + { x_x, y_x, x_y, y_y}; endtask - - always @(posedge clk) - // - if (opcode == UOP_OPCODE_PROPAGATE_CARRIES) - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_LATENCY_PRE2: - // - update_narrow_carries(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); - // - WRK_FSM_STATE_BUSY, - WRK_FSM_STATE_LATENCY_POST1: - // - update_narrow_carries(rd_narrow_x_din_x_w_cry_msb, - rd_narrow_y_din_x_w_cry_msb, - rd_narrow_x_din_y_w_cry_msb, - rd_narrow_y_din_y_w_cry_msb); - // - endcase - - // - // UOP_OPCODE_MODULAR_SUBTRACT - // - - reg [WORD_W:0] modsub_x_ab; - reg [WORD_W:0] modsub_y_ab; - - reg [WORD_W:0] modsub_x_ab_dly; - 
reg [WORD_W:0] modsub_y_ab_dly; - - reg [WORD_W:0] modsub_x_abn; - reg [WORD_W:0] modsub_y_abn; - - reg modsub_x_ab_mask_now; - reg modsub_y_ab_mask_now; - - reg modsub_x_abn_mask_now; - reg modsub_y_abn_mask_now; - - reg modsub_x_borrow_r; - reg modsub_y_borrow_r; - - wire modsub_x_ab_masked = modsub_x_ab_mask_now ? 1'b0 : modsub_x_ab[WORD_W]; - wire modsub_y_ab_masked = modsub_y_ab_mask_now ? 1'b0 : modsub_y_ab[WORD_W]; - - wire modsub_x_abn_masked = modsub_x_abn_mask_now ? 1'b0 : modsub_x_abn[WORD_W]; - wire modsub_y_abn_masked = modsub_y_abn_mask_now ? 1'b0 : modsub_y_abn[WORD_W]; - - wire [WORD_W:0] modsub_x_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; - wire [WORD_W:0] modsub_y_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; - wire [WORD_W:0] modsub_x_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; - wire [WORD_W:0] modsub_y_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; - - wire [WORD_W:0] modsub_x_wide_x_lsb_pad = {1'b0, wrk_rd_wide_x_din_x_dly1[WORD_W-1:0]}; - wire [WORD_W:0] modsub_x_wide_y_lsb_pad = {1'b0, wrk_rd_wide_x_din_y_dly1[WORD_W-1:0]}; - - wire [WORD_EXT_W -1:0] modsub_x_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_x_ab_dly[WORD_W-1:0]}; - wire [WORD_EXT_W -1:0] modsub_y_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_y_ab_dly[WORD_W-1:0]}; - - wire [WORD_EXT_W -1:0] modsub_x_abn_trunc = {{CARRY_W{1'b0}}, modsub_x_abn[WORD_W-1:0]}; - wire [WORD_EXT_W -1:0] modsub_y_abn_trunc = {{CARRY_W{1'b0}}, modsub_y_abn[WORD_W-1:0]}; - - wire [WORD_EXT_W -1:0] modsub_x_mux = !modsub_x_borrow_r ? wrk_rd_narrow_x_din_x_dly2 : wrk_rd_narrow_y_din_x_dly2; - wire [WORD_EXT_W -1:0] modsub_y_mux = !modsub_y_borrow_r ? 
wrk_rd_narrow_x_din_y_dly2 : wrk_rd_narrow_y_din_y_dly2; - - wire [WORD_W:0] modsub_x_ab_lsb_pad = {1'b0, modsub_x_ab[WORD_W-1:0]}; - wire [WORD_W:0] modsub_y_ab_lsb_pad = {1'b0, modsub_y_ab[WORD_W-1:0]}; - - task update_modsub_ab; - begin - modsub_x_ab <= modsub_x_narrow_x_lsb_pad - modsub_y_narrow_x_lsb_pad - modsub_x_ab_masked; - modsub_y_ab <= modsub_x_narrow_y_lsb_pad - modsub_y_narrow_y_lsb_pad - modsub_y_ab_masked; - end - endtask - - task update_modsub_abn; - begin - modsub_x_abn <= modsub_x_ab_lsb_pad + modsub_x_wide_x_lsb_pad + modsub_x_abn_masked; - modsub_y_abn <= modsub_y_ab_lsb_pad + modsub_x_wide_y_lsb_pad + modsub_y_abn_masked; - end - endtask - - always @(posedge clk) - // - if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) - // - case (wrk_fsm_state) - WRK_FSM_STATE_LATENCY_POST4_TP: - if (!wrk_fsm_two_pass_pass) - {modsub_x_borrow_r, modsub_y_borrow_r} <= {modsub_x_ab_dly[WORD_W], modsub_y_ab_dly[WORD_W]}; - endcase - - always @(posedge clk) begin - modsub_x_ab_dly <= modsub_x_ab; - modsub_y_ab_dly <= modsub_y_ab; - end - always @(posedge clk) begin // - modsub_x_ab <= {1'bX, WORD_DNC}; - modsub_y_ab <= {1'bX, WORD_DNC}; - // - modsub_x_abn <= {1'bX, WORD_DNC}; - modsub_y_abn <= {1'bX, WORD_DNC}; + update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC); + update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC); // - if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) + case (opcode) // - case (wrk_fsm_state) + UOP_OPCODE_PROPAGATE_CARRIES: // - WRK_FSM_STATE_LATENCY_PRE3_TP: - update_modsub_ab; - - WRK_FSM_STATE_LATENCY_PRE4_TP, - WRK_FSM_STATE_BUSY_TP, - WRK_FSM_STATE_LATENCY_POST1_TP, - WRK_FSM_STATE_LATENCY_POST2_TP: begin - update_modsub_ab; - update_modsub_abn; - end - // - WRK_FSM_STATE_LATENCY_POST3_TP: + case (wrk_fsm_state) // - update_modsub_abn; - // - endcase - // - end - - always @(posedge clk) begin - // - modsub_x_ab_mask_now <= 1'b0; - modsub_y_ab_mask_now <= 1'b0; - // - modsub_x_abn_mask_now <= 1'b0; - 
modsub_y_abn_mask_now <= 1'b0; - // - if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: + // + update_narrow_dout(propagate_carries_x_x_w_cry_reduced, propagate_carries_y_x_w_cry_reduced, propagate_carries_x_y_w_cry_reduced, propagate_carries_y_y_w_cry_reduced); + // + endcase // - case (wrk_fsm_state) - // - WRK_FSM_STATE_LATENCY_PRE2_TP: begin - modsub_x_ab_mask_now <= 1'b1; - modsub_y_ab_mask_now <= 1'b1; - end + UOP_OPCODE_COPY_CRT_Y2X: // - WRK_FSM_STATE_LATENCY_PRE3_TP: begin - modsub_x_abn_mask_now <= 1'b1; - modsub_y_abn_mask_now <= 1'b1; - end + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: + // + begin update_narrow_dout(rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2); + update_wide_dout (rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2, rd_wide_x_din_y_dly2, rd_wide_y_din_y_dly2); end + // + endcase + // + UOP_OPCODE_MODULAR_REDUCE_INIT: // - endcase - // - end - - - // - // UOP_OPCODE_ADD_UNEVEN - // - reg [WORD_W:0] regadd_x_x; - reg [WORD_W:0] regadd_y_x; - reg [WORD_W:0] regadd_x_y; - reg [WORD_W:0] regadd_y_y; - - reg regadd_x_x_cry; - reg regadd_y_x_cry; - reg regadd_x_y_cry; - reg regadd_y_y_cry; - - wire [WORD_EXT_W-1:0] regadd_x_x_trunc = {{CARRY_W{1'b0}}, regadd_x_x[WORD_W-1:0]}; - wire [WORD_EXT_W-1:0] regadd_y_x_trunc = {{CARRY_W{1'b0}}, regadd_y_x[WORD_W-1:0]}; - wire [WORD_EXT_W-1:0] regadd_x_y_trunc = {{CARRY_W{1'b0}}, regadd_x_y[WORD_W-1:0]}; - wire [WORD_EXT_W-1:0] regadd_y_y_trunc = {{CARRY_W{1'b0}}, regadd_y_y[WORD_W-1:0]}; - - //wire regadd_x_x_masked = regadd_xy_ab_x_mask_now ? 1'b0 : regadd_x_x[WORD_W]; - //wire regadd_y_x_masked = regadd_xy_ab_x_mask_now ? 1'b0 : regadd_y_x[WORD_W]; - //wire regadd_x_y_masked = regadd_xy_ab_y_mask_now ? 1'b0 : regadd_x_y[WORD_W]; - //wire regadd_y_y_masked = regadd_xy_ab_y_mask_now ? 
1'b0 : regadd_y_y[WORD_W]; - /**/ - reg [WORD_W:0] regadd_x_x_a_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_x_dly2[WORD_W-1:0]}; - reg [WORD_W:0] regadd_x_x_b_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_x_dly1[WORD_W-1:0]}; - reg [WORD_W:0] regadd_y_x_a_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_x_dly2[WORD_W-1:0]}; - reg [WORD_W:0] regadd_y_x_b_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_x_dly1[WORD_W-1:0]}; - reg [WORD_W:0] regadd_x_y_a_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_y_dly2[WORD_W-1:0]}; - reg [WORD_W:0] regadd_x_y_b_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_y_dly1[WORD_W-1:0]}; - reg [WORD_W:0] regadd_y_y_a_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_y_dly2[WORD_W-1:0]}; - reg [WORD_W:0] regadd_y_y_b_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_y_dly1[WORD_W-1:0]}; - /**/ - //WRK_FSM_STATE_BUSY_M1, - //WRK_FSM_STATE_LATENCY_POST1_M1, - //WRK_FSM_STATE_LATENCY_POST2_M1: - - always @(posedge clk) begin - // - regadd_x_x_a_lsb_pad <= {1'bX, WORD_DNC}; - regadd_x_x_b_lsb_pad <= {1'bX, WORD_DNC}; - regadd_y_x_a_lsb_pad <= {1'bX, WORD_DNC}; - regadd_y_x_b_lsb_pad <= {1'bX, WORD_DNC}; - regadd_x_y_a_lsb_pad <= {1'bX, WORD_DNC}; - regadd_x_y_b_lsb_pad <= {1'bX, WORD_DNC}; - regadd_y_y_a_lsb_pad <= {1'bX, WORD_DNC}; - regadd_y_y_b_lsb_pad <= {1'bX, WORD_DNC}; - // - if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN) + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: + // + update_wide_dout(rd_narrow_x_din_x_dly2, rd_narrow_y_din_x_dly2, rd_narrow_x_din_y_dly2, rd_narrow_y_din_y_dly2); + // + endcase // - case (wrk_fsm_state) + UOP_OPCODE_COPY_LADDERS_X2Y: // - WRK_FSM_STATE_LATENCY_PRE2_M2, - WRK_FSM_STATE_BUSY_M2, - WRK_FSM_STATE_LATENCY_POST1_M2: begin - regadd_x_x_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_x_din_x_dly1[WORD_W-1:0] : WORD_ZERO}; - regadd_x_x_b_lsb_pad <= {1'b0, wrk_rd_narrow_x_din_x [WORD_W-1:0] }; - regadd_y_x_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? 
wrk_rd_narrow_y_din_x_dly1[WORD_W-1:0] : WORD_ZERO}; - regadd_y_x_b_lsb_pad <= {1'b0, wrk_rd_narrow_y_din_x [WORD_W-1:0] }; - regadd_x_y_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_x_din_y_dly1[WORD_W-1:0] : WORD_ZERO}; - regadd_x_y_b_lsb_pad <= {1'b0, wrk_rd_narrow_x_din_y [WORD_W-1:0] }; - regadd_y_y_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_y_din_y_dly1[WORD_W-1:0] : WORD_ZERO}; - regadd_y_y_b_lsb_pad <= {1'b0, wrk_rd_narrow_y_din_y [WORD_W-1:0] }; - end + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: + // + begin update_wide_dout (rd_wide_x_din_x_dly1, rd_wide_x_din_x_dly2, rd_wide_x_din_y_dly1, rd_wide_x_din_y_dly2); + update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_x_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_y_dly2); end + // + endcase + // + UOP_OPCODE_CROSS_LADDERS_X2Y: // - endcase - end - - always @(posedge clk) begin - // - regadd_x_x <= {1'bX, WORD_DNC}; - regadd_y_x <= {1'bX, WORD_DNC}; - regadd_x_y <= {1'bX, WORD_DNC}; - regadd_y_y <= {1'bX, WORD_DNC}; - // - if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN) + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: + // + begin update_wide_dout (rd_wide_x_din_x_dly1, rd_wide_x_din_y_dly2, rd_wide_x_din_y_dly1, rd_wide_x_din_x_dly2); + update_narrow_dout(rd_narrow_x_din_x_dly1, rd_narrow_x_din_y_dly2, rd_narrow_x_din_y_dly1, rd_narrow_x_din_x_dly2); end + // + endcase // - case (wrk_fsm_state) + UOP_OPCODE_MODULAR_SUBTRACT_X: // - WRK_FSM_STATE_BUSY_M1, - WRK_FSM_STATE_LATENCY_POST1_M1, - WRK_FSM_STATE_LATENCY_POST2_M1: begin - regadd_x_x <= regadd_x_x_a_lsb_pad + regadd_x_x_b_lsb_pad + regadd_x_x_cry; - regadd_y_x <= regadd_y_x_a_lsb_pad + regadd_y_x_b_lsb_pad + regadd_y_x_cry; - regadd_x_y <= regadd_x_y_a_lsb_pad + regadd_x_y_b_lsb_pad + regadd_x_y_cry; - regadd_y_y <= regadd_y_y_a_lsb_pad + 
regadd_y_y_b_lsb_pad + regadd_y_y_cry; - end + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: + // + update_narrow_dout(modular_subtract_x_w_brw_reduced, modular_subtract_x_w_brw_reduced, modular_subtract_y_w_brw_reduced, modular_subtract_y_w_brw_reduced); + // + endcase + // + UOP_OPCODE_MODULAR_SUBTRACT_Y: // - endcase - // - end - - always @(posedge clk) begin - // - regadd_x_x_cry <= 1'bX; - regadd_y_x_cry <= 1'bX; - regadd_x_y_cry <= 1'bX; - regadd_y_y_cry <= 1'bX; - // - if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN) + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: + // + update_wide_dout(modular_subtract_x_w_cry_reduced, modular_subtract_x_w_cry_reduced, modular_subtract_y_w_cry_reduced, modular_subtract_y_w_cry_reduced); + // + endcase // - case (wrk_fsm_state) + UOP_OPCODE_MODULAR_SUBTRACT_Z: // - WRK_FSM_STATE_LATENCY_PRE2_M2: begin - regadd_x_x_cry <= 1'b0; - regadd_y_x_cry <= 1'b0; - regadd_x_y_cry <= 1'b0; - regadd_y_y_cry <= 1'b0; - end + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: + // + begin update_wide_dout (modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced); + update_narrow_dout(modular_subtract_x_mux_reduced, modular_subtract_x_mux_reduced, modular_subtract_y_mux_reduced, modular_subtract_y_mux_reduced); end + // + endcase + // + UOP_OPCODE_MERGE_LH: // - WRK_FSM_STATE_BUSY_M2, - WRK_FSM_STATE_LATENCY_POST1_M2: begin - regadd_x_x_cry <= regadd_x_x[WORD_W]; - regadd_y_x_cry <= regadd_y_x[WORD_W]; - regadd_x_y_cry <= regadd_x_y[WORD_W]; - regadd_y_y_cry <= regadd_y_y[WORD_W]; - end + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: + // + update_narrow_dout(rd_wide_x_din_x_dly2, rd_wide_y_din_x_dly2, rd_wide_x_din_y_dly2, 
rd_wide_y_din_y_dly2); + // + endcase + // + UOP_OPCODE_REGULAR_ADD_UNEVEN: // - endcase - // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY1, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST3: + // + update_narrow_dout(regular_add_uneven_x_x_w_cry_reduced, regular_add_uneven_y_x_w_cry_reduced, regular_add_uneven_x_y_w_cry_reduced, regular_add_uneven_y_y_w_cry_reduced); + // + endcase + endcase + // end + endmodule |