From 36339014ec3d3ad3bb4622392d5075d674e7dbeb Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Mon, 21 Oct 2019 12:51:30 +0300 Subject: Added the regular (not modular) addition operation required during the final step of the Garner's formula algorithm. Note, that the addition is "uneven" in the sense, that the first operand is full-size (as wide as the modulus), while the second one is only half the size. The adder internally banks the second input port during the second half of the addition. --- rtl/modexpng_general_worker.v | 602 +++++++++++++++++++++++++++--------------- 1 file changed, 384 insertions(+), 218 deletions(-) (limited to 'rtl/modexpng_general_worker.v') diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v index d82a120..cedbee9 100644 --- a/rtl/modexpng_general_worker.v +++ b/rtl/modexpng_general_worker.v @@ -108,7 +108,7 @@ module modexpng_general_worker localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 6'h10; localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 6'h11; localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 6'h12; - localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 6'h13; + localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 6'h13; localparam [5:0] WRK_FSM_STATE_BUSY_M1 = 6'h14; localparam [5:0] WRK_FSM_STATE_BUSY_M2 = 6'h15; localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 6'h16; @@ -250,10 +250,12 @@ module modexpng_general_worker reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1; reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly2; reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly3; + //reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly4; reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1; reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly2; reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly3; + //reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly4; reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1; reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2; @@ -275,8 +277,8 @@ module modexpng_general_worker {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x}; {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y}; // - {wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x}; - {wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y}; + {/*wrk_rd_wide_x_din_x_dly4,*/ wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {/*wrk_rd_wide_x_din_x_dly3,*/ wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x}; + {/*wrk_rd_wide_x_din_y_dly4,*/ wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {/*wrk_rd_wide_x_din_y_dly3,*/ wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y}; // {wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x}; {wrk_rd_narrow_y_din_x_dly2, wrk_rd_narrow_y_din_x_dly1} <= {wrk_rd_narrow_y_din_x_dly1, wrk_rd_narrow_y_din_x}; @@ -363,6 +365,10 @@ module modexpng_general_worker // end // + UOP_OPCODE_REGULAR_ADD_UNEVEN: + // + enable_narrow_xy_rd_en; + // endcase // endcase @@ -466,6 +472,10 @@ module modexpng_general_worker // end // + UOP_OPCODE_REGULAR_ADD_UNEVEN: + // + enable_narrow_xy_wr_en; + // endcase // endcase @@ -502,204 +512,6 @@ module modexpng_general_worker // Source to Destination Data Logic // - // - // UOP_OPCODE_PROPAGATE_CARRIES - // - - reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r; - reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r; - reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r; - reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r; - - wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r}; - wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r}; - wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r}; - wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r}; - - wire [CARRY_W -1:0] rd_narrow_x_din_x_w_cry_msb = rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] rd_narrow_y_din_x_w_cry_msb = rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] rd_narrow_x_din_y_w_cry_msb = rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W]; - wire [CARRY_W -1:0] rd_narrow_y_din_y_w_cry_msb = rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W]; - - wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]}; - wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]}; - wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]}; - wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]}; - - task update_wide_dout; - input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y; - {wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <= - { x_x, y_x, x_y, y_y }; - endtask - - task update_narrow_dout; - input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y; - {wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <= - { x_x, y_x, x_y, y_y }; - endtask - - task update_narrow_carries; - input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry; - {rd_narrow_x_din_x_cry_r, rd_narrow_y_din_x_cry_r, rd_narrow_x_din_y_cry_r, rd_narrow_y_din_y_cry_r} <= - { x_x_cry, y_x_cry, x_y_cry, y_y_cry }; - endtask - - - always @(posedge clk) - // - if (opcode == UOP_OPCODE_PROPAGATE_CARRIES) - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_LATENCY_PRE2: - // - update_narrow_carries(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); - // - WRK_FSM_STATE_BUSY, - WRK_FSM_STATE_LATENCY_POST1: - // - update_narrow_carries(rd_narrow_x_din_x_w_cry_msb, - rd_narrow_y_din_x_w_cry_msb, - rd_narrow_x_din_y_w_cry_msb, - rd_narrow_y_din_y_w_cry_msb); - // - endcase - - - // - // UOP_OPCODE_MODULAR_SUBTRACT - // - - reg [WORD_W:0] modsub_x_ab; - reg [WORD_W:0] modsub_y_ab; - - reg [WORD_W:0] modsub_x_ab_dly; - reg [WORD_W:0] modsub_y_ab_dly; - - reg [WORD_W:0] modsub_x_abn; - reg [WORD_W:0] modsub_y_abn; - - reg modsub_x_ab_mask_now; - reg modsub_y_ab_mask_now; - - reg modsub_x_abn_mask_now; - reg modsub_y_abn_mask_now; - - reg modsub_x_borrow_r; - reg modsub_y_borrow_r; - - wire modsub_x_ab_masked = modsub_x_ab_mask_now ? 1'b0 : modsub_x_ab[WORD_W]; - wire modsub_y_ab_masked = modsub_y_ab_mask_now ? 1'b0 : modsub_y_ab[WORD_W]; - - wire modsub_x_abn_masked = modsub_x_abn_mask_now ? 1'b0 : modsub_x_abn[WORD_W]; - wire modsub_y_abn_masked = modsub_y_abn_mask_now ? 1'b0 : modsub_y_abn[WORD_W]; - - wire [WORD_W:0] modsub_x_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; - wire [WORD_W:0] modsub_y_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; - wire [WORD_W:0] modsub_x_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; - wire [WORD_W:0] modsub_y_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; - - wire [WORD_W:0] modsub_x_wide_x_lsb_pad = {1'b0, wrk_rd_wide_x_din_x_dly1[WORD_W-1:0]}; - wire [WORD_W:0] modsub_x_wide_y_lsb_pad = {1'b0, wrk_rd_wide_x_din_y_dly1[WORD_W-1:0]}; - - wire [WORD_EXT_W -1:0] modsub_x_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_x_ab_dly[WORD_W-1:0]}; - wire [WORD_EXT_W -1:0] modsub_y_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_y_ab_dly[WORD_W-1:0]}; - - wire [WORD_EXT_W -1:0] modsub_x_abn_trunc = {{CARRY_W{1'b0}}, modsub_x_abn[WORD_W-1:0]}; - wire [WORD_EXT_W -1:0] modsub_y_abn_trunc = {{CARRY_W{1'b0}}, modsub_y_abn[WORD_W-1:0]}; - - wire [WORD_EXT_W -1:0] modsub_x_mux = !modsub_x_borrow_r ? wrk_rd_narrow_x_din_x_dly2 : wrk_rd_narrow_y_din_x_dly2; - wire [WORD_EXT_W -1:0] modsub_y_mux = !modsub_y_borrow_r ? wrk_rd_narrow_x_din_y_dly2 : wrk_rd_narrow_y_din_y_dly2; - - wire [WORD_W:0] modsub_x_ab_lsb_pad = {1'b0, modsub_x_ab[WORD_W-1:0]}; - wire [WORD_W:0] modsub_y_ab_lsb_pad = {1'b0, modsub_y_ab[WORD_W-1:0]}; - - task update_modsub_ab; - begin - modsub_x_ab <= modsub_x_narrow_x_lsb_pad - modsub_y_narrow_x_lsb_pad - modsub_x_ab_masked; - modsub_y_ab <= modsub_x_narrow_y_lsb_pad - modsub_y_narrow_y_lsb_pad - modsub_y_ab_masked; - end - endtask - - task update_modsub_abn; - begin - modsub_x_abn <= modsub_x_ab_lsb_pad + modsub_x_wide_x_lsb_pad + modsub_x_abn_masked; - modsub_y_abn <= modsub_y_ab_lsb_pad + modsub_x_wide_y_lsb_pad + modsub_y_abn_masked; - end - endtask - - always @(posedge clk) - // - if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) - // - case (wrk_fsm_state) - WRK_FSM_STATE_LATENCY_POST4_TP: - if (!wrk_fsm_two_pass_pass) - {modsub_x_borrow_r, modsub_y_borrow_r} <= {modsub_x_ab_dly[WORD_W], modsub_y_ab_dly[WORD_W]}; - endcase - - always @(posedge clk) begin - modsub_x_ab_dly <= modsub_x_ab; - modsub_y_ab_dly <= modsub_y_ab; - end - - always @(posedge clk) begin - // - modsub_x_ab <= {1'bX, WORD_DNC}; - modsub_y_ab <= {1'bX, WORD_DNC}; - // - modsub_x_abn <= {1'bX, WORD_DNC}; - modsub_y_abn <= {1'bX, WORD_DNC}; - // - if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_LATENCY_PRE3_TP: - update_modsub_ab; - - WRK_FSM_STATE_LATENCY_PRE4_TP, - WRK_FSM_STATE_BUSY_TP, - WRK_FSM_STATE_LATENCY_POST1_TP, - WRK_FSM_STATE_LATENCY_POST2_TP: begin - update_modsub_ab; - update_modsub_abn; - end - // - WRK_FSM_STATE_LATENCY_POST3_TP: - // - update_modsub_abn; - // - endcase - // - end - - always @(posedge clk) begin - // - modsub_x_ab_mask_now <= 1'b0; - modsub_y_ab_mask_now <= 1'b0; - // - modsub_x_abn_mask_now <= 1'b0; - modsub_y_abn_mask_now <= 1'b0; - // - if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) - // - case (wrk_fsm_state) - // - WRK_FSM_STATE_LATENCY_PRE2_TP: begin - modsub_x_ab_mask_now <= 1'b1; - modsub_y_ab_mask_now <= 1'b1; - end - // - WRK_FSM_STATE_LATENCY_PRE3_TP: begin - modsub_x_abn_mask_now <= 1'b1; - modsub_y_abn_mask_now <= 1'b1; - end - // - endcase - // - end - always @(posedge clk) begin // update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC); @@ -792,6 +604,15 @@ module modexpng_general_worker // end // + UOP_OPCODE_REGULAR_ADD_UNEVEN: begin + // + update_narrow_dout(regadd_x_x_trunc, + regadd_y_x_trunc, + regadd_x_y_trunc, + regadd_y_y_trunc); + // + end + // endcase // endcase @@ -832,6 +653,8 @@ module modexpng_general_worker reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_xy_next; reg rd_wide_xy_addr_xy_next_last_seen; + reg rd_wide_xy_addr_xy_next_last_seen_dly1; + reg rd_wide_xy_addr_xy_next_last_seen_dly2; wire rd_wide_xy_addr_xy_next_is_last = rd_wide_xy_addr_xy_next == word_index_last_half; wire rd_narrow_xy_addr_xy_next_is_last = rd_narrow_xy_addr_xy_next == word_index_last; @@ -890,18 +713,29 @@ module modexpng_general_worker always @(posedge clk) // - case (wrk_fsm_state_next_one_pass) - // - WRK_FSM_STATE_LATENCY_PRE1: - // - rd_wide_xy_addr_xy_next_last_seen <= 1'b0; - // - WRK_FSM_STATE_LATENCY_PRE2, - WRK_FSM_STATE_BUSY: - // - if (!rd_wide_xy_addr_xy_next_last_seen) - rd_wide_xy_addr_xy_next_last_seen <= rd_wide_xy_addr_xy_next_is_last; - // + case (opcode) + UOP_OPCODE_MERGE_LH: + case (wrk_fsm_state_next_one_pass) + WRK_FSM_STATE_LATENCY_PRE1: + rd_wide_xy_addr_xy_next_last_seen <= 1'b0; + WRK_FSM_STATE_BUSY: + if (!rd_wide_xy_addr_xy_next_last_seen && rd_wide_xy_addr_xy_next_is_last) + rd_wide_xy_addr_xy_next_last_seen <= 1'b1; + endcase + UOP_OPCODE_REGULAR_ADD_UNEVEN: + case (wrk_fsm_state_next_one_pass_meander) + WRK_FSM_STATE_LATENCY_PRE1_M1: begin + rd_wide_xy_addr_xy_next_last_seen <= 1'b0; + rd_wide_xy_addr_xy_next_last_seen_dly1 <= 1'b0; + rd_wide_xy_addr_xy_next_last_seen_dly2 <= 1'b0; + end + WRK_FSM_STATE_BUSY_M1: begin + if (!rd_wide_xy_addr_xy_next_last_seen && rd_wide_xy_addr_xy_next_is_last) + rd_wide_xy_addr_xy_next_last_seen <= 1'b1; + rd_wide_xy_addr_xy_next_last_seen_dly1 <= rd_wide_xy_addr_xy_next_last_seen; + rd_wide_xy_addr_xy_next_last_seen_dly2 <= rd_wide_xy_addr_xy_next_last_seen_dly1; + end + endcase endcase always @(posedge clk) begin @@ -979,6 +813,10 @@ module modexpng_general_worker update_rd_wide_bank_addr (sel_wide_out, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE); update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); end + UOP_OPCODE_REGULAR_ADD_UNEVEN: begin + update_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE); + update_rd_narrow_bank_addr(sel_wide_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); + end endcase // WRK_FSM_STATE_LATENCY_PRE2_M1, @@ -988,9 +826,11 @@ module modexpng_general_worker UOP_OPCODE_CROSS_LADDERS_X2Y: begin update_rd_wide_bank_addr (sel_wide_out, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ; update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; - // end - // + UOP_OPCODE_REGULAR_ADD_UNEVEN: begin + update_rd_wide_bank_addr (sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ; + update_rd_narrow_bank_addr(sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; + end endcase // WRK_FSM_STATE_LATENCY_PRE1_M2, @@ -1002,6 +842,10 @@ module modexpng_general_worker update_rd_wide_bank (sel_wide_in ); update_rd_narrow_bank(sel_narrow_in); end + UOP_OPCODE_REGULAR_ADD_UNEVEN: begin + update_rd_wide_bank (sel_narrow_in); + update_rd_narrow_bank(sel_narrow_in); + end endcase // endcase @@ -1125,6 +969,8 @@ module modexpng_general_worker update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); end + UOP_OPCODE_REGULAR_ADD_UNEVEN: + update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); endcase // endcase @@ -1171,8 +1017,9 @@ module modexpng_general_worker UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_OPCODE_MERGE_LH: wrk_fsm_state <= wrk_fsm_state_next_one_pass; UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander; - UOP_OPCODE_MODULAR_SUBTRACT: wrk_fsm_state <= wrk_fsm_state_next_two_pass; + UOP_OPCODE_CROSS_LADDERS_X2Y, + UOP_OPCODE_REGULAR_ADD_UNEVEN: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander; + UOP_OPCODE_MODULAR_SUBTRACT: wrk_fsm_state <= wrk_fsm_state_next_two_pass; default: wrk_fsm_state <= WRK_FSM_STATE_IDLE; endcase @@ -1205,7 +1052,8 @@ module modexpng_general_worker endcase // UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y: + UOP_OPCODE_CROSS_LADDERS_X2Y, + UOP_OPCODE_REGULAR_ADD_UNEVEN: // case (wrk_fsm_state) WRK_FSM_STATE_BUSY_M2: @@ -1317,4 +1165,322 @@ module modexpng_general_worker endcase + // + // UOP_OPCODE_PROPAGATE_CARRIES + // + reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r; + reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r; + reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r; + reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r; + + wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r}; + wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r}; + wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r}; + wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r}; + + wire [CARRY_W -1:0] rd_narrow_x_din_x_w_cry_msb = rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W]; + wire [CARRY_W -1:0] rd_narrow_y_din_x_w_cry_msb = rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W]; + wire [CARRY_W -1:0] rd_narrow_x_din_y_w_cry_msb = rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W]; + wire [CARRY_W -1:0] rd_narrow_y_din_y_w_cry_msb = rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W]; + + wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]}; + wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]}; + wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]}; + wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]}; + + task update_wide_dout; + input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y; + {wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <= + { x_x, y_x, x_y, y_y }; + endtask + + task update_narrow_dout; + input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y; + {wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <= + { x_x, y_x, x_y, y_y }; + endtask + + task update_narrow_carries; + input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry; + {rd_narrow_x_din_x_cry_r, rd_narrow_y_din_x_cry_r, rd_narrow_x_din_y_cry_r, rd_narrow_y_din_y_cry_r} <= + { x_x_cry, y_x_cry, x_y_cry, y_y_cry }; + endtask + + always @(posedge clk) + // + if (opcode == UOP_OPCODE_PROPAGATE_CARRIES) + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_LATENCY_PRE2: + // + update_narrow_carries(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO); + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1: + // + update_narrow_carries(rd_narrow_x_din_x_w_cry_msb, + rd_narrow_y_din_x_w_cry_msb, + rd_narrow_x_din_y_w_cry_msb, + rd_narrow_y_din_y_w_cry_msb); + // + endcase + + + // + // UOP_OPCODE_MODULAR_SUBTRACT + // + + reg [WORD_W:0] modsub_x_ab; + reg [WORD_W:0] modsub_y_ab; + + reg [WORD_W:0] modsub_x_ab_dly; + reg [WORD_W:0] modsub_y_ab_dly; + + reg [WORD_W:0] modsub_x_abn; + reg [WORD_W:0] modsub_y_abn; + + reg modsub_x_ab_mask_now; + reg modsub_y_ab_mask_now; + + reg modsub_x_abn_mask_now; + reg modsub_y_abn_mask_now; + + reg modsub_x_borrow_r; + reg modsub_y_borrow_r; + + wire modsub_x_ab_masked = modsub_x_ab_mask_now ? 1'b0 : modsub_x_ab[WORD_W]; + wire modsub_y_ab_masked = modsub_y_ab_mask_now ? 1'b0 : modsub_y_ab[WORD_W]; + + wire modsub_x_abn_masked = modsub_x_abn_mask_now ? 1'b0 : modsub_x_abn[WORD_W]; + wire modsub_y_abn_masked = modsub_y_abn_mask_now ? 1'b0 : modsub_y_abn[WORD_W]; + + wire [WORD_W:0] modsub_x_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; + wire [WORD_W:0] modsub_y_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_y_din_x[WORD_W-1:0]}; + wire [WORD_W:0] modsub_x_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_x_din_y[WORD_W-1:0]}; + wire [WORD_W:0] modsub_y_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_y_din_y[WORD_W-1:0]}; + + wire [WORD_W:0] modsub_x_wide_x_lsb_pad = {1'b0, wrk_rd_wide_x_din_x_dly1[WORD_W-1:0]}; + wire [WORD_W:0] modsub_x_wide_y_lsb_pad = {1'b0, wrk_rd_wide_x_din_y_dly1[WORD_W-1:0]}; + + wire [WORD_EXT_W -1:0] modsub_x_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_x_ab_dly[WORD_W-1:0]}; + wire [WORD_EXT_W -1:0] modsub_y_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_y_ab_dly[WORD_W-1:0]}; + + wire [WORD_EXT_W -1:0] modsub_x_abn_trunc = {{CARRY_W{1'b0}}, modsub_x_abn[WORD_W-1:0]}; + wire [WORD_EXT_W -1:0] modsub_y_abn_trunc = {{CARRY_W{1'b0}}, modsub_y_abn[WORD_W-1:0]}; + + wire [WORD_EXT_W -1:0] modsub_x_mux = !modsub_x_borrow_r ? wrk_rd_narrow_x_din_x_dly2 : wrk_rd_narrow_y_din_x_dly2; + wire [WORD_EXT_W -1:0] modsub_y_mux = !modsub_y_borrow_r ? wrk_rd_narrow_x_din_y_dly2 : wrk_rd_narrow_y_din_y_dly2; + + wire [WORD_W:0] modsub_x_ab_lsb_pad = {1'b0, modsub_x_ab[WORD_W-1:0]}; + wire [WORD_W:0] modsub_y_ab_lsb_pad = {1'b0, modsub_y_ab[WORD_W-1:0]}; + + task update_modsub_ab; + begin + modsub_x_ab <= modsub_x_narrow_x_lsb_pad - modsub_y_narrow_x_lsb_pad - modsub_x_ab_masked; + modsub_y_ab <= modsub_x_narrow_y_lsb_pad - modsub_y_narrow_y_lsb_pad - modsub_y_ab_masked; + end + endtask + + task update_modsub_abn; + begin + modsub_x_abn <= modsub_x_ab_lsb_pad + modsub_x_wide_x_lsb_pad + modsub_x_abn_masked; + modsub_y_abn <= modsub_y_ab_lsb_pad + modsub_x_wide_y_lsb_pad + modsub_y_abn_masked; + end + endtask + + always @(posedge clk) + // + if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) + // + case (wrk_fsm_state) + WRK_FSM_STATE_LATENCY_POST4_TP: + if (!wrk_fsm_two_pass_pass) + {modsub_x_borrow_r, modsub_y_borrow_r} <= {modsub_x_ab_dly[WORD_W], modsub_y_ab_dly[WORD_W]}; + endcase + + always @(posedge clk) begin + modsub_x_ab_dly <= modsub_x_ab; + modsub_y_ab_dly <= modsub_y_ab; + end + + always @(posedge clk) begin + // + modsub_x_ab <= {1'bX, WORD_DNC}; + modsub_y_ab <= {1'bX, WORD_DNC}; + // + modsub_x_abn <= {1'bX, WORD_DNC}; + modsub_y_abn <= {1'bX, WORD_DNC}; + // + if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_LATENCY_PRE3_TP: + update_modsub_ab; + + WRK_FSM_STATE_LATENCY_PRE4_TP, + WRK_FSM_STATE_BUSY_TP, + WRK_FSM_STATE_LATENCY_POST1_TP, + WRK_FSM_STATE_LATENCY_POST2_TP: begin + update_modsub_ab; + update_modsub_abn; + end + // + WRK_FSM_STATE_LATENCY_POST3_TP: + // + update_modsub_abn; + // + endcase + // + end + + always @(posedge clk) begin + // + modsub_x_ab_mask_now <= 1'b0; + modsub_y_ab_mask_now <= 1'b0; + // + modsub_x_abn_mask_now <= 1'b0; + modsub_y_abn_mask_now <= 1'b0; + // + if (opcode == UOP_OPCODE_MODULAR_SUBTRACT) + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_LATENCY_PRE2_TP: begin + modsub_x_ab_mask_now <= 1'b1; + modsub_y_ab_mask_now <= 1'b1; + end + // + WRK_FSM_STATE_LATENCY_PRE3_TP: begin + modsub_x_abn_mask_now <= 1'b1; + modsub_y_abn_mask_now <= 1'b1; + end + // + endcase + // + end + + + // + // UOP_OPCODE_ADD_UNEVEN + // + reg [WORD_W:0] regadd_x_x; + reg [WORD_W:0] regadd_y_x; + reg [WORD_W:0] regadd_x_y; + reg [WORD_W:0] regadd_y_y; + + reg regadd_x_x_cry; + reg regadd_y_x_cry; + reg regadd_x_y_cry; + reg regadd_y_y_cry; + + wire [WORD_EXT_W-1:0] regadd_x_x_trunc = {{CARRY_W{1'b0}}, regadd_x_x[WORD_W-1:0]}; + wire [WORD_EXT_W-1:0] regadd_y_x_trunc = {{CARRY_W{1'b0}}, regadd_y_x[WORD_W-1:0]}; + wire [WORD_EXT_W-1:0] regadd_x_y_trunc = {{CARRY_W{1'b0}}, regadd_x_y[WORD_W-1:0]}; + wire [WORD_EXT_W-1:0] regadd_y_y_trunc = {{CARRY_W{1'b0}}, regadd_y_y[WORD_W-1:0]}; + + //wire regadd_x_x_masked = regadd_xy_ab_x_mask_now ? 1'b0 : regadd_x_x[WORD_W]; + //wire regadd_y_x_masked = regadd_xy_ab_x_mask_now ? 1'b0 : regadd_y_x[WORD_W]; + //wire regadd_x_y_masked = regadd_xy_ab_y_mask_now ? 1'b0 : regadd_x_y[WORD_W]; + //wire regadd_y_y_masked = regadd_xy_ab_y_mask_now ? 1'b0 : regadd_y_y[WORD_W]; + /**/ + reg [WORD_W:0] regadd_x_x_a_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_x_dly2[WORD_W-1:0]}; + reg [WORD_W:0] regadd_x_x_b_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_x_dly1[WORD_W-1:0]}; + reg [WORD_W:0] regadd_y_x_a_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_x_dly2[WORD_W-1:0]}; + reg [WORD_W:0] regadd_y_x_b_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_x_dly1[WORD_W-1:0]}; + reg [WORD_W:0] regadd_x_y_a_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_y_dly2[WORD_W-1:0]}; + reg [WORD_W:0] regadd_x_y_b_lsb_pad; //= {1'b0, wrk_rd_narrow_x_din_y_dly1[WORD_W-1:0]}; + reg [WORD_W:0] regadd_y_y_a_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_y_dly2[WORD_W-1:0]}; + reg [WORD_W:0] regadd_y_y_b_lsb_pad; //= {1'b0, wrk_rd_narrow_y_din_y_dly1[WORD_W-1:0]}; + /**/ + //WRK_FSM_STATE_BUSY_M1, + //WRK_FSM_STATE_LATENCY_POST1_M1, + //WRK_FSM_STATE_LATENCY_POST2_M1: + + always @(posedge clk) begin + // + regadd_x_x_a_lsb_pad <= {1'bX, WORD_DNC}; + regadd_x_x_b_lsb_pad <= {1'bX, WORD_DNC}; + regadd_y_x_a_lsb_pad <= {1'bX, WORD_DNC}; + regadd_y_x_b_lsb_pad <= {1'bX, WORD_DNC}; + regadd_x_y_a_lsb_pad <= {1'bX, WORD_DNC}; + regadd_x_y_b_lsb_pad <= {1'bX, WORD_DNC}; + regadd_y_y_a_lsb_pad <= {1'bX, WORD_DNC}; + regadd_y_y_b_lsb_pad <= {1'bX, WORD_DNC}; + // + if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN) + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_LATENCY_PRE2_M2, + WRK_FSM_STATE_BUSY_M2, + WRK_FSM_STATE_LATENCY_POST1_M2: begin + regadd_x_x_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_x_din_x_dly1[WORD_W-1:0] : WORD_ZERO}; + regadd_x_x_b_lsb_pad <= {1'b0, wrk_rd_narrow_x_din_x [WORD_W-1:0] }; + regadd_y_x_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_y_din_x_dly1[WORD_W-1:0] : WORD_ZERO}; + regadd_y_x_b_lsb_pad <= {1'b0, wrk_rd_narrow_y_din_x [WORD_W-1:0] }; + regadd_x_y_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_x_din_y_dly1[WORD_W-1:0] : WORD_ZERO}; + regadd_x_y_b_lsb_pad <= {1'b0, wrk_rd_narrow_x_din_y [WORD_W-1:0] }; + regadd_y_y_a_lsb_pad <= {1'b0, !rd_wide_xy_addr_xy_next_last_seen_dly2 ? wrk_rd_narrow_y_din_y_dly1[WORD_W-1:0] : WORD_ZERO}; + regadd_y_y_b_lsb_pad <= {1'b0, wrk_rd_narrow_y_din_y [WORD_W-1:0] }; + end + // + endcase + end + + always @(posedge clk) begin + // + regadd_x_x <= {1'bX, WORD_DNC}; + regadd_y_x <= {1'bX, WORD_DNC}; + regadd_x_y <= {1'bX, WORD_DNC}; + regadd_y_y <= {1'bX, WORD_DNC}; + // + if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN) + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY_M1, + WRK_FSM_STATE_LATENCY_POST1_M1, + WRK_FSM_STATE_LATENCY_POST2_M1: begin + regadd_x_x <= regadd_x_x_a_lsb_pad + regadd_x_x_b_lsb_pad + regadd_x_x_cry; + regadd_y_x <= regadd_y_x_a_lsb_pad + regadd_y_x_b_lsb_pad + regadd_y_x_cry; + regadd_x_y <= regadd_x_y_a_lsb_pad + regadd_x_y_b_lsb_pad + regadd_x_y_cry; + regadd_y_y <= regadd_y_y_a_lsb_pad + regadd_y_y_b_lsb_pad + regadd_y_y_cry; + end + // + endcase + // + end + + always @(posedge clk) begin + // + regadd_x_x_cry <= 1'bX; + regadd_y_x_cry <= 1'bX; + regadd_x_y_cry <= 1'bX; + regadd_y_y_cry <= 1'bX; + // + if (opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN) + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_LATENCY_PRE2_M2: begin + regadd_x_x_cry <= 1'b0; + regadd_y_x_cry <= 1'b0; + regadd_x_y_cry <= 1'b0; + regadd_y_y_cry <= 1'b0; + end + // + WRK_FSM_STATE_BUSY_M2, + WRK_FSM_STATE_LATENCY_POST1_M2: begin + regadd_x_x_cry <= regadd_x_x[WORD_W]; + regadd_y_x_cry <= regadd_y_x[WORD_W]; + regadd_x_y_cry <= regadd_x_y[WORD_W]; + regadd_y_y_cry <= regadd_y_y[WORD_W]; + end + // + endcase + // + end + endmodule -- cgit v1.2.3