aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-11-16 01:17:02 +0300
committer Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-11-16 01:17:02 +0300
commit f4771a7b6774a53cbada5b86701d65e08a36c10d (patch)
tree a8a54d115a178b1463bcedc6c37759d9acf33e9f
parent 65bf05440677643d9c1f2ae6a0573315f52926c8 (diff)
The uOP engine didn't meet timing at 180 MHz. The pipeline had two stages: FETCH
and DECODE. Apparently one clock cycle is not enough to fully decode an instruction, so decoding now takes two clock cycles (DECODE_1 and DECODE_2). This seems to solve the problem. If we run into more timing violations here, we can add an extra DECODE_3 cycle and register the currently combinatorial uop_opcode_* flags at DECODE_2. This fix increases the core's latency by 59/32 clock cycles (CRT/non-CRT mode), plus two extra clock cycles for each bit of the exponent.
-rw-r--r-- rtl/modexpng_uop_engine.v 168
1 files changed, 91 insertions, 77 deletions
diff --git a/rtl/modexpng_uop_engine.v b/rtl/modexpng_uop_engine.v
index ba2d4c8..1119fbd 100644
--- a/rtl/modexpng_uop_engine.v
+++ b/rtl/modexpng_uop_engine.v
@@ -280,66 +280,79 @@ module modexpng_uop_engine
//
// UOP_FSM
//
- localparam [1:0] UOP_FSM_STATE_IDLE = 2'b00;
- localparam [1:0] UOP_FSM_STATE_FETCH = 2'b01;
- localparam [1:0] UOP_FSM_STATE_DECODE = 2'b10;
- localparam [1:0] UOP_FSM_STATE_BUSY = 2'b11;
+ localparam [2:0] UOP_FSM_STATE_IDLE = 3'b000;
+ localparam [2:0] UOP_FSM_STATE_FETCH = 3'b001;
+ localparam [2:0] UOP_FSM_STATE_DECODE_1 = 3'b010;
+ localparam [2:0] UOP_FSM_STATE_DECODE_2 = 3'b011;
+ localparam [2:0] UOP_FSM_STATE_BUSY = 3'b100;
- reg [1:0] uop_fsm_state = UOP_FSM_STATE_IDLE;
- reg [1:0] uop_fsm_state_next;
+ reg [2:0] uop_fsm_state = UOP_FSM_STATE_IDLE;
+ reg [2:0] uop_fsm_state_next;
//
// UOP ROM
//
reg [UOP_ADDR_W -1:0] uop_addr;
- wire [UOP_W -1:0] uop_data;
+ wire [UOP_W -1:0] uop_data_int;
modexpng_uop_rom uop_rom
(
.clk (clk),
.addr (uop_addr),
- .data (uop_data)
- );
+ .data (uop_data_int)
+ );
//
// UOP ROM Data Decoder
//
- wire [UOP_OPCODE_W -1:0] uop_data_opcode = uop_data[UOP_W -1-: UOP_OPCODE_W];
- wire [UOP_CRT_W -1:0] uop_data_crt = uop_data[UOP_W -UOP_OPCODE_W -1-: UOP_CRT_W ];
- wire [UOP_NPQ_W -1:0] uop_data_npq = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -1-: UOP_NPQ_W ];
- wire [UOP_AUX_W -1:0] uop_data_aux = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -1-: UOP_AUX_W ];
- wire [UOP_LADDER_W -1:0] uop_data_ladder = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -1-: UOP_LADDER_W];
- wire [BANK_ADDR_W -1:0] uop_data_sel_wide_in = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1-: BANK_ADDR_W ];
- wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_in = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1*BANK_ADDR_W -1-: BANK_ADDR_W ];
- wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ];
- wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ];
-
- wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ;
- wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) ||
- (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ;
- wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ;
- wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ||
- (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ||
- (uop_data_opcode == UOP_OPCODE_CROSS_LADDERS_X2Y ) ||
- (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT ) ||
- (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
- (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
- (uop_data_opcode == UOP_OPCODE_MERGE_LH ) ||
- (uop_data_opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN ) ;
- wire uop_opcode_is_mmm = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ) ||
- (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ||
- (uop_data_opcode == UOP_OPCODE_REGULAR_MULTIPLY ) ;
- wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT ) ||
- (uop_data_opcode == UOP_OPCODE_LADDER_STEP ) ;
+ reg [UOP_OPCODE_W -1:0] uop_data_opcode_dec;
+ reg [UOP_CRT_W -1:0] uop_data_crt_dec;
+ reg [UOP_NPQ_W -1:0] uop_data_npq_dec;
+ reg [UOP_AUX_W -1:0] uop_data_aux_dec;
+ reg [UOP_LADDER_W -1:0] uop_data_ladder_dec;
+ reg [BANK_ADDR_W -1:0] uop_data_sel_wide_in_dec;
+ reg [BANK_ADDR_W -1:0] uop_data_sel_narrow_in_dec;
+ reg [BANK_ADDR_W -1:0] uop_data_sel_wide_out_dec;
+ reg [BANK_ADDR_W -1:0] uop_data_sel_narrow_out_dec;
+
+ always @(posedge clk) begin
+ uop_data_opcode_dec <= uop_data_int[UOP_W -1-: UOP_OPCODE_W];
+ uop_data_crt_dec <= uop_data_int[UOP_W -UOP_OPCODE_W -1-: UOP_CRT_W ];
+ uop_data_npq_dec <= uop_data_int[UOP_W -UOP_OPCODE_W -UOP_CRT_W -1-: UOP_NPQ_W ];
+ uop_data_aux_dec <= uop_data_int[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -1-: UOP_AUX_W ];
+ uop_data_ladder_dec <= uop_data_int[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -1-: UOP_LADDER_W];
+ uop_data_sel_wide_in_dec <= uop_data_int[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1-: BANK_ADDR_W ];
+ uop_data_sel_narrow_in_dec <= uop_data_int[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1*BANK_ADDR_W -1-: BANK_ADDR_W ];
+ uop_data_sel_wide_out_dec <= uop_data_int[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ];
+ uop_data_sel_narrow_out_dec <= uop_data_int[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ];
+ end
+
+ wire uop_opcode_is_stop = uop_data_opcode_dec == UOP_OPCODE_STOP ;
+ wire uop_opcode_is_in = (uop_data_opcode_dec == UOP_OPCODE_INPUT_TO_WIDE ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_INPUT_TO_NARROW ) ;
+ wire uop_opcode_is_out = uop_data_opcode_dec == UOP_OPCODE_OUTPUT_FROM_NARROW ;
+ wire uop_opcode_is_wrk = (uop_data_opcode_dec == UOP_OPCODE_COPY_CRT_Y2X ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_COPY_LADDERS_X2Y ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_CROSS_LADDERS_X2Y ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_MODULAR_SUBTRACT ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_PROPAGATE_CARRIES ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_MERGE_LH ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_REGULAR_ADD_UNEVEN ) ;
+ wire uop_opcode_is_mmm = (uop_data_opcode_dec == UOP_OPCODE_MODULAR_MULTIPLY ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_MODULAR_REDUCE_PROC ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_REGULAR_MULTIPLY ) ;
+ wire uop_opcode_is_ladder = (uop_data_opcode_dec == UOP_OPCODE_LADDER_INIT ) ||
+ (uop_data_opcode_dec == UOP_OPCODE_LADDER_STEP ) ;
//
// Debug Signal
//
`ifdef MODEXPNG_ENABLE_DEBUG
- assign uop_decoded_stop = (uop_fsm_state == UOP_FSM_STATE_DECODE) && uop_opcode_is_stop;
+ assign uop_decoded_stop = (uop_fsm_state == UOP_FSM_STATE_DECODE_2) && uop_opcode_is_stop;
`else
assign uop_decoded_stop = 1'b0;
`endif
@@ -369,13 +382,13 @@ module modexpng_uop_engine
wire mmm_ena = mmm_ena_x & mmm_ena_y;
wire mmm_rdy = mmm_rdy_x & mmm_rdy_y;
- assign uop_loop_now = (uop_data_opcode == UOP_OPCODE_LADDER_STEP) && !io_mgr_ladder_done;
+ assign uop_loop_now = (uop_data_opcode_dec == UOP_OPCODE_LADDER_STEP) && !io_mgr_ladder_done;
reg [1:0] uop_data_ladder_mux;
- always @(uop_data_ladder, io_mgr_ladder_p, io_mgr_ladder_q, io_mgr_ladder_d)
+ always @(uop_data_ladder_dec, io_mgr_ladder_p, io_mgr_ladder_q, io_mgr_ladder_d)
//
- case (uop_data_ladder)
+ case (uop_data_ladder_dec)
UOP_LADDER_00: uop_data_ladder_mux = 2'b00;
UOP_LADDER_11: uop_data_ladder_mux = 2'b11;
UOP_LADDER_D: uop_data_ladder_mux = {~io_mgr_ladder_d, io_mgr_ladder_d};
@@ -384,10 +397,10 @@ module modexpng_uop_engine
reg [OP_ADDR_W-1:0] word_index_last_mux;
- always @(uop_data_npq, word_index_last_n, word_index_last_pq)
+ always @(uop_data_npq_dec, word_index_last_n, word_index_last_pq)
//
- if (uop_data_npq == UOP_NPQ_N) word_index_last_mux = word_index_last_n;
- else word_index_last_mux = word_index_last_pq;
+ if (uop_data_npq_dec == UOP_NPQ_N) word_index_last_mux = word_index_last_n;
+ else word_index_last_mux = word_index_last_pq;
reg [BIT_INDEX_W-1:0] bit_index_last_mux;
@@ -407,13 +420,13 @@ module modexpng_uop_engine
mmm_ena_y_r <= 1'b0;
wrk_ena_r <= 1'b0;
end else begin
- io_mgr_ena_r <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in ||
- uop_opcode_is_out ||
- uop_opcode_is_ladder): 1'b0;
- mmm_ena_x_r <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
- mmm_ena_y_r <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
- wrk_ena_r <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk ||
- uop_opcode_is_out ): 1'b0;
+ io_mgr_ena_r <= uop_fsm_state == UOP_FSM_STATE_DECODE_2 ? (uop_opcode_is_in ||
+ uop_opcode_is_out ||
+ uop_opcode_is_ladder): 1'b0;
+ mmm_ena_x_r <= uop_fsm_state == UOP_FSM_STATE_DECODE_2 ? uop_opcode_is_mmm : 1'b0;
+ mmm_ena_y_r <= uop_fsm_state == UOP_FSM_STATE_DECODE_2 ? uop_opcode_is_mmm : 1'b0;
+ wrk_ena_r <= uop_fsm_state == UOP_FSM_STATE_DECODE_2 ? (uop_opcode_is_wrk ||
+ uop_opcode_is_out ): 1'b0;
end
@@ -497,59 +510,59 @@ module modexpng_uop_engine
always @(posedge clk)
//
- if (uop_fsm_state == UOP_FSM_STATE_DECODE)
+ if (uop_fsm_state == UOP_FSM_STATE_DECODE_2)
//
- case (uop_data_opcode)
+ case (uop_data_opcode_dec)
//
UOP_OPCODE_INPUT_TO_WIDE:
- update_io_mgr_params(uop_data_crt, uop_data_aux, uop_data_sel_narrow_in, uop_data_sel_wide_out, uop_data_opcode);
+ update_io_mgr_params(uop_data_crt_dec, uop_data_aux_dec, uop_data_sel_narrow_in_dec, uop_data_sel_wide_out_dec, uop_data_opcode_dec);
//
UOP_OPCODE_INPUT_TO_NARROW:
- update_io_mgr_params(uop_data_crt, uop_data_aux, uop_data_sel_narrow_in, uop_data_sel_narrow_out, uop_data_opcode);
+ update_io_mgr_params(uop_data_crt_dec, uop_data_aux_dec, uop_data_sel_narrow_in_dec, uop_data_sel_narrow_out_dec, uop_data_opcode_dec);
//
UOP_OPCODE_OUTPUT_FROM_NARROW: begin
- update_io_mgr_params(uop_data_crt, uop_data_aux, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode);
- update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, BANK_DNC, BANK_DNC, uop_data_opcode);
+ update_io_mgr_params(uop_data_crt_dec, uop_data_aux_dec, BANK_DNC, uop_data_sel_narrow_out_dec, uop_data_opcode_dec);
+ update_wrk_params(BANK_DNC, uop_data_sel_narrow_in_dec, BANK_DNC, BANK_DNC, uop_data_opcode_dec);
end
//
UOP_OPCODE_COPY_CRT_Y2X,
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y:
- update_wrk_params(uop_data_sel_wide_in, uop_data_sel_narrow_in, uop_data_sel_wide_out, uop_data_sel_narrow_out, uop_data_opcode);
+ update_wrk_params(uop_data_sel_wide_in_dec, uop_data_sel_narrow_in_dec, uop_data_sel_wide_out_dec, uop_data_sel_narrow_out_dec, uop_data_opcode_dec);
//
UOP_OPCODE_MODULAR_MULTIPLY: begin
- update_mmm_params(uop_data_ladder_mux, uop_data_sel_wide_in, uop_data_sel_narrow_in, uop_data_aux, 1'b0, 1'b0);
- update_rdct_params(uop_data_sel_wide_out, uop_data_sel_narrow_out);
+ update_mmm_params(uop_data_ladder_mux, uop_data_sel_wide_in_dec, uop_data_sel_narrow_in_dec, uop_data_aux_dec, 1'b0, 1'b0);
+ update_rdct_params(uop_data_sel_wide_out_dec, uop_data_sel_narrow_out_dec);
end
//
UOP_OPCODE_MODULAR_SUBTRACT:
- update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, uop_data_sel_wide_out, uop_data_sel_narrow_out, uop_data_opcode);
+ update_wrk_params(BANK_DNC, uop_data_sel_narrow_in_dec, uop_data_sel_wide_out_dec, uop_data_sel_narrow_out_dec, uop_data_opcode_dec);
//
UOP_OPCODE_MODULAR_REDUCE_INIT:
- update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, BANK_DNC, BANK_DNC, uop_data_opcode);
+ update_wrk_params(BANK_DNC, uop_data_sel_narrow_in_dec, BANK_DNC, BANK_DNC, uop_data_opcode_dec);
//
UOP_OPCODE_MODULAR_REDUCE_PROC: begin
update_mmm_params(2'bXX, BANK_DNC, BANK_DNC, 1'b0, 1'b1, 1'b0);
- update_rdct_params(uop_data_sel_wide_out, uop_data_sel_narrow_out);
+ update_rdct_params(uop_data_sel_wide_out_dec, uop_data_sel_narrow_out_dec);
end
//
UOP_OPCODE_PROPAGATE_CARRIES:
- update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode);
+ update_wrk_params(BANK_DNC, uop_data_sel_narrow_in_dec, BANK_DNC, uop_data_sel_narrow_out_dec, uop_data_opcode_dec);
//
UOP_OPCODE_MERGE_LH:
- update_wrk_params(BANK_DNC, BANK_DNC, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode);
+ update_wrk_params(BANK_DNC, BANK_DNC, BANK_DNC, uop_data_sel_narrow_out_dec, uop_data_opcode_dec);
//
UOP_OPCODE_REGULAR_MULTIPLY: begin
- update_mmm_params(2'b11, uop_data_sel_wide_in, uop_data_sel_narrow_in, 1'b0, 1'b0, 1'b1);
- update_rdct_params(uop_data_sel_wide_out, uop_data_sel_narrow_out);
+ update_mmm_params(2'b11, uop_data_sel_wide_in_dec, uop_data_sel_narrow_in_dec, 1'b0, 1'b0, 1'b1);
+ update_rdct_params(uop_data_sel_wide_out_dec, uop_data_sel_narrow_out_dec);
end
//
UOP_OPCODE_REGULAR_ADD_UNEVEN:
- update_wrk_params(uop_data_sel_wide_in, uop_data_sel_narrow_in, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode);
+ update_wrk_params(uop_data_sel_wide_in_dec, uop_data_sel_narrow_in_dec, BANK_DNC, uop_data_sel_narrow_out_dec, uop_data_opcode_dec);
//
UOP_OPCODE_LADDER_INIT,
UOP_OPCODE_LADDER_STEP:
- update_io_mgr_params(UOP_CRT_DNC, UOP_AUX_DNC, BANK_DNC, BANK_DNC, uop_data_opcode);
+ update_io_mgr_params(UOP_CRT_DNC, UOP_AUX_DNC, BANK_DNC, BANK_DNC, uop_data_opcode_dec);
//
endcase
@@ -595,9 +608,9 @@ module modexpng_uop_engine
always @(posedge clk)
//
- if (uop_fsm_state == UOP_FSM_STATE_DECODE)
+ if (uop_fsm_state == UOP_FSM_STATE_DECODE_2)
//
- case (uop_data_opcode)
+ case (uop_data_opcode_dec)
//
UOP_OPCODE_INPUT_TO_WIDE,
UOP_OPCODE_INPUT_TO_NARROW,
@@ -660,10 +673,11 @@ module modexpng_uop_engine
always @* begin
//
case (uop_fsm_state)
- UOP_FSM_STATE_IDLE: uop_fsm_state_next = ena ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_IDLE;
- UOP_FSM_STATE_FETCH: uop_fsm_state_next = UOP_FSM_STATE_DECODE ;
- UOP_FSM_STATE_DECODE: uop_fsm_state_next = uop_opcode_is_stop ? UOP_FSM_STATE_IDLE : UOP_FSM_STATE_BUSY;
- UOP_FSM_STATE_BUSY: uop_fsm_state_next = uop_exit_from_busy ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_BUSY;
+ UOP_FSM_STATE_IDLE: uop_fsm_state_next = ena ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_IDLE;
+ UOP_FSM_STATE_FETCH: uop_fsm_state_next = UOP_FSM_STATE_DECODE_1 ;
+ UOP_FSM_STATE_DECODE_1: uop_fsm_state_next = UOP_FSM_STATE_DECODE_2 ;
+ UOP_FSM_STATE_DECODE_2: uop_fsm_state_next = uop_opcode_is_stop ? UOP_FSM_STATE_IDLE : UOP_FSM_STATE_BUSY;
+ UOP_FSM_STATE_BUSY: uop_fsm_state_next = uop_exit_from_busy ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_BUSY;
endcase
//
end
@@ -677,10 +691,10 @@ module modexpng_uop_engine
always @(posedge clk or negedge rst_n)
//
- if (!rst_n) rdy_r <= 1'b1;
+ if (!rst_n) rdy_r <= 1'b1;
else case (uop_fsm_state)
- UOP_FSM_STATE_IDLE: rdy_r <= ~ena;
- UOP_FSM_STATE_DECODE: rdy_r <= uop_opcode_is_stop;
+ UOP_FSM_STATE_IDLE: rdy_r <= ~ena;
+ UOP_FSM_STATE_DECODE_2: rdy_r <= uop_opcode_is_stop;
endcase