aboutsummaryrefslogtreecommitdiff
path: root/rtl/modexpng_uop_engine.v
diff options
context:
space:
mode:
author Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-10-21 12:56:30 +0300
committer Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-10-21 12:56:30 +0300
commit 9eac252242c69e51a38a9a88c87b564dd40b6257 (patch)
tree ab6653950a7f2a811598c73f15116fa5c009ec5c /rtl/modexpng_uop_engine.v
parent 36339014ec3d3ad3bb4622392d5075d674e7dbeb (diff)
Entire CRT signature algorithm works by now.
Moved micro-operations handler into a separate module file, this way we don't have any synthesized stuff in the top-level module, just instantiations. This is more consistent from the design partitioning point of view. Btw, Xilinx claims their tools work better that way too, but who knows... Added optional simulation-only code to assist debugging. Un-comment the ENABLE_DEBUG `define in 'rtl/modexpng_parameters.vh' to use, but don't ever try to synthesize the core with debugging enabled.
Diffstat (limited to 'rtl/modexpng_uop_engine.v')
-rw-r--r-- rtl/modexpng_uop_engine.v 658
1 files changed, 658 insertions, 0 deletions
diff --git a/rtl/modexpng_uop_engine.v b/rtl/modexpng_uop_engine.v
new file mode 100644
index 0000000..a87d924
--- /dev/null
+++ b/rtl/modexpng_uop_engine.v
@@ -0,0 +1,658 @@
+module modexpng_uop_engine // micro-operation (uop) engine: steps through a uop ROM and drives the io_mgr_*, mmm_*, rdct_* and wrk_* control interfaces
+(
+ clk, // input: clock, all sequential logic uses its rising edge
+ rst, // input: synchronous reset (FSM to idle, enable strobes cleared, rdy set)
+ // start / ready handshake
+ ena, // input: sampled while the FSM is idle to start fetching uops
+ rdy, // output: registered ready flag
+ // optional debug output, only present when MODEXPNG_ENABLE_DEBUG is defined
+ `ifdef MODEXPNG_ENABLE_DEBUG
+ uop_decoded_stop, // output: high while a STOP uop is in the decode state
+ `endif
+ // selects the ROM start address and which bit_index_last_* feeds the ladder step count
+ crt_mode, // input
+ // last word index inputs; the uop's NPQ field picks one of them for most opcodes
+ word_index_last_n, // input
+ word_index_last_pq, // input
+ // last bit index inputs; crt_mode picks one of them
+ bit_index_last_n, // input
+ bit_index_last_pq, // input
+ // io_mgr_* interface (used by input, output and ladder uops)
+ io_mgr_ena, // output: one-cycle enable strobe
+ io_mgr_rdy, // input: ready flag used to detect completion
+ io_mgr_sel_crt, // output
+ io_mgr_sel_aux, // output
+ io_mgr_sel_in, // output
+ io_mgr_sel_out, // output
+ io_mgr_word_index_last, // output
+ io_mgr_opcode, // output
+ io_mgr_ladder_steps, // output
+ io_mgr_ladder_d, // input: feeds the ladder mode mux
+ io_mgr_ladder_p, // input: feeds the ladder mode mux
+ io_mgr_ladder_q, // input: feeds the ladder mode mux
+ io_mgr_ladder_done, // input: while low, a LADDER_STEP uop makes the ROM address step back
+ // mmm_* interface, duplicated for the _x and _y halves (both always receive the same settings except ladder mode)
+ mmm_ena_x, // output: one-cycle enable strobe
+ mmm_ena_y, // output: one-cycle enable strobe
+ mmm_rdy_x, // input
+ mmm_rdy_y, // input
+ mmm_word_index_last_x, // output
+ mmm_word_index_last_y, // output
+ mmm_word_index_last_minus1_x, // output: word_index_last - 1
+ mmm_word_index_last_minus1_y, // output: word_index_last - 1
+ mmm_ladder_mode_x, // output: upper bit of the 2-bit ladder mode
+ mmm_ladder_mode_y, // output: lower bit of the 2-bit ladder mode
+ mmm_sel_wide_in_x, // output
+ mmm_sel_wide_in_y, // output
+ mmm_sel_narrow_in_x, // output
+ mmm_sel_narrow_in_y, // output
+ mmm_force_unity_b_x, // output
+ mmm_force_unity_b_y, // output
+ mmm_only_reduce_x, // output
+ mmm_only_reduce_y, // output
+ mmm_just_multiply_x, // output
+ mmm_just_multiply_y, // output
+ // rdct_* interface (outputs only, updated together with the mmm_* settings)
+ rdct_word_index_last_x, // output
+ rdct_word_index_last_y, // output
+ rdct_sel_wide_out_x, // output
+ rdct_sel_narrow_out_x, // output
+ rdct_sel_wide_out_y, // output
+ rdct_sel_narrow_out_y, // output
+ // wrk_* interface
+ wrk_ena, // output: one-cycle enable strobe
+ wrk_rdy, // input: ready flag used to detect completion
+ wrk_sel_wide_in, // output
+ wrk_sel_wide_out, // output
+ wrk_sel_narrow_in, // output
+ wrk_sel_narrow_out, // output
+ wrk_word_index_last, // output
+ wrk_word_index_last_half, // output
+ wrk_opcode // output
+);
+
+
+ //
+ // Headers (included inside the module body; presumably the source of the *_W widths, UOP_* codes and BANK_*/*_DNC constants used below - TODO confirm which header defines what)
+ //
+ `include "modexpng_parameters.vh"
+ `include "modexpng_microcode.vh"
+
+
+ //
+ // Ports (non-ANSI style: directions and widths for the names in the module header)
+ //
+ input clk;
+ input rst; // synchronous, only sampled inside posedge-clk processes
+
+ input ena;
+ output rdy;
+
+ `ifdef MODEXPNG_ENABLE_DEBUG
+ output uop_decoded_stop; // exists only in debug builds
+ `endif
+
+ input crt_mode;
+
+ input [OP_ADDR_W -1:0] word_index_last_n; // used when a uop's NPQ field equals UOP_NPQ_N
+ input [OP_ADDR_W -1:0] word_index_last_pq; // used for any other NPQ field value
+
+ input [BIT_INDEX_W -1:0] bit_index_last_n; // ladder step count when crt_mode is low
+ input [BIT_INDEX_W -1:0] bit_index_last_pq; // ladder step count when crt_mode is high
+
+ output io_mgr_ena;
+ input io_mgr_rdy;
+ output [UOP_CRT_W -1:0] io_mgr_sel_crt;
+ output [UOP_AUX_W -1:0] io_mgr_sel_aux;
+ output [BANK_ADDR_W -1:0] io_mgr_sel_in;
+ output [BANK_ADDR_W -1:0] io_mgr_sel_out;
+ output [OP_ADDR_W -1:0] io_mgr_word_index_last;
+ output [UOP_OPCODE_W -1:0] io_mgr_opcode;
+ output [BIT_INDEX_W -1:0] io_mgr_ladder_steps;
+ input io_mgr_ladder_d;
+ input io_mgr_ladder_p;
+ input io_mgr_ladder_q;
+ input io_mgr_ladder_done;
+
+ output mmm_ena_x;
+ output mmm_ena_y;
+ input mmm_rdy_x;
+ input mmm_rdy_y;
+ output [OP_ADDR_W -1:0] mmm_word_index_last_x;
+ output [OP_ADDR_W -1:0] mmm_word_index_last_y;
+ output [OP_ADDR_W -1:0] mmm_word_index_last_minus1_x;
+ output [OP_ADDR_W -1:0] mmm_word_index_last_minus1_y;
+ output mmm_ladder_mode_x;
+ output mmm_ladder_mode_y;
+ output [BANK_ADDR_W -1:0] mmm_sel_wide_in_x;
+ output [BANK_ADDR_W -1:0] mmm_sel_wide_in_y;
+ output [BANK_ADDR_W -1:0] mmm_sel_narrow_in_x;
+ output [BANK_ADDR_W -1:0] mmm_sel_narrow_in_y;
+ output mmm_force_unity_b_x;
+ output mmm_force_unity_b_y;
+ output mmm_only_reduce_x;
+ output mmm_only_reduce_y;
+ output mmm_just_multiply_x;
+ output mmm_just_multiply_y;
+
+ output [OP_ADDR_W -1:0] rdct_word_index_last_x;
+ output [OP_ADDR_W -1:0] rdct_word_index_last_y;
+ output [BANK_ADDR_W -1:0] rdct_sel_wide_out_x;
+ output [BANK_ADDR_W -1:0] rdct_sel_narrow_out_x;
+ output [BANK_ADDR_W -1:0] rdct_sel_wide_out_y;
+ output [BANK_ADDR_W -1:0] rdct_sel_narrow_out_y;
+
+ output wrk_ena;
+ input wrk_rdy;
+ output [BANK_ADDR_W -1:0] wrk_sel_wide_in;
+ output [BANK_ADDR_W -1:0] wrk_sel_wide_out;
+ output [BANK_ADDR_W -1:0] wrk_sel_narrow_in;
+ output [BANK_ADDR_W -1:0] wrk_sel_narrow_out;
+ output [OP_ADDR_W -1:0] wrk_word_index_last;
+ output [OP_ADDR_W -1:0] wrk_word_index_last_half;
+ output [UOP_OPCODE_W -1:0] wrk_opcode;
+
+
+ //
+ // Registers (one per output port; only the four *_ena_r strobes have an initial value and a reset, the rest are written while a uop is decoded)
+ //
+ reg io_mgr_ena_r = 1'b0; // enable strobe, cleared by rst
+ reg [UOP_CRT_W -1:0] io_mgr_sel_crt_r;
+ reg [UOP_AUX_W -1:0] io_mgr_sel_aux_r;
+ reg [BANK_ADDR_W -1:0] io_mgr_sel_in_r;
+ reg [BANK_ADDR_W -1:0] io_mgr_sel_out_r;
+ reg [OP_ADDR_W -1:0] io_mgr_word_index_last_r;
+ reg [UOP_OPCODE_W -1:0] io_mgr_opcode_r;
+ reg [BIT_INDEX_W -1:0] io_mgr_ladder_steps_r;
+
+ reg mmm_ena_x_r = 1'b0; // enable strobe, cleared by rst
+ reg mmm_ena_y_r = 1'b0; // enable strobe, cleared by rst
+ reg [OP_ADDR_W -1:0] mmm_word_index_last_x_r;
+ reg [OP_ADDR_W -1:0] mmm_word_index_last_y_r;
+ reg [OP_ADDR_W -1:0] mmm_word_index_last_minus1_x_r;
+ reg [OP_ADDR_W -1:0] mmm_word_index_last_minus1_y_r;
+ reg mmm_ladder_mode_x_r;
+ reg mmm_ladder_mode_y_r;
+ reg [BANK_ADDR_W -1:0] mmm_sel_wide_in_x_r;
+ reg [BANK_ADDR_W -1:0] mmm_sel_wide_in_y_r;
+ reg [BANK_ADDR_W -1:0] mmm_sel_narrow_in_x_r;
+ reg [BANK_ADDR_W -1:0] mmm_sel_narrow_in_y_r;
+ reg mmm_force_unity_b_x_r;
+ reg mmm_force_unity_b_y_r;
+ reg mmm_only_reduce_x_r;
+ reg mmm_only_reduce_y_r;
+ reg mmm_just_multiply_x_r;
+ reg mmm_just_multiply_y_r;
+
+ reg [OP_ADDR_W -1:0] rdct_word_index_last_x_r;
+ reg [OP_ADDR_W -1:0] rdct_word_index_last_y_r;
+ reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_x_r;
+ reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_x_r;
+ reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_y_r;
+ reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_y_r;
+
+ reg wrk_ena_r = 1'b0; // enable strobe, cleared by rst
+ reg [BANK_ADDR_W -1:0] wrk_sel_wide_in_r;
+ reg [BANK_ADDR_W -1:0] wrk_sel_wide_out_r;
+ reg [BANK_ADDR_W -1:0] wrk_sel_narrow_in_r;
+ reg [BANK_ADDR_W -1:0] wrk_sel_narrow_out_r;
+ reg [OP_ADDR_W -1:0] wrk_word_index_last_r;
+ reg [OP_ADDR_W -1:0] wrk_word_index_last_half_r;
+ reg [UOP_OPCODE_W -1:0] wrk_opcode_r;
+
+
+ //
+ // Mapping (every output port is driven straight from the register of the same name with the _r suffix, no logic in between)
+ //
+ assign io_mgr_ena = io_mgr_ena_r;
+ assign io_mgr_sel_crt = io_mgr_sel_crt_r;
+ assign io_mgr_sel_aux = io_mgr_sel_aux_r;
+ assign io_mgr_sel_in = io_mgr_sel_in_r;
+ assign io_mgr_sel_out = io_mgr_sel_out_r;
+ assign io_mgr_word_index_last = io_mgr_word_index_last_r;
+ assign io_mgr_opcode = io_mgr_opcode_r;
+ assign io_mgr_ladder_steps = io_mgr_ladder_steps_r;
+
+ assign mmm_ena_x = mmm_ena_x_r;
+ assign mmm_ena_y = mmm_ena_y_r;
+ assign mmm_word_index_last_x = mmm_word_index_last_x_r;
+ assign mmm_word_index_last_y = mmm_word_index_last_y_r;
+ assign mmm_word_index_last_minus1_x = mmm_word_index_last_minus1_x_r;
+ assign mmm_word_index_last_minus1_y = mmm_word_index_last_minus1_y_r;
+ assign mmm_ladder_mode_x = mmm_ladder_mode_x_r;
+ assign mmm_ladder_mode_y = mmm_ladder_mode_y_r;
+ assign mmm_sel_wide_in_x = mmm_sel_wide_in_x_r;
+ assign mmm_sel_wide_in_y = mmm_sel_wide_in_y_r;
+ assign mmm_sel_narrow_in_x = mmm_sel_narrow_in_x_r;
+ assign mmm_sel_narrow_in_y = mmm_sel_narrow_in_y_r;
+ assign mmm_force_unity_b_x = mmm_force_unity_b_x_r;
+ assign mmm_force_unity_b_y = mmm_force_unity_b_y_r;
+ assign mmm_only_reduce_x = mmm_only_reduce_x_r;
+ assign mmm_only_reduce_y = mmm_only_reduce_y_r;
+ assign mmm_just_multiply_x = mmm_just_multiply_x_r;
+ assign mmm_just_multiply_y = mmm_just_multiply_y_r;
+
+ assign rdct_word_index_last_x = rdct_word_index_last_x_r;
+ assign rdct_word_index_last_y = rdct_word_index_last_y_r;
+ assign rdct_sel_wide_out_x = rdct_sel_wide_out_x_r;
+ assign rdct_sel_wide_out_y = rdct_sel_wide_out_y_r;
+ assign rdct_sel_narrow_out_x = rdct_sel_narrow_out_x_r;
+ assign rdct_sel_narrow_out_y = rdct_sel_narrow_out_y_r;
+
+ assign wrk_ena = wrk_ena_r;
+ assign wrk_sel_wide_in = wrk_sel_wide_in_r;
+ assign wrk_sel_wide_out = wrk_sel_wide_out_r;
+ assign wrk_sel_narrow_in = wrk_sel_narrow_in_r;
+ assign wrk_sel_narrow_out = wrk_sel_narrow_out_r;
+ assign wrk_word_index_last = wrk_word_index_last_r;
+ assign wrk_word_index_last_half = wrk_word_index_last_half_r;
+ assign wrk_opcode = wrk_opcode_r;
+
+
+ //
+ // UOP_FSM (four states, binary encoded in two bits so the encoding is fully used)
+ //
+ localparam [1:0] UOP_FSM_STATE_IDLE = 2'b00; // waits for ena
+ localparam [1:0] UOP_FSM_STATE_FETCH = 2'b01; // always lasts one cycle, then DECODE
+ localparam [1:0] UOP_FSM_STATE_DECODE = 2'b10; // uop fields are latched here; STOP returns to IDLE, anything else goes to BUSY
+ localparam [1:0] UOP_FSM_STATE_BUSY = 2'b11; // waits for uop_exit_from_busy, then fetches again
+
+ reg [1:0] uop_fsm_state = UOP_FSM_STATE_IDLE; // current state, also forced to IDLE by rst
+ reg [1:0] uop_fsm_state_next; // combinational next-state value (assigned in an always @* block)
+
+
+ //
+ // UOP ROM (clocked module defined elsewhere; NOTE(review): presumably a registered read, which would explain the FETCH wait state - confirm against modexpng_uop_rom)
+ //
+ reg [UOP_ADDR_W -1:0] uop_addr; // ROM address, updated only on transitions into FETCH
+ wire [UOP_W -1:0] uop_data; // raw uop word, split into fields by the decoder below
+
+ modexpng_uop_rom uop_rom
+ (
+ .clk (clk),
+ .addr (uop_addr),
+ .data (uop_data)
+ );
+
+
+ //
+ // UOP ROM Data Decoder (fields are packed from the MSB down in this order: opcode, crt, npq, aux, ladder, then four bank selectors: wide_in, narrow_in, wide_out, narrow_out)
+ //
+ wire [UOP_OPCODE_W -1:0] uop_data_opcode = uop_data[UOP_W -1-: UOP_OPCODE_W]; // topmost field
+ wire [UOP_CRT_W -1:0] uop_data_crt = uop_data[UOP_W -UOP_OPCODE_W -1-: UOP_CRT_W ];
+ wire [UOP_NPQ_W -1:0] uop_data_npq = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -1-: UOP_NPQ_W ];
+ wire [UOP_AUX_W -1:0] uop_data_aux = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -1-: UOP_AUX_W ];
+ wire [UOP_LADDER_W -1:0] uop_data_ladder = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -1-: UOP_LADDER_W];
+ wire [BANK_ADDR_W -1:0] uop_data_sel_wide_in = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1-: BANK_ADDR_W ]; // 1st bank selector
+ wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_in = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1*BANK_ADDR_W -1-: BANK_ADDR_W ]; // 2nd bank selector
+ wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ]; // 3rd bank selector
+ wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ]; // 4th bank selector
+ // opcode classes: they decide which enable strobes fire and which ready flags end the BUSY state
+ wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ; // ends the microprogram
+ wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) || // io_mgr only
+ (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ;
+ wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ; // io_mgr and wrk together
+ wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) || // wrk only
+ (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ||
+ (uop_data_opcode == UOP_OPCODE_CROSS_LADDERS_X2Y ) ||
+ (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT ) ||
+ (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
+ (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
+ (uop_data_opcode == UOP_OPCODE_MERGE_LH ) ||
+ (uop_data_opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN ) ;
+ wire uop_opcode_is_mmm = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ) || // both mmm halves
+ (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ||
+ (uop_data_opcode == UOP_OPCODE_REGULAR_MULTIPLY ) ;
+ wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT ) || // io_mgr only
+ (uop_data_opcode == UOP_OPCODE_LADDER_STEP ) ;
+
+
+ //
+ // Debug Signal
+ //
+ // Compiled in only when MODEXPNG_ENABLE_DEBUG is defined. The flag is
+ // high whenever the FSM sits in the decode state with a STOP uop on the
+ // ROM data bus.
+ //
+ `ifdef MODEXPNG_ENABLE_DEBUG
+ assign uop_decoded_stop = uop_opcode_is_stop &&
+                           (uop_fsm_state == UOP_FSM_STATE_DECODE);
+ `endif
+
+
+
+ //
+ // UOP ROM Address Increment Logic
+ //
+ // uop_loop_now is driven further down in this module. While it is set the
+ // address steps back by one instead of advancing, so the previous uop is
+ // fetched again.
+ //
+ wire uop_loop_now;
+
+ wire [UOP_ADDR_W -1:0] uop_addr_plus1;
+ wire [UOP_ADDR_W -1:0] uop_addr_minus1;
+ wire [UOP_ADDR_W -1:0] uop_addr_next;
+ wire [UOP_ADDR_W -1:0] uop_addr_offset;
+
+ assign uop_addr_plus1  = uop_addr + 1'b1;
+ assign uop_addr_minus1 = uop_addr - 1'b1;
+
+ // address used when the engine is already running
+ assign uop_addr_next   = uop_loop_now ? uop_addr_minus1 : uop_addr_plus1;
+
+ // start address of the microprogram, chosen by crt_mode
+ assign uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT;
+
+ // the address register only changes on a transition into FETCH;
+ // coming from IDLE it is (re)loaded with the start address
+ wire uop_addr_load    = (uop_fsm_state_next == UOP_FSM_STATE_FETCH);
+ wire uop_addr_restart = (uop_fsm_state      == UOP_FSM_STATE_IDLE);
+
+ always @(posedge clk) begin
+     if (uop_addr_load)
+         uop_addr <= uop_addr_restart ? uop_addr_offset : uop_addr_next;
+ end
+
+
+ //
+ // Handy Signals
+ //
+
+ // the two mmm halves are treated as one unit: both enables / both readies
+ wire mmm_ena;
+ wire mmm_rdy;
+
+ assign mmm_ena = mmm_ena_x & mmm_ena_y;
+ assign mmm_rdy = mmm_rdy_x & mmm_rdy_y;
+
+ // a LADDER_STEP uop loops back for as long as io_mgr has not reported done
+ assign uop_loop_now = !io_mgr_ladder_done && (uop_data_opcode == UOP_OPCODE_LADDER_STEP);
+
+ // 2-bit ladder mode: constant, derived from the D bit, or taken from P/Q
+ reg [1:0] uop_data_ladder_mux;
+
+ always @* begin
+     case (uop_data_ladder)
+         UOP_LADDER_00: uop_data_ladder_mux = 2'b00;
+         UOP_LADDER_11: uop_data_ladder_mux = 2'b11;
+         UOP_LADDER_D:  uop_data_ladder_mux = {io_mgr_ladder_d, ~io_mgr_ladder_d};
+         UOP_LADDER_PQ: uop_data_ladder_mux = {io_mgr_ladder_p, io_mgr_ladder_q};
+     endcase
+ end
+
+ // last word index: the N value when the uop asks for it, the PQ value otherwise
+ reg [OP_ADDR_W-1:0] word_index_last_mux;
+
+ always @* begin
+     if (uop_data_npq == UOP_NPQ_N)
+         word_index_last_mux = word_index_last_n;
+     else
+         word_index_last_mux = word_index_last_pq;
+ end
+
+ // last bit index: follows crt_mode, not the uop
+ reg [BIT_INDEX_W-1:0] bit_index_last_mux;
+
+ always @* begin
+     if (crt_mode)
+         bit_index_last_mux = bit_index_last_pq;
+     else
+         bit_index_last_mux = bit_index_last_n;
+ end
+
+
+ //
+ // UOP Trigger Logic
+ //
+ // Each enable register is high for the cycle that follows a DECODE state
+ // in which the current uop belongs to the matching opcode class; in every
+ // other state it is driven low again, giving single-cycle strobes.
+ //
+ wire uop_fsm_is_decoding = (uop_fsm_state == UOP_FSM_STATE_DECODE);
+
+ wire uop_needs_io_mgr = uop_opcode_is_in || uop_opcode_is_out || uop_opcode_is_ladder;
+ wire uop_needs_wrk    = uop_opcode_is_wrk || uop_opcode_is_out;
+
+ always @(posedge clk) begin
+     if (rst) begin
+         io_mgr_ena_r <= 1'b0;
+         mmm_ena_x_r  <= 1'b0;
+         mmm_ena_y_r  <= 1'b0;
+         wrk_ena_r    <= 1'b0;
+     end else begin
+         io_mgr_ena_r <= uop_fsm_is_decoding && uop_needs_io_mgr;
+         mmm_ena_x_r  <= uop_fsm_is_decoding && uop_opcode_is_mmm;
+         mmm_ena_y_r  <= uop_fsm_is_decoding && uop_opcode_is_mmm;
+         wrk_ena_r    <= uop_fsm_is_decoding && uop_needs_wrk;
+     end
+ end
+
+
+ //
+ // UOP Completion Detector
+ //
+ // A unit counts as finished once its enable strobe is low again and its
+ // ready flag is high. The chain below is ordered so that, should several
+ // opcode classes ever be flagged together, the same class wins as with a
+ // sequence of independent "last assignment wins" if statements.
+ //
+ reg uop_exit_from_busy;
+
+ wire io_mgr_finished = ~io_mgr_ena & io_mgr_rdy;
+ wire wrk_finished    = ~wrk_ena    & wrk_rdy;
+ wire mmm_finished    = ~mmm_ena    & mmm_rdy;
+
+ always @* begin
+     if      (uop_opcode_is_ladder) uop_exit_from_busy = io_mgr_finished;
+     else if (uop_opcode_is_wrk   ) uop_exit_from_busy = wrk_finished;
+     else if (uop_opcode_is_mmm   ) uop_exit_from_busy = mmm_finished;
+     else if (uop_opcode_is_out   ) uop_exit_from_busy = io_mgr_finished & wrk_finished;
+     else if (uop_opcode_is_in    ) uop_exit_from_busy = io_mgr_finished;
+     else                           uop_exit_from_busy = 1'b0;
+ end
+
+
+ //
+ // UOP Parameters
+ //
+ // Helper tasks that load the selector / opcode / flag registers of one
+ // interface. All of them use non-blocking assignments, so they are meant
+ // to be called from a clocked process.
+ //
+
+ // io_mgr: CRT and AUX selectors, source and destination bank, opcode
+ task update_io_mgr_params;
+     input [UOP_CRT_W   -1:0] sel_crt;
+     input [UOP_AUX_W   -1:0] sel_aux;
+     input [BANK_ADDR_W -1:0] sel_in;
+     input [BANK_ADDR_W -1:0] sel_out;
+     input [UOP_OPCODE_W-1:0] opcode;
+     begin
+         io_mgr_opcode_r  <= opcode;
+         io_mgr_sel_crt_r <= sel_crt;
+         io_mgr_sel_aux_r <= sel_aux;
+         io_mgr_sel_in_r  <= sel_in;
+         io_mgr_sel_out_r <= sel_out;
+     end
+ endtask
+
+ // wrk: the four bank selectors plus the opcode
+ task update_wrk_params;
+     input [BANK_ADDR_W -1:0] wide_in;
+     input [BANK_ADDR_W -1:0] narrow_in;
+     input [BANK_ADDR_W -1:0] wide_out;
+     input [BANK_ADDR_W -1:0] narrow_out;
+     input [UOP_OPCODE_W-1:0] opcode;
+     begin
+         wrk_opcode_r         <= opcode;
+         wrk_sel_wide_in_r    <= wide_in;
+         wrk_sel_narrow_in_r  <= narrow_in;
+         wrk_sel_wide_out_r   <= wide_out;
+         wrk_sel_narrow_out_r <= narrow_out;
+     end
+ endtask
+
+ // mmm: x and y get identical settings, except for the ladder mode whose
+ // upper bit goes to x and lower bit to y
+ task update_mmm_params;
+     input [            1:0] ladder_mode;
+     input [BANK_ADDR_W-1:0] sel_wide_in;
+     input [BANK_ADDR_W-1:0] sel_narrow_in;
+     input                   force_unity_b;
+     input                   only_reduce;
+     input                   just_multiply;
+     begin
+         mmm_ladder_mode_x_r   <= ladder_mode[1];
+         mmm_ladder_mode_y_r   <= ladder_mode[0];
+         //
+         mmm_sel_wide_in_x_r   <= sel_wide_in;
+         mmm_sel_wide_in_y_r   <= sel_wide_in;
+         //
+         mmm_sel_narrow_in_x_r <= sel_narrow_in;
+         mmm_sel_narrow_in_y_r <= sel_narrow_in;
+         //
+         mmm_force_unity_b_x_r <= force_unity_b;
+         mmm_force_unity_b_y_r <= force_unity_b;
+         //
+         mmm_only_reduce_x_r   <= only_reduce;
+         mmm_only_reduce_y_r   <= only_reduce;
+         //
+         mmm_just_multiply_x_r <= just_multiply;
+         mmm_just_multiply_y_r <= just_multiply;
+     end
+ endtask
+
+ // rdct: destination banks, same values for x and y
+ task update_rdct_params;
+     input [BANK_ADDR_W-1:0] sel_wide_out;
+     input [BANK_ADDR_W-1:0] sel_narrow_out;
+     begin
+         rdct_sel_wide_out_x_r   <= sel_wide_out;
+         rdct_sel_wide_out_y_r   <= sel_wide_out;
+         //
+         rdct_sel_narrow_out_x_r <= sel_narrow_out;
+         rdct_sel_narrow_out_y_r <= sel_narrow_out;
+     end
+ endtask
+
+ always @(posedge clk) // latches per-uop selectors, flags and opcodes for the unit(s) the uop addresses
+ // registers are only written while a uop is being decoded, otherwise they hold their value
+ if (uop_fsm_state == UOP_FSM_STATE_DECODE)
+ // no default branch: STOP and any unlisted opcode leave all parameter registers untouched
+ case (uop_data_opcode)
+ // input uops: the io_mgr source selector is taken from the uop's narrow_in field
+ UOP_OPCODE_INPUT_TO_WIDE:
+ update_io_mgr_params(uop_data_crt, uop_data_aux, uop_data_sel_narrow_in, uop_data_sel_wide_out, uop_data_opcode); // destination = wide_out field
+ //
+ UOP_OPCODE_INPUT_TO_NARROW:
+ update_io_mgr_params(uop_data_crt, uop_data_aux, uop_data_sel_narrow_in, uop_data_sel_narrow_out, uop_data_opcode); // destination = narrow_out field
+ // output uop: programs io_mgr and wrk in the same cycle
+ UOP_OPCODE_OUTPUT_FROM_NARROW: begin
+ update_io_mgr_params(uop_data_crt, UOP_AUX_DNC, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode); // io_mgr gets only the narrow_out selector
+ update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, BANK_DNC, BANK_DNC, uop_data_opcode); // wrk gets only the narrow_in selector
+ end
+ // copy uops: all four bank selectors are forwarded to wrk
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y:
+ update_wrk_params(uop_data_sel_wide_in, uop_data_sel_narrow_in, uop_data_sel_wide_out, uop_data_sel_narrow_out, uop_data_opcode);
+ // modular multiply: ladder mode from the mux, the uop's aux field is passed as force_unity_b
+ UOP_OPCODE_MODULAR_MULTIPLY: begin
+ update_mmm_params(uop_data_ladder_mux, uop_data_sel_wide_in, uop_data_sel_narrow_in, uop_data_aux, 1'b0, 1'b0);
+ update_rdct_params(uop_data_sel_wide_out, uop_data_sel_narrow_out);
+ end
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT:
+ update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, uop_data_sel_wide_out, uop_data_sel_narrow_out, uop_data_opcode);
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, BANK_DNC, BANK_DNC, uop_data_opcode);
+ // reduce-only pass: sets only_reduce, ladder mode is deliberately left as don't-care (X)
+ UOP_OPCODE_MODULAR_REDUCE_PROC: begin
+ update_mmm_params(2'bXX, BANK_DNC, BANK_DNC, 1'b0, 1'b1, 1'b0);
+ update_rdct_params(uop_data_sel_wide_out, uop_data_sel_narrow_out);
+ end
+ //
+ UOP_OPCODE_PROPAGATE_CARRIES:
+ update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode);
+ //
+ UOP_OPCODE_MERGE_LH:
+ update_wrk_params(BANK_DNC, BANK_DNC, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode);
+ // plain multiply: sets just_multiply and forces the ladder mode to 2'b11
+ UOP_OPCODE_REGULAR_MULTIPLY: begin
+ update_mmm_params(2'b11, uop_data_sel_wide_in, uop_data_sel_narrow_in, 1'b0, 1'b0, 1'b1);
+ update_rdct_params(uop_data_sel_wide_out, uop_data_sel_narrow_out);
+ end
+ //
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
+ update_wrk_params(uop_data_sel_wide_in, uop_data_sel_narrow_in, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode);
+ // ladder uops: only the opcode matters to io_mgr, every selector is a don't-care constant
+ UOP_OPCODE_LADDER_INIT,
+ UOP_OPCODE_LADDER_STEP:
+ update_io_mgr_params(UOP_CRT_DNC, UOP_AUX_DNC, BANK_DNC, BANK_DNC, uop_data_opcode);
+ //
+ endcase
+
+
+ //
+ // UOP Lengths
+ //
+ // Helper tasks that load the operand length registers of one interface.
+ // Like the parameter tasks they only contain non-blocking assignments.
+ //
+
+ // io_mgr: last word index and number of ladder steps
+ task update_io_mgr_length;
+     input [OP_ADDR_W  -1:0] word_index_last;
+     input [BIT_INDEX_W-1:0] ladder_steps;
+     begin
+         io_mgr_ladder_steps_r    <= ladder_steps;
+         io_mgr_word_index_last_r <= word_index_last;
+     end
+ endtask
+
+ // wrk: last word index for the full operand and for the half-size operand
+ task update_wrk_length;
+     input [OP_ADDR_W-1:0] word_index_last;
+     input [OP_ADDR_W-1:0] word_index_last_half;
+     begin
+         wrk_word_index_last_half_r <= word_index_last_half;
+         wrk_word_index_last_r      <= word_index_last;
+     end
+ endtask
+
+ // mmm: x and y halves receive the same last index and the same (index - 1)
+ task update_mmm_length;
+     input [OP_ADDR_W-1:0] word_index_last;
+     begin
+         // x half
+         mmm_word_index_last_x_r        <= word_index_last;
+         mmm_word_index_last_minus1_x_r <= word_index_last - 1'b1;
+         // y half
+         mmm_word_index_last_y_r        <= word_index_last;
+         mmm_word_index_last_minus1_y_r <= word_index_last - 1'b1;
+     end
+ endtask
+
+ // rdct: same last index for both halves
+ task update_rdct_length;
+     input [OP_ADDR_W-1:0] word_index_last;
+     begin
+         rdct_word_index_last_y_r <= word_index_last;
+         rdct_word_index_last_x_r <= word_index_last;
+     end
+ endtask
+
+ always @(posedge clk) // latches per-uop operand lengths for the unit(s) the uop addresses
+ // as with the parameters, lengths are written only in the decode state
+ if (uop_fsm_state == UOP_FSM_STATE_DECODE)
+ // no default branch: STOP and any unlisted opcode leave all length registers untouched
+ case (uop_data_opcode)
+ // I/O uops: length follows the uop's NPQ field, the ladder step count is a don't-care constant
+ UOP_OPCODE_INPUT_TO_WIDE,
+ UOP_OPCODE_INPUT_TO_NARROW,
+ UOP_OPCODE_OUTPUT_FROM_NARROW:
+ update_io_mgr_length(word_index_last_mux, BIT_INDEX_DNC); // note: wrk length is not updated for OUTPUT_FROM_NARROW
+ //
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y:
+ update_wrk_length(word_index_last_mux, OP_ADDR_DNC);
+ // modular multiply: mmm and rdct share the NPQ-selected length
+ UOP_OPCODE_MODULAR_MULTIPLY: begin
+ update_mmm_length(word_index_last_mux);
+ update_rdct_length(word_index_last_mux);
+ end
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT:
+ update_wrk_length(word_index_last_mux, OP_ADDR_DNC);
+ // these wrk uops ignore the NPQ field: full length = N value, half length = PQ value
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ update_wrk_length(word_index_last_n, word_index_last_pq);
+ // always uses the PQ length, regardless of the NPQ field
+ UOP_OPCODE_MODULAR_REDUCE_PROC: begin
+ update_mmm_length(word_index_last_pq);
+ update_rdct_length(word_index_last_pq);
+ end
+ //
+ UOP_OPCODE_PROPAGATE_CARRIES:
+ update_wrk_length(word_index_last_mux, OP_ADDR_DNC);
+ //
+ UOP_OPCODE_MERGE_LH:
+ update_wrk_length(word_index_last_n, word_index_last_pq);
+ // always uses the PQ length, regardless of the NPQ field
+ UOP_OPCODE_REGULAR_MULTIPLY: begin
+ update_mmm_length(word_index_last_pq);
+ update_rdct_length(word_index_last_pq);
+ end
+ //
+ UOP_OPCODE_REGULAR_ADD_UNEVEN:
+ update_wrk_length(word_index_last_n, word_index_last_pq);
+ // ladder uops: fixed word index constant, step count selected by crt_mode
+ UOP_OPCODE_LADDER_INIT,
+ UOP_OPCODE_LADDER_STEP:
+ update_io_mgr_length(OP_ADDR_LADDER_LAST, bit_index_last_mux);
+ //
+ endcase
+
+
+ //
+ // UOP FSM Process (state register with synchronous reset to IDLE)
+ //
+ always @(posedge clk)
+ //
+ if (rst) uop_fsm_state <= UOP_FSM_STATE_IDLE;
+ else uop_fsm_state <= uop_fsm_state_next;
+
+
+ //
+ // UOP FSM Transition Logic (combinational; all four 2-bit state codes are listed, so no default branch is used)
+ //
+ always @* begin
+ //
+ case (uop_fsm_state)
+ UOP_FSM_STATE_IDLE: uop_fsm_state_next = ena ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_IDLE; // start on ena
+ UOP_FSM_STATE_FETCH: uop_fsm_state_next = UOP_FSM_STATE_DECODE ; // unconditional one-cycle wait
+ UOP_FSM_STATE_DECODE: uop_fsm_state_next = uop_opcode_is_stop ? UOP_FSM_STATE_IDLE : UOP_FSM_STATE_BUSY; // STOP ends the run, any other uop is executed
+ UOP_FSM_STATE_BUSY: uop_fsm_state_next = uop_exit_from_busy ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_BUSY; // wait for the triggered unit(s), then fetch the next uop
+ endcase
+ //
+ end
+
+
+ //
+ // Ready Flag Logic
+ //
+ // rdy starts out (and resets) high. It drops in the cycle after ena is
+ // seen in the idle state and is set again after a STOP uop has been
+ // decoded; in the fetch and busy states it keeps its value.
+ //
+ reg rdy_r = 1'b1;
+ assign rdy = rdy_r;
+
+ always @(posedge clk) begin
+     if (rst)
+         rdy_r <= 1'b1;
+     else if (uop_fsm_state == UOP_FSM_STATE_IDLE)
+         rdy_r <= ~ena;
+     else if (uop_fsm_state == UOP_FSM_STATE_DECODE)
+         rdy_r <= uop_opcode_is_stop;
+ end
+
+
+endmodule