diff options
Diffstat (limited to 'rtl')
-rw-r--r-- | rtl/modexpng_core_top.v | 138 | ||||
-rw-r--r-- | rtl/modexpng_general_worker.v | 402 | ||||
-rw-r--r-- | rtl/modexpng_io_manager.v | 170 | ||||
-rw-r--r-- | rtl/modexpng_microcode.vh | 42 | ||||
-rw-r--r-- | rtl/modexpng_mmm_dual.v | 10 | ||||
-rw-r--r-- | rtl/modexpng_parameters.vh | 8 | ||||
-rw-r--r-- | rtl/modexpng_uop_rom.v | 82 |
7 files changed, 720 insertions, 132 deletions
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v index 6b194dc..c78a969 100644 --- a/rtl/modexpng_core_top.v +++ b/rtl/modexpng_core_top.v @@ -6,6 +6,8 @@ module modexpng_core_top crt_mode, word_index_last_n, word_index_last_pq, + bit_index_last_n, + bit_index_last_pq, bus_cs, bus_we, bus_addr, @@ -37,6 +39,9 @@ module modexpng_core_top input [ OP_ADDR_W -1:0] word_index_last_n; input [ OP_ADDR_W -1:0] word_index_last_pq; + input [ BIT_INDEX_W -1:0] bit_index_last_n; + input [ BIT_INDEX_W -1:0] bit_index_last_pq; + input bus_cs; input bus_we; input [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr; @@ -71,16 +76,23 @@ module modexpng_core_top wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ]; wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ]; - wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ; - wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) || - (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ; - wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ; - wire uop_opcode_is_mmm = uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ; - wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) || - (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ; - + wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ; + wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) || + (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ; + wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ; + wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT ) || + (uop_data_opcode == UOP_OPCODE_LADDER_STEP ) ; + wire uop_opcode_is_mmm = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ) || + (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ; + wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) || + (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) || + (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) || + (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ; + + wire uop_loop_now; + wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT; - wire [UOP_ADDR_W -1:0] uop_addr_next = uop_addr + 1'b1; + wire [UOP_ADDR_W -1:0] uop_addr_next = uop_loop_now ? uop_addr - 1'b1: uop_addr + 1'b1; modexpng_uop_rom uop_rom ( @@ -595,6 +607,14 @@ module modexpng_core_top reg [OP_ADDR_W -1:0] io_mgr_word_index_last; reg [UOP_OPCODE_W -1:0] io_mgr_opcode; + reg [BIT_INDEX_W -1:0] io_mgr_ladder_steps; + wire io_mgr_ladder_d; + wire io_mgr_ladder_p; + wire io_mgr_ladder_q; + wire io_mgr_ladder_done; + + assign uop_loop_now = (uop_data_opcode == UOP_OPCODE_LADDER_STEP) && !io_mgr_ladder_done; + wire [WORD_W -1:0] wrk_rd_narrow_x_data_x_trunc = wrk_rd_narrow_x_data_x[WORD_W-1:0]; wire [WORD_W -1:0] wrk_rd_narrow_x_data_y_trunc = wrk_rd_narrow_x_data_y[WORD_W-1:0]; @@ -653,7 +673,13 @@ module modexpng_core_top .io_out_dout (io_out_data), .wrk_narrow_x_din_x_trunc (wrk_rd_narrow_x_data_x_trunc), - .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc) + .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc), + + .ladder_steps (io_mgr_ladder_steps), + .ladder_d (io_mgr_ladder_d), + .ladder_p (io_mgr_ladder_p), + .ladder_q (io_mgr_ladder_q), + .ladder_done (io_mgr_ladder_done) ); @@ -685,6 +711,9 @@ module modexpng_core_top reg mmm_force_unity_b_x; reg mmm_force_unity_b_y; + reg mmm_only_reduce_x; + reg mmm_only_reduce_y; + wire rdct_ena_x; wire rdct_ena_y; wire rdct_rdy_x; @@ -702,6 +731,7 @@ module modexpng_core_top .word_index_last (mmm_word_index_last_x), .word_index_last_minus1 (mmm_word_index_last_minus1_x), .force_unity_b (mmm_force_unity_b_x), + .only_reduce (mmm_only_reduce_x), .sel_wide_in (mmm_sel_wide_in_x), .sel_narrow_in (mmm_sel_narrow_in_x), @@ -757,6 +787,7 @@ module modexpng_core_top .word_index_last (mmm_word_index_last_y), .word_index_last_minus1 (mmm_word_index_last_minus1_y), .force_unity_b (mmm_force_unity_b_y), + .only_reduce (mmm_only_reduce_y), .sel_wide_in (mmm_sel_wide_in_y), .sel_narrow_in (mmm_sel_narrow_in_y), @@ -898,6 +929,7 @@ module modexpng_core_top reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_in; reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_out; reg [ OP_ADDR_W -1:0] wrk_word_index_last; + reg [ OP_ADDR_W -1:0] wrk_word_index_last_half; reg [UOP_OPCODE_W -1:0] wrk_opcode; modexpng_general_worker general_worker @@ -916,6 +948,7 @@ module modexpng_core_top .opcode (wrk_opcode), .word_index_last (wrk_word_index_last), + .word_index_last_half (wrk_word_index_last_half), .wrk_rd_wide_xy_ena_x (wrk_rd_wide_xy_ena_x), .wrk_rd_wide_xy_bank_x (wrk_rd_wide_xy_bank_x), @@ -976,10 +1009,11 @@ module modexpng_core_top // uop_exit_from_busy = 0; // - if (uop_opcode_is_in) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; - if (uop_opcode_is_out) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~mmm_ena & mmm_rdy); - if (uop_opcode_is_mmm) uop_exit_from_busy = ~mmm_ena & mmm_rdy ; - if (uop_opcode_is_wrk) uop_exit_from_busy = ~wrk_ena & wrk_rdy ; + if (uop_opcode_is_in ) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; + if (uop_opcode_is_out ) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~wrk_ena & wrk_rdy); + if (uop_opcode_is_mmm ) uop_exit_from_busy = ~mmm_ena & mmm_rdy; + if (uop_opcode_is_wrk ) uop_exit_from_busy = ~wrk_ena & wrk_rdy; + if (uop_opcode_is_ladder) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; // end @@ -995,17 +1029,22 @@ module modexpng_core_top mmm_ena_y <= 1'b0; wrk_ena <= 1'b0; end else begin - io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in || uop_opcode_is_out) : 1'b0; - mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; - mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; - wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || uop_opcode_is_out) : 1'b0; + io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in || + uop_opcode_is_out || + uop_opcode_is_ladder): 1'b0; + mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; + mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; + wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || + uop_opcode_is_out ): 1'b0; end // // Parameters // wire uop_aux_is_1 = uop_data_aux == UOP_AUX_1; - + + // TODO: Add reset to default don't care values. + always @(posedge clk) // if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin @@ -1044,9 +1083,10 @@ module modexpng_core_top UOP_LADDER_00: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b00; UOP_LADDER_11: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b11; UOP_LADDER_D: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX; - UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX; + UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= {io_mgr_ladder_p, io_mgr_ladder_q}; endcase // + {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b0}}; {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}}; {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }}; {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }}; @@ -1055,24 +1095,42 @@ module modexpng_core_top // end // + UOP_OPCODE_MODULAR_REDUCE_PROC: begin + // + {mmm_ladder_mode_x, mmm_ladder_mode_y } <= {2{1'bX }}; + // + {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b1 }}; + {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{1'b0 }}; + {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{BANK_DNC }}; + {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{BANK_DNC }}; + {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }}; + {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}}; + // + end + // UOP_OPCODE_PROPAGATE_CARRIES: begin wrk_sel_narrow_in <= uop_data_sel_narrow_in; wrk_sel_narrow_out <= uop_data_sel_narrow_out; end // - UOP_OPCODE_COPY_CRT_Y2X: begin + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_COPY_LADDERS_X2Y: begin wrk_sel_wide_in <= uop_data_sel_wide_in; wrk_sel_wide_out <= uop_data_sel_wide_out; wrk_sel_narrow_in <= uop_data_sel_narrow_in; wrk_sel_narrow_out <= uop_data_sel_narrow_out; end // + UOP_OPCODE_MODULAR_REDUCE_INIT: begin + wrk_sel_narrow_in <= uop_data_sel_narrow_in; + end + // endcase // end // - // Length + // Lengths // wire [OP_ADDR_W -1:0] word_index_last_n_minus1 = word_index_last_n - 1'b1; wire [OP_ADDR_W -1:0] word_index_last_pq_minus1 = word_index_last_pq - 1'b1; @@ -1086,7 +1144,10 @@ module modexpng_core_top case (uop_data_opcode) // UOP_OPCODE_INPUT_TO_WIDE, - UOP_OPCODE_INPUT_TO_NARROW: io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; + UOP_OPCODE_INPUT_TO_NARROW, + UOP_OPCODE_OUTPUT_FROM_NARROW: + // + io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; // UOP_OPCODE_MODULAR_MULTIPLY: begin {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }}; @@ -1094,8 +1155,31 @@ module modexpng_core_top {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }}; end // - UOP_OPCODE_PROPAGATE_CARRIES: - wrk_word_index_last = uop_npq_is_n ? word_index_last_n : word_index_last_pq; + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_COPY_LADDERS_X2Y: + wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; + // + UOP_OPCODE_MODULAR_REDUCE_INIT: begin + wrk_word_index_last <= word_index_last_n; + wrk_word_index_last_half <= word_index_last_pq; + end + // + UOP_OPCODE_MODULAR_REDUCE_PROC: begin + {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{word_index_last_pq }}; + {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{word_index_last_pq_minus1}}; + {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{word_index_last_pq }}; + end + // + UOP_OPCODE_LADDER_INIT: begin + io_mgr_word_index_last <= OP_ADDR_LADDER_LAST; + io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n; + end + // + UOP_OPCODE_LADDER_STEP: begin + io_mgr_word_index_last <= OP_ADDR_LADDER_LAST; + io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n; + end // endcase // @@ -1140,8 +1224,8 @@ module modexpng_core_top UOP_FSM_STATE_IDLE: valid_reg <= ~next; UOP_FSM_STATE_DECODE: valid_reg <= uop_opcode_is_stop; endcase - - + + // // BEGIN DEBUG diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v index c35f0b3..269ef98 100644 --- a/rtl/modexpng_general_worker.v +++ b/rtl/modexpng_general_worker.v @@ -14,6 +14,7 @@ module modexpng_general_worker opcode, word_index_last, + word_index_last_half, wrk_rd_wide_xy_ena_x, wrk_rd_wide_xy_bank_x, @@ -88,6 +89,7 @@ module modexpng_general_worker input [ UOP_OPCODE_W -1:0] opcode; input [ OP_ADDR_W -1:0] word_index_last; + input [ OP_ADDR_W -1:0] word_index_last_half; output wrk_rd_wide_xy_ena_x; output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x; @@ -141,18 +143,35 @@ module modexpng_general_worker // // FSM Declaration // - localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0; - localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1; - localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2; - localparam [3:0] WRK_FSM_STATE_BUSY = 4'h3; - localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug! - localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6; - localparam [3:0] WRK_FSM_STATE_STOP = 4'h7; + localparam [4:0] WRK_FSM_STATE_IDLE = 5'h00; - reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE; - reg [3:0] wrk_fsm_state_next_one_pass; // single address space sweep + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1 = 5'h01; + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2 = 5'h02; + localparam [4:0] WRK_FSM_STATE_BUSY = 5'h03; + localparam [4:0] WRK_FSM_STATE_LATENCY_POST1 = 5'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug! + localparam [4:0] WRK_FSM_STATE_LATENCY_POST2 = 5'h06; + + localparam [4:0] WRK_FSM_STATE_STOP = 5'h07; + + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 5'h10; + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 5'h11; + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 5'h12; + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 5'h13; + localparam [4:0] WRK_FSM_STATE_BUSY_M1 = 5'h14; + localparam [4:0] WRK_FSM_STATE_BUSY_M2 = 5'h15; + localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 5'h16; + localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 5'h17; + localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 5'h18; + localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 5'h19; + + reg [4:0] wrk_fsm_state = WRK_FSM_STATE_IDLE; + reg [4:0] wrk_fsm_state_next_one_pass; // single address space sweep + reg [4:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y) + // TODO: Comment on how narrow/wide address increment works (narrow is one long sweep, wide is two twice shorter sweeps) + + // // Control Signals // @@ -244,32 +263,62 @@ module modexpng_general_worker // // Delays // - //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1; - //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2; - //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1; - //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly3; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly4; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly3; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly4; reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1; reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly3; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly4; reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1; reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly3; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly4; + + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1; + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly2; + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly3; + + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1; + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly2; + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly3; + + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1; + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2; + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly3; + + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1; + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2; + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3; + always @(posedge clk) begin // - //{rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x}; - //{rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y}; + {rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x}; + {rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y}; + // + {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2}; + {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2}; // {rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x}; {rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y}; // + {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2}; + {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2}; + // + {wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x}; + {wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y}; + // + {wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x}; + {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y}; + // end - - - // - // Handy Wires - // - wire rd_narrow_xy_addr_x_next_is_last; - wire rd_narrow_xy_addr_y_next_is_last; // @@ -310,7 +359,8 @@ module modexpng_general_worker case (opcode) // UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_OUTPUT_FROM_NARROW: + UOP_OPCODE_OUTPUT_FROM_NARROW, + UOP_OPCODE_MODULAR_REDUCE_INIT: // case (wrk_fsm_state_next_one_pass) // @@ -333,12 +383,30 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_PRE2, WRK_FSM_STATE_BUSY: begin // - enable_narrow_xy_rd_en; enable_wide_xy_rd_en; + enable_narrow_xy_rd_en; // end // endcase + // + UOP_OPCODE_COPY_LADDERS_X2Y: + // + case (wrk_fsm_state_next_one_pass_meander) + // + WRK_FSM_STATE_LATENCY_PRE1_M1, + WRK_FSM_STATE_LATENCY_PRE1_M2, + WRK_FSM_STATE_LATENCY_PRE2_M1, + WRK_FSM_STATE_LATENCY_PRE2_M2, + WRK_FSM_STATE_BUSY_M1, + WRK_FSM_STATE_BUSY_M2: begin + // + enable_wide_xy_rd_en; + enable_narrow_xy_rd_en; + // + end + // + endcase // endcase // @@ -389,8 +457,7 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_POST1, WRK_FSM_STATE_LATENCY_POST2: // - enable_narrow_xy_wr_en; - //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}}; + enable_narrow_xy_wr_en; // // endcase @@ -405,7 +472,34 @@ module modexpng_general_worker // enable_wide_xy_wr_en; enable_narrow_xy_wr_en; - //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}}; + // + end + // + endcase + // + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: + // + enable_wide_xy_wr_en; + // + // + endcase + // + UOP_OPCODE_COPY_LADDERS_X2Y: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY_M2, + WRK_FSM_STATE_LATENCY_POST1_M2, + WRK_FSM_STATE_LATENCY_POST2_M2: begin + // + enable_wide_xy_wr_en; + enable_narrow_xy_wr_en; // end // @@ -424,7 +518,7 @@ module modexpng_general_worker reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r; reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r; - wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r}; + wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r}; wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r}; wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r}; wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r}; @@ -497,6 +591,45 @@ module modexpng_general_worker end // endcase + // + UOP_OPCODE_COPY_LADDERS_X2Y: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY_M2, + WRK_FSM_STATE_LATENCY_POST1_M2, + WRK_FSM_STATE_LATENCY_POST2_M2: begin + // + wr_wide_x_dout_x <= wrk_rd_wide_x_din_x_dly3; + wr_wide_y_dout_x <= wrk_rd_wide_x_din_x_dly2; + wr_wide_x_dout_y <= wrk_rd_wide_x_din_y_dly3; + wr_wide_y_dout_y <= wrk_rd_wide_x_din_y_dly2; + // + wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_x_dly3; + wr_narrow_y_dout_x <= wrk_rd_narrow_x_din_x_dly2; + wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y_dly3; + wr_narrow_y_dout_y <= wrk_rd_narrow_x_din_y_dly2; + // + end + // + endcase + // + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: begin + // + wr_wide_x_dout_x <= wrk_rd_narrow_x_din_x; + wr_wide_y_dout_x <= wrk_rd_narrow_y_din_x; + wr_wide_x_dout_y <= wrk_rd_narrow_x_din_y; + wr_wide_y_dout_y <= wrk_rd_narrow_y_din_y; + // + end + // + endcase // endcase // @@ -506,6 +639,9 @@ module modexpng_general_worker // // Write Address Logic // + wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half; + wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half; + always @(posedge clk) begin // {wr_wide_xy_bank_x, wr_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; @@ -534,22 +670,64 @@ module modexpng_general_worker // endcase // + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: begin + // + wr_wide_xy_bank_x <= uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H; + wr_wide_xy_bank_y <= uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H; + // + wr_wide_xy_addr_x <= rd_wide_xy_addr_x_dly2; + wr_wide_xy_addr_y <= rd_wide_xy_addr_y_dly2; + // + end + // + endcase + // + UOP_OPCODE_COPY_LADDERS_X2Y: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY_M2, + WRK_FSM_STATE_LATENCY_POST1_M2, + WRK_FSM_STATE_LATENCY_POST2_M2: begin + // + {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly4}; + {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly4}; + // + {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly4}; + {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly4}; + // + end + // + endcase + // // endcase // end - - + + // // Read Address Logic // + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_next; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_next; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next; reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next; - assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last; - assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last; + wire rd_wide_xy_addr_x_next_is_last = rd_wide_xy_addr_x_next == word_index_last_half; + wire rd_wide_xy_addr_y_next_is_last = rd_wide_xy_addr_y_next == word_index_last_half; + + wire rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last; + wire rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last; - always @(posedge clk) begin + always @(posedge clk) begin // TODO: Maybe split into two blocks (read address / next address)?? // {rd_wide_xy_bank_x, rd_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ?? {rd_wide_xy_bank_y, rd_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC}; @@ -572,6 +750,9 @@ module modexpng_general_worker {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO}; {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO}; // + rd_wide_xy_addr_x_next <= OP_ADDR_ONE; + rd_wide_xy_addr_y_next <= OP_ADDR_ONE; + // rd_narrow_xy_addr_x_next <= OP_ADDR_ONE; rd_narrow_xy_addr_y_next <= OP_ADDR_ONE; // @@ -586,11 +767,113 @@ module modexpng_general_worker {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next}; {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next}; // + rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO; + rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO; + // + rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1; + rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1; + // + end + // + endcase + // + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state_next_one_pass) + // + WRK_FSM_STATE_LATENCY_PRE1: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO}; + // + rd_wide_xy_addr_x_next <= OP_ADDR_ONE; + rd_wide_xy_addr_y_next <= OP_ADDR_ONE; + // + rd_narrow_xy_addr_x_next <= OP_ADDR_ONE; + rd_narrow_xy_addr_y_next <= OP_ADDR_ONE; + // + end + // + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_BUSY: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x_next}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y_next}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next}; + // + rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO; + rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO; + // + rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1; + rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1; + // + end + // + endcase + // + UOP_OPCODE_COPY_LADDERS_X2Y: + // + case (wrk_fsm_state_next_one_pass_meander) + // + WRK_FSM_STATE_LATENCY_PRE1_M1: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, OP_ADDR_ZERO}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, OP_ADDR_ZERO}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, OP_ADDR_ZERO}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, OP_ADDR_ZERO}; + // + rd_wide_xy_addr_x_next <= OP_ADDR_ONE; + rd_wide_xy_addr_y_next <= OP_ADDR_ONE; + // + rd_narrow_xy_addr_x_next <= OP_ADDR_ONE; + rd_narrow_xy_addr_y_next <= OP_ADDR_ONE; + // + end + // + WRK_FSM_STATE_LATENCY_PRE1_M2: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y}; + // + end + // + WRK_FSM_STATE_LATENCY_PRE2_M1, + WRK_FSM_STATE_BUSY_M1: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_next}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_next}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_next}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_next}; + // + rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO; + rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO; + // rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1; rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1; // end // + WRK_FSM_STATE_LATENCY_PRE2_M2, + WRK_FSM_STATE_BUSY_M2: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y}; + // + end + // endcase // // @@ -608,7 +891,9 @@ module modexpng_general_worker else case (opcode) UOP_OPCODE_PROPAGATE_CARRIES, UOP_OPCODE_OUTPUT_FROM_NARROW, - UOP_OPCODE_COPY_CRT_Y2X: wrk_fsm_state <= wrk_fsm_state_next_one_pass; + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass; + UOP_OPCODE_COPY_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander; default: wrk_fsm_state <= WRK_FSM_STATE_IDLE; endcase @@ -616,17 +901,20 @@ module modexpng_general_worker // // Busy Exit Logic // - reg wrk_fsm_done_one_pass = 1'b0; + reg wrk_fsm_done_one_pass = 1'b0; + reg wrk_fsm_done_one_pass_meander = 1'b0; always @(posedge clk) begin // - wrk_fsm_done_one_pass <= 1'b0; + wrk_fsm_done_one_pass <= 1'b0; + wrk_fsm_done_one_pass_meander <= 1'b0; // case (opcode) // UOP_OPCODE_PROPAGATE_CARRIES, UOP_OPCODE_OUTPUT_FROM_NARROW, - UOP_OPCODE_COPY_CRT_Y2X: begin + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_MODULAR_REDUCE_INIT: begin // if (wrk_fsm_state == WRK_FSM_STATE_BUSY) begin // @@ -637,6 +925,20 @@ module modexpng_general_worker // end // + UOP_OPCODE_COPY_LADDERS_X2Y: begin + // + if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M2) begin + // + if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; // TODO: Check, whether both are necessary... + if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; + // + end + // + if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M1) + wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander; + // + end + // endcase // end @@ -654,7 +956,31 @@ module modexpng_general_worker WRK_FSM_STATE_BUSY: wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ; WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_POST2 ; WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_STOP ; - WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ; + WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ; + default: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ; + endcase + // + end + + always @* begin + // + case (wrk_fsm_state) + WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass_meander = ena ? WRK_FSM_STATE_LATENCY_PRE1_M1 : WRK_FSM_STATE_IDLE ; + // + WRK_FSM_STATE_LATENCY_PRE1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE1_M2 ; + WRK_FSM_STATE_LATENCY_PRE1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M1 ; + WRK_FSM_STATE_LATENCY_PRE2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M2 ; + WRK_FSM_STATE_LATENCY_PRE2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M1 ; + WRK_FSM_STATE_BUSY_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M2 ; + WRK_FSM_STATE_BUSY_M2: wrk_fsm_state_next_one_pass_meander = wrk_fsm_done_one_pass_meander ? WRK_FSM_STATE_LATENCY_POST1_M1 : WRK_FSM_STATE_BUSY_M1 ; + WRK_FSM_STATE_LATENCY_POST1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST1_M2 ; + WRK_FSM_STATE_LATENCY_POST1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M1 ; + WRK_FSM_STATE_LATENCY_POST2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M2 ; + WRK_FSM_STATE_LATENCY_POST2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_STOP ; + // + WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ; + // + default: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ; endcase // end diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v index dfbd676..59f4709 100644 --- a/rtl/modexpng_io_manager.v +++ b/rtl/modexpng_io_manager.v @@ -53,7 +53,13 @@ module modexpng_io_manager io_out_dout, wrk_narrow_x_din_x_trunc, - wrk_narrow_x_din_y_trunc + wrk_narrow_x_din_y_trunc, + + ladder_steps, + ladder_d, + ladder_p, + ladder_q, + ladder_done ); // @@ -120,6 +126,12 @@ module modexpng_io_manager output [ WORD_W -1:0] wrk_narrow_x_din_x_trunc; output [ WORD_W -1:0] wrk_narrow_x_din_y_trunc; + + input [ BIT_INDEX_W -1:0] ladder_steps; + output ladder_d; + output ladder_p; + output ladder_q; + output ladder_done; // @@ -254,6 +266,10 @@ module modexpng_io_manager wire opcode_is_output = opcode == UOP_OPCODE_OUTPUT_FROM_NARROW; + wire opcode_is_ladder_init = opcode == UOP_OPCODE_LADDER_INIT; + wire opcode_is_ladder_step = opcode == UOP_OPCODE_LADDER_STEP; + wire opcode_is_ladder = opcode_is_ladder_init || opcode_is_ladder_step; + wire opcode_is_input_wide = opcode == UOP_OPCODE_INPUT_TO_WIDE; wire opcode_is_input_narrow = opcode == UOP_OPCODE_INPUT_TO_NARROW; @@ -269,8 +285,90 @@ module modexpng_io_manager wire in_1_addr_op_next_is_last; wire in_2_addr_op_next_is_last; + wire in_2_addr_op_next_is_one; wire dummy_addr_op_next_is_last; + + // + // Ladder Init/Step Logic + // + reg ladder_d_r; + reg ladder_p_r; + reg ladder_q_r; + reg ladder_done_r = 1'b0; + + assign ladder_d = ladder_d_r; + assign ladder_p = ladder_p_r; + assign ladder_q = ladder_q_r; + assign ladder_done = ladder_done_r; + + reg [BIT_INDEX_W -1:0] ladder_index; + reg [BIT_INDEX_W -1:0] ladder_index_next; + wire [ OP_ADDR_W -1:0] ladder_index_msb = ladder_index[BIT_INDEX_W-1-: OP_ADDR_W]; + wire [ WORD_MUX_W -1:0] ladder_index_lsb = ladder_index[ WORD_MUX_W-1-:WORD_MUX_W]; + wire ladder_index_is_zero = ladder_index == BIT_INDEX_ZERO; + + always @(posedge clk) + // + if (io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1) begin + // + if (opcode_is_ladder_init) begin + ladder_index <= ladder_steps; + ladder_index_next <= ladder_steps - 1'b1; + ladder_done_r <= 1'b0; + end + // + if (opcode_is_ladder_step) begin + ladder_index <= ladder_index_next; + ladder_index_next <= ladder_index_next - 1'b1; + if (ladder_index_is_zero) ladder_done_r <= 1'b1; + end + // + end + + + // + // Ladder Mux + // + reg ladder_dpq_mux; + + always @(io_in_2_din, ladder_index_lsb) + // + case(ladder_index_lsb) + 4'b0000: ladder_dpq_mux = io_in_2_din[ 0]; + 4'b0001: ladder_dpq_mux = io_in_2_din[ 1]; + 4'b0010: ladder_dpq_mux = io_in_2_din[ 2]; + 4'b0011: ladder_dpq_mux = io_in_2_din[ 3]; + 4'b0100: ladder_dpq_mux = io_in_2_din[ 4]; + 4'b0101: ladder_dpq_mux = io_in_2_din[ 5]; + 4'b0110: ladder_dpq_mux = io_in_2_din[ 6]; + 4'b0111: ladder_dpq_mux = io_in_2_din[ 7]; + 4'b1000: ladder_dpq_mux = io_in_2_din[ 8]; + 4'b1001: ladder_dpq_mux = io_in_2_din[ 9]; + 4'b1010: ladder_dpq_mux = io_in_2_din[10]; + 4'b1011: ladder_dpq_mux = io_in_2_din[11]; + 4'b1100: ladder_dpq_mux = io_in_2_din[12]; + 4'b1101: ladder_dpq_mux = io_in_2_din[13]; + 4'b1110: ladder_dpq_mux = io_in_2_din[14]; + 4'b1111: ladder_dpq_mux = io_in_2_din[15]; + endcase + + always @(posedge clk) + // + case (io_fsm_state) + // + IO_FSM_STATE_BUSY: + if (opcode_is_ladder) ladder_d_r <= ladder_dpq_mux; + // + IO_FSM_STATE_LATENCY_POST1: + if (opcode_is_ladder) ladder_p_r <= ladder_dpq_mux; + // + IO_FSM_STATE_LATENCY_POST2: + if (opcode_is_ladder) ladder_q_r <= ladder_dpq_mux; + // + endcase + + // // Source Enable Logic // @@ -284,8 +382,8 @@ module modexpng_io_manager IO_FSM_STATE_LATENCY_PRE1, IO_FSM_STATE_LATENCY_PRE2, IO_FSM_STATE_BUSY: begin - in_1_en <= opcode_is_input && sel_aux_is_1; - in_2_en <= opcode_is_input && sel_aux_is_2; + in_1_en <= opcode_is_input && sel_aux_is_1; + in_2_en <= (opcode_is_input && sel_aux_is_2) || opcode_is_ladder; end // IO_FSM_STATE_EXTRA: begin @@ -450,35 +548,59 @@ module modexpng_io_manager wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next; assign in_1_addr_op_next_is_last = in_1_addr_op_next == word_index_last; - assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last; + assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last; + assign in_2_addr_op_next_is_one = in_2_addr_op_next == OP_ADDR_ONE; assign dummy_addr_op_next_is_last = dummy_addr_op_next == word_index_last; - always @(posedge clk) + always @(posedge clk) begin + // + {in_1_addr_bank, in_1_addr_op } <= {BANK_DNC, OP_ADDR_DNC}; + {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC}; + { dummy_addr_op} <= { OP_ADDR_DNC}; + // + in_1_addr_next <= {BANK_DNC, OP_ADDR_DNC}; + in_2_addr_next <= {BANK_DNC, OP_ADDR_DNC}; + dummy_addr_next <= { OP_ADDR_DNC}; // case (io_fsm_state_next) // IO_FSM_STATE_LATENCY_PRE1: begin // - {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO}; - {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO}; - { dummy_addr_op} <= { OP_ADDR_ZERO}; + {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO}; + if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO}; + else {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC}; + { dummy_addr_op} <= { OP_ADDR_ZERO}; + // + in_1_addr_next <= {sel_in, OP_ADDR_ONE}; + in_2_addr_next <= {sel_in, OP_ADDR_ONE}; + dummy_addr_next <= { OP_ADDR_ONE}; + // + end + // + IO_FSM_STATE_LATENCY_PRE2: begin + // + {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next; + if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next; + else {in_2_addr_bank, in_2_addr_op } <= {BANK_IN_2_D, ladder_index_msb}; + { dummy_addr_op} <= dummy_addr_next; // - in_1_addr_next <= {sel_in, OP_ADDR_ONE}; - in_2_addr_next <= {sel_in, OP_ADDR_ONE}; - dummy_addr_next <= { OP_ADDR_ONE}; + in_1_addr_next <= in_1_addr_next + 1'b1; + if (!opcode_is_ladder) in_2_addr_next <= in_2_addr_next + 1'b1; + else in_2_addr_next <= {BANK_IN_2_P, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]}; + dummy_addr_next <= dummy_addr_next + 1'b1; // end // - IO_FSM_STATE_LATENCY_PRE2, IO_FSM_STATE_BUSY: begin // {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next; {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next; { dummy_addr_op} <= dummy_addr_next; // - in_1_addr_next <= in_1_addr_next + 1'b1; - in_2_addr_next <= in_2_addr_next + 1'b1; - dummy_addr_next <= dummy_addr_next + 1'b1; + in_1_addr_next <= in_1_addr_next + 1'b1; + if (!opcode_is_ladder) in_2_addr_next <= in_2_addr_next + 1'b1; + else in_2_addr_next <= {BANK_IN_2_Q, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]}; + dummy_addr_next <= dummy_addr_next + 1'b1; // end // @@ -499,7 +621,8 @@ module modexpng_io_manager end // endcase - + // + end // @@ -525,7 +648,7 @@ module modexpng_io_manager if (opcode_is_input) begin if (sel_aux_is_1 && in_1_addr_op_next_is_last) io_fsm_done <= 1'b1; if (sel_aux_is_2 && in_2_addr_op_next_is_last) io_fsm_done <= 1'b1; - end else if (opcode_is_output) begin + end else if (opcode_is_output || opcode_is_ladder) begin if (dummy_addr_op_next_is_last) io_fsm_done <= 1'b1; end // @@ -571,4 +694,17 @@ module modexpng_io_manager endcase + // + // BEGIN DEBUG + // + always @(posedge clk) + // + if ((io_fsm_state == IO_FSM_STATE_STOP) && opcode_is_ladder) + $display("[%4d] / %d / %d / %d", ladder_index, ladder_d_r, ladder_p_r, ladder_q_r); + + // + // END DEBUG + // + + endmodule diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh index 2e591e7..f68c559 100644 --- a/rtl/modexpng_microcode.vh +++ b/rtl/modexpng_microcode.vh @@ -1,8 +1,8 @@ -localparam UOP_OPCODE_W = 4; +localparam UOP_OPCODE_W = 5; localparam UOP_CRT_W = 1; localparam UOP_NPQ_W = 1; localparam UOP_AUX_W = 1; -localparam UOP_LADDER_W = 1; +localparam UOP_LADDER_W = 2; localparam UOP_SEL_W = 4 * BANK_ADDR_W; localparam UOP_ADDR_W = 6; // 64 instructions @@ -11,17 +11,17 @@ localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_USING_CRT = 6'd0; localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_WITHOUT_CRT = 6'd31; -// 4 1 1 1 2 4*3=12 +// 5 1 1 1 2 4*3=12 localparam UOP_W = UOP_OPCODE_W + UOP_CRT_W + UOP_NPQ_W + UOP_AUX_W + UOP_LADDER_W + UOP_SEL_W; -// [20:17] [16] [15] [14] [13:12] [11:9][8:6][5:3][2:0] +// [21:17] [16] [15] [14] [13:12] [11:9][8:6][5:3][2:0] // OPCODE -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 4'd0; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 5'd0; /* all fields are don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 4'd1; -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 5'd1; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 5'd2; /* CRT tells into which of the dual MMM to write * NPQ specifies the width of the operand * AUX specifies from which INPUT to read @@ -31,7 +31,7 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2; * */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 5'd3; /* CRT tells from which of the dual MMM to read * NPQ specifies the width of the operand * AUX is don't care @@ -39,27 +39,45 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3; * source and destination WIDE are don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 4'd4; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 5'd4; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd5; /* CRT is don't care * NPQ specifies the width of the operand * AUX is don't care * LADDER is don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 4'd8; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 5'd8; /* CRT is don't care * NPQ specifies the width of the operand * AUX = AUX_2 forces B input to 1 (AUX_1 reads from source NARROW as usual) * LADDER specifies Montgomery ladder mode */ +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_INIT = 5'd10; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_PROC = 5'd11; +/* CRT + * NPQ + * AUX + * LADDER is don't care + */ + -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 4'd11; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 5'd12; /* CRT is don't care * NPQ specifies the width of the operand * AUX is don't care * LADDER is don't care * source and destination WIDE are don't care - */ + */ + +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_INIT = 5'd16; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_STEP = 5'd17; +/* CRT is don't care + * NPQ is don't care + * AUX is don't care + * LADDER is don't care + * WIDE and NARROW are don't care + */ // CRT localparam [UOP_CRT_W -1:0] UOP_CRT_X = 1'b0; diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v index 14f1b47..6e52a97 100644 --- a/rtl/modexpng_mmm_dual.v +++ b/rtl/modexpng_mmm_dual.v @@ -8,6 +8,7 @@ module modexpng_mmm_dual word_index_last, word_index_last_minus1, force_unity_b, + only_reduce, sel_wide_in, sel_narrow_in, @@ -72,6 +73,7 @@ module modexpng_mmm_dual input [7:0] word_index_last; input [7:0] word_index_last_minus1; input force_unity_b; + input only_reduce; input [BANK_ADDR_W-1:0] sel_wide_in; input [BANK_ADDR_W-1:0] sel_narrow_in; @@ -120,7 +122,8 @@ module modexpng_mmm_dual // reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE; reg [FSM_STATE_WIDTH-1:0] fsm_state_next; - + + wire [FSM_STATE_WIDTH-1:0] fsm_state_after_idle; wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle; wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle; @@ -905,16 +908,17 @@ module modexpng_mmm_dual // // FSM Transition Logic // + assign fsm_state_after_idle = !only_reduce ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_INIT; assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT; - assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; + assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; always @* begin // fsm_state_next = FSM_STATE_IDLE; // case (fsm_state) - FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE; + FSM_STATE_IDLE: fsm_state_next = ena ? fsm_state_after_idle /*FSM_STATE_MULT_SQUARE_COL_0_INIT*/ : FSM_STATE_IDLE; FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ; FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ; diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh index 6e6c3ca..c7566ad 100644 --- a/rtl/modexpng_parameters.vh +++ b/rtl/modexpng_parameters.vh @@ -23,7 +23,7 @@ localparam MAC_W = 47; localparam BUS_DATA_W = 32; localparam BUS_OP_ADDR_W = cryptech_clog2(MAX_OP_W / BUS_DATA_W); - +localparam BIT_INDEX_W = cryptech_clog2(MAX_OP_W); localparam BANK_ADDR_W = 3; localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W); @@ -33,6 +33,8 @@ localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS); localparam CARRY_W = WORD_EXT_W - WORD_W; +localparam WORD_MUX_W = cryptech_clog2(WORD_W); + localparam [CARRY_W-1:0] CARRY_ZERO = {CARRY_W{1'b0}}; localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0; @@ -80,6 +82,10 @@ localparam [BANK_ADDR_W-1:0] BANK_OUT_YM = 3'd2; localparam [BANK_ADDR_W-1:0] BANK_DNC = {BANK_ADDR_W{1'bX}}; +localparam [OP_ADDR_W-1:0] OP_ADDR_LADDER_LAST = 3; // 0..3, i.e. <dummy>, D, P, Q + +localparam [BIT_INDEX_W-1:0] BIT_INDEX_ZERO = {BIT_INDEX_W{1'b0}}; + localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0; localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1; diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v index 016b1b0..04f0c83 100644 --- a/rtl/modexpng_uop_rom.v +++ b/rtl/modexpng_uop_rom.v @@ -10,53 +10,67 @@ module modexpng_uop_rom input wire clk; input wire [UOP_ADDR_W -1:0] addr; - output reg [UOP_W -1:0] data; + output reg [UOP_W -1:0] data; always @(posedge clk) // case (addr) - 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // - 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // - 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; // - 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; // - 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // - 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // + 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // + 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // + 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; // + 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; // + 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // + 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // // - 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // - 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // + 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // + 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // // - 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; // - 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // - 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // + 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; // + 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // + 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // // - 6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; // + 6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; // // - 6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; // - 6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; // + 6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; // + 6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; // // - 6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // + 6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // // - 6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; // + 6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; // // - 6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // + 6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // // - 6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; // - 6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; // - 6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; // - 6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; // - 6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; // + 6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; // + 6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; // + 6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; // + 6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; // + 6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; // // - 6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd29: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd30: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; // - // - default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // + 6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 6'd29: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 6'd30: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; // + // + 6'd31: data <= {UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_DNC }; // + // + 6'd32: data <= {UOP_OPCODE_MODULAR_REDUCE_PROC, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // + // + 6'd33: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_D, BANK_NARROW_A, BANK_WIDE_C, BANK_NARROW_C }; // + 6'd34: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_A, BANK_WIDE_D, BANK_NARROW_D }; // + 6'd35: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_A, BANK_DNC, BANK_WIDE_C, BANK_NARROW_C }; // + // + 6'd36: data <= {UOP_OPCODE_COPY_LADDERS_X2Y, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; // + // + 6'd37: data <= {UOP_OPCODE_LADDER_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // + 6'd38: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_PQ, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // + 6'd39: data <= {UOP_OPCODE_LADDER_STEP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // + // + default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // endcase endmodule |