From 8ee5a19240722f397d55f57a426992350f8019a3 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Thu, 3 Oct 2019 16:42:24 +0300 Subject: Expanded micro-operation parameters (added dedicated control bit to force the B input of the modular multiplier to 1, this is necessary to bring numbers out of Montgomery domain). --- rtl/modexpng_core_top.v | 20 ++++++++++++++------ rtl/modexpng_microcode.vh | 15 +++++++++++++++ rtl/modexpng_mmm_dual.v | 26 ++++++++++++++++++++------ rtl/modexpng_parameters.vh | 8 +++++--- rtl/modexpng_uop_rom.v | 4 +++- 5 files changed, 57 insertions(+), 16 deletions(-) diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v index eb6826c..e117e5d 100644 --- a/rtl/modexpng_core_top.v +++ b/rtl/modexpng_core_top.v @@ -578,6 +578,9 @@ module modexpng_core_top reg [BANK_ADDR_W -1:0] mmm_sel_narrow_in_x; reg [BANK_ADDR_W -1:0] mmm_sel_narrow_in_y; + reg mmm_force_unity_b_x; + reg mmm_force_unity_b_y; + wire rdct_ena_x; wire rdct_ena_y; wire rdct_rdy_x; @@ -594,6 +597,7 @@ module modexpng_core_top .ladder_mode (mmm_ladder_mode_x), .word_index_last (mmm_word_index_last_x), .word_index_last_minus1 (mmm_word_index_last_minus1_x), + .force_unity_b (mmm_force_unity_b_x), .sel_wide_in (mmm_sel_wide_in_x), .sel_narrow_in (mmm_sel_narrow_in_x), @@ -648,6 +652,7 @@ module modexpng_core_top .ladder_mode (mmm_ladder_mode_y), .word_index_last (mmm_word_index_last_y), .word_index_last_minus1 (mmm_word_index_last_minus1_y), + .force_unity_b (mmm_force_unity_b_y), .sel_wide_in (mmm_sel_wide_in_y), .sel_narrow_in (mmm_sel_narrow_in_y), @@ -812,7 +817,9 @@ module modexpng_core_top // // Parameters - // + // + wire uop_aux_is_1 = uop_data_aux == UOP_AUX_1; + always @(posedge clk) // if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin @@ -844,11 +851,12 @@ module modexpng_core_top UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX; endcase // - {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }}; - {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }}; - {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }}; - {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}}; - + {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}}; + {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }}; + {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }}; + {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }}; + {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out }}; + // end // diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh index af21391..1465c48 100644 --- a/rtl/modexpng_microcode.vh +++ b/rtl/modexpng_microcode.vh @@ -17,15 +17,30 @@ localparam UOP_W = UOP_OPCODE_W + UOP_CRT_W + UOP_NPQ_W + UOP_AUX_W + UOP_LADDER // OPCODE localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 4'd0; +/* all fields are don't care + */ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 4'd1; localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2; +/* CRT tells into which of the dual MMM to write + * NPQ specifies the width of the operand + * AUX specifies from which INPUT to read + * LADDER is don't care + * source WIDE is always don't care + * destination NARROW is don't care for _WIDE opcode and vice versa + * +*/ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3; //localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 4'd0; localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 4'd8; +/* CRT is don't care + * NPQ specifies the width of the operand + * AUX = AUX_2 forces B input to 1 (AUX_1 reads from source NARROW as usual) + * LADDER specifies Montgomery ladder mode + */ // CRT localparam [UOP_CRT_W -1:0] UOP_CRT_X = 1'b0; diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v index babd565..b9b41e8 100644 --- a/rtl/modexpng_mmm_dual.v +++ b/rtl/modexpng_mmm_dual.v @@ -7,6 +7,7 @@ module modexpng_mmm_dual ladder_mode, word_index_last, word_index_last_minus1, + force_unity_b, sel_wide_in, sel_narrow_in, @@ -70,7 +71,8 @@ module modexpng_mmm_dual input ladder_mode; input [7:0] word_index_last; input [7:0] word_index_last_minus1; - + input force_unity_b; + input [BANK_ADDR_W-1:0] sel_wide_in; input [BANK_ADDR_W-1:0] sel_narrow_in; @@ -708,13 +710,22 @@ module modexpng_mmm_dual // DSP Feed Logic // reg dsp_merge_xy_b; + reg dsp_merge_xy_b_first; - always @(posedge clk) + always @(posedge clk) begin // case (fsm_state) FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_merge_xy_b <= 1'b1; FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0; endcase + // + case (fsm_state) + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_merge_xy_b_first <= 1'b1; + default: dsp_merge_xy_b_first <= 1'b0; + endcase + // + end // // On-the-fly Carry Recombination @@ -723,6 +734,9 @@ module modexpng_mmm_dual wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry}; wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry; + wire [15:0] rd_narrow_xy_dout_carry_mux_or_unity = !force_unity_b ? + rd_narrow_xy_dout_carry_mux[15:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO; + always @(posedge clk) // if (narrow_xy_ena_dly2) begin // rewrite @@ -732,15 +746,15 @@ module modexpng_mmm_dual dsp_y_b <= rd_narrow_y_dout[15:0]; dsp_xy_b_carry <= 2'b00; end else begin - dsp_x_b <= rd_narrow_xy_dout_carry_mux[15:0]; - dsp_y_b <= rd_narrow_xy_dout_carry_mux[15:0]; + dsp_x_b <= rd_narrow_xy_dout_carry_mux_or_unity; + dsp_y_b <= rd_narrow_xy_dout_carry_mux_or_unity; dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16]; end // end else begin // - dsp_x_b <= {16{1'bX}}; - dsp_y_b <= {16{1'bX}}; + dsp_x_b <= WORD_DNC; + dsp_y_b <= WORD_DNC; // dsp_xy_b_carry <= 2'b00; // diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh index 25fafb4..e610e47 100644 --- a/rtl/modexpng_parameters.vh +++ b/rtl/modexpng_parameters.vh @@ -87,10 +87,12 @@ localparam [OP_ADDR_W-1:0] OP_ADDR_ZERO = {OP_ADDR_W{1'b0}}; localparam [OP_ADDR_W-1:0] OP_ADDR_ONE = {{(OP_ADDR_W-1){1'b0}}, 1'b1}; localparam [OP_ADDR_W-1:0] OP_ADDR_DNC = {OP_ADDR_W{1'bX}}; -localparam [WORD_W-1:0] WORD_NULL = {WORD_W{1'b0}}; -localparam [WORD_EXT_W-1:0] WORD_EXT_NULL = {WORD_EXT_W{1'b0}}; +localparam [WORD_W-1:0] WORD_ZERO = {WORD_W{1'b0}}; +localparam [WORD_W-1:0] WORD_DNC = {WORD_W{1'bX}}; +localparam [WORD_W-1:0] WORD_ONE = {{(WORD_W-1){1'b0}}, 1'b1}; -localparam [WORD_EXT_W-1:0] WORD_EXT_DNC = {WORD_EXT_W{1'bX}}; +localparam [WORD_EXT_W-1:0] WORD_EXT_ZERO = {WORD_EXT_W{1'b0}}; +localparam [WORD_EXT_W-1:0] WORD_EXT_DNC = {WORD_EXT_W{1'bX}}; localparam [MAC_INDEX_W-1:0] MAC_INDEX_DNC = {MAC_INDEX_W{1'bX}}; diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v index d0b6253..73b3142 100644 --- a/rtl/modexpng_uop_rom.v +++ b/rtl/modexpng_uop_rom.v @@ -29,7 +29,9 @@ module modexpng_uop_rom 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; - 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; + 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; + 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; + 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL}; endcase -- cgit v1.2.3