From edd5efd83266bb534d7cde3d908e74749278ed96 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Mon, 21 Oct 2019 15:19:30 +0300 Subject: Reworked testbench, clk_sys and clk_core can now have any ratio, not necessarily 1:2. Fixed compile-time issue where ISE fails to place two DSP slices next to each other, if A and/or B cascade path(s) between them are partially connected. Basically, if cascade is used, entire bus must be connected. --- bench/tb_core_full_1024.v | 296 ++++++++++++++++++++------------ bench/tb_core_full_512.v | 6 +- bench/tb_wrapper.v | 23 ++- rtl/modexpng_dsp_array_block.v | 12 +- rtl/modexpng_dsp_slice_wrapper_xilinx.v | 18 +- rtl/modexpng_uop_rom.v | 2 +- rtl/modexpng_wrapper.v | 246 ++++++++++++++++---------- 7 files changed, 381 insertions(+), 222 deletions(-) diff --git a/bench/tb_core_full_1024.v b/bench/tb_core_full_1024.v index e6b1a66..87eac79 100644 --- a/bench/tb_core_full_1024.v +++ b/bench/tb_core_full_1024.v @@ -31,6 +31,7 @@ module tb_core_full_1024; reg [31:0] Q_FACTOR[0:TB_NUM_WORDS_PQ-1]; reg [31:0] P_COEFF[0:TB_NUM_WORDS_PQ]; reg [31:0] Q_COEFF[0:TB_NUM_WORDS_PQ]; + reg [31:0] D[0:TB_NUM_WORDS_N-1]; reg [31:0] DP[0:TB_NUM_WORDS_PQ-1]; reg [31:0] DQ[0:TB_NUM_WORDS_PQ-1]; reg [31:0] QINV[0:TB_NUM_WORDS_PQ-1]; @@ -117,6 +118,14 @@ module tb_core_full_1024; Q_COEFF[ 8] = 32'h0cf1175c; Q_COEFF[ 9] = 32'h4911b74e; Q_COEFF[ 10] = 32'h331e61cb; Q_COEFF[ 11] = 32'he9527ead; Q_COEFF[ 12] = 32'h8d6a5911; Q_COEFF[ 13] = 32'hae42d654; Q_COEFF[ 14] = 32'he10d29a8; Q_COEFF[ 15] = 32'h50a5dd76; Q_COEFF[ 16] = 32'h0000ed75; + D[ 0] = 32'h6d4e7831; D[ 1] = 32'ha96f72d6; D[ 2] = 32'h3b47788f; D[ 3] = 32'h07207984; + D[ 4] = 32'h3247b99f; D[ 5] = 32'hfe6ade84; D[ 6] = 32'h0e0fdec4; D[ 7] = 32'hc31450bd; + D[ 8] = 32'hb8624e9a; D[ 9] = 32'h362966c6; D[ 10] = 32'h4d8a15bf; D[ 11] = 32'h845b6e48; + D[ 12] = 32'h0b2259bf; D[ 13] = 32'h793bfff7; D[ 14] = 32'h28eeceb0; D[ 15] = 32'h8d6fde7f; + D[ 16] = 32'h0d618632; D[ 17] = 32'hfb371e4b; 
D[ 18] = 32'h0b80a9a0; D[ 19] = 32'h5c1a1381; + D[ 20] = 32'h8dfe755c; D[ 21] = 32'h82564b31; D[ 22] = 32'heee93cbd; D[ 23] = 32'h22bb686e; + D[ 24] = 32'hf2ccae97; D[ 25] = 32'he3400736; D[ 26] = 32'h11c4a85e; D[ 27] = 32'ha0c904d0; + D[ 28] = 32'hb495cecf; D[ 29] = 32'h32ceb818; D[ 30] = 32'ha2e46c28; D[ 31] = 32'h6915229e; DP[ 0] = 32'h63d165e5; DP[ 1] = 32'h856ac81e; DP[ 2] = 32'hc4b8779d; DP[ 3] = 32'h8b119544; DP[ 4] = 32'had780837; DP[ 5] = 32'h3e920266; DP[ 6] = 32'he9d10f2e; DP[ 7] = 32'h7c1b42b2; DP[ 8] = 32'hc7daca3b; DP[ 9] = 32'h7883be11; DP[ 10] = 32'ha384548d; DP[ 11] = 32'he0848b23; @@ -124,7 +133,8 @@ module tb_core_full_1024; DQ[ 0] = 32'hd7ffdc71; DQ[ 1] = 32'hed01b8aa; DQ[ 2] = 32'h2f99d3a6; DQ[ 3] = 32'h8ccb4428; DQ[ 4] = 32'hb1574616; DQ[ 5] = 32'hfc218e36; DQ[ 6] = 32'h4fe24f91; DQ[ 7] = 32'h9c367c42; DQ[ 8] = 32'h69dfa208; DQ[ 9] = 32'h3ee3de79; DQ[ 10] = 32'h54ded59b; DQ[ 11] = 32'hcb3b487d; - DQ[ 12] = 32'hbcc0db4e; DQ[ 13] = 32'hb3e6678c; DQ[ 14] = 32'h3d13ec03; DQ[ 15] = 32'h99e0f684; QINV[ 0] = 32'h9a2f0db2; QINV[ 1] = 32'h4a8075a5; QINV[ 2] = 32'hb61201fa; QINV[ 3] = 32'h0e876a42; + DQ[ 12] = 32'hbcc0db4e; DQ[ 13] = 32'hb3e6678c; DQ[ 14] = 32'h3d13ec03; DQ[ 15] = 32'h99e0f684; + QINV[ 0] = 32'h9a2f0db2; QINV[ 1] = 32'h4a8075a5; QINV[ 2] = 32'hb61201fa; QINV[ 3] = 32'h0e876a42; QINV[ 4] = 32'h94667476; QINV[ 5] = 32'h7538b796; QINV[ 6] = 32'h8d8dfa35; QINV[ 7] = 32'h689ee4a7; QINV[ 8] = 32'h6779dd63; QINV[ 9] = 32'he15b6b5e; QINV[ 10] = 32'h8275500c; QINV[ 11] = 32'he4dcd058; QINV[ 12] = 32'haf54b86c; QINV[ 13] = 32'hba76dc50; QINV[ 14] = 32'h473d0d6d; QINV[ 15] = 32'ha023ba44; @@ -155,30 +165,50 @@ module tb_core_full_1024; end - // // Clocks // `define CLK_FREQUENCY_MHZ (100.0) `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + `define CLK_PERIOD_QUARTER_NS (0.5 * `CLK_PERIOD_HALF_NS) - `define CLK_BUS_FREQUENCY_MHZ (50.0) + `define CLK_BUS_FREQUENCY_MHZ (25.0) `define 
CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ) `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS) - reg clk = 1'b1; - reg clk_bus = 1'b0; + reg clk = 1'b1; + reg clk_dly = 1'b0; + wire clk_idle = clk & clk_dly; + + reg clk_bus = 1'b1; + reg clk_bus_dly = 1'b0; + wire clk_bus_idle = clk_bus & clk_bus_dly; + + always #`CLK_PERIOD_HALF_NS clk <= ~clk; + always #`CLK_BUS_PERIOD_HALF_NS clk_bus <= ~clk_bus; + + always @(clk ) clk_dly <= #(`CLK_PERIOD_HALF_NS - `CLK_PERIOD_QUARTER_NS) clk; + always @(clk_bus) clk_bus_dly <= #(`CLK_BUS_PERIOD_HALF_NS - `CLK_PERIOD_QUARTER_NS) clk_bus; - always #`CLK_PERIOD_HALF_NS clk = ~clk; - always #`CLK_BUS_PERIOD_HALF_NS clk_bus = ~clk_bus; + // + // Clock Sync + // + task sync_clk; + while (clk_idle !== 1) _wait_quarter_clk_tick; + endtask + + task sync_clk_bus; + while (clk_bus_idle !== 1) _wait_quarter_clk_tick; + endtask // // Reset // reg rst = 1'b1; + wire rst_n = ~rst; // @@ -196,7 +226,6 @@ module tb_core_full_1024; // // System Bus // - reg bus_ready; reg bus_cs = 1'b0; reg bus_we = 1'b0; reg [11:0] bus_addr; @@ -216,7 +245,7 @@ module tb_core_full_1024; .clk (clk), .clk_bus (clk_bus), - .rst (rst), + .rst_n (rst_n), .next (core_next), .valid (core_valid), @@ -238,66 +267,106 @@ module tb_core_full_1024; // - // Routine (Bus) + // Bus Init Routine // - initial begin - - bus_ready = 1'b0; - - while (rst) wait_clock_bus_tick; - wait_clock_bus_ticks(10); - $display("Core came out of reset."); - - set_input_1; - set_input_2; - - wait_clock_bus_ticks(10); - bus_ready = 1'b1; - - end - + task core_set_input; + begin + core_set_input_1; + core_set_input_2; + wait_clk_bus_ticks(10); + $display("Core input banks written."); + end + endtask + // - // Routine (Control/Status, Bus) + // Script // - initial begin - - _wait_half_clock_tick; - wait_clock_ticks(100); - rst = 1'b0; - - while (!bus_ready) wait_clock_tick; - wait_clock_ticks(10); - $display("Core input banks written."); - - word_index_last_n = CORE_NUM_WORDS_N - 1; 
- word_index_last_pq = CORE_NUM_WORDS_PQ - 1; - - bit_index_last_n = TB_MODULUS_LENGTH_N - 1; - bit_index_last_pq = 9; //TB_MODULUS_LENGTH_N / 2 - 1; - - core_crt_mode = 1'b1; - - core_next = 1'b1; - wait_clock_tick; - core_next = 1'b0; - $display("Pulsed 'next' control signal."); - - while (!core_valid) wait_clock_tick; - wait_clock_ticks(10); - - $display("Detected high 'valid' status signal."); - core_crt_mode = 1'bX; + initial main; + + + // + // Main Routine (Control/Status, Bus) + // + integer i, j, k; + task main; + begin - wait_clock_ticks(10); - get_output; - wait_clock_ticks(10); - $display("Core output banks read."); + sync_clk; // switch to fast core clock + core_reset; // reset core + core_set_params; // set parameters (modulus width, exponent length) - verify; + sync_clk_bus; // switch to slow bus clock + core_set_input; // write to core input banks + + //sync_clk; // switch to fast core clock + //core_set_crt_mode(1); // enable CRT signing + //core_pulse_next; // assert 'next' bit for one cycle + //core_wait_valid; // wait till 'valid' bit gets asserted + + //sync_clk_bus; // switch to slow bus clock + //core_get_output; // read from core output banks + //core_verify_output; // check, whether core output matches precomputed known good refrence values + + bit_index_last_n = 16; + bus_write(2'd2, BANK_IN_2_D, 7'd0, 32'h00010001); + + sync_clk; // switch to fast core clock + core_set_crt_mode(0); // disable CRT signing + core_pulse_next; // assert 'next' bit for one cycle + core_wait_valid; // wait till 'valid' bit gets asserted + + sync_clk_bus; // switch to slow bus clock + core_get_output; // read from core output banks + core_verify_output; // check, whether core output matches precomputed known good refrence values + end + endtask - end + task core_reset; + begin + wait_clk_ticks(100); + rst = 1'b0; + wait_clk_ticks(10); + $display("Core reset finished."); + end + endtask + + task core_set_params; + begin + word_index_last_n = CORE_NUM_WORDS_N - 1; 
+ word_index_last_pq = CORE_NUM_WORDS_PQ - 1; + bit_index_last_n = TB_MODULUS_LENGTH_N - 1; + bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1; + $display("Core parameters set."); + end + endtask + + task core_set_crt_mode; + input _crt; + begin + core_crt_mode = _crt; + if (_crt) $display("Enabled CRT mode."); + else $display("Disabled CRT mode."); + end + endtask + + task core_pulse_next; + begin + core_next = 1'b1; + wait_clk_tick; + core_next = 1'b0; + $display("Pulsed core 'next' control signal."); + end + endtask + + task core_wait_valid; + begin + while (!core_valid) wait_clk_tick; + wait_clk_ticks(10); + $display("Detected high core 'valid' status signal."); + end + endtask // @@ -307,58 +376,60 @@ module tb_core_full_1024; // - // set_input_1; + // core_set_input_1 // - task set_input_1; + task core_set_input_1; reg [9:0] _tn; begin _tn = BANK_IN_1_N_COEFF * 2 ** BUS_OP_ADDR_W + TB_NUM_WORDS_N; // trick to write extra trailer word - for (_w=0; _w %0d", ti, to); ti = 510; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); @@ -103,8 +104,8 @@ module tb_wrapper; ti = 1026; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); ti = 4094; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); - ti = 4095; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); - ti = 4096; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); + ti = 4095; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);*/ + ti = 4096; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);/* ti = 4097; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); ti = 4098; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to); @@ -135,11 +136,17 @@ module tb_wrapper; ti = 1026; bus_write('h12, ti); 
bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); ti = 4094; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); - ti = 4095; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); - ti = 4096; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); + ti = 4095; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);*/ + ti = 4096; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);/* ti = 4097; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); ti = 4098; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to); - + */ + //ti = 0; bus_write('h08, ti); + //ti = 2; bus_write('h08, ti); + //ti = 0; bus_write('h08, ti); + //ti = 2; bus_write('h08, ti); + //ti = 0; bus_write('h08, ti); + //ti = 2; bus_write('h08, ti); end endtask diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v index 1444aa7..2724a68 100644 --- a/rtl/modexpng_dsp_array_block.v +++ b/rtl/modexpng_dsp_array_block.v @@ -24,8 +24,8 @@ module modexpng_dsp_array_block input [ WORD_W -1:0] b; output [NUM_MULTS_AUX * MAC_W -1:0] p; - wire [WORD_EXT_W -1:0] casc_a[0:NUM_MULTS_HALF-1]; - wire [ WORD_W -1:0] casc_b[0:NUM_MULTS_HALF-1]; + wire [DSP48E1_A_W -1:0] casc_a[0:NUM_MULTS_HALF-1]; + wire [DSP48E1_B_W -1:0] casc_b[0:NUM_MULTS_HALF-1]; wire ce_a0 = ce_a; reg ce_a1 = 1'b0; @@ -70,8 +70,8 @@ module modexpng_dsp_array_block .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}), .alumode ({DSP48E1_ALUMODE_W{1'b0}}), - .casc_a_in (WORD_EXT_ZERO), - .casc_b_in (WORD_ZERO), + .casc_a_in (), + .casc_b_in (), .casc_a_out (casc_a[z]), .casc_b_out (casc_b[z]) @@ -138,8 +138,8 @@ module modexpng_dsp_array_block .opmode ({1'b0, mode_z[2*NUM_MULTS_HALF], 1'b0, 2'b01, 2'b01}), .alumode ({DSP48E1_ALUMODE_W{1'b0}}), - .casc_a_in 
(WORD_EXT_ZERO), - .casc_b_in (WORD_ZERO), + .casc_a_in (), + .casc_b_in (), .casc_a_out (), .casc_b_out () diff --git a/rtl/modexpng_dsp_slice_wrapper_xilinx.v b/rtl/modexpng_dsp_slice_wrapper_xilinx.v index 9c1a60d..8c0b969 100644 --- a/rtl/modexpng_dsp_slice_wrapper_xilinx.v +++ b/rtl/modexpng_dsp_slice_wrapper_xilinx.v @@ -30,13 +30,11 @@ module modexpng_dsp_slice_wrapper_xilinx # input [ DSP48E1_INMODE_W -1:0] inmode; input [ DSP48E1_OPMODE_W -1:0] opmode; input [DSP48E1_ALUMODE_W -1:0] alumode; - input [ WORD_EXT_W -1:0] casc_a_in; - input [ WORD_W -1:0] casc_b_in; - output [ WORD_EXT_W -1:0] casc_a_out; - output [ WORD_W -1:0] casc_b_out; + input [ DSP48E1_A_W -1:0] casc_a_in; + input [ DSP48E1_B_W -1:0] casc_b_in; + output [ DSP48E1_A_W -1:0] casc_a_out; + output [ DSP48E1_B_W -1:0] casc_b_out; - wire [DSP48E1_A_W - WORD_EXT_W -1:0] casc_a_dummy; - wire [DSP48E1_B_W - WORD_W -1:0] casc_b_dummy; wire [DSP48E1_P_W - MAC_W -1:0] p_dummy; DSP48E1 # @@ -99,10 +97,10 @@ module modexpng_dsp_slice_wrapper_xilinx # .OPMODE (opmode), .ALUMODE (alumode), - .ACIN ({{(DSP48E1_A_W-WORD_EXT_W){1'b0}}, casc_a_in}), - .BCIN ({{(DSP48E1_B_W-WORD_W){1'b0}}, casc_b_in}), - .ACOUT ({casc_a_dummy, casc_a_out}), - .BCOUT ({casc_b_dummy, casc_b_out}), + .ACIN (casc_a_in), + .BCIN (casc_b_in), + .ACOUT (casc_a_out), + .BCOUT (casc_b_out), .PCIN ({DSP48E1_P_W{1'b0}}), .PCOUT (), .CARRYCASCIN (1'b0), diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v index c15f608..74f7ea3 100644 --- a/rtl/modexpng_uop_rom.v +++ b/rtl/modexpng_uop_rom.v @@ -134,7 +134,7 @@ module modexpng_uop_rom 7'd080: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; // 7'd081: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; // - // + 7'd082: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, 
UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // 7'd083: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_WIDE_A, BANK_DNC }; // diff --git a/rtl/modexpng_wrapper.v b/rtl/modexpng_wrapper.v index 687a963..0af6c32 100644 --- a/rtl/modexpng_wrapper.v +++ b/rtl/modexpng_wrapper.v @@ -111,7 +111,8 @@ module modexpng_wrapper // localparam MIN_OP_W = 2 * NUM_MULTS * WORD_W * 2; localparam MIN_EXP_W = 2 * 2; - localparam ZEROES_BIT_INDEX_W = 1 + cryptech_clog2(NUM_MULTS) + cryptech_clog2(WORD_W); + localparam LSB_BIT_INDEX_W = 1 + cryptech_clog2(NUM_MULTS) + cryptech_clog2(WORD_W); + localparam MSB_BIT_INDEX_W = BIT_INDEX_W - LSB_BIT_INDEX_W; // @@ -125,35 +126,29 @@ module modexpng_wrapper // // Registers // - reg wrap_reg_control = `MODEXPNG_DEFAULT_CONTROL; - reg sync_reg_control = `MODEXPNG_DEFAULT_CONTROL; - reg sync_reg_control_dly = `MODEXPNG_DEFAULT_CONTROL; - reg core_reg_control = `MODEXPNG_DEFAULT_CONTROL; - reg wrap_reg_mode = `MODEXPNG_DEFAULT_MODE; + reg wrap_reg_control = `MODEXPNG_DEFAULT_CONTROL; + reg core_reg_control = `MODEXPNG_DEFAULT_CONTROL; + reg wrap_reg_mode = `MODEXPNG_DEFAULT_MODE; reg sync_reg_mode; reg core_reg_mode; - reg [BIT_INDEX_W:ZEROES_BIT_INDEX_W] wrap_modulus_bits_msb; - reg [BIT_INDEX_W: 0] wrap_exponent_bits; + reg [BIT_INDEX_W:LSB_BIT_INDEX_W] wrap_modulus_bits_msb; + reg [BIT_INDEX_W: 0] wrap_exponent_bits; - initial write_modulus_bits(`MODEXPNG_DEFAULT_MODULUS_BITS); - initial write_exponent_bits(`MODEXPNG_DEFAULT_EXPONENT_BITS); - - wire sync_reg_control_rising = sync_reg_control & ~sync_reg_control_dly; + initial update_modulus_bits_user; + initial update_exponent_bits_user; + + //wire sync_reg_control_rising = sync_reg_control & ~sync_reg_control_dly; // // Wires // - reg wrap_reg_status = 1'b1; - reg sync_reg_status = 1'b1; + reg wrap_reg_status = 1'b1; + reg sync_reg_status = 1'b1; + reg sync_reg_status_dly = 1'b1; wire 
core_reg_status; - always @(posedge clk or negedge rst_n) - // - if (!rst_n) {wrap_reg_status, sync_reg_status} <= { 1'b1, 1'b1}; - else {wrap_reg_status, sync_reg_status} <= {sync_reg_status, core_reg_status}; - // // Output Mux @@ -175,8 +170,76 @@ module modexpng_wrapper else core_rst_shreg <= {core_rst_shreg[14:0], 1'b1}; assign core_rst_n = core_rst_shreg[15]; + + + // + // Trigger Logic + // + reg wrap_reg_control_dly = `MODEXPNG_DEFAULT_CONTROL; + reg wrap_reg_control_posedge = 1'b0; + reg sync_reg_control_posedge = 1'b0; + reg core_reg_control_posedge = 1'b0; + reg core_reg_control_posedge_dly = 1'b0; + reg sync_reg_control_posedge_ack = 1'b0; + reg wrap_reg_control_posedge_ack = 1'b0; + + always @(posedge clk or negedge rst_n) + if (!rst_n) wrap_reg_control_dly <= `MODEXPNG_DEFAULT_CONTROL; + else wrap_reg_control_dly <= wrap_reg_control; + + always @(posedge clk or negedge rst_n) + if (!rst_n) wrap_reg_control_posedge <= 1'b0; + else begin + if (!wrap_reg_control_posedge) begin + if (wrap_reg_control && !wrap_reg_control_dly) wrap_reg_control_posedge <= 1'b1; + end else begin + if (wrap_reg_control_posedge_ack) wrap_reg_control_posedge <= 1'b0; + end + end + + always @(posedge clk_core or negedge core_rst_n) + if (!core_rst_n) sync_reg_control_posedge <= 1'b0; + else sync_reg_control_posedge <= wrap_reg_control_posedge; + + always @(posedge clk_core or negedge core_rst_n) + if (!core_rst_n) core_reg_control_posedge <= 1'b0; + else core_reg_control_posedge <= sync_reg_control_posedge; + + always @(posedge clk_core or negedge core_rst_n) + if (!core_rst_n) core_reg_control_posedge_dly <= 1'b0; + else core_reg_control_posedge_dly <= core_reg_control_posedge; + + always @(posedge clk or negedge rst_n) + if (!rst_n) sync_reg_control_posedge_ack <= 1'b0; + else sync_reg_control_posedge_ack <= core_reg_control_posedge; + + always @(posedge clk or negedge rst_n) + if (!rst_n) wrap_reg_control_posedge_ack <= 1'b0; + else wrap_reg_control_posedge_ack <= 
sync_reg_control_posedge_ack; + + always @(posedge clk_core or negedge core_rst_n) + if (!core_rst_n) core_reg_control <= `MODEXPNG_DEFAULT_CONTROL; + else core_reg_control <= core_reg_control_posedge && !core_reg_control_posedge_dly; + always @(posedge clk or negedge rst_n) + if (!rst_n) sync_reg_status <= 1'b1; + else sync_reg_status <= core_reg_status; + + always @(posedge clk or negedge rst_n) + if (!rst_n) sync_reg_status_dly <= 1'b1; + else sync_reg_status_dly <= sync_reg_status; + + always @(posedge clk or negedge rst_n) + if (!rst_n) wrap_reg_status <= 1'b1; + else begin + if (wrap_reg_status) begin + if (wrap_reg_control && !wrap_reg_control_dly) wrap_reg_status <= 1'b0; + end else begin + if (!wrap_reg_control_posedge && !wrap_reg_control_posedge_ack && sync_reg_status_dly) wrap_reg_status <= 1'b1; + end + end + // // Parameters Resync // @@ -212,7 +275,7 @@ module modexpng_wrapper always @(posedge clk_core) // - if (sync_reg_control_rising) begin + if (core_reg_control_posedge && !core_reg_control_posedge_dly) begin // core_reg_mode <= sync_reg_mode; // @@ -224,21 +287,6 @@ module modexpng_wrapper // end - - // - // Trigger Logic - // - always @(posedge clk_core or negedge rst_n) - // - if (!rst_n) {sync_reg_control_dly, sync_reg_control} <= {`MODEXPNG_DEFAULT_CONTROL, `MODEXPNG_DEFAULT_CONTROL}; - else {sync_reg_control_dly, sync_reg_control} <= { sync_reg_control, wrap_reg_control}; - - always @(posedge clk_core or negedge rst_n) - // - if (!rst_n) core_reg_control <= `MODEXPNG_DEFAULT_CONTROL; - else core_reg_control <= sync_reg_control_rising; - - // // ModExpNG // @@ -271,6 +319,63 @@ module modexpng_wrapper // // Write Interface // + + wire [ BIT_INDEX_W :0] corrected_modulus_bits_user = correct_modulus_bits(write_data[BIT_INDEX_W:0]); + wire [MSB_BIT_INDEX_W :0] corrected_modulus_bits_msb_user = corrected_modulus_bits_user[BIT_INDEX_W:LSB_BIT_INDEX_W]; + wire [ OP_ADDR_W :0] modulus_num_words_n_user = {corrected_modulus_bits_msb_user, 
{(LSB_BIT_INDEX_W-WORD_MUX_W){1'b0}}} - 1'b1; + wire [ OP_ADDR_W-1:0] modulus_num_words_n_lsb_user = modulus_num_words_n_user[OP_ADDR_W-1:0]; + wire [ OP_ADDR_W-1:0] modulus_num_words_pq_user = {corrected_modulus_bits_msb_user, {(LSB_BIT_INDEX_W-WORD_MUX_W-1){1'b0}}} - 1'b1; + + wire [ BIT_INDEX_W :0] corrected_modulus_bits_default = `MODEXPNG_DEFAULT_MODULUS_BITS; + wire [MSB_BIT_INDEX_W :0] corrected_modulus_bits_msb_default = corrected_modulus_bits_default[BIT_INDEX_W:LSB_BIT_INDEX_W]; + wire [ OP_ADDR_W :0] modulus_num_words_n_default = {corrected_modulus_bits_msb_default, {(LSB_BIT_INDEX_W-WORD_MUX_W){1'b0}}} - 1'b1; + wire [ OP_ADDR_W-1:0] modulus_num_words_n_lsb_default = modulus_num_words_n_default[OP_ADDR_W-1:0]; + wire [ OP_ADDR_W-1:0] modulus_num_words_pq_default = {corrected_modulus_bits_msb_default, {(LSB_BIT_INDEX_W-WORD_MUX_W-1){1'b0}}} - 1'b1; + + wire [ BIT_INDEX_W :0] corrected_exponent_bits_user = correct_exponent_bits(write_data[BIT_INDEX_W:0]); + wire [ BIT_INDEX_W-1:0] corrected_exponent_bits_msb_user = corrected_exponent_bits_user[BIT_INDEX_W:1]; + wire [ BIT_INDEX_W :0] exponent_num_bits_n_user = corrected_exponent_bits_user - 1'b1; + wire [ BIT_INDEX_W-1:0] exponent_num_bits_n_lsb_user = exponent_num_bits_n_user[BIT_INDEX_W-1:0]; + wire [ BIT_INDEX_W-1:0] exponent_num_bits_pq_user = corrected_exponent_bits_msb_user - 1'b1; + + wire [ BIT_INDEX_W :0] corrected_exponent_bits_default = `MODEXPNG_DEFAULT_EXPONENT_BITS; + wire [ BIT_INDEX_W-1:0] corrected_exponent_bits_msb_default = corrected_exponent_bits_default[BIT_INDEX_W:1]; + wire [ BIT_INDEX_W :0] exponent_num_bits_n_default = corrected_exponent_bits_default - 1'b1; + wire [ BIT_INDEX_W-1:0] exponent_num_bits_n_lsb_default = exponent_num_bits_n_default[BIT_INDEX_W-1:0]; + wire [ BIT_INDEX_W-1:0] exponent_num_bits_pq_default = corrected_exponent_bits_msb_default - 1'b1; + + task update_modulus_bits_user; + begin + wrap_modulus_bits_msb <= corrected_modulus_bits_msb_user; + 
wrap_word_index_last_n <= modulus_num_words_n_lsb_user; + wrap_word_index_last_pq <= modulus_num_words_pq_user; + end + endtask + + task update_modulus_bits_default; + begin + wrap_modulus_bits_msb <= corrected_modulus_bits_msb_default; + wrap_word_index_last_n <= modulus_num_words_n_lsb_default; + wrap_word_index_last_pq <= modulus_num_words_pq_default; + end + endtask + + task update_exponent_bits_user; + begin + wrap_exponent_bits <= corrected_exponent_bits_user; + wrap_bit_index_last_n <= exponent_num_bits_n_lsb_user; + wrap_bit_index_last_pq <= exponent_num_bits_pq_user; + end + endtask + + task update_exponent_bits_default; + begin + wrap_exponent_bits <= corrected_exponent_bits_default; + wrap_bit_index_last_n <= exponent_num_bits_n_lsb_default; + wrap_bit_index_last_pq <= exponent_num_bits_pq_default; + end + endtask + always @(posedge clk or negedge rst_n) // if (!rst_n) begin @@ -278,77 +383,40 @@ module modexpng_wrapper wrap_reg_control <= `MODEXPNG_DEFAULT_CONTROL; wrap_reg_mode <= `MODEXPNG_DEFAULT_MODE; // - write_modulus_bits(`MODEXPNG_DEFAULT_MODULUS_BITS); - write_exponent_bits(`MODEXPNG_DEFAULT_EXPONENT_BITS); + update_modulus_bits_default; + update_exponent_bits_default; // end else if (cs && we && addr_msb_is_wrap) // case (addr_lsb) ADDR_CONTROL: wrap_reg_control <= write_data[CONTROL_NEXT_BIT]; ADDR_MODE: wrap_reg_mode <= write_data[MODE_FULLCRT_BIT]; - ADDR_MODULUS_BITS: write_modulus_bits(write_data[BIT_INDEX_W:0]); - ADDR_EXPONENT_BITS: write_exponent_bits(write_data[BIT_INDEX_W:0]); + ADDR_MODULUS_BITS: update_modulus_bits_user; + ADDR_EXPONENT_BITS: update_exponent_bits_user; endcase // - // Update modulus width + // Only accept correct modulus width // - function [BIT_INDEX_W:ZEROES_BIT_INDEX_W] fix_modulus_bits; - input [BIT_INDEX_W: 0] width; - if (width < MIN_OP_W) fix_modulus_bits = MIN_OP_W[BIT_INDEX_W:ZEROES_BIT_INDEX_W]; - else if (width > MAX_OP_W) fix_modulus_bits = MAX_OP_W[BIT_INDEX_W:ZEROES_BIT_INDEX_W]; - else 
fix_modulus_bits = width [BIT_INDEX_W:ZEROES_BIT_INDEX_W]; - endfunction - - function [OP_ADDR_W-1: 0] calc_modulus_num_words_n; - input [BIT_INDEX_W:ZEROES_BIT_INDEX_W] width; - calc_modulus_num_words_n = {width, {(ZEROES_BIT_INDEX_W-WORD_MUX_W){1'b0}}} - 1'b1; // truncates msb - endfunction - - function [OP_ADDR_W-1: 0] calc_modulus_num_words_pq; - input [BIT_INDEX_W:ZEROES_BIT_INDEX_W] width; - calc_modulus_num_words_pq = {width, {(ZEROES_BIT_INDEX_W-WORD_MUX_W-1){1'b0}}} - 1'b1; // fits exactly - endfunction - - task write_modulus_bits; + function [BIT_INDEX_W:0] correct_modulus_bits; input [BIT_INDEX_W:0] width; - begin - wrap_modulus_bits_msb <= fix_modulus_bits(width); - wrap_word_index_last_n <= calc_modulus_num_words_n(fix_modulus_bits(width)); - wrap_word_index_last_pq <= calc_modulus_num_words_pq(fix_modulus_bits(width)); - end - endtask + if (width < MIN_OP_W) correct_modulus_bits = MIN_OP_W; + else if (width > MAX_OP_W) correct_modulus_bits = MAX_OP_W; + else correct_modulus_bits = width; + endfunction // - // Update exponent width + // Only accept correct exponent width // - function [BIT_INDEX_W:0] fix_exponent_bits; + function [BIT_INDEX_W:0] correct_exponent_bits; input [BIT_INDEX_W:0] width; - if (width < MIN_EXP_W) fix_exponent_bits = MIN_EXP_W; - else if (width > MAX_OP_W ) fix_exponent_bits = MAX_OP_W; - else fix_exponent_bits = width; - endfunction - - function [BIT_INDEX_W-1:0] calc_exponent_num_bits_n; - input [BIT_INDEX_W :0] width; - calc_exponent_num_bits_n = width - 1'b1; // truncates msb - endfunction - - function [BIT_INDEX_W-1:0] calc_exponent_num_bits_pq; - input [BIT_INDEX_W: 0] width; - calc_exponent_num_bits_pq = width[BIT_INDEX_W:1] - 1'b1; // fits exactly + if (width < MIN_EXP_W) correct_exponent_bits = MIN_EXP_W; + else if (width > MAX_OP_W ) correct_exponent_bits = MAX_OP_W; + else correct_exponent_bits = width; endfunction - - task write_exponent_bits; - input [BIT_INDEX_W:0] width; - begin - wrap_exponent_bits <= 
fix_exponent_bits(width); - wrap_bit_index_last_n <= calc_exponent_num_bits_n(fix_exponent_bits(width)); - wrap_bit_index_last_pq <= calc_exponent_num_bits_pq(fix_exponent_bits(width)); - end - endtask + // @@ -367,7 +435,7 @@ module modexpng_wrapper ADDR_STATUS: wrap_read_data <= {{30{1'b0}}, wrap_reg_status, 1'b1}; // ADDR_MODE: wrap_read_data <= {{30{1'b0}}, wrap_reg_mode, 1'b0}; - ADDR_MODULUS_BITS: wrap_read_data <= {{(31-BIT_INDEX_W){1'b0}}, wrap_modulus_bits_msb, {ZEROES_BIT_INDEX_W{1'b0}}}; + ADDR_MODULUS_BITS: wrap_read_data <= {{(31-BIT_INDEX_W){1'b0}}, wrap_modulus_bits_msb, {LSB_BIT_INDEX_W{1'b0}}}; ADDR_EXPONENT_BITS: wrap_read_data <= {{(31-BIT_INDEX_W){1'b0}}, wrap_exponent_bits}; ADDR_BANK_BITS: wrap_read_data <= MAX_OP_W; ADDR_NUM_MULTS: wrap_read_data <= NUM_MULTS; -- cgit v1.2.3