diff options
24 files changed, 2034 insertions, 1137 deletions
diff --git a/bench/tb_core_full.v b/bench/tb_core_full_1024.v index e592ac5..e6b1a66 100644 --- a/bench/tb_core_full.v +++ b/bench/tb_core_full_1024.v @@ -1,6 +1,6 @@ `timescale 1ns / 1ps -module tb_core_full; +module tb_core_full_1024; // @@ -274,7 +274,7 @@ module tb_core_full; word_index_last_pq = CORE_NUM_WORDS_PQ - 1; bit_index_last_n = TB_MODULUS_LENGTH_N - 1; - bit_index_last_pq = 9;//TB_MODULUS_LENGTH_N / 2 - 1; + bit_index_last_pq = 9; //TB_MODULUS_LENGTH_N / 2 - 1; core_crt_mode = 1'b1; @@ -295,7 +295,7 @@ module tb_core_full; $display("Core output banks read."); - //verify; + verify; end diff --git a/bench/tb_core_full_512.v b/bench/tb_core_full_512.v new file mode 100644 index 0000000..221a2c6 --- /dev/null +++ b/bench/tb_core_full_512.v @@ -0,0 +1,458 @@ +`timescale 1ns / 1ps + +module tb_core_full_512; + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + + + // + // Test Vectors + // + localparam TB_MODULUS_LENGTH_N = 512; + localparam TB_MODULUS_LENGTH_PQ = TB_MODULUS_LENGTH_N / 2; + localparam TB_NUM_WORDS_PQ = TB_MODULUS_LENGTH_PQ / BUS_DATA_W; + localparam TB_NUM_WORDS_N = TB_MODULUS_LENGTH_N / BUS_DATA_W; + localparam CORE_NUM_WORDS_PQ = TB_MODULUS_LENGTH_PQ / WORD_W; + localparam CORE_NUM_WORDS_N = TB_MODULUS_LENGTH_N / WORD_W; + + reg [31:0] M[0:TB_NUM_WORDS_N-1]; + reg [31:0] N[0:TB_NUM_WORDS_N-1]; + reg [31:0] N_FACTOR[0:TB_NUM_WORDS_N-1]; + reg [31:0] N_COEFF[0:TB_NUM_WORDS_N]; + reg [31:0] X[0:TB_NUM_WORDS_N-1]; + reg [31:0] Y[0:TB_NUM_WORDS_N-1]; + reg [31:0] P[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] Q[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] P_FACTOR[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] Q_FACTOR[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] P_COEFF[0:TB_NUM_WORDS_PQ]; + reg [31:0] Q_COEFF[0:TB_NUM_WORDS_PQ]; + reg [31:0] DP[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] DQ[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] QINV[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] XM[0:TB_NUM_WORDS_N-1]; + reg [31:0] YM[0:TB_NUM_WORDS_N-1]; + reg [31:0] S[0:TB_NUM_WORDS_N-1]; + reg [31:0] XM_READBACK[0:TB_NUM_WORDS_N-1]; + reg [31:0] YM_READBACK[0:TB_NUM_WORDS_N-1]; + reg [31:0] S_READBACK[0:TB_NUM_WORDS_N-1]; + + initial begin + M[ 0] = 32'h8d3b583b; M[ 1] = 32'hc370f07e; M[ 2] = 32'hb9078738; M[ 3] = 32'haf37f86c; + M[ 4] = 32'h02f0e161; M[ 5] = 32'h0506a68a; M[ 6] = 32'h1ae65107; M[ 7] = 32'hcd3a97f1; + M[ 8] = 32'hb27244b8; M[ 9] = 32'h9bc3c400; M[ 10] = 32'he4d5636e; M[ 11] = 32'h35187c07; + M[ 12] = 32'h78a661c9; M[ 13] = 32'h1e7ec273; M[ 14] = 32'hcdc31041; M[ 15] = 32'h002291d8; + N[ 0] = 32'hcb703101; N[ 1] = 32'h82bc8290; N[ 2] = 32'hdb2372c2; N[ 3] = 32'hdeeb692e; + N[ 4] = 32'ha3ee352a; N[ 5] = 32'h81a711ba; N[ 6] = 32'h14ee23bd; N[ 7] = 32'h8ad351c0; + N[ 8] = 32'h75ecd3d5; N[ 9] = 32'h51c9b22f; N[ 10] = 32'hc1d3496e; N[ 11] = 32'h48176f3e; + N[ 12] = 32'hd2aca749; N[ 13] = 32'hf236cea9; N[ 14] = 32'h7f4525ed; N[ 15] = 32'hb4fc5067; + N_FACTOR[ 0] = 32'he253bfbf; N_FACTOR[ 1] = 32'h8e0b26aa; N_FACTOR[ 2] = 32'h0480b661; N_FACTOR[ 3] = 32'h9a13f7a1; + N_FACTOR[ 4] = 32'h464b7342; N_FACTOR[ 5] = 32'hfb6f8e41; N_FACTOR[ 6] = 32'h081208e4; N_FACTOR[ 7] = 32'h63d8328a; + N_FACTOR[ 8] = 32'h604d2b71; N_FACTOR[ 9] = 32'hc987dabe; N_FACTOR[ 10] = 32'h8a474e35; N_FACTOR[ 11] = 32'hc053ba1c; + N_FACTOR[ 12] = 32'h15b82dd9; N_FACTOR[ 13] = 32'h42c2bbfa; N_FACTOR[ 14] = 32'h1681e95d; N_FACTOR[ 15] = 32'h07dee5fa; + N_COEFF[ 0] = 32'h730f30ff; N_COEFF[ 1] = 32'h50ed900a; N_COEFF[ 2] = 32'h0b9038c5; N_COEFF[ 3] = 32'h974ddd03; + N_COEFF[ 4] = 32'he2c118c8; N_COEFF[ 5] = 32'hbe1bc7e1; N_COEFF[ 6] = 32'h224d548c; N_COEFF[ 7] = 32'h48ea2ee4; + N_COEFF[ 8] = 32'heb379247; N_COEFF[ 9] = 32'had97b934; N_COEFF[ 10] = 32'hfc6dfd93; N_COEFF[ 11] = 32'h3a0246ef; + N_COEFF[ 12] = 32'h1baa167c; N_COEFF[ 13] = 32'h7d7ee254; N_COEFF[ 14] = 32'h657f0a53; N_COEFF[ 15] = 32'hea9e7245; + N_COEFF[ 16] = 32'h0000f88c; + X[ 0] = 32'h2d532c22; X[ 1] = 32'h2d3c3b06; X[ 2] = 32'he2862a8f; X[ 3] = 32'he8616ce4; + X[ 4] = 32'h5d77ee51; X[ 5] = 32'he609de07; X[ 6] = 32'hef718044; X[ 7] = 32'h82f35f8b; + X[ 8] = 32'hcdb9dcfe; X[ 9] = 32'hff6ea364; X[ 10] = 32'h0994ae28; X[ 11] = 32'h409b369b; + X[ 12] = 32'hcfabda4e; X[ 13] = 32'h5cd52bbc; X[ 14] = 32'hd90e1715; X[ 15] = 32'h00f4dcf2; + Y[ 0] = 32'h34fff653; Y[ 1] = 32'h50f52544; Y[ 2] = 32'h0ebf96a7; Y[ 3] = 32'h98352265; + Y[ 4] = 32'hbe372927; Y[ 5] = 32'h5b2f6394; Y[ 6] = 32'h9acfccb3; Y[ 7] = 32'h7b5bd4b2; + Y[ 8] = 32'h79b09448; Y[ 9] = 32'h08f11fa6; Y[ 10] = 32'h8411d066; Y[ 11] = 32'h58ba5021; + Y[ 12] = 32'h03c1cb72; Y[ 13] = 32'hacf0689d; Y[ 14] = 32'h983c65bd; Y[ 15] = 32'h29a39dcc; + P[ 0] = 32'hebfc2433; P[ 1] = 32'ha2cfbc81; P[ 2] = 32'hea08812b; P[ 3] = 32'h0adf004f; + P[ 4] = 32'hb987a8c6; P[ 5] = 32'h2860f873; P[ 6] = 32'haf2cfe12; P[ 7] = 32'hddd53c3a; + Q[ 0] = 32'hdc1981fb; Q[ 1] = 32'h01184053; Q[ 2] = 32'h7ab8d640; Q[ 3] = 32'h62ba8a22; + Q[ 4] = 32'h6cb226a1; Q[ 5] = 32'he1f08e16; Q[ 6] = 32'h13e990b5; Q[ 7] = 32'hd0dc7ce3; + P_FACTOR[ 0] = 32'h043fb284; P_FACTOR[ 1] = 32'hcaab7ce3; P_FACTOR[ 2] = 32'h543c62ef; P_FACTOR[ 3] = 32'h8aa74942; + P_FACTOR[ 4] = 32'hefa2ea7b; P_FACTOR[ 5] = 32'hdb8513b5; P_FACTOR[ 6] = 32'h0ea607a4; P_FACTOR[ 7] = 32'h6a59e5a7; + Q_FACTOR[ 0] = 32'h089dcd43; Q_FACTOR[ 1] = 32'h5b23611b; Q_FACTOR[ 2] = 32'h02f0f47c; Q_FACTOR[ 3] = 32'h952ababd; + Q_FACTOR[ 4] = 32'hc4ee13fe; Q_FACTOR[ 5] = 32'h3feb46fa; Q_FACTOR[ 6] = 32'h96b679df; Q_FACTOR[ 7] = 32'h831126dd; + P_COEFF[ 0] = 32'h647c8905; P_COEFF[ 1] = 32'hcb7c6b7d; P_COEFF[ 2] = 32'h8053b8be; P_COEFF[ 3] = 32'hb28f33a7; + P_COEFF[ 4] = 32'hb3207e05; P_COEFF[ 5] = 32'h4e3d416e; P_COEFF[ 6] = 32'h1911d8d9; P_COEFF[ 7] = 32'hd569156e; + P_COEFF[ 8] = 32'h00003dd7; + Q_COEFF[ 0] = 32'h5eee9ecd; Q_COEFF[ 1] = 32'h085153b0; Q_COEFF[ 2] = 32'h85326da6; Q_COEFF[ 3] = 32'h7521931a; + Q_COEFF[ 4] = 32'h99e0eef1; Q_COEFF[ 5] = 32'ha219917b; Q_COEFF[ 6] = 32'he8e9087a; Q_COEFF[ 7] = 32'h5239d12b; + Q_COEFF[ 8] = 32'h0000ed92; + DP[ 0] = 32'h3891ed91; DP[ 1] = 32'h775046c2; DP[ 2] = 32'h60180c26; DP[ 3] = 32'h5130700a; + DP[ 4] = 32'hb13c8216; DP[ 5] = 32'h833fcf78; DP[ 6] = 32'h7ab89b12; DP[ 7] = 32'hb976758c; + DQ[ 0] = 32'h28cc59ad; DQ[ 1] = 32'h3ce6ed45; DQ[ 2] = 32'ha1f53aeb; DQ[ 3] = 32'h06ca05e1; + DQ[ 4] = 32'hc5195df6; DQ[ 5] = 32'h42cf91f8; DQ[ 6] = 32'h93d6f054; DQ[ 7] = 32'h3d3bc769; + QINV[ 0] = 32'h50201af6; QINV[ 1] = 32'h85d97b7f; QINV[ 2] = 32'h4247e697; QINV[ 3] = 32'h9fd231fe; + QINV[ 4] = 32'h21e98610; QINV[ 5] = 32'ha0bc58dc; QINV[ 6] = 32'ha86f266c; QINV[ 7] = 32'h838688c8; + XM[ 0] = 32'hf9980f33; XM[ 1] = 32'hb444f483; XM[ 2] = 32'h0a6f8294; XM[ 3] = 32'h1c74da49; + XM[ 4] = 32'h0aa4151f; XM[ 5] = 32'ha1dfb66f; XM[ 6] = 32'h1415da79; XM[ 7] = 32'had7d3272; + XM[ 8] = 32'h43d7b612; XM[ 9] = 32'h56626cce; XM[ 10] = 32'ha65edef6; XM[ 11] = 32'h28c49eb8; + XM[ 12] = 32'h5364b7f8; XM[ 13] = 32'hd170915e; XM[ 14] = 32'h5a4c960d; XM[ 15] = 32'h27cb1911; + YM[ 0] = 32'h7640c2ca; YM[ 1] = 32'hf49d583a; YM[ 2] = 32'he4ae0f22; YM[ 3] = 32'ha3dad5ed; + YM[ 4] = 32'hbe88ab4d; YM[ 5] = 32'h9fb50b38; YM[ 6] = 32'h223feceb; YM[ 7] = 32'hfc4893ff; + YM[ 8] = 32'hb40556c2; YM[ 9] = 32'hb25b27fa; YM[ 10] = 32'h7e277535; YM[ 11] = 32'h42e1e9ab; + YM[ 12] = 32'hebd55ef2; YM[ 13] = 32'h8b6d8c0b; YM[ 14] = 32'h4d91ad9a; YM[ 15] = 32'h0e8bf565; + S[ 0] = 32'h2f89f059; S[ 1] = 32'hdbc41170; S[ 2] = 32'h1d7ea6c0; S[ 3] = 32'h1df9add6; + S[ 4] = 32'ha619e2e1; S[ 5] = 32'h253fcd88; S[ 6] = 32'h6c03a351; S[ 7] = 32'h795b1df0; + S[ 8] = 32'h2854a51a; S[ 9] = 32'h0245619b; S[ 10] = 32'hfb67ef8f; S[ 11] = 32'hcc5bdd4f; + S[ 12] = 32'ha70f58bd; S[ 13] = 32'h31f15702; S[ 14] = 32'hd6f36259; S[ 15] = 32'h280e67e0; + end + + + + // + // Clocks + // + `define CLK_FREQUENCY_MHZ (100.0) + `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) + `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + + `define CLK_BUS_FREQUENCY_MHZ (50.0) + `define CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ) + `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS) + + reg clk = 1'b1; + reg clk_bus = 1'b0; + + always #`CLK_PERIOD_HALF_NS clk = ~clk; + + always #`CLK_BUS_PERIOD_HALF_NS clk_bus = ~clk_bus; + + + // + // Reset + // + reg rst = 1'b1; + + + // + // Control / Status + // + reg [ 7:0] word_index_last_n; + reg [ 7:0] word_index_last_pq; + reg [11:0] bit_index_last_n; + reg [11:0] bit_index_last_pq; + reg core_next = 1'b0; + wire core_valid; + reg core_crt_mode; + + + // + // System Bus + // + reg bus_ready; + reg bus_cs = 1'b0; + reg bus_we = 1'b0; + reg [11:0] bus_addr; + reg [31:0] bus_data_wr; + wire [31:0] bus_data_rd; + + wire [ 1:0] bus_addr_sel = bus_addr[11:10]; + wire [ 2:0] bus_addr_bank = bus_addr[9:7]; + wire [ 6:0] bus_addr_data = bus_addr[6:0]; + + + // + // UUT + // + modexpng_core_top uut + ( + .clk (clk), + .clk_bus (clk_bus), + + .rst (rst), + + .next (core_next), + .valid (core_valid), + + .crt_mode (core_crt_mode), + + .word_index_last_n (word_index_last_n), + .word_index_last_pq (word_index_last_pq), + + .bit_index_last_n (bit_index_last_n), + .bit_index_last_pq (bit_index_last_pq), + + .bus_cs (bus_cs), + .bus_we (bus_we), + .bus_addr (bus_addr), + .bus_data_wr (bus_data_wr), + .bus_data_rd (bus_data_rd) + ); + + + // + // Routine (Bus) + // + initial begin + + bus_ready = 1'b0; + + while (rst) wait_clock_bus_tick; + wait_clock_bus_ticks(10); + $display("Core came out of reset."); + + set_input_1; + set_input_2; + + wait_clock_bus_ticks(10); + bus_ready = 1'b1; + + end + + + // + // Routine (Control/Status, Bus) + // + initial begin + + _wait_half_clock_tick; + wait_clock_ticks(100); + rst = 1'b0; + + while (!bus_ready) wait_clock_tick; + wait_clock_ticks(10); + $display("Core input banks written."); + + word_index_last_n = CORE_NUM_WORDS_N - 1; + word_index_last_pq = CORE_NUM_WORDS_PQ - 1; + + bit_index_last_n = TB_MODULUS_LENGTH_N - 1; + bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1; + + core_crt_mode = 1'b1; + + core_next = 1'b1; + wait_clock_tick; + core_next = 1'b0; + $display("Pulsed 'next' control signal."); + + while (!core_valid) wait_clock_tick; + wait_clock_ticks(10); + + $display("Detected high 'valid' status signal."); + core_crt_mode = 1'bX; + + wait_clock_ticks(10); + get_output; + wait_clock_ticks(10); + + $display("Core output banks read."); + + verify; + + end + + + // + // Variables + // + integer _w, _n; + + + // + // set_input_1; + // + task set_input_1; + reg [9:0] _tn; + begin + _tn = BANK_IN_1_N_COEFF * 2 ** BUS_OP_ADDR_W + TB_NUM_WORDS_N; // trick to write extra trailer word + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_M, _w[6:0], M[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N, _w[6:0], N[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N_FACTOR, _w[6:0], N_FACTOR[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N_COEFF, _w[6:0], N_COEFF[_w]); + bus_write(2'd0, _tn[9:7], _tn[6:0], N_COEFF[TB_NUM_WORDS_N]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_X, _w[6:0], X[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_Y, _w[6:0], Y[_w]); + end + endtask + + + // + // set_input_2; + // + task set_input_2; + begin +// for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_M, _w[6:0], M[_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P, {1'b0, _w[5:0]}, P [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P, {1'b1, _w[5:0]}, DP [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P_FACTOR, { _w[6:0]}, P_FACTOR[_w]); + for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P_COEFF, { _w[6:0]}, P_COEFF [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q, {1'b0, _w[5:0]}, Q [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q, {1'b1, _w[5:0]}, DQ [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q_FACTOR, { _w[6:0]}, Q_FACTOR[_w]); + for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q_COEFF, { _w[6:0]}, Q_COEFF [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_QINV, { _w[6:0]}, QINV [_w]); + end + endtask + + + // + // get_output; + // + task get_output; + begin + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_XM, _w[6:0], XM_READBACK[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_YM, _w[6:0], YM_READBACK[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_S, _w[6:0], S_READBACK[_w]); + end + endtask + + + // + // verify; + // + task verify; + // + reg xm_ok; + reg ym_ok; + reg s_ok; + // + begin + // + xm_ok = 1; + ym_ok = 1; + s_ok = 1; + // + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin + if (XM_READBACK[_w] !== XM[_w]) xm_ok = 0; + if (YM_READBACK[_w] !== YM[_w]) ym_ok = 0; + if (S_READBACK[_w] !== S[_w]) s_ok = 0; + end + // + if (!xm_ok) + // + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin + $write("XM / XM_READBACK [%3d] = 0x%08x / 0x%08x", _w, XM[_w], XM_READBACK[_w]); + if (XM[_w] !== XM_READBACK[_w]) $write(" <???: 0x%08x> ", XM[_w] ^ XM_READBACK[_w]); + $write("\n"); + end + // + if (!ym_ok) + // + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin + $write("YM / YM_READBACK [%3d] = 0x%08x / 0x%08x", _w, YM[_w], YM_READBACK[_w]); + if (YM[_w] !== YM_READBACK[_w]) $write(" <???: 0x%08x> ", YM[_w] ^ YM_READBACK[_w]); + $write("\n"); + end + // + if (!s_ok) + // + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin + $write("S / S_READBACK [%3d] = 0x%08x / 0x%08x", _w, S[_w], S_READBACK[_w]); + if (S[_w] !== S_READBACK[_w]) $write(" <???: 0x%08x> ", S[_w] ^ S_READBACK[_w]); + $write("\n"); + end + // + $write("XM is "); + if (xm_ok) $write("OK.\n"); + else $write("WRONG!\n"); + // + $write("YM is "); + if (ym_ok) $write("OK.\n"); + else $write("WRONG!\n"); + // + $write("S is "); + if (s_ok) $write("OK.\n"); + else $write("WRONG!\n"); + // + end + // + endtask + + + + // + // _bus_drive() + // + task _bus_drive; + input cs; + input we; + input [11:0] addr; + input [31:0] data; + {bus_cs, bus_we, bus_addr, bus_data_wr} <= {cs, we, addr, data}; + endtask + + + // + // bus_write() + // + task bus_write; + input [ 1:0] sel; + input [ 2:0] bank; + input [ 6:0] addr; + input [31:0] data; + begin + _bus_drive(1'b1, 1'b1, {sel, bank, addr}, data); + wait_clock_bus_tick; + _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX); + end + endtask + + + // + // bus_read() + // + task bus_read; + input [ 1:0] sel; + input [ 2:0] bank; + input [ 6:0] addr; + output [31:0] data; + begin + _bus_drive(1'b1, 1'b0, {sel, bank, addr}, 32'hXXXXXXXX); + wait_clock_bus_tick; + data = bus_data_rd; + _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX); + end + endtask + + + // + // _wait_half_clock_tick() + // + task _wait_half_clock_tick; + #`CLK_PERIOD_HALF_NS; + endtask + + // + // wait_clock_tick() + // + task wait_clock_tick; + begin + _wait_half_clock_tick; + _wait_half_clock_tick; + end + endtask + + + // + // wait_clock_bus_tick() + // + task wait_clock_bus_tick; + #`CLK_BUS_PERIOD_NS; + endtask + + + // + // wait_clock_ticks() + // + task wait_clock_ticks; + input integer num_ticks; + for (_n=0; _n<num_ticks; _n=_n+1) + wait_clock_tick; + endtask + + + // + // wait_clock_bus_ticks() + // + task wait_clock_bus_ticks; + input integer num_ticks; + for (_n=0; _n<num_ticks; _n=_n+1) + wait_clock_bus_tick; + endtask + +endmodule diff --git a/rtl/_modexpng_reductor.v b/rtl/_modexpng_reductor.v deleted file mode 100644 index 25cf394..0000000 --- a/rtl/_modexpng_reductor.v +++ /dev/null @@ -1,252 +0,0 @@ -module modexpng_reductor -( - clk, rst, - ena, rdy, - word_index_last, - rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux, - rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid, - rdct_final_xy_addr, rdct_final_x_dout, rdct_final_y_dout, rdct_final_xy_valid -); - - // - // Headers - // - `include "../rtl/modexpng_parameters.vh" - - - input clk; - input rst; - // - input ena; - output rdy; - // - input [ OP_ADDR_W -1:0] word_index_last; - // - input [BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; - input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; - input [ WORD_EXT_W -1:0] rd_wide_x_dout_aux; - input [ WORD_EXT_W -1:0] rd_wide_y_dout_aux; - // - input [BANK_ADDR_W -1:0] rcmb_final_xy_bank; - input [ OP_ADDR_W -1:0] rcmb_final_xy_addr; - input [ WORD_EXT_W -1:0] rcmb_final_x_dout; - input [ WORD_EXT_W -1:0] rcmb_final_y_dout; - input rcmb_final_xy_valid; - // - output [ OP_ADDR_W -1:0] rdct_final_xy_addr; - output [ WORD_EXT_W -1:0] rdct_final_x_dout; - output [ WORD_EXT_W -1:0] rdct_final_y_dout; - output rdct_final_xy_valid; - - - // - // Ready - // - reg rdy_reg = 1'b1; - reg busy_now = 1'b0; - - assign rdy = rdy_reg; - - always @(posedge clk) - // - if (rst) rdy_reg <= 1'b1; - else begin - if (rdy && ena) rdy_reg <= 1'b0; - if (!rdy && !busy_now) rdy_reg <= 1'b1; - end - - - // - // Pipeline (Delay Match) - // - reg rcmb_xy_valid_dly1 = 1'b0; - reg rcmb_xy_valid_dly2 = 1'b0; - reg rcmb_xy_valid_dly3 = 1'b0; - - reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1; - reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2; - reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3; - - reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly1; - reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly2; - reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly3; - - reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly1; - reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2; - reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3; - - reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1; - reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2; - reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3; - - always @(posedge clk) - // - if (rst) begin - rcmb_xy_valid_dly1 <= 1'b0; - rcmb_xy_valid_dly2 <= 1'b0; - rcmb_xy_valid_dly3 <= 1'b0; - end else begin - rcmb_xy_valid_dly1 <= rcmb_final_xy_valid; - rcmb_xy_valid_dly2 <= rcmb_xy_valid_dly1; - rcmb_xy_valid_dly3 <= rcmb_xy_valid_dly2; - end - - - always @(posedge clk) begin - // - if (rcmb_final_xy_valid) begin - rcmb_xy_bank_dly1 <= rcmb_final_xy_bank; - rcmb_xy_addr_dly1 <= rcmb_final_xy_addr; - rcmb_x_dout_dly1 <= rcmb_final_x_dout; - rcmb_y_dout_dly1 <= rcmb_final_y_dout; - end - // - if (rcmb_xy_valid_dly1) begin - rcmb_xy_bank_dly2 <= rcmb_xy_bank_dly1; - rcmb_xy_addr_dly2 <= rcmb_xy_addr_dly1; - rcmb_x_dout_dly2 <= rcmb_x_dout_dly1; - rcmb_y_dout_dly2 <= rcmb_y_dout_dly1; - end - // - if (rcmb_xy_valid_dly2) begin - rcmb_xy_bank_dly3 <= rcmb_xy_bank_dly2; - rcmb_xy_addr_dly3 <= rcmb_xy_addr_dly2; - rcmb_x_dout_dly3 <= rcmb_x_dout_dly2; - rcmb_y_dout_dly3 <= rcmb_y_dout_dly2; - end - // - end - - - // - // Carry Logic - // - reg [RDCT_CARRY_W -1:0] rcmb_x_lsb_carry; - reg [WORD_W -1:0] rcmb_x_lsb_dummy; - reg [WORD_EXT_W -1:0] rcmb_x_lsb_dout; - - reg [RDCT_CARRY_W -1:0] rcmb_y_lsb_carry; - reg [WORD_W -1:0] rcmb_y_lsb_dummy; - reg [WORD_EXT_W -1:0] rcmb_y_lsb_dout; - - - // - // Carry Computation - // - always @(posedge clk) - // - if (ena) begin - rcmb_x_lsb_carry <= RDCT_CARRY_ZEROES; - rcmb_y_lsb_carry <= RDCT_CARRY_ZEROES; - end else if (rcmb_xy_valid_dly3) - // - case (rcmb_xy_bank_dly3) - - BANK_RCMB_ML: begin - {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry; - {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry; - end - - BANK_RCMB_MH: - if (rcmb_xy_addr_dly3 == OP_ADDR_ZERO) begin - {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry; - {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry; - end - - endcase - - - // - // Reduction - // - reg [ OP_ADDR_W -1:0] rdct_xy_addr; - reg [WORD_EXT_W -1:0] rdct_x_dout; - reg [WORD_EXT_W -1:0] rdct_y_dout; - reg rdct_xy_valid = 1'b0; - - assign rdct_final_xy_addr = rdct_xy_addr; - assign rdct_final_x_dout = rdct_x_dout; - assign rdct_final_y_dout = rdct_y_dout; - assign rdct_final_xy_valid = rdct_xy_valid; - - task _update_rdct; - input [ OP_ADDR_W -1:0] addr; - input [WORD_EXT_W -1:0] dout_x; - input [WORD_EXT_W -1:0] dout_y; - input valid; - begin - rdct_xy_addr <= addr; - rdct_x_dout <= dout_x; - rdct_y_dout <= dout_y; - rdct_xy_valid <= valid; - end - endtask - - task set_rdct; - input [ OP_ADDR_W -1:0] addr; - input [WORD_EXT_W -1:0] dout_x; - input [WORD_EXT_W -1:0] dout_y; - begin - _update_rdct(addr, dout_x, dout_y, 1'b1); - end - endtask - - task clear_rdct; - begin - _update_rdct(OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0); - end - endtask - - - // - // Helper Wires - // - wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux; - wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux; - - wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + {WORD_NULL, rcmb_x_lsb_carry}; - wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + {WORD_NULL, rcmb_y_lsb_carry}; - - - // - // - // - always @(posedge clk) - // - if (rst) clear_rdct; - else begin - // - clear_rdct; - // - if (busy_now && rcmb_xy_valid_dly3) - // - case (rcmb_xy_bank_dly3) - - BANK_RCMB_MH: - if (rcmb_xy_addr_dly3 == OP_ADDR_ONE) - set_rdct(OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry); - else if (rcmb_xy_addr_dly3 > OP_ADDR_ONE) - set_rdct(rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y); - - BANK_RCMB_EXT: - set_rdct(word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3); - - endcase - // - end - - - - // - // Busy - // - always @(posedge clk) - // - if (rst) busy_now <= 1'b0; - else begin - if (rdy && ena) busy_now <= 1'b1; - //if (!rdy && !busy_now) rdy <= 1'b1; - end - - -endmodule diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v index 78e4668..0c72478 100644 --- a/rtl/modexpng_core_top.v +++ b/rtl/modexpng_core_top.v @@ -15,7 +15,6 @@ module modexpng_core_top bus_data_rd ); - // // Headers // @@ -50,205 +49,281 @@ module modexpng_core_top // - // UOP_FSM + // uOP Control / Status Signals // - localparam [1:0] UOP_FSM_STATE_IDLE = 2'b00; - localparam [1:0] UOP_FSM_STATE_FETCH = 2'b01; - localparam [1:0] UOP_FSM_STATE_DECODE = 2'b10; - localparam [1:0] UOP_FSM_STATE_BUSY = 2'b11; - - reg [1:0] uop_fsm_state = UOP_FSM_STATE_IDLE; - reg [1:0] uop_fsm_state_next; - + wire io_mgr_ena; + wire io_mgr_rdy; + wire [UOP_CRT_W -1:0] io_mgr_sel_crt; + wire [UOP_AUX_W -1:0] io_mgr_sel_aux; + wire [BANK_ADDR_W -1:0] io_mgr_sel_in; + wire [BANK_ADDR_W -1:0] io_mgr_sel_out; + wire [OP_ADDR_W -1:0] io_mgr_word_index_last; + wire [UOP_OPCODE_W -1:0] io_mgr_opcode; + wire [BIT_INDEX_W -1:0] io_mgr_ladder_steps; + wire io_mgr_ladder_d; + wire io_mgr_ladder_p; + wire io_mgr_ladder_q; + wire io_mgr_ladder_done; - // - // UOP ROM - // - reg [UOP_ADDR_W -1:0] uop_addr; - wire [UOP_W -1:0] uop_data; - wire [UOP_OPCODE_W -1:0] uop_data_opcode = uop_data[UOP_W -1-: UOP_OPCODE_W]; - wire [UOP_CRT_W -1:0] uop_data_crt = uop_data[UOP_W -UOP_OPCODE_W -1-: UOP_CRT_W ]; - wire [UOP_NPQ_W -1:0] uop_data_npq = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -1-: UOP_NPQ_W ]; - wire [UOP_AUX_W -1:0] uop_data_aux = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -1-: UOP_AUX_W ]; - wire [UOP_LADDER_W -1:0] uop_data_ladder = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -1-: UOP_LADDER_W]; - wire [BANK_ADDR_W -1:0] uop_data_sel_wide_in = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1-: BANK_ADDR_W ]; - wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_in = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1*BANK_ADDR_W -1-: BANK_ADDR_W ]; - wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ]; - wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ]; - - wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ; - wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) || - (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ; - wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ; - wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT ) || - (uop_data_opcode == UOP_OPCODE_LADDER_STEP ) ; - wire uop_opcode_is_mmm = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ) || - (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) || - (uop_data_opcode == UOP_OPCODE_REGULAR_MULTIPLY ) ; - wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) || - (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) || - (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) || - (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) || - (uop_data_opcode == UOP_OPCODE_CROSS_LADDERS_X2Y ) || - (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT ) || - (uop_data_opcode == UOP_OPCODE_MERGE_LH ) || - (uop_data_opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN ) ; - - wire uop_loop_now; - - wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT; - wire [UOP_ADDR_W -1:0] uop_addr_next = uop_loop_now ? uop_addr - 1'b1: uop_addr + 1'b1; - - modexpng_uop_rom uop_rom + wire mmm_ena_x; + wire mmm_ena_y; + wire mmm_rdy_x; + wire mmm_rdy_y; + wire [OP_ADDR_W -1:0] mmm_word_index_last_x; + wire [OP_ADDR_W -1:0] mmm_word_index_last_y; + wire [OP_ADDR_W -1:0] mmm_word_index_last_minus1_x; + wire [OP_ADDR_W -1:0] mmm_word_index_last_minus1_y; + wire mmm_ladder_mode_x; + wire mmm_ladder_mode_y; + wire [BANK_ADDR_W -1:0] mmm_sel_wide_in_x; + wire [BANK_ADDR_W -1:0] mmm_sel_wide_in_y; + wire [BANK_ADDR_W -1:0] mmm_sel_narrow_in_x; + wire [BANK_ADDR_W -1:0] mmm_sel_narrow_in_y; + wire mmm_force_unity_b_x; + wire mmm_force_unity_b_y; + wire mmm_only_reduce_x; + wire mmm_only_reduce_y; + wire mmm_just_multiply_x; + wire mmm_just_multiply_y; + + wire rdct_ena_x; + wire rdct_ena_y; + wire rdct_rdy_x; + wire rdct_rdy_y; + wire [OP_ADDR_W -1:0] rdct_word_index_last_x; + wire [OP_ADDR_W -1:0] rdct_word_index_last_y; + wire [BANK_ADDR_W -1:0] rdct_sel_wide_out_x; + wire [BANK_ADDR_W -1:0] rdct_sel_narrow_out_x; + wire [BANK_ADDR_W -1:0] rdct_sel_wide_out_y; + wire [BANK_ADDR_W -1:0] rdct_sel_narrow_out_y; + + wire wrk_ena; + wire wrk_rdy; + wire [BANK_ADDR_W -1:0] wrk_sel_wide_in; + wire [BANK_ADDR_W -1:0] wrk_sel_wide_out; + wire [BANK_ADDR_W -1:0] wrk_sel_narrow_in; + wire [BANK_ADDR_W -1:0] wrk_sel_narrow_out; + wire [OP_ADDR_W -1:0] wrk_word_index_last; + wire [OP_ADDR_W -1:0] wrk_word_index_last_half; + wire [UOP_OPCODE_W -1:0] wrk_opcode; + + + // + // uOP Engine + // + `ifdef MODEXPNG_ENABLE_DEBUG + wire [1:0] uop_decoded_stop; + `endif + + modexpng_uop_engine uop_engine ( - .clk (clk), - .addr (uop_addr), - .data (uop_data) + .clk (clk), + .rst (rst), + + .ena (next), + .rdy (valid), + + `ifdef MODEXPNG_ENABLE_DEBUG + .uop_decoded_stop (uop_decoded_stop), + `endif + + .crt_mode (crt_mode), + + .word_index_last_n (word_index_last_n), + .word_index_last_pq (word_index_last_pq), + + .bit_index_last_n (bit_index_last_n), + .bit_index_last_pq (bit_index_last_pq), + + .io_mgr_ena (io_mgr_ena), + .io_mgr_rdy (io_mgr_rdy), + .io_mgr_sel_crt (io_mgr_sel_crt), + .io_mgr_sel_aux (io_mgr_sel_aux), + .io_mgr_sel_in (io_mgr_sel_in), + .io_mgr_sel_out (io_mgr_sel_out), + .io_mgr_word_index_last (io_mgr_word_index_last), + .io_mgr_opcode (io_mgr_opcode), + .io_mgr_ladder_steps (io_mgr_ladder_steps), + .io_mgr_ladder_d (io_mgr_ladder_d), + .io_mgr_ladder_p (io_mgr_ladder_p), + .io_mgr_ladder_q (io_mgr_ladder_q), + .io_mgr_ladder_done (io_mgr_ladder_done), + + .mmm_ena_x (mmm_ena_x), + .mmm_ena_y (mmm_ena_y), + .mmm_rdy_x (mmm_rdy_x), + .mmm_rdy_y (mmm_rdy_y), + .mmm_word_index_last_x (mmm_word_index_last_x), + .mmm_word_index_last_y (mmm_word_index_last_y), + .mmm_word_index_last_minus1_x (mmm_word_index_last_minus1_x), + .mmm_word_index_last_minus1_y (mmm_word_index_last_minus1_y), + .mmm_ladder_mode_x (mmm_ladder_mode_x), + .mmm_ladder_mode_y (mmm_ladder_mode_y), + .mmm_sel_wide_in_x (mmm_sel_wide_in_x), + .mmm_sel_wide_in_y (mmm_sel_wide_in_y), + .mmm_sel_narrow_in_x (mmm_sel_narrow_in_x), + .mmm_sel_narrow_in_y (mmm_sel_narrow_in_y), + .mmm_force_unity_b_x (mmm_force_unity_b_x), + .mmm_force_unity_b_y (mmm_force_unity_b_y), + .mmm_only_reduce_x (mmm_only_reduce_x), + .mmm_only_reduce_y (mmm_only_reduce_y), + .mmm_just_multiply_x (mmm_just_multiply_x), + .mmm_just_multiply_y (mmm_just_multiply_y), + + .rdct_word_index_last_x (rdct_word_index_last_x), + .rdct_word_index_last_y (rdct_word_index_last_y), + .rdct_sel_wide_out_x (rdct_sel_wide_out_x), + .rdct_sel_wide_out_y (rdct_sel_wide_out_y), + .rdct_sel_narrow_out_x (rdct_sel_narrow_out_x), + .rdct_sel_narrow_out_y (rdct_sel_narrow_out_y), + + .wrk_ena (wrk_ena), + .wrk_rdy (wrk_rdy), + .wrk_sel_wide_in (wrk_sel_wide_in), + .wrk_sel_wide_out (wrk_sel_wide_out), + .wrk_sel_narrow_in (wrk_sel_narrow_in), + .wrk_sel_narrow_out (wrk_sel_narrow_out), + .wrk_word_index_last (wrk_word_index_last), + .wrk_word_index_last_half (wrk_word_index_last_half), + .wrk_opcode (wrk_opcode) ); // - // UOP ROM Address Logic - // - - always @(posedge clk) - // - if (uop_fsm_state_next == UOP_FSM_STATE_FETCH) - uop_addr <= (uop_fsm_state == UOP_FSM_STATE_IDLE) ? uop_addr_offset : uop_addr_next; - - - // // Storage Interfaces (X, Y) // - wire wr_wide_xy_ena_x; // \ \ - wire [ BANK_ADDR_W -1:0] wr_wide_xy_bank_x; // | WIDE | WR - wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_x; // | | - wire [ WORD_EXT_W -1:0] wr_wide_x_data_x; // | | - wire [ WORD_EXT_W -1:0] wr_wide_y_data_x; // / | - // | - wire wr_narrow_xy_ena_x; // \ | - wire [ BANK_ADDR_W -1:0] wr_narrow_xy_bank_x; // | NARROW | - wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_x; // | | - wire [ WORD_EXT_W -1:0] wr_narrow_x_data_x; // | | - wire [ WORD_EXT_W -1:0] wr_narrow_y_data_x; // / / - // - wire rd_wide_xy_ena_x; // \ \ - wire rd_wide_xy_ena_aux_x; // | WIDE | RD - wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_x; // | | - wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x; // | | - wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_x; // | | - wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_x; // | | - wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_data_x; // | | - wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_data_x; // | | - wire [ WORD_EXT_W -1:0] rd_wide_x_data_aux_x; // | | - wire [ WORD_EXT_W -1:0] rd_wide_y_data_aux_x; // / | - // | - wire rd_narrow_xy_ena_x; // \ | - wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_x; // | NARROW | - wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x; // | | - wire [ WORD_EXT_W -1:0] rd_narrow_x_data_x; // | | - wire [ WORD_EXT_W -1:0] rd_narrow_y_data_x; // / / - // - wire wrk_rd_wide_xy_ena_x; // \ \ - wire [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x; // | WIDE | WRK - wire [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x; // | | - wire [ WORD_EXT_W -1:0] wrk_rd_wide_x_data_x; // | | - wire [ WORD_EXT_W -1:0] wrk_rd_wide_y_data_x; // / | - // | - wire wrk_rd_narrow_xy_ena_x; // \ | - wire [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x; // | NARROW | - wire [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x; // | | - wire [ WORD_EXT_W -1:0] wrk_rd_narrow_x_data_x; // | | - wire [ WORD_EXT_W -1:0] wrk_rd_narrow_y_data_x; // / / - - wire wrk_wr_wide_xy_ena_x; // \ \ - wire [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x; // | WIDE | WRK - wire [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x; // | | - wire [ WORD_EXT_W -1:0] wrk_wr_wide_x_data_x; // | | - wire [ WORD_EXT_W -1:0] wrk_wr_wide_y_data_x; // / | - // | - wire wrk_wr_narrow_xy_ena_x; // \ | - wire [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x; // | NARROW | - wire [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x; // | | - wire [ WORD_EXT_W -1:0] wrk_wr_narrow_x_data_x; // | | - wire [ WORD_EXT_W -1:0] wrk_wr_narrow_y_data_x; // / / - // - wire io_wide_xy_ena_x; // \ \ - wire [ BANK_ADDR_W -1:0] io_wide_xy_bank_x; // | WIDE | IO - wire [ OP_ADDR_W -1:0] io_wide_xy_addr_x; // | | - wire [ WORD_EXT_W -1:0] io_wide_x_data_x; // | | - wire [ WORD_EXT_W -1:0] io_wide_y_data_x; // / | - // | - wire io_narrow_xy_ena_x; // \ | - wire [ BANK_ADDR_W -1:0] io_narrow_xy_bank_x; // | NARROW | - wire [ OP_ADDR_W -1:0] io_narrow_xy_addr_x; // | | - wire [ WORD_EXT_W -1:0] io_narrow_x_data_x; // | | - wire [ WORD_EXT_W -1:0] io_narrow_y_data_x; // / / - // - wire wr_wide_xy_ena_y; // \ - wire [ BANK_ADDR_W -1:0] wr_wide_xy_bank_y; // - wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_y; // - wire [ WORD_EXT_W -1:0] wr_wide_x_data_y; // - wire [ WORD_EXT_W -1:0] wr_wide_y_data_y; // - // - wire wr_narrow_xy_ena_y; // - wire [ BANK_ADDR_W -1:0] wr_narrow_xy_bank_y; // - wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y; // - wire [ WORD_EXT_W -1:0] wr_narrow_x_data_y; // - wire [ WORD_EXT_W -1:0] wr_narrow_y_data_y; // - // - wire rd_wide_xy_ena_y; // - wire rd_wide_xy_ena_aux_y; // - wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_y; // - wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_y; // - wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_y; // - wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_y; // - wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_data_y; // - wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_data_y; // - wire [ WORD_EXT_W -1:0] rd_wide_x_data_aux_y; // - wire [ WORD_EXT_W -1:0] rd_wide_y_data_aux_y; // - // - wire rd_narrow_xy_ena_y; // - wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_y; // - wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y; // - wire [ WORD_EXT_W -1:0] rd_narrow_x_data_y; // - wire [ WORD_EXT_W -1:0] rd_narrow_y_data_y; // - // - wire wrk_rd_wide_xy_ena_y; // - wire [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y; // - wire [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y; // - wire [ WORD_EXT_W -1:0] wrk_rd_wide_x_data_y; // - wire [ WORD_EXT_W -1:0] wrk_rd_wide_y_data_y; // - // - wire wrk_rd_narrow_xy_ena_y; // - wire [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y; // - wire [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y; // - wire [ WORD_EXT_W -1:0] wrk_rd_narrow_x_data_y; // - wire [ WORD_EXT_W -1:0] wrk_rd_narrow_y_data_y; // - - wire wrk_wr_wide_xy_ena_y; // - wire [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y; // - wire [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y; // - wire [ WORD_EXT_W -1:0] wrk_wr_wide_x_data_y; // - wire [ WORD_EXT_W -1:0] wrk_wr_wide_y_data_y; // - // - wire wrk_wr_narrow_xy_ena_y; // - wire [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y; // - wire [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y; // - wire [ WORD_EXT_W -1:0] wrk_wr_narrow_x_data_y; // - wire [ WORD_EXT_W -1:0] wrk_wr_narrow_y_data_y; // - // - wire io_wide_xy_ena_y; // - wire [ BANK_ADDR_W -1:0] io_wide_xy_bank_y; // - wire [ OP_ADDR_W -1:0] io_wide_xy_addr_y; // - wire [ WORD_EXT_W -1:0] io_wide_x_data_y; // - wire [ WORD_EXT_W -1:0] io_wide_y_data_y; // - // - wire io_narrow_xy_ena_y; // - wire [ BANK_ADDR_W -1:0] io_narrow_xy_bank_y; // - wire [ OP_ADDR_W -1:0] io_narrow_xy_addr_y; // - wire [ WORD_EXT_W -1:0] io_narrow_x_data_y; // - wire [ WORD_EXT_W -1:0] io_narrow_y_data_y; // + wire wr_wide_xy_ena_x; // + wire [ BANK_ADDR_W -1:0] wr_wide_xy_bank_x; // + wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_x; // + wire [ WORD_EXT_W -1:0] wr_wide_x_data_x; // + wire [ WORD_EXT_W -1:0] wr_wide_y_data_x; // + // + wire wr_narrow_xy_ena_x; // + wire [ BANK_ADDR_W -1:0] wr_narrow_xy_bank_x; // + wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_x; // + wire [ WORD_EXT_W -1:0] wr_narrow_x_data_x; // + wire [ WORD_EXT_W -1:0] wr_narrow_y_data_x; // + // + wire rd_wide_xy_ena_x; // + wire rd_wide_xy_ena_aux_x; // + wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_x; // + wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x; // + wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_x; // + wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_x; // + wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_data_x; // + wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_data_x; // + wire [ WORD_EXT_W -1:0] rd_wide_x_data_aux_x; // + wire [ WORD_EXT_W -1:0] rd_wide_y_data_aux_x; // + // + wire rd_narrow_xy_ena_x; // + wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_x; // + wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_x; // + wire [ WORD_EXT_W -1:0] rd_narrow_x_data_x; // + wire [ WORD_EXT_W -1:0] rd_narrow_y_data_x; // + // + wire wrk_rd_wide_xy_ena_x; // + wire [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x; // + wire [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_x; // + wire [ WORD_EXT_W -1:0] wrk_rd_wide_x_data_x; // + wire [ WORD_EXT_W -1:0] wrk_rd_wide_y_data_x; // + // + wire wrk_rd_narrow_xy_ena_x; // + wire [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x; // + wire [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_x; // + wire [ WORD_EXT_W -1:0] wrk_rd_narrow_x_data_x; // + wire [ WORD_EXT_W -1:0] wrk_rd_narrow_y_data_x; // + // + wire wrk_wr_wide_xy_ena_x; // + wire [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x; // + wire [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_x; // + wire [ WORD_EXT_W -1:0] wrk_wr_wide_x_data_x; // + wire [ WORD_EXT_W -1:0] wrk_wr_wide_y_data_x; // + // + wire wrk_wr_narrow_xy_ena_x; // + wire [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x; // + wire [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_x; // + wire [ WORD_EXT_W -1:0] wrk_wr_narrow_x_data_x; // + wire [ WORD_EXT_W -1:0] wrk_wr_narrow_y_data_x; // + // + wire io_wide_xy_ena_x; // + wire [ BANK_ADDR_W -1:0] io_wide_xy_bank_x; // + wire [ OP_ADDR_W -1:0] io_wide_xy_addr_x; // + wire [ WORD_EXT_W -1:0] io_wide_x_data_x; // + wire [ WORD_EXT_W -1:0] io_wide_y_data_x; // + // + wire io_narrow_xy_ena_x; // + wire [ BANK_ADDR_W -1:0] io_narrow_xy_bank_x; // + wire [ OP_ADDR_W -1:0] io_narrow_xy_addr_x; // + wire [ WORD_EXT_W -1:0] io_narrow_x_data_x; // + wire [ WORD_EXT_W -1:0] io_narrow_y_data_x; // + // + wire wr_wide_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wr_wide_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wr_wide_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wr_wide_x_data_y; // + wire [ WORD_EXT_W -1:0] wr_wide_y_data_y; // + // + wire wr_narrow_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wr_narrow_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wr_narrow_x_data_y; // + wire [ WORD_EXT_W -1:0] wr_narrow_y_data_y; // + // + wire rd_wide_xy_ena_y; // + wire rd_wide_xy_ena_aux_y; // + wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_y; // + wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_y; // + wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr_y; // + wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux_y; // + wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_data_y; // + wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_data_y; // + wire [ WORD_EXT_W -1:0] rd_wide_x_data_aux_y; // + wire [ WORD_EXT_W -1:0] rd_wide_y_data_aux_y; // + // + wire rd_narrow_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank_y; // + wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr_y; // + wire [ WORD_EXT_W -1:0] rd_narrow_x_data_y; // + wire [ WORD_EXT_W -1:0] rd_narrow_y_data_y; // + // + wire wrk_rd_wide_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wrk_rd_wide_x_data_y; // + wire [ WORD_EXT_W -1:0] wrk_rd_wide_y_data_y; // + // + wire wrk_rd_narrow_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wrk_rd_narrow_x_data_y; // + wire [ WORD_EXT_W -1:0] wrk_rd_narrow_y_data_y; // + // + wire wrk_wr_wide_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wrk_wr_wide_x_data_y; // + wire [ WORD_EXT_W -1:0] wrk_wr_wide_y_data_y; // + // + wire wrk_wr_narrow_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y; // + wire [ OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y; // + wire [ WORD_EXT_W -1:0] wrk_wr_narrow_x_data_y; // + wire [ WORD_EXT_W -1:0] wrk_wr_narrow_y_data_y; // + // + wire io_wide_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] io_wide_xy_bank_y; // + wire [ OP_ADDR_W -1:0] io_wide_xy_addr_y; // + wire [ WORD_EXT_W -1:0] io_wide_x_data_y; // + wire [ WORD_EXT_W -1:0] io_wide_y_data_y; // + // + wire io_narrow_xy_ena_y; // + wire [ BANK_ADDR_W -1:0] io_narrow_xy_bank_y; // + wire [ OP_ADDR_W -1:0] io_narrow_xy_addr_y; // + wire [ WORD_EXT_W -1:0] io_narrow_x_data_y; // + wire [ WORD_EXT_W -1:0] io_narrow_y_data_y; // + + wire [ WORD_W -1:0] wrk_rd_narrow_x_data_x_lsb = wrk_rd_narrow_x_data_x[WORD_W-1:0]; + wire [ WORD_W -1:0] wrk_rd_narrow_x_data_y_lsb = wrk_rd_narrow_x_data_y[WORD_W-1:0]; // @@ -320,6 +395,23 @@ module modexpng_core_top // + // I/O Interfaces + // + wire io_in_1_en; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr; + wire [ WORD_W -1:0] io_in_1_data; + + wire io_in_2_en; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr; + wire [ WORD_W -1:0] io_in_2_data; + + wire io_out_en; + wire io_out_we; + wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr; + wire [ WORD_W -1:0] io_out_data; + + + // // Storage Blocks (X, Y) // modexpng_storage_block storage_block_x @@ -555,23 +647,8 @@ module modexpng_core_top // - // IO Block - // - wire io_in_1_en; - wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr; - wire [ WORD_W -1:0] io_in_1_data; - - wire io_in_2_en; - wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr; - wire [ WORD_W -1:0] io_in_2_data; - - wire io_out_en; - wire io_out_we; - wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr; - wire [ WORD_W -1:0] io_out_data; - - // TODO: Separate reset for clock domains (core/bus)??? - + // I/O Block + // modexpng_io_block io_block ( .clk (clk), @@ -602,27 +679,7 @@ module modexpng_core_top // // IO Manager - // - reg io_mgr_ena = 1'b0; - wire io_mgr_rdy; - reg [UOP_CRT_W -1:0] io_mgr_sel_crt; - reg [UOP_AUX_W -1:0] io_mgr_sel_aux; - reg [BANK_ADDR_W -1:0] io_mgr_sel_in; - reg [BANK_ADDR_W -1:0] io_mgr_sel_out; - reg [OP_ADDR_W -1:0] io_mgr_word_index_last; - reg [UOP_OPCODE_W -1:0] io_mgr_opcode; - - reg [BIT_INDEX_W -1:0] io_mgr_ladder_steps; - wire io_mgr_ladder_d; - wire io_mgr_ladder_p; - wire io_mgr_ladder_q; - wire io_mgr_ladder_done; - - assign uop_loop_now = (uop_data_opcode == UOP_OPCODE_LADDER_STEP) && !io_mgr_ladder_done; - - wire [WORD_W -1:0] wrk_rd_narrow_x_data_x_trunc = wrk_rd_narrow_x_data_x[WORD_W-1:0]; - wire [WORD_W -1:0] wrk_rd_narrow_x_data_y_trunc = wrk_rd_narrow_x_data_y[WORD_W-1:0]; - + // modexpng_io_manager io_manager ( .clk (clk), @@ -677,8 +734,8 @@ module modexpng_core_top .io_out_addr (io_out_addr), .io_out_dout (io_out_data), - .wrk_narrow_x_din_x_trunc (wrk_rd_narrow_x_data_x_trunc), - .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc), + .wrk_narrow_x_din_x_lsb (wrk_rd_narrow_x_data_x_lsb), + .wrk_narrow_x_din_y_lsb (wrk_rd_narrow_x_data_y_lsb), .ladder_steps (io_mgr_ladder_steps), .ladder_d (io_mgr_ladder_d), @@ -690,43 +747,7 @@ module modexpng_core_top // // Multipliers (X, Y) - // - reg mmm_ena_x = 1'b0; - reg mmm_ena_y = 1'b0; - wire mmm_ena = mmm_ena_x & mmm_ena_y; - - wire mmm_rdy_x; - wire mmm_rdy_y; - wire mmm_rdy = mmm_rdy_x & mmm_rdy_y; - - reg [OP_ADDR_W -1:0] mmm_word_index_last_x; - reg [OP_ADDR_W -1:0] mmm_word_index_last_y; - - reg [OP_ADDR_W -1:0] mmm_word_index_last_minus1_x; - reg [OP_ADDR_W -1:0] mmm_word_index_last_minus1_y; - - reg mmm_ladder_mode_x; - reg mmm_ladder_mode_y; - - reg [BANK_ADDR_W -1:0] mmm_sel_wide_in_x; - reg [BANK_ADDR_W -1:0] mmm_sel_wide_in_y; - reg [BANK_ADDR_W -1:0] mmm_sel_narrow_in_x; - reg [BANK_ADDR_W -1:0] mmm_sel_narrow_in_y; - - reg mmm_force_unity_b_x; - reg mmm_force_unity_b_y; - - reg mmm_only_reduce_x; - reg mmm_only_reduce_y; - - reg mmm_just_multiply_x; - reg mmm_just_multiply_y; - - wire rdct_ena_x; - wire rdct_ena_y; - wire rdct_rdy_x; - wire rdct_rdy_y; - + // modexpng_mmm_dual mmm_x ( .clk (clk), @@ -843,16 +864,7 @@ module modexpng_core_top // // Reductors (X, Y) - // - reg [ OP_ADDR_W -1:0] rdct_word_index_last_x; - reg [ OP_ADDR_W -1:0] rdct_word_index_last_y; - - reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_x; - reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_x; - - reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_y; - reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_y; - + // modexpng_reductor reductor_x ( .clk (clk), @@ -866,8 +878,8 @@ module modexpng_core_top .sel_wide_out (rdct_sel_wide_out_x), .sel_narrow_out (rdct_sel_narrow_out_x), - .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x), - .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x), + //.rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_x), + //.rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_x), .rd_wide_x_din_aux (rd_wide_x_data_aux_x), .rd_wide_y_din_aux (rd_wide_y_data_aux_x), @@ -903,8 +915,8 @@ module modexpng_core_top .sel_wide_out (rdct_sel_wide_out_y), .sel_narrow_out (rdct_sel_narrow_out_y), - .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y), - .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y), + //.rd_wide_xy_addr_aux (rd_wide_xy_addr_aux_y), + //.rd_wide_xy_bank_aux (rd_wide_xy_bank_aux_y), .rd_wide_x_din_aux (rd_wide_x_data_aux_y), .rd_wide_y_din_aux (rd_wide_y_data_aux_y), @@ -930,18 +942,7 @@ module modexpng_core_top // // General Worker - // - reg wrk_ena = 1'b0; - wire wrk_rdy; - - reg [ BANK_ADDR_W -1:0] wrk_sel_wide_in; - reg [ BANK_ADDR_W -1:0] wrk_sel_wide_out; - reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_in; - reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_out; - reg [ OP_ADDR_W -1:0] wrk_word_index_last; - reg [ OP_ADDR_W -1:0] wrk_word_index_last_half; - reg [UOP_OPCODE_W -1:0] wrk_opcode; - + // modexpng_general_worker general_worker ( .clk (clk), @@ -1011,384 +1012,11 @@ module modexpng_core_top // - // uOP Completion Detector - // - reg uop_exit_from_busy; - - always @* begin - // - uop_exit_from_busy = 0; - // - if (uop_opcode_is_in ) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; - if (uop_opcode_is_out ) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~wrk_ena & wrk_rdy); - if (uop_opcode_is_mmm ) uop_exit_from_busy = ~mmm_ena & mmm_rdy; - if (uop_opcode_is_wrk ) uop_exit_from_busy = ~wrk_ena & wrk_rdy; - if (uop_opcode_is_ladder) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; - // - end - - - // - // uOP Trigger Logic - // - always @(posedge clk) - // - if (rst) begin - io_mgr_ena <= 1'b0; - mmm_ena_x <= 1'b0; - mmm_ena_y <= 1'b0; - wrk_ena <= 1'b0; - end else begin - io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in || - uop_opcode_is_out || - uop_opcode_is_ladder): 1'b0; - mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; - mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; - wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || - uop_opcode_is_out ): 1'b0; - end - - // - // Parameters - // - wire uop_aux_is_1 = uop_data_aux == UOP_AUX_1; - - // TODO: Add reset to default don't care values. - - always @(posedge clk) - // - if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin - // - io_mgr_opcode <= uop_data_opcode; - wrk_opcode <= uop_data_opcode; - // - case (uop_data_opcode) - // - UOP_OPCODE_INPUT_TO_WIDE: begin - io_mgr_sel_crt <= uop_data_crt; - io_mgr_sel_aux <= uop_data_aux; - io_mgr_sel_in <= uop_data_sel_narrow_in; - io_mgr_sel_out <= uop_data_sel_wide_out; - end - // - UOP_OPCODE_INPUT_TO_NARROW: begin - io_mgr_sel_crt <= uop_data_crt; - io_mgr_sel_aux <= uop_data_aux; - io_mgr_sel_in <= uop_data_sel_narrow_in; - io_mgr_sel_out <= uop_data_sel_narrow_out; - end - // - UOP_OPCODE_OUTPUT_FROM_NARROW: begin - io_mgr_sel_crt <= uop_data_crt; - io_mgr_sel_aux <= UOP_AUX_DNC; - io_mgr_sel_in <= BANK_DNC; - io_mgr_sel_out <= uop_data_sel_narrow_out; - // - wrk_sel_narrow_in <= uop_data_sel_narrow_in; - end - // - UOP_OPCODE_MODULAR_MULTIPLY: begin - // - case (uop_data_ladder) - UOP_LADDER_00: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b00; - UOP_LADDER_11: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b11; - UOP_LADDER_D: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX; - UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= {io_mgr_ladder_p, io_mgr_ladder_q}; - endcase - // - {mmm_just_multiply_x, mmm_just_multiply_y } <= {2{1'b0}}; - {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b0}}; - {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}}; - {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }}; - {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }}; - {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }}; - {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out }}; - // - end - // - UOP_OPCODE_MODULAR_REDUCE_PROC: begin - // - {mmm_ladder_mode_x, mmm_ladder_mode_y } <= {2{1'bX }}; - // - {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b1 }}; - {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{1'b0 }}; - {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{BANK_DNC }}; - {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{BANK_DNC }}; - {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }}; - {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}}; - // - end - // - UOP_OPCODE_REGULAR_MULTIPLY: begin - // - {mmm_ladder_mode_x, mmm_ladder_mode_y } <= {2{1'b1}}; - // - {mmm_just_multiply_x, mmm_just_multiply_y } <= {2{1'b1}}; - {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b0}}; - {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}}; - {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }}; - {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }}; - {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }}; - {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out }}; - // - end - // - UOP_OPCODE_PROPAGATE_CARRIES: begin - wrk_sel_narrow_in <= uop_data_sel_narrow_in; - wrk_sel_narrow_out <= uop_data_sel_narrow_out; - end - // - UOP_OPCODE_MODULAR_SUBTRACT: begin - wrk_sel_wide_out <= uop_data_sel_wide_out; - wrk_sel_narrow_in <= uop_data_sel_narrow_in; - wrk_sel_narrow_out <= uop_data_sel_narrow_out; - end - // - UOP_OPCODE_MERGE_LH: begin - wrk_sel_narrow_out <= uop_data_sel_narrow_out; - end - // - UOP_OPCODE_COPY_CRT_Y2X, - UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y: begin - wrk_sel_wide_in <= uop_data_sel_wide_in; - wrk_sel_wide_out <= uop_data_sel_wide_out; - wrk_sel_narrow_in <= uop_data_sel_narrow_in; - wrk_sel_narrow_out <= uop_data_sel_narrow_out; - end - // - UOP_OPCODE_MODULAR_REDUCE_INIT: begin - wrk_sel_narrow_in <= uop_data_sel_narrow_in; - end - // - UOP_OPCODE_REGULAR_ADD_UNEVEN: begin - wrk_sel_wide_in <= uop_data_sel_wide_in; - wrk_sel_narrow_in <= uop_data_sel_narrow_in; - wrk_sel_narrow_out <= uop_data_sel_narrow_out; - end - // - endcase - // - end - - // - // Lengths - // - wire [OP_ADDR_W -1:0] word_index_last_n_minus1 = word_index_last_n - 1'b1; - wire [OP_ADDR_W -1:0] word_index_last_pq_minus1 = word_index_last_pq - 1'b1; - - wire uop_npq_is_n = uop_data_npq == UOP_NPQ_N; - - always @(posedge clk) - // - if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin - // - case (uop_data_opcode) - // - UOP_OPCODE_INPUT_TO_WIDE, - UOP_OPCODE_INPUT_TO_NARROW, - UOP_OPCODE_OUTPUT_FROM_NARROW: - // - io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; - // - UOP_OPCODE_MODULAR_MULTIPLY: begin - {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }}; - {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{uop_npq_is_n ? word_index_last_n_minus1 : word_index_last_pq_minus1}}; - {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }}; - end - // - UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_COPY_CRT_Y2X, - UOP_OPCODE_COPY_LADDERS_X2Y, - UOP_OPCODE_CROSS_LADDERS_X2Y: - wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; - // - UOP_OPCODE_MODULAR_REDUCE_INIT: begin - wrk_word_index_last <= word_index_last_n; - wrk_word_index_last_half <= word_index_last_pq; - end - // - UOP_OPCODE_MODULAR_REDUCE_PROC: begin - {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{word_index_last_pq }}; - {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{word_index_last_pq_minus1}}; - {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{word_index_last_pq }}; - end - // - UOP_OPCODE_REGULAR_MULTIPLY: begin - {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{word_index_last_pq }}; - {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{word_index_last_pq_minus1}}; - {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{word_index_last_pq }}; - end - // - UOP_OPCODE_MODULAR_SUBTRACT: begin - wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; - end - // - UOP_OPCODE_MERGE_LH: begin - wrk_word_index_last <= word_index_last_n; - wrk_word_index_last_half <= word_index_last_pq; - end - // - UOP_OPCODE_LADDER_INIT: begin - io_mgr_word_index_last <= OP_ADDR_LADDER_LAST; - io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n; - end - // - UOP_OPCODE_REGULAR_ADD_UNEVEN: begin - wrk_word_index_last <= word_index_last_n; - wrk_word_index_last_half <= word_index_last_pq; - end - // - UOP_OPCODE_LADDER_STEP: begin - io_mgr_word_index_last <= OP_ADDR_LADDER_LAST; - io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n; - end - // - endcase - // - end - - - - // - // FSM Process - // - always @(posedge clk) - // - if (rst) uop_fsm_state <= UOP_FSM_STATE_IDLE; - else uop_fsm_state <= uop_fsm_state_next; - - - // - // FSM Transition Logic - // - always @* begin - // - case (uop_fsm_state) - UOP_FSM_STATE_IDLE: uop_fsm_state_next = next ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_IDLE; - UOP_FSM_STATE_FETCH: uop_fsm_state_next = UOP_FSM_STATE_DECODE ; - UOP_FSM_STATE_DECODE: uop_fsm_state_next = uop_opcode_is_stop ? UOP_FSM_STATE_IDLE : UOP_FSM_STATE_BUSY; - UOP_FSM_STATE_BUSY: uop_fsm_state_next = uop_exit_from_busy ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_BUSY; - endcase - // - end - - - // - // Ready Flag Logic - // - reg valid_reg = 1'b1; - assign valid = valid_reg; - - always @(posedge clk) - // - if (rst) valid_reg <= 1'b1; - else case (uop_fsm_state) - UOP_FSM_STATE_IDLE: valid_reg <= ~next; - UOP_FSM_STATE_DECODE: valid_reg <= uop_opcode_is_stop; - endcase - - - - // - // BEGIN DEBUG - // - integer i; - always @(posedge clk) - // - if ((uop_fsm_state == UOP_FSM_STATE_DECODE) && uop_opcode_is_stop) begin - // - $display("STOP - BANKS DUMP FOLLOWS"); - // - // X.X - // - $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); - $write("X.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[0*256+i]); $write("\n"); - $write("X.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[1*256+i]); $write("\n"); - $write("X.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[2*256+i]); $write("\n"); - $write("X.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[3*256+i]); $write("\n"); - $write("X.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[4*256+i]); $write("\n"); - $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[5*256+i]); $write("\n"); - $write("X.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[6*256+i]); $write("\n"); - $write("X.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[7*256+i]); $write("\n"); - $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); - $write("X.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[0*256+i]); $write("\n"); - $write("X.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[1*256+i]); $write("\n"); - $write("X.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[2*256+i]); $write("\n"); - $write("X.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[3*256+i]); $write("\n"); - $write("X.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[4*256+i]); $write("\n"); - $write("X.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[5*256+i]); $write("\n"); - $write("X.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[6*256+i]); $write("\n"); - $write("X.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[7*256+i]); $write("\n"); - // - // X.Y - // - $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); - $write("X.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[0*256+i]); $write("\n"); - $write("X.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[1*256+i]); $write("\n"); - $write("X.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[2*256+i]); $write("\n"); - $write("X.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[3*256+i]); $write("\n"); - $write("X.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[4*256+i]); $write("\n"); - $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[5*256+i]); $write("\n"); - $write("X.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[6*256+i]); $write("\n"); - $write("X.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[7*256+i]); $write("\n"); - $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); - $write("X.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[0*256+i]); $write("\n"); - $write("X.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[1*256+i]); $write("\n"); - $write("X.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[2*256+i]); $write("\n"); - $write("X.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[3*256+i]); $write("\n"); - $write("X.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[4*256+i]); $write("\n"); - $write("X.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[5*256+i]); $write("\n"); - $write("X.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[6*256+i]); $write("\n"); - $write("X.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[7*256+i]); $write("\n"); - // - // Y.X - // - $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); - $write("Y.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[0*256+i]); $write("\n"); - $write("Y.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[1*256+i]); $write("\n"); - $write("Y.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[2*256+i]); $write("\n"); - $write("Y.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[3*256+i]); $write("\n"); - $write("Y.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[4*256+i]); $write("\n"); - $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[5*256+i]); $write("\n"); - $write("Y.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[6*256+i]); $write("\n"); - $write("Y.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[7*256+i]); $write("\n"); - $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); - $write("Y.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[0*256+i]); $write("\n"); - $write("Y.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[1*256+i]); $write("\n"); - $write("Y.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[2*256+i]); $write("\n"); - $write("Y.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[3*256+i]); $write("\n"); - $write("Y.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[4*256+i]); $write("\n"); - $write("Y.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[5*256+i]); $write("\n"); - $write("Y.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[6*256+i]); $write("\n"); - $write("Y.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[7*256+i]); $write("\n"); - // - // Y.Y - // - $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); - $write("Y.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[0*256+i]); $write("\n"); - $write("Y.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[1*256+i]); $write("\n"); - $write("Y.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[2*256+i]); $write("\n"); - $write("Y.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[3*256+i]); $write("\n"); - $write("Y.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[4*256+i]); $write("\n"); - $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[5*256+i]); $write("\n"); - $write("Y.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[6*256+i]); $write("\n"); - $write("Y.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[7*256+i]); $write("\n"); - $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); - $write("Y.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[0*256+i]); $write("\n"); - $write("Y.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[1*256+i]); $write("\n"); - $write("Y.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[2*256+i]); $write("\n"); - $write("Y.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[3*256+i]); $write("\n"); - $write("Y.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[4*256+i]); $write("\n"); - $write("Y.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[5*256+i]); $write("\n"); - $write("Y.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[6*256+i]); $write("\n"); - $write("Y.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[7*256+i]); $write("\n"); - // - end - - // - // END DEBUG + // Optional Debug Facility // - + `ifdef MODEXPNG_ENABLE_DEBUG + `include "modexpng_core_top_debug.vh" + `endif + endmodule diff --git a/rtl/modexpng_core_top_debug.vh b/rtl/modexpng_core_top_debug.vh new file mode 100644 index 0000000..7f548c0 --- /dev/null +++ b/rtl/modexpng_core_top_debug.vh @@ -0,0 +1,93 @@ +integer i; + +always @(posedge clk) + // + if (uop_decoded_stop) begin + // + $display("OPCODE == STOP: BANKS DUMP FOLLOWS"); + // + // X.X + // + $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); + $write("X.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[0*256+i]); $write("\n"); + $write("X.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[1*256+i]); $write("\n"); + $write("X.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[2*256+i]); $write("\n"); + $write("X.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[3*256+i]); $write("\n"); + $write("X.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[4*256+i]); $write("\n"); + $write("X.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[5*256+i]); $write("\n"); + $write("X.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[6*256+i]); $write("\n"); + $write("X.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[7*256+i]); $write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); + $write("X.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[0*256+i]); $write("\n"); + $write("X.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[1*256+i]); $write("\n"); + $write("X.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[2*256+i]); $write("\n"); + $write("X.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[3*256+i]); $write("\n"); + $write("X.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[4*256+i]); $write("\n"); + $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[5*256+i]); $write("\n"); + $write("X.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[6*256+i]); $write("\n"); + $write("X.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[7*256+i]); $write("\n"); + // + // X.Y + // + $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); + $write("X.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[0*256+i]); $write("\n"); + $write("X.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[1*256+i]); $write("\n"); + $write("X.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[2*256+i]); $write("\n"); + $write("X.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[3*256+i]); $write("\n"); + $write("X.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[4*256+i]); $write("\n"); + $write("X.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[5*256+i]); $write("\n"); + $write("X.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[6*256+i]); $write("\n"); + $write("X.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[7*256+i]); $write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); + $write("X.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[0*256+i]); $write("\n"); + $write("X.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[1*256+i]); $write("\n"); + $write("X.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[2*256+i]); $write("\n"); + $write("X.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[3*256+i]); $write("\n"); + $write("X.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[4*256+i]); $write("\n"); + $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[5*256+i]); $write("\n"); + $write("X.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[6*256+i]); $write("\n"); + $write("X.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[7*256+i]); $write("\n"); + // + // Y.X + // + $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); + $write("Y.X.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[0*256+i]); $write("\n"); + $write("Y.X.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[1*256+i]); $write("\n"); + $write("Y.X.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[2*256+i]); $write("\n"); + $write("Y.X.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[3*256+i]); $write("\n"); + $write("Y.X.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[4*256+i]); $write("\n"); + $write("Y.X.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[5*256+i]); $write("\n"); + $write("Y.X.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[6*256+i]); $write("\n"); + $write("Y.X.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[7*256+i]); $write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); + $write("Y.X.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[0*256+i]); $write("\n"); + $write("Y.X.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[1*256+i]); $write("\n"); + $write("Y.X.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[2*256+i]); $write("\n"); + $write("Y.X.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[3*256+i]); $write("\n"); + $write("Y.X.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[4*256+i]); $write("\n"); + $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[5*256+i]); $write("\n"); + $write("Y.X.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[6*256+i]); $write("\n"); + $write("Y.X.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[7*256+i]); $write("\n"); + // + // Y.Y + // + $write(" "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i); $write("\n"); + $write("Y.Y.WIDE.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[0*256+i]); $write("\n"); + $write("Y.Y.WIDE.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[1*256+i]); $write("\n"); + $write("Y.Y.WIDE.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[2*256+i]); $write("\n"); + $write("Y.Y.WIDE.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[3*256+i]); $write("\n"); + $write("Y.Y.WIDE.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[4*256+i]); $write("\n"); + $write("Y.Y.WIDE.N: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[5*256+i]); $write("\n"); + $write("Y.Y.WIDE.L: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[6*256+i]); $write("\n"); + $write("Y.Y.WIDE.H: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[7*256+i]); $write("\n"); + $write(" "); for (i=0; i<64; i=i+1) $write(" ------ "); $write("\n"); + $write("Y.Y.NARROW.A: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[0*256+i]); $write("\n"); + $write("Y.Y.NARROW.B: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[1*256+i]); $write("\n"); + $write("Y.Y.NARROW.C: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[2*256+i]); $write("\n"); + $write("Y.Y.NARROW.D: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[3*256+i]); $write("\n"); + $write("Y.Y.NARROW.E: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[4*256+i]); $write("\n"); + $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[5*256+i]); $write("\n"); + $write("Y.Y.NARROW.Q: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[6*256+i]); $write("\n"); + $write("Y.Y.NARROW.EXT: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[7*256+i]); $write("\n"); + // + end diff --git a/rtl/modexpng_io_block.v b/rtl/modexpng_io_block.v index d7dd72e..06ce2b1 100644 --- a/rtl/modexpng_io_block.v +++ b/rtl/modexpng_io_block.v @@ -26,6 +26,7 @@ module modexpng_io_block // Headers // `include "modexpng_parameters.vh" + `include "modexpng_storage_primitives.vh" // @@ -81,21 +82,21 @@ module modexpng_io_block wire [ BUS_DATA_W -1:0] bus_data_rd_input_1; wire [ BUS_DATA_W -1:0] bus_data_rd_output; - wire bus_data_wr_input_1 = bus_data_wr && (bus_addr_msb == 2'd0); - wire bus_data_wr_input_2 = bus_data_wr && (bus_addr_msb == 2'd1); + wire bus_we_input_1 = bus_we && (bus_addr_msb == 2'd0); + wire bus_we_input_2 = bus_we && (bus_addr_msb == 2'd1); wire bus_cs_input_1 = bus_cs && (bus_addr_msb == 2'b00); wire bus_cs_input_2 = bus_cs && (bus_addr_msb == 2'b01); wire bus_cs_output = bus_cs && (bus_addr_msb == 2'b10); /* INPUT_1 */ - modexpng_tdp_36k_x16_x32_wrapper bram_input_1 + `MODEXPNG_TDP_36K_X16_X32 bram_input_1 ( .clk (clk), // core clock .clk_bus (clk_bus), // bus clock .ena (bus_cs_input_1), // bus side read-write - .wea (bus_data_wr_input_1), // + .wea (bus_we_input_1), // .addra (bus_addr_lsb), // .dina (bus_data_wr), // .douta (bus_data_rd_input_1), // @@ -108,13 +109,13 @@ module modexpng_io_block /* INPUT_2 */ - modexpng_sdp_36k_x16_x32_wrapper bram_input_2 + `MODEXPNG_SDP_36K_X16_X32 bram_input_2 ( .clk (clk), // core clock .clk_bus (clk_bus), // bus clock .ena (bus_cs_input_2), // bus side write-only - .wea (bus_data_wr_input_2), // + .wea (bus_we_input_2), // .addra (bus_addr_lsb), // .dina (bus_data_wr), // @@ -126,7 +127,7 @@ module modexpng_io_block /* OUTPUT */ - modexpng_sdp_36k_x32_x16_wrapper bram_output + `MODEXPNG_SDP_36K_X32_X16 bram_output ( .clk (clk), // core clock .clk_bus (clk_bus), // bus clock diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v index 59f4709..da2bdac 100644 --- a/rtl/modexpng_io_manager.v +++ b/rtl/modexpng_io_manager.v @@ -52,8 +52,8 @@ module modexpng_io_manager io_out_addr, io_out_dout, - wrk_narrow_x_din_x_trunc, - wrk_narrow_x_din_y_trunc, + wrk_narrow_x_din_x_lsb, + wrk_narrow_x_din_y_lsb, ladder_steps, ladder_d, @@ -124,8 +124,8 @@ module modexpng_io_manager output [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr; output [ WORD_W -1:0] io_out_dout; - output [ WORD_W -1:0] wrk_narrow_x_din_x_trunc; - output [ WORD_W -1:0] wrk_narrow_x_din_y_trunc; + input [ WORD_W -1:0] wrk_narrow_x_din_x_lsb; + input [ WORD_W -1:0] wrk_narrow_x_din_y_lsb; input [ BIT_INDEX_W -1:0] ladder_steps; output ladder_d; @@ -481,7 +481,7 @@ module modexpng_io_manager if (opcode_is_input_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}}; if (opcode_is_input_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}}; // - if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_trunc : wrk_narrow_x_din_y_trunc; + if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_lsb: wrk_narrow_x_din_y_lsb; // end // @@ -490,7 +490,7 @@ module modexpng_io_manager if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}}; if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}}; // - if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_trunc : wrk_narrow_x_din_y_trunc; + if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_lsb : wrk_narrow_x_din_y_lsb; // end // @@ -697,14 +697,18 @@ module modexpng_io_manager // // BEGIN DEBUG // + `ifdef MODEXPNG_ENABLE_DEBUG always @(posedge clk) // - if ((io_fsm_state == IO_FSM_STATE_STOP) && opcode_is_ladder) - $display("[%4d] / %d / %d / %d", ladder_index, ladder_d_r, ladder_p_r, ladder_q_r); - - // - // END DEBUG - // + if (io_fsm_state == IO_FSM_STATE_STOP) begin + if (opcode_is_ladder_init) begin + $display("[step] | D | P | Q"); + $display("-------+---+---+---"); + end else if (opcode_is_ladder_step) + $display("[%4d] | %d | %d | %d", ladder_index, ladder_d_r, ladder_p_r, ladder_q_r); + end + // + `endif endmodule diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh index 6296e9b..9d7099e 100644 --- a/rtl/modexpng_microcode.vh +++ b/rtl/modexpng_microcode.vh @@ -5,64 +5,61 @@ localparam UOP_AUX_W = 1; localparam UOP_LADDER_W = 2; localparam UOP_SEL_W = 4 * BANK_ADDR_W; -localparam UOP_ADDR_W = 6; // 64 instructions - -localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_USING_CRT = 6'd0; -localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_WITHOUT_CRT = 6'd31; +localparam UOP_ADDR_W = 7; // 128 instructions +localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_USING_CRT = 7'd0; +localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_WITHOUT_CRT = 7'd64; // 5 1 1 1 2 4*3=12 localparam UOP_W = UOP_OPCODE_W + UOP_CRT_W + UOP_NPQ_W + UOP_AUX_W + UOP_LADDER_W + UOP_SEL_W; // [21:17] [16] [15] [14] [13:12] [11:9][8:6][5:3][2:0] +// // OPCODE -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 5'd0; +// +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 5'd00; /* all fields are don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 5'd1; -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 5'd2; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 5'd01; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 5'd02; /* CRT tells into which of the dual MMM to write * NPQ specifies the width of the operand * AUX specifies from which INPUT to read - * LADDER is don't care - * source WIDE is always don't care - * destination NARROW is don't care for _WIDE opcode and vice versa - * -*/ + * LADDER is don't care + */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 5'd3; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 5'd03; /* CRT tells from which of the dual MMM to read * NPQ specifies the width of the operand * AUX is don't care * LADDER is don't care - * source and destination WIDE are don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 5'd4; -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd5; -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_CROSS_LADDERS_X2Y = 5'd7; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 5'd04; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd05; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_CROSS_LADDERS_X2Y = 5'd06; /* CRT is don't care * NPQ specifies the width of the operand * AUX is don't care * LADDER is don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 5'd8; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 5'd07; /* CRT is don't care * NPQ specifies the width of the operand * AUX = AUX_2 forces B input to 1 (AUX_1 reads from source NARROW as usual) * LADDER specifies Montgomery ladder mode */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_SUBTRACT = 5'd9; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_SUBTRACT = 5'd08; /* CRT is don't care * NPQ specifies the width of the operand * AUX is don't care * LADDER is don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_INIT = 5'd10; -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_PROC = 5'd11; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_INIT = 5'd09; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_PROC = 5'd10; /* CRT * NPQ * AUX @@ -70,7 +67,7 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_PROC = 5'd11; */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 5'd12; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 5'd11; /* CRT is don't care * NPQ specifies the width of the operand * AUX is don't care @@ -78,43 +75,52 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 5'd12; * source and destination WIDE are don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MERGE_LH = 5'd13; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MERGE_LH = 5'd12; +/* + */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_REGULAR_MULTIPLY = 5'd14; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_REGULAR_MULTIPLY = 5'd13; +/* + */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_REGULAR_ADD_UNEVEN = 5'd15; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_REGULAR_ADD_UNEVEN = 5'd14; +/* + */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_INIT = 5'd16; -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_STEP = 5'd17; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_INIT = 5'd15; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_STEP = 5'd16; /* CRT is don't care * NPQ is don't care * AUX is don't care * LADDER is don't care - * WIDE and NARROW are don't care */ +// // CRT +// localparam [UOP_CRT_W -1:0] UOP_CRT_X = 1'b0; localparam [UOP_CRT_W -1:0] UOP_CRT_Y = 1'b1; localparam [UOP_CRT_W -1:0] UOP_CRT_DNC = 1'bX; +// // NPQ +// localparam [UOP_NPQ_W -1:0] UOP_NPQ_N = 1'b0; localparam [UOP_NPQ_W -1:0] UOP_NPQ_PQ = 1'b1; localparam [UOP_NPQ_W -1:0] UOP_NPQ_DNC = 1'bX; +// // AUX +// localparam [UOP_AUX_W -1:0] UOP_AUX_1 = 1'b0; localparam [UOP_AUX_W -1:0] UOP_AUX_2 = 1'b1; localparam [UOP_AUX_W -1:0] UOP_AUX_DNC = 1'bX; +// // LADDER +// localparam [UOP_LADDER_W -1:0] UOP_LADDER_00 = 2'b00; localparam [UOP_LADDER_W -1:0] UOP_LADDER_11 = 2'b11; localparam [UOP_LADDER_W -1:0] UOP_LADDER_D = 2'b01; localparam [UOP_LADDER_W -1:0] UOP_LADDER_PQ = 2'b10; localparam [UOP_LADDER_W -1:0] UOP_LADDER_DNC = 2'bXX; - - -// SEL -localparam [UOP_SEL_W -1:0] UOP_SEL_DNC_ALL = {4{BANK_DNC}}; diff --git a/rtl/_modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh index 1c2a57b..1c2a57b 100644 --- a/rtl/_modexpng_mmm_fsm.vh +++ b/rtl/modexpng_mmm_fsm.vh diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh index c7566ad..7d2837d 100644 --- a/rtl/modexpng_parameters.vh +++ b/rtl/modexpng_parameters.vh @@ -1,42 +1,36 @@ +// +// Headers +// `include "modexpng_parameters_x8.vh" - -function integer cryptech_clog2; - input integer value; - integer temp_value; - integer result; - // - begin - temp_value = value - 1; - for (result = 0; temp_value > 0; result = result + 1) - temp_value = temp_value >> 1; - cryptech_clog2 = result; - end - // -endfunction - -localparam MAX_OP_W = 4096; - -localparam WORD_W = 16; -localparam WORD_EXT_W = 18; -localparam MAC_W = 47; - -localparam BUS_DATA_W = 32; +`include "cryptech_clog2.vh" + +// +// Debug Settings +// + +/* Only define this for simulation, must be turned off to synthesize properly! */ +`define MODEXPNG_ENABLE_DEBUG + +// +// Core Settings +// +localparam MAX_OP_W = 4096; +localparam WORD_W = 16; +localparam WORD_EXT_W = 18; +localparam MAC_W = 47; +localparam BUS_DATA_W = 32; localparam BUS_OP_ADDR_W = cryptech_clog2(MAX_OP_W / BUS_DATA_W); - -localparam BIT_INDEX_W = cryptech_clog2(MAX_OP_W); - -localparam BANK_ADDR_W = 3; -localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W); -localparam COL_INDEX_W = OP_ADDR_W - cryptech_clog2(NUM_MULTS); - -localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS); - -localparam CARRY_W = WORD_EXT_W - WORD_W; - -localparam WORD_MUX_W = cryptech_clog2(WORD_W); - -localparam [CARRY_W-1:0] CARRY_ZERO = {CARRY_W{1'b0}}; - +localparam BIT_INDEX_W = cryptech_clog2(MAX_OP_W); +localparam BANK_ADDR_W = 3; +localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W); +localparam COL_INDEX_W = OP_ADDR_W - cryptech_clog2(NUM_MULTS); +localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS); +localparam CARRY_W = WORD_EXT_W - WORD_W; +localparam WORD_MUX_W = cryptech_clog2(WORD_W); + +// +// Wide Bank +// localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0; localparam [BANK_ADDR_W-1:0] BANK_WIDE_B = 3'd1; localparam [BANK_ADDR_W-1:0] BANK_WIDE_C = 3'd2; @@ -46,19 +40,28 @@ localparam [BANK_ADDR_W-1:0] BANK_WIDE_N = 3'd5; localparam [BANK_ADDR_W-1:0] BANK_WIDE_L = 3'd6; localparam [BANK_ADDR_W-1:0] BANK_WIDE_H = 3'd7; -localparam [BANK_ADDR_W-1:0] BANK_NARROW_A = 3'd0; -localparam [BANK_ADDR_W-1:0] BANK_NARROW_B = 3'd1; -localparam [BANK_ADDR_W-1:0] BANK_NARROW_C = 3'd2; -localparam [BANK_ADDR_W-1:0] BANK_NARROW_D = 3'd3; -localparam [BANK_ADDR_W-1:0] BANK_NARROW_E = 3'd4; -localparam [BANK_ADDR_W-1:0] BANK_NARROW_COEFF = 3'd5; -localparam [BANK_ADDR_W-1:0] BANK_NARROW_Q = 3'd6; -localparam [BANK_ADDR_W-1:0] BANK_NARROW_EXT = 3'd7; // [0] -> COEFF', [1] -> Q' - +// +// Narrow Bank +// +localparam [BANK_ADDR_W-1:0] BANK_NARROW_A = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_B = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_C = 3'd2; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_D = 3'd3; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_E = 3'd4; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_COEFF = 3'd5; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_Q = 3'd6; +localparam [BANK_ADDR_W-1:0] BANK_NARROW_EXT = 3'd7; // [0] -> COEFF', [1] -> Q' + +// +// Recombinator Bank +// localparam [BANK_ADDR_W-1:0] BANK_RCMB_ML = 3'd0; localparam [BANK_ADDR_W-1:0] BANK_RCMB_MH = 3'd1; localparam [BANK_ADDR_W-1:0] BANK_RCMB_EXT = 3'd2; // [0] -> MH' +// +// Input Bank #1 +// localparam [BANK_ADDR_W-1:0] BANK_IN_1_M = 3'd0; localparam [BANK_ADDR_W-1:0] BANK_IN_1_N = 3'd1; localparam [BANK_ADDR_W-1:0] BANK_IN_1_N_FACTOR = 3'd2; @@ -67,6 +70,9 @@ localparam [BANK_ADDR_W-1:0] BANK_IN_1_EXT = 3'd4; // [0] -> N_COEFF'*/ localparam [BANK_ADDR_W-1:0] BANK_IN_1_X = 3'd5; localparam [BANK_ADDR_W-1:0] BANK_IN_1_Y = 3'd6; +// +// Input Bank #2 +// localparam [BANK_ADDR_W-1:0] BANK_IN_2_D = 3'd0; localparam [BANK_ADDR_W-1:0] BANK_IN_2_P = 3'd1; localparam [BANK_ADDR_W-1:0] BANK_IN_2_P_FACTOR = 3'd2; @@ -76,30 +82,59 @@ localparam [BANK_ADDR_W-1:0] BANK_IN_2_Q_FACTOR = 3'd5; localparam [BANK_ADDR_W-1:0] BANK_IN_2_Q_COEFF = 3'd6; localparam [BANK_ADDR_W-1:0] BANK_IN_2_QINV = 3'd7; -localparam [BANK_ADDR_W-1:0] BANK_OUT_S = 3'd0; -localparam [BANK_ADDR_W-1:0] BANK_OUT_XM = 3'd1; -localparam [BANK_ADDR_W-1:0] BANK_OUT_YM = 3'd2; +// +// Output Bank +// +localparam [BANK_ADDR_W-1:0] BANK_OUT_S = 3'd0; +localparam [BANK_ADDR_W-1:0] BANK_OUT_XM = 3'd1; +localparam [BANK_ADDR_W-1:0] BANK_OUT_YM = 3'd2; -localparam [BANK_ADDR_W-1:0] BANK_DNC = {BANK_ADDR_W{1'bX}}; +// +// Carry Values +// +localparam [CARRY_W-1:0] CARRY_ZERO = {CARRY_W{1'b0}}; -localparam [OP_ADDR_W-1:0] OP_ADDR_LADDER_LAST = 3; // 0..3, i.e. <dummy>, D, P, Q +// +// Bank Values +// +localparam [ BANK_ADDR_W-1:0] BANK_DNC = {BANK_ADDR_W{1'bX}}; +localparam [4*BANK_ADDR_W-1:0] BANK_DNC_ALL = {4{BANK_DNC}}; +// +// Operand Address Values +// +localparam [OP_ADDR_W-1:0] OP_ADDR_LADDER_LAST = 3; // 0..3, i.e. <dummy>, D, P, Q +localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0; +localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1; +localparam [OP_ADDR_W-1:0] OP_ADDR_ZERO = {OP_ADDR_W{1'b0}}; +localparam [OP_ADDR_W-1:0] OP_ADDR_ONE = {{(OP_ADDR_W-1){1'b0}}, 1'b1}; +localparam [OP_ADDR_W-1:0] OP_ADDR_DNC = {OP_ADDR_W{1'bX}}; + +// +// Bit Index Values +// localparam [BIT_INDEX_W-1:0] BIT_INDEX_ZERO = {BIT_INDEX_W{1'b0}}; +localparam [BIT_INDEX_W-1:0] BIT_INDEX_DNC = {BIT_INDEX_W{1'bX}}; -localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0; -localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1; - -localparam [OP_ADDR_W-1:0] OP_ADDR_ZERO = {OP_ADDR_W{1'b0}}; -localparam [OP_ADDR_W-1:0] OP_ADDR_ONE = {{(OP_ADDR_W-1){1'b0}}, 1'b1}; -localparam [OP_ADDR_W-1:0] OP_ADDR_DNC = {OP_ADDR_W{1'bX}}; - +// +// Word Values +// localparam [WORD_W-1:0] WORD_ZERO = {WORD_W{1'b0}}; localparam [WORD_W-1:0] WORD_DNC = {WORD_W{1'bX}}; localparam [WORD_W-1:0] WORD_ONE = {{(WORD_W-1){1'b0}}, 1'b1}; +// +// Extended Word Values +// localparam [WORD_EXT_W-1:0] WORD_EXT_ZERO = {WORD_EXT_W{1'b0}}; localparam [WORD_EXT_W-1:0] WORD_EXT_DNC = {WORD_EXT_W{1'bX}}; +// +// MAC Index Values +// localparam [MAC_INDEX_W-1:0] MAC_INDEX_DNC = {MAC_INDEX_W{1'bX}}; +// +// Multiplier Bitmap Values +// localparam [NUM_MULTS-1:0] MULT_BITMAP_ZEROES = {NUM_MULTS{1'b0}}; diff --git a/rtl/_modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v index b72395e..0736072 100644 --- a/rtl/_modexpng_recombinator_cell.v +++ b/rtl/modexpng_recombinator_cell.v @@ -8,7 +8,7 @@ module modexpng_recombinator_cell // // Headers // - `include "modexpng_parameters.vh" + `include "../rtl/modexpng_parameters.vh" // // Ports @@ -34,7 +34,7 @@ module modexpng_recombinator_cell if (ce) begin z <= din_z; y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z}; - x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {WORD_NULL, x[WORD_EXT_W-1:WORD_W]}; + x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {WORD_ZERO, x[WORD_EXT_W-1:WORD_W]}; end endmodule diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v index c100b8b..c9de32d 100644 --- a/rtl/modexpng_reductor.v +++ b/rtl/modexpng_reductor.v @@ -4,7 +4,7 @@ module modexpng_reductor ena, rdy, word_index_last, sel_wide_out, sel_narrow_out, - rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_din_aux, rd_wide_y_din_aux, + /*rd_wide_xy_addr_aux, rd_wide_xy_bank_aux,*/ rd_wide_x_din_aux, rd_wide_y_din_aux, rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_din, rcmb_final_y_din, rcmb_final_xy_valid, rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_dout, rdct_wide_y_dout, rdct_wide_xy_valid, rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid @@ -39,8 +39,8 @@ module modexpng_reductor input [ 7:0] rd_narrow_xy_addr; input [ 1:0] rd_narrow_xy_bank; */ - input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; - input [ 7:0] rd_wide_xy_addr_aux; + //input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + //input [ 7:0] rd_wide_xy_addr_aux; input [ 17:0] rd_wide_x_din_aux; input [ 17:0] rd_wide_y_din_aux; // diff --git a/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v b/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v new file mode 100644 index 0000000..034b00b --- /dev/null +++ b/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v @@ -0,0 +1,75 @@ +module modexpng_sdp_36k_x16_x32_wrapper_generic +( + clk, clk_bus, + + ena, wea, + addra, dina, + + enb, regceb, + addrb, doutb +); + + + // + // Headers + // + `include "modexpng_parameters.vh" + + + // + // Ports + // + input clk; + input clk_bus; + + input ena; + input wea; + input [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addra; + input [ BUS_DATA_W -1:0] dina; + + input enb; + input regceb; + input [BANK_ADDR_W + OP_ADDR_W -1:0] addrb; + output [ WORD_W -1:0] doutb; + + + // + // BRAM_SDP_MACRO + // + BRAM_SDP_MACRO # + ( + .DEVICE ("7SERIES"), + + .BRAM_SIZE ("36Kb"), + + .WRITE_WIDTH (BUS_DATA_W), + .READ_WIDTH (WORD_W), + + .DO_REG (1), + .WRITE_MODE ("READ_FIRST"), + + .SRVAL (72'h000000000000000000), + .INIT (72'h000000000000000000), + + .INIT_FILE ("NONE"), + .SIM_COLLISION_CHECK ("NONE") + ) + BRAM_SDP_MACRO_inst + ( + .RST (1'b0), + + .WRCLK (clk_bus), + .WREN (ena), + .WE ({4{wea}}), + .WRADDR (addra), + .DI (dina), + + .RDCLK (clk), + .RDEN (enb), + .REGCE (regceb), + .RDADDR (addrb), + .DO (doutb) + ); + + +endmodule diff --git a/rtl/modexpng_sdp_36k_x16_x32_wrapper.v b/rtl/modexpng_sdp_36k_x16_x32_wrapper_xilinx.v index 4c6fe46..63c0519 100644 --- a/rtl/modexpng_sdp_36k_x16_x32_wrapper.v +++ b/rtl/modexpng_sdp_36k_x16_x32_wrapper_xilinx.v @@ -1,4 +1,4 @@ -module modexpng_sdp_36k_x16_x32_wrapper +module modexpng_sdp_36k_x16_x32_wrapper_xilinx ( clk, clk_bus, diff --git a/rtl/modexpng_sdp_36k_x18_wrapper.v b/rtl/modexpng_sdp_36k_x18_wrapper_generic.v index ded9425..84c8e09 100644 --- a/rtl/modexpng_sdp_36k_x18_wrapper.v +++ b/rtl/modexpng_sdp_36k_x18_wrapper_generic.v @@ -1,4 +1,4 @@ -module modexpng_sdp_36k_x18_wrapper +module modexpng_sdp_36k_x18_wrapper_generic ( clk, diff --git a/rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v b/rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v index b9e40ae..cea69ae 100644 --- a/rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v +++ b/rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v @@ -1,4 +1,4 @@ -module modexpng_sdp_36k_x18_wrapper +module modexpng_sdp_36k_x18_wrapper_xilinx ( clk, diff --git a/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v b/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v new file mode 100644 index 0000000..c74daac --- /dev/null +++ b/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v @@ -0,0 +1,73 @@ +module modexpng_sdp_36k_x32_x16_wrapper_generic +( + clk, clk_bus, + + ena, wea, + addra, dina, + + enb, + addrb, doutb +); + + + // + // Headers + // + `include "modexpng_parameters.vh" + + + // + // Ports + // + input clk; + input clk_bus; + + input ena; + input wea; + input [BANK_ADDR_W + OP_ADDR_W -1:0] addra; + input [ WORD_W -1:0] dina; + + input enb; + input [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addrb; + output [ BUS_DATA_W -1:0] doutb; + + + // + // BRAM_SDP_MACRO + // + BRAM_SDP_MACRO # + ( + .DEVICE ("7SERIES"), + + .BRAM_SIZE ("36Kb"), + + .WRITE_WIDTH (WORD_W), + .READ_WIDTH (BUS_DATA_W), + + .DO_REG (0), + .WRITE_MODE ("READ_FIRST"), + + .SRVAL (72'h000000000000000000), + .INIT (72'h000000000000000000), + + .INIT_FILE ("NONE"), + .SIM_COLLISION_CHECK ("NONE") + ) + BRAM_SDP_MACRO_inst + ( + .RST (1'b0), + + .WRCLK (clk), + .WREN (ena), + .WE ({2{wea}}), + .WRADDR (addra), + .DI (dina), + + .RDCLK (clk_bus), + .RDEN (enb), + .REGCE (1'b0), + .RDADDR (addrb), + .DO (doutb) + ); + +endmodule diff --git a/rtl/modexpng_sdp_36k_x32_x16_wrapper.v b/rtl/modexpng_sdp_36k_x32_x16_wrapper_xilinx.v index ff86802..693926f 100644 --- a/rtl/modexpng_sdp_36k_x32_x16_wrapper.v +++ b/rtl/modexpng_sdp_36k_x32_x16_wrapper_xilinx.v @@ -1,4 +1,4 @@ -module modexpng_sdp_36k_x32_x16_wrapper +module modexpng_sdp_36k_x32_x16_wrapper_xilinx ( clk, clk_bus, diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v index 19601ef..5a03b24 100644 --- a/rtl/modexpng_storage_block.v +++ b/rtl/modexpng_storage_block.v @@ -48,6 +48,7 @@ module modexpng_storage_block // Headers // `include "modexpng_parameters.vh" + `include "modexpng_storage_primitives.vh" // @@ -150,7 +151,7 @@ module modexpng_storage_block // assign rd_wide_xy_offset[z] = {1'b0, rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]}; // - modexpng_sdp_36k_x18_wrapper wide_x + `MODEXPNG_SDP_36K_X18 wide_x ( .clk (clk), @@ -165,7 +166,7 @@ module modexpng_storage_block .doutb (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W]) ); // - modexpng_sdp_36k_x18_wrapper wide_y + `MODEXPNG_SDP_36K_X18 wide_y ( .clk (clk), @@ -186,7 +187,7 @@ module modexpng_storage_block // // Worker "Wide" Storage // - modexpng_sdp_36k_x18_wrapper wrk_wide_x + `MODEXPNG_SDP_36K_X18 wrk_wide_x ( .clk (clk), @@ -201,7 +202,7 @@ module modexpng_storage_block .doutb (wrk_wide_x_dout) ); // - modexpng_sdp_36k_x18_wrapper wrk_wide_y + `MODEXPNG_SDP_36K_X18 wrk_wide_y ( .clk (clk), @@ -219,7 +220,7 @@ module modexpng_storage_block // // Auxilary "Wide" Storage // - modexpng_sdp_36k_x18_wrapper wide_x_aux + `MODEXPNG_SDP_36K_X18 wide_x_aux ( .clk (clk), @@ -234,7 +235,7 @@ module modexpng_storage_block .doutb (rd_wide_x_dout_aux) ); // - modexpng_sdp_36k_x18_wrapper wide_y_aux + `MODEXPNG_SDP_36K_X18 wide_y_aux ( .clk (clk), @@ -252,7 +253,7 @@ module modexpng_storage_block // // "Narrow" Storage // - modexpng_sdp_36k_x18_wrapper narrow_x + `MODEXPNG_SDP_36K_X18 narrow_x ( .clk (clk), @@ -267,7 +268,7 @@ module modexpng_storage_block .doutb (rd_narrow_x_dout) ); - modexpng_sdp_36k_x18_wrapper narrow_y + `MODEXPNG_SDP_36K_X18 narrow_y ( .clk (clk), @@ -285,7 +286,7 @@ module modexpng_storage_block // // Worker "Narrow" Storage // - modexpng_sdp_36k_x18_wrapper wrk_narrow_x + `MODEXPNG_SDP_36K_X18 wrk_narrow_x ( .clk (clk), @@ -300,7 +301,7 @@ module modexpng_storage_block .doutb (wrk_narrow_x_dout) ); - modexpng_sdp_36k_x18_wrapper wrk_narrow_y + `MODEXPNG_SDP_36K_X18 wrk_narrow_y ( .clk (clk), diff --git a/rtl/modexpng_storage_primitives.vh b/rtl/modexpng_storage_primitives.vh new file mode 100644 index 0000000..34dea2a --- /dev/null +++ b/rtl/modexpng_storage_primitives.vh @@ -0,0 +1,15 @@ +`ifndef MODEXPNG_ENABLE_DEBUG + +`define MODEXPNG_SDP_36K_X18 modexpng_sdp_36k_x18_wrapper_xilinx +`define MODEXPNG_SDP_36K_X32_X16 modexpng_sdp_36k_x32_x16_wrapper_xilinx +`define MODEXPNG_SDP_36K_X16_X32 modexpng_sdp_36k_x16_x32_wrapper_xilinx +`define MODEXPNG_TDP_36K_X16_X32 modexpng_tdp_36k_x16_x32_wrapper_xilinx + +`else + +`define MODEXPNG_SDP_36K_X18 modexpng_sdp_36k_x18_wrapper_generic +`define MODEXPNG_SDP_36K_X32_X16 modexpng_sdp_36k_x32_x16_wrapper_generic +`define MODEXPNG_SDP_36K_X16_X32 modexpng_sdp_36k_x16_x32_wrapper_generic +`define MODEXPNG_TDP_36K_X16_X32 modexpng_tdp_36k_x16_x32_wrapper_generic + +`endif diff --git a/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v b/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v new file mode 100644 index 0000000..5e69bef --- /dev/null +++ b/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v @@ -0,0 +1,88 @@ +module modexpng_tdp_36k_x16_x32_wrapper_generic +( + clk, clk_bus, + + ena, wea, + addra, dina, douta, + + enb, regceb, + addrb, doutb +); + + + // + // Headers + // + `include "modexpng_parameters.vh" + + + // + // Ports + // + input clk; + input clk_bus; + + input ena; + input wea; + input [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addra; + input [ BUS_DATA_W -1:0] dina; + output [ BUS_DATA_W -1:0] douta; + + input enb; + input regceb; + input [BANK_ADDR_W + OP_ADDR_W -1:0] addrb; + output [ WORD_W -1:0] doutb; + + + // + // BRAM_TDP_MACRO + // + BRAM_TDP_MACRO # + ( + .DEVICE ("7SERIES"), + .BRAM_SIZE ("36Kb"), + + .WRITE_WIDTH_A (BUS_DATA_W), + .READ_WIDTH_A (BUS_DATA_W), + + .WRITE_WIDTH_B (WORD_W), + .READ_WIDTH_B (WORD_W), + + .DOA_REG (0), + .DOB_REG (1), + + .WRITE_MODE_A ("READ_FIRST"), + .WRITE_MODE_B ("READ_FIRST"), + + .SRVAL_A (36'h000000000), + .SRVAL_B (36'h000000000), + + .INIT_A (36'h000000000), + .INIT_B (36'h000000000), + + .INIT_FILE ("NONE"), + .SIM_COLLISION_CHECK ("NONE") + ) + BRAM_TDP_MACRO_inst + ( + .RSTA (1'b0), + .RSTB (1'b0), + + .CLKA (clk_bus), + .ENA (ena), + .REGCEA (1'b0), + .WEA ({4{wea}}), + .ADDRA (addra), + .DIA (dina), + .DOA (douta), + + .CLKB (clk), + .ENB (enb), + .REGCEB (regceb), + .WEB ({2{1'b0}}), + .ADDRB (addrb), + .DIB ({WORD_W{1'b0}}), + .DOB (doutb) + ); + +endmodule diff --git a/rtl/modexpng_tdp_36k_x16_x32_wrapper.v b/rtl/modexpng_tdp_36k_x16_x32_wrapper_xilinx.v index 40930f3..81bcb85 100644 --- a/rtl/modexpng_tdp_36k_x16_x32_wrapper.v +++ b/rtl/modexpng_tdp_36k_x16_x32_wrapper_xilinx.v @@ -1,4 +1,4 @@ -module modexpng_tdp_36k_x16_x32_wrapper +module modexpng_tdp_36k_x16_x32_wrapper_xilinx ( clk, clk_bus, diff --git a/rtl/modexpng_uop_engine.v b/rtl/modexpng_uop_engine.v new file mode 100644 index 0000000..a87d924 --- /dev/null +++ b/rtl/modexpng_uop_engine.v @@ -0,0 +1,658 @@ +module modexpng_uop_engine +( + clk, + rst, + + ena, + rdy, + + `ifdef MODEXPNG_ENABLE_DEBUG + uop_decoded_stop, + `endif + + crt_mode, + + word_index_last_n, + word_index_last_pq, + + bit_index_last_n, + bit_index_last_pq, + + io_mgr_ena, + io_mgr_rdy, + io_mgr_sel_crt, + io_mgr_sel_aux, + io_mgr_sel_in, + io_mgr_sel_out, + io_mgr_word_index_last, + io_mgr_opcode, + io_mgr_ladder_steps, + io_mgr_ladder_d, + io_mgr_ladder_p, + io_mgr_ladder_q, + io_mgr_ladder_done, + + mmm_ena_x, + mmm_ena_y, + mmm_rdy_x, + mmm_rdy_y, + mmm_word_index_last_x, + mmm_word_index_last_y, + mmm_word_index_last_minus1_x, + mmm_word_index_last_minus1_y, + mmm_ladder_mode_x, + mmm_ladder_mode_y, + mmm_sel_wide_in_x, + mmm_sel_wide_in_y, + mmm_sel_narrow_in_x, + mmm_sel_narrow_in_y, + mmm_force_unity_b_x, + mmm_force_unity_b_y, + mmm_only_reduce_x, + mmm_only_reduce_y, + mmm_just_multiply_x, + mmm_just_multiply_y, + + rdct_word_index_last_x, + rdct_word_index_last_y, + rdct_sel_wide_out_x, + rdct_sel_narrow_out_x, + rdct_sel_wide_out_y, + rdct_sel_narrow_out_y, + + wrk_ena, + wrk_rdy, + wrk_sel_wide_in, + wrk_sel_wide_out, + wrk_sel_narrow_in, + wrk_sel_narrow_out, + wrk_word_index_last, + wrk_word_index_last_half, + wrk_opcode +); + + + // + // Headers + // + `include "modexpng_parameters.vh" + `include "modexpng_microcode.vh" + + + // + // Ports + // + input clk; + input rst; + + input ena; + output rdy; + + `ifdef MODEXPNG_ENABLE_DEBUG + output uop_decoded_stop; + `endif + + input crt_mode; + + input [OP_ADDR_W -1:0] word_index_last_n; + input [OP_ADDR_W -1:0] word_index_last_pq; + + input [BIT_INDEX_W -1:0] bit_index_last_n; + input [BIT_INDEX_W -1:0] bit_index_last_pq; + + output io_mgr_ena; + input io_mgr_rdy; + output [UOP_CRT_W -1:0] io_mgr_sel_crt; + output [UOP_AUX_W -1:0] io_mgr_sel_aux; + output [BANK_ADDR_W -1:0] io_mgr_sel_in; + output [BANK_ADDR_W -1:0] io_mgr_sel_out; + output [OP_ADDR_W -1:0] io_mgr_word_index_last; + output [UOP_OPCODE_W -1:0] io_mgr_opcode; + output [BIT_INDEX_W -1:0] io_mgr_ladder_steps; + input io_mgr_ladder_d; + input io_mgr_ladder_p; + input io_mgr_ladder_q; + input io_mgr_ladder_done; + + output mmm_ena_x; + output mmm_ena_y; + input mmm_rdy_x; + input mmm_rdy_y; + output [OP_ADDR_W -1:0] mmm_word_index_last_x; + output [OP_ADDR_W -1:0] mmm_word_index_last_y; + output [OP_ADDR_W -1:0] mmm_word_index_last_minus1_x; + output [OP_ADDR_W -1:0] mmm_word_index_last_minus1_y; + output mmm_ladder_mode_x; + output mmm_ladder_mode_y; + output [BANK_ADDR_W -1:0] mmm_sel_wide_in_x; + output [BANK_ADDR_W -1:0] mmm_sel_wide_in_y; + output [BANK_ADDR_W -1:0] mmm_sel_narrow_in_x; + output [BANK_ADDR_W -1:0] mmm_sel_narrow_in_y; + output mmm_force_unity_b_x; + output mmm_force_unity_b_y; + output mmm_only_reduce_x; + output mmm_only_reduce_y; + output mmm_just_multiply_x; + output mmm_just_multiply_y; + + output [OP_ADDR_W -1:0] rdct_word_index_last_x; + output [OP_ADDR_W -1:0] rdct_word_index_last_y; + output [BANK_ADDR_W -1:0] rdct_sel_wide_out_x; + output [BANK_ADDR_W -1:0] rdct_sel_narrow_out_x; + output [BANK_ADDR_W -1:0] rdct_sel_wide_out_y; + output [BANK_ADDR_W -1:0] rdct_sel_narrow_out_y; + + output wrk_ena; + input wrk_rdy; + output [BANK_ADDR_W -1:0] wrk_sel_wide_in; + output [BANK_ADDR_W -1:0] wrk_sel_wide_out; + output [BANK_ADDR_W -1:0] wrk_sel_narrow_in; + output [BANK_ADDR_W -1:0] wrk_sel_narrow_out; + output [OP_ADDR_W -1:0] wrk_word_index_last; + output [OP_ADDR_W -1:0] wrk_word_index_last_half; + output [UOP_OPCODE_W -1:0] wrk_opcode; + + + // + // Registers + // + reg io_mgr_ena_r = 1'b0; + reg [UOP_CRT_W -1:0] io_mgr_sel_crt_r; + reg [UOP_AUX_W -1:0] io_mgr_sel_aux_r; + reg [BANK_ADDR_W -1:0] io_mgr_sel_in_r; + reg [BANK_ADDR_W -1:0] io_mgr_sel_out_r; + reg [OP_ADDR_W -1:0] io_mgr_word_index_last_r; + reg [UOP_OPCODE_W -1:0] io_mgr_opcode_r; + reg [BIT_INDEX_W -1:0] io_mgr_ladder_steps_r; + + reg mmm_ena_x_r = 1'b0; + reg mmm_ena_y_r = 1'b0; + reg [OP_ADDR_W -1:0] mmm_word_index_last_x_r; + reg [OP_ADDR_W -1:0] mmm_word_index_last_y_r; + reg [OP_ADDR_W -1:0] mmm_word_index_last_minus1_x_r; + reg [OP_ADDR_W -1:0] mmm_word_index_last_minus1_y_r; + reg mmm_ladder_mode_x_r; + reg mmm_ladder_mode_y_r; + reg [BANK_ADDR_W -1:0] mmm_sel_wide_in_x_r; + reg [BANK_ADDR_W -1:0] mmm_sel_wide_in_y_r; + reg [BANK_ADDR_W -1:0] mmm_sel_narrow_in_x_r; + reg [BANK_ADDR_W -1:0] mmm_sel_narrow_in_y_r; + reg mmm_force_unity_b_x_r; + reg mmm_force_unity_b_y_r; + reg mmm_only_reduce_x_r; + reg mmm_only_reduce_y_r; + reg mmm_just_multiply_x_r; + reg mmm_just_multiply_y_r; + + reg [OP_ADDR_W -1:0] rdct_word_index_last_x_r; + reg [OP_ADDR_W -1:0] rdct_word_index_last_y_r; + reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_x_r; + reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_x_r; + reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_y_r; + reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_y_r; + + reg wrk_ena_r = 1'b0; + reg [BANK_ADDR_W -1:0] wrk_sel_wide_in_r; + reg [BANK_ADDR_W -1:0] wrk_sel_wide_out_r; + reg [BANK_ADDR_W -1:0] wrk_sel_narrow_in_r; + reg [BANK_ADDR_W -1:0] wrk_sel_narrow_out_r; + reg [OP_ADDR_W -1:0] wrk_word_index_last_r; + reg [OP_ADDR_W -1:0] wrk_word_index_last_half_r; + reg [UOP_OPCODE_W -1:0] wrk_opcode_r; + + + // + // Mapping + // + assign io_mgr_ena = io_mgr_ena_r; + assign io_mgr_sel_crt = io_mgr_sel_crt_r; + assign io_mgr_sel_aux = io_mgr_sel_aux_r; + assign io_mgr_sel_in = io_mgr_sel_in_r; + assign io_mgr_sel_out = io_mgr_sel_out_r; + assign io_mgr_word_index_last = io_mgr_word_index_last_r; + assign io_mgr_opcode = io_mgr_opcode_r; + assign io_mgr_ladder_steps = io_mgr_ladder_steps_r; + + assign mmm_ena_x = mmm_ena_x_r; + assign mmm_ena_y = mmm_ena_y_r; + assign mmm_word_index_last_x = mmm_word_index_last_x_r; + assign mmm_word_index_last_y = mmm_word_index_last_y_r; + assign mmm_word_index_last_minus1_x = mmm_word_index_last_minus1_x_r; + assign mmm_word_index_last_minus1_y = mmm_word_index_last_minus1_y_r; + assign mmm_ladder_mode_x = mmm_ladder_mode_x_r; + assign mmm_ladder_mode_y = mmm_ladder_mode_y_r; + assign mmm_sel_wide_in_x = mmm_sel_wide_in_x_r; + assign mmm_sel_wide_in_y = mmm_sel_wide_in_y_r; + assign mmm_sel_narrow_in_x = mmm_sel_narrow_in_x_r; + assign mmm_sel_narrow_in_y = mmm_sel_narrow_in_y_r; + assign mmm_force_unity_b_x = mmm_force_unity_b_x_r; + assign mmm_force_unity_b_y = mmm_force_unity_b_y_r; + assign mmm_only_reduce_x = mmm_only_reduce_x_r; + assign mmm_only_reduce_y = mmm_only_reduce_y_r; + assign mmm_just_multiply_x = mmm_just_multiply_x_r; + assign mmm_just_multiply_y = mmm_just_multiply_y_r; + + assign rdct_word_index_last_x = rdct_word_index_last_x_r; + assign rdct_word_index_last_y = rdct_word_index_last_y_r; + assign rdct_sel_wide_out_x = rdct_sel_wide_out_x_r; + assign rdct_sel_wide_out_y = rdct_sel_wide_out_y_r; + assign rdct_sel_narrow_out_x = rdct_sel_narrow_out_x_r; + assign rdct_sel_narrow_out_y = rdct_sel_narrow_out_y_r; + + assign wrk_ena = wrk_ena_r; + assign wrk_sel_wide_in = wrk_sel_wide_in_r; + assign wrk_sel_wide_out = wrk_sel_wide_out_r; + assign wrk_sel_narrow_in = wrk_sel_narrow_in_r; + assign wrk_sel_narrow_out = wrk_sel_narrow_out_r; + assign wrk_word_index_last = wrk_word_index_last_r; + assign wrk_word_index_last_half = wrk_word_index_last_half_r; + assign wrk_opcode = wrk_opcode_r; + + + // + // UOP_FSM + // + localparam [1:0] UOP_FSM_STATE_IDLE = 2'b00; + localparam [1:0] UOP_FSM_STATE_FETCH = 2'b01; + localparam [1:0] UOP_FSM_STATE_DECODE = 2'b10; + localparam [1:0] UOP_FSM_STATE_BUSY = 2'b11; + + reg [1:0] uop_fsm_state = UOP_FSM_STATE_IDLE; + reg [1:0] uop_fsm_state_next; + + + // + // UOP ROM + // + reg [UOP_ADDR_W -1:0] uop_addr; + wire [UOP_W -1:0] uop_data; + + modexpng_uop_rom uop_rom + ( + .clk (clk), + .addr (uop_addr), + .data (uop_data) + ); + + + // + // UOP ROM Data Decoder + // + wire [UOP_OPCODE_W -1:0] uop_data_opcode = uop_data[UOP_W -1-: UOP_OPCODE_W]; + wire [UOP_CRT_W -1:0] uop_data_crt = uop_data[UOP_W -UOP_OPCODE_W -1-: UOP_CRT_W ]; + wire [UOP_NPQ_W -1:0] uop_data_npq = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -1-: UOP_NPQ_W ]; + wire [UOP_AUX_W -1:0] uop_data_aux = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -1-: UOP_AUX_W ]; + wire [UOP_LADDER_W -1:0] uop_data_ladder = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -1-: UOP_LADDER_W]; + wire [BANK_ADDR_W -1:0] uop_data_sel_wide_in = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1-: BANK_ADDR_W ]; + wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_in = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1*BANK_ADDR_W -1-: BANK_ADDR_W ]; + wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ]; + wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ]; + + wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ; + wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) || + (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ; + wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ; + wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) || + (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) || + (uop_data_opcode == UOP_OPCODE_CROSS_LADDERS_X2Y ) || + (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT ) || + (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) || + (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) || + (uop_data_opcode == UOP_OPCODE_MERGE_LH ) || + (uop_data_opcode == UOP_OPCODE_REGULAR_ADD_UNEVEN ) ; + wire uop_opcode_is_mmm = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ) || + (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) || + (uop_data_opcode == UOP_OPCODE_REGULAR_MULTIPLY ) ; + wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT ) || + (uop_data_opcode == UOP_OPCODE_LADDER_STEP ) ; + + + // + // Debug Signal + // + `ifdef MODEXPNG_ENABLE_DEBUG + assign uop_decoded_stop = (uop_fsm_state == UOP_FSM_STATE_DECODE) && uop_opcode_is_stop; + `endif + + + + // + // UOP ROM Address Increment Logic + // + wire uop_loop_now; + + wire [UOP_ADDR_W -1:0] uop_addr_plus1 = uop_addr + 1'b1; + wire [UOP_ADDR_W -1:0] uop_addr_minus1 = uop_addr - 1'b1; + + wire [UOP_ADDR_W -1:0] uop_addr_next = uop_loop_now ? uop_addr_minus1 : uop_addr_plus1 ; + wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT; + + always @(posedge clk) + // + if (uop_fsm_state_next == UOP_FSM_STATE_FETCH) + uop_addr <= (uop_fsm_state == UOP_FSM_STATE_IDLE) ? uop_addr_offset : uop_addr_next; + + + // + // Handy Signals + // + wire mmm_ena = mmm_ena_x & mmm_ena_y; + wire mmm_rdy = mmm_rdy_x & mmm_rdy_y; + + assign uop_loop_now = (uop_data_opcode == UOP_OPCODE_LADDER_STEP) && !io_mgr_ladder_done; + + reg [1:0] uop_data_ladder_mux; + + always @(uop_data_ladder, io_mgr_ladder_p, io_mgr_ladder_q, io_mgr_ladder_d) + // + case (uop_data_ladder) + UOP_LADDER_00: uop_data_ladder_mux = 2'b00; + UOP_LADDER_11: uop_data_ladder_mux = 2'b11; + UOP_LADDER_D: uop_data_ladder_mux = {io_mgr_ladder_d, ~io_mgr_ladder_d}; + UOP_LADDER_PQ: uop_data_ladder_mux = {io_mgr_ladder_p, io_mgr_ladder_q}; + endcase + + reg [OP_ADDR_W-1:0] word_index_last_mux; + + always @(uop_data_npq, word_index_last_n, word_index_last_pq) + // + if (uop_data_npq == UOP_NPQ_N) word_index_last_mux = word_index_last_n; + else word_index_last_mux = word_index_last_pq; + + reg [BIT_INDEX_W-1:0] bit_index_last_mux; + + always @(crt_mode, bit_index_last_pq, bit_index_last_n) + if (crt_mode) bit_index_last_mux = bit_index_last_pq; + else bit_index_last_mux = bit_index_last_n; + + + // + // UOP Trigger Logic + // + always @(posedge clk) + // + if (rst) begin + io_mgr_ena_r <= 1'b0; + mmm_ena_x_r <= 1'b0; + mmm_ena_y_r <= 1'b0; + wrk_ena_r <= 1'b0; + end else begin + io_mgr_ena_r <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in || + uop_opcode_is_out || + uop_opcode_is_ladder): 1'b0; + mmm_ena_x_r <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; + mmm_ena_y_r <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; + wrk_ena_r <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || + uop_opcode_is_out ): 1'b0; + end + + + // + // UOP Completion Detector + // + reg uop_exit_from_busy; + + always @* begin + // + uop_exit_from_busy = 0; + // + if (uop_opcode_is_in ) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy ; + if (uop_opcode_is_out ) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & + (~wrk_ena & wrk_rdy ) ; + if (uop_opcode_is_mmm ) uop_exit_from_busy = ~mmm_ena & mmm_rdy ; + if (uop_opcode_is_wrk ) uop_exit_from_busy = ~wrk_ena & wrk_rdy ; + if (uop_opcode_is_ladder) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy ; + // + end + + + // + // UOP Parameters + // + task update_io_mgr_params; + input [UOP_CRT_W -1:0] sel_crt; + input [UOP_AUX_W -1:0] sel_aux; + input [BANK_ADDR_W -1:0] sel_in; + input [BANK_ADDR_W -1:0] sel_out; + input [UOP_OPCODE_W-1:0] opcode; + begin + io_mgr_sel_crt_r <= sel_crt; + io_mgr_sel_aux_r <= sel_aux; + io_mgr_sel_in_r <= sel_in; + io_mgr_sel_out_r <= sel_out; + io_mgr_opcode_r <= opcode; + end + endtask + + task update_wrk_params; + input [BANK_ADDR_W -1:0] wide_in; + input [BANK_ADDR_W -1:0] narrow_in; + input [BANK_ADDR_W -1:0] wide_out; + input [BANK_ADDR_W -1:0] narrow_out; + input [UOP_OPCODE_W-1:0] opcode; + begin + wrk_sel_wide_in_r <= wide_in; + wrk_sel_narrow_in_r <= narrow_in; + wrk_sel_wide_out_r <= wide_out; + wrk_sel_narrow_out_r <= narrow_out; + wrk_opcode_r <= opcode; + end + endtask + + task update_mmm_params; + input [ 1:0] ladder_mode; + input [BANK_ADDR_W-1:0] sel_wide_in; + input [BANK_ADDR_W-1:0] sel_narrow_in; + input force_unity_b; + input only_reduce; + input just_multiply; + begin + {mmm_ladder_mode_x_r, mmm_ladder_mode_y_r } <= ladder_mode; + {mmm_sel_wide_in_x_r, mmm_sel_wide_in_y_r } <= {2{sel_wide_in }}; + {mmm_sel_narrow_in_x_r, mmm_sel_narrow_in_y_r} <= {2{sel_narrow_in}}; + {mmm_force_unity_b_x_r, mmm_force_unity_b_y_r} <= {2{force_unity_b}}; + {mmm_only_reduce_x_r, mmm_only_reduce_y_r } <= {2{only_reduce }}; + {mmm_just_multiply_x_r, mmm_just_multiply_y_r} <= {2{just_multiply}}; + end + endtask + + task update_rdct_params; + input [BANK_ADDR_W-1:0] sel_wide_out; + input [BANK_ADDR_W-1:0] sel_narrow_out; + begin + {rdct_sel_wide_out_x_r, rdct_sel_wide_out_y_r } <= {2{sel_wide_out}}; + {rdct_sel_narrow_out_x_r, rdct_sel_narrow_out_y_r} <= {2{sel_narrow_out}}; + end + endtask + + always @(posedge clk) + // + if (uop_fsm_state == UOP_FSM_STATE_DECODE) + // + case (uop_data_opcode) + // + UOP_OPCODE_INPUT_TO_WIDE: + update_io_mgr_params(uop_data_crt, uop_data_aux, uop_data_sel_narrow_in, uop_data_sel_wide_out, uop_data_opcode); + // + UOP_OPCODE_INPUT_TO_NARROW: + update_io_mgr_params(uop_data_crt, uop_data_aux, uop_data_sel_narrow_in, uop_data_sel_narrow_out, uop_data_opcode); + // + UOP_OPCODE_OUTPUT_FROM_NARROW: begin + update_io_mgr_params(uop_data_crt, UOP_AUX_DNC, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode); + update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, BANK_DNC, BANK_DNC, uop_data_opcode); + end + // + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_COPY_LADDERS_X2Y, + UOP_OPCODE_CROSS_LADDERS_X2Y: + update_wrk_params(uop_data_sel_wide_in, uop_data_sel_narrow_in, uop_data_sel_wide_out, uop_data_sel_narrow_out, uop_data_opcode); + // + UOP_OPCODE_MODULAR_MULTIPLY: begin + update_mmm_params(uop_data_ladder_mux, uop_data_sel_wide_in, uop_data_sel_narrow_in, uop_data_aux, 1'b0, 1'b0); + update_rdct_params(uop_data_sel_wide_out, uop_data_sel_narrow_out); + end + // + UOP_OPCODE_MODULAR_SUBTRACT: + update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, uop_data_sel_wide_out, uop_data_sel_narrow_out, uop_data_opcode); + // + UOP_OPCODE_MODULAR_REDUCE_INIT: + update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, BANK_DNC, BANK_DNC, uop_data_opcode); + // + UOP_OPCODE_MODULAR_REDUCE_PROC: begin + update_mmm_params(2'bXX, BANK_DNC, BANK_DNC, 1'b0, 1'b1, 1'b0); + update_rdct_params(uop_data_sel_wide_out, uop_data_sel_narrow_out); + end + // + UOP_OPCODE_PROPAGATE_CARRIES: + update_wrk_params(BANK_DNC, uop_data_sel_narrow_in, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode); + // + UOP_OPCODE_MERGE_LH: + update_wrk_params(BANK_DNC, BANK_DNC, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode); + // + UOP_OPCODE_REGULAR_MULTIPLY: begin + update_mmm_params(2'b11, uop_data_sel_wide_in, uop_data_sel_narrow_in, 1'b0, 1'b0, 1'b1); + update_rdct_params(uop_data_sel_wide_out, uop_data_sel_narrow_out); + end + // + UOP_OPCODE_REGULAR_ADD_UNEVEN: + update_wrk_params(uop_data_sel_wide_in, uop_data_sel_narrow_in, BANK_DNC, uop_data_sel_narrow_out, uop_data_opcode); + // + UOP_OPCODE_LADDER_INIT, + UOP_OPCODE_LADDER_STEP: + update_io_mgr_params(UOP_CRT_DNC, UOP_AUX_DNC, BANK_DNC, BANK_DNC, uop_data_opcode); + // + endcase + + + // + // UOP Lengths + // + task update_io_mgr_length; + input [OP_ADDR_W -1:0] word_index_last; + input [BIT_INDEX_W-1:0] ladder_steps; + begin + io_mgr_word_index_last_r <= word_index_last; + io_mgr_ladder_steps_r <= ladder_steps; + end + endtask + + task update_wrk_length; + input [OP_ADDR_W-1:0] word_index_last; + input [OP_ADDR_W-1:0] word_index_last_half; + begin + wrk_word_index_last_r <= word_index_last; + wrk_word_index_last_half_r <= word_index_last_half; + end + endtask + + task update_mmm_length; + input [OP_ADDR_W-1:0] word_index_last; + begin + mmm_word_index_last_x_r <= word_index_last; + mmm_word_index_last_y_r <= word_index_last; + mmm_word_index_last_minus1_x_r <= word_index_last - 1'b1; + mmm_word_index_last_minus1_y_r <= word_index_last - 1'b1; + end + endtask + + task update_rdct_length; + input [OP_ADDR_W-1:0] word_index_last; + begin + rdct_word_index_last_x_r <= word_index_last; + rdct_word_index_last_y_r <= word_index_last; + end + endtask + + always @(posedge clk) + // + if (uop_fsm_state == UOP_FSM_STATE_DECODE) + // + case (uop_data_opcode) + // + UOP_OPCODE_INPUT_TO_WIDE, + UOP_OPCODE_INPUT_TO_NARROW, + UOP_OPCODE_OUTPUT_FROM_NARROW: + update_io_mgr_length(word_index_last_mux, BIT_INDEX_DNC); + // + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_COPY_LADDERS_X2Y, + UOP_OPCODE_CROSS_LADDERS_X2Y: + update_wrk_length(word_index_last_mux, OP_ADDR_DNC); + // + UOP_OPCODE_MODULAR_MULTIPLY: begin + update_mmm_length(word_index_last_mux); + update_rdct_length(word_index_last_mux); + end + // + UOP_OPCODE_MODULAR_SUBTRACT: + update_wrk_length(word_index_last_mux, OP_ADDR_DNC); + // + UOP_OPCODE_MODULAR_REDUCE_INIT: + update_wrk_length(word_index_last_n, word_index_last_pq); + // + UOP_OPCODE_MODULAR_REDUCE_PROC: begin + update_mmm_length(word_index_last_pq); + update_rdct_length(word_index_last_pq); + end + // + UOP_OPCODE_PROPAGATE_CARRIES: + update_wrk_length(word_index_last_mux, OP_ADDR_DNC); + // + UOP_OPCODE_MERGE_LH: + update_wrk_length(word_index_last_n, word_index_last_pq); + // + UOP_OPCODE_REGULAR_MULTIPLY: begin + update_mmm_length(word_index_last_pq); + update_rdct_length(word_index_last_pq); + end + // + UOP_OPCODE_REGULAR_ADD_UNEVEN: + update_wrk_length(word_index_last_n, word_index_last_pq); + // + UOP_OPCODE_LADDER_INIT, + UOP_OPCODE_LADDER_STEP: + update_io_mgr_length(OP_ADDR_LADDER_LAST, bit_index_last_mux); + // + endcase + + + // + // UOP FSM Process + // + always @(posedge clk) + // + if (rst) uop_fsm_state <= UOP_FSM_STATE_IDLE; + else uop_fsm_state <= uop_fsm_state_next; + + + // + // UOP FSM Transition Logic + // + always @* begin + // + case (uop_fsm_state) + UOP_FSM_STATE_IDLE: uop_fsm_state_next = ena ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_IDLE; + UOP_FSM_STATE_FETCH: uop_fsm_state_next = UOP_FSM_STATE_DECODE ; + UOP_FSM_STATE_DECODE: uop_fsm_state_next = uop_opcode_is_stop ? UOP_FSM_STATE_IDLE : UOP_FSM_STATE_BUSY; + UOP_FSM_STATE_BUSY: uop_fsm_state_next = uop_exit_from_busy ? UOP_FSM_STATE_FETCH : UOP_FSM_STATE_BUSY; + endcase + // + end + + + // + // Ready Flag Logic + // + reg rdy_r = 1'b1; + assign rdy = rdy_r; + + always @(posedge clk) + // + if (rst) rdy_r <= 1'b1; + else case (uop_fsm_state) + UOP_FSM_STATE_IDLE: rdy_r <= ~ena; + UOP_FSM_STATE_DECODE: rdy_r <= uop_opcode_is_stop; + endcase + + +endmodule diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v index cdbaca6..522e9ca 100644 --- a/rtl/modexpng_uop_rom.v +++ b/rtl/modexpng_uop_rom.v @@ -15,87 +15,101 @@ module modexpng_uop_rom always @(posedge clk) // case (addr) - 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // - 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // - 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; // - 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; // - 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // - 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // - // - 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // - 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // - // - 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; // - 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // - 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // - // - 6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; // - // - 6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; // - 6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; // - // - 6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // - // - 6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; // - // - 6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // - // - 6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; // - 6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; // - 6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; // - 6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; // - 6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; // - // - 6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd29: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd30: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; // - // - 6'd31: data <= {UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_DNC }; // - // - 6'd32: data <= {UOP_OPCODE_MODULAR_REDUCE_PROC, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // - // - 6'd33: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_D, BANK_NARROW_A, BANK_WIDE_C, BANK_NARROW_C }; // - 6'd34: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_A, BANK_WIDE_D, BANK_NARROW_D }; // - 6'd35: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_A, BANK_DNC, BANK_WIDE_C, BANK_NARROW_C }; // - // - 6'd36: data <= {UOP_OPCODE_COPY_LADDERS_X2Y, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; // - // - 6'd37: data <= {UOP_OPCODE_LADDER_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // - 6'd38: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_PQ, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // - 6'd39: data <= {UOP_OPCODE_LADDER_STEP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // - // - 6'd40: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // - // - 6'd41: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; // - // - 6'd42: data <= {UOP_OPCODE_CROSS_LADDERS_X2Y, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_D, BANK_NARROW_D }; // - // - 6'd43: data <= {UOP_OPCODE_MODULAR_SUBTRACT, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; // - // - 6'd44: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_E, BANK_WIDE_C, BANK_NARROW_C }; // - 6'd45: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_A, BANK_WIDE_C, BANK_NARROW_C }; // - // - 6'd46: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_E, BANK_DNC }; // - // - 6'd47: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_DNC, BANK_NARROW_E }; // - // - 6'd48: data <= {UOP_OPCODE_REGULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_C, BANK_DNC, BANK_DNC }; // - // - 6'd49: data <= {UOP_OPCODE_MERGE_LH, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_DNC, BANK_NARROW_A }; // - // - 6'd50: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_A, BANK_DNC, BANK_NARROW_A }; // - // - 6'd51: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_D, BANK_NARROW_D }; // - // - 6'd52: data <= {UOP_OPCODE_REGULAR_ADD_UNEVEN, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_NARROW_D, BANK_NARROW_A, BANK_DNC , BANK_NARROW_C }; // - // - default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // + // + 7'd000: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // + 7'd001: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // + 7'd002: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; // + 7'd003: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; // + 7'd004: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // + 7'd005: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // + // + 7'd006: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 7'd007: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 7'd008: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 7'd009: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 7'd010: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // + 7'd011: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // + // + 7'd012: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; // + 7'd013: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // + 7'd014: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // + // + 7'd015: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; // + // + 7'd016: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; // + 7'd017: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; // + // + 7'd018: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // + // + 7'd019: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; // + // + 7'd020: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // + // + 7'd021: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; // + 7'd022: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; // + 7'd023: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; // + 7'd024: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; // + 7'd025: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; // + // + 7'd026: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 7'd027: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 7'd028: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 7'd029: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 7'd030: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; // + // + 7'd031: data <= {UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_DNC }; // + // + 7'd032: data <= {UOP_OPCODE_MODULAR_REDUCE_PROC, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // + // + 7'd033: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_D, BANK_NARROW_A, BANK_WIDE_C, BANK_NARROW_C }; // + 7'd034: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_A, BANK_WIDE_D, BANK_NARROW_D }; // + 7'd035: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_A, BANK_DNC, BANK_WIDE_C, BANK_NARROW_C }; // + // + 7'd036: data <= {UOP_OPCODE_COPY_LADDERS_X2Y, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; // + // + 7'd037: data <= {UOP_OPCODE_LADDER_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_DNC, BANK_DNC }; // + 7'd038: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_PQ, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // + 7'd039: data <= {UOP_OPCODE_LADDER_STEP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_DNC, BANK_DNC }; // + // + 7'd040: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // + // + 7'd041: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; // + // + 7'd042: data <= {UOP_OPCODE_CROSS_LADDERS_X2Y, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_D, BANK_NARROW_D }; // + // + 7'd043: data <= {UOP_OPCODE_MODULAR_SUBTRACT, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; // + // + 7'd044: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_E, BANK_WIDE_C, BANK_NARROW_C }; // + 7'd045: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_A, BANK_WIDE_C, BANK_NARROW_C }; // + // + 7'd046: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_E, BANK_DNC }; // + // + 7'd047: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_DNC, BANK_NARROW_E }; // + // + 7'd048: data <= {UOP_OPCODE_REGULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_C, BANK_DNC, BANK_DNC }; // + // + 7'd049: data <= {UOP_OPCODE_MERGE_LH, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_DNC, BANK_NARROW_A }; // + // + 7'd050: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_A, BANK_DNC, BANK_NARROW_A }; // + // + 7'd051: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_D, BANK_NARROW_D }; // + // + 7'd052: data <= {UOP_OPCODE_REGULAR_ADD_UNEVEN, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_NARROW_D, BANK_NARROW_A, BANK_DNC, BANK_NARROW_C }; // + // + 7'd053: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // + 7'd054: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // + // + 7'd055: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 7'd056: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + // + 7'd057: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_00, BANK_WIDE_B, BANK_NARROW_C, BANK_WIDE_A, BANK_NARROW_A }; // + // + 7'd058: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_A, BANK_DNC, BANK_NARROW_A }; // + // + 7'd059: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_A, BANK_DNC, BANK_OUT_S }; // + // + default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_DNC, BANK_DNC }; // + // endcase endmodule |