about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bench/tb_core_full.v 511
-rw-r--r-- bench/tb_mmm_dual_x8.v 940
-rw-r--r-- bench/tb_mmm_x8_dual.v 327
-rw-r--r-- rtl/modexpng_core_top.v 138
-rw-r--r-- rtl/modexpng_general_worker.v 402
-rw-r--r-- rtl/modexpng_io_manager.v 170
-rw-r--r-- rtl/modexpng_microcode.vh 42
-rw-r--r-- rtl/modexpng_mmm_dual.v 10
-rw-r--r-- rtl/modexpng_parameters.vh 8
-rw-r--r-- rtl/modexpng_uop_rom.v 82
10 files changed, 2171 insertions, 459 deletions
diff --git a/bench/tb_core_full.v b/bench/tb_core_full.v
new file mode 100644
index 0000000..248634e
--- /dev/null
+++ b/bench/tb_core_full.v
@@ -0,0 +1,511 @@
+`timescale 1ns / 1ps
+
+module tb_core_full;
+
+
+ //
+ // Headers
+ //
+ `include "../rtl/modexpng_parameters.vh"
+
+
+ // Test Vectors
+ // TB_NUM_WORDS_* count BUS_DATA_W-bit bus words, CORE_NUM_WORDS_* count WORD_W-bit words;
+ // BUS_DATA_W and WORD_W are not declared in this file (presumably in the included header).
+ localparam TB_MODULUS_LENGTH_N = 1024; // bit_index_last_n is set to this value minus one
+ localparam TB_MODULUS_LENGTH_PQ = TB_MODULUS_LENGTH_N / 2; // half of the N length
+ localparam TB_NUM_WORDS_PQ = TB_MODULUS_LENGTH_PQ / BUS_DATA_W;
+ localparam TB_NUM_WORDS_N = TB_MODULUS_LENGTH_N / BUS_DATA_W;
+ localparam CORE_NUM_WORDS_PQ = TB_MODULUS_LENGTH_PQ / WORD_W;
+ localparam CORE_NUM_WORDS_N = TB_MODULUS_LENGTH_N / WORD_W;
+
+ reg [31:0] M[0:TB_NUM_WORDS_N-1]; // NOTE(review): all arrays are fixed at 32 bits wide; assumes BUS_DATA_W == 32 -- confirm
+ reg [31:0] N[0:TB_NUM_WORDS_N-1];
+ reg [31:0] N_FACTOR[0:TB_NUM_WORDS_N-1];
+ reg [31:0] N_COEFF[0:TB_NUM_WORDS_N]; // one word longer: extra trailer word at index TB_NUM_WORDS_N
+ reg [31:0] X[0:TB_NUM_WORDS_N-1];
+ reg [31:0] Y[0:TB_NUM_WORDS_N-1];
+ reg [31:0] P[0:TB_NUM_WORDS_PQ-1];
+ reg [31:0] Q[0:TB_NUM_WORDS_PQ-1];
+ reg [31:0] P_FACTOR[0:TB_NUM_WORDS_PQ-1];
+ reg [31:0] Q_FACTOR[0:TB_NUM_WORDS_PQ-1];
+ reg [31:0] P_COEFF[0:TB_NUM_WORDS_PQ]; // one word longer: extra trailer word at index TB_NUM_WORDS_PQ
+ reg [31:0] Q_COEFF[0:TB_NUM_WORDS_PQ]; // one word longer: extra trailer word at index TB_NUM_WORDS_PQ
+ reg [31:0] DP[0:TB_NUM_WORDS_PQ-1];
+ reg [31:0] DQ[0:TB_NUM_WORDS_PQ-1];
+ reg [31:0] QINV[0:TB_NUM_WORDS_PQ-1];
+ reg [31:0] XM[0:TB_NUM_WORDS_N-1]; // reference value compared against XM_READBACK by verify
+ reg [31:0] YM[0:TB_NUM_WORDS_N-1]; // reference value compared against YM_READBACK by verify
+ reg [31:0] S[0:TB_NUM_WORDS_N-1]; // reference value compared against S_READBACK by verify
+ reg [31:0] XM_READBACK[0:TB_NUM_WORDS_N-1]; // filled by get_output
+ reg [31:0] YM_READBACK[0:TB_NUM_WORDS_N-1]; // filled by get_output
+ reg [31:0] S_READBACK[0:TB_NUM_WORDS_N-1]; // filled by get_output
+
+ initial begin
+ M[ 0] = 32'he1b3c6ac; M[ 1] = 32'haa2c5d8c; M[ 2] = 32'hbecc676a; M[ 3] = 32'hda087a3e;
+ M[ 4] = 32'hf0816496; M[ 5] = 32'hf9e17fd8; M[ 6] = 32'h304d4896; M[ 7] = 32'h81d4e9ab;
+ M[ 8] = 32'h80eff76c; M[ 9] = 32'he5b8f9b6; M[ 10] = 32'h4b1ebe55; M[ 11] = 32'ha1feb9dc;
+ M[ 12] = 32'heca4192f; M[ 13] = 32'h6ad6ea8e; M[ 14] = 32'hf34aed05; M[ 15] = 32'had38c275;
+ M[ 16] = 32'h8d3b583b; M[ 17] = 32'hc370f07e; M[ 18] = 32'hb9078738; M[ 19] = 32'haf37f86c;
+ M[ 20] = 32'h02f0e161; M[ 21] = 32'h0506a68a; M[ 22] = 32'h1ae65107; M[ 23] = 32'hcd3a97f1;
+ M[ 24] = 32'hb27244b8; M[ 25] = 32'h9bc3c400; M[ 26] = 32'he4d5636e; M[ 27] = 32'h35187c07;
+ M[ 28] = 32'h78a661c9; M[ 29] = 32'h1e7ec273; M[ 30] = 32'hcdc31041; M[ 31] = 32'h002291d8;
+ N[ 0] = 32'h6719997f; N[ 1] = 32'hef2df706; N[ 2] = 32'h9ba95792; N[ 3] = 32'h747e0580;
+ N[ 4] = 32'h7507684c; N[ 5] = 32'h7a10d0d1; N[ 6] = 32'h83a33941; N[ 7] = 32'haef9fda5;
+ N[ 8] = 32'h17972933; N[ 9] = 32'h0a98251a; N[ 10] = 32'h7dce3d13; N[ 11] = 32'hdad49a60;
+ N[ 12] = 32'h9f98006b; N[ 13] = 32'h46fd4a05; N[ 14] = 32'h51966e1d; N[ 15] = 32'hb1c59fab;
+ N[ 16] = 32'h8ab3096e; N[ 17] = 32'hef1f0436; N[ 18] = 32'heeed776f; N[ 19] = 32'h106d9d82;
+ N[ 20] = 32'hdd2a44af; N[ 21] = 32'h17c32585; N[ 22] = 32'hc854e454; N[ 23] = 32'h600fb6df;
+ N[ 24] = 32'h25c2d4bb; N[ 25] = 32'h5f09d790; N[ 26] = 32'he5a2bb93; N[ 27] = 32'h5bc6b044;
+ N[ 28] = 32'h2ecbb15f; N[ 29] = 32'h464817f5; N[ 30] = 32'h05cae32b; N[ 31] = 32'hde97bb85;
+ N_FACTOR[ 0] = 32'ha06a1113; N_FACTOR[ 1] = 32'hc9974806; N_FACTOR[ 2] = 32'h572d7a20; N_FACTOR[ 3] = 32'h04000838;
+ N_FACTOR[ 4] = 32'hb275c37a; N_FACTOR[ 5] = 32'hea78a046; N_FACTOR[ 6] = 32'h029e13b8; N_FACTOR[ 7] = 32'hae540753;
+ N_FACTOR[ 8] = 32'h1e98bc21; N_FACTOR[ 9] = 32'h34ede47a; N_FACTOR[ 10] = 32'h0c565ecd; N_FACTOR[ 11] = 32'h027ff3bf;
+ N_FACTOR[ 12] = 32'h08290d30; N_FACTOR[ 13] = 32'hb92857df; N_FACTOR[ 14] = 32'he6c59eb3; N_FACTOR[ 15] = 32'h09e53d6a;
+ N_FACTOR[ 16] = 32'h980d127e; N_FACTOR[ 17] = 32'h4dd6ced0; N_FACTOR[ 18] = 32'h3b9400d0; N_FACTOR[ 19] = 32'h276c6711;
+ N_FACTOR[ 20] = 32'h72eaf2e6; N_FACTOR[ 21] = 32'h749f81eb; N_FACTOR[ 22] = 32'h17b7d05f; N_FACTOR[ 23] = 32'h41a3a2cd;
+ N_FACTOR[ 24] = 32'h1ba098f3; N_FACTOR[ 25] = 32'h9b884af9; N_FACTOR[ 26] = 32'hdafd920c; N_FACTOR[ 27] = 32'h7b1f5cc6;
+ N_FACTOR[ 28] = 32'hb0a0d098; N_FACTOR[ 29] = 32'h4ee55bcf; N_FACTOR[ 30] = 32'haed9b905; N_FACTOR[ 31] = 32'h42d541fb;
+ N_COEFF[ 0] = 32'hb383d981; N_COEFF[ 1] = 32'h9bf1c20c; N_COEFF[ 2] = 32'h268999ff; N_COEFF[ 3] = 32'h11a3c01a;
+ N_COEFF[ 4] = 32'h12665495; N_COEFF[ 5] = 32'h515b0d96; N_COEFF[ 6] = 32'hb704fb07; N_COEFF[ 7] = 32'h8e1bd1d6;
+ N_COEFF[ 8] = 32'h62c5f506; N_COEFF[ 9] = 32'hfdcd0163; N_COEFF[ 10] = 32'h8dd55dee; N_COEFF[ 11] = 32'h6d79c8b1;
+ N_COEFF[ 12] = 32'hca16d0b9; N_COEFF[ 13] = 32'h88bead48; N_COEFF[ 14] = 32'hbcdb1e94; N_COEFF[ 15] = 32'h950c171d;
+ N_COEFF[ 16] = 32'h4fa810af; N_COEFF[ 17] = 32'h9b63e6d2; N_COEFF[ 18] = 32'ha2d0c26b; N_COEFF[ 19] = 32'hafa1ef25;
+ N_COEFF[ 20] = 32'h111bd21e; N_COEFF[ 21] = 32'hc2d896f0; N_COEFF[ 22] = 32'h189dc2cf; N_COEFF[ 23] = 32'h6144156a;
+ N_COEFF[ 24] = 32'hd1c67123; N_COEFF[ 25] = 32'ha127e4f3; N_COEFF[ 26] = 32'h40d342ef; N_COEFF[ 27] = 32'hee476d42;
+ N_COEFF[ 28] = 32'hee05f26a; N_COEFF[ 29] = 32'h4fc717bd; N_COEFF[ 30] = 32'h6baa4d60; N_COEFF[ 31] = 32'h1d6b10db;
+ N_COEFF[ 32] = 32'h00006545;
+ X[ 0] = 32'ha838f053; X[ 1] = 32'h8eb9747c; X[ 2] = 32'h5991b9eb; X[ 3] = 32'h74e6e776;
+ X[ 4] = 32'hcb5aa9e2; X[ 5] = 32'h7f8083d4; X[ 6] = 32'h3f7d47ec; X[ 7] = 32'hbd76a787;
+ X[ 8] = 32'hf4c166b7; X[ 9] = 32'hdbf67229; X[ 10] = 32'h975a5cfb; X[ 11] = 32'he8c35dca;
+ X[ 12] = 32'h6abc86e8; X[ 13] = 32'hfee472cb; X[ 14] = 32'h83ac8f2e; X[ 15] = 32'h82825cff;
+ X[ 16] = 32'h2d532c22; X[ 17] = 32'h2d3c3b06; X[ 18] = 32'he2862a8f; X[ 19] = 32'he8616ce4;
+ X[ 20] = 32'h5d77ee51; X[ 21] = 32'he609de07; X[ 22] = 32'hef718044; X[ 23] = 32'h82f35f8b;
+ X[ 24] = 32'hcdb9dcfe; X[ 25] = 32'hff6ea364; X[ 26] = 32'h0994ae28; X[ 27] = 32'h409b369b;
+ X[ 28] = 32'hcfabda4e; X[ 29] = 32'h5cd52bbc; X[ 30] = 32'hd90e1715; X[ 31] = 32'h00f4dcf2;
+ Y[ 0] = 32'h01b2730a; Y[ 1] = 32'h04ff1664; Y[ 2] = 32'h6d55dc06; Y[ 3] = 32'h1cda0da7;
+ Y[ 4] = 32'h98c812b4; Y[ 5] = 32'ha8f79f3b; Y[ 6] = 32'hb18d9ee1; Y[ 7] = 32'ha53e97db;
+ Y[ 8] = 32'hfbbfd687; Y[ 9] = 32'h6b8a8bf6; Y[ 10] = 32'h59fe5575; Y[ 11] = 32'he6ee62ca;
+ Y[ 12] = 32'h9fe3f32a; Y[ 13] = 32'h6d758eaa; Y[ 14] = 32'h121e3dac; Y[ 15] = 32'h31d77884;
+ Y[ 16] = 32'h8f2701dd; Y[ 17] = 32'hca5e7ac3; Y[ 18] = 32'h731977a3; Y[ 19] = 32'hc3c1af70;
+ Y[ 20] = 32'h5606786a; Y[ 21] = 32'h94b71191; Y[ 22] = 32'hd044c7e2; Y[ 23] = 32'h7d899cd7;
+ Y[ 24] = 32'hb17d4f5d; Y[ 25] = 32'h446e04de; Y[ 26] = 32'h9c40b33d; Y[ 27] = 32'habc2e23e;
+ Y[ 28] = 32'hbb98b1f6; Y[ 29] = 32'hf1f87f7e; Y[ 30] = 32'hf19f3050; Y[ 31] = 32'h91305f4c;
+ P[ 0] = 32'h18bb0f97; P[ 1] = 32'h08588a44; P[ 2] = 32'h042c6647; P[ 3] = 32'hc8d3fa09;
+ P[ 4] = 32'he2ddbbc7; P[ 5] = 32'hef4a17fd; P[ 6] = 32'h90c102ef; P[ 7] = 32'h28b132db;
+ P[ 8] = 32'hebfd5f0a; P[ 9] = 32'h958717ca; P[ 10] = 32'h563cd266; P[ 11] = 32'h433f41af;
+ P[ 12] = 32'hbc198e83; P[ 13] = 32'h5b5300b2; P[ 14] = 32'h9bc50e5d; P[ 15] = 32'hefffa822;
+ Q[ 0] = 32'h25de0259; Q[ 1] = 32'hd81461d0; Q[ 2] = 32'h613815b3; Q[ 3] = 32'h9bf274e0;
+ Q[ 4] = 32'hbfd89a48; Q[ 5] = 32'hc53e71ac; Q[ 6] = 32'hcce7aed3; Q[ 7] = 32'hce1d017c;
+ Q[ 8] = 32'h646547e1; Q[ 9] = 32'hd6779694; Q[ 10] = 32'h20ae39c0; Q[ 11] = 32'hef0d4b5b;
+ Q[ 12] = 32'h8e5f59d6; Q[ 13] = 32'h7e267974; Q[ 14] = 32'h14c86644; Q[ 15] = 32'hed6efcd0;
+ P_FACTOR[ 0] = 32'h614f99ce; P_FACTOR[ 1] = 32'hbcee5381; P_FACTOR[ 2] = 32'h10b70a9a; P_FACTOR[ 3] = 32'h1a697756;
+ P_FACTOR[ 4] = 32'h1a972b27; P_FACTOR[ 5] = 32'hd7c43f9e; P_FACTOR[ 6] = 32'h48cbad9c; P_FACTOR[ 7] = 32'hc350e206;
+ P_FACTOR[ 8] = 32'h51098b50; P_FACTOR[ 9] = 32'h93388ec6; P_FACTOR[ 10] = 32'h548960b5; P_FACTOR[ 11] = 32'h5ecd6b04;
+ P_FACTOR[ 12] = 32'h04d1d543; P_FACTOR[ 13] = 32'ha53994af; P_FACTOR[ 14] = 32'hd390be70; P_FACTOR[ 15] = 32'h0acdced0;
+ Q_FACTOR[ 0] = 32'h8a19423f; Q_FACTOR[ 1] = 32'h9d729c78; Q_FACTOR[ 2] = 32'h26ed5239; Q_FACTOR[ 3] = 32'h5a7eba92;
+ Q_FACTOR[ 4] = 32'h8465f60f; Q_FACTOR[ 5] = 32'hd50817dd; Q_FACTOR[ 6] = 32'hba703ab1; Q_FACTOR[ 7] = 32'h3d59bd42;
+ Q_FACTOR[ 8] = 32'h2c197fcc; Q_FACTOR[ 9] = 32'hed14f573; Q_FACTOR[ 10] = 32'hb860c105; Q_FACTOR[ 11] = 32'h89af91e7;
+ Q_FACTOR[ 12] = 32'h13a3742d; Q_FACTOR[ 13] = 32'h96e41677; Q_FACTOR[ 14] = 32'h86506b4d; Q_FACTOR[ 15] = 32'h4a834535;
+ P_COEFF[ 0] = 32'hb3679fd9; P_COEFF[ 1] = 32'hde24e467; P_COEFF[ 2] = 32'hf0e82022; P_COEFF[ 3] = 32'h99012919;
+ P_COEFF[ 4] = 32'h023bda43; P_COEFF[ 5] = 32'hf04eebf8; P_COEFF[ 6] = 32'h29e9942f; P_COEFF[ 7] = 32'h8c257cb0;
+ P_COEFF[ 8] = 32'h5cdc4e60; P_COEFF[ 9] = 32'h279bdff7; P_COEFF[ 10] = 32'hf04a610d; P_COEFF[ 11] = 32'h342901dc;
+ P_COEFF[ 12] = 32'hc3e2f78c; P_COEFF[ 13] = 32'h39c00ed8; P_COEFF[ 14] = 32'hf7032ac2; P_COEFF[ 15] = 32'h22d9c54e;
+ P_COEFF[ 16] = 32'h0000f994;
+ Q_COEFF[ 0] = 32'h95beda17; Q_COEFF[ 1] = 32'ha4b101fa; Q_COEFF[ 2] = 32'hd0b854bc; Q_COEFF[ 3] = 32'h5c952a67;
+ Q_COEFF[ 4] = 32'h56722aa8; Q_COEFF[ 5] = 32'h2176cace; Q_COEFF[ 6] = 32'h69beef2d; Q_COEFF[ 7] = 32'h95bf6eb2;
+ Q_COEFF[ 8] = 32'h0cf1175c; Q_COEFF[ 9] = 32'h4911b74e; Q_COEFF[ 10] = 32'h331e61cb; Q_COEFF[ 11] = 32'he9527ead;
+ Q_COEFF[ 12] = 32'h8d6a5911; Q_COEFF[ 13] = 32'hae42d654; Q_COEFF[ 14] = 32'he10d29a8; Q_COEFF[ 15] = 32'h50a5dd76;
+ Q_COEFF[ 16] = 32'h0000ed75;
+ DP[ 0] = 32'h63d165e5; DP[ 1] = 32'h856ac81e; DP[ 2] = 32'hc4b8779d; DP[ 3] = 32'h8b119544;
+ DP[ 4] = 32'had780837; DP[ 5] = 32'h3e920266; DP[ 6] = 32'he9d10f2e; DP[ 7] = 32'h7c1b42b2;
+ DP[ 8] = 32'hc7daca3b; DP[ 9] = 32'h7883be11; DP[ 10] = 32'ha384548d; DP[ 11] = 32'he0848b23;
+ DP[ 12] = 32'h0b62bdff; DP[ 13] = 32'h11c64350; DP[ 14] = 32'h2aa1e225; DP[ 15] = 32'h9c2bcaa7;
+ DQ[ 0] = 32'hd7ffdc71; DQ[ 1] = 32'hed01b8aa; DQ[ 2] = 32'h2f99d3a6; DQ[ 3] = 32'h8ccb4428;
+ DQ[ 4] = 32'hb1574616; DQ[ 5] = 32'hfc218e36; DQ[ 6] = 32'h4fe24f91; DQ[ 7] = 32'h9c367c42;
+ DQ[ 8] = 32'h69dfa208; DQ[ 9] = 32'h3ee3de79; DQ[ 10] = 32'h54ded59b; DQ[ 11] = 32'hcb3b487d;
+ DQ[ 12] = 32'hbcc0db4e; DQ[ 13] = 32'hb3e6678c; DQ[ 14] = 32'h3d13ec03; DQ[ 15] = 32'h99e0f684; QINV[ 0] = 32'h9a2f0db2; QINV[ 1] = 32'h4a8075a5; QINV[ 2] = 32'hb61201fa; QINV[ 3] = 32'h0e876a42;
+ QINV[ 4] = 32'h94667476; QINV[ 5] = 32'h7538b796; QINV[ 6] = 32'h8d8dfa35; QINV[ 7] = 32'h689ee4a7;
+ QINV[ 8] = 32'h6779dd63; QINV[ 9] = 32'he15b6b5e; QINV[ 10] = 32'h8275500c; QINV[ 11] = 32'he4dcd058;
+ QINV[ 12] = 32'haf54b86c; QINV[ 13] = 32'hba76dc50; QINV[ 14] = 32'h473d0d6d; QINV[ 15] = 32'ha023ba44;
+ XM[ 0] = 32'h9b067dd2; XM[ 1] = 32'hf47b497a; XM[ 2] = 32'he8044305; XM[ 3] = 32'hf74f1735;
+ XM[ 4] = 32'h494825f4; XM[ 5] = 32'h077bf4a3; XM[ 6] = 32'h637a9f36; XM[ 7] = 32'h3c3821a2;
+ XM[ 8] = 32'haa1fe167; XM[ 9] = 32'h01c7289a; XM[ 10] = 32'hb463d63d; XM[ 11] = 32'hc992252e;
+ XM[ 12] = 32'he43762bf; XM[ 13] = 32'h351d9416; XM[ 14] = 32'h10e7f813; XM[ 15] = 32'h33187c87;
+ XM[ 16] = 32'h9eb98306; XM[ 17] = 32'hb29be7b6; XM[ 18] = 32'h32b237a8; XM[ 19] = 32'h6c1d5e46;
+ XM[ 20] = 32'h1cf10b4a; XM[ 21] = 32'hd874a710; XM[ 22] = 32'h7d2df198; XM[ 23] = 32'h463701cc;
+ XM[ 24] = 32'h9b648da0; XM[ 25] = 32'hdc5d3b10; XM[ 26] = 32'hef88e7fd; XM[ 27] = 32'hcb888210;
+ XM[ 28] = 32'h24397651; XM[ 29] = 32'h9b9bd5a2; XM[ 30] = 32'hbc796763; XM[ 31] = 32'h5be48377;
+ YM[ 0] = 32'h78aba2bd; YM[ 1] = 32'h6885ed1d; YM[ 2] = 32'h0d4983a2; YM[ 3] = 32'h3b775d20;
+ YM[ 4] = 32'hf83145f4; YM[ 5] = 32'h66e52536; YM[ 6] = 32'h25c2377e; YM[ 7] = 32'h91ef1342;
+ YM[ 8] = 32'h73013f57; YM[ 9] = 32'h3862aa1a; YM[ 10] = 32'h37846437; YM[ 11] = 32'ha6ddd3c9;
+ YM[ 12] = 32'h3974d1b2; YM[ 13] = 32'h02aea3f6; YM[ 14] = 32'h2e71b229; YM[ 15] = 32'hb898d5b6;
+ YM[ 16] = 32'h71258bb8; YM[ 17] = 32'h654f94e8; YM[ 18] = 32'h5539e56e; YM[ 19] = 32'hd49567f2;
+ YM[ 20] = 32'he73efaa1; YM[ 21] = 32'h3e4e2162; YM[ 22] = 32'h772d786a; YM[ 23] = 32'hc27be96a;
+ YM[ 24] = 32'h9911c92d; YM[ 25] = 32'hddc1b0fd; YM[ 26] = 32'h829186bb; YM[ 27] = 32'h1bab454e;
+ YM[ 28] = 32'h2f9fd9ce; YM[ 29] = 32'ha57103d4; YM[ 30] = 32'h1a93390c; YM[ 31] = 32'hc0376429;
+ S[ 0] = 32'h8eb4aa6e; S[ 1] = 32'hababa077; S[ 2] = 32'h8758f3f6; S[ 3] = 32'h8282e4f4;
+ S[ 4] = 32'h747947ce; S[ 5] = 32'h9ac7dbb0; S[ 6] = 32'h9184f0b5; S[ 7] = 32'h4b572f47;
+ S[ 8] = 32'hf4807458; S[ 9] = 32'h6da8dcd4; S[ 10] = 32'h9f331c40; S[ 11] = 32'h65e2b7a2;
+ S[ 12] = 32'hd3704e85; S[ 13] = 32'h3366f4f0; S[ 14] = 32'h035044b1; S[ 15] = 32'h54758bc4;
+ S[ 16] = 32'h2a7e0970; S[ 17] = 32'hbcc7783c; S[ 18] = 32'hf62193e6; S[ 19] = 32'h5d7bb220;
+ S[ 20] = 32'hb0fcabdd; S[ 21] = 32'he6dc5c88; S[ 22] = 32'h8e4d5e53; S[ 23] = 32'haa40acba;
+ S[ 24] = 32'h1dfc9178; S[ 25] = 32'h842821bc; S[ 26] = 32'h318fc8e1; S[ 27] = 32'h0f8161fe;
+ S[ 28] = 32'hbf3d7945; S[ 29] = 32'he33612c7; S[ 30] = 32'h7eec7f9d; S[ 31] = 32'h66da2c5a;
+ end
+
+
+
+ // Clocks: with the 1ns timescale above, clk has a 10 ns period (100 MHz) and
+ // clk_bus a 20 ns period (50 MHz). Each always block below inverts its clock
+ // once per half period.
+ `define CLK_FREQUENCY_MHZ (100.0)
+ `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ)
+ `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS)
+
+ `define CLK_BUS_FREQUENCY_MHZ (50.0)
+ `define CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ)
+ `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS)
+
+ reg clk = 1'b1; // starts high, so its first edge (after half a period) is a falling edge
+ reg clk_bus = 1'b0; // starts low, so its first edge (after half a period) is a rising edge
+
+ always #`CLK_PERIOD_HALF_NS clk = ~clk;
+
+ always #`CLK_BUS_PERIOD_HALF_NS clk_bus = ~clk_bus;
+
+
+ //
+ // Reset: asserted (high) from time zero; the control routine drives it low.
+ //
+ reg rst = 1'b1;
+
+
+ //
+ // Control / Status: everything here except core_next has no initializer and is first assigned by the control routine.
+ //
+ reg [ 7:0] word_index_last_n;
+ reg [ 7:0] word_index_last_pq;
+ reg [11:0] bit_index_last_n;
+ reg [11:0] bit_index_last_pq;
+ reg core_next = 1'b0; // pulsed high for one clk period to start the core
+ wire core_valid; // driven by the UUT; polled by the control routine
+ reg core_crt_mode;
+
+
+ //
+ // System Bus: bus_cs/bus_we/bus_addr/bus_data_wr are driven only through _bus_drive.
+ //
+ reg bus_ready; // testbench-internal handshake flag (bus routine -> control routine); not a UUT port
+ reg bus_cs = 1'b0;
+ reg bus_we = 1'b0;
+ reg [11:0] bus_addr; // packed by bus_write/bus_read as {sel[1:0], bank[2:0], addr[6:0]}
+ reg [31:0] bus_data_wr;
+ wire [31:0] bus_data_rd;
+
+ wire [ 1:0] bus_addr_sel = bus_addr[11:10]; // the three field views below are not referenced elsewhere in the visible testbench
+ wire [ 2:0] bus_addr_bank = bus_addr[9:7]; // (presumably kept for waveform inspection)
+ wire [ 6:0] bus_addr_data = bus_addr[6:0];
+
+
+ //
+ // UUT: modexpng_core_top, every port connected by name to the same-purpose testbench signal declared above.
+ //
+ modexpng_core_top uut
+ (
+ .clk (clk),
+ .clk_bus (clk_bus),
+
+ .rst (rst),
+
+ .next (core_next),
+ .valid (core_valid),
+
+ .crt_mode (core_crt_mode),
+
+ .word_index_last_n (word_index_last_n),
+ .word_index_last_pq (word_index_last_pq),
+
+ .bit_index_last_n (bit_index_last_n),
+ .bit_index_last_pq (bit_index_last_pq),
+
+ .bus_cs (bus_cs),
+ .bus_we (bus_we),
+ .bus_addr (bus_addr),
+ .bus_data_wr (bus_data_wr),
+ .bus_data_rd (bus_data_rd)
+ );
+
+
+ // Routine (Bus): runs concurrently with the control/status routine below.
+ // Waits for rst to go low, writes both input bank groups over the bus and
+ // then raises bus_ready, which the control/status routine is polling.
+ initial begin
+
+ bus_ready = 1'b0;
+
+ while (rst) wait_clock_bus_tick; // poll rst once per clk_bus period
+ wait_clock_bus_ticks(10);
+ $display("Core came out of reset.");
+
+ set_input_1;
+ set_input_2;
+
+ wait_clock_bus_ticks(10);
+ bus_ready = 1'b1; // hand over to the control/status routine
+
+ end
+
+
+ // Routine (Control/Status, Bus): releases reset, waits for the bus routine
+ // to finish loading the inputs, starts the core with a one-clk 'next' pulse,
+ // waits for 'valid' and finally reads the output banks back over the bus.
+ initial begin
+
+ _wait_half_clock_tick; // clk starts high, so all stimulus below changes on falling clk edges
+ wait_clock_ticks(100);
+ rst = 1'b0;
+
+ while (!bus_ready) wait_clock_tick; // bus_ready is set by the bus routine
+ wait_clock_ticks(10);
+ $display("Core input banks written.");
+
+ word_index_last_n = CORE_NUM_WORDS_N - 1;
+ word_index_last_pq = CORE_NUM_WORDS_PQ - 1;
+
+ bit_index_last_n = TB_MODULUS_LENGTH_N - 1;
+ bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1; // same value as TB_MODULUS_LENGTH_PQ - 1
+
+ core_crt_mode = 1'b1;
+
+ core_next = 1'b1;
+ wait_clock_tick;
+ core_next = 1'b0;
+ $display("Pulsed 'next' control signal.");
+
+ while (!core_valid) wait_clock_tick;
+ wait_clock_ticks(10);
+
+ $display("Detected high 'valid' status signal.");
+ core_crt_mode = 1'bX; // drive X once the result is valid
+
+ wait_clock_ticks(10);
+ get_output;
+ wait_clock_ticks(10);
+
+ $display("Core output banks read.");
+
+ //verify; // NOTE(review): comparison against XM/YM/S is disabled, so read-back data is never checked -- confirm this is intentional
+
+ end
+
+
+ //
+ // Variables: module-scope scratch integers, available to the tasks below as loop counters.
+ //
+ integer _w, _n;
+
+
+ //
+ // set_input_1: write the M, N, N_FACTOR, N_COEFF, X and Y test vectors to
+ // their BANK_IN_1_* banks (bus select 2'd0), one bus_write() per word, using
+ // the module-level counter _w as the word index.
+ //
+ task set_input_1;
+     reg [9:0] trailer_addr; // flat {bank[2:0], word[6:0]} address of the extra N_COEFF word
+     begin
+         for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+             bus_write(2'd0, BANK_IN_1_M, _w[6:0], M[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+             bus_write(2'd0, BANK_IN_1_N, _w[6:0], N[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+             bus_write(2'd0, BANK_IN_1_N_FACTOR, _w[6:0], N_FACTOR[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+             bus_write(2'd0, BANK_IN_1_N_COEFF, _w[6:0], N_COEFF[_w]);
+         end
+         // N_COEFF carries one extra trailer word. Its address is formed as a
+         // flat offset from the bank base and then split into bank/word fields
+         // (presumably so that the word index can carry into the bank field
+         // when it does not fit in 7 bits -- confirm against BUS_OP_ADDR_W).
+         trailer_addr = BANK_IN_1_N_COEFF * 2 ** BUS_OP_ADDR_W + TB_NUM_WORDS_N;
+         bus_write(2'd0, trailer_addr[9:7], trailer_addr[6:0], N_COEFF[TB_NUM_WORDS_N]);
+         for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+             bus_write(2'd0, BANK_IN_1_X, _w[6:0], X[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+             bus_write(2'd0, BANK_IN_1_Y, _w[6:0], Y[_w]);
+         end
+     end
+ endtask
+
+
+ //
+ // set_input_2: write the P/DP, P_FACTOR, P_COEFF, Q/DQ, Q_FACTOR, Q_COEFF and
+ // QINV test vectors to their BANK_IN_2_* banks (bus select 2'd1).
+ // P and DP share BANK_IN_2_P: P uses addresses with bit 6 clear, DP those with
+ // bit 6 set; Q and DQ share BANK_IN_2_Q the same way. The *_COEFF loops run to
+ // TB_NUM_WORDS_PQ inclusive because those vectors hold one extra word.
+ //
+ task set_input_2;
+     begin
+         for (_w = 0; _w < TB_NUM_WORDS_PQ; _w = _w + 1) begin
+             bus_write(2'd1, BANK_IN_2_P, {1'b0, _w[5:0]}, P[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_PQ; _w = _w + 1) begin
+             bus_write(2'd1, BANK_IN_2_P, {1'b1, _w[5:0]}, DP[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_PQ; _w = _w + 1) begin
+             bus_write(2'd1, BANK_IN_2_P_FACTOR, _w[6:0], P_FACTOR[_w]);
+         end
+         for (_w = 0; _w <= TB_NUM_WORDS_PQ; _w = _w + 1) begin
+             bus_write(2'd1, BANK_IN_2_P_COEFF, _w[6:0], P_COEFF[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_PQ; _w = _w + 1) begin
+             bus_write(2'd1, BANK_IN_2_Q, {1'b0, _w[5:0]}, Q[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_PQ; _w = _w + 1) begin
+             bus_write(2'd1, BANK_IN_2_Q, {1'b1, _w[5:0]}, DQ[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_PQ; _w = _w + 1) begin
+             bus_write(2'd1, BANK_IN_2_Q_FACTOR, _w[6:0], Q_FACTOR[_w]);
+         end
+         for (_w = 0; _w <= TB_NUM_WORDS_PQ; _w = _w + 1) begin
+             bus_write(2'd1, BANK_IN_2_Q_COEFF, _w[6:0], Q_COEFF[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_PQ; _w = _w + 1) begin
+             bus_write(2'd1, BANK_IN_2_QINV, _w[6:0], QINV[_w]);
+         end
+     end
+ endtask
+
+
+ //
+ // get_output: read TB_NUM_WORDS_N words from each of the BANK_OUT_XM,
+ // BANK_OUT_YM and BANK_OUT_S banks (bus select 2'd2) into the matching
+ // *_READBACK array, one bus_read() per word.
+ //
+ task get_output;
+     begin
+         for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+             bus_read(2'd2, BANK_OUT_XM, _w[6:0], XM_READBACK[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+             bus_read(2'd2, BANK_OUT_YM, _w[6:0], YM_READBACK[_w]);
+         end
+         for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+             bus_read(2'd2, BANK_OUT_S, _w[6:0], S_READBACK[_w]);
+         end
+     end
+ endtask
+
+
+ //
+ // verify: compare XM_READBACK/YM_READBACK/S_READBACK word by word against the
+ // reference arrays XM/YM/S using case (in)equality, so X/Z bits count as
+ // mismatches. For every array that differs, the whole array is dumped with
+ // the XOR of each differing word pair; a one-line verdict per array follows.
+ //
+ task verify;
+     reg xm_match;
+     reg ym_match;
+     reg s_match;
+     begin
+         // A flag stays set only while every word compared so far is identical.
+         xm_match = 1'b1;
+         ym_match = 1'b1;
+         s_match = 1'b1;
+         for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+             xm_match = xm_match & (XM_READBACK[_w] === XM[_w]);
+             ym_match = ym_match & (YM_READBACK[_w] === YM[_w]);
+             s_match = s_match & (S_READBACK[_w] === S[_w]);
+         end
+
+         // Detailed dumps, only for the arrays that failed.
+         if (!xm_match) begin
+             for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+                 $write("XM / XM_READBACK [%3d] = 0x%08x / 0x%08x", _w, XM[_w], XM_READBACK[_w]);
+                 if (XM[_w] !== XM_READBACK[_w])
+                     $write(" <???: 0x%08x> ", XM[_w] ^ XM_READBACK[_w]);
+                 $write("\n");
+             end
+         end
+         if (!ym_match) begin
+             for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+                 $write("YM / YM_READBACK [%3d] = 0x%08x / 0x%08x", _w, YM[_w], YM_READBACK[_w]);
+                 if (YM[_w] !== YM_READBACK[_w])
+                     $write(" <???: 0x%08x> ", YM[_w] ^ YM_READBACK[_w]);
+                 $write("\n");
+             end
+         end
+         if (!s_match) begin
+             for (_w = 0; _w < TB_NUM_WORDS_N; _w = _w + 1) begin
+                 $write("S / S_READBACK [%3d] = 0x%08x / 0x%08x", _w, S[_w], S_READBACK[_w]);
+                 if (S[_w] !== S_READBACK[_w])
+                     $write(" <???: 0x%08x> ", S[_w] ^ S_READBACK[_w]);
+                 $write("\n");
+             end
+         end
+
+         // Per-array verdicts.
+         $write("XM is ");
+         if (!xm_match) $write("WRONG!\n");
+         else $write("OK.\n");
+
+         $write("YM is ");
+         if (!ym_match) $write("WRONG!\n");
+         else $write("OK.\n");
+
+         $write("S is ");
+         if (!s_match) $write("WRONG!\n");
+         else $write("OK.\n");
+     end
+ endtask
+
+
+
+ //
+ // _bus_drive(): schedule new values for the four bus stimulus registers
+ // with nonblocking assignments, so they all update together at the end of
+ // the current time step.
+ //
+ task _bus_drive;
+     input cs;
+     input we;
+     input [11:0] addr;
+     input [31:0] data;
+     begin
+         bus_cs <= cs;
+         bus_we <= we;
+         bus_addr <= addr;
+         bus_data_wr <= data;
+     end
+ endtask
+
+
+ //
+ // bus_write(): one write cycle. Asserts bus_cs and bus_we with address
+ // {sel, bank, addr} and the given data for one clk_bus period, then
+ // deasserts both strobes and drives X on the address and data lines.
+ //
+ task bus_write;
+     input [ 1:0] sel;
+     input [ 2:0] bank;
+     input [ 6:0] addr;
+     input [31:0] data;
+     reg [11:0] full_addr;
+     begin
+         full_addr = {sel, bank, addr};
+         _bus_drive(1'b1, 1'b1, full_addr, data);
+         wait_clock_bus_tick;
+         _bus_drive(1'b0, 1'b0, {12{1'bx}}, {32{1'bx}});
+     end
+ endtask
+
+
+ //
+ // bus_read(): one read cycle. Asserts bus_cs (bus_we low) with address
+ // {sel, bank, addr}, waits one clk_bus period, captures bus_data_rd into
+ // 'data', then deasserts bus_cs and drives X on the address lines.
+ // The write-data lines are X for the whole cycle.
+ //
+ task bus_read;
+     input [ 1:0] sel;
+     input [ 2:0] bank;
+     input [ 6:0] addr;
+     output [31:0] data;
+     reg [11:0] full_addr;
+     begin
+         full_addr = {sel, bank, addr};
+         _bus_drive(1'b1, 1'b0, full_addr, {32{1'bx}});
+         wait_clock_bus_tick;
+         data = bus_data_rd;
+         _bus_drive(1'b0, 1'b0, {12{1'bx}}, {32{1'bx}});
+     end
+ endtask
+
+
+ //
+ // _wait_half_clock_tick(): block the caller for half a clk period.
+ //
+ task _wait_half_clock_tick;
+     begin
+         #`CLK_PERIOD_HALF_NS;
+     end
+ endtask
+
+ //
+ // wait_clock_tick(): block the caller for one full clk period, built from
+ // two consecutive half-period waits.
+ //
+ task wait_clock_tick;
+     repeat (2) _wait_half_clock_tick;
+ endtask
+
+
+ //
+ // wait_clock_bus_tick(): block the caller for one full clk_bus period.
+ //
+ task wait_clock_bus_tick;
+     begin
+         #`CLK_BUS_PERIOD_NS;
+     end
+ endtask
+
+
+ //
+ // wait_clock_ticks(): block the caller for num_ticks full clk periods
+ // (zero or negative counts return immediately).
+ //
+ task wait_clock_ticks;
+     input integer num_ticks;
+     // Task-local counter instead of a module-scope integer: this task and
+     // wait_clock_bus_ticks are called from two different, concurrently
+     // running initial blocks, and a counter shared between them would be
+     // corrupted as soon as the two waits overlapped in simulation time.
+     integer tick;
+     begin
+         for (tick = 0; tick < num_ticks; tick = tick + 1)
+             wait_clock_tick;
+     end
+ endtask
+
+
+ //
+ // wait_clock_bus_ticks(): block the caller for num_ticks full clk_bus
+ // periods (zero or negative counts return immediately).
+ //
+ task wait_clock_bus_ticks;
+     input integer num_ticks;
+     // Task-local counter instead of a module-scope integer: this task and
+     // wait_clock_ticks are called from two different, concurrently running
+     // initial blocks, and a counter shared between them would be corrupted
+     // as soon as the two waits overlapped in simulation time.
+     integer tick;
+     begin
+         for (tick = 0; tick < num_ticks; tick = tick + 1)
+             wait_clock_bus_tick;
+     end
+ endtask
+
+endmodule
diff --git a/bench/tb_mmm_dual_x8.v b/bench/tb_mmm_dual_x8.v
new file mode 100644
index 0000000..7e54d09
--- /dev/null
+++ b/bench/tb_mmm_dual_x8.v
@@ -0,0 +1,940 @@
+`timescale 1ns / 1ps
+
+module tb_mmm_dual_x8;
+
+
+ //
+ // Headers
+ //
+ `include "../rtl/modexpng_parameters.vh"
+ //`include "../rtl_1/modexpng_mmm_fsm_old.vh"
+
+
+ //
+ // Clock
+ //
+ `define CLK_FREQUENCY_MHZ (100.0)
+ `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ)
+ `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS)
+
+ reg clk = 1'b0;
+
+ always begin
+ #`CLK_PERIOD_HALF_NS clk = 1'b1;
+ #`CLK_PERIOD_HALF_NS clk = 1'b0;
+ end
+
+
+ //
+ // Reset
+ //
+ reg rst = 1'b1;
+
+
+ //
+ // Test Vectors
+ //
+ localparam PQ_NUM_WORDS = 32;
+ localparam [OP_ADDR_W -1:0] PQ_WORD_INDEX_LAST = PQ_NUM_WORDS - 1;
+
+ localparam P_LADDER_MODE = 1'b0;
+
+ reg [WORD_EXT_W -1:0] P_T1 [0:PQ_NUM_WORDS -1];
+ reg [WORD_EXT_W -1:0] P_T2 [0:PQ_NUM_WORDS -1];
+
+ reg [WORD_EXT_W -1:0] P_N [0:PQ_NUM_WORDS -1];
+ reg [WORD_EXT_W -1:0] P_N_COEFF[0:PQ_NUM_WORDS ];
+
+ reg [WORD_EXT_W -1:0] P_X_AB [0:2*PQ_NUM_WORDS -1];
+ reg [WORD_EXT_W -1:0] P_Y_AB [0:2*PQ_NUM_WORDS -1];
+
+ reg [WORD_EXT_W -1:0] P_X_Q [0:PQ_NUM_WORDS];
+ reg [WORD_EXT_W -1:0] P_Y_Q [0:PQ_NUM_WORDS];
+
+ reg [WORD_EXT_W -1:0] P_X_M [0:2*PQ_NUM_WORDS];
+ reg [WORD_EXT_W -1:0] P_Y_M [0:2*PQ_NUM_WORDS];
+
+ reg [WORD_EXT_W -1:0] P_X [0:PQ_NUM_WORDS -1];
+ reg [WORD_EXT_W -1:0] P_Y [0:PQ_NUM_WORDS -1];
+
+
+ //
+ // Test Vector Components
+ //
+ initial begin
+ //
+ P_T1[ 0] = 18'h191c5; P_T1[ 1] = 18'h1a118; P_T1[ 2] = 18'h16e06; P_T1[ 3] = 18'h0ea68;
+ P_T1[ 4] = 18'h12944; P_T1[ 5] = 18'h0c242; P_T1[ 6] = 18'h2fc64; P_T1[ 7] = 18'h14efc;
+ P_T1[ 8] = 18'h113da; P_T1[ 9] = 18'h16ff7; P_T1[10] = 18'h1ef0c; P_T1[11] = 18'h18580;
+ P_T1[12] = 18'h1a62c; P_T1[13] = 18'h352b7; P_T1[14] = 18'h114f4; P_T1[15] = 18'h1c53e;
+ P_T1[16] = 18'h0c63e; P_T1[17] = 18'h0dd14; P_T1[18] = 18'h2fba8; P_T1[19] = 18'h1b8e4;
+ P_T1[20] = 18'h2d944; P_T1[21] = 18'h10290; P_T1[22] = 18'h1d276; P_T1[23] = 18'h327b0;
+ P_T1[24] = 18'h1c0c4; P_T1[25] = 18'h100a8; P_T1[26] = 18'h2a9ab; P_T1[27] = 18'h0e694;
+ P_T1[28] = 18'h10798; P_T1[29] = 18'h1ae91; P_T1[30] = 18'h38d4c; P_T1[31] = 18'h00808;
+ //
+ P_T2[ 0] = 18'h1193b; P_T2[ 1] = 18'h0de9c; P_T2[ 2] = 18'h0b993; P_T2[ 3] = 18'h0d2cd;
+ P_T2[ 4] = 18'h106ad; P_T2[ 5] = 18'h076da; P_T2[ 6] = 18'h10cab; P_T2[ 7] = 18'h15cd5;
+ P_T2[ 8] = 18'h15425; P_T2[ 9] = 18'h16287; P_T2[10] = 18'h0fd64; P_T2[11] = 18'h06ee0;
+ P_T2[12] = 18'h1b0c9; P_T2[13] = 18'h01a5e; P_T2[14] = 18'h1855c; P_T2[15] = 18'h17bf9;
+ P_T2[16] = 18'h1c83c; P_T2[17] = 18'h158ed; P_T2[18] = 18'h086df; P_T2[19] = 18'h16676;
+ P_T2[20] = 18'h0a0f8; P_T2[21] = 18'h14545; P_T2[22] = 18'h09641; P_T2[23] = 18'h16863;
+ P_T2[24] = 18'h17e20; P_T2[25] = 18'h0d457; P_T2[26] = 18'h05a9b; P_T2[27] = 18'h1a4cf;
+ P_T2[28] = 18'h1582a; P_T2[29] = 18'h1686c; P_T2[30] = 18'h1394e; P_T2[31] = 18'h0bdbc;
+ //
+ P_N[ 0] = 18'h00f97; P_N[ 1] = 18'h018bb; P_N[ 2] = 18'h08a44; P_N[ 3] = 18'h00858;
+ P_N[ 4] = 18'h06647; P_N[ 5] = 18'h0042c; P_N[ 6] = 18'h0fa09; P_N[ 7] = 18'h0c8d3;
+ P_N[ 8] = 18'h0bbc7; P_N[ 9] = 18'h0e2dd; P_N[10] = 18'h017fd; P_N[11] = 18'h0ef4a;
+ P_N[12] = 18'h002ef; P_N[13] = 18'h090c1; P_N[14] = 18'h032db; P_N[15] = 18'h028b1;
+ P_N[16] = 18'h05f0a; P_N[17] = 18'h0ebfd; P_N[18] = 18'h017ca; P_N[19] = 18'h09587;
+ P_N[20] = 18'h0d266; P_N[21] = 18'h0563c; P_N[22] = 18'h041af; P_N[23] = 18'h0433f;
+ P_N[24] = 18'h08e83; P_N[25] = 18'h0bc19; P_N[26] = 18'h000b2; P_N[27] = 18'h05b53;
+ P_N[28] = 18'h00e5d; P_N[29] = 18'h09bc5; P_N[30] = 18'h0a822; P_N[31] = 18'h0efff;
+ //
+ P_N_COEFF[ 0] = 18'h09fd9; P_N_COEFF[ 1] = 18'h0b367; P_N_COEFF[ 2] = 18'h0e467; P_N_COEFF[ 3] = 18'h0de24;
+ P_N_COEFF[ 4] = 18'h02022; P_N_COEFF[ 5] = 18'h0f0e8; P_N_COEFF[ 6] = 18'h02919; P_N_COEFF[ 7] = 18'h09901;
+ P_N_COEFF[ 8] = 18'h0da43; P_N_COEFF[ 9] = 18'h0023b; P_N_COEFF[10] = 18'h0ebf8; P_N_COEFF[11] = 18'h0f04e;
+ P_N_COEFF[12] = 18'h0942f; P_N_COEFF[13] = 18'h029e9; P_N_COEFF[14] = 18'h07cb0; P_N_COEFF[15] = 18'h08c25;
+ P_N_COEFF[16] = 18'h04e60; P_N_COEFF[17] = 18'h05cdc; P_N_COEFF[18] = 18'h0dff7; P_N_COEFF[19] = 18'h0279b;
+ P_N_COEFF[20] = 18'h0610d; P_N_COEFF[21] = 18'h0f04a; P_N_COEFF[22] = 18'h001dc; P_N_COEFF[23] = 18'h03429;
+ P_N_COEFF[24] = 18'h0f78c; P_N_COEFF[25] = 18'h0c3e2; P_N_COEFF[26] = 18'h00ed8; P_N_COEFF[27] = 18'h039c0;
+ P_N_COEFF[28] = 18'h02ac2; P_N_COEFF[29] = 18'h0f703; P_N_COEFF[30] = 18'h0c54e; P_N_COEFF[31] = 18'h022d9;
+ P_N_COEFF[32] = 18'h0f994;
+ //
+ P_X_AB[ 0] = 18'h0c199; P_X_AB[ 1] = 18'h0957a; P_X_AB[ 2] = 18'h070ad; P_X_AB[ 3] = 18'h0e5a6;
+ P_X_AB[ 4] = 18'h0fec9; P_X_AB[ 5] = 18'h00b73; P_X_AB[ 6] = 18'h09c72; P_X_AB[ 7] = 18'h0cdf0;
+ P_X_AB[ 8] = 18'h08755; P_X_AB[ 9] = 18'h07560; P_X_AB[10] = 18'h084b1; P_X_AB[11] = 18'h0ad3f;
+ P_X_AB[12] = 18'h074fe; P_X_AB[13] = 18'h04d74; P_X_AB[14] = 18'h00e16; P_X_AB[15] = 18'h0d3b3;
+ P_X_AB[16] = 18'h0d418; P_X_AB[17] = 18'h02f12; P_X_AB[18] = 18'h0c301; P_X_AB[19] = 18'h0be2b;
+ P_X_AB[20] = 18'h08222; P_X_AB[21] = 18'h0056c; P_X_AB[22] = 18'h01c7c; P_X_AB[23] = 18'h0bc95;
+ P_X_AB[24] = 18'h03427; P_X_AB[25] = 18'h0c65a; P_X_AB[26] = 18'h089ac; P_X_AB[27] = 18'h02117;
+ P_X_AB[28] = 18'h0ff7d; P_X_AB[29] = 18'h01cde; P_X_AB[30] = 18'h02709; P_X_AB[31] = 18'h01c56;
+ P_X_AB[32] = 18'h0f35a; P_X_AB[33] = 18'h08ce6; P_X_AB[34] = 18'h0a8e5; P_X_AB[35] = 18'h0d6d4;
+ P_X_AB[36] = 18'h06868; P_X_AB[37] = 18'h09105; P_X_AB[38] = 18'h0219e; P_X_AB[39] = 18'h0bc40;
+ P_X_AB[40] = 18'h00e0a; P_X_AB[41] = 18'h07783; P_X_AB[42] = 18'h0187a; P_X_AB[43] = 18'h0b922;
+ P_X_AB[44] = 18'h02609; P_X_AB[45] = 18'h0c64b; P_X_AB[46] = 18'h06b4b; P_X_AB[47] = 18'h04b79;
+ P_X_AB[48] = 18'h0fed6; P_X_AB[49] = 18'h03eac; P_X_AB[50] = 18'h04cac; P_X_AB[51] = 18'h0d47d;
+ P_X_AB[52] = 18'h045fd; P_X_AB[53] = 18'h04fa8; P_X_AB[54] = 18'h0597c; P_X_AB[55] = 18'h0a10d;
+ P_X_AB[56] = 18'h0bf44; P_X_AB[57] = 18'h08671; P_X_AB[58] = 18'h0112a; P_X_AB[59] = 18'h08ccf;
+ P_X_AB[60] = 18'h0cae5; P_X_AB[61] = 18'h04d94; P_X_AB[62] = 18'h0b95a; P_X_AB[63] = 18'h00040;
+ //
+ P_X_Q[ 0] = 18'h021b1; P_X_Q[ 1] = 18'h0d2db; P_X_Q[ 2] = 18'h0754b; P_X_Q[ 3] = 18'h01fc1;
+ P_X_Q[ 4] = 18'h063f7; P_X_Q[ 5] = 18'h086e5; P_X_Q[ 6] = 18'h0bcea; P_X_Q[ 7] = 18'h02260;
+ P_X_Q[ 8] = 18'h0c54c; P_X_Q[ 9] = 18'h0e298; P_X_Q[10] = 18'h05d07; P_X_Q[11] = 18'h0f978;
+ P_X_Q[12] = 18'h0e742; P_X_Q[13] = 18'h0a3f0; P_X_Q[14] = 18'h0b31e; P_X_Q[15] = 18'h041b7;
+ P_X_Q[16] = 18'h06ed9; P_X_Q[17] = 18'h03ac5; P_X_Q[18] = 18'h0f8eb; P_X_Q[19] = 18'h0c619;
+ P_X_Q[20] = 18'h067e9; P_X_Q[21] = 18'h00350; P_X_Q[22] = 18'h00376; P_X_Q[23] = 18'h02ebf;
+ P_X_Q[24] = 18'h0b125; P_X_Q[25] = 18'h05f7d; P_X_Q[26] = 18'h0f121; P_X_Q[27] = 18'h07ba4;
+ P_X_Q[28] = 18'h03050; P_X_Q[29] = 18'h0642e; P_X_Q[30] = 18'h0c2fc; P_X_Q[31] = 18'h0dfcf;
+ P_X_Q[32] = 18'h03f9e;
+ //
+ P_X_M[ 0] = 18'h03e67; P_X_M[ 1] = 18'h06a85; P_X_M[ 2] = 18'h08f52; P_X_M[ 3] = 18'h01a59;
+ P_X_M[ 4] = 18'h00136; P_X_M[ 5] = 18'h0f48c; P_X_M[ 6] = 18'h0638d; P_X_M[ 7] = 18'h0320f;
+ P_X_M[ 8] = 18'h078aa; P_X_M[ 9] = 18'h08a9f; P_X_M[10] = 18'h07b4e; P_X_M[11] = 18'h052c0;
+ P_X_M[12] = 18'h08b01; P_X_M[13] = 18'h0b28b; P_X_M[14] = 18'h0f1e9; P_X_M[15] = 18'h02c4c;
+ P_X_M[16] = 18'h02be7; P_X_M[17] = 18'h0d0ed; P_X_M[18] = 18'h03cfe; P_X_M[19] = 18'h041d4;
+ P_X_M[20] = 18'h07ddd; P_X_M[21] = 18'h0fa93; P_X_M[22] = 18'h0e383; P_X_M[23] = 18'h0436a;
+ P_X_M[24] = 18'h0cbd8; P_X_M[25] = 18'h039a5; P_X_M[26] = 18'h07653; P_X_M[27] = 18'h0dee8;
+ P_X_M[28] = 18'h00082; P_X_M[29] = 18'h0e321; P_X_M[30] = 18'h0d8f6; P_X_M[31] = 18'h0e3a9;
+ P_X_M[32] = 18'h00ca5; P_X_M[33] = 18'h035ed; P_X_M[34] = 18'h02b8f; P_X_M[35] = 18'h063bd;
+ P_X_M[36] = 18'h0ec9f; P_X_M[37] = 18'h0b8bb; P_X_M[38] = 18'h00389; P_X_M[39] = 18'h0ca27;
+ P_X_M[40] = 18'h0bea7; P_X_M[41] = 18'h0df1e; P_X_M[42] = 18'h0d685; P_X_M[43] = 18'h0cc1b;
+ P_X_M[44] = 18'h036c4; P_X_M[45] = 18'h01ce9; P_X_M[46] = 18'h0c43b; P_X_M[47] = 18'h05f58;
+ P_X_M[48] = 18'h02c77; P_X_M[49] = 18'h03a12; P_X_M[50] = 18'h0eea8; P_X_M[51] = 18'h0ac31;
+ P_X_M[52] = 18'h05838; P_X_M[53] = 18'h093ac; P_X_M[54] = 18'h0fd54; P_X_M[55] = 18'h06e13;
+ P_X_M[56] = 18'h002e2; P_X_M[57] = 18'h06af4; P_X_M[58] = 18'h0ea18; P_X_M[59] = 18'h083b3;
+ P_X_M[60] = 18'h059f7; P_X_M[61] = 18'h016d3; P_X_M[62] = 18'h0c3ad; P_X_M[63] = 18'h0dbfc;
+ P_X_M[64] = 18'h03ba4;
+ //
+ P_Y_AB[ 0] = 18'h0d567; P_Y_AB[ 1] = 18'h0dbf1; P_Y_AB[ 2] = 18'h024b3; P_Y_AB[ 3] = 18'h0bb34;
+ P_Y_AB[ 4] = 18'h03ad4; P_Y_AB[ 5] = 18'h08997; P_Y_AB[ 6] = 18'h0d369; P_Y_AB[ 7] = 18'h0ebbc;
+ P_Y_AB[ 8] = 18'h09502; P_Y_AB[ 9] = 18'h01b76; P_Y_AB[10] = 18'h0a28f; P_Y_AB[11] = 18'h0c577;
+ P_Y_AB[12] = 18'h05f2f; P_Y_AB[13] = 18'h08c45; P_Y_AB[14] = 18'h0dbb8; P_Y_AB[15] = 18'h036bf;
+ P_Y_AB[16] = 18'h05086; P_Y_AB[17] = 18'h0437e; P_Y_AB[18] = 18'h08e3d; P_Y_AB[19] = 18'h0ec97;
+ P_Y_AB[20] = 18'h0195c; P_Y_AB[21] = 18'h02e75; P_Y_AB[22] = 18'h0d94f; P_Y_AB[23] = 18'h0ce1e;
+ P_Y_AB[24] = 18'h0fd8d; P_Y_AB[25] = 18'h0ec03; P_Y_AB[26] = 18'h058a0; P_Y_AB[27] = 18'h05fc4;
+ P_Y_AB[28] = 18'h0f83f; P_Y_AB[29] = 18'h09a60; P_Y_AB[30] = 18'h0f047; P_Y_AB[31] = 18'h05ee6;
+ P_Y_AB[32] = 18'h02a39; P_Y_AB[33] = 18'h08b08; P_Y_AB[34] = 18'h0f66d; P_Y_AB[35] = 18'h0b2fb;
+ P_Y_AB[36] = 18'h02f3f; P_Y_AB[37] = 18'h092b2; P_Y_AB[38] = 18'h09b4e; P_Y_AB[39] = 18'h0ce4f;
+ P_Y_AB[40] = 18'h04428; P_Y_AB[41] = 18'h00483; P_Y_AB[42] = 18'h0f595; P_Y_AB[43] = 18'h031cb;
+ P_Y_AB[44] = 18'h0d292; P_Y_AB[45] = 18'h0ded9; P_Y_AB[46] = 18'h0ef15; P_Y_AB[47] = 18'h0da51;
+ P_Y_AB[48] = 18'h0ed93; P_Y_AB[49] = 18'h03969; P_Y_AB[50] = 18'h05efc; P_Y_AB[51] = 18'h004e7;
+ P_Y_AB[52] = 18'h09434; P_Y_AB[53] = 18'h02b91; P_Y_AB[54] = 18'h0d3db; P_Y_AB[55] = 18'h0c4cf;
+ P_Y_AB[56] = 18'h09d34; P_Y_AB[57] = 18'h0cea8; P_Y_AB[58] = 18'h0de0d; P_Y_AB[59] = 18'h0f190;
+ P_Y_AB[60] = 18'h0b95a; P_Y_AB[61] = 18'h0bd8a; P_Y_AB[62] = 18'h079a6; P_Y_AB[63] = 18'h005f6;
+ //
+ P_Y_Q[ 0] = 18'h0dd4f; P_Y_Q[ 1] = 18'h084f9; P_Y_Q[ 2] = 18'h00105; P_Y_Q[ 3] = 18'h0cdff;
+ P_Y_Q[ 4] = 18'h0973c; P_Y_Q[ 5] = 18'h0440c; P_Y_Q[ 6] = 18'h0450b; P_Y_Q[ 7] = 18'h09e70;
+ P_Y_Q[ 8] = 18'h0d686; P_Y_Q[ 9] = 18'h0e21a; P_Y_Q[10] = 18'h02d26; P_Y_Q[11] = 18'h0b117;
+ P_Y_Q[12] = 18'h08556; P_Y_Q[13] = 18'h002ee; P_Y_Q[14] = 18'h0083d; P_Y_Q[15] = 18'h079fa;
+ P_Y_Q[16] = 18'h0f25d; P_Y_Q[17] = 18'h0cd26; P_Y_Q[18] = 18'h0bb7e; P_Y_Q[19] = 18'h07676;
+ P_Y_Q[20] = 18'h0f4bb; P_Y_Q[21] = 18'h02b87; P_Y_Q[22] = 18'h02909; P_Y_Q[23] = 18'h05e2d;
+ P_Y_Q[24] = 18'h09c80; P_Y_Q[25] = 18'h098f3; P_Y_Q[26] = 18'h0f08b; P_Y_Q[27] = 18'h0255b;
+ P_Y_Q[28] = 18'h0fbe5; P_Y_Q[29] = 18'h0ae8e; P_Y_Q[30] = 18'h0ba22; P_Y_Q[31] = 18'h0f2ea;
+ P_Y_Q[32] = 18'h0530e;
+ //
+ P_Y_M[ 0] = 18'h02a99; P_Y_M[ 1] = 18'h0240e; P_Y_M[ 2] = 18'h0db4c; P_Y_M[ 3] = 18'h044cb;
+ P_Y_M[ 4] = 18'h0c52b; P_Y_M[ 5] = 18'h07668; P_Y_M[ 6] = 18'h02c96; P_Y_M[ 7] = 18'h01443;
+ P_Y_M[ 8] = 18'h06afd; P_Y_M[ 9] = 18'h0e489; P_Y_M[10] = 18'h05d70; P_Y_M[11] = 18'h03a88;
+ P_Y_M[12] = 18'h0a0d0; P_Y_M[13] = 18'h073ba; P_Y_M[14] = 18'h02447; P_Y_M[15] = 18'h0c940;
+ P_Y_M[16] = 18'h0af79; P_Y_M[17] = 18'h0bc81; P_Y_M[18] = 18'h071c2; P_Y_M[19] = 18'h01368;
+ P_Y_M[20] = 18'h0e6a3; P_Y_M[21] = 18'h0d18a; P_Y_M[22] = 18'h026b0; P_Y_M[23] = 18'h031e1;
+ P_Y_M[24] = 18'h00272; P_Y_M[25] = 18'h013fc; P_Y_M[26] = 18'h0a75f; P_Y_M[27] = 18'h0a03b;
+ P_Y_M[28] = 18'h007c0; P_Y_M[29] = 18'h0659f; P_Y_M[30] = 18'h00fb8; P_Y_M[31] = 18'h0a119;
+ P_Y_M[32] = 18'h0d5c6; P_Y_M[33] = 18'h09926; P_Y_M[34] = 18'h0d69f; P_Y_M[35] = 18'h085cd;
+ P_Y_M[36] = 18'h0591a; P_Y_M[37] = 18'h0e6dd; P_Y_M[38] = 18'h0981f; P_Y_M[39] = 18'h087b4;
+ P_Y_M[40] = 18'h015b3; P_Y_M[41] = 18'h09421; P_Y_M[42] = 18'h0ea9d; P_Y_M[43] = 18'h013af;
+ P_Y_M[44] = 18'h096ac; P_Y_M[45] = 18'h06f86; P_Y_M[46] = 18'h0cab7; P_Y_M[47] = 18'h06ab1;
+ P_Y_M[48] = 18'h0903e; P_Y_M[49] = 18'h06203; P_Y_M[50] = 18'h0751a; P_Y_M[51] = 18'h02fce;
+ P_Y_M[52] = 18'h0d0c9; P_Y_M[53] = 18'h00522; P_Y_M[54] = 18'h096f8; P_Y_M[55] = 18'h03aee;
+ P_Y_M[56] = 18'h0a034; P_Y_M[57] = 18'h0e52e; P_Y_M[58] = 18'h07b7b; P_Y_M[59] = 18'h06bad;
+ P_Y_M[60] = 18'h0016d; P_Y_M[61] = 18'h01315; P_Y_M[62] = 18'h02586; P_Y_M[63] = 18'h0e73a;
+ P_Y_M[64] = 18'h04ddd;
+ //
+ P_X[ 0] = 18'h0c2d4; P_X[ 1] = 18'h0d474; P_X[ 2] = 18'h13a91; P_X[ 3] = 18'h15507;
+ P_X[ 4] = 18'h149c0; P_X[ 5] = 18'h02527; P_X[ 6] = 18'h18667; P_X[ 7] = 18'h0ccb1;
+ P_X[ 8] = 18'h156a1; P_X[ 9] = 18'h0eeff; P_X[10] = 18'h1853d; P_X[11] = 18'h05ccd;
+ P_X[12] = 18'h0e334; P_X[13] = 18'h12f86; P_X[14] = 18'h0aad1; P_X[15] = 18'h12b4d;
+ P_X[16] = 18'h078be; P_X[17] = 18'h13b54; P_X[18] = 18'h180ae; P_X[19] = 18'h09e35;
+ P_X[20] = 18'h0e354; P_X[21] = 18'h156d0; P_X[22] = 18'h10f20; P_X[23] = 18'h0c226;
+ P_X[24] = 18'h0f165; P_X[25] = 18'h0fb42; P_X[26] = 18'h11082; P_X[27] = 18'h124dc;
+ P_X[28] = 18'h06467; P_X[29] = 18'h17d07; P_X[30] = 18'h0dc3c; P_X[31] = 18'h03ba4;
+ //
+ P_Y[ 0] = 18'h1242f; P_Y[ 1] = 18'h1cd0c; P_Y[ 2] = 18'h138c8; P_Y[ 3] = 18'h08859;
+ P_Y[ 4] = 18'h1798f; P_Y[ 5] = 18'h1336d; P_Y[ 6] = 18'h15603; P_Y[ 7] = 18'h059db;
+ P_Y[ 8] = 18'h098a4; P_Y[ 9] = 18'h1e032; P_Y[10] = 18'h0457a; P_Y[11] = 18'h1693e;
+ P_Y[12] = 18'h14e5f; P_Y[13] = 18'h1b9cc; P_Y[14] = 18'h14502; P_Y[15] = 18'h17dd1;
+ P_Y[16] = 18'h09b6c; P_Y[17] = 18'h0d416; P_Y[18] = 18'h034b5; P_Y[19] = 18'h164fd;
+ P_Y[20] = 18'h030b3; P_Y[21] = 18'h16ad3; P_Y[22] = 18'h0ffbd; P_Y[23] = 18'h13d68;
+ P_Y[24] = 18'h1b3d6; P_Y[25] = 18'h15988; P_Y[26] = 18'h15d3d; P_Y[27] = 18'h0bac7;
+ P_Y[28] = 18'h0d09f; P_Y[29] = 18'h09f2c; P_Y[30] = 18'h0ed30; P_Y[31] = 18'h04ddd;
+ //
+ end
+
+
+ //
+ // Enable, Ready (handshake pair wired to uut.ena / uut.rdy)
+ //
+ reg ena = 1'b0; // driven by the test script, idle low
+ wire rdy; // polled by the test script after ena has been pulsed
+
+
+ //
+ // Settings (values handed to the UUT and the reductor)
+ //
+ reg p_ladder_mode; // wired to uut.ladder_mode
+ reg [OP_ADDR_W -1:0] word_index_last; // wired to uut and reductor .word_index_last
+ reg [OP_ADDR_W -1:0] word_index_last_minus1; // script sets this to word_index_last - 1
+
+
+ //
+ // Script (main stimulus: settle, prefill storage, run the UUT once, verify)
+ //
+ integer i; // module-level loop counter shared by p_prefill and the p_verify_* tasks
+ initial begin
+ // wait 10 clock periods, drive rst low, wait 10 more (rst is declared outside this excerpt)
+ wait_clock_ticks(10);
+ rst = 1'b0;
+ wait_clock_ticks(10);
+ // operand length settings consumed by the UUT and the reductor
+ word_index_last = PQ_WORD_INDEX_LAST;
+ word_index_last_minus1 = word_index_last - 1'b1;
+ // write the P_T1/P_T2, P_N and P_N_COEFF vectors into storage
+ p_prefill;
+ // ladder mode is only assigned here, i.e. after the prefill has finished
+ p_ladder_mode = P_LADDER_MODE;
+
+ wait_clock_ticks(10);
+ // pulse ena for exactly one clock period
+ ena = 1'b1;
+ wait_clock_ticks(1);
+ ena = 1'b0;
+ // poll rdy once per clock period; there is no timeout, so this spins forever if rdy never rises
+ while (!rdy)
+ wait_clock_ticks(1);
+ // NOTE(review): 1000-period margin before checking, presumably to let the reductor finish - confirm
+ wait_clock_ticks(1000);
+ p_verify_ab;
+ p_verify_q;
+ p_verify_m;
+ p_verify_p;
+ end
+
+
+ //
+ // Storage Interfaces
+ //
+ wire wr_wide_xy_ena; // wr_wide_*: shared by storage_manager and storage_block
+ wire [BANK_ADDR_W -1:0] wr_wide_xy_bank;
+ wire [ OP_ADDR_W -1:0] wr_wide_xy_addr;
+ wire [ WORD_EXT_W -1:0] wr_wide_x_din;
+ wire [ WORD_EXT_W -1:0] wr_wide_y_din;
+ // wr_narrow_*: shared by storage_manager and storage_block
+ wire wr_narrow_xy_ena;
+ wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
+ wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr;
+ wire [ WORD_EXT_W -1:0] wr_narrow_x_din;
+ wire [ WORD_EXT_W -1:0] wr_narrow_y_din;
+ // rd_wide_*: between uut and storage_block; the *_aux bank/addr/dout nets also go to the reductor
+ wire rd_wide_xy_ena;
+ wire rd_wide_xy_ena_aux;
+ wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank;
+ wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
+ wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr; // NUM_MULTS_HALF addresses packed side by side
+ wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
+ wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout; // NUM_MULTS_HALF words packed side by side
+ wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout;
+ wire [ WORD_EXT_W -1:0] rd_wide_x_dout_aux;
+ wire [ WORD_EXT_W -1:0] rd_wide_y_dout_aux;
+ // rd_narrow_*: between uut and storage_block
+ wire rd_narrow_xy_ena;
+ wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
+ wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
+ wire [ WORD_EXT_W -1:0] rd_narrow_x_dout;
+ wire [ WORD_EXT_W -1:0] rd_narrow_y_dout;
+ // ext_wide_*: testbench-driven (task p_prefill) inputs of storage_manager
+ reg ext_wide_xy_ena = 1'b0;
+ reg [BANK_ADDR_W -1:0] ext_wide_xy_bank;
+ reg [ OP_ADDR_W -1:0] ext_wide_xy_addr;
+ reg [ WORD_EXT_W -1:0] ext_wide_x_din;
+ reg [ WORD_EXT_W -1:0] ext_wide_y_din;
+ // ext_narrow_*: testbench-driven (task p_prefill) inputs of storage_manager
+ reg ext_narrow_xy_ena = 1'b0;
+ reg [BANK_ADDR_W -1:0] ext_narrow_xy_bank;
+ reg [ OP_ADDR_W -1:0] ext_narrow_xy_addr;
+ reg [ WORD_EXT_W -1:0] ext_narrow_x_din;
+ reg [ WORD_EXT_W -1:0] ext_narrow_y_din;
+
+ //
+ // Recombinator Interface (uut rcmb_* ports; wide/narrow also feed storage_manager)
+ //
+ wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
+ wire [ OP_ADDR_W -1:0] rcmb_wide_xy_addr;
+ wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout;
+ wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout;
+ wire rcmb_wide_xy_valid; // connected to storage_manager.rcmb_wide_xy_ena
+
+ wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
+ wire [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
+ wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout;
+ wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout;
+ wire rcmb_narrow_xy_valid; // connected to storage_manager.rcmb_narrow_xy_ena
+ // rcmb_final_*: uut .rcmb_xy_* ports wired to the reductor .rcmb_final_* ports
+ wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank;
+ wire [ OP_ADDR_W -1:0] rcmb_final_xy_addr;
+ wire [ WORD_EXT_W -1:0] rcmb_final_x_dout;
+ wire [ WORD_EXT_W -1:0] rcmb_final_y_dout;
+ wire rcmb_final_xy_valid;
+
+ //
+ // Reductor Interface (reductor rdct_* ports; only read by the debug interceptor in this testbench)
+ //
+ wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank;
+ wire [ OP_ADDR_W -1:0] rdct_wide_xy_addr;
+ wire [ WORD_EXT_W -1:0] rdct_wide_x_dout;
+ wire [ WORD_EXT_W -1:0] rdct_wide_y_dout;
+ wire rdct_wide_xy_valid;
+
+ wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank;
+ wire [ OP_ADDR_W -1:0] rdct_narrow_xy_addr;
+ wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout;
+ wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout;
+ wire rdct_narrow_xy_valid;
+
+ //
+ // Reductor Control/Status (uut.rdct_ena/rdct_rdy wired to reductor.ena/rdy)
+ //
+ wire rdct_ena;
+ wire rdct_rdy;
+
+ //
+ // UUT (the modexpng_mmm_dual instance exercised by this testbench)
+ //
+ modexpng_mmm_dual uut
+ (
+ .clk (clk),
+ .rst (rst),
+ // start/done handshake driven and polled by the test script
+ .ena (ena),
+ .rdy (rdy),
+ // run-time settings; force_unity_b and only_reduce are tied to 0 here
+ .ladder_mode (p_ladder_mode),
+ .word_index_last (word_index_last),
+ .word_index_last_minus1 (word_index_last_minus1),
+ .force_unity_b (1'b0),
+ .only_reduce (1'b0),
+ // input bank selection: the same banks task p_prefill fills with P_T1/P_T2
+ .sel_wide_in (BANK_WIDE_A),
+ .sel_narrow_in (BANK_NARROW_A),
+ // wide read interface towards storage_block
+ .rd_wide_xy_ena (rd_wide_xy_ena),
+ .rd_wide_xy_ena_aux (rd_wide_xy_ena_aux),
+ .rd_wide_xy_bank (rd_wide_xy_bank),
+ .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux),
+ .rd_wide_xy_addr (rd_wide_xy_addr),
+ .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux),
+ .rd_wide_x_dout (rd_wide_x_dout),
+ .rd_wide_y_dout (rd_wide_y_dout),
+ .rd_wide_x_dout_aux (rd_wide_x_dout_aux),
+ .rd_wide_y_dout_aux (rd_wide_y_dout_aux),
+ // narrow read interface towards storage_block
+ .rd_narrow_xy_ena (rd_narrow_xy_ena),
+ .rd_narrow_xy_bank (rd_narrow_xy_bank),
+ .rd_narrow_xy_addr (rd_narrow_xy_addr),
+ .rd_narrow_x_dout (rd_narrow_x_dout),
+ .rd_narrow_y_dout (rd_narrow_y_dout),
+ // wide recombinator port: goes to storage_manager, captured into P_*_AB_READ
+ .rcmb_wide_xy_bank (rcmb_wide_xy_bank),
+ .rcmb_wide_xy_addr (rcmb_wide_xy_addr),
+ .rcmb_wide_x_dout (rcmb_wide_x_dout),
+ .rcmb_wide_y_dout (rcmb_wide_y_dout),
+ .rcmb_wide_xy_valid (rcmb_wide_xy_valid),
+ // narrow recombinator port: goes to storage_manager, captured into P_*_Q_READ
+ .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank),
+ .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr),
+ .rcmb_narrow_x_dout (rcmb_narrow_x_dout),
+ .rcmb_narrow_y_dout (rcmb_narrow_y_dout),
+ .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid),
+ // final recombinator port: goes to the reductor, captured into P_*_M_READ
+ .rcmb_xy_bank (rcmb_final_xy_bank),
+ .rcmb_xy_addr (rcmb_final_xy_addr),
+ .rcmb_x_dout (rcmb_final_x_dout),
+ .rcmb_y_dout (rcmb_final_y_dout),
+ .rcmb_xy_valid (rcmb_final_xy_valid),
+ // handshake with the reductor instance (its ena/rdy ports)
+ .rdct_ena (rdct_ena),
+ .rdct_rdy (rdct_rdy)
+ );
+
+
+ //
+ // Reductor (enabled by uut.rdct_ena, reports back through uut.rdct_rdy)
+ //
+ modexpng_reductor reductor
+ (
+ .clk (clk),
+ .rst (rst),
+ // handshake is owned by the UUT, not by the test script
+ .ena (rdct_ena),
+ .rdy (rdct_rdy),
+ // same operand length setting the UUT receives
+ .word_index_last (word_index_last),
+ // output bank selection is fixed to the B banks in this testbench
+ .sel_wide_out (BANK_WIDE_B),
+ .sel_narrow_out (BANK_NARROW_B),
+ // auxiliary wide read port nets, shared with uut and storage_block
+ .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux),
+ .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux),
+ .rd_wide_x_dout_aux (rd_wide_x_dout_aux),
+ .rd_wide_y_dout_aux (rd_wide_y_dout_aux),
+ // final recombinator stream coming from uut.rcmb_xy_*
+ .rcmb_final_xy_bank (rcmb_final_xy_bank),
+ .rcmb_final_xy_addr (rcmb_final_xy_addr),
+ .rcmb_final_x_dout (rcmb_final_x_dout),
+ .rcmb_final_y_dout (rcmb_final_y_dout),
+ .rcmb_final_xy_valid (rcmb_final_xy_valid),
+ // wide result stream, captured by the debug interceptor into P_*_WIDE_READ
+ .rdct_wide_xy_bank (rdct_wide_xy_bank),
+ .rdct_wide_xy_addr (rdct_wide_xy_addr),
+ .rdct_wide_x_dout (rdct_wide_x_dout),
+ .rdct_wide_y_dout (rdct_wide_y_dout),
+ .rdct_wide_xy_valid (rdct_wide_xy_valid),
+ // narrow result stream, captured by the debug interceptor into P_*_NARROW_READ
+ .rdct_narrow_xy_bank (rdct_narrow_xy_bank),
+ .rdct_narrow_xy_addr (rdct_narrow_xy_addr),
+ .rdct_narrow_x_dout (rdct_narrow_x_dout),
+ .rdct_narrow_y_dout (rdct_narrow_y_dout),
+ .rdct_narrow_xy_valid (rdct_narrow_xy_valid)
+ );
+
+ //
+ // Storage Block (write side wired to storage_manager, read side wired to uut)
+ //
+ modexpng_storage_block storage_block
+ (
+ .clk (clk),
+ .rst (rst),
+ // wide write port nets, shared with storage_manager.wr_wide_*
+ .wr_wide_xy_ena (wr_wide_xy_ena),
+ .wr_wide_xy_bank (wr_wide_xy_bank),
+ .wr_wide_xy_addr (wr_wide_xy_addr),
+ .wr_wide_x_din (wr_wide_x_din),
+ .wr_wide_y_din (wr_wide_y_din),
+ // narrow write port nets, shared with storage_manager.wr_narrow_*
+ .wr_narrow_xy_ena (wr_narrow_xy_ena),
+ .wr_narrow_xy_bank (wr_narrow_xy_bank),
+ .wr_narrow_xy_addr (wr_narrow_xy_addr),
+ .wr_narrow_x_din (wr_narrow_x_din),
+ .wr_narrow_y_din (wr_narrow_y_din),
+ // wide read port nets shared with uut; the *_aux bank/addr/dout nets also reach the reductor
+ .rd_wide_xy_ena (rd_wide_xy_ena),
+ .rd_wide_xy_ena_aux (rd_wide_xy_ena_aux),
+ .rd_wide_xy_bank (rd_wide_xy_bank),
+ .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux),
+ .rd_wide_xy_addr (rd_wide_xy_addr),
+ .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux),
+ .rd_wide_x_dout (rd_wide_x_dout),
+ .rd_wide_y_dout (rd_wide_y_dout),
+ .rd_wide_x_dout_aux (rd_wide_x_dout_aux),
+ .rd_wide_y_dout_aux (rd_wide_y_dout_aux),
+ // narrow read port nets shared with uut
+ .rd_narrow_xy_ena (rd_narrow_xy_ena),
+ .rd_narrow_xy_bank (rd_narrow_xy_bank),
+ .rd_narrow_xy_addr (rd_narrow_xy_addr),
+ .rd_narrow_x_dout (rd_narrow_x_dout),
+ .rd_narrow_y_dout (rd_narrow_y_dout)
+ );
+
+ modexpng_storage_manager storage_manager // sits between the ext_*/rcmb_* sources and the storage_block write ports
+ (
+ .clk (clk),
+ .rst (rst),
+ // wide write port nets, shared with storage_block.wr_wide_*
+ .wr_wide_xy_ena (wr_wide_xy_ena),
+ .wr_wide_xy_bank (wr_wide_xy_bank),
+ .wr_wide_xy_addr (wr_wide_xy_addr),
+ .wr_wide_x_din (wr_wide_x_din),
+ .wr_wide_y_din (wr_wide_y_din),
+ // narrow write port nets, shared with storage_block.wr_narrow_*
+ .wr_narrow_xy_ena (wr_narrow_xy_ena),
+ .wr_narrow_xy_bank (wr_narrow_xy_bank),
+ .wr_narrow_xy_addr (wr_narrow_xy_addr),
+ .wr_narrow_x_din (wr_narrow_x_din),
+ .wr_narrow_y_din (wr_narrow_y_din),
+ // external wide write request, driven by task p_prefill
+ .ext_wide_xy_ena (ext_wide_xy_ena),
+ .ext_wide_xy_bank (ext_wide_xy_bank),
+ .ext_wide_xy_addr (ext_wide_xy_addr),
+ .ext_wide_x_din (ext_wide_x_din),
+ .ext_wide_y_din (ext_wide_y_din),
+ // external narrow write request, driven by task p_prefill
+ .ext_narrow_xy_ena (ext_narrow_xy_ena),
+ .ext_narrow_xy_bank (ext_narrow_xy_bank),
+ .ext_narrow_xy_addr (ext_narrow_xy_addr),
+ .ext_narrow_x_din (ext_narrow_x_din),
+ .ext_narrow_y_din (ext_narrow_y_din),
+ // wide recombinator stream from uut (its dout/valid map onto the manager's din/ena)
+ .rcmb_wide_xy_bank (rcmb_wide_xy_bank),
+ .rcmb_wide_xy_addr (rcmb_wide_xy_addr),
+ .rcmb_wide_x_din (rcmb_wide_x_dout),
+ .rcmb_wide_y_din (rcmb_wide_y_dout),
+ .rcmb_wide_xy_ena (rcmb_wide_xy_valid),
+ // narrow recombinator stream from uut (its dout/valid map onto the manager's din/ena)
+ .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank),
+ .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr),
+ .rcmb_narrow_x_din (rcmb_narrow_x_dout),
+ .rcmb_narrow_y_din (rcmb_narrow_y_dout),
+ .rcmb_narrow_xy_ena (rcmb_narrow_xy_valid)
+ );
+
+
+ //
+ // p_prefill()
+ //
+ // Writes the test vectors into storage through the storage manager's
+ // external ports (ext_wide_* / ext_narrow_*), one word per clock period:
+ //   pass 1: P_T1 / P_T2 into BANK_WIDE_A and BANK_NARROW_A
+ //   pass 2: P_N into BANK_WIDE_N, P_N_COEFF[0..PQ_NUM_WORDS-1] into BANK_NARROW_COEFF
+ //   pass 3: the extra word P_N_COEFF[PQ_NUM_WORDS] into BANK_NARROW_EXT
+ // Both enables are dropped and the buses parked at the *_DNC constants
+ // when the corresponding port is no longer needed.
+ // Uses the module-level loop counter i.
+ //
+ task p_prefill;
+ begin
+
+ ext_wide_xy_ena = 1'b1;
+ ext_narrow_xy_ena = 1'b1;
+
+ // pass 1: the same operand words go to both the wide and the narrow A bank
+ for (i=0; i<PQ_NUM_WORDS; i=i+1) begin
+ ext_wide_xy_bank = BANK_WIDE_A;
+ ext_wide_xy_addr = i[OP_ADDR_W-1:0];
+ ext_wide_x_din = P_T1[i];
+ ext_wide_y_din = P_T2[i];
+
+ ext_narrow_xy_bank = BANK_NARROW_A;
+ ext_narrow_xy_addr = i[OP_ADDR_W-1:0];
+ ext_narrow_x_din = P_T1[i];
+ ext_narrow_y_din = P_T2[i];
+
+ wait_clock_tick;
+ end
+
+ // pass 2: X and Y halves receive identical P_N / P_N_COEFF words
+ for (i=0; i<PQ_NUM_WORDS; i=i+1) begin
+ ext_wide_xy_bank = BANK_WIDE_N;
+ ext_wide_xy_addr = i[OP_ADDR_W-1:0];
+ ext_wide_x_din = P_N[i];
+ ext_wide_y_din = P_N[i];
+
+ ext_narrow_xy_bank = BANK_NARROW_COEFF;
+ ext_narrow_xy_addr = i[OP_ADDR_W-1:0];
+ ext_narrow_x_din = P_N_COEFF[i];
+ ext_narrow_y_din = P_N_COEFF[i];
+
+ wait_clock_tick;
+ end
+
+ // wide port is done: disable it and park its buses
+ ext_wide_xy_ena = 1'b0;
+ ext_wide_xy_bank = BANK_DNC;
+ ext_wide_xy_addr = OP_ADDR_DNC;
+ ext_wide_x_din = WORD_EXT_DNC;
+ ext_wide_y_din = WORD_EXT_DNC;
+
+ // pass 3: P_N_COEFF has PQ_NUM_WORDS+1 words; the last one goes to the EXT bank.
+ // The bounds are derived from PQ_NUM_WORDS (formerly hard-coded 32..33) so the
+ // task keeps working when the operand length changes.
+ for (i=PQ_NUM_WORDS; i<(PQ_NUM_WORDS+1); i=i+1) begin
+ ext_narrow_xy_bank = BANK_NARROW_EXT;
+ ext_narrow_xy_addr = OP_ADDR_EXT_COEFF;
+ ext_narrow_x_din = P_N_COEFF[i];
+ ext_narrow_y_din = P_N_COEFF[i];
+
+ wait_clock_tick;
+ end
+
+ // narrow port is done: disable it and park its buses
+ ext_narrow_xy_ena = 1'b0;
+ ext_narrow_xy_bank = BANK_DNC;
+ ext_narrow_xy_addr = OP_ADDR_DNC;
+ ext_narrow_x_din = WORD_EXT_DNC;
+ ext_narrow_y_din = WORD_EXT_DNC;
+
+ end
+ endtask
+
+
+ //
+ // wait_clock_tick(): suspends the caller for one `CLK_PERIOD_NS delay
+ //
+ task wait_clock_tick;
+     begin
+         #(`CLK_PERIOD_NS);
+     end
+ endtask
+
+
+ //
+ // wait_clock_ticks(): calls wait_clock_tick num_ticks times in a row
+ // (returns immediately when num_ticks is zero or negative)
+ //
+ task wait_clock_ticks;
+     input integer num_ticks;
+     integer ticks_done;
+     begin
+         ticks_done = 0;
+         while (ticks_done < num_ticks) begin
+             wait_clock_tick;
+             ticks_done = ticks_done + 1;
+         end
+     end
+ endtask
+
+
+ //
+ // Debug Interceptor
+ //
+ // Snoops the recombinator and reductor output streams on every rising
+ // clock edge and stores each valid word in a capture array, which the
+ // p_verify_* tasks later compare against the reference vectors. Any
+ // write to an unexpected bank or offset aborts the simulation.
+ //
+ // wide recombinator stream: banks L and H, 2*PQ_NUM_WORDS words in total
+ reg [WORD_EXT_W-1:0] P_X_AB_READ[0:2*PQ_NUM_WORDS-1];
+ reg [WORD_EXT_W-1:0] P_Y_AB_READ[0:2*PQ_NUM_WORDS-1];
+ // narrow recombinator stream: bank Q plus one EXT word, PQ_NUM_WORDS+1 words
+ reg [WORD_EXT_W-1:0] P_X_Q_READ[0:PQ_NUM_WORDS];
+ reg [WORD_EXT_W-1:0] P_Y_Q_READ[0:PQ_NUM_WORDS];
+ // final recombinator stream: banks ML, MH plus one EXT word, 2*PQ_NUM_WORDS+1 words
+ reg [WORD_EXT_W-1:0] P_X_M_READ[0:2*PQ_NUM_WORDS];
+ reg [WORD_EXT_W-1:0] P_Y_M_READ[0:2*PQ_NUM_WORDS];
+ // reductor wide result stream
+ reg [WORD_EXT_W-1:0] P_X_WIDE_READ[0:PQ_NUM_WORDS-1];
+ reg [WORD_EXT_W-1:0] P_Y_WIDE_READ[0:PQ_NUM_WORDS-1];
+ // reductor narrow result stream
+ reg [WORD_EXT_W-1:0] P_X_NARROW_READ[0:PQ_NUM_WORDS-1];
+ reg [WORD_EXT_W-1:0] P_Y_NARROW_READ[0:PQ_NUM_WORDS-1];
+
+ integer xy_offset; // scratch index; assigned with blocking '=' so the range checks see the fresh value
+ always @(posedge clk) begin
+ // --- wide recombinator port ---
+ if (rcmb_wide_xy_valid)
+ //
+ case (rcmb_wide_xy_bank)
+ // bank L maps onto capture indices 0 .. PQ_NUM_WORDS-1
+ BANK_WIDE_L: begin
+ //
+ xy_offset = rcmb_wide_xy_addr;
+ //
+ if (xy_offset >= PQ_NUM_WORDS) begin
+ $display("ERROR: Encountered illegal offset (%d) writing to wide bank L!", xy_offset);
+ $finish;
+ end
+ //
+ P_X_AB_READ[xy_offset] <= rcmb_wide_x_dout;
+ P_Y_AB_READ[xy_offset] <= rcmb_wide_y_dout;
+ //
+ end
+ // bank H maps onto capture indices PQ_NUM_WORDS .. 2*PQ_NUM_WORDS-1
+ BANK_WIDE_H: begin
+ //
+ xy_offset = PQ_NUM_WORDS + rcmb_wide_xy_addr;
+ //
+ if (xy_offset >= 2*PQ_NUM_WORDS) begin
+ $display("ERROR: Encountered illegal offset (%d) writing to wide bank H!", xy_offset);
+ $finish;
+ end
+ //
+ P_X_AB_READ[xy_offset] <= rcmb_wide_x_dout;
+ P_Y_AB_READ[xy_offset] <= rcmb_wide_y_dout;
+ //
+ end
+ //
+ default: begin
+ $display("ERROR: Encountered illegal wide bank (%d) while writing!", rcmb_wide_xy_bank);
+ $finish;
+ end
+ //
+ endcase
+ // --- narrow recombinator port ---
+ if (rcmb_narrow_xy_valid)
+ //
+ case (rcmb_narrow_xy_bank)
+ // bank Q maps onto capture indices 0 .. PQ_NUM_WORDS-1
+ BANK_NARROW_Q: begin
+ //
+ xy_offset = rcmb_narrow_xy_addr;
+ //
+ if (xy_offset >= PQ_NUM_WORDS) begin
+ $display("ERROR: Encountered illegal offset (%d) writing to narrow bank Q!", xy_offset);
+ $finish;
+ end
+ //
+ P_X_Q_READ[xy_offset] <= rcmb_narrow_x_dout;
+ P_Y_Q_READ[xy_offset] <= rcmb_narrow_y_dout;
+ //
+ end
+ // only address 1 of the EXT bank is accepted; it lands at index PQ_NUM_WORDS
+ BANK_NARROW_EXT: begin
+ //
+ xy_offset = PQ_NUM_WORDS + rcmb_narrow_xy_addr - 1;
+ //
+ if (xy_offset != PQ_NUM_WORDS) begin
+ $display("ERROR: Encountered illegal offset (%d) writing to narrow bank EXT!", xy_offset);
+ $finish;
+ end
+ //
+ P_X_Q_READ[xy_offset] <= rcmb_narrow_x_dout;
+ P_Y_Q_READ[xy_offset] <= rcmb_narrow_y_dout;
+ //
+ end
+ //
+ default: begin
+ $display("ERROR: Encountered illegal narrow bank (%d) while writing!", rcmb_narrow_xy_bank);
+ $finish;
+ end
+ //
+ endcase
+ // --- final recombinator port (messages say "final" so they cannot be mistaken for the narrow port) ---
+ if (rcmb_final_xy_valid)
+ //
+ case (rcmb_final_xy_bank)
+ // bank ML maps onto capture indices 0 .. PQ_NUM_WORDS-1
+ BANK_RCMB_ML: begin
+ //
+ xy_offset = rcmb_final_xy_addr;
+ //
+ if (xy_offset >= PQ_NUM_WORDS) begin
+ $display("ERROR: Encountered illegal offset (%d) writing to final bank ML!", xy_offset);
+ $finish;
+ end
+ //
+ P_X_M_READ[xy_offset] <= rcmb_final_x_dout;
+ P_Y_M_READ[xy_offset] <= rcmb_final_y_dout;
+ //
+ end
+ // bank MH maps onto capture indices PQ_NUM_WORDS .. 2*PQ_NUM_WORDS-1
+ BANK_RCMB_MH: begin
+ //
+ xy_offset = PQ_NUM_WORDS + rcmb_final_xy_addr;
+ //
+ if (xy_offset >= 2*PQ_NUM_WORDS) begin
+ $display("ERROR: Encountered illegal offset (%d) writing to final bank MH!", xy_offset);
+ $finish;
+ end
+ //
+ P_X_M_READ[xy_offset] <= rcmb_final_x_dout;
+ P_Y_M_READ[xy_offset] <= rcmb_final_y_dout;
+ //
+ end
+ // only address 0 of the EXT bank is accepted; it lands at index 2*PQ_NUM_WORDS
+ BANK_RCMB_EXT: begin
+ //
+ xy_offset = 2*PQ_NUM_WORDS + rcmb_final_xy_addr;
+ //
+ if (xy_offset != 2*PQ_NUM_WORDS) begin
+ $display("ERROR: Encountered illegal offset (%d) writing to final bank EXT!", xy_offset);
+ $finish;
+ end
+ //
+ P_X_M_READ[xy_offset] <= rcmb_final_x_dout;
+ P_Y_M_READ[xy_offset] <= rcmb_final_y_dout;
+ //
+ end
+ //
+ default: begin
+ $display("ERROR: Encountered illegal final bank (%d) while writing!", rcmb_final_xy_bank);
+ $finish;
+ end
+ //
+ endcase
+ // --- reductor narrow port (only the address is range-checked, not the bank) ---
+ if (rdct_narrow_xy_valid) begin
+ //
+ xy_offset = rdct_narrow_xy_addr;
+ //
+ if (xy_offset >= PQ_NUM_WORDS) begin
+ $display("ERROR: Encountered illegal offset (%d) writing to narrow bank T1/T2!", xy_offset);
+ $finish;
+ end
+ //
+ P_X_NARROW_READ[xy_offset] <= rdct_narrow_x_dout;
+ P_Y_NARROW_READ[xy_offset] <= rdct_narrow_y_dout;
+ //
+ end
+ // --- reductor wide port (only the address is range-checked, not the bank) ---
+ if (rdct_wide_xy_valid) begin
+ //
+ xy_offset = rdct_wide_xy_addr;
+ //
+ if (xy_offset >= PQ_NUM_WORDS) begin
+ $display("ERROR: Encountered illegal offset (%d) writing to wide bank T1/T2!", xy_offset);
+ $finish;
+ end
+ //
+ P_X_WIDE_READ[xy_offset] <= rdct_wide_x_dout;
+ P_Y_WIDE_READ[xy_offset] <= rdct_wide_y_dout;
+ //
+ end
+ //
+ end
+
+ task p_verify_ab;
+     //
+     // Checks the captured P_X_AB_READ / P_Y_AB_READ words against the
+     // reference vectors P_X_AB / P_Y_AB (2*PQ_NUM_WORDS words each, compared
+     // with 4-state inequality so X/Z bits count as mismatches). If a vector
+     // has any mismatch, all of its words are dumped with "<???>" appended
+     // to the bad ones. A one-line verdict per vector is printed last.
+     // Uses the module-level loop counter i.
+     //
+     reg x_ab_bad;
+     reg y_ab_bad;
+     //
+     begin
+         // start from "no mismatch seen"
+         x_ab_bad = 1'b0;
+         y_ab_bad = 1'b0;
+         // sticky mismatch flags over the whole vector
+         for (i=0; i<2*PQ_NUM_WORDS; i=i+1) begin
+             x_ab_bad = x_ab_bad | (P_X_AB_READ[i] !== P_X_AB[i]);
+             y_ab_bad = y_ab_bad | (P_Y_AB_READ[i] !== P_Y_AB[i]);
+         end
+         // full dump of the X vector, only when something is wrong
+         if (x_ab_bad) begin
+             for (i=0; i<2*PQ_NUM_WORDS; i=i+1) begin
+                 if (P_X_AB_READ[i] !== P_X_AB[i])
+                     $display("P_X_AB / P_X_AB_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_X_AB[i], P_X_AB_READ[i]);
+                 else
+                     $display("P_X_AB / P_X_AB_READ [%02d] = 0x%05x / 0x%05x", i, P_X_AB[i], P_X_AB_READ[i]);
+             end
+         end
+         // full dump of the Y vector, only when something is wrong
+         if (y_ab_bad) begin
+             for (i=0; i<2*PQ_NUM_WORDS; i=i+1) begin
+                 if (P_Y_AB_READ[i] !== P_Y_AB[i])
+                     $display("P_Y_AB / P_Y_AB_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_Y_AB[i], P_Y_AB_READ[i]);
+                 else
+                     $display("P_Y_AB / P_Y_AB_READ [%02d] = 0x%05x / 0x%05x", i, P_Y_AB[i], P_Y_AB_READ[i]);
+             end
+         end
+         // verdicts
+         if (x_ab_bad) $display("P_X_AB is WRONG!");
+         else          $display("P_X_AB is OK.");
+         //
+         if (y_ab_bad) $display("P_Y_AB is WRONG!");
+         else          $display("P_Y_AB is OK.");
+         //
+     end
+     //
+ endtask
+
+ task p_verify_q;
+     //
+     // Checks the captured P_X_Q_READ / P_Y_Q_READ words against the
+     // reference vectors P_X_Q / P_Y_Q (PQ_NUM_WORDS+1 words each, compared
+     // with 4-state inequality so X/Z bits count as mismatches). If a vector
+     // has any mismatch, all of its words are dumped with "<???>" appended
+     // to the bad ones. A one-line verdict per vector is printed last.
+     // Uses the module-level loop counter i.
+     //
+     reg x_q_bad;
+     reg y_q_bad;
+     //
+     begin
+         // start from "no mismatch seen"
+         x_q_bad = 1'b0;
+         y_q_bad = 1'b0;
+         // sticky mismatch flags over the whole vector
+         for (i=0; i<(PQ_NUM_WORDS+1); i=i+1) begin
+             x_q_bad = x_q_bad | (P_X_Q_READ[i] !== P_X_Q[i]);
+             y_q_bad = y_q_bad | (P_Y_Q_READ[i] !== P_Y_Q[i]);
+         end
+         // full dump of the X vector, only when something is wrong
+         if (x_q_bad) begin
+             for (i=0; i<(PQ_NUM_WORDS+1); i=i+1) begin
+                 if (P_X_Q_READ[i] !== P_X_Q[i])
+                     $display("P_X_Q / P_X_Q_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_X_Q[i], P_X_Q_READ[i]);
+                 else
+                     $display("P_X_Q / P_X_Q_READ [%02d] = 0x%05x / 0x%05x", i, P_X_Q[i], P_X_Q_READ[i]);
+             end
+         end
+         // full dump of the Y vector, only when something is wrong
+         if (y_q_bad) begin
+             for (i=0; i<(PQ_NUM_WORDS+1); i=i+1) begin
+                 if (P_Y_Q_READ[i] !== P_Y_Q[i])
+                     $display("P_Y_Q / P_Y_Q_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_Y_Q[i], P_Y_Q_READ[i]);
+                 else
+                     $display("P_Y_Q / P_Y_Q_READ [%02d] = 0x%05x / 0x%05x", i, P_Y_Q[i], P_Y_Q_READ[i]);
+             end
+         end
+         // verdicts
+         if (x_q_bad) $display("P_X_Q is WRONG!");
+         else         $display("P_X_Q is OK.");
+         //
+         if (y_q_bad) $display("P_Y_Q is WRONG!");
+         else         $display("P_Y_Q is OK.");
+         //
+     end
+     //
+ endtask
+
+ task p_verify_m;
+     //
+     // Checks the captured P_X_M_READ / P_Y_M_READ words against the
+     // reference vectors P_X_M / P_Y_M (2*PQ_NUM_WORDS+1 words each, compared
+     // with 4-state inequality so X/Z bits count as mismatches). If a vector
+     // has any mismatch, all of its words are dumped with "<???>" appended
+     // to the bad ones. A one-line verdict per vector is printed last.
+     // Uses the module-level loop counter i.
+     //
+     reg x_m_bad;
+     reg y_m_bad;
+     //
+     begin
+         // start from "no mismatch seen"
+         x_m_bad = 1'b0;
+         y_m_bad = 1'b0;
+         // sticky mismatch flags over the whole vector
+         for (i=0; i<(2*PQ_NUM_WORDS+1); i=i+1) begin
+             x_m_bad = x_m_bad | (P_X_M_READ[i] !== P_X_M[i]);
+             y_m_bad = y_m_bad | (P_Y_M_READ[i] !== P_Y_M[i]);
+         end
+         // full dump of the X vector, only when something is wrong
+         if (x_m_bad) begin
+             for (i=0; i<(2*PQ_NUM_WORDS+1); i=i+1) begin
+                 if (P_X_M_READ[i] !== P_X_M[i])
+                     $display("P_X_M / P_X_M_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_X_M[i], P_X_M_READ[i]);
+                 else
+                     $display("P_X_M / P_X_M_READ [%02d] = 0x%05x / 0x%05x", i, P_X_M[i], P_X_M_READ[i]);
+             end
+         end
+         // full dump of the Y vector, only when something is wrong
+         if (y_m_bad) begin
+             for (i=0; i<(2*PQ_NUM_WORDS+1); i=i+1) begin
+                 if (P_Y_M_READ[i] !== P_Y_M[i])
+                     $display("P_Y_M / P_Y_M_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_Y_M[i], P_Y_M_READ[i]);
+                 else
+                     $display("P_Y_M / P_Y_M_READ [%02d] = 0x%05x / 0x%05x", i, P_Y_M[i], P_Y_M_READ[i]);
+             end
+         end
+         // verdicts
+         if (x_m_bad) $display("P_X_M is WRONG!");
+         else         $display("P_X_M is OK.");
+         //
+         if (y_m_bad) $display("P_Y_M is WRONG!");
+         else         $display("P_Y_M is OK.");
+         //
+     end
+     //
+ endtask
+
+ task p_verify_p;
+     //
+     // Checks both reductor capture sets (P_*_WIDE_READ and P_*_NARROW_READ)
+     // against the same reference vectors P_X / P_Y (PQ_NUM_WORDS words each,
+     // compared with 4-state inequality so X/Z bits count as mismatches).
+     // A coordinate is bad when either its wide or its narrow copy differs
+     // from the reference. If so, every word is dumped as
+     // reference / wide / narrow with "<???>" appended to the bad ones.
+     // A one-line verdict per coordinate is printed last.
+     // Uses the module-level loop counter i.
+     //
+     reg x_bad;
+     reg y_bad;
+     //
+     begin
+         // start from "no mismatch seen"
+         x_bad = 1'b0;
+         y_bad = 1'b0;
+         // sticky mismatch flags, wide and narrow copies folded together
+         for (i=0; i<PQ_NUM_WORDS; i=i+1) begin
+             x_bad = x_bad | (P_X_WIDE_READ[i] !== P_X[i]) | (P_X_NARROW_READ[i] !== P_X[i]);
+             y_bad = y_bad | (P_Y_WIDE_READ[i] !== P_Y[i]) | (P_Y_NARROW_READ[i] !== P_Y[i]);
+         end
+         // full dump of the X coordinate, only when something is wrong
+         if (x_bad) begin
+             for (i=0; i<PQ_NUM_WORDS; i=i+1) begin
+                 if ((P_X_WIDE_READ[i] !== P_X[i]) || (P_X_NARROW_READ[i] !== P_X[i]))
+                     $display("P_X / P_X_WIDE / P_X_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x <???>", i, P_X[i], P_X_WIDE_READ[i], P_X_NARROW_READ[i]);
+                 else
+                     $display("P_X / P_X_WIDE / P_X_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x", i, P_X[i], P_X_WIDE_READ[i], P_X_NARROW_READ[i]);
+             end
+         end
+         // full dump of the Y coordinate, only when something is wrong
+         if (y_bad) begin
+             for (i=0; i<PQ_NUM_WORDS; i=i+1) begin
+                 if ((P_Y_WIDE_READ[i] !== P_Y[i]) || (P_Y_NARROW_READ[i] !== P_Y[i]))
+                     $display("P_Y / P_Y_WIDE / P_Y_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x <???>", i, P_Y[i], P_Y_WIDE_READ[i], P_Y_NARROW_READ[i]);
+                 else
+                     $display("P_Y / P_Y_WIDE / P_Y_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x", i, P_Y[i], P_Y_WIDE_READ[i], P_Y_NARROW_READ[i]);
+             end
+         end
+         // verdicts
+         if (x_bad) $display("P_X is WRONG!");
+         else       $display("P_X is OK.");
+         //
+         if (y_bad) $display("P_Y is WRONG!");
+         else       $display("P_Y is OK.");
+         //
+     end
+     //
+ endtask
+
+endmodule
+
diff --git a/bench/tb_mmm_x8_dual.v b/bench/tb_mmm_x8_dual.v
deleted file mode 100644
index aa25900..0000000
--- a/bench/tb_mmm_x8_dual.v
+++ /dev/null
@@ -1,327 +0,0 @@
-`timescale 1ns / 1ps
-
-module tb_mmm_x8_dual;
-
-
- //
- // Headers
- //
- `include "../rtl/modexpng_parameters.vh"
- `include "../rtl/modexpng_parameters_x8.vh"
-
-
- //
- // Settings
- //
- localparam INDEX_WIDTH = 6;
-
- wire [INDEX_WIDTH-1:0] index_last = 31; // 512 bits
-
-
- //
- // Clock
- //
- `define CLK_FREQUENCY_MHZ 100.0
- `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ)
- `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS)
-
- reg clk = 1'b0;
-
- always begin
- #`CLK_PERIOD_HALF_NS clk = 1'b1;
- #`CLK_PERIOD_HALF_NS clk = 1'b0;
- end
-
-
- //
- // Reset
- //
- reg rst = 1'b1;
- wire rst_n = ~rst;
-
-
- //
- // Control
- //
- reg ena = 1'b0;
- wire rdy;
-
- reg mode;
- reg transfer;
-
-
- //
- // Interface
- //
-
-
- //
- // Interface - Data Buses
- //
- wire [NUM_MULTS*WORD_WIDTH-1:0] x_din;
- wire [NUM_MULTS*WORD_WIDTH-1:0] y_din;
- wire [NUM_MULTS*WORD_WIDTH-1:0] x_dout;
- wire [NUM_MULTS*WORD_WIDTH-1:0] y_dout;
-
-
- //
- // Interface - Address Buses
- //
- wire [INDEX_WIDTH-4:0] x_din_addr;
- wire [INDEX_WIDTH-4:0] y_din_addr;
- wire [INDEX_WIDTH-4:0] x_dout_addr;
- wire [INDEX_WIDTH-4:0] y_dout_addr;
-
-
- //
- // Interface - Enable Buses
- //
- wire [ 1-1:0] x_din_ena;
- wire [ 1-1:0] y_din_ena;
- wire [ 1-1:0] x_din_reg_ena;
- wire [ 1-1:0] y_din_reg_ena;
- wire [NUM_MULTS-1:0] x_dout_ena;
- wire [NUM_MULTS-1:0] y_dout_ena;
-
-
- //
- // Interface - Bank Buses
- //
- wire [3-1:0] x_din_bank;
- wire [3-1:0] y_din_bank;
- wire [3-1:0] x_dout_bank;
- wire [3-1:0] y_dout_bank;
-
-
- //
- // Operands
- //
- reg [WORD_WIDTH-1:0] T1[0:2**INDEX_WIDTH-1];
- reg [WORD_WIDTH-1:0] T2[0:2**INDEX_WIDTH-1];
- reg [WORD_WIDTH-1:0] N[0:2**INDEX_WIDTH-1];
- reg [WORD_WIDTH-1:0] N_COEFF[0:2**INDEX_WIDTH];
-
-
- //
- // Memories
- //
- genvar z;
- generate for (z=0; z<NUM_MULTS; z=z+1)
- //
- begin : gen_z_mem
- //
- modexpng_mem /*bram_1wo_1ro_readfirst_ce*/ #
- (
- .MEM_WIDTH(WORD_WIDTH),
- .MEM_ADDR_BITS(INDEX_WIDTH) // - clog2(NUM_MULTS) + clog2(NUM_BANKS)
- )
- gen_z_mem_x
- (
- .clk (clk),
-
- .a_addr ({x_dout_bank, x_dout_addr}),
- .a_en (x_dout_ena[z]),
- .a_wr (x_dout_ena[z]),
- .a_in (x_dout[z*WORD_WIDTH+:WORD_WIDTH]),
- .a_out (), // unused
-
- .b_addr ({x_din_bank, x_din_addr}),
- .b_en (x_din_ena),
- .b_reg_en (x_din_reg_ena),
- .b_out (x_din[z*WORD_WIDTH+:WORD_WIDTH])
- );
- //
- modexpng_mem /*bram_1wo_1ro_readfirst_ce*/ #
- (
- .MEM_WIDTH(WORD_WIDTH),
- .MEM_ADDR_BITS(INDEX_WIDTH) // - clog2(NUM_MULTS) + clog2(NUM_BANKS)
- )
- gen_z_mem_y
- (
- .clk (clk),
-
- .a_addr ({y_dout_bank, y_dout_addr}),
- .a_en (y_dout_ena[z]),
- .a_wr (y_dout_ena[z]),
- .a_in (y_dout[z*WORD_WIDTH+:WORD_WIDTH]),
- .a_out (), // unused
-
- .b_addr ({y_din_bank, y_din_addr}),
- .b_en (y_din_ena),
- .b_reg_en (y_din_reg_ena),
- .b_out (y_din[z*WORD_WIDTH+:WORD_WIDTH])
- );
- //
- end
- //
- endgenerate
-
-
- // T1 / T2
- // N / N_COEFF
- // AB_LSB
- // AB_MSB
- // M
- // Q_LSB
- // Q_MSB
- // ?
-
-
- //
- // Operands - Values
- //
- initial begin
- //
- T1[ 0] = 18'h0b27b; T1[ 1] = 18'h0fc7d; T1[ 2] = 18'h0a214; T1[ 3] = 18'h08d2b;
- T1[ 4] = 18'h1c80c; T1[ 5] = 18'h145f1; T1[ 6] = 18'h00db6; T1[ 7] = 18'h1cf0f;
- T1[ 8] = 18'h19386; T1[ 9] = 18'h02ad9; T1[10] = 18'h1a8b5; T1[11] = 18'h1479b;
- T1[12] = 18'h08b5f; T1[13] = 18'h14806; T1[14] = 18'h0e6f7; T1[15] = 18'h0ce9d;
- T1[16] = 18'h0cbc2; T1[17] = 18'h16ef1; T1[18] = 18'h0e14e; T1[19] = 18'h1796f;
- T1[20] = 18'h14901; T1[21] = 18'h06666; T1[22] = 18'h0cb9f; T1[23] = 18'h09ab4;
- T1[24] = 18'h12ffc; T1[25] = 18'h0a86d; T1[26] = 18'h19d35; T1[27] = 18'h0cda9;
- T1[28] = 18'h16a19; T1[29] = 18'h09a36; T1[30] = 18'h0b176; T1[31] = 18'h0e0dc;
- //
- T2[ 0] = 18'h0b21a; T2[ 1] = 18'h13e71; T2[ 2] = 18'h03459; T2[ 3] = 18'h1063f;
- T2[ 4] = 18'h18cef; T2[ 5] = 18'h1b8a5; T2[ 6] = 18'h082d1; T2[ 7] = 18'h1b1be;
- T2[ 8] = 18'h18979; T2[ 9] = 18'h1409a; T2[10] = 18'h1713c; T2[11] = 18'h0cda3;
- T2[12] = 18'h11c7d; T2[13] = 18'h0c943; T2[14] = 18'h12d7c; T2[15] = 18'h1531e;
- T2[16] = 18'h0a45a; T2[17] = 18'h1c637; T2[18] = 18'h0906a; T2[19] = 18'h1670e;
- T2[20] = 18'h12f78; T2[21] = 18'h08ce6; T2[22] = 18'h1c5c7; T2[23] = 18'h1292d;
- T2[24] = 18'h0fc4b; T2[25] = 18'h064fb; T2[26] = 18'h0cc3c; T2[27] = 18'h19b37;
- T2[28] = 18'h1b721; T2[29] = 18'h0f424; T2[30] = 18'h0f608; T2[31] = 18'h03e9b;
- //
- N[ 0] = 18'h00a9d; N[ 1] = 18'h01175; N[ 2] = 18'h0254f; N[ 3] = 18'h0ee38;
- N[ 4] = 18'h00a6a; N[ 5] = 18'h0c7bd; N[ 6] = 18'h0ddac; N[ 7] = 18'h069fe;
- N[ 8] = 18'h0e9d6; N[ 9] = 18'h0b6bf; N[10] = 18'h09230; N[11] = 18'h04fc5;
- N[12] = 18'h05c9f; N[13] = 18'h09502; N[14] = 18'h0cbc5; N[15] = 18'h03109;
- N[16] = 18'h08029; N[17] = 18'h0b27c; N[18] = 18'h0eeb8; N[19] = 18'h0c191;
- N[20] = 18'h0ff86; N[21] = 18'h027ab; N[22] = 18'h07d76; N[23] = 18'h0ff1a;
- N[24] = 18'h02afc; N[25] = 18'h0b25a; N[26] = 18'h0d3c1; N[27] = 18'h05589;
- N[28] = 18'h09f7c; N[29] = 18'h0ddd6; N[30] = 18'h0b4fc; N[31] = 18'h0e8e7;
- //
- N_COEFF[ 0] = 18'h0344b; N_COEFF[ 1] = 18'h0ca66; N_COEFF[ 2] = 18'h0d9e8; N_COEFF[ 3] = 18'h070d5;
- N_COEFF[ 4] = 18'h0ce4b; N_COEFF[ 5] = 18'h049b2; N_COEFF[ 6] = 18'h0abb3; N_COEFF[ 7] = 18'h0c3b2;
- N_COEFF[ 8] = 18'h0ad38; N_COEFF[ 9] = 18'h05672; N_COEFF[10] = 18'h0fd47; N_COEFF[11] = 18'h06671;
- N_COEFF[12] = 18'h00b7f; N_COEFF[13] = 18'h0fa35; N_COEFF[14] = 18'h0d4ac; N_COEFF[15] = 18'h0f1ca;
- N_COEFF[16] = 18'h08e0a; N_COEFF[17] = 18'h05858; N_COEFF[18] = 18'h02dc6; N_COEFF[19] = 18'h08cfc;
- N_COEFF[20] = 18'h01941; N_COEFF[21] = 18'h0f855; N_COEFF[22] = 18'h01e43; N_COEFF[23] = 18'h053f0;
- N_COEFF[24] = 18'h0a479; N_COEFF[25] = 18'h0ae7e; N_COEFF[26] = 18'h05c66; N_COEFF[27] = 18'h02413;
- N_COEFF[28] = 18'h0b5f8; N_COEFF[29] = 18'h0eb06; N_COEFF[30] = 18'h0de5b; N_COEFF[31] = 18'h0a751;
- N_COEFF[32] = 18'h0c1ec;
- //
- end
-
-
- //
- // Load Interface
- //
- wire load_phase;
- wire [ INDEX_WIDTH:0] load_xy_addr;
- wire load_xy_addr_vld;
- wire load_xy_req;
- reg [ WORD_WIDTH-1:0] load_x_din;
- reg [ WORD_WIDTH-1:0] load_y_din;
- reg [ WORD_WIDTH-1:0] load_x_pipe;
- reg [ WORD_WIDTH-1:0] load_y_pipe;
-
- always @(posedge clk)
- //
- if (load_xy_addr_vld) begin
-
- if (!load_phase) begin
- load_x_pipe <= T1[load_xy_addr];
- load_y_pipe <= T2[load_xy_addr];
- end else begin
- load_x_pipe <= !load_xy_addr[INDEX_WIDTH] ? N[load_xy_addr] : {WORD_WIDTH{1'bX}};
- load_y_pipe <= N_COEFF[load_xy_addr];
- end
- end
-
- always @(posedge clk)
- //
- if (load_xy_req)
- {load_y_din, load_x_din} <= {load_y_pipe, load_x_pipe};
- else
- {load_y_din, load_x_din} <= {2*WORD_WIDTH{1'bX}};
-
-
- //
- // UUT
- //
- modexpng_mmm_x8_dual #
- (
- .INDEX_WIDTH(INDEX_WIDTH)
- )
- uut
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (ena),
- .rdy (rdy),
-
- .mode (mode),
- .transfer (transfer),
-
- .index_last (index_last),
-
- .x_din (x_din),
- .y_din (y_din),
- .x_dout (x_dout),
- .y_dout (y_dout),
-
- .x_din_addr (x_din_addr),
- .y_din_addr (y_din_addr),
- .x_dout_addr (x_dout_addr),
- .y_dout_addr (y_dout_addr),
-
- .x_din_ena (x_din_ena),
- .y_din_ena (y_din_ena),
- .x_dout_ena (x_dout_ena),
- .y_dout_ena (y_dout_ena),
-
- .x_din_reg_ena (x_din_reg_ena),
- .y_din_reg_ena (y_din_reg_ena),
-
- .x_din_bank (x_din_bank),
- .y_din_bank (y_din_bank),
- .x_dout_bank (x_dout_bank),
- .y_dout_bank (y_dout_bank),
-
- .load_phase (load_phase),
- .load_xy_addr (load_xy_addr),
- .load_xy_addr_vld (load_xy_addr_vld),
- .load_xy_req (load_xy_req),
- .load_x_din (load_x_din),
- .load_y_din (load_y_din)
- );
-
-
- //
- // Script
- //
- initial begin
- #(100.0*`CLK_PERIOD_NS) rst = 1'b0;
- #(100.0*`CLK_PERIOD_NS) ena = 1'b1;
- transfer = 1'b1;
- mode = 1'b0;
- #( 1.0*`CLK_PERIOD_NS) ena = 1'b0;
- transfer = 1'bX;
- mode = 1'bX;
-
- while (!rdy) #`CLK_PERIOD_NS;
-
- #(100.0*`CLK_PERIOD_NS) ena = 1'b1;
- transfer = 1'b0;
- mode = 1'b0;
- #( 1.0*`CLK_PERIOD_NS) ena = 1'b0;
- transfer = 1'bX;
- mode = 1'bX;
-
- while (!rdy) #`CLK_PERIOD_NS;
-
- end
-
-
-endmodule
-
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index 6b194dc..c78a969 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -6,6 +6,8 @@ module modexpng_core_top
crt_mode,
word_index_last_n,
word_index_last_pq,
+ bit_index_last_n,
+ bit_index_last_pq,
bus_cs,
bus_we,
bus_addr,
@@ -37,6 +39,9 @@ module modexpng_core_top
input [ OP_ADDR_W -1:0] word_index_last_n;
input [ OP_ADDR_W -1:0] word_index_last_pq;
+ input [ BIT_INDEX_W -1:0] bit_index_last_n;
+ input [ BIT_INDEX_W -1:0] bit_index_last_pq;
+
input bus_cs;
input bus_we;
input [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
@@ -71,16 +76,23 @@ module modexpng_core_top
wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ];
wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ];
- wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ;
- wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) ||
- (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ;
- wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ;
- wire uop_opcode_is_mmm = uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ;
- wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
- (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ;
-
+ wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ;
+ wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) ||
+ (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ;
+ wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ;
+ wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT ) ||
+ (uop_data_opcode == UOP_OPCODE_LADDER_STEP ) ;
+ wire uop_opcode_is_mmm = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ) ||
+ (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ;
+ wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
+ (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ||
+ (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
+ (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ;
+
+ wire uop_loop_now;
+
wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT;
- wire [UOP_ADDR_W -1:0] uop_addr_next = uop_addr + 1'b1;
+ wire [UOP_ADDR_W -1:0] uop_addr_next = uop_loop_now ? uop_addr - 1'b1: uop_addr + 1'b1;
modexpng_uop_rom uop_rom
(
@@ -595,6 +607,14 @@ module modexpng_core_top
reg [OP_ADDR_W -1:0] io_mgr_word_index_last;
reg [UOP_OPCODE_W -1:0] io_mgr_opcode;
+ reg [BIT_INDEX_W -1:0] io_mgr_ladder_steps;
+ wire io_mgr_ladder_d;
+ wire io_mgr_ladder_p;
+ wire io_mgr_ladder_q;
+ wire io_mgr_ladder_done;
+
+ assign uop_loop_now = (uop_data_opcode == UOP_OPCODE_LADDER_STEP) && !io_mgr_ladder_done;
+
wire [WORD_W -1:0] wrk_rd_narrow_x_data_x_trunc = wrk_rd_narrow_x_data_x[WORD_W-1:0];
wire [WORD_W -1:0] wrk_rd_narrow_x_data_y_trunc = wrk_rd_narrow_x_data_y[WORD_W-1:0];
@@ -653,7 +673,13 @@ module modexpng_core_top
.io_out_dout (io_out_data),
.wrk_narrow_x_din_x_trunc (wrk_rd_narrow_x_data_x_trunc),
- .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc)
+ .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc),
+
+ .ladder_steps (io_mgr_ladder_steps),
+ .ladder_d (io_mgr_ladder_d),
+ .ladder_p (io_mgr_ladder_p),
+ .ladder_q (io_mgr_ladder_q),
+ .ladder_done (io_mgr_ladder_done)
);
@@ -685,6 +711,9 @@ module modexpng_core_top
reg mmm_force_unity_b_x;
reg mmm_force_unity_b_y;
+ reg mmm_only_reduce_x;
+ reg mmm_only_reduce_y;
+
wire rdct_ena_x;
wire rdct_ena_y;
wire rdct_rdy_x;
@@ -702,6 +731,7 @@ module modexpng_core_top
.word_index_last (mmm_word_index_last_x),
.word_index_last_minus1 (mmm_word_index_last_minus1_x),
.force_unity_b (mmm_force_unity_b_x),
+ .only_reduce (mmm_only_reduce_x),
.sel_wide_in (mmm_sel_wide_in_x),
.sel_narrow_in (mmm_sel_narrow_in_x),
@@ -757,6 +787,7 @@ module modexpng_core_top
.word_index_last (mmm_word_index_last_y),
.word_index_last_minus1 (mmm_word_index_last_minus1_y),
.force_unity_b (mmm_force_unity_b_y),
+ .only_reduce (mmm_only_reduce_y),
.sel_wide_in (mmm_sel_wide_in_y),
.sel_narrow_in (mmm_sel_narrow_in_y),
@@ -898,6 +929,7 @@ module modexpng_core_top
reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_in;
reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_out;
reg [ OP_ADDR_W -1:0] wrk_word_index_last;
+ reg [ OP_ADDR_W -1:0] wrk_word_index_last_half;
reg [UOP_OPCODE_W -1:0] wrk_opcode;
modexpng_general_worker general_worker
@@ -916,6 +948,7 @@ module modexpng_core_top
.opcode (wrk_opcode),
.word_index_last (wrk_word_index_last),
+ .word_index_last_half (wrk_word_index_last_half),
.wrk_rd_wide_xy_ena_x (wrk_rd_wide_xy_ena_x),
.wrk_rd_wide_xy_bank_x (wrk_rd_wide_xy_bank_x),
@@ -976,10 +1009,11 @@ module modexpng_core_top
//
uop_exit_from_busy = 0;
//
- if (uop_opcode_is_in) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
- if (uop_opcode_is_out) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~mmm_ena & mmm_rdy);
- if (uop_opcode_is_mmm) uop_exit_from_busy = ~mmm_ena & mmm_rdy ;
- if (uop_opcode_is_wrk) uop_exit_from_busy = ~wrk_ena & wrk_rdy ;
+ if (uop_opcode_is_in ) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
+ if (uop_opcode_is_out ) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~wrk_ena & wrk_rdy);
+ if (uop_opcode_is_mmm ) uop_exit_from_busy = ~mmm_ena & mmm_rdy;
+ if (uop_opcode_is_wrk ) uop_exit_from_busy = ~wrk_ena & wrk_rdy;
+ if (uop_opcode_is_ladder) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
//
end
@@ -995,17 +1029,22 @@ module modexpng_core_top
mmm_ena_y <= 1'b0;
wrk_ena <= 1'b0;
end else begin
- io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in || uop_opcode_is_out) : 1'b0;
- mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
- mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
- wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || uop_opcode_is_out) : 1'b0;
+ io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in ||
+ uop_opcode_is_out ||
+ uop_opcode_is_ladder): 1'b0;
+ mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
+ mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
+ wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk ||
+ uop_opcode_is_out ): 1'b0;
end
//
// Parameters
//
wire uop_aux_is_1 = uop_data_aux == UOP_AUX_1;
-
+
+ // TODO: Add reset to default don't care values.
+
always @(posedge clk)
//
if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
@@ -1044,9 +1083,10 @@ module modexpng_core_top
UOP_LADDER_00: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b00;
UOP_LADDER_11: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b11;
UOP_LADDER_D: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX;
- UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX;
+ UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= {io_mgr_ladder_p, io_mgr_ladder_q};
endcase
//
+ {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b0}};
{mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}};
{mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }};
{mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }};
@@ -1055,24 +1095,42 @@ module modexpng_core_top
//
end
//
+ UOP_OPCODE_MODULAR_REDUCE_PROC: begin
+ //
+ {mmm_ladder_mode_x, mmm_ladder_mode_y } <= {2{1'bX }};
+ //
+ {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b1 }};
+ {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{1'b0 }};
+ {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{BANK_DNC }};
+ {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{BANK_DNC }};
+ {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }};
+ {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}};
+ //
+ end
+ //
UOP_OPCODE_PROPAGATE_CARRIES: begin
wrk_sel_narrow_in <= uop_data_sel_narrow_in;
wrk_sel_narrow_out <= uop_data_sel_narrow_out;
end
//
- UOP_OPCODE_COPY_CRT_Y2X: begin
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_COPY_LADDERS_X2Y: begin
wrk_sel_wide_in <= uop_data_sel_wide_in;
wrk_sel_wide_out <= uop_data_sel_wide_out;
wrk_sel_narrow_in <= uop_data_sel_narrow_in;
wrk_sel_narrow_out <= uop_data_sel_narrow_out;
end
//
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
+ wrk_sel_narrow_in <= uop_data_sel_narrow_in;
+ end
+ //
endcase
//
end
//
- // Length
+ // Lengths
//
wire [OP_ADDR_W -1:0] word_index_last_n_minus1 = word_index_last_n - 1'b1;
wire [OP_ADDR_W -1:0] word_index_last_pq_minus1 = word_index_last_pq - 1'b1;
@@ -1086,7 +1144,10 @@ module modexpng_core_top
case (uop_data_opcode)
//
UOP_OPCODE_INPUT_TO_WIDE,
- UOP_OPCODE_INPUT_TO_NARROW: io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+ UOP_OPCODE_INPUT_TO_NARROW,
+ UOP_OPCODE_OUTPUT_FROM_NARROW:
+ //
+ io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
//
UOP_OPCODE_MODULAR_MULTIPLY: begin
{mmm_word_index_last_x, mmm_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }};
@@ -1094,8 +1155,31 @@ module modexpng_core_top
{rdct_word_index_last_x, rdct_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }};
end
//
- UOP_OPCODE_PROPAGATE_CARRIES:
- wrk_word_index_last = uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
+ wrk_word_index_last <= word_index_last_n;
+ wrk_word_index_last_half <= word_index_last_pq;
+ end
+ //
+ UOP_OPCODE_MODULAR_REDUCE_PROC: begin
+ {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{word_index_last_pq }};
+ {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{word_index_last_pq_minus1}};
+ {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{word_index_last_pq }};
+ end
+ //
+ UOP_OPCODE_LADDER_INIT: begin
+ io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
+ io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n;
+ end
+ //
+ UOP_OPCODE_LADDER_STEP: begin
+ io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
+ io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n;
+ end
//
endcase
//
@@ -1140,8 +1224,8 @@ module modexpng_core_top
UOP_FSM_STATE_IDLE: valid_reg <= ~next;
UOP_FSM_STATE_DECODE: valid_reg <= uop_opcode_is_stop;
endcase
-
-
+
+
//
// BEGIN DEBUG
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index c35f0b3..269ef98 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -14,6 +14,7 @@ module modexpng_general_worker
opcode,
word_index_last,
+ word_index_last_half,
wrk_rd_wide_xy_ena_x,
wrk_rd_wide_xy_bank_x,
@@ -88,6 +89,7 @@ module modexpng_general_worker
input [ UOP_OPCODE_W -1:0] opcode;
input [ OP_ADDR_W -1:0] word_index_last;
+ input [ OP_ADDR_W -1:0] word_index_last_half;
output wrk_rd_wide_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x;
@@ -141,18 +143,35 @@ module modexpng_general_worker
//
// FSM Declaration
//
- localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0;
- localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1;
- localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2;
- localparam [3:0] WRK_FSM_STATE_BUSY = 4'h3;
- localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
- localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
- localparam [3:0] WRK_FSM_STATE_STOP = 4'h7;
+ localparam [4:0] WRK_FSM_STATE_IDLE = 5'h00; // 5-bit encoding; 5'h00..5'h07 are shared by both sequences' idle/stop and the one-pass states
- reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
- reg [3:0] wrk_fsm_state_next_one_pass; // single address space sweep
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1 = 5'h01;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2 = 5'h02;
+ localparam [4:0] WRK_FSM_STATE_BUSY = 5'h03;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST1 = 5'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST2 = 5'h06;
+
+ localparam [4:0] WRK_FSM_STATE_STOP = 5'h07; // both next-state functions go from STOP back to IDLE
+ // "Meander" sub-states (5'h10..5'h19): every phase is visited as _M1 and then as _M2.
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 5'h10; // PRE/BUSY _M1 states: read addresses use the sel_*_out banks
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 5'h11; // PRE/BUSY _M2 states: read addresses use the sel_*_in banks
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 5'h12;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 5'h13;
+ localparam [4:0] WRK_FSM_STATE_BUSY_M1 = 5'h14;
+ localparam [4:0] WRK_FSM_STATE_BUSY_M2 = 5'h15; // BUSY_M2 / POST1_M2 / POST2_M2 are the states in which copy_ladders_x2y writes
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 5'h16;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 5'h17;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 5'h18;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 5'h19;
+
+ reg [4:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
+ reg [4:0] wrk_fsm_state_next_one_pass; // single address space sweep
+ reg [4:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y)
+ // TODO: Comment on how narrow/wide address increment works (narrow is one long sweep, wide is two sweeps of half the length)
+
+
//
// Control Signals
//
@@ -244,32 +263,62 @@ module modexpng_general_worker
//
// Delays
//
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly3;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly4;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly3;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly4;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly3;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly4;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly3;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly4;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly3;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly3;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly3;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3;
+
always @(posedge clk) begin
//
- //{rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
- //{rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
+ {rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
+ {rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
+ //
+ {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2};
+ {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2};
//
{rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
{rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
//
+ {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2};
+ {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2};
+ //
+ {wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x};
+ {wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y};
+ //
+ {wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x};
+ {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y};
+ //
end
-
-
- //
- // Handy Wires
- //
- wire rd_narrow_xy_addr_x_next_is_last;
- wire rd_narrow_xy_addr_y_next_is_last;
//
@@ -310,7 +359,8 @@ module modexpng_general_worker
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW:
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
//
case (wrk_fsm_state_next_one_pass)
//
@@ -333,12 +383,30 @@ module modexpng_general_worker
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_BUSY: begin
//
- enable_narrow_xy_rd_en;
enable_wide_xy_rd_en;
+ enable_narrow_xy_rd_en;
//
end
//
endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state_next_one_pass_meander)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1,
+ WRK_FSM_STATE_LATENCY_PRE1_M2,
+ WRK_FSM_STATE_LATENCY_PRE2_M1,
+ WRK_FSM_STATE_LATENCY_PRE2_M2,
+ WRK_FSM_STATE_BUSY_M1,
+ WRK_FSM_STATE_BUSY_M2: begin
+ //
+ enable_wide_xy_rd_en;
+ enable_narrow_xy_rd_en;
+ //
+ end
+ //
+ endcase
//
endcase
//
@@ -389,8 +457,7 @@ module modexpng_general_worker
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST2:
//
- enable_narrow_xy_wr_en;
- //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+ enable_narrow_xy_wr_en;
//
//
endcase
@@ -405,7 +472,34 @@ module modexpng_general_worker
//
enable_wide_xy_wr_en;
enable_narrow_xy_wr_en;
- //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2:
+ //
+ enable_wide_xy_wr_en;
+ //
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ //
+ enable_wide_xy_wr_en;
+ enable_narrow_xy_wr_en;
//
end
//
@@ -424,7 +518,7 @@ module modexpng_general_worker
reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r;
- wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};
@@ -497,6 +591,45 @@ module modexpng_general_worker
end
//
endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ //
+ wr_wide_x_dout_x <= wrk_rd_wide_x_din_x_dly3;
+ wr_wide_y_dout_x <= wrk_rd_wide_x_din_x_dly2;
+ wr_wide_x_dout_y <= wrk_rd_wide_x_din_y_dly3;
+ wr_wide_y_dout_y <= wrk_rd_wide_x_din_y_dly2;
+ //
+ wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_x_dly3;
+ wr_narrow_y_dout_x <= wrk_rd_narrow_x_din_x_dly2;
+ wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y_dly3;
+ wr_narrow_y_dout_y <= wrk_rd_narrow_x_din_y_dly2;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin
+ //
+ wr_wide_x_dout_x <= wrk_rd_narrow_x_din_x;
+ wr_wide_y_dout_x <= wrk_rd_narrow_y_din_x;
+ wr_wide_x_dout_y <= wrk_rd_narrow_x_din_y;
+ wr_wide_y_dout_y <= wrk_rd_narrow_y_din_y;
+ //
+ end
+ //
+ endcase
//
endcase
//
@@ -506,6 +639,9 @@ module modexpng_general_worker
//
// Write Address Logic
//
+ wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half;
+ wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half;
+
always @(posedge clk) begin
//
{wr_wide_xy_bank_x, wr_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC};
@@ -534,22 +670,64 @@ module modexpng_general_worker
//
endcase
//
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin
+ //
+ wr_wide_xy_bank_x <= uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H;
+ wr_wide_xy_bank_y <= uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H;
+ //
+ wr_wide_xy_addr_x <= rd_wide_xy_addr_x_dly2;
+ wr_wide_xy_addr_y <= rd_wide_xy_addr_y_dly2;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ //
+ {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly4};
+ {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly4};
+ //
+ {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly4};
+ {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly4};
+ //
+ end
+ //
+ endcase
+ //
//
endcase
//
end
-
-
+
+
//
// Read Address Logic
//
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_next;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_next;
+
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next;
- assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
- assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
+ wire rd_wide_xy_addr_x_next_is_last = rd_wide_xy_addr_x_next == word_index_last_half;
+ wire rd_wide_xy_addr_y_next_is_last = rd_wide_xy_addr_y_next == word_index_last_half;
+
+ wire rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
+ wire rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
- always @(posedge clk) begin
+ always @(posedge clk) begin // TODO: Maybe split into two blocks (read address / next address)??
//
{rd_wide_xy_bank_x, rd_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ??
{rd_wide_xy_bank_y, rd_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
@@ -572,6 +750,9 @@ module modexpng_general_worker
{rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
{rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
//
+ rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+ //
rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
//
@@ -586,11 +767,113 @@ module modexpng_general_worker
{rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
{rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
//
+ rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
+ rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+ //
+ rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+ rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state_next_one_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
+ //
+ rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x_next};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y_next};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
+ //
+ rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
+ rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+ //
+ rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+ rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state_next_one_pass_meander)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, OP_ADDR_ZERO};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, OP_ADDR_ZERO};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, OP_ADDR_ZERO};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, OP_ADDR_ZERO};
+ //
+ rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M2: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE2_M1,
+ WRK_FSM_STATE_BUSY_M1: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_next};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_next};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_next};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_next};
+ //
+ rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
+ rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+ //
rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
//
end
//
+ WRK_FSM_STATE_LATENCY_PRE2_M2,
+ WRK_FSM_STATE_BUSY_M2: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
+ //
+ end
+ //
endcase
//
//
@@ -608,7 +891,9 @@ module modexpng_general_worker
else case (opcode)
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+ UOP_OPCODE_COPY_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
default: wrk_fsm_state <= WRK_FSM_STATE_IDLE;
endcase
@@ -616,17 +901,20 @@ module modexpng_general_worker
//
// Busy Exit Logic
//
- reg wrk_fsm_done_one_pass = 1'b0;
+ reg wrk_fsm_done_one_pass = 1'b0;
+ reg wrk_fsm_done_one_pass_meander = 1'b0;
always @(posedge clk) begin
//
- wrk_fsm_done_one_pass <= 1'b0;
+ wrk_fsm_done_one_pass <= 1'b0;
+ wrk_fsm_done_one_pass_meander <= 1'b0;
//
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X: begin
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
//
if (wrk_fsm_state == WRK_FSM_STATE_BUSY) begin
//
@@ -637,6 +925,20 @@ module modexpng_general_worker
//
end
//
+ UOP_OPCODE_COPY_LADDERS_X2Y: begin
+ //
+ if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M2) begin
+ //
+ if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; // TODO: Check, whether both are necessary...
+ if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1;
+ //
+ end
+ //
+ if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M1)
+ wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander;
+ //
+ end
+ //
endcase
//
end
@@ -654,7 +956,31 @@ module modexpng_general_worker
WRK_FSM_STATE_BUSY: wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ;
WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_POST2 ;
WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_STOP ;
- WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ default: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ endcase
+ //
+ end
+
+ always @* begin
+ // Combinational next-state function of the "meander" sequence; wrk_fsm_state follows it only for UOP_OPCODE_COPY_LADDERS_X2Y.
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass_meander = ena ? WRK_FSM_STATE_LATENCY_PRE1_M1 : WRK_FSM_STATE_IDLE ; // leave IDLE only when ena is high
+ // Each phase (PRE1, PRE2, BUSY, POST1, POST2) is stepped through as _M1 and then _M2.
+ WRK_FSM_STATE_LATENCY_PRE1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE1_M2 ;
+ WRK_FSM_STATE_LATENCY_PRE1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M1 ;
+ WRK_FSM_STATE_LATENCY_PRE2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M2 ;
+ WRK_FSM_STATE_LATENCY_PRE2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M1 ;
+ WRK_FSM_STATE_BUSY_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M2 ;
+ WRK_FSM_STATE_BUSY_M2: wrk_fsm_state_next_one_pass_meander = wrk_fsm_done_one_pass_meander ? WRK_FSM_STATE_LATENCY_POST1_M1 : WRK_FSM_STATE_BUSY_M1 ; // loop BUSY_M1 <-> BUSY_M2 until the done flag is set
+ WRK_FSM_STATE_LATENCY_POST1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST1_M2 ;
+ WRK_FSM_STATE_LATENCY_POST1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M1 ;
+ WRK_FSM_STATE_LATENCY_POST2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M2 ;
+ WRK_FSM_STATE_LATENCY_POST2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_STOP ;
+ //
+ WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ;
+ // Any state not listed above (this includes the one-pass states) maps to IDLE.
+ default: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ;
endcase
//
end
diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v
index dfbd676..59f4709 100644
--- a/rtl/modexpng_io_manager.v
+++ b/rtl/modexpng_io_manager.v
@@ -53,7 +53,13 @@ module modexpng_io_manager
io_out_dout,
wrk_narrow_x_din_x_trunc,
- wrk_narrow_x_din_y_trunc
+ wrk_narrow_x_din_y_trunc,
+
+ ladder_steps,
+ ladder_d,
+ ladder_p,
+ ladder_q,
+ ladder_done
);
//
@@ -120,6 +126,12 @@ module modexpng_io_manager
output [ WORD_W -1:0] wrk_narrow_x_din_x_trunc;
output [ WORD_W -1:0] wrk_narrow_x_din_y_trunc;
+
+ input [ BIT_INDEX_W -1:0] ladder_steps;
+ output ladder_d;
+ output ladder_p;
+ output ladder_q;
+ output ladder_done;
//
@@ -254,6 +266,10 @@ module modexpng_io_manager
wire opcode_is_output = opcode == UOP_OPCODE_OUTPUT_FROM_NARROW;
+ wire opcode_is_ladder_init = opcode == UOP_OPCODE_LADDER_INIT;
+ wire opcode_is_ladder_step = opcode == UOP_OPCODE_LADDER_STEP;
+ wire opcode_is_ladder = opcode_is_ladder_init || opcode_is_ladder_step;
+
wire opcode_is_input_wide = opcode == UOP_OPCODE_INPUT_TO_WIDE;
wire opcode_is_input_narrow = opcode == UOP_OPCODE_INPUT_TO_NARROW;
@@ -269,8 +285,90 @@ module modexpng_io_manager
wire in_1_addr_op_next_is_last;
wire in_2_addr_op_next_is_last;
+ wire in_2_addr_op_next_is_one;
wire dummy_addr_op_next_is_last;
+
+ //
+ // Ladder Init/Step Logic
+ // Keeps a bit index (ladder_index) that LADDER_INIT loads from ladder_steps and every LADDER_STEP decrements.
+ reg ladder_d_r;
+ reg ladder_p_r;
+ reg ladder_q_r;
+ reg ladder_done_r = 1'b0;
+
+ assign ladder_d = ladder_d_r; // the four ladder outputs are driven straight from registers
+ assign ladder_p = ladder_p_r;
+ assign ladder_q = ladder_q_r;
+ assign ladder_done = ladder_done_r;
+
+ reg [BIT_INDEX_W -1:0] ladder_index;
+ reg [BIT_INDEX_W -1:0] ladder_index_next; // always loaded with (new ladder_index - 1), see the clocked block below
+ wire [ OP_ADDR_W -1:0] ladder_index_msb = ladder_index[BIT_INDEX_W-1-: OP_ADDR_W]; // top OP_ADDR_W bits of the index
+ wire [ WORD_MUX_W -1:0] ladder_index_lsb = ladder_index[ WORD_MUX_W-1-:WORD_MUX_W]; // bottom WORD_MUX_W bits of the index (bit select for the ladder mux)
+ wire ladder_index_is_zero = ladder_index == BIT_INDEX_ZERO;
+
+ always @(posedge clk)
+ // Updates happen only on the clock edge where the io FSM is about to enter LATENCY_PRE1.
+ if (io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1) begin
+ // LADDER_INIT: start from ladder_steps and clear the done flag.
+ if (opcode_is_ladder_init) begin
+ ladder_index <= ladder_steps;
+ ladder_index_next <= ladder_steps - 1'b1;
+ ladder_done_r <= 1'b0;
+ end
+ // LADDER_STEP: advance to the pre-computed next index.
+ if (opcode_is_ladder_step) begin
+ ladder_index <= ladder_index_next;
+ ladder_index_next <= ladder_index_next - 1'b1;
+ if (ladder_index_is_zero) ladder_done_r <= 1'b1; // non-blocking: tests the index value from before this step's update
+ end
+ //
+ end
+
+
+ //
+ // Ladder Mux
+ // Combinationally picks the bit of io_in_2_din whose position is given by ladder_index_lsb.
+ reg ladder_dpq_mux;
+
+ always @(io_in_2_din, ladder_index_lsb)
+ // NOTE(review): labels are 4-bit literals selecting bits 0..15, so this presumably relies on WORD_MUX_W == 4 - confirm in modexpng_parameters.vh.
+ case(ladder_index_lsb)
+ 4'b0000: ladder_dpq_mux = io_in_2_din[ 0];
+ 4'b0001: ladder_dpq_mux = io_in_2_din[ 1];
+ 4'b0010: ladder_dpq_mux = io_in_2_din[ 2];
+ 4'b0011: ladder_dpq_mux = io_in_2_din[ 3];
+ 4'b0100: ladder_dpq_mux = io_in_2_din[ 4];
+ 4'b0101: ladder_dpq_mux = io_in_2_din[ 5];
+ 4'b0110: ladder_dpq_mux = io_in_2_din[ 6];
+ 4'b0111: ladder_dpq_mux = io_in_2_din[ 7];
+ 4'b1000: ladder_dpq_mux = io_in_2_din[ 8];
+ 4'b1001: ladder_dpq_mux = io_in_2_din[ 9];
+ 4'b1010: ladder_dpq_mux = io_in_2_din[10];
+ 4'b1011: ladder_dpq_mux = io_in_2_din[11];
+ 4'b1100: ladder_dpq_mux = io_in_2_din[12];
+ 4'b1101: ladder_dpq_mux = io_in_2_din[13];
+ 4'b1110: ladder_dpq_mux = io_in_2_din[14];
+ 4'b1111: ladder_dpq_mux = io_in_2_din[15]; // all 16 selector values are listed; there is no default item
+ endcase
+
+ always @(posedge clk)
+ // Samples ladder_dpq_mux into one of three registers, selected by the current io_fsm_state; ladder opcodes only.
+ case (io_fsm_state)
+ // The registers keep their value in every state/opcode combination not listed here.
+ IO_FSM_STATE_BUSY:
+ if (opcode_is_ladder) ladder_d_r <= ladder_dpq_mux; // BUSY -> ladder_d
+ //
+ IO_FSM_STATE_LATENCY_POST1:
+ if (opcode_is_ladder) ladder_p_r <= ladder_dpq_mux; // LATENCY_POST1 -> ladder_p
+ //
+ IO_FSM_STATE_LATENCY_POST2:
+ if (opcode_is_ladder) ladder_q_r <= ladder_dpq_mux; // LATENCY_POST2 -> ladder_q
+ //
+ endcase
+
+
//
// Source Enable Logic
//
@@ -284,8 +382,8 @@ module modexpng_io_manager
IO_FSM_STATE_LATENCY_PRE1,
IO_FSM_STATE_LATENCY_PRE2,
IO_FSM_STATE_BUSY: begin
- in_1_en <= opcode_is_input && sel_aux_is_1;
- in_2_en <= opcode_is_input && sel_aux_is_2;
+ in_1_en <= opcode_is_input && sel_aux_is_1;
+ in_2_en <= (opcode_is_input && sel_aux_is_2) || opcode_is_ladder; // ladder opcodes enable input 2 regardless of sel_aux
end
//
IO_FSM_STATE_EXTRA: begin
@@ -450,35 +548,59 @@ module modexpng_io_manager
wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next;
assign in_1_addr_op_next_is_last = in_1_addr_op_next == word_index_last;
- assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last;
+ assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last;
+ assign in_2_addr_op_next_is_one = in_2_addr_op_next == OP_ADDR_ONE;
assign dummy_addr_op_next_is_last = dummy_addr_op_next == word_index_last;
- always @(posedge clk)
+ always @(posedge clk) begin
+ // defaults: every address register gets the don't-care value unless an arm of the case that follows overrides it
+ {in_1_addr_bank, in_1_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
+ {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
+ { dummy_addr_op} <= { OP_ADDR_DNC};
+ // same defaults for the look-ahead ("next") address registers
+ in_1_addr_next <= {BANK_DNC, OP_ADDR_DNC};
+ in_2_addr_next <= {BANK_DNC, OP_ADDR_DNC};
+ dummy_addr_next <= { OP_ADDR_DNC};
//
case (io_fsm_state_next)
//
IO_FSM_STATE_LATENCY_PRE1: begin
//
- {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO};
- {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO};
- { dummy_addr_op} <= { OP_ADDR_ZERO};
+ {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO};
+ if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO};
+ else {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC}; // ladder: input 2 gets no real address in this state
+ { dummy_addr_op} <= { OP_ADDR_ZERO};
+ // look-ahead registers are primed with word 1; this is done for ladder opcodes too
+ in_1_addr_next <= {sel_in, OP_ADDR_ONE};
+ in_2_addr_next <= {sel_in, OP_ADDR_ONE};
+ dummy_addr_next <= { OP_ADDR_ONE};
+ //
+ end
+ //
+ IO_FSM_STATE_LATENCY_PRE2: begin
+ // PRE2 now has its own arm so that ladder opcodes can override the input-2 address
+ {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
+ if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
+ else {in_2_addr_bank, in_2_addr_op } <= {BANK_IN_2_D, ladder_index_msb}; // ladder: address the D bank at the word given by ladder_index_msb
+ { dummy_addr_op} <= dummy_addr_next;
//
- in_1_addr_next <= {sel_in, OP_ADDR_ONE};
- in_2_addr_next <= {sel_in, OP_ADDR_ONE};
- dummy_addr_next <= { OP_ADDR_ONE};
+ in_1_addr_next <= in_1_addr_next + 1'b1;
+ if (!opcode_is_ladder) in_2_addr_next <= in_2_addr_next + 1'b1;
+ else in_2_addr_next <= {BANK_IN_2_P, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]}; // ladder: queue the P bank; top address bit forced to 1, low bits taken from ladder_index_msb
+ dummy_addr_next <= dummy_addr_next + 1'b1;
//
end
//
- IO_FSM_STATE_LATENCY_PRE2,
IO_FSM_STATE_BUSY: begin
//
{in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
{in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
{ dummy_addr_op} <= dummy_addr_next;
//
- in_1_addr_next <= in_1_addr_next + 1'b1;
- in_2_addr_next <= in_2_addr_next + 1'b1;
- dummy_addr_next <= dummy_addr_next + 1'b1;
+ in_1_addr_next <= in_1_addr_next + 1'b1;
+ if (!opcode_is_ladder) in_2_addr_next <= in_2_addr_next + 1'b1;
+ else in_2_addr_next <= {BANK_IN_2_Q, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]}; // ladder: queue the Q bank; top address bit forced to 1, low bits taken from ladder_index_msb
+ dummy_addr_next <= dummy_addr_next + 1'b1;
//
end
//
@@ -499,7 +621,8 @@ module modexpng_io_manager
end
//
endcase
-
+ //
+ end
//
@@ -525,7 +648,7 @@ module modexpng_io_manager
if (opcode_is_input) begin
if (sel_aux_is_1 && in_1_addr_op_next_is_last) io_fsm_done <= 1'b1;
if (sel_aux_is_2 && in_2_addr_op_next_is_last) io_fsm_done <= 1'b1;
- end else if (opcode_is_output) begin
+ end else if (opcode_is_output || opcode_is_ladder) begin // ladder opcodes, like outputs, finish on the dummy address counter
if (dummy_addr_op_next_is_last) io_fsm_done <= 1'b1;
end
//
@@ -571,4 +694,17 @@ module modexpng_io_manager
endcase
+ //
+ // BEGIN DEBUG
+ //
+ always @(posedge clk)
+ // debug trace: prints the ladder index and the captured D/P/Q bits on each clock where the IO FSM is in STOP with a ladder opcode
+ if ((io_fsm_state == IO_FSM_STATE_STOP) && opcode_is_ladder)
+ $display("[%4d] / %d / %d / %d", ladder_index, ladder_d_r, ladder_p_r, ladder_q_r); // NOTE(review): $display sits in RTL with no translate_off guard - confirm the synthesis flow tolerates it
+
+ //
+ // END DEBUG
+ //
+
+
endmodule
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
index 2e591e7..f68c559 100644
--- a/rtl/modexpng_microcode.vh
+++ b/rtl/modexpng_microcode.vh
@@ -1,8 +1,8 @@
-localparam UOP_OPCODE_W = 4;
+localparam UOP_OPCODE_W = 5;
localparam UOP_CRT_W = 1;
localparam UOP_NPQ_W = 1;
localparam UOP_AUX_W = 1;
-localparam UOP_LADDER_W = 1;
+localparam UOP_LADDER_W = 2;
localparam UOP_SEL_W = 4 * BANK_ADDR_W;
localparam UOP_ADDR_W = 6; // 64 instructions
@@ -11,17 +11,17 @@ localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_USING_CRT = 6'd0;
localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_WITHOUT_CRT = 6'd31;
-// 4 1 1 1 2 4*3=12
+// 5 1 1 1 2 4*3=12
localparam UOP_W = UOP_OPCODE_W + UOP_CRT_W + UOP_NPQ_W + UOP_AUX_W + UOP_LADDER_W + UOP_SEL_W;
-// [20:17] [16] [15] [14] [13:12] [11:9][8:6][5:3][2:0]
+// [21:17] [16] [15] [14] [13:12] [11:9][8:6][5:3][2:0]
// OPCODE
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 4'd0;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 5'd0;
/* all fields are don't care
*/
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 4'd1;
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 5'd1;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 5'd2;
/* CRT tells into which of the dual MMM to write
* NPQ specifies the width of the operand
* AUX specifies from which INPUT to read
@@ -31,7 +31,7 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2;
*
*/
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 5'd3;
/* CRT tells from which of the dual MMM to read
* NPQ specifies the width of the operand
* AUX is don't care
@@ -39,27 +39,45 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3;
* source and destination WIDE are don't care
*/
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 4'd4;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 5'd4;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd5;
/* CRT is don't care
* NPQ specifies the width of the operand
* AUX is don't care
* LADDER is don't care
*/
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 4'd8;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 5'd8;
/* CRT is don't care
* NPQ specifies the width of the operand
* AUX = AUX_2 forces B input to 1 (AUX_1 reads from source NARROW as usual)
* LADDER specifies Montgomery ladder mode
*/
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_INIT = 5'd10;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_PROC = 5'd11;
+/* CRT    - meaning not documented yet; the uop ROM issues both opcodes with UOP_CRT_DNC (TODO confirm)
+ * NPQ    - meaning not documented yet; the uop ROM uses UOP_NPQ_DNC for INIT and UOP_NPQ_PQ for PROC (TODO confirm)
+ * AUX    - meaning not documented yet; the uop ROM issues both opcodes with UOP_AUX_DNC (TODO confirm)
+ * LADDER is don't care
+ */
+
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 4'd11;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 5'd12;
/* CRT is don't care
* NPQ specifies the width of the operand
* AUX is don't care
* LADDER is don't care
* source and destination WIDE are don't care
- */
+ */
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_INIT = 5'd16;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_STEP = 5'd17;
+/* CRT is don't care
+ * NPQ is don't care
+ * AUX is don't care
+ * LADDER is don't care
+ * WIDE and NARROW are don't care
+ */
// CRT
localparam [UOP_CRT_W -1:0] UOP_CRT_X = 1'b0;
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index 14f1b47..6e52a97 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -8,6 +8,7 @@ module modexpng_mmm_dual
word_index_last,
word_index_last_minus1,
force_unity_b,
+ only_reduce,
sel_wide_in, sel_narrow_in,
@@ -72,6 +73,7 @@ module modexpng_mmm_dual
input [7:0] word_index_last;
input [7:0] word_index_last_minus1;
input force_unity_b;
+ input only_reduce;
input [BANK_ADDR_W-1:0] sel_wide_in;
input [BANK_ADDR_W-1:0] sel_narrow_in;
@@ -120,7 +122,8 @@ module modexpng_mmm_dual
//
reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
-
+
+ wire [FSM_STATE_WIDTH-1:0] fsm_state_after_idle;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
@@ -905,16 +908,17 @@ module modexpng_mmm_dual
//
// FSM Transition Logic
//
+ assign fsm_state_after_idle = !only_reduce ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_INIT; // only_reduce = 1 leaves IDLE straight into the TRIANGLE states instead of the SQUARE states
assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
- assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
+ assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
always @* begin
//
fsm_state_next = FSM_STATE_IDLE;
//
case (fsm_state)
- FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE;
+ FSM_STATE_IDLE: fsm_state_next = ena ? fsm_state_after_idle /* previously always FSM_STATE_MULT_SQUARE_COL_0_INIT */ : FSM_STATE_IDLE;
FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index 6e6c3ca..c7566ad 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -23,7 +23,7 @@ localparam MAC_W = 47;
localparam BUS_DATA_W = 32;
localparam BUS_OP_ADDR_W = cryptech_clog2(MAX_OP_W / BUS_DATA_W);
-
+localparam BIT_INDEX_W = cryptech_clog2(MAX_OP_W); // index width derived from the maximum operand width MAX_OP_W
localparam BANK_ADDR_W = 3;
localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W);
@@ -33,6 +33,8 @@ localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS);
localparam CARRY_W = WORD_EXT_W - WORD_W;
+localparam WORD_MUX_W = cryptech_clog2(WORD_W); // select width derived from the word width WORD_W
+
localparam [CARRY_W-1:0] CARRY_ZERO = {CARRY_W{1'b0}};
localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0;
@@ -80,6 +82,10 @@ localparam [BANK_ADDR_W-1:0] BANK_OUT_YM = 3'd2;
localparam [BANK_ADDR_W-1:0] BANK_DNC = {BANK_ADDR_W{1'bX}};
+localparam [OP_ADDR_W-1:0] OP_ADDR_LADDER_LAST = 3; // 0..3, i.e. <dummy>, D, P, Q
+
+localparam [BIT_INDEX_W-1:0] BIT_INDEX_ZERO = {BIT_INDEX_W{1'b0}};
+
localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0;
localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1;
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
index 016b1b0..04f0c83 100644
--- a/rtl/modexpng_uop_rom.v
+++ b/rtl/modexpng_uop_rom.v
@@ -10,53 +10,67 @@ module modexpng_uop_rom
input wire clk;
input wire [UOP_ADDR_W -1:0] addr;
- output reg [UOP_W -1:0] data;
+ output reg [UOP_W -1:0] data;
always @(posedge clk)
//
case (addr)
- 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
- 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
- 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; //
- 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; //
- 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
- 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
+ 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
+ 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
+ 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; //
+ 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; //
+ 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
+ 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
//
- 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
- 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
- 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
- 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
- 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
- 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
+ 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
+ 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
//
- 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; //
- 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
- 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
+ 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; //
+ 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
+ 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
//
- 6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; //
+ 6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; //
//
- 6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; //
- 6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; //
+ 6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; //
+ 6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; //
//
- 6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
+ 6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
//
- 6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; //
+ 6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; //
//
- 6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; //
+ 6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; //
//
- 6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; //
- 6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; //
- 6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; //
- 6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; //
- 6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; //
+ 6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; //
+ 6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; //
+ 6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; //
+ 6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; //
+ 6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; //
//
- 6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
- 6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
- 6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; //
- 6'd29: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; //
- 6'd30: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; //
- //
- default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
+ 6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd29: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd30: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; //
+ //
+ 6'd31: data <= {UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_DNC }; // reduce init; only the NARROW_C selector is set. NOTE(review): UOP_ADDR_OFFSET_WITHOUT_CRT is also 6'd31 - confirm this entry is the intended non-CRT entry point
+ // reduce processing step; only the third and fourth selectors are set (WIDE_D, NARROW_D)
+ 6'd32: data <= {UOP_OPCODE_MODULAR_REDUCE_PROC, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
+ // three PQ-width modular multiplications, all with UOP_LADDER_11
+ 6'd33: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_D, BANK_NARROW_A, BANK_WIDE_C, BANK_NARROW_C }; // selectors: WIDE_D, NARROW_A, WIDE_C, NARROW_C
+ 6'd34: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_A, BANK_WIDE_D, BANK_NARROW_D }; // selectors: WIDE_C, NARROW_A, WIDE_D, NARROW_D
+ 6'd35: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_A, BANK_DNC, BANK_WIDE_C, BANK_NARROW_C }; // AUX_2: B input forced to 1 (per the opcode notes in modexpng_microcode.vh)
+ // ladder copy opcode, PQ width; selectors: WIDE_D, NARROW_D, WIDE_C, NARROW_C
+ 6'd36: data <= {UOP_OPCODE_COPY_LADDERS_X2Y, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; //
+ // ladder sequence: init opcode, one ladder-mode multiplication, step opcode
+ 6'd37: data <= {UOP_OPCODE_LADDER_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // all fields other than the opcode are don't care
+ 6'd38: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_PQ, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // UOP_LADDER_PQ mode; all four selectors use bank C
+ 6'd39: data <= {UOP_OPCODE_LADDER_STEP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // all fields other than the opcode are don't care
+ // every address without an explicit entry decodes to STOP
+ default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
endcase
endmodule