diff options
author | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-03 16:50:25 +0300 |
---|---|---|
committer | Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> | 2019-10-03 16:50:25 +0300 |
commit | 02247784f18dc683d5873a52c1650e72f02273b5 (patch) | |
tree | 2e1bc8872c183ab5424545bcaa8ff90ffa7b53be | |
parent | e340b1489b08905e3d8acd17686e178028de7922 (diff) |
Added more micro-operations, entire Montgomery exponentiation ladder works now.
-rw-r--r-- | bench/tb_core_full.v | 511 | ||||
-rw-r--r-- | bench/tb_mmm_dual_x8.v | 940 | ||||
-rw-r--r-- | bench/tb_mmm_x8_dual.v | 327 | ||||
-rw-r--r-- | rtl/modexpng_core_top.v | 138 | ||||
-rw-r--r-- | rtl/modexpng_general_worker.v | 402 | ||||
-rw-r--r-- | rtl/modexpng_io_manager.v | 170 | ||||
-rw-r--r-- | rtl/modexpng_microcode.vh | 42 | ||||
-rw-r--r-- | rtl/modexpng_mmm_dual.v | 10 | ||||
-rw-r--r-- | rtl/modexpng_parameters.vh | 8 | ||||
-rw-r--r-- | rtl/modexpng_uop_rom.v | 82 |
10 files changed, 2171 insertions, 459 deletions
diff --git a/bench/tb_core_full.v b/bench/tb_core_full.v new file mode 100644 index 0000000..248634e --- /dev/null +++ b/bench/tb_core_full.v @@ -0,0 +1,511 @@ +`timescale 1ns / 1ps + +module tb_core_full; + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + + + // + // Test Vectors + // + localparam TB_MODULUS_LENGTH_N = 1024; + localparam TB_MODULUS_LENGTH_PQ = TB_MODULUS_LENGTH_N / 2; + localparam TB_NUM_WORDS_PQ = TB_MODULUS_LENGTH_PQ / BUS_DATA_W; + localparam TB_NUM_WORDS_N = TB_MODULUS_LENGTH_N / BUS_DATA_W; + localparam CORE_NUM_WORDS_PQ = TB_MODULUS_LENGTH_PQ / WORD_W; + localparam CORE_NUM_WORDS_N = TB_MODULUS_LENGTH_N / WORD_W; + + reg [31:0] M[0:TB_NUM_WORDS_N-1]; + reg [31:0] N[0:TB_NUM_WORDS_N-1]; + reg [31:0] N_FACTOR[0:TB_NUM_WORDS_N-1]; + reg [31:0] N_COEFF[0:TB_NUM_WORDS_N]; + reg [31:0] X[0:TB_NUM_WORDS_N-1]; + reg [31:0] Y[0:TB_NUM_WORDS_N-1]; + reg [31:0] P[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] Q[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] P_FACTOR[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] Q_FACTOR[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] P_COEFF[0:TB_NUM_WORDS_PQ]; + reg [31:0] Q_COEFF[0:TB_NUM_WORDS_PQ]; + reg [31:0] DP[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] DQ[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] QINV[0:TB_NUM_WORDS_PQ-1]; + reg [31:0] XM[0:TB_NUM_WORDS_N-1]; + reg [31:0] YM[0:TB_NUM_WORDS_N-1]; + reg [31:0] S[0:TB_NUM_WORDS_N-1]; + reg [31:0] XM_READBACK[0:TB_NUM_WORDS_N-1]; + reg [31:0] YM_READBACK[0:TB_NUM_WORDS_N-1]; + reg [31:0] S_READBACK[0:TB_NUM_WORDS_N-1]; + + initial begin + M[ 0] = 32'he1b3c6ac; M[ 1] = 32'haa2c5d8c; M[ 2] = 32'hbecc676a; M[ 3] = 32'hda087a3e; + M[ 4] = 32'hf0816496; M[ 5] = 32'hf9e17fd8; M[ 6] = 32'h304d4896; M[ 7] = 32'h81d4e9ab; + M[ 8] = 32'h80eff76c; M[ 9] = 32'he5b8f9b6; M[ 10] = 32'h4b1ebe55; M[ 11] = 32'ha1feb9dc; + M[ 12] = 32'heca4192f; M[ 13] = 32'h6ad6ea8e; M[ 14] = 32'hf34aed05; M[ 15] = 32'had38c275; + M[ 16] = 32'h8d3b583b; M[ 17] = 32'hc370f07e; M[ 18] = 32'hb9078738; M[ 19] = 32'haf37f86c; + M[ 20] = 32'h02f0e161; M[ 21] = 32'h0506a68a; M[ 22] = 32'h1ae65107; M[ 23] = 32'hcd3a97f1; + M[ 24] = 32'hb27244b8; M[ 25] = 32'h9bc3c400; M[ 26] = 32'he4d5636e; M[ 27] = 32'h35187c07; + M[ 28] = 32'h78a661c9; M[ 29] = 32'h1e7ec273; M[ 30] = 32'hcdc31041; M[ 31] = 32'h002291d8; + N[ 0] = 32'h6719997f; N[ 1] = 32'hef2df706; N[ 2] = 32'h9ba95792; N[ 3] = 32'h747e0580; + N[ 4] = 32'h7507684c; N[ 5] = 32'h7a10d0d1; N[ 6] = 32'h83a33941; N[ 7] = 32'haef9fda5; + N[ 8] = 32'h17972933; N[ 9] = 32'h0a98251a; N[ 10] = 32'h7dce3d13; N[ 11] = 32'hdad49a60; + N[ 12] = 32'h9f98006b; N[ 13] = 32'h46fd4a05; N[ 14] = 32'h51966e1d; N[ 15] = 32'hb1c59fab; + N[ 16] = 32'h8ab3096e; N[ 17] = 32'hef1f0436; N[ 18] = 32'heeed776f; N[ 19] = 32'h106d9d82; + N[ 20] = 32'hdd2a44af; N[ 21] = 32'h17c32585; N[ 22] = 32'hc854e454; N[ 23] = 32'h600fb6df; + N[ 24] = 32'h25c2d4bb; N[ 25] = 32'h5f09d790; N[ 26] = 32'he5a2bb93; N[ 27] = 32'h5bc6b044; + N[ 28] = 32'h2ecbb15f; N[ 29] = 32'h464817f5; N[ 30] = 32'h05cae32b; N[ 31] = 32'hde97bb85; + N_FACTOR[ 0] = 32'ha06a1113; N_FACTOR[ 1] = 32'hc9974806; N_FACTOR[ 2] = 32'h572d7a20; N_FACTOR[ 3] = 32'h04000838; + N_FACTOR[ 4] = 32'hb275c37a; N_FACTOR[ 5] = 32'hea78a046; N_FACTOR[ 6] = 32'h029e13b8; N_FACTOR[ 7] = 32'hae540753; + N_FACTOR[ 8] = 32'h1e98bc21; N_FACTOR[ 9] = 32'h34ede47a; N_FACTOR[ 10] = 32'h0c565ecd; N_FACTOR[ 11] = 32'h027ff3bf; + N_FACTOR[ 12] = 32'h08290d30; N_FACTOR[ 13] = 32'hb92857df; N_FACTOR[ 14] = 32'he6c59eb3; N_FACTOR[ 15] = 32'h09e53d6a; + N_FACTOR[ 16] = 32'h980d127e; N_FACTOR[ 17] = 32'h4dd6ced0; N_FACTOR[ 18] = 32'h3b9400d0; N_FACTOR[ 19] = 32'h276c6711; + N_FACTOR[ 20] = 32'h72eaf2e6; N_FACTOR[ 21] = 32'h749f81eb; N_FACTOR[ 22] = 32'h17b7d05f; N_FACTOR[ 23] = 32'h41a3a2cd; + N_FACTOR[ 24] = 32'h1ba098f3; N_FACTOR[ 25] = 32'h9b884af9; N_FACTOR[ 26] = 32'hdafd920c; N_FACTOR[ 27] = 32'h7b1f5cc6; + N_FACTOR[ 28] = 32'hb0a0d098; N_FACTOR[ 29] = 32'h4ee55bcf; N_FACTOR[ 30] = 32'haed9b905; N_FACTOR[ 31] = 32'h42d541fb; + N_COEFF[ 0] = 32'hb383d981; N_COEFF[ 1] = 32'h9bf1c20c; N_COEFF[ 2] = 32'h268999ff; N_COEFF[ 3] = 32'h11a3c01a; + N_COEFF[ 4] = 32'h12665495; N_COEFF[ 5] = 32'h515b0d96; N_COEFF[ 6] = 32'hb704fb07; N_COEFF[ 7] = 32'h8e1bd1d6; + N_COEFF[ 8] = 32'h62c5f506; N_COEFF[ 9] = 32'hfdcd0163; N_COEFF[ 10] = 32'h8dd55dee; N_COEFF[ 11] = 32'h6d79c8b1; + N_COEFF[ 12] = 32'hca16d0b9; N_COEFF[ 13] = 32'h88bead48; N_COEFF[ 14] = 32'hbcdb1e94; N_COEFF[ 15] = 32'h950c171d; + N_COEFF[ 16] = 32'h4fa810af; N_COEFF[ 17] = 32'h9b63e6d2; N_COEFF[ 18] = 32'ha2d0c26b; N_COEFF[ 19] = 32'hafa1ef25; + N_COEFF[ 20] = 32'h111bd21e; N_COEFF[ 21] = 32'hc2d896f0; N_COEFF[ 22] = 32'h189dc2cf; N_COEFF[ 23] = 32'h6144156a; + N_COEFF[ 24] = 32'hd1c67123; N_COEFF[ 25] = 32'ha127e4f3; N_COEFF[ 26] = 32'h40d342ef; N_COEFF[ 27] = 32'hee476d42; + N_COEFF[ 28] = 32'hee05f26a; N_COEFF[ 29] = 32'h4fc717bd; N_COEFF[ 30] = 32'h6baa4d60; N_COEFF[ 31] = 32'h1d6b10db; + N_COEFF[ 32] = 32'h00006545; + X[ 0] = 32'ha838f053; X[ 1] = 32'h8eb9747c; X[ 2] = 32'h5991b9eb; X[ 3] = 32'h74e6e776; + X[ 4] = 32'hcb5aa9e2; X[ 5] = 32'h7f8083d4; X[ 6] = 32'h3f7d47ec; X[ 7] = 32'hbd76a787; + X[ 8] = 32'hf4c166b7; X[ 9] = 32'hdbf67229; X[ 10] = 32'h975a5cfb; X[ 11] = 32'he8c35dca; + X[ 12] = 32'h6abc86e8; X[ 13] = 32'hfee472cb; X[ 14] = 32'h83ac8f2e; X[ 15] = 32'h82825cff; + X[ 16] = 32'h2d532c22; X[ 17] = 32'h2d3c3b06; X[ 18] = 32'he2862a8f; X[ 19] = 32'he8616ce4; + X[ 20] = 32'h5d77ee51; X[ 21] = 32'he609de07; X[ 22] = 32'hef718044; X[ 23] = 32'h82f35f8b; + X[ 24] = 32'hcdb9dcfe; X[ 25] = 32'hff6ea364; X[ 26] = 32'h0994ae28; X[ 27] = 32'h409b369b; + X[ 28] = 32'hcfabda4e; X[ 29] = 32'h5cd52bbc; X[ 30] = 32'hd90e1715; X[ 31] = 32'h00f4dcf2; + Y[ 0] = 32'h01b2730a; Y[ 1] = 32'h04ff1664; Y[ 2] = 32'h6d55dc06; Y[ 3] = 32'h1cda0da7; + Y[ 4] = 32'h98c812b4; Y[ 5] = 32'ha8f79f3b; Y[ 6] = 32'hb18d9ee1; Y[ 7] = 32'ha53e97db; + Y[ 8] = 32'hfbbfd687; Y[ 9] = 32'h6b8a8bf6; Y[ 10] = 32'h59fe5575; Y[ 11] = 32'he6ee62ca; + Y[ 12] = 32'h9fe3f32a; Y[ 13] = 32'h6d758eaa; Y[ 14] = 32'h121e3dac; Y[ 15] = 32'h31d77884; + Y[ 16] = 32'h8f2701dd; Y[ 17] = 32'hca5e7ac3; Y[ 18] = 32'h731977a3; Y[ 19] = 32'hc3c1af70; + Y[ 20] = 32'h5606786a; Y[ 21] = 32'h94b71191; Y[ 22] = 32'hd044c7e2; Y[ 23] = 32'h7d899cd7; + Y[ 24] = 32'hb17d4f5d; Y[ 25] = 32'h446e04de; Y[ 26] = 32'h9c40b33d; Y[ 27] = 32'habc2e23e; + Y[ 28] = 32'hbb98b1f6; Y[ 29] = 32'hf1f87f7e; Y[ 30] = 32'hf19f3050; Y[ 31] = 32'h91305f4c; + P[ 0] = 32'h18bb0f97; P[ 1] = 32'h08588a44; P[ 2] = 32'h042c6647; P[ 3] = 32'hc8d3fa09; + P[ 4] = 32'he2ddbbc7; P[ 5] = 32'hef4a17fd; P[ 6] = 32'h90c102ef; P[ 7] = 32'h28b132db; + P[ 8] = 32'hebfd5f0a; P[ 9] = 32'h958717ca; P[ 10] = 32'h563cd266; P[ 11] = 32'h433f41af; + P[ 12] = 32'hbc198e83; P[ 13] = 32'h5b5300b2; P[ 14] = 32'h9bc50e5d; P[ 15] = 32'hefffa822; + Q[ 0] = 32'h25de0259; Q[ 1] = 32'hd81461d0; Q[ 2] = 32'h613815b3; Q[ 3] = 32'h9bf274e0; + Q[ 4] = 32'hbfd89a48; Q[ 5] = 32'hc53e71ac; Q[ 6] = 32'hcce7aed3; Q[ 7] = 32'hce1d017c; + Q[ 8] = 32'h646547e1; Q[ 9] = 32'hd6779694; Q[ 10] = 32'h20ae39c0; Q[ 11] = 32'hef0d4b5b; + Q[ 12] = 32'h8e5f59d6; Q[ 13] = 32'h7e267974; Q[ 14] = 32'h14c86644; Q[ 15] = 32'hed6efcd0; + P_FACTOR[ 0] = 32'h614f99ce; P_FACTOR[ 1] = 32'hbcee5381; P_FACTOR[ 2] = 32'h10b70a9a; P_FACTOR[ 3] = 32'h1a697756; + P_FACTOR[ 4] = 32'h1a972b27; P_FACTOR[ 5] = 32'hd7c43f9e; P_FACTOR[ 6] = 32'h48cbad9c; P_FACTOR[ 7] = 32'hc350e206; + P_FACTOR[ 8] = 32'h51098b50; P_FACTOR[ 9] = 32'h93388ec6; P_FACTOR[ 10] = 32'h548960b5; P_FACTOR[ 11] = 32'h5ecd6b04; + P_FACTOR[ 12] = 32'h04d1d543; P_FACTOR[ 13] = 32'ha53994af; P_FACTOR[ 14] = 32'hd390be70; P_FACTOR[ 15] = 32'h0acdced0; + Q_FACTOR[ 0] = 32'h8a19423f; Q_FACTOR[ 1] = 32'h9d729c78; Q_FACTOR[ 2] = 32'h26ed5239; Q_FACTOR[ 3] = 32'h5a7eba92; + Q_FACTOR[ 4] = 32'h8465f60f; Q_FACTOR[ 5] = 32'hd50817dd; Q_FACTOR[ 6] = 32'hba703ab1; Q_FACTOR[ 7] = 32'h3d59bd42; + Q_FACTOR[ 8] = 32'h2c197fcc; Q_FACTOR[ 9] = 32'hed14f573; Q_FACTOR[ 10] = 32'hb860c105; Q_FACTOR[ 11] = 32'h89af91e7; + Q_FACTOR[ 12] = 32'h13a3742d; Q_FACTOR[ 13] = 32'h96e41677; Q_FACTOR[ 14] = 32'h86506b4d; Q_FACTOR[ 15] = 32'h4a834535; + P_COEFF[ 0] = 32'hb3679fd9; P_COEFF[ 1] = 32'hde24e467; P_COEFF[ 2] = 32'hf0e82022; P_COEFF[ 3] = 32'h99012919; + P_COEFF[ 4] = 32'h023bda43; P_COEFF[ 5] = 32'hf04eebf8; P_COEFF[ 6] = 32'h29e9942f; P_COEFF[ 7] = 32'h8c257cb0; + P_COEFF[ 8] = 32'h5cdc4e60; P_COEFF[ 9] = 32'h279bdff7; P_COEFF[ 10] = 32'hf04a610d; P_COEFF[ 11] = 32'h342901dc; + P_COEFF[ 12] = 32'hc3e2f78c; P_COEFF[ 13] = 32'h39c00ed8; P_COEFF[ 14] = 32'hf7032ac2; P_COEFF[ 15] = 32'h22d9c54e; + P_COEFF[ 16] = 32'h0000f994; + Q_COEFF[ 0] = 32'h95beda17; Q_COEFF[ 1] = 32'ha4b101fa; Q_COEFF[ 2] = 32'hd0b854bc; Q_COEFF[ 3] = 32'h5c952a67; + Q_COEFF[ 4] = 32'h56722aa8; Q_COEFF[ 5] = 32'h2176cace; Q_COEFF[ 6] = 32'h69beef2d; Q_COEFF[ 7] = 32'h95bf6eb2; + Q_COEFF[ 8] = 32'h0cf1175c; Q_COEFF[ 9] = 32'h4911b74e; Q_COEFF[ 10] = 32'h331e61cb; Q_COEFF[ 11] = 32'he9527ead; + Q_COEFF[ 12] = 32'h8d6a5911; Q_COEFF[ 13] = 32'hae42d654; Q_COEFF[ 14] = 32'he10d29a8; Q_COEFF[ 15] = 32'h50a5dd76; + Q_COEFF[ 16] = 32'h0000ed75; + DP[ 0] = 32'h63d165e5; DP[ 1] = 32'h856ac81e; DP[ 2] = 32'hc4b8779d; DP[ 3] = 32'h8b119544; + DP[ 4] = 32'had780837; DP[ 5] = 32'h3e920266; DP[ 6] = 32'he9d10f2e; DP[ 7] = 32'h7c1b42b2; + DP[ 8] = 32'hc7daca3b; DP[ 9] = 32'h7883be11; DP[ 10] = 32'ha384548d; DP[ 11] = 32'he0848b23; + DP[ 12] = 32'h0b62bdff; DP[ 13] = 32'h11c64350; DP[ 14] = 32'h2aa1e225; DP[ 15] = 32'h9c2bcaa7; + DQ[ 0] = 32'hd7ffdc71; DQ[ 1] = 32'hed01b8aa; DQ[ 2] = 32'h2f99d3a6; DQ[ 3] = 32'h8ccb4428; + DQ[ 4] = 32'hb1574616; DQ[ 5] = 32'hfc218e36; DQ[ 6] = 32'h4fe24f91; DQ[ 7] = 32'h9c367c42; + DQ[ 8] = 32'h69dfa208; DQ[ 9] = 32'h3ee3de79; DQ[ 10] = 32'h54ded59b; DQ[ 11] = 32'hcb3b487d; + DQ[ 12] = 32'hbcc0db4e; DQ[ 13] = 32'hb3e6678c; DQ[ 14] = 32'h3d13ec03; DQ[ 15] = 32'h99e0f684; QINV[ 0] = 32'h9a2f0db2; QINV[ 1] = 32'h4a8075a5; QINV[ 2] = 32'hb61201fa; QINV[ 3] = 32'h0e876a42; + QINV[ 4] = 32'h94667476; QINV[ 5] = 32'h7538b796; QINV[ 6] = 32'h8d8dfa35; QINV[ 7] = 32'h689ee4a7; + QINV[ 8] = 32'h6779dd63; QINV[ 9] = 32'he15b6b5e; QINV[ 10] = 32'h8275500c; QINV[ 11] = 32'he4dcd058; + QINV[ 12] = 32'haf54b86c; QINV[ 13] = 32'hba76dc50; QINV[ 14] = 32'h473d0d6d; QINV[ 15] = 32'ha023ba44; + XM[ 0] = 32'h9b067dd2; XM[ 1] = 32'hf47b497a; XM[ 2] = 32'he8044305; XM[ 3] = 32'hf74f1735; + XM[ 4] = 32'h494825f4; XM[ 5] = 32'h077bf4a3; XM[ 6] = 32'h637a9f36; XM[ 7] = 32'h3c3821a2; + XM[ 8] = 32'haa1fe167; XM[ 9] = 32'h01c7289a; XM[ 10] = 32'hb463d63d; XM[ 11] = 32'hc992252e; + XM[ 12] = 32'he43762bf; XM[ 13] = 32'h351d9416; XM[ 14] = 32'h10e7f813; XM[ 15] = 32'h33187c87; + XM[ 16] = 32'h9eb98306; XM[ 17] = 32'hb29be7b6; XM[ 18] = 32'h32b237a8; XM[ 19] = 32'h6c1d5e46; + XM[ 20] = 32'h1cf10b4a; XM[ 21] = 32'hd874a710; XM[ 22] = 32'h7d2df198; XM[ 23] = 32'h463701cc; + XM[ 24] = 32'h9b648da0; XM[ 25] = 32'hdc5d3b10; XM[ 26] = 32'hef88e7fd; XM[ 27] = 32'hcb888210; + XM[ 28] = 32'h24397651; XM[ 29] = 32'h9b9bd5a2; XM[ 30] = 32'hbc796763; XM[ 31] = 32'h5be48377; + YM[ 0] = 32'h78aba2bd; YM[ 1] = 32'h6885ed1d; YM[ 2] = 32'h0d4983a2; YM[ 3] = 32'h3b775d20; + YM[ 4] = 32'hf83145f4; YM[ 5] = 32'h66e52536; YM[ 6] = 32'h25c2377e; YM[ 7] = 32'h91ef1342; + YM[ 8] = 32'h73013f57; YM[ 9] = 32'h3862aa1a; YM[ 10] = 32'h37846437; YM[ 11] = 32'ha6ddd3c9; + YM[ 12] = 32'h3974d1b2; YM[ 13] = 32'h02aea3f6; YM[ 14] = 32'h2e71b229; YM[ 15] = 32'hb898d5b6; + YM[ 16] = 32'h71258bb8; YM[ 17] = 32'h654f94e8; YM[ 18] = 32'h5539e56e; YM[ 19] = 32'hd49567f2; + YM[ 20] = 32'he73efaa1; YM[ 21] = 32'h3e4e2162; YM[ 22] = 32'h772d786a; YM[ 23] = 32'hc27be96a; + YM[ 24] = 32'h9911c92d; YM[ 25] = 32'hddc1b0fd; YM[ 26] = 32'h829186bb; YM[ 27] = 32'h1bab454e; + YM[ 28] = 32'h2f9fd9ce; YM[ 29] = 32'ha57103d4; YM[ 30] = 32'h1a93390c; YM[ 31] = 32'hc0376429; + S[ 0] = 32'h8eb4aa6e; S[ 1] = 32'hababa077; S[ 2] = 32'h8758f3f6; S[ 3] = 32'h8282e4f4; + S[ 4] = 32'h747947ce; S[ 5] = 32'h9ac7dbb0; S[ 6] = 32'h9184f0b5; S[ 7] = 32'h4b572f47; + S[ 8] = 32'hf4807458; S[ 9] = 32'h6da8dcd4; S[ 10] = 32'h9f331c40; S[ 11] = 32'h65e2b7a2; + S[ 12] = 32'hd3704e85; S[ 13] = 32'h3366f4f0; S[ 14] = 32'h035044b1; S[ 15] = 32'h54758bc4; + S[ 16] = 32'h2a7e0970; S[ 17] = 32'hbcc7783c; S[ 18] = 32'hf62193e6; S[ 19] = 32'h5d7bb220; + S[ 20] = 32'hb0fcabdd; S[ 21] = 32'he6dc5c88; S[ 22] = 32'h8e4d5e53; S[ 23] = 32'haa40acba; + S[ 24] = 32'h1dfc9178; S[ 25] = 32'h842821bc; S[ 26] = 32'h318fc8e1; S[ 27] = 32'h0f8161fe; + S[ 28] = 32'hbf3d7945; S[ 29] = 32'he33612c7; S[ 30] = 32'h7eec7f9d; S[ 31] = 32'h66da2c5a; + end + + + + // + // Clocks + // + `define CLK_FREQUENCY_MHZ (100.0) + `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) + `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + + `define CLK_BUS_FREQUENCY_MHZ (50.0) + `define CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ) + `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS) + + reg clk = 1'b1; + reg clk_bus = 1'b0; + + always #`CLK_PERIOD_HALF_NS clk = ~clk; + + always #`CLK_BUS_PERIOD_HALF_NS clk_bus = ~clk_bus; + + + // + // Reset + // + reg rst = 1'b1; + + + // + // Control / Status + // + reg [ 7:0] word_index_last_n; + reg [ 7:0] word_index_last_pq; + reg [11:0] bit_index_last_n; + reg [11:0] bit_index_last_pq; + reg core_next = 1'b0; + wire core_valid; + reg core_crt_mode; + + + // + // System Bus + // + reg bus_ready; + reg bus_cs = 1'b0; + reg bus_we = 1'b0; + reg [11:0] bus_addr; + reg [31:0] bus_data_wr; + wire [31:0] bus_data_rd; + + wire [ 1:0] bus_addr_sel = bus_addr[11:10]; + wire [ 2:0] bus_addr_bank = bus_addr[9:7]; + wire [ 6:0] bus_addr_data = bus_addr[6:0]; + + + // + // UUT + // + modexpng_core_top uut + ( + .clk (clk), + .clk_bus (clk_bus), + + .rst (rst), + + .next (core_next), + .valid (core_valid), + + .crt_mode (core_crt_mode), + + .word_index_last_n (word_index_last_n), + .word_index_last_pq (word_index_last_pq), + + .bit_index_last_n (bit_index_last_n), + .bit_index_last_pq (bit_index_last_pq), + + .bus_cs (bus_cs), + .bus_we (bus_we), + .bus_addr (bus_addr), + .bus_data_wr (bus_data_wr), + .bus_data_rd (bus_data_rd) + ); + + + // + // Routine (Bus) + // + initial begin + + bus_ready = 1'b0; + + while (rst) wait_clock_bus_tick; + wait_clock_bus_ticks(10); + $display("Core came out of reset."); + + set_input_1; + set_input_2; + + wait_clock_bus_ticks(10); + bus_ready = 1'b1; + + end + + + // + // Routine (Control/Status, Bus) + // + initial begin + + _wait_half_clock_tick; + wait_clock_ticks(100); + rst = 1'b0; + + while (!bus_ready) wait_clock_tick; + wait_clock_ticks(10); + $display("Core input banks written."); + + word_index_last_n = CORE_NUM_WORDS_N - 1; + word_index_last_pq = CORE_NUM_WORDS_PQ - 1; + + bit_index_last_n = TB_MODULUS_LENGTH_N - 1; + bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1; + + core_crt_mode = 1'b1; + + core_next = 1'b1; + wait_clock_tick; + core_next = 1'b0; + $display("Pulsed 'next' control signal."); + + while (!core_valid) wait_clock_tick; + wait_clock_ticks(10); + + $display("Detected high 'valid' status signal."); + core_crt_mode = 1'bX; + + wait_clock_ticks(10); + get_output; + wait_clock_ticks(10); + + $display("Core output banks read."); + + //verify; + + end + + + // + // Variables + // + integer _w, _n; + + + // + // set_input_1; + // + task set_input_1; + reg [9:0] _tn; + begin + _tn = BANK_IN_1_N_COEFF * 2 ** BUS_OP_ADDR_W + TB_NUM_WORDS_N; // trick to write extra trailer word + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_M, _w[6:0], M[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N, _w[6:0], N[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N_FACTOR, _w[6:0], N_FACTOR[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N_COEFF, _w[6:0], N_COEFF[_w]); + bus_write(2'd0, _tn[9:7], _tn[6:0], N_COEFF[TB_NUM_WORDS_N]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_X, _w[6:0], X[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_Y, _w[6:0], Y[_w]); + end + endtask + + + // + // set_input_2; + // + task set_input_2; + begin +// for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_M, _w[6:0], M[_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P, {1'b0, _w[5:0]}, P [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P, {1'b1, _w[5:0]}, DP [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P_FACTOR, { _w[6:0]}, P_FACTOR[_w]); + for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P_COEFF, { _w[6:0]}, P_COEFF [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q, {1'b0, _w[5:0]}, Q [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q, {1'b1, _w[5:0]}, DQ [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q_FACTOR, { _w[6:0]}, Q_FACTOR[_w]); + for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q_COEFF, { _w[6:0]}, Q_COEFF [_w]); + for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_QINV, { _w[6:0]}, QINV [_w]); + end + endtask + + + // + // get_output; + // + task get_output; + begin + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_XM, _w[6:0], XM_READBACK[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_YM, _w[6:0], YM_READBACK[_w]); + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_S, _w[6:0], S_READBACK[_w]); + end + endtask + + + // + // verify; + // + task verify; + // + reg xm_ok; + reg ym_ok; + reg s_ok; + // + begin + // + xm_ok = 1; + ym_ok = 1; + s_ok = 1; + // + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin + if (XM_READBACK[_w] !== XM[_w]) xm_ok = 0; + if (YM_READBACK[_w] !== YM[_w]) ym_ok = 0; + if (S_READBACK[_w] !== S[_w]) s_ok = 0; + end + // + if (!xm_ok) + // + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin + $write("XM / XM_READBACK [%3d] = 0x%08x / 0x%08x", _w, XM[_w], XM_READBACK[_w]); + if (XM[_w] !== XM_READBACK[_w]) $write(" <???: 0x%08x> ", XM[_w] ^ XM_READBACK[_w]); + $write("\n"); + end + // + if (!ym_ok) + // + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin + $write("YM / YM_READBACK [%3d] = 0x%08x / 0x%08x", _w, YM[_w], YM_READBACK[_w]); + if (YM[_w] !== YM_READBACK[_w]) $write(" <???: 0x%08x> ", YM[_w] ^ YM_READBACK[_w]); + $write("\n"); + end + // + if (!s_ok) + // + for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin + $write("S / S_READBACK [%3d] = 0x%08x / 0x%08x", _w, S[_w], S_READBACK[_w]); + if (S[_w] !== S_READBACK[_w]) $write(" <???: 0x%08x> ", S[_w] ^ S_READBACK[_w]); + $write("\n"); + end + // + $write("XM is "); + if (xm_ok) $write("OK.\n"); + else $write("WRONG!\n"); + // + $write("YM is "); + if (ym_ok) $write("OK.\n"); + else $write("WRONG!\n"); + // + $write("S is "); + if (s_ok) $write("OK.\n"); + else $write("WRONG!\n"); + // + end + // + endtask + + + + // + // _bus_drive() + // + task _bus_drive; + input cs; + input we; + input [11:0] addr; + input [31:0] data; + {bus_cs, bus_we, bus_addr, bus_data_wr} <= {cs, we, addr, data}; + endtask + + + // + // bus_write() + // + task bus_write; + input [ 1:0] sel; + input [ 2:0] bank; + input [ 6:0] addr; + input [31:0] data; + begin + _bus_drive(1'b1, 1'b1, {sel, bank, addr}, data); + wait_clock_bus_tick; + _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX); + end + endtask + + + // + // bus_read() + // + task bus_read; + input [ 1:0] sel; + input [ 2:0] bank; + input [ 6:0] addr; + output [31:0] data; + begin + _bus_drive(1'b1, 1'b0, {sel, bank, addr}, 32'hXXXXXXXX); + wait_clock_bus_tick; + data = bus_data_rd; + _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX); + end + endtask + + + // + // _wait_half_clock_tick() + // + task _wait_half_clock_tick; + #`CLK_PERIOD_HALF_NS; + endtask + + // + // wait_clock_tick() + // + task wait_clock_tick; + begin + _wait_half_clock_tick; + _wait_half_clock_tick; + end + endtask + + + // + // wait_clock_bus_tick() + // + task wait_clock_bus_tick; + #`CLK_BUS_PERIOD_NS; + endtask + + + // + // wait_clock_ticks() + // + task wait_clock_ticks; + input integer num_ticks; + for (_n=0; _n<num_ticks; _n=_n+1) + wait_clock_tick; + endtask + + + // + // wait_clock_bus_ticks() + // + task wait_clock_bus_ticks; + input integer num_ticks; + for (_n=0; _n<num_ticks; _n=_n+1) + wait_clock_bus_tick; + endtask + +endmodule diff --git a/bench/tb_mmm_dual_x8.v b/bench/tb_mmm_dual_x8.v new file mode 100644 index 0000000..7e54d09 --- /dev/null +++ b/bench/tb_mmm_dual_x8.v @@ -0,0 +1,940 @@ +`timescale 1ns / 1ps + +module tb_mmm_dual_x8; + + + // + // Headers + // + `include "../rtl/modexpng_parameters.vh" + //`include "../rtl_1/modexpng_mmm_fsm_old.vh" + + + // + // Clock + // + `define CLK_FREQUENCY_MHZ (100.0) + `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) + `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) + + reg clk = 1'b0; + + always begin + #`CLK_PERIOD_HALF_NS clk = 1'b1; + #`CLK_PERIOD_HALF_NS clk = 1'b0; + end + + + // + // Reset + // + reg rst = 1'b1; + + + // + // Test Vectors + // + localparam PQ_NUM_WORDS = 32; + localparam [OP_ADDR_W -1:0] PQ_WORD_INDEX_LAST = PQ_NUM_WORDS - 1; + + localparam P_LADDER_MODE = 1'b0; + + reg [WORD_EXT_W -1:0] P_T1 [0:PQ_NUM_WORDS -1]; + reg [WORD_EXT_W -1:0] P_T2 [0:PQ_NUM_WORDS -1]; + + reg [WORD_EXT_W -1:0] P_N [0:PQ_NUM_WORDS -1]; + reg [WORD_EXT_W -1:0] P_N_COEFF[0:PQ_NUM_WORDS ]; + + reg [WORD_EXT_W -1:0] P_X_AB [0:2*PQ_NUM_WORDS -1]; + reg [WORD_EXT_W -1:0] P_Y_AB [0:2*PQ_NUM_WORDS -1]; + + reg [WORD_EXT_W -1:0] P_X_Q [0:PQ_NUM_WORDS]; + reg [WORD_EXT_W -1:0] P_Y_Q [0:PQ_NUM_WORDS]; + + reg [WORD_EXT_W -1:0] P_X_M [0:2*PQ_NUM_WORDS]; + reg [WORD_EXT_W -1:0] P_Y_M [0:2*PQ_NUM_WORDS]; + + reg [WORD_EXT_W -1:0] P_X [0:PQ_NUM_WORDS -1]; + reg [WORD_EXT_W -1:0] P_Y [0:PQ_NUM_WORDS -1]; + + + // + // Test Vector Components + // + initial begin + // + P_T1[ 0] = 18'h191c5; P_T1[ 1] = 18'h1a118; P_T1[ 2] = 18'h16e06; P_T1[ 3] = 18'h0ea68; + P_T1[ 4] = 18'h12944; P_T1[ 5] = 18'h0c242; P_T1[ 6] = 18'h2fc64; P_T1[ 7] = 18'h14efc; + P_T1[ 8] = 18'h113da; P_T1[ 9] = 18'h16ff7; P_T1[10] = 18'h1ef0c; P_T1[11] = 18'h18580; + P_T1[12] = 18'h1a62c; P_T1[13] = 18'h352b7; P_T1[14] = 18'h114f4; P_T1[15] = 18'h1c53e; + P_T1[16] = 18'h0c63e; P_T1[17] = 18'h0dd14; P_T1[18] = 18'h2fba8; P_T1[19] = 18'h1b8e4; + P_T1[20] = 18'h2d944; P_T1[21] = 18'h10290; P_T1[22] = 18'h1d276; P_T1[23] = 18'h327b0; + P_T1[24] = 18'h1c0c4; P_T1[25] = 18'h100a8; P_T1[26] = 18'h2a9ab; P_T1[27] = 18'h0e694; + P_T1[28] = 18'h10798; P_T1[29] = 18'h1ae91; P_T1[30] = 18'h38d4c; P_T1[31] = 18'h00808; + // + P_T2[ 0] = 18'h1193b; P_T2[ 1] = 18'h0de9c; P_T2[ 2] = 18'h0b993; P_T2[ 3] = 18'h0d2cd; + P_T2[ 4] = 18'h106ad; P_T2[ 5] = 18'h076da; P_T2[ 6] = 18'h10cab; P_T2[ 7] = 18'h15cd5; + P_T2[ 8] = 18'h15425; P_T2[ 9] = 18'h16287; P_T2[10] = 18'h0fd64; P_T2[11] = 18'h06ee0; + P_T2[12] = 18'h1b0c9; P_T2[13] = 18'h01a5e; P_T2[14] = 18'h1855c; P_T2[15] = 18'h17bf9; + P_T2[16] = 18'h1c83c; P_T2[17] = 18'h158ed; P_T2[18] = 18'h086df; P_T2[19] = 18'h16676; + P_T2[20] = 18'h0a0f8; P_T2[21] = 18'h14545; P_T2[22] = 18'h09641; P_T2[23] = 18'h16863; + P_T2[24] = 18'h17e20; P_T2[25] = 18'h0d457; P_T2[26] = 18'h05a9b; P_T2[27] = 18'h1a4cf; + P_T2[28] = 18'h1582a; P_T2[29] = 18'h1686c; P_T2[30] = 18'h1394e; P_T2[31] = 18'h0bdbc; + // + P_N[ 0] = 18'h00f97; P_N[ 1] = 18'h018bb; P_N[ 2] = 18'h08a44; P_N[ 3] = 18'h00858; + P_N[ 4] = 18'h06647; P_N[ 5] = 18'h0042c; P_N[ 6] = 18'h0fa09; P_N[ 7] = 18'h0c8d3; + P_N[ 8] = 18'h0bbc7; P_N[ 9] = 18'h0e2dd; P_N[10] = 18'h017fd; P_N[11] = 18'h0ef4a; + P_N[12] = 18'h002ef; P_N[13] = 18'h090c1; P_N[14] = 18'h032db; P_N[15] = 18'h028b1; + P_N[16] = 18'h05f0a; P_N[17] = 18'h0ebfd; P_N[18] = 18'h017ca; P_N[19] = 18'h09587; + P_N[20] = 18'h0d266; P_N[21] = 18'h0563c; P_N[22] = 18'h041af; P_N[23] = 18'h0433f; + P_N[24] = 18'h08e83; P_N[25] = 18'h0bc19; P_N[26] = 18'h000b2; P_N[27] = 18'h05b53; + P_N[28] = 18'h00e5d; P_N[29] = 18'h09bc5; P_N[30] = 18'h0a822; P_N[31] = 18'h0efff; + // + P_N_COEFF[ 0] = 18'h09fd9; P_N_COEFF[ 1] = 18'h0b367; P_N_COEFF[ 2] = 18'h0e467; P_N_COEFF[ 3] = 18'h0de24; + P_N_COEFF[ 4] = 18'h02022; P_N_COEFF[ 5] = 18'h0f0e8; P_N_COEFF[ 6] = 18'h02919; P_N_COEFF[ 7] = 18'h09901; + P_N_COEFF[ 8] = 18'h0da43; P_N_COEFF[ 9] = 18'h0023b; P_N_COEFF[10] = 18'h0ebf8; P_N_COEFF[11] = 18'h0f04e; + P_N_COEFF[12] = 18'h0942f; P_N_COEFF[13] = 18'h029e9; P_N_COEFF[14] = 18'h07cb0; P_N_COEFF[15] = 18'h08c25; + P_N_COEFF[16] = 18'h04e60; P_N_COEFF[17] = 18'h05cdc; P_N_COEFF[18] = 18'h0dff7; P_N_COEFF[19] = 18'h0279b; + P_N_COEFF[20] = 18'h0610d; P_N_COEFF[21] = 18'h0f04a; P_N_COEFF[22] = 18'h001dc; P_N_COEFF[23] = 18'h03429; + P_N_COEFF[24] = 18'h0f78c; P_N_COEFF[25] = 18'h0c3e2; P_N_COEFF[26] = 18'h00ed8; P_N_COEFF[27] = 18'h039c0; + P_N_COEFF[28] = 18'h02ac2; P_N_COEFF[29] = 18'h0f703; P_N_COEFF[30] = 18'h0c54e; P_N_COEFF[31] = 18'h022d9; + P_N_COEFF[32] = 18'h0f994; + // + P_X_AB[ 0] = 18'h0c199; P_X_AB[ 1] = 18'h0957a; P_X_AB[ 2] = 18'h070ad; P_X_AB[ 3] = 18'h0e5a6; + P_X_AB[ 4] = 18'h0fec9; P_X_AB[ 5] = 18'h00b73; P_X_AB[ 6] = 18'h09c72; P_X_AB[ 7] = 18'h0cdf0; + P_X_AB[ 8] = 18'h08755; P_X_AB[ 9] = 18'h07560; P_X_AB[10] = 18'h084b1; P_X_AB[11] = 18'h0ad3f; + P_X_AB[12] = 18'h074fe; P_X_AB[13] = 18'h04d74; P_X_AB[14] = 18'h00e16; P_X_AB[15] = 18'h0d3b3; + P_X_AB[16] = 18'h0d418; P_X_AB[17] = 18'h02f12; P_X_AB[18] = 18'h0c301; P_X_AB[19] = 18'h0be2b; + P_X_AB[20] = 18'h08222; P_X_AB[21] = 18'h0056c; P_X_AB[22] = 18'h01c7c; P_X_AB[23] = 18'h0bc95; + P_X_AB[24] = 18'h03427; P_X_AB[25] = 18'h0c65a; P_X_AB[26] = 18'h089ac; P_X_AB[27] = 18'h02117; + P_X_AB[28] = 18'h0ff7d; P_X_AB[29] = 18'h01cde; P_X_AB[30] = 18'h02709; P_X_AB[31] = 18'h01c56; + P_X_AB[32] = 18'h0f35a; P_X_AB[33] = 18'h08ce6; P_X_AB[34] = 18'h0a8e5; P_X_AB[35] = 18'h0d6d4; + P_X_AB[36] = 18'h06868; P_X_AB[37] = 18'h09105; P_X_AB[38] = 18'h0219e; P_X_AB[39] = 18'h0bc40; + P_X_AB[40] = 18'h00e0a; P_X_AB[41] = 18'h07783; P_X_AB[42] = 18'h0187a; P_X_AB[43] = 18'h0b922; + P_X_AB[44] = 18'h02609; P_X_AB[45] = 18'h0c64b; P_X_AB[46] = 18'h06b4b; P_X_AB[47] = 18'h04b79; + P_X_AB[48] = 18'h0fed6; P_X_AB[49] = 18'h03eac; P_X_AB[50] = 18'h04cac; P_X_AB[51] = 18'h0d47d; + P_X_AB[52] = 18'h045fd; P_X_AB[53] = 18'h04fa8; P_X_AB[54] = 18'h0597c; P_X_AB[55] = 18'h0a10d; + P_X_AB[56] = 18'h0bf44; P_X_AB[57] = 18'h08671; P_X_AB[58] = 18'h0112a; P_X_AB[59] = 18'h08ccf; + P_X_AB[60] = 18'h0cae5; P_X_AB[61] = 18'h04d94; P_X_AB[62] = 18'h0b95a; P_X_AB[63] = 18'h00040; + // + P_X_Q[ 0] = 18'h021b1; P_X_Q[ 1] = 18'h0d2db; P_X_Q[ 2] = 18'h0754b; P_X_Q[ 3] = 18'h01fc1; + P_X_Q[ 4] = 18'h063f7; P_X_Q[ 5] = 18'h086e5; P_X_Q[ 6] = 18'h0bcea; P_X_Q[ 7] = 18'h02260; + P_X_Q[ 8] = 18'h0c54c; P_X_Q[ 9] = 18'h0e298; P_X_Q[10] = 18'h05d07; P_X_Q[11] = 18'h0f978; + P_X_Q[12] = 18'h0e742; P_X_Q[13] = 18'h0a3f0; P_X_Q[14] = 18'h0b31e; P_X_Q[15] = 18'h041b7; + P_X_Q[16] = 18'h06ed9; P_X_Q[17] = 18'h03ac5; P_X_Q[18] = 18'h0f8eb; P_X_Q[19] = 18'h0c619; + P_X_Q[20] = 18'h067e9; P_X_Q[21] = 18'h00350; P_X_Q[22] = 18'h00376; P_X_Q[23] = 18'h02ebf; + P_X_Q[24] = 18'h0b125; P_X_Q[25] = 18'h05f7d; P_X_Q[26] = 18'h0f121; P_X_Q[27] = 18'h07ba4; + P_X_Q[28] = 18'h03050; P_X_Q[29] = 18'h0642e; P_X_Q[30] = 18'h0c2fc; P_X_Q[31] = 18'h0dfcf; + P_X_Q[32] = 18'h03f9e; + // + P_X_M[ 0] = 18'h03e67; P_X_M[ 1] = 18'h06a85; P_X_M[ 2] = 18'h08f52; P_X_M[ 3] = 18'h01a59; + P_X_M[ 4] = 18'h00136; P_X_M[ 5] = 18'h0f48c; P_X_M[ 6] = 18'h0638d; P_X_M[ 7] = 18'h0320f; + P_X_M[ 8] = 18'h078aa; P_X_M[ 9] = 18'h08a9f; P_X_M[10] = 18'h07b4e; P_X_M[11] = 18'h052c0; + P_X_M[12] = 18'h08b01; P_X_M[13] = 18'h0b28b; P_X_M[14] = 18'h0f1e9; P_X_M[15] = 18'h02c4c; + P_X_M[16] = 18'h02be7; P_X_M[17] = 18'h0d0ed; P_X_M[18] = 18'h03cfe; P_X_M[19] = 18'h041d4; + P_X_M[20] = 18'h07ddd; P_X_M[21] = 18'h0fa93; P_X_M[22] = 18'h0e383; P_X_M[23] = 18'h0436a; + P_X_M[24] = 18'h0cbd8; P_X_M[25] = 18'h039a5; P_X_M[26] = 18'h07653; P_X_M[27] = 18'h0dee8; + P_X_M[28] = 18'h00082; P_X_M[29] = 18'h0e321; P_X_M[30] = 18'h0d8f6; P_X_M[31] = 18'h0e3a9; + P_X_M[32] = 18'h00ca5; P_X_M[33] = 18'h035ed; P_X_M[34] = 18'h02b8f; P_X_M[35] = 18'h063bd; + P_X_M[36] = 18'h0ec9f; P_X_M[37] = 18'h0b8bb; P_X_M[38] = 18'h00389; P_X_M[39] = 18'h0ca27; + P_X_M[40] = 18'h0bea7; P_X_M[41] = 18'h0df1e; P_X_M[42] = 18'h0d685; P_X_M[43] = 18'h0cc1b; + P_X_M[44] = 18'h036c4; P_X_M[45] = 18'h01ce9; P_X_M[46] = 18'h0c43b; P_X_M[47] = 18'h05f58; + P_X_M[48] = 18'h02c77; P_X_M[49] = 18'h03a12; P_X_M[50] = 18'h0eea8; P_X_M[51] = 18'h0ac31; + P_X_M[52] = 18'h05838; P_X_M[53] = 18'h093ac; P_X_M[54] = 18'h0fd54; P_X_M[55] = 18'h06e13; + P_X_M[56] = 18'h002e2; P_X_M[57] = 18'h06af4; P_X_M[58] = 18'h0ea18; P_X_M[59] = 18'h083b3; + P_X_M[60] = 18'h059f7; P_X_M[61] = 18'h016d3; P_X_M[62] = 18'h0c3ad; P_X_M[63] = 18'h0dbfc; + P_X_M[64] = 18'h03ba4; + // + P_Y_AB[ 0] = 18'h0d567; P_Y_AB[ 1] = 18'h0dbf1; P_Y_AB[ 2] = 18'h024b3; P_Y_AB[ 3] = 18'h0bb34; + P_Y_AB[ 4] = 18'h03ad4; P_Y_AB[ 5] = 18'h08997; P_Y_AB[ 6] = 18'h0d369; P_Y_AB[ 7] = 18'h0ebbc; + P_Y_AB[ 8] = 18'h09502; P_Y_AB[ 9] = 18'h01b76; P_Y_AB[10] = 18'h0a28f; P_Y_AB[11] = 18'h0c577; + P_Y_AB[12] = 18'h05f2f; P_Y_AB[13] = 18'h08c45; P_Y_AB[14] = 18'h0dbb8; P_Y_AB[15] = 18'h036bf; + P_Y_AB[16] = 18'h05086; P_Y_AB[17] = 18'h0437e; P_Y_AB[18] = 18'h08e3d; P_Y_AB[19] = 18'h0ec97; + P_Y_AB[20] = 18'h0195c; P_Y_AB[21] = 18'h02e75; P_Y_AB[22] = 18'h0d94f; P_Y_AB[23] = 18'h0ce1e; + P_Y_AB[24] = 18'h0fd8d; P_Y_AB[25] = 18'h0ec03; P_Y_AB[26] = 18'h058a0; P_Y_AB[27] = 18'h05fc4; + P_Y_AB[28] = 18'h0f83f; P_Y_AB[29] = 18'h09a60; P_Y_AB[30] = 18'h0f047; P_Y_AB[31] = 18'h05ee6; + P_Y_AB[32] = 18'h02a39; P_Y_AB[33] = 18'h08b08; P_Y_AB[34] = 18'h0f66d; P_Y_AB[35] = 18'h0b2fb; + P_Y_AB[36] = 18'h02f3f; P_Y_AB[37] = 18'h092b2; P_Y_AB[38] = 18'h09b4e; P_Y_AB[39] = 18'h0ce4f; + P_Y_AB[40] = 18'h04428; P_Y_AB[41] = 18'h00483; P_Y_AB[42] = 18'h0f595; P_Y_AB[43] = 18'h031cb; + P_Y_AB[44] = 18'h0d292; P_Y_AB[45] = 18'h0ded9; P_Y_AB[46] = 18'h0ef15; P_Y_AB[47] = 18'h0da51; + P_Y_AB[48] = 18'h0ed93; P_Y_AB[49] = 18'h03969; P_Y_AB[50] = 18'h05efc; P_Y_AB[51] = 18'h004e7; + P_Y_AB[52] = 18'h09434; P_Y_AB[53] = 18'h02b91; P_Y_AB[54] = 18'h0d3db; P_Y_AB[55] = 18'h0c4cf; + P_Y_AB[56] = 18'h09d34; P_Y_AB[57] = 18'h0cea8; P_Y_AB[58] = 18'h0de0d; P_Y_AB[59] = 18'h0f190; + P_Y_AB[60] = 18'h0b95a; P_Y_AB[61] = 18'h0bd8a; P_Y_AB[62] = 18'h079a6; P_Y_AB[63] = 18'h005f6; + // + P_Y_Q[ 0] = 18'h0dd4f; P_Y_Q[ 1] = 18'h084f9; P_Y_Q[ 2] = 18'h00105; P_Y_Q[ 3] = 18'h0cdff; + P_Y_Q[ 4] = 18'h0973c; P_Y_Q[ 5] = 18'h0440c; P_Y_Q[ 6] = 18'h0450b; P_Y_Q[ 7] = 18'h09e70; + P_Y_Q[ 8] = 18'h0d686; P_Y_Q[ 9] = 18'h0e21a; P_Y_Q[10] = 18'h02d26; P_Y_Q[11] = 18'h0b117; + P_Y_Q[12] = 18'h08556; P_Y_Q[13] = 18'h002ee; P_Y_Q[14] = 18'h0083d; P_Y_Q[15] = 18'h079fa; + P_Y_Q[16] = 18'h0f25d; P_Y_Q[17] = 18'h0cd26; P_Y_Q[18] = 18'h0bb7e; P_Y_Q[19] = 18'h07676; + P_Y_Q[20] = 18'h0f4bb; P_Y_Q[21] = 18'h02b87; P_Y_Q[22] = 18'h02909; P_Y_Q[23] = 18'h05e2d; + P_Y_Q[24] = 18'h09c80; P_Y_Q[25] = 18'h098f3; P_Y_Q[26] = 18'h0f08b; P_Y_Q[27] = 18'h0255b; + P_Y_Q[28] = 18'h0fbe5; P_Y_Q[29] = 18'h0ae8e; P_Y_Q[30] = 18'h0ba22; P_Y_Q[31] = 18'h0f2ea; + P_Y_Q[32] = 18'h0530e; + // + P_Y_M[ 0] = 18'h02a99; P_Y_M[ 1] = 18'h0240e; P_Y_M[ 2] = 18'h0db4c; P_Y_M[ 3] = 18'h044cb; + P_Y_M[ 4] = 18'h0c52b; P_Y_M[ 5] = 18'h07668; P_Y_M[ 6] = 18'h02c96; P_Y_M[ 7] = 18'h01443; + P_Y_M[ 8] = 18'h06afd; P_Y_M[ 9] = 18'h0e489; P_Y_M[10] = 18'h05d70; P_Y_M[11] = 18'h03a88; + P_Y_M[12] = 18'h0a0d0; P_Y_M[13] = 18'h073ba; P_Y_M[14] = 18'h02447; P_Y_M[15] = 18'h0c940; + P_Y_M[16] = 18'h0af79; P_Y_M[17] = 18'h0bc81; P_Y_M[18] = 18'h071c2; P_Y_M[19] = 18'h01368; + P_Y_M[20] = 18'h0e6a3; P_Y_M[21] = 18'h0d18a; P_Y_M[22] = 18'h026b0; P_Y_M[23] = 18'h031e1; + P_Y_M[24] = 18'h00272; P_Y_M[25] = 18'h013fc; P_Y_M[26] = 18'h0a75f; P_Y_M[27] = 18'h0a03b; + P_Y_M[28] = 18'h007c0; P_Y_M[29] = 18'h0659f; P_Y_M[30] = 18'h00fb8; P_Y_M[31] = 18'h0a119; + P_Y_M[32] = 18'h0d5c6; P_Y_M[33] = 18'h09926; P_Y_M[34] = 18'h0d69f; P_Y_M[35] = 18'h085cd; + P_Y_M[36] = 18'h0591a; P_Y_M[37] = 18'h0e6dd; P_Y_M[38] = 18'h0981f; P_Y_M[39] = 18'h087b4; + P_Y_M[40] = 18'h015b3; P_Y_M[41] = 18'h09421; P_Y_M[42] = 18'h0ea9d; P_Y_M[43] = 18'h013af; + P_Y_M[44] = 18'h096ac; P_Y_M[45] = 18'h06f86; P_Y_M[46] = 18'h0cab7; P_Y_M[47] = 18'h06ab1; + P_Y_M[48] = 18'h0903e; P_Y_M[49] = 18'h06203; P_Y_M[50] = 18'h0751a; P_Y_M[51] = 18'h02fce; + P_Y_M[52] = 18'h0d0c9; P_Y_M[53] = 18'h00522; P_Y_M[54] = 18'h096f8; P_Y_M[55] = 18'h03aee; + P_Y_M[56] = 18'h0a034; P_Y_M[57] = 18'h0e52e; P_Y_M[58] = 18'h07b7b; P_Y_M[59] = 18'h06bad; + P_Y_M[60] = 18'h0016d; P_Y_M[61] = 18'h01315; P_Y_M[62] = 18'h02586; P_Y_M[63] = 18'h0e73a; + P_Y_M[64] = 18'h04ddd; + // + P_X[ 0] = 18'h0c2d4; P_X[ 1] = 18'h0d474; P_X[ 2] = 18'h13a91; P_X[ 3] = 18'h15507; + P_X[ 4] = 18'h149c0; P_X[ 5] = 18'h02527; P_X[ 6] = 18'h18667; P_X[ 7] = 18'h0ccb1; + P_X[ 8] = 18'h156a1; P_X[ 9] = 18'h0eeff; P_X[10] = 18'h1853d; P_X[11] = 18'h05ccd; + P_X[12] = 18'h0e334; P_X[13] = 18'h12f86; P_X[14] = 18'h0aad1; P_X[15] = 18'h12b4d; + P_X[16] = 18'h078be; P_X[17] = 18'h13b54; P_X[18] = 18'h180ae; P_X[19] = 18'h09e35; + P_X[20] = 18'h0e354; P_X[21] = 18'h156d0; P_X[22] = 18'h10f20; P_X[23] = 18'h0c226; + P_X[24] = 18'h0f165; P_X[25] = 18'h0fb42; P_X[26] = 18'h11082; P_X[27] = 18'h124dc; + P_X[28] = 18'h06467; P_X[29] = 18'h17d07; P_X[30] = 18'h0dc3c; P_X[31] = 18'h03ba4; + // + P_Y[ 0] = 18'h1242f; P_Y[ 1] = 18'h1cd0c; P_Y[ 2] = 18'h138c8; P_Y[ 3] = 18'h08859; + P_Y[ 4] = 18'h1798f; P_Y[ 5] = 18'h1336d; P_Y[ 6] = 18'h15603; P_Y[ 7] = 18'h059db; + P_Y[ 8] = 18'h098a4; P_Y[ 9] = 18'h1e032; P_Y[10] = 18'h0457a; P_Y[11] = 18'h1693e; + P_Y[12] = 18'h14e5f; P_Y[13] = 18'h1b9cc; P_Y[14] = 18'h14502; P_Y[15] = 18'h17dd1; + P_Y[16] = 18'h09b6c; P_Y[17] = 18'h0d416; P_Y[18] = 18'h034b5; P_Y[19] = 18'h164fd; + P_Y[20] = 18'h030b3; P_Y[21] = 18'h16ad3; P_Y[22] = 18'h0ffbd; P_Y[23] = 18'h13d68; + P_Y[24] = 18'h1b3d6; P_Y[25] = 18'h15988; P_Y[26] = 18'h15d3d; P_Y[27] = 18'h0bac7; + P_Y[28] = 18'h0d09f; P_Y[29] = 18'h09f2c; P_Y[30] = 18'h0ed30; P_Y[31] = 18'h04ddd; + // + end + + + // + // Enable, Ready + // + reg ena = 1'b0; + wire rdy; + + + // + // Settings + // + reg p_ladder_mode; + reg [OP_ADDR_W -1:0] word_index_last; + reg [OP_ADDR_W -1:0] word_index_last_minus1; + + + // + // Script + // + integer i; + initial begin + + wait_clock_ticks(10); + rst = 1'b0; + wait_clock_ticks(10); + + word_index_last = PQ_WORD_INDEX_LAST; + word_index_last_minus1 = word_index_last - 1'b1; + + p_prefill; + + p_ladder_mode = P_LADDER_MODE; + + wait_clock_ticks(10); + + ena = 1'b1; + wait_clock_ticks(1); + ena = 1'b0; + + while (!rdy) + wait_clock_ticks(1); + + wait_clock_ticks(1000); + p_verify_ab; + p_verify_q; + p_verify_m; + p_verify_p; + end + + + // + // Storage Interfaces + // + wire wr_wide_xy_ena; + wire [BANK_ADDR_W -1:0] wr_wide_xy_bank; + wire [ OP_ADDR_W -1:0] wr_wide_xy_addr; + wire [ WORD_EXT_W -1:0] wr_wide_x_din; + wire [ WORD_EXT_W -1:0] wr_wide_y_din; + + wire wr_narrow_xy_ena; + wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank; + wire [ OP_ADDR_W -1:0] wr_narrow_xy_addr; + wire [ WORD_EXT_W -1:0] wr_narrow_x_din; + wire [ WORD_EXT_W -1:0] wr_narrow_y_din; + + wire rd_wide_xy_ena; + wire rd_wide_xy_ena_aux; + wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank; + wire [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux; + wire [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr; + wire [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux; + wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout; + wire [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout; + wire [ WORD_EXT_W -1:0] rd_wide_x_dout_aux; + wire [ WORD_EXT_W -1:0] rd_wide_y_dout_aux; + + wire rd_narrow_xy_ena; + wire [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; + wire [ OP_ADDR_W -1:0] rd_narrow_xy_addr; + wire [ WORD_EXT_W -1:0] rd_narrow_x_dout; + wire [ WORD_EXT_W -1:0] rd_narrow_y_dout; + + reg ext_wide_xy_ena = 1'b0; + reg [BANK_ADDR_W -1:0] ext_wide_xy_bank; + reg [ OP_ADDR_W -1:0] ext_wide_xy_addr; + reg [ WORD_EXT_W -1:0] ext_wide_x_din; + reg [ WORD_EXT_W -1:0] ext_wide_y_din; + + reg ext_narrow_xy_ena = 1'b0; + reg [BANK_ADDR_W -1:0] ext_narrow_xy_bank; + reg [ OP_ADDR_W -1:0] ext_narrow_xy_addr; + reg [ WORD_EXT_W -1:0] ext_narrow_x_din; + reg [ WORD_EXT_W -1:0] ext_narrow_y_din; + + // + // Recombinator Interface + // + wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank; + wire [ OP_ADDR_W -1:0] rcmb_wide_xy_addr; + wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout; + wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout; + wire rcmb_wide_xy_valid; + + wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; + wire [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr; + wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout; + wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout; + wire rcmb_narrow_xy_valid; + + wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank; + wire [ OP_ADDR_W -1:0] rcmb_final_xy_addr; + wire [ WORD_EXT_W -1:0] rcmb_final_x_dout; + wire [ WORD_EXT_W -1:0] rcmb_final_y_dout; + wire rcmb_final_xy_valid; + + // + // Reductor Interface + // + wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank; + wire [ OP_ADDR_W -1:0] rdct_wide_xy_addr; + wire [ WORD_EXT_W -1:0] rdct_wide_x_dout; + wire [ WORD_EXT_W -1:0] rdct_wide_y_dout; + wire rdct_wide_xy_valid; + + wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank; + wire [ OP_ADDR_W -1:0] rdct_narrow_xy_addr; + wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout; + wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout; + wire rdct_narrow_xy_valid; + + // + // Reductor Control/Status + // + wire rdct_ena; + wire rdct_rdy; + + // + // UUT + // + modexpng_mmm_dual uut + ( + .clk (clk), + .rst (rst), + + .ena (ena), + .rdy (rdy), + + .ladder_mode (p_ladder_mode), + .word_index_last (word_index_last), + .word_index_last_minus1 (word_index_last_minus1), + .force_unity_b (1'b0), + .only_reduce (1'b0), + + .sel_wide_in (BANK_WIDE_A), + .sel_narrow_in (BANK_NARROW_A), + + .rd_wide_xy_ena (rd_wide_xy_ena), + .rd_wide_xy_ena_aux (rd_wide_xy_ena_aux), + .rd_wide_xy_bank (rd_wide_xy_bank), + .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux), + .rd_wide_xy_addr (rd_wide_xy_addr), + .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux), + .rd_wide_x_dout (rd_wide_x_dout), + .rd_wide_y_dout (rd_wide_y_dout), + .rd_wide_x_dout_aux (rd_wide_x_dout_aux), + .rd_wide_y_dout_aux (rd_wide_y_dout_aux), + + .rd_narrow_xy_ena (rd_narrow_xy_ena), + .rd_narrow_xy_bank (rd_narrow_xy_bank), + .rd_narrow_xy_addr (rd_narrow_xy_addr), + .rd_narrow_x_dout (rd_narrow_x_dout), + .rd_narrow_y_dout (rd_narrow_y_dout), + + .rcmb_wide_xy_bank (rcmb_wide_xy_bank), + .rcmb_wide_xy_addr (rcmb_wide_xy_addr), + .rcmb_wide_x_dout (rcmb_wide_x_dout), + .rcmb_wide_y_dout (rcmb_wide_y_dout), + .rcmb_wide_xy_valid (rcmb_wide_xy_valid), + + .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), + .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), + .rcmb_narrow_x_dout (rcmb_narrow_x_dout), + .rcmb_narrow_y_dout (rcmb_narrow_y_dout), + .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid), + + .rcmb_xy_bank (rcmb_final_xy_bank), + .rcmb_xy_addr (rcmb_final_xy_addr), + .rcmb_x_dout (rcmb_final_x_dout), + .rcmb_y_dout (rcmb_final_y_dout), + .rcmb_xy_valid (rcmb_final_xy_valid), + + .rdct_ena (rdct_ena), + .rdct_rdy (rdct_rdy) + ); + + + // + // Reductor + // + modexpng_reductor reductor + ( + .clk (clk), + .rst (rst), + + .ena (rdct_ena), + .rdy (rdct_rdy), + + .word_index_last (word_index_last), + + .sel_wide_out (BANK_WIDE_B), + .sel_narrow_out (BANK_NARROW_B), + + .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux), + .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux), + .rd_wide_x_dout_aux (rd_wide_x_dout_aux), + .rd_wide_y_dout_aux (rd_wide_y_dout_aux), + + .rcmb_final_xy_bank (rcmb_final_xy_bank), + .rcmb_final_xy_addr (rcmb_final_xy_addr), + .rcmb_final_x_dout (rcmb_final_x_dout), + .rcmb_final_y_dout (rcmb_final_y_dout), + .rcmb_final_xy_valid (rcmb_final_xy_valid), + + .rdct_wide_xy_bank (rdct_wide_xy_bank), + .rdct_wide_xy_addr (rdct_wide_xy_addr), + .rdct_wide_x_dout (rdct_wide_x_dout), + .rdct_wide_y_dout (rdct_wide_y_dout), + .rdct_wide_xy_valid (rdct_wide_xy_valid), + + .rdct_narrow_xy_bank (rdct_narrow_xy_bank), + .rdct_narrow_xy_addr (rdct_narrow_xy_addr), + .rdct_narrow_x_dout (rdct_narrow_x_dout), + .rdct_narrow_y_dout (rdct_narrow_y_dout), + .rdct_narrow_xy_valid (rdct_narrow_xy_valid) + ); + + // + // Storage Block + // + modexpng_storage_block storage_block + ( + .clk (clk), + .rst (rst), + + .wr_wide_xy_ena (wr_wide_xy_ena), + .wr_wide_xy_bank (wr_wide_xy_bank), + .wr_wide_xy_addr (wr_wide_xy_addr), + .wr_wide_x_din (wr_wide_x_din), + .wr_wide_y_din (wr_wide_y_din), + + .wr_narrow_xy_ena (wr_narrow_xy_ena), + .wr_narrow_xy_bank (wr_narrow_xy_bank), + .wr_narrow_xy_addr (wr_narrow_xy_addr), + .wr_narrow_x_din (wr_narrow_x_din), + .wr_narrow_y_din (wr_narrow_y_din), + + .rd_wide_xy_ena (rd_wide_xy_ena), + .rd_wide_xy_ena_aux (rd_wide_xy_ena_aux), + .rd_wide_xy_bank (rd_wide_xy_bank), + .rd_wide_xy_bank_aux (rd_wide_xy_bank_aux), + .rd_wide_xy_addr (rd_wide_xy_addr), + .rd_wide_xy_addr_aux (rd_wide_xy_addr_aux), + .rd_wide_x_dout (rd_wide_x_dout), + .rd_wide_y_dout (rd_wide_y_dout), + .rd_wide_x_dout_aux (rd_wide_x_dout_aux), + .rd_wide_y_dout_aux (rd_wide_y_dout_aux), + + .rd_narrow_xy_ena (rd_narrow_xy_ena), + .rd_narrow_xy_bank (rd_narrow_xy_bank), + .rd_narrow_xy_addr (rd_narrow_xy_addr), + .rd_narrow_x_dout (rd_narrow_x_dout), + .rd_narrow_y_dout (rd_narrow_y_dout) + ); + + modexpng_storage_manager storage_manager + ( + .clk (clk), + .rst (rst), + + .wr_wide_xy_ena (wr_wide_xy_ena), + .wr_wide_xy_bank (wr_wide_xy_bank), + .wr_wide_xy_addr (wr_wide_xy_addr), + .wr_wide_x_din (wr_wide_x_din), + .wr_wide_y_din (wr_wide_y_din), + + .wr_narrow_xy_ena (wr_narrow_xy_ena), + .wr_narrow_xy_bank (wr_narrow_xy_bank), + .wr_narrow_xy_addr (wr_narrow_xy_addr), + .wr_narrow_x_din (wr_narrow_x_din), + .wr_narrow_y_din (wr_narrow_y_din), + + .ext_wide_xy_ena (ext_wide_xy_ena), + .ext_wide_xy_bank (ext_wide_xy_bank), + .ext_wide_xy_addr (ext_wide_xy_addr), + .ext_wide_x_din (ext_wide_x_din), + .ext_wide_y_din (ext_wide_y_din), + + .ext_narrow_xy_ena (ext_narrow_xy_ena), + .ext_narrow_xy_bank (ext_narrow_xy_bank), + .ext_narrow_xy_addr (ext_narrow_xy_addr), + .ext_narrow_x_din (ext_narrow_x_din), + .ext_narrow_y_din (ext_narrow_y_din), + + .rcmb_wide_xy_bank (rcmb_wide_xy_bank), + .rcmb_wide_xy_addr (rcmb_wide_xy_addr), + .rcmb_wide_x_din (rcmb_wide_x_dout), + .rcmb_wide_y_din (rcmb_wide_y_dout), + .rcmb_wide_xy_ena (rcmb_wide_xy_valid), + + .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank), + .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr), + .rcmb_narrow_x_din (rcmb_narrow_x_dout), + .rcmb_narrow_y_din (rcmb_narrow_y_dout), + .rcmb_narrow_xy_ena (rcmb_narrow_xy_valid) + ); + + + // + // p_prefill() + // + task p_prefill; + begin + + ext_wide_xy_ena = 1'b1; + ext_narrow_xy_ena = 1'b1; + + for (i=0; i<PQ_NUM_WORDS; i=i+1) begin + ext_wide_xy_bank = BANK_WIDE_A; + ext_wide_xy_addr = i[OP_ADDR_W-1:0]; + ext_wide_x_din = P_T1[i]; + ext_wide_y_din = P_T2[i]; + + ext_narrow_xy_bank = BANK_NARROW_A; + ext_narrow_xy_addr = i[OP_ADDR_W-1:0]; + ext_narrow_x_din = P_T1[i]; + ext_narrow_y_din = P_T2[i]; + + wait_clock_tick; + end + + for (i=0; i<PQ_NUM_WORDS; i=i+1) begin + ext_wide_xy_bank = BANK_WIDE_N; + ext_wide_xy_addr = i[OP_ADDR_W-1:0]; + ext_wide_x_din = P_N[i]; + ext_wide_y_din = P_N[i]; + + ext_narrow_xy_bank = BANK_NARROW_COEFF; + ext_narrow_xy_addr = i[OP_ADDR_W-1:0]; + ext_narrow_x_din = P_N_COEFF[i]; + ext_narrow_y_din = P_N_COEFF[i]; + + wait_clock_tick; + end + + ext_wide_xy_ena = 1'b0; + ext_wide_xy_bank = BANK_DNC; + ext_wide_xy_addr = OP_ADDR_DNC; + ext_wide_x_din = WORD_EXT_DNC; + ext_wide_y_din = WORD_EXT_DNC; + + for (i=32; i<33; i=i+1) begin + ext_narrow_xy_bank = BANK_NARROW_EXT; + ext_narrow_xy_addr = OP_ADDR_EXT_COEFF; + ext_narrow_x_din = P_N_COEFF[i]; + ext_narrow_y_din = P_N_COEFF[i]; + + wait_clock_tick; + end + + ext_narrow_xy_ena = 1'b0; + ext_narrow_xy_bank = BANK_DNC; + ext_narrow_xy_addr = OP_ADDR_DNC; + ext_narrow_x_din = WORD_EXT_DNC; + ext_narrow_y_din = WORD_EXT_DNC; + + end + endtask + + + // + // wait_clock_tick() + // + task wait_clock_tick; + #`CLK_PERIOD_NS; + endtask + + + // + // wait_clock_ticks() + // + task wait_clock_ticks; + input integer num_ticks; + integer _n; + for (_n=0; _n<num_ticks; _n=_n+1) + wait_clock_tick; + endtask + + + // + // Debug Interceptor + // + reg [WORD_EXT_W-1:0] P_X_AB_READ[0:2*PQ_NUM_WORDS-1]; + reg [WORD_EXT_W-1:0] P_Y_AB_READ[0:2*PQ_NUM_WORDS-1]; + + reg [WORD_EXT_W-1:0] P_X_Q_READ[0:PQ_NUM_WORDS]; + reg [WORD_EXT_W-1:0] P_Y_Q_READ[0:PQ_NUM_WORDS]; + + reg [WORD_EXT_W-1:0] P_X_M_READ[0:2*PQ_NUM_WORDS]; + reg [WORD_EXT_W-1:0] P_Y_M_READ[0:2*PQ_NUM_WORDS]; + + reg [WORD_EXT_W-1:0] P_X_WIDE_READ[0:PQ_NUM_WORDS-1]; + reg [WORD_EXT_W-1:0] P_Y_WIDE_READ[0:PQ_NUM_WORDS-1]; + + reg [WORD_EXT_W-1:0] P_X_NARROW_READ[0:PQ_NUM_WORDS-1]; + reg [WORD_EXT_W-1:0] P_Y_NARROW_READ[0:PQ_NUM_WORDS-1]; + + integer xy_offset; + always @(posedge clk) begin + // + if (rcmb_wide_xy_valid) + // + case (rcmb_wide_xy_bank) + // + BANK_WIDE_L: begin + // + xy_offset = rcmb_wide_xy_addr; + // + if (xy_offset >= PQ_NUM_WORDS) begin + $display("ERROR: Encountered illegal offset (%d) writing to wide bank L!", xy_offset); + $finish; + end + // + P_X_AB_READ[xy_offset] <= rcmb_wide_x_dout; + P_Y_AB_READ[xy_offset] <= rcmb_wide_y_dout; + // + end + // + BANK_WIDE_H: begin + // + xy_offset = PQ_NUM_WORDS + rcmb_wide_xy_addr; + // + if (xy_offset >= 2*PQ_NUM_WORDS) begin + $display("ERROR: Encountered illegal offset (%d) writing to wide bank H!", xy_offset); + $finish; + end + // + P_X_AB_READ[xy_offset] <= rcmb_wide_x_dout; + P_Y_AB_READ[xy_offset] <= rcmb_wide_y_dout; + // + end + // + default: begin + $display("ERROR: Encountered illegal wide bank (%d) while writing!", rcmb_wide_xy_bank); + $finish; + end + // + endcase + // + if (rcmb_narrow_xy_valid) + // + case (rcmb_narrow_xy_bank) + // + BANK_NARROW_Q: begin + // + xy_offset = rcmb_narrow_xy_addr; + // + if (xy_offset >= PQ_NUM_WORDS) begin + $display("ERROR: Encountered illegal offset (%d) writing to narrow bank Q!", xy_offset); + $finish; + end + // + P_X_Q_READ[xy_offset] <= rcmb_narrow_x_dout; + P_Y_Q_READ[xy_offset] <= rcmb_narrow_y_dout; + // + end + // + BANK_NARROW_EXT: begin + // + xy_offset = PQ_NUM_WORDS + rcmb_narrow_xy_addr - 1; + // + if (xy_offset != PQ_NUM_WORDS) begin + $display("ERROR: Encountered illegal offset (%d) writing to narrow bank EXT!", xy_offset); + $finish; + end + // + P_X_Q_READ[xy_offset] <= rcmb_narrow_x_dout; + P_Y_Q_READ[xy_offset] <= rcmb_narrow_y_dout; + // + end + // + default: begin + $display("ERROR: Encountered illegal narrow bank (%d) while writing!", rcmb_narrow_xy_bank); + $finish; + end + // + endcase + // + if (rcmb_final_xy_valid) + // + case (rcmb_final_xy_bank) + // + BANK_RCMB_ML: begin + // + xy_offset = rcmb_final_xy_addr; + // + if (xy_offset >= PQ_NUM_WORDS) begin + $display("ERROR: Encountered illegal offset (%d) writing to narrow bank ML!", xy_offset); + $finish; + end + // + P_X_M_READ[xy_offset] <= rcmb_final_x_dout; + P_Y_M_READ[xy_offset] <= rcmb_final_y_dout; + // + end + // + BANK_RCMB_MH: begin + // + xy_offset = PQ_NUM_WORDS + rcmb_final_xy_addr; + // + if (xy_offset >= 2*PQ_NUM_WORDS) begin + $display("ERROR: Encountered illegal offset (%d) writing to narrow bank MH!", xy_offset); + $finish; + end + // + P_X_M_READ[xy_offset] <= rcmb_final_x_dout; + P_Y_M_READ[xy_offset] <= rcmb_final_y_dout; + // + end + // + BANK_RCMB_EXT: begin + // + xy_offset = 2*PQ_NUM_WORDS + rcmb_final_xy_addr; + // + if (xy_offset != 2*PQ_NUM_WORDS) begin + $display("ERROR: Encountered illegal offset (%d) writing to narrow bank EXT!", xy_offset); + $finish; + end + // + P_X_M_READ[xy_offset] <= rcmb_final_x_dout; + P_Y_M_READ[xy_offset] <= rcmb_final_y_dout; + // + end + // + default: begin + $display("ERROR: Encountered illegal narrow bank (%d) while writing!", rcmb_final_xy_bank); + $finish; + end + // + endcase + // + if (rdct_narrow_xy_valid) begin + // + xy_offset = rdct_narrow_xy_addr; + // + if (xy_offset >= PQ_NUM_WORDS) begin + $display("ERROR: Encountered illegal offset (%d) writing to narrow bank T1/T2!", xy_offset); + $finish; + end + // + P_X_NARROW_READ[xy_offset] <= rdct_narrow_x_dout; + P_Y_NARROW_READ[xy_offset] <= rdct_narrow_y_dout; + // + end + // + if (rdct_wide_xy_valid) begin + // + xy_offset = rdct_wide_xy_addr; + // + if (xy_offset >= PQ_NUM_WORDS) begin + $display("ERROR: Encountered illegal offset (%d) writing to wide bank T1/T2!", xy_offset); + $finish; + end + // + P_X_WIDE_READ[xy_offset] <= rdct_wide_x_dout; + P_Y_WIDE_READ[xy_offset] <= rdct_wide_y_dout; + // + end + // + end + + task p_verify_ab; + // + reg verify_x_ab_ok; + reg verify_y_ab_ok; + // + begin + // + verify_x_ab_ok = 1; + verify_y_ab_ok = 1; + // + for (i=0; i<2*PQ_NUM_WORDS; i=i+1) begin + if (P_X_AB_READ[i] !== P_X_AB[i]) verify_x_ab_ok = 0; + if (P_Y_AB_READ[i] !== P_Y_AB[i]) verify_y_ab_ok = 0; + end + // + if (!verify_x_ab_ok) + for (i=0; i<2*PQ_NUM_WORDS; i=i+1) + if (P_X_AB_READ[i] === P_X_AB[i]) $display("P_X_AB / P_X_AB_READ [%02d] = 0x%05x / 0x%05x", i, P_X_AB[i], P_X_AB_READ[i]); + else $display("P_X_AB / P_X_AB_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_X_AB[i], P_X_AB_READ[i]); + // + if (!verify_y_ab_ok) + for (i=0; i<2*PQ_NUM_WORDS; i=i+1) + if (P_Y_AB_READ[i] === P_Y_AB[i]) $display("P_Y_AB / P_Y_AB_READ [%02d] = 0x%05x / 0x%05x", i, P_Y_AB[i], P_Y_AB_READ[i]); + else $display("P_Y_AB / P_Y_AB_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_Y_AB[i], P_Y_AB_READ[i]); + // + if (verify_x_ab_ok) $display("P_X_AB is OK."); + else $display("P_X_AB is WRONG!"); + // + if (verify_y_ab_ok) $display("P_Y_AB is OK."); + else $display("P_Y_AB is WRONG!"); + // + end + // + endtask + + task p_verify_q; + // + reg verify_x_q_ok; + reg verify_y_q_ok; + // + begin + // + verify_x_q_ok = 1; + verify_y_q_ok = 1; + // + for (i=0; i<(PQ_NUM_WORDS+1); i=i+1) begin + if (P_X_Q_READ[i] !== P_X_Q[i]) verify_x_q_ok = 0; + if (P_Y_Q_READ[i] !== P_Y_Q[i]) verify_y_q_ok = 0; + end + // + if (!verify_x_q_ok) + for (i=0; i<(PQ_NUM_WORDS+1); i=i+1) + if (P_X_Q_READ[i] === P_X_Q[i]) $display("P_X_Q / P_X_Q_READ [%02d] = 0x%05x / 0x%05x", i, P_X_Q[i], P_X_Q_READ[i]); + else $display("P_X_Q / P_X_Q_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_X_Q[i], P_X_Q_READ[i]); + // + if (!verify_y_q_ok) + for (i=0; i<(PQ_NUM_WORDS+1); i=i+1) + if (P_Y_Q_READ[i] === P_Y_Q[i]) $display("P_Y_Q / P_Y_Q_READ [%02d] = 0x%05x / 0x%05x", i, P_Y_Q[i], P_Y_Q_READ[i]); + else $display("P_Y_Q / P_Y_Q_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_Y_Q[i], P_Y_Q_READ[i]); + // + if (verify_x_q_ok) $display("P_X_Q is OK."); + else $display("P_X_Q is WRONG!"); + // + if (verify_y_q_ok) $display("P_Y_Q is OK."); + else $display("P_Y_Q is WRONG!"); + // + end + // + endtask + + task p_verify_m; + // + reg verify_x_m_ok; + reg verify_y_m_ok; + // + begin + // + verify_x_m_ok = 1; + verify_y_m_ok = 1; + // + for (i=0; i<(2*PQ_NUM_WORDS+1); i=i+1) begin + if (P_X_M_READ[i] !== P_X_M[i]) verify_x_m_ok = 0; + if (P_Y_M_READ[i] !== P_Y_M[i]) verify_y_m_ok = 0; + end + // + if (!verify_x_m_ok) + for (i=0; i<(2*PQ_NUM_WORDS+1); i=i+1) + if (P_X_M_READ[i] === P_X_M[i]) $display("P_X_M / P_X_M_READ [%02d] = 0x%05x / 0x%05x", i, P_X_M[i], P_X_M_READ[i]); + else $display("P_X_M / P_X_M_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_X_M[i], P_X_M_READ[i]); + // + if (!verify_y_m_ok) + for (i=0; i<(2*PQ_NUM_WORDS+1); i=i+1) + if (P_Y_M_READ[i] === P_Y_M[i]) $display("P_Y_M / P_Y_M_READ [%02d] = 0x%05x / 0x%05x", i, P_Y_M[i], P_Y_M_READ[i]); + else $display("P_Y_M / P_Y_M_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_Y_M[i], P_Y_M_READ[i]); + // + if (verify_x_m_ok) $display("P_X_M is OK."); + else $display("P_X_M is WRONG!"); + // + if (verify_y_m_ok) $display("P_Y_M is OK."); + else $display("P_Y_M is WRONG!"); + // + end + // + endtask + + task p_verify_p; + // + reg verify_x_wide_ok; + reg verify_y_wide_ok; + reg verify_x_narrow_ok; + reg verify_y_narrow_ok; + // + begin + // + verify_x_wide_ok = 1; + verify_y_wide_ok = 1; + verify_x_narrow_ok = 1; + verify_y_narrow_ok = 1; + // + for (i=0; i<PQ_NUM_WORDS; i=i+1) begin + if (P_X_WIDE_READ[i] !== P_X[i]) verify_x_wide_ok = 0; + if (P_Y_WIDE_READ[i] !== P_Y[i]) verify_y_wide_ok = 0; + if (P_X_NARROW_READ[i] !== P_X[i]) verify_x_narrow_ok = 0; + if (P_Y_NARROW_READ[i] !== P_Y[i]) verify_y_narrow_ok = 0; + end + // + if (!verify_x_wide_ok || !verify_x_narrow_ok) + for (i=0; i<PQ_NUM_WORDS; i=i+1) + if ((P_X_WIDE_READ[i] === P_X[i]) && (P_X_NARROW_READ[i] === P_X[i])) $display("P_X / P_X_WIDE / P_X_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x", i, P_X[i], P_X_WIDE_READ[i], P_X_NARROW_READ[i]); + else $display("P_X / P_X_WIDE / P_X_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x <???>", i, P_X[i], P_X_WIDE_READ[i], P_X_NARROW_READ[i]); + // + if (!verify_y_wide_ok || !verify_y_narrow_ok) + for (i=0; i<PQ_NUM_WORDS; i=i+1) + if ((P_Y_WIDE_READ[i] === P_Y[i]) && (P_Y_NARROW_READ[i] === P_Y[i])) $display("P_Y / P_Y_WIDE / P_Y_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x", i, P_Y[i], P_Y_WIDE_READ[i], P_Y_NARROW_READ[i]); + else $display("P_Y / P_Y_WIDE / P_Y_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x <???>", i, P_Y[i], P_Y_WIDE_READ[i], P_Y_NARROW_READ[i]); + // + if (verify_x_wide_ok && verify_x_narrow_ok) $display("P_X is OK."); + else $display("P_X is WRONG!"); + // + if (verify_y_wide_ok && verify_y_narrow_ok) $display("P_Y is OK."); + else $display("P_Y is WRONG!"); + // + end + // + endtask + +endmodule + diff --git a/bench/tb_mmm_x8_dual.v b/bench/tb_mmm_x8_dual.v deleted file mode 100644 index aa25900..0000000 --- a/bench/tb_mmm_x8_dual.v +++ /dev/null @@ -1,327 +0,0 @@ -`timescale 1ns / 1ps - -module tb_mmm_x8_dual; - - - // - // Headers - // - `include "../rtl/modexpng_parameters.vh" - `include "../rtl/modexpng_parameters_x8.vh" - - - // - // Settings - // - localparam INDEX_WIDTH = 6; - - wire [INDEX_WIDTH-1:0] index_last = 31; // 512 bits - - - // - // Clock - // - `define CLK_FREQUENCY_MHZ 100.0 - `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ) - `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS) - - reg clk = 1'b0; - - always begin - #`CLK_PERIOD_HALF_NS clk = 1'b1; - #`CLK_PERIOD_HALF_NS clk = 1'b0; - end - - - // - // Reset - // - reg rst = 1'b1; - wire rst_n = ~rst; - - - // - // Control - // - reg ena = 1'b0; - wire rdy; - - reg mode; - reg transfer; - - - // - // Interface - // - - - // - // Interface - Data Buses - // - wire [NUM_MULTS*WORD_WIDTH-1:0] x_din; - wire [NUM_MULTS*WORD_WIDTH-1:0] y_din; - wire [NUM_MULTS*WORD_WIDTH-1:0] x_dout; - wire [NUM_MULTS*WORD_WIDTH-1:0] y_dout; - - - // - // Interface - Address Buses - // - wire [INDEX_WIDTH-4:0] x_din_addr; - wire [INDEX_WIDTH-4:0] y_din_addr; - wire [INDEX_WIDTH-4:0] x_dout_addr; - wire [INDEX_WIDTH-4:0] y_dout_addr; - - - // - // Interface - Enable Buses - // - wire [ 1-1:0] x_din_ena; - wire [ 1-1:0] y_din_ena; - wire [ 1-1:0] x_din_reg_ena; - wire [ 1-1:0] y_din_reg_ena; - wire [NUM_MULTS-1:0] x_dout_ena; - wire [NUM_MULTS-1:0] y_dout_ena; - - - // - // Interface - Bank Buses - // - wire [3-1:0] x_din_bank; - wire [3-1:0] y_din_bank; - wire [3-1:0] x_dout_bank; - wire [3-1:0] y_dout_bank; - - - // - // Operands - // - reg [WORD_WIDTH-1:0] T1[0:2**INDEX_WIDTH-1]; - reg [WORD_WIDTH-1:0] T2[0:2**INDEX_WIDTH-1]; - reg [WORD_WIDTH-1:0] N[0:2**INDEX_WIDTH-1]; - reg [WORD_WIDTH-1:0] N_COEFF[0:2**INDEX_WIDTH]; - - - // - // Memories - // - genvar z; - generate for (z=0; z<NUM_MULTS; z=z+1) - // - begin : gen_z_mem - // - modexpng_mem /*bram_1wo_1ro_readfirst_ce*/ # - ( - .MEM_WIDTH(WORD_WIDTH), - .MEM_ADDR_BITS(INDEX_WIDTH) // - clog2(NUM_MULTS) + clog2(NUM_BANKS) - ) - gen_z_mem_x - ( - .clk (clk), - - .a_addr ({x_dout_bank, x_dout_addr}), - .a_en (x_dout_ena[z]), - .a_wr (x_dout_ena[z]), - .a_in (x_dout[z*WORD_WIDTH+:WORD_WIDTH]), - .a_out (), // unused - - .b_addr ({x_din_bank, x_din_addr}), - .b_en (x_din_ena), - .b_reg_en (x_din_reg_ena), - .b_out (x_din[z*WORD_WIDTH+:WORD_WIDTH]) - ); - // - modexpng_mem /*bram_1wo_1ro_readfirst_ce*/ # - ( - .MEM_WIDTH(WORD_WIDTH), - .MEM_ADDR_BITS(INDEX_WIDTH) // - clog2(NUM_MULTS) + clog2(NUM_BANKS) - ) - gen_z_mem_y - ( - .clk (clk), - - .a_addr ({y_dout_bank, y_dout_addr}), - .a_en (y_dout_ena[z]), - .a_wr (y_dout_ena[z]), - .a_in (y_dout[z*WORD_WIDTH+:WORD_WIDTH]), - .a_out (), // unused - - .b_addr ({y_din_bank, y_din_addr}), - .b_en (y_din_ena), - .b_reg_en (y_din_reg_ena), - .b_out (y_din[z*WORD_WIDTH+:WORD_WIDTH]) - ); - // - end - // - endgenerate - - - // T1 / T2 - // N / N_COEFF - // AB_LSB - // AB_MSB - // M - // Q_LSB - // Q_MSB - // ? - - - // - // Operands - Values - // - initial begin - // - T1[ 0] = 18'h0b27b; T1[ 1] = 18'h0fc7d; T1[ 2] = 18'h0a214; T1[ 3] = 18'h08d2b; - T1[ 4] = 18'h1c80c; T1[ 5] = 18'h145f1; T1[ 6] = 18'h00db6; T1[ 7] = 18'h1cf0f; - T1[ 8] = 18'h19386; T1[ 9] = 18'h02ad9; T1[10] = 18'h1a8b5; T1[11] = 18'h1479b; - T1[12] = 18'h08b5f; T1[13] = 18'h14806; T1[14] = 18'h0e6f7; T1[15] = 18'h0ce9d; - T1[16] = 18'h0cbc2; T1[17] = 18'h16ef1; T1[18] = 18'h0e14e; T1[19] = 18'h1796f; - T1[20] = 18'h14901; T1[21] = 18'h06666; T1[22] = 18'h0cb9f; T1[23] = 18'h09ab4; - T1[24] = 18'h12ffc; T1[25] = 18'h0a86d; T1[26] = 18'h19d35; T1[27] = 18'h0cda9; - T1[28] = 18'h16a19; T1[29] = 18'h09a36; T1[30] = 18'h0b176; T1[31] = 18'h0e0dc; - // - T2[ 0] = 18'h0b21a; T2[ 1] = 18'h13e71; T2[ 2] = 18'h03459; T2[ 3] = 18'h1063f; - T2[ 4] = 18'h18cef; T2[ 5] = 18'h1b8a5; T2[ 6] = 18'h082d1; T2[ 7] = 18'h1b1be; - T2[ 8] = 18'h18979; T2[ 9] = 18'h1409a; T2[10] = 18'h1713c; T2[11] = 18'h0cda3; - T2[12] = 18'h11c7d; T2[13] = 18'h0c943; T2[14] = 18'h12d7c; T2[15] = 18'h1531e; - T2[16] = 18'h0a45a; T2[17] = 18'h1c637; T2[18] = 18'h0906a; T2[19] = 18'h1670e; - T2[20] = 18'h12f78; T2[21] = 18'h08ce6; T2[22] = 18'h1c5c7; T2[23] = 18'h1292d; - T2[24] = 18'h0fc4b; T2[25] = 18'h064fb; T2[26] = 18'h0cc3c; T2[27] = 18'h19b37; - T2[28] = 18'h1b721; T2[29] = 18'h0f424; T2[30] = 18'h0f608; T2[31] = 18'h03e9b; - // - N[ 0] = 18'h00a9d; N[ 1] = 18'h01175; N[ 2] = 18'h0254f; N[ 3] = 18'h0ee38; - N[ 4] = 18'h00a6a; N[ 5] = 18'h0c7bd; N[ 6] = 18'h0ddac; N[ 7] = 18'h069fe; - N[ 8] = 18'h0e9d6; N[ 9] = 18'h0b6bf; N[10] = 18'h09230; N[11] = 18'h04fc5; - N[12] = 18'h05c9f; N[13] = 18'h09502; N[14] = 18'h0cbc5; N[15] = 18'h03109; - N[16] = 18'h08029; N[17] = 18'h0b27c; N[18] = 18'h0eeb8; N[19] = 18'h0c191; - N[20] = 18'h0ff86; N[21] = 18'h027ab; N[22] = 18'h07d76; N[23] = 18'h0ff1a; - N[24] = 18'h02afc; N[25] = 18'h0b25a; N[26] = 18'h0d3c1; N[27] = 18'h05589; - N[28] = 18'h09f7c; N[29] = 18'h0ddd6; N[30] = 18'h0b4fc; N[31] = 18'h0e8e7; - // - N_COEFF[ 0] = 18'h0344b; N_COEFF[ 1] = 18'h0ca66; N_COEFF[ 2] = 18'h0d9e8; N_COEFF[ 3] = 18'h070d5; - N_COEFF[ 4] = 18'h0ce4b; N_COEFF[ 5] = 18'h049b2; N_COEFF[ 6] = 18'h0abb3; N_COEFF[ 7] = 18'h0c3b2; - N_COEFF[ 8] = 18'h0ad38; N_COEFF[ 9] = 18'h05672; N_COEFF[10] = 18'h0fd47; N_COEFF[11] = 18'h06671; - N_COEFF[12] = 18'h00b7f; N_COEFF[13] = 18'h0fa35; N_COEFF[14] = 18'h0d4ac; N_COEFF[15] = 18'h0f1ca; - N_COEFF[16] = 18'h08e0a; N_COEFF[17] = 18'h05858; N_COEFF[18] = 18'h02dc6; N_COEFF[19] = 18'h08cfc; - N_COEFF[20] = 18'h01941; N_COEFF[21] = 18'h0f855; N_COEFF[22] = 18'h01e43; N_COEFF[23] = 18'h053f0; - N_COEFF[24] = 18'h0a479; N_COEFF[25] = 18'h0ae7e; N_COEFF[26] = 18'h05c66; N_COEFF[27] = 18'h02413; - N_COEFF[28] = 18'h0b5f8; N_COEFF[29] = 18'h0eb06; N_COEFF[30] = 18'h0de5b; N_COEFF[31] = 18'h0a751; - N_COEFF[32] = 18'h0c1ec; - // - end - - - // - // Load Interface - // - wire load_phase; - wire [ INDEX_WIDTH:0] load_xy_addr; - wire load_xy_addr_vld; - wire load_xy_req; - reg [ WORD_WIDTH-1:0] load_x_din; - reg [ WORD_WIDTH-1:0] load_y_din; - reg [ WORD_WIDTH-1:0] load_x_pipe; - reg [ WORD_WIDTH-1:0] load_y_pipe; - - always @(posedge clk) - // - if (load_xy_addr_vld) begin - - if (!load_phase) begin - load_x_pipe <= T1[load_xy_addr]; - load_y_pipe <= T2[load_xy_addr]; - end else begin - load_x_pipe <= !load_xy_addr[INDEX_WIDTH] ? N[load_xy_addr] : {WORD_WIDTH{1'bX}}; - load_y_pipe <= N_COEFF[load_xy_addr]; - end - end - - always @(posedge clk) - // - if (load_xy_req) - {load_y_din, load_x_din} <= {load_y_pipe, load_x_pipe}; - else - {load_y_din, load_x_din} <= {2*WORD_WIDTH{1'bX}}; - - - // - // UUT - // - modexpng_mmm_x8_dual # - ( - .INDEX_WIDTH(INDEX_WIDTH) - ) - uut - ( - .clk (clk), - .rst_n (rst_n), - - .ena (ena), - .rdy (rdy), - - .mode (mode), - .transfer (transfer), - - .index_last (index_last), - - .x_din (x_din), - .y_din (y_din), - .x_dout (x_dout), - .y_dout (y_dout), - - .x_din_addr (x_din_addr), - .y_din_addr (y_din_addr), - .x_dout_addr (x_dout_addr), - .y_dout_addr (y_dout_addr), - - .x_din_ena (x_din_ena), - .y_din_ena (y_din_ena), - .x_dout_ena (x_dout_ena), - .y_dout_ena (y_dout_ena), - - .x_din_reg_ena (x_din_reg_ena), - .y_din_reg_ena (y_din_reg_ena), - - .x_din_bank (x_din_bank), - .y_din_bank (y_din_bank), - .x_dout_bank (x_dout_bank), - .y_dout_bank (y_dout_bank), - - .load_phase (load_phase), - .load_xy_addr (load_xy_addr), - .load_xy_addr_vld (load_xy_addr_vld), - .load_xy_req (load_xy_req), - .load_x_din (load_x_din), - .load_y_din (load_y_din) - ); - - - // - // Script - // - initial begin - #(100.0*`CLK_PERIOD_NS) rst = 1'b0; - #(100.0*`CLK_PERIOD_NS) ena = 1'b1; - transfer = 1'b1; - mode = 1'b0; - #( 1.0*`CLK_PERIOD_NS) ena = 1'b0; - transfer = 1'bX; - mode = 1'bX; - - while (!rdy) #`CLK_PERIOD_NS; - - #(100.0*`CLK_PERIOD_NS) ena = 1'b1; - transfer = 1'b0; - mode = 1'b0; - #( 1.0*`CLK_PERIOD_NS) ena = 1'b0; - transfer = 1'bX; - mode = 1'bX; - - while (!rdy) #`CLK_PERIOD_NS; - - end - - -endmodule - diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v index 6b194dc..c78a969 100644 --- a/rtl/modexpng_core_top.v +++ b/rtl/modexpng_core_top.v @@ -6,6 +6,8 @@ module modexpng_core_top crt_mode, word_index_last_n, word_index_last_pq, + bit_index_last_n, + bit_index_last_pq, bus_cs, bus_we, bus_addr, @@ -37,6 +39,9 @@ module modexpng_core_top input [ OP_ADDR_W -1:0] word_index_last_n; input [ OP_ADDR_W -1:0] word_index_last_pq; + input [ BIT_INDEX_W -1:0] bit_index_last_n; + input [ BIT_INDEX_W -1:0] bit_index_last_pq; + input bus_cs; input bus_we; input [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr; @@ -71,16 +76,23 @@ module modexpng_core_top wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ]; wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ]; - wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ; - wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) || - (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ; - wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ; - wire uop_opcode_is_mmm = uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ; - wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) || - (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ; - + wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ; + wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) || + (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ; + wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ; + wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT ) || + (uop_data_opcode == UOP_OPCODE_LADDER_STEP ) ; + wire uop_opcode_is_mmm = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ) || + (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ; + wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) || + (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) || + (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) || + (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ; + + wire uop_loop_now; + wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT; - wire [UOP_ADDR_W -1:0] uop_addr_next = uop_addr + 1'b1; + wire [UOP_ADDR_W -1:0] uop_addr_next = uop_loop_now ? uop_addr - 1'b1: uop_addr + 1'b1; modexpng_uop_rom uop_rom ( @@ -595,6 +607,14 @@ module modexpng_core_top reg [OP_ADDR_W -1:0] io_mgr_word_index_last; reg [UOP_OPCODE_W -1:0] io_mgr_opcode; + reg [BIT_INDEX_W -1:0] io_mgr_ladder_steps; + wire io_mgr_ladder_d; + wire io_mgr_ladder_p; + wire io_mgr_ladder_q; + wire io_mgr_ladder_done; + + assign uop_loop_now = (uop_data_opcode == UOP_OPCODE_LADDER_STEP) && !io_mgr_ladder_done; + wire [WORD_W -1:0] wrk_rd_narrow_x_data_x_trunc = wrk_rd_narrow_x_data_x[WORD_W-1:0]; wire [WORD_W -1:0] wrk_rd_narrow_x_data_y_trunc = wrk_rd_narrow_x_data_y[WORD_W-1:0]; @@ -653,7 +673,13 @@ module modexpng_core_top .io_out_dout (io_out_data), .wrk_narrow_x_din_x_trunc (wrk_rd_narrow_x_data_x_trunc), - .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc) + .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc), + + .ladder_steps (io_mgr_ladder_steps), + .ladder_d (io_mgr_ladder_d), + .ladder_p (io_mgr_ladder_p), + .ladder_q (io_mgr_ladder_q), + .ladder_done (io_mgr_ladder_done) ); @@ -685,6 +711,9 @@ module modexpng_core_top reg mmm_force_unity_b_x; reg mmm_force_unity_b_y; + reg mmm_only_reduce_x; + reg mmm_only_reduce_y; + wire rdct_ena_x; wire rdct_ena_y; wire rdct_rdy_x; @@ -702,6 +731,7 @@ module modexpng_core_top .word_index_last (mmm_word_index_last_x), .word_index_last_minus1 (mmm_word_index_last_minus1_x), .force_unity_b (mmm_force_unity_b_x), + .only_reduce (mmm_only_reduce_x), .sel_wide_in (mmm_sel_wide_in_x), .sel_narrow_in (mmm_sel_narrow_in_x), @@ -757,6 +787,7 @@ module modexpng_core_top .word_index_last (mmm_word_index_last_y), .word_index_last_minus1 (mmm_word_index_last_minus1_y), .force_unity_b (mmm_force_unity_b_y), + .only_reduce (mmm_only_reduce_y), .sel_wide_in (mmm_sel_wide_in_y), .sel_narrow_in (mmm_sel_narrow_in_y), @@ -898,6 +929,7 @@ module modexpng_core_top reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_in; reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_out; reg [ OP_ADDR_W -1:0] wrk_word_index_last; + reg [ OP_ADDR_W -1:0] wrk_word_index_last_half; reg [UOP_OPCODE_W -1:0] wrk_opcode; modexpng_general_worker general_worker @@ -916,6 +948,7 @@ module modexpng_core_top .opcode (wrk_opcode), .word_index_last (wrk_word_index_last), + .word_index_last_half (wrk_word_index_last_half), .wrk_rd_wide_xy_ena_x (wrk_rd_wide_xy_ena_x), .wrk_rd_wide_xy_bank_x (wrk_rd_wide_xy_bank_x), @@ -976,10 +1009,11 @@ module modexpng_core_top // uop_exit_from_busy = 0; // - if (uop_opcode_is_in) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; - if (uop_opcode_is_out) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~mmm_ena & mmm_rdy); - if (uop_opcode_is_mmm) uop_exit_from_busy = ~mmm_ena & mmm_rdy ; - if (uop_opcode_is_wrk) uop_exit_from_busy = ~wrk_ena & wrk_rdy ; + if (uop_opcode_is_in ) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; + if (uop_opcode_is_out ) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~wrk_ena & wrk_rdy); + if (uop_opcode_is_mmm ) uop_exit_from_busy = ~mmm_ena & mmm_rdy; + if (uop_opcode_is_wrk ) uop_exit_from_busy = ~wrk_ena & wrk_rdy; + if (uop_opcode_is_ladder) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy; // end @@ -995,17 +1029,22 @@ module modexpng_core_top mmm_ena_y <= 1'b0; wrk_ena <= 1'b0; end else begin - io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in || uop_opcode_is_out) : 1'b0; - mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; - mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; - wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || uop_opcode_is_out) : 1'b0; + io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in || + uop_opcode_is_out || + uop_opcode_is_ladder): 1'b0; + mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; + mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0; + wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || + uop_opcode_is_out ): 1'b0; end // // Parameters // wire uop_aux_is_1 = uop_data_aux == UOP_AUX_1; - + + // TODO: Add reset to default don't care values. + always @(posedge clk) // if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin @@ -1044,9 +1083,10 @@ module modexpng_core_top UOP_LADDER_00: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b00; UOP_LADDER_11: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b11; UOP_LADDER_D: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX; - UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX; + UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= {io_mgr_ladder_p, io_mgr_ladder_q}; endcase // + {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b0}}; {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}}; {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }}; {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }}; @@ -1055,24 +1095,42 @@ module modexpng_core_top // end // + UOP_OPCODE_MODULAR_REDUCE_PROC: begin + // + {mmm_ladder_mode_x, mmm_ladder_mode_y } <= {2{1'bX }}; + // + {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b1 }}; + {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{1'b0 }}; + {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{BANK_DNC }}; + {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{BANK_DNC }}; + {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }}; + {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}}; + // + end + // UOP_OPCODE_PROPAGATE_CARRIES: begin wrk_sel_narrow_in <= uop_data_sel_narrow_in; wrk_sel_narrow_out <= uop_data_sel_narrow_out; end // - UOP_OPCODE_COPY_CRT_Y2X: begin + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_COPY_LADDERS_X2Y: begin wrk_sel_wide_in <= uop_data_sel_wide_in; wrk_sel_wide_out <= uop_data_sel_wide_out; wrk_sel_narrow_in <= uop_data_sel_narrow_in; wrk_sel_narrow_out <= uop_data_sel_narrow_out; end // + UOP_OPCODE_MODULAR_REDUCE_INIT: begin + wrk_sel_narrow_in <= uop_data_sel_narrow_in; + end + // endcase // end // - // Length + // Lengths // wire [OP_ADDR_W -1:0] word_index_last_n_minus1 = word_index_last_n - 1'b1; wire [OP_ADDR_W -1:0] word_index_last_pq_minus1 = word_index_last_pq - 1'b1; @@ -1086,7 +1144,10 @@ module modexpng_core_top case (uop_data_opcode) // UOP_OPCODE_INPUT_TO_WIDE, - UOP_OPCODE_INPUT_TO_NARROW: io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; + UOP_OPCODE_INPUT_TO_NARROW, + UOP_OPCODE_OUTPUT_FROM_NARROW: + // + io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; // UOP_OPCODE_MODULAR_MULTIPLY: begin {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }}; @@ -1094,8 +1155,31 @@ module modexpng_core_top {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }}; end // - UOP_OPCODE_PROPAGATE_CARRIES: - wrk_word_index_last = uop_npq_is_n ? word_index_last_n : word_index_last_pq; + UOP_OPCODE_PROPAGATE_CARRIES, + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_COPY_LADDERS_X2Y: + wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; + // + UOP_OPCODE_MODULAR_REDUCE_INIT: begin + wrk_word_index_last <= word_index_last_n; + wrk_word_index_last_half <= word_index_last_pq; + end + // + UOP_OPCODE_MODULAR_REDUCE_PROC: begin + {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{word_index_last_pq }}; + {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{word_index_last_pq_minus1}}; + {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{word_index_last_pq }}; + end + // + UOP_OPCODE_LADDER_INIT: begin + io_mgr_word_index_last <= OP_ADDR_LADDER_LAST; + io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n; + end + // + UOP_OPCODE_LADDER_STEP: begin + io_mgr_word_index_last <= OP_ADDR_LADDER_LAST; + io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n; + end // endcase // @@ -1140,8 +1224,8 @@ module modexpng_core_top UOP_FSM_STATE_IDLE: valid_reg <= ~next; UOP_FSM_STATE_DECODE: valid_reg <= uop_opcode_is_stop; endcase - - + + // // BEGIN DEBUG diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v index c35f0b3..269ef98 100644 --- a/rtl/modexpng_general_worker.v +++ b/rtl/modexpng_general_worker.v @@ -14,6 +14,7 @@ module modexpng_general_worker opcode, word_index_last, + word_index_last_half, wrk_rd_wide_xy_ena_x, wrk_rd_wide_xy_bank_x, @@ -88,6 +89,7 @@ module modexpng_general_worker input [ UOP_OPCODE_W -1:0] opcode; input [ OP_ADDR_W -1:0] word_index_last; + input [ OP_ADDR_W -1:0] word_index_last_half; output wrk_rd_wide_xy_ena_x; output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x; @@ -141,18 +143,35 @@ module modexpng_general_worker // // FSM Declaration // - localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0; - localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1; - localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2; - localparam [3:0] WRK_FSM_STATE_BUSY = 4'h3; - localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug! - localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6; - localparam [3:0] WRK_FSM_STATE_STOP = 4'h7; + localparam [4:0] WRK_FSM_STATE_IDLE = 5'h00; - reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE; - reg [3:0] wrk_fsm_state_next_one_pass; // single address space sweep + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1 = 5'h01; + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2 = 5'h02; + localparam [4:0] WRK_FSM_STATE_BUSY = 5'h03; + localparam [4:0] WRK_FSM_STATE_LATENCY_POST1 = 5'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug! + localparam [4:0] WRK_FSM_STATE_LATENCY_POST2 = 5'h06; + + localparam [4:0] WRK_FSM_STATE_STOP = 5'h07; + + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 5'h10; + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 5'h11; + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 5'h12; + localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 5'h13; + localparam [4:0] WRK_FSM_STATE_BUSY_M1 = 5'h14; + localparam [4:0] WRK_FSM_STATE_BUSY_M2 = 5'h15; + localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 5'h16; + localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 5'h17; + localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 5'h18; + localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 5'h19; + + reg [4:0] wrk_fsm_state = WRK_FSM_STATE_IDLE; + reg [4:0] wrk_fsm_state_next_one_pass; // single address space sweep + reg [4:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y) + // TODO: Comment on how narrow/wide address increment works (narrow is one long sweep, wide is two twice shorter sweeps) + + // // Control Signals // @@ -244,32 +263,62 @@ module modexpng_general_worker // // Delays // - //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1; - //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2; - //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1; - //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly3; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly4; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly3; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly4; reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1; reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly3; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly4; reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1; reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly3; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly4; + + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1; + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly2; + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly3; + + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1; + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly2; + reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly3; + + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1; + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2; + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly3; + + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1; + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2; + reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3; + always @(posedge clk) begin // - //{rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x}; - //{rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y}; + {rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x}; + {rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y}; + // + {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2}; + {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2}; // {rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x}; {rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y}; // + {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2}; + {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2}; + // + {wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x}; + {wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y}; + // + {wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x}; + {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y}; + // end - - - // - // Handy Wires - // - wire rd_narrow_xy_addr_x_next_is_last; - wire rd_narrow_xy_addr_y_next_is_last; // @@ -310,7 +359,8 @@ module modexpng_general_worker case (opcode) // UOP_OPCODE_PROPAGATE_CARRIES, - UOP_OPCODE_OUTPUT_FROM_NARROW: + UOP_OPCODE_OUTPUT_FROM_NARROW, + UOP_OPCODE_MODULAR_REDUCE_INIT: // case (wrk_fsm_state_next_one_pass) // @@ -333,12 +383,30 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_PRE2, WRK_FSM_STATE_BUSY: begin // - enable_narrow_xy_rd_en; enable_wide_xy_rd_en; + enable_narrow_xy_rd_en; // end // endcase + // + UOP_OPCODE_COPY_LADDERS_X2Y: + // + case (wrk_fsm_state_next_one_pass_meander) + // + WRK_FSM_STATE_LATENCY_PRE1_M1, + WRK_FSM_STATE_LATENCY_PRE1_M2, + WRK_FSM_STATE_LATENCY_PRE2_M1, + WRK_FSM_STATE_LATENCY_PRE2_M2, + WRK_FSM_STATE_BUSY_M1, + WRK_FSM_STATE_BUSY_M2: begin + // + enable_wide_xy_rd_en; + enable_narrow_xy_rd_en; + // + end + // + endcase // endcase // @@ -389,8 +457,7 @@ module modexpng_general_worker WRK_FSM_STATE_LATENCY_POST1, WRK_FSM_STATE_LATENCY_POST2: // - enable_narrow_xy_wr_en; - //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}}; + enable_narrow_xy_wr_en; // // endcase @@ -405,7 +472,34 @@ module modexpng_general_worker // enable_wide_xy_wr_en; enable_narrow_xy_wr_en; - //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}}; + // + end + // + endcase + // + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: + // + enable_wide_xy_wr_en; + // + // + endcase + // + UOP_OPCODE_COPY_LADDERS_X2Y: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY_M2, + WRK_FSM_STATE_LATENCY_POST1_M2, + WRK_FSM_STATE_LATENCY_POST2_M2: begin + // + enable_wide_xy_wr_en; + enable_narrow_xy_wr_en; // end // @@ -424,7 +518,7 @@ module modexpng_general_worker reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r; reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r; - wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r}; + wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r}; wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r}; wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r}; wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r}; @@ -497,6 +591,45 @@ module modexpng_general_worker end // endcase + // + UOP_OPCODE_COPY_LADDERS_X2Y: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY_M2, + WRK_FSM_STATE_LATENCY_POST1_M2, + WRK_FSM_STATE_LATENCY_POST2_M2: begin + // + wr_wide_x_dout_x <= wrk_rd_wide_x_din_x_dly3; + wr_wide_y_dout_x <= wrk_rd_wide_x_din_x_dly2; + wr_wide_x_dout_y <= wrk_rd_wide_x_din_y_dly3; + wr_wide_y_dout_y <= wrk_rd_wide_x_din_y_dly2; + // + wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_x_dly3; + wr_narrow_y_dout_x <= wrk_rd_narrow_x_din_x_dly2; + wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y_dly3; + wr_narrow_y_dout_y <= wrk_rd_narrow_x_din_y_dly2; + // + end + // + endcase + // + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: begin + // + wr_wide_x_dout_x <= wrk_rd_narrow_x_din_x; + wr_wide_y_dout_x <= wrk_rd_narrow_y_din_x; + wr_wide_x_dout_y <= wrk_rd_narrow_x_din_y; + wr_wide_y_dout_y <= wrk_rd_narrow_y_din_y; + // + end + // + endcase // endcase // @@ -506,6 +639,9 @@ module modexpng_general_worker // // Write Address Logic // + wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half; + wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half; + always @(posedge clk) begin // {wr_wide_xy_bank_x, wr_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; @@ -534,22 +670,64 @@ module modexpng_general_worker // endcase // + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY, + WRK_FSM_STATE_LATENCY_POST1, + WRK_FSM_STATE_LATENCY_POST2: begin + // + wr_wide_xy_bank_x <= uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H; + wr_wide_xy_bank_y <= uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H; + // + wr_wide_xy_addr_x <= rd_wide_xy_addr_x_dly2; + wr_wide_xy_addr_y <= rd_wide_xy_addr_y_dly2; + // + end + // + endcase + // + UOP_OPCODE_COPY_LADDERS_X2Y: + // + case (wrk_fsm_state) + // + WRK_FSM_STATE_BUSY_M2, + WRK_FSM_STATE_LATENCY_POST1_M2, + WRK_FSM_STATE_LATENCY_POST2_M2: begin + // + {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly4}; + {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly4}; + // + {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly4}; + {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly4}; + // + end + // + endcase + // // endcase // end - - + + // // Read Address Logic // + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_next; + reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_next; + reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next; reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next; - assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last; - assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last; + wire rd_wide_xy_addr_x_next_is_last = rd_wide_xy_addr_x_next == word_index_last_half; + wire rd_wide_xy_addr_y_next_is_last = rd_wide_xy_addr_y_next == word_index_last_half; + + wire rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last; + wire rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last; - always @(posedge clk) begin + always @(posedge clk) begin // TODO: Maybe split into two blocks (read address / next address)?? // {rd_wide_xy_bank_x, rd_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ?? {rd_wide_xy_bank_y, rd_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC}; @@ -572,6 +750,9 @@ module modexpng_general_worker {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO}; {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO}; // + rd_wide_xy_addr_x_next <= OP_ADDR_ONE; + rd_wide_xy_addr_y_next <= OP_ADDR_ONE; + // rd_narrow_xy_addr_x_next <= OP_ADDR_ONE; rd_narrow_xy_addr_y_next <= OP_ADDR_ONE; // @@ -586,11 +767,113 @@ module modexpng_general_worker {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next}; {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next}; // + rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO; + rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO; + // + rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1; + rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1; + // + end + // + endcase + // + UOP_OPCODE_MODULAR_REDUCE_INIT: + // + case (wrk_fsm_state_next_one_pass) + // + WRK_FSM_STATE_LATENCY_PRE1: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO}; + // + rd_wide_xy_addr_x_next <= OP_ADDR_ONE; + rd_wide_xy_addr_y_next <= OP_ADDR_ONE; + // + rd_narrow_xy_addr_x_next <= OP_ADDR_ONE; + rd_narrow_xy_addr_y_next <= OP_ADDR_ONE; + // + end + // + WRK_FSM_STATE_LATENCY_PRE2, + WRK_FSM_STATE_BUSY: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x_next}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y_next}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next}; + // + rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO; + rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO; + // + rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1; + rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1; + // + end + // + endcase + // + UOP_OPCODE_COPY_LADDERS_X2Y: + // + case (wrk_fsm_state_next_one_pass_meander) + // + WRK_FSM_STATE_LATENCY_PRE1_M1: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, OP_ADDR_ZERO}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, OP_ADDR_ZERO}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, OP_ADDR_ZERO}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, OP_ADDR_ZERO}; + // + rd_wide_xy_addr_x_next <= OP_ADDR_ONE; + rd_wide_xy_addr_y_next <= OP_ADDR_ONE; + // + rd_narrow_xy_addr_x_next <= OP_ADDR_ONE; + rd_narrow_xy_addr_y_next <= OP_ADDR_ONE; + // + end + // + WRK_FSM_STATE_LATENCY_PRE1_M2: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y}; + // + end + // + WRK_FSM_STATE_LATENCY_PRE2_M1, + WRK_FSM_STATE_BUSY_M1: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_next}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_next}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_next}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_next}; + // + rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO; + rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO; + // rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1; rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1; // end // + WRK_FSM_STATE_LATENCY_PRE2_M2, + WRK_FSM_STATE_BUSY_M2: begin + // + {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x}; + {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y}; + // + {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x}; + {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y}; + // + end + // endcase // // @@ -608,7 +891,9 @@ module modexpng_general_worker else case (opcode) UOP_OPCODE_PROPAGATE_CARRIES, UOP_OPCODE_OUTPUT_FROM_NARROW, - UOP_OPCODE_COPY_CRT_Y2X: wrk_fsm_state <= wrk_fsm_state_next_one_pass; + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass; + UOP_OPCODE_COPY_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander; default: wrk_fsm_state <= WRK_FSM_STATE_IDLE; endcase @@ -616,17 +901,20 @@ module modexpng_general_worker // // Busy Exit Logic // - reg wrk_fsm_done_one_pass = 1'b0; + reg wrk_fsm_done_one_pass = 1'b0; + reg wrk_fsm_done_one_pass_meander = 1'b0; always @(posedge clk) begin // - wrk_fsm_done_one_pass <= 1'b0; + wrk_fsm_done_one_pass <= 1'b0; + wrk_fsm_done_one_pass_meander <= 1'b0; // case (opcode) // UOP_OPCODE_PROPAGATE_CARRIES, UOP_OPCODE_OUTPUT_FROM_NARROW, - UOP_OPCODE_COPY_CRT_Y2X: begin + UOP_OPCODE_COPY_CRT_Y2X, + UOP_OPCODE_MODULAR_REDUCE_INIT: begin // if (wrk_fsm_state == WRK_FSM_STATE_BUSY) begin // @@ -637,6 +925,20 @@ module modexpng_general_worker // end // + UOP_OPCODE_COPY_LADDERS_X2Y: begin + // + if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M2) begin + // + if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; // TODO: Check, whether both are necessary... + if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; + // + end + // + if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M1) + wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander; + // + end + // endcase // end @@ -654,7 +956,31 @@ module modexpng_general_worker WRK_FSM_STATE_BUSY: wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ; WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_POST2 ; WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_STOP ; - WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ; + WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ; + default: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ; + endcase + // + end + + always @* begin + // + case (wrk_fsm_state) + WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass_meander = ena ? WRK_FSM_STATE_LATENCY_PRE1_M1 : WRK_FSM_STATE_IDLE ; + // + WRK_FSM_STATE_LATENCY_PRE1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE1_M2 ; + WRK_FSM_STATE_LATENCY_PRE1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M1 ; + WRK_FSM_STATE_LATENCY_PRE2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M2 ; + WRK_FSM_STATE_LATENCY_PRE2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M1 ; + WRK_FSM_STATE_BUSY_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M2 ; + WRK_FSM_STATE_BUSY_M2: wrk_fsm_state_next_one_pass_meander = wrk_fsm_done_one_pass_meander ? WRK_FSM_STATE_LATENCY_POST1_M1 : WRK_FSM_STATE_BUSY_M1 ; + WRK_FSM_STATE_LATENCY_POST1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST1_M2 ; + WRK_FSM_STATE_LATENCY_POST1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M1 ; + WRK_FSM_STATE_LATENCY_POST2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M2 ; + WRK_FSM_STATE_LATENCY_POST2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_STOP ; + // + WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ; + // + default: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ; endcase // end diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v index dfbd676..59f4709 100644 --- a/rtl/modexpng_io_manager.v +++ b/rtl/modexpng_io_manager.v @@ -53,7 +53,13 @@ module modexpng_io_manager io_out_dout, wrk_narrow_x_din_x_trunc, - wrk_narrow_x_din_y_trunc + wrk_narrow_x_din_y_trunc, + + ladder_steps, + ladder_d, + ladder_p, + ladder_q, + ladder_done ); // @@ -120,6 +126,12 @@ module modexpng_io_manager output [ WORD_W -1:0] wrk_narrow_x_din_x_trunc; output [ WORD_W -1:0] wrk_narrow_x_din_y_trunc; + + input [ BIT_INDEX_W -1:0] ladder_steps; + output ladder_d; + output ladder_p; + output ladder_q; + output ladder_done; // @@ -254,6 +266,10 @@ module modexpng_io_manager wire opcode_is_output = opcode == UOP_OPCODE_OUTPUT_FROM_NARROW; + wire opcode_is_ladder_init = opcode == UOP_OPCODE_LADDER_INIT; + wire opcode_is_ladder_step = opcode == UOP_OPCODE_LADDER_STEP; + wire opcode_is_ladder = opcode_is_ladder_init || opcode_is_ladder_step; + wire opcode_is_input_wide = opcode == UOP_OPCODE_INPUT_TO_WIDE; wire opcode_is_input_narrow = opcode == UOP_OPCODE_INPUT_TO_NARROW; @@ -269,8 +285,90 @@ module modexpng_io_manager wire in_1_addr_op_next_is_last; wire in_2_addr_op_next_is_last; + wire in_2_addr_op_next_is_one; wire dummy_addr_op_next_is_last; + + // + // Ladder Init/Step Logic + // + reg ladder_d_r; + reg ladder_p_r; + reg ladder_q_r; + reg ladder_done_r = 1'b0; + + assign ladder_d = ladder_d_r; + assign ladder_p = ladder_p_r; + assign ladder_q = ladder_q_r; + assign ladder_done = ladder_done_r; + + reg [BIT_INDEX_W -1:0] ladder_index; + reg [BIT_INDEX_W -1:0] ladder_index_next; + wire [ OP_ADDR_W -1:0] ladder_index_msb = ladder_index[BIT_INDEX_W-1-: OP_ADDR_W]; + wire [ WORD_MUX_W -1:0] ladder_index_lsb = ladder_index[ WORD_MUX_W-1-:WORD_MUX_W]; + wire ladder_index_is_zero = ladder_index == BIT_INDEX_ZERO; + + always @(posedge clk) + // + if (io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1) begin + // + if (opcode_is_ladder_init) begin + ladder_index <= ladder_steps; + ladder_index_next <= ladder_steps - 1'b1; + ladder_done_r <= 1'b0; + end + // + if (opcode_is_ladder_step) begin + ladder_index <= ladder_index_next; + ladder_index_next <= ladder_index_next - 1'b1; + if (ladder_index_is_zero) ladder_done_r <= 1'b1; + end + // + end + + + // + // Ladder Mux + // + reg ladder_dpq_mux; + + always @(io_in_2_din, ladder_index_lsb) + // + case(ladder_index_lsb) + 4'b0000: ladder_dpq_mux = io_in_2_din[ 0]; + 4'b0001: ladder_dpq_mux = io_in_2_din[ 1]; + 4'b0010: ladder_dpq_mux = io_in_2_din[ 2]; + 4'b0011: ladder_dpq_mux = io_in_2_din[ 3]; + 4'b0100: ladder_dpq_mux = io_in_2_din[ 4]; + 4'b0101: ladder_dpq_mux = io_in_2_din[ 5]; + 4'b0110: ladder_dpq_mux = io_in_2_din[ 6]; + 4'b0111: ladder_dpq_mux = io_in_2_din[ 7]; + 4'b1000: ladder_dpq_mux = io_in_2_din[ 8]; + 4'b1001: ladder_dpq_mux = io_in_2_din[ 9]; + 4'b1010: ladder_dpq_mux = io_in_2_din[10]; + 4'b1011: ladder_dpq_mux = io_in_2_din[11]; + 4'b1100: ladder_dpq_mux = io_in_2_din[12]; + 4'b1101: ladder_dpq_mux = io_in_2_din[13]; + 4'b1110: ladder_dpq_mux = io_in_2_din[14]; + 4'b1111: ladder_dpq_mux = io_in_2_din[15]; + endcase + + always @(posedge clk) + // + case (io_fsm_state) + // + IO_FSM_STATE_BUSY: + if (opcode_is_ladder) ladder_d_r <= ladder_dpq_mux; + // + IO_FSM_STATE_LATENCY_POST1: + if (opcode_is_ladder) ladder_p_r <= ladder_dpq_mux; + // + IO_FSM_STATE_LATENCY_POST2: + if (opcode_is_ladder) ladder_q_r <= ladder_dpq_mux; + // + endcase + + // // Source Enable Logic // @@ -284,8 +382,8 @@ module modexpng_io_manager IO_FSM_STATE_LATENCY_PRE1, IO_FSM_STATE_LATENCY_PRE2, IO_FSM_STATE_BUSY: begin - in_1_en <= opcode_is_input && sel_aux_is_1; - in_2_en <= opcode_is_input && sel_aux_is_2; + in_1_en <= opcode_is_input && sel_aux_is_1; + in_2_en <= (opcode_is_input && sel_aux_is_2) || opcode_is_ladder; end // IO_FSM_STATE_EXTRA: begin @@ -450,35 +548,59 @@ module modexpng_io_manager wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next; assign in_1_addr_op_next_is_last = in_1_addr_op_next == word_index_last; - assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last; + assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last; + assign in_2_addr_op_next_is_one = in_2_addr_op_next == OP_ADDR_ONE; assign dummy_addr_op_next_is_last = dummy_addr_op_next == word_index_last; - always @(posedge clk) + always @(posedge clk) begin + // + {in_1_addr_bank, in_1_addr_op } <= {BANK_DNC, OP_ADDR_DNC}; + {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC}; + { dummy_addr_op} <= { OP_ADDR_DNC}; + // + in_1_addr_next <= {BANK_DNC, OP_ADDR_DNC}; + in_2_addr_next <= {BANK_DNC, OP_ADDR_DNC}; + dummy_addr_next <= { OP_ADDR_DNC}; // case (io_fsm_state_next) // IO_FSM_STATE_LATENCY_PRE1: begin // - {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO}; - {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO}; - { dummy_addr_op} <= { OP_ADDR_ZERO}; + {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO}; + if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO}; + else {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC}; + { dummy_addr_op} <= { OP_ADDR_ZERO}; + // + in_1_addr_next <= {sel_in, OP_ADDR_ONE}; + in_2_addr_next <= {sel_in, OP_ADDR_ONE}; + dummy_addr_next <= { OP_ADDR_ONE}; + // + end + // + IO_FSM_STATE_LATENCY_PRE2: begin + // + {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next; + if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next; + else {in_2_addr_bank, in_2_addr_op } <= {BANK_IN_2_D, ladder_index_msb}; + { dummy_addr_op} <= dummy_addr_next; // - in_1_addr_next <= {sel_in, OP_ADDR_ONE}; - in_2_addr_next <= {sel_in, OP_ADDR_ONE}; - dummy_addr_next <= { OP_ADDR_ONE}; + in_1_addr_next <= in_1_addr_next + 1'b1; + if (!opcode_is_ladder) in_2_addr_next <= in_2_addr_next + 1'b1; + else in_2_addr_next <= {BANK_IN_2_P, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]}; + dummy_addr_next <= dummy_addr_next + 1'b1; // end // - IO_FSM_STATE_LATENCY_PRE2, IO_FSM_STATE_BUSY: begin // {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next; {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next; { dummy_addr_op} <= dummy_addr_next; // - in_1_addr_next <= in_1_addr_next + 1'b1; - in_2_addr_next <= in_2_addr_next + 1'b1; - dummy_addr_next <= dummy_addr_next + 1'b1; + in_1_addr_next <= in_1_addr_next + 1'b1; + if (!opcode_is_ladder) in_2_addr_next <= in_2_addr_next + 1'b1; + else in_2_addr_next <= {BANK_IN_2_Q, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]}; + dummy_addr_next <= dummy_addr_next + 1'b1; // end // @@ -499,7 +621,8 @@ module modexpng_io_manager end // endcase - + // + end // @@ -525,7 +648,7 @@ module modexpng_io_manager if (opcode_is_input) begin if (sel_aux_is_1 && in_1_addr_op_next_is_last) io_fsm_done <= 1'b1; if (sel_aux_is_2 && in_2_addr_op_next_is_last) io_fsm_done <= 1'b1; - end else if (opcode_is_output) begin + end else if (opcode_is_output || opcode_is_ladder) begin if (dummy_addr_op_next_is_last) io_fsm_done <= 1'b1; end // @@ -571,4 +694,17 @@ module modexpng_io_manager endcase + // + // BEGIN DEBUG + // + always @(posedge clk) + // + if ((io_fsm_state == IO_FSM_STATE_STOP) && opcode_is_ladder) + $display("[%4d] / %d / %d / %d", ladder_index, ladder_d_r, ladder_p_r, ladder_q_r); + + // + // END DEBUG + // + + endmodule diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh index 2e591e7..f68c559 100644 --- a/rtl/modexpng_microcode.vh +++ b/rtl/modexpng_microcode.vh @@ -1,8 +1,8 @@ -localparam UOP_OPCODE_W = 4; +localparam UOP_OPCODE_W = 5; localparam UOP_CRT_W = 1; localparam UOP_NPQ_W = 1; localparam UOP_AUX_W = 1; -localparam UOP_LADDER_W = 1; +localparam UOP_LADDER_W = 2; localparam UOP_SEL_W = 4 * BANK_ADDR_W; localparam UOP_ADDR_W = 6; // 64 instructions @@ -11,17 +11,17 @@ localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_USING_CRT = 6'd0; localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_WITHOUT_CRT = 6'd31; -// 4 1 1 1 2 4*3=12 +// 5 1 1 1 2 4*3=12 localparam UOP_W = UOP_OPCODE_W + UOP_CRT_W + UOP_NPQ_W + UOP_AUX_W + UOP_LADDER_W + UOP_SEL_W; -// [20:17] [16] [15] [14] [13:12] [11:9][8:6][5:3][2:0] +// [21:17] [16] [15] [14] [13:12] [11:9][8:6][5:3][2:0] // OPCODE -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 4'd0; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 5'd0; /* all fields are don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 4'd1; -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 5'd1; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 5'd2; /* CRT tells into which of the dual MMM to write * NPQ specifies the width of the operand * AUX specifies from which INPUT to read @@ -31,7 +31,7 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2; * */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 5'd3; /* CRT tells from which of the dual MMM to read * NPQ specifies the width of the operand * AUX is don't care @@ -39,27 +39,45 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3; * source and destination WIDE are don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 4'd4; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 5'd4; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd5; /* CRT is don't care * NPQ specifies the width of the operand * AUX is don't care * LADDER is don't care */ -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 4'd8; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 5'd8; /* CRT is don't care * NPQ specifies the width of the operand * AUX = AUX_2 forces B input to 1 (AUX_1 reads from source NARROW as usual) * LADDER specifies Montgomery ladder mode */ +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_INIT = 5'd10; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_PROC = 5'd11; +/* CRT + * NPQ + * AUX + * LADDER is don't care + */ + -localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 4'd11; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 5'd12; /* CRT is don't care * NPQ specifies the width of the operand * AUX is don't care * LADDER is don't care * source and destination WIDE are don't care - */ + */ + +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_INIT = 5'd16; +localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_STEP = 5'd17; +/* CRT is don't care + * NPQ is don't care + * AUX is don't care + * LADDER is don't care + * WIDE and NARROW are don't care + */ // CRT localparam [UOP_CRT_W -1:0] UOP_CRT_X = 1'b0; diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v index 14f1b47..6e52a97 100644 --- a/rtl/modexpng_mmm_dual.v +++ b/rtl/modexpng_mmm_dual.v @@ -8,6 +8,7 @@ module modexpng_mmm_dual word_index_last, word_index_last_minus1, force_unity_b, + only_reduce, sel_wide_in, sel_narrow_in, @@ -72,6 +73,7 @@ module modexpng_mmm_dual input [7:0] word_index_last; input [7:0] word_index_last_minus1; input force_unity_b; + input only_reduce; input [BANK_ADDR_W-1:0] sel_wide_in; input [BANK_ADDR_W-1:0] sel_narrow_in; @@ -120,7 +122,8 @@ module modexpng_mmm_dual // reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE; reg [FSM_STATE_WIDTH-1:0] fsm_state_next; - + + wire [FSM_STATE_WIDTH-1:0] fsm_state_after_idle; wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square; wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle; wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle; @@ -905,16 +908,17 @@ module modexpng_mmm_dual // // FSM Transition Logic // + assign fsm_state_after_idle = !only_reduce ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_INIT; assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT; - assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; + assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; always @* begin // fsm_state_next = FSM_STATE_IDLE; // case (fsm_state) - FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE; + FSM_STATE_IDLE: fsm_state_next = ena ? fsm_state_after_idle /*FSM_STATE_MULT_SQUARE_COL_0_INIT*/ : FSM_STATE_IDLE; FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ; FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ; diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh index 6e6c3ca..c7566ad 100644 --- a/rtl/modexpng_parameters.vh +++ b/rtl/modexpng_parameters.vh @@ -23,7 +23,7 @@ localparam MAC_W = 47; localparam BUS_DATA_W = 32; localparam BUS_OP_ADDR_W = cryptech_clog2(MAX_OP_W / BUS_DATA_W); - +localparam BIT_INDEX_W = cryptech_clog2(MAX_OP_W); localparam BANK_ADDR_W = 3; localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W); @@ -33,6 +33,8 @@ localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS); localparam CARRY_W = WORD_EXT_W - WORD_W; +localparam WORD_MUX_W = cryptech_clog2(WORD_W); + localparam [CARRY_W-1:0] CARRY_ZERO = {CARRY_W{1'b0}}; localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0; @@ -80,6 +82,10 @@ localparam [BANK_ADDR_W-1:0] BANK_OUT_YM = 3'd2; localparam [BANK_ADDR_W-1:0] BANK_DNC = {BANK_ADDR_W{1'bX}}; +localparam [OP_ADDR_W-1:0] OP_ADDR_LADDER_LAST = 3; // 0..3, i.e. <dummy>, D, P, Q + +localparam [BIT_INDEX_W-1:0] BIT_INDEX_ZERO = {BIT_INDEX_W{1'b0}}; + localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0; localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1; diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v index 016b1b0..04f0c83 100644 --- a/rtl/modexpng_uop_rom.v +++ b/rtl/modexpng_uop_rom.v @@ -10,53 +10,67 @@ module modexpng_uop_rom input wire clk; input wire [UOP_ADDR_W -1:0] addr; - output reg [UOP_W -1:0] data; + output reg [UOP_W -1:0] data; always @(posedge clk) // case (addr) - 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // - 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // - 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; // - 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; // - 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // - 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // + 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // + 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; // + 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; // + 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; // + 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // + 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; // // - 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // - 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // + 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // + 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; // // - 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; // - 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // - 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // + 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; // + 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // + 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // // - 6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; // + 6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; // // - 6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; // - 6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; // + 6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; // + 6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; // // - 6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // + 6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; // // - 6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; // + 6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; // // - 6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // + 6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // // - 6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; // - 6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; // - 6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; // - 6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; // - 6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; // + 6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; // + 6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; // + 6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; // + 6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; // + 6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; // // - 6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // - 6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd29: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; // - 6'd30: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; // - // - default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // + 6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; // + 6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 6'd29: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; // + 6'd30: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; // + // + 6'd31: data <= {UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_DNC }; // + // + 6'd32: data <= {UOP_OPCODE_MODULAR_REDUCE_PROC, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; // + // + 6'd33: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_D, BANK_NARROW_A, BANK_WIDE_C, BANK_NARROW_C }; // + 6'd34: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_A, BANK_WIDE_D, BANK_NARROW_D }; // + 6'd35: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_A, BANK_DNC, BANK_WIDE_C, BANK_NARROW_C }; // + // + 6'd36: data <= {UOP_OPCODE_COPY_LADDERS_X2Y, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; // + // + 6'd37: data <= {UOP_OPCODE_LADDER_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // + 6'd38: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_PQ, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; // + 6'd39: data <= {UOP_OPCODE_LADDER_STEP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // + // + default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; // endcase endmodule |