From 3ea94c872afe6309c43ac7eccf877734c33f5421 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 1 Oct 2019 15:11:04 +0300 Subject: Implemented the final stage of the Montgomery modular multiplication, i.e. addition of AB and M then reduction by right-shift. --- bench/tb_square.v | 418 +++++++++++++++++++++++++++++---------- rtl/modexpng_parameters.vh | 6 +- rtl/modexpng_part_recombinator.v | 15 ++ 3 files changed, 331 insertions(+), 108 deletions(-) diff --git a/bench/tb_square.v b/bench/tb_square.v index d35a5cc..733e741 100644 --- a/bench/tb_square.v +++ b/bench/tb_square.v @@ -50,87 +50,87 @@ module tb_square; // initial begin // - T1[ 0] = 18'h0f13e; T1[ 1] = 18'h0daf6; T1[ 2] = 18'h0aaa9; T1[ 3] = 18'h0c2c2; - T1[ 4] = 18'h0fc5f; T1[ 5] = 18'h12164; T1[ 6] = 18'h14375; T1[ 7] = 18'h15615; - T1[ 8] = 18'h0d8e2; T1[ 9] = 18'h0ec15; T1[10] = 18'h17c46; T1[11] = 18'h0c922; - T1[12] = 18'h08f00; T1[13] = 18'h152f9; T1[14] = 18'h0b0b6; T1[15] = 18'h0ce87; - T1[16] = 18'h178f2; T1[17] = 18'h09efb; T1[18] = 18'h0409d; T1[19] = 18'h11104; - T1[20] = 18'h0b4a6; T1[21] = 18'h158a6; T1[22] = 18'h0514e; T1[23] = 18'h0ec55; - T1[24] = 18'h11e73; T1[25] = 18'h11ddd; T1[26] = 18'h07bd4; T1[27] = 18'h0638b; - T1[28] = 18'h0e805; T1[29] = 18'h11c4f; T1[30] = 18'h0a2eb; T1[31] = 18'h05454; + T1[ 0] = 18'h191c5; T1[ 1] = 18'h1a118; T1[ 2] = 18'h06e06; T1[ 3] = 18'h0ea69; + T1[ 4] = 18'h12944; T1[ 5] = 18'h0c242; T1[ 6] = 18'h0fc64; T1[ 7] = 18'h14efe; + T1[ 8] = 18'h113da; T1[ 9] = 18'h06ff7; T1[10] = 18'h0ef0d; T1[11] = 18'h18581; + T1[12] = 18'h1a62c; T1[13] = 18'h052b7; T1[14] = 18'h114f7; T1[15] = 18'h1c53e; + T1[16] = 18'h0c63e; T1[17] = 18'h0dd14; T1[18] = 18'h0fba8; T1[19] = 18'h1b8e6; + T1[20] = 18'h0d944; T1[21] = 18'h10292; T1[22] = 18'h0d276; T1[23] = 18'h027b1; + T1[24] = 18'h0c0c7; T1[25] = 18'h100a9; T1[26] = 18'h0a9ab; T1[27] = 18'h0e696; + T1[28] = 18'h10798; T1[29] = 18'h0ae91; T1[30] = 18'h08d4d; T1[31] = 18'h0080b; // - T2[ 0] = 18'h1a479; T2[ 1] = 18'h102f5; T2[ 2] = 18'h10e72; T2[ 3] = 18'h120b1; - T2[ 4] = 18'h169cd; T2[ 5] = 18'h1d0c4; T2[ 6] = 18'h11462; T2[ 7] = 18'h12015; - T2[ 8] = 18'h16fca; T2[ 9] = 18'h1044f; T2[10] = 18'h122b4; T2[11] = 18'h10a5a; - T2[12] = 18'h12620; T2[13] = 18'h0e01a; T2[14] = 18'h095cd; T2[15] = 18'h1278a; - T2[16] = 18'h10763; T2[17] = 18'h09fe7; T2[18] = 18'h0d35c; T2[19] = 18'h10e24; - T2[20] = 18'h1527d; T2[21] = 18'h115b3; T2[22] = 18'h05443; T2[23] = 18'h1190a; - T2[24] = 18'h0fcc3; T2[25] = 18'h115e2; T2[26] = 18'h0a398; T2[27] = 18'h0608d; - T2[28] = 18'h13075; T2[29] = 18'h0d816; T2[30] = 18'h0bb4c; T2[31] = 18'h04e8a; + T2[ 0] = 18'h1193b; T2[ 1] = 18'h0de9c; T2[ 2] = 18'h0b993; T2[ 3] = 18'h0d2cd; + T2[ 4] = 18'h106ad; T2[ 5] = 18'h076da; T2[ 6] = 18'h10cab; T2[ 7] = 18'h15cd5; + T2[ 8] = 18'h15425; T2[ 9] = 18'h16287; T2[10] = 18'h0fd64; T2[11] = 18'h06ee0; + T2[12] = 18'h1b0c9; T2[13] = 18'h01a5e; T2[14] = 18'h1855c; T2[15] = 18'h17bf9; + T2[16] = 18'h1c83c; T2[17] = 18'h158ed; T2[18] = 18'h086df; T2[19] = 18'h16676; + T2[20] = 18'h0a0f8; T2[21] = 18'h14545; T2[22] = 18'h09641; T2[23] = 18'h16863; + T2[24] = 18'h17e20; T2[25] = 18'h0d457; T2[26] = 18'h05a9b; T2[27] = 18'h1a4cf; + T2[28] = 18'h1582a; T2[29] = 18'h1686c; T2[30] = 18'h1394e; T2[31] = 18'h0bdbc; // - AB[ 0] = 18'h0be4e; AB[ 1] = 18'h0fed7; AB[ 2] = 18'h09496; AB[ 3] = 18'h07181; - AB[ 4] = 18'h0ee73; AB[ 5] = 18'h04692; AB[ 6] = 18'h0141a; AB[ 7] = 18'h0078c; - AB[ 8] = 18'h030eb; AB[ 9] = 18'h0217c; AB[10] = 18'h0696f; AB[11] = 18'h0a165; - AB[12] = 18'h0b753; AB[13] = 18'h04af9; AB[14] = 18'h0ed7c; AB[15] = 18'h079ce; - AB[16] = 18'h0e863; AB[17] = 18'h097df; AB[18] = 18'h07984; AB[19] = 18'h048af; - AB[20] = 18'h0197f; AB[21] = 18'h0206a; AB[22] = 18'h027e7; AB[23] = 18'h04b3a; - AB[24] = 18'h03312; AB[25] = 18'h03b56; AB[26] = 18'h04487; AB[27] = 18'h0bd6a; - AB[28] = 18'h04e4b; AB[29] = 18'h069ca; AB[30] = 18'h0f994; AB[31] = 18'h0dd4e; - AB[32] = 18'h1b024; AB[33] = 18'h0127f; AB[34] = 18'h02631; AB[35] = 18'h0186b; - AB[36] = 18'h03adb; AB[37] = 18'h05368; AB[38] = 18'h059a5; AB[39] = 18'h002e0; - AB[40] = 18'h0b78a; AB[41] = 18'h016f3; AB[42] = 18'h0b58d; AB[43] = 18'h03ddb; - AB[44] = 18'h078b0; AB[45] = 18'h0073b; AB[46] = 18'h07337; AB[47] = 18'h0c7b0; - AB[48] = 18'h00668; AB[49] = 18'h0106d; AB[50] = 18'h01a44; AB[51] = 18'h05ee3; - AB[52] = 18'h0462d; AB[53] = 18'h0fdeb; AB[54] = 18'h05f85; AB[55] = 18'h02af9; - AB[56] = 18'h0e1c0; AB[57] = 18'h00989; AB[58] = 18'h01201; AB[59] = 18'h0e194; - AB[60] = 18'h07f93; AB[61] = 18'h0e739; AB[62] = 18'h07cf6; AB[63] = 18'h019df; + N[ 0] = 18'h00f97; N[ 1] = 18'h018bb; N[ 2] = 18'h08a44; N[ 3] = 18'h00858; + N[ 4] = 18'h06647; N[ 5] = 18'h0042c; N[ 6] = 18'h0fa09; N[ 7] = 18'h0c8d3; + N[ 8] = 18'h0bbc7; N[ 9] = 18'h0e2dd; N[10] = 18'h017fd; N[11] = 18'h0ef4a; + N[12] = 18'h002ef; N[13] = 18'h090c1; N[14] = 18'h032db; N[15] = 18'h028b1; + N[16] = 18'h05f0a; N[17] = 18'h0ebfd; N[18] = 18'h017ca; N[19] = 18'h09587; + N[20] = 18'h0d266; N[21] = 18'h0563c; N[22] = 18'h041af; N[23] = 18'h0433f; + N[24] = 18'h08e83; N[25] = 18'h0bc19; N[26] = 18'h000b2; N[27] = 18'h05b53; + N[28] = 18'h00e5d; N[29] = 18'h09bc5; N[30] = 18'h0a822; N[31] = 18'h0efff; // - N_COEFF[ 0] = 18'h05a97; N_COEFF[ 1] = 18'h0ac69; N_COEFF[ 2] = 18'h0d51e; N_COEFF[ 3] = 18'h07326; - N_COEFF[ 4] = 18'h01053; N_COEFF[ 5] = 18'h0f68a; N_COEFF[ 6] = 18'h09c70; N_COEFF[ 7] = 18'h064f7; - N_COEFF[ 8] = 18'h01041; N_COEFF[ 9] = 18'h0c2bf; N_COEFF[10] = 18'h0f01f; N_COEFF[11] = 18'h01842; - N_COEFF[12] = 18'h0e69a; N_COEFF[13] = 18'h037ea; N_COEFF[14] = 18'h0b4a0; N_COEFF[15] = 18'h0c1ab; - N_COEFF[16] = 18'h0bd5b; N_COEFF[17] = 18'h09e5e; N_COEFF[18] = 18'h039bd; N_COEFF[19] = 18'h06430; - N_COEFF[20] = 18'h0b460; N_COEFF[21] = 18'h08bd4; N_COEFF[22] = 18'h09fcd; N_COEFF[23] = 18'h05391; - N_COEFF[24] = 18'h0fa45; N_COEFF[25] = 18'h08892; N_COEFF[26] = 18'h0732c; N_COEFF[27] = 18'h0baf6; - N_COEFF[28] = 18'h067a9; N_COEFF[29] = 18'h0b184; N_COEFF[30] = 18'h02089; N_COEFF[31] = 18'h0297b; - N_COEFF[32] = 18'h01810; + N_COEFF[ 0] = 18'h09fd9; N_COEFF[ 1] = 18'h0b367; N_COEFF[ 2] = 18'h0e467; N_COEFF[ 3] = 18'h0de24; + N_COEFF[ 4] = 18'h02022; N_COEFF[ 5] = 18'h0f0e8; N_COEFF[ 6] = 18'h02919; N_COEFF[ 7] = 18'h09901; + N_COEFF[ 8] = 18'h0da43; N_COEFF[ 9] = 18'h0023b; N_COEFF[10] = 18'h0ebf8; N_COEFF[11] = 18'h0f04e; + N_COEFF[12] = 18'h0942f; N_COEFF[13] = 18'h029e9; N_COEFF[14] = 18'h07cb0; N_COEFF[15] = 18'h08c25; + N_COEFF[16] = 18'h04e60; N_COEFF[17] = 18'h05cdc; N_COEFF[18] = 18'h0dff7; N_COEFF[19] = 18'h0279b; + N_COEFF[20] = 18'h0610d; N_COEFF[21] = 18'h0f04a; N_COEFF[22] = 18'h001dc; N_COEFF[23] = 18'h03429; + N_COEFF[24] = 18'h0f78c; N_COEFF[25] = 18'h0c3e2; N_COEFF[26] = 18'h00ed8; N_COEFF[27] = 18'h039c0; + N_COEFF[28] = 18'h02ac2; N_COEFF[29] = 18'h0f703; N_COEFF[30] = 18'h0c54e; N_COEFF[31] = 18'h022d9; + N_COEFF[32] = 18'h0f994; // - Q[ 0] = 18'h0ac02; Q[ 1] = 18'h0a026; Q[ 2] = 18'h06825; Q[ 3] = 18'h08f06; - Q[ 4] = 18'h03783; Q[ 5] = 18'h04cb5; Q[ 6] = 18'h0e8ea; Q[ 7] = 18'h083d2; - Q[ 8] = 18'h0fec9; Q[ 9] = 18'h066d9; Q[10] = 18'h0edad; Q[11] = 18'h06c12; - Q[12] = 18'h0a5fb; Q[13] = 18'h07295; Q[14] = 18'h06a0c; Q[15] = 18'h081a5; - Q[16] = 18'h03493; Q[17] = 18'h0a393; Q[18] = 18'h03da6; Q[19] = 18'h0beb1; - Q[20] = 18'h0d138; Q[21] = 18'h02815; Q[22] = 18'h0f191; Q[23] = 18'h03617; - Q[24] = 18'h08d4f; Q[25] = 18'h0f641; Q[26] = 18'h00e82; Q[27] = 18'h01774; - Q[28] = 18'h0bf39; Q[29] = 18'h0929d; Q[30] = 18'h05273; Q[31] = 18'h0c30a; - Q[32] = 18'h0eef3; + AB[ 0] = 18'h0c199; AB[ 1] = 18'h0957a; AB[ 2] = 18'h070ad; AB[ 3] = 18'h0e5a6; + AB[ 4] = 18'h0fec9; AB[ 5] = 18'h00b73; AB[ 6] = 18'h09c72; AB[ 7] = 18'h0cdf0; + AB[ 8] = 18'h08755; AB[ 9] = 18'h07560; AB[10] = 18'h084b1; AB[11] = 18'h0ad3f; + AB[12] = 18'h074fe; AB[13] = 18'h04d74; AB[14] = 18'h00e16; AB[15] = 18'h0d3b3; + AB[16] = 18'h0d418; AB[17] = 18'h02f12; AB[18] = 18'h0c301; AB[19] = 18'h0be2b; + AB[20] = 18'h08222; AB[21] = 18'h0056c; AB[22] = 18'h01c7c; AB[23] = 18'h0bc95; + AB[24] = 18'h03427; AB[25] = 18'h0c65a; AB[26] = 18'h089ac; AB[27] = 18'h02117; + AB[28] = 18'h0ff7d; AB[29] = 18'h01cde; AB[30] = 18'h02709; AB[31] = 18'h01c56; + AB[32] = 18'h0f35a; AB[33] = 18'h08ce6; AB[34] = 18'h0a8e5; AB[35] = 18'h0d6d4; + AB[36] = 18'h06868; AB[37] = 18'h09105; AB[38] = 18'h0219e; AB[39] = 18'h0bc40; + AB[40] = 18'h00e0a; AB[41] = 18'h07783; AB[42] = 18'h0187a; AB[43] = 18'h0b922; + AB[44] = 18'h02609; AB[45] = 18'h0c64b; AB[46] = 18'h06b4b; AB[47] = 18'h04b79; + AB[48] = 18'h0fed6; AB[49] = 18'h03eac; AB[50] = 18'h04cac; AB[51] = 18'h0d47d; + AB[52] = 18'h045fd; AB[53] = 18'h04fa8; AB[54] = 18'h0597c; AB[55] = 18'h0a10d; + AB[56] = 18'h0bf44; AB[57] = 18'h08671; AB[58] = 18'h0112a; AB[59] = 18'h08ccf; + AB[60] = 18'h0cae5; AB[61] = 18'h04d94; AB[62] = 18'h0b95a; AB[63] = 18'h00040; // - N[ 0] = 18'h03ad9; N[ 1] = 18'h046b4; N[ 2] = 18'h0e181; N[ 3] = 18'h0fac7; - N[ 4] = 18'h0be72; N[ 5] = 18'h029ab; N[ 6] = 18'h07e51; N[ 7] = 18'h037a8; - N[ 8] = 18'h0880c; N[ 9] = 18'h05a7d; N[10] = 18'h043c2; N[11] = 18'h038c9; - N[12] = 18'h01275; N[13] = 18'h0aa0d; N[14] = 18'h0c0c1; N[15] = 18'h0d035; - N[16] = 18'h04082; N[17] = 18'h0543c; N[18] = 18'h0dcb0; N[19] = 18'h0497c; - N[20] = 18'h0b12c; N[21] = 18'h013d4; N[22] = 18'h0b80a; N[23] = 18'h051cf; - N[24] = 18'h0286c; N[25] = 18'h0b600; N[26] = 18'h0d838; N[27] = 18'h0af4b; - N[28] = 18'h08274; N[29] = 18'h06a07; N[30] = 18'h0beea; N[31] = 18'h0f000; + Q[ 0] = 18'h021b1; Q[ 1] = 18'h0d2db; Q[ 2] = 18'h0754b; Q[ 3] = 18'h01fc1; + Q[ 4] = 18'h063f7; Q[ 5] = 18'h086e5; Q[ 6] = 18'h0bcea; Q[ 7] = 18'h02260; + Q[ 8] = 18'h0c54c; Q[ 9] = 18'h0e298; Q[10] = 18'h05d07; Q[11] = 18'h0f978; + Q[12] = 18'h0e742; Q[13] = 18'h0a3f0; Q[14] = 18'h0b31e; Q[15] = 18'h041b7; + Q[16] = 18'h06ed9; Q[17] = 18'h03ac5; Q[18] = 18'h0f8eb; Q[19] = 18'h0c619; + Q[20] = 18'h067e9; Q[21] = 18'h00350; Q[22] = 18'h00376; Q[23] = 18'h02ebf; + Q[24] = 18'h0b125; Q[25] = 18'h05f7d; Q[26] = 18'h0f121; Q[27] = 18'h07ba4; + Q[28] = 18'h03050; Q[29] = 18'h0642e; Q[30] = 18'h0c2fc; Q[31] = 18'h0dfcf; + Q[32] = 18'h03f9e; // - M[ 0] = 18'h041b2; M[ 1] = 18'h00128; M[ 2] = 18'h06b69; M[ 3] = 18'h08e7e; - M[ 4] = 18'h0118c; M[ 5] = 18'h0b96d; M[ 6] = 18'h0ebe5; M[ 7] = 18'h0f873; - M[ 8] = 18'h0cf14; M[ 9] = 18'h0de83; M[10] = 18'h09690; M[11] = 18'h05e9a; - M[12] = 18'h048ac; M[13] = 18'h0b506; M[14] = 18'h01283; M[15] = 18'h08631; - M[16] = 18'h0179c; M[17] = 18'h06820; M[18] = 18'h0867b; M[19] = 18'h0b750; - M[20] = 18'h0e680; M[21] = 18'h0df95; M[22] = 18'h0d818; M[23] = 18'h0b4c5; - M[24] = 18'h0cced; M[25] = 18'h0c4a9; M[26] = 18'h0bb78; M[27] = 18'h04295; - M[28] = 18'h0b1b4; M[29] = 18'h09635; M[30] = 18'h0066b; M[31] = 18'h022b1; - M[32] = 18'h04fdb; M[33] = 18'h0efc8; M[34] = 18'h00a14; M[35] = 18'h04bef; - M[36] = 18'h006a1; M[37] = 18'h0f1a6; M[38] = 18'h0fc40; M[39] = 18'h0adb5; - M[40] = 18'h06e8f; M[41] = 18'h02c60; M[42] = 18'h083e1; M[43] = 18'h0f862; - M[44] = 18'h0da61; M[45] = 18'h0dd3d; M[46] = 18'h03381; M[47] = 18'h09db0; - M[48] = 18'h05454; M[49] = 18'h07525; M[50] = 18'h0d9c7; M[51] = 18'h0a361; - M[52] = 18'h049e0; M[53] = 18'h0a671; M[54] = 18'h0242e; M[55] = 18'h07cb2; - M[56] = 18'h02021; M[57] = 18'h0bde1; M[58] = 18'h025aa; M[59] = 18'h0c615; - M[60] = 18'h05645; M[61] = 18'h03b46; M[62] = 18'h065d6; M[63] = 18'h0390d; - M[64] = 18'h0e005; + M[ 0] = 18'h03e67; M[ 1] = 18'h06a85; M[ 2] = 18'h08f52; M[ 3] = 18'h01a59; + M[ 4] = 18'h00136; M[ 5] = 18'h0f48c; M[ 6] = 18'h0638d; M[ 7] = 18'h0320f; + M[ 8] = 18'h078aa; M[ 9] = 18'h08a9f; M[10] = 18'h07b4e; M[11] = 18'h052c0; + M[12] = 18'h08b01; M[13] = 18'h0b28b; M[14] = 18'h0f1e9; M[15] = 18'h02c4c; + M[16] = 18'h02be7; M[17] = 18'h0d0ed; M[18] = 18'h03cfe; M[19] = 18'h041d4; + M[20] = 18'h07ddd; M[21] = 18'h0fa93; M[22] = 18'h0e383; M[23] = 18'h0436a; + M[24] = 18'h0cbd8; M[25] = 18'h039a5; M[26] = 18'h07653; M[27] = 18'h0dee8; + M[28] = 18'h00082; M[29] = 18'h0e321; M[30] = 18'h0d8f6; M[31] = 18'h0e3a9; + M[32] = 18'h00ca5; M[33] = 18'h035ed; M[34] = 18'h02b8f; M[35] = 18'h063bd; + M[36] = 18'h0ec9f; M[37] = 18'h0b8bb; M[38] = 18'h00389; M[39] = 18'h0ca27; + M[40] = 18'h0bea7; M[41] = 18'h0df1e; M[42] = 18'h0d685; M[43] = 18'h0cc1b; + M[44] = 18'h036c4; M[45] = 18'h01ce9; M[46] = 18'h0c43b; M[47] = 18'h05f58; + M[48] = 18'h02c77; M[49] = 18'h03a12; M[50] = 18'h0eea8; M[51] = 18'h0ac31; + M[52] = 18'h05838; M[53] = 18'h093ac; M[54] = 18'h0fd54; M[55] = 18'h06e13; + M[56] = 18'h002e2; M[57] = 18'h06af4; M[58] = 18'h0ea18; M[59] = 18'h083b3; + M[60] = 18'h059f7; M[61] = 18'h016d3; M[62] = 18'h0c3ad; M[63] = 18'h0dbfc; + M[64] = 18'h03ba4; // end @@ -151,12 +151,17 @@ module tb_square; reg [17:0] mgr_fat_bram_y_din; reg mac_fat_bram_xy_ena = 1'b0; + reg mac_fat_bram_xy_ena_aux = 1'b0; reg mac_fat_bram_xy_reg_ena = 1'b0; + reg mac_fat_bram_xy_reg_ena_aux = 1'b0; reg [ 2:0] mac_fat_bram_xy_bank; reg [ 2:0] mac_fat_bram_xy_bank_aux; reg [ 7:0] mac_fat_bram_xy_addr[0:4]; wire [17:0] mac_fat_bram_x_dout[0:4]; wire [17:0] mac_fat_bram_y_dout[0:4]; + wire [ 7:0] mac_fat_bram_xy_addr_aux = mac_fat_bram_xy_addr[4]; // handy for debug + wire [17:0] mac_fat_bram_x_dout_aux = mac_fat_bram_x_dout[4]; // handy for debug + wire [17:0] mac_fat_bram_y_dout_aux = mac_fat_bram_x_dout[4]; // handy for debug reg tb_slim_bram_xy_ena = 1'b0; reg [ 1:0] tb_slim_bram_xy_bank; @@ -201,8 +206,8 @@ module tb_square; .dina (mgr_fat_bram_x_din), .clkb (clk), - .enb (mac_fat_bram_xy_ena), - .regceb (mac_fat_bram_xy_reg_ena), + .enb (z < (NUM_MULTS/2) ? mac_fat_bram_xy_ena : mac_fat_bram_xy_ena_aux), + .regceb (z < (NUM_MULTS/2) ? mac_fat_bram_xy_reg_ena : mac_fat_bram_xy_reg_ena_aux), .addrb ({(z < (NUM_MULTS/2) ? mac_fat_bram_xy_bank : mac_fat_bram_xy_bank_aux), mac_fat_bram_xy_addr[z]}), .doutb (mac_fat_bram_x_dout[z]) @@ -217,8 +222,8 @@ module tb_square; .dina (mgr_fat_bram_y_din), .clkb (clk), - .enb (mac_fat_bram_xy_ena), - .regceb (mac_fat_bram_xy_reg_ena), + .enb (z < (NUM_MULTS/2) ? mac_fat_bram_xy_ena : mac_fat_bram_xy_ena_aux), + .regceb (z < (NUM_MULTS/2) ? mac_fat_bram_xy_reg_ena : mac_fat_bram_xy_reg_ena_aux), .addrb ({z < (NUM_MULTS/2) ? mac_fat_bram_xy_bank : mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_addr[z]}), .doutb (mac_fat_bram_y_dout[z]) @@ -550,6 +555,24 @@ module tb_square; end + // + // Recombinator Interface + // + wire [ 2:0] recomb_fat_bram_xy_bank; + wire [ 7:0] recomb_fat_bram_xy_addr; + wire [17:0] recomb_fat_bram_x_dout; + wire [17:0] recomb_fat_bram_y_dout; + wire recomb_fat_bram_xy_dout_valid; + wire [ 2:0] recomb_slim_bram_xy_bank; + wire [ 7:0] recomb_slim_bram_xy_addr; + wire [17:0] recomb_slim_bram_x_dout; + wire [17:0] recomb_slim_bram_y_dout; + wire recomb_slim_bram_xy_dout_valid; + wire recomb_rdy; + + + + // // FSM Transition Logic // @@ -558,6 +581,9 @@ module tb_square; wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle; + // + // Slim - Address + // always @(posedge clk) // case (fsm_state_next) @@ -596,7 +622,10 @@ module tb_square; assign fat_bram_offset_rom[(z-1)/2] = z[2:0]; end endgenerate - + + // + // Fat - Address + // integer j; always @(posedge clk) begin // @@ -648,12 +677,14 @@ module tb_square; FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last); // - FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1}; - FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1}; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_addr[4] <= 8'dX;//{5'd0, 3'd0}; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last); + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: mac_fat_bram_xy_addr[4] <= recomb_fat_bram_xy_dout_valid ? recomb_fat_bram_xy_addr : 8'dX;//recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ? + //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4]; // default: mac_fat_bram_xy_addr[4] <= 8'dX; endcase @@ -690,7 +721,29 @@ module tb_square; default: mac_slim_bram_xy_bank <= 2'bXX; endcase - always @(posedge clk) + always @(posedge clk) begin + // + case (fsm_state_next) + FSM_STATE_MULT_SQUARE_COL_0_INIT, + FSM_STATE_MULT_SQUARE_COL_N_INIT, + FSM_STATE_MULT_SQUARE_COL_0_TRIG, + FSM_STATE_MULT_SQUARE_COL_N_TRIG, + FSM_STATE_MULT_SQUARE_COL_0_BUSY, + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_bank <= BANK_FAT_T1T2; + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: mac_fat_bram_xy_bank <= BANK_FAT_ABL; + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_bank <= BANK_FAT_ABL; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT, + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_bank <= BANK_FAT_N; + default: mac_fat_bram_xy_bank <= 3'bXXX; + endcase // case (fsm_state_next) FSM_STATE_MULT_SQUARE_COL_0_INIT, @@ -698,21 +751,31 @@ module tb_square; FSM_STATE_MULT_SQUARE_COL_0_TRIG, FSM_STATE_MULT_SQUARE_COL_N_TRIG, FSM_STATE_MULT_SQUARE_COL_0_BUSY, - FSM_STATE_MULT_SQUARE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_T1T2}}; + FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_bank_aux <= BANK_FAT_T1T2; FSM_STATE_MULT_TRIANGLE_COL_0_INIT, FSM_STATE_MULT_TRIANGLE_COL_N_INIT, FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, - FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {BANK_FAT_ABH, BANK_FAT_ABL}; + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: mac_fat_bram_xy_bank_aux <= BANK_FAT_ABH; FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, - FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_ABL}}; + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_bank_aux <= BANK_FAT_ABL; FSM_STATE_MULT_RECTANGLE_COL_0_INIT, FSM_STATE_MULT_RECTANGLE_COL_N_INIT, FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, - FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_N}}; - default: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{3'bXXX}}; + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (recomb_fat_bram_xy_dout_valid) + case (recomb_fat_bram_xy_bank) + BANK_FAT_ML: mac_fat_bram_xy_bank_aux <= BANK_FAT_ABL; + BANK_FAT_MH: mac_fat_bram_xy_bank_aux <= BANK_FAT_ABH; + BANK_FAT_EXT: mac_fat_bram_xy_bank_aux <= BANK_FAT_EXT; + default: mac_fat_bram_xy_bank_aux <= 3'bXXX; + endcase + else mac_fat_bram_xy_bank_aux <= 3'bXXX; + default: mac_fat_bram_xy_bank_aux <= 3'bXXX; endcase + // + end @@ -740,7 +803,7 @@ module tb_square; default: mac_slim_bram_xy_ena <= 1'b0; endcase - always @(posedge clk) + always @(posedge clk) begin // case (fsm_state_next) FSM_STATE_MULT_SQUARE_COL_0_INIT, @@ -763,7 +826,25 @@ module tb_square; FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1; default: mac_fat_bram_xy_ena <= 1'b0; endcase - + // + case (fsm_state_next) + FSM_STATE_MULT_TRIANGLE_COL_0_INIT, + FSM_STATE_MULT_TRIANGLE_COL_N_INIT, + FSM_STATE_MULT_TRIANGLE_COL_0_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_N_TRIG, + FSM_STATE_MULT_TRIANGLE_COL_0_BUSY, + FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_ena_aux <= 1'b1; + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_ena_aux <= 1'b0;//1'b1; + FSM_STATE_MULT_RECTANGLE_COL_N_INIT, + FSM_STATE_MULT_RECTANGLE_COL_0_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_N_TRIG, + FSM_STATE_MULT_RECTANGLE_COL_0_BUSY, + FSM_STATE_MULT_RECTANGLE_COL_N_BUSY, + FSM_STATE_MULT_RECTANGLE_HOLDOFF: mac_fat_bram_xy_ena_aux <= recomb_fat_bram_xy_dout_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML); + default: mac_fat_bram_xy_ena_aux <= 1'b0; + endcase + // + end always @(posedge clk) // @@ -771,9 +852,9 @@ module tb_square; always @(posedge clk) // - mac_fat_bram_xy_reg_ena <= mac_fat_bram_xy_ena; + {mac_fat_bram_xy_reg_ena_aux, mac_fat_bram_xy_reg_ena} <= {mac_fat_bram_xy_ena_aux, mac_fat_bram_xy_ena}; - reg ladder_mode = 1'b0; // 0 = X:T1*T2, Y:T2*T2 + reg ladder_mode = 1'b1; // 0 = X:T1*T2, Y:T2*T2 // 1 = X:T1*T2, Y:T2*T1 @@ -948,18 +1029,6 @@ module tb_square; // end - wire [ 2:0] recomb_fat_bram_xy_bank; - wire [ 7:0] recomb_fat_bram_xy_addr; - wire [17:0] recomb_fat_bram_x_dout; - wire [17:0] recomb_fat_bram_y_dout; - wire recomb_fat_bram_xy_dout_valid; - wire [ 2:0] recomb_slim_bram_xy_bank; - wire [ 7:0] recomb_slim_bram_xy_addr; - wire [17:0] recomb_slim_bram_x_dout; - wire [17:0] recomb_slim_bram_y_dout; - wire recomb_slim_bram_xy_dout_valid; - wire recomb_rdy; - modexpng_part_recombinator recomb ( .clk (clk), @@ -1167,5 +1236,144 @@ module tb_square; end + + // + // Reductor + // + reg reductor_ena = 1'b0; + + always @(posedge clk) + // + if (!reductor_ena) + case (fsm_state) + FSM_STATE_MULT_RECTANGLE_COL_0_INIT: reductor_ena <= 1'b1; + endcase + else begin + + + end + + + reg recomb_fat_bram_xy_dout_valid_dly1; + reg recomb_fat_bram_xy_dout_valid_dly2; + reg recomb_fat_bram_xy_dout_valid_dly3; + + reg [2:0] recomb_fat_bram_xy_bank_dly1; + reg [2:0] recomb_fat_bram_xy_bank_dly2; + reg [2:0] recomb_fat_bram_xy_bank_dly3; + + reg [7:0] recomb_fat_bram_xy_addr_dly1; + reg [7:0] recomb_fat_bram_xy_addr_dly2; + reg [7:0] recomb_fat_bram_xy_addr_dly3; + + always @(posedge clk) begin + // + recomb_fat_bram_xy_dout_valid_dly1 <= recomb_fat_bram_xy_dout_valid; + recomb_fat_bram_xy_dout_valid_dly2 <= recomb_fat_bram_xy_dout_valid_dly1; + recomb_fat_bram_xy_dout_valid_dly3 <= recomb_fat_bram_xy_dout_valid_dly2; + // + end + + reg [17:0] recomb_fat_bram_x_dout_dly1; + reg [17:0] recomb_fat_bram_x_dout_dly2; + reg [17:0] recomb_fat_bram_x_dout_dly3; + + always @(posedge clk) begin + // + if (recomb_fat_bram_xy_dout_valid) recomb_fat_bram_x_dout_dly1 <= recomb_fat_bram_x_dout; + if (recomb_fat_bram_xy_dout_valid_dly1) recomb_fat_bram_x_dout_dly2 <= recomb_fat_bram_x_dout_dly1; + if (recomb_fat_bram_xy_dout_valid_dly2) recomb_fat_bram_x_dout_dly3 <= recomb_fat_bram_x_dout_dly2; + // + end + + always @(posedge clk) begin + // + if (recomb_fat_bram_xy_dout_valid) recomb_fat_bram_xy_bank_dly1 <= recomb_fat_bram_xy_bank; + if (recomb_fat_bram_xy_dout_valid_dly1) recomb_fat_bram_xy_bank_dly2 <= recomb_fat_bram_xy_bank_dly1; + if (recomb_fat_bram_xy_dout_valid_dly2) recomb_fat_bram_xy_bank_dly3 <= recomb_fat_bram_xy_bank_dly2; + // + end + + always @(posedge clk) begin + // + if (recomb_fat_bram_xy_dout_valid) recomb_fat_bram_xy_addr_dly1 <= recomb_fat_bram_xy_addr; + if (recomb_fat_bram_xy_dout_valid_dly1) recomb_fat_bram_xy_addr_dly2 <= recomb_fat_bram_xy_addr_dly1; + if (recomb_fat_bram_xy_dout_valid_dly2) recomb_fat_bram_xy_addr_dly3 <= recomb_fat_bram_xy_addr_dly2; + // + end + + + reg [ 1:0] reductor_fat_bram_x_lsb_carry; + reg [15:0] reductor_fat_bram_x_lsb_dummy; + reg [17:0] reductor_fat_bram_x_lsb_dout; + + reg [17:0] reductor_fat_bram_x_msb_dout; + + always @(posedge clk) + // + if (!reductor_ena) begin + reductor_fat_bram_x_lsb_carry <= 2'b00; + end else if (recomb_fat_bram_xy_dout_valid_dly3) begin + + case (recomb_fat_bram_xy_bank_dly3) + BANK_FAT_ML: {reductor_fat_bram_x_lsb_carry, reductor_fat_bram_x_lsb_dummy} <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_lsb_carry; + BANK_FAT_MH: + if (recomb_fat_bram_xy_addr_dly3 == 8'd0) + {reductor_fat_bram_x_lsb_carry, reductor_fat_bram_x_lsb_dummy} <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_lsb_carry; + else if (recomb_fat_bram_xy_addr_dly3 == 8'd1) + reductor_fat_bram_x_msb_dout <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_lsb_carry; + else + reductor_fat_bram_x_msb_dout <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux; + BANK_FAT_EXT: + reductor_fat_bram_x_msb_dout <= recomb_fat_bram_x_dout_dly3; + endcase + // + end + /* + + + reg [17:0] recomb_fat_bram_x_dout_dly1; + reg [17:0] recomb_fat_bram_x_dout_dly2; + + reg [ 2:0] recomb_fat_bram_xy_bank_dly1; + reg [ 2:0] recomb_fat_bram_xy_bank_dly2; + + reg [1:0] reductor_fat_bram_x_carry; + + reg [15:0] reductor_fat_bram_x_dummy; + reg [17:0] reductor_fat_bram_x_dout; + reg reductor_fat_bram_xy_dout_valid; + + always @(posedge clk) + // + if (reductor_ena) begin + + if (recomb_fat_bram_xy_dout_valid) begin + recomb_fat_bram_x_dout_dly1 <= recomb_fat_bram_x_dout; + recomb_fat_bram_xy_bank_dly1 <= recomb_fat_bram_xy_bank; + end + + if (mac_fat_bram_xy_ena_aux) begin + recomb_fat_bram_x_dout_dly2 <= recomb_fat_bram_x_dout_dly1; + recomb_fat_bram_xy_bank_dly2 <= recomb_fat_bram_xy_bank_dly1; + end + + if (mac_fat_bram_xy_reg_ena_aux) + case (recomb_fat_bram_xy_bank_dly2) + BANK_FAT_ML: {reductor_fat_bram_x_carry, reductor_fat_bram_x_dummy} <= recomb_fat_bram_x_dout_dly2 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_carry; + endcase + + //reductor_fat_bram_xy_dout_valid <= mac_fat_bram_xy_reg_ena_aux; + + end else begin + + reductor_fat_bram_x_carry <= 2'b00; + reductor_fat_bram_xy_dout_valid <= 1'b0; + + end + */ + + + endmodule diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh index 57eef35..77b57f3 100644 --- a/rtl/modexpng_parameters.vh +++ b/rtl/modexpng_parameters.vh @@ -7,10 +7,10 @@ localparam [2:0] BANK_FAT_T1T2 = 3'd0; localparam [2:0] BANK_FAT_ABL = 3'd1; localparam [2:0] BANK_FAT_ABH = 3'd2; localparam [2:0] BANK_FAT_N = 3'd3; -localparam [2:0] BANK_FAT_ML = 3'd4; -localparam [2:0] BANK_FAT_MH = 3'd5; +localparam [2:0] BANK_FAT_ML = 3'd4; // not needed +localparam [2:0] BANK_FAT_MH = 3'd5; // not needed localparam [2:0] BANK_FAT_EXT = 3'd6; // 0 -> MH' -localparam [2:0] BANK_FAT_UNUSED = 3'd7; +localparam [2:0] BANK_FAT_UNUSED = 3'd7; // not needed localparam [1:0] BANK_SLIM_T1T2 = 2'd0; localparam [1:0] BANK_SLIM_N_COEFF = 2'd1; diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v index 567ecd5..957ba8e 100644 --- a/rtl/modexpng_part_recombinator.v +++ b/rtl/modexpng_part_recombinator.v @@ -1095,6 +1095,7 @@ module modexpng_part_recombinator end // end + // endcase // 2'd2: case (rcmb_xy_dout_valid) @@ -1103,6 +1104,20 @@ module modexpng_part_recombinator // endcase // + 2'd3: case (rcmb_xy_dout_valid) + // + 2'b00: begin + // + if (recomb_msb_flag_delay_2) begin + // + rdy_adv <= ~recomb_msb_flag_delay_1; + // + end + // + end + // + endcase + // endcase // end -- cgit v1.2.3