From f21fef907e68ee18762831d0a15cc55513ec740e Mon Sep 17 00:00:00 2001
From: Rob Austein <sra@hactrn.net>
Date: Wed, 14 Jun 2017 00:26:32 -0400
Subject: Faster prime generation algorithm for RSA.

Algorithm suggested by a note in Handbook of Applied Cryptography,
motivated by profiling of libtfm fp_isprime() function showing
something close to 50% of CPU time spent running Montgomery reductions
in the small primes test, before we even get to Miller-Rabin.
---
 rsa.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 98 insertions(+), 14 deletions(-)

(limited to 'rsa.c')

diff --git a/rsa.c b/rsa.c
index 1aae57c..bfeda86 100644
--- a/rsa.c
+++ b/rsa.c
@@ -580,34 +580,118 @@ hal_error_t hal_rsa_key_get_public_exponent(const hal_rsa_key_t * const key,
   return extract_component(key, offsetof(hal_rsa_key_t, e), res, res_len, res_max);
 }
 
+/*
+ * Number of Miller-Rabin rounds and table of primes smaller than
+ * 2000, per Schneier.
+ */
+
+#ifndef HAL_RSA_MILLER_RABIN_ROUNDS
+#define HAL_RSA_MILLER_RABIN_ROUNDS (5)
+#endif
+
+static uint16_t small_primes[] = {
+  2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61,
+  67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137,
+  139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199,
+  211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277,
+  281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359,
+  367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
+  443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521,
+  523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607,
+  613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683,
+  691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773,
+  787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863,
+  877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967,
+  971, 977, 983, 991, 997, 1009, 1013, 1019, 1021, 1031, 1033, 1039,
+  1049, 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109,
+  1117, 1123, 1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193, 1201,
+  1213, 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283,
+  1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367,
+  1373, 1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451,
+  1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, 1523,
+  1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583, 1597, 1601,
+  1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657, 1663, 1667, 1669,
+  1693, 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747, 1753, 1759,
+  1777, 1783, 1787, 1789, 1801, 1811, 1823, 1831, 1847, 1861, 1867,
+  1871, 1873, 1877, 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949,
+  1951, 1973, 1979, 1987, 1993, 1997, 1999
+};
+
 /*
  * Generate a prime factor for an RSA keypair.
  *
- * Get random bytes, munge a few bits, and stuff into a bignum.  Keep
- * doing this until we find a result that's (probably) prime and for
- * which result - 1 is relatively prime with respect to e.
+ * Get random bytes, munge a few bits, and stuff into a bignum to
+ * construct our initial candidate.
+ *
+ * Initialize table of remainders when dividing candidate by each
+ * entry in corresponding table of small primes.  We do this
+ * unconditionally, as a fixed cost.
+ *
+ * If all of the remainders were non-zero, run the requisite number of
+ * Miller-Rabin tests using the first few entries from that same table
+ * of small primes as the test values.
+ *
+ * If we get past Miller-Rabin, the candidate is (probably) prime.
+ * For RSA, we also need (result - 1) to be relatively prime with
+ * respect to the public exponent.  If we pass that test, we have a
+ * winner.
+ *
+ * If any of the above tests failed, we bump the candidate and all of
+ * the remainders by two, then try again.  This is where the table of
+ * remainders pays off: incrementing these remainders is really cheap.
+ *
+ * Remainder table suggested by HAC Note 4.51.
  */
 
 static hal_error_t find_prime(const unsigned prime_length,
                               const fp_int * const e,
                               fp_int *result)
 {
+  uint16_t remainder[sizeof(small_primes)/sizeof(*small_primes)];
   uint8_t buffer[prime_length];
-  hal_error_t err;
   fp_int t[1] = INIT_FP_INT;
+  hal_error_t err;
 
-  do {
-    if ((err = hal_get_random(NULL, buffer, sizeof(buffer))) != HAL_OK)
-      return err;
-    buffer[0                 ] |= 0xc0;
-    buffer[sizeof(buffer) - 1] |= 0x01;
-    fp_read_unsigned_bin(result, buffer, sizeof(buffer));
+  if ((err = hal_get_random(NULL, buffer, sizeof(buffer))) != HAL_OK)
+    return err;
 
-  } while (!fp_isprime(result) ||
-           (fp_sub_d(result, 1, t), fp_gcd(t, unconst_fp_int(e), t), fp_cmp_d(t, 1) != FP_EQ));
+  buffer[0                 ] |= 0xc0;
+  buffer[sizeof(buffer) - 1] |= 0x01;
+  fp_read_unsigned_bin(result, buffer, sizeof(buffer));
 
-  fp_zero(t);
-  return HAL_OK;
+  for (int i = 0; i < sizeof(small_primes)/sizeof(*small_primes); i++) {
+    fp_digit d;
+    fp_mod_d(result, small_primes[i], &d);
+    remainder[i] = d;
+  }
+
+  for (;;) {
+    int prime = 1;
+
+    for (int i = 0; i < sizeof(small_primes)/sizeof(*small_primes); i++)
+      prime &= remainder[i] != 0;
+
+    for (int i = 0; prime && i < HAL_RSA_MILLER_RABIN_ROUNDS; i++) {
+      fp_set(t, small_primes[i]);
+      fp_prime_miller_rabin(result, t, &prime);
+    }
+
+    if (prime) {
+      fp_sub_d(result, 1, t);
+      fp_gcd(t, unconst_fp_int(e), t);
+      prime = fp_cmp_d(t, 1) == FP_EQ;
+    }
+
+    if (prime) {
+      fp_zero(t);
+      return HAL_OK;
+    }
+
+    fp_add_d(result, 2, result);
+
+    for (int i = 0; i < sizeof(small_primes)/sizeof(*small_primes); i++)
+      remainder[i] = (remainder[i] + 2) % small_primes[i];
+  }
 }
 
 /*
-- 
cgit v1.2.3