diff options
author | Paul Selkirk <paul@psgd.org> | 2020-03-09 15:47:17 -0400 |
---|---|---|
committer | Paul Selkirk <paul@psgd.org> | 2020-03-09 15:50:12 -0400 |
commit | 38b388061364339c1259b56fe1d366de8b8630d3 (patch) | |
tree | e6243e7a5c8f8051d85510824dee551222015872 | |
parent | 6b2513f1e0def3a134383aa635cb41feac37be31 (diff) |
Replace the brutally inefficient fp_to_unsigned_bin with one based on
fp_read_unsigned_bin.
I thought about patching it directly in sw/thirdparty/libtfm, but
ultimately decided to keep that pristine (especially since we verify file
checksums there).
-rw-r--r-- | libraries/libtfm/Makefile | 7 | ||||
-rw-r--r-- | libraries/libtfm/fp_to_unsigned_bin.c | 77 |
2 files changed, 82 insertions, 2 deletions
diff --git a/libraries/libtfm/Makefile b/libraries/libtfm/Makefile
index 34b9314..aa5031f 100644
--- a/libraries/libtfm/Makefile
+++ b/libraries/libtfm/Makefile
@@ -45,10 +45,12 @@ CFLAGS += -Wall -W -Wshadow -Wno-uninitialized
 
 TARGETS := $(notdir ${HDR} ${LIB})
 
+REPLACE = fp_to_unsigned_bin.o
+
 all: ${TARGETS}
 
 clean:
-	rm -rf ${TARGETS} $(notdir ${HDR}.tmp) ${LIB} tomsfastmath/src
+	rm -rf ${TARGETS} $(notdir ${HDR}.tmp) ${LIB} tomsfastmath/src ${REPLACE}
 
 distclean: clean
 	rm -f TAGS
@@ -63,6 +65,7 @@ $(notdir ${HDR}): ${HDR}
 $(notdir ${LIB}): ${LIB}
 	ln -f $^ $@
 
-${LIB}: ${HDR}
+${LIB}: ${HDR} ${REPLACE}
 	(cd ${LIBTFM_SRC} && find tomsfastmath/src -type d) | xargs mkdir -p
 	cd tomsfastmath; ${MAKE} CFLAGS='${CFLAGS}'
+	ar r ${LIB} ${REPLACE}
diff --git a/libraries/libtfm/fp_to_unsigned_bin.c b/libraries/libtfm/fp_to_unsigned_bin.c
new file mode 100644
index 0000000..618167d
--- /dev/null
+++ b/libraries/libtfm/fp_to_unsigned_bin.c
@@ -0,0 +1,77 @@
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short.  That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@gmail.com
+ */
+#include <tfm_private.h>
+
+/*
+ * Write the magnitude of a into b as big-endian bytes, most significant
+ * byte first.  The optimized path writes fp_unsigned_bin_size(a) bytes
+ * (none when that is 0); the generic path emits one byte per 8 bits
+ * until the value reaches zero.  The caller must supply a buffer b that
+ * is large enough; no length is passed in, so it cannot be checked here.
+ */
+void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
+{
+  /* If we know the endianness of this architecture, and we're using
+     32-bit fp_digits, we can optimize this */
+#if (defined(ENDIAN_LITTLE) || defined(ENDIAN_BIG)) && !defined(FP_64BIT)
+  /* But not for both simultaneously */
+#if defined(ENDIAN_LITTLE) && defined(ENDIAN_BIG)
+#error Both ENDIAN_LITTLE and ENDIAN_BIG defined.
+#endif
+  {
+     int c = fp_unsigned_bin_size(a);
+     const unsigned char *pd = (const unsigned char *)a->dp;
+
+     /* copy the bytes out */
+#ifdef ENDIAN_BIG
+     /* Digits are stored least significant first, but the bytes inside
+        each digit are already most significant first.  Walk the digits
+        from the top down and emit each one's bytes in memory order.
+        Guard c == 0: otherwise idx would be -4 and the loop body would
+        still run once. */
+     if (c > 0) {
+       /* Use Duff's device to unroll the loop; the switch skips the
+          leading zero bytes of a partially filled top digit. */
+       int idx = (c - 1) & ~3;
+       switch (c % 4) {
+       case 0: do { *b++ = pd[idx+0]; /* fall through */
+       case 3:      *b++ = pd[idx+1]; /* fall through */
+       case 2:      *b++ = pd[idx+2]; /* fall through */
+       case 1:      *b++ = pd[idx+3];
+                    idx -= 4;
+               } while ((c -= 4) > 0);
+       }
+     }
+#else
+     /* Little-endian: the digit array is one least-significant-first
+        byte string, so copy it in reverse. */
+     for (c -= 1; c >= 0; c -= 1) {
+       b[c] = *pd++;
+     }
+#endif
+  }
+#else
+  int x;
+  fp_int t;
+
+  fp_init_copy(&t, a);
+
+  x = 0;
+  while (fp_iszero (&t) == FP_NO) {
+      b[x++] = (unsigned char) (t.dp[0] & 255);
+      fp_div_2d (&t, 8, &t, NULL);
+  }
+  fp_reverse (b, x);
+#endif
+}
+
+/* $Source$ */
+/* $Revision$ */
+/* $Date$ */