aboutsummaryrefslogtreecommitdiff
path: root/libraries
diff options
context:
space:
mode:
author Paul Selkirk <paul@psgd.org> 2018-11-26 17:26:55 -0500
committer Paul Selkirk <paul@psgd.org> 2018-11-26 17:27:58 -0500
commit 97034edb35e92361daaa24512989d00f6c3fd517 (patch)
tree c908bc4b4347d21bbdb776aef69f30936c4e4898 /libraries
parent 23850319664533c42c5c60d1e91277358d07dd63 (diff)
Add loop unrolling to bring the profilable mem* functions closer to
newlib, because memset is called a LOT in the course of RSA signing, and we need to understand how much time we're actually spending there.
Diffstat (limited to 'libraries')
-rw-r--r-- libraries/libprof/Makefile 6
-rw-r--r-- libraries/libprof/memfunc.c 127
2 files changed, 132 insertions, 1 deletions
diff --git a/libraries/libprof/Makefile b/libraries/libprof/Makefile
index 4fe5fb4..37b9a23 100644
--- a/libraries/libprof/Makefile
+++ b/libraries/libprof/Makefile
@@ -1,12 +1,16 @@
LIB = libprof.a
-OBJS = gmon.o profil.o profiler.o
+OBJS = gmon.o profil.o profiler.o memfunc.o
# Don't profile the profiling code, because that way lies madness (and recursion).
CFLAGS := $(subst -pg,,$(CFLAGS))
all: $(LIB)
+# But do profile the mem functions: add -pg back (stripped from CFLAGS above) for memfunc.o only
+memfunc.o: memfunc.c
+ $(CC) $(CFLAGS) -pg -c -o $@ $<
+
%.o : %.c
$(CC) $(CFLAGS) -c -o $@ $<
diff --git a/libraries/libprof/memfunc.c b/libraries/libprof/memfunc.c
new file mode 100644
index 0000000..fc908e1
--- /dev/null
+++ b/libraries/libprof/memfunc.c
@@ -0,0 +1,127 @@
+#include <stdint.h>
+#include <string.h>
+
+/*
+ * Profilable substitutes for mem*(), lacking libc_p.a
+ *
+ * This code was written with reference to newlib, and was recently
+ * brought closer into line with newlib, to make profiling more accurate.
+ *
+ * Newlib is maintained by Cygwin, which is Red Hat. There is no copyright
+ * statement in the corresponding newlib source files, nor is there a
+ * COPYING file in newlib/libc/string or newlib/libc. Consider this file
+ * to be covered under one or more of the 50 copyright notices in
+ * newlib/COPYING, most of which are BSD. In any case, this file is only
+ * used for profiling, and is not used in production builds.
+ */
+
+#define is_word_aligned(x) (((size_t)(x) & 3) == 0) /* nonzero when the low two address bits of x are clear, i.e. x is a multiple of 4 */
+
+/* Copy n bytes from src to dst in ascending address order; returns dst. */
+void *memcpy(void *dst, const void *src, size_t n)
+{
+    uint8_t *out = dst;
+    const uint8_t *in = src;
+
+    /* Word-at-a-time path: taken only when both pointers are 4-byte aligned. */
+    if (n >= sizeof(uint32_t) && is_word_aligned(src) && is_word_aligned(dst)) {
+        uint32_t *wout = dst;
+        const uint32_t *win = src;
+        /* Four words per pass while possible, then one word per pass. */
+        for (; n >= 4 * sizeof(uint32_t); n -= 4 * sizeof(uint32_t)) {
+            *wout++ = *win++;
+            *wout++ = *win++;
+            *wout++ = *win++;
+            *wout++ = *win++;
+        }
+        for (; n >= sizeof(uint32_t); n -= sizeof(uint32_t))
+            *wout++ = *win++;
+
+        out = (uint8_t *)wout;
+        in = (const uint8_t *)win;
+    }
+    /* Whatever is left (or everything, if unaligned) goes byte by byte. */
+    for (; n > 0; n--)
+        *out++ = *in++;
+    return dst;
+}
+
+/*
+ * Fill n bytes at dst with (uint8_t)c and return dst: single bytes until the
+ * pointer is 4-byte aligned, then 32-bit words (unrolled by four), then tail.
+ */
+void *memset(void *dst, int c, size_t n)
+{
+    uint8_t *d8 = (uint8_t *)dst;
+    uint8_t c8 = (uint8_t)c;
+
+    for (; !is_word_aligned(d8); n--) {
+        if (n == 0)
+            return dst;
+        *d8++ = c8;
+    }
+    if (n >= sizeof(uint32_t)) {
+        uint32_t *d32 = (uint32_t *)d8;
+        /* Replicate the byte in unsigned math: c8 promotes to int, so
+         * (c8 << 24) is undefined behavior for c8 >= 0x80 with 32-bit int. */
+        uint32_t c32 = (uint32_t)c8 * UINT32_C(0x01010101);
+        for (; n >= 4 * sizeof(uint32_t); n -= 4 * sizeof(uint32_t)) {
+            *d32++ = c32;
+            *d32++ = c32;
+            *d32++ = c32;
+            *d32++ = c32;
+        }
+        for (; n >= sizeof(uint32_t); n -= sizeof(uint32_t))
+            *d32++ = c32;
+        d8 = (uint8_t *)d32;
+    }
+    while (n-- > 0)
+        *d8++ = c8;
+    return dst;
+}
+
+/*
+ * Compare the first n bytes of dst and src. Returns 0 when they are equal,
+ * otherwise the difference of the first mismatching pair of bytes (each
+ * read as uint8_t), dst byte minus src byte.
+ */
+int memcmp(const void *dst, const void *src, size_t n)
+{
+    const uint8_t *lhs = dst;
+    const uint8_t *rhs = src;
+
+    if (n >= sizeof(uint32_t) && is_word_aligned(src) && is_word_aligned(dst)) {
+        const uint32_t *lw = dst;
+        const uint32_t *rw = src;
+        /* Skip equal words; a differing word is re-examined bytewise below. */
+        for (; n >= sizeof(uint32_t) && *lw == *rw; n -= sizeof(uint32_t)) {
+            lw++;
+            rw++;
+        }
+        lhs = (const uint8_t *)lw;
+        rhs = (const uint8_t *)rw;
+    }
+    for (; n > 0; n--, lhs++, rhs++) {
+        if (*lhs != *rhs)
+            return *lhs - *rhs;
+    }
+    return 0;
+}
+
+/* Copy n bytes from src to dst, tolerating overlapping regions; returns dst. */
+void *memmove(void *dst, const void *src, size_t n)
+{
+    uint8_t *out = dst;
+    const uint8_t *in = src;
+
+    /* dst does not start inside [src, src + n): delegate to memcpy(). */
+    if (!(in < out && out < in + n))
+        return memcpy(dst, src, n);
+
+    /* Destructive overlap: copy from the last byte down to the first. */
+    out += n;
+    in += n;
+    for (; n > 0; n--)
+        *--out = *--in;
+    return dst;
+}