aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul Selkirk <paul@psgd.org> 2018-11-26 17:26:55 -0500
committer Paul Selkirk <paul@psgd.org> 2018-11-26 17:27:58 -0500
commit 97034edb35e92361daaa24512989d00f6c3fd517 (patch)
tree c908bc4b4347d21bbdb776aef69f30936c4e4898
parent 23850319664533c42c5c60d1e91277358d07dd63 (diff)
Add loop unrolling to bring the profilable mem* functions closer to
newlib, because memset is called a LOT in the course of RSA signing, and we need to understand how much time we're actually spending there.
-rw-r--r-- libraries/libprof/Makefile 6
-rw-r--r-- libraries/libprof/memfunc.c (renamed from memfunc.c) 52
-rw-r--r-- projects/hsm/Makefile 1
3 files changed, 44 insertions, 15 deletions
diff --git a/libraries/libprof/Makefile b/libraries/libprof/Makefile
index 4fe5fb4..37b9a23 100644
--- a/libraries/libprof/Makefile
+++ b/libraries/libprof/Makefile
@@ -1,12 +1,16 @@
LIB = libprof.a
-OBJS = gmon.o profil.o profiler.o
+OBJS = gmon.o profil.o profiler.o memfunc.o
# Don't profile the profiling code, because that way lies madness (and recursion).
CFLAGS := $(subst -pg,,$(CFLAGS))
all: $(LIB)
+# But do profile the mem functions
+memfunc.o: memfunc.c
+ $(CC) $(CFLAGS) -pg -c -o $@ $<
+
%.o : %.c
$(CC) $(CFLAGS) -c -o $@ $<
diff --git a/memfunc.c b/libraries/libprof/memfunc.c
index fd94b28..fc908e1 100644
--- a/memfunc.c
+++ b/libraries/libprof/memfunc.c
@@ -4,9 +4,15 @@
/*
* Profilable substitutes for mem*(), lacking libc_p.a
*
- * This code was written with reference to newlib, but does not copy every
- * quirk and loop-unrolling optimization from newlib. Its only purpose is
- * to let us figure out who is calling memcpy 2 million times.
+ * This code was written with reference to newlib, and was recently
+ * brought closer into line with newlib, to make profiling more accurate.
+ *
+ * Newlib is maintained by Cygwin, which is Red Hat. There is no copyright
+ * statement in the corresponding newlib source files, nor is there a
+ * COPYING file in newlib/libc/string or newlib/libc. Consider this file
+ * to be covered under one or more of the 50 copyright notices in
+ * newlib/COPYING, most of which are BSD. In any case, this file is only
+ * used for profiling, and is not used in production builds.
*/
#define is_word_aligned(x) (((size_t)(x) & 3) == 0)
@@ -16,12 +22,19 @@ void *memcpy(void *dst, const void *src, size_t n)
uint8_t *d8 = (uint8_t *)dst;
uint8_t *s8 = (uint8_t *)src;
- if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) {
+ if (n >= sizeof(uint32_t) && is_word_aligned(src) && is_word_aligned(dst)) {
uint32_t *d32 = (uint32_t *)dst;
uint32_t *s32 = (uint32_t *)src;
- while (n >= 4) {
+ while (n >= 4 * sizeof(uint32_t)) {
+ *d32++ = *s32++;
+ *d32++ = *s32++;
+ *d32++ = *s32++;
*d32++ = *s32++;
- n -= 4;
+ n -= 4 * sizeof(uint32_t);
+ }
+ while (n >= sizeof(uint32_t)) {
+ *d32++ = *s32++;
+ n -= sizeof(uint32_t);
}
d8 = (uint8_t *)d32;
s8 = (uint8_t *)s32;
@@ -38,12 +51,25 @@ void *memset(void *dst, int c, size_t n)
uint8_t *d8 = (uint8_t *)dst;
uint8_t c8 = (uint8_t)c;
- if (n >= 4 && is_word_aligned(dst)) {
- uint32_t *d32 = (uint32_t *)dst;
+ while (!is_word_aligned(d8)) {
+ if (n--)
+ *d8++ = c8;
+ else
+ return dst;
+ }
+ if (n >= sizeof(uint32_t)) {
+ uint32_t *d32 = (uint32_t *)d8;
uint32_t c32 = (c8 << 24) | (c8 << 16) | (c8 << 8) | (c8);
- while (n >= 4) {
+ while (n >= 4 * sizeof(uint32_t)) {
+ *d32++ = c32;
+ *d32++ = c32;
+ *d32++ = c32;
+ *d32++ = c32;
+ n -= 4 * sizeof(uint32_t);
+ }
+ while (n >= sizeof(uint32_t)) {
*d32++ = c32;
- n -= 4;
+ n -= sizeof(uint32_t);
}
d8 = (uint8_t *)d32;
}
@@ -59,15 +85,15 @@ int memcmp(const void *dst, const void *src, size_t n)
uint8_t *d8 = (uint8_t *)dst;
uint8_t *s8 = (uint8_t *)src;
- if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) {
+ if (n >= sizeof(uint32_t) && is_word_aligned(src) && is_word_aligned(dst)) {
uint32_t *d32 = (uint32_t *)dst;
uint32_t *s32 = (uint32_t *)src;
- while (n >= 4) {
+ while (n >= sizeof(uint32_t)) {
if (*d32 != *s32)
break;
d32++;
s32++;
- n -= 4;
+ n -= sizeof(uint32_t);
}
d8 = (uint8_t *)d32;
s8 = (uint8_t *)s32;
diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile
index 3430e14..37c552d 100644
--- a/projects/hsm/Makefile
+++ b/projects/hsm/Makefile
@@ -25,7 +25,6 @@ LDFLAGS += -mfloat-abi=hard -mfpu=fpv4-sp-d16
LDFLAGS += -Wl,--gc-sections
ifdef DO_PROFILING
-OBJS += $(TOPLEVEL)/memfunc.o
LDFLAGS += --specs=rdimon.specs -lc -lrdimon
endif