diff options
author | Paul Selkirk <paul@psgd.org> | 2017-05-10 00:00:04 -0400 |
---|---|---|
committer | Paul Selkirk <paul@psgd.org> | 2017-05-10 00:00:04 -0400 |
commit | 65b94ef5ba1981c74a99cb43ee768fbf480c698b (patch) | |
tree | 15215df2a92b88ef7918ad25ef276d15dbe7381d | |
parent | 4d69f1a0ef2ef3aa23b0ac9f1b9cbc84582136a7 (diff) |
Sigh, right offset for the wrong register. Get the PC (the address we
interrupted) rather than LR (the return address from the function we
interrupted).
Also, change u_short and u_int to unsigned short and unsigned int, since
gcc recently decided that those aren't part of the C99 standard.
Finally, add profilable versions of memcpy, memset, and friends, because
they get called a lot in the course of unit testing, and it would be nice
to know who's calling them.
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | libraries/libprof/Makefile | 20 | ||||
-rw-r--r-- | libraries/libprof/README.txt | 4 | ||||
-rw-r--r-- | libraries/libprof/gmon.c | 17 | ||||
-rw-r--r-- | libraries/libprof/gmon.h | 10 | ||||
-rw-r--r-- | libraries/libprof/profil.c | 11 | ||||
-rw-r--r-- | libraries/libprof/profil.h | 10 | ||||
-rw-r--r-- | memfunc.c | 101 | ||||
-rw-r--r-- | projects/hsm/Makefile | 8 |
9 files changed, 153 insertions, 30 deletions
@@ -108,7 +108,7 @@ CFLAGS += -DUSE_STDPERIPH_DRIVER -DSTM32F4XX -DSTM32F429xx CFLAGS += -D__CORTEX_M4 -DTARGET_STM -DTARGET_STM32F4 -DTARGET_STM32F429ZI -DTOOLCHAIN_GCC -D__FPU_PRESENT=1 -D$(BOARD) CFLAGS += -DENABLE_WEAK_FUNCTIONS CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -CFLAGS += -std=gnu99 +CFLAGS += -std=c99 CFLAGS += -I$(TOPLEVEL) CFLAGS += -I$(MBED_DIR)/api CFLAGS += -I$(MBED_DIR)/targets/cmsis diff --git a/libraries/libprof/Makefile b/libraries/libprof/Makefile new file mode 100644 index 0000000..4fe5fb4 --- /dev/null +++ b/libraries/libprof/Makefile @@ -0,0 +1,20 @@ +LIB = libprof.a + +OBJS = gmon.o profil.o profiler.o + +# Don't profile the profiling code, because that way lies madness (and recursion). +CFLAGS := $(subst -pg,,$(CFLAGS)) + +all: $(LIB) + +%.o : %.c + $(CC) $(CFLAGS) -c -o $@ $< + +%.o : %.S + $(CC) $(CFLAGS) -c -o $@ $< + +$(LIB): $(OBJS) + $(AR) -r $@ $^ + +clean: + rm -f $(OBJS) $(LIB) diff --git a/libraries/libprof/README.txt b/libraries/libprof/README.txt index 9db27a6..1fe378c 100644 --- a/libraries/libprof/README.txt +++ b/libraries/libprof/README.txt @@ -50,6 +50,10 @@ In the OpenOCD console, enable semihosting: > arm semihosting enable +In another window, start the debugger: + + $ sw/stm32/bin/debug projects/hsm/hsm + In the CLI, type `profile start`, then start the unit test or whatever will be exercising the hsm. Afterwards, in the CLI, type `profile stop`. diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c index 458028b..92054fc 100644 --- a/libraries/libprof/gmon.c +++ b/libraries/libprof/gmon.c @@ -38,7 +38,6 @@ #include <unistd.h> #include "gmon.h" #include "profil.h" -#include <stdint.h> #include <string.h> #define bzero(ptr,size) memset (ptr, 0, size); @@ -98,9 +97,9 @@ void monstartup (size_t lowpc, size_t highpc) { p->tos = (struct tostruct *)cp; cp += p->tossize; - p->kcount = (u_short *)cp; + p->kcount = (unsigned short *)cp; cp += p->kcountsize; - p->froms = (u_short *)cp; + p->froms = (unsigned short *)cp; p->tos[0].link = 0; @@ -238,7 +237,7 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { goto done; } frompcindex = (uint32_t*)&p->froms[((long)frompcindex) / (HASHFRACTION * sizeof(*p->froms))]; - toindex = *((u_short*)frompcindex); /* get froms[] value */ + toindex = *((unsigned short*)frompcindex); /* get froms[] value */ if (toindex == 0) { /* * first time traversing this arc @@ -247,7 +246,7 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { if (toindex >= p->tolimit) { /* more tos[] entries than we can handle! */ goto overflow; } - *((u_short*)frompcindex) = (u_short)toindex; /* store new 'to' value into froms[] */ + *((unsigned short*)frompcindex) = (unsigned short)toindex; /* store new 'to' value into froms[] */ top = &p->tos[toindex]; top->selfpc = (size_t)selfpc; top->count = 1; @@ -283,8 +282,8 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { top = &p->tos[toindex]; top->selfpc = (size_t)selfpc; top->count = 1; - top->link = *((u_short*)frompcindex); - *(u_short*)frompcindex = (u_short)toindex; + top->link = *((unsigned short*)frompcindex); + *(unsigned short*)frompcindex = (unsigned short)toindex; goto done; } /* @@ -301,8 +300,8 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { top->count++; toindex = prevtop->link; prevtop->link = top->link; - top->link = *((u_short*)frompcindex); - *((u_short*)frompcindex) = (u_short)toindex; + top->link = *((unsigned short*)frompcindex); + *((unsigned short*)frompcindex) = (unsigned short)toindex; goto done; } } diff --git a/libraries/libprof/gmon.h b/libraries/libprof/gmon.h index 5eb5180..8b5ecf0 100644 --- a/libraries/libprof/gmon.h +++ b/libraries/libprof/gmon.h @@ -125,8 +125,8 @@ struct gmonhdr { struct tostruct { size_t selfpc; /* callee address/program counter. The caller address is in froms[] array which points to tos[] array */ long count; /* how many times it has been called */ - u_short link; /* link to next entry in hash table. For tos[0] this points to the last used entry */ - u_short pad; /* additional padding bytes, to have entries 4byte aligned */ + unsigned short link; /* link to next entry in hash table. For tos[0] this points to the last used entry */ + unsigned short pad; /* additional padding bytes, to have entries 4byte aligned */ }; /* @@ -150,13 +150,13 @@ struct rawarc { */ struct gmonparam { int state; - u_short *kcount; /* histogram PC sample array */ + unsigned short *kcount; /* histogram PC sample array */ size_t kcountsize; /* size of kcount[] array in bytes */ - u_short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */ + unsigned short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */ size_t fromssize; /* size of froms[] array in bytes */ struct tostruct *tos; /* to struct, contains histogram counter */ size_t tossize; /* size of tos[] array in bytes */ - long tolimit; + long tolimit; size_t lowpc; /* low program counter of area */ size_t highpc; /* high program counter */ size_t textsize; /* code size */ diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c index 004af77..0654879 100644 --- a/libraries/libprof/profil.c +++ b/libraries/libprof/profil.c @@ -17,7 +17,6 @@ #include <math.h> #include "profil.h" #include <string.h> -#include <stdint.h> #include "stm32f4xx_hal.h" /* __get_MSP */ @@ -30,7 +29,7 @@ extern void set_SysTick_hook(void (*hook)(void)); /* sample the current program counter */ static void SysTick_hook(void) { - size_t pc = (size_t)((uint32_t *)__get_MSP())[7]; + size_t pc = (size_t)((uint32_t *)__get_MSP())[8]; if (pc >= prof.lowpc && pc < prof.highpc) { size_t idx = PROFIDX (pc, prof.lowpc, prof.scale); prof.counter[idx]++; @@ -55,7 +54,7 @@ static int profile_on (struct profinfo *p) { * start or stop profiling * * profiling goes into the SAMPLES buffer of size SIZE (which is treated - * as an array of u_shorts of size size/2) + * as an array of unsigned shorts of size size/2) * * each bin represents a range of pc addresses from OFFSET. The number * of pc addresses in a bin depends on SCALE. (A scale of 65536 maps @@ -63,7 +62,7 @@ static int profile_on (struct profinfo *p) { * a scale of 1 maps each bin to 128k address). Scale may be 1 - 65536, * or zero to turn off profiling */ -int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, u_int scale) { +int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, unsigned int scale) { size_t maxbin; if (scale > 65536) { @@ -75,7 +74,7 @@ int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, memset(samples, 0, size); memset(p, 0, sizeof *p); maxbin = size >> 1; - prof.counter = (u_short*)samples; + prof.counter = (unsigned short*)samples; prof.lowpc = offset; prof.highpc = PROFADDR(maxbin, offset, scale); prof.scale = scale; @@ -88,7 +87,7 @@ int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, Every SLEEPTIME interval, the user's program counter (PC) is examined: offset is subtracted and the result is multiplied by scale. The word pointed to by this address is incremented. */ -int profil (char *samples, size_t size, size_t offset, u_int scale) { +int profil (char *samples, size_t size, size_t offset, unsigned int scale) { return profile_ctl (&prof, samples, size, offset, scale); } diff --git a/libraries/libprof/profil.h b/libraries/libprof/profil.h index af7a3ed..c72dc00 100644 --- a/libraries/libprof/profil.h +++ b/libraries/libprof/profil.h @@ -48,13 +48,13 @@ typedef enum { } PROFILE_State; struct profinfo { - PROFILE_State state; /* profiling state */ - u_short *counter; /* profiling counters */ + PROFILE_State state; /* profiling state */ + unsigned short *counter; /* profiling counters */ size_t lowpc, highpc; /* range to be profiled */ - u_int scale; /* scale value of bins */ + unsigned int scale; /* scale value of bins */ }; -int profile_ctl(struct profinfo *, char *, size_t, size_t, u_int); -int profil(char *, size_t, size_t, u_int); +int profile_ctl(struct profinfo *, char *, size_t, size_t, unsigned int); +int profil(char *, size_t, size_t, unsigned int); #endif /* __PROFIL_H__ */ diff --git a/memfunc.c b/memfunc.c new file mode 100644 index 0000000..fd94b28 --- /dev/null +++ b/memfunc.c @@ -0,0 +1,101 @@ +#include <stdint.h> +#include <string.h> + +/* + * Profilable substitutes for mem*(), lacking libc_p.a + * + * This code was written with reference to newlib, but does not copy every + * quirk and loop-unrolling optimization from newlib. Its only purpose is + * to let us figure out who is calling memcpy 2 million times. + */ + +#define is_word_aligned(x) (((size_t)(x) & 3) == 0) + +void *memcpy(void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t *s32 = (uint32_t *)src; + while (n >= 4) { + *d32++ = *s32++; + n -= 4; + } + d8 = (uint8_t *)d32; + s8 = (uint8_t *)s32; + } + while (n-- > 0) { + *d8++ = *s8++; + } + + return dst; +} + +void *memset(void *dst, int c, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t c8 = (uint8_t)c; + + if (n >= 4 && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t c32 = (c8 << 24) | (c8 << 16) | (c8 << 8) | (c8); + while (n >= 4) { + *d32++ = c32; + n -= 4; + } + d8 = (uint8_t *)d32; + } + while (n-- > 0) { + *d8++ = c8; + } + + return dst; +} + +int memcmp(const void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t *s32 = (uint32_t *)src; + while (n >= 4) { + if (*d32 != *s32) + break; + d32++; + s32++; + n -= 4; + } + d8 = (uint8_t *)d32; + s8 = (uint8_t *)s32; + } + while (n-- > 0) { + if (*d8 != *s8) + return (*d8 - *s8); + d8++; + s8++; + } + + return 0; +} + +void *memmove(void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if ((s8 < d8) && (d8 < s8 + n)) { + /* Destructive overlap...have to copy backwards */ + s8 += n; + d8 += n; + while (n-- > 0) { + *--d8 = *--s8; + } + return dst; + } + + return memcpy(dst, src, n); +} diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile index 6add6a8..4df60d7 100644 --- a/projects/hsm/Makefile +++ b/projects/hsm/Makefile @@ -9,9 +9,8 @@ OBJS = mgmt-cli.o \ mgmt-masterkey.o \ mgmt-misc.o \ mgmt-task.o \ - log.o - -BOARD_OBJS += $(TOPLEVEL)/task.o + log.o \ + $(TOPLEVEL)/task.o CFLAGS += -DNUM_RPC_TASK=4 @@ -26,6 +25,7 @@ LDFLAGS += -mfloat-abi=hard -mfpu=fpv4-sp-d16 LDFLAGS += -Wl,--gc-sections ifdef DO_PROFILING +OBJS += $(TOPLEVEL)/memfunc.o LIBS += $(LIBPROF_BLD)/libprof.a LDFLAGS += --specs=rdimon.specs -lc -lrdimon endif @@ -33,7 +33,7 @@ endif all: $(PROJ:=.elf) %.elf: %.o $(BOARD_OBJS) $(OBJS) $(LIBS) - $(CC) $(LDFLAGS) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map + $(CC) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map $(LDFLAGS) $(OBJCOPY) -O binary $*.elf $*.bin $(SIZE) $*.elf |