diff options
-rw-r--r-- | Makefile | 26 | ||||
-rw-r--r-- | libraries/libprof/Makefile | 20 | ||||
-rw-r--r-- | libraries/libprof/README.md | 65 | ||||
-rw-r--r-- | libraries/libprof/gmon.c | 324 | ||||
-rw-r--r-- | libraries/libprof/gmon.h | 177 | ||||
-rw-r--r-- | libraries/libprof/profil.c | 96 | ||||
-rw-r--r-- | libraries/libprof/profil.h | 60 | ||||
-rw-r--r-- | libraries/libprof/profiler.S | 28 | ||||
-rw-r--r-- | libraries/libtfm/Makefile | 5 | ||||
-rw-r--r-- | libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c | 7 | ||||
-rw-r--r-- | memfunc.c | 101 | ||||
-rw-r--r-- | projects/hsm/Makefile | 20 | ||||
-rw-r--r-- | projects/hsm/hsm.c | 10 | ||||
-rw-r--r-- | projects/hsm/mgmt-misc.c | 38 | ||||
-rw-r--r-- | projects/hsm/mgmt-task.c | 44 | ||||
-rw-r--r-- | syscalls.c | 6 | ||||
-rw-r--r-- | task.c | 62 | ||||
-rw-r--r-- | task.h | 12 |
18 files changed, 1097 insertions, 4 deletions
@@ -27,6 +27,16 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# A couple features that can be enabled at build time, but are not turned on +# by default: +# DO_PROFILING: Enable gmon profiling. See libraries/libprof/README.md for +# more details. +# DO_TASK_METRICS: Enable task metrics - average/max time between yields. This +# can be helpful when experimentally adding yields to improve responsiveness. +# +# To enable, run `make DO_PROFILING=1 DO_TASK_METRICS=1` +# (or DO_PROFILING=xyzzy - `make` just cares that the symbol is defined) + # export all variables to child processes by default .EXPORT_ALL_VARIABLES: @@ -51,6 +61,9 @@ LIBCLI_BLD = $(LIBS_DIR)/libcli LIBTFM_SRC = $(CRYPTECH_ROOT)/sw/thirdparty/libtfm LIBTFM_BLD = $(LIBS_DIR)/libtfm +LIBPROF_SRC = $(LIBS_DIR)/libprof +LIBPROF_BLD = $(LIBS_DIR)/libprof + LIBS = $(MBED_DIR)/libstmf4.a # linker script @@ -114,6 +127,9 @@ CFLAGS += -I$(MBED_DIR)/targets/cmsis/TARGET_STM/TARGET_STM32F4 CFLAGS += -I$(MBED_DIR)/targets/cmsis/TARGET_STM/TARGET_STM32F4/$(BOARD) CFLAGS += -I$(MBED_DIR)/targets/hal/TARGET_STM/TARGET_STM32F4 CFLAGS += -I$(MBED_DIR)/targets/hal/TARGET_STM/TARGET_STM32F4/$(BOARD) +ifdef DO_TASK_METRICS +CFLAGS += -DDO_TASK_METRICS +endif %.o : %.c $(CC) $(CFLAGS) -c -o $@ $< @@ -121,7 +137,13 @@ CFLAGS += -I$(MBED_DIR)/targets/hal/TARGET_STM/TARGET_STM32F4/$(BOARD) %.o : %.S $(CC) $(CFLAGS) -c -o $@ $< +ifdef DO_PROFILING +CFLAGS += -pg -DDO_PROFILING +LIBS += $(LIBPROF_BLD)/libprof.a +all: hsm +else all: board-test cli-test libhal-test hsm bootloader +endif $(MBED_DIR)/libstmf4.a: .FORCE $(MAKE) -C $(MBED_DIR) @@ -141,6 +163,9 @@ $(LIBHAL_BLD)/libhal.a: $(LIBTFM_BLD)/libtfm.a .FORCE $(LIBCLI_BLD)/libcli.a: .FORCE $(MAKE) -C $(LIBCLI_BLD) +$(LIBPROF_BLD)/libprof.a: .FORCE + $(MAKE) -C $(LIBPROF_BLD) + libhal-test: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a .FORCE $(MAKE) -C projects/libhal-test @@ -177,3 +202,4 @@ distclean: clean $(MAKE) -C $(MBED_DIR) clean $(MAKE) -C $(LIBTFM_BLD) clean $(MAKE) -C $(LIBCLI_BLD) clean + $(MAKE) -C $(LIBPROF_BLD) clean diff --git a/libraries/libprof/Makefile b/libraries/libprof/Makefile new file mode 100644 index 0000000..4fe5fb4 --- /dev/null +++ b/libraries/libprof/Makefile @@ -0,0 +1,20 @@ +LIB = libprof.a + +OBJS = gmon.o profil.o profiler.o + +# Don't profile the profiling code, because that way lies madness (and recursion). +CFLAGS := $(subst -pg,,$(CFLAGS)) + +all: $(LIB) + +%.o : %.c + $(CC) $(CFLAGS) -c -o $@ $< + +%.o : %.S + $(CC) $(CFLAGS) -c -o $@ $< + +$(LIB): $(OBJS) + $(AR) -r $@ $^ + +clean: + rm -f $(OBJS) $(LIB) diff --git a/libraries/libprof/README.md b/libraries/libprof/README.md new file mode 100644 index 0000000..d464644 --- /dev/null +++ b/libraries/libprof/README.md @@ -0,0 +1,65 @@ +Profiling the Cryptech Alpha +============================ + +Origin +------ + +This code was copied from https://github.com/ErichStyger/mcuoneclipse.git, +directory `Examples/KDS/FRDM-K64F120M/FRDM-K64F_Profiling/Profiling`, commit +9b7eedddd8b24968128582aedc63be95b61f782c, dated Mon Jan 9 16:56:17 2017 +0100. + +References +---------- + +I recommend reading both of these to understand how the profiling code works. + +1. [Tutorial: Using GNU Profiling (gprof) with ARM Cortex-M](https://mcuoneclipse.com/2015/08/23/tutorial-using-gnu-profiling-gprof-with-arm-cortex-m/) + +2. [Semihosting with ARM, GCC, and OpenOCD](http://bgamari.github.io/posts/2014-10-31-semihosting.html) + +How to build +------------ + +From the top level, run + + $ make DO_PROFILING=1 hsm + +By default, all code is profiled, *except* the profiling code itself, +because that would cause fatal recursion. + +How to run +---------- + +You need to start OpenOCD on the host, and enable semihosting, at least +before you try to use it as a remote file system. + +I recommend executing the following in the `projects/hsm` directory, so that +`gmon.out` ends up in the same directory as `hsm.elf`. + +Start the debugger: + + $ ../../bin/debug hsm + +In another window, connect to OpenOCD: + + $ telnet localhost 4444 + +In the OpenOCD console, enable semihosting: + + > arm semihosting enable + > exit + +Then connect to the Cryptech management console: + + $ cryptech_console + +In the Cryptech console, type `profile start`, then start the unit test or +whatever will be exercising the hsm. Afterwards, in the console, type +`profile stop`. + +After invoking `profile stop`, it can take several minutes to write +`gmon.out` over OpenOCD to the host. + +In the `projects/hsm` directory, run `gprof` to analyse the `gmon.out` file: + + $ gprof hsm.elf >gprof.txt diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c new file mode 100644 index 0000000..317a173 --- /dev/null +++ b/libraries/libprof/gmon.c @@ -0,0 +1,324 @@ +/*- + * Copyright (c) 1983, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This file is taken from Cygwin distribution. Please keep it in sync. + * The differences should be within __MINGW32__ guard. + */ + +#include <fcntl.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <stdint.h> +#include "gmon.h" +#include "profil.h" +#include <string.h> + +#define bzero(ptr,size) memset (ptr, 0, size); +#define ERR(s) write(2, s, sizeof(s)) + +struct gmonparam _gmonparam = { GMON_PROF_OFF, NULL, 0, NULL, 0, NULL, 0, 0L, 0, 0, 0}; +static char already_setup = 0; /* flag to indicate if we need to init */ +static int s_scale; +/* see profil(2) where this is described (incorrectly) */ +#define SCALE_1_TO_1 0x10000L + +static void moncontrol(int mode); + +void monstartup (size_t lowpc, size_t highpc) { + register size_t o; + char *cp; + struct gmonparam *p = &_gmonparam; + + if (already_setup) { + /* zero out cp as value will be added there */ + bzero(p->tos, p->kcountsize + p->fromssize + p->tossize); + moncontrol(1); /* start */ + return; + } + already_setup = 1; + + /* enable semihosting, for eventual output */ + extern void initialise_monitor_handles(void); + initialise_monitor_handles(); + + /* + * round lowpc and highpc to multiples of the density we're using + * so the rest of the scaling (here and in gprof) stays in ints. + */ + p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER)); + p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER)); + p->textsize = p->highpc - p->lowpc + 0x20; + p->kcountsize = p->textsize / HISTFRACTION; + p->fromssize = p->textsize / HASHFRACTION; + p->tolimit = p->textsize * ARCDENSITY / 100; + if (p->tolimit < MINARCS) { + p->tolimit = MINARCS; + } else if (p->tolimit > MAXARCS) { + p->tolimit = MAXARCS; + } + p->tossize = p->tolimit * sizeof(struct tostruct); + + extern void *hal_allocate_static_memory(const size_t size); + cp = hal_allocate_static_memory(p->kcountsize + p->fromssize + p->tossize); + if (cp == NULL) { + ERR("monstartup: out of memory\n"); + return; + } + + /* zero out cp as value will be added there */ + bzero(cp, p->kcountsize + p->fromssize + p->tossize); + + p->tos = (struct tostruct *)cp; + cp += p->tossize; + p->kcount = (unsigned short *)cp; + cp += p->kcountsize; + p->froms = (unsigned short *)cp; + + p->tos[0].link = 0; + + o = p->highpc - p->lowpc; + if (p->kcountsize < o) { +#ifndef notdef + s_scale = ((float)p->kcountsize / o ) * SCALE_1_TO_1; +#else /* avoid floating point */ + int quot = o / p->kcountsize; + + if (quot >= 0x10000) + s_scale = 1; + else if (quot >= 0x100) + s_scale = 0x10000 / quot; + else if (o >= 0x800000) + s_scale = 0x1000000 / (o / (p->kcountsize >> 8)); + else + s_scale = 0x1000000 / ((o << 8) / p->kcountsize); +#endif + } else { + s_scale = SCALE_1_TO_1; + } + moncontrol(1); /* start */ +} + +void _mcleanup(void) { + static const char gmon_out[] = "gmon.out"; + int fd; + int fromindex; + int endfrom; + size_t frompc; + int toindex; + struct rawarc rawarc; + struct gmonparam *p = &_gmonparam; + struct gmonhdr gmonhdr = {0}, *hdr; + const char *proffile; +#ifdef DEBUG + int log, len; + char dbuf[200]; +#endif + + if (p->state == GMON_PROF_ERROR) { + ERR("_mcleanup: tos overflow\n"); + } + moncontrol(0); /* stop */ + proffile = gmon_out; + fd = open(proffile , O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666); + if (fd < 0) { + perror( proffile ); + return; + } +#ifdef DEBUG + log = open("gmon.log", O_CREAT|O_TRUNC|O_WRONLY, 0664); + if (log < 0) { + perror("mcount: gmon.log"); + return; + } + len = sprintf(dbuf, "[mcleanup1] kcount 0x%x ssiz %d\n", + (unsigned int)p->kcount, p->kcountsize); + write(log, dbuf, len); +#endif + hdr = (struct gmonhdr *)&gmonhdr; + hdr->lpc = p->lowpc; + hdr->hpc = p->highpc; + hdr->ncnt = p->kcountsize + sizeof(gmonhdr); + hdr->version = GMONVERSION; + hdr->profrate = PROF_HZ; + hdr->spare[0] = hdr->spare[1] = hdr->spare[2] = 0; + write(fd, (char *)hdr, sizeof *hdr); + write(fd, p->kcount, p->kcountsize); + endfrom = p->fromssize / sizeof(*p->froms); + for (fromindex = 0; fromindex < endfrom; fromindex++) { + if (p->froms[fromindex] == 0) { + continue; + } + frompc = p->lowpc; + frompc += fromindex * HASHFRACTION * sizeof(*p->froms); + for (toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].link) { +#ifdef DEBUG + len = sprintf(dbuf, + "[mcleanup2] frompc 0x%x selfpc 0x%x count %ld\n" , + frompc, p->tos[toindex].selfpc, + p->tos[toindex].count); + write(log, dbuf, len); +#endif + rawarc.raw_frompc = frompc; + rawarc.raw_selfpc = p->tos[toindex].selfpc; + rawarc.raw_count = p->tos[toindex].count; + write(fd, &rawarc, sizeof rawarc); + } + } + close(fd); +} + +/* + * Control profiling + * profiling is what mcount checks to see if + * all the data structures are ready. + */ +static void moncontrol(int mode) { + struct gmonparam *p = &_gmonparam; + + if (mode) { + /* start */ + profil((char *)p->kcount, p->kcountsize, p->lowpc, s_scale); + p->state = GMON_PROF_ON; + } else { + /* stop */ + profil((char *)0, 0, 0, 0); + p->state = GMON_PROF_OFF; + } +} + +void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { + register struct tostruct *top; + register struct tostruct *prevtop; + register long toindex; + struct gmonparam *p = &_gmonparam; + + /* + * check that we are profiling + * and that we aren't recursively invoked. + */ + if (p->state!=GMON_PROF_ON) { + goto out; + } + p->state++; + /* + * check that frompcindex is a reasonable pc value. + * for example: signal catchers get called from the stack, + * not from text space. too bad. + */ + frompcindex = (uint32_t*)((long)frompcindex - (long)p->lowpc); + if ((unsigned long)frompcindex > p->textsize) { + goto done; + } + frompcindex = (uint32_t*)&p->froms[((long)frompcindex) / (HASHFRACTION * sizeof(*p->froms))]; + toindex = *((unsigned short*)frompcindex); /* get froms[] value */ + if (toindex == 0) { + /* + * first time traversing this arc + */ + toindex = ++p->tos[0].link; /* the link of tos[0] points to the last used record in the array */ + if (toindex >= p->tolimit) { /* more tos[] entries than we can handle! */ + goto overflow; + } + *((unsigned short*)frompcindex) = (unsigned short)toindex; /* store new 'to' value into froms[] */ + top = &p->tos[toindex]; + top->selfpc = (size_t)selfpc; + top->count = 1; + top->link = 0; + goto done; + } + top = &p->tos[toindex]; + if (top->selfpc == (size_t)selfpc) { + /* + * arc at front of chain; usual case. + */ + top->count++; + goto done; + } + /* + * have to go looking down chain for it. + * top points to what we are looking at, + * prevtop points to previous top. + * we know it is not at the head of the chain. + */ + for (; /* goto done */; ) { + if (top->link == 0) { + /* + * top is end of the chain and none of the chain + * had top->selfpc == selfpc. + * so we allocate a new tostruct + * and link it to the head of the chain. + */ + toindex = ++p->tos[0].link; + if (toindex >= p->tolimit) { + goto overflow; + } + top = &p->tos[toindex]; + top->selfpc = (size_t)selfpc; + top->count = 1; + top->link = *((unsigned short*)frompcindex); + *(unsigned short*)frompcindex = (unsigned short)toindex; + goto done; + } + /* + * otherwise, check the next arc on the chain. + */ + prevtop = top; + top = &p->tos[top->link]; + if (top->selfpc == (size_t)selfpc) { + /* + * there it is. + * increment its count + * move it to the head of the chain. + */ + top->count++; + toindex = prevtop->link; + prevtop->link = top->link; + top->link = *((unsigned short*)frompcindex); + *((unsigned short*)frompcindex) = (unsigned short)toindex; + goto done; + } + } + done: + p->state--; + /* and fall through */ + out: + return; /* normal return restores saved registers */ + overflow: + p->state++; /* halt further profiling */ + #define TOLIMIT "mcount: tos overflow\n" + write (2, TOLIMIT, sizeof(TOLIMIT)); + goto out; +} + +void _monInit(void) { + _gmonparam.state = GMON_PROF_OFF; + already_setup = 0; +} diff --git a/libraries/libprof/gmon.h b/libraries/libprof/gmon.h new file mode 100644 index 0000000..8b5ecf0 --- /dev/null +++ b/libraries/libprof/gmon.h @@ -0,0 +1,177 @@ +/* $OpenBSD: gmon.h,v 1.3 1996/04/21 22:31:46 deraadt Exp $ */ +/* $NetBSD: gmon.h,v 1.5 1996/04/09 20:55:30 cgd Exp $ */ + +/*- + * Copyright (c) 1982, 1986, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)gmon.h 8.2 (Berkeley) 1/4/94 + */ + +/* + * This file is taken from Cygwin distribution. Please keep it in sync. + * The differences should be within __MINGW32__ guard. + */ + +#ifndef _SYS_GMON_H_ +#define _SYS_GMON_H_ + +#ifndef __P +#define __P(x) x +#endif + +/* On POSIX systems, profile.h is a KRB5 header. To avoid collisions, just + pull in profile.h's content here. The profile.h header won't be provided + by Mingw-w64 anymore at one point. */ +#if 0 +#include <profile.h> +#else +#ifndef _WIN64 +#define _MCOUNT_CALL __attribute__ ((regparm (2))) +extern void _mcount(void); +#else +#define _MCOUNT_CALL +extern void mcount(void); +#endif +#define _MCOUNT_DECL __attribute__((gnu_inline)) __inline__ \ + void _MCOUNT_CALL _mcount_private +#define MCOUNT +#endif + +/* + * Structure prepended to gmon.out profiling data file. + */ +struct gmonhdr { + size_t lpc; /* base pc address of sample buffer */ + size_t hpc; /* max pc address of sampled buffer */ + int ncnt; /* size of sample buffer (plus this header) */ + int version; /* version number */ + int profrate; /* profiling clock rate */ + int spare[3]; /* reserved */ +}; +#define GMONVERSION 0x00051879 + +/* + * histogram counters are unsigned shorts (according to the kernel). + */ +#define HISTCOUNTER unsigned short + +/* + * fraction of text space to allocate for histogram counters here, 1/2 + */ +#define HISTFRACTION 2 + +/* + * Fraction of text space to allocate for from hash buckets. + * The value of HASHFRACTION is based on the minimum number of bytes + * of separation between two subroutine call points in the object code. + * Given MIN_SUBR_SEPARATION bytes of separation the value of + * HASHFRACTION is calculated as: + * + * HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1); + * + * For example, on the VAX, the shortest two call sequence is: + * + * calls $0,(r0) + * calls $0,(r0) + * + * which is separated by only three bytes, thus HASHFRACTION is + * calculated as: + * + * HASHFRACTION = 3 / (2 * 2 - 1) = 1 + * + * Note that the division above rounds down, thus if MIN_SUBR_FRACTION + * is less than three, this algorithm will not work! + * + * In practice, however, call instructions are rarely at a minimal + * distance. Hence, we will define HASHFRACTION to be 2 across all + * architectures. This saves a reasonable amount of space for + * profiling data structures without (in practice) sacrificing + * any granularity. + */ +#define HASHFRACTION 2 + +/* + * percent of text space to allocate for tostructs with a minimum. + */ +#define ARCDENSITY 2 /* this is in percentage, relative to text size! */ +#define MINARCS 50 +#define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER))) - 2) + +struct tostruct { + size_t selfpc; /* callee address/program counter. The caller address is in froms[] array which points to tos[] array */ + long count; /* how many times it has been called */ + unsigned short link; /* link to next entry in hash table. For tos[0] this points to the last used entry */ + unsigned short pad; /* additional padding bytes, to have entries 4byte aligned */ +}; + +/* + * a raw arc, with pointers to the calling site and + * the called site and a count. + */ +struct rawarc { + size_t raw_frompc; + size_t raw_selfpc; + long raw_count; +}; + +/* + * general rounding functions. + */ +#define ROUNDDOWN(x,y) (((x)/(y))*(y)) +#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) + +/* + * The profiling data structures are housed in this structure. + */ +struct gmonparam { + int state; + unsigned short *kcount; /* histogram PC sample array */ + size_t kcountsize; /* size of kcount[] array in bytes */ + unsigned short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */ + size_t fromssize; /* size of froms[] array in bytes */ + struct tostruct *tos; /* to struct, contains histogram counter */ + size_t tossize; /* size of tos[] array in bytes */ + long tolimit; + size_t lowpc; /* low program counter of area */ + size_t highpc; /* high program counter */ + size_t textsize; /* code size */ +}; +extern struct gmonparam _gmonparam; + +/* + * Possible states of profiling. + */ +#define GMON_PROF_ON 0 +#define GMON_PROF_BUSY 1 +#define GMON_PROF_ERROR 2 +#define GMON_PROF_OFF 3 + +void _mcleanup(void); /* routine to be called to write gmon.out file */ +void _monInit(void); /* initialization routine */ + +#endif /* !_SYS_GMONH_ */ diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c new file mode 100644 index 0000000..b0d8d55 --- /dev/null +++ b/libraries/libprof/profil.c @@ -0,0 +1,96 @@ +/* profil.c -- win32 profil.c equivalent + + Copyright 1998, 1999, 2000, 2001, 2002 Red Hat, Inc. + + This file is part of Cygwin. + + This software is a copyrighted work licensed under the terms of the + Cygwin license. Please consult the file "CYGWIN_LICENSE" for + details. */ + +/* + * This file is taken from Cygwin distribution, adapted to be used for bare embedded targets. + */ +#include <stdio.h> +#include <sys/types.h> +#include <errno.h> +#include <math.h> +#include "profil.h" +#include <string.h> + +#include "stm32f4xx_hal.h" /* __get_MSP */ + +/* global profinfo for profil() call */ +static struct profinfo prof = { + PROFILE_NOT_INIT, 0, 0, 0, 0 +}; + +/* sample the current program counter */ +void profil_callback(void) { + if (prof.state == PROFILE_ON) { + /* The interrupt mechanism pushes xPSR, PC, LR, R12, and R3-R0 onto the + * stack, so PC is the 6th word from the top at that point. However, the + * normal function entry code pushes registers as well, so the stack + * offset right now depends on the call tree that got us here. + */ + size_t pc = (size_t)((uint32_t *)__get_MSP())[6 + 6]; + if (pc >= prof.lowpc && pc < prof.highpc) { + size_t idx = PROFIDX (pc, prof.lowpc, prof.scale); + prof.counter[idx]++; + } + } +} + +/* Stop profiling to the profiling buffer pointed to by p. */ +static int profile_off (struct profinfo *p) { + p->state = PROFILE_OFF; + return 0; +} + +/* Create a timer thread and pass it a pointer P to the profiling buffer. */ +static int profile_on (struct profinfo *p) { + p->state = PROFILE_ON; + return 0; /* ok */ +} + +/* + * start or stop profiling + * + * profiling goes into the SAMPLES buffer of size SIZE (which is treated + * as an array of unsigned shorts of size size/2) + * + * each bin represents a range of pc addresses from OFFSET. The number + * of pc addresses in a bin depends on SCALE. (A scale of 65536 maps + * each bin to two addresses, A scale of 32768 maps each bin to 4 addresses, + * a scale of 1 maps each bin to 128k address). Scale may be 1 - 65536, + * or zero to turn off profiling + */ +int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, unsigned int scale) { + size_t maxbin; + + if (scale > 65536) { + errno = EINVAL; + return -1; + } + profile_off(p); + if (scale) { + memset(samples, 0, size); + memset(p, 0, sizeof *p); + maxbin = size >> 1; + prof.counter = (unsigned short*)samples; + prof.lowpc = offset; + prof.highpc = PROFADDR(maxbin, offset, scale); + prof.scale = scale; + return profile_on(p); + } + return 0; +} + +/* Equivalent to unix profil() + Every SLEEPTIME interval, the user's program counter (PC) is examined: + offset is subtracted and the result is multiplied by scale. + The word pointed to by this address is incremented. */ +int profil (char *samples, size_t size, size_t offset, unsigned int scale) { + return profile_ctl (&prof, samples, size, offset, scale); +} + diff --git a/libraries/libprof/profil.h b/libraries/libprof/profil.h new file mode 100644 index 0000000..c72dc00 --- /dev/null +++ b/libraries/libprof/profil.h @@ -0,0 +1,60 @@ +/* profil.h: gprof profiling header file + + Copyright 1998, 1999, 2000, 2001, 2002 Red Hat, Inc. + +This file is part of Cygwin. + +This software is a copyrighted work licensed under the terms of the +Cygwin license. Please consult the file "CYGWIN_LICENSE" for +details. */ + +/* + * This file is taken from Cygwin distribution. Please keep it in sync. + * The differences should be within __MINGW32__ guard. + */ + +#ifndef __PROFIL_H__ +#define __PROFIL_H__ + +/* profiling frequency. (No larger than 1000) */ +#define PROF_HZ 1000 + +/* convert an addr to an index */ +#define PROFIDX(pc, base, scale) \ + ({ \ + size_t i = (pc - base) / 2; \ + if (sizeof (unsigned long long int) > sizeof (size_t)) \ + i = (unsigned long long int) i * scale / 65536; \ + else \ + i = i / 65536 * scale + i % 65536 * scale / 65536; \ + i; \ + }) + +/* convert an index into an address */ +#define PROFADDR(idx, base, scale) \ + ((base) \ + + ((((unsigned long long)(idx) << 16) \ + / (unsigned long long)(scale)) << 1)) + +/* convert a bin size into a scale */ +#define PROFSCALE(range, bins) (((bins) << 16) / ((range) >> 1)) + +typedef void *_WINHANDLE; + +typedef enum { + PROFILE_NOT_INIT = 0, + PROFILE_ON, + PROFILE_OFF +} PROFILE_State; + +struct profinfo { + PROFILE_State state; /* profiling state */ + unsigned short *counter; /* profiling counters */ + size_t lowpc, highpc; /* range to be profiled */ + unsigned int scale; /* scale value of bins */ +}; + +int profile_ctl(struct profinfo *, char *, size_t, size_t, unsigned int); +int profil(char *, size_t, size_t, unsigned int); + +#endif /* __PROFIL_H__ */ diff --git a/libraries/libprof/profiler.S b/libraries/libprof/profiler.S new file mode 100644 index 0000000..1aa5c97 --- /dev/null +++ b/libraries/libprof/profiler.S @@ -0,0 +1,28 @@ +/* + * profiler.S + * Implements the gprof profiler arc counting function. + * Created on: 06.08.2015 + * Author: Erich Styger + */ + + .syntax unified + .arch armv7-m + +.globl __gnu_mcount_nc +.type __gnu_mcount_nc, %function + +__gnu_mcount_nc: +#if 0 /* dummy version, doing nothing */ + mov ip, lr + pop { lr } + bx ip +#else + push {r0, r1, r2, r3, lr} /* save registers */ + bic r1, lr, #1 /* R1 contains callee address, with thumb bit cleared */ + ldr r0, [sp, #20] /* R0 contains caller address */ + bic r0, r0, #1 /* clear thumb bit */ + bl _mcount_internal /* jump to internal _mcount() implementation */ + pop {r0, r1, r2, r3, ip, lr} /* restore saved registers */ + bx ip /* return to caller */ +#endif + diff --git a/libraries/libtfm/Makefile b/libraries/libtfm/Makefile index 57c76a5..b50421c 100644 --- a/libraries/libtfm/Makefile +++ b/libraries/libtfm/Makefile @@ -30,7 +30,12 @@ LIB := tomsfastmath/libtfm.a # for now, at least until we confirm that it hasn't already been fixed # in a newer version of libtfm. +ifdef DO_PROFILING +# arm-none-eabi-gcc: error: -pg and -fomit-frame-pointer are incompatible +STM32_LIBTFM_CFLAGS_OPTIMIZATION := -O3 -funroll-loops +else STM32_LIBTFM_CFLAGS_OPTIMIZATION := -O3 -funroll-loops -fomit-frame-pointer +endif CFLAGS := $(subst ${STM32_CFLAGS_OPTIMIZATION},${STM32_LIBTFM_CFLAGS_OPTIMIZATION},${CFLAGS}) CFLAGS += -DTFM_ARM -Dasm=__asm__ -Wa,-mimplicit-it=thumb diff --git a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c index d3bafb2..81b27cb 100644 --- a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c +++ b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c @@ -64,6 +64,13 @@ void HardFault_Handler(void) while (1) { ; } } +static void default_SysTick_hook(void) { }; +static void (*SysTick_hook)(void) = default_SysTick_hook; +void set_SysTick_hook(void (*hook)(void)) +{ + SysTick_hook = (hook == NULL) ? default_SysTick_hook : hook; +} + /** * @brief This function handles SysTick Handler. * @param None diff --git a/memfunc.c b/memfunc.c new file mode 100644 index 0000000..fd94b28 --- /dev/null +++ b/memfunc.c @@ -0,0 +1,101 @@ +#include <stdint.h> +#include <string.h> + +/* + * Profilable substitutes for mem*(), lacking libc_p.a + * + * This code was written with reference to newlib, but does not copy every + * quirk and loop-unrolling optimization from newlib. Its only purpose is + * to let us figure out who is calling memcpy 2 million times. + */ + +#define is_word_aligned(x) (((size_t)(x) & 3) == 0) + +void *memcpy(void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t *s32 = (uint32_t *)src; + while (n >= 4) { + *d32++ = *s32++; + n -= 4; + } + d8 = (uint8_t *)d32; + s8 = (uint8_t *)s32; + } + while (n-- > 0) { + *d8++ = *s8++; + } + + return dst; +} + +void *memset(void *dst, int c, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t c8 = (uint8_t)c; + + if (n >= 4 && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t c32 = (c8 << 24) | (c8 << 16) | (c8 << 8) | (c8); + while (n >= 4) { + *d32++ = c32; + n -= 4; + } + d8 = (uint8_t *)d32; + } + while (n-- > 0) { + *d8++ = c8; + } + + return dst; +} + +int memcmp(const void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t *s32 = (uint32_t *)src; + while (n >= 4) { + if (*d32 != *s32) + break; + d32++; + s32++; + n -= 4; + } + d8 = (uint8_t *)d32; + s8 = (uint8_t *)s32; + } + while (n-- > 0) { + if (*d8 != *s8) + return (*d8 - *s8); + d8++; + s8++; + } + + return 0; +} + +void *memmove(void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if ((s8 < d8) && (d8 < s8 + n)) { + /* Destructive overlap...have to copy backwards */ + s8 += n; + d8 += n; + while (n-- > 0) { + *--d8 = *--s8; + } + return dst; + } + + return memcpy(dst, src, n); +} diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile index 927c9f1..3430e14 100644 --- a/projects/hsm/Makefile +++ b/projects/hsm/Makefile @@ -9,9 +9,8 @@ OBJS = mgmt-cli.o \ mgmt-masterkey.o \ mgmt-misc.o \ mgmt-task.o \ - log.o - -BOARD_OBJS += $(TOPLEVEL)/task.o + log.o \ + $(TOPLEVEL)/task.o CFLAGS += -DNUM_RPC_TASK=4 @@ -21,10 +20,23 @@ CFLAGS += -I$(LIBCLI_SRC) LIBS += $(LIBHAL_BLD)/libhal.a $(LIBTFM_BLD)/libtfm.a LIBS += $(LIBCLI_BLD)/libcli.a +LDFLAGS += -mcpu=cortex-m4 -mthumb -mlittle-endian -mthumb-interwork +LDFLAGS += -mfloat-abi=hard -mfpu=fpv4-sp-d16 +LDFLAGS += -Wl,--gc-sections + +ifdef DO_PROFILING +OBJS += $(TOPLEVEL)/memfunc.o +LDFLAGS += --specs=rdimon.specs -lc -lrdimon +endif + +ifdef DO_TASK_METRICS +CFLAGS += -DDO_TASK_METRICS +endif + all: $(PROJ:=.elf) %.elf: %.o $(BOARD_OBJS) $(OBJS) $(LIBS) - $(CC) $(CFLAGS) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map + $(CC) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map $(LDFLAGS) $(OBJCOPY) -O binary $*.elf $*.bin $(SIZE) $*.elf diff --git a/projects/hsm/hsm.c b/projects/hsm/hsm.c index 7fc7410..f20ee64 100644 --- a/projects/hsm/hsm.c +++ b/projects/hsm/hsm.c @@ -268,6 +268,11 @@ size_t uart_rx_max = 0; void HAL_SYSTICK_Callback(void) { +#ifdef DO_PROFILING + extern void profil_callback(void); + profil_callback(); +#endif + size_t count = RINGBUF_COUNT(uart_ringbuf); if (uart_rx_max < count) uart_rx_max = count; @@ -393,6 +398,11 @@ void hal_task_yield(void) task_yield(); } +void hal_task_yield_maybe(void) +{ + task_yield_maybe(); +} + /* A mutex to arbitrate concurrent access to the keystore. */ task_mutex_t ks_mutex = { 0 }; diff --git a/projects/hsm/mgmt-misc.c b/projects/hsm/mgmt-misc.c index b06003f..86f1be8 100644 --- a/projects/hsm/mgmt-misc.c +++ b/projects/hsm/mgmt-misc.c @@ -113,6 +113,35 @@ int cli_receive_data(struct cli_def *cli, uint8_t *buf, size_t len, cli_data_cal return CLI_ERROR; } +#ifdef DO_PROFILING +static int cmd_profile_start(struct cli_def *cli, const char *command, char *argv[], int argc) +{ + cli = cli; + command = command; + argv = argv; + argc = argc; + + extern uint32_t CRYPTECH_FIRMWARE_START; + extern char __etext; /* end of text/code symbol, defined by linker */ + extern void monstartup (size_t lowpc, size_t highpc); + monstartup((size_t)&CRYPTECH_FIRMWARE_START, (size_t)&__etext); + return CLI_OK; +} + +static int cmd_profile_stop(struct cli_def *cli, const char *command, char *argv[], int argc) +{ + cli = cli; + command = command; + argv = argv; + argc = argc; + + extern void _mcleanup(void); + _mcleanup(); + return CLI_OK; +} + +#endif + static int cmd_reboot(struct cli_def *cli, const char *command, char *argv[], int argc) { command = command; @@ -128,6 +157,15 @@ static int cmd_reboot(struct cli_def *cli, const char *command, char *argv[], in void configure_cli_misc(struct cli_def *cli) { +#ifdef DO_PROFILING + struct cli_command *c_profile = cli_register_command(cli, NULL, "profile", NULL, 0, 0, NULL); + + /* profile start */ + cli_register_command(cli, c_profile, "start", cmd_profile_start, 0, 0, "Start collecting profiling data"); + + /* profile stop */ + cli_register_command(cli, c_profile, "stop", cmd_profile_stop, 0, 0, "Stop collecting profiling data"); +#endif /* reboot */ cli_register_command(cli, NULL, "reboot", cmd_reboot, 0, 0, "Reboot the STM32"); } diff --git a/projects/hsm/mgmt-task.c b/projects/hsm/mgmt-task.c index beffc32..c2a3d3f 100644 --- a/projects/hsm/mgmt-task.c +++ b/projects/hsm/mgmt-task.c @@ -77,10 +77,54 @@ static int cmd_task_show(struct cli_def *cli, const char *command, char *argv[], return CLI_OK; } +#ifdef DO_TASK_METRICS +static int cmd_task_show_metrics(struct cli_def *cli, const char *command, char *argv[], int argc) +{ + command = command; + argv = argv; + argc = argc; + + struct task_metrics tm; + + task_get_metrics(&tm); + + cli_print(cli, "avg time between yields: %ld.%06ld sec", tm.avg.tv_sec, tm.avg.tv_usec); + cli_print(cli, "max time between yields: %ld.%06ld sec", tm.max.tv_sec, tm.max.tv_usec); + + return CLI_OK; +} + +static int cmd_task_reset_metrics(struct cli_def *cli, const char *command, char *argv[], int argc) +{ + cli = cli; + command = command; + argv = argv; + argc = argc; + + task_reset_metrics(); + + return CLI_OK; +} +#endif + void configure_cli_task(struct cli_def *cli) { struct cli_command *c = cli_register_command(cli, NULL, "task", NULL, 0, 0, NULL); /* task show */ +#ifdef DO_TASK_METRICS + struct cli_command *c_show = +#endif cli_register_command(cli, c, "show", cmd_task_show, 0, 0, "Show the active tasks"); + +#ifdef DO_TASK_METRICS + /* task show metrics */ + cli_register_command(cli, c_show, "metrics", cmd_task_show_metrics, 0, 0, "Show task metrics"); + + /* task reset */ + struct cli_command *c_reset = cli_register_command(cli, c, "reset", NULL, 0, 0, NULL); + + /* task reset metrics */ + cli_register_command(cli, c_reset, "metrics", cmd_task_reset_metrics, 0, 0, "Reset task metrics"); +#endif } @@ -48,6 +48,7 @@ /***************************************************************************/ +#ifndef DO_PROFILING int _read_r (struct _reent *r, int file, char * ptr, int len) { r = r; @@ -106,6 +107,7 @@ int _close_r (struct _reent *r, int file) return 0; } +#endif /***************************************************************************/ @@ -148,6 +150,7 @@ caddr_t _sbrk_r (struct _reent *r, int incr) /***************************************************************************/ +#ifndef DO_PROFILING int _fstat_r (struct _reent *r, int file, struct stat * st) { r = r; @@ -186,6 +189,7 @@ int _kill (int a, int b) return 0; } +#endif /***************************************************************************/ @@ -198,6 +202,7 @@ int _getpid(int a) /***************************************************************************/ +#ifndef DO_PROFILING int _open(int a, int b) { a = a; @@ -205,5 +210,6 @@ int _open(int a, int b) return 0; } +#endif /*** EOF ***/ @@ -81,6 +81,18 @@ static tcb_t *cur_task = NULL; #define STACK_GUARD_WORD 0x55AA5A5A +#ifdef DO_TASK_METRICS +static uint32_t tick_start = 0; +static uint32_t tick_idle = 0; +static uint32_t tick_max = 0; +static uint32_t nyield = 0; +#endif + +static uint32_t tick_prev = 0; +#ifndef TASK_YIELD_THRESHOLD +#define TASK_YIELD_THRESHOLD 100 +#endif + /* Add a task. */ tcb_t *task_add(char *name, funcp_t func, void *cookie, void *stack, size_t stack_len) @@ -180,6 +192,10 @@ void task_yield(void) if (tail == NULL) return; +#ifdef DO_TASK_METRICS + uint32_t tick0 = HAL_GetTick(); +#endif + /* Find the next runnable task. Loop if every task is waiting. */ while (1) { next = next_task(); @@ -197,6 +213,22 @@ void task_yield(void) * } while (next == NULL); */ +#ifdef DO_TASK_METRICS + uint32_t tick = HAL_GetTick(); + tick_idle += (tick - tick0); + if (tick_start == 0) + tick_start = tick; + if (tick_prev != 0) { + uint32_t duration = tick0 - tick_prev; + if (duration > tick_max) + tick_max = duration; + } + tick_prev = tick; + ++nyield; +#else + tick_prev = HAL_GetTick(); +#endif + /* If there are no other runnable tasks (and cur_task is runnable), * we don't need to context-switch. */ @@ -230,6 +262,14 @@ void task_yield(void) } } +/* Yield if it's been "too long" since the last yield. + */ +void task_yield_maybe(void) +{ + if (HAL_GetTick() - tick_prev >= TASK_YIELD_THRESHOLD) + task_yield(); +} + /* Put the current task to sleep (make it non-runnable). */ void task_sleep(void) @@ -354,3 +394,25 @@ void task_mutex_unlock(task_mutex_t *mutex) if (mutex != NULL) mutex->locked = 0; } + +#ifdef DO_TASK_METRICS +void task_get_metrics(struct task_metrics *tm) +{ + if (tm != NULL) { + tm->avg.tv_sec = 0; + tm->avg.tv_usec = (HAL_GetTick() - tick_start - tick_idle) * 1000 / nyield; + if (tm->avg.tv_usec > 1000000) { + tm->avg.tv_sec = tm->avg.tv_usec / 1000000; + tm->avg.tv_usec = tm->avg.tv_usec % 1000000; + } + tm->max.tv_sec = tick_max / 1000; + tm->max.tv_usec = (tick_max % 1000) * 1000; + } +} + +void task_reset_metrics(void) +{ + tick_start = HAL_GetTick(); + tick_prev = tick_idle = tick_max = nyield = 0; +} +#endif @@ -55,6 +55,7 @@ extern tcb_t *task_add(char *name, funcp_t func, void *cookie, void *stack, size extern void task_set_idle_hook(funcp_t func); extern void task_yield(void); +extern void task_yield_maybe(void); extern void task_sleep(void); extern void task_wake(tcb_t *t); @@ -73,4 +74,15 @@ extern void task_delay(uint32_t delay); extern void task_mutex_lock(task_mutex_t *mutex); extern void task_mutex_unlock(task_mutex_t *mutex); +#ifdef DO_TASK_METRICS +#include <sys/time.h> + +struct task_metrics { + struct timeval avg, max; +}; + +void task_get_metrics(struct task_metrics *tm); +void task_reset_metrics(void); +#endif + #endif /* _TASK_H_ */ |