aboutsummaryrefslogtreecommitdiff
path: root/libraries/libprof
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/libprof')
-rw-r--r--libraries/libprof/Makefile24
-rw-r--r--libraries/libprof/README.md65
-rw-r--r--libraries/libprof/gmon.c261
-rw-r--r--libraries/libprof/gmon.h148
-rw-r--r--libraries/libprof/memfunc.c127
-rwxr-xr-xlibraries/libprof/profile-runner.py73
-rw-r--r--libraries/libprof/profiler.S28
7 files changed, 726 insertions, 0 deletions
diff --git a/libraries/libprof/Makefile b/libraries/libprof/Makefile
new file mode 100644
index 0000000..28bedea
--- /dev/null
+++ b/libraries/libprof/Makefile
@@ -0,0 +1,24 @@
+# Build libprof.a, the gprof support library.
+LIB = libprof.a
+
+OBJS = gmon.o profiler.o memfunc.o
+
+# Don't profile the profiling code, because that way lies madness (and recursion).
+CFLAGS := $(subst -pg,,$(CFLAGS))
+
+# 'all' and 'clean' name actions, not files; declare them phony so that a
+# stray file with either name cannot mask the rule.
+.PHONY: all clean
+
+all: $(LIB)
+
+# But do profile the mem functions
+memfunc.o: memfunc.c
+	$(CC) $(CFLAGS) -pg -c -o $@ $<
+
+%.o : %.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+%.o : %.S
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+$(LIB): $(OBJS)
+	$(AR) -r $@ $^
+
+clean:
+	rm -f $(OBJS) $(LIB)
diff --git a/libraries/libprof/README.md b/libraries/libprof/README.md
new file mode 100644
index 0000000..d464644
--- /dev/null
+++ b/libraries/libprof/README.md
@@ -0,0 +1,65 @@
+Profiling the Cryptech Alpha
+============================
+
+Origin
+------
+
+This code was copied from https://github.com/ErichStyger/mcuoneclipse.git,
+directory `Examples/KDS/FRDM-K64F120M/FRDM-K64F_Profiling/Profiling`, commit
+9b7eedddd8b24968128582aedc63be95b61f782c, dated Mon Jan 9 16:56:17 2017 +0100.
+
+References
+----------
+
+I recommend reading both of these to understand how the profiling code works.
+
+1. [Tutorial: Using GNU Profiling (gprof) with ARM Cortex-M](https://mcuoneclipse.com/2015/08/23/tutorial-using-gnu-profiling-gprof-with-arm-cortex-m/)
+
+2. [Semihosting with ARM, GCC, and OpenOCD](http://bgamari.github.io/posts/2014-10-31-semihosting.html)
+
+How to build
+------------
+
+From the top level, run
+
+ $ make DO_PROFILING=1 hsm
+
+By default, all code is profiled, *except* the profiling code itself,
+because that would cause fatal recursion.
+
+How to run
+----------
+
+You need to start OpenOCD on the host and enable semihosting before you
+try to use the host as a remote file system.
+
+I recommend executing the following in the `projects/hsm` directory, so that
+`gmon.out` ends up in the same directory as `hsm.elf`.
+
+Start the debugger:
+
+ $ ../../bin/debug hsm
+
+In another window, connect to OpenOCD:
+
+ $ telnet localhost 4444
+
+In the OpenOCD console, enable semihosting:
+
+ > arm semihosting enable
+ > exit
+
+Then connect to the Cryptech management console:
+
+ $ cryptech_console
+
+In the Cryptech console, type `profile start`, then start the unit test or
+whatever will be exercising the hsm. Afterwards, in the console, type
+`profile stop`.
+
+After invoking `profile stop`, it can take several minutes to write
+`gmon.out` over OpenOCD to the host.
+
+In the `projects/hsm` directory, run `gprof` to analyse the `gmon.out` file:
+
+ $ gprof hsm.elf >gprof.txt
diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c
new file mode 100644
index 0000000..a34f1a2
--- /dev/null
+++ b/libraries/libprof/gmon.c
@@ -0,0 +1,261 @@
+/*-
+ * Copyright (c) 1983, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This file is taken from Cygwin distribution. Please keep it in sync.
+ * The differences should be within __MINGW32__ guard.
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include "gmon.h"
+
+/* Zero 'size' bytes at 'ptr'. Expands to a plain expression (no trailing
+ * semicolon), so it can be used as the body of an if/else. */
+#define bzero(ptr,size) memset((ptr), 0, (size))
+/* Write the string literal 's' to stderr (fd 2), without its terminating NUL. */
+#define ERR(s) write(2, (s), sizeof(s) - 1)
+
+/* profiling frequency. (No larger than 1000) */
+/* Note this doesn't set the frequency, but merely describes it. */
+#define PROF_HZ 1000
+
+/* Global profiler state: starts with profiling off, no tables allocated and
+ * all sizes and addresses zero. The initializer is positional, in the field
+ * order of struct gmonparam. */
+struct gmonparam _gmonparam = { off, NULL, 0, NULL, 0, NULL, 0, 0, 0, 0, 0};
+
+/*
+ * Start (or restart) histogram and call-arc profiling.
+ *
+ * lowpc/highpc bound the text range to profile; they are rounded outwards to
+ * a multiple of HISTFRACTION * sizeof(HISTCOUNTER).
+ *
+ * The first successful call enables semihosting, sizes the kcount/froms/tos
+ * tables from the text size and carves them out of a single static
+ * allocation. Later calls ignore their arguments, zero the existing tables
+ * and switch profiling back on.
+ *
+ * If the allocation fails, a message is written to stderr and the state is
+ * left unchanged. The tables are then still NULL, so a later call retries
+ * the setup instead of zeroing through NULL pointers.
+ */
+void monstartup(size_t lowpc, size_t highpc)
+{
+    static char handles_ready = 0;
+    struct gmonparam *p = &_gmonparam;
+
+    if (p->kcount != NULL) {
+        /* tables already exist: reinitialize counters and arcs */
+        bzero(p->kcount, p->kcountsize);
+        bzero(p->froms, p->fromssize);
+        bzero(p->tos, p->tossize);
+        p->state = on;
+        return;
+    }
+
+    if (!handles_ready) {
+        /* enable semihosting, for eventual output (only once) */
+        extern void initialise_monitor_handles(void);
+        initialise_monitor_handles();
+        handles_ready = 1;
+    }
+
+    /*
+     * round lowpc and highpc to multiples of the density we're using
+     * so the rest of the scaling (here and in gprof) stays in ints.
+     */
+    p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
+    p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
+    p->textsize = p->highpc - p->lowpc;
+    p->kcountsize = p->textsize / HISTFRACTION;
+    p->fromssize = p->textsize / HASHFRACTION;
+
+    /* number of arc records: ARCDENSITY percent of the text size, clamped */
+    p->tolimit = p->textsize * ARCDENSITY / 100;
+    if (p->tolimit < MINARCS) {
+        p->tolimit = MINARCS;
+    } else if (p->tolimit > MAXARCS) {
+        p->tolimit = MAXARCS;
+    }
+    p->tossize = p->tolimit * sizeof(struct tostruct);
+
+    /* one allocation holds kcount[], then froms[], then tos[] */
+    const size_t total = p->kcountsize + p->fromssize + p->tossize;
+    extern void *hal_allocate_static_memory(const size_t size);
+    char *cp = hal_allocate_static_memory(total);
+    if (cp == NULL) {
+        ERR("monstartup: out of memory\n");
+        return;
+    }
+
+    memset(cp, 0, total);
+    p->kcount = (unsigned short *)cp;
+    cp += p->kcountsize;
+    p->froms = (unsigned short *)cp;
+    cp += p->fromssize;
+    p->tos = (struct tostruct *)cp;
+
+    p->state = on;
+}
+
+/*
+ * Write exactly 'len' bytes from 'buf' to 'fd', retrying after short writes.
+ * Returns 0 on success, -1 if write() fails or makes no progress.
+ */
+static int gmon_write_all(int fd, const void *buf, size_t len)
+{
+    const char *cur = buf;
+
+    while (len > 0) {
+        ssize_t n = write(fd, cur, len);
+        if (n <= 0) {
+            return -1;
+        }
+        cur += n;
+        len -= (size_t)n;
+    }
+    return 0;
+}
+
+/*
+ * Dump the collected profile to the file "gmon.out".
+ *
+ * The file consists of a struct gmonhdr, the raw kcount[] histogram, and
+ * one struct rawarc for every arc record reachable from froms[].
+ *
+ * A tos[] overflow recorded earlier (state == err) is reported on stderr but
+ * the data gathered so far is still written. If the file cannot be created,
+ * or a write fails part-way, the error is reported with perror() and the
+ * function returns; in the latter case the file is left truncated.
+ */
+void _mcleanup(void)
+{
+    static const char gmon_out[] = "gmon.out";
+    struct gmonparam *p = &_gmonparam;
+
+    if (p->state == err) {
+        ERR("_mcleanup: tos overflow\n");
+    }
+
+    int fd = open(gmon_out, O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666);
+    if (fd < 0) {
+        perror(gmon_out);
+        return;
+    }
+
+    struct gmonhdr hdr = {
+        .lpc = p->lowpc,
+        .hpc = p->highpc,
+        .ncnt = p->kcountsize + sizeof(struct gmonhdr),
+        .version = GMONVERSION,
+        .profrate = PROF_HZ
+    };
+    if (gmon_write_all(fd, &hdr, sizeof(hdr)) != 0 ||
+        gmon_write_all(fd, p->kcount, p->kcountsize) != 0) {
+        goto fail;
+    }
+
+    /* walk every call-site bucket and emit each arc chained off it */
+    for (size_t fromindex = 0; fromindex < p->fromssize / sizeof(*p->froms); fromindex++) {
+        /* reconstruct the caller address this bucket stands for */
+        size_t frompc = p->lowpc + fromindex * HASHFRACTION * sizeof(*p->froms);
+        for (size_t toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].next) {
+            struct rawarc arc = {
+                .frompc = frompc,
+                .selfpc = p->tos[toindex].selfpc,
+                .count = p->tos[toindex].count
+            };
+            if (gmon_write_all(fd, &arc, sizeof(arc)) != 0) {
+                goto fail;
+            }
+        }
+    }
+
+    close(fd);
+    return;
+
+fail:
+    perror(gmon_out);
+    close(fd);
+}
+
+/*
+ * Record one call arc: caller address 'frompc' -> callee address 'selfpc'.
+ *
+ * froms[] is indexed by the caller's offset into the profiled text and holds
+ * the index of the first tos[] record for that call site (0 = none yet).
+ * The tos[] records of one call site are chained through 'next', with the
+ * most recently hit callee at the head. tos[0].next is the index of the last
+ * record handed out.
+ *
+ * Nothing is recorded unless the state is 'on' and frompc lies inside
+ * [lowpc, lowpc + textsize). When tos[] is exhausted the state is set to
+ * 'err', a message is written to stderr, and arc counting stops.
+ *
+ * Note: there is no guard against re-entrant invocation.
+ */
+void _mcount_internal(size_t frompc, size_t selfpc)
+{
+    struct gmonparam *p = &_gmonparam;
+
+    /* only count arcs while profiling is switched on */
+    if (p->state != on) {
+        return;
+    }
+
+    /*
+     * check that frompc is a reasonable pc value. An address below lowpc
+     * wraps to a huge unsigned offset and is rejected by the same test.
+     * An offset equal to textsize must be rejected as well: it would index
+     * one element past the end of froms[].
+     */
+    frompc -= p->lowpc;
+    if (frompc >= p->textsize) {
+        return;
+    }
+
+    unsigned short *fromptr = &p->froms[frompc / (HASHFRACTION * sizeof(*p->froms))];
+    unsigned short toindex = *fromptr;      /* get froms[] value */
+    struct tostruct *top;
+
+    if (toindex == 0) {                     /* we haven't seen this caller before */
+        toindex = ++p->tos[0].next;         /* index of the last used record in the array */
+        if (toindex >= p->tolimit) {        /* more tos[] entries than we can handle! */
+            goto overflow;
+        }
+        *fromptr = toindex;                 /* store new 'to' value into froms[] */
+        top = &p->tos[toindex];
+        top->selfpc = selfpc;
+        top->count = 1;
+        top->next = 0;
+        return;
+    }
+
+    /* we've seen this caller before */
+    top = &p->tos[toindex];
+    if (top->selfpc == selfpc) {
+        /* arc at front of chain; usual case. */
+        top->count++;
+        return;
+    }
+
+    /*
+     * have to go looking down chain for it.
+     * top points to what we are looking at,
+     * prevtop points to previous top.
+     * we know it is not at the head of the chain.
+     */
+    for (;;) {
+        if (top->next == 0) {
+            /*
+             * top is end of the chain and none of the chain
+             * had top->selfpc == selfpc.
+             * so we allocate a new tostruct
+             * and put it at the head of the chain.
+             */
+            toindex = ++p->tos[0].next;
+            if (toindex >= p->tolimit) {
+                goto overflow;
+            }
+            top = &p->tos[toindex];
+            top->selfpc = selfpc;
+            top->count = 1;
+            top->next = *fromptr;
+            *fromptr = toindex;
+            return;
+        }
+
+        /* otherwise, check the next arc on the chain. */
+        struct tostruct *prevtop = top;
+        top = &p->tos[top->next];
+        if (top->selfpc == selfpc) {
+            /*
+             * there it is.
+             * increment its count
+             * move it to the head of the chain.
+             */
+            top->count++;
+            toindex = prevtop->next;
+            prevtop->next = top->next;
+            top->next = *fromptr;
+            *fromptr = toindex;
+            return;
+        }
+    }
+
+overflow:
+    p->state = err;                         /* halt further profiling */
+    ERR("mcount: tos overflow\n");
+}
+
+#include <stdint.h>
+#include "stm32f4xx_hal.h" /* __get_MSP */
+
+/*
+ * PC-sampling hook; called from the SysTick handler.
+ * While profiling is on, bumps the histogram counter of the bucket that
+ * covers the sampled program counter, provided it lies in the profiled text.
+ */
+void profil_callback(void)
+{
+    struct gmonparam *gp = &_gmonparam;
+
+    if (gp->state != on) {
+        return;
+    }
+
+    /* The interrupt mechanism pushes xPSR, PC, LR, R12, and R3-R0 onto the
+     * stack, so PC is the 6th word from the top at that point. However, the
+     * normal function entry code pushes registers as well, so the stack
+     * offset right now depends on the call tree that got us here.
+     * The second 6 is that extra offset for the current call path.
+     */
+    const uint32_t *stack = (const uint32_t *)__get_MSP();
+    size_t offset = (size_t)stack[6 + 6] - gp->lowpc;
+
+    /* unsigned wrap-around makes addresses below lowpc fail this test too */
+    if (offset < gp->textsize) {
+        gp->kcount[offset / (HISTFRACTION * sizeof(*gp->kcount))]++;
+    }
+}
diff --git a/libraries/libprof/gmon.h b/libraries/libprof/gmon.h
new file mode 100644
index 0000000..9016502
--- /dev/null
+++ b/libraries/libprof/gmon.h
@@ -0,0 +1,148 @@
+/* $OpenBSD: gmon.h,v 1.3 1996/04/21 22:31:46 deraadt Exp $ */
+/* $NetBSD: gmon.h,v 1.5 1996/04/09 20:55:30 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1982, 1986, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)gmon.h 8.2 (Berkeley) 1/4/94
+ */
+
+/*
+ * This file is taken from Cygwin distribution. Please keep it in sync.
+ * The differences should be within __MINGW32__ guard.
+ */
+
+#ifndef _SYS_GMON_H_
+#define _SYS_GMON_H_
+
+/*
+ * Structure prepended to gmon.out profiling data file.
+ *
+ * _mcleanup() in gmon.c fills one of these in and writes it to the file as
+ * raw bytes, so the field order and sizes are part of the file layout.
+ */
+struct gmonhdr {
+ size_t lpc; /* base pc address of sample buffer */
+ size_t hpc; /* max pc address of sampled buffer */
+ int ncnt; /* size of sample buffer (plus this header) */
+ int version; /* version number */
+ int profrate; /* profiling clock rate */
+ int spare[3]; /* reserved */
+};
+#define GMONVERSION 0x00051879 /* value stored in gmonhdr.version */
+
+/*
+ * histogram counters are unsigned shorts (according to the kernel).
+ */
+#define HISTCOUNTER unsigned short
+
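+/*
+ * fraction of text space to allocate for histogram counters. The original
+ * value was 1/2 (HISTFRACTION 2, kept commented out below); this port uses
+ * HISTFRACTION 1.
+ */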
+//#define HISTFRACTION 2
+#define HISTFRACTION 1
+
+/*
+ * Fraction of text space to allocate for from hash buckets.
+ * The value of HASHFRACTION is based on the minimum number of bytes
+ * of separation between two subroutine call points in the object code.
+ * Given MIN_SUBR_SEPARATION bytes of separation the value of
+ * HASHFRACTION is calculated as:
+ *
+ * HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
+ *
+ * For example, on the VAX, the shortest two call sequence is:
+ *
+ * calls $0,(r0)
+ * calls $0,(r0)
+ *
+ * which is separated by only three bytes, thus HASHFRACTION is
+ * calculated as:
+ *
+ * HASHFRACTION = 3 / (2 * 2 - 1) = 1
+ *
+ * Note that the division above rounds down, thus if MIN_SUBR_FRACTION
+ * is less than three, this algorithm will not work!
+ *
+ * In practice, however, call instructions are rarely at a minimal
+ * distance. Hence, we will define HASHFRACTION to be 2 across all
+ * architectures. This saves a reasonable amount of space for
+ * profiling data structures without (in practice) sacrificing
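+ * any granularity.
+ *
+ * NOTE: this port does not follow that choice. HASHFRACTION is set to 1
+ * below (the value 2 is kept commented out), i.e. one froms[] slot per
+ * sizeof(*froms) bytes of text.
+ */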
+//#define HASHFRACTION 2
+#define HASHFRACTION 1
+
+/*
+ * percent of text space to allocate for tostructs with a minimum.
+ */
+#define ARCDENSITY 2 /* this is in percentage, relative to text size! */
+#define MINARCS 50
+#define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER))) - 2)
+
+/*
+ * One call-arc record. Records belonging to the same call site are chained
+ * through 'next'; the froms[] slot of the call site holds the index of the
+ * first record of the chain, and a 'next' of 0 ends the chain.
+ */
+struct tostruct {
+ size_t selfpc; /* callee address. The caller address is in froms[] array which points to tos[] array */
+ unsigned long count; /* how many times it has been called */
+ unsigned short next; /* next entry in hash table. For tos[0] this is the index of the last used entry */
+ unsigned short pad; /* additional padding bytes, to have entries 4byte aligned */
+};
+
+/*
+ * a raw arc, with pointers to the calling site and
+ * the called site and a count.
+ *
+ * _mcleanup() in gmon.c writes one of these to gmon.out, as raw bytes, for
+ * every tostruct reachable from froms[].
+ */
+struct rawarc {
+ size_t frompc; /* caller address, reconstructed from the froms[] index */
+ size_t selfpc; /* callee address, copied from tostruct.selfpc */
+ long count; /* call count, copied from tostruct.count */
+};
+
+/*
+ * general rounding functions.
+ */
+#define ROUNDDOWN(x,y) (((x)/(y))*(y))
+#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
+
+/*
+ * The profiling data structures are housed in this structure.
+ *
+ * There is a single instance, _gmonparam, defined in gmon.c. monstartup()
+ * fills in the sizes and table pointers.
+ */
+struct gmonparam {
+ enum { off, on, err } state; /* on: samples and arcs are recorded; err: set when tos[] overflows; off: initial value */
+ unsigned short *kcount; /* histogram PC sample array */
+ size_t kcountsize; /* size of kcount[] array in bytes */
+ unsigned short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */
+ size_t fromssize; /* size of froms[] array in bytes */
+ struct tostruct *tos; /* to struct, contains arc counters */
+ size_t tossize; /* size of tos[] array in bytes */
+ size_t tolimit; /* number of entries in tos[]; monstartup() clamps it to [MINARCS, MAXARCS] */
+ size_t lowpc; /* low program counter of area */
+ size_t highpc; /* high program counter */
+ size_t textsize; /* code size */
+};
+extern struct gmonparam _gmonparam;
+
+void _mcleanup(void); /* routine to be called to write gmon.out file */
+
+#endif /* !_SYS_GMON_H_ */
diff --git a/libraries/libprof/memfunc.c b/libraries/libprof/memfunc.c
new file mode 100644
index 0000000..fc908e1
--- /dev/null
+++ b/libraries/libprof/memfunc.c
@@ -0,0 +1,127 @@
+#include <stdint.h>
+#include <string.h>
+
+/*
+ * Profilable substitutes for mem*(), lacking libc_p.a
+ *
+ * This code was written with reference to newlib, and was recently
+ * brought closer into line with newlib, to make profiling more accurate.
+ *
+ * Newlib is maintained by Cygwin, which is Red Hat. There is no copyright
+ * statement in the corresponding newlib source files, nor is there a
+ * COPYING file in newlib/libc/string or newlib/libc. Consider this file
+ * to be covered under one or more of the 50 copyright notices in
+ * newlib/COPYING, most of which are BSD. In any case, this file is only
+ * used for profiling, and is not used in production builds.
+ */
+
+#define is_word_aligned(x) (((size_t)(x) & 3) == 0)
+
+/*
+ * Copy n bytes from src to dst and return dst.
+ *
+ * When both pointers are word aligned and at least one word is to be copied,
+ * the bulk is moved as 32-bit words (four per iteration while possible);
+ * whatever remains is copied byte by byte. Otherwise the whole copy is done
+ * byte by byte. Copying always runs from low to high addresses.
+ */
+void *memcpy(void *dst, const void *src, size_t n)
+{
+ uint8_t *d8 = (uint8_t *)dst;
+ uint8_t *s8 = (uint8_t *)src;
+
+ if (n >= sizeof(uint32_t) && is_word_aligned(src) && is_word_aligned(dst)) {
+ uint32_t *d32 = (uint32_t *)dst;
+ uint32_t *s32 = (uint32_t *)src;
+ /* unrolled: four words per pass */
+ while (n >= 4 * sizeof(uint32_t)) {
+ *d32++ = *s32++;
+ *d32++ = *s32++;
+ *d32++ = *s32++;
+ *d32++ = *s32++;
+ n -= 4 * sizeof(uint32_t);
+ }
+ /* up to three remaining whole words */
+ while (n >= sizeof(uint32_t)) {
+ *d32++ = *s32++;
+ n -= sizeof(uint32_t);
+ }
+ /* continue the byte copy where the word copy stopped */
+ d8 = (uint8_t *)d32;
+ s8 = (uint8_t *)s32;
+ }
+ /* trailing bytes, or the entire copy on the unaligned path */
+ while (n-- > 0) {
+ *d8++ = *s8++;
+ }
+
+ return dst;
+}
+
+/*
+ * Fill the first n bytes at dst with the byte value (uint8_t)c and return
+ * dst.
+ *
+ * Leading bytes are stored one at a time until the destination is word
+ * aligned, then whole 32-bit words are stored (four per iteration while
+ * possible), then any trailing bytes.
+ */
+void *memset(void *dst, int c, size_t n)
+{
+    uint8_t *d8 = (uint8_t *)dst;
+    uint8_t c8 = (uint8_t)c;
+
+    /* byte stores up to the next word boundary (or until n runs out) */
+    while (!is_word_aligned(d8)) {
+        if (n--)
+            *d8++ = c8;
+        else
+            return dst;
+    }
+    if (n >= sizeof(uint32_t)) {
+        uint32_t *d32 = (uint32_t *)d8;
+        /*
+         * Widen before shifting: c8 itself would be promoted to (signed)
+         * int, and shifting a value >= 0x80 left by 24 overflows an int.
+         */
+        uint32_t w = c8;
+        uint32_t c32 = (w << 24) | (w << 16) | (w << 8) | w;
+        /* unrolled: four words per pass */
+        while (n >= 4 * sizeof(uint32_t)) {
+            *d32++ = c32;
+            *d32++ = c32;
+            *d32++ = c32;
+            *d32++ = c32;
+            n -= 4 * sizeof(uint32_t);
+        }
+        /* up to three remaining whole words */
+        while (n >= sizeof(uint32_t)) {
+            *d32++ = c32;
+            n -= sizeof(uint32_t);
+        }
+        d8 = (uint8_t *)d32;
+    }
+    /* trailing bytes */
+    while (n-- > 0) {
+        *d8++ = c8;
+    }
+
+    return dst;
+}
+
+/*
+ * Compare the first n bytes of dst and src.
+ *
+ * Returns 0 if they are equal, otherwise the difference between the first
+ * pair of differing bytes (taken as unsigned values, dst minus src).
+ *
+ * When both pointers are word aligned, equal 32-bit words are skipped first;
+ * the byte loop then locates the differing byte inside the first unequal
+ * word, or handles the tail.
+ */
+int memcmp(const void *dst, const void *src, size_t n)
+{
+ uint8_t *d8 = (uint8_t *)dst;
+ uint8_t *s8 = (uint8_t *)src;
+
+ if (n >= sizeof(uint32_t) && is_word_aligned(src) && is_word_aligned(dst)) {
+ uint32_t *d32 = (uint32_t *)dst;
+ uint32_t *s32 = (uint32_t *)src;
+ while (n >= sizeof(uint32_t)) {
+ /* stop at the first unequal word; n still counts its bytes */
+ if (*d32 != *s32)
+ break;
+ d32++;
+ s32++;
+ n -= sizeof(uint32_t);
+ }
+ /* resume bytewise at the first word not known to be equal */
+ d8 = (uint8_t *)d32;
+ s8 = (uint8_t *)s32;
+ }
+ while (n-- > 0) {
+ if (*d8 != *s8)
+ return (*d8 - *s8);
+ d8++;
+ s8++;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy n bytes from src to dst, allowing the two regions to overlap, and
+ * return dst.
+ *
+ * If dst starts inside [src, src + n) a forward copy would overwrite source
+ * bytes before they are read, so the copy runs backwards, byte by byte.
+ * In every other case the work is handed to memcpy().
+ */
+void *memmove(void *dst, const void *src, size_t n)
+{
+ uint8_t *d8 = (uint8_t *)dst;
+ uint8_t *s8 = (uint8_t *)src;
+
+ if ((s8 < d8) && (d8 < s8 + n)) {
+ /* Destructive overlap...have to copy backwards */
+ s8 += n;
+ d8 += n;
+ while (n-- > 0) {
+ *--d8 = *--s8;
+ }
+ return dst;
+ }
+
+ return memcpy(dst, src, n);
+}
diff --git a/libraries/libprof/profile-runner.py b/libraries/libprof/profile-runner.py
new file mode 100755
index 0000000..b96d6b6
--- /dev/null
+++ b/libraries/libprof/profile-runner.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+"""
+Tool to run some test code under the profiler on the Cryptech Alpha.
+
+This assumes that the HSM code was built with DO_PROFILING=1, and
+requires an ST-LINK programmer and the Python pexpect package.
+"""
+
+import subprocess
+import argparse
+import pexpect
+import atexit
+import time
+import sys
+import os
+
+# Command line: everything after the options is the test command to run,
+# followed by that command's own arguments.
+parser = argparse.ArgumentParser(description = __doc__,
+ formatter_class = argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("--hsm-elf",
+ default = os.path.expanduser("~/git.cryptech.is/sw/stm32/projects/hsm/hsm.elf"),
+ help = "where you keep the profiled hsm.elf binary")
+parser.add_argument("--openocd-config",
+ default = "/usr/share/openocd/scripts/board/st_nucleo_f401re.cfg",
+ help = "OpenOCD ST-LINK configuration file ")
+parser.add_argument("--gmon-output",
+ default = "profile-runner.gmon",
+ help = "where to leave raw profiler output")
+# argparse applies FileType to the string default too, so this is always an
+# open file object by the time it is used.
+parser.add_argument("--gprof-output", type = argparse.FileType("w"),
+ default = "profile-runner.gprof",
+ help = "where to leave profiler output after processing with gprof")
+parser.add_argument("--user",
+ default = "wheel",
+ help = "user name for logging in on the HSM console")
+parser.add_argument("--pin",
+ default = "fnord",
+ help = "PIN for logging in on the HSM console")
+parser.add_argument("command", nargs = 1,
+ help = "test program to run with profiling")
+parser.add_argument("arguments", nargs = argparse.REMAINDER,
+ help = argparse.SUPPRESS)
+args = parser.parse_args()
+
+# Run OpenOCD in the background for the lifetime of this script.
+openocd = subprocess.Popen(("openocd", "-f", args.openocd_config))
+atexit.register(openocd.terminate)
+
+# Fixed delay before connecting; presumably to let OpenOCD open its telnet
+# port (nothing here checks that it actually has).
+time.sleep(5)
+
+# Enable semihosting through OpenOCD's telnet console.
+telnet = pexpect.spawn("telnet localhost 4444")
+telnet.expect(">")
+telnet.sendline("arm semihosting enable")
+telnet.expect(">")
+telnet.sendline("exit")
+
+# Get to a "cryptech>" prompt on the HSM console. expect() returns the index
+# of the pattern that matched, so a non-zero result means "Username:" was
+# seen and a login is needed first.
+console = pexpect.spawn("cryptech_console")
+console.sendline("")
+if console.expect(["cryptech>", "Username:"]):
+ console.sendline(args.user)
+ console.expect("Password:")
+ console.sendline(args.pin)
+ console.expect("cryptech>")
+console.sendline("profile start")
+console.expect("cryptech>")
+
+# Run the workload. check_call raises if it exits non-zero, in which case
+# "profile stop" below is never sent.
+cmd = args.command + args.arguments
+sys.stderr.write("Running command: {}\n".format(" ".join(cmd)))
+subprocess.check_call(cmd)
+
+# Stop profiling and wait (up to 15 minutes) for the prompt to come back,
+# then move the resulting gmon.out to the requested name.
+console.sendline("profile stop")
+console.expect("cryptech>", timeout = 900)
+os.rename("gmon.out", args.gmon_output)
+
+# Post-process the raw profile with gprof into the --gprof-output file.
+subprocess.check_call(("gprof", args.hsm_elf, args.gmon_output), stdout = args.gprof_output)
diff --git a/libraries/libprof/profiler.S b/libraries/libprof/profiler.S
new file mode 100644
index 0000000..1aa5c97
--- /dev/null
+++ b/libraries/libprof/profiler.S
@@ -0,0 +1,28 @@
+/*
+ * profiler.S
+ * Implements the gprof profiler arc counting function.
+ * Created on: 06.08.2015
+ * Author: Erich Styger
+ */
+
+ .syntax unified
+ .arch armv7-m
+
+.globl __gnu_mcount_nc
+.type __gnu_mcount_nc, %function
+
+/*
+ * Arc-counting entry point. Passes two addresses to
+ * _mcount_internal(frompc = r0, selfpc = r1) and then returns.
+ *
+ * On entry LR is the return address into the function being profiled, and
+ * the word on top of the stack is taken to be that function's own return
+ * address (presumably pushed by the compiler-generated -pg prologue; confirm
+ * against the GCC output). After the five-register push below, that word
+ * sits at [sp, #20].
+ *
+ * The final pop removes six words: the five pushed here plus the word that
+ * was already on the stack. The saved LR lands in ip (used for the return)
+ * and the pre-existing word lands in LR.
+ */
+__gnu_mcount_nc:
+#if 0 /* dummy version, doing nothing */
+ mov ip, lr
+ pop { lr }
+ bx ip
+#else
+ push {r0, r1, r2, r3, lr} /* save registers */
+ bic r1, lr, #1 /* R1 contains callee address, with thumb bit cleared */
+ ldr r0, [sp, #20] /* R0 contains caller address */
+ bic r0, r0, #1 /* clear thumb bit */
+ bl _mcount_internal /* jump to internal _mcount() implementation */
+ pop {r0, r1, r2, r3, ip, lr} /* restore saved registers */
+ bx ip /* return to caller */
+#endif
+