From 7ef51e89d5a1d7d75cb0b8d3832327beb46319dd Mon Sep 17 00:00:00 2001
From: Paul Selkirk
Date: Wed, 3 May 2017 16:38:09 -0400
Subject: Add some task metrics.

---
 projects/hsm/Makefile    |  2 ++
 projects/hsm/mgmt-task.c | 34 +++++++++++++++++++++++++++++++++-
 task.c                   | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 task.h                   | 11 +++++++++++
 4 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile
index 927c9f1..ecd1a5d 100644
--- a/projects/hsm/Makefile
+++ b/projects/hsm/Makefile
@@ -13,6 +13,8 @@ OBJS = mgmt-cli.o \
 
 BOARD_OBJS += $(TOPLEVEL)/task.o
 
+CFLAGS += -DTASK_METRICS
+
 CFLAGS += -DNUM_RPC_TASK=4
 CFLAGS += -I$(LIBHAL_SRC)
 
diff --git a/projects/hsm/mgmt-task.c b/projects/hsm/mgmt-task.c
index a1ae7e6..12ce2b8 100644
--- a/projects/hsm/mgmt-task.c
+++ b/projects/hsm/mgmt-task.c
@@ -69,10 +69,42 @@ static int cmd_task_show(struct cli_def *cli, const char *command, char *argv[],
     return CLI_OK;
 }
 
+#ifdef TASK_METRICS
+static int cmd_task_show_metrics(struct cli_def *cli, const char *command, char *argv[], int argc)
+{
+    struct task_metrics tm;
+
+    task_get_metrics(&tm);
+
+    cli_print(cli, "avg time between yields: %ld.%06ld sec", (long)tm.avg.tv_sec, (long)tm.avg.tv_usec);
+    cli_print(cli, "max time between yields: %ld.%06ld sec", (long)tm.max.tv_sec, (long)tm.max.tv_usec);
+
+    return CLI_OK;
+}
+
+static int cmd_task_reset_metrics(struct cli_def *cli, const char *command, char *argv[], int argc)
+{
+    task_reset_metrics();
+
+    return CLI_OK;
+}
+#endif
+
 void configure_cli_task(struct cli_def *cli)
 {
     struct cli_command *c = cli_register_command(cli, NULL, "task", NULL, 0, 0, NULL);
 
     /* task show */
-    cli_register_command(cli, c, "show", cmd_task_show, 0, 0, "Show the active tasks");
+    struct cli_command *c_show = cli_register_command(cli, c, "show", cmd_task_show, 0, 0, "Show the active tasks");
+
+#ifdef TASK_METRICS
+    /* task show metrics */
+    cli_register_command(cli, c_show, "metrics", cmd_task_show_metrics, 0, 0, "Show task metrics");
+
+    /* task reset */
+    struct cli_command *c_reset = cli_register_command(cli, c, "reset", NULL, 0, 0, NULL);
+
+    /* task reset metrics */
+    cli_register_command(cli, c_reset, "metrics", cmd_task_reset_metrics, 0, 0, "Reset task metrics");
+#endif
 }
diff --git a/task.c b/task.c
index 2e2ddec..e156940 100644
--- a/task.c
+++ b/task.c
@@ -81,6 +81,14 @@ static tcb_t *cur_task = NULL;
 
 #define STACK_GUARD_WORD 0x55AA5A5A
 
+#ifdef TASK_METRICS
+static uint32_t tick_start = 0;
+static uint32_t tick_prev = 0;
+static uint32_t tick_idle = 0;
+static uint32_t tick_max = 0;
+static uint32_t nyield = 0;
+#endif
+
 /* Add a task.
  */
 tcb_t *task_add(char *name, funcp_t func, void *cookie, void *stack, size_t stack_len)
@@ -180,6 +188,10 @@ void task_yield(void)
     if (tail == NULL)
         return;
 
+#ifdef TASK_METRICS
+    uint32_t tick0 = HAL_GetTick();
+#endif
+
     /* Find the next runnable task. Loop if every task is waiting. */
     while (1) {
         next = next_task();
@@ -197,6 +209,20 @@ void task_yield(void)
      *    } while (next == NULL);
      */
 
+#ifdef TASK_METRICS
+    uint32_t tick = HAL_GetTick();
+    tick_idle += (tick - tick0);
+    if (tick_start == 0)
+        tick_start = tick;
+    if (tick_prev != 0) {
+        uint32_t duration = tick0 - tick_prev;
+        if (duration > tick_max)
+            tick_max = duration;
+    }
+    tick_prev = tick;
+    ++nyield;
+#endif
+
     /* If there are no other runnable tasks (and cur_task is runnable),
      * we don't need to context-switch.
      */
@@ -354,3 +380,25 @@ void task_mutex_unlock(task_mutex_t *mutex)
     if (mutex != NULL)
         mutex->locked = 0;
 }
+
+#ifdef TASK_METRICS
+void task_get_metrics(struct task_metrics *tm)
+{
+    if (tm != NULL) {
+        /* avg = non-idle ticks per yield, in usec; 64-bit so that ticks * 1000 cannot wrap */
+        uint64_t usec = (uint64_t)(HAL_GetTick() - tick_start - tick_idle) * 1000;
+        /* no yields counted yet (e.g. right after a reset): report 0 rather than divide by 0 */
+        usec = (nyield != 0) ? usec / nyield : 0;
+        tm->avg.tv_sec = usec / 1000000;
+        tm->avg.tv_usec = usec % 1000000;
+        tm->max.tv_sec = tick_max / 1000;
+        tm->max.tv_usec = (tick_max % 1000) * 1000;
+    }
+}
+
+void task_reset_metrics(void)
+{
+    tick_start = HAL_GetTick();
+    tick_prev = tick_idle = tick_max = nyield = 0;
+}
+#endif
diff --git a/task.h b/task.h
index 6b45db8..24f87ce 100644
--- a/task.h
+++ b/task.h
@@ -73,4 +73,15 @@ extern void task_delay(uint32_t delay);
 extern void task_mutex_lock(task_mutex_t *mutex);
 extern void task_mutex_unlock(task_mutex_t *mutex);
 
+#ifdef TASK_METRICS
+#include <sys/time.h>
+
+struct task_metrics {
+    struct timeval avg, max;
+};
+
+void task_get_metrics(struct task_metrics *tm);
+void task_reset_metrics(void);
+#endif
+
 #endif /* _TASK_H_ */
-- 
cgit v1.2.3


From 9cec66f9200cb573353928bd3292fb1f710e4b3c Mon Sep 17 00:00:00 2001
From: Paul Selkirk
Date: Thu, 4 May 2017 15:23:10 -0400
Subject: Copy profiling code from MCUOnEclipse.

--- libraries/libprof/README.txt | 6 + libraries/libprof/gmon.c | 343 +++++++++++++++++++++++++++++++++++++++++++ libraries/libprof/gmon.h | 177 ++++++++++++++++++++++ libraries/libprof/profil.c | 94 ++++++++++++ libraries/libprof/profil.h | 60 ++++++++ libraries/libprof/profiler.S | 28 ++++ 6 files changed, 708 insertions(+) create mode 100644 libraries/libprof/README.txt create mode 100644 libraries/libprof/gmon.c create mode 100644 libraries/libprof/gmon.h create mode 100644 libraries/libprof/profil.c create mode 100644 libraries/libprof/profil.h create mode 100644 libraries/libprof/profiler.S diff --git a/libraries/libprof/README.txt b/libraries/libprof/README.txt new file mode 100644 index 0000000..2df8b96 --- /dev/null +++ b/libraries/libprof/README.txt @@ -0,0 +1,6 @@ +Copied from https://github.com/ErichStyger/mcuoneclipse.git, +directory Examples/KDS/FRDM-K64F120M/FRDM-K64F_Profiling/Profiling, +commit 9b7eedddd8b24968128582aedc63be95b61f782c, +dated Mon Jan 9 16:56:17 2017 +0100. +(This is in turn adapted from Cygwin, and can be found in newlib distributions.) + diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c new file mode 100644 index 0000000..2be8bb2 --- /dev/null +++ b/libraries/libprof/gmon.c @@ -0,0 +1,343 @@ +/*- + * Copyright (c) 1983, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This file is taken from Cygwin distribution. Please keep it in sync. + * The differences should be within __MINGW32__ guard. 
+ */ + +#include +#include +#include +#include +#include "gmon.h" +#include "profil.h" +#include +#include + +#define MINUS_ONE_P (-1) +#define bzero(ptr,size) memset (ptr, 0, size); +#define ERR(s) write(2, s, sizeof(s)) + +struct gmonparam _gmonparam = { GMON_PROF_OFF, NULL, 0, NULL, 0, NULL, 0, 0L, 0, 0, 0}; +static char already_setup = 0; /* flag to indicate if we need to init */ +static int s_scale; +/* see profil(2) where this is described (incorrectly) */ +#define SCALE_1_TO_1 0x10000L + +static void moncontrol(int mode); + +/* required for gcc ARM Embedded 4.9-2015-q2 */ +#if 0 +void *_sbrk(int incr) { + extern char __HeapLimit; /* Defined by the linker */ + static char *heap_end = 0; + char *prev_heap_end; + + if (heap_end==0) { + heap_end = &__HeapLimit; + } + prev_heap_end = heap_end; + heap_end += incr; + return (void *)prev_heap_end; +} +#endif + +static void *fake_sbrk(int size) { + void *rv = malloc(size); + if (rv) { + return rv; + } else { + return (void *) MINUS_ONE_P; + } +} + +void monstartup (size_t lowpc, size_t highpc) { + register size_t o; + char *cp; + struct gmonparam *p = &_gmonparam; + + /* + * round lowpc and highpc to multiples of the density we're using + * so the rest of the scaling (here and in gprof) stays in ints. 
+ */ + p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER)); + p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER)); + p->textsize = p->highpc - p->lowpc; + p->kcountsize = p->textsize / HISTFRACTION; + p->fromssize = p->textsize / HASHFRACTION; + p->tolimit = p->textsize * ARCDENSITY / 100; + if (p->tolimit < MINARCS) { + p->tolimit = MINARCS; + } else if (p->tolimit > MAXARCS) { + p->tolimit = MAXARCS; + } + p->tossize = p->tolimit * sizeof(struct tostruct); + + cp = fake_sbrk(p->kcountsize + p->fromssize + p->tossize); + if (cp == (char *)MINUS_ONE_P) { + ERR("monstartup: out of memory\n"); + return; + } + + /* zero out cp as value will be added there */ + bzero(cp, p->kcountsize + p->fromssize + p->tossize); + + p->tos = (struct tostruct *)cp; + cp += p->tossize; + p->kcount = (u_short *)cp; + cp += p->kcountsize; + p->froms = (u_short *)cp; + + p->tos[0].link = 0; + + o = p->highpc - p->lowpc; + if (p->kcountsize < o) { +#ifndef notdef + s_scale = ((float)p->kcountsize / o ) * SCALE_1_TO_1; +#else /* avoid floating point */ + int quot = o / p->kcountsize; + + if (quot >= 0x10000) + s_scale = 1; + else if (quot >= 0x100) + s_scale = 0x10000 / quot; + else if (o >= 0x800000) + s_scale = 0x1000000 / (o / (p->kcountsize >> 8)); + else + s_scale = 0x1000000 / ((o << 8) / p->kcountsize); +#endif + } else { + s_scale = SCALE_1_TO_1; + } + moncontrol(1); /* start */ +} + +void _mcleanup(void) { + static const char gmon_out[] = "gmon.out"; + int fd; + int hz; + int fromindex; + int endfrom; + size_t frompc; + int toindex; + struct rawarc rawarc; + struct gmonparam *p = &_gmonparam; + struct gmonhdr gmonhdr, *hdr; + const char *proffile; +#ifdef DEBUG + int log, len; + char dbuf[200]; +#endif + + if (p->state == GMON_PROF_ERROR) { + ERR("_mcleanup: tos overflow\n"); + } + hz = PROF_HZ; + moncontrol(0); /* stop */ + proffile = gmon_out; + fd = open(proffile , O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666); + if (fd < 0) { + perror( proffile ); + return; + 
} +#ifdef DEBUG + log = open("gmon.log", O_CREAT|O_TRUNC|O_WRONLY, 0664); + if (log < 0) { + perror("mcount: gmon.log"); + return; + } + len = sprintf(dbuf, "[mcleanup1] kcount 0x%x ssiz %d\n", + p->kcount, p->kcountsize); + write(log, dbuf, len); +#endif + hdr = (struct gmonhdr *)&gmonhdr; + hdr->lpc = p->lowpc; + hdr->hpc = p->highpc; + hdr->ncnt = p->kcountsize + sizeof(gmonhdr); + hdr->version = GMONVERSION; + hdr->profrate = hz; + write(fd, (char *)hdr, sizeof *hdr); + write(fd, p->kcount, p->kcountsize); + endfrom = p->fromssize / sizeof(*p->froms); + for (fromindex = 0; fromindex < endfrom; fromindex++) { + if (p->froms[fromindex] == 0) { + continue; + } + frompc = p->lowpc; + frompc += fromindex * HASHFRACTION * sizeof(*p->froms); + for (toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].link) { +#ifdef DEBUG + len = sprintf(dbuf, + "[mcleanup2] frompc 0x%x selfpc 0x%x count %d\n" , + frompc, p->tos[toindex].selfpc, + p->tos[toindex].count); + write(log, dbuf, len); +#endif + rawarc.raw_frompc = frompc; + rawarc.raw_selfpc = p->tos[toindex].selfpc; + rawarc.raw_count = p->tos[toindex].count; + write(fd, &rawarc, sizeof rawarc); + } + } + close(fd); +} + +/* + * Control profiling + * profiling is what mcount checks to see if + * all the data structures are ready. 
+ */ +static void moncontrol(int mode) { + struct gmonparam *p = &_gmonparam; + + if (mode) { + /* start */ + profil((char *)p->kcount, p->kcountsize, p->lowpc, s_scale); + p->state = GMON_PROF_ON; + } else { + /* stop */ + profil((char *)0, 0, 0, 0); + p->state = GMON_PROF_OFF; + } +} + +void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { + register struct tostruct *top; + register struct tostruct *prevtop; + register long toindex; + struct gmonparam *p = &_gmonparam; + + if (!already_setup) { + extern char __etext; /* end of text/code symbol, defined by linker */ + already_setup = 1; + monstartup(0x410, (uint32_t)&__etext); + } + /* + * check that we are profiling + * and that we aren't recursively invoked. + */ + if (p->state!=GMON_PROF_ON) { + goto out; + } + p->state++; + /* + * check that frompcindex is a reasonable pc value. + * for example: signal catchers get called from the stack, + * not from text space. too bad. + */ + frompcindex = (uint32_t*)((long)frompcindex - (long)p->lowpc); + if ((unsigned long)frompcindex > p->textsize) { + goto done; + } + frompcindex = (uint32_t*)&p->froms[((long)frompcindex) / (HASHFRACTION * sizeof(*p->froms))]; + toindex = *((u_short*)frompcindex); /* get froms[] value */ + if (toindex == 0) { + /* + * first time traversing this arc + */ + toindex = ++p->tos[0].link; /* the link of tos[0] points to the last used record in the array */ + if (toindex >= p->tolimit) { /* more tos[] entries than we can handle! */ + goto overflow; + } + *((u_short*)frompcindex) = (u_short)toindex; /* store new 'to' value into froms[] */ + top = &p->tos[toindex]; + top->selfpc = (size_t)selfpc; + top->count = 1; + top->link = 0; + goto done; + } + top = &p->tos[toindex]; + if (top->selfpc == (size_t)selfpc) { + /* + * arc at front of chain; usual case. + */ + top->count++; + goto done; + } + /* + * have to go looking down chain for it. + * top points to what we are looking at, + * prevtop points to previous top. 
+ * we know it is not at the head of the chain. + */ + for (; /* goto done */; ) { + if (top->link == 0) { + /* + * top is end of the chain and none of the chain + * had top->selfpc == selfpc. + * so we allocate a new tostruct + * and link it to the head of the chain. + */ + toindex = ++p->tos[0].link; + if (toindex >= p->tolimit) { + goto overflow; + } + top = &p->tos[toindex]; + top->selfpc = (size_t)selfpc; + top->count = 1; + top->link = *((u_short*)frompcindex); + *(u_short*)frompcindex = (u_short)toindex; + goto done; + } + /* + * otherwise, check the next arc on the chain. + */ + prevtop = top; + top = &p->tos[top->link]; + if (top->selfpc == (size_t)selfpc) { + /* + * there it is. + * increment its count + * move it to the head of the chain. + */ + top->count++; + toindex = prevtop->link; + prevtop->link = top->link; + top->link = *((u_short*)frompcindex); + *((u_short*)frompcindex) = (u_short)toindex; + goto done; + } + } + done: + p->state--; + /* and fall through */ + out: + return; /* normal return restores saved registers */ + overflow: + p->state++; /* halt further profiling */ + #define TOLIMIT "mcount: tos overflow\n" + write (2, TOLIMIT, sizeof(TOLIMIT)); + goto out; +} + +void _monInit(void) { + _gmonparam.state = GMON_PROF_OFF; + already_setup = 0; +} diff --git a/libraries/libprof/gmon.h b/libraries/libprof/gmon.h new file mode 100644 index 0000000..5eb5180 --- /dev/null +++ b/libraries/libprof/gmon.h @@ -0,0 +1,177 @@ +/* $OpenBSD: gmon.h,v 1.3 1996/04/21 22:31:46 deraadt Exp $ */ +/* $NetBSD: gmon.h,v 1.5 1996/04/09 20:55:30 cgd Exp $ */ + +/*- + * Copyright (c) 1982, 1986, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)gmon.h 8.2 (Berkeley) 1/4/94 + */ + +/* + * This file is taken from Cygwin distribution. Please keep it in sync. + * The differences should be within __MINGW32__ guard. + */ + +#ifndef _SYS_GMON_H_ +#define _SYS_GMON_H_ + +#ifndef __P +#define __P(x) x +#endif + +/* On POSIX systems, profile.h is a KRB5 header. To avoid collisions, just + pull in profile.h's content here. The profile.h header won't be provided + by Mingw-w64 anymore at one point. 
*/ +#if 0 +#include +#else +#ifndef _WIN64 +#define _MCOUNT_CALL __attribute__ ((regparm (2))) +extern void _mcount(void); +#else +#define _MCOUNT_CALL +extern void mcount(void); +#endif +#define _MCOUNT_DECL __attribute__((gnu_inline)) __inline__ \ + void _MCOUNT_CALL _mcount_private +#define MCOUNT +#endif + +/* + * Structure prepended to gmon.out profiling data file. + */ +struct gmonhdr { + size_t lpc; /* base pc address of sample buffer */ + size_t hpc; /* max pc address of sampled buffer */ + int ncnt; /* size of sample buffer (plus this header) */ + int version; /* version number */ + int profrate; /* profiling clock rate */ + int spare[3]; /* reserved */ +}; +#define GMONVERSION 0x00051879 + +/* + * histogram counters are unsigned shorts (according to the kernel). + */ +#define HISTCOUNTER unsigned short + +/* + * fraction of text space to allocate for histogram counters here, 1/2 + */ +#define HISTFRACTION 2 + +/* + * Fraction of text space to allocate for from hash buckets. + * The value of HASHFRACTION is based on the minimum number of bytes + * of separation between two subroutine call points in the object code. + * Given MIN_SUBR_SEPARATION bytes of separation the value of + * HASHFRACTION is calculated as: + * + * HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1); + * + * For example, on the VAX, the shortest two call sequence is: + * + * calls $0,(r0) + * calls $0,(r0) + * + * which is separated by only three bytes, thus HASHFRACTION is + * calculated as: + * + * HASHFRACTION = 3 / (2 * 2 - 1) = 1 + * + * Note that the division above rounds down, thus if MIN_SUBR_FRACTION + * is less than three, this algorithm will not work! + * + * In practice, however, call instructions are rarely at a minimal + * distance. Hence, we will define HASHFRACTION to be 2 across all + * architectures. This saves a reasonable amount of space for + * profiling data structures without (in practice) sacrificing + * any granularity. 
+ */ +#define HASHFRACTION 2 + +/* + * percent of text space to allocate for tostructs with a minimum. + */ +#define ARCDENSITY 2 /* this is in percentage, relative to text size! */ +#define MINARCS 50 +#define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER))) - 2) + +struct tostruct { + size_t selfpc; /* callee address/program counter. The caller address is in froms[] array which points to tos[] array */ + long count; /* how many times it has been called */ + u_short link; /* link to next entry in hash table. For tos[0] this points to the last used entry */ + u_short pad; /* additional padding bytes, to have entries 4byte aligned */ +}; + +/* + * a raw arc, with pointers to the calling site and + * the called site and a count. + */ +struct rawarc { + size_t raw_frompc; + size_t raw_selfpc; + long raw_count; +}; + +/* + * general rounding functions. + */ +#define ROUNDDOWN(x,y) (((x)/(y))*(y)) +#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) + +/* + * The profiling data structures are housed in this structure. + */ +struct gmonparam { + int state; + u_short *kcount; /* histogram PC sample array */ + size_t kcountsize; /* size of kcount[] array in bytes */ + u_short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */ + size_t fromssize; /* size of froms[] array in bytes */ + struct tostruct *tos; /* to struct, contains histogram counter */ + size_t tossize; /* size of tos[] array in bytes */ + long tolimit; + size_t lowpc; /* low program counter of area */ + size_t highpc; /* high program counter */ + size_t textsize; /* code size */ +}; +extern struct gmonparam _gmonparam; + +/* + * Possible states of profiling. 
+ */ +#define GMON_PROF_ON 0 +#define GMON_PROF_BUSY 1 +#define GMON_PROF_ERROR 2 +#define GMON_PROF_OFF 3 + +void _mcleanup(void); /* routine to be called to write gmon.out file */ +void _monInit(void); /* initialization routine */ + +#endif /* !_SYS_GMONH_ */ diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c new file mode 100644 index 0000000..24ede21 --- /dev/null +++ b/libraries/libprof/profil.c @@ -0,0 +1,94 @@ +/* profil.c -- win32 profil.c equivalent + + Copyright 1998, 1999, 2000, 2001, 2002 Red Hat, Inc. + + This file is part of Cygwin. + + This software is a copyrighted work licensed under the terms of the + Cygwin license. Please consult the file "CYGWIN_LICENSE" for + details. */ + +/* + * This file is taken from Cygwin distribution, adopted to be used for bare embeeded targets. + */ +#include +#include +#include +#include +#include "profil.h" +#include +#include + +/* global profinfo for profil() call */ +static struct profinfo prof = { + PROFILE_NOT_INIT, 0, 0, 0, 0 +}; + +/* sample the current program counter */ +void SysTick_Handler(void) { + void OSA_SysTick_Handler(void); + static size_t pc, idx; + + OSA_SysTick_Handler(); /* call normal Kinetis SDK SysTick handler */ + if (prof.state==PROFILE_ON) { + pc = ((uint32_t*)(__builtin_frame_address(0)))[14]; /* get SP and use it to get the return address from stack */ + if (pc >= prof.lowpc && pc < prof.highpc) { + idx = PROFIDX (pc, prof.lowpc, prof.scale); + prof.counter[idx]++; + } + } +} + +/* Stop profiling to the profiling buffer pointed to by p. */ +static int profile_off (struct profinfo *p) { + p->state = PROFILE_OFF; + return 0; +} + +/* Create a timer thread and pass it a pointer P to the profiling buffer. 
*/ +static int profile_on (struct profinfo *p) { + p->state = PROFILE_ON; + return 0; /* ok */ +} + +/* + * start or stop profiling + * + * profiling goes into the SAMPLES buffer of size SIZE (which is treated + * as an array of u_shorts of size size/2) + * + * each bin represents a range of pc addresses from OFFSET. The number + * of pc addresses in a bin depends on SCALE. (A scale of 65536 maps + * each bin to two addresses, A scale of 32768 maps each bin to 4 addresses, + * a scale of 1 maps each bin to 128k address). Scale may be 1 - 65536, + * or zero to turn off profiling + */ +int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, u_int scale) { + size_t maxbin; + + if (scale > 65536) { + errno = EINVAL; + return -1; + } + profile_off(p); + if (scale) { + memset(samples, 0, size); + memset(p, 0, sizeof *p); + maxbin = size >> 1; + prof.counter = (u_short*)samples; + prof.lowpc = offset; + prof.highpc = PROFADDR(maxbin, offset, scale); + prof.scale = scale; + return profile_on(p); + } + return 0; +} + +/* Equivalent to unix profil() + Every SLEEPTIME interval, the user's program counter (PC) is examined: + offset is subtracted and the result is multiplied by scale. + The word pointed to by this address is incremented. */ +int profil (char *samples, size_t size, size_t offset, u_int scale) { + return profile_ctl (&prof, samples, size, offset, scale); +} + diff --git a/libraries/libprof/profil.h b/libraries/libprof/profil.h new file mode 100644 index 0000000..af7a3ed --- /dev/null +++ b/libraries/libprof/profil.h @@ -0,0 +1,60 @@ +/* profil.h: gprof profiling header file + + Copyright 1998, 1999, 2000, 2001, 2002 Red Hat, Inc. + +This file is part of Cygwin. + +This software is a copyrighted work licensed under the terms of the +Cygwin license. Please consult the file "CYGWIN_LICENSE" for +details. */ + +/* + * This file is taken from Cygwin distribution. Please keep it in sync. + * The differences should be within __MINGW32__ guard. 
+ */ + +#ifndef __PROFIL_H__ +#define __PROFIL_H__ + +/* profiling frequency. (No larger than 1000) */ +#define PROF_HZ 1000 + +/* convert an addr to an index */ +#define PROFIDX(pc, base, scale) \ + ({ \ + size_t i = (pc - base) / 2; \ + if (sizeof (unsigned long long int) > sizeof (size_t)) \ + i = (unsigned long long int) i * scale / 65536; \ + else \ + i = i / 65536 * scale + i % 65536 * scale / 65536; \ + i; \ + }) + +/* convert an index into an address */ +#define PROFADDR(idx, base, scale) \ + ((base) \ + + ((((unsigned long long)(idx) << 16) \ + / (unsigned long long)(scale)) << 1)) + +/* convert a bin size into a scale */ +#define PROFSCALE(range, bins) (((bins) << 16) / ((range) >> 1)) + +typedef void *_WINHANDLE; + +typedef enum { + PROFILE_NOT_INIT = 0, + PROFILE_ON, + PROFILE_OFF +} PROFILE_State; + +struct profinfo { + PROFILE_State state; /* profiling state */ + u_short *counter; /* profiling counters */ + size_t lowpc, highpc; /* range to be profiled */ + u_int scale; /* scale value of bins */ +}; + +int profile_ctl(struct profinfo *, char *, size_t, size_t, u_int); +int profil(char *, size_t, size_t, u_int); + +#endif /* __PROFIL_H__ */ diff --git a/libraries/libprof/profiler.S b/libraries/libprof/profiler.S new file mode 100644 index 0000000..1aa5c97 --- /dev/null +++ b/libraries/libprof/profiler.S @@ -0,0 +1,28 @@ +/* + * profiler.S + * Implements the gprof profiler arc counting function. 
+ * Created on: 06.08.2015 + * Author: Erich Styger + */ + + .syntax unified + .arch armv7-m + +.globl __gnu_mcount_nc +.type __gnu_mcount_nc, %function + +__gnu_mcount_nc: +#if 0 /* dummy version, doing nothing */ + mov ip, lr + pop { lr } + bx ip +#else + push {r0, r1, r2, r3, lr} /* save registers */ + bic r1, lr, #1 /* R1 contains callee address, with thumb bit cleared */ + ldr r0, [sp, #20] /* R0 contains caller address */ + bic r0, r0, #1 /* clear thumb bit */ + bl _mcount_internal /* jump to internal _mcount() implementation */ + pop {r0, r1, r2, r3, ip, lr} /* restore saved registers */ + bx ip /* return to caller */ +#endif + -- cgit v1.2.3 From 1815f1b2aa0a3ff0654f4eb65fdd0a5bdfe8c7b7 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Fri, 5 May 2017 22:58:34 -0400 Subject: Port profiling code, using a new SysTick hook and new CLI commands. --- Makefile | 15 ++++- libraries/libprof/README.txt | 65 ++++++++++++++++++++-- libraries/libprof/gmon.c | 61 +++++++------------- libraries/libprof/profil.c | 22 ++++---- .../TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c | 8 +++ projects/hsm/Makefile | 11 +++- projects/hsm/mgmt-misc.c | 28 ++++++++++ syscalls.c | 6 ++ 8 files changed, 158 insertions(+), 58 deletions(-) diff --git a/Makefile b/Makefile index d8c9593..398e442 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,9 @@ LIBCLI_BLD = $(LIBS_DIR)/libcli LIBTFM_SRC = $(CRYPTECH_ROOT)/sw/thirdparty/libtfm LIBTFM_BLD = $(LIBS_DIR)/libtfm +LIBPROF_SRC = $(LIBS_DIR)/libprof +LIBPROF_BLD = $(LIBS_DIR)/libprof + LIBS = $(MBED_DIR)/libstmf4.a # linker script @@ -105,7 +108,7 @@ CFLAGS += -DUSE_STDPERIPH_DRIVER -DSTM32F4XX -DSTM32F429xx CFLAGS += -D__CORTEX_M4 -DTARGET_STM -DTARGET_STM32F4 -DTARGET_STM32F429ZI -DTOOLCHAIN_GCC -D__FPU_PRESENT=1 -D$(BOARD) CFLAGS += -DENABLE_WEAK_FUNCTIONS CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -CFLAGS += -std=c99 +CFLAGS += -std=gnu99 CFLAGS += -I$(TOPLEVEL) CFLAGS += -I$(MBED_DIR)/api CFLAGS += -I$(MBED_DIR)/targets/cmsis 
@@ -140,11 +143,20 @@ $(LIBHAL_BLD)/libhal.a: $(LIBTFM_BLD)/libtfm.a .FORCE $(LIBCLI_BLD)/libcli.a: .FORCE $(MAKE) -C $(LIBCLI_BLD) +$(LIBPROF_BLD)/libprof.a: .FORCE + $(MAKE) -C $(LIBPROF_BLD) + libhal-test: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a .FORCE $(MAKE) -C projects/libhal-test +ifdef DO_PROFILING +CFLAGS += -pg -DDO_PROFILING +hsm: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a $(LIBCLI_BLD)/libcli.a $(LIBPROF_BLD)/libprof.a .FORCE + $(MAKE) -C projects/hsm +else hsm: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a $(LIBCLI_BLD)/libcli.a .FORCE $(MAKE) -C projects/hsm +endif bootloader: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a .FORCE $(MAKE) -C projects/bootloader @@ -176,3 +188,4 @@ distclean: clean $(MAKE) -C $(MBED_DIR) clean $(MAKE) -C $(LIBTFM_BLD) clean $(MAKE) -C $(LIBCLI_BLD) clean + $(MAKE) -C $(LIBPROF_BLD) clean diff --git a/libraries/libprof/README.txt b/libraries/libprof/README.txt index 2df8b96..da138c2 100644 --- a/libraries/libprof/README.txt +++ b/libraries/libprof/README.txt @@ -1,6 +1,61 @@ -Copied from https://github.com/ErichStyger/mcuoneclipse.git, -directory Examples/KDS/FRDM-K64F120M/FRDM-K64F_Profiling/Profiling, -commit 9b7eedddd8b24968128582aedc63be95b61f782c, -dated Mon Jan 9 16:56:17 2017 +0100. -(This is in turn adapted from Cygwin, and can be found in newlib distributions.) +Profiling the Cryptech Alpha +============================ +Origin +------ + +This code was copied from https://github.com/ErichStyger/mcuoneclipse.git, +directory Examples/KDS/FRDM-K64F120M/FRDM-K64F_Profiling/Profiling, commit +9b7eedddd8b24968128582aedc63be95b61f782c, dated Mon Jan 9 16:56:17 2017 +0100. + +References +---------- + +I recommend reading both of these to understand how the profiling code works. 
+ +[1]: https://mcuoneclipse.com/2015/08/23/tutorial-using-gnu-profiling-gprof-with-arm-cortex-m/ +"Tutorial: Using GNU Profiling (gprof) with ARM Cortex-M" + +[2]: http://bgamari.github.io/posts/2014-10-31-semihosting.html +"Semihosting with ARM, GCC, and OpenOCD" + +How to build +------------ + +From the top level, run + + make DO_PROFILING=1 hsm + +By default, all code is profiled, *except* the profiling code itself, +because that would cause fatal recursion. + +How to run +---------- + +You need to start OpenOCD on the host, and enable semihosting, at least +before you try to use it as a remote file system. + +I recommend executing the following in the projects/hsm directory, so that +gmon.out ends up in the same directory as hsm.elf. + +Start OpenOCD: + + $ openocd -f /usr/share/openocd/scripts/board/stm32f4discovery.cfg & + +Connect to OpenOCD: + + $ telnet localhost 4444 + +In the OpenOCD console, enable semihosting: + + > arm semihosting enable + +In the CLI, type `profile start`, then start the unit test or whatever +will be exercising the hsm. Afterwards, in the CLI, type `profile stop`. + +After invoking `profile stop`, it takes almost 2 minutes to write gmon.out +over OpenOCD to the host. 
+
+In the projects/hsm directory, run gprof to analyse the gmon.out file:
+
+    $ gprof hsm.elf >gprof.txt
diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c
index 2be8bb2..458028b 100644
--- a/libraries/libprof/gmon.c
+++ b/libraries/libprof/gmon.c
@@ -41,7 +41,6 @@
 #include
 #include
 
-#define MINUS_ONE_P (-1)
 #define bzero(ptr,size) memset (ptr, 0, size);
 #define ERR(s) write(2, s, sizeof(s))
 
@@ -53,43 +52,30 @@ static int s_scale;
 
 static void moncontrol(int mode);
 
-/* required for gcc ARM Embedded 4.9-2015-q2 */
-#if 0
-void *_sbrk(int incr) {
-  extern char __HeapLimit; /* Defined by the linker */
-  static char *heap_end = 0;
-  char *prev_heap_end;
-
-  if (heap_end==0) {
-    heap_end = &__HeapLimit;
-  }
-  prev_heap_end = heap_end;
-  heap_end += incr;
-  return (void *)prev_heap_end;
-}
-#endif
-
-static void *fake_sbrk(int size) {
-  void *rv = malloc(size);
-  if (rv) {
-    return rv;
-  } else {
-    return (void *) MINUS_ONE_P;
-  }
-}
-
 void monstartup (size_t lowpc, size_t highpc) {
   register size_t o;
   char *cp;
   struct gmonparam *p = &_gmonparam;
 
+  if (already_setup && p->tos != NULL) {
+    /* buffers exist from an earlier run (tos stays NULL if allocation failed): clear and restart */
+    bzero(p->tos, p->kcountsize + p->fromssize + p->tossize);
+    moncontrol(1); /* start */
+    return;
+  }
+  already_setup = 1;
+
+  /* enable semihosting, for eventual output */
+  extern void initialise_monitor_handles(void);
+  initialise_monitor_handles();
+
   /*
    * round lowpc and highpc to multiples of the density we're using
    * so the rest of the scaling (here and in gprof) stays in ints.
    */
   p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
   p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
-  p->textsize = p->highpc - p->lowpc;
+  p->textsize = p->highpc - p->lowpc + 0x20;
   p->kcountsize = p->textsize / HISTFRACTION;
   p->fromssize = p->textsize / HASHFRACTION;
   p->tolimit = p->textsize * ARCDENSITY / 100;
@@ -100,8 +86,9 @@ void monstartup (size_t lowpc, size_t highpc) {
   }
   p->tossize = p->tolimit * sizeof(struct tostruct);
 
-  cp = fake_sbrk(p->kcountsize + p->fromssize + p->tossize);
-  if (cp == (char *)MINUS_ONE_P) {
+  extern void *hal_allocate_static_memory(const size_t size);
+  cp = hal_allocate_static_memory(p->kcountsize + p->fromssize + p->tossize);
+  if (cp == NULL) {
     ERR("monstartup: out of memory\n");
     return;
   }
@@ -142,14 +129,13 @@ void monstartup (size_t lowpc, size_t highpc) {
 void _mcleanup(void) {
   static const char gmon_out[] = "gmon.out";
   int fd;
-  int hz;
   int fromindex;
   int endfrom;
   size_t frompc;
   int toindex;
   struct rawarc rawarc;
   struct gmonparam *p = &_gmonparam;
-  struct gmonhdr gmonhdr, *hdr;
+  struct gmonhdr gmonhdr = {0}, *hdr;
   const char *proffile;
 #ifdef DEBUG
   int log, len;
@@ -159,7 +145,6 @@ void _mcleanup(void) {
   if (p->state == GMON_PROF_ERROR) {
     ERR("_mcleanup: tos overflow\n");
   }
-  hz = PROF_HZ;
   moncontrol(0); /* stop */
   proffile = gmon_out;
   fd = open(proffile , O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666);
@@ -174,7 +159,7 @@ void _mcleanup(void) {
     return;
   }
   len = sprintf(dbuf, "[mcleanup1] kcount 0x%x ssiz %d\n",
-                p->kcount, p->kcountsize);
+                (unsigned int)p->kcount, p->kcountsize);
   write(log, dbuf, len);
 #endif
   hdr = (struct gmonhdr *)&gmonhdr;
@@ -182,7 +167,8 @@ void _mcleanup(void) {
   hdr->hpc = p->highpc;
   hdr->ncnt = p->kcountsize + sizeof(gmonhdr);
   hdr->version = GMONVERSION;
-  hdr->profrate = hz;
+  hdr->profrate = PROF_HZ;
+  hdr->spare[0] = hdr->spare[1] = hdr->spare[2] = 0;
   write(fd, (char *)hdr, sizeof *hdr);
   write(fd, p->kcount, p->kcountsize);
   endfrom = p->fromssize / sizeof(*p->froms);
@@ -195,7 +181,7 @@ void _mcleanup(void) { for (toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].link) { #ifdef DEBUG len = sprintf(dbuf, - "[mcleanup2] frompc 0x%x selfpc 0x%x count %d\n" , + "[mcleanup2] frompc 0x%x selfpc 0x%x count %ld\n" , frompc, p->tos[toindex].selfpc, p->tos[toindex].count); write(log, dbuf, len); @@ -234,11 +220,6 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { register long toindex; struct gmonparam *p = &_gmonparam; - if (!already_setup) { - extern char __etext; /* end of text/code symbol, defined by linker */ - already_setup = 1; - monstartup(0x410, (uint32_t)&__etext); - } /* * check that we are profiling * and that we aren't recursively invoked. diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c index 24ede21..07761dd 100644 --- a/libraries/libprof/profil.c +++ b/libraries/libprof/profil.c @@ -9,7 +9,7 @@ details. */ /* - * This file is taken from Cygwin distribution, adopted to be used for bare embeeded targets. + * This file is taken from Cygwin distribution, adapted to be used for bare embedded targets. 
*/ #include #include @@ -19,34 +19,34 @@ #include #include +#include "stm32f4xx_hal.h" /* __get_MSP */ + /* global profinfo for profil() call */ static struct profinfo prof = { PROFILE_NOT_INIT, 0, 0, 0, 0 }; -/* sample the current program counter */ -void SysTick_Handler(void) { - void OSA_SysTick_Handler(void); - static size_t pc, idx; +extern void set_SysTick_hook(void (*hook)(void)); - OSA_SysTick_Handler(); /* call normal Kinetis SDK SysTick handler */ - if (prof.state==PROFILE_ON) { - pc = ((uint32_t*)(__builtin_frame_address(0)))[14]; /* get SP and use it to get the return address from stack */ - if (pc >= prof.lowpc && pc < prof.highpc) { - idx = PROFIDX (pc, prof.lowpc, prof.scale); +/* sample the current program counter */ +static void SysTick_hook(void) { + size_t pc = (size_t)((uint32_t *)__get_MSP())[5]; + if (pc >= prof.lowpc && pc < prof.highpc) { + size_t idx = PROFIDX (pc, prof.lowpc, prof.scale); prof.counter[idx]++; - } } } /* Stop profiling to the profiling buffer pointed to by p. */ static int profile_off (struct profinfo *p) { + set_SysTick_hook(NULL); p->state = PROFILE_OFF; return 0; } /* Create a timer thread and pass it a pointer P to the profiling buffer. 
*/ static int profile_on (struct profinfo *p) { + set_SysTick_hook(SysTick_hook); p->state = PROFILE_ON; return 0; /* ok */ } diff --git a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c index 8e5cc73..4629e44 100644 --- a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c +++ b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c @@ -64,6 +64,13 @@ void HardFault_Handler(void) while (1) { ; } } +static void default_SysTick_hook(void) { }; +static void (*SysTick_hook)(void) = default_SysTick_hook; +void set_SysTick_hook(void (*hook)(void)) +{ + SysTick_hook = (hook == NULL) ? default_SysTick_hook : hook; +} + /** * @brief This function handles SysTick Handler. * @param None @@ -72,6 +79,7 @@ void HardFault_Handler(void) void SysTick_Handler(void) { HAL_IncTick(); + SysTick_hook(); } /******************************************************************************/ diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile index 927c9f1..6add6a8 100644 --- a/projects/hsm/Makefile +++ b/projects/hsm/Makefile @@ -21,10 +21,19 @@ CFLAGS += -I$(LIBCLI_SRC) LIBS += $(LIBHAL_BLD)/libhal.a $(LIBTFM_BLD)/libtfm.a LIBS += $(LIBCLI_BLD)/libcli.a +LDFLAGS += -mcpu=cortex-m4 -mthumb -mlittle-endian -mthumb-interwork +LDFLAGS += -mfloat-abi=hard -mfpu=fpv4-sp-d16 +LDFLAGS += -Wl,--gc-sections + +ifdef DO_PROFILING +LIBS += $(LIBPROF_BLD)/libprof.a +LDFLAGS += --specs=rdimon.specs -lc -lrdimon +endif + all: $(PROJ:=.elf) %.elf: %.o $(BOARD_OBJS) $(OBJS) $(LIBS) - $(CC) $(CFLAGS) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map + $(CC) $(LDFLAGS) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map $(OBJCOPY) -O binary $*.elf $*.bin $(SIZE) $*.elf diff --git a/projects/hsm/mgmt-misc.c b/projects/hsm/mgmt-misc.c index ccd032b..016d7cb 100644 --- a/projects/hsm/mgmt-misc.c +++ 
b/projects/hsm/mgmt-misc.c @@ -113,6 +113,25 @@ int cli_receive_data(struct cli_def *cli, uint8_t *buf, size_t len, cli_data_cal return CLI_ERROR; } +#ifdef DO_PROFILING +static int cmd_profile_start(struct cli_def *cli, const char *command, char *argv[], int argc) +{ + extern uint32_t CRYPTECH_FIRMWARE_START; + extern char __etext; /* end of text/code symbol, defined by linker */ + extern void monstartup (size_t lowpc, size_t highpc); + monstartup((size_t)&CRYPTECH_FIRMWARE_START, (size_t)&__etext); + return CLI_OK; +} + +static int cmd_profile_stop(struct cli_def *cli, const char *command, char *argv[], int argc) +{ + extern void _mcleanup(void); + _mcleanup(); + return CLI_OK; +} + +#endif + static int cmd_reboot(struct cli_def *cli, const char *command, char *argv[], int argc) { cli_print(cli, "\n\n\nRebooting\n\n\n"); @@ -124,6 +143,15 @@ static int cmd_reboot(struct cli_def *cli, const char *command, char *argv[], in void configure_cli_misc(struct cli_def *cli) { +#ifdef DO_PROFILING + struct cli_command *c_profile = cli_register_command(cli, NULL, "profile", NULL, 0, 0, NULL); + + /* profile start */ + cli_register_command(cli, c_profile, "start", cmd_profile_start, 0, 0, "Start collecting profiling data"); + + /* profile stop */ + cli_register_command(cli, c_profile, "stop", cmd_profile_stop, 0, 0, "Stop collecting profiling data"); +#endif /* reboot */ cli_register_command(cli, NULL, "reboot", cmd_reboot, 0, 0, "Reboot the STM32"); } diff --git a/syscalls.c b/syscalls.c index d7b7211..1624454 100644 --- a/syscalls.c +++ b/syscalls.c @@ -48,6 +48,7 @@ /***************************************************************************/ +#ifndef DO_PROFILING int _read_r (struct _reent *r, int file, char * ptr, int len) { r = r; @@ -103,6 +104,7 @@ int _close_r (struct _reent *r, int file) { return 0; } +#endif /***************************************************************************/ @@ -143,6 +145,7 @@ caddr_t _sbrk_r (struct _reent *r, int incr) 
/***************************************************************************/ +#ifndef DO_PROFILING int _fstat_r (struct _reent *r, int file, struct stat * st) { r = r; @@ -181,6 +184,7 @@ int _kill (int a, int b) return 0; } +#endif /***************************************************************************/ @@ -193,6 +197,7 @@ int _getpid(int a) /***************************************************************************/ +#ifndef DO_PROFILING int _open(int a, int b) { a = a; @@ -200,5 +205,6 @@ int _open(int a, int b) return 0; } +#endif /*** EOF ***/ -- cgit v1.2.3 From 4d69f1a0ef2ef3aa23b0ac9f1b9cbc84582136a7 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Sat, 6 May 2017 13:07:59 -0400 Subject: Correct offset to get the PC. A previous version of this code ran over the RTOS, where threads used the Process Stack, while the SysTick interrupt used the Main Stack. Now everything's on the main stack, so we need to account for 2 extra words that SysTick_Handler pushes on the stack at entry. --- libraries/libprof/README.txt | 4 ++-- libraries/libprof/profil.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libraries/libprof/README.txt b/libraries/libprof/README.txt index da138c2..9db27a6 100644 --- a/libraries/libprof/README.txt +++ b/libraries/libprof/README.txt @@ -53,8 +53,8 @@ In the OpenOCD console, enable semihosting: In the CLI, type `profile start`, then start the unit test or whatever will be exercising the hsm. Afterwards, in the CLI, type `profile stop`. -After invoking `profile stop`, it takes almost 2 minutes to write gmon.out -over OpenOCD to the host. +After invoking `profile stop`, it can take several minutes to write +gmon.out over OpenOCD to the host. 
In the projects/hsm directory, run gprof to analyse the gmon.out file: diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c index 07761dd..004af77 100644 --- a/libraries/libprof/profil.c +++ b/libraries/libprof/profil.c @@ -30,7 +30,7 @@ extern void set_SysTick_hook(void (*hook)(void)); /* sample the current program counter */ static void SysTick_hook(void) { - size_t pc = (size_t)((uint32_t *)__get_MSP())[5]; + size_t pc = (size_t)((uint32_t *)__get_MSP())[7]; if (pc >= prof.lowpc && pc < prof.highpc) { size_t idx = PROFIDX (pc, prof.lowpc, prof.scale); prof.counter[idx]++; -- cgit v1.2.3 From 65b94ef5ba1981c74a99cb43ee768fbf480c698b Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Wed, 10 May 2017 00:00:04 -0400 Subject: Sigh, right offset for the wrong register. Get the PC (the address we interrupted) rather than LR (the return address from the function we interrupted). Also, change u_short and u_int to unsigned short and unsigned int, since gcc recently decided that those aren't part of the C99 standard. Finally, add profilable versions of memcpy, memset, and friends, because they get called a lot in the course of unit testing, and it would be nice to know who's calling them. 
--- Makefile | 2 +- libraries/libprof/Makefile | 20 +++++++++ libraries/libprof/README.txt | 4 ++ libraries/libprof/gmon.c | 17 ++++---- libraries/libprof/gmon.h | 10 ++--- libraries/libprof/profil.c | 11 +++-- libraries/libprof/profil.h | 10 ++--- memfunc.c | 101 +++++++++++++++++++++++++++++++++++++++++++ projects/hsm/Makefile | 8 ++-- 9 files changed, 153 insertions(+), 30 deletions(-) create mode 100644 libraries/libprof/Makefile create mode 100644 memfunc.c diff --git a/Makefile b/Makefile index 398e442..432260b 100644 --- a/Makefile +++ b/Makefile @@ -108,7 +108,7 @@ CFLAGS += -DUSE_STDPERIPH_DRIVER -DSTM32F4XX -DSTM32F429xx CFLAGS += -D__CORTEX_M4 -DTARGET_STM -DTARGET_STM32F4 -DTARGET_STM32F429ZI -DTOOLCHAIN_GCC -D__FPU_PRESENT=1 -D$(BOARD) CFLAGS += -DENABLE_WEAK_FUNCTIONS CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -CFLAGS += -std=gnu99 +CFLAGS += -std=c99 CFLAGS += -I$(TOPLEVEL) CFLAGS += -I$(MBED_DIR)/api CFLAGS += -I$(MBED_DIR)/targets/cmsis diff --git a/libraries/libprof/Makefile b/libraries/libprof/Makefile new file mode 100644 index 0000000..4fe5fb4 --- /dev/null +++ b/libraries/libprof/Makefile @@ -0,0 +1,20 @@ +LIB = libprof.a + +OBJS = gmon.o profil.o profiler.o + +# Don't profile the profiling code, because that way lies madness (and recursion). +CFLAGS := $(subst -pg,,$(CFLAGS)) + +all: $(LIB) + +%.o : %.c + $(CC) $(CFLAGS) -c -o $@ $< + +%.o : %.S + $(CC) $(CFLAGS) -c -o $@ $< + +$(LIB): $(OBJS) + $(AR) -r $@ $^ + +clean: + rm -f $(OBJS) $(LIB) diff --git a/libraries/libprof/README.txt b/libraries/libprof/README.txt index 9db27a6..1fe378c 100644 --- a/libraries/libprof/README.txt +++ b/libraries/libprof/README.txt @@ -50,6 +50,10 @@ In the OpenOCD console, enable semihosting: > arm semihosting enable +In another window, start the debugger: + + $ sw/stm32/bin/debug projects/hsm/hsm + In the CLI, type `profile start`, then start the unit test or whatever will be exercising the hsm. Afterwards, in the CLI, type `profile stop`. 
diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c index 458028b..92054fc 100644 --- a/libraries/libprof/gmon.c +++ b/libraries/libprof/gmon.c @@ -38,7 +38,6 @@ #include #include "gmon.h" #include "profil.h" -#include #include #define bzero(ptr,size) memset (ptr, 0, size); @@ -98,9 +97,9 @@ void monstartup (size_t lowpc, size_t highpc) { p->tos = (struct tostruct *)cp; cp += p->tossize; - p->kcount = (u_short *)cp; + p->kcount = (unsigned short *)cp; cp += p->kcountsize; - p->froms = (u_short *)cp; + p->froms = (unsigned short *)cp; p->tos[0].link = 0; @@ -238,7 +237,7 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { goto done; } frompcindex = (uint32_t*)&p->froms[((long)frompcindex) / (HASHFRACTION * sizeof(*p->froms))]; - toindex = *((u_short*)frompcindex); /* get froms[] value */ + toindex = *((unsigned short*)frompcindex); /* get froms[] value */ if (toindex == 0) { /* * first time traversing this arc @@ -247,7 +246,7 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { if (toindex >= p->tolimit) { /* more tos[] entries than we can handle! 
*/ goto overflow; } - *((u_short*)frompcindex) = (u_short)toindex; /* store new 'to' value into froms[] */ + *((unsigned short*)frompcindex) = (unsigned short)toindex; /* store new 'to' value into froms[] */ top = &p->tos[toindex]; top->selfpc = (size_t)selfpc; top->count = 1; @@ -283,8 +282,8 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { top = &p->tos[toindex]; top->selfpc = (size_t)selfpc; top->count = 1; - top->link = *((u_short*)frompcindex); - *(u_short*)frompcindex = (u_short)toindex; + top->link = *((unsigned short*)frompcindex); + *(unsigned short*)frompcindex = (unsigned short)toindex; goto done; } /* @@ -301,8 +300,8 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { top->count++; toindex = prevtop->link; prevtop->link = top->link; - top->link = *((u_short*)frompcindex); - *((u_short*)frompcindex) = (u_short)toindex; + top->link = *((unsigned short*)frompcindex); + *((unsigned short*)frompcindex) = (unsigned short)toindex; goto done; } } diff --git a/libraries/libprof/gmon.h b/libraries/libprof/gmon.h index 5eb5180..8b5ecf0 100644 --- a/libraries/libprof/gmon.h +++ b/libraries/libprof/gmon.h @@ -125,8 +125,8 @@ struct gmonhdr { struct tostruct { size_t selfpc; /* callee address/program counter. The caller address is in froms[] array which points to tos[] array */ long count; /* how many times it has been called */ - u_short link; /* link to next entry in hash table. For tos[0] this points to the last used entry */ - u_short pad; /* additional padding bytes, to have entries 4byte aligned */ + unsigned short link; /* link to next entry in hash table. 
For tos[0] this points to the last used entry */ + unsigned short pad; /* additional padding bytes, to have entries 4byte aligned */ }; /* @@ -150,13 +150,13 @@ struct rawarc { */ struct gmonparam { int state; - u_short *kcount; /* histogram PC sample array */ + unsigned short *kcount; /* histogram PC sample array */ size_t kcountsize; /* size of kcount[] array in bytes */ - u_short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */ + unsigned short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */ size_t fromssize; /* size of froms[] array in bytes */ struct tostruct *tos; /* to struct, contains histogram counter */ size_t tossize; /* size of tos[] array in bytes */ - long tolimit; + long tolimit; size_t lowpc; /* low program counter of area */ size_t highpc; /* high program counter */ size_t textsize; /* code size */ diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c index 004af77..0654879 100644 --- a/libraries/libprof/profil.c +++ b/libraries/libprof/profil.c @@ -17,7 +17,6 @@ #include #include "profil.h" #include -#include #include "stm32f4xx_hal.h" /* __get_MSP */ @@ -30,7 +29,7 @@ extern void set_SysTick_hook(void (*hook)(void)); /* sample the current program counter */ static void SysTick_hook(void) { - size_t pc = (size_t)((uint32_t *)__get_MSP())[7]; + size_t pc = (size_t)((uint32_t *)__get_MSP())[8]; if (pc >= prof.lowpc && pc < prof.highpc) { size_t idx = PROFIDX (pc, prof.lowpc, prof.scale); prof.counter[idx]++; @@ -55,7 +54,7 @@ static int profile_on (struct profinfo *p) { * start or stop profiling * * profiling goes into the SAMPLES buffer of size SIZE (which is treated - * as an array of u_shorts of size size/2) + * as an array of unsigned shorts of size size/2) * * each bin represents a range of pc addresses from OFFSET. The number * of pc addresses in a bin depends on SCALE. 
(A scale of 65536 maps @@ -63,7 +62,7 @@ static int profile_on (struct profinfo *p) { * a scale of 1 maps each bin to 128k address). Scale may be 1 - 65536, * or zero to turn off profiling */ -int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, u_int scale) { +int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, unsigned int scale) { size_t maxbin; if (scale > 65536) { @@ -75,7 +74,7 @@ int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, memset(samples, 0, size); memset(p, 0, sizeof *p); maxbin = size >> 1; - prof.counter = (u_short*)samples; + prof.counter = (unsigned short*)samples; prof.lowpc = offset; prof.highpc = PROFADDR(maxbin, offset, scale); prof.scale = scale; @@ -88,7 +87,7 @@ int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, Every SLEEPTIME interval, the user's program counter (PC) is examined: offset is subtracted and the result is multiplied by scale. The word pointed to by this address is incremented. 
*/ -int profil (char *samples, size_t size, size_t offset, u_int scale) { +int profil (char *samples, size_t size, size_t offset, unsigned int scale) { return profile_ctl (&prof, samples, size, offset, scale); } diff --git a/libraries/libprof/profil.h b/libraries/libprof/profil.h index af7a3ed..c72dc00 100644 --- a/libraries/libprof/profil.h +++ b/libraries/libprof/profil.h @@ -48,13 +48,13 @@ typedef enum { } PROFILE_State; struct profinfo { - PROFILE_State state; /* profiling state */ - u_short *counter; /* profiling counters */ + PROFILE_State state; /* profiling state */ + unsigned short *counter; /* profiling counters */ size_t lowpc, highpc; /* range to be profiled */ - u_int scale; /* scale value of bins */ + unsigned int scale; /* scale value of bins */ }; -int profile_ctl(struct profinfo *, char *, size_t, size_t, u_int); -int profil(char *, size_t, size_t, u_int); +int profile_ctl(struct profinfo *, char *, size_t, size_t, unsigned int); +int profil(char *, size_t, size_t, unsigned int); #endif /* __PROFIL_H__ */ diff --git a/memfunc.c b/memfunc.c new file mode 100644 index 0000000..fd94b28 --- /dev/null +++ b/memfunc.c @@ -0,0 +1,101 @@ +#include +#include + +/* + * Profilable substitutes for mem*(), lacking libc_p.a + * + * This code was written with reference to newlib, but does not copy every + * quirk and loop-unrolling optimization from newlib. Its only purpose is + * to let us figure out who is calling memcpy 2 million times. 
+ */ + +#define is_word_aligned(x) (((size_t)(x) & 3) == 0) + +void *memcpy(void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t *s32 = (uint32_t *)src; + while (n >= 4) { + *d32++ = *s32++; + n -= 4; + } + d8 = (uint8_t *)d32; + s8 = (uint8_t *)s32; + } + while (n-- > 0) { + *d8++ = *s8++; + } + + return dst; +} + +void *memset(void *dst, int c, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t c8 = (uint8_t)c; + + if (n >= 4 && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t c32 = (c8 << 24) | (c8 << 16) | (c8 << 8) | (c8); + while (n >= 4) { + *d32++ = c32; + n -= 4; + } + d8 = (uint8_t *)d32; + } + while (n-- > 0) { + *d8++ = c8; + } + + return dst; +} + +int memcmp(const void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t *s32 = (uint32_t *)src; + while (n >= 4) { + if (*d32 != *s32) + break; + d32++; + s32++; + n -= 4; + } + d8 = (uint8_t *)d32; + s8 = (uint8_t *)s32; + } + while (n-- > 0) { + if (*d8 != *s8) + return (*d8 - *s8); + d8++; + s8++; + } + + return 0; +} + +void *memmove(void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if ((s8 < d8) && (d8 < s8 + n)) { + /* Destructive overlap...have to copy backwards */ + s8 += n; + d8 += n; + while (n-- > 0) { + *--d8 = *--s8; + } + return dst; + } + + return memcpy(dst, src, n); +} diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile index 6add6a8..4df60d7 100644 --- a/projects/hsm/Makefile +++ b/projects/hsm/Makefile @@ -9,9 +9,8 @@ OBJS = mgmt-cli.o \ mgmt-masterkey.o \ mgmt-misc.o \ mgmt-task.o \ - log.o - -BOARD_OBJS += $(TOPLEVEL)/task.o + log.o \ + $(TOPLEVEL)/task.o CFLAGS += 
-DNUM_RPC_TASK=4 @@ -26,6 +25,7 @@ LDFLAGS += -mfloat-abi=hard -mfpu=fpv4-sp-d16 LDFLAGS += -Wl,--gc-sections ifdef DO_PROFILING +OBJS += $(TOPLEVEL)/memfunc.o LIBS += $(LIBPROF_BLD)/libprof.a LDFLAGS += --specs=rdimon.specs -lc -lrdimon endif @@ -33,7 +33,7 @@ endif all: $(PROJ:=.elf) %.elf: %.o $(BOARD_OBJS) $(OBJS) $(LIBS) - $(CC) $(LDFLAGS) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map + $(CC) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map $(LDFLAGS) $(OBJCOPY) -O binary $*.elf $*.bin $(SIZE) $*.elf -- cgit v1.2.3 From 15d74433c59ba4410cd9b5706916a27e4a43e027 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Wed, 24 May 2017 17:53:00 -0400 Subject: Duh, actually build task_metrics --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 432260b..68f4da3 100644 --- a/Makefile +++ b/Makefile @@ -151,6 +151,9 @@ libhal-test: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a .FORCE ifdef DO_PROFILING CFLAGS += -pg -DDO_PROFILING +endif +ifdef DO_TASK_METRICS +CFLAGS += -DDO_TASK_METRICS hsm: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a $(LIBCLI_BLD)/libcli.a $(LIBPROF_BLD)/libprof.a .FORCE $(MAKE) -C projects/hsm else -- cgit v1.2.3 From 5ff8c9512db48d128cf07904f68eb5139bebf952 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Wed, 24 May 2017 18:03:19 -0400 Subject: Add task_yield_maybe --- projects/hsm/hsm.c | 5 +++++ task.c | 16 +++++++++++++++- task.h | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/projects/hsm/hsm.c b/projects/hsm/hsm.c index 8a8f441..33342c0 100644 --- a/projects/hsm/hsm.c +++ b/projects/hsm/hsm.c @@ -380,6 +380,11 @@ void hal_task_yield(void) task_yield(); } +void hal_task_yield_maybe(void) +{ + task_yield_maybe(); +} + /* A mutex to arbitrate concurrent access to the keystore. 
*/ task_mutex_t ks_mutex = { 0 }; diff --git a/task.c b/task.c index b0028e7..36e8580 100644 --- a/task.c +++ b/task.c @@ -83,12 +83,16 @@ static tcb_t *cur_task = NULL; #ifdef DO_TASK_METRICS static uint32_t tick_start = 0; -static uint32_t tick_prev = 0; static uint32_t tick_idle = 0; static uint32_t tick_max = 0; static uint32_t nyield = 0; #endif +static uint32_t tick_prev = 0; +#ifndef TASK_YIELD_THRESHOLD +#define TASK_YIELD_THRESHOLD 100 +#endif + /* Add a task. */ tcb_t *task_add(char *name, funcp_t func, void *cookie, void *stack, size_t stack_len) @@ -221,6 +225,8 @@ void task_yield(void) } tick_prev = tick; ++nyield; +#else + tick_prev = HAL_GetTick(); #endif /* If there are no other runnable tasks (and cur_task is runnable), @@ -256,6 +262,14 @@ void task_yield(void) } } +/* Yield if it's been "too long" since the last yield. + */ +void task_yield_maybe(void) +{ + if (HAL_GetTick() - tick_prev >= TASK_YIELD_THRESHOLD) + task_yield(); +} + /* Put the current task to sleep (make it non-runnable). */ void task_sleep(void) diff --git a/task.h b/task.h index de3e275..47d62d6 100644 --- a/task.h +++ b/task.h @@ -55,6 +55,7 @@ extern tcb_t *task_add(char *name, funcp_t func, void *cookie, void *stack, size extern void task_set_idle_hook(funcp_t func); extern void task_yield(void); +extern void task_yield_maybe(void); extern void task_sleep(void); extern void task_wake(tcb_t *t); -- cgit v1.2.3 From 12585a41bbf124ff8204fc1b538b9796812db747 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Wed, 3 May 2017 16:38:09 -0400 Subject: Add some task metrics. 
--- projects/hsm/Makefile | 2 ++ projects/hsm/mgmt-task.c | 34 +++++++++++++++++++++++++++++++++- task.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ task.h | 11 +++++++++++ 4 files changed, 94 insertions(+), 1 deletion(-) diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile index 927c9f1..ecd1a5d 100644 --- a/projects/hsm/Makefile +++ b/projects/hsm/Makefile @@ -13,6 +13,8 @@ OBJS = mgmt-cli.o \ BOARD_OBJS += $(TOPLEVEL)/task.o +CFLAGS += -DTASK_METRICS + CFLAGS += -DNUM_RPC_TASK=4 CFLAGS += -I$(LIBHAL_SRC) diff --git a/projects/hsm/mgmt-task.c b/projects/hsm/mgmt-task.c index ac1a737..9f6a908 100644 --- a/projects/hsm/mgmt-task.c +++ b/projects/hsm/mgmt-task.c @@ -73,10 +73,42 @@ static int cmd_task_show(struct cli_def *cli, const char *command, char *argv[], return CLI_OK; } +#ifdef TASK_METRICS +static int cmd_task_show_metrics(struct cli_def *cli, const char *command, char *argv[], int argc) +{ + struct task_metrics tm; + + task_get_metrics(&tm); + + cli_print(cli, "avg time between yields: %ld.%06ld sec", tm.avg.tv_sec, tm.avg.tv_usec); + cli_print(cli, "max time between yields: %ld.%06ld sec", tm.max.tv_sec, tm.max.tv_usec); + + return CLI_OK; +} + +static int cmd_task_reset_metrics(struct cli_def *cli, const char *command, char *argv[], int argc) +{ + task_reset_metrics(); + + return CLI_OK; +} +#endif + void configure_cli_task(struct cli_def *cli) { struct cli_command *c = cli_register_command(cli, NULL, "task", NULL, 0, 0, NULL); /* task show */ - cli_register_command(cli, c, "show", cmd_task_show, 0, 0, "Show the active tasks"); + struct cli_command *c_show = cli_register_command(cli, c, "show", cmd_task_show, 0, 0, "Show the active tasks"); + +#ifdef TASK_METRICS + /* task show metrics */ + cli_register_command(cli, c_show, "metrics", cmd_task_show_metrics, 0, 0, "Show task metrics"); + + /* task reset */ + struct cli_command *c_reset = cli_register_command(cli, c, "reset", NULL, 0, 0, NULL); + + /* task reset metrics */ + 
cli_register_command(cli, c_reset, "metrics", cmd_task_reset_metrics, 0, 0, "Reset task metrics"); +#endif } diff --git a/task.c b/task.c index 2e2ddec..e156940 100644 --- a/task.c +++ b/task.c @@ -81,6 +81,14 @@ static tcb_t *cur_task = NULL; #define STACK_GUARD_WORD 0x55AA5A5A +#ifdef TASK_METRICS +static uint32_t tick_start = 0; +static uint32_t tick_prev = 0; +static uint32_t tick_idle = 0; +static uint32_t tick_max = 0; +static uint32_t nyield = 0; +#endif + /* Add a task. */ tcb_t *task_add(char *name, funcp_t func, void *cookie, void *stack, size_t stack_len) @@ -180,6 +188,10 @@ void task_yield(void) if (tail == NULL) return; +#ifdef TASK_METRICS + uint32_t tick0 = HAL_GetTick(); +#endif + /* Find the next runnable task. Loop if every task is waiting. */ while (1) { next = next_task(); @@ -197,6 +209,20 @@ void task_yield(void) * } while (next == NULL); */ +#ifdef TASK_METRICS + uint32_t tick = HAL_GetTick(); + tick_idle += (tick - tick0); + if (tick_start == 0) + tick_start = tick; + if (tick_prev != 0) { + uint32_t duration = tick0 - tick_prev; + if (duration > tick_max) + tick_max = duration; + } + tick_prev = tick; + ++nyield; +#endif + /* If there are no other runnable tasks (and cur_task is runnable), * we don't need to context-switch. 
*/ @@ -354,3 +380,25 @@ void task_mutex_unlock(task_mutex_t *mutex) if (mutex != NULL) mutex->locked = 0; } + +#ifdef TASK_METRICS +void task_get_metrics(struct task_metrics *tm) +{ + if (tm != NULL) { + tm->avg.tv_sec = 0; + tm->avg.tv_usec = (HAL_GetTick() - tick_start - tick_idle) * 1000 / nyield; + if (tm->avg.tv_usec > 1000000) { + tm->avg.tv_sec = tm->avg.tv_usec / 1000000; + tm->avg.tv_usec = tm->avg.tv_usec % 1000000; + } + tm->max.tv_sec = tick_max / 1000; + tm->max.tv_usec = (tick_max % 1000) * 1000; + } +} + +void task_reset_metrics(void) +{ + tick_start = HAL_GetTick(); + tick_prev = tick_idle = tick_max = nyield = 0; +} +#endif diff --git a/task.h b/task.h index 6b45db8..24f87ce 100644 --- a/task.h +++ b/task.h @@ -73,4 +73,15 @@ extern void task_delay(uint32_t delay); extern void task_mutex_lock(task_mutex_t *mutex); extern void task_mutex_unlock(task_mutex_t *mutex); +#ifdef TASK_METRICS +#include + +struct task_metrics { + struct timeval avg, max; +}; + +void task_get_metrics(struct task_metrics *tm); +void task_reset_metrics(void); +#endif + #endif /* _TASK_H_ */ -- cgit v1.2.3 From 9ecd51ab1028e8033057df3117aac27f6f2cd406 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Thu, 4 May 2017 15:23:10 -0400 Subject: Copy profiling code from MCUOnEclipse. 
--- libraries/libprof/README.txt | 6 + libraries/libprof/gmon.c | 343 +++++++++++++++++++++++++++++++++++++++++++ libraries/libprof/gmon.h | 177 ++++++++++++++++++++++ libraries/libprof/profil.c | 94 ++++++++++++ libraries/libprof/profil.h | 60 ++++++++ libraries/libprof/profiler.S | 28 ++++ 6 files changed, 708 insertions(+) create mode 100644 libraries/libprof/README.txt create mode 100644 libraries/libprof/gmon.c create mode 100644 libraries/libprof/gmon.h create mode 100644 libraries/libprof/profil.c create mode 100644 libraries/libprof/profil.h create mode 100644 libraries/libprof/profiler.S diff --git a/libraries/libprof/README.txt b/libraries/libprof/README.txt new file mode 100644 index 0000000..2df8b96 --- /dev/null +++ b/libraries/libprof/README.txt @@ -0,0 +1,6 @@ +Copied from https://github.com/ErichStyger/mcuoneclipse.git, +directory Examples/KDS/FRDM-K64F120M/FRDM-K64F_Profiling/Profiling, +commit 9b7eedddd8b24968128582aedc63be95b61f782c, +dated Mon Jan 9 16:56:17 2017 +0100. +(This is in turn adapted from Cygwin, and can be found in newlib distributions.) + diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c new file mode 100644 index 0000000..2be8bb2 --- /dev/null +++ b/libraries/libprof/gmon.c @@ -0,0 +1,343 @@ +/*- + * Copyright (c) 1983, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This file is taken from Cygwin distribution. Please keep it in sync. + * The differences should be within __MINGW32__ guard. 
+ */ + +#include +#include +#include +#include +#include "gmon.h" +#include "profil.h" +#include +#include + +#define MINUS_ONE_P (-1) +#define bzero(ptr,size) memset (ptr, 0, size); +#define ERR(s) write(2, s, sizeof(s)) + +struct gmonparam _gmonparam = { GMON_PROF_OFF, NULL, 0, NULL, 0, NULL, 0, 0L, 0, 0, 0}; +static char already_setup = 0; /* flag to indicate if we need to init */ +static int s_scale; +/* see profil(2) where this is described (incorrectly) */ +#define SCALE_1_TO_1 0x10000L + +static void moncontrol(int mode); + +/* required for gcc ARM Embedded 4.9-2015-q2 */ +#if 0 +void *_sbrk(int incr) { + extern char __HeapLimit; /* Defined by the linker */ + static char *heap_end = 0; + char *prev_heap_end; + + if (heap_end==0) { + heap_end = &__HeapLimit; + } + prev_heap_end = heap_end; + heap_end += incr; + return (void *)prev_heap_end; +} +#endif + +static void *fake_sbrk(int size) { + void *rv = malloc(size); + if (rv) { + return rv; + } else { + return (void *) MINUS_ONE_P; + } +} + +void monstartup (size_t lowpc, size_t highpc) { + register size_t o; + char *cp; + struct gmonparam *p = &_gmonparam; + + /* + * round lowpc and highpc to multiples of the density we're using + * so the rest of the scaling (here and in gprof) stays in ints. 
+ */ + p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER)); + p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER)); + p->textsize = p->highpc - p->lowpc; + p->kcountsize = p->textsize / HISTFRACTION; + p->fromssize = p->textsize / HASHFRACTION; + p->tolimit = p->textsize * ARCDENSITY / 100; + if (p->tolimit < MINARCS) { + p->tolimit = MINARCS; + } else if (p->tolimit > MAXARCS) { + p->tolimit = MAXARCS; + } + p->tossize = p->tolimit * sizeof(struct tostruct); + + cp = fake_sbrk(p->kcountsize + p->fromssize + p->tossize); + if (cp == (char *)MINUS_ONE_P) { + ERR("monstartup: out of memory\n"); + return; + } + + /* zero out cp as value will be added there */ + bzero(cp, p->kcountsize + p->fromssize + p->tossize); + + p->tos = (struct tostruct *)cp; + cp += p->tossize; + p->kcount = (u_short *)cp; + cp += p->kcountsize; + p->froms = (u_short *)cp; + + p->tos[0].link = 0; + + o = p->highpc - p->lowpc; + if (p->kcountsize < o) { +#ifndef notdef + s_scale = ((float)p->kcountsize / o ) * SCALE_1_TO_1; +#else /* avoid floating point */ + int quot = o / p->kcountsize; + + if (quot >= 0x10000) + s_scale = 1; + else if (quot >= 0x100) + s_scale = 0x10000 / quot; + else if (o >= 0x800000) + s_scale = 0x1000000 / (o / (p->kcountsize >> 8)); + else + s_scale = 0x1000000 / ((o << 8) / p->kcountsize); +#endif + } else { + s_scale = SCALE_1_TO_1; + } + moncontrol(1); /* start */ +} + +void _mcleanup(void) { + static const char gmon_out[] = "gmon.out"; + int fd; + int hz; + int fromindex; + int endfrom; + size_t frompc; + int toindex; + struct rawarc rawarc; + struct gmonparam *p = &_gmonparam; + struct gmonhdr gmonhdr, *hdr; + const char *proffile; +#ifdef DEBUG + int log, len; + char dbuf[200]; +#endif + + if (p->state == GMON_PROF_ERROR) { + ERR("_mcleanup: tos overflow\n"); + } + hz = PROF_HZ; + moncontrol(0); /* stop */ + proffile = gmon_out; + fd = open(proffile , O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666); + if (fd < 0) { + perror( proffile ); + return; + 
} +#ifdef DEBUG + log = open("gmon.log", O_CREAT|O_TRUNC|O_WRONLY, 0664); + if (log < 0) { + perror("mcount: gmon.log"); + return; + } + len = sprintf(dbuf, "[mcleanup1] kcount 0x%x ssiz %d\n", + p->kcount, p->kcountsize); + write(log, dbuf, len); +#endif + hdr = (struct gmonhdr *)&gmonhdr; + hdr->lpc = p->lowpc; + hdr->hpc = p->highpc; + hdr->ncnt = p->kcountsize + sizeof(gmonhdr); + hdr->version = GMONVERSION; + hdr->profrate = hz; + write(fd, (char *)hdr, sizeof *hdr); + write(fd, p->kcount, p->kcountsize); + endfrom = p->fromssize / sizeof(*p->froms); + for (fromindex = 0; fromindex < endfrom; fromindex++) { + if (p->froms[fromindex] == 0) { + continue; + } + frompc = p->lowpc; + frompc += fromindex * HASHFRACTION * sizeof(*p->froms); + for (toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].link) { +#ifdef DEBUG + len = sprintf(dbuf, + "[mcleanup2] frompc 0x%x selfpc 0x%x count %d\n" , + frompc, p->tos[toindex].selfpc, + p->tos[toindex].count); + write(log, dbuf, len); +#endif + rawarc.raw_frompc = frompc; + rawarc.raw_selfpc = p->tos[toindex].selfpc; + rawarc.raw_count = p->tos[toindex].count; + write(fd, &rawarc, sizeof rawarc); + } + } + close(fd); +} + +/* + * Control profiling + * profiling is what mcount checks to see if + * all the data structures are ready. 
+ */ +static void moncontrol(int mode) { + struct gmonparam *p = &_gmonparam; + + if (mode) { + /* start */ + profil((char *)p->kcount, p->kcountsize, p->lowpc, s_scale); + p->state = GMON_PROF_ON; + } else { + /* stop */ + profil((char *)0, 0, 0, 0); + p->state = GMON_PROF_OFF; + } +} + +void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { + register struct tostruct *top; + register struct tostruct *prevtop; + register long toindex; + struct gmonparam *p = &_gmonparam; + + if (!already_setup) { + extern char __etext; /* end of text/code symbol, defined by linker */ + already_setup = 1; + monstartup(0x410, (uint32_t)&__etext); + } + /* + * check that we are profiling + * and that we aren't recursively invoked. + */ + if (p->state!=GMON_PROF_ON) { + goto out; + } + p->state++; + /* + * check that frompcindex is a reasonable pc value. + * for example: signal catchers get called from the stack, + * not from text space. too bad. + */ + frompcindex = (uint32_t*)((long)frompcindex - (long)p->lowpc); + if ((unsigned long)frompcindex > p->textsize) { + goto done; + } + frompcindex = (uint32_t*)&p->froms[((long)frompcindex) / (HASHFRACTION * sizeof(*p->froms))]; + toindex = *((u_short*)frompcindex); /* get froms[] value */ + if (toindex == 0) { + /* + * first time traversing this arc + */ + toindex = ++p->tos[0].link; /* the link of tos[0] points to the last used record in the array */ + if (toindex >= p->tolimit) { /* more tos[] entries than we can handle! */ + goto overflow; + } + *((u_short*)frompcindex) = (u_short)toindex; /* store new 'to' value into froms[] */ + top = &p->tos[toindex]; + top->selfpc = (size_t)selfpc; + top->count = 1; + top->link = 0; + goto done; + } + top = &p->tos[toindex]; + if (top->selfpc == (size_t)selfpc) { + /* + * arc at front of chain; usual case. + */ + top->count++; + goto done; + } + /* + * have to go looking down chain for it. + * top points to what we are looking at, + * prevtop points to previous top. 
+ * we know it is not at the head of the chain. + */ + for (; /* goto done */; ) { + if (top->link == 0) { + /* + * top is end of the chain and none of the chain + * had top->selfpc == selfpc. + * so we allocate a new tostruct + * and link it to the head of the chain. + */ + toindex = ++p->tos[0].link; + if (toindex >= p->tolimit) { + goto overflow; + } + top = &p->tos[toindex]; + top->selfpc = (size_t)selfpc; + top->count = 1; + top->link = *((u_short*)frompcindex); + *(u_short*)frompcindex = (u_short)toindex; + goto done; + } + /* + * otherwise, check the next arc on the chain. + */ + prevtop = top; + top = &p->tos[top->link]; + if (top->selfpc == (size_t)selfpc) { + /* + * there it is. + * increment its count + * move it to the head of the chain. + */ + top->count++; + toindex = prevtop->link; + prevtop->link = top->link; + top->link = *((u_short*)frompcindex); + *((u_short*)frompcindex) = (u_short)toindex; + goto done; + } + } + done: + p->state--; + /* and fall through */ + out: + return; /* normal return restores saved registers */ + overflow: + p->state++; /* halt further profiling */ + #define TOLIMIT "mcount: tos overflow\n" + write (2, TOLIMIT, sizeof(TOLIMIT)); + goto out; +} + +void _monInit(void) { + _gmonparam.state = GMON_PROF_OFF; + already_setup = 0; +} diff --git a/libraries/libprof/gmon.h b/libraries/libprof/gmon.h new file mode 100644 index 0000000..5eb5180 --- /dev/null +++ b/libraries/libprof/gmon.h @@ -0,0 +1,177 @@ +/* $OpenBSD: gmon.h,v 1.3 1996/04/21 22:31:46 deraadt Exp $ */ +/* $NetBSD: gmon.h,v 1.5 1996/04/09 20:55:30 cgd Exp $ */ + +/*- + * Copyright (c) 1982, 1986, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)gmon.h 8.2 (Berkeley) 1/4/94 + */ + +/* + * This file is taken from Cygwin distribution. Please keep it in sync. + * The differences should be within __MINGW32__ guard. + */ + +#ifndef _SYS_GMON_H_ +#define _SYS_GMON_H_ + +#ifndef __P +#define __P(x) x +#endif + +/* On POSIX systems, profile.h is a KRB5 header. To avoid collisions, just + pull in profile.h's content here. The profile.h header won't be provided + by Mingw-w64 anymore at one point. 
*/ +#if 0 +#include +#else +#ifndef _WIN64 +#define _MCOUNT_CALL __attribute__ ((regparm (2))) +extern void _mcount(void); +#else +#define _MCOUNT_CALL +extern void mcount(void); +#endif +#define _MCOUNT_DECL __attribute__((gnu_inline)) __inline__ \ + void _MCOUNT_CALL _mcount_private +#define MCOUNT +#endif + +/* + * Structure prepended to gmon.out profiling data file. + */ +struct gmonhdr { + size_t lpc; /* base pc address of sample buffer */ + size_t hpc; /* max pc address of sampled buffer */ + int ncnt; /* size of sample buffer (plus this header) */ + int version; /* version number */ + int profrate; /* profiling clock rate */ + int spare[3]; /* reserved */ +}; +#define GMONVERSION 0x00051879 + +/* + * histogram counters are unsigned shorts (according to the kernel). + */ +#define HISTCOUNTER unsigned short + +/* + * fraction of text space to allocate for histogram counters here, 1/2 + */ +#define HISTFRACTION 2 + +/* + * Fraction of text space to allocate for from hash buckets. + * The value of HASHFRACTION is based on the minimum number of bytes + * of separation between two subroutine call points in the object code. + * Given MIN_SUBR_SEPARATION bytes of separation the value of + * HASHFRACTION is calculated as: + * + * HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1); + * + * For example, on the VAX, the shortest two call sequence is: + * + * calls $0,(r0) + * calls $0,(r0) + * + * which is separated by only three bytes, thus HASHFRACTION is + * calculated as: + * + * HASHFRACTION = 3 / (2 * 2 - 1) = 1 + * + * Note that the division above rounds down, thus if MIN_SUBR_FRACTION + * is less than three, this algorithm will not work! + * + * In practice, however, call instructions are rarely at a minimal + * distance. Hence, we will define HASHFRACTION to be 2 across all + * architectures. This saves a reasonable amount of space for + * profiling data structures without (in practice) sacrificing + * any granularity. 
+ */ +#define HASHFRACTION 2 + +/* + * percent of text space to allocate for tostructs with a minimum. + */ +#define ARCDENSITY 2 /* this is in percentage, relative to text size! */ +#define MINARCS 50 +#define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER))) - 2) + +struct tostruct { + size_t selfpc; /* callee address/program counter. The caller address is in froms[] array which points to tos[] array */ + long count; /* how many times it has been called */ + u_short link; /* link to next entry in hash table. For tos[0] this points to the last used entry */ + u_short pad; /* additional padding bytes, to have entries 4byte aligned */ +}; + +/* + * a raw arc, with pointers to the calling site and + * the called site and a count. + */ +struct rawarc { + size_t raw_frompc; + size_t raw_selfpc; + long raw_count; +}; + +/* + * general rounding functions. + */ +#define ROUNDDOWN(x,y) (((x)/(y))*(y)) +#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) + +/* + * The profiling data structures are housed in this structure. + */ +struct gmonparam { + int state; + u_short *kcount; /* histogram PC sample array */ + size_t kcountsize; /* size of kcount[] array in bytes */ + u_short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */ + size_t fromssize; /* size of froms[] array in bytes */ + struct tostruct *tos; /* to struct, contains histogram counter */ + size_t tossize; /* size of tos[] array in bytes */ + long tolimit; + size_t lowpc; /* low program counter of area */ + size_t highpc; /* high program counter */ + size_t textsize; /* code size */ +}; +extern struct gmonparam _gmonparam; + +/* + * Possible states of profiling. 
+ */ +#define GMON_PROF_ON 0 +#define GMON_PROF_BUSY 1 +#define GMON_PROF_ERROR 2 +#define GMON_PROF_OFF 3 + +void _mcleanup(void); /* routine to be called to write gmon.out file */ +void _monInit(void); /* initialization routine */ + +#endif /* !_SYS_GMONH_ */ diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c new file mode 100644 index 0000000..24ede21 --- /dev/null +++ b/libraries/libprof/profil.c @@ -0,0 +1,94 @@ +/* profil.c -- win32 profil.c equivalent + + Copyright 1998, 1999, 2000, 2001, 2002 Red Hat, Inc. + + This file is part of Cygwin. + + This software is a copyrighted work licensed under the terms of the + Cygwin license. Please consult the file "CYGWIN_LICENSE" for + details. */ + +/* + * This file is taken from Cygwin distribution, adopted to be used for bare embeeded targets. + */ +#include +#include +#include +#include +#include "profil.h" +#include +#include + +/* global profinfo for profil() call */ +static struct profinfo prof = { + PROFILE_NOT_INIT, 0, 0, 0, 0 +}; + +/* sample the current program counter */ +void SysTick_Handler(void) { + void OSA_SysTick_Handler(void); + static size_t pc, idx; + + OSA_SysTick_Handler(); /* call normal Kinetis SDK SysTick handler */ + if (prof.state==PROFILE_ON) { + pc = ((uint32_t*)(__builtin_frame_address(0)))[14]; /* get SP and use it to get the return address from stack */ + if (pc >= prof.lowpc && pc < prof.highpc) { + idx = PROFIDX (pc, prof.lowpc, prof.scale); + prof.counter[idx]++; + } + } +} + +/* Stop profiling to the profiling buffer pointed to by p. */ +static int profile_off (struct profinfo *p) { + p->state = PROFILE_OFF; + return 0; +} + +/* Create a timer thread and pass it a pointer P to the profiling buffer. 
*/ +static int profile_on (struct profinfo *p) { + p->state = PROFILE_ON; + return 0; /* ok */ +} + +/* + * start or stop profiling + * + * profiling goes into the SAMPLES buffer of size SIZE (which is treated + * as an array of u_shorts of size size/2) + * + * each bin represents a range of pc addresses from OFFSET. The number + * of pc addresses in a bin depends on SCALE. (A scale of 65536 maps + * each bin to two addresses, A scale of 32768 maps each bin to 4 addresses, + * a scale of 1 maps each bin to 128k address). Scale may be 1 - 65536, + * or zero to turn off profiling + */ +int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, u_int scale) { + size_t maxbin; + + if (scale > 65536) { + errno = EINVAL; + return -1; + } + profile_off(p); + if (scale) { + memset(samples, 0, size); + memset(p, 0, sizeof *p); + maxbin = size >> 1; + prof.counter = (u_short*)samples; + prof.lowpc = offset; + prof.highpc = PROFADDR(maxbin, offset, scale); + prof.scale = scale; + return profile_on(p); + } + return 0; +} + +/* Equivalent to unix profil() + Every SLEEPTIME interval, the user's program counter (PC) is examined: + offset is subtracted and the result is multiplied by scale. + The word pointed to by this address is incremented. */ +int profil (char *samples, size_t size, size_t offset, u_int scale) { + return profile_ctl (&prof, samples, size, offset, scale); +} + diff --git a/libraries/libprof/profil.h b/libraries/libprof/profil.h new file mode 100644 index 0000000..af7a3ed --- /dev/null +++ b/libraries/libprof/profil.h @@ -0,0 +1,60 @@ +/* profil.h: gprof profiling header file + + Copyright 1998, 1999, 2000, 2001, 2002 Red Hat, Inc. + +This file is part of Cygwin. + +This software is a copyrighted work licensed under the terms of the +Cygwin license. Please consult the file "CYGWIN_LICENSE" for +details. */ + +/* + * This file is taken from Cygwin distribution. Please keep it in sync. + * The differences should be within __MINGW32__ guard. 
+ */ + +#ifndef __PROFIL_H__ +#define __PROFIL_H__ + +/* profiling frequency. (No larger than 1000) */ +#define PROF_HZ 1000 + +/* convert an addr to an index */ +#define PROFIDX(pc, base, scale) \ + ({ \ + size_t i = (pc - base) / 2; \ + if (sizeof (unsigned long long int) > sizeof (size_t)) \ + i = (unsigned long long int) i * scale / 65536; \ + else \ + i = i / 65536 * scale + i % 65536 * scale / 65536; \ + i; \ + }) + +/* convert an index into an address */ +#define PROFADDR(idx, base, scale) \ + ((base) \ + + ((((unsigned long long)(idx) << 16) \ + / (unsigned long long)(scale)) << 1)) + +/* convert a bin size into a scale */ +#define PROFSCALE(range, bins) (((bins) << 16) / ((range) >> 1)) + +typedef void *_WINHANDLE; + +typedef enum { + PROFILE_NOT_INIT = 0, + PROFILE_ON, + PROFILE_OFF +} PROFILE_State; + +struct profinfo { + PROFILE_State state; /* profiling state */ + u_short *counter; /* profiling counters */ + size_t lowpc, highpc; /* range to be profiled */ + u_int scale; /* scale value of bins */ +}; + +int profile_ctl(struct profinfo *, char *, size_t, size_t, u_int); +int profil(char *, size_t, size_t, u_int); + +#endif /* __PROFIL_H__ */ diff --git a/libraries/libprof/profiler.S b/libraries/libprof/profiler.S new file mode 100644 index 0000000..1aa5c97 --- /dev/null +++ b/libraries/libprof/profiler.S @@ -0,0 +1,28 @@ +/* + * profiler.S + * Implements the gprof profiler arc counting function. 
+ * Created on: 06.08.2015 + * Author: Erich Styger + */ + + .syntax unified + .arch armv7-m + +.globl __gnu_mcount_nc +.type __gnu_mcount_nc, %function + +__gnu_mcount_nc: +#if 0 /* dummy version, doing nothing */ + mov ip, lr + pop { lr } + bx ip +#else + push {r0, r1, r2, r3, lr} /* save registers */ + bic r1, lr, #1 /* R1 contains callee address, with thumb bit cleared */ + ldr r0, [sp, #20] /* R0 contains caller address */ + bic r0, r0, #1 /* clear thumb bit */ + bl _mcount_internal /* jump to internal _mcount() implementation */ + pop {r0, r1, r2, r3, ip, lr} /* restore saved registers */ + bx ip /* return to caller */ +#endif + -- cgit v1.2.3 From 358d038067b10330011ef4e6596ae37842d516fe Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Fri, 5 May 2017 22:58:34 -0400 Subject: Port profiling code, using a new SysTick hook and new CLI commands. --- Makefile | 15 ++++- libraries/libprof/README.txt | 65 ++++++++++++++++++++-- libraries/libprof/gmon.c | 61 +++++++------------- libraries/libprof/profil.c | 22 ++++---- .../TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c | 7 +++ projects/hsm/Makefile | 11 +++- projects/hsm/mgmt-misc.c | 28 ++++++++++ syscalls.c | 6 ++ 8 files changed, 157 insertions(+), 58 deletions(-) diff --git a/Makefile b/Makefile index e058edd..8083273 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,9 @@ LIBCLI_BLD = $(LIBS_DIR)/libcli LIBTFM_SRC = $(CRYPTECH_ROOT)/sw/thirdparty/libtfm LIBTFM_BLD = $(LIBS_DIR)/libtfm +LIBPROF_SRC = $(LIBS_DIR)/libprof +LIBPROF_BLD = $(LIBS_DIR)/libprof + LIBS = $(MBED_DIR)/libstmf4.a # linker script @@ -106,7 +109,7 @@ CFLAGS += -DUSE_STDPERIPH_DRIVER -DSTM32F4XX -DSTM32F429xx CFLAGS += -D__CORTEX_M4 -DTARGET_STM -DTARGET_STM32F4 -DTARGET_STM32F429ZI -DTOOLCHAIN_GCC -D__FPU_PRESENT=1 -D$(BOARD) CFLAGS += -DENABLE_WEAK_FUNCTIONS CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -CFLAGS += -std=c99 +CFLAGS += -std=gnu99 CFLAGS += -I$(TOPLEVEL) CFLAGS += -I$(MBED_DIR)/api CFLAGS += -I$(MBED_DIR)/targets/cmsis 
@@ -141,11 +144,20 @@ $(LIBHAL_BLD)/libhal.a: $(LIBTFM_BLD)/libtfm.a .FORCE $(LIBCLI_BLD)/libcli.a: .FORCE $(MAKE) -C $(LIBCLI_BLD) +$(LIBPROF_BLD)/libprof.a: .FORCE + $(MAKE) -C $(LIBPROF_BLD) + libhal-test: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a .FORCE $(MAKE) -C projects/libhal-test +ifdef DO_PROFILING +CFLAGS += -pg -DDO_PROFILING +hsm: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a $(LIBCLI_BLD)/libcli.a $(LIBPROF_BLD)/libprof.a .FORCE + $(MAKE) -C projects/hsm +else hsm: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a $(LIBCLI_BLD)/libcli.a .FORCE $(MAKE) -C projects/hsm +endif bootloader: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a .FORCE $(MAKE) -C projects/bootloader @@ -177,3 +189,4 @@ distclean: clean $(MAKE) -C $(MBED_DIR) clean $(MAKE) -C $(LIBTFM_BLD) clean $(MAKE) -C $(LIBCLI_BLD) clean + $(MAKE) -C $(LIBPROF_BLD) clean diff --git a/libraries/libprof/README.txt b/libraries/libprof/README.txt index 2df8b96..da138c2 100644 --- a/libraries/libprof/README.txt +++ b/libraries/libprof/README.txt @@ -1,6 +1,61 @@ -Copied from https://github.com/ErichStyger/mcuoneclipse.git, -directory Examples/KDS/FRDM-K64F120M/FRDM-K64F_Profiling/Profiling, -commit 9b7eedddd8b24968128582aedc63be95b61f782c, -dated Mon Jan 9 16:56:17 2017 +0100. -(This is in turn adapted from Cygwin, and can be found in newlib distributions.) +Profiling the Cryptech Alpha +============================ +Origin +------ + +This code was copied from https://github.com/ErichStyger/mcuoneclipse.git, +directory Examples/KDS/FRDM-K64F120M/FRDM-K64F_Profiling/Profiling, commit +9b7eedddd8b24968128582aedc63be95b61f782c, dated Mon Jan 9 16:56:17 2017 +0100. + +References +---------- + +I recommend reading both of these to understand how the profiling code works. 
+ +[1]: https://mcuoneclipse.com/2015/08/23/tutorial-using-gnu-profiling-gprof-with-arm-cortex-m/ +"Tutorial: Using GNU Profiling (gprof) with ARM Cortex-M" + +[2]: http://bgamari.github.io/posts/2014-10-31-semihosting.html +"Semihosting with ARM, GCC, and OpenOCD" + +How to build +------------ + +From the top level, run + + make DO_PROFILING=1 hsm + +By default, all code is profiled, *except* the profiling code itself, +because that would cause fatal recursion. + +How to run +---------- + +You need to start OpenOCD on the host, and enable semihosting, at least +before you try to use it as a remote file system. + +I recommend executing the following in the projects/hsm directory, so that +gmon.out ends up in the same directory as hsm.elf. + +Start OpenOCD: + + $ openocd -f /usr/share/openocd/scripts/board/stm32f4discovery.cfg & + +Connect to OpenOCD: + + $ telnet localhost 4444 + +In the OpenOCD console, enable semihosting: + + > arm semihosting enable + +In the CLI, type `profile start`, then start the unit test or whatever +will be exercising the hsm. Afterwards, in the CLI, type `profile stop`. + +After invoking `profile stop`, it takes almost 2 minutes to write gmon.out +over OpenOCD to the host. 
+ +In the projects/hsm directory, run gprof to analyse the gmon.out file: + + $ gprof hsm.elf >gprof.txt diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c index 2be8bb2..458028b 100644 --- a/libraries/libprof/gmon.c +++ b/libraries/libprof/gmon.c @@ -41,7 +41,6 @@ #include #include -#define MINUS_ONE_P (-1) #define bzero(ptr,size) memset (ptr, 0, size); #define ERR(s) write(2, s, sizeof(s)) @@ -53,43 +52,30 @@ static int s_scale; static void moncontrol(int mode); -/* required for gcc ARM Embedded 4.9-2015-q2 */ -#if 0 -void *_sbrk(int incr) { - extern char __HeapLimit; /* Defined by the linker */ - static char *heap_end = 0; - char *prev_heap_end; - - if (heap_end==0) { - heap_end = &__HeapLimit; - } - prev_heap_end = heap_end; - heap_end += incr; - return (void *)prev_heap_end; -} -#endif - -static void *fake_sbrk(int size) { - void *rv = malloc(size); - if (rv) { - return rv; - } else { - return (void *) MINUS_ONE_P; - } -} - void monstartup (size_t lowpc, size_t highpc) { register size_t o; char *cp; struct gmonparam *p = &_gmonparam; + if (already_setup) { + /* zero out cp as value will be added there */ + bzero(p->tos, p->kcountsize + p->fromssize + p->tossize); + moncontrol(1); /* start */ + return; + } + already_setup = 1; + + /* enable semihosting, for eventual output */ + extern void initialise_monitor_handles(void); + initialise_monitor_handles(); + /* * round lowpc and highpc to multiples of the density we're using * so the rest of the scaling (here and in gprof) stays in ints. 
*/ p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER)); p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER)); - p->textsize = p->highpc - p->lowpc; + p->textsize = p->highpc - p->lowpc + 0x20; p->kcountsize = p->textsize / HISTFRACTION; p->fromssize = p->textsize / HASHFRACTION; p->tolimit = p->textsize * ARCDENSITY / 100; @@ -100,8 +86,9 @@ void monstartup (size_t lowpc, size_t highpc) { } p->tossize = p->tolimit * sizeof(struct tostruct); - cp = fake_sbrk(p->kcountsize + p->fromssize + p->tossize); - if (cp == (char *)MINUS_ONE_P) { + extern void *hal_allocate_static_memory(const size_t size); + cp = hal_allocate_static_memory(p->kcountsize + p->fromssize + p->tossize); + if (cp == NULL) { ERR("monstartup: out of memory\n"); return; } @@ -142,14 +129,13 @@ void monstartup (size_t lowpc, size_t highpc) { void _mcleanup(void) { static const char gmon_out[] = "gmon.out"; int fd; - int hz; int fromindex; int endfrom; size_t frompc; int toindex; struct rawarc rawarc; struct gmonparam *p = &_gmonparam; - struct gmonhdr gmonhdr, *hdr; + struct gmonhdr gmonhdr = {0}, *hdr; const char *proffile; #ifdef DEBUG int log, len; @@ -159,7 +145,6 @@ void _mcleanup(void) { if (p->state == GMON_PROF_ERROR) { ERR("_mcleanup: tos overflow\n"); } - hz = PROF_HZ; moncontrol(0); /* stop */ proffile = gmon_out; fd = open(proffile , O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666); @@ -174,7 +159,7 @@ void _mcleanup(void) { return; } len = sprintf(dbuf, "[mcleanup1] kcount 0x%x ssiz %d\n", - p->kcount, p->kcountsize); + (unsigned int)p->kcount, p->kcountsize); write(log, dbuf, len); #endif hdr = (struct gmonhdr *)&gmonhdr; @@ -182,7 +167,8 @@ void _mcleanup(void) { hdr->hpc = p->highpc; hdr->ncnt = p->kcountsize + sizeof(gmonhdr); hdr->version = GMONVERSION; - hdr->profrate = hz; + hdr->profrate = PROF_HZ; + hdr->spare[0] = hdr->spare[1] = hdr->spare[2] = 0; write(fd, (char *)hdr, sizeof *hdr); write(fd, p->kcount, p->kcountsize); endfrom = p->fromssize / sizeof(*p->froms); 
@@ -195,7 +181,7 @@ void _mcleanup(void) { for (toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].link) { #ifdef DEBUG len = sprintf(dbuf, - "[mcleanup2] frompc 0x%x selfpc 0x%x count %d\n" , + "[mcleanup2] frompc 0x%x selfpc 0x%x count %ld\n" , frompc, p->tos[toindex].selfpc, p->tos[toindex].count); write(log, dbuf, len); @@ -234,11 +220,6 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { register long toindex; struct gmonparam *p = &_gmonparam; - if (!already_setup) { - extern char __etext; /* end of text/code symbol, defined by linker */ - already_setup = 1; - monstartup(0x410, (uint32_t)&__etext); - } /* * check that we are profiling * and that we aren't recursively invoked. diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c index 24ede21..07761dd 100644 --- a/libraries/libprof/profil.c +++ b/libraries/libprof/profil.c @@ -9,7 +9,7 @@ details. */ /* - * This file is taken from Cygwin distribution, adopted to be used for bare embeeded targets. + * This file is taken from Cygwin distribution, adapted to be used for bare embedded targets. 
*/ #include #include @@ -19,34 +19,34 @@ #include #include +#include "stm32f4xx_hal.h" /* __get_MSP */ + /* global profinfo for profil() call */ static struct profinfo prof = { PROFILE_NOT_INIT, 0, 0, 0, 0 }; -/* sample the current program counter */ -void SysTick_Handler(void) { - void OSA_SysTick_Handler(void); - static size_t pc, idx; +extern void set_SysTick_hook(void (*hook)(void)); - OSA_SysTick_Handler(); /* call normal Kinetis SDK SysTick handler */ - if (prof.state==PROFILE_ON) { - pc = ((uint32_t*)(__builtin_frame_address(0)))[14]; /* get SP and use it to get the return address from stack */ - if (pc >= prof.lowpc && pc < prof.highpc) { - idx = PROFIDX (pc, prof.lowpc, prof.scale); +/* sample the current program counter */ +static void SysTick_hook(void) { + size_t pc = (size_t)((uint32_t *)__get_MSP())[5]; + if (pc >= prof.lowpc && pc < prof.highpc) { + size_t idx = PROFIDX (pc, prof.lowpc, prof.scale); prof.counter[idx]++; - } } } /* Stop profiling to the profiling buffer pointed to by p. */ static int profile_off (struct profinfo *p) { + set_SysTick_hook(NULL); p->state = PROFILE_OFF; return 0; } /* Create a timer thread and pass it a pointer P to the profiling buffer. 
*/ static int profile_on (struct profinfo *p) { + set_SysTick_hook(SysTick_hook); p->state = PROFILE_ON; return 0; /* ok */ } diff --git a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c index 32b7707..b8b6fce 100644 --- a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c +++ b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c @@ -64,6 +64,13 @@ void HardFault_Handler(void) while (1) { ; } } +static void default_SysTick_hook(void) { }; +static void (*SysTick_hook)(void) = default_SysTick_hook; +void set_SysTick_hook(void (*hook)(void)) +{ + SysTick_hook = (hook == NULL) ? default_SysTick_hook : hook; +} + /** * @brief This function handles SysTick Handler. * @param None diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile index ecd1a5d..7efd41d 100644 --- a/projects/hsm/Makefile +++ b/projects/hsm/Makefile @@ -23,10 +23,19 @@ CFLAGS += -I$(LIBCLI_SRC) LIBS += $(LIBHAL_BLD)/libhal.a $(LIBTFM_BLD)/libtfm.a LIBS += $(LIBCLI_BLD)/libcli.a +LDFLAGS += -mcpu=cortex-m4 -mthumb -mlittle-endian -mthumb-interwork +LDFLAGS += -mfloat-abi=hard -mfpu=fpv4-sp-d16 +LDFLAGS += -Wl,--gc-sections + +ifdef DO_PROFILING +LIBS += $(LIBPROF_BLD)/libprof.a +LDFLAGS += --specs=rdimon.specs -lc -lrdimon +endif + all: $(PROJ:=.elf) %.elf: %.o $(BOARD_OBJS) $(OBJS) $(LIBS) - $(CC) $(CFLAGS) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map + $(CC) $(LDFLAGS) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map $(OBJCOPY) -O binary $*.elf $*.bin $(SIZE) $*.elf diff --git a/projects/hsm/mgmt-misc.c b/projects/hsm/mgmt-misc.c index ccd032b..016d7cb 100644 --- a/projects/hsm/mgmt-misc.c +++ b/projects/hsm/mgmt-misc.c @@ -113,6 +113,25 @@ int cli_receive_data(struct cli_def *cli, uint8_t *buf, size_t len, cli_data_cal return CLI_ERROR; } +#ifdef DO_PROFILING +static int 
cmd_profile_start(struct cli_def *cli, const char *command, char *argv[], int argc) +{ + extern uint32_t CRYPTECH_FIRMWARE_START; + extern char __etext; /* end of text/code symbol, defined by linker */ + extern void monstartup (size_t lowpc, size_t highpc); + monstartup((size_t)&CRYPTECH_FIRMWARE_START, (size_t)&__etext); + return CLI_OK; +} + +static int cmd_profile_stop(struct cli_def *cli, const char *command, char *argv[], int argc) +{ + extern void _mcleanup(void); + _mcleanup(); + return CLI_OK; +} + +#endif + static int cmd_reboot(struct cli_def *cli, const char *command, char *argv[], int argc) { cli_print(cli, "\n\n\nRebooting\n\n\n"); @@ -124,6 +143,15 @@ static int cmd_reboot(struct cli_def *cli, const char *command, char *argv[], in void configure_cli_misc(struct cli_def *cli) { +#ifdef DO_PROFILING + struct cli_command *c_profile = cli_register_command(cli, NULL, "profile", NULL, 0, 0, NULL); + + /* profile start */ + cli_register_command(cli, c_profile, "start", cmd_profile_start, 0, 0, "Start collecting profiling data"); + + /* profile stop */ + cli_register_command(cli, c_profile, "stop", cmd_profile_stop, 0, 0, "Stop collecting profiling data"); +#endif /* reboot */ cli_register_command(cli, NULL, "reboot", cmd_reboot, 0, 0, "Reboot the STM32"); } diff --git a/syscalls.c b/syscalls.c index d7b7211..1624454 100644 --- a/syscalls.c +++ b/syscalls.c @@ -48,6 +48,7 @@ /***************************************************************************/ +#ifndef DO_PROFILING int _read_r (struct _reent *r, int file, char * ptr, int len) { r = r; @@ -103,6 +104,7 @@ int _close_r (struct _reent *r, int file) { return 0; } +#endif /***************************************************************************/ @@ -143,6 +145,7 @@ caddr_t _sbrk_r (struct _reent *r, int incr) /***************************************************************************/ +#ifndef DO_PROFILING int _fstat_r (struct _reent *r, int file, struct stat * st) { r = r; @@ -181,6 +184,7 @@ int 
_kill (int a, int b) return 0; } +#endif /***************************************************************************/ @@ -193,6 +197,7 @@ int _getpid(int a) /***************************************************************************/ +#ifndef DO_PROFILING int _open(int a, int b) { a = a; @@ -200,5 +205,6 @@ int _open(int a, int b) return 0; } +#endif /*** EOF ***/ -- cgit v1.2.3 From b2858c0eabeb2aba36ad7b5a964d52e51711c8df Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Sat, 6 May 2017 13:07:59 -0400 Subject: Correct offset to get the PC. A previous version of this code ran over the RTOS, where threads used the Process Stack, while the SysTick interrupt used the Main Stack. Now everything's on the main stack, so we need to account for 2 extra words that SysTick_Handler pushes on the stack at entry. --- libraries/libprof/README.txt | 4 ++-- libraries/libprof/profil.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libraries/libprof/README.txt b/libraries/libprof/README.txt index da138c2..9db27a6 100644 --- a/libraries/libprof/README.txt +++ b/libraries/libprof/README.txt @@ -53,8 +53,8 @@ In the OpenOCD console, enable semihosting: In the CLI, type `profile start`, then start the unit test or whatever will be exercising the hsm. Afterwards, in the CLI, type `profile stop`. -After invoking `profile stop`, it takes almost 2 minutes to write gmon.out -over OpenOCD to the host. +After invoking `profile stop`, it can take several minutes to write +gmon.out over OpenOCD to the host. 
In the projects/hsm directory, run gprof to analyse the gmon.out file: diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c index 07761dd..004af77 100644 --- a/libraries/libprof/profil.c +++ b/libraries/libprof/profil.c @@ -30,7 +30,7 @@ extern void set_SysTick_hook(void (*hook)(void)); /* sample the current program counter */ static void SysTick_hook(void) { - size_t pc = (size_t)((uint32_t *)__get_MSP())[5]; + size_t pc = (size_t)((uint32_t *)__get_MSP())[7]; if (pc >= prof.lowpc && pc < prof.highpc) { size_t idx = PROFIDX (pc, prof.lowpc, prof.scale); prof.counter[idx]++; -- cgit v1.2.3 From 03d7fa26a89d44349df86e29ac782d075856c570 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Wed, 10 May 2017 00:00:04 -0400 Subject: Sigh, right offset for the wrong register. Get the PC (the address we interrupted) rather than LR (the return address from the function we interrupted). Also, change u_short and u_int to unsigned short and unsigned int, since gcc recently decided that those aren't part of the C99 standard. Finally, add profilable versions of memcpy, memset, and friends, because they get called a lot in the course of unit testing, and it would be nice to know who's calling them. 
--- Makefile | 2 +- libraries/libprof/Makefile | 20 +++++++++ libraries/libprof/README.txt | 4 ++ libraries/libprof/gmon.c | 17 ++++---- libraries/libprof/gmon.h | 10 ++--- libraries/libprof/profil.c | 11 +++-- libraries/libprof/profil.h | 10 ++--- memfunc.c | 101 +++++++++++++++++++++++++++++++++++++++++++ projects/hsm/Makefile | 8 ++-- 9 files changed, 153 insertions(+), 30 deletions(-) create mode 100644 libraries/libprof/Makefile create mode 100644 memfunc.c diff --git a/Makefile b/Makefile index 8083273..c7af040 100644 --- a/Makefile +++ b/Makefile @@ -109,7 +109,7 @@ CFLAGS += -DUSE_STDPERIPH_DRIVER -DSTM32F4XX -DSTM32F429xx CFLAGS += -D__CORTEX_M4 -DTARGET_STM -DTARGET_STM32F4 -DTARGET_STM32F429ZI -DTOOLCHAIN_GCC -D__FPU_PRESENT=1 -D$(BOARD) CFLAGS += -DENABLE_WEAK_FUNCTIONS CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -CFLAGS += -std=gnu99 +CFLAGS += -std=c99 CFLAGS += -I$(TOPLEVEL) CFLAGS += -I$(MBED_DIR)/api CFLAGS += -I$(MBED_DIR)/targets/cmsis diff --git a/libraries/libprof/Makefile b/libraries/libprof/Makefile new file mode 100644 index 0000000..4fe5fb4 --- /dev/null +++ b/libraries/libprof/Makefile @@ -0,0 +1,20 @@ +LIB = libprof.a + +OBJS = gmon.o profil.o profiler.o + +# Don't profile the profiling code, because that way lies madness (and recursion). +CFLAGS := $(subst -pg,,$(CFLAGS)) + +all: $(LIB) + +%.o : %.c + $(CC) $(CFLAGS) -c -o $@ $< + +%.o : %.S + $(CC) $(CFLAGS) -c -o $@ $< + +$(LIB): $(OBJS) + $(AR) -r $@ $^ + +clean: + rm -f $(OBJS) $(LIB) diff --git a/libraries/libprof/README.txt b/libraries/libprof/README.txt index 9db27a6..1fe378c 100644 --- a/libraries/libprof/README.txt +++ b/libraries/libprof/README.txt @@ -50,6 +50,10 @@ In the OpenOCD console, enable semihosting: > arm semihosting enable +In another window, start the debugger: + + $ sw/stm32/bin/debug projects/hsm/hsm + In the CLI, type `profile start`, then start the unit test or whatever will be exercising the hsm. Afterwards, in the CLI, type `profile stop`. 
diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c index 458028b..92054fc 100644 --- a/libraries/libprof/gmon.c +++ b/libraries/libprof/gmon.c @@ -38,7 +38,6 @@ #include #include "gmon.h" #include "profil.h" -#include #include #define bzero(ptr,size) memset (ptr, 0, size); @@ -98,9 +97,9 @@ void monstartup (size_t lowpc, size_t highpc) { p->tos = (struct tostruct *)cp; cp += p->tossize; - p->kcount = (u_short *)cp; + p->kcount = (unsigned short *)cp; cp += p->kcountsize; - p->froms = (u_short *)cp; + p->froms = (unsigned short *)cp; p->tos[0].link = 0; @@ -238,7 +237,7 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { goto done; } frompcindex = (uint32_t*)&p->froms[((long)frompcindex) / (HASHFRACTION * sizeof(*p->froms))]; - toindex = *((u_short*)frompcindex); /* get froms[] value */ + toindex = *((unsigned short*)frompcindex); /* get froms[] value */ if (toindex == 0) { /* * first time traversing this arc @@ -247,7 +246,7 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { if (toindex >= p->tolimit) { /* more tos[] entries than we can handle! 
*/ goto overflow; } - *((u_short*)frompcindex) = (u_short)toindex; /* store new 'to' value into froms[] */ + *((unsigned short*)frompcindex) = (unsigned short)toindex; /* store new 'to' value into froms[] */ top = &p->tos[toindex]; top->selfpc = (size_t)selfpc; top->count = 1; @@ -283,8 +282,8 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { top = &p->tos[toindex]; top->selfpc = (size_t)selfpc; top->count = 1; - top->link = *((u_short*)frompcindex); - *(u_short*)frompcindex = (u_short)toindex; + top->link = *((unsigned short*)frompcindex); + *(unsigned short*)frompcindex = (unsigned short)toindex; goto done; } /* @@ -301,8 +300,8 @@ void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { top->count++; toindex = prevtop->link; prevtop->link = top->link; - top->link = *((u_short*)frompcindex); - *((u_short*)frompcindex) = (u_short)toindex; + top->link = *((unsigned short*)frompcindex); + *((unsigned short*)frompcindex) = (unsigned short)toindex; goto done; } } diff --git a/libraries/libprof/gmon.h b/libraries/libprof/gmon.h index 5eb5180..8b5ecf0 100644 --- a/libraries/libprof/gmon.h +++ b/libraries/libprof/gmon.h @@ -125,8 +125,8 @@ struct gmonhdr { struct tostruct { size_t selfpc; /* callee address/program counter. The caller address is in froms[] array which points to tos[] array */ long count; /* how many times it has been called */ - u_short link; /* link to next entry in hash table. For tos[0] this points to the last used entry */ - u_short pad; /* additional padding bytes, to have entries 4byte aligned */ + unsigned short link; /* link to next entry in hash table. 
For tos[0] this points to the last used entry */ + unsigned short pad; /* additional padding bytes, to have entries 4byte aligned */ }; /* @@ -150,13 +150,13 @@ struct rawarc { */ struct gmonparam { int state; - u_short *kcount; /* histogram PC sample array */ + unsigned short *kcount; /* histogram PC sample array */ size_t kcountsize; /* size of kcount[] array in bytes */ - u_short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */ + unsigned short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */ size_t fromssize; /* size of froms[] array in bytes */ struct tostruct *tos; /* to struct, contains histogram counter */ size_t tossize; /* size of tos[] array in bytes */ - long tolimit; + long tolimit; size_t lowpc; /* low program counter of area */ size_t highpc; /* high program counter */ size_t textsize; /* code size */ diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c index 004af77..0654879 100644 --- a/libraries/libprof/profil.c +++ b/libraries/libprof/profil.c @@ -17,7 +17,6 @@ #include #include "profil.h" #include -#include #include "stm32f4xx_hal.h" /* __get_MSP */ @@ -30,7 +29,7 @@ extern void set_SysTick_hook(void (*hook)(void)); /* sample the current program counter */ static void SysTick_hook(void) { - size_t pc = (size_t)((uint32_t *)__get_MSP())[7]; + size_t pc = (size_t)((uint32_t *)__get_MSP())[8]; if (pc >= prof.lowpc && pc < prof.highpc) { size_t idx = PROFIDX (pc, prof.lowpc, prof.scale); prof.counter[idx]++; @@ -55,7 +54,7 @@ static int profile_on (struct profinfo *p) { * start or stop profiling * * profiling goes into the SAMPLES buffer of size SIZE (which is treated - * as an array of u_shorts of size size/2) + * as an array of unsigned shorts of size size/2) * * each bin represents a range of pc addresses from OFFSET. The number * of pc addresses in a bin depends on SCALE. 
(A scale of 65536 maps @@ -63,7 +62,7 @@ static int profile_on (struct profinfo *p) { * a scale of 1 maps each bin to 128k address). Scale may be 1 - 65536, * or zero to turn off profiling */ -int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, u_int scale) { +int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, unsigned int scale) { size_t maxbin; if (scale > 65536) { @@ -75,7 +74,7 @@ int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, memset(samples, 0, size); memset(p, 0, sizeof *p); maxbin = size >> 1; - prof.counter = (u_short*)samples; + prof.counter = (unsigned short*)samples; prof.lowpc = offset; prof.highpc = PROFADDR(maxbin, offset, scale); prof.scale = scale; @@ -88,7 +87,7 @@ int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, Every SLEEPTIME interval, the user's program counter (PC) is examined: offset is subtracted and the result is multiplied by scale. The word pointed to by this address is incremented. 
*/ -int profil (char *samples, size_t size, size_t offset, u_int scale) { +int profil (char *samples, size_t size, size_t offset, unsigned int scale) { return profile_ctl (&prof, samples, size, offset, scale); } diff --git a/libraries/libprof/profil.h b/libraries/libprof/profil.h index af7a3ed..c72dc00 100644 --- a/libraries/libprof/profil.h +++ b/libraries/libprof/profil.h @@ -48,13 +48,13 @@ typedef enum { } PROFILE_State; struct profinfo { - PROFILE_State state; /* profiling state */ - u_short *counter; /* profiling counters */ + PROFILE_State state; /* profiling state */ + unsigned short *counter; /* profiling counters */ size_t lowpc, highpc; /* range to be profiled */ - u_int scale; /* scale value of bins */ + unsigned int scale; /* scale value of bins */ }; -int profile_ctl(struct profinfo *, char *, size_t, size_t, u_int); -int profil(char *, size_t, size_t, u_int); +int profile_ctl(struct profinfo *, char *, size_t, size_t, unsigned int); +int profil(char *, size_t, size_t, unsigned int); #endif /* __PROFIL_H__ */ diff --git a/memfunc.c b/memfunc.c new file mode 100644 index 0000000..fd94b28 --- /dev/null +++ b/memfunc.c @@ -0,0 +1,101 @@ +#include +#include + +/* + * Profilable substitutes for mem*(), lacking libc_p.a + * + * This code was written with reference to newlib, but does not copy every + * quirk and loop-unrolling optimization from newlib. Its only purpose is + * to let us figure out who is calling memcpy 2 million times. 
+ */ + +#define is_word_aligned(x) (((size_t)(x) & 3) == 0) + +void *memcpy(void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t *s32 = (uint32_t *)src; + while (n >= 4) { + *d32++ = *s32++; + n -= 4; + } + d8 = (uint8_t *)d32; + s8 = (uint8_t *)s32; + } + while (n-- > 0) { + *d8++ = *s8++; + } + + return dst; +} + +void *memset(void *dst, int c, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t c8 = (uint8_t)c; + + if (n >= 4 && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t c32 = (c8 << 24) | (c8 << 16) | (c8 << 8) | (c8); + while (n >= 4) { + *d32++ = c32; + n -= 4; + } + d8 = (uint8_t *)d32; + } + while (n-- > 0) { + *d8++ = c8; + } + + return dst; +} + +int memcmp(const void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t *s32 = (uint32_t *)src; + while (n >= 4) { + if (*d32 != *s32) + break; + d32++; + s32++; + n -= 4; + } + d8 = (uint8_t *)d32; + s8 = (uint8_t *)s32; + } + while (n-- > 0) { + if (*d8 != *s8) + return (*d8 - *s8); + d8++; + s8++; + } + + return 0; +} + +void *memmove(void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if ((s8 < d8) && (d8 < s8 + n)) { + /* Destructive overlap...have to copy backwards */ + s8 += n; + d8 += n; + while (n-- > 0) { + *--d8 = *--s8; + } + return dst; + } + + return memcpy(dst, src, n); +} diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile index 7efd41d..429069d 100644 --- a/projects/hsm/Makefile +++ b/projects/hsm/Makefile @@ -9,9 +9,8 @@ OBJS = mgmt-cli.o \ mgmt-masterkey.o \ mgmt-misc.o \ mgmt-task.o \ - log.o - -BOARD_OBJS += $(TOPLEVEL)/task.o + log.o \ + $(TOPLEVEL)/task.o CFLAGS += 
-DTASK_METRICS @@ -28,6 +27,7 @@ LDFLAGS += -mfloat-abi=hard -mfpu=fpv4-sp-d16 LDFLAGS += -Wl,--gc-sections ifdef DO_PROFILING +OBJS += $(TOPLEVEL)/memfunc.o LIBS += $(LIBPROF_BLD)/libprof.a LDFLAGS += --specs=rdimon.specs -lc -lrdimon endif @@ -35,7 +35,7 @@ endif all: $(PROJ:=.elf) %.elf: %.o $(BOARD_OBJS) $(OBJS) $(LIBS) - $(CC) $(LDFLAGS) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map + $(CC) $^ -o $@ -T$(LDSCRIPT) -g -Wl,-Map=$*.map $(LDFLAGS) $(OBJCOPY) -O binary $*.elf $*.bin $(SIZE) $*.elf -- cgit v1.2.3 From 2913492229286b0578f64ce8c97ef21a9af09464 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Wed, 24 May 2017 17:53:00 -0400 Subject: Duh, actually build task_metrics --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index c7af040..12e7df8 100644 --- a/Makefile +++ b/Makefile @@ -152,6 +152,9 @@ libhal-test: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a .FORCE ifdef DO_PROFILING CFLAGS += -pg -DDO_PROFILING +endif +ifdef DO_TASK_METRICS +CFLAGS += -DDO_TASK_METRICS hsm: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a $(LIBCLI_BLD)/libcli.a $(LIBPROF_BLD)/libprof.a .FORCE $(MAKE) -C projects/hsm else -- cgit v1.2.3 From 2e1f88062c7ec6cd12688ce7522e802bbf09bba1 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Wed, 24 May 2017 18:03:19 -0400 Subject: Add task_yield_maybe --- projects/hsm/hsm.c | 5 +++++ task.c | 16 +++++++++++++++- task.h | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/projects/hsm/hsm.c b/projects/hsm/hsm.c index 800edcc..b6b8820 100644 --- a/projects/hsm/hsm.c +++ b/projects/hsm/hsm.c @@ -393,6 +393,11 @@ void hal_task_yield(void) task_yield(); } +void hal_task_yield_maybe(void) +{ + task_yield_maybe(); +} + /* A mutex to arbitrate concurrent access to the keystore. 
*/ task_mutex_t ks_mutex = { 0 }; diff --git a/task.c b/task.c index e156940..d8af217 100644 --- a/task.c +++ b/task.c @@ -83,12 +83,16 @@ static tcb_t *cur_task = NULL; #ifdef TASK_METRICS static uint32_t tick_start = 0; -static uint32_t tick_prev = 0; static uint32_t tick_idle = 0; static uint32_t tick_max = 0; static uint32_t nyield = 0; #endif +static uint32_t tick_prev = 0; +#ifndef TASK_YIELD_THRESHOLD +#define TASK_YIELD_THRESHOLD 100 +#endif + /* Add a task. */ tcb_t *task_add(char *name, funcp_t func, void *cookie, void *stack, size_t stack_len) @@ -221,6 +225,8 @@ void task_yield(void) } tick_prev = tick; ++nyield; +#else + tick_prev = HAL_GetTick(); #endif /* If there are no other runnable tasks (and cur_task is runnable), @@ -256,6 +262,14 @@ void task_yield(void) } } +/* Yield if it's been "too long" since the last yield. + */ +void task_yield_maybe(void) +{ + if (HAL_GetTick() - tick_prev >= TASK_YIELD_THRESHOLD) + task_yield(); +} + /* Put the current task to sleep (make it non-runnable). 
*/ void task_sleep(void) diff --git a/task.h b/task.h index 24f87ce..73ff33f 100644 --- a/task.h +++ b/task.h @@ -55,6 +55,7 @@ extern tcb_t *task_add(char *name, funcp_t func, void *cookie, void *stack, size extern void task_set_idle_hook(funcp_t func); extern void task_yield(void); +extern void task_yield_maybe(void); extern void task_sleep(void); extern void task_wake(tcb_t *t); -- cgit v1.2.3 From f508e24f5b872a8f7d642eb4fb2217dd1497de96 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Thu, 21 Sep 2017 11:24:02 -0400 Subject: cleanup --- Makefile | 8 ++------ libraries/libprof/README.txt | 18 ++++++++++-------- libraries/libprof/gmon.c | 1 + projects/hsm/Makefile | 1 - 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index e349ab1..2b421f5 100644 --- a/Makefile +++ b/Makefile @@ -128,6 +128,8 @@ endif $(CC) $(CFLAGS) -c -o $@ $< ifdef DO_PROFILING +CFLAGS += -pg -DDO_PROFILING +LIBS += $(LIBPROF_BLD)/libprof.a all: hsm else all: board-test cli-test libhal-test hsm bootloader @@ -157,14 +159,8 @@ $(LIBPROF_BLD)/libprof.a: .FORCE libhal-test: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a .FORCE $(MAKE) -C projects/libhal-test -ifdef DO_PROFILING -CFLAGS += -pg -DDO_PROFILING -hsm: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a $(LIBCLI_BLD)/libcli.a $(LIBPROF_BLD)/libprof.a .FORCE - $(MAKE) -C projects/hsm -else hsm: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a $(LIBCLI_BLD)/libcli.a .FORCE $(MAKE) -C projects/hsm -endif bootloader: $(BOARD_OBJS) $(LIBS) $(LIBHAL_BLD)/libhal.a .FORCE $(MAKE) -C projects/bootloader diff --git a/libraries/libprof/README.txt b/libraries/libprof/README.txt index f0b8ee8..f0bacc7 100644 --- a/libraries/libprof/README.txt +++ b/libraries/libprof/README.txt @@ -24,7 +24,7 @@ How to build From the top level, run - make DO_PROFILING=1 hsm + $ make DO_PROFILING=1 hsm By default, all code is profiled, *except* the profiling code itself, because that would cause fatal recursion. 
@@ -38,24 +38,26 @@ before you try to use it as a remote file system. I recommend executing the following in the projects/hsm directory, so that gmon.out ends up in the same directory as hsm.elf. -Start OpenOCD: +Start the debugger: - $ openocd -f /usr/share/openocd/scripts/board/stm32f4discovery.cfg & + $ ../../bin/debug hsm -Connect to OpenOCD: +In another window, connect to OpenOCD: $ telnet localhost 4444 In the OpenOCD console, enable semihosting: > arm semihosting enable + > exit -In another window, start the debugger: +Then connect to the Cryptech management console: - $ ../../bin/debug hsm + $ cryptech_console -In the CLI, type `profile start`, then start the unit test or whatever -will be exercising the hsm. Afterwards, in the CLI, type `profile stop`. +In the Cryptech console, type `profile start`, then start the unit test or +whatever will be exercising the hsm. Afterwards, in the console, type +`profile stop`. After invoking `profile stop`, it can take several minutes to write gmon.out over OpenOCD to the host. diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c index 92054fc..317a173 100644 --- a/libraries/libprof/gmon.c +++ b/libraries/libprof/gmon.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "gmon.h" #include "profil.h" #include diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile index 9a75b92..3430e14 100644 --- a/projects/hsm/Makefile +++ b/projects/hsm/Makefile @@ -26,7 +26,6 @@ LDFLAGS += -Wl,--gc-sections ifdef DO_PROFILING OBJS += $(TOPLEVEL)/memfunc.o -LIBS += $(LIBPROF_BLD)/libprof.a LDFLAGS += --specs=rdimon.specs -lc -lrdimon endif -- cgit v1.2.3