From 0f8c20fe0658082c044443d0c8a51b773c457191 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Sat, 7 Jul 2018 00:40:13 +0300 Subject: Changed FMC initialization code to match the new sync FMC arbiter. Removed unnecessary code (no more double read, yay!) --- stm-fmc.c | 25 ++++++++++++++++++++++--- stm-fmc.h | 52 ++++------------------------------------------------ 2 files changed, 26 insertions(+), 51 deletions(-) diff --git a/stm-fmc.c b/stm-fmc.c index c5086b4..58df6fe 100644 --- a/stm-fmc.c +++ b/stm-fmc.c @@ -115,7 +115,7 @@ void fmc_init(void) _fmc_fpga_inst.Init.WrapMode = FMC_WRAP_MODE_DISABLE; // don't care in fixed latency mode - _fmc_fpga_inst.Init.WaitSignalActive = FMC_WAIT_TIMING_DURING_WS; + _fmc_fpga_inst.Init.WaitSignalActive = FMC_WAIT_TIMING_BEFORE_WS; // allow write access to fpga _fmc_fpga_inst.Init.WriteOperation = FMC_WRITE_OPERATION_ENABLE; @@ -155,12 +155,31 @@ void fmc_init(void) // use smallest allowed divisor for best performance fmc_timing.CLKDivision = 2; - // stm is too slow to work with min allowed 2-cycle latency - fmc_timing.DataLatency = 3; + // use min suitable for fastest transfer + fmc_timing.DataLatency = 4; // don't care in sync mode fmc_timing.AccessMode = FMC_ACCESS_MODE_A; // initialize fmc HAL_SRAM_Init(&_fmc_fpga_inst, &fmc_timing, NULL); + + // STM32 only enables FMC clock right before the very first read/write + // access. FPGA takes certain time (<= 100 us) to lock its PLL to this frequency, + // so a certain number of initial FMC transactions may be missed. One read transaction + // takes ~0.1 us (9 ticks @ 90 MHz), so doing 1000 dummy reads will make sure, that FPGA + // has already locked its PLL and is ready. Another way around is to repeatedly read + // some register that is guaranteed to have known value until reading starts returning + // correct data. 
+ + // to prevent compiler from optimizing this away, we pretent we're calculating sum + int cyc; + uint32_t sum; + volatile uint32_t part; + + for (cyc=0; cyc<1000; cyc++) + { + part = *(__IO uint32_t *)FMC_FPGA_BASE_ADDR; + sum += part; + } } diff --git a/stm-fmc.h b/stm-fmc.h index eab053d..92b261b 100644 --- a/stm-fmc.h +++ b/stm-fmc.h @@ -39,12 +39,6 @@ #define FMC_FPGA_BASE_ADDR 0x60000000 #define FMC_FPGA_ADDR_MASK 0x03FFFFFC // there are 26 physical lines, but "only" 24 usable for now -#define FMC_FPGA_NWAIT_MAX_POLL_TICKS 10 - -#define FMC_GPIO_PORT_NWAIT GPIOD -#define FMC_GPIO_PIN_NWAIT GPIO_PIN_6 - -#define FMC_NWAIT_IDLE GPIO_PIN_SET #define fmc_af_gpio(port, pins) \ GPIO_InitStruct.Pin = pins; \ @@ -58,60 +52,22 @@ extern void fmc_init(void); -static inline HAL_StatusTypeDef _fmc_nwait_idle(void) -{ - int cnt; - - // poll NWAIT (number of iterations is limited) - for (cnt=0; cnt Date: Mon, 16 Jul 2018 15:00:16 -0400 Subject: Whack various bits of sw/stm32 test code to compile on fmc_clk branch. 
--- projects/board-test/fmc-perf.c | 20 ++++---------------- projects/board-test/fmc-probe.c | 5 +---- projects/board-test/fmc-test.c | 16 ++++++---------- projects/cli-test/test-fmc.c | 12 ++---------- 4 files changed, 13 insertions(+), 40 deletions(-) diff --git a/projects/board-test/fmc-perf.c b/projects/board-test/fmc-perf.c index 71d0149..5af0946 100644 --- a/projects/board-test/fmc-perf.c +++ b/projects/board-test/fmc-perf.c @@ -31,14 +31,8 @@ static void sanity(void) uint32_t rnd, data; rnd = random(); - if (fmc_write_32(0, rnd) != 0) { - uart_send_string("fmc_write_32 failed\r\n"); - Error_Handler(); - } - if (fmc_read_32(0, &data) != 0) { - uart_send_string("fmc_read_32 failed\r\n"); - Error_Handler(); - } + fmc_write_32(0, rnd); + fmc_read_32(0, &data); if (data != rnd) { uart_send_string("Data bus fail: expected "); uart_send_hex(rnd, 8); @@ -76,10 +70,7 @@ static void test_read(void) uint32_t i, data; for (i = 0; i < TEST_NUM_ROUNDS; ++i) { - if (fmc_read_32(0, &data) != 0) { - uart_send_string("fmc_read_32 failed\r\n"); - Error_Handler(); - } + fmc_read_32(0, &data); } } @@ -88,10 +79,7 @@ static void test_write(void) uint32_t i; for (i = 0; i < TEST_NUM_ROUNDS; ++i) { - if (fmc_write_32(0, i) != 0) { - uart_send_string("fmc_write_32 failed\r\n"); - Error_Handler(); - } + fmc_write_32(0, i); } } diff --git a/projects/board-test/fmc-probe.c b/projects/board-test/fmc-probe.c index 5f7fdb5..38897ab 100644 --- a/projects/board-test/fmc-probe.c +++ b/projects/board-test/fmc-probe.c @@ -21,10 +21,7 @@ static uint32_t read0(uint32_t addr) { uint32_t data; - if (fmc_read_32(addr, &data) != 0) { - uart_send_string("fmc_read_32 failed\r\n"); - Error_Handler(); - } + fmc_read_32(addr, &data); return data; } diff --git a/projects/board-test/fmc-test.c b/projects/board-test/fmc-test.c index 1421db0..bd30dd5 100644 --- a/projects/board-test/fmc-test.c +++ b/projects/board-test/fmc-test.c @@ -158,7 +158,7 @@ int main(void) int test_fpga_data_bus(void) 
//------------------------------------------------------------------------------ { - int c, ok; + int c; uint32_t rnd, buf; HAL_StatusTypeDef hal_result; @@ -171,12 +171,10 @@ int test_fpga_data_bus(void) if (hal_result != HAL_OK) break; // write value to fpga at address 0 - ok = fmc_write_32(0, rnd); - if (ok != 0) break; + fmc_write_32(0, rnd); // read value from fpga - ok = fmc_read_32(0, &buf); - if (ok != 0) break; + fmc_read_32(0, &buf); // compare (abort testing in case of error) if (buf != rnd) @@ -218,7 +216,7 @@ int test_fpga_data_bus(void) int test_fpga_address_bus(void) //------------------------------------------------------------------------------ { - int c, ok; + int c; uint32_t rnd, buf; HAL_StatusTypeDef hal_result; @@ -239,12 +237,10 @@ int test_fpga_address_bus(void) if (rnd == 0) continue; // write dummy value to fpga at some non-zero address - ok = fmc_write_32(rnd, buf); - if (ok != 0) break; + fmc_write_32(rnd, buf); // read value from fpga - ok = fmc_read_32(0, &buf); - if (ok != 0) break; + fmc_read_32(0, &buf); // fpga receives address of 32-bit word, while we need // byte address here to compare diff --git a/projects/cli-test/test-fmc.c b/projects/cli-test/test-fmc.c index 6773cfc..d9b0c9b 100644 --- a/projects/cli-test/test-fmc.c +++ b/projects/cli-test/test-fmc.c @@ -80,16 +80,8 @@ static int _write_then_read(struct cli_def *cli, uint32_t addr, uint32_t write_b { int ok; - ok = fmc_write_32(addr, write_buf); - if (ok != 0) { - cli_print(cli, "FMC write failed: 0x%x", ok); - return 0; - } - ok = fmc_read_32(0, read_buf); - if (ok != 0) { - cli_print(cli, "FMC read failed: 0x%x", ok); - return 0; - } + fmc_write_32(addr, write_buf); + fmc_read_32(0, read_buf); return 1; } -- cgit v1.2.3 From 322e272c90488adb65edc6b0bd1ebab442f435d5 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Mon, 27 Aug 2018 17:23:23 -0400 Subject: Add startup delay to let fmc bus settle --- projects/libhal-test/main.c | 1 + 1 file changed, 1 insertion(+) diff --git 
a/projects/libhal-test/main.c b/projects/libhal-test/main.c index fff8c38..c0d9330 100644 --- a/projects/libhal-test/main.c +++ b/projects/libhal-test/main.c @@ -43,6 +43,7 @@ extern void __main(void); int main(void) { stm_init(); + HAL_Delay(500); led_on(LED_GREEN); __main(); -- cgit v1.2.3 From 01534a93197f3b1610865dd5601634549423f292 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Tue, 4 Sep 2018 23:46:09 -0400 Subject: Make parsing and display of masterkey values consistent with one another, so the user might be able to copy and paste a generated key into the CLI for recovery. Display had been a 32-byte hexdump. Parsing (manual re-entry of the KEK) had been 8 32-bit little-endian values. This has been a pain point for literally two years, albeit at a low enough pain level that I've managed to cringe and ignore it. --- libraries/libcli/Makefile | 3 +- projects/hsm/mgmt-masterkey.c | 90 ++++++++++++++++++++++++++++++------------- 2 files changed, 65 insertions(+), 28 deletions(-) diff --git a/libraries/libcli/Makefile b/libraries/libcli/Makefile index cd1d3b6..6bc805a 100644 --- a/libraries/libcli/Makefile +++ b/libraries/libcli/Makefile @@ -11,7 +11,8 @@ CFLAGS += \ -DDO_REGULAR=0 \ -DDO_SOCKET=0 \ -DDO_TAB_COMPLETION=1 \ - -DDO_TELNET=0 + -DDO_TELNET=0 \ + -DCLI_MAX_LINE_WORDS=36 CFLAGS += -Wno-unused-parameter all: libcli.a diff --git a/projects/hsm/mgmt-masterkey.c b/projects/hsm/mgmt-masterkey.c index 765cb10..e63e0e0 100644 --- a/projects/hsm/mgmt-masterkey.c +++ b/projects/hsm/mgmt-masterkey.c @@ -60,24 +60,6 @@ static char * _status2str(const hal_error_t status) } } -static int _parse_hex_groups(uint8_t *buf, size_t len, char *argv[], int argc) -{ - int i; - uint32_t *dst = (uint32_t *) buf; - uint32_t *end = (uint32_t *) buf + len - 1; - char *err_ptr = NULL; - - if (! 
argc) return 0; - - for (i = 0; i < argc; i++) { - if (dst >= end) return -1; - *dst++ = strtoul(argv[i], &err_ptr, 16); - if (*err_ptr) return -2; - } - - return 1; -} - static int cmd_masterkey_status(struct cli_def *cli, const char *command, char *argv[], int argc) { hal_error_t status; @@ -97,12 +79,54 @@ static int cmd_masterkey_status(struct cli_def *cli, const char *command, char * return CLI_OK; } +static int str_to_hex_digit(char c) +{ + if (c >= '0' && c <= '9') + c -= '0'; + else if (c >= 'a' && c <= 'f') + c = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + c = c - 'A' + 10; + else + return -1; + + return c; +} + +static inline char hex_to_str_digit(const uint8_t c) +{ + return (c < 10) ? ((char)c + '0') : ((char)c + 'A' - 10); +} + +static char *hexdump_kek(const uint8_t * const kek) +{ + /* This is only for dumping masterkey values, so has no length checks. + * Do not use it for anything else. + * + * For convenience of possibly hand-copying and hand-retyping, the key + * is divided into 8 4-byte (8-character) groups. 
+ */ + + static char buf[2 * KEK_LENGTH + 8]; + char *dst = buf; + + for (size_t i = 0; i < KEK_LENGTH; ++i) { + uint8_t b = kek[i]; + *dst++ = hex_to_str_digit(b >> 4); + *dst++ = hex_to_str_digit(b & 0xf); + if ((i & 3) == 3) + *dst++ = ' '; + } + buf[sizeof(buf) - 1] = '\0'; + + return buf; +} + static int _masterkey_set(struct cli_def *cli, char *argv[], int argc, char *label, hal_error_t (*writer)(const uint8_t * const, const size_t)) { uint8_t buf[KEK_LENGTH] = {0}; hal_error_t err; - int i; if (argc == 0) { /* fill master key with yummy randomness */ @@ -110,20 +134,32 @@ static int _masterkey_set(struct cli_def *cli, char *argv[], int argc, cli_print(cli, "Error getting random key: %s", hal_error_string(err)); return CLI_ERROR; } - cli_print(cli, "Random key:\n"); - uart_send_hexdump(buf, 0, sizeof(buf) - 1); - cli_print(cli, "\n"); + cli_print(cli, "Random key:\n%s", hexdump_kek(buf)); } else { - if ((i = _parse_hex_groups(&buf[0], sizeof(buf), argv, argc)) != 1) { - cli_print(cli, "Failed parsing master key, expected up to 8 groups of 32-bit hex chars (%i)", i); + /* input is 32 hex bytes, arranged however the user wants */ + size_t len = 0; + for (int i = 0; i < argc; ++i) { + for (char *cp = argv[i]; *cp != '\0'; ) { + int c; + if ((c = str_to_hex_digit(*cp++)) < 0) + goto errout; + buf[len] = c << 4; + if ((c = str_to_hex_digit(*cp++)) < 0) + goto errout; + buf[len] |= c & 0xf; + if (++len > KEK_LENGTH) + goto errout; + } + } + if (len < KEK_LENGTH) { + errout: + cli_print(cli, "Failed parsing master key, expected exactly %d hex bytes", KEK_LENGTH); return CLI_ERROR; } - cli_print(cli, "Parsed key:\n"); - uart_send_hexdump(buf, 0, sizeof(buf) - 1); - cli_print(cli, "\n"); + cli_print(cli, "Parsed key:\n%s", hexdump_kek(buf)); } if ((err = writer(buf, sizeof(buf))) == LIBHAL_OK) { -- cgit v1.2.3 From 871014acb42815d4215cb23b8e8dd59ca0262b4b Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Wed, 5 Sep 2018 12:54:59 -0400 Subject: When erasing the whole 
keystore, optionally preserve the login PINs. --- projects/hsm/mgmt-keystore.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/projects/hsm/mgmt-keystore.c b/projects/hsm/mgmt-keystore.c index a7fdffe..9eb42da 100644 --- a/projects/hsm/mgmt-keystore.c +++ b/projects/hsm/mgmt-keystore.c @@ -50,6 +50,7 @@ #include #include +#include #include @@ -320,13 +321,34 @@ static int cmd_keystore_erase(struct cli_def *cli, const char *command, char *ar { hal_error_t err; HAL_StatusTypeDef status; + int preserve_PINs = 0; command = command; - if (argc != 1 || strcmp(argv[0], "YesIAmSure") != 0) { - cli_print(cli, "Syntax: keystore erase YesIAmSure"); + if (argc < 1 || argc > 2 || strcmp(argv[0], "YesIAmSure") != 0) { + usage: + cli_print(cli, "Syntax: keystore erase YesIAmSure [preservePINs]"); return CLI_ERROR; } + if (argc == 2) { + if (strcasecmp(argv[1], "preservePINs") != 0) + goto usage; + else + preserve_PINs = 1; + } + + hal_user_t users[3] = { HAL_USER_NORMAL, HAL_USER_SO, HAL_USER_WHEEL }; + hal_ks_pin_t pins[3]; + if (preserve_PINs) { + for (size_t i = 0; i < 3; ++i) { + const hal_ks_pin_t *pin; + if (hal_get_pin(users[i], &pin) != HAL_OK) { + cli_print(cli, "Failed to get the PINs"); + return CLI_ERROR; + } + memcpy(&pins[i], pin, sizeof(*pin)); + } + } cli_print(cli, "OK, erasing keystore, this will take about 45 seconds..."); if ((status = keystore_erase_bulk()) != CMSIS_HAL_OK) { @@ -344,6 +366,15 @@ static int cmd_keystore_erase(struct cli_def *cli, const char *command, char *ar return CLI_ERROR; } + if (preserve_PINs) { + for (size_t i = 0; i < 3; ++i) { + if (hal_set_pin(users[i], &pins[i]) != HAL_OK) { + cli_print(cli, "Failed to restore the PINs"); + return CLI_ERROR; + } + } + } + cli_print(cli, "Keystore erased"); return CLI_OK; } -- cgit v1.2.3 From 624527539a83dbfd0dc2f03fda1cff14a1669811 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Fri, 7 Sep 2018 12:13:37 -0400 Subject: Tweak for 60MHz FMC 
clock. --- stm-fmc.c | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/stm-fmc.c b/stm-fmc.c index 58df6fe..1019531 100644 --- a/stm-fmc.c +++ b/stm-fmc.c @@ -153,7 +153,18 @@ void fmc_init(void) fmc_timing.BusTurnAroundDuration = 0; // use smallest allowed divisor for best performance - fmc_timing.CLKDivision = 2; + // + // FMC_CLK = HCLK / CLKDivision, HCLK is 180 MHz + // + // Allowed values for CLKDivision are integers >= 2. + // + // Division == 2: FMC_CLK = 180 / 2 = 90 MHz (highest allowed frequency) + // Division == 3: FMC_CLK = 180 / 3 = 60 MHz (one step below) + // ... + // + +// fmc_timing.CLKDivision = 2; // 90 MHz + fmc_timing.CLKDivision = 3; // 60 MHz // use min suitable for fastest transfer fmc_timing.DataLatency = 4; @@ -164,22 +175,21 @@ void fmc_init(void) // initialize fmc HAL_SRAM_Init(&_fmc_fpga_inst, &fmc_timing, NULL); - // STM32 only enables FMC clock right before the very first read/write - // access. FPGA takes certain time (<= 100 us) to lock its PLL to this frequency, - // so a certain number of initial FMC transactions may be missed. One read transaction - // takes ~0.1 us (9 ticks @ 90 MHz), so doing 1000 dummy reads will make sure, that FPGA - // has already locked its PLL and is ready. Another way around is to repeatedly read - // some register that is guaranteed to have known value until reading starts returning - // correct data. + // The STM32 only enables the FMC clock right before the very first read/write + // access. The FPGA takes a certain time (<= 100 us) to lock its PLL to this frequency, + // so a certain number of initial FMC transactions may be missed. One read transaction + // takes 9 ticks (~0.1 us @ 90 MHz, ~0.15 us @ 60 MHz), so doing 1000 dummy reads makes sure that the FPGA + // has already locked its PLL and is ready. An alternative is to repeatedly read + // some register that is guaranteed to hold a known value until reads start returning + // correct data. 
- // to prevent compiler from optimizing this away, we pretent we're calculating sum - int cyc; - uint32_t sum; - volatile uint32_t part; - - for (cyc=0; cyc<1000; cyc++) - { - part = *(__IO uint32_t *)FMC_FPGA_BASE_ADDR; - sum += part; - } + // to prevent compiler from optimizing this away, we pretend we're calculating sum + int cyc; + uint32_t sum; + volatile uint32_t part; + + for (cyc = 0; cyc < 1000; cyc++) { + part = *(__IO uint32_t *)FMC_FPGA_BASE_ADDR; + sum += part; + } } -- cgit v1.2.3 From 23850319664533c42c5c60d1e91277358d07dd63 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Wed, 14 Nov 2018 16:41:31 -0500 Subject: Increase cli task, and move it to SDRAM, because main RAM is getting full. --- projects/hsm/hsm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/hsm/hsm.c b/projects/hsm/hsm.c index 29509e8..a34b1f7 100644 --- a/projects/hsm/hsm.c +++ b/projects/hsm/hsm.c @@ -86,9 +86,8 @@ static uint8_t busy_stack[BUSY_STACK_SIZE]; * 4096-byte block of an FPGA or bootloader image upload. */ #ifndef CLI_STACK_SIZE -#define CLI_STACK_SIZE 8*1024 +#define CLI_STACK_SIZE 16*1024 #endif -static uint8_t cli_stack[CLI_STACK_SIZE]; /* RPC buffers. For each active request, there will be two - input and output. */ @@ -501,7 +500,8 @@ int main(void) */ /* Create the CLI task. */ - if (task_add("cli", (funcp_t)cli_main, NULL, cli_stack, sizeof(cli_stack)) == NULL) + void *cli_stack = (void *)sdram_malloc(CLI_STACK_SIZE); + if (task_add("cli", (funcp_t)cli_main, NULL, cli_stack, CLI_STACK_SIZE) == NULL) Error_Handler(); /* Start the tasker */ -- cgit v1.2.3 From 97034edb35e92361daaa24512989d00f6c3fd517 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Mon, 26 Nov 2018 17:26:55 -0500 Subject: Add loop unrolling to bring the profilable mem* functions closer to newlib, because memset is called a LOT in the course of RSA signing, and we need to understand how much time we're actually spending there. 
--- libraries/libprof/Makefile | 6 ++- libraries/libprof/memfunc.c | 127 ++++++++++++++++++++++++++++++++++++++++++++ memfunc.c | 101 ----------------------------------- projects/hsm/Makefile | 1 - 4 files changed, 132 insertions(+), 103 deletions(-) create mode 100644 libraries/libprof/memfunc.c delete mode 100644 memfunc.c diff --git a/libraries/libprof/Makefile b/libraries/libprof/Makefile index 4fe5fb4..37b9a23 100644 --- a/libraries/libprof/Makefile +++ b/libraries/libprof/Makefile @@ -1,12 +1,16 @@ LIB = libprof.a -OBJS = gmon.o profil.o profiler.o +OBJS = gmon.o profil.o profiler.o memfunc.o # Don't profile the profiling code, because that way lies madness (and recursion). CFLAGS := $(subst -pg,,$(CFLAGS)) all: $(LIB) +# But do profile the mem functions +memfunc.o: memfunc.c + $(CC) $(CFLAGS) -pg -c -o $@ $< + %.o : %.c $(CC) $(CFLAGS) -c -o $@ $< diff --git a/libraries/libprof/memfunc.c b/libraries/libprof/memfunc.c new file mode 100644 index 0000000..fc908e1 --- /dev/null +++ b/libraries/libprof/memfunc.c @@ -0,0 +1,127 @@ +#include +#include + +/* + * Profilable substitutes for mem*(), lacking libc_p.a + * + * This code was written with reference to newlib, and was recently + * brought closer into line with newlib, to make profiling more accurate. + * + * Newlib is maintained by Cygwin, which is Red Hat. There is no copyright + * statement in the corresponding newlib source files, nor is there a + * COPYING file in newlib/libc/string or newlib/libc. Consider this file + * to be covered under one or more of the 50 copyright notices in + * newlib/COPYING, most of which are BSD. In any case, this file is only + * used for profiling, and is not used in production builds. 
+ */ + +#define is_word_aligned(x) (((size_t)(x) & 3) == 0) + +void *memcpy(void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if (n >= sizeof(uint32_t) && is_word_aligned(src) && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t *s32 = (uint32_t *)src; + while (n >= 4 * sizeof(uint32_t)) { + *d32++ = *s32++; + *d32++ = *s32++; + *d32++ = *s32++; + *d32++ = *s32++; + n -= 4 * sizeof(uint32_t); + } + while (n >= sizeof(uint32_t)) { + *d32++ = *s32++; + n -= sizeof(uint32_t); + } + d8 = (uint8_t *)d32; + s8 = (uint8_t *)s32; + } + while (n-- > 0) { + *d8++ = *s8++; + } + + return dst; +} + +void *memset(void *dst, int c, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t c8 = (uint8_t)c; + + while (!is_word_aligned(d8)) { + if (n--) + *d8++ = c8; + else + return dst; + } + if (n >= sizeof(uint32_t)) { + uint32_t *d32 = (uint32_t *)d8; + uint32_t c32 = (c8 << 24) | (c8 << 16) | (c8 << 8) | (c8); + while (n >= 4 * sizeof(uint32_t)) { + *d32++ = c32; + *d32++ = c32; + *d32++ = c32; + *d32++ = c32; + n -= 4 * sizeof(uint32_t); + } + while (n >= sizeof(uint32_t)) { + *d32++ = c32; + n -= sizeof(uint32_t); + } + d8 = (uint8_t *)d32; + } + while (n-- > 0) { + *d8++ = c8; + } + + return dst; +} + +int memcmp(const void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if (n >= sizeof(uint32_t) && is_word_aligned(src) && is_word_aligned(dst)) { + uint32_t *d32 = (uint32_t *)dst; + uint32_t *s32 = (uint32_t *)src; + while (n >= sizeof(uint32_t)) { + if (*d32 != *s32) + break; + d32++; + s32++; + n -= sizeof(uint32_t); + } + d8 = (uint8_t *)d32; + s8 = (uint8_t *)s32; + } + while (n-- > 0) { + if (*d8 != *s8) + return (*d8 - *s8); + d8++; + s8++; + } + + return 0; +} + +void *memmove(void *dst, const void *src, size_t n) +{ + uint8_t *d8 = (uint8_t *)dst; + uint8_t *s8 = (uint8_t *)src; + + if ((s8 < d8) && (d8 < s8 + n)) { + /* Destructive 
overlap...have to copy backwards */ + s8 += n; + d8 += n; + while (n-- > 0) { + *--d8 = *--s8; + } + return dst; + } + + return memcpy(dst, src, n); +} diff --git a/memfunc.c b/memfunc.c deleted file mode 100644 index fd94b28..0000000 --- a/memfunc.c +++ /dev/null @@ -1,101 +0,0 @@ -#include -#include - -/* - * Profilable substitutes for mem*(), lacking libc_p.a - * - * This code was written with reference to newlib, but does not copy every - * quirk and loop-unrolling optimization from newlib. Its only purpose is - * to let us figure out who is calling memcpy 2 million times. - */ - -#define is_word_aligned(x) (((size_t)(x) & 3) == 0) - -void *memcpy(void *dst, const void *src, size_t n) -{ - uint8_t *d8 = (uint8_t *)dst; - uint8_t *s8 = (uint8_t *)src; - - if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) { - uint32_t *d32 = (uint32_t *)dst; - uint32_t *s32 = (uint32_t *)src; - while (n >= 4) { - *d32++ = *s32++; - n -= 4; - } - d8 = (uint8_t *)d32; - s8 = (uint8_t *)s32; - } - while (n-- > 0) { - *d8++ = *s8++; - } - - return dst; -} - -void *memset(void *dst, int c, size_t n) -{ - uint8_t *d8 = (uint8_t *)dst; - uint8_t c8 = (uint8_t)c; - - if (n >= 4 && is_word_aligned(dst)) { - uint32_t *d32 = (uint32_t *)dst; - uint32_t c32 = (c8 << 24) | (c8 << 16) | (c8 << 8) | (c8); - while (n >= 4) { - *d32++ = c32; - n -= 4; - } - d8 = (uint8_t *)d32; - } - while (n-- > 0) { - *d8++ = c8; - } - - return dst; -} - -int memcmp(const void *dst, const void *src, size_t n) -{ - uint8_t *d8 = (uint8_t *)dst; - uint8_t *s8 = (uint8_t *)src; - - if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) { - uint32_t *d32 = (uint32_t *)dst; - uint32_t *s32 = (uint32_t *)src; - while (n >= 4) { - if (*d32 != *s32) - break; - d32++; - s32++; - n -= 4; - } - d8 = (uint8_t *)d32; - s8 = (uint8_t *)s32; - } - while (n-- > 0) { - if (*d8 != *s8) - return (*d8 - *s8); - d8++; - s8++; - } - - return 0; -} - -void *memmove(void *dst, const void *src, size_t n) -{ - uint8_t *d8 
= (uint8_t *)dst; - uint8_t *s8 = (uint8_t *)src; - - if ((s8 < d8) && (d8 < s8 + n)) { - /* Destructive overlap...have to copy backwards */ - s8 += n; - d8 += n; - while (n-- > 0) { - *--d8 = *--s8; - } - return dst; - } - - return memcpy(dst, src, n); -} diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile index 3430e14..37c552d 100644 --- a/projects/hsm/Makefile +++ b/projects/hsm/Makefile @@ -25,7 +25,6 @@ LDFLAGS += -mfloat-abi=hard -mfpu=fpv4-sp-d16 LDFLAGS += -Wl,--gc-sections ifdef DO_PROFILING -OBJS += $(TOPLEVEL)/memfunc.o LDFLAGS += --specs=rdimon.specs -lc -lrdimon endif -- cgit v1.2.3 From e99737c12cac1fcc8604ac89a14dac5b2606a42d Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Sun, 2 Dec 2018 17:19:44 -0500 Subject: Clean up the profiling code to the point where I stand a chance of understanding it 6 months from now. While I'm at it, try to make it a little more efficient (because 50-60% of time in a typical profiling run is spent in the function-entry counting), and collapse profil.c into gmon.c. --- libraries/libprof/Makefile | 2 +- libraries/libprof/gmon.c | 427 +++++++++++++++++++-------------------------- libraries/libprof/gmon.h | 57 ++---- libraries/libprof/profil.c | 96 ---------- libraries/libprof/profil.h | 60 ------- 5 files changed, 197 insertions(+), 445 deletions(-) delete mode 100644 libraries/libprof/profil.c delete mode 100644 libraries/libprof/profil.h diff --git a/libraries/libprof/Makefile b/libraries/libprof/Makefile index 37b9a23..28bedea 100644 --- a/libraries/libprof/Makefile +++ b/libraries/libprof/Makefile @@ -1,6 +1,6 @@ LIB = libprof.a -OBJS = gmon.o profil.o profiler.o memfunc.o +OBJS = gmon.o profiler.o memfunc.o # Don't profile the profiling code, because that way lies madness (and recursion). 
CFLAGS := $(subst -pg,,$(CFLAGS)) diff --git a/libraries/libprof/gmon.c b/libraries/libprof/gmon.c index 317a173..a34f1a2 100644 --- a/libraries/libprof/gmon.c +++ b/libraries/libprof/gmon.c @@ -36,289 +36,226 @@ #include #include #include -#include -#include "gmon.h" -#include "profil.h" #include +#include "gmon.h" #define bzero(ptr,size) memset (ptr, 0, size); #define ERR(s) write(2, s, sizeof(s)) -struct gmonparam _gmonparam = { GMON_PROF_OFF, NULL, 0, NULL, 0, NULL, 0, 0L, 0, 0, 0}; -static char already_setup = 0; /* flag to indicate if we need to init */ -static int s_scale; -/* see profil(2) where this is described (incorrectly) */ -#define SCALE_1_TO_1 0x10000L +/* profiling frequency. (No larger than 1000) */ +/* Note this doesn't set the frequency, but merely describes it. */ +#define PROF_HZ 1000 -static void moncontrol(int mode); +struct gmonparam _gmonparam = { off, NULL, 0, NULL, 0, NULL, 0, 0, 0, 0, 0}; -void monstartup (size_t lowpc, size_t highpc) { - register size_t o; - char *cp; - struct gmonparam *p = &_gmonparam; +void monstartup(size_t lowpc, size_t highpc) +{ + static char already_setup = 0; + struct gmonparam *p = &_gmonparam; - if (already_setup) { - /* zero out cp as value will be added there */ - bzero(p->tos, p->kcountsize + p->fromssize + p->tossize); - moncontrol(1); /* start */ - return; - } - already_setup = 1; + if (already_setup) { + /* reinitialize counters and arcs */ + bzero(p->kcount, p->kcountsize); + bzero(p->froms, p->fromssize); + bzero(p->tos, p->tossize); + p->state = on; + return; + } + already_setup = 1; - /* enable semihosting, for eventual output */ - extern void initialise_monitor_handles(void); - initialise_monitor_handles(); + /* enable semihosting, for eventual output */ + extern void initialise_monitor_handles(void); + initialise_monitor_handles(); - /* - * round lowpc and highpc to multiples of the density we're using - * so the rest of the scaling (here and in gprof) stays in ints. 
- */ - p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER)); - p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER)); - p->textsize = p->highpc - p->lowpc + 0x20; - p->kcountsize = p->textsize / HISTFRACTION; - p->fromssize = p->textsize / HASHFRACTION; - p->tolimit = p->textsize * ARCDENSITY / 100; - if (p->tolimit < MINARCS) { - p->tolimit = MINARCS; - } else if (p->tolimit > MAXARCS) { - p->tolimit = MAXARCS; - } - p->tossize = p->tolimit * sizeof(struct tostruct); + /* + * round lowpc and highpc to multiples of the density we're using + * so the rest of the scaling (here and in gprof) stays in ints. + */ + p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER)); + p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER)); + p->textsize = p->highpc - p->lowpc; + p->kcountsize = p->textsize / HISTFRACTION; + p->fromssize = p->textsize / HASHFRACTION; + p->tolimit = p->textsize * ARCDENSITY / 100; + if (p->tolimit < MINARCS) { + p->tolimit = MINARCS; + } else if (p->tolimit > MAXARCS) { + p->tolimit = MAXARCS; + } + p->tossize = p->tolimit * sizeof(struct tostruct); - extern void *hal_allocate_static_memory(const size_t size); - cp = hal_allocate_static_memory(p->kcountsize + p->fromssize + p->tossize); - if (cp == NULL) { - ERR("monstartup: out of memory\n"); - return; - } + extern void *hal_allocate_static_memory(const size_t size); + void *cp = hal_allocate_static_memory(p->kcountsize + p->fromssize + p->tossize); + if (cp == NULL) { + ERR("monstartup: out of memory\n"); + return; + } - /* zero out cp as value will be added there */ - bzero(cp, p->kcountsize + p->fromssize + p->tossize); + bzero(cp, p->kcountsize + p->fromssize + p->tossize); + p->kcount = (unsigned short *)cp; cp += p->kcountsize; + p->froms = (unsigned short *)cp; cp += p->fromssize; + p->tos = (struct tostruct *)cp; - p->tos = (struct tostruct *)cp; - cp += p->tossize; - p->kcount = (unsigned short *)cp; - cp += p->kcountsize; - p->froms = (unsigned short 
*)cp; + p->state = on; +} - p->tos[0].link = 0; +void _mcleanup(void) +{ + static const char gmon_out[] = "gmon.out"; + int fd; + struct gmonparam *p = &_gmonparam; - o = p->highpc - p->lowpc; - if (p->kcountsize < o) { -#ifndef notdef - s_scale = ((float)p->kcountsize / o ) * SCALE_1_TO_1; -#else /* avoid floating point */ - int quot = o / p->kcountsize; + if (p->state == err) { + ERR("_mcleanup: tos overflow\n"); + } - if (quot >= 0x10000) - s_scale = 1; - else if (quot >= 0x100) - s_scale = 0x10000 / quot; - else if (o >= 0x800000) - s_scale = 0x1000000 / (o / (p->kcountsize >> 8)); - else - s_scale = 0x1000000 / ((o << 8) / p->kcountsize); -#endif - } else { - s_scale = SCALE_1_TO_1; - } - moncontrol(1); /* start */ -} + fd = open(gmon_out , O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666); + if (fd < 0) { + perror( gmon_out ); + return; + } -void _mcleanup(void) { - static const char gmon_out[] = "gmon.out"; - int fd; - int fromindex; - int endfrom; - size_t frompc; - int toindex; - struct rawarc rawarc; - struct gmonparam *p = &_gmonparam; - struct gmonhdr gmonhdr = {0}, *hdr; - const char *proffile; -#ifdef DEBUG - int log, len; - char dbuf[200]; -#endif + struct gmonhdr hdr = { + .lpc = p->lowpc, + .hpc = p->highpc, + .ncnt = p->kcountsize + sizeof(struct gmonhdr), + .version = GMONVERSION, + .profrate = PROF_HZ + }; + write(fd, &hdr, sizeof(hdr)); - if (p->state == GMON_PROF_ERROR) { - ERR("_mcleanup: tos overflow\n"); - } - moncontrol(0); /* stop */ - proffile = gmon_out; - fd = open(proffile , O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666); - if (fd < 0) { - perror( proffile ); - return; - } -#ifdef DEBUG - log = open("gmon.log", O_CREAT|O_TRUNC|O_WRONLY, 0664); - if (log < 0) { - perror("mcount: gmon.log"); - return; - } - len = sprintf(dbuf, "[mcleanup1] kcount 0x%x ssiz %d\n", - (unsigned int)p->kcount, p->kcountsize); - write(log, dbuf, len); -#endif - hdr = (struct gmonhdr *)&gmonhdr; - hdr->lpc = p->lowpc; - hdr->hpc = p->highpc; - hdr->ncnt = p->kcountsize + 
sizeof(gmonhdr); - hdr->version = GMONVERSION; - hdr->profrate = PROF_HZ; - hdr->spare[0] = hdr->spare[1] = hdr->spare[2] = 0; - write(fd, (char *)hdr, sizeof *hdr); - write(fd, p->kcount, p->kcountsize); - endfrom = p->fromssize / sizeof(*p->froms); - for (fromindex = 0; fromindex < endfrom; fromindex++) { - if (p->froms[fromindex] == 0) { - continue; - } - frompc = p->lowpc; - frompc += fromindex * HASHFRACTION * sizeof(*p->froms); - for (toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].link) { -#ifdef DEBUG - len = sprintf(dbuf, - "[mcleanup2] frompc 0x%x selfpc 0x%x count %ld\n" , - frompc, p->tos[toindex].selfpc, - p->tos[toindex].count); - write(log, dbuf, len); -#endif - rawarc.raw_frompc = frompc; - rawarc.raw_selfpc = p->tos[toindex].selfpc; - rawarc.raw_count = p->tos[toindex].count; - write(fd, &rawarc, sizeof rawarc); - } - } - close(fd); -} + write(fd, p->kcount, p->kcountsize); -/* - * Control profiling - * profiling is what mcount checks to see if - * all the data structures are ready. 
- */ -static void moncontrol(int mode) { - struct gmonparam *p = &_gmonparam; + for (size_t fromindex = 0; fromindex < p->fromssize / sizeof(*p->froms); fromindex++) { + size_t frompc = p->lowpc + fromindex * HASHFRACTION * sizeof(*p->froms); + for (size_t toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].next) { + struct rawarc arc = { + .frompc = frompc, + .selfpc = p->tos[toindex].selfpc, + .count = p->tos[toindex].count + }; + write(fd, &arc, sizeof(arc)); + } + } - if (mode) { - /* start */ - profil((char *)p->kcount, p->kcountsize, p->lowpc, s_scale); - p->state = GMON_PROF_ON; - } else { - /* stop */ - profil((char *)0, 0, 0, 0); - p->state = GMON_PROF_OFF; - } + close(fd); } -void _mcount_internal(uint32_t *frompcindex, uint32_t *selfpc) { - register struct tostruct *top; - register struct tostruct *prevtop; - register long toindex; +void _mcount_internal(size_t frompc, size_t selfpc) +{ + register unsigned short *fromptr; + register struct tostruct *top; + register unsigned short toindex; struct gmonparam *p = &_gmonparam; /* - * check that we are profiling - * and that we aren't recursively invoked. - */ - if (p->state!=GMON_PROF_ON) { - goto out; - } - p->state++; - /* - * check that frompcindex is a reasonable pc value. - * for example: signal catchers get called from the stack, - * not from text space. too bad. + * check that we are profiling and that we aren't recursively invoked. + * check that frompc is a reasonable pc value. 
*/ - frompcindex = (uint32_t*)((long)frompcindex - (long)p->lowpc); - if ((unsigned long)frompcindex > p->textsize) { - goto done; + if (p->state != on || (frompc -= p->lowpc) > p->textsize) { + return; } - frompcindex = (uint32_t*)&p->froms[((long)frompcindex) / (HASHFRACTION * sizeof(*p->froms))]; - toindex = *((unsigned short*)frompcindex); /* get froms[] value */ - if (toindex == 0) { - /* - * first time traversing this arc - */ - toindex = ++p->tos[0].link; /* the link of tos[0] points to the last used record in the array */ + + fromptr = &p->froms[frompc / (HASHFRACTION * sizeof(*p->froms))]; + toindex = *fromptr; /* get froms[] value */ + + if (toindex == 0) { /* we haven't seen this caller before */ + toindex = ++p->tos[0].next; /* index of the last used record in the array */ if (toindex >= p->tolimit) { /* more tos[] entries than we can handle! */ - goto overflow; - } - *((unsigned short*)frompcindex) = (unsigned short)toindex; /* store new 'to' value into froms[] */ + overflow: + p->state = err; /* halt further profiling */ +#define TOLIMIT "mcount: tos overflow\n" + write (2, TOLIMIT, sizeof(TOLIMIT)); + return; + } + *fromptr = toindex; /* store new 'to' value into froms[] */ top = &p->tos[toindex]; - top->selfpc = (size_t)selfpc; + top->selfpc = selfpc; top->count = 1; - top->link = 0; - goto done; - } - top = &p->tos[toindex]; - if (top->selfpc == (size_t)selfpc) { - /* - * arc at front of chain; usual case. - */ - top->count++; - goto done; + top->next = 0; } - /* - * have to go looking down chain for it. - * top points to what we are looking at, - * prevtop points to previous top. - * we know it is not at the head of the chain. - */ - for (; /* goto done */; ) { - if (top->link == 0) { + + else { /* we've seen this caller before */ + top = &p->tos[toindex]; + if (top->selfpc == selfpc) { /* - * top is end of the chain and none of the chain - * had top->selfpc == selfpc. - * so we allocate a new tostruct - * and link it to the head of the chain. 
+ * arc at front of chain; usual case. */ - toindex = ++p->tos[0].link; - if (toindex >= p->tolimit) { - goto overflow; - } - top = &p->tos[toindex]; - top->selfpc = (size_t)selfpc; - top->count = 1; - top->link = *((unsigned short*)frompcindex); - *(unsigned short*)frompcindex = (unsigned short)toindex; - goto done; + top->count++; } - /* - * otherwise, check the next arc on the chain. - */ - prevtop = top; - top = &p->tos[top->link]; - if (top->selfpc == (size_t)selfpc) { + + else { /* - * there it is. - * increment its count - * move it to the head of the chain. + * have to go looking down chain for it. + * top points to what we are looking at, + * prevtop points to previous top. + * we know it is not at the head of the chain. */ - top->count++; - toindex = prevtop->link; - prevtop->link = top->link; - top->link = *((unsigned short*)frompcindex); - *((unsigned short*)frompcindex) = (unsigned short)toindex; - goto done; + while (1) { + if (top->next == 0) { + /* + * top is end of the chain and none of the chain + * had top->selfpc == selfpc. + * so we allocate a new tostruct + * and put it at the head of the chain. + */ + toindex = ++p->tos[0].next; + if (toindex >= p->tolimit) { + goto overflow; + } + top = &p->tos[toindex]; + top->selfpc = selfpc; + top->count = 1; + top->next = *fromptr; + *fromptr = toindex; + break; + } + + else { + /* + * otherwise, check the next arc on the chain. + */ + register struct tostruct *prevtop = top; + top = &p->tos[top->next]; + if (top->selfpc == selfpc) { + /* + * there it is. + * increment its count + * move it to the head of the chain. 
+ */ + top->count++; + toindex = prevtop->next; + prevtop->next = top->next; + top->next = *fromptr; + *fromptr = toindex; + break; + } + } + } } } - done: - p->state--; - /* and fall through */ - out: - return; /* normal return restores saved registers */ - overflow: - p->state++; /* halt further profiling */ - #define TOLIMIT "mcount: tos overflow\n" - write (2, TOLIMIT, sizeof(TOLIMIT)); - goto out; + + return; /* normal return restores saved registers */ } -void _monInit(void) { - _gmonparam.state = GMON_PROF_OFF; - already_setup = 0; +#include +#include "stm32f4xx_hal.h" /* __get_MSP */ + +/* called from the SysTick handler */ +void profil_callback(void) +{ + struct gmonparam *p = &_gmonparam; + + if (p->state == on) { + /* The interrupt mechanism pushes xPSR, PC, LR, R12, and R3-R0 onto the + * stack, so PC is the 6th word from the top at that point. However, the + * normal function entry code pushes registers as well, so the stack + * offset right now depends on the call tree that got us here. + */ + size_t pc = (size_t)((uint32_t *)__get_MSP())[6 + 6]; + if ((pc -= p->lowpc) < p->textsize) { + size_t idx = pc / (HISTFRACTION * sizeof(*p->kcount)); + p->kcount[idx]++; + } + } } diff --git a/libraries/libprof/gmon.h b/libraries/libprof/gmon.h index 8b5ecf0..9016502 100644 --- a/libraries/libprof/gmon.h +++ b/libraries/libprof/gmon.h @@ -40,28 +40,6 @@ #ifndef _SYS_GMON_H_ #define _SYS_GMON_H_ -#ifndef __P -#define __P(x) x -#endif - -/* On POSIX systems, profile.h is a KRB5 header. To avoid collisions, just - pull in profile.h's content here. The profile.h header won't be provided - by Mingw-w64 anymore at one point. 
*/ -#if 0 -#include -#else -#ifndef _WIN64 -#define _MCOUNT_CALL __attribute__ ((regparm (2))) -extern void _mcount(void); -#else -#define _MCOUNT_CALL -extern void mcount(void); -#endif -#define _MCOUNT_DECL __attribute__((gnu_inline)) __inline__ \ - void _MCOUNT_CALL _mcount_private -#define MCOUNT -#endif - /* * Structure prepended to gmon.out profiling data file. */ @@ -83,7 +61,8 @@ struct gmonhdr { /* * fraction of text space to allocate for histogram counters here, 1/2 */ -#define HISTFRACTION 2 +//#define HISTFRACTION 2 +#define HISTFRACTION 1 /* * Fraction of text space to allocate for from hash buckets. @@ -113,7 +92,8 @@ struct gmonhdr { * profiling data structures without (in practice) sacrificing * any granularity. */ -#define HASHFRACTION 2 +//#define HASHFRACTION 2 +#define HASHFRACTION 1 /* * percent of text space to allocate for tostructs with a minimum. @@ -123,10 +103,10 @@ struct gmonhdr { #define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER))) - 2) struct tostruct { - size_t selfpc; /* callee address/program counter. The caller address is in froms[] array which points to tos[] array */ - long count; /* how many times it has been called */ - unsigned short link; /* link to next entry in hash table. For tos[0] this points to the last used entry */ - unsigned short pad; /* additional padding bytes, to have entries 4byte aligned */ + size_t selfpc; /* callee address. The caller address is in froms[] array which points to tos[] array */ + unsigned long count; /* how many times it has been called */ + unsigned short next; /* next entry in hash table. For tos[0] this is the index of the last used entry */ + unsigned short pad; /* additional padding bytes, to have entries 4byte aligned */ }; /* @@ -134,9 +114,9 @@ struct tostruct { * the called site and a count. 
*/ struct rawarc { - size_t raw_frompc; - size_t raw_selfpc; - long raw_count; + size_t frompc; + size_t selfpc; + long count; }; /* @@ -149,29 +129,20 @@ struct rawarc { * The profiling data structures are housed in this structure. */ struct gmonparam { - int state; + enum { off, on, err } state; unsigned short *kcount; /* histogram PC sample array */ size_t kcountsize; /* size of kcount[] array in bytes */ unsigned short *froms; /* array of hashed 'from' addresses. The 16bit value is an index into the tos[] array */ size_t fromssize; /* size of froms[] array in bytes */ - struct tostruct *tos; /* to struct, contains histogram counter */ + struct tostruct *tos; /* to struct, contains arc counters */ size_t tossize; /* size of tos[] array in bytes */ - long tolimit; + size_t tolimit; size_t lowpc; /* low program counter of area */ size_t highpc; /* high program counter */ size_t textsize; /* code size */ }; extern struct gmonparam _gmonparam; -/* - * Possible states of profiling. - */ -#define GMON_PROF_ON 0 -#define GMON_PROF_BUSY 1 -#define GMON_PROF_ERROR 2 -#define GMON_PROF_OFF 3 - void _mcleanup(void); /* routine to be called to write gmon.out file */ -void _monInit(void); /* initialization routine */ #endif /* !_SYS_GMONH_ */ diff --git a/libraries/libprof/profil.c b/libraries/libprof/profil.c deleted file mode 100644 index b0d8d55..0000000 --- a/libraries/libprof/profil.c +++ /dev/null @@ -1,96 +0,0 @@ -/* profil.c -- win32 profil.c equivalent - - Copyright 1998, 1999, 2000, 2001, 2002 Red Hat, Inc. - - This file is part of Cygwin. - - This software is a copyrighted work licensed under the terms of the - Cygwin license. Please consult the file "CYGWIN_LICENSE" for - details. */ - -/* - * This file is taken from Cygwin distribution, adapted to be used for bare embedded targets. 
- */ -#include -#include -#include -#include -#include "profil.h" -#include - -#include "stm32f4xx_hal.h" /* __get_MSP */ - -/* global profinfo for profil() call */ -static struct profinfo prof = { - PROFILE_NOT_INIT, 0, 0, 0, 0 -}; - -/* sample the current program counter */ -void profil_callback(void) { - if (prof.state == PROFILE_ON) { - /* The interrupt mechanism pushes xPSR, PC, LR, R12, and R3-R0 onto the - * stack, so PC is the 6th word from the top at that point. However, the - * normal function entry code pushes registers as well, so the stack - * offset right now depends on the call tree that got us here. - */ - size_t pc = (size_t)((uint32_t *)__get_MSP())[6 + 6]; - if (pc >= prof.lowpc && pc < prof.highpc) { - size_t idx = PROFIDX (pc, prof.lowpc, prof.scale); - prof.counter[idx]++; - } - } -} - -/* Stop profiling to the profiling buffer pointed to by p. */ -static int profile_off (struct profinfo *p) { - p->state = PROFILE_OFF; - return 0; -} - -/* Create a timer thread and pass it a pointer P to the profiling buffer. */ -static int profile_on (struct profinfo *p) { - p->state = PROFILE_ON; - return 0; /* ok */ -} - -/* - * start or stop profiling - * - * profiling goes into the SAMPLES buffer of size SIZE (which is treated - * as an array of unsigned shorts of size size/2) - * - * each bin represents a range of pc addresses from OFFSET. The number - * of pc addresses in a bin depends on SCALE. (A scale of 65536 maps - * each bin to two addresses, A scale of 32768 maps each bin to 4 addresses, - * a scale of 1 maps each bin to 128k address). 
Scale may be 1 - 65536, - * or zero to turn off profiling - */ -int profile_ctl (struct profinfo *p, char *samples, size_t size, size_t offset, unsigned int scale) { - size_t maxbin; - - if (scale > 65536) { - errno = EINVAL; - return -1; - } - profile_off(p); - if (scale) { - memset(samples, 0, size); - memset(p, 0, sizeof *p); - maxbin = size >> 1; - prof.counter = (unsigned short*)samples; - prof.lowpc = offset; - prof.highpc = PROFADDR(maxbin, offset, scale); - prof.scale = scale; - return profile_on(p); - } - return 0; -} - -/* Equivalent to unix profil() - Every SLEEPTIME interval, the user's program counter (PC) is examined: - offset is subtracted and the result is multiplied by scale. - The word pointed to by this address is incremented. */ -int profil (char *samples, size_t size, size_t offset, unsigned int scale) { - return profile_ctl (&prof, samples, size, offset, scale); -} - diff --git a/libraries/libprof/profil.h b/libraries/libprof/profil.h deleted file mode 100644 index c72dc00..0000000 --- a/libraries/libprof/profil.h +++ /dev/null @@ -1,60 +0,0 @@ -/* profil.h: gprof profiling header file - - Copyright 1998, 1999, 2000, 2001, 2002 Red Hat, Inc. - -This file is part of Cygwin. - -This software is a copyrighted work licensed under the terms of the -Cygwin license. Please consult the file "CYGWIN_LICENSE" for -details. */ - -/* - * This file is taken from Cygwin distribution. Please keep it in sync. - * The differences should be within __MINGW32__ guard. - */ - -#ifndef __PROFIL_H__ -#define __PROFIL_H__ - -/* profiling frequency. 
(No larger than 1000) */ -#define PROF_HZ 1000 - -/* convert an addr to an index */ -#define PROFIDX(pc, base, scale) \ - ({ \ - size_t i = (pc - base) / 2; \ - if (sizeof (unsigned long long int) > sizeof (size_t)) \ - i = (unsigned long long int) i * scale / 65536; \ - else \ - i = i / 65536 * scale + i % 65536 * scale / 65536; \ - i; \ - }) - -/* convert an index into an address */ -#define PROFADDR(idx, base, scale) \ - ((base) \ - + ((((unsigned long long)(idx) << 16) \ - / (unsigned long long)(scale)) << 1)) - -/* convert a bin size into a scale */ -#define PROFSCALE(range, bins) (((bins) << 16) / ((range) >> 1)) - -typedef void *_WINHANDLE; - -typedef enum { - PROFILE_NOT_INIT = 0, - PROFILE_ON, - PROFILE_OFF -} PROFILE_State; - -struct profinfo { - PROFILE_State state; /* profiling state */ - unsigned short *counter; /* profiling counters */ - size_t lowpc, highpc; /* range to be profiled */ - unsigned int scale; /* scale value of bins */ -}; - -int profile_ctl(struct profinfo *, char *, size_t, size_t, unsigned int); -int profil(char *, size_t, size_t, unsigned int); - -#endif /* __PROFIL_H__ */ -- cgit v1.2.3 From b333546b2fdb752388a1454969d23f6bf96c41bf Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Mon, 3 Dec 2018 09:55:53 -0500 Subject: Re-delete SysTick_hook (commit 9ffead1), which somehow crept back in. 
--- .../TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c index 81b27cb..d3bafb2 100644 --- a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c +++ b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_it.c @@ -64,13 +64,6 @@ void HardFault_Handler(void) while (1) { ; } } -static void default_SysTick_hook(void) { }; -static void (*SysTick_hook)(void) = default_SysTick_hook; -void set_SysTick_hook(void (*hook)(void)) -{ - SysTick_hook = (hook == NULL) ? default_SysTick_hook : hook; -} - /** * @brief This function handles SysTick Handler. * @param None -- cgit v1.2.3 From a89dcb22ca549ae17742a8ee3c08f2d7fd606771 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Wed, 5 Dec 2018 14:40:16 -0500 Subject: Tell libtfm that the ARM is little-endian, so it can optimize fp_read_unsigned_bin(). This enables some overflow-detection code, which fixes CT-01-009. It also saves 484us/call, which adds up to 6.3ms/RSA signature. 
--- libraries/libtfm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/libtfm/Makefile b/libraries/libtfm/Makefile index b50421c..359729b 100644 --- a/libraries/libtfm/Makefile +++ b/libraries/libtfm/Makefile @@ -38,7 +38,7 @@ STM32_LIBTFM_CFLAGS_OPTIMIZATION := -O3 -funroll-loops -fomit-frame-pointer endif CFLAGS := $(subst ${STM32_CFLAGS_OPTIMIZATION},${STM32_LIBTFM_CFLAGS_OPTIMIZATION},${CFLAGS}) -CFLAGS += -DTFM_ARM -Dasm=__asm__ -Wa,-mimplicit-it=thumb +CFLAGS += -DTFM_ARM -DENDIAN_LITTLE -Dasm=__asm__ -Wa,-mimplicit-it=thumb CFLAGS += -I${LIBTFM_SRC}/tomsfastmath/src/headers CFLAGS += -DFP_MAX_SIZE="(${BITS}*2+(8*DIGIT_BIT))" CFLAGS += -Wall -W -Wshadow -Wno-uninitialized -- cgit v1.2.3 From 73c911dfe407027c1822ac4745405ff22d446c66 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Thu, 28 Feb 2019 15:01:12 -0500 Subject: Report SDRAM usage in `task show` --- projects/hsm/hsm.c | 11 +++++++++++ projects/hsm/mgmt-task.c | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/projects/hsm/hsm.c b/projects/hsm/hsm.c index a34b1f7..c1ab691 100644 --- a/projects/hsm/hsm.c +++ b/projects/hsm/hsm.c @@ -395,6 +395,17 @@ static hal_error_t sdram_free(uint8_t *ptr) return HAL_ERROR_FORBIDDEN; } +hal_error_t sdram_stats(size_t *used, size_t *available) +{ + if (used == NULL || available == NULL) + return HAL_ERROR_BAD_ARGUMENTS; + + *used = sdram_heap - &_esdram1; + *available = &__end_sdram1 - sdram_heap; + + return LIBHAL_OK; +} + /* Implement static memory allocation for libhal over sdram_malloc(). 
*/ void *hal_allocate_static_memory(const size_t size) diff --git a/projects/hsm/mgmt-task.c b/projects/hsm/mgmt-task.c index c2a3d3f..180c6d9 100644 --- a/projects/hsm/mgmt-task.c +++ b/projects/hsm/mgmt-task.c @@ -74,6 +74,12 @@ static int cmd_task_show(struct cli_def *cli, const char *command, char *argv[], cli_print(cli, " "); cli_print(cli, "UART receive queue maximum length: %u", uart_rx_max); + size_t used, available; + extern void sdram_stats(size_t *used, size_t *available); + sdram_stats(&used, &available); + cli_print(cli, " "); + cli_print(cli, "SDRAM used: %u, available: %u", used, available); + return CLI_OK; } -- cgit v1.2.3 From 9006c25bd73c00ff861cccbce4595e6c932f4ace Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Sun, 31 Mar 2019 17:21:23 -0400 Subject: Remove inclusion of now-deleted hashsig.h --- projects/hsm/hsm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/hsm/hsm.c b/projects/hsm/hsm.c index c1ab691..52157c9 100644 --- a/projects/hsm/hsm.c +++ b/projects/hsm/hsm.c @@ -341,7 +341,6 @@ static void busy_task(void) } #include "stm-fpgacfg.h" -#include "hashsig.h" static void hashsig_restart_task(void) { -- cgit v1.2.3 From e203f797dddfcd03419e7ac336a86a6186fce0c1 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Sat, 6 Apr 2019 17:54:06 -0400 Subject: Remove fmc_[read|write]_32, since we now memcpy in hal_io_[read|write]. 
--- stm-fmc.h | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/stm-fmc.h b/stm-fmc.h index 92b261b..c01d4ff 100644 --- a/stm-fmc.h +++ b/stm-fmc.h @@ -52,22 +52,9 @@ extern void fmc_init(void); -static inline void fmc_write_32(const uint32_t addr, const uint32_t data) +static inline void *fmc_fpga_addr(off_t addr) { - // calculate target fpga address - uint32_t *ptr = (uint32_t *) (FMC_FPGA_BASE_ADDR + (addr & FMC_FPGA_ADDR_MASK)); - - // write data to fpga - *ptr = data; -} - -static inline void fmc_read_32(const uint32_t addr, uint32_t * const data) -{ - // calculate target fpga address - uint32_t *ptr = (uint32_t *) (FMC_FPGA_BASE_ADDR + (addr & FMC_FPGA_ADDR_MASK)); - - // read data from fpga - *data = *ptr; + return (void *)(FMC_FPGA_BASE_ADDR + (addr & FMC_FPGA_ADDR_MASK)); } #endif /* __STM_FMC_H */ -- cgit v1.2.3