From 2a14d36ebd7bde9a3a6c98871050b14b54598389 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Thu, 9 Jun 2016 18:53:31 -0400 Subject: Put thread stack buffers in SDRAM, because pkey uses a lot of stack. Also rearchitect the way we handle RPC requests - have a bunch of waiting dispatch threads rather than continually creating and deleting threads. --- .../TOOLCHAIN_GCC_ARM/STM32F429BI.ld | 52 +++++++- .../TARGET_CRYPTECH_ALPHA/stm32f4xx_hal_msp.c | 1 + projects/hsm/hsm.c | 138 ++++++++++++--------- 3 files changed, 125 insertions(+), 66 deletions(-) diff --git a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/TOOLCHAIN_GCC_ARM/STM32F429BI.ld b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/TOOLCHAIN_GCC_ARM/STM32F429BI.ld index ad7ddaf..2f80bce 100644 --- a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/TOOLCHAIN_GCC_ARM/STM32F429BI.ld +++ b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/TOOLCHAIN_GCC_ARM/STM32F429BI.ld @@ -10,9 +10,11 @@ MEMORY BOOTLOADER (rx) : ORIGIN = 0x08000000, LENGTH = 128K FIRMWARE (rx) : ORIGIN = 0x08000000 + 128K, LENGTH = 2048K - 128K FLASH (rx) : ORIGIN = 0x08000000 + 128K, LENGTH = 2048K - 128K - RAM (rwx) : ORIGIN = 0x20000000, LENGTH = 192K - 4 + CCMRAM (rwx) : ORIGIN = 0x10000000, LENGTH = 64K + RAM (rwx) : ORIGIN = 0x20000000, LENGTH = 192K - 4 + SDRAM1 (rwx) : ORIGIN = 0xC0000000, LENGTH = 64M + SDRAM2 (rwx) : ORIGIN = 0xD0000000, LENGTH = 64M } -/* original: FLASH (rx) : ORIGIN = 0x08000000, LENGTH = 2048K */ /* Linker script to place sections and symbol values. Should be used together * with other linker script that defines memory regions FLASH and RAM. @@ -86,7 +88,7 @@ SECTIONS __etext = .; _sidata = .; - .data : AT (__etext) + .data : { __data_start__ = .; _sdata = .; @@ -120,7 +122,49 @@ SECTIONS __data_end__ = .; _edata = .; - } > RAM + } > RAM AT> FLASH + + /* If initialized variables are placed in this section, + * the startup code needs to be modified to copy the init-values. + */ + .ccmram : + { + . = ALIGN(4); + _sccmram = .; + *(.ccmram) + *(.ccmram*) + + . = ALIGN(4); + _eccmram = .; + } >CCMRAM AT> FLASH + + /* If initialized variables are placed in this section, + * the startup code needs to be modified to copy the init-values. + */ + .sdram1 : + { + . = ALIGN(4); + _ssdram1 = .; + *(.sdram1) + *(.sdram1*) + + . = ALIGN(4); + _esdram1 = .; + } >SDRAM1 AT> FLASH + + /* If initialized variables are placed in this section, + * the startup code needs to be modified to copy the init-values. + */ + .sdram2 : + { + . = ALIGN(4); + _ssdram2 = .; + *(.sdram2) + *(.sdram2*) + + . = ALIGN(4); + _esdram2 = .; + } >SDRAM2 AT> FLASH .bss : { diff --git a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_hal_msp.c b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_hal_msp.c index fbd0adf..7eeb6df 100644 --- a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_hal_msp.c +++ b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_hal_msp.c @@ -190,6 +190,7 @@ void HAL_UART_MspInit(UART_HandleTypeDef* huart) hdma->Init.PeriphBurst = DMA_PBURST_SINGLE; */ if (HAL_DMA_Init(hdma) != HAL_OK) { + extern void mbed_die(void); mbed_die(); } } diff --git a/projects/hsm/hsm.c b/projects/hsm/hsm.c index 79c567b..e3c1c36 100644 --- a/projects/hsm/hsm.c +++ b/projects/hsm/hsm.c @@ -53,6 +53,7 @@ #include "stm-led.h" #include "stm-fmc.h" #include "stm-uart.h" +#include "stm-sdram.h" /* stm32f4xx_hal_def.h and hal.h both define HAL_OK as an enum value */ #define HAL_OK HAL_OKAY @@ -65,13 +66,24 @@ /* RPC buffers. For each active RPC, there will be two - input and output. */ -#ifndef NUM_RPC_BUFFER +#ifndef NUM_RPC_TASK /* An arbitrary number, but we don't expect to have more than 8 concurrent * RPC requests. */ -#define NUM_RPC_BUFFER 16 +#define NUM_RPC_TASK 8 #endif +#ifndef TASK_STACK_SIZE +/* Define an absurdly large task stack, because some pkey operation use a + * lot of stack variables. + */ +#define TASK_STACK_SIZE 64*1024 +#endif + +/* Put the task stack buffers in SDRAM, because ARM RAM is too small. + */ +__attribute__((section(".sdram1"))) uint8_t stack[NUM_RPC_TASK][TASK_STACK_SIZE]; + #ifndef MAX_PKT_SIZE /* Another arbitrary number, more or less driven by the 4096-bit RSA * keygen test. @@ -87,65 +99,30 @@ typedef struct { uint8_t buf[MAX_PKT_SIZE]; } rpc_buffer_t; -osPoolDef(rpc_buffer_pool, NUM_RPC_BUFFER, rpc_buffer_t); -osPoolId rpc_buffer_pool; - -static rpc_buffer_t *rpc_buffer_alloc(void) -{ - return (rpc_buffer_t *)osPoolCAlloc(rpc_buffer_pool); -} - /* A mutex to arbitrate concurrent UART transmits, from RPC responses. */ osMutexId uart_mutex; osMutexDef(uart_mutex); -/* Thread entry point for the RPC request handler. +/* A mutex so only one dispatch thread can receive requests. */ -static void dispatch_thread(void const *args) -{ - rpc_buffer_t *ibuf = (rpc_buffer_t *)args; - rpc_buffer_t *obuf = rpc_buffer_alloc(); - if (obuf == NULL) { - uint8_t buf[8]; - uint8_t * bufptr = &buf[4]; - const uint8_t * const limit = buf + sizeof(buf); - memcpy(buf, ibuf->buf, 4); - hal_xdr_encode_int(&bufptr, limit, HAL_ERROR_ALLOCATION_FAILURE); - osMutexWait(uart_mutex, osWaitForever); - hal_rpc_sendto(ibuf->buf, sizeof(buf), NULL); - osMutexRelease(uart_mutex); - osPoolFree(rpc_buffer_pool, ibuf); - Error_Handler(); - } - /* copy client ID from request to response */ - memcpy(obuf->buf, ibuf->buf, 4); - obuf->len = sizeof(obuf->buf) - 4; - hal_rpc_server_dispatch(ibuf->buf + 4, ibuf->len - 4, obuf->buf + 4, &obuf->len); - osPoolFree(rpc_buffer_pool, ibuf); - osMutexWait(uart_mutex, osWaitForever); - hal_error_t ret = hal_rpc_sendto(obuf->buf, obuf->len + 4, NULL); - osMutexRelease(uart_mutex); - osPoolFree(rpc_buffer_pool, obuf); - if (ret != HAL_OK) - Error_Handler(); -} -osThreadDef(dispatch_thread, osPriorityNormal, DEFAULT_STACK_SIZE); +osMutexId dispatch_mutex; +osMutexDef(dispatch_mutex); -/* Semaphore to inform the main thread that there's a new RPC request. +/* Semaphore to inform the dispatch thread that there's a new RPC request. */ -osSemaphoreId rpc_sem; +osSemaphoreId rpc_sem; osSemaphoreDef(rpc_sem); -static uint8_t c; /* current character received from UART */ -static rpc_buffer_t *ibuf; /* current RPC input buffer */ +static uint8_t c; /* current character received from UART */ +static rpc_buffer_t * volatile rbuf; /* current RPC input buffer */ /* Callback for HAL_UART_Receive_IT(). */ void HAL_UART2_RxCpltCallback(UART_HandleTypeDef *huart) { int complete; - hal_slip_recv_char(ibuf->buf, &ibuf->len, sizeof(ibuf->buf), &complete); + hal_slip_recv_char(rbuf->buf, &rbuf->len, sizeof(rbuf->buf), &complete); if (complete) osSemaphoreRelease(rpc_sem); @@ -164,8 +141,47 @@ hal_error_t hal_serial_recv_char(uint8_t *cp) return HAL_OK; } -/* The main thread. After the system setup, it waits for the RPC-request - * semaphore from HAL_UART_RxCpltCallback, and spawns a dispatch thread. +/* Thread entry point for the RPC request handler. + */ +static void dispatch_thread(void const *args) +{ + rpc_buffer_t ibuf, obuf; + + while (1) { + memset(&ibuf, 0, sizeof(ibuf)); + memset(&obuf, 0, sizeof(obuf)); + + /* Wait for access to the uart */ + osMutexWait(dispatch_mutex, osWaitForever); + + /* Wait for the complete rpc request */ + rbuf = &ibuf; + osSemaphoreWait(rpc_sem, osWaitForever); + + /* Let the next thread handle the next request */ + osMutexRelease(dispatch_mutex); + /* Let the next thread take the mutex */ + osThreadYield(); + + /* Copy client ID from request to response */ + memcpy(obuf.buf, ibuf.buf, 4); + obuf.len = sizeof(obuf.buf) - 4; + + /* Process the request */ + hal_rpc_server_dispatch(ibuf.buf + 4, ibuf.len - 4, obuf.buf + 4, &obuf.len); + + /* Send the response */ + osMutexWait(uart_mutex, osWaitForever); + hal_error_t ret = hal_rpc_sendto(obuf.buf, obuf.len + 4, NULL); + osMutexRelease(uart_mutex); + if (ret != HAL_OK) + Error_Handler(); + } +} +osThreadDef_t thread_def[NUM_RPC_TASK]; + +/* The main thread. This does all the setup, and the worker threads handle + * the rest. */ int main() { @@ -183,6 +199,7 @@ int main() led_on(LED_GREEN); /* Prepare FMC interface. */ fmc_init(); + sdram_init(); /* Haaaack. probe_cores() calls malloc(), which works from the main * thread, but not from a spawned thread. It would be better to @@ -191,8 +208,8 @@ int main() */ hal_core_iterate(NULL); - rpc_buffer_pool = osPoolCreate(osPool(rpc_buffer_pool)); uart_mutex = osMutexCreate(osMutex(uart_mutex)); + dispatch_mutex = osMutexCreate(osMutex(dispatch_mutex)); rpc_sem = osSemaphoreCreate(osSemaphore(rpc_sem), 0); #ifdef TARGET_CRYPTECH_ALPHA @@ -205,22 +222,19 @@ int main() if (hal_rpc_server_init() != HAL_OK) Error_Handler(); - ibuf = rpc_buffer_alloc(); - if (ibuf == NULL) - /* Something is badly wrong. */ - Error_Handler(); + /* Create the rpc dispatch threads */ + for (int i = 0; i < NUM_RPC_TASK; ++i) { + osThreadDef_t *ot = &thread_def[i]; + ot->pthread = dispatch_thread; + ot->tpriority = osPriorityNormal; + ot->stacksize = TASK_STACK_SIZE; + ot->stack_pointer = (uint32_t *)stack[i]; + if (osThreadCreate(ot, (void *)i) == NULL) + Error_Handler(); + } /* Start the non-blocking receive */ HAL_UART_Receive_IT(&huart_user, &c, 1); - while (1) { - osSemaphoreWait(rpc_sem, osWaitForever); - if (osThreadCreate(osThread(dispatch_thread), (void *)ibuf) == NULL) - Error_Handler(); - while ((ibuf = rpc_buffer_alloc()) == NULL); - /* XXX There's a potential race condition, where another request - * could write into the old ibuf, or into the null pointer if - * we're out of ibufs. - */ - } + while (1) { ; } } -- cgit v1.2.3