From bf394f25dacac8e3e3add80ea326312cdd97ed00 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Thu, 27 Apr 2017 16:53:56 -0400 Subject: Replace the RTOS with a simple cooperative tasker. There are no priorities and no preemption, so tasks run in a round-robin fashion, and explicitly yield control. --- projects/hsm/hsm.c | 377 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 245 insertions(+), 132 deletions(-) (limited to 'projects/hsm/hsm.c') diff --git a/projects/hsm/hsm.c b/projects/hsm/hsm.c index f71e2c2..a683b7f 100644 --- a/projects/hsm/hsm.c +++ b/projects/hsm/hsm.c @@ -44,13 +44,12 @@ /* Rename both CMSIS HAL_OK and libhal HAL_OK to disambiguate */ #define HAL_OK CMSIS_HAL_OK -#include "cmsis_os.h" - #include "stm-init.h" #include "stm-led.h" #include "stm-fmc.h" #include "stm-uart.h" #include "stm-sdram.h" +#include "task.h" #include "mgmt-cli.h" @@ -63,10 +62,9 @@ #undef HAL_OK #ifndef NUM_RPC_TASK -/* Just one RPC task for now. More will require active resource management - * of at least the FPGA cores. - */ #define NUM_RPC_TASK 1 +#elif NUM_RPC_TASK < 1 || NUM_RPC_TASK > 10 +#error invalid NUM_RPC_TASK #endif #ifndef TASK_STACK_SIZE @@ -77,6 +75,21 @@ #define TASK_STACK_SIZE 200*1024 #endif +/* Stack for the busy task. This doesn't need to be very big. + */ +#ifndef BUSY_STACK_SIZE +#define BUSY_STACK_SIZE 1*1024 +#endif +static uint8_t busy_stack[BUSY_STACK_SIZE]; + +/* Stack for the CLI task. This needs to be big enough to accept a + * 4096-byte block of an FPGA or bootloader image upload. + */ +#ifndef CLI_STACK_SIZE +#define CLI_STACK_SIZE 8*1024 +#endif +static uint8_t cli_stack[CLI_STACK_SIZE]; + #ifndef MAX_PKT_SIZE /* An arbitrary number, more or less driven by the 4096-bit RSA * keygen test. @@ -84,146 +97,238 @@ #define MAX_PKT_SIZE 4096 #endif -/* RPC buffers. For each active RPC, there will be two - input and output. +/* RPC buffers. For each active request, there will be two - input and output. */ -typedef struct { +typedef struct rpc_buffer_s { size_t len; uint8_t buf[MAX_PKT_SIZE]; + struct rpc_buffer_s *next; /* for ibuf queue linking */ } rpc_buffer_t; -/* A mail queue (memory pool + message queue) for RPC request messages. - */ -osMailQId ibuf_queue; -osMailQDef(ibuf_queue, NUM_RPC_TASK + 2, rpc_buffer_t); +/* RPC input (requst) buffers */ +static rpc_buffer_t ibufs[NUM_RPC_TASK]; -#if NUM_RPC_TASK > 1 -/* A mutex to arbitrate concurrent UART transmits, from RPC responses. - */ -osMutexId uart_mutex; -osMutexDef(uart_mutex); -static inline void uart_lock(void) { osMutexWait(uart_mutex, osWaitForever); } -static inline void uart_unlock(void) { osMutexRelease(uart_mutex); } -#else -static inline void uart_lock(void) { } -static inline void uart_unlock(void) { } -#endif +/* ibuf queue structure */ +typedef struct { + rpc_buffer_t *head, *tail; + size_t len, max; /* for reporting */ +} ibufq_t; -#if NUM_RPC_TASK > 1 -/* A mutex to arbitrate concurrent access to the keystore. +/* ibuf queues. These correspond roughly to task states - 'waiting' is for + * unallocated ibufs, while 'ready' is for requests that are ready to be + * processed. */ -osMutexId ks_mutex; -osMutexDef(ks_mutex); -void hal_ks_lock(void) { osMutexWait(ks_mutex, osWaitForever); } -void hal_ks_unlock(void) { osMutexRelease(ks_mutex); } -#endif +static ibufq_t ibuf_waiting, ibuf_ready; -/* A ring buffer for the UART DMA receiver. In theory, it should get at most - * 92 characters per 1ms tick, but we're going to up-size it for safety. - */ -#ifndef RPC_UART_RECVBUF_SIZE -#define RPC_UART_RECVBUF_SIZE 256 /* must be a power of 2 */ -#endif -#define RPC_UART_RECVBUF_MASK (RPC_UART_RECVBUF_SIZE - 1) +/* Get an ibuf from a queue. */ +static rpc_buffer_t *ibuf_get(ibufq_t *q) +{ + hal_critical_section_start(); + rpc_buffer_t *ibuf = q->head; + if (ibuf) { + q->head = ibuf->next; + if (q->head == NULL) + q->tail = NULL; + ibuf->next = NULL; + --q->len; + } + hal_critical_section_end(); + return ibuf; +} -typedef struct { - uint32_t ridx; - uint8_t buf[RPC_UART_RECVBUF_SIZE]; -} uart_ringbuf_t; +/* Put an ibuf on a queue. */ +static void ibuf_put(ibufq_t *q, rpc_buffer_t *ibuf) +{ + hal_critical_section_start(); + if (q->tail) + q->tail->next = ibuf; + else + q->head = ibuf; + q->tail = ibuf; + ibuf->next = NULL; + if (++q->len > q->max) + q->max = q->len; + hal_critical_section_end(); +} -volatile uart_ringbuf_t uart_ringbuf = {0, {0}}; +/* Get the current length of the 'ready' queue, for reporting in the CLI. */ +size_t request_queue_len(void) +{ + size_t n; + + hal_critical_section_start(); + n = ibuf_ready.len; + hal_critical_section_end(); + + return n; +} + +/* Get the maximum length of the 'ready' queue, for reporting in the CLI. */ +size_t request_queue_max(void) +{ + size_t n; -#define RINGBUF_RIDX(rb) (rb.ridx & RPC_UART_RECVBUF_MASK) -#define RINGBUF_WIDX(rb) (sizeof(rb.buf) - __HAL_DMA_GET_COUNTER(huart_user.hdmarx)) -#define RINGBUF_COUNT(rb) ((unsigned)(RINGBUF_WIDX(rb) - RINGBUF_RIDX(rb))) -#define RINGBUF_READ(rb, dst) {dst = rb.buf[RINGBUF_RIDX(rb)]; rb.ridx++;} + hal_critical_section_start(); + n = ibuf_ready.max; + hal_critical_section_end(); -/* Thread entry point for the UART DMA monitor. + return n; +} + +static void dispatch_task(void); +static void busy_task(void); +static tcb_t *busy_tcb; + +/* Select an available dispatch task. For simplicity, this doesn't try to + * allocate tasks in a round-robin fashion, so the lowest-numbered task + * will see the most action. OTOH, this lets us gauge the level of system + * activity in the CLI's 'task show' command. */ -void uart_rx_thread(void const *args) +static tcb_t *task_next_waiting(void) { - /* current RPC input buffer */ - rpc_buffer_t *ibuf = NULL; + for (tcb_t *t = task_iterate(NULL); t; t = task_iterate(t)) { + if (task_get_func(t) == dispatch_task && + task_get_state(t) == TASK_WAITING) + return t; + } + return NULL; +} - /* I wanted to call osThreadYield(), but the documentation is misleading, - * and it only yields to the next ready thread of the same priority, so - * this high-priority thread wouldn't let anything else run. osDelay(1) - * reschedules this thread for the next tick, which is what we want. +static uint8_t *sdram_malloc(size_t size); + +/* Callback for HAL_UART_Receive_DMA(). + */ +static void RxCallback(uint8_t c) +{ + int complete; + static rpc_buffer_t *ibuf = NULL; + + /* If we couldn't previously get an ibuf, a task may have freed one up + * in the meantime. Otherwise, allocate one from SDRAM. In normal + * operation, the number of ibufs will expand to the number of remote + * clients (which we don't know and can't predict). It would take an + * active attempt to DOS the system to exhaust SDRAM, and there are + * easier ways to attack the device (don't release hash or pkey handles). */ - for ( ; ; osDelay(1)) { + if (ibuf == NULL) { + ibuf = ibuf_get(&ibuf_waiting); if (ibuf == NULL) { - if ((ibuf = (rpc_buffer_t *)osMailAlloc(ibuf_queue, 1)) == NULL) - /* This could happen if all dispatch threads are busy, and - * there are NUM_RPC_TASK requests already queued. We could - * send a "server busy" error, or we could just try again on - * the next tick. - */ + ibuf = (rpc_buffer_t *)sdram_malloc(sizeof(rpc_buffer_t)); + if (ibuf == NULL) Error_Handler(); - ibuf->len = 0; } + ibuf->len = 0; + } - while (RINGBUF_COUNT(uart_ringbuf)) { - uint8_t c; - int complete; - - RINGBUF_READ(uart_ringbuf, c); - if (hal_slip_process_char(c, ibuf->buf, &ibuf->len, sizeof(ibuf->buf), &complete) != LIBHAL_OK) - Error_Handler(); + /* Process this character into the ibuf. */ + if (hal_slip_process_char(c, ibuf->buf, &ibuf->len, sizeof(ibuf->buf), &complete) != LIBHAL_OK) + Error_Handler(); - if (complete) { - if (osMailPut(ibuf_queue, (void *)ibuf) != osOK) - Error_Handler(); - ibuf = NULL; - /* Yield, to allow one of the dispatch threads to pick up this - * new request. - */ - break; - } - } + if (complete) { + /* Add the ibuf to the request queue, and try to get another ibuf. + */ + ibuf_put(&ibuf_ready, ibuf); + ibuf = ibuf_get(&ibuf_waiting); + if (ibuf != NULL) + ibuf->len = 0; + /* else all ibufs are busy, try again next time */ + + /* Wake a dispatch task to deal with this request, or wake the + * busy task to re-try scheduling a dispatch task. + */ + tcb_t *t = task_next_waiting(); + if (t) + task_wake(t); + else + task_wake(busy_tcb); } } -osThreadDef(uart_rx_thread, osPriorityHigh, DEFAULT_STACK_SIZE); +static uint8_t uart_rx[2]; /* current character received from UART */ +static uint32_t uart_rx_idx = 0; + +/* UART DMA half-complete and complete callbacks. With a 2-character DMA + * buffer, one or the other of these will fire on each incoming character. + * Under heavy load, these will sometimes fire in the wrong order, but the + * data are in the right order in the DMA buffer, so we have a flip-flop + * buffer index that doesn't depend on the order of the callbacks. + */ +void HAL_UART2_RxHalfCpltCallback(UART_HandleTypeDef *huart) +{ + RxCallback(uart_rx[uart_rx_idx]); + uart_rx_idx ^= 1; +} + +void HAL_UART2_RxCpltCallback(UART_HandleTypeDef *huart) +{ + RxCallback(uart_rx[uart_rx_idx]); + uart_rx_idx ^= 1; +} + +/* Send one character over the UART. This is called from + * hal_slip_send_char(). + */ hal_error_t hal_serial_send_char(uint8_t c) { return (uart_send_char2(STM_UART_USER, c) == 0) ? LIBHAL_OK : HAL_ERROR_RPC_TRANSPORT; } -/* Thread entry point for the RPC request handler. +/* Task entry point for the RPC request handler. */ -void dispatch_thread(void const *args) +static void dispatch_task(void) { - rpc_buffer_t obuf_s, *obuf = &obuf_s, *ibuf; + rpc_buffer_t obuf_s, *obuf = &obuf_s; while (1) { - memset(obuf, 0, sizeof(*obuf)); - obuf->len = sizeof(obuf->buf); - /* Wait for a complete RPC request */ - osEvent evt = osMailGet(ibuf_queue, osWaitForever); - if (evt.status != osEventMail) + task_sleep(); + + rpc_buffer_t *ibuf = ibuf_get(&ibuf_ready); + if (ibuf == NULL) + /* probably an error, but go back to sleep */ continue; - ibuf = (rpc_buffer_t *)evt.value.p; + + memset(obuf, 0, sizeof(*obuf)); + obuf->len = sizeof(obuf->buf); /* Process the request */ - hal_error_t ret = hal_rpc_server_dispatch(ibuf->buf, ibuf->len, obuf->buf, &obuf->len); - osMailFree(ibuf_queue, (void *)ibuf); - if (ret != LIBHAL_OK) { - /* If hal_rpc_server_dispatch failed with an XDR error, it - * probably means the request packet was garbage. In any case, we - * have nothing to transmit. - */ - continue; - } + hal_error_t ret = hal_rpc_server_dispatch(ibuf->buf, ibuf->len, obuf->buf, &obuf->len); + ibuf_put(&ibuf_waiting, ibuf); + if (ret == LIBHAL_OK) { + /* Send the response */ + if (hal_rpc_sendto(obuf->buf, obuf->len, NULL) != LIBHAL_OK) + Error_Handler(); + } + /* Else hal_rpc_server_dispatch failed with an XDR error, which + * probably means the request packet was garbage. In any case, we + * have nothing to transmit. + */ + } +} - /* Send the response */ - uart_lock(); - ret = hal_rpc_sendto(obuf->buf, obuf->len, NULL); - uart_unlock(); - if (ret != LIBHAL_OK) - Error_Handler(); +/* Task entry point for the task-rescheduling task. + */ +static void busy_task(void) +{ + while (1) { + /* Wake as many tasks as we have requests. + */ + size_t n; + for (n = request_queue_len(); n > 0; --n) { + tcb_t *t; + if ((t = task_next_waiting()) != NULL) + task_wake(t); + else + break; + } + if (n == 0) + /* flushed the queue, our work here is done */ + task_sleep(); + else + /* more work to do, try again after some tasks have run */ + task_yield(); } } -osThreadDef_t thread_def[NUM_RPC_TASK]; /* Allocate memory from SDRAM1. There is only malloc, no free, so we don't * worry about fragmentation. */ @@ -255,8 +360,7 @@ void *hal_allocate_static_memory(const size_t size) return sdram_malloc(size); } -#if NUM_RPC_TASK > 1 -/* Critical section start/end, currently used just for hal_core_alloc/_free. +/* Critical section start/end - temporarily disable interrupts. */ void hal_critical_section_start(void) { @@ -267,12 +371,19 @@ void hal_critical_section_end(void) { __enable_irq(); } -#endif -/* The main thread. This does all the setup, and the worker threads handle +/* A genericized public interface to task_yield(), for calling from + * libhal. + */ +void hal_task_yield(void) +{ + task_yield(); +} + +/* The main task. This does all the setup, and the worker tasks handle * the rest. */ -int main() +int main(void) { stm_init(); uart_set_default(STM_UART_MGMT); @@ -282,41 +393,43 @@ int main() fmc_init(); sdram_init(); - if ((ibuf_queue = osMailCreate(osMailQ(ibuf_queue), NULL)) == NULL) + if (hal_rpc_server_init() != LIBHAL_OK) Error_Handler(); -#if NUM_RPC_TASK > 1 - if ((uart_mutex = osMutexCreate(osMutex(uart_mutex))) == NULL) - Error_Handler(); - if ((ks_mutex = osMutexCreate(osMutex(ks_mutex))) == NULL) - Error_Handler(); -#endif - - if (hal_rpc_server_init() != LIBHAL_OK) - Error_Handler(); + /* Initialize the ibuf queues. */ + memset(&ibuf_waiting, 0, sizeof(ibuf_waiting)); + memset(&ibuf_ready, 0, sizeof(ibuf_ready)); + for (int i = 0; i < sizeof(ibufs)/sizeof(ibufs[0]); ++i) + ibuf_put(&ibuf_waiting, &ibufs[i]); - /* Create the rpc dispatch worker threads. */ + /* Create the rpc dispatch worker tasks. */ + static char label[NUM_RPC_TASK][sizeof("dispatch0")]; for (int i = 0; i < NUM_RPC_TASK; ++i) { - osThreadDef_t *ot = &thread_def[i]; - ot->pthread = dispatch_thread; - ot->tpriority = osPriorityNormal; - ot->stacksize = TASK_STACK_SIZE; - ot->stack_pointer = (uint32_t *)(sdram_malloc(TASK_STACK_SIZE)); - if (ot->stack_pointer == NULL) + sprintf(label[i], "dispatch%d", i); + void *stack = (void *)sdram_malloc(TASK_STACK_SIZE); + if (stack == NULL) Error_Handler(); - if (osThreadCreate(ot, (void *)i) == NULL) + if (task_add(label[i], dispatch_task, &ibufs[i], stack, TASK_STACK_SIZE) == NULL) Error_Handler(); } - /* Start the UART receiver. */ - if (HAL_UART_Receive_DMA(&huart_user, (uint8_t *) uart_ringbuf.buf, sizeof(uart_ringbuf.buf)) != CMSIS_HAL_OK) + /* Create the busy task. */ + busy_tcb = task_add("busy", busy_task, NULL, busy_stack, sizeof(busy_stack)); + if (busy_tcb == NULL) Error_Handler(); - if (osThreadCreate(osThread(uart_rx_thread), NULL) == NULL) + + /* Start the UART receiver. */ + if (HAL_UART_Receive_DMA(&huart_user, uart_rx, 2) != CMSIS_HAL_OK) Error_Handler(); - /* Launch other threads (csprng warm-up thread?) + /* Launch other tasks (csprng warm-up task?) * Wait for FPGA_DONE interrupt. */ - return cli_main(); + /* Create the CLI task. */ + if (task_add("cli", (funcp_t)cli_main, NULL, cli_stack, sizeof(cli_stack)) == NULL) + Error_Handler(); + + /* Start the tasker */ + task_yield(); } -- cgit v1.2.3