From e47310c3d5ef6fc10e60562d2d88da0927bc1fce Mon Sep 17 00:00:00 2001 From: Linus Nordberg Date: Wed, 18 Dec 2019 23:36:25 +0100 Subject: WIP Revamp ChaCha20 seeding - chacha20_prng_block() uses counter in the state struct - chacha20_setup() replaces chacha20_prng_reseed() and fills the whole state struct, fixing a bug where only half of the key was being set; as a result of 'counter' being set, a state struct filled with entropy from the TRNG makes reseeding occur after a random number of rounds instead of after a fixed 2^32-1 rounds - decrementing of the block counter is done in chacha20_prng_block() - chacha output is copied to buf _after_ the interrupt driven transmission of buf to UART has finished, to stop the race between reading and refilling of buf --- src/cc20rng/cc20_prng.c | 124 +++++++++++++++++++++++---------------------- src/cc20rng/cc20_prng.h | 21 +++++--- src/cc20rng/main.c | 55 ++++++++++---------- src/cc20rng/main.h | 4 -- src/cc20rng/stm32f4xx_it.h | 2 - 5 files changed, 104 insertions(+), 102 deletions(-) delete mode 100644 src/cc20rng/main.h diff --git a/src/cc20rng/cc20_prng.c b/src/cc20rng/cc20_prng.c index 50739bd..36c06a4 100644 --- a/src/cc20rng/cc20_prng.c +++ b/src/cc20rng/cc20_prng.c @@ -41,16 +41,16 @@ #define CHACHA20_CONSTANT3 0x6b206574 #ifdef CHACHA20_PRNG_DEBUG -void _dump(struct cc20_state *cc, char *str); +static void _dump(struct cc20_state *cc, char *str); #endif -inline uint32_t rotl32(uint32_t x, uint32_t n) +static inline uint32_t rotl32(uint32_t x, uint32_t n) { return (x << n) | (x >> (32 - n)); } -inline void _qr(struct cc20_state *cc, uint32_t a, uint32_t b, uint32_t c, - uint32_t d) +static inline void _qr(struct cc20_state *cc, uint32_t a, uint32_t b, + uint32_t c, uint32_t d) { cc->i[a] += cc->i[b]; cc->i[d] ^= cc->i[a]; @@ -69,44 +69,40 @@ inline void _qr(struct cc20_state *cc, uint32_t a, uint32_t b, uint32_t c, cc->i[b] = rotl32(cc->i[b], 7); } -void chacha20_prng_reseed(struct cc20_state *cc, uint32_t 
*entropy) +static void chacha20_setup(struct cc20_state *state, + const struct cc20_state *src) { - uint32_t i = 256 / 8 / 4; - while (i--) { - cc->i[i] = entropy[i]; - } + state->s.constant[0] = CHACHA20_CONSTANT0; + state->s.constant[1] = CHACHA20_CONSTANT1; + state->s.constant[2] = CHACHA20_CONSTANT2; + state->s.constant[3] = CHACHA20_CONSTANT3; + + for (int i = 4; i < CHACHA20_BLOCK_SIZE_WORDS; i++) + state->i[i] = src->i[i]; } -void chacha20_prng_block(struct cc20_state *cc, uint32_t block_counter, - struct cc20_state *out) +void chacha20_prng_block(struct cc20_state *cc, uint8_t *out) { uint32_t i; + struct cc20_state *ws = (struct cc20_state *)out; - out->i[0] = CHACHA20_CONSTANT0; - out->i[1] = CHACHA20_CONSTANT1; - out->i[2] = CHACHA20_CONSTANT2; - out->i[3] = CHACHA20_CONSTANT3; - - cc->i[12] = block_counter; - - for (i = 4; i < CHACHA20_NUM_WORDS; i++) { - out->i[i] = cc->i[i]; - } + chacha20_setup(ws, cc); for (i = 10; i; i--) { - _qr(out, 0, 4, 8, 12); - _qr(out, 1, 5, 9, 13); - _qr(out, 2, 6, 10, 14); - _qr(out, 3, 7, 11, 15); - _qr(out, 0, 5, 10, 15); - _qr(out, 1, 6, 11, 12); - _qr(out, 2, 7, 8, 13); - _qr(out, 3, 4, 9, 14); + _qr(ws, 0, 4, 8, 12); + _qr(ws, 1, 5, 9, 13); + _qr(ws, 2, 6, 10, 14); + _qr(ws, 3, 7, 11, 15); + _qr(ws, 0, 5, 10, 15); + _qr(ws, 1, 6, 11, 12); + _qr(ws, 2, 7, 8, 13); + _qr(ws, 3, 4, 9, 14); } - for (i = 0; i < CHACHA20_NUM_WORDS; i++) { - out->i[i] += cc->i[i]; - } + for (i = 0; i < CHACHA20_BLOCK_SIZE_WORDS; i++) + ws->i[i] += cc->i[i]; + + cc->s.counter--; } int chacha20_prng_self_test1() @@ -115,16 +111,18 @@ int chacha20_prng_self_test1() * https://tools.ietf.org/html/rfc7539#section-2.3.2 */ struct cc20_state test = { - {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, - 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, - 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, - 0x00000001, 0x09000000, 0x4a000000, 0x00000000, + .i = { + 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, + 0x03020100, 0x07060504, 0x0b0a0908, 
0x0f0e0d0c, + 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, + 0x00000001, 0x09000000, 0x4a000000, 0x00000000, }}; struct cc20_state expected = { - {0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, - 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, - 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, - 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2, + .i = { + 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, + 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, + 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, + 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2, }}; uint32_t i; struct cc20_state out; @@ -133,12 +131,13 @@ int chacha20_prng_self_test1() _dump(&test, "Test vector from RFC7539, section 2.3.2. Input:"); #endif - chacha20_prng_block(&test, 1, &out); + test.s.counter = 1; /* nop */ + chacha20_prng_block(&test, (uint8_t *)&out); #ifdef CHACHA20_PRNG_DEBUG _dump(&out, "Test vector from RFC7539, section 2.3.2. Output:"); #endif - for (i = 0; i < CHACHA20_NUM_WORDS; i++) { + for (i = 0; i < CHACHA20_BLOCK_SIZE_WORDS; i++) { if (out.i[i] != expected.i[i]) return 0; } @@ -152,22 +151,25 @@ int chacha20_prng_self_test2() * https://tools.ietf.org/html/rfc7539#section-2.4.2 */ struct cc20_state test = { - {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, - 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, - 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, - 0x00000001, 0x00000000, 0x4a000000, 0x00000000, + .i = { + 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, + 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, + 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, + 0x00000001, 0x00000000, 0x4a000000, 0x00000000, }}; struct cc20_state expected1 = { - {0xf3514f22, 0xe1d91b40, 0x6f27de2f, 0xed1d63b8, - 0x821f138c, 0xe2062c3d, 0xecca4f7e, 0x78cff39e, - 0xa30a3b8a, 0x920a6072, 0xcd7479b5, 0x34932bed, - 0x40ba4c79, 0xcd343ec6, 0x4c2c21ea, 0xb7417df0, + .i = { + 0xf3514f22, 0xe1d91b40, 0x6f27de2f, 0xed1d63b8, + 0x821f138c, 0xe2062c3d, 0xecca4f7e, 0x78cff39e, + 0xa30a3b8a, 0x920a6072, 
0xcd7479b5, 0x34932bed, + 0x40ba4c79, 0xcd343ec6, 0x4c2c21ea, 0xb7417df0, }}; struct cc20_state expected2 = { - {0x9f74a669, 0x410f633f, 0x28feca22, 0x7ec44dec, - 0x6d34d426, 0x738cb970, 0x3ac5e9f3, 0x45590cc4, - 0xda6e8b39, 0x892c831a, 0xcdea67c1, 0x2b7e1d90, - 0x037463f3, 0xa11a2073, 0xe8bcfb88, 0xedc49139, + .i = { + 0x9f74a669, 0x410f633f, 0x28feca22, 0x7ec44dec, + 0x6d34d426, 0x738cb970, 0x3ac5e9f3, 0x45590cc4, + 0xda6e8b39, 0x892c831a, 0xcdea67c1, 0x2b7e1d90, + 0x037463f3, 0xa11a2073, 0xe8bcfb88, 0xedc49139, }}; struct cc20_state out; uint32_t i; @@ -176,20 +178,22 @@ int chacha20_prng_self_test2() _dump(&test, "Test vector from RFC7539, section 2.4.2. Input:"); #endif - chacha20_prng_block(&test, 1, &out); + test.s.counter = 1; /* nop */ + chacha20_prng_block(&test, (uint8_t *)&out); #ifdef CHACHA20_PRNG_DEBUG _dump(&out, "First block"); #endif - for (i = 0; i < CHACHA20_NUM_WORDS; i++) { + for (i = 0; i < CHACHA20_BLOCK_SIZE_WORDS; i++) { if (out.i[i] != expected1.i[i]) return 0; } - chacha20_prng_block(&test, 2, &out); + test.s.counter = 2; + chacha20_prng_block(&test, (uint8_t *)&out); #ifdef CHACHA20_PRNG_DEBUG _dump(&out, "Second block"); #endif - for (i = 0; i < CHACHA20_NUM_WORDS; i++) { + for (i = 0; i < CHACHA20_BLOCK_SIZE_WORDS; i++) { if (out.i[i] != expected2.i[i]) return 0; } @@ -213,8 +217,8 @@ void _dump(struct cc20_state *cc, char *str) } #endif -/* Test vector from RFC, used as simple power-on self-test of ability to compute - * a block correctly. +/* Test vector from RFC7539, used as simple power-on self-test of + * ability to compute a block correctly. 
*/ int chacha20_prng_self_test() { diff --git a/src/cc20rng/cc20_prng.h b/src/cc20rng/cc20_prng.h index 7b597d0..08f78d7 100644 --- a/src/cc20rng/cc20_prng.h +++ b/src/cc20rng/cc20_prng.h @@ -3,17 +3,22 @@ #include <stdint.h> -#define CHACHA20_MAX_BLOCK_COUNTER 0xffffffff -#define CHACHA20_NUM_WORDS 16 -#define CHACHA20_BLOCK_SIZE (CHACHA20_NUM_WORDS * 4) +#define CHACHA20_BLOCK_SIZE_WORDS 16 +#define CHACHA20_BLOCK_SIZE (CHACHA20_BLOCK_SIZE_WORDS * 4) struct cc20_state { - uint32_t i[CHACHA20_NUM_WORDS]; + union { + struct { + uint32_t constant[4]; + uint32_t key[8]; + uint32_t counter; + uint32_t nonce[3]; + } s; + uint32_t i[CHACHA20_BLOCK_SIZE_WORDS]; + }; }; -extern void chacha20_prng_reseed(struct cc20_state *cc, uint32_t *entropy); -extern void chacha20_prng_block(struct cc20_state *cc, uint32_t block_counter, - struct cc20_state *out); -extern int chacha20_prng_self_test(); +void chacha20_prng_block(struct cc20_state *cc, uint8_t *out); +int chacha20_prng_self_test(); #endif /* __STM32_CHACHA20_H */ diff --git a/src/cc20rng/main.c b/src/cc20rng/main.c index 88b5144..cc063f4 100644 --- a/src/cc20rng/main.c +++ b/src/cc20rng/main.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2014, 2015, 2016 NORDUnet A/S + * Copyright (c) 2019 Sunet * All rights reserved. 
* * Redistribution and use in source and binary forms, with or @@ -32,7 +33,6 @@ #include #include "cc20_prng.h" -#include "main.h" #include "stm_init.h" #define UART_RANDOM_BYTES_PER_CHUNK 8 @@ -79,13 +79,14 @@ static inline volatile uint32_t *get_DMA_read_buf(void); static inline uint32_t safe_get_counter(volatile uint32_t *dmabuf, const uint32_t dmabuf_idx); static void check_uart_rx(UART_HandleTypeDef *this); -static void cc_reseed(struct cc20_state *cc); +static uint32_t cc_reseed(struct cc20_state *cc); void Error_Handler(void); int main() { - uint32_t i, timeout, block_counter = 0; - struct cc20_state cc, out; + uint32_t i, timeout, block_counter; + struct cc20_state cc_state = {0}; + uint8_t cc_result[CHACHA20_BLOCK_SIZE]; HAL_StatusTypeDef res; /* Initialize buffers */ @@ -113,40 +114,32 @@ int main() { HAL_Delay(125); } - /* Generate initial block of random data directly into buf */ - cc_reseed(&cc); - block_counter = RESEED_BLOCKS; - chacha20_prng_block(&cc, block_counter--, (struct cc20_state *)buf.rnd32); + /* Generate initial block of ChaCha20 output directly into buf. */ + block_counter = cc_reseed(&cc_state); + chacha20_prng_block(&cc_state, buf.rnd); + block_counter--; /* Main loop */ while (1) { - if (!(block_counter % 1000)) { + if (!(block_counter % 1000)) HAL_GPIO_TogglePin(LED_PORT, LED_YELLOW); - } - - if (!block_counter) { - cc_reseed(&cc); - block_counter = RESEED_BLOCKS; - } /* Send buf on UART (non blocking interrupt driven send). 
*/ UartReady = RESET; - res = HAL_UART_Transmit_IT(huart, &buf.rnd[0], CHACHA20_BLOCK_SIZE); + res = HAL_UART_Transmit_IT(huart, buf.rnd, CHACHA20_BLOCK_SIZE); /* Generate next block while this block is being transmitted */ - chacha20_prng_block(&cc, block_counter--, &out); - /* Copying using a loop is faster than memcpy on STM32 */ - for (i = 0; i < CHACHA20_NUM_WORDS; i++) { - buf.rnd32[i] = out.i[i]; - } + if (!block_counter) + block_counter = cc_reseed(&cc_state); + chacha20_prng_block(&cc_state, cc_result); + block_counter--; + /* Wait for transfer to complete. */ if (res == HAL_OK) { timeout = 0xffff; - while (UartReady != SET && timeout) { + while (UartReady != SET && timeout) timeout--; - } } - if (UartReady != SET) { /* Failed to send, turn on RED LED for one second */ HAL_GPIO_WritePin(LED_PORT, LED_RED, GPIO_PIN_SET); @@ -154,6 +147,10 @@ int main() { HAL_GPIO_WritePin(LED_PORT, LED_RED, GPIO_PIN_RESET); } + /* Fill buffer with ChaCha20 output. */ + for (i = 0; i < CHACHA20_BLOCK_SIZE; i++) + buf.rnd[i] = cc_result[i]; + /* Check for UART change request */ check_uart_rx(&huart1); check_uart_rx(&huart2); @@ -163,16 +160,18 @@ int main() { /** * @brief Reseed chacha20 state with hardware generated entropy. 
* @param cc: ChaCha20 state - * @retval None + * @retval ChaCha20 block counter */ -static void cc_reseed(struct cc20_state *cc) { +static uint32_t cc_reseed(struct cc20_state *cc) { HAL_GPIO_WritePin(LED_PORT, LED_BLUE, GPIO_PIN_SET); - get_entropy32(CHACHA20_BLOCK_SIZE / 4, 0); + get_entropy32(CHACHA20_BLOCK_SIZE_WORDS, 0); restart_DMA(); - chacha20_prng_reseed(cc, (uint32_t *)&buf); + memcpy(cc, buf.rnd, CHACHA20_BLOCK_SIZE); HAL_GPIO_WritePin(LED_PORT, LED_BLUE, GPIO_PIN_RESET); + + return cc->s.counter; } /** diff --git a/src/cc20rng/main.h b/src/cc20rng/main.h deleted file mode 100644 index 902ecf4..0000000 --- a/src/cc20rng/main.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __MAIN_H -#define __MAIN_H - -#endif /* __MAIN_H */ diff --git a/src/cc20rng/stm32f4xx_it.h b/src/cc20rng/stm32f4xx_it.h index 04c5a36..6edba3c 100644 --- a/src/cc20rng/stm32f4xx_it.h +++ b/src/cc20rng/stm32f4xx_it.h @@ -44,8 +44,6 @@ #endif /* Includes ------------------------------------------------------------------*/ -#include "main.h" - /* Exported types ------------------------------------------------------------*/ /* Exported constants --------------------------------------------------------*/ /* Exported macro ------------------------------------------------------------*/ -- cgit v1.2.3