From f7c3a151c7b0a3f040ab9aa8d81c562684d3f2e8 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Tue, 1 Nov 2016 12:09:41 -0400 Subject: hal_ks_index_fsck() and a pile of debugging code. The debugging code was for tracking down what turned out to be a race condition in the Alpha's flash driver code (see sw/stm32); much of this was temporary, and will be removed in a (near) future commit, but some of the techniques were useful and belong in the repository in case we need to pull them back for something similar in the future. hal_ks_index_fsck() attempts to diagnose all the things I found wrong in the ks_flash index after one long series of errors. As presently written, it doesn't attempt to fix anything, just diagnose errors: the intent is that we can call this, before and after every modification if necessary, to poinpoint exactly which calls introduce errors. Once things stablize a bit, we may want to crank down the number of calls to this (it's a bit expensive, since it checks the entire index), and perhaps add the ability to clean up whatever errors it might find; the latter might be a good candidate for a CLI command. --- ks_flash.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 5 deletions(-) (limited to 'ks_flash.c') diff --git a/ks_flash.c b/ks_flash.c index 10df54b..c12aad4 100644 --- a/ks_flash.c +++ b/ks_flash.c @@ -46,6 +46,34 @@ #include "stm-keystore.h" #undef HAL_OK +#if 1 /* XXX Begin temporary debugging kludge */ +#warning Temporary debugging kludge, remove this + +/* + * Chasing what might be a race condition, except it's a bit too + * predictable. Debugger breakpoint or 0.1 second delay is enough to + * hide it, so need something simple. So try a simple ring buffer + * logging block numbers and actions. + */ + +static unsigned debug_ring_counter = 0; + +static struct { + char code; /* One letter code describing action */ + unsigned blockno; /* Block number */ + unsigned counter; /* value of debug_ring_counter when logged */ +} debug_ring_log_buffer[64]; + +static inline void debug_ring_log_event(const char code, const unsigned blockno) +{ + const unsigned i = debug_ring_counter % (sizeof(debug_ring_log_buffer)/sizeof(*debug_ring_log_buffer)); + debug_ring_log_buffer[i].code = code; + debug_ring_log_buffer[i].blockno = blockno; + debug_ring_log_buffer[i].counter = debug_ring_counter++; +} + +#endif /* XXX End temporary debugging kludge */ + /* * Known block states. * @@ -332,6 +360,8 @@ static hal_error_t block_read(const unsigned blockno, flash_block_t *block) if (block == NULL || blockno >= NUM_FLASH_BLOCKS || sizeof(*block) != KEYSTORE_SUBSECTOR_SIZE) return HAL_ERROR_IMPOSSIBLE; + debug_ring_log_event('r', blockno); /* XXX */ + /* Sigh, magic numeric return codes */ if (keystore_read_data(block_offset(blockno), block->bytes, @@ -358,6 +388,8 @@ static hal_error_t block_read(const unsigned blockno, flash_block_t *block) return HAL_ERROR_KEYSTORE_BAD_BLOCK_TYPE; } + debug_ring_log_event('R', blockno); /* XXX */ + /* Sigh, magic numeric return codes */ if (keystore_read_data(block_offset(blockno) + KEYSTORE_PAGE_SIZE, block->bytes + KEYSTORE_PAGE_SIZE, @@ -381,6 +413,8 @@ static hal_error_t block_read_cached(const unsigned blockno, flash_block_t **blo if (block == NULL) return HAL_ERROR_IMPOSSIBLE; + debug_ring_log_event('c', blockno); /* XXX */ + if ((*block = cache_find_block(blockno)) != NULL) return HAL_OK; @@ -405,12 +439,16 @@ static hal_error_t block_deprecate(const unsigned blockno) flash_block_header_t *header = (void *) page; uint32_t offset = block_offset(blockno); + debug_ring_log_event('d', blockno); /* XXX */ + /* Sigh, magic numeric return codes */ if (keystore_read_data(offset, page, sizeof(page)) != 1) return HAL_ERROR_KEYSTORE_ACCESS; header->block_status = BLOCK_STATUS_TOMBSTONE; + debug_ring_log_event('D', blockno); /* XXX */ + /* Sigh, magic numeric return codes */ if (keystore_write_data(offset, page, sizeof(page)) != 1) return HAL_ERROR_KEYSTORE_ACCESS; @@ -429,6 +467,8 @@ static hal_error_t block_zero(const unsigned blockno) uint8_t page[KEYSTORE_PAGE_SIZE] = {0}; + debug_ring_log_event('z', blockno); /* XXX */ + /* Sigh, magic numeric return codes */ if (keystore_write_data(block_offset(blockno), page, sizeof(page)) != 1) return HAL_ERROR_KEYSTORE_ACCESS; @@ -445,6 +485,8 @@ static hal_error_t block_erase(const unsigned blockno) if (blockno >= NUM_FLASH_BLOCKS) return HAL_ERROR_IMPOSSIBLE; + debug_ring_log_event('e', blockno); /* XXX */ + /* Sigh, magic numeric return codes */ if (keystore_erase_subsectors(blockno, blockno) != 1) return HAL_ERROR_KEYSTORE_ACCESS; @@ -467,6 +509,8 @@ static hal_error_t block_erase_maybe(const unsigned blockno) if (blockno >= NUM_FLASH_BLOCKS) return HAL_ERROR_IMPOSSIBLE; + debug_ring_log_event('m', blockno); /* XXX */ + uint8_t mask = 0xFF; for (uint32_t a = block_offset(blockno); a < block_offset(blockno + 1); a += KEYSTORE_PAGE_SIZE) { @@ -504,6 +548,8 @@ static hal_error_t block_write(const unsigned blockno, flash_block_t *block) break; } + debug_ring_log_event('w', blockno); /* XXX */ + /* Sigh, magic numeric return codes */ if (keystore_write_data(block_offset(blockno), block->bytes, sizeof(*block)) != 1) return HAL_ERROR_KEYSTORE_ACCESS; @@ -524,6 +570,8 @@ static hal_error_t block_update(const unsigned b1, flash_block_t *block, if (db.ksi.used == db.ksi.size) return HAL_ERROR_NO_KEY_INDEX_SLOTS; + debug_ring_log_event('u', b1); /* XXX */ + cache_release(block); hal_error_t err; @@ -710,6 +758,11 @@ static hal_error_t ks_init(const hal_ks_driver_t * const driver) if ((err = hal_ks_index_setup(&db.ksi)) != HAL_OK) return err; + /* + * We might want to call hal_ks_index_fsck() here, if we can figure + * out some safe set of recovery actions we can take. + */ + /* * Deal with tombstones. These are blocks left behind when * something bad (like a power failure) happened while we updating. @@ -1019,7 +1072,7 @@ static hal_error_t ks_fetch(hal_ks_t *ks, return err; if (block_get_type(block) != BLOCK_TYPE_KEY) - return HAL_ERROR_KEY_NOT_FOUND; + return HAL_ERROR_KEYSTORE_WRONG_BLOCK_TYPE; /* HAL_ERROR_KEY_NOT_FOUND */ cache_mark_used(block, b); @@ -1072,11 +1125,12 @@ static hal_error_t ks_delete(hal_ks_t *ks, if ((err = hal_ks_index_delete_range(&db.ksi, &slot->name, n, NULL, b, &slot->hint)) != HAL_OK) return err; - for (int i = 0; i < n; i++) { + for (int i = 0; i < n; i++) cache_release(cache_find_block(b[i])); + + for (int i = 0; i < n; i++) if ((err = block_zero(b[i])) != HAL_OK) return err; - } return block_erase_maybe(db.ksi.index[db.ksi.used]); } @@ -1128,7 +1182,7 @@ static inline hal_error_t locate_attributes(flash_block_t *block, const unsigned if (chunk == 0) { if (block_get_type(block) != BLOCK_TYPE_KEY) - return HAL_ERROR_KEY_NOT_FOUND; + return HAL_ERROR_KEYSTORE_WRONG_BLOCK_TYPE; /* HAL_ERROR_KEY_NOT_FOUND */ *attrs_len = &block->key.attributes_len; *bytes = block->key.der + block->key.der_len; *bytes_len = SIZEOF_FLASH_KEY_BLOCK_DER - block->key.der_len; @@ -1136,7 +1190,7 @@ static inline hal_error_t locate_attributes(flash_block_t *block, const unsigned else { if (block_get_type(block) != BLOCK_TYPE_ATTR) - return HAL_ERROR_KEY_NOT_FOUND; + return HAL_ERROR_KEYSTORE_WRONG_BLOCK_TYPE; /* HAL_ERROR_KEY_NOT_FOUND */ *attrs_len = &block->attr.attributes_len; *bytes = block->attr.attributes; *bytes_len = SIZEOF_FLASH_ATTRIBUTE_BLOCK_ATTRIBUTES; -- cgit v1.2.3