From 6da78b1b193caf2d82278c36ba66c1aa55b42a68 Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Fri, 15 Jan 2021 19:29:14 -0800 Subject: [PATCH] Rewrote how injected faults are handled in SQLite to be more reliable and work with an upcoming write throttling feature in AsyncFileCached. --- fdbserver/CMakeLists.txt | 1 + fdbserver/KeyValueStoreSQLite.actor.cpp | 21 ++++++- fdbserver/VFSAsync.cpp | 81 ++++++++++++++----------- fdbserver/VFSAsync.h | 78 ++++++++++++++++++++++++ fdbserver/sqlite/sqlite3.amalgamation.c | 8 +++ fdbserver/sqlite/sqlite3.h | 3 + 6 files changed, 154 insertions(+), 38 deletions(-) create mode 100644 fdbserver/VFSAsync.h diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index 89463f7a96..3da6dc552a 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -96,6 +96,7 @@ set(FDBSERVER_SRCS TLogInterface.h TLogServer.actor.cpp VersionedBTree.actor.cpp + VFSAsync.h VFSAsync.cpp WaitFailure.actor.cpp WaitFailure.h diff --git a/fdbserver/KeyValueStoreSQLite.actor.cpp b/fdbserver/KeyValueStoreSQLite.actor.cpp index e2e40f4658..d1c298778b 100644 --- a/fdbserver/KeyValueStoreSQLite.actor.cpp +++ b/fdbserver/KeyValueStoreSQLite.actor.cpp @@ -32,6 +32,7 @@ extern "C" { u32 sqlite3VdbeSerialGet(const unsigned char*, u32, Mem*); } #include "flow/ThreadPrimitives.h" +#include "fdbserver/VFSAsync.h" #include "fdbserver/template_fdb.h" #include "fdbrpc/simulator.h" #include "flow/actorcompiler.h" // This must be the last #include. @@ -222,6 +223,7 @@ struct SQLiteDB : NonCopyable { bool page_checksums; bool fragment_values; PageChecksumCodec *pPagerCodec; // we do NOT own this pointer, db does. 
+ VFSAsyncFile *vfsDB, *vfsWAL; void beginTransaction(bool write) { checkError("BtreeBeginTrans", sqlite3BtreeBeginTrans(btree, write)); @@ -236,7 +238,7 @@ struct SQLiteDB : NonCopyable { void open(bool writable); void createFromScratch(); - SQLiteDB( std::string filename, bool page_checksums, bool fragment_values): filename(filename), db(NULL), btree(NULL), table(-1), freetable(-1), haveMutex(false), page_checksums(page_checksums), fragment_values(fragment_values) {} + SQLiteDB( std::string filename, bool page_checksums, bool fragment_values): filename(filename), db(NULL), btree(NULL), table(-1), freetable(-1), haveMutex(false), page_checksums(page_checksums), fragment_values(fragment_values), vfsDB(nullptr), vfsWAL(nullptr) {} ~SQLiteDB() { if (db) { @@ -260,14 +262,24 @@ struct SQLiteDB : NonCopyable { } } + bool consumeInjectedErrors() { + // Both of these consumeInjectedError() calls must be made. If this was written as one expression + // then if the first one returned true the second call would be skipped. + bool dbErr = (vfsDB != nullptr && vfsDB->consumeInjectedError()); + bool walErr = (vfsWAL != nullptr && vfsWAL->consumeInjectedError()); + return dbErr || walErr; + } + void checkError( const char* context, int rc ) { //if (deterministicRandom()->random01() < .001) rc = SQLITE_INTERRUPT; if (rc) { // Our exceptions don't propagate through sqlite, so we don't know for sure if the error that caused this was // an injected fault. Assume that if fault injection is happening, this is an injected fault. 
Error err = io_error(); - if (g_network->isSimulated() && (g_simulator.getCurrentProcess()->fault_injection_p1 || g_simulator.getCurrentProcess()->machine->machineProcess->fault_injection_p1 || g_simulator.getCurrentProcess()->rebooting)) + + if (g_network->isSimulated() && (consumeInjectedErrors())) { err = err.asInjectedFault(); + } if (db) db->errCode = rc; @@ -1378,6 +1390,11 @@ void SQLiteDB::open(bool writable) { int result = sqlite3_open_v2(apath.c_str(), &db, (writable ? SQLITE_OPEN_READWRITE : SQLITE_OPEN_READONLY), NULL); checkError("open", result); + vfsDB = (VFSAsyncFile *)sqlite3_get_vfs_db(db); + ASSERT(vfsDB != nullptr); + vfsWAL = (VFSAsyncFile *)sqlite3_get_vfs_wal(db); + ASSERT(vfsWAL != nullptr); + int chunkSize; if( !g_network->isSimulated() ) { chunkSize = 4096 * SERVER_KNOBS->SQLITE_CHUNK_SIZE_PAGES; diff --git a/fdbserver/VFSAsync.cpp b/fdbserver/VFSAsync.cpp index 3d53aaccfb..b50ced0e97 100644 --- a/fdbserver/VFSAsync.cpp +++ b/fdbserver/VFSAsync.cpp @@ -46,6 +46,8 @@ #include #endif +#include "fdbserver/VFSAsync.h" + /* ** The maximum pathname length supported by this VFS. */ @@ -58,35 +60,11 @@ #define EXCLUSIVE_LOCK 4 const uint32_t RESERVED_COUNT = 1U<<29; -/* -** When using this VFS, the sqlite3_file* handles that SQLite uses are -** actually pointers to instances of type VFSAsyncFile. -*/ -typedef struct VFSAsyncFile VFSAsyncFile; -struct VFSAsyncFile { - sqlite3_file base; /* Base class. Must be first. 
*/ - int flags; - std::string filename; - Reference file; +VFSAsyncFile::VFSAsyncFile(std::string const& filename, int flags) +: filename(filename), flags(flags), pLockCount(&filename_lockCount_openCount[filename].first), debug_zcrefs(0), debug_zcreads(0), debug_reads(0), chunkSize(0), errorInjected(false) { + filename_lockCount_openCount[filename].second++; +} - uint32_t * const pLockCount; // +1 for each SHARED_LOCK, or 1+X_COUNT for lock level X - int lockLevel; // NO_LOCK, SHARED_LOCK, RESERVED_LOCK, PENDING_LOCK, or EXCLUSIVE_LOCK - - struct SharedMemoryInfo *sharedMemory; - int sharedMemorySharedLocks; - int sharedMemoryExclusiveLocks; - - int debug_zcrefs, debug_zcreads, debug_reads; - - int chunkSize; - - VFSAsyncFile(std::string const& filename, int flags) : filename(filename), flags(flags), pLockCount(&filename_lockCount_openCount[filename].first), debug_zcrefs(0), debug_zcreads(0), debug_reads(0), chunkSize(0) { - filename_lockCount_openCount[filename].second++; - } - ~VFSAsyncFile(); - - static std::map> filename_lockCount_openCount; -}; std::map> VFSAsyncFile::filename_lockCount_openCount; static int asyncClose(sqlite3_file *pFile){ @@ -112,7 +90,10 @@ static int asyncRead(sqlite3_file *pFile, void *zBuf, int iAmt, sqlite_int64 iOf return SQLITE_IOERR_SHORT_READ; } return SQLITE_OK; - } catch (Error& ) { + } catch (Error &e) { + if(e.isInjectedFault()) { + ((VFSAsyncFile *)pFile)->errorInjected = true; + } return SQLITE_IOERR_READ; } } @@ -123,7 +104,10 @@ static int asyncReleaseZeroCopy(sqlite3_file* pFile, void* data, int iAmt, sqlit try{ --p->debug_zcrefs; p->file->releaseZeroCopy( data, iAmt, iOfst ); - } catch (Error& ) { + } catch (Error &e) { + if(e.isInjectedFault()) { + ((VFSAsyncFile *)pFile)->errorInjected = true; + } return SQLITE_IOERR; } return SQLITE_OK; @@ -145,7 +129,10 @@ static int asyncReadZeroCopy(sqlite3_file *pFile, void **data, int iAmt, sqlite_ } ++p->debug_zcreads; return SQLITE_OK; - } catch (Error& ) { + } catch (Error &e) { 
+ if(e.isInjectedFault()) { + ((VFSAsyncFile *)pFile)->errorInjected = true; + } return SQLITE_IOERR_READ; } } @@ -162,7 +149,10 @@ static int asyncReadZeroCopy(sqlite3_file *pFile, void **data, int iAmt, sqlite_ return SQLITE_IOERR_SHORT_READ; } return SQLITE_OK; - } catch (Error& ) { + } catch (Error &e) { + if(e.isInjectedFault()) { + ((VFSAsyncFile *)pFile)->errorInjected = true; + } return SQLITE_IOERR_READ; } } @@ -178,7 +168,10 @@ static int asyncWrite(sqlite3_file *pFile, const void *zBuf, int iAmt, sqlite_in try { waitFor( p->file->write( zBuf, iAmt, iOfst ) ); return SQLITE_OK; - } catch(Error& ) { + } catch(Error &e) { + if(e.isInjectedFault()) { + ((VFSAsyncFile *)pFile)->errorInjected = true; + } return SQLITE_IOERR_WRITE; } } @@ -194,7 +187,10 @@ static int asyncTruncate(sqlite3_file *pFile, sqlite_int64 size){ try { waitFor( p->file->truncate( size ) ); return SQLITE_OK; - } catch(Error& ) { + } catch(Error &e) { + if(e.isInjectedFault()) { + ((VFSAsyncFile *)pFile)->errorInjected = true; + } return SQLITE_IOERR_TRUNCATE; } } @@ -204,7 +200,11 @@ static int asyncSync(sqlite3_file *pFile, int flags){ try { waitFor( p->file->sync() ); return SQLITE_OK; - } catch (Error& e) { + } catch (Error &e) { + if(e.isInjectedFault()) { + ((VFSAsyncFile *)pFile)->errorInjected = true; + } + TraceEvent("VFSSyncError") .error(e) .detail("Filename", p->filename) @@ -223,7 +223,10 @@ static int VFSAsyncFileSize(sqlite3_file *pFile, sqlite_int64 *pSize){ try { *pSize = waitForAndGet( p->file->size() ); return SQLITE_OK; - } catch (Error& ) { + } catch (Error &e) { + if(e.isInjectedFault()) { + ((VFSAsyncFile *)pFile)->errorInjected = true; + } return SQLITE_IOERR_FSTAT; } } @@ -648,6 +651,9 @@ static int asyncFullPathname( memcpy(zPathOut, s.c_str(), s.size()+1); return SQLITE_OK; } catch (Error& e) { + if(e.isInjectedFault()) { + ((VFSAsyncFile *)pVfs)->errorInjected = true; + } TraceEvent(SevError,"VFSAsyncFullPathnameError").error(e).detail("PathIn", 
(std::string)zPath); return SQLITE_IOERR; } catch(...) { @@ -716,6 +722,9 @@ static int asyncSleep(sqlite3_vfs *pVfs, int microseconds){ waitFor( g_network->delay( microseconds*1e-6, TaskPriority::DefaultDelay ) || simCancel ); return microseconds; } catch( Error &e ) { + if(e.isInjectedFault()) { + ((VFSAsyncFile *)pVfs)->errorInjected = true; + } TraceEvent(SevError, "AsyncSleepError").error(e,true); return 0; } diff --git a/fdbserver/VFSAsync.h b/fdbserver/VFSAsync.h new file mode 100644 index 0000000000..debf317eef --- /dev/null +++ b/fdbserver/VFSAsync.h @@ -0,0 +1,78 @@ +/* + * VFSAsync.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "sqlite/sqlite3.h" +#include +#include +#include "fdbrpc/IAsyncFile.h" +#include "fdbserver/CoroFlow.h" + +//#include +//#include + +//#ifdef WIN32 +//#include +//#endif + +#ifdef __unixish__ +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +#endif + +/* +** When using this VFS, the sqlite3_file* handles that SQLite uses are +** actually pointers to instances of type VFSAsyncFile. +*/ +typedef struct VFSAsyncFile VFSAsyncFile; +struct VFSAsyncFile { + sqlite3_file base; /* Base class. Must be first. 
*/ + int flags; + std::string filename; + Reference file; + bool errorInjected; + + bool consumeInjectedError() { + bool e = errorInjected; + errorInjected = false; + return e; + } + + uint32_t * const pLockCount; // +1 for each SHARED_LOCK, or 1+X_COUNT for lock level X + int lockLevel; // NO_LOCK, SHARED_LOCK, RESERVED_LOCK, PENDING_LOCK, or EXCLUSIVE_LOCK + + struct SharedMemoryInfo *sharedMemory; + int sharedMemorySharedLocks; + int sharedMemoryExclusiveLocks; + + int debug_zcrefs, debug_zcreads, debug_reads; + + int chunkSize; + + VFSAsyncFile(std::string const& filename, int flags); + ~VFSAsyncFile(); + + static std::map> filename_lockCount_openCount; +}; diff --git a/fdbserver/sqlite/sqlite3.amalgamation.c b/fdbserver/sqlite/sqlite3.amalgamation.c index adbad64ea1..e2863ffae8 100644 --- a/fdbserver/sqlite/sqlite3.amalgamation.c +++ b/fdbserver/sqlite/sqlite3.amalgamation.c @@ -93791,6 +93791,14 @@ SQLITE_API int sqlite3_open_v2( return openDatabase(filename, ppDb, flags, zVfs); } +SQLITE_API void * sqlite3_get_vfs_db(sqlite3 *pDb) { + return pDb->aDb[0].pBt->pBt->pPager->pWal->pDbFd; +} + +SQLITE_API void * sqlite3_get_vfs_wal(sqlite3 *pDb) { + return pDb->aDb[0].pBt->pBt->pPager->pWal->pWalFd; +} + #ifndef SQLITE_OMIT_UTF16 /* ** Open a new database handle. diff --git a/fdbserver/sqlite/sqlite3.h b/fdbserver/sqlite/sqlite3.h index fc33edfd73..d3d62419f0 100755 --- a/fdbserver/sqlite/sqlite3.h +++ b/fdbserver/sqlite/sqlite3.h @@ -2411,6 +2411,9 @@ SQLITE_API int sqlite3_open_v2( const char *zVfs /* Name of VFS module to use */ ); +SQLITE_API void * sqlite3_get_vfs_db(sqlite3 *pDb); +SQLITE_API void * sqlite3_get_vfs_wal(sqlite3 *pDb); + /* ** CAPI3REF: Error Codes And Messages **