Merge branch 'release-6.2' of github.com:apple/foundationdb into features/add-jemalloc-boost

This commit is contained in:
Markus Pilman 2021-02-01 11:27:12 -07:00
commit 0805902770
18 changed files with 335 additions and 88 deletions

View File

@ -97,7 +97,7 @@ RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.
RUN cd /opt/ && curl -L https://github.com/manticoresoftware/manticoresearch/raw/master/misc/junit/ctest2junit.xsl -o ctest2junit.xsl
# Setting this environment variable switches from OpenSSL to BoringSSL
#ENV OPENSSL_ROOT_DIR=/opt/boringssl
ENV OPENSSL_ROOT_DIR=/opt/boringssl
# install BoringSSL: TODO: They don't seem to have releases(?) I picked today's master SHA.
RUN cd /opt &&\

View File

@ -51,8 +51,8 @@ RUN cp -iv /usr/local/bin/clang++ /usr/local/bin/clang++.deref &&\
ldconfig &&\
rm -rf /mnt/artifacts
LABEL version=0.11.13
ENV DOCKER_IMAGEVER=0.11.13
LABEL version=0.11.14
ENV DOCKER_IMAGEVER=0.11.14
ENV CLANGCC=/usr/local/bin/clang.de8a65ef
ENV CLANGCXX=/usr/local/bin/clang++.de8a65ef

View File

@ -12,6 +12,8 @@ endif()
# SSL
################################################################################
include(CheckSymbolExists)
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find LibreSSL and always build without TLS support")
if(DISABLE_TLS)
set(WITH_TLS OFF)
@ -24,6 +26,14 @@ else()
if (LIBRESSL_FOUND)
add_library(OpenSSL::SSL ALIAS LibreSSL)
endif()
else()
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
check_symbol_exists(OPENSSL_INIT_NO_ATEXIT "openssl/crypto.h" OPENSSL_HAS_NO_ATEXIT)
if(OPENSSL_HAS_NO_ATEXIT)
add_compile_options(-DHAVE_OPENSSL_INIT_NO_ATEXIT)
else()
message(STATUS "Found OpenSSL without OPENSSL_INIT_NO_ATEXIT: assuming BoringSSL")
endif()
endif()
if(OPENSSL_FOUND OR LIBRESSL_FOUND)
set(WITH_TLS ON)

View File

@ -13,6 +13,7 @@ Release Notes
* Add documentation on read and write Path. `(PR #4099) <https://github.com/apple/foundationdb/pull/4099>`_
* Add a histogram to expose commit batching window on Proxies. `(PR #4166) <https://github.com/apple/foundationdb/pull/4166>`_
* Fix double counting of range reads in TransactionMetrics. `(PR #4130) <https://github.com/apple/foundationdb/pull/4130>`_
* Add a trace event that can be used as an indicator of the load on the proxy. `(PR #4166) <https://github.com/apple/foundationdb/pull/4166>`_
6.2.28
======

View File

@ -230,6 +230,12 @@ public:
return filename;
}
std::vector<AFCPage*> const& getFlushable() { return flushable; }
void setRateControl(Reference<IRateControl> const& rc) override { rateControl = rc; }
Reference<IRateControl> const& getRateControl() override { return rateControl; }
virtual void addref() {
ReferenceCounted<AsyncFileCached>::addref();
//TraceEvent("AsyncFileCachedAddRef").detail("Filename", filename).detail("Refcount", debugGetReferenceCount()).backtrace();
@ -239,6 +245,12 @@ public:
// If this is ever ThreadSafeReferenceCounted...
// setrefCountUnsafe(0);
if(rateControl) {
TraceEvent(SevDebug, "AsyncFileCachedKillWaiters")
.detail("Filename", filename);
rateControl->killWaiters(io_error());
}
auto f = quiesce();
//TraceEvent("AsyncFileCachedDel").detail("Filename", filename)
// .detail("Refcount", debugGetReferenceCount()).detail("CanDie", f.isReady()).backtrace();
@ -262,6 +274,7 @@ private:
Reference<EvictablePageCache> pageCache;
Future<Void> currentTruncate;
int64_t currentTruncateSize;
Reference<IRateControl> rateControl;
// Map of pointers which hold page buffers for pages which have been overwritten
// but at the time of write there were still readZeroCopy holders.
@ -287,8 +300,10 @@ private:
Int64MetricHandle countCachePageReadsMerged;
Int64MetricHandle countCacheReadBytes;
AsyncFileCached( Reference<IAsyncFile> uncached, const std::string& filename, int64_t length, Reference<EvictablePageCache> pageCache )
: uncached(uncached), filename(filename), length(length), prevLength(length), pageCache(pageCache), currentTruncate(Void()), currentTruncateSize(0) {
AsyncFileCached(Reference<IAsyncFile> uncached, const std::string& filename, int64_t length,
Reference<EvictablePageCache> pageCache)
: uncached(uncached), filename(filename), length(length), prevLength(length), pageCache(pageCache),
currentTruncate(Void()), currentTruncateSize(0), rateControl(nullptr) {
if( !g_network->isSimulated() ) {
countFileCacheWrites.init(LiteralStringRef("AsyncFile.CountFileCacheWrites"), filename);
countFileCacheReads.init(LiteralStringRef("AsyncFile.CountFileCacheReads"), filename);
@ -503,6 +518,10 @@ struct AFCPage : public EvictablePage, public FastAllocated<AFCPage> {
wait( self->notReading && self->notFlushing );
if (dirty) {
// Wait for rate control if it is set
if (self->owner->getRateControl())
wait(self->owner->getRateControl()->getAllowance(1));
if ( self->pageOffset + self->pageCache->pageSize > self->owner->length ) {
ASSERT(self->pageOffset < self->owner->length);
memset( static_cast<uint8_t *>(self->data) + self->owner->length - self->pageOffset, 0, self->pageCache->pageSize - (self->owner->length - self->pageOffset) );

View File

@ -24,6 +24,7 @@
#include <ctime>
#include "flow/flow.h"
#include "fdbrpc/IRateControl.h"
// All outstanding operations must be cancelled before the destructor of IAsyncFile is called.
// The desirability of the above semantic is disputed. Some classes (AsyncFileBlobStore,
@ -81,6 +82,10 @@ public:
virtual void releaseZeroCopy( void* data, int length, int64_t offset ) {}
virtual int64_t debugFD() = 0;
// Used for rate control, at present, only AsyncFileCached supports it
virtual Reference<IRateControl> const& getRateControl() { throw unsupported_operation(); }
virtual void setRateControl(Reference<IRateControl> const& rc) { throw unsupported_operation(); }
};
typedef void (*runCycleFuncPtr)();

View File

@ -30,6 +30,8 @@ public:
// If all of the allowance is not used the unused units can be given back.
// For convenience, n can safely be negative.
virtual void returnUnused(int n) = 0;
virtual void killWaiters(const Error &e) = 0;
virtual void wakeWaiters() = 0;
virtual void addref() = 0;
virtual void delref() = 0;
};
@ -37,18 +39,19 @@ public:
// An IRateControl implemenation that allows at most hands out at most windowLimit units of 'credit' in windowSeconds seconds
class SpeedLimit : public IRateControl, ReferenceCounted<SpeedLimit> {
public:
SpeedLimit(int windowLimit, int windowSeconds) : m_limit(windowLimit), m_seconds(windowSeconds), m_last_update(0), m_budget(0) {
m_budget_max = m_limit * m_seconds;
m_last_update = timer();
SpeedLimit(int windowLimit, double windowSeconds) : m_limit(windowLimit), m_seconds(windowSeconds), m_last_update(0), m_budget(0) {
m_last_update = now();
}
virtual ~SpeedLimit() {
m_stop.send(Never());
}
virtual ~SpeedLimit() {}
virtual void addref() { ReferenceCounted<SpeedLimit>::addref(); }
virtual void delref() { ReferenceCounted<SpeedLimit>::delref(); }
void addref() override { ReferenceCounted<SpeedLimit>::addref(); }
void delref() override { ReferenceCounted<SpeedLimit>::delref(); }
virtual Future<Void> getAllowance(unsigned int n) {
Future<Void> getAllowance(unsigned int n) override {
// Replenish budget based on time since last update
double ts = timer();
double ts = now();
// returnUnused happens to do exactly what we want here
returnUnused((ts - m_last_update) / m_seconds * m_limit);
m_last_update = ts;
@ -57,13 +60,25 @@ public:
if(m_budget >= 0)
return Void();
// Otherise return the amount of time it will take for the budget to rise to 0.
return delay(m_seconds * -m_budget / m_limit);
return m_stop.getFuture() || delay(m_seconds * -m_budget / m_limit);
}
virtual void returnUnused(int n) {
void returnUnused(int n) override {
if(n < 0)
return;
m_budget = std::min<int64_t>(m_budget + n, m_budget_max);
m_budget = std::min<int64_t>(m_budget + n, m_limit);
}
void wakeWaiters() override {
Promise<Void> p;
p.swap(m_stop);
p.send(Void());
}
void killWaiters(const Error &e) override {
Promise<Void> p;
p.swap(m_stop);
p.sendError(e);
}
private:
@ -71,7 +86,7 @@ private:
double m_seconds;
double m_last_update;
int64_t m_budget;
int64_t m_budget_max;
Promise<Void> m_stop;
};
// An IRateControl implemenation that enforces no limit
@ -79,9 +94,11 @@ class Unlimited : public IRateControl, ReferenceCounted<Unlimited> {
public:
Unlimited() {}
virtual ~Unlimited() {}
virtual void addref() { ReferenceCounted<Unlimited>::addref(); }
virtual void delref() { ReferenceCounted<Unlimited>::delref(); }
void addref() override { ReferenceCounted<Unlimited>::addref(); }
void delref() override { ReferenceCounted<Unlimited>::delref(); }
virtual Future<Void> getAllowance(unsigned int n) { return Void(); }
virtual void returnUnused(int n) {}
Future<Void> getAllowance(unsigned int n) override { return Void(); }
void returnUnused(int n) override {}
void wakeWaiters() override {}
void killWaiters(const Error &e) override {}
};

View File

@ -77,6 +77,7 @@ set(FDBSERVER_SRCS
TLogInterface.h
TLogServer.actor.cpp
VersionedBTree.actor.cpp
VFSAsync.h
VFSAsync.cpp
WaitFailure.actor.cpp
WaitFailure.h

View File

@ -31,6 +31,7 @@ extern "C" {
u32 sqlite3VdbeSerialGet(const unsigned char*, u32, Mem*);
}
#include "flow/ThreadPrimitives.h"
#include "fdbserver/VFSAsync.h"
#include "fdbserver/template_fdb.h"
#include "fdbrpc/simulator.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -211,9 +212,19 @@ struct SQLiteDB : NonCopyable {
void open(bool writable);
void createFromScratch();
SQLiteDB( std::string filename, bool page_checksums, bool fragment_values): filename(filename), db(NULL), btree(NULL), table(-1), freetable(-1), haveMutex(false), page_checksums(page_checksums), fragment_values(fragment_values) {}
SQLiteDB( std::string filename, bool page_checksums, bool fragment_values): filename(filename), db(NULL), btree(NULL), table(-1), freetable(-1), haveMutex(false), page_checksums(page_checksums), fragment_values(fragment_values) {
TraceEvent(SevDebug, "SQLiteDBCreate")
.detail("This", (void*)this)
.detail("Filename", filename)
.backtrace();
}
~SQLiteDB() {
TraceEvent(SevDebug, "SQLiteDBDestroy")
.detail("This", (void*)this)
.detail("Filename", filename)
.backtrace();
if (db) {
if (haveMutex) {
sqlite3_mutex_leave(db->mutex);
@ -239,10 +250,11 @@ struct SQLiteDB : NonCopyable {
//if (deterministicRandom()->random01() < .001) rc = SQLITE_INTERRUPT;
if (rc) {
// Our exceptions don't propagate through sqlite, so we don't know for sure if the error that caused this was
// an injected fault. Assume that if fault injection is happening, this is an injected fault.
// an injected fault. Assume that if VFSAsyncFile caught an injected Error that it caused this error return code.
Error err = io_error();
if (g_network->isSimulated() && (g_simulator.getCurrentProcess()->fault_injection_p1 || g_simulator.getCurrentProcess()->machine->machineProcess->fault_injection_p1 || g_simulator.getCurrentProcess()->rebooting))
if (g_network->isSimulated() && VFSAsyncFile::checkInjectedError()) {
err = err.asInjectedFault();
}
if (db)
db->errCode = rc;
@ -252,6 +264,7 @@ struct SQLiteDB : NonCopyable {
throw err;
}
}
void checkpoint( bool restart ) {
int logSize=0, checkpointCount=0;
//double t = timer();
@ -282,7 +295,7 @@ struct SQLiteDB : NonCopyable {
int tables[] = {1, table, freetable};
TraceEvent("BTreeIntegrityCheckBegin").detail("Filename", filename);
char* e = sqlite3BtreeIntegrityCheck(btree, tables, 3, 1000, &errors, verbose);
if (!(g_network->isSimulated() && (g_simulator.getCurrentProcess()->fault_injection_p1 || g_simulator.getCurrentProcess()->rebooting))) {
if (!(g_network->isSimulated() && VFSAsyncFile::checkInjectedError())) {
TraceEvent((errors||e) ? SevError : SevInfo, "BTreeIntegrityCheckResults").detail("Filename", filename).detail("ErrorTotal", errors);
if(e != nullptr) {
// e is a string containing 1 or more lines. Create a separate trace event for each line.
@ -1296,6 +1309,7 @@ int SQLiteDB::checkAllPageChecksums() {
.detail("TotalErrors", totalErrors);
ASSERT(!vfsAsyncIsOpen(filename));
ASSERT(!vfsAsyncIsOpen(filename + "-wal"));
return totalErrors;
}
@ -1346,6 +1360,22 @@ void SQLiteDB::open(bool writable) {
if (dbFile.isError()) throw dbFile.getError(); // If we've failed to open the file, throw an exception
if (walFile.isError()) throw walFile.getError(); // If we've failed to open the file, throw an exception
// Set Rate control if FLOW_KNOBS are positive
if (FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT > 0 && FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS > 0) {
// The writer thread is created before the readers, so it should initialize the rate controls.
if(writable) {
// Create a new rate control and assign it to both files.
Reference<SpeedLimit> rc(new SpeedLimit(FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT,
FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS));
dbFile.get()->setRateControl(rc);
walFile.get()->setRateControl(rc);
} else {
// When a reader thread is opened, the rate controls should already be equal and not null
ASSERT(dbFile.get()->getRateControl() == walFile.get()->getRateControl());
ASSERT(dbFile.get()->getRateControl());
}
}
//TraceEvent("KVThreadInitStage").detail("Stage",2).detail("Filename", filename).detail("Writable", writable);
// Now that the file itself is open and locked, let sqlite open the database
@ -1493,6 +1523,7 @@ private:
volatile int64_t freeListPages;
vector< Reference<ReadCursor> > readCursors;
Reference<IAsyncFile> dbFile, walFile;
struct Reader : IThreadPoolReceiver {
SQLiteDB conn;
@ -1574,6 +1605,7 @@ private:
};
struct Writer : IThreadPoolReceiver {
KeyValueStoreSQLite *kvs;
SQLiteDB conn;
Cursor* cursor;
int commits;
@ -1588,8 +1620,9 @@ private:
bool checkAllChecksumsOnOpen;
bool checkIntegrityOnOpen;
explicit Writer( std::string const& filename, bool isBtreeV2, bool checkAllChecksumsOnOpen, bool checkIntegrityOnOpen, volatile int64_t& writesComplete, volatile SpringCleaningStats& springCleaningStats, volatile int64_t& diskBytesUsed, volatile int64_t& freeListPages, UID dbgid, vector<Reference<ReadCursor>>* pReadThreads )
: conn( filename, isBtreeV2, isBtreeV2 ),
explicit Writer( KeyValueStoreSQLite *kvs, bool isBtreeV2, bool checkAllChecksumsOnOpen, bool checkIntegrityOnOpen, volatile int64_t& writesComplete, volatile SpringCleaningStats& springCleaningStats, volatile int64_t& diskBytesUsed, volatile int64_t& freeListPages, UID dbgid, vector<Reference<ReadCursor>>* pReadThreads )
: kvs(kvs),
conn( kvs->filename, isBtreeV2, isBtreeV2 ),
commits(), setsThisCommit(),
freeTableEmpty(false),
writesComplete(writesComplete),
@ -1619,6 +1652,8 @@ private:
}
}
conn.open(true);
kvs->dbFile = conn.dbFile;
kvs->walFile = conn.walFile;
//If a wal file fails during the commit process before finishing a checkpoint, then it is possible that our wal file will be non-empty
//when we reload it. We execute a checkpoint here to remedy that situation. This call must come before before creating a cursor because
@ -1630,7 +1665,7 @@ private:
if (checkIntegrityOnOpen || EXPENSIVE_VALIDATION) {
if(conn.check(false) != 0) {
// A corrupt btree structure must not be used.
if (g_network->isSimulated() && (g_simulator.getCurrentProcess()->fault_injection_p1 || g_simulator.getCurrentProcess()->machine->machineProcess->fault_injection_p1 || g_simulator.getCurrentProcess()->rebooting)) {
if (g_network->isSimulated() && VFSAsyncFile::checkInjectedError()) {
throw file_corrupt().asInjectedFault();
} else {
throw file_corrupt();
@ -1855,12 +1890,30 @@ private:
}
}
void disableRateControl() {
if(dbFile && dbFile->getRateControl()) {
TraceEvent(SevDebug, "KeyValueStoreSQLiteShutdownRateControl").detail("Filename", dbFile->getFilename());
Reference<IRateControl> rc = dbFile->getRateControl();
dbFile->setRateControl({});
rc->wakeWaiters();
}
if(walFile && walFile->getRateControl()) {
TraceEvent(SevDebug, "KeyValueStoreSQLiteShutdownRateControl").detail("Filename", walFile->getFilename());
Reference<IRateControl> rc = walFile->getRateControl();
walFile->setRateControl({});
rc->wakeWaiters();
}
}
ACTOR static Future<Void> stopOnError( KeyValueStoreSQLite* self ) {
try {
wait( self->readThreads->getError() || self->writeThread->getError() );
} catch (Error& e) {
if (e.code() == error_code_actor_cancelled)
throw;
self->disableRateControl();
}
self->readThreads->stop();
@ -1870,8 +1923,13 @@ private:
ACTOR static void doClose( KeyValueStoreSQLite* self, bool deleteOnClose ) {
state Error error = success();
self->disableRateControl();
try {
TraceEvent("KVClose", self->logID).detail("Del", deleteOnClose);
TraceEvent("KVClose", self->logID)
.detail("Filename", self->filename)
.detail("Del", deleteOnClose);
self->starting.cancel();
self->cleaning.cancel();
self->logging.cancel();
@ -1882,12 +1940,14 @@ private:
}
} catch (Error& e) {
TraceEvent(SevError, "KVDoCloseError", self->logID)
.error(e,true)
.detail("Filename", self->filename)
.error(e, true)
.detail("Reason", e.code() == error_code_platform_error ? "could not delete database" : "unknown");
error = e;
}
TraceEvent("KVClosed", self->logID);
TraceEvent("KVClosed", self->logID)
.detail("Filename", self->filename);
if( error.code() != error_code_actor_cancelled ) {
self->stopped.send(Void());
delete self;
@ -1935,6 +1995,9 @@ KeyValueStoreSQLite::KeyValueStoreSQLite(std::string const& filename, UID id, Ke
writeThread(CoroThreadPool::createThreadPool()),
readsRequested(0), writesRequested(0), writesComplete(0), diskBytesUsed(0), freeListPages(0)
{
TraceEvent(SevDebug, "KeyValueStoreSQLiteCreate")
.detail("Filename", filename);
stopOnErr = stopOnError(this);
#if SQLITE_THREADSAFE == 0
@ -1947,13 +2010,14 @@ KeyValueStoreSQLite::KeyValueStoreSQLite(std::string const& filename, UID id, Ke
//The DB file should not already be open
ASSERT(!vfsAsyncIsOpen(filename));
ASSERT(!vfsAsyncIsOpen(filename + "-wal"));
readCursors.resize(64); //< number of read threads
readCursors.resize(SERVER_KNOBS->SQLITE_READER_THREADS); //< number of read threads
sqlite3_soft_heap_limit64( SERVER_KNOBS->SOFT_HEAP_LIMIT ); // SOMEDAY: Is this a performance issue? Should we drop the cache sizes for individual threads?
TaskPriority taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskPriority::DiskWrite);
writeThread->addThread( new Writer(filename, type==KeyValueStoreType::SSD_BTREE_V2, checkChecksums, checkIntegrity, writesComplete, springCleaningStats, diskBytesUsed, freeListPages, id, &readCursors) );
writeThread->addThread(new Writer(this, type==KeyValueStoreType::SSD_BTREE_V2, checkChecksums, checkIntegrity, writesComplete, springCleaningStats, diskBytesUsed, freeListPages, id, &readCursors));
g_network->setCurrentTask(taskId);
auto p = new Writer::InitAction();
auto f = p->result.getFuture();

View File

@ -250,6 +250,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( SQLITE_BTREE_PAGE_USABLE, 4096 - 8); // pageSize - reserveSize for page checksum
init( SQLITE_CHUNK_SIZE_PAGES, 25600 ); // 100MB
init( SQLITE_CHUNK_SIZE_PAGES_SIM, 1024 ); // 4MB
init( SQLITE_READER_THREADS, 64 ); // number of read threads
// Maximum and minimum cell payload bytes allowed on primary page as calculated in SQLite.
// These formulas are copied from SQLite, using its hardcoded constants, so if you are

View File

@ -222,6 +222,7 @@ public:
double SQLITE_FRAGMENT_MIN_SAVINGS;
int SQLITE_CHUNK_SIZE_PAGES;
int SQLITE_CHUNK_SIZE_PAGES_SIM;
int SQLITE_READER_THREADS;
// KeyValueStoreSqlite spring cleaning
double SPRING_CLEANING_NO_ACTION_INTERVAL;

View File

@ -46,6 +46,8 @@
#include <fcntl.h>
#endif
#include "fdbserver/VFSAsync.h"
/*
** The maximum pathname length supported by this VFS.
*/
@ -58,43 +60,26 @@
#define EXCLUSIVE_LOCK 4
const uint32_t RESERVED_COUNT = 1U<<29;
/*
** When using this VFS, the sqlite3_file* handles that SQLite uses are
** actually pointers to instances of type VFSAsyncFile.
*/
typedef struct VFSAsyncFile VFSAsyncFile;
struct VFSAsyncFile {
sqlite3_file base; /* Base class. Must be first. */
int flags;
std::string filename;
Reference<IAsyncFile> file;
VFSAsyncFile::VFSAsyncFile(std::string const& filename, int flags)
: filename(filename), flags(flags), pLockCount(&filename_lockCount_openCount[filename].first), debug_zcrefs(0), debug_zcreads(0), debug_reads(0), chunkSize(0) {
filename_lockCount_openCount[filename].second++;
uint32_t * const pLockCount; // +1 for each SHARED_LOCK, or 1+X_COUNT for lock level X
int lockLevel; // NO_LOCK, SHARED_LOCK, RESERVED_LOCK, PENDING_LOCK, or EXCLUSIVE_LOCK
TraceEvent(SevDebug, "VFSAsyncFileConstruct")
.detail("Filename", filename)
.detail("OpenCount", filename_lockCount_openCount[filename].second)
.detail("LockCount", filename_lockCount_openCount[filename].first)
.backtrace();
}
struct SharedMemoryInfo *sharedMemory;
int sharedMemorySharedLocks;
int sharedMemoryExclusiveLocks;
int debug_zcrefs, debug_zcreads, debug_reads;
int chunkSize;
VFSAsyncFile(std::string const& filename, int flags) : filename(filename), flags(flags), pLockCount(&filename_lockCount_openCount[filename].first), debug_zcrefs(0), debug_zcreads(0), debug_reads(0), chunkSize(0) {
filename_lockCount_openCount[filename].second++;
}
~VFSAsyncFile();
static std::map<std::string, std::pair<uint32_t,int>> filename_lockCount_openCount;
};
std::map<std::string, std::pair<uint32_t,int>> VFSAsyncFile::filename_lockCount_openCount;
static int asyncClose(sqlite3_file *pFile){
VFSAsyncFile *p = (VFSAsyncFile*)pFile;
/*TraceEvent("VFSAsyncClose").detail("Fd", p->file->debugFD())
.detail("Filename", p->filename).detail("ZCRefs", p->debug_zcrefs)
.detail("ZCReads", p->debug_zcreads).detail("NormalReads", p->debug_reads).backtrace();*/
TraceEvent(SevDebug, "VFSAsyncFileDestroy")
.detail("Filename", p->filename)
.backtrace();
//printf("Closing %s: %d zcrefs, %d/%d reads zc\n", filename.c_str(), debug_zcrefs, debug_zcreads, debug_zcreads+debug_reads);
ASSERT( !p->debug_zcrefs );
@ -112,7 +97,10 @@ static int asyncRead(sqlite3_file *pFile, void *zBuf, int iAmt, sqlite_int64 iOf
return SQLITE_IOERR_SHORT_READ;
}
return SQLITE_OK;
} catch (Error& ) {
} catch (Error &e) {
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ);
}
return SQLITE_IOERR_READ;
}
}
@ -123,7 +111,10 @@ static int asyncReleaseZeroCopy(sqlite3_file* pFile, void* data, int iAmt, sqlit
try{
--p->debug_zcrefs;
p->file->releaseZeroCopy( data, iAmt, iOfst );
} catch (Error& ) {
} catch (Error &e) {
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_IOERR);
}
return SQLITE_IOERR;
}
return SQLITE_OK;
@ -145,7 +136,10 @@ static int asyncReadZeroCopy(sqlite3_file *pFile, void **data, int iAmt, sqlite_
}
++p->debug_zcreads;
return SQLITE_OK;
} catch (Error& ) {
} catch (Error &e) {
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ);
}
return SQLITE_IOERR_READ;
}
}
@ -162,7 +156,10 @@ static int asyncReadZeroCopy(sqlite3_file *pFile, void **data, int iAmt, sqlite_
return SQLITE_IOERR_SHORT_READ;
}
return SQLITE_OK;
} catch (Error& ) {
} catch (Error &e) {
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ);
}
return SQLITE_IOERR_READ;
}
}
@ -178,7 +175,10 @@ static int asyncWrite(sqlite3_file *pFile, const void *zBuf, int iAmt, sqlite_in
try {
waitFor( p->file->write( zBuf, iAmt, iOfst ) );
return SQLITE_OK;
} catch(Error& ) {
} catch(Error &e) {
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_IOERR_WRITE);
}
return SQLITE_IOERR_WRITE;
}
}
@ -194,7 +194,10 @@ static int asyncTruncate(sqlite3_file *pFile, sqlite_int64 size){
try {
waitFor( p->file->truncate( size ) );
return SQLITE_OK;
} catch(Error& ) {
} catch(Error &e) {
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_IOERR_TRUNCATE);
}
return SQLITE_IOERR_TRUNCATE;
}
}
@ -204,8 +207,12 @@ static int asyncSync(sqlite3_file *pFile, int flags){
try {
waitFor( p->file->sync() );
return SQLITE_OK;
} catch (Error& e) {
TraceEvent("VFSSyncError")
} catch (Error &e) {
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_IOERR_FSYNC);
}
TraceEvent("VFSAsyncFileSyncError")
.error(e)
.detail("Filename", p->filename)
.detail("Sqlite3File", (int64_t)pFile)
@ -223,7 +230,10 @@ static int VFSAsyncFileSize(sqlite3_file *pFile, sqlite_int64 *pSize){
try {
*pSize = waitForAndGet( p->file->size() );
return SQLITE_OK;
} catch (Error& ) {
} catch (Error &e) {
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_IOERR_FSTAT);
}
return SQLITE_IOERR_FSTAT;
}
}
@ -464,10 +474,20 @@ static int asyncDeviceCharacteristics(sqlite3_file *pFile){ return 0; }
}
VFSAsyncFile::~VFSAsyncFile() {
//TraceEvent("VFSAsyncFileDel").detail("Filename", filename);
TraceEvent(SevDebug, "VFSAsyncFileDestroyStart")
.detail("Filename", filename)
.detail("OpenCount", filename_lockCount_openCount[filename].second)
.detail("LockCount", filename_lockCount_openCount[filename].first)
.backtrace();
if (!--filename_lockCount_openCount[filename].second) {
filename_lockCount_openCount.erase(filename);
TraceEvent(SevDebug, "VFSAsyncFileDestroy")
.detail("Filename", filename)
.backtrace();
//Always delete the shared memory when the last copy of the file is deleted. In simulation, this is helpful because "killing" a file without properly closing
//it can result in a shared memory state that causes corruption when reopening the killed file. The only expected penalty from doing this
//is a potentially slower open operation on a database, but that should happen infrequently.
@ -537,14 +557,15 @@ static int asyncOpen(
// Note that SQLiteDB::open also opens the db file, so its flags and modes are important, too
p->file = waitForAndGet( IAsyncFileSystem::filesystem()->open( p->filename, oflags, 0600 ) );
/*TraceEvent("VFSOpened")
TraceEvent(SevDebug, "VFSAsyncFileOpened")
.detail("Filename", p->filename)
.detail("Fd", DEBUG_DETERMINISM ? 0 : p->file->debugFD())
.detail("Flags", flags)
.detail("Sqlite3File", DEBUG_DETERMINISM ? 0 : (int64_t)pFile)
.detail("IAsyncFile", DEBUG_DETERMINISM ? 0 : (int64_t)p->file.getPtr());*/
.backtrace();
} catch (Error& e) {
TraceEvent("SQLiteOpenFail").error(e).detail("Filename", p->filename);
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_CANTOPEN);
}
TraceEvent("VFSAsyncFileOpenError").error(e).detail("Filename", p->filename);
p->~VFSAsyncFile();
return SQLITE_CANTOPEN;
}
@ -648,6 +669,9 @@ static int asyncFullPathname(
memcpy(zPathOut, s.c_str(), s.size()+1);
return SQLITE_OK;
} catch (Error& e) {
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_IOERR);
}
TraceEvent(SevError,"VFSAsyncFullPathnameError").error(e).detail("PathIn", (std::string)zPath);
return SQLITE_IOERR;
} catch(...) {
@ -716,7 +740,10 @@ static int asyncSleep(sqlite3_vfs *pVfs, int microseconds){
waitFor( g_network->delay( microseconds*1e-6, TaskPriority::DefaultDelay ) || simCancel );
return microseconds;
} catch( Error &e ) {
TraceEvent(SevError, "AsyncSleepError").error(e,true);
if(e.isInjectedFault()) {
VFSAsyncFile::setInjectedError(SQLITE_ERROR);
}
TraceEvent(SevError, "VFSAsyncSleepError").error(e,true);
return 0;
}
}

92
fdbserver/VFSAsync.h Normal file
View File

@ -0,0 +1,92 @@
/*
* VFSAsync.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sqlite/sqlite3.h"
#include <string>
#include <map>
#include "fdbrpc/IAsyncFile.h"
/*
** When using this VFS, the sqlite3_file* handles that SQLite uses are
** actually pointers to instances of type VFSAsyncFile.
*/
typedef struct VFSAsyncFile VFSAsyncFile;
struct VFSAsyncFile {
sqlite3_file base; /* Base class. Must be first. */
int flags;
std::string filename;
Reference<IAsyncFile> file;
// The functions setInjectedError() and checkInjectedError() use an INetwork global to store the last
// return code from VFSAsyncFile method resulting from catching an injected Error exception. This allows
// callers of the SQLite API to determine if an error code returned appears to be due to an injected
// error in simulation.
//
// This scheme is not perfect, as it is possible for non-injected errors to occur after injected errors
// and be incorrectly recognized as injected. This problem already existed, however, as the previous scheme
// assumed that any SQLite error that occurred after any injected error in the simulated process was
// itself injected.
//
// Unfortunately, there is no easy or reliable way to plumb the injectedness of an error though the return
// path of VFSAsyncFile -> SQLite -> SQLite API calls made by KeyValueStoreSQLite.
//
// An attempt was made to store injected errors in VFSAsyncFile instances and expose a SQLiteDB's file
// instances via the SQLite API. This failed, however, because sometimes files are opened, encounter an
// error, and are closed within the lifetime of one SQLite API call so the caller never has an opportunity
// to access the VFSAsyncFile object or its injected error state.
//
// An attempt was also made to match SQLite API return codes to VFSAsyncFile injected error return codes on
// a 1:1 basis, only flagging a code as rejected if it matches the last injected error code and only once.
// This would have been more accurate (though coincidences could occur). This scheme also failed, however,
// because sometimes errors from SQLite APIs are temporarily ignored, relying on a subsequent API call to error,
// however this error could be different and would not have been produced directly by VFSAsyncFile.
//
static void setInjectedError(int64_t rc) {
g_network->setGlobal(INetwork::enSQLiteInjectedError, (flowGlobalType)rc);
TraceEvent("VFSSetInjectedError").detail("ErrorCode", rc).detail("NetworkPtr", (void *)g_network).backtrace();
}
static bool checkInjectedError() {
// Error code is only checked for non-zero because the SQLite API error code after an injected error
// may not match the error code returned by VFSAsyncFile when the inject error occurred.
bool e = g_network->global(INetwork::enSQLiteInjectedError) != (flowGlobalType)0;
TraceEvent("VFSCheckInjectedError")
.detail("Found", e)
.detail("ErrorCode", (int64_t)g_network->global(INetwork::enSQLiteInjectedError))
.backtrace();
return e;
}
uint32_t * const pLockCount; // +1 for each SHARED_LOCK, or 1+X_COUNT for lock level X
int lockLevel; // NO_LOCK, SHARED_LOCK, RESERVED_LOCK, PENDING_LOCK, or EXCLUSIVE_LOCK
struct SharedMemoryInfo *sharedMemory;
int sharedMemorySharedLocks;
int sharedMemoryExclusiveLocks;
int debug_zcrefs, debug_zcreads, debug_reads;
int chunkSize;
VFSAsyncFile(std::string const& filename, int flags);
~VFSAsyncFile();
static std::map<std::string, std::pair<uint32_t,int>> filename_lockCount_openCount;
};

View File

@ -339,7 +339,7 @@ public:
const double currentTime = now();
double longest = 0;
UID UIDofLongest;
for (const auto kv: startTimeMap) {
for (const auto& kv: startTimeMap) {
const double currentRunningTime = currentTime - kv.second;
if (longest < currentRunningTime) {
longest = currentRunningTime;

View File

@ -92,6 +92,16 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( MAX_EVICT_ATTEMPTS, 100 ); if( randomize && BUGGIFY ) MAX_EVICT_ATTEMPTS = 2;
init( CACHE_EVICTION_POLICY, "random" );
init( PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION, 0.1 ); if( randomize && BUGGIFY ) PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION = 0.0; else if( randomize && BUGGIFY ) PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION = 1.0;
init( FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS, -1 );
init( FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT, -1 );
if( randomize && BUGGIFY ) {
// Choose an window between .01 and 1.01 seconds.
FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS = 0.01 + deterministicRandom()->random01();
// Choose 10k to 50k operations per second
int opsPerSecond = deterministicRandom()->randomInt(1000, 5000);
// Set window limit to opsPerSecond scaled down to window size
FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT = opsPerSecond * FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS;
}
//AsyncFileEIO
init( EIO_MAX_PARALLELISM, 4 );

View File

@ -112,6 +112,8 @@ public:
double PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION;
double TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY;
int TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT;
int FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT;
double FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS;
//AsyncFileEIO
int EIO_MAX_PARALLELISM;

View File

@ -28,9 +28,7 @@ TLSPolicy::~TLSPolicy() {}
namespace TLS {
void DisableOpenSSLAtExitHandler() {
#ifdef TLS_DISABLED
return;
#else
#ifdef HAVE_OPENSSL_INIT_NO_ATEXIT
static bool once = false;
if (!once) {
once = true;
@ -43,9 +41,7 @@ void DisableOpenSSLAtExitHandler() {
}
void DestroyOpenSSLGlobalState() {
#ifdef TLS_DISABLED
return;
#else
#ifdef HAVE_OPENSSL_INIT_NO_ATEXIT
OPENSSL_cleanup();
#endif
}

View File

@ -436,7 +436,8 @@ public:
enASIOTimedOut = 9,
enBlobCredentialFiles = 10,
enNetworkAddressesFunc = 11,
enClientFailureMonitor = 12
enClientFailureMonitor = 12,
enSQLiteInjectedError = 13
};
virtual void longTaskCheck( const char* name ) {}