/* * VFSAsync.cpp * * This source file is part of the FoundationDB open source project * * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "sqlite/sqlite3.h" #include #include #include #include "fdbrpc/fdbrpc.h" #include "fdbrpc/IAsyncFile.h" #include "fdbserver/CoroFlow.h" #include "fdbrpc/simulator.h" #include "fdbrpc/AsyncFileReadAhead.actor.h" #include #include #ifdef WIN32 #include #endif #ifdef __unixish__ #include #include #include #include #include #include #include #include #endif #include "fdbserver/VFSAsync.h" /* ** The maximum pathname length supported by this VFS. */ #define MAXPATHNAME 512 #define NO_LOCK 0 #define SHARED_LOCK 1 #define RESERVED_LOCK 2 #define PENDING_LOCK 3 #define EXCLUSIVE_LOCK 4 const uint32_t RESERVED_COUNT = 1U << 29; VFSAsyncFile::VFSAsyncFile(std::string const& filename, int flags) : flags(flags), filename(filename), pLockCount(&filename_lockCount_openCount[filename].first), debug_zcrefs(0), debug_zcreads(0), debug_reads(0), chunkSize(0) { filename_lockCount_openCount[filename].second++; TraceEvent(SevDebug, "VFSAsyncFileConstruct") .detail("Filename", filename) .detail("OpenCount", filename_lockCount_openCount[filename].second) .detail("LockCount", filename_lockCount_openCount[filename].first) .backtrace(); } std::map> VFSAsyncFile::filename_lockCount_openCount; static int asyncClose(sqlite3_file* pFile) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; TraceEvent(SevDebug, "VFSAsyncFileDestroy").detail("Filename", p->filename).backtrace(); // printf("Closing %s: %d zcrefs, %d/%d reads zc\n", filename.c_str(), debug_zcrefs, debug_zcreads, // debug_zcreads+debug_reads); ASSERT(!p->debug_zcrefs); p->~VFSAsyncFile(); return SQLITE_OK; } static int asyncRead(sqlite3_file* pFile, void* zBuf, int iAmt, sqlite_int64 iOfst) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; try { ++p->debug_reads; int readBytes = waitForAndGet(p->file->read(zBuf, iAmt, iOfst)); if (readBytes < iAmt) { memset((uint8_t*)zBuf + readBytes, 0, iAmt - readBytes); // When reading past the EOF, sqlite expects the // extra portion of the buffer to be zeroed return SQLITE_IOERR_SHORT_READ; } return SQLITE_OK; } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ); } return SQLITE_IOERR_READ; } } #if 1 static int asyncReleaseZeroCopy(sqlite3_file* pFile, void* data, int iAmt, sqlite_int64 iOfst) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; try { --p->debug_zcrefs; p->file->releaseZeroCopy(data, iAmt, iOfst); } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_IOERR); } return SQLITE_IOERR; } return SQLITE_OK; } static int asyncReadZeroCopy(sqlite3_file* pFile, void** data, int iAmt, sqlite_int64 iOfst, int* pDataWasCached) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; try { int readBytes = iAmt; Future readFuture = p->file->readZeroCopy(data, &readBytes, iOfst); if (pDataWasCached) *pDataWasCached = readFuture.isReady() ? 1 : 0; waitFor(readFuture); ++p->debug_zcrefs; if (readBytes < iAmt) { // When reading past the EOF, sqlite expects the extra portion of the buffer to be zeroed. We can't do // that, so return and sqlite will use the slow path. asyncReleaseZeroCopy(pFile, *data, readBytes, iOfst); return SQLITE_IOERR_SHORT_READ; } ++p->debug_zcreads; return SQLITE_OK; } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ); } return SQLITE_IOERR_READ; } } #else static int asyncReadZeroCopy(sqlite3_file* pFile, void** data, int iAmt, sqlite_int64 iOfst) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; try { *data = new char[iAmt]; int readBytes = waitForAndGet(p->file->read(*data, iAmt, iOfst)); // printf("+asyncReadRef %p +%lld %d/%d = %p\n", pFile, iOfst, readBytes, iAmt, *data); if (readBytes < iAmt) { memset((uint8_t*)*data + readBytes, 0, iAmt - readBytes); // When reading past the EOF, sqlite expects the // extra portion of the buffer to be zeroed return SQLITE_IOERR_SHORT_READ; } return SQLITE_OK; } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_IOERR_READ); } return SQLITE_IOERR_READ; } } static int asyncReleaseZeroCopy(sqlite3_file* pFile, void* data, int iAmt, sqlite_int64 iOfst) { // printf("-asyncReleaseRef %p +%lld %d <= %p\n", pFile, iOfst, iAmt, data); delete[](char*) data; return SQLITE_OK; } #endif static int asyncWrite(sqlite3_file* pFile, const void* zBuf, int iAmt, sqlite_int64 iOfst) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; try { waitFor(p->file->write(zBuf, iAmt, iOfst)); return SQLITE_OK; } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_IOERR_WRITE); } return SQLITE_IOERR_WRITE; } } static int asyncTruncate(sqlite3_file* pFile, sqlite_int64 size) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; // Adjust size to a multiple of chunkSize if set if (p->chunkSize != 0) { size = ((size + p->chunkSize - 1) / p->chunkSize) * p->chunkSize; } try { waitFor(p->file->truncate(size)); return SQLITE_OK; } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_IOERR_TRUNCATE); } return SQLITE_IOERR_TRUNCATE; } } static int asyncSync(sqlite3_file* pFile, int flags) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; try { waitFor(p->file->sync()); return SQLITE_OK; } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_IOERR_FSYNC); } TraceEvent("VFSAsyncFileSyncError") .error(e) .detail("Filename", p->filename) .detail("Sqlite3File", (int64_t)pFile) .detail("IAsyncFile", (int64_t)p->file.getPtr()); return SQLITE_IOERR_FSYNC; } } /* ** Write the size of the file in bytes to *pSize. */ static int VFSAsyncFileSize(sqlite3_file* pFile, sqlite_int64* pSize) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; try { *pSize = waitForAndGet(p->file->size()); return SQLITE_OK; } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_IOERR_FSTAT); } return SQLITE_IOERR_FSTAT; } } static int asyncLock(sqlite3_file* pFile, int eLock) { // VFSAsyncFile *p = (VFSAsyncFile*)pFile; //TraceEvent("FileLock").detail("File", p->filename).detail("Fd", p->file->debugFD()).detail("PrevLockLevel", p->lockLevel).detail("Op", eLock).detail("LockCount", *p->pLockCount); return eLock == EXCLUSIVE_LOCK ? SQLITE_BUSY : SQLITE_OK; } static int asyncUnlock(sqlite3_file* pFile, int eLock) { assert(eLock <= SHARED_LOCK); return SQLITE_OK; } static int asyncCheckReservedLock(sqlite3_file* pFile, int* pResOut) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; *pResOut = *p->pLockCount >= RESERVED_COUNT; return SQLITE_OK; } /* ** No xFileControl() verbs are implemented by this VFS. */ static int VFSAsyncFileControl(sqlite3_file* pFile, int op, void* pArg) { VFSAsyncFile* p = (VFSAsyncFile*)pFile; switch (op) { case SQLITE_FCNTL_CHUNK_SIZE: p->chunkSize = *(int*)pArg; return SQLITE_OK; case SQLITE_FCNTL_SIZE_HINT: return asyncTruncate(pFile, *(int64_t*)pArg); default: return SQLITE_NOTFOUND; }; } static int asyncSectorSize(sqlite3_file* pFile) { return 512; } // SOMEDAY: Would 4K be better? static int asyncDeviceCharacteristics(sqlite3_file* pFile) { return 0; } #if 1 struct SharedMemoryInfo { // for a file std::string filename; std::vector regions; int regionSize; int refcount; // Number of connections with this open int sharedLocks[SQLITE_SHM_NLOCK]; int exclusiveLocks[SQLITE_SHM_NLOCK]; SharedMemoryInfo() : regionSize(0), refcount(0) { memset(sharedLocks, 0, sizeof(sharedLocks)); memset(exclusiveLocks, 0, sizeof(exclusiveLocks)); } void cleanup() { for (int i = 0; i < regions.size(); i++) delete[](uint8_t*) regions[i]; table.erase(filename); } static Mutex mutex; static std::map table; }; Mutex SharedMemoryInfo::mutex; std::map SharedMemoryInfo::table; /* ** This function is called to obtain a pointer to region iRegion of the ** shared-memory associated with the database file fd. Shared-memory regions ** are numbered starting from zero. Each shared-memory region is szRegion ** bytes in size. ** ** If an error occurs, an error code is returned and *pp is set to nullptr. ** ** Otherwise, if the bExtend parameter is 0 and the requested shared-memory ** region has not been allocated (by any client, including one running in a ** separate process), then *pp is set to nullptr and SQLITE_OK returned. If ** bExtend is non-zero and the requested shared-memory region has not yet ** been allocated, it is allocated by this function. ** ** If the shared-memory region has already been allocated or is allocated by ** this call as described above, then it is mapped into this processes ** address space (if it is not already), *pp is set to point to the mapped ** memory and SQLITE_OK returned. */ static int asyncShmMap(sqlite3_file* fd, /* Handle open on database file */ int iRegion, /* Region to retrieve */ int szRegion, /* Size of regions */ int bExtend, /* True to extend file if necessary */ void volatile** pp /* OUT: Mapped memory */ ) { MutexHolder hold(SharedMemoryInfo::mutex); VFSAsyncFile* pDbFd = (VFSAsyncFile*)fd; SharedMemoryInfo* memInfo = pDbFd->sharedMemory; if (!memInfo) { std::string filename = pDbFd->filename; memInfo = pDbFd->sharedMemory = &SharedMemoryInfo::table[filename]; memInfo->filename = filename; memInfo->regionSize = szRegion; ++memInfo->refcount; // printf("Shared memory for: '%s' (%d refs)\n", filename.c_str(), memInfo->refcount); } else { assert(memInfo->regionSize == szRegion); } if (iRegion >= memInfo->regions.size()) { if (!bExtend) { *pp = nullptr; return SQLITE_OK; } while (memInfo->regions.size() <= iRegion) { void* mem = new uint8_t[szRegion]; memset(mem, 0, szRegion); memInfo->regions.push_back(mem); } } *pp = memInfo->regions[iRegion]; return SQLITE_OK; } /* ** Change the lock state for a shared-memory segment. ** ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little ** different here than in posix. In xShmLock(), one can go from unlocked ** to shared and back or from unlocked to exclusive and back. But one may ** not go from shared to exclusive or from exclusive to shared. */ // sqlite doesn't seem to match these up correctly - it happily calls unlock on locks it doesn't hold. // So we have to keep track of which locks are held by a given sqlite3_file static int asyncShmLock(sqlite3_file* fd, /* Database file holding the shared memory */ int ofst, /* First lock to acquire or release */ int n, /* Number of locks to acquire or release */ int flags /* What to do with the lock */ ) { assert(ofst >= 0 && ofst + n <= SQLITE_SHM_NLOCK); assert(n >= 1); assert(flags == (SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) || flags == (SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) || flags == (SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) || flags == (SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE)); assert(n == 1 || (flags & SQLITE_SHM_EXCLUSIVE) != 0); MutexHolder hold(SharedMemoryInfo::mutex); VFSAsyncFile* pDbFd = (VFSAsyncFile*)fd; SharedMemoryInfo* memInfo = pDbFd->sharedMemory; if (flags & SQLITE_SHM_UNLOCK) { for (int i = ofst; i < ofst + n; i++) { if (pDbFd->sharedMemorySharedLocks & (1 << i)) { pDbFd->sharedMemorySharedLocks &= ~(1 << i); --memInfo->sharedLocks[i]; } if (pDbFd->sharedMemoryExclusiveLocks & (1 << i)) { pDbFd->sharedMemoryExclusiveLocks &= ~(1 << i); --memInfo->exclusiveLocks[i]; } } } else if (flags & SQLITE_SHM_SHARED) { for (int i = ofst; i < ofst + n; i++) if (memInfo->exclusiveLocks[i] != ((pDbFd->sharedMemoryExclusiveLocks >> i) & 1)) { //TraceEvent("ShmLocked").detail("File", DEBUG_DETERMINISM ? 0 : (int64_t)pDbFd).detail("Acquiring", "Shared").detail("I", i).detail("Exclusive", memInfo->exclusiveLocks[i]).detail("MyExclusive", pDbFd->sharedMemoryExclusiveLocks); return SQLITE_BUSY; } for (int i = ofst; i < ofst + n; i++) if (!(pDbFd->sharedMemorySharedLocks & (1 << i))) { pDbFd->sharedMemorySharedLocks |= 1 << i; memInfo->sharedLocks[i]++; } } else { for (int i = ofst; i < ofst + n; i++) if (memInfo->exclusiveLocks[i] != ((pDbFd->sharedMemoryExclusiveLocks >> i) & 1) || memInfo->sharedLocks[i] != ((pDbFd->sharedMemorySharedLocks >> i) & 1)) { //TraceEvent("ShmLocked").detail("File", DEBUG_DETERMINISM ? 0 : (int64_t)pDbFd).detail("Acquiring", "Exclusive").detail("I", i).detail("Exclusive", memInfo->exclusiveLocks[i]).detail("MyExclusive", pDbFd->sharedMemoryExclusiveLocks).detail("Shared", memInfo->sharedLocks[i]).detail("MyShared", pDbFd->sharedMemorySharedLocks); return SQLITE_BUSY; } for (int i = ofst; i < ofst + n; i++) if (!(pDbFd->sharedMemoryExclusiveLocks & (1 << i))) { pDbFd->sharedMemoryExclusiveLocks |= 1 << i; memInfo->exclusiveLocks[i]++; } } return SQLITE_OK; } /* ** Implement a memory barrier or memory fence on shared memory. ** ** All loads and stores begun before the barrier must complete before ** any load or store begun after the barrier. */ static void asyncShmBarrier(sqlite3_file*) { #if WIN32 _ReadWriteBarrier(); #else __sync_synchronize(); #endif } /* ** Close a connection to shared-memory. Delete the underlying ** storage if deleteFlag is true. ** ** If there is no shared memory associated with the connection then this ** routine is a harmless no-op. */ static int asyncShmUnmap(sqlite3_file* fd, /* The underlying database file */ int deleteFlag /* Delete shared-memory if true */ ) { MutexHolder hold(SharedMemoryInfo::mutex); VFSAsyncFile* pDbFd = (VFSAsyncFile*)fd; SharedMemoryInfo* memInfo = pDbFd->sharedMemory; if (!memInfo) return SQLITE_OK; pDbFd->sharedMemory = 0; // printf("Connection %p closed shared memory\n", fd); if (!--memInfo->refcount) { // printf("Cleanup shared memory for: '%s' (%d refs; deleteFlag=%d)\n", memInfo->filename.c_str(), // memInfo->refcount, deleteFlag); printf(" Shared locks: "); for(int i=0; i<8; i++) printf("%d ", // memInfo->sharedLocks[i]); printf("\n"); printf(" Exclusive locks: "); for(int i=0; i<8; i++) printf("%d ", // memInfo->exclusiveLocks[i]); printf("\n"); //TraceEvent("CleanupSharedMemory").detail("Filename", memInfo->filename.c_str()).detail("RefCount", memInfo->refcount).detail("DeleteFlag", deleteFlag); // for(int i = 0; i < 8; i++) //TraceEvent("CleanupSharedMemory_Locks").detail("Filename", memInfo->filename.c_str()).detail("Num", i).detail("Shared", memInfo->sharedLocks[i]).detail("Exclusive", memInfo->exclusiveLocks[i]); // We don't think deleteFlag will ever be set ASSERT(!deleteFlag); } return SQLITE_OK; } VFSAsyncFile::~VFSAsyncFile() { TraceEvent(SevDebug, "VFSAsyncFileDestroyStart") .detail("Filename", filename) .detail("OpenCount", filename_lockCount_openCount[filename].second) .detail("LockCount", filename_lockCount_openCount[filename].first) .backtrace(); if (!--filename_lockCount_openCount[filename].second) { filename_lockCount_openCount.erase(filename); TraceEvent(SevDebug, "VFSAsyncFileDestroy").detail("Filename", filename).backtrace(); // Always delete the shared memory when the last copy of the file is deleted. In simulation, this is helpful // because "killing" a file without properly closing it can result in a shared memory state that causes // corruption when reopening the killed file. The only expected penalty from doing this is a potentially slower // open operation on a database, but that should happen infrequently. // // We can't do this in ShmUnmap when refcount is 0 because it seems that SQLite sometimes subsequently tries to // reopen the WAL from multiple locations simultaneously, resulting in a locking error auto itr = SharedMemoryInfo::table.find(filename); if (itr != SharedMemoryInfo::table.end()) { ASSERT_ABORT(itr->second.refcount == 0); itr->second.cleanup(); } } } #endif /* ** Open a file handle. */ static int asyncOpen(sqlite3_vfs* pVfs, /* VFS */ const char* zName, /* File to open, or 0 for a temp file */ sqlite3_file* pFile, /* Pointer to VFSAsyncFile struct to populate */ int flags, /* Input SQLITE_OPEN_XXX flags */ int* pOutFlags /* Output SQLITE_OPEN_XXX flags (or nullptr) */ ) { static const sqlite3_io_methods asyncio = { 3, /* iVersion */ asyncClose, /* xClose */ asyncRead, /* xRead */ asyncWrite, /* xWrite */ asyncTruncate, /* xTruncate */ asyncSync, /* xSync */ VFSAsyncFileSize, /* xFileSize */ asyncLock, /* xLock */ asyncUnlock, /* xUnlock */ asyncCheckReservedLock, /* xCheckReservedLock */ VFSAsyncFileControl, /* xFileControl */ asyncSectorSize, /* xSectorSize */ asyncDeviceCharacteristics, /* xDeviceCharacteristics */ asyncShmMap, asyncShmLock, asyncShmBarrier, asyncShmUnmap, asyncReadZeroCopy, asyncReleaseZeroCopy }; VFSAsyncFile* p = (VFSAsyncFile*)pFile; /* Populate this structure */ if (zName == 0) return SQLITE_IOERR; static_assert( SQLITE_OPEN_EXCLUSIVE == IAsyncFile::OPEN_EXCLUSIVE && SQLITE_OPEN_CREATE == IAsyncFile::OPEN_CREATE && SQLITE_OPEN_READONLY == IAsyncFile::OPEN_READONLY && SQLITE_OPEN_READWRITE == IAsyncFile::OPEN_READWRITE, "SQLite flag values don't match IAsyncFile flag values"); // File creation here is disabled because we always create the files first in KeyValueStoreSQLite, using atomic // creation int oflags = flags & (/*SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_CREATE |*/ SQLITE_OPEN_READONLY | SQLITE_OPEN_READWRITE); if (flags & SQLITE_OPEN_WAL) oflags |= IAsyncFile::OPEN_LARGE_PAGES; oflags |= IAsyncFile::OPEN_LOCK; memset(static_cast(p), 0, sizeof(VFSAsyncFile)); new (p) VFSAsyncFile(zName, flags); try { // Note that SQLiteDB::open also opens the db file, so its flags and modes are important, too p->file = waitForAndGet(IAsyncFileSystem::filesystem()->open(p->filename, oflags, 0600)); TraceEvent(SevDebug, "VFSAsyncFileOpened").detail("Filename", p->filename).backtrace(); } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_CANTOPEN); } TraceEvent("VFSAsyncFileOpenError").error(e).detail("Filename", p->filename); p->~VFSAsyncFile(); return SQLITE_CANTOPEN; } if (pOutFlags) { *pOutFlags = flags; } p->base.pMethods = &asyncio; return SQLITE_OK; } // The next few functions, which perform filesystem operations by path rather than by file, have // OS-specific implementations. /* ** Delete the file identified by argument zPath. If the dirSync parameter ** is non-zero, then ensure the file-system modification to delete the ** file has been synced to disk before returning. */ static int asyncDelete(sqlite3_vfs* pVfs, const char* zPath, int dirSync) { ASSERT(false); // At the moment this isn't used; hence isn't under test. Could easily use // IAsyncFileSystem::filesystem()->deleteFile(). return SQLITE_IOERR_DELETE; } /* ** Query the file-system to see if the named file exists, is readable or ** is both readable and writable. For an exists query, treat a zero-length file ** as if it does not exist. */ static int asyncAccess(sqlite3_vfs* pVfs, const char* zPath, int flags, int* pResOut) { #ifdef __unixish__ #ifndef F_OK #define F_OK 0 #endif #ifndef R_OK #define R_OK 4 #endif #ifndef W_OK #define W_OK 2 #endif int rc; /* access() return code */ int eAccess = F_OK; /* Second argument to access() */ assert(flags == SQLITE_ACCESS_EXISTS /* access(zPath, F_OK) */ || flags == SQLITE_ACCESS_READ /* access(zPath, R_OK) */ || flags == SQLITE_ACCESS_READWRITE /* access(zPath, R_OK|W_OK) */ ); if (flags == SQLITE_ACCESS_READWRITE) eAccess = R_OK | W_OK; if (flags == SQLITE_ACCESS_READ) eAccess = R_OK; rc = access(zPath, eAccess); *pResOut = (rc == 0); if (flags == SQLITE_ACCESS_EXISTS && *pResOut) { struct stat buf; if (0 == stat(zPath, &buf) && buf.st_size == 0) { *pResOut = 0; } } return SQLITE_OK; #else WIN32_FILE_ATTRIBUTE_DATA data; DWORD attr = INVALID_FILE_ATTRIBUTES; memset(&data, 0, sizeof(data)); if (GetFileAttributesEx(zPath, GetFileExInfoStandard, &data)) { if (!(flags == SQLITE_ACCESS_EXISTS && data.nFileSizeHigh == 0 && data.nFileSizeLow == 0)) attr = data.dwFileAttributes; } else if (GetLastError() != ERROR_FILE_NOT_FOUND) return SQLITE_IOERR_ACCESS; if (flags == SQLITE_ACCESS_READWRITE) *pResOut = (attr & FILE_ATTRIBUTE_READONLY) == 0; else *pResOut = attr != INVALID_FILE_ATTRIBUTES; return SQLITE_OK; #endif } /* ** Argument zPath points to a nul-terminated string containing a file path. ** If zPath is an absolute path, then it is copied as is into the output ** buffer. Otherwise, if it is a relative path, then the equivalent full ** path is written to the output buffer. */ static int asyncFullPathname(sqlite3_vfs* pVfs, /* VFS */ const char* zPath, /* Input path (possibly a relative path) */ int nPathOut, /* Size of output buffer in bytes */ char* zPathOut /* Pointer to output buffer */ ) { try { auto s = abspath(zPath); if (s.size() >= nPathOut) return SQLITE_IOERR; memcpy(zPathOut, s.c_str(), s.size() + 1); return SQLITE_OK; } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_IOERR); } TraceEvent(SevError, "VFSAsyncFullPathnameError").error(e).detail("PathIn", (std::string)zPath); return SQLITE_IOERR; } catch (...) { TraceEvent(SevError, "VFSAsyncFullPathnameError").error(unknown_error()).detail("PathIn", (std::string)zPath); return SQLITE_IOERR; } } /* ** Returns true if there is a shared memory entry for the specified filename, ** and false otherwise. */ bool vfsAsyncIsOpen(std::string filename) { return SharedMemoryInfo::table.count(abspath(filename)) > 0; } /* ** The following four VFS methods: ** ** xDlOpen ** xDlError ** xDlSym ** xDlClose ** ** are supposed to implement the functionality needed by SQLite to load ** extensions compiled as shared objects. This simple VFS does not support ** this functionality, so the following functions are no-ops. */ static void* asyncDlOpen(sqlite3_vfs* pVfs, const char* zPath) { return 0; } static void asyncDlError(sqlite3_vfs* pVfs, int nByte, char* zErrMsg) { sqlite3_snprintf(nByte, zErrMsg, "Loadable extensions are not supported"); zErrMsg[nByte - 1] = '\0'; } static void (*asyncDlSym(sqlite3_vfs* pVfs, void* pH, const char* z))(void) { return 0; } static void asyncDlClose(sqlite3_vfs* pVfs, void* pHandle) { return; } /* ** Parameter zByte points to a buffer nByte bytes in size. Populate this ** buffer with pseudo-random data. */ static int asyncRandomness(sqlite3_vfs* pVfs, int nByte, char* zByte) { for (int i = 0; i < nByte; i++) zByte[i] = deterministicRandom()->randomInt(0, 256); return SQLITE_OK; } /* ** Sleep for at least nMicro microseconds. Return the (approximate) number ** of microseconds slept for. */ static int asyncSleep(sqlite3_vfs* pVfs, int microseconds) { try { Future simCancel = Never(); if (g_network->isSimulated()) simCancel = success(g_simulator.getCurrentProcess()->shutdownSignal.getFuture()); if (simCancel.isReady()) { waitFor(delay(FLOW_KNOBS->MAX_BUGGIFIED_DELAY)); return 0; } waitFor(g_network->delay(microseconds * 1e-6, TaskPriority::DefaultDelay) || simCancel); return microseconds; } catch (Error& e) { if (e.isInjectedFault()) { VFSAsyncFile::setInjectedError(SQLITE_ERROR); } TraceEvent(SevError, "VFSAsyncSleepError").errorUnsuppressed(e); return 0; } } /* ** Find the current time (in Universal Coordinated Time). Write into *piNow ** the current time and date as a Julian Day number times 86_400_000. In ** other words, write into *piNow the number of milliseconds since the Julian ** epoch of noon in Greenwich on November 24, 4714 B.C according to the ** proleptic Gregorian calendar. ** ** On success, return 0. Return 1 if the time and date cannot be found. */ static int asyncCurrentTimeInt64(sqlite3_vfs* NotUsed, sqlite3_int64* piNow) { #if __unixish__ static const sqlite3_int64 unixEpoch = 24405875 * (sqlite3_int64)8640000; struct timeval sNow; gettimeofday(&sNow, nullptr); *piNow = unixEpoch + 1000 * (sqlite3_int64)sNow.tv_sec + sNow.tv_usec / 1000; #elif defined(_WIN32) static const sqlite3_int64 winFiletimeEpoch = 23058135 * (sqlite3_int64)8640000; int64_t ft = 0; GetSystemTimeAsFileTime((FILETIME*)&ft); *piNow = winFiletimeEpoch + ft / 10000; #else #error Port me! #endif return 0; } /* ** Set *pTime to the current UTC time expressed as a Julian day. Return ** SQLITE_OK if successful, or an error code otherwise. ** ** http://en.wikipedia.org/wiki/Julian_day */ static int asyncCurrentTime(sqlite3_vfs* pVfs, double* pTime) { sqlite3_int64 t = 0; int rc = asyncCurrentTimeInt64(pVfs, &t); if (rc) return rc; *pTime = t / 86400000.0; return SQLITE_OK; } static int asyncGetLastError(sqlite3_vfs* NotUsed, int NotUsed2, char* NotUsed3) { return 0; } /* ** This function returns a pointer to the VFS implemented in this file. ** To make the VFS available to SQLite: ** ** sqlite3_vfs_register(sqlite3_asyncvfs(), 0); */ sqlite3_vfs* vfsAsync() { static sqlite3_vfs asyncvfs = { 3, /* iVersion */ sizeof(VFSAsyncFile), /* szOsFile */ MAXPATHNAME, /* mxPathname */ 0, /* pNext */ "fdb_async", /* zName */ 0, /* pAppData */ asyncOpen, /* xOpen */ asyncDelete, /* xDelete */ asyncAccess, /* xAccess */ asyncFullPathname, /* xFullPathname */ asyncDlOpen, /* xDlOpen */ asyncDlError, /* xDlError */ asyncDlSym, /* xDlSym */ asyncDlClose, /* xDlClose */ asyncRandomness, /* xRandomness */ asyncSleep, /* xSleep */ asyncCurrentTime, /* xCurrentTime */ asyncGetLastError, /* xGetLastError */ asyncCurrentTimeInt64, /* xCurrentTimeInt64 */ 0, /* xSetSystemCall */ 0, /* xGetSystemCall */ 0, /* xNextSystemCall */ }; return &asyncvfs; }