Merge pull request #5198 from Daniel-B-Smith/rocksdb-simulation-roll-forward

Roll forward: RocksDB in Simulation
This commit is contained in:
Evan Tschannen 2021-07-20 09:33:11 -07:00 committed by GitHub
commit 5c9a38e526
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 72 additions and 16 deletions

View File

@ -335,7 +335,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
// KeyValueStoreRocksDB
init( ROCKSDB_BACKGROUND_PARALLELISM, 0 );
init( ROCKSDB_READ_PARALLELISM, 4 );
init( ROCKSDB_MEMTABLE_BYTES, 512 * 1024 * 1024 );
// Use a smaller memtable in simulation to avoid OOMs.
int64_t memtableBytes = isSimulated ? 32 * 1024 : 512 * 1024 * 1024;
init( ROCKSDB_MEMTABLE_BYTES, memtableBytes );
init( ROCKSDB_UNSAFE_AUTO_FSYNC, false );
init( ROCKSDB_PERIODIC_COMPACTION_SECONDS, 0 );
init( ROCKSDB_PREFIX_LEN, 0 );

View File

@ -7,6 +7,7 @@
#include <rocksdb/slice_transform.h>
#include <rocksdb/table.h>
#include <rocksdb/utilities/table_properties_collectors.h>
#include "fdbserver/CoroFlow.h"
#include "flow/flow.h"
#include "flow/IThreadPool.h"
@ -283,7 +284,9 @@ struct RocksDBKeyValueStore : IKeyValueStore {
std::min(value.size(), size_t(a.maxLength)))));
} else {
if (!s.IsNotFound()) {
TraceEvent(SevError, "RocksDBError").detail("Error", s.ToString()).detail("Method", "ReadValuePrefix");
TraceEvent(SevError, "RocksDBError")
.detail("Error", s.ToString())
.detail("Method", "ReadValuePrefix");
}
a.result.send(Optional<Value>());
}
@ -367,8 +370,23 @@ struct RocksDBKeyValueStore : IKeyValueStore {
std::unique_ptr<rocksdb::WriteBatch> writeBatch;
explicit RocksDBKeyValueStore(const std::string& path, UID id) : path(path), id(id) {
writeThread = createGenericThreadPool();
readThreads = createGenericThreadPool();
// In simluation, run the reader/writer threads as Coro threads (i.e. in the network thread. The storage engine
// is still multi-threaded as background compaction threads are still present. Reads/writes to disk will also
// block the network thread in a way that would be unacceptable in production but is a necessary evil here. When
// performing the reads in background threads in simulation, the event loop thinks there is no work to do and
// advances time faster than 1 sec/sec. By the time the blocking read actually finishes, simulation has advanced
// time by more than 5 seconds, so every read fails with a transaction_too_old error. Doing blocking IO on the
// main thread solves this issue. There are almost certainly better fixes, but my goal was to get a less
// invasive change merged first and work on a more realistic version if/when we think that would provide
// substantially more confidence in the correctness.
// TODO: Adapt the simulation framework to not advance time quickly when background reads/writes are occurring.
if (g_network->isSimulated()) {
writeThread = CoroThreadPool::createThreadPool();
readThreads = CoroThreadPool::createThreadPool();
} else {
writeThread = createGenericThreadPool();
readThreads = createGenericThreadPool();
}
writeThread->addThread(new Writer(db, id), "fdb-rocksdb-wr");
for (unsigned i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; ++i) {
readThreads->addThread(new Reader(db), "fdb-rocksdb-re");

View File

@ -22,6 +22,7 @@
#include <fstream>
#include <ostream>
#include <sstream>
#include <string_view>
#include <toml.hpp>
#include "fdbrpc/Locality.h"
#include "fdbrpc/simulator.h"
@ -50,6 +51,19 @@ extern const char* getSourceVersion();
using namespace std::literals;
// TODO: Defining these here is just asking for ODR violations.
template <>
std::string describe(bool const& val) {
return val ? "true" : "false";
}
template <>
std::string describe(int const& val) {
return format("%d", val);
}
namespace {
const int MACHINE_REBOOT_TIME = 10;
bool destructed = false;
@ -232,6 +246,7 @@ public:
// 1 = "memory"
// 2 = "memory-radixtree-beta"
// 3 = "ssd-redwood-experimental"
// 4 = "ssd-rocksdb-experimental"
// Requires a comma-separated list of numbers WITHOUT whitespaces
std::vector<int> storageEngineExcludeTypes;
// Set the maximum TLog version that can be selected for a test
@ -629,16 +644,6 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
}
}
template <>
std::string describe(bool const& val) {
return val ? "true" : "false";
}
template <>
std::string describe(int const& val) {
return format("%d", val);
}
// Since a datacenter kill is considered to be the same as killing a machine, files cannot be swapped across datacenters
std::map<Optional<Standalone<StringRef>>, std::vector<std::vector<std::string>>> availableFolders;
// process count is no longer needed because it is now the length of the vector of ip's, because it was one ip per
@ -1252,7 +1257,7 @@ void SimulationConfig::setDatacenters(const TestConfig& testConfig) {
// Sets storage engine based on testConfig details
void SimulationConfig::setStorageEngine(const TestConfig& testConfig) {
int storage_engine_type = deterministicRandom()->randomInt(0, 4);
int storage_engine_type = deterministicRandom()->randomInt(0, 5);
if (testConfig.storageEngineType.present()) {
storage_engine_type = testConfig.storageEngineType.get();
} else {
@ -1260,7 +1265,7 @@ void SimulationConfig::setStorageEngine(const TestConfig& testConfig) {
while (std::find(testConfig.storageEngineExcludeTypes.begin(),
testConfig.storageEngineExcludeTypes.end(),
storage_engine_type) != testConfig.storageEngineExcludeTypes.end()) {
storage_engine_type = deterministicRandom()->randomInt(0, 4);
storage_engine_type = deterministicRandom()->randomInt(0, 5);
}
}
@ -1285,6 +1290,16 @@ void SimulationConfig::setStorageEngine(const TestConfig& testConfig) {
set_config("ssd-redwood-experimental");
break;
}
case 4: {
TEST(true); // Simulated cluster using RocksDB storage engine
set_config("ssd-rocksdb-experimental");
// Tests using the RocksDB engine are necessarily non-deterministic because of RocksDB
// background threads.
TraceEvent(SevWarn, "RocksDBNonDeterminism")
.detail("Explanation", "The RocksDB storage engine is threaded and non-deterministic");
noUnseed = true;
break;
}
default:
ASSERT(false); // Programmer forgot to adjust cases.
}
@ -2081,9 +2096,17 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors,
using namespace std::literals;
#ifdef SSD_ROCKSDB_EXPERIMENTAL
bool rocksDBEnabled = true;
#else
bool rocksDBEnabled = false;
#endif
// Populates the TestConfig fields according to what is found in the test file.
void checkTestConf(const char* testFile, TestConfig* testConfig) {}
} // namespace
ACTOR void setupAndRun(std::string dataFolder,
const char* testFile,
bool rebooting,
@ -2098,6 +2121,19 @@ ACTOR void setupAndRun(std::string dataFolder,
g_simulator.hasDiffProtocolProcess = testConfig.startIncompatibleProcess;
g_simulator.setDiffProtocol = false;
// The RocksDB storage engine does not support the restarting tests because you cannot consistently get a clean
// snapshot of the storage engine without a snapshotting file system.
// https://github.com/apple/foundationdb/issues/5155
if (std::string_view(testFile).find("restarting") != std::string_view::npos) {
testConfig.storageEngineExcludeTypes.push_back(4);
}
// The RocksDB engine is not always built with the rest of fdbserver. Don't try to use it if it is not included
// in the build.
if (!rocksDBEnabled) {
testConfig.storageEngineExcludeTypes.push_back(4);
}
state ProtocolVersion protocolVersion = currentProtocolVersion;
if (testConfig.startIncompatibleProcess) {
// isolates right most 1 bit of compatibleProtocolVersionMask to make this protocolVersion incompatible