Add sharded rocksdb type (#6862)

* add-sharded-rocksdb-type

* address comments

Co-authored-by: Zhe Wang <zhewang@Zhes-MacBook-Pro.local>
This commit is contained in:
Zhe Wang 2022-04-21 22:53:14 -04:00 committed by GitHub
parent 04ecd8e08f
commit 6c9ff6ee5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 89 additions and 3 deletions

View File

@ -736,6 +736,7 @@
"ssd-2",
"ssd-redwood-1-experimental",
"ssd-rocksdb-v1",
"ssd-sharded-rocksdb",
"memory",
"memory-1",
"memory-2",
@ -749,6 +750,7 @@
"ssd-2",
"ssd-redwood-1-experimental",
"ssd-rocksdb-v1",
"ssd-sharded-rocksdb",
"memory",
"memory-1",
"memory-2",

View File

@ -195,6 +195,12 @@ ACTOR Future<bool> configureCommandActor(Reference<IDatabase> db,
fprintf(stderr,
"WARN: RocksDB storage engine type is still in experimental stage, not yet production tested.\n");
break;
case ConfigurationResult::DATABASE_CREATED_WARN_SHARDED_ROCKSDB_EXPERIMENTAL:
printf("Database created\n");
fprintf(
stderr,
"WARN: Sharded RocksDB storage engine type is still in experimental stage, not yet production tested.\n");
break;
case ConfigurationResult::DATABASE_UNAVAILABLE:
fprintf(stderr, "ERROR: The database is unavailable\n");
fprintf(stderr, "Type `configure FORCE <TOKEN...>' to configure without this check\n");
@ -260,6 +266,12 @@ ACTOR Future<bool> configureCommandActor(Reference<IDatabase> db,
fprintf(stderr,
"WARN: RocksDB storage engine type is still in experimental stage, not yet production tested.\n");
break;
case ConfigurationResult::SUCCESS_WARN_SHARDED_ROCKSDB_EXPERIMENTAL:
printf("Configuration changed\n");
fprintf(
stderr,
"WARN: Sharded RocksDB storage engine type is still in experimental stage, not yet production tested.\n");
break;
default:
ASSERT(false);
ret = false;

View File

@ -303,6 +303,9 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
} else if (tLogDataStoreType == KeyValueStoreType::SSD_BTREE_V2 &&
storageServerStoreType == KeyValueStoreType::SSD_ROCKSDB_V1) {
result["storage_engine"] = "ssd-rocksdb-v1";
} else if (tLogDataStoreType == KeyValueStoreType::SSD_BTREE_V2 &&
storageServerStoreType == KeyValueStoreType::SSD_SHARDED_ROCKSDB) {
result["storage_engine"] = "ssd-sharded-rocksdb";
} else if (tLogDataStoreType == KeyValueStoreType::MEMORY && storageServerStoreType == KeyValueStoreType::MEMORY) {
result["storage_engine"] = "memory-1";
} else if (tLogDataStoreType == KeyValueStoreType::SSD_BTREE_V2 &&
@ -325,6 +328,8 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
result["tss_storage_engine"] = "ssd-redwood-1-experimental";
} else if (testingStorageServerStoreType == KeyValueStoreType::SSD_ROCKSDB_V1) {
result["tss_storage_engine"] = "ssd-rocksdb-v1";
} else if (testingStorageServerStoreType == KeyValueStoreType::SSD_SHARDED_ROCKSDB) {
result["tss_storage_engine"] = "ssd-sharded-rocksdb";
} else if (testingStorageServerStoreType == KeyValueStoreType::MEMORY_RADIXTREE) {
result["tss_storage_engine"] = "memory-radixtree-beta";
} else if (testingStorageServerStoreType == KeyValueStoreType::MEMORY) {

View File

@ -825,7 +825,16 @@ struct KeyValueStoreType {
// These enumerated values are stored in the database configuration, so should NEVER be changed.
// Only add new ones just before END.
// SS storeType is END before the storageServerInterface is initialized.
enum StoreType { SSD_BTREE_V1, MEMORY, SSD_BTREE_V2, SSD_REDWOOD_V1, MEMORY_RADIXTREE, SSD_ROCKSDB_V1, END };
enum StoreType {
SSD_BTREE_V1,
MEMORY,
SSD_BTREE_V2,
SSD_REDWOOD_V1,
MEMORY_RADIXTREE,
SSD_ROCKSDB_V1,
SSD_SHARDED_ROCKSDB,
END
};
KeyValueStoreType() : type(END) {}
KeyValueStoreType(StoreType type) : type(type) {
@ -850,6 +859,8 @@ struct KeyValueStoreType {
return "ssd-redwood-1-experimental";
case SSD_ROCKSDB_V1:
return "ssd-rocksdb-v1";
case SSD_SHARDED_ROCKSDB:
return "ssd-sharded-rocksdb";
case MEMORY:
return "memory";
case MEMORY_RADIXTREE:

View File

@ -66,7 +66,9 @@ enum class ConfigurationResult {
SUCCESS_WARN_PPW_GRADUAL,
SUCCESS,
SUCCESS_WARN_ROCKSDB_EXPERIMENTAL,
SUCCESS_WARN_SHARDED_ROCKSDB_EXPERIMENTAL,
DATABASE_CREATED_WARN_ROCKSDB_EXPERIMENTAL,
DATABASE_CREATED_WARN_SHARDED_ROCKSDB_EXPERIMENTAL,
};
enum class CoordinatorsResult {
@ -293,6 +295,7 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,
state bool warnPPWGradual = false;
state bool warnChangeStorageNoMigrate = false;
state bool warnRocksDBIsExperimental = false;
state bool warnShardedRocksDBIsExperimental = false;
loop {
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
@ -483,6 +486,9 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,
} else if (newConfig.storageServerStoreType != oldConfig.storageServerStoreType &&
newConfig.storageServerStoreType == KeyValueStoreType::SSD_ROCKSDB_V1) {
warnRocksDBIsExperimental = true;
} else if (newConfig.storageServerStoreType != oldConfig.storageServerStoreType &&
newConfig.storageServerStoreType == KeyValueStoreType::SSD_SHARDED_ROCKSDB) {
warnShardedRocksDBIsExperimental = true;
}
}
}
@ -534,6 +540,9 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,
else if (m[configKeysPrefix.toString() + "storage_engine"] ==
std::to_string(KeyValueStoreType::SSD_ROCKSDB_V1))
return ConfigurationResult::DATABASE_CREATED_WARN_ROCKSDB_EXPERIMENTAL;
else if (m[configKeysPrefix.toString() + "storage_engine"] ==
std::to_string(KeyValueStoreType::SSD_SHARDED_ROCKSDB))
return ConfigurationResult::DATABASE_CREATED_WARN_SHARDED_ROCKSDB_EXPERIMENTAL;
else
return ConfigurationResult::DATABASE_CREATED;
} catch (Error& e2) {
@ -549,6 +558,8 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,
return ConfigurationResult::SUCCESS_WARN_PPW_GRADUAL;
} else if (warnRocksDBIsExperimental) {
return ConfigurationResult::SUCCESS_WARN_ROCKSDB_EXPERIMENTAL;
} else if (warnShardedRocksDBIsExperimental) {
return ConfigurationResult::SUCCESS_WARN_SHARDED_ROCKSDB_EXPERIMENTAL;
} else {
return ConfigurationResult::SUCCESS;
}

View File

@ -217,6 +217,9 @@ std::map<std::string, std::string> configForToken(std::string const& mode) {
} else if (mode == "ssd-rocksdb-v1") {
logType = KeyValueStoreType::SSD_BTREE_V2;
storeType = KeyValueStoreType::SSD_ROCKSDB_V1;
} else if (mode == "ssd-sharded-rocksdb") {
logType = KeyValueStoreType::SSD_BTREE_V2;
storeType = KeyValueStoreType::SSD_SHARDED_ROCKSDB;
} else if (mode == "memory" || mode == "memory-2") {
logType = KeyValueStoreType::SSD_BTREE_V2;
storeType = KeyValueStoreType::MEMORY;

View File

@ -769,6 +769,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"ssd-2",
"ssd-redwood-1-experimental",
"ssd-rocksdb-v1",
"ssd-sharded-rocksdb",
"memory",
"memory-1",
"memory-2",
@ -782,6 +783,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"ssd-2",
"ssd-redwood-1-experimental",
"ssd-rocksdb-v1",
"ssd-sharded-rocksdb",
"memory",
"memory-1",
"memory-2",

View File

@ -381,7 +381,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY, "fdb");
init( ROCKSDB_PERFCONTEXT_ENABLE, false ); if( randomize && BUGGIFY ) ROCKSDB_PERFCONTEXT_ENABLE = deterministicRandom()->coinflip() ? false : true;
init( ROCKSDB_PERFCONTEXT_SAMPLE_RATE, 0.0001 );
init( ROCKSDB_PERFCONTEXT_SAMPLE_RATE, 0.0001 );
init( ROCKSDB_MAX_SUBCOMPACTIONS, 2 );
init( ROCKSDB_SOFT_PENDING_COMPACT_BYTES_LIMIT, 64000000000 ); // 64GB, Rocksdb option, Writes will slow down.
init( ROCKSDB_HARD_PENDING_COMPACT_BYTES_LIMIT, 100000000000 ); // 100GB, Rocksdb option, Writes will stall.
@ -393,6 +393,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD, 5 );
init( ROCKSDB_COMPACTION_READAHEAD_SIZE, 32768 ); // 32 KB, performs bigger reads when doing compaction.
init( ROCKSDB_BLOCK_SIZE, 32768 ); // 32 KB, size of the block in rocksdb cache.
init( ENABLE_SHARDED_ROCKSDB, false );
// Leader election
bool longLeaderElection = randomize && BUGGIFY;

View File

@ -322,6 +322,7 @@ public:
int ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD;
int64_t ROCKSDB_COMPACTION_READAHEAD_SIZE;
int64_t ROCKSDB_BLOCK_SIZE;
bool ENABLE_SHARDED_ROCKSDB;
// Leader election
int MAX_NOTIFICATIONS;

View File

@ -187,6 +187,8 @@ inline IKeyValueStore* openKVStore(KeyValueStoreType storeType,
return keyValueStoreRedwoodV1(filename, logID);
case KeyValueStoreType::SSD_ROCKSDB_V1:
return keyValueStoreRocksDB(filename, logID, storeType);
case KeyValueStoreType::SSD_SHARDED_ROCKSDB:
return keyValueStoreRocksDB(filename, logID, storeType); // TODO: to replace the KVS in the future
case KeyValueStoreType::MEMORY_RADIXTREE:
return keyValueStoreMemory(filename,
logID,

View File

@ -1809,7 +1809,14 @@ struct RocksDBKeyValueStore : IKeyValueStore {
void close() override { doClose(this, false); }
KeyValueStoreType getType() const override { return KeyValueStoreType(KeyValueStoreType::SSD_ROCKSDB_V1); }
KeyValueStoreType getType() const override {
if (SERVER_KNOBS->ENABLE_SHARDED_ROCKSDB)
// KVSRocks pretends as KVSShardedRocksDB
// TODO: to remove when the ShardedRocksDB KVS implementation is added in the future
return KeyValueStoreType(KeyValueStoreType::SSD_SHARDED_ROCKSDB);
else
return KeyValueStoreType(KeyValueStoreType::SSD_ROCKSDB_V1);
}
Future<Void> init() override {
if (openFuture.isValid()) {

View File

@ -310,6 +310,7 @@ public:
// 2 = "memory-radixtree-beta"
// 3 = "ssd-redwood-1-experimental"
// 4 = "ssd-rocksdb-v1"
// 5 = "ssd-sharded-rocksdb"
// Requires a comma-separated list of numbers WITHOUT whitespaces
std::vector<int> storageEngineExcludeTypes;
// Set the maximum TLog version that can be selected for a test
@ -1420,6 +1421,16 @@ void SimulationConfig::setStorageEngine(const TestConfig& testConfig) {
noUnseed = true;
break;
}
case 5: {
TEST(true); // Simulated cluster using Sharded RocksDB storage engine
set_config("ssd-sharded-rocksdb");
// Tests using the RocksDB engine are necessarily non-deterministic because of RocksDB
// background threads.
TraceEvent(SevWarnAlways, "RocksDBNonDeterminism")
.detail("Explanation", "The Sharded RocksDB storage engine is threaded and non-deterministic");
noUnseed = true;
break;
}
default:
ASSERT(false); // Programmer forgot to adjust cases.
}

View File

@ -323,6 +323,9 @@ KeyValueStoreSuffix redwoodSuffix = { KeyValueStoreType::SSD_REDWOOD_V1, ".redwo
KeyValueStoreSuffix rocksdbSuffix = { KeyValueStoreType::SSD_ROCKSDB_V1,
".rocksdb",
FilesystemCheck::DIRECTORIES_ONLY };
KeyValueStoreSuffix shardedRocksdbSuffix = { KeyValueStoreType::SSD_SHARDED_ROCKSDB,
".shardedrocksdb",
FilesystemCheck::DIRECTORIES_ONLY };
std::string validationFilename = "_validate";
@ -337,6 +340,8 @@ std::string filenameFromSample(KeyValueStoreType storeType, std::string folder,
return joinPath(folder, sample_filename);
else if (storeType == KeyValueStoreType::SSD_ROCKSDB_V1)
return joinPath(folder, sample_filename);
else if (storeType == KeyValueStoreType::SSD_SHARDED_ROCKSDB)
return joinPath(folder, sample_filename);
UNREACHABLE();
}
@ -352,6 +357,8 @@ std::string filenameFromId(KeyValueStoreType storeType, std::string folder, std:
return joinPath(folder, prefix + id.toString() + ".redwood-v1");
else if (storeType == KeyValueStoreType::SSD_ROCKSDB_V1)
return joinPath(folder, prefix + id.toString() + ".rocksdb");
else if (storeType == KeyValueStoreType::SSD_SHARDED_ROCKSDB)
return joinPath(folder, prefix + id.toString() + ".shardedrocksdb");
TraceEvent(SevError, "UnknownStoreType").detail("StoreType", storeType.toString());
UNREACHABLE();
@ -528,6 +535,9 @@ std::vector<DiskStore> getDiskStores(std::string folder) {
result.insert(result.end(), result4.begin(), result4.end());
auto result5 = getDiskStores(folder, rocksdbSuffix.suffix, rocksdbSuffix.type, rocksdbSuffix.check);
result.insert(result.end(), result5.begin(), result5.end());
auto result6 =
getDiskStores(folder, shardedRocksdbSuffix.suffix, shardedRocksdbSuffix.type, shardedRocksdbSuffix.check);
result.insert(result.end(), result6.begin(), result6.end());
return result;
}
@ -1657,6 +1667,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
SERVER_KNOBS->REMOTE_KV_STORE && /* testing mixed mode in simulation if remote kvs enabled */
(g_network->isSimulated()
? (/* Disable for RocksDB */ s.storeType != KeyValueStoreType::SSD_ROCKSDB_V1 &&
s.storeType != KeyValueStoreType::SSD_SHARDED_ROCKSDB &&
deterministicRandom()->coinflip())
: true));
Future<Void> kvClosed = kv->onClosed();
@ -2234,6 +2245,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
SERVER_KNOBS->REMOTE_KV_STORE && /* testing mixed mode in simulation if remote kvs enabled */
(g_network->isSimulated()
? (/* Disable for RocksDB */ req.storeType != KeyValueStoreType::SSD_ROCKSDB_V1 &&
req.storeType != KeyValueStoreType::SSD_SHARDED_ROCKSDB &&
deterministicRandom()->coinflip())
: true));
@ -2432,6 +2444,9 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
} else if (d.storeType == KeyValueStoreType::SSD_ROCKSDB_V1) {
included = fileExists(joinPath(d.filename, "CURRENT")) &&
fileExists(joinPath(d.filename, "IDENTITY"));
} else if (d.storeType == KeyValueStoreType::SSD_SHARDED_ROCKSDB) {
included = fileExists(joinPath(d.filename, "CURRENT")) &&
fileExists(joinPath(d.filename, "IDENTITY"));
} else if (d.storeType == KeyValueStoreType::MEMORY) {
included = fileExists(d.filename + "1.fdq");
} else {

View File

@ -388,6 +388,9 @@ ACTOR Future<Void> testKVStore(KVStoreTestWorkload* workload) {
test.store = keyValueStoreRedwoodV1(fn, id);
else if (workload->storeType == "ssd-rocksdb-v1")
test.store = keyValueStoreRocksDB(fn, id, KeyValueStoreType::SSD_ROCKSDB_V1);
else if (workload->storeType == "ssd-sharded-rocksdb")
test.store = keyValueStoreRocksDB(
fn, id, KeyValueStoreType::SSD_SHARDED_ROCKSDB); // TODO: to replace the KVS in the future
else if (workload->storeType == "memory")
test.store = keyValueStoreMemory(fn, id, 500e6);
else if (workload->storeType == "memory-radixtree-beta")