Fix read iterator pool race (#7455)
commit 61d621a4c4 (parent d9b0ff418f)
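
The background refresh loop could race with shard initialization: PhysicalShard::initialized() only checked cf != nullptr, and the column family handle exists before readIterPool is assigned, so the loop could call refreshIterators() through a null ReadIteratorPool. The loop was also started from the Writer's open path (via onMainThread outside simulation) rather than being owned by the store. This change publishes initialization through an atomic flag that is set only after the iterator pool is constructed, skips shards that are not yet initialized, and moves the loop into refreshReadIteratorPools, which waits for the store to open, checks a shared closing flag, and is held in refreshHolder so it can be cancelled in doClose.

A minimal sketch of the publish-then-check pattern the fix relies on (a standalone illustration, not FDB code; Pool stands in for ReadIteratorPool, and the thread calling refresh() concurrently with init() is assumed):

    #include <atomic>
    #include <memory>

    struct Pool {
        void refreshIterators() {}
    };

    struct Shard {
        std::atomic<bool> isInitialized{ false };
        std::shared_ptr<Pool> readIterPool;

        void init() {
            readIterPool = std::make_shared<Pool>(); // fully construct the pool first...
            isInitialized.store(true);               // ...then publish the flag
        }

        // Concurrent callers check the flag, never a possibly half-published member.
        void refresh() {
            if (!isInitialized.load())
                return;
            readIterPool->refreshIterators();
        }
    };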
@@ -100,6 +100,7 @@ std::string getErrorReason(BackgroundErrorReason reason) {
 		return format("%d Unknown", reason);
 	}
 }
+
 // Background error handling is tested with Chaos test.
 // TODO: Test background error in simulation. RocksDB doesn't use flow IO in simulation, which limits our ability to
 // inject IO errors. We could implement rocksdb::FileSystem using flow IO to unblock simulation. Also, trace event is
@@ -144,6 +145,11 @@ private:
 	std::mutex mutex;
 };
 
+// Encapsulation of shared states.
+struct ShardedRocksDBState {
+	bool closing = false;
+};
+
 std::shared_ptr<rocksdb::Cache> rocksdb_block_cache = nullptr;
 
 rocksdb::Slice toSlice(StringRef s) {
@@ -328,7 +334,7 @@ public:
 		ASSERT(cf);
 		readRangeOptions.background_purge_on_iterator_cleanup = true;
 		readRangeOptions.auto_prefix_mode = (SERVER_KNOBS->ROCKSDB_PREFIX_LEN > 0);
-		TraceEvent("ReadIteratorPool")
+		TraceEvent(SevDebug, "ReadIteratorPool")
 		    .detail("Path", path)
 		    .detail("KnobRocksDBReadRangeReuseIterators", SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS)
 		    .detail("KnobRocksDBPrefixLen", SERVER_KNOBS->ROCKSDB_PREFIX_LEN);
@@ -407,25 +413,6 @@ private:
 	uint64_t iteratorsReuseCount;
 };
 
-ACTOR Future<Void> refreshReadIteratorPool(
-    std::unordered_map<std::string, std::shared_ptr<PhysicalShard>>* physicalShards) {
-	state Reference<Histogram> histogram = Histogram::getHistogram(
-	    ROCKSDBSTORAGE_HISTOGRAM_GROUP, "TimeSpentRefreshIterators"_sr, Histogram::Unit::microseconds);
-
-	if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
-		loop {
-			wait(delay(SERVER_KNOBS->ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME));
-
-			double startTime = timer_monotonic();
-			for (auto& [_, shard] : *physicalShards) {
-				shard->readIterPool->refreshIterators();
-			}
-			histogram->sample(timer_monotonic() - startTime);
-		}
-	}
-	return Void();
-}
-
 ACTOR Future<Void> flowLockLogger(const FlowLock* readLock, const FlowLock* fetchLock) {
 	loop {
 		wait(delay(SERVER_KNOBS->ROCKSDB_METRICS_DELAY));
@@ -450,7 +437,7 @@ struct DataShard {
 // PhysicalShard represent a collection of logical shards. A PhysicalShard could have one or more DataShards. A
 // PhysicalShard is stored as a column family in rocksdb. Each PhysicalShard has its own iterator pool.
 struct PhysicalShard {
-	PhysicalShard(rocksdb::DB* db, std::string id) : db(db), id(id) {}
+	PhysicalShard(rocksdb::DB* db, std::string id) : db(db), id(id), isInitialized(false) {}
 
 	rocksdb::Status init() {
 		if (cf) {
@@ -462,10 +449,16 @@ struct PhysicalShard {
 			return status;
 		}
 		readIterPool = std::make_shared<ReadIteratorPool>(db, cf, id);
+		this->isInitialized.store(true);
 		return status;
 	}
 
-	bool initialized() { return cf != nullptr; }
+	bool initialized() { return this->isInitialized.load(); }
+
+	void refreshReadIteratorPool() {
+		ASSERT(this->readIterPool != nullptr);
+		this->readIterPool->refreshIterators();
+	}
 
 	~PhysicalShard() {
 		if (!deletePending)
@@ -487,6 +480,7 @@ struct PhysicalShard {
 	std::unordered_map<std::string, std::unique_ptr<DataShard>> dataShards;
 	std::shared_ptr<ReadIteratorPool> readIterPool;
 	bool deletePending = false;
+	std::atomic<bool> isInitialized;
 };
 
 // Manages physical shards and maintains logical shard mapping.
@@ -1310,6 +1304,39 @@ ACTOR Future<Void> rocksDBAggregatedMetricsLogger(std::shared_ptr<RocksDBMetrics
 struct ShardedRocksDBKeyValueStore : IKeyValueStore {
 	using CF = rocksdb::ColumnFamilyHandle*;
 
+	ACTOR static Future<Void> refreshReadIteratorPools(
+	    std::shared_ptr<ShardedRocksDBState> rState,
+	    Future<Void> readyToStart,
+	    std::unordered_map<std::string, std::shared_ptr<PhysicalShard>>* physicalShards) {
+		state Reference<Histogram> histogram = Histogram::getHistogram(
+		    ROCKSDBSTORAGE_HISTOGRAM_GROUP, "TimeSpentRefreshIterators"_sr, Histogram::Unit::microseconds);
+
+		if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
+			try {
+				wait(readyToStart);
+				loop {
+					wait(delay(SERVER_KNOBS->ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME));
+					if (rState->closing) {
+						break;
+					}
+					double startTime = timer_monotonic();
+					for (auto& [_, shard] : *physicalShards) {
+						if (shard->initialized()) {
+							shard->refreshReadIteratorPool();
+						}
+					}
+					histogram->sample(timer_monotonic() - startTime);
+				}
+			} catch (Error& e) {
+				if (e.code() != error_code_actor_cancelled) {
+					TraceEvent(SevError, "RefreshReadIteratorPoolError").errorUnsuppressed(e);
+				}
+			}
+		}
+
+		return Void();
+	}
+
 	struct Writer : IThreadPoolReceiver {
 		int threadIndex;
 		std::unordered_map<uint32_t, rocksdb::ColumnFamilyHandle*>* columnFamilyMap;
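
Note the three guards in the actor above: it waits for readyToStart (the store's openFuture) before scanning any shard, it stops once rState->closing is set, and it refreshes only shards whose initialized() flag has already been published. The owning side of this pattern, under the names this diff uses, is a one-liner at open time:

    refreshHolder = refreshReadIteratorPools(rState, openFuture, shardManager.getAllShards());

so the periodic work is tied to a member future that the store can cancel (see the doClose hunk below).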
@@ -1360,15 +1387,6 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
 				return;
 			}
 
-			if (g_network->isSimulated()) {
-				a.metrics = refreshReadIteratorPool(a.shardManager->getAllShards());
-			} else {
-				onMainThread([&] {
-					a.metrics = refreshReadIteratorPool(a.shardManager->getAllShards());
-					return Future<bool>(true);
-				}).blockUntilReady();
-			}
-
 			TraceEvent(SevInfo, "RocksDB").detail("Method", "Open");
 			a.done.send(Void());
 		}
@@ -1865,7 +1883,8 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
 
 	// Persist shard mappinng key range should not be in shardMap.
 	explicit ShardedRocksDBKeyValueStore(const std::string& path, UID id)
-	  : path(path), id(id), readSemaphore(SERVER_KNOBS->ROCKSDB_READ_QUEUE_SOFT_MAX),
+	  : rState(std::make_shared<ShardedRocksDBState>()), path(path), id(id),
+	    readSemaphore(SERVER_KNOBS->ROCKSDB_READ_QUEUE_SOFT_MAX),
 	    fetchSemaphore(SERVER_KNOBS->ROCKSDB_FETCH_QUEUE_SOFT_MAX),
 	    numReadWaiters(SERVER_KNOBS->ROCKSDB_READ_QUEUE_HARD_MAX - SERVER_KNOBS->ROCKSDB_READ_QUEUE_SOFT_MAX),
 	    numFetchWaiters(SERVER_KNOBS->ROCKSDB_FETCH_QUEUE_HARD_MAX - SERVER_KNOBS->ROCKSDB_FETCH_QUEUE_SOFT_MAX),
@@ -1898,8 +1917,10 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
 	Future<Void> getError() const override { return errorFuture; }
 
 	ACTOR static void doClose(ShardedRocksDBKeyValueStore* self, bool deleteOnClose) {
+		self->rState->closing = true;
 		// The metrics future retains a reference to the DB, so stop it before we delete it.
 		self->metrics.reset();
+		self->refreshHolder.cancel();
 
 		wait(self->readThreads->stop());
 		auto a = new Writer::CloseAction(&self->shardManager, deleteOnClose);
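
Teardown order in doClose matters for the race being fixed: the shared closing flag is set first so an in-flight refresh pass exits at its next check, then the metrics future is dropped and refreshHolder is cancelled, all before the read threads are stopped and the CloseAction is posted to the writer thread. After that point nothing can call into a PhysicalShard's iterator pool while the store is being deleted.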
@@ -1930,6 +1951,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
 		auto a = std::make_unique<Writer::OpenAction>(
 		    &shardManager, metrics, &readSemaphore, &fetchSemaphore, errorListener);
 		openFuture = a->done.getFuture();
+		this->refreshHolder = refreshReadIteratorPools(this->rState, openFuture, shardManager.getAllShards());
 		writeThread->post(a.release());
 		return openFuture;
 	}
@@ -2092,6 +2114,8 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
 	// Used for debugging shard mapping issue.
 	std::vector<std::pair<KeyRange, std::string>> getDataMapping() { return shardManager.getDataMapping(); }
 
+	std::shared_ptr<ShardedRocksDBState> rState;
+	ShardManager shardManager;
 	std::shared_ptr<RocksDBMetrics> rocksDBMetrics;
 	std::string path;
 	const std::string dataPath;
@@ -2101,7 +2125,6 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
 	std::shared_ptr<RocksDBErrorListener> errorListener;
 	Future<Void> errorFuture;
 	Promise<Void> closePromise;
-	ShardManager shardManager;
 	Future<Void> openFuture;
 	Optional<Future<Void>> metrics;
 	FlowLock readSemaphore;
@@ -2109,6 +2132,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
 	FlowLock fetchSemaphore;
 	int numFetchWaiters;
 	Counters counters;
+	Future<Void> refreshHolder;
 };
 
 } // namespace
@@ -190,7 +190,7 @@ if(WITH_PYTHON)
   add_fdb_test(TEST_FILES noSim/RandomUnitTests.toml UNIT)
   if (WITH_ROCKSDB_EXPERIMENTAL)
     add_fdb_test(TEST_FILES noSim/KeyValueStoreRocksDBTest.toml)
-    add_fdb_test(TEST_FILES noSim/ShardedRocksDBTest.toml IGNORE) # TODO: re-enable once storage engine is stable.
+    add_fdb_test(TEST_FILES noSim/ShardedRocksDBTest.toml UNIT)
     add_fdb_test(TEST_FILES fast/PhysicalShardMove.toml)
   else()
     add_fdb_test(TEST_FILES noSim/KeyValueStoreRocksDBTest.toml IGNORE)