mirror of https://github.com/apple/foundationdb.git
synced 2025-05-15 02:18:39 +08:00

more pruning bug fixes

This commit is contained in:
parent d46e551f11
commit 2c88a189a9
@@ -2092,7 +2092,7 @@ ACTOR Future<GranuleFiles> loadHistoryFiles(Reference<BlobManagerData> bmData, U
  * also removes the history entry for this granule from the system keyspace
  * TODO: ensure cannot fully delete granule that is still splitting!
  */
-ACTOR Future<Void> fullyDeleteGranule(Reference<BlobManagerData> self, UID granuleId, KeyRef historyKey) {
+ACTOR Future<Void> fullyDeleteGranule(Reference<BlobManagerData> self, UID granuleId, Key historyKey) {
     if (BM_DEBUG) {
         fmt::print("Fully deleting granule {0}: init\n", granuleId.toString());
     }
@@ -2194,8 +2194,9 @@ ACTOR Future<Void> partiallyDeleteGranule(Reference<BlobManagerData> self, UID g
         }
     }

-    // we would have only partially deleted the granule if such a snapshot existed
-    ASSERT(latestSnapshotVersion != invalidVersion);
+    if (latestSnapshotVersion == invalidVersion) {
+        return Void();
+    }

     // delete all delta files older than latestSnapshotVersion
     for (auto deltaFile : files.deltaFiles) {
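The hunk above downgrades a hard assertion to an early return: if a granule has no snapshot at or below the prune cutoff (for instance because it was already pruned), partial deletion becomes a no-op instead of crashing. A minimal standalone sketch of that guard, using hypothetical names rather than the actual Flow types:

#include <cstdint>
#include <optional>
#include <vector>

using Version = int64_t;
constexpr Version invalidVersion = -1; // stand-in for FDB's sentinel

// Hypothetical model: find the newest snapshot at or below pruneVersion.
// Returning nullopt instead of asserting lets the caller treat "nothing to
// partially delete" as a clean no-op, which is what the diff now does.
std::optional<Version> latestSnapshotAtOrBefore(const std::vector<Version>& snapshotVersions,
                                                Version pruneVersion) {
    Version latest = invalidVersion;
    for (Version v : snapshotVersions) {
        if (v <= pruneVersion && v > latest) {
            latest = v;
        }
    }
    if (latest == invalidVersion) {
        return std::nullopt; // caller returns early: nothing to delete
    }
    return latest;
}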
@@ -2277,7 +2278,7 @@ ACTOR Future<Void> pruneRange(Reference<BlobManagerData> self, KeyRangeRef range
     state std::queue<std::tuple<KeyRange, Version, Version>> historyEntryQueue;

     // stacks of <granuleId, historyKey> and <granuleId> to track which granules to delete
-    state std::vector<std::tuple<UID, KeyRef>> toFullyDelete;
+    state std::vector<std::tuple<UID, Key>> toFullyDelete;
     state std::vector<UID> toPartiallyDelete;

     // track which granules we have already added to traversal
@@ -2443,7 +2444,7 @@ ACTOR Future<Void> pruneRange(Reference<BlobManagerData> self, KeyRangeRef range
     }
     for (i = toFullyDelete.size() - 1; i >= 0; --i) {
         UID granuleId;
-        KeyRef historyKey;
+        Key historyKey;
         std::tie(granuleId, historyKey) = toFullyDelete[i];
         // FIXME: consider batching into a single txn (need to take care of txn size limit)
         if (BM_DEBUG) {
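The three KeyRef -> Key changes above (the fullyDeleteGranule signature, the toFullyDelete vector, and the local variable) all address the same lifetime hazard: KeyRef is a non-owning view into arena-backed memory, while Key owns its bytes. A rough standalone model, with std::string_view standing in for KeyRef and std::string for Key:

#include <iostream>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>

int main() {
    // toFullyDelete outlives the transaction whose arena backs the key bytes,
    // so each element must own its copy of the history key (Key, not KeyRef).
    std::vector<std::tuple<int, std::string>> toFullyDelete;

    {
        // Pretend this string is arena memory that dies with the scope.
        std::string arenaBytes = "granule-history-key";
        std::string_view ref = arenaBytes;               // KeyRef-like view
        toFullyDelete.emplace_back(1, std::string(ref)); // Key-like copy: safe
    } // "arena" freed here; a stored string_view would now dangle

    auto& [granuleId, historyKey] = toFullyDelete.front();
    std::cout << granuleId << " -> " << historyKey << "\n";
}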
@@ -1889,31 +1889,34 @@ ACTOR Future<Void> blobGranuleLoadHistory(Reference<BlobWorkerData> bwData,
     stopVersion = prev.value().isValid() ? prev.value()->startVersion : invalidVersion;

     state std::vector<Reference<GranuleHistoryEntry>> historyEntryStack;
+    state bool foundHistory = true;

     // while the start version of the current granule's parent not past the last known start version,
     // walk backwards
     while (curHistory.value.parentGranules.size() > 0 &&
            curHistory.value.parentGranules[0].second >= stopVersion) {
         state GranuleHistory next;

         loop {
             try {
                 Optional<Value> v = wait(tr.get(blobGranuleHistoryKeyFor(
                     curHistory.value.parentGranules[0].first, curHistory.value.parentGranules[0].second)));
                 if (!v.present()) {
-                    printf("No granule history present for [%s - %s) @ %lld!!\n",
-                           curHistory.value.parentGranules[0].first.begin.printable().c_str(),
-                           curHistory.value.parentGranules[0].first.end.printable().c_str(),
-                           curHistory.value.parentGranules[0].first);
-                }
-                ASSERT(v.present());
-                next = GranuleHistory(curHistory.value.parentGranules[0].first,
-                                      curHistory.value.parentGranules[0].second,
-                                      decodeBlobGranuleHistoryValue(v.get()));
+                    foundHistory = false;
+                } else {
+                    next = GranuleHistory(curHistory.value.parentGranules[0].first,
+                                          curHistory.value.parentGranules[0].second,
+                                          decodeBlobGranuleHistoryValue(v.get()));
+                }

                 break;
             } catch (Error& e) {
                 wait(tr.onError(e));
             }
         }
+        if (!foundHistory) {
+            break;
+        }

         ASSERT(next.version != invalidVersion);
         // granule next.granuleID goes from the version range [next.version, curHistory.version]
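With foundHistory, the backwards walk over granule history now tolerates a missing parent entry (which pruning can legitimately remove) instead of asserting that every link exists. A simplified, self-contained model of the walk, with hypothetical types in place of the Flow actor machinery:

#include <cstdint>
#include <map>
#include <vector>

using Version = int64_t;
constexpr Version invalidVersion = -1;

// Hypothetical history entry: keyed by start version, each records its
// parent's start version (invalidVersion at the root).
struct HistoryEntry {
    Version startVersion = invalidVersion;
    Version parentStartVersion = invalidVersion;
};

// Walk parent links backwards, stopping cleanly when an entry has been pruned
// out of `history` (foundHistory == false) rather than asserting presence.
std::vector<HistoryEntry> loadHistoryChain(const std::map<Version, HistoryEntry>& history,
                                           HistoryEntry cur,
                                           Version stopVersion) {
    std::vector<HistoryEntry> stack;
    stack.push_back(cur);
    bool foundHistory = true;
    while (cur.parentStartVersion != invalidVersion && cur.parentStartVersion >= stopVersion) {
        auto it = history.find(cur.parentStartVersion);
        if (it == history.end()) {
            foundHistory = false; // parent entry was pruned; stop the walk
            break;
        }
        cur = it->second;
        stack.push_back(cur);
    }
    // The real actor then reconciles stopVersion against foundHistory (next
    // hunk) instead of requiring the chain to be complete.
    (void)foundHistory;
    return stack;
}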
@@ -1924,8 +1927,14 @@ ACTOR Future<Void> blobGranuleLoadHistory(Reference<BlobWorkerData> bwData,

     if (!historyEntryStack.empty()) {
         Version oldestStartVersion = historyEntryStack.back()->startVersion;
+        if (!foundHistory && stopVersion != invalidVersion) {
+            stopVersion = oldestStartVersion;
+        }
         ASSERT(stopVersion == oldestStartVersion || stopVersion == invalidVersion);
     } else {
+        if (!foundHistory && stopVersion != invalidVersion) {
+            stopVersion = invalidVersion;
+        }
         ASSERT(stopVersion == invalidVersion);
     }

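This hunk reconciles stopVersion with whatever the possibly truncated walk actually recovered, so the existing assertions still hold. Distilled into a plain function (illustrative only):

#include <cstdint>
#include <vector>

using Version = int64_t;
constexpr Version invalidVersion = -1;

// If the walk ended early (foundHistory == false), pull stopVersion back to
// match what was actually recovered so the downstream invariants hold.
Version reconcileStopVersion(bool foundHistory,
                             Version stopVersion,
                             const std::vector<Version>& stackStartVersions) {
    if (!stackStartVersions.empty()) {
        Version oldestStartVersion = stackStartVersions.back();
        if (!foundHistory && stopVersion != invalidVersion) {
            stopVersion = oldestStartVersion;
        }
        // holds: stopVersion == oldestStartVersion || stopVersion == invalidVersion
    } else if (!foundHistory && stopVersion != invalidVersion) {
        stopVersion = invalidVersion; // nothing recovered at all
    }
    return stopVersion;
}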
@@ -1947,11 +1956,13 @@ ACTOR Future<Void> blobGranuleLoadHistory(Reference<BlobWorkerData> bwData,
     while (i >= 0) {
         auto prevRanges = bwData->granuleHistory.rangeContaining(historyEntryStack[i]->range.begin);

-        // sanity check
-        ASSERT(!prevRanges.value().isValid() ||
-               prevRanges.value()->endVersion == historyEntryStack[i]->startVersion);
-
-        historyEntryStack[i]->parentGranule = prevRanges.value();
+        if (prevRanges.value().isValid() &&
+            prevRanges.value()->endVersion != historyEntryStack[i]->startVersion) {
+            historyEntryStack[i]->parentGranule = Reference<GranuleHistoryEntry>();
+        } else {
+            historyEntryStack[i]->parentGranule = prevRanges.value();
+        }
+
         bwData->granuleHistory.insert(historyEntryStack[i]->range, historyEntryStack[i]);
         i--;
     }
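Similarly, the cached entry for the neighboring range may no longer line up with this entry's start version once history has been pruned, so the code now drops the parent link instead of asserting the versions match. A compact model using std::shared_ptr in place of Reference<GranuleHistoryEntry>:

#include <memory>

struct GranuleHistoryEntry {
    long startVersion = 0;
    long endVersion = 0;
    std::shared_ptr<GranuleHistoryEntry> parentGranule; // Reference<> stand-in
};

// If the cached previous-range entry does not line up with this entry's start
// version (an intermediate granule was pruned), leave the parent unset rather
// than asserting the versions match.
void linkParent(GranuleHistoryEntry& entry,
                const std::shared_ptr<GranuleHistoryEntry>& prevRange) {
    if (prevRange && prevRange->endVersion != entry.startVersion) {
        entry.parentGranule = nullptr; // gap in history: no parent link
    } else {
        entry.parentGranule = prevRange; // may itself be empty, like isValid()
    }
}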
@@ -2199,7 +2210,11 @@ ACTOR Future<Void> doBlobGranuleFileRequest(Reference<BlobWorkerData> bwData, Bl
             when(wait(metadata->cancelled.getFuture())) { throw wrong_shard_server(); }
         }

-        ASSERT(!chunkFiles.snapshotFiles.empty());
+        if (chunkFiles.snapshotFiles.empty()) {
+            // a snapshot file must have been pruned
+            throw blob_granule_transaction_too_old();
+        }
+
         ASSERT(!chunkFiles.deltaFiles.empty());
         ASSERT(chunkFiles.deltaFiles.back().version > req.readVersion);
         if (chunkFiles.snapshotFiles.front().version > req.readVersion) {
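On the read path, an empty snapshot list now maps to blob_granule_transaction_too_old, a client-visible error, rather than an assertion failure: pruning can validly remove every snapshot an old read would need. A hedged sketch of that policy (the names and exception type are stand-ins, not the FDB error machinery):

#include <cstdint>
#include <stdexcept>
#include <vector>

using Version = int64_t;

// Stand-in for FDB's blob_granule_transaction_too_old error.
struct BlobGranuleTransactionTooOld : std::runtime_error {
    BlobGranuleTransactionTooOld() : std::runtime_error("blob_granule_transaction_too_old") {}
};

// Returns the newest snapshot version at or below readVersion; throws when no
// such snapshot survives (e.g. pruning removed them), instead of asserting.
Version chooseSnapshot(const std::vector<Version>& sortedSnapshotVersions, Version readVersion) {
    Version chosen = -1;
    for (Version v : sortedSnapshotVersions) {
        if (v <= readVersion) {
            chosen = v; // ascending order: ends at the newest qualifying one
        }
    }
    if (chosen == -1) {
        throw BlobGranuleTransactionTooOld();
    }
    return chosen;
}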
@@ -39,8 +39,6 @@

 #define BGV_DEBUG true

-Version dbgPruneVersion = 0;
-
 /*
  * This workload is designed to verify the correctness of the blob data produced by the blob workers.
  * As a read-only validation workload, it can piggyback off of other write or read/write workloads.
@@ -64,6 +62,7 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
     int64_t rowsRead = 0;
     int64_t bytesRead = 0;
     std::vector<Future<Void>> clients;
+    bool enablePruning;

     DatabaseConfiguration config;

@@ -79,6 +78,7 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
         timeTravelLimit = getOption(options, LiteralStringRef("timeTravelLimit"), testDuration);
         timeTravelBufferSize = getOption(options, LiteralStringRef("timeTravelBufferSize"), 100000000);
         threads = getOption(options, LiteralStringRef("threads"), 1);
+        enablePruning = getOption(options, LiteralStringRef("enablePruning"), false /*sharedRandomNumber % 2 == 0*/);
         ASSERT(threads >= 1);

         if (BGV_DEBUG) {
@@ -454,13 +454,19 @@ struct BlobGranuleVerifierWorkload : TestWorkload {

     try {
         state Version newPruneVersion = 0;
-        state bool doPruning =
-            allowPruning && prevPruneVersion < oldRead.v && deterministicRandom()->random01() < 0.5;
+        state bool doPruning = allowPruning && deterministicRandom()->random01() < 0.5;
         if (doPruning) {
-            newPruneVersion = deterministicRandom()->randomInt64(prevPruneVersion, oldRead.v);
-            prevPruneVersion = std::max(prevPruneVersion, newPruneVersion);
-            dbgPruneVersion = prevPruneVersion;
-            wait(self->pruneAtVersion(cx, normalKeys, newPruneVersion, false));
+            Version maxPruneVersion = oldRead.v;
+            for (auto& it : timeTravelChecks) {
+                maxPruneVersion = std::min(it.second.v, maxPruneVersion);
+            }
+            if (prevPruneVersion < maxPruneVersion) {
+                newPruneVersion = deterministicRandom()->randomInt64(prevPruneVersion, maxPruneVersion);
+                prevPruneVersion = std::max(prevPruneVersion, newPruneVersion);
+                wait(self->pruneAtVersion(cx, normalKeys, newPruneVersion, false));
+            } else {
+                doPruning = false;
+            }
         }
         std::pair<RangeResult, Standalone<VectorRef<BlobGranuleChunkRef>>> reReadResult =
             wait(self->readFromBlob(cx, self, oldRead.range, oldRead.v));
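The verifier previously capped the prune version only by the current old read (oldRead.v); the new code also takes the minimum over every queued time-travel check, so pruning never invalidates a read the workload still intends to verify. The selection logic, distilled into a standalone function (types are simplified stand-ins):

#include <algorithm>
#include <cstdint>
#include <map>
#include <optional>

using Version = int64_t;

struct PendingRead { Version v = 0; }; // stand-in for the workload's queued reads

// The prune point must not pass the version of ANY time-travel read still
// queued for checking, or pruning would invalidate a read we plan to verify.
std::optional<Version> pruneUpperBound(Version prevPruneVersion,
                                       Version currentOldReadVersion,
                                       const std::map<double, PendingRead>& timeTravelChecks) {
    Version maxPruneVersion = currentOldReadVersion;
    for (const auto& [when, read] : timeTravelChecks) {
        maxPruneVersion = std::min(read.v, maxPruneVersion);
    }
    if (prevPruneVersion >= maxPruneVersion) {
        return std::nullopt; // no safe room to advance the prune point
    }
    // the workload picks newPruneVersion in [prevPruneVersion, maxPruneVersion)
    return maxPruneVersion;
}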
@@ -487,7 +493,7 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
                 }
             }
         } catch (Error& e) {
-            if (e.code() == error_code_blob_granule_transaction_too_old && oldRead.v >= dbgPruneVersion) {
+            if (e.code() == error_code_blob_granule_transaction_too_old) {
                 self->timeTravelTooOld++;
                 // TODO: add debugging info for when this is a failure
             }
@@ -532,15 +538,14 @@ struct BlobGranuleVerifierWorkload : TestWorkload {
     Future<Void> start(Database const& cx) override {
         clients.reserve(threads + 1);
         clients.push_back(timeout(findGranules(cx, this), testDuration, Void()));
-        for (int i = 0; i < threads; i++) {
+        if (enablePruning && clientId == 0) {
             clients.push_back(
-                timeout(reportErrors(
-                            // TODO change back
-                            verifyGranules(
-                                cx, this, false /*clientId == 0 && i == 0 && deterministicRandom()->random01() < 0.5*/),
-                            "BlobGranuleVerifier"),
-                        testDuration,
-                        Void()));
+                timeout(reportErrors(verifyGranules(cx, this, true), "BlobGranuleVerifier"), testDuration, Void()));
+        } else if (!enablePruning) {
+            for (int i = 0; i < threads; i++) {
+                clients.push_back(timeout(
+                    reportErrors(verifyGranules(cx, this, false), "BlobGranuleVerifier"), testDuration, Void()));
+            }
         }
         return delay(testDuration);
     }
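start() now launches a single verifier on client 0 when pruning is enabled, and the usual `threads` verifiers per client otherwise (so with pruning enabled, non-zero clients run no verifier at all). A rough model of that control flow follows; std::thread stands in for the Flow futures in `clients`, and treating verifyGranules' bool as a strict-checking flag is an assumption, since the hunk does not name the parameter:

#include <thread>
#include <vector>

// enablePruning on: only client 0 runs one strict verifier (it also drives
// pruning). enablePruning off: every client runs `threads` relaxed verifiers.
void startVerifiers(bool enablePruning, int clientId, int threads,
                    void (*verifyGranules)(bool strictPruneChecking)) {
    std::vector<std::thread> clients;
    if (enablePruning && clientId == 0) {
        clients.emplace_back(verifyGranules, true);
    } else if (!enablePruning) {
        for (int i = 0; i < threads; i++) {
            clients.emplace_back(verifyGranules, false);
        }
    }
    for (auto& t : clients) {
        t.join();
    }
}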