/*
 * BlobGranuleValidation.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fdbserver/BlobGranuleValidation.actor.h"
#include "flow/actorcompiler.h" // has to be last include

// NOTE(review): in this copy of the file the template argument lists of Future, Standalone,
// Reference, Optional and std::pair look like they were stripped (e.g. `Future>`), and a few
// printf literals may have lost an angle-bracketed placeholder. The code below is kept exactly
// as found; confirm the real signatures against BlobGranuleValidation.actor.h before building.

// Reads all rows of `range` directly from the database and returns them together with the
// read version they were read at, as std::pair(rows, version).
//
// The range is read page by page (continuing after the last returned key while `r.more` is
// set). Every page must report the same read version; if a page comes back at a different
// version, everything accumulated so far is discarded and the read restarts from the beginning
// of `range`. Errors are passed to tr.onError() and the loop then tries again
// (presumably onError rethrows errors that are not retryable - confirm).
ACTOR Future> readFromFDB(Database cx, KeyRange range) {
    // true until the first page of the current attempt has fixed the version `v`
    state bool first = true;
    state Version v;
    state RangeResult out;
    state Transaction tr(cx);
    // remaining portion of `range` still to be read in the current attempt
    state KeyRange currentRange = range;
    loop {
        // set on every iteration, since the transaction may have been reset since the last one
        tr.setOption(FDBTransactionOptions::RAW_ACCESS);
        try {
            state RangeResult r = wait(tr.getRange(currentRange, CLIENT_KNOBS->TOO_MANY));
            Version grv = wait(tr.getReadVersion());
            // need consistent version snapshot of range
            if (first) {
                v = grv;
                first = false;
            } else if (v != grv) {
                // reset the range and restart the read at a higher version
                first = true;
                out = RangeResult();
                currentRange = range;
                tr.reset();
                continue;
            }
            // keep the page's memory alive for as long as `out` references it
            out.arena().dependsOn(r.arena());
            out.append(out.arena(), r.begin(), r.size());
            if (r.more) {
                // resume immediately after the last key that was returned
                currentRange = KeyRangeRef(keyAfter(r.back().key), currentRange.end);
            } else {
                break;
            }
        } catch (Error& e) {
            wait(tr.onError(e));
        }
    }
    return std::pair(out, v);
}

// Reads the rows of `range` through the blob granule path and returns std::pair(rows, chunks).
//
// First asks the transaction for the granule chunks covering `range` for the version window
// (beginVersion, readVersion), retrying via tr.onError() on failure. Then each chunk is
// materialized with readBlobGranule() using `bstore`, and its rows are appended to the result in
// the order the chunks were returned. For every chunk it asserts that a tenant prefix is present
// exactly when `tenantName` is present.
//
// FIXME: typedef this pair type and/or chunk list
ACTOR Future>>> readFromBlob(
    Database cx,
    Reference bstore,
    KeyRange range,
    Version beginVersion,
    Version readVersion,
    Optional tenantName) {
    state RangeResult out;
    state Standalone> chunks;
    state Transaction tr(cx, tenantName);

    // Step 1: fetch the chunk descriptions, retrying until it succeeds.
    loop {
        try {
            Standalone> chunks_ = wait(tr.readBlobGranules(range, beginVersion, readVersion));
            chunks = chunks_;
            break;
        } catch (Error& e) {
            wait(tr.onError(e));
        }
    }

    // Step 2: read every chunk's rows and concatenate them.
    for (const BlobGranuleChunkRef& chunk : chunks) {
        ASSERT(chunk.tenantPrefix.present() == tenantName.present());
        RangeResult chunkRows = wait(readBlobGranule(chunk, range, beginVersion, readVersion, bstore));
        // keep the chunk's memory alive for as long as `out` references it
        out.arena().dependsOn(chunkRows.arena());
        out.append(out.arena(), chunkRows.begin(), chunkRows.size());
    }
    return std::pair(out, chunks);
}

// Compares the rows read from FDB (`fdb`) with the rows read from blob granules (`blob.first`).
//
// Returns true when the two results compare equal. On a mismatch it logs a SevError
// "GranuleMismatch" trace event carrying the range, version and both sizes, and returns false.
// When `debug` is set it additionally prints to stdout: a summary line, the index of the first
// difference, the last matching pair before it (if any), the entry each side has at that index,
// and finally the chunk list (`blob.second`) via printGranuleChunks().
bool compareFDBAndBlob(RangeResult fdb,
                       std::pair>> blob,
                       KeyRange range,
                       Version v,
                       bool debug) {
    bool correct = fdb == blob.first;
    if (!correct) {
        TraceEvent ev(SevError, "GranuleMismatch");
        ev.detail("RangeStart", range.begin)
            .detail("RangeEnd", range.end)
            .detail("Version", v)
            .detail("FDBSize", fdb.size())
            .detail("BlobSize", blob.first.size());
        if (debug) {
            fmt::print("\nMismatch for [{0} - {1}) @ {2}. F({3}) B({4}):\n",
                       range.begin.printable(),
                       range.end.printable(),
                       v,
                       fdb.size(),
                       blob.first.size());

            // most recent pair that was identical on both sides; empty until one is seen
            Optional lastCorrect;
            for (int i = 0; i < std::max(fdb.size(), blob.first.size()); i++) {
                // A difference is either one side running out of rows or the rows at i differing.
                if (i >= fdb.size() || i >= blob.first.size() || fdb[i] != blob.first[i]) {
                    printf(" Found mismatch at %d.\n", i);
                    if (lastCorrect.present()) {
                        printf(" last correct: %s=%s\n",
                               lastCorrect.get().key.printable().c_str(),
                               lastCorrect.get().value.printable().c_str());
                    }
                    if (i < fdb.size()) {
                        printf(" FDB: %s=%s\n", fdb[i].key.printable().c_str(), fdb[i].value.printable().c_str());
                    } else {
                        // FDB result has no row at this index
                        printf(" FDB: \n");
                    }
                    if (i < blob.first.size()) {
                        printf(" BLB: %s=%s\n",
                               blob.first[i].key.printable().c_str(),
                               blob.first[i].value.printable().c_str());
                    } else {
                        // blob result has no row at this index
                        printf(" BLB: \n");
                    }
                    printf("\n");
                    // only the first difference is reported
                    break;
                }
                // NOTE(review): reaching this point means index i exists on both sides and the
                // rows matched, so the first branch is always taken and the else looks unreachable.
                if (i < fdb.size()) {
                    lastCorrect = fdb[i];
                } else {
                    lastCorrect = blob.first[i];
                }
            }
            printGranuleChunks(blob.second);
        }
    }
    return correct;
}

// Prints a human-readable description of each chunk in `chunks` to stdout: its key range, its
// snapshot file (an empty string when none is present), each delta file, the number of entries
// in newDeltas with the version of the first and last entry when there are any, and the chunk's
// includedVersion.
void printGranuleChunks(const Standalone>& chunks) {
    printf("Chunks:\n");
    for (auto& chunk : chunks) {
        printf("[%s - %s)\n", chunk.keyRange.begin.printable().c_str(), chunk.keyRange.end.printable().c_str());

        printf(" SnapshotFile:\n %s\n",
               chunk.snapshotFile.present() ? chunk.snapshotFile.get().toString().c_str() : "");
        printf(" DeltaFiles:\n");
        for (auto& df : chunk.deltaFiles) {
            printf(" %s\n", df.toString().c_str());
        }
        printf(" Deltas: (%d)", chunk.newDeltas.size());
        if (chunk.newDeltas.size() > 0) {
            // version span taken from the first and last entries of newDeltas
            fmt::print(" with version [{0} - {1}]",
                       chunk.newDeltas[0].version,
                       chunk.newDeltas[chunk.newDeltas.size() - 1].version);
        }
        fmt::print(" IncludedVersion: {}\n", chunk.includedVersion);
    }
    printf("\n");
}

// Clears `range` and waits until the range is reported as exactly one blob granule range.
//
// Each pass asks for the blob granule ranges of `range` and returns once there is exactly one.
// Otherwise it may clear the range again (see the backoff note below), waits 30 seconds, resets
// the transaction and checks again. Errors are passed to tr.onError() and the pass is retried.
ACTOR Future clearAndAwaitMerge(Database cx, KeyRange range) {
    // clear key range and check whether it is merged or not, repeatedly
    state Transaction tr(cx);
    // number of passes left before the next clear; the first pass clears immediately
    state int reClearCount = 1;
    // do quadratic backoff on clear rate, b/c large keys can keep it not write-cold
    // NOTE(review): as written, the number of passes between clears grows by one after every
    // clear (1, 2, 3, ...); confirm that this is the intended meaning of "quadratic".
    state int reClearInterval = 1;
    loop {
        try {
            Standalone> ranges = wait(tr.getBlobGranuleRanges(range));
            if (ranges.size() == 1) {
                return Void();
            }
            // NOTE(review): this probe is hit on every pass that has not merged yet, including
            // passes that skip the clear below.
            CODE_PROBE(true, "ClearAndAwaitMerge doing clear");
            reClearCount--;
            if (reClearCount <= 0) {
                tr.clear(range);
                wait(tr.commit());
                fmt::print("ClearAndAwaitMerge cleared [{0} - {1}) @ {2}\n",
                           range.begin.printable(),
                           range.end.printable(),
                           tr.getCommittedVersion());
                // schedule the next clear further out than this one was
                reClearCount = reClearInterval;
                reClearInterval++;
            }
            wait(delay(30.0)); // sleep a bit before checking on merge again
            tr.reset();
        } catch (Error& e) {
            wait(tr.onError(e));
        }
    }
}