mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-14 18:02:31 +08:00
FastRestore:Debug trace for seg fault
This commit is contained in:
parent
01255b7ead
commit
7b7490efe7
@ -558,8 +558,8 @@ struct RestoreSendVersionedMutationsRequest : TimedRequest {
|
||||
|
||||
std::string toString() {
|
||||
std::stringstream ss;
|
||||
ss << "VersionBatchIndex:" << batchIndex << "RestoreAsset:" << asset.toString() << " msgIndex:" << msgIndex
|
||||
<< " isRangeFile:" << isRangeFile << " versionedMutations.size:" << versionedMutations.size();
|
||||
ss << "VersionBatchIndex:" << batchIndex << " msgIndex:" << msgIndex << " isRangeFile:" << isRangeFile
|
||||
<< " versionedMutations.size:" << versionedMutations.size() << " RestoreAsset:" << asset.toString();
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
|
@ -651,6 +651,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
||||
init( FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 20 + 1;}
|
||||
init( FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD, 10 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 30 + 1;}
|
||||
init( FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH, 2 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 30 + 1;}
|
||||
init( FASTRESTORE_NUM_TRACE_EVENTS, 100 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_TRACE_EVENTS = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 500 + 1;}
|
||||
|
||||
init( REDWOOD_DEFAULT_PAGE_SIZE, 4096 );
|
||||
init( REDWOOD_KVSTORE_CONCURRENT_READS, 64 );
|
||||
|
@ -583,6 +583,7 @@ public:
|
||||
int FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE; // number of load request to release at once
|
||||
int FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD; // we can send future VB requests if it is less than this knob
|
||||
int FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH; // number of future VB sendLoadingParam requests to process at once
|
||||
int FASTRESTORE_NUM_TRACE_EVENTS;
|
||||
|
||||
int REDWOOD_DEFAULT_PAGE_SIZE; // Page size for new Redwood files
|
||||
int REDWOOD_KVSTORE_CONCURRENT_READS; // Max number of simultaneous point or range reads in progress.
|
||||
|
@ -88,6 +88,7 @@ ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int
|
||||
break;
|
||||
}
|
||||
}
|
||||
TraceEvent("RestoreApplierCore", self->id()).detail("Request", requestTypeStr); // For debug only
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevWarn, "FastRestoreApplierError", self->id())
|
||||
.detail("RequestType", requestTypeStr)
|
||||
@ -121,7 +122,7 @@ ACTOR static Future<Void> handleSendMutationVectorRequest(RestoreSendVersionedMu
|
||||
// Trace when the receive phase starts at a VB and when it finishes.
|
||||
// This can help check if receiveMutations block applyMutation phase.
|
||||
// If so, we need more sophisticated scheduler to ensure priority execution
|
||||
printTrace = (batchData->receiveMutationReqs % 100 == 1);
|
||||
printTrace = (batchData->receiveMutationReqs % SERVER_KNOBS->FASTRESTORE_NUM_TRACE_EVENTS == 0);
|
||||
TraceEvent(printTrace ? SevInfo : SevFRDebugInfo, "FastRestoreApplierPhaseReceiveMutations", self->id())
|
||||
.detail("BatchIndex", req.batchIndex)
|
||||
.detail("RestoreAsset", req.asset.toString())
|
||||
@ -398,6 +399,7 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
|
||||
std::map<Key, std::map<Key, StagingKey>::iterator> incompleteStagingKeys;
|
||||
std::map<Key, StagingKey>::iterator stagingKeyIter = batchData->stagingKeys.begin();
|
||||
int numKeysInBatch = 0;
|
||||
int numGetTxns = 0;
|
||||
double delayTime = 0; // Start transactions at different time to avoid overwhelming FDB.
|
||||
for (; stagingKeyIter != batchData->stagingKeys.end(); stagingKeyIter++) {
|
||||
if (!stagingKeyIter->second.hasBaseValue()) {
|
||||
@ -407,12 +409,14 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
|
||||
if (numKeysInBatch == SERVER_KNOBS->FASTRESTORE_APPLIER_FETCH_KEYS_SIZE) {
|
||||
fGetAndComputeKeys.push_back(getAndComputeStagingKeys(incompleteStagingKeys, delayTime, cx, applierID,
|
||||
batchIndex, &batchData->counters));
|
||||
numGetTxns++;
|
||||
delayTime += SERVER_KNOBS->FASTRESTORE_TXN_EXTRA_DELAY;
|
||||
numKeysInBatch = 0;
|
||||
incompleteStagingKeys.clear();
|
||||
}
|
||||
}
|
||||
if (numKeysInBatch > 0) {
|
||||
numGetTxns++;
|
||||
fGetAndComputeKeys.push_back(getAndComputeStagingKeys(incompleteStagingKeys, delayTime, cx, applierID,
|
||||
batchIndex, &batchData->counters));
|
||||
}
|
||||
@ -420,7 +424,8 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
|
||||
TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID)
|
||||
.detail("BatchIndex", batchIndex)
|
||||
.detail("Step", "Compute the other staging keys")
|
||||
.detail("StagingKeys", batchData->stagingKeys.size());
|
||||
.detail("StagingKeys", batchData->stagingKeys.size())
|
||||
.detail("GetStagingKeyBatchTxns", numGetTxns);
|
||||
// Pre-compute pendingMutations to other keys in stagingKeys that has base value
|
||||
for (stagingKeyIter = batchData->stagingKeys.begin(); stagingKeyIter != batchData->stagingKeys.end();
|
||||
stagingKeyIter++) {
|
||||
|
@ -164,7 +164,10 @@ ACTOR Future<Void> recruitRestoreRoles(Reference<RestoreWorkerData> controllerWo
|
||||
break;
|
||||
}
|
||||
|
||||
TraceEvent("FastRestoreController", controllerData->id()).detail("WorkerNode", workerInterf.first);
|
||||
TraceEvent("FastRestoreController", controllerData->id())
|
||||
.detail("WorkerNode", workerInterf.first)
|
||||
.detail("NodeRole", role)
|
||||
.detail("NodeIndex", nodeIndex);
|
||||
requests.emplace_back(workerInterf.first,
|
||||
RestoreRecruitRoleRequest(controllerWorker->controllerInterf.get(), role, nodeIndex));
|
||||
nodeIndex++;
|
||||
|
@ -776,7 +776,7 @@ ACTOR Future<Void> sendMutationsToApplier(
|
||||
applierID, RestoreSendVersionedMutationsRequest(batchIndex, asset, msgIndex, isRangeFile,
|
||||
applierVersionedMutationsBuffer[applierID]));
|
||||
}
|
||||
TraceEvent(SevDebug, "FastRestoreLoaderSendMutationToApplier")
|
||||
TraceEvent(SevInfo, "FastRestoreLoaderSendMutationToApplier")
|
||||
.detail("MessageIndex", msgIndex)
|
||||
.detail("RestoreAsset", asset.toString())
|
||||
.detail("Requests", requests.size());
|
||||
@ -800,7 +800,7 @@ ACTOR Future<Void> sendMutationsToApplier(
|
||||
RestoreSendVersionedMutationsRequest(batchIndex, asset, msgIndex, isRangeFile,
|
||||
applierVersionedMutationsBuffer[applierID]));
|
||||
}
|
||||
TraceEvent(SevDebug, "FastRestoreLoaderSendMutationToApplier")
|
||||
TraceEvent(SevInfo, "FastRestoreLoaderSendMutationToApplier")
|
||||
.detail("MessageIndex", msgIndex)
|
||||
.detail("RestoreAsset", asset.toString())
|
||||
.detail("Requests", requests.size());
|
||||
|
@ -75,8 +75,9 @@ ACTOR Future<Void> handlerTerminateWorkerRequest(RestoreSimpleRequest req, Refer
|
||||
// Future: Multiple roles in a restore worker
|
||||
void handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Reference<RestoreWorkerData> self,
|
||||
ActorCollection* actors, Database cx) {
|
||||
// Already recruited a role
|
||||
// Future: Allow multiple restore roles on a restore worker. The design should easily allow this.
|
||||
ASSERT(!self->loaderInterf.present() || !self->applierInterf.present()); // Only one role per worker for now
|
||||
// Already recruited a role
|
||||
if (self->loaderInterf.present()) {
|
||||
ASSERT(req.role == RestoreRole::Loader);
|
||||
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Loader, self->loaderInterf.get()));
|
||||
|
Loading…
x
Reference in New Issue
Block a user