mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-15 18:32:18 +08:00
FastRestore:Debug trace for seg fault
This commit is contained in:
parent
01255b7ead
commit
7b7490efe7
@ -558,8 +558,8 @@ struct RestoreSendVersionedMutationsRequest : TimedRequest {
|
|||||||
|
|
||||||
std::string toString() {
|
std::string toString() {
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << "VersionBatchIndex:" << batchIndex << "RestoreAsset:" << asset.toString() << " msgIndex:" << msgIndex
|
ss << "VersionBatchIndex:" << batchIndex << " msgIndex:" << msgIndex << " isRangeFile:" << isRangeFile
|
||||||
<< " isRangeFile:" << isRangeFile << " versionedMutations.size:" << versionedMutations.size();
|
<< " versionedMutations.size:" << versionedMutations.size() << " RestoreAsset:" << asset.toString();
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -651,6 +651,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||||||
init( FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 20 + 1;}
|
init( FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 20 + 1;}
|
||||||
init( FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD, 10 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 30 + 1;}
|
init( FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD, 10 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 30 + 1;}
|
||||||
init( FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH, 2 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 30 + 1;}
|
init( FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH, 2 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 30 + 1;}
|
||||||
|
init( FASTRESTORE_NUM_TRACE_EVENTS, 100 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_TRACE_EVENTS = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 500 + 1;}
|
||||||
|
|
||||||
init( REDWOOD_DEFAULT_PAGE_SIZE, 4096 );
|
init( REDWOOD_DEFAULT_PAGE_SIZE, 4096 );
|
||||||
init( REDWOOD_KVSTORE_CONCURRENT_READS, 64 );
|
init( REDWOOD_KVSTORE_CONCURRENT_READS, 64 );
|
||||||
|
@ -583,6 +583,7 @@ public:
|
|||||||
int FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE; // number of load requests to release at once
|
int FASTRESTORE_SCHED_LOAD_REQ_BATCHSIZE; // number of load requests to release at once
|
||||||
int FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD; // we can send future VB requests if it is less than this knob
|
int FASTRESTORE_SCHED_INFLIGHT_SENDPARAM_THRESHOLD; // we can send future VB requests if it is less than this knob
|
||||||
int FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH; // number of future VB sendLoadingParam requests to process at once
|
int FASTRESTORE_SCHED_SEND_FUTURE_VB_REQS_BATCH; // number of future VB sendLoadingParam requests to process at once
|
||||||
|
int FASTRESTORE_NUM_TRACE_EVENTS;
|
||||||
|
|
||||||
int REDWOOD_DEFAULT_PAGE_SIZE; // Page size for new Redwood files
|
int REDWOOD_DEFAULT_PAGE_SIZE; // Page size for new Redwood files
|
||||||
int REDWOOD_KVSTORE_CONCURRENT_READS; // Max number of simultaneous point or range reads in progress.
|
int REDWOOD_KVSTORE_CONCURRENT_READS; // Max number of simultaneous point or range reads in progress.
|
||||||
|
@ -88,6 +88,7 @@ ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
TraceEvent("RestoreApplierCore", self->id()).detail("Request", requestTypeStr); // For debug only
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
TraceEvent(SevWarn, "FastRestoreApplierError", self->id())
|
TraceEvent(SevWarn, "FastRestoreApplierError", self->id())
|
||||||
.detail("RequestType", requestTypeStr)
|
.detail("RequestType", requestTypeStr)
|
||||||
@ -121,7 +122,7 @@ ACTOR static Future<Void> handleSendMutationVectorRequest(RestoreSendVersionedMu
|
|||||||
// Trace when the receive phase starts at a VB and when it finishes.
|
// Trace when the receive phase starts at a VB and when it finishes.
|
||||||
// This can help check if receiveMutations blocks the applyMutation phase.
|
// This can help check if receiveMutations blocks the applyMutation phase.
|
||||||
// If so, we need a more sophisticated scheduler to ensure priority execution.
|
// If so, we need a more sophisticated scheduler to ensure priority execution.
|
||||||
printTrace = (batchData->receiveMutationReqs % 100 == 1);
|
printTrace = (batchData->receiveMutationReqs % SERVER_KNOBS->FASTRESTORE_NUM_TRACE_EVENTS == 0);
|
||||||
TraceEvent(printTrace ? SevInfo : SevFRDebugInfo, "FastRestoreApplierPhaseReceiveMutations", self->id())
|
TraceEvent(printTrace ? SevInfo : SevFRDebugInfo, "FastRestoreApplierPhaseReceiveMutations", self->id())
|
||||||
.detail("BatchIndex", req.batchIndex)
|
.detail("BatchIndex", req.batchIndex)
|
||||||
.detail("RestoreAsset", req.asset.toString())
|
.detail("RestoreAsset", req.asset.toString())
|
||||||
@ -398,6 +399,7 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
|
|||||||
std::map<Key, std::map<Key, StagingKey>::iterator> incompleteStagingKeys;
|
std::map<Key, std::map<Key, StagingKey>::iterator> incompleteStagingKeys;
|
||||||
std::map<Key, StagingKey>::iterator stagingKeyIter = batchData->stagingKeys.begin();
|
std::map<Key, StagingKey>::iterator stagingKeyIter = batchData->stagingKeys.begin();
|
||||||
int numKeysInBatch = 0;
|
int numKeysInBatch = 0;
|
||||||
|
int numGetTxns = 0;
|
||||||
double delayTime = 0; // Start transactions at different time to avoid overwhelming FDB.
|
double delayTime = 0; // Start transactions at different time to avoid overwhelming FDB.
|
||||||
for (; stagingKeyIter != batchData->stagingKeys.end(); stagingKeyIter++) {
|
for (; stagingKeyIter != batchData->stagingKeys.end(); stagingKeyIter++) {
|
||||||
if (!stagingKeyIter->second.hasBaseValue()) {
|
if (!stagingKeyIter->second.hasBaseValue()) {
|
||||||
@ -407,12 +409,14 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
|
|||||||
if (numKeysInBatch == SERVER_KNOBS->FASTRESTORE_APPLIER_FETCH_KEYS_SIZE) {
|
if (numKeysInBatch == SERVER_KNOBS->FASTRESTORE_APPLIER_FETCH_KEYS_SIZE) {
|
||||||
fGetAndComputeKeys.push_back(getAndComputeStagingKeys(incompleteStagingKeys, delayTime, cx, applierID,
|
fGetAndComputeKeys.push_back(getAndComputeStagingKeys(incompleteStagingKeys, delayTime, cx, applierID,
|
||||||
batchIndex, &batchData->counters));
|
batchIndex, &batchData->counters));
|
||||||
|
numGetTxns++;
|
||||||
delayTime += SERVER_KNOBS->FASTRESTORE_TXN_EXTRA_DELAY;
|
delayTime += SERVER_KNOBS->FASTRESTORE_TXN_EXTRA_DELAY;
|
||||||
numKeysInBatch = 0;
|
numKeysInBatch = 0;
|
||||||
incompleteStagingKeys.clear();
|
incompleteStagingKeys.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (numKeysInBatch > 0) {
|
if (numKeysInBatch > 0) {
|
||||||
|
numGetTxns++;
|
||||||
fGetAndComputeKeys.push_back(getAndComputeStagingKeys(incompleteStagingKeys, delayTime, cx, applierID,
|
fGetAndComputeKeys.push_back(getAndComputeStagingKeys(incompleteStagingKeys, delayTime, cx, applierID,
|
||||||
batchIndex, &batchData->counters));
|
batchIndex, &batchData->counters));
|
||||||
}
|
}
|
||||||
@ -420,7 +424,8 @@ ACTOR static Future<Void> precomputeMutationsResult(Reference<ApplierBatchData>
|
|||||||
TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID)
|
TraceEvent("FastRestoreApplerPhasePrecomputeMutationsResult", applierID)
|
||||||
.detail("BatchIndex", batchIndex)
|
.detail("BatchIndex", batchIndex)
|
||||||
.detail("Step", "Compute the other staging keys")
|
.detail("Step", "Compute the other staging keys")
|
||||||
.detail("StagingKeys", batchData->stagingKeys.size());
|
.detail("StagingKeys", batchData->stagingKeys.size())
|
||||||
|
.detail("GetStagingKeyBatchTxns", numGetTxns);
|
||||||
// Pre-compute pendingMutations to other keys in stagingKeys that have a base value
|
// Pre-compute pendingMutations to other keys in stagingKeys that have a base value
|
||||||
for (stagingKeyIter = batchData->stagingKeys.begin(); stagingKeyIter != batchData->stagingKeys.end();
|
for (stagingKeyIter = batchData->stagingKeys.begin(); stagingKeyIter != batchData->stagingKeys.end();
|
||||||
stagingKeyIter++) {
|
stagingKeyIter++) {
|
||||||
|
@ -164,7 +164,10 @@ ACTOR Future<Void> recruitRestoreRoles(Reference<RestoreWorkerData> controllerWo
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
TraceEvent("FastRestoreController", controllerData->id()).detail("WorkerNode", workerInterf.first);
|
TraceEvent("FastRestoreController", controllerData->id())
|
||||||
|
.detail("WorkerNode", workerInterf.first)
|
||||||
|
.detail("NodeRole", role)
|
||||||
|
.detail("NodeIndex", nodeIndex);
|
||||||
requests.emplace_back(workerInterf.first,
|
requests.emplace_back(workerInterf.first,
|
||||||
RestoreRecruitRoleRequest(controllerWorker->controllerInterf.get(), role, nodeIndex));
|
RestoreRecruitRoleRequest(controllerWorker->controllerInterf.get(), role, nodeIndex));
|
||||||
nodeIndex++;
|
nodeIndex++;
|
||||||
|
@ -776,7 +776,7 @@ ACTOR Future<Void> sendMutationsToApplier(
|
|||||||
applierID, RestoreSendVersionedMutationsRequest(batchIndex, asset, msgIndex, isRangeFile,
|
applierID, RestoreSendVersionedMutationsRequest(batchIndex, asset, msgIndex, isRangeFile,
|
||||||
applierVersionedMutationsBuffer[applierID]));
|
applierVersionedMutationsBuffer[applierID]));
|
||||||
}
|
}
|
||||||
TraceEvent(SevDebug, "FastRestoreLoaderSendMutationToApplier")
|
TraceEvent(SevInfo, "FastRestoreLoaderSendMutationToApplier")
|
||||||
.detail("MessageIndex", msgIndex)
|
.detail("MessageIndex", msgIndex)
|
||||||
.detail("RestoreAsset", asset.toString())
|
.detail("RestoreAsset", asset.toString())
|
||||||
.detail("Requests", requests.size());
|
.detail("Requests", requests.size());
|
||||||
@ -800,7 +800,7 @@ ACTOR Future<Void> sendMutationsToApplier(
|
|||||||
RestoreSendVersionedMutationsRequest(batchIndex, asset, msgIndex, isRangeFile,
|
RestoreSendVersionedMutationsRequest(batchIndex, asset, msgIndex, isRangeFile,
|
||||||
applierVersionedMutationsBuffer[applierID]));
|
applierVersionedMutationsBuffer[applierID]));
|
||||||
}
|
}
|
||||||
TraceEvent(SevDebug, "FastRestoreLoaderSendMutationToApplier")
|
TraceEvent(SevInfo, "FastRestoreLoaderSendMutationToApplier")
|
||||||
.detail("MessageIndex", msgIndex)
|
.detail("MessageIndex", msgIndex)
|
||||||
.detail("RestoreAsset", asset.toString())
|
.detail("RestoreAsset", asset.toString())
|
||||||
.detail("Requests", requests.size());
|
.detail("Requests", requests.size());
|
||||||
|
@ -75,8 +75,9 @@ ACTOR Future<Void> handlerTerminateWorkerRequest(RestoreSimpleRequest req, Refer
|
|||||||
// Future: Multiple roles in a restore worker
|
// Future: Multiple roles in a restore worker
|
||||||
void handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Reference<RestoreWorkerData> self,
|
void handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Reference<RestoreWorkerData> self,
|
||||||
ActorCollection* actors, Database cx) {
|
ActorCollection* actors, Database cx) {
|
||||||
// Already recruited a role
|
|
||||||
// Future: Allow multiple restore roles on a restore worker. The design should easily allow this.
|
// Future: Allow multiple restore roles on a restore worker. The design should easily allow this.
|
||||||
|
ASSERT(!self->loaderInterf.present() || !self->applierInterf.present()); // Only one role per worker for now
|
||||||
|
// Already recruited a role
|
||||||
if (self->loaderInterf.present()) {
|
if (self->loaderInterf.present()) {
|
||||||
ASSERT(req.role == RestoreRole::Loader);
|
ASSERT(req.role == RestoreRole::Loader);
|
||||||
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Loader, self->loaderInterf.get()));
|
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Loader, self->loaderInterf.get()));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user