mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-14 18:02:31 +08:00
FastRestore:LoaderScheduler:Add knobs
This commit is contained in:
parent
7e302b5910
commit
a035e7a872
@ -643,6 +643,11 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
||||
init( FASTRESTORE_USE_RANGE_FILE, true ); // Perf test only: setting it to false will cause simulation failure
|
||||
init( FASTRESTORE_USE_LOG_FILE, true ); // Perf test only: setting it to false will cause simulation failure
|
||||
init( FASTRESTORE_SAMPLE_MSG_BYTES, 1048576 ); if( randomize && BUGGIFY ) { FASTRESTORE_SAMPLE_MSG_BYTES = deterministicRandom()->random01() * 2048;}
|
||||
init( FASTRESTORE_SCHED_UPDATE_DELAY, 0.5 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_UPDATE_DELAY = deterministicRandom()->random01() * 2;}
|
||||
init( FASTRESTORE_SCHED_TARGET_CPU_PERCENT, 70 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_TARGET_CPU_PERCENT = deterministicRandom()->random01() * 100;}
|
||||
init( FASTRESTORE_SCHED_MAX_CPU_PERCENT, 90 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_MAX_CPU_PERCENT = FASTRESTORE_SCHED_TARGET_CPU_PERCENT + deterministicRandom()->random01() * 100;}
|
||||
init( FASTRESTORE_SCHED_INFLIGHT_LOAD_REQS, 20 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_INFLIGHT_LOAD_REQS = deterministicRandom()->random01() * 30;}
|
||||
init( FASTRESTORE_SCHED_INFLIGHT_SEND_REQS, 3 ); if( randomize && BUGGIFY ) { FASTRESTORE_SCHED_INFLIGHT_SEND_REQS = deterministicRandom()->random01() < 0.2 ? 1 : deterministicRandom()->random01() * 5 + 1;}
|
||||
|
||||
init( REDWOOD_DEFAULT_PAGE_SIZE, 4096 );
|
||||
init( REDWOOD_KVSTORE_CONCURRENT_READS, 64 );
|
||||
|
@ -575,6 +575,11 @@ public:
|
||||
bool FASTRESTORE_USE_RANGE_FILE; // use range file in backup
|
||||
bool FASTRESTORE_USE_LOG_FILE; // use log file in backup
|
||||
int64_t FASTRESTORE_SAMPLE_MSG_BYTES; // sample message desired size
|
||||
double FASTRESTORE_SCHED_UPDATE_DELAY; // delay in seconds in updating process metrics
|
||||
int FASTRESTORE_SCHED_TARGET_CPU_PERCENT; // release as many requests as possible when cpu usage is below the knob
|
||||
int FASTRESTORE_SCHED_MAX_CPU_PERCENT; // max cpu percent when scheduler shall not release non-urgent requests
|
||||
int FASTRESTORE_SCHED_INFLIGHT_LOAD_REQS; // number of inflight requests to load backup files
|
||||
int FASTRESTORE_SCHED_INFLIGHT_SEND_REQS; // number of inflight requests for loaders to send mutations to appliers
|
||||
|
||||
int REDWOOD_DEFAULT_PAGE_SIZE; // Page size for new Redwood files
|
||||
int REDWOOD_KVSTORE_CONCURRENT_READS; // Max number of simultaneous point or range reads in progress.
|
||||
|
@ -70,7 +70,8 @@ ACTOR Future<Void> dispatchRequests(Reference<RestoreLoaderData> self) {
|
||||
while (!self->sendingQueue.empty()) {
|
||||
const RestoreSendMutationsToAppliersRequest& req = self->sendingQueue.top();
|
||||
// Dispatch the request if it is the next version batch to process or if cpu usage is low
|
||||
if (req.batchIndex - 1 == self->finishedSendingVB || self->cpuUsage < 70) {
|
||||
if (req.batchIndex - 1 == self->finishedSendingVB ||
|
||||
self->cpuUsage < SERVER_KNOBS->FASTRESTORE_SCHED_TARGET_CPU_PERCENT) {
|
||||
self->addActor.send(handleSendMutationsRequest(req, self));
|
||||
self->sendingQueue.pop();
|
||||
}
|
||||
@ -78,29 +79,30 @@ ACTOR Future<Void> dispatchRequests(Reference<RestoreLoaderData> self) {
|
||||
// and it takes large amount of resource
|
||||
}
|
||||
// Decide when the node should pause releasing more load-file requests
|
||||
if (self->inflightSendingReqs >= 3 || (self->inflightSendingReqs >= 1 && self->cpuUsage >= 70) ||
|
||||
self->cpuUsage >= 90) {
|
||||
if (self->inflightSendingReqs >= 3) {
|
||||
if ((self->inflightSendingReqs >= SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_SEND_REQS ||
|
||||
(self->inflightSendingReqs >= 1 &&
|
||||
self->cpuUsage >= SERVER_KNOBS->FASTRESTORE_SCHED_TARGET_CPU_PERCENT) ||
|
||||
self->cpuUsage >= SERVER_KNOBS->FASTRESTORE_SCHED_MAX_CPU_PERCENT) &&
|
||||
(self->inflightSendingReqs > 0 && self->inflightLoadingReqs > 0)) {
|
||||
if (self->inflightSendingReqs >= SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_SEND_REQS) {
|
||||
TraceEvent(SevWarn, "FastRestoreLoaderTooManyInflightSendingMutationRequests")
|
||||
.detail("VersionBatchesBlockedAtSendingMutationsToAppliers", self->inflightSendingReqs)
|
||||
.detail("Reason", "Sending mutations is too slow");
|
||||
}
|
||||
wait(delay(0.5)); // TODO: Knob
|
||||
wait(delay(SERVER_KNOBS->FASTRESTORE_SCHED_UPDATE_DELAY));
|
||||
updateProcessStats(self);
|
||||
continue;
|
||||
}
|
||||
// Dispatch loading backup file requests
|
||||
int releasedReq = 0;
|
||||
while (!self->loadingQueue.empty()) {
|
||||
const RestoreLoadFileRequest& req = self->loadingQueue.top();
|
||||
self->addActor.send(handleLoadFileRequest(req, self));
|
||||
++releasedReq;
|
||||
self->loadingQueue.pop();
|
||||
if (releasedReq > 10) { // TODO: Knob
|
||||
if (self->inflightLoadingReqs > SERVER_KNOBS->FASTRESTORE_SCHED_INFLIGHT_LOAD_REQS) {
|
||||
break;
|
||||
}
|
||||
const RestoreLoadFileRequest& req = self->loadingQueue.top();
|
||||
self->addActor.send(handleLoadFileRequest(req, self));
|
||||
self->loadingQueue.pop();
|
||||
}
|
||||
if (self->cpuUsage >= 70) {
|
||||
if (self->cpuUsage >= SERVER_KNOBS->FASTRESTORE_SCHED_TARGET_CPU_PERCENT) {
|
||||
wait(delay(0.1));
|
||||
updateProcessStats(self);
|
||||
}
|
||||
@ -439,6 +441,7 @@ ACTOR Future<Void> handleLoadFileRequest(RestoreLoadFileRequest req, Reference<R
|
||||
ASSERT(batchData->sampleMutations.find(req.param) == batchData->sampleMutations.end());
|
||||
batchData->processedFileParams[req.param] =
|
||||
_processLoadingParam(&self->rangeVersions, req.param, batchData, self->id(), self->bc);
|
||||
self->inflightLoadingReqs++;
|
||||
isDuplicated = false;
|
||||
} else {
|
||||
TraceEvent(SevFRDebugInfo, "FastRestoreLoadFile", self->id())
|
||||
@ -483,6 +486,7 @@ ACTOR Future<Void> handleLoadFileRequest(RestoreLoadFileRequest req, Reference<R
|
||||
}
|
||||
|
||||
// Ack restore controller the param is processed
|
||||
self->inflightLoadingReqs--;
|
||||
req.reply.send(RestoreLoadFileReply(req.param, isDuplicated));
|
||||
TraceEvent(printTrace ? SevInfo : SevFRDebugInfo, "FastRestoreLoaderPhaseLoadFileDone", self->id())
|
||||
.detail("BatchIndex", req.batchIndex)
|
||||
|
@ -146,6 +146,7 @@ struct RestoreLoaderData : RestoreRoleData, public ReferenceCounted<RestoreLoade
|
||||
int finishedLoadingVB; // the max version batch index that finished loading file phase
|
||||
int finishedSendingVB; // the max version batch index that finished sending mutations phase
|
||||
int inflightSendingReqs; // number of sendingMutations requests released
|
||||
int inflightLoadingReqs; // number of load backup file requests released
|
||||
|
||||
// addActor: add to actorCollection so that when an actor has error, the ActorCollection can catch the error.
|
||||
// addActor is used to create the actorCollection when the RestoreController is created
|
||||
@ -155,7 +156,7 @@ struct RestoreLoaderData : RestoreRoleData, public ReferenceCounted<RestoreLoade
|
||||
void delref() { return ReferenceCounted<RestoreLoaderData>::delref(); }
|
||||
|
||||
explicit RestoreLoaderData(UID loaderInterfID, int assignedIndex, RestoreControllerInterface ci)
|
||||
: ci(ci), finishedLoadingVB(0), finishedSendingVB(0) {
|
||||
: ci(ci), finishedLoadingVB(0), finishedSendingVB(0), inflightSendingReqs(0), inflightLoadingReqs(0) {
|
||||
nodeID = loaderInterfID;
|
||||
nodeIndex = assignedIndex;
|
||||
role = RestoreRole::Loader;
|
||||
|
Loading…
x
Reference in New Issue
Block a user