From 647138145d65ff7aba341205577c5fcdf3d05673 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Thu, 17 Jun 2021 20:59:47 +0000 Subject: [PATCH 1/5] adjust default value of stopWiggleSignal; better trace logic --- fdbserver/DataDistribution.actor.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 121409337f..fc2c437ac4 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -4093,7 +4093,7 @@ ACTOR Future perpetualStorageWiggler(AsyncVar* stopSignal, ACTOR Future monitorPerpetualStorageWiggle(DDTeamCollection* teamCollection, const DDEnabledState* ddEnabledState) { state int speed = 0; - state AsyncVar stopWiggleSignal(false); + state AsyncVar stopWiggleSignal(true); state PromiseStream finishStorageWiggleSignal; state SignalableActorCollection collection; teamCollection->pauseWiggle = makeReference>(true); @@ -4119,11 +4119,13 @@ ACTOR Future monitorPerpetualStorageWiggle(DDTeamCollection* teamCollectio collection.add(perpetualStorageWiggler( &stopWiggleSignal, finishStorageWiggleSignal, teamCollection, ddEnabledState)); TraceEvent("PerpetualStorageWiggleOpen", teamCollection->distributorId); - } else if (speed == 0 && !stopWiggleSignal.get()) { - stopWiggleSignal.set(true); - wait(collection.signalAndReset()); + } else if (speed == 0) { + if (!stopWiggleSignal.get()) { + stopWiggleSignal.set(true); + wait(collection.signalAndReset()); + teamCollection->pauseWiggle->set(true); + } TraceEvent("PerpetualStorageWiggleClose", teamCollection->distributorId); - teamCollection->pauseWiggle->set(true); } wait(watchFuture); break; From 783520ce851e0e5906e3fb4282c9a3ed1dbb1660 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Sat, 19 Jun 2021 16:57:04 +0000 Subject: [PATCH 2/5] add and remove some healthy check to solve cluster status oscillation when #ss is little; simplify some code --- fdbserver/DataDistribution.actor.cpp | 59 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index fc2c437ac4..5b1f4247bb 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3898,29 +3898,27 @@ ACTOR Future>> getServerL // to a sorted PID set maintained by the data distributor. If now no storage server exists, the new Process ID is 0. ACTOR Future updateNextWigglingStoragePID(DDTeamCollection* teamCollection) { state ReadYourWritesTransaction tr(teamCollection->cx); - state Value writeValue = LiteralStringRef("0"); + state Value writeValue; loop { try { tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); Optional value = wait(tr.get(wigglingStorageServerKey)); if (teamCollection->pid2server_info.empty()) { - tr.set(wigglingStorageServerKey, LiteralStringRef("0")); + writeValue = LiteralStringRef(""); } else { Value pid = teamCollection->pid2server_info.begin()->first; if (value.present()) { auto nextIt = teamCollection->pid2server_info.upper_bound(value.get()); if (nextIt == teamCollection->pid2server_info.end()) { - tr.set(wigglingStorageServerKey, pid); writeValue = pid; } else { - tr.set(wigglingStorageServerKey, nextIt->first); writeValue = nextIt->first; } } else { - tr.set(wigglingStorageServerKey, pid); writeValue = pid; } } + tr.set(wigglingStorageServerKey, writeValue); wait(tr.commit()); break; } catch (Error& e) { @@ -3942,7 +3940,14 @@ ACTOR Future perpetualStorageWiggleIterator(AsyncVar* stopSignal, loop { choose { when(wait(stopSignal->onChange())) {} - when(waitNext(finishStorageWiggleSignal)) { wait(updateNextWigglingStoragePID(teamCollection)); } + when(waitNext(finishStorageWiggleSignal)) { + // there must not have other teams to place wiggled data + while (teamCollection->server_info.size() <= teamCollection->configuration.storageTeamSize || + teamCollection->machine_info.size() < teamCollection->configuration.storageTeamSize) { + wait(delayJittered(SERVER_KNOBS->CHECK_TEAM_DELAY)); + } + wait(updateNextWigglingStoragePID(teamCollection)); + } } if (stopSignal->get()) { break; @@ -3976,17 +3981,23 @@ ACTOR Future, Value>> watchPerpetualStoragePIDChange(DDTe } // periodically check whether the cluster is healthy if we continue perpetual wiggle -ACTOR Future clusterHealthCheckForPerpetualWiggle(DDTeamCollection* self) { +ACTOR Future clusterHealthCheckForPerpetualWiggle(DDTeamCollection* self, int* extraTeamCount) { loop { Promise countp; self->getUnhealthyRelocationCount.send(countp); int count = wait(countp.getFuture()); // pause wiggle when // a. DDQueue is busy with unhealthy relocation request - // b. no healthy team + // b. healthy teams are not enough // c. the overall disk space is not enough - if (count >= SERVER_KNOBS->DD_STORAGE_WIGGLE_PAUSE_THRESHOLD || self->healthyTeamCount == 0 || + if (count >= SERVER_KNOBS->DD_STORAGE_WIGGLE_PAUSE_THRESHOLD || self->healthyTeamCount <= *extraTeamCount || self->bestTeamStuck) { + // if we pause wiggle not because the reason a, increase extraTeamCount. This helps avoid oscillation + // between pause and non-pause status. + if (count < SERVER_KNOBS->DD_STORAGE_WIGGLE_PAUSE_THRESHOLD && !self->pauseWiggle->get()) { + *extraTeamCount = std::min(*extraTeamCount + 1, + SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (int)self->server_info.size()); + } self->pauseWiggle->set(true); } else { self->pauseWiggle->set(false); @@ -4004,9 +4015,9 @@ ACTOR Future perpetualStorageWiggler(AsyncVar* stopSignal, const DDEnabledState* ddEnabledState) { state Future watchFuture = Never(); state Future moveFinishFuture = Never(); - state Future ddQueueCheck = clusterHealthCheckForPerpetualWiggle(self); + state int extraTeamCount = 0; + state Future ddQueueCheck = clusterHealthCheckForPerpetualWiggle(self, &extraTeamCount); state int movingCount = 0; - state vector excludedServerIds; state std::pair, Value> res = wait(watchPerpetualStoragePIDChange(self)); ASSERT(!self->wigglingPid.present()); // only single process wiggle is allowed self->wigglingPid = Optional(res.second); @@ -4022,24 +4033,13 @@ ACTOR Future perpetualStorageWiggler(AsyncVar* stopSignal, .detail("ProcessId", pid) .detail("StorageCount", movingCount); } else { - // pre-check whether wiggling chosen servers still satisfy replica requirement - excludedServerIds.clear(); - for (const auto& info : self->pid2server_info[self->wigglingPid.get()]) { - excludedServerIds.push_back(info->id); - } - if (_exclusionSafetyCheck(excludedServerIds, self)) { - TEST(true); // start wiggling - auto fv = self->excludeStorageServersForWiggle(pid); - movingCount = fv.size(); - moveFinishFuture = waitForAll(fv); - TraceEvent("PerpetualStorageWiggleStart", self->distributorId) - .detail("ProcessId", pid) - .detail("StorageCount", movingCount); - } else { - TEST(true); // skip wiggling current process - TraceEvent("PerpetualStorageWiggleSkip", self->distributorId).detail("ProcessId", pid.toString()); - moveFinishFuture = Void(); - } + TEST(true); // start wiggling + auto fv = self->excludeStorageServersForWiggle(pid); + movingCount = fv.size(); + moveFinishFuture = waitForAll(fv); + TraceEvent("PerpetualStorageWiggleStart", self->distributorId) + .detail("ProcessId", pid) + .detail("StorageCount", movingCount); } } @@ -4068,6 +4068,7 @@ ACTOR Future perpetualStorageWiggler(AsyncVar* stopSignal, self->wigglingPid.reset(); watchFuture = res.first; finishStorageWiggleSignal.send(Void()); + extraTeamCount = std::max(0, extraTeamCount - 1); } when(wait(ddQueueCheck || self->pauseWiggle->onChange() || stopSignal->onChange())) {} } From 0493d149e6f6a03e058edec5decb2d661110bf36 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Mon, 21 Jun 2021 05:18:19 +0000 Subject: [PATCH 3/5] wait remove --- fdbserver/DataDistribution.actor.cpp | 14 +++++++++----- fdbserver/DataDistributionQueue.actor.cpp | 2 ++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 5b1f4247bb..814d65c215 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -2840,8 +2840,7 @@ struct DDTeamCollection : ReferenceCounted { } this->wiggle_addresses.push_back(addr); this->excludedServers.set(addr, DDTeamCollection::Status::WIGGLING); - moveFutures.push_back( - waitForAllDataRemoved(this->cx, info->lastKnownInterface.id(), info->addedVersion, this)); + moveFutures.push_back(info->onRemoved); } if (!moveFutures.empty()) { this->restartRecruiting.trigger(); @@ -3982,6 +3981,7 @@ ACTOR Future, Value>> watchPerpetualStoragePIDChange(DDTe // periodically check whether the cluster is healthy if we continue perpetual wiggle ACTOR Future clusterHealthCheckForPerpetualWiggle(DDTeamCollection* self, int* extraTeamCount) { + state int pausePenalty = 1; loop { Promise countp; self->getUnhealthyRelocationCount.send(countp); @@ -3994,9 +3994,9 @@ ACTOR Future clusterHealthCheckForPerpetualWiggle(DDTeamCollection* self, self->bestTeamStuck) { // if we pause wiggle not because the reason a, increase extraTeamCount. This helps avoid oscillation // between pause and non-pause status. - if (count < SERVER_KNOBS->DD_STORAGE_WIGGLE_PAUSE_THRESHOLD && !self->pauseWiggle->get()) { - *extraTeamCount = std::min(*extraTeamCount + 1, - SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (int)self->server_info.size()); + if ((self->healthyTeamCount <= *extraTeamCount || self->bestTeamStuck) && !self->pauseWiggle->get()) { + *extraTeamCount = std::min(*extraTeamCount + pausePenalty, (int)self->teams.size()); + pausePenalty = std::min(pausePenalty * 2, (int)self->teams.size()); } self->pauseWiggle->set(true); } else { @@ -4031,6 +4031,8 @@ ACTOR Future perpetualStorageWiggler(AsyncVar* stopSignal, self->includeStorageServersForWiggle(); TraceEvent("PerpetualStorageWigglePause", self->distributorId) .detail("ProcessId", pid) + .detail("ExtraHealthyTeamCount", extraTeamCount) + .detail("HealthyTeamCount", self->healthyTeamCount) .detail("StorageCount", movingCount); } else { TEST(true); // start wiggling @@ -4039,6 +4041,8 @@ ACTOR Future perpetualStorageWiggler(AsyncVar* stopSignal, moveFinishFuture = waitForAll(fv); TraceEvent("PerpetualStorageWiggleStart", self->distributorId) .detail("ProcessId", pid) + .detail("ExtraHealthyTeamCount", extraTeamCount) + .detail("HealthyTeamCount", self->healthyTeamCount) .detail("StorageCount", movingCount); } } diff --git a/fdbserver/DataDistributionQueue.actor.cpp b/fdbserver/DataDistributionQueue.actor.cpp index bb87bf4e33..ba8e0f416a 100644 --- a/fdbserver/DataDistributionQueue.actor.cpp +++ b/fdbserver/DataDistributionQueue.actor.cpp @@ -1662,6 +1662,8 @@ ACTOR Future dataDistributionQueue(Database cx, self.priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM]) .detail("PriorityRebalanceOverutilizedTeam", self.priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM]) + .detail("PriorityStorageWiggle", + self.priority_relocations[SERVER_KNOBS->PRIORITY_PERPETUAL_STORAGE_WIGGLE]) .detail("PriorityTeamHealthy", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_HEALTHY]) .detail("PriorityTeamContainsUndesiredServer", self.priority_relocations[SERVER_KNOBS->PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER]) From f2daf20927260e1e0077bf4d40bca8e93ed045a4 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Mon, 21 Jun 2021 06:55:34 +0000 Subject: [PATCH 4/5] TEST condition --- fdbserver/DataDistribution.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 814d65c215..08a9143167 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -4059,9 +4059,9 @@ ACTOR Future perpetualStorageWiggler(AsyncVar* stopSignal, wait(delayJittered(5.0, TaskPriority::DataDistributionLow)); } when(wait(moveFinishFuture)) { - TEST(true); // finish wiggling this process ASSERT(self->wigglingPid.present()); StringRef pid = self->wigglingPid.get(); + TEST(pid != LiteralStringRef("")); // finish wiggling this process moveFinishFuture = Never(); self->includeStorageServersForWiggle(); From 7b713f7fd2ea471926bc8c4e9ff48030aefcefce Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Wed, 23 Jun 2021 05:49:55 +0000 Subject: [PATCH 5/5] add knob --- fdbclient/ServerKnobs.cpp | 1 + fdbclient/ServerKnobs.h | 1 + fdbserver/DataDistribution.actor.cpp | 11 +++++++---- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 38e7beb341..8cbb77defb 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -210,6 +210,7 @@ void ServerKnobs::initialize(Randomize _randomize, ClientKnobs* clientKnobs, IsS init( ALL_DATA_REMOVED_DELAY, 1.0 ); init( INITIAL_FAILURE_REACTION_DELAY, 30.0 ); if( randomize && BUGGIFY ) INITIAL_FAILURE_REACTION_DELAY = 0.0; init( CHECK_TEAM_DELAY, 30.0 ); + init( PERPETUAL_WIGGLE_DELAY, 50.0 ); init( LOG_ON_COMPLETION_DELAY, DD_QUEUE_LOGGING_INTERVAL ); init( BEST_TEAM_MAX_TEAM_TRIES, 10 ); init( BEST_TEAM_OPTION_COUNT, 4 ); diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index c704541a2e..30468c7e84 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -160,6 +160,7 @@ public: double ALL_DATA_REMOVED_DELAY; double INITIAL_FAILURE_REACTION_DELAY; double CHECK_TEAM_DELAY; + double PERPETUAL_WIGGLE_DELAY; double LOG_ON_COMPLETION_DELAY; int BEST_TEAM_MAX_TEAM_TRIES; int BEST_TEAM_OPTION_COUNT; diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 08a9143167..3dd6aceeec 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -3936,14 +3936,17 @@ ACTOR Future updateNextWigglingStoragePID(DDTeamCollection* teamCollection ACTOR Future perpetualStorageWiggleIterator(AsyncVar* stopSignal, FutureStream finishStorageWiggleSignal, DDTeamCollection* teamCollection) { + state int lastFinishTime = now(); loop { choose { when(wait(stopSignal->onChange())) {} when(waitNext(finishStorageWiggleSignal)) { - // there must not have other teams to place wiggled data - while (teamCollection->server_info.size() <= teamCollection->configuration.storageTeamSize || - teamCollection->machine_info.size() < teamCollection->configuration.storageTeamSize) { - wait(delayJittered(SERVER_KNOBS->CHECK_TEAM_DELAY)); + state bool takeRest = true; // delay to avoid delete and update ServerList too frequently + while (takeRest) { + wait(delayJittered(SERVER_KNOBS->PERPETUAL_WIGGLE_DELAY)); + // there must not have other teams to place wiggled data + takeRest = teamCollection->server_info.size() <= teamCollection->configuration.storageTeamSize || + teamCollection->machine_info.size() < teamCollection->configuration.storageTeamSize; } wait(updateNextWigglingStoragePID(teamCollection)); }