1
0
mirror of https://github.com/apple/foundationdb.git synced 2025-05-31 18:19:35 +08:00

add hasWigglePausedServer method; add new sort criteria

This commit is contained in:
Xiaoxi Wang 2022-03-14 16:02:42 -07:00
parent 8be519a5d8
commit 87640673f7
4 changed files with 41 additions and 2 deletions

@ -200,8 +200,9 @@ public:
}
int64_t bestLoadBytes = 0;
bool wigglingBestOption = false; // best option contains server in paused wiggle state
Optional<Reference<IDataDistributionTeam>> bestOption;
std::vector<Reference<IDataDistributionTeam>> randomTeams;
std::vector<Reference<TCTeamInfo>> randomTeams;
const std::set<UID> completeSources(req.completeSources.begin(), req.completeSources.end());
// Note: this block does not apply any filters from the request
@ -249,9 +250,18 @@ public:
(!req.teamMustHaveShards ||
self->shardsAffectedByTeamFailure->hasShards(ShardsAffectedByTeamFailure::Team(
self->teams[currentIndex]->getServerIDs(), self->primary)))) {
// bestOption doesn't contain wiggling SS while current team does. Don't replace bestOption
// in this case
if (bestOption.present() && !wigglingBestOption &&
self->teams[currentIndex]->hasWigglePausedServer()) {
continue;
}
bestLoadBytes = loadBytes;
bestOption = self->teams[currentIndex];
bestIndex = currentIndex;
wigglingBestOption = self->teams[bestIndex]->hasWigglePausedServer();
}
}
}
@ -262,7 +272,7 @@ public:
while (randomTeams.size() < SERVER_KNOBS->BEST_TEAM_OPTION_COUNT &&
nTries < SERVER_KNOBS->BEST_TEAM_MAX_TEAM_TRIES) {
// If unhealthy team is majority, we may not find an ok dest in this while loop
Reference<IDataDistributionTeam> dest = deterministicRandom()->randomChoice(self->teams);
Reference<TCTeamInfo> dest = deterministicRandom()->randomChoice(self->teams);
bool ok = dest->isHealthy() && (!req.preferLowerUtilization ||
dest->hasHealthyAvailableSpace(self->medianAvailableSpace));
@ -298,8 +308,16 @@ public:
int64_t loadBytes = randomTeams[i]->getLoadBytes(true, req.inflightPenalty);
if (!bestOption.present() || (req.preferLowerUtilization && loadBytes < bestLoadBytes) ||
(!req.preferLowerUtilization && loadBytes > bestLoadBytes)) {
// bestOption doesn't contain wiggling SS while current team does. Don't replace bestOption
// in this case
if (bestOption.present() && !wigglingBestOption && randomTeams[i]->hasWigglePausedServer()) {
continue;
}
bestLoadBytes = loadBytes;
bestOption = randomTeams[i];
wigglingBestOption = randomTeams[i]->hasWigglePausedServer();
}
}
}
@ -3611,6 +3629,10 @@ void DDTeamCollection::removeLaggingStorageServer(Key zoneId) {
disableFailingLaggingServers.set(false);
}
// Reports whether `server` is the server identified by wigglingId while the wiggle is paused.
// Yields false when no pauseWiggle variable is attached or when its current value is false.
bool DDTeamCollection::isWigglePausedServer(const UID& server) const {
	// The flag is only dereferenced after confirming the reference is non-null.
	const bool wiggleIsPaused = pauseWiggle && pauseWiggle->get();
	return wiggleIsPaused && wigglingId == server;
}
std::vector<UID> DDTeamCollection::getRandomHealthyTeam(const UID& excludeServer) {
std::vector<int> candidates, backup;
for (int i = 0; i < teams.size(); ++i) {

@ -594,6 +594,9 @@ public:
void removeLaggingStorageServer(Key zoneId);
// Whether the server is undergoing the wiggle process while the wiggle is paused for health-compliance reasons.
bool isWigglePausedServer(const UID& server) const;
// Returns a random healthy team, which does not contain excludeServer.
std::vector<UID> getRandomHealthyTeam(const UID& excludeServer);

@ -154,6 +154,10 @@ bool TCServerInfo::hasHealthyAvailableSpace(double minAvailableSpaceRatio) const
return availableSpaceRatio >= minAvailableSpaceRatio;
}
// Asks the owning collection whether this server (by its id) is the wiggle-paused server.
// A server with no collection set is never reported as wiggle-paused.
bool TCServerInfo::isWigglePausedServer() const {
	if (!collection) {
		return false;
	}
	return collection->isWigglePausedServer(id);
}
// Forwards to TCServerInfoImpl::updateServerMetrics for this server and returns the future it produces.
Future<Void> TCServerInfo::updateServerMetrics() {
return TCServerInfoImpl::updateServerMetrics(this);
}
@ -431,6 +435,14 @@ bool TCTeamInfo::hasServer(const UID& server) const {
return std::find(serverIDs.begin(), serverIDs.end(), server) != serverIDs.end();
}
bool TCTeamInfo::hasWigglePausedServer() const {
for (const auto& server : servers) {
if (server->isWigglePausedServer())
return true;
}
return false;
}
void TCTeamInfo::addServers(const std::vector<UID>& servers) {
serverIDs.reserve(servers.size());
for (int i = 0; i < servers.size(); i++) {

@ -97,6 +97,7 @@ public:
// If a storage server does not reply its storeType, it will be tracked by failure monitor and removed.
return (storeType == configStoreType || storeType == KeyValueStoreType::END);
}
bool isWigglePausedServer() const;
std::pair<int64_t, int64_t> spaceBytes(bool includeInFlight = true) const;
int64_t loadBytes() const;
@ -214,6 +215,7 @@ public:
// Overrides delref() by forwarding to the ReferenceCounted<TCTeamInfo> implementation.
void delref() override { ReferenceCounted<TCTeamInfo>::delref(); }
bool hasServer(const UID& server) const;
bool hasWigglePausedServer() const;
void addServers(const std::vector<UID>& servers) override;