Merge branch 'main' into feature-metacluster

This commit is contained in:
A.J. Beamon 2022-07-30 18:33:19 -07:00
commit ad52c575a8
6 changed files with 308 additions and 257 deletions

View File

@ -265,7 +265,7 @@ TEST_CASE("/fdbrpc/authz/TokenCache/BadTokens") {
},
{
[](Arena&, IRandom& rng, authz::jwt::TokenRef& token) {
token.expiresAtUnixTime = uint64_t(g_network->timer() - 10 - rng.random01() * 50);
token.expiresAtUnixTime = uint64_t(std::max<double>(g_network->timer() - 10 - rng.random01() * 50, 0));
},
"ExpiredToken",
},

View File

@ -1055,179 +1055,3 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
throw e;
}
}
std::vector<KeyRange> ShardsAffectedByTeamFailure::getShardsFor(Team team) const {
std::vector<KeyRange> r;
for (auto it = team_shards.lower_bound(std::pair<Team, KeyRange>(team, KeyRangeRef()));
it != team_shards.end() && it->first == team;
++it)
r.push_back(it->second);
return r;
}
bool ShardsAffectedByTeamFailure::hasShards(Team team) const {
auto it = team_shards.lower_bound(std::pair<Team, KeyRange>(team, KeyRangeRef()));
return it != team_shards.end() && it->first == team;
}
int ShardsAffectedByTeamFailure::getNumberOfShards(UID ssID) const {
auto it = storageServerShards.find(ssID);
return it == storageServerShards.end() ? 0 : it->second;
}
std::pair<std::vector<ShardsAffectedByTeamFailure::Team>, std::vector<ShardsAffectedByTeamFailure::Team>>
ShardsAffectedByTeamFailure::getTeamsFor(KeyRangeRef keys) {
return shard_teams[keys.begin];
}
void ShardsAffectedByTeamFailure::erase(Team team, KeyRange const& range) {
DisabledTraceEvent(SevDebug, "ShardsAffectedByTeamFailureErase")
.detail("Range", range)
.detail("Team", team.toString());
if (team_shards.erase(std::pair<Team, KeyRange>(team, range)) > 0) {
for (auto uid = team.servers.begin(); uid != team.servers.end(); ++uid) {
// Safeguard against going negative after eraseServer() sets value to 0
if (storageServerShards[*uid] > 0) {
storageServerShards[*uid]--;
}
}
}
}
void ShardsAffectedByTeamFailure::insert(Team team, KeyRange const& range) {
DisabledTraceEvent(SevDebug, "ShardsAffectedByTeamFailureInsert")
.detail("Range", range)
.detail("Team", team.toString());
if (team_shards.insert(std::pair<Team, KeyRange>(team, range)).second) {
for (auto uid = team.servers.begin(); uid != team.servers.end(); ++uid)
storageServerShards[*uid]++;
}
}
void ShardsAffectedByTeamFailure::defineShard(KeyRangeRef keys) {
std::vector<Team> teams;
std::vector<Team> prevTeams;
auto rs = shard_teams.intersectingRanges(keys);
for (auto it = rs.begin(); it != rs.end(); ++it) {
for (auto t = it->value().first.begin(); t != it->value().first.end(); ++t) {
teams.push_back(*t);
erase(*t, it->range());
}
for (auto t = it->value().second.begin(); t != it->value().second.end(); ++t) {
prevTeams.push_back(*t);
}
}
uniquify(teams);
uniquify(prevTeams);
/*TraceEvent("ShardsAffectedByTeamFailureDefine")
.detail("KeyBegin", keys.begin)
.detail("KeyEnd", keys.end)
.detail("TeamCount", teams.size());*/
auto affectedRanges = shard_teams.getAffectedRangesAfterInsertion(keys);
shard_teams.insert(keys, std::make_pair(teams, prevTeams));
for (auto r = affectedRanges.begin(); r != affectedRanges.end(); ++r) {
auto& t = shard_teams[r->begin];
for (auto it = t.first.begin(); it != t.first.end(); ++it) {
insert(*it, *r);
}
}
check();
}
// Move keys to destinationTeams by updating shard_teams
void ShardsAffectedByTeamFailure::moveShard(KeyRangeRef keys, std::vector<Team> destinationTeams) {
/*TraceEvent("ShardsAffectedByTeamFailureMove")
.detail("KeyBegin", keys.begin)
.detail("KeyEnd", keys.end)
.detail("NewTeamSize", destinationTeam.size())
.detail("NewTeam", describe(destinationTeam));*/
auto ranges = shard_teams.intersectingRanges(keys);
std::vector<std::pair<std::pair<std::vector<Team>, std::vector<Team>>, KeyRange>> modifiedShards;
for (auto it = ranges.begin(); it != ranges.end(); ++it) {
if (keys.contains(it->range())) {
// erase the many teams that were associated with this one shard
for (auto t = it->value().first.begin(); t != it->value().first.end(); ++t) {
erase(*t, it->range());
}
// save this modification for later insertion
std::vector<Team> prevTeams = it->value().second;
prevTeams.insert(prevTeams.end(), it->value().first.begin(), it->value().first.end());
uniquify(prevTeams);
modifiedShards.push_back(std::pair<std::pair<std::vector<Team>, std::vector<Team>>, KeyRange>(
std::make_pair(destinationTeams, prevTeams), it->range()));
} else {
// for each range that touches this move, add our team as affecting this range
for (auto& team : destinationTeams) {
insert(team, it->range());
}
// if we are not in the list of teams associated with this shard, add us in
auto& teams = it->value();
teams.second.insert(teams.second.end(), teams.first.begin(), teams.first.end());
uniquify(teams.second);
teams.first.insert(teams.first.end(), destinationTeams.begin(), destinationTeams.end());
uniquify(teams.first);
}
}
// we cannot modify the KeyRangeMap while iterating through it, so add saved modifications now
for (int i = 0; i < modifiedShards.size(); i++) {
for (auto& t : modifiedShards[i].first.first) {
insert(t, modifiedShards[i].second);
}
shard_teams.insert(modifiedShards[i].second, modifiedShards[i].first);
}
check();
}
void ShardsAffectedByTeamFailure::finishMove(KeyRangeRef keys) {
auto ranges = shard_teams.containedRanges(keys);
for (auto it = ranges.begin(); it != ranges.end(); ++it) {
it.value().second.clear();
}
}
void ShardsAffectedByTeamFailure::setCheckMode(CheckMode mode) {
checkMode = mode;
}
void ShardsAffectedByTeamFailure::check() const {
if (checkMode == CheckMode::ForceNoCheck)
return;
if (EXPENSIVE_VALIDATION || checkMode == CheckMode::ForceCheck) {
for (auto t = team_shards.begin(); t != team_shards.end(); ++t) {
auto i = shard_teams.rangeContaining(t->second.begin);
if (i->range() != t->second || !std::count(i->value().first.begin(), i->value().first.end(), t->first)) {
ASSERT(false);
}
}
auto rs = shard_teams.ranges();
for (auto i = rs.begin(); i != rs.end(); ++i) {
for (auto t = i->value().first.begin(); t != i->value().first.end(); ++t) {
if (!team_shards.count(std::make_pair(*t, i->range()))) {
std::string teamDesc, shards;
for (int k = 0; k < t->servers.size(); k++)
teamDesc += format("%llx ", t->servers[k].first());
for (auto x = team_shards.lower_bound(std::make_pair(*t, KeyRangeRef()));
x != team_shards.end() && x->first == *t;
++x)
shards += printable(x->second.begin) + "-" + printable(x->second.end) + ",";
TraceEvent(SevError, "SATFInvariantError2")
.detail("KB", i->begin())
.detail("KE", i->end())
.detail("Team", teamDesc)
.detail("Shards", shards);
ASSERT(false);
}
}
}
}
}

View File

@ -0,0 +1,197 @@
/*
* ShardsAffectedByTeamFailure.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbserver/ShardsAffectedByTeamFailure.h"
std::vector<KeyRange> ShardsAffectedByTeamFailure::getShardsFor(Team team) const {
std::vector<KeyRange> r;
for (auto it = team_shards.lower_bound(std::pair<Team, KeyRange>(team, KeyRangeRef()));
it != team_shards.end() && it->first == team;
++it)
r.push_back(it->second);
return r;
}
bool ShardsAffectedByTeamFailure::hasShards(Team team) const {
auto it = team_shards.lower_bound(std::pair<Team, KeyRange>(team, KeyRangeRef()));
return it != team_shards.end() && it->first == team;
}
int ShardsAffectedByTeamFailure::getNumberOfShards(UID ssID) const {
auto it = storageServerShards.find(ssID);
return it == storageServerShards.end() ? 0 : it->second;
}
std::pair<std::vector<ShardsAffectedByTeamFailure::Team>, std::vector<ShardsAffectedByTeamFailure::Team>>
ShardsAffectedByTeamFailure::getTeamsFor(KeyRangeRef keys) {
return shard_teams[keys.begin];
}
void ShardsAffectedByTeamFailure::erase(Team team, KeyRange const& range) {
DisabledTraceEvent(SevDebug, "ShardsAffectedByTeamFailureErase")
.detail("Range", range)
.detail("Team", team.toString());
if (team_shards.erase(std::pair<Team, KeyRange>(team, range)) > 0) {
for (auto uid = team.servers.begin(); uid != team.servers.end(); ++uid) {
// Safeguard against going negative after eraseServer() sets value to 0
if (storageServerShards[*uid] > 0) {
storageServerShards[*uid]--;
}
}
}
}
void ShardsAffectedByTeamFailure::insert(Team team, KeyRange const& range) {
DisabledTraceEvent(SevDebug, "ShardsAffectedByTeamFailureInsert")
.detail("Range", range)
.detail("Team", team.toString());
if (team_shards.insert(std::pair<Team, KeyRange>(team, range)).second) {
for (auto uid = team.servers.begin(); uid != team.servers.end(); ++uid)
storageServerShards[*uid]++;
}
}
void ShardsAffectedByTeamFailure::defineShard(KeyRangeRef keys) {
std::vector<Team> teams;
std::vector<Team> prevTeams;
auto rs = shard_teams.intersectingRanges(keys);
for (auto it = rs.begin(); it != rs.end(); ++it) {
for (auto t = it->value().first.begin(); t != it->value().first.end(); ++t) {
teams.push_back(*t);
erase(*t, it->range());
}
for (auto t = it->value().second.begin(); t != it->value().second.end(); ++t) {
prevTeams.push_back(*t);
}
}
uniquify(teams);
uniquify(prevTeams);
/*TraceEvent("ShardsAffectedByTeamFailureDefine")
.detail("KeyBegin", keys.begin)
.detail("KeyEnd", keys.end)
.detail("TeamCount", teams.size());*/
auto affectedRanges = shard_teams.getAffectedRangesAfterInsertion(keys);
shard_teams.insert(keys, std::make_pair(teams, prevTeams));
for (auto r = affectedRanges.begin(); r != affectedRanges.end(); ++r) {
auto& t = shard_teams[r->begin];
for (auto it = t.first.begin(); it != t.first.end(); ++it) {
insert(*it, *r);
}
}
check();
}
// Move keys to destinationTeams by updating shard_teams
void ShardsAffectedByTeamFailure::moveShard(KeyRangeRef keys, std::vector<Team> destinationTeams) {
/*TraceEvent("ShardsAffectedByTeamFailureMove")
.detail("KeyBegin", keys.begin)
.detail("KeyEnd", keys.end)
.detail("NewTeamSize", destinationTeam.size())
.detail("NewTeam", describe(destinationTeam));*/
auto ranges = shard_teams.intersectingRanges(keys);
std::vector<std::pair<std::pair<std::vector<Team>, std::vector<Team>>, KeyRange>> modifiedShards;
for (auto it = ranges.begin(); it != ranges.end(); ++it) {
if (keys.contains(it->range())) {
// erase the many teams that were associated with this one shard
for (auto t = it->value().first.begin(); t != it->value().first.end(); ++t) {
erase(*t, it->range());
}
// save this modification for later insertion
std::vector<Team> prevTeams = it->value().second;
prevTeams.insert(prevTeams.end(), it->value().first.begin(), it->value().first.end());
uniquify(prevTeams);
modifiedShards.push_back(std::pair<std::pair<std::vector<Team>, std::vector<Team>>, KeyRange>(
std::make_pair(destinationTeams, prevTeams), it->range()));
} else {
// for each range that touches this move, add our team as affecting this range
for (auto& team : destinationTeams) {
insert(team, it->range());
}
// if we are not in the list of teams associated with this shard, add us in
auto& teams = it->value();
teams.second.insert(teams.second.end(), teams.first.begin(), teams.first.end());
uniquify(teams.second);
teams.first.insert(teams.first.end(), destinationTeams.begin(), destinationTeams.end());
uniquify(teams.first);
}
}
// we cannot modify the KeyRangeMap while iterating through it, so add saved modifications now
for (int i = 0; i < modifiedShards.size(); i++) {
for (auto& t : modifiedShards[i].first.first) {
insert(t, modifiedShards[i].second);
}
shard_teams.insert(modifiedShards[i].second, modifiedShards[i].first);
}
check();
}
void ShardsAffectedByTeamFailure::finishMove(KeyRangeRef keys) {
auto ranges = shard_teams.containedRanges(keys);
for (auto it = ranges.begin(); it != ranges.end(); ++it) {
it.value().second.clear();
}
}
void ShardsAffectedByTeamFailure::setCheckMode(CheckMode mode) {
checkMode = mode;
}
void ShardsAffectedByTeamFailure::check() const {
if (checkMode == CheckMode::ForceNoCheck)
return;
if (EXPENSIVE_VALIDATION || checkMode == CheckMode::ForceCheck) {
for (auto t = team_shards.begin(); t != team_shards.end(); ++t) {
auto i = shard_teams.rangeContaining(t->second.begin);
if (i->range() != t->second || !std::count(i->value().first.begin(), i->value().first.end(), t->first)) {
ASSERT(false);
}
}
auto rs = shard_teams.ranges();
for (auto i = rs.begin(); i != rs.end(); ++i) {
for (auto t = i->value().first.begin(); t != i->value().first.end(); ++t) {
if (!team_shards.count(std::make_pair(*t, i->range()))) {
std::string teamDesc, shards;
for (int k = 0; k < t->servers.size(); k++)
teamDesc += format("%llx ", t->servers[k].first());
for (auto x = team_shards.lower_bound(std::make_pair(*t, KeyRangeRef()));
x != team_shards.end() && x->first == *t;
++x)
shards += printable(x->second.begin) + "-" + printable(x->second.end) + ",";
TraceEvent(SevError, "SATFInvariantError2")
.detail("KB", i->begin())
.detail("KE", i->end())
.detail("Team", teamDesc)
.detail("Shards", shards);
ASSERT(false);
}
}
}
}
}

View File

@ -30,6 +30,7 @@
#include "fdbserver/Knobs.h"
#include "fdbserver/LogSystem.h"
#include "fdbserver/MoveKeys.actor.h"
#include "fdbserver/ShardsAffectedByTeamFailure.h"
#include <boost/heap/policies.hpp>
#include <boost/heap/skew_heap.hpp>
@ -290,86 +291,6 @@ struct TeamCollectionInterface {
PromiseStream<GetTeamRequest> getTeam;
};
class ShardsAffectedByTeamFailure : public ReferenceCounted<ShardsAffectedByTeamFailure> {
public:
ShardsAffectedByTeamFailure() {}
enum class CheckMode { Normal = 0, ForceCheck, ForceNoCheck };
struct Team {
std::vector<UID> servers; // sorted
bool primary;
Team() : primary(true) {}
Team(std::vector<UID> const& servers, bool primary) : servers(servers), primary(primary) {}
bool operator<(const Team& r) const {
if (servers == r.servers)
return primary < r.primary;
return servers < r.servers;
}
bool operator>(const Team& r) const { return r < *this; }
bool operator<=(const Team& r) const { return !(*this > r); }
bool operator>=(const Team& r) const { return !(*this < r); }
bool operator==(const Team& r) const { return servers == r.servers && primary == r.primary; }
bool operator!=(const Team& r) const { return !(*this == r); }
std::string toString() const { return describe(servers); };
};
// This tracks the data distribution on the data distribution server so that teamTrackers can
// relocate the right shards when a team is degraded.
// The following are important to make sure that failure responses don't revert splits or merges:
// - The shards boundaries in the two data structures reflect "queued" RelocateShard requests
// (i.e. reflects the desired set of shards being tracked by dataDistributionTracker,
// rather than the status quo). These boundaries are modified in defineShard and the content
// of what servers correspond to each shard is a copy or union of the shards already there
// - The teams associated with each shard reflect either the sources for non-moving shards
// or the destination team for in-flight shards (the change is atomic with respect to team selection).
// moveShard() changes the servers associated with a shard and will never adjust the shard
// boundaries. If a move is received for a shard that has been redefined (the exact shard is
// no longer in the map), the servers will be set for all contained shards and added to all
// intersecting shards.
int getNumberOfShards(UID ssID) const;
std::vector<KeyRange> getShardsFor(Team team) const;
bool hasShards(Team team) const;
// The first element of the pair is either the source for non-moving shards or the destination team for in-flight
// shards The second element of the pair is all previous sources for in-flight shards
std::pair<std::vector<Team>, std::vector<Team>> getTeamsFor(KeyRangeRef keys);
void defineShard(KeyRangeRef keys);
void moveShard(KeyRangeRef keys, std::vector<Team> destinationTeam);
void finishMove(KeyRangeRef keys);
void check() const;
void setCheckMode(CheckMode);
PromiseStream<KeyRange> restartShardTracker;
private:
struct OrderByTeamKey {
bool operator()(const std::pair<Team, KeyRange>& lhs, const std::pair<Team, KeyRange>& rhs) const {
if (lhs.first < rhs.first)
return true;
if (lhs.first > rhs.first)
return false;
return lhs.second.begin < rhs.second.begin;
}
};
CheckMode checkMode = CheckMode::Normal;
KeyRangeMap<std::pair<std::vector<Team>, std::vector<Team>>>
shard_teams; // A shard can be affected by the failure of multiple teams if it is a queued merge, or when
// usable_regions > 1
std::set<std::pair<Team, KeyRange>, OrderByTeamKey> team_shards;
std::map<UID, int> storageServerShards;
void erase(Team team, KeyRange const& range);
void insert(Team team, KeyRange const& range);
};
// DDShardInfo is so named to avoid link-time name collision with ShardInfo within the StorageServer
struct DDShardInfo {
Key key;

View File

@ -0,0 +1,108 @@
/*
* ShardsAffectedByTeamFailure.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FOUNDATIONDB_SHARDSAFFECTEDBYTEAMFAILURE_H
#define FOUNDATIONDB_SHARDSAFFECTEDBYTEAMFAILURE_H
#include "flow/FastRef.h"
#include "flow/IRandom.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/KeyRangeMap.h"
class ShardsAffectedByTeamFailure : public ReferenceCounted<ShardsAffectedByTeamFailure> {
public:
ShardsAffectedByTeamFailure() {}
enum class CheckMode { Normal = 0, ForceCheck, ForceNoCheck };
struct Team {
std::vector<UID> servers; // sorted
bool primary;
Team() : primary(true) {}
Team(std::vector<UID> const& servers, bool primary) : servers(servers), primary(primary) {}
bool operator<(const Team& r) const {
if (servers == r.servers)
return primary < r.primary;
return servers < r.servers;
}
bool operator>(const Team& r) const { return r < *this; }
bool operator<=(const Team& r) const { return !(*this > r); }
bool operator>=(const Team& r) const { return !(*this < r); }
bool operator==(const Team& r) const { return servers == r.servers && primary == r.primary; }
bool operator!=(const Team& r) const { return !(*this == r); }
std::string toString() const { return describe(servers); };
};
// This tracks the data distribution on the data distribution server so that teamTrackers can
// relocate the right shards when a team is degraded.
// The following are important to make sure that failure responses don't revert splits or merges:
// - The shards boundaries in the two data structures reflect "queued" RelocateShard requests
// (i.e. reflects the desired set of shards being tracked by dataDistributionTracker,
// rather than the status quo). These boundaries are modified in defineShard and the content
// of what servers correspond to each shard is a copy or union of the shards already there
// - The teams associated with each shard reflect either the sources for non-moving shards
// or the destination team for in-flight shards (the change is atomic with respect to team selection).
// moveShard() changes the servers associated with a shard and will never adjust the shard
// boundaries. If a move is received for a shard that has been redefined (the exact shard is
// no longer in the map), the servers will be set for all contained shards and added to all
// intersecting shards.
int getNumberOfShards(UID ssID) const;
std::vector<KeyRange> getShardsFor(Team team) const;
bool hasShards(Team team) const;
// The first element of the pair is either the source for non-moving shards or the destination team for in-flight
// shards The second element of the pair is all previous sources for in-flight shards
std::pair<std::vector<Team>, std::vector<Team>> getTeamsFor(KeyRangeRef keys);
void defineShard(KeyRangeRef keys);
void moveShard(KeyRangeRef keys, std::vector<Team> destinationTeam);
void finishMove(KeyRangeRef keys);
void check() const;
void setCheckMode(CheckMode);
PromiseStream<KeyRange> restartShardTracker;
private:
struct OrderByTeamKey {
bool operator()(const std::pair<Team, KeyRange>& lhs, const std::pair<Team, KeyRange>& rhs) const {
if (lhs.first < rhs.first)
return true;
if (lhs.first > rhs.first)
return false;
return lhs.second.begin < rhs.second.begin;
}
};
CheckMode checkMode = CheckMode::Normal;
KeyRangeMap<std::pair<std::vector<Team>, std::vector<Team>>>
shard_teams; // A shard can be affected by the failure of multiple teams if it is a queued merge, or when
// usable_regions > 1
std::set<std::pair<Team, KeyRange>, OrderByTeamKey> team_shards;
std::map<UID, int> storageServerShards;
void erase(Team team, KeyRange const& range);
void insert(Team team, KeyRange const& range);
};
#endif // FOUNDATIONDB_SHARDSAFFECTEDBYTEAMFAILURE_H

View File

@ -24,6 +24,7 @@
#include <atomic>
#include <cstdint>
#include <utility>
// The thread safety this class provides is that it's safe to call addref and
// delref on the same object concurrently in different threads. Subclass does