diff --git a/fdbrpc/TokenCache.actor.cpp b/fdbrpc/TokenCache.actor.cpp index 574a110d53..7c6d84a8d6 100644 --- a/fdbrpc/TokenCache.actor.cpp +++ b/fdbrpc/TokenCache.actor.cpp @@ -265,7 +265,7 @@ TEST_CASE("/fdbrpc/authz/TokenCache/BadTokens") { }, { [](Arena&, IRandom& rng, authz::jwt::TokenRef& token) { - token.expiresAtUnixTime = uint64_t(g_network->timer() - 10 - rng.random01() * 50); + token.expiresAtUnixTime = uint64_t(std::max(g_network->timer() - 10 - rng.random01() * 50, 0)); }, "ExpiredToken", }, diff --git a/fdbserver/DataDistributionTracker.actor.cpp b/fdbserver/DataDistributionTracker.actor.cpp index c1ce3023ff..7f85a63927 100644 --- a/fdbserver/DataDistributionTracker.actor.cpp +++ b/fdbserver/DataDistributionTracker.actor.cpp @@ -1055,179 +1055,3 @@ ACTOR Future dataDistributionTracker(Reference in throw e; } } - -std::vector ShardsAffectedByTeamFailure::getShardsFor(Team team) const { - std::vector r; - for (auto it = team_shards.lower_bound(std::pair(team, KeyRangeRef())); - it != team_shards.end() && it->first == team; - ++it) - r.push_back(it->second); - return r; -} - -bool ShardsAffectedByTeamFailure::hasShards(Team team) const { - auto it = team_shards.lower_bound(std::pair(team, KeyRangeRef())); - return it != team_shards.end() && it->first == team; -} - -int ShardsAffectedByTeamFailure::getNumberOfShards(UID ssID) const { - auto it = storageServerShards.find(ssID); - return it == storageServerShards.end() ? 0 : it->second; -} - -std::pair, std::vector> -ShardsAffectedByTeamFailure::getTeamsFor(KeyRangeRef keys) { - return shard_teams[keys.begin]; -} - -void ShardsAffectedByTeamFailure::erase(Team team, KeyRange const& range) { - DisabledTraceEvent(SevDebug, "ShardsAffectedByTeamFailureErase") - .detail("Range", range) - .detail("Team", team.toString()); - if (team_shards.erase(std::pair(team, range)) > 0) { - for (auto uid = team.servers.begin(); uid != team.servers.end(); ++uid) { - // Safeguard against going negative after eraseServer() sets value to 0 - if (storageServerShards[*uid] > 0) { - storageServerShards[*uid]--; - } - } - } -} - -void ShardsAffectedByTeamFailure::insert(Team team, KeyRange const& range) { - DisabledTraceEvent(SevDebug, "ShardsAffectedByTeamFailureInsert") - .detail("Range", range) - .detail("Team", team.toString()); - if (team_shards.insert(std::pair(team, range)).second) { - for (auto uid = team.servers.begin(); uid != team.servers.end(); ++uid) - storageServerShards[*uid]++; - } -} - -void ShardsAffectedByTeamFailure::defineShard(KeyRangeRef keys) { - std::vector teams; - std::vector prevTeams; - auto rs = shard_teams.intersectingRanges(keys); - for (auto it = rs.begin(); it != rs.end(); ++it) { - for (auto t = it->value().first.begin(); t != it->value().first.end(); ++t) { - teams.push_back(*t); - erase(*t, it->range()); - } - for (auto t = it->value().second.begin(); t != it->value().second.end(); ++t) { - prevTeams.push_back(*t); - } - } - uniquify(teams); - uniquify(prevTeams); - - /*TraceEvent("ShardsAffectedByTeamFailureDefine") - .detail("KeyBegin", keys.begin) - .detail("KeyEnd", keys.end) - .detail("TeamCount", teams.size());*/ - - auto affectedRanges = shard_teams.getAffectedRangesAfterInsertion(keys); - shard_teams.insert(keys, std::make_pair(teams, prevTeams)); - - for (auto r = affectedRanges.begin(); r != affectedRanges.end(); ++r) { - auto& t = shard_teams[r->begin]; - for (auto it = t.first.begin(); it != t.first.end(); ++it) { - insert(*it, *r); - } - } - check(); -} - -// Move keys to destinationTeams by updating shard_teams -void ShardsAffectedByTeamFailure::moveShard(KeyRangeRef keys, std::vector destinationTeams) { - /*TraceEvent("ShardsAffectedByTeamFailureMove") - .detail("KeyBegin", keys.begin) - .detail("KeyEnd", keys.end) - .detail("NewTeamSize", destinationTeam.size()) - .detail("NewTeam", describe(destinationTeam));*/ - - auto ranges = shard_teams.intersectingRanges(keys); - std::vector, std::vector>, KeyRange>> modifiedShards; - for (auto it = ranges.begin(); it != ranges.end(); ++it) { - if (keys.contains(it->range())) { - // erase the many teams that were associated with this one shard - for (auto t = it->value().first.begin(); t != it->value().first.end(); ++t) { - erase(*t, it->range()); - } - - // save this modification for later insertion - std::vector prevTeams = it->value().second; - prevTeams.insert(prevTeams.end(), it->value().first.begin(), it->value().first.end()); - uniquify(prevTeams); - - modifiedShards.push_back(std::pair, std::vector>, KeyRange>( - std::make_pair(destinationTeams, prevTeams), it->range())); - } else { - // for each range that touches this move, add our team as affecting this range - for (auto& team : destinationTeams) { - insert(team, it->range()); - } - - // if we are not in the list of teams associated with this shard, add us in - auto& teams = it->value(); - teams.second.insert(teams.second.end(), teams.first.begin(), teams.first.end()); - uniquify(teams.second); - - teams.first.insert(teams.first.end(), destinationTeams.begin(), destinationTeams.end()); - uniquify(teams.first); - } - } - - // we cannot modify the KeyRangeMap while iterating through it, so add saved modifications now - for (int i = 0; i < modifiedShards.size(); i++) { - for (auto& t : modifiedShards[i].first.first) { - insert(t, modifiedShards[i].second); - } - shard_teams.insert(modifiedShards[i].second, modifiedShards[i].first); - } - - check(); -} - -void ShardsAffectedByTeamFailure::finishMove(KeyRangeRef keys) { - auto ranges = shard_teams.containedRanges(keys); - for (auto it = ranges.begin(); it != ranges.end(); ++it) { - it.value().second.clear(); - } -} - -void ShardsAffectedByTeamFailure::setCheckMode(CheckMode mode) { - checkMode = mode; -} - -void ShardsAffectedByTeamFailure::check() const { - if (checkMode == CheckMode::ForceNoCheck) - return; - if (EXPENSIVE_VALIDATION || checkMode == CheckMode::ForceCheck) { - for (auto t = team_shards.begin(); t != team_shards.end(); ++t) { - auto i = shard_teams.rangeContaining(t->second.begin); - if (i->range() != t->second || !std::count(i->value().first.begin(), i->value().first.end(), t->first)) { - ASSERT(false); - } - } - auto rs = shard_teams.ranges(); - for (auto i = rs.begin(); i != rs.end(); ++i) { - for (auto t = i->value().first.begin(); t != i->value().first.end(); ++t) { - if (!team_shards.count(std::make_pair(*t, i->range()))) { - std::string teamDesc, shards; - for (int k = 0; k < t->servers.size(); k++) - teamDesc += format("%llx ", t->servers[k].first()); - for (auto x = team_shards.lower_bound(std::make_pair(*t, KeyRangeRef())); - x != team_shards.end() && x->first == *t; - ++x) - shards += printable(x->second.begin) + "-" + printable(x->second.end) + ","; - TraceEvent(SevError, "SATFInvariantError2") - .detail("KB", i->begin()) - .detail("KE", i->end()) - .detail("Team", teamDesc) - .detail("Shards", shards); - ASSERT(false); - } - } - } - } -} diff --git a/fdbserver/ShardsAffectedByTeamFailure.cpp b/fdbserver/ShardsAffectedByTeamFailure.cpp new file mode 100644 index 0000000000..a121f16a0f --- /dev/null +++ b/fdbserver/ShardsAffectedByTeamFailure.cpp @@ -0,0 +1,197 @@ +/* + * ShardsAffectedByTeamFailure.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbserver/ShardsAffectedByTeamFailure.h" + +std::vector ShardsAffectedByTeamFailure::getShardsFor(Team team) const { + std::vector r; + for (auto it = team_shards.lower_bound(std::pair(team, KeyRangeRef())); + it != team_shards.end() && it->first == team; + ++it) + r.push_back(it->second); + return r; +} + +bool ShardsAffectedByTeamFailure::hasShards(Team team) const { + auto it = team_shards.lower_bound(std::pair(team, KeyRangeRef())); + return it != team_shards.end() && it->first == team; +} + +int ShardsAffectedByTeamFailure::getNumberOfShards(UID ssID) const { + auto it = storageServerShards.find(ssID); + return it == storageServerShards.end() ? 0 : it->second; +} + +std::pair, std::vector> +ShardsAffectedByTeamFailure::getTeamsFor(KeyRangeRef keys) { + return shard_teams[keys.begin]; +} + +void ShardsAffectedByTeamFailure::erase(Team team, KeyRange const& range) { + DisabledTraceEvent(SevDebug, "ShardsAffectedByTeamFailureErase") + .detail("Range", range) + .detail("Team", team.toString()); + if (team_shards.erase(std::pair(team, range)) > 0) { + for (auto uid = team.servers.begin(); uid != team.servers.end(); ++uid) { + // Safeguard against going negative after eraseServer() sets value to 0 + if (storageServerShards[*uid] > 0) { + storageServerShards[*uid]--; + } + } + } +} + +void ShardsAffectedByTeamFailure::insert(Team team, KeyRange const& range) { + DisabledTraceEvent(SevDebug, "ShardsAffectedByTeamFailureInsert") + .detail("Range", range) + .detail("Team", team.toString()); + if (team_shards.insert(std::pair(team, range)).second) { + for (auto uid = team.servers.begin(); uid != team.servers.end(); ++uid) + storageServerShards[*uid]++; + } +} + +void ShardsAffectedByTeamFailure::defineShard(KeyRangeRef keys) { + std::vector teams; + std::vector prevTeams; + auto rs = shard_teams.intersectingRanges(keys); + for (auto it = rs.begin(); it != rs.end(); ++it) { + for (auto t = it->value().first.begin(); t != it->value().first.end(); ++t) { + teams.push_back(*t); + erase(*t, it->range()); + } + for (auto t = it->value().second.begin(); t != it->value().second.end(); ++t) { + prevTeams.push_back(*t); + } + } + uniquify(teams); + uniquify(prevTeams); + + /*TraceEvent("ShardsAffectedByTeamFailureDefine") + .detail("KeyBegin", keys.begin) + .detail("KeyEnd", keys.end) + .detail("TeamCount", teams.size());*/ + + auto affectedRanges = shard_teams.getAffectedRangesAfterInsertion(keys); + shard_teams.insert(keys, std::make_pair(teams, prevTeams)); + + for (auto r = affectedRanges.begin(); r != affectedRanges.end(); ++r) { + auto& t = shard_teams[r->begin]; + for (auto it = t.first.begin(); it != t.first.end(); ++it) { + insert(*it, *r); + } + } + check(); +} + +// Move keys to destinationTeams by updating shard_teams +void ShardsAffectedByTeamFailure::moveShard(KeyRangeRef keys, std::vector destinationTeams) { + /*TraceEvent("ShardsAffectedByTeamFailureMove") + .detail("KeyBegin", keys.begin) + .detail("KeyEnd", keys.end) + .detail("NewTeamSize", destinationTeam.size()) + .detail("NewTeam", describe(destinationTeam));*/ + + auto ranges = shard_teams.intersectingRanges(keys); + std::vector, std::vector>, KeyRange>> modifiedShards; + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + if (keys.contains(it->range())) { + // erase the many teams that were associated with this one shard + for (auto t = it->value().first.begin(); t != it->value().first.end(); ++t) { + erase(*t, it->range()); + } + + // save this modification for later insertion + std::vector prevTeams = it->value().second; + prevTeams.insert(prevTeams.end(), it->value().first.begin(), it->value().first.end()); + uniquify(prevTeams); + + modifiedShards.push_back(std::pair, std::vector>, KeyRange>( + std::make_pair(destinationTeams, prevTeams), it->range())); + } else { + // for each range that touches this move, add our team as affecting this range + for (auto& team : destinationTeams) { + insert(team, it->range()); + } + + // if we are not in the list of teams associated with this shard, add us in + auto& teams = it->value(); + teams.second.insert(teams.second.end(), teams.first.begin(), teams.first.end()); + uniquify(teams.second); + + teams.first.insert(teams.first.end(), destinationTeams.begin(), destinationTeams.end()); + uniquify(teams.first); + } + } + + // we cannot modify the KeyRangeMap while iterating through it, so add saved modifications now + for (int i = 0; i < modifiedShards.size(); i++) { + for (auto& t : modifiedShards[i].first.first) { + insert(t, modifiedShards[i].second); + } + shard_teams.insert(modifiedShards[i].second, modifiedShards[i].first); + } + + check(); +} + +void ShardsAffectedByTeamFailure::finishMove(KeyRangeRef keys) { + auto ranges = shard_teams.containedRanges(keys); + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + it.value().second.clear(); + } +} + +void ShardsAffectedByTeamFailure::setCheckMode(CheckMode mode) { + checkMode = mode; +} + +void ShardsAffectedByTeamFailure::check() const { + if (checkMode == CheckMode::ForceNoCheck) + return; + if (EXPENSIVE_VALIDATION || checkMode == CheckMode::ForceCheck) { + for (auto t = team_shards.begin(); t != team_shards.end(); ++t) { + auto i = shard_teams.rangeContaining(t->second.begin); + if (i->range() != t->second || !std::count(i->value().first.begin(), i->value().first.end(), t->first)) { + ASSERT(false); + } + } + auto rs = shard_teams.ranges(); + for (auto i = rs.begin(); i != rs.end(); ++i) { + for (auto t = i->value().first.begin(); t != i->value().first.end(); ++t) { + if (!team_shards.count(std::make_pair(*t, i->range()))) { + std::string teamDesc, shards; + for (int k = 0; k < t->servers.size(); k++) + teamDesc += format("%llx ", t->servers[k].first()); + for (auto x = team_shards.lower_bound(std::make_pair(*t, KeyRangeRef())); + x != team_shards.end() && x->first == *t; + ++x) + shards += printable(x->second.begin) + "-" + printable(x->second.end) + ","; + TraceEvent(SevError, "SATFInvariantError2") + .detail("KB", i->begin()) + .detail("KE", i->end()) + .detail("Team", teamDesc) + .detail("Shards", shards); + ASSERT(false); + } + } + } + } +} diff --git a/fdbserver/include/fdbserver/DataDistribution.actor.h b/fdbserver/include/fdbserver/DataDistribution.actor.h index 67a80252cf..3e9b03e02c 100644 --- a/fdbserver/include/fdbserver/DataDistribution.actor.h +++ b/fdbserver/include/fdbserver/DataDistribution.actor.h @@ -30,6 +30,7 @@ #include "fdbserver/Knobs.h" #include "fdbserver/LogSystem.h" #include "fdbserver/MoveKeys.actor.h" +#include "fdbserver/ShardsAffectedByTeamFailure.h" #include #include @@ -290,86 +291,6 @@ struct TeamCollectionInterface { PromiseStream getTeam; }; -class ShardsAffectedByTeamFailure : public ReferenceCounted { -public: - ShardsAffectedByTeamFailure() {} - - enum class CheckMode { Normal = 0, ForceCheck, ForceNoCheck }; - struct Team { - std::vector servers; // sorted - bool primary; - - Team() : primary(true) {} - Team(std::vector const& servers, bool primary) : servers(servers), primary(primary) {} - - bool operator<(const Team& r) const { - if (servers == r.servers) - return primary < r.primary; - return servers < r.servers; - } - bool operator>(const Team& r) const { return r < *this; } - bool operator<=(const Team& r) const { return !(*this > r); } - bool operator>=(const Team& r) const { return !(*this < r); } - bool operator==(const Team& r) const { return servers == r.servers && primary == r.primary; } - bool operator!=(const Team& r) const { return !(*this == r); } - - std::string toString() const { return describe(servers); }; - }; - - // This tracks the data distribution on the data distribution server so that teamTrackers can - // relocate the right shards when a team is degraded. - - // The following are important to make sure that failure responses don't revert splits or merges: - // - The shards boundaries in the two data structures reflect "queued" RelocateShard requests - // (i.e. reflects the desired set of shards being tracked by dataDistributionTracker, - // rather than the status quo). These boundaries are modified in defineShard and the content - // of what servers correspond to each shard is a copy or union of the shards already there - // - The teams associated with each shard reflect either the sources for non-moving shards - // or the destination team for in-flight shards (the change is atomic with respect to team selection). - // moveShard() changes the servers associated with a shard and will never adjust the shard - // boundaries. If a move is received for a shard that has been redefined (the exact shard is - // no longer in the map), the servers will be set for all contained shards and added to all - // intersecting shards. - - int getNumberOfShards(UID ssID) const; - std::vector getShardsFor(Team team) const; - bool hasShards(Team team) const; - - // The first element of the pair is either the source for non-moving shards or the destination team for in-flight - // shards The second element of the pair is all previous sources for in-flight shards - std::pair, std::vector> getTeamsFor(KeyRangeRef keys); - - void defineShard(KeyRangeRef keys); - void moveShard(KeyRangeRef keys, std::vector destinationTeam); - void finishMove(KeyRangeRef keys); - void check() const; - - void setCheckMode(CheckMode); - - PromiseStream restartShardTracker; - -private: - struct OrderByTeamKey { - bool operator()(const std::pair& lhs, const std::pair& rhs) const { - if (lhs.first < rhs.first) - return true; - if (lhs.first > rhs.first) - return false; - return lhs.second.begin < rhs.second.begin; - } - }; - - CheckMode checkMode = CheckMode::Normal; - KeyRangeMap, std::vector>> - shard_teams; // A shard can be affected by the failure of multiple teams if it is a queued merge, or when - // usable_regions > 1 - std::set, OrderByTeamKey> team_shards; - std::map storageServerShards; - - void erase(Team team, KeyRange const& range); - void insert(Team team, KeyRange const& range); -}; - // DDShardInfo is so named to avoid link-time name collision with ShardInfo within the StorageServer struct DDShardInfo { Key key; diff --git a/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h b/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h new file mode 100644 index 0000000000..cb6386d5e5 --- /dev/null +++ b/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h @@ -0,0 +1,108 @@ +/* + * ShardsAffectedByTeamFailure.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef FOUNDATIONDB_SHARDSAFFECTEDBYTEAMFAILURE_H +#define FOUNDATIONDB_SHARDSAFFECTEDBYTEAMFAILURE_H + +#include "flow/FastRef.h" +#include "flow/IRandom.h" +#include "fdbclient/FDBTypes.h" +#include "fdbclient/KeyRangeMap.h" + +class ShardsAffectedByTeamFailure : public ReferenceCounted { +public: + ShardsAffectedByTeamFailure() {} + + enum class CheckMode { Normal = 0, ForceCheck, ForceNoCheck }; + struct Team { + std::vector servers; // sorted + bool primary; + + Team() : primary(true) {} + Team(std::vector const& servers, bool primary) : servers(servers), primary(primary) {} + + bool operator<(const Team& r) const { + if (servers == r.servers) + return primary < r.primary; + return servers < r.servers; + } + bool operator>(const Team& r) const { return r < *this; } + bool operator<=(const Team& r) const { return !(*this > r); } + bool operator>=(const Team& r) const { return !(*this < r); } + bool operator==(const Team& r) const { return servers == r.servers && primary == r.primary; } + bool operator!=(const Team& r) const { return !(*this == r); } + + std::string toString() const { return describe(servers); }; + }; + + // This tracks the data distribution on the data distribution server so that teamTrackers can + // relocate the right shards when a team is degraded. + + // The following are important to make sure that failure responses don't revert splits or merges: + // - The shards boundaries in the two data structures reflect "queued" RelocateShard requests + // (i.e. reflects the desired set of shards being tracked by dataDistributionTracker, + // rather than the status quo). These boundaries are modified in defineShard and the content + // of what servers correspond to each shard is a copy or union of the shards already there + // - The teams associated with each shard reflect either the sources for non-moving shards + // or the destination team for in-flight shards (the change is atomic with respect to team selection). + // moveShard() changes the servers associated with a shard and will never adjust the shard + // boundaries. If a move is received for a shard that has been redefined (the exact shard is + // no longer in the map), the servers will be set for all contained shards and added to all + // intersecting shards. + + int getNumberOfShards(UID ssID) const; + std::vector getShardsFor(Team team) const; + bool hasShards(Team team) const; + + // The first element of the pair is either the source for non-moving shards or the destination team for in-flight + // shards The second element of the pair is all previous sources for in-flight shards + std::pair, std::vector> getTeamsFor(KeyRangeRef keys); + + void defineShard(KeyRangeRef keys); + void moveShard(KeyRangeRef keys, std::vector destinationTeam); + void finishMove(KeyRangeRef keys); + void check() const; + + void setCheckMode(CheckMode); + + PromiseStream restartShardTracker; + +private: + struct OrderByTeamKey { + bool operator()(const std::pair& lhs, const std::pair& rhs) const { + if (lhs.first < rhs.first) + return true; + if (lhs.first > rhs.first) + return false; + return lhs.second.begin < rhs.second.begin; + } + }; + + CheckMode checkMode = CheckMode::Normal; + KeyRangeMap, std::vector>> + shard_teams; // A shard can be affected by the failure of multiple teams if it is a queued merge, or when + // usable_regions > 1 + std::set, OrderByTeamKey> team_shards; + std::map storageServerShards; + + void erase(Team team, KeyRange const& range); + void insert(Team team, KeyRange const& range); +}; + +#endif // FOUNDATIONDB_SHARDSAFFECTEDBYTEAMFAILURE_H diff --git a/flow/include/flow/FastRef.h b/flow/include/flow/FastRef.h index 3c65cc6002..2e49b34de2 100644 --- a/flow/include/flow/FastRef.h +++ b/flow/include/flow/FastRef.h @@ -24,6 +24,7 @@ #include #include +#include // The thread safety this class provides is that it's safe to call addref and // delref on the same object concurrently in different threads. Subclass does