mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-14 18:02:31 +08:00
Snake naming of keys and added comments to all new functions.
This commit is contained in:
parent
73752f441b
commit
4ad5926a25
@ -958,10 +958,10 @@ that process, and wait for necessary data to be moved away.
|
||||
#. ``\xff\xff/management/consistency_check_suspended`` Read/write. Setting or reading this key will set or read the underlying system key ``\xff\x02/ConsistencyCheck/Suspend``. The value of this special key is unused; thus, if present, it will be empty. In particular, if the key exists, then the consistency check is suspended. For more details, see the help text of the ``fdbcli`` command ``consistencycheck``.
|
||||
#. ``\xff\xff/management/db_locked`` Read/write. A single key that can be read and modified. Setting the key will lock the database and clearing the key will unlock it. If the database is already locked, then the commit will fail with the ``special_keys_api_failure`` error. For more details, see the help text of the ``fdbcli`` commands ``lock`` and ``unlock``.
|
||||
#. ``\xff\xff/management/auto_coordinators`` Read-only. A single key that, if read, will return a set of processes which are able to satisfy the current redundancy level and serve as new coordinators. The return value is formatted as a comma-delimited string of network addresses of coordinators, i.e. ``<ip:port>,<ip:port>,...,<ip:port>``.
|
||||
#. ``\xff\xff/management/excludedlocality/<locality>`` Read/write. Indicates that the cluster should move data away from processes matching ``<locality>``, so that they can be safely removed. See :ref:`removing machines from a cluster <removing-machines-from-a-cluster>` for documentation for the corresponding fdbcli command.
|
||||
#. ``\xff\xff/management/failedlocality/<locality>`` Read/write. Indicates that the cluster should consider matching processes as permanently failed. This allows the cluster to avoid maintaining extra state and doing extra work in the hope that these processes come back. See :ref:`removing machines from a cluster <removing-machines-from-a-cluster>` for documentation for the corresponding fdbcli command.
|
||||
#. ``\xff\xff/management/options/excludedlocality/force`` Read/write. Setting this key disables safety checks for writes to ``\xff\xff/management/excludedlocality/<locality>``. Setting this key only has an effect in the current transaction and is not persisted on commit.
|
||||
#. ``\xff\xff/management/options/failedlocality/force`` Read/write. Setting this key disables safety checks for writes to ``\xff\xff/management/failedlocality/<locality>``. Setting this key only has an effect in the current transaction and is not persisted on commit.
|
||||
#. ``\xff\xff/management/excluded_locality/<locality>`` Read/write. Indicates that the cluster should move data away from processes matching ``<locality>``, so that they can be safely removed. See :ref:`removing machines from a cluster <removing-machines-from-a-cluster>` for documentation for the corresponding fdbcli command.
|
||||
#. ``\xff\xff/management/failed_locality/<locality>`` Read/write. Indicates that the cluster should consider matching processes as permanently failed. This allows the cluster to avoid maintaining extra state and doing extra work in the hope that these processes come back. See :ref:`removing machines from a cluster <removing-machines-from-a-cluster>` for documentation for the corresponding fdbcli command.
|
||||
#. ``\xff\xff/management/options/excluded_locality/force`` Read/write. Setting this key disables safety checks for writes to ``\xff\xff/management/excluded_locality/<locality>``. Setting this key only has an effect in the current transaction and is not persisted on commit.
|
||||
#. ``\xff\xff/management/options/failed_locality/force`` Read/write. Setting this key disables safety checks for writes to ``\xff\xff/management/failed_locality/<locality>``. Setting this key only has an effect in the current transaction and is not persisted on commit.
|
||||
|
||||
An exclusion is syntactically either an ip address (e.g. ``127.0.0.1``), or
|
||||
an ip address and port (e.g. ``127.0.0.1:4500``) or locality (e.g. ``dcid:primary-satellite`` or
|
||||
|
@ -38,7 +38,7 @@ $ sudo service foundationdb stop
|
||||
|
||||
7. Exclude the original machines from the cluster using ``exclude`` in ``fdbcli``. This command will not return until all database state has been moved off of the original machines and fully replicated to the new machines. For example::
|
||||
|
||||
fdb> exclude 192.168.1.1:4500 192.168.1.2:4500 192.168.1.3:4500
|
||||
fdb> exclude 192.168.1.1:4500 192.168.1.2:4500 192.168.1.3:4500 dcid:primary-satellite zoneid:primary-satellite-log-2 machineid:primary-stateless-1 processid:223be2da244ca0182375364e4d122c30
|
||||
|
||||
8. Run ``coordinators auto`` in ``fdbcli`` to move coordination state to the new machines. Please note that this will cause the fdb.cluster file to be updated with the addresses of the new machines. Any currently connected clients will be notified and (assuming they have appropriate file system :ref:`permissions <cluster_file_permissions>`) will update their own copy of the cluster file. As long as the original machines are still running, any clients that connect to them will be automatically forwarded to the new cluster coordinators. However, if you have a client that has not yet connected or only connects intermittently, you will need to copy the new cluster file from one of the new machines to the client machine.
|
||||
|
||||
|
@ -2391,6 +2391,7 @@ ACTOR Future<bool> coordinators(Database db, std::vector<StringRef> tokens, bool
|
||||
return err;
|
||||
}
|
||||
|
||||
// Includes the given servers, specified as IP addresses or localities, back into the cluster.
|
||||
ACTOR Future<bool> include(Database db, std::vector<StringRef> tokens) {
|
||||
std::vector<AddressExclusion> addresses;
|
||||
state std::vector<std::string> localities;
|
||||
@ -2403,6 +2404,7 @@ ACTOR Future<bool> include(Database db, std::vector<StringRef> tokens) {
|
||||
failed = true;
|
||||
} else if (t->startsWith(ExcludeLocalityKeyDcIdPrefix) || t->startsWith(ExcludeLocalityKeyMachineIdPrefix) ||
|
||||
t->startsWith(ExcludeLocalityKeyProcessIdPrefix) || t->startsWith(ExcludeLocalityKeyZoneIdPrefix)) {
|
||||
// if the token starts with any locality prefix.
|
||||
localities.push_back(t->toString());
|
||||
} else {
|
||||
auto a = AddressExclusion::parse(*t);
|
||||
@ -2427,6 +2429,7 @@ ACTOR Future<bool> include(Database db, std::vector<StringRef> tokens) {
|
||||
wait(makeInterruptable(includeServers(db, addresses, failed)));
|
||||
}
|
||||
if (!localities.empty()) {
|
||||
// includes the servers that belong to given localities.
|
||||
wait(makeInterruptable(includeLocalities(db, &localities, failed, all)));
|
||||
}
|
||||
}
|
||||
@ -2488,6 +2491,7 @@ ACTOR Future<bool> exclude(Database db,
|
||||
if (localityAddresses.empty()) {
|
||||
noMatchLocalities.push_back(t->toString());
|
||||
} else {
|
||||
// add all the server ipaddresses that belong to the given localities to the exclusionSet.
|
||||
std::copy(localityAddresses.begin(), localityAddresses.end(), back_inserter(exclusionVector));
|
||||
exclusionSet.insert(localityAddresses.begin(), localityAddresses.end());
|
||||
}
|
||||
@ -2509,7 +2513,7 @@ ACTOR Future<bool> exclude(Database db,
|
||||
}
|
||||
|
||||
if (exclusionAddresses.empty() && exclusionLocalities.empty()) {
|
||||
fprintf(stderr, "ERROR: Atleast one valid network endpoint address or a locality is not provided\n");
|
||||
fprintf(stderr, "ERROR: At least one valid network endpoint address or a locality is not provided\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -632,6 +632,7 @@ std::set<AddressExclusion> DatabaseConfiguration::getExcludedServers() const {
|
||||
return addrs;
|
||||
}
|
||||
|
||||
// checks if the locality is excluded or not by checking if the key is present.
|
||||
bool DatabaseConfiguration::isExcludedLocality(const LocalityData& locality) const {
|
||||
return (locality.dcId().present() ? get(encodeExcludedLocalityKey(ExcludeLocalityKeyDcIdPrefix.toString() +
|
||||
locality.dcId().get().toString()))
|
||||
@ -669,6 +670,9 @@ bool DatabaseConfiguration::isExcludedLocality(const LocalityData& locality) con
|
||||
: false);
|
||||
}
|
||||
|
||||
// checks if this machineid of given locality is excluded.
|
||||
// A machine can be excluded either as part of dcid exclusion or zoneid exclusion
|
||||
// or this explicit machineid exclusion.
|
||||
bool DatabaseConfiguration::isMachineExcluded(const LocalityData& locality) const {
|
||||
return (locality.dcId().present() ? get(encodeExcludedLocalityKey(ExcludeLocalityKeyDcIdPrefix.toString() +
|
||||
locality.dcId().get().toString()))
|
||||
@ -697,6 +701,7 @@ bool DatabaseConfiguration::isMachineExcluded(const LocalityData& locality) cons
|
||||
: false);
|
||||
}
|
||||
|
||||
// Gets the list of already excluded localities (with failed option)
|
||||
std::set<std::string> DatabaseConfiguration::getExcludedLocalities() const {
|
||||
// TODO: revisit all const_cast usages
|
||||
const_cast<DatabaseConfiguration*>(this)->makeConfigurationImmutable();
|
||||
|
@ -1596,6 +1596,7 @@ ACTOR Future<Void> excludeServers(Database cx, vector<AddressExclusion> servers,
|
||||
}
|
||||
}
|
||||
|
||||
// excludes localities by setting the keys in api version below 7.0
|
||||
void excludeLocalities(Transaction& tr, std::unordered_set<std::string>* localities, bool failed) {
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
@ -1615,6 +1616,8 @@ void excludeLocalities(Transaction& tr, std::unordered_set<std::string>* localit
|
||||
TraceEvent("ExcludeLocalitiesCommit").detail("Localities", describe(*localities)).detail("ExcludeFailed", failed);
|
||||
}
|
||||
|
||||
// Exclude the servers matching the given set of localities from use as state servers.
|
||||
// excludes localities by setting the keys.
|
||||
ACTOR Future<Void> excludeLocalities(Database cx, std::unordered_set<std::string>* localities, bool failed) {
|
||||
if (cx->apiVersionAtLeast(700)) {
|
||||
state ReadYourWritesTransaction ryw(cx);
|
||||
@ -1757,6 +1760,8 @@ ACTOR Future<Void> includeServers(Database cx, vector<AddressExclusion> servers,
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the given localities from the exclusion list.
|
||||
// include localities by clearing the keys.
|
||||
ACTOR Future<Void> includeLocalities(Database cx, vector<std::string>* localities, bool failed, bool includeAll) {
|
||||
state std::string versionKey = deterministicRandom()->randomUniqueID().toString();
|
||||
if (cx->apiVersionAtLeast(700)) {
|
||||
@ -1912,6 +1917,7 @@ ACTOR Future<vector<AddressExclusion>> getExcludedServers(Database cx) {
|
||||
}
|
||||
}
|
||||
|
||||
// Get the current list of excluded localities by reading the keys.
|
||||
ACTOR Future<vector<std::string>> getExcludedLocalities(Transaction* tr) {
|
||||
state RangeResult r = wait(tr->getRange(excludedLocalityKeys, CLIENT_KNOBS->TOO_MANY));
|
||||
ASSERT(!r.more && r.size() < CLIENT_KNOBS->TOO_MANY);
|
||||
@ -1931,12 +1937,13 @@ ACTOR Future<vector<std::string>> getExcludedLocalities(Transaction* tr) {
|
||||
return excludedLocalities;
|
||||
}
|
||||
|
||||
// Get the list of excluded localities by reading the keys.
|
||||
ACTOR Future<vector<std::string>> getExcludedLocalities(Database cx) {
|
||||
state Transaction tr(cx);
|
||||
loop {
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); // necessary?
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
vector<std::string> exclusions = wait(getExcludedLocalities(&tr));
|
||||
return exclusions;
|
||||
@ -1946,6 +1953,8 @@ ACTOR Future<vector<std::string>> getExcludedLocalities(Database cx) {
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes the locality string to a pair of locality prefix and its value.
|
||||
// The prefix could be dcid, zoneid, machineid, processid.
|
||||
std::pair<std::string, std::string> decodeLocality(std::string& locality) {
|
||||
StringRef localityRef(locality.c_str());
|
||||
if (localityRef.startsWith(ExcludeLocalityKeyDcIdPrefix)) {
|
||||
@ -1965,6 +1974,8 @@ std::pair<std::string, std::string> decodeLocality(std::string& locality) {
|
||||
return std::make_pair("", "");
|
||||
}
|
||||
|
||||
// Returns the list of IPAddresses of the workers that match the given locality.
|
||||
// Example: locality="dcid:primary" returns all the ip addresses of the workers in the primary dc.
|
||||
std::set<AddressExclusion> getAddressesByLocality(std::vector<ProcessData>& workers, std::string locality) {
|
||||
std::pair<std::string, std::string> localityKeyValue = decodeLocality(locality);
|
||||
|
||||
|
@ -78,10 +78,10 @@ std::unordered_map<std::string, KeyRange> SpecialKeySpace::managementApiCommandT
|
||||
KeyRangeRef(LiteralStringRef("failed/"), LiteralStringRef("failed0"))
|
||||
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
|
||||
{ "excludedlocality",
|
||||
KeyRangeRef(LiteralStringRef("excludedlocality/"), LiteralStringRef("excludedlocality0"))
|
||||
KeyRangeRef(LiteralStringRef("excluded_locality/"), LiteralStringRef("excluded_locality0"))
|
||||
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
|
||||
{ "failedlocality",
|
||||
KeyRangeRef(LiteralStringRef("failedlocality/"), LiteralStringRef("failedlocality0"))
|
||||
KeyRangeRef(LiteralStringRef("failed_locality/"), LiteralStringRef("failed_locality0"))
|
||||
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
|
||||
{ "lock", singleKeyRange(LiteralStringRef("db_locked")).withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
|
||||
{ "consistencycheck",
|
||||
@ -2120,6 +2120,7 @@ Future<Optional<std::string>> DataDistributionImpl::commit(ReadYourWritesTransac
|
||||
return msg;
|
||||
}
|
||||
|
||||
// Clears the special management api keys excludeLocality and failedLocality.
|
||||
void includeLocalities(ReadYourWritesTransaction* ryw) {
|
||||
ryw->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
ryw->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
@ -2153,6 +2154,8 @@ void includeLocalities(ReadYourWritesTransaction* ryw) {
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the excluded_locality and failed_locality keys using the management api,
|
||||
// parses them and returns the list.
|
||||
bool parseLocalitiesFromKeys(ReadYourWritesTransaction* ryw,
|
||||
bool failed,
|
||||
std::unordered_set<std::string>& localities,
|
||||
@ -2196,6 +2199,8 @@ bool parseLocalitiesFromKeys(ReadYourWritesTransaction* ryw,
|
||||
return true;
|
||||
}
|
||||
|
||||
// On commit, parses the special exclusion keys and get the localities to be excluded, check for exclusions
|
||||
// and add them to the exclusion list. Also, clears the special management api keys with includeLocalities.
|
||||
ACTOR Future<Optional<std::string>> excludeLocalityCommitActor(ReadYourWritesTransaction* ryw, bool failed) {
|
||||
state Optional<std::string> result;
|
||||
state std::unordered_set<std::string> localities;
|
||||
@ -2242,6 +2247,7 @@ Key ExcludedLocalitiesRangeImpl::encode(const KeyRef& key) const {
|
||||
}
|
||||
|
||||
// Commit hook for the excluded-locality special key range.
// Delegates to excludeLocalityCommitActor and returns its future unchanged.
Future<Optional<std::string>> ExcludedLocalitiesRangeImpl::commit(ReadYourWritesTransaction* ryw) {
|
||||
// exclude locality with failed option as false.
|
||||
return excludeLocalityCommitActor(ryw, false);
|
||||
}
|
||||
|
||||
@ -2267,5 +2273,6 @@ Key FailedLocalitiesRangeImpl::encode(const KeyRef& key) const {
|
||||
}
|
||||
|
||||
// Commit hook for the failed-locality special key range.
// Delegates to excludeLocalityCommitActor and returns its future unchanged.
Future<Optional<std::string>> FailedLocalitiesRangeImpl::commit(ReadYourWritesTransaction* ryw) {
|
||||
// exclude locality with failed option as true.
|
||||
return excludeLocalityCommitActor(ryw, true);
|
||||
}
|
||||
|
@ -280,6 +280,7 @@ public:
|
||||
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
|
||||
};
|
||||
|
||||
// Special key management api for excluding localities (excluded_locality)
|
||||
class ExcludedLocalitiesRangeImpl : public SpecialKeyRangeRWImpl {
|
||||
public:
|
||||
explicit ExcludedLocalitiesRangeImpl(KeyRangeRef kr);
|
||||
@ -290,6 +291,7 @@ public:
|
||||
Future<Optional<std::string>> commit(ReadYourWritesTransaction* ryw) override;
|
||||
};
|
||||
|
||||
// Special key management api for excluding localities with failed option (failed_locality)
|
||||
class FailedLocalitiesRangeImpl : public SpecialKeyRangeRWImpl {
|
||||
public:
|
||||
explicit FailedLocalitiesRangeImpl(KeyRangeRef kr);
|
||||
|
@ -648,16 +648,15 @@ std::string encodeExcludedServersKey(AddressExclusion const& addr) {
|
||||
return excludedServersPrefix.toString() + addr.toString();
|
||||
}
|
||||
|
||||
const KeyRangeRef excludedLocalityKeys(LiteralStringRef("\xff/conf/excludedlocality/"),
|
||||
LiteralStringRef("\xff/conf/excludedlocality0"));
|
||||
const KeyRangeRef excludedLocalityKeys(LiteralStringRef("\xff/conf/excluded_locality/"),
|
||||
LiteralStringRef("\xff/conf/excluded_locality0"));
|
||||
const KeyRef excludedLocalityPrefix = excludedLocalityKeys.begin;
|
||||
const KeyRef excludedLocalityVersionKey = LiteralStringRef("\xff/conf/excludedlocality");
|
||||
const KeyRef excludedLocalityVersionKey = LiteralStringRef("\xff/conf/excluded_locality");
|
||||
// Returns the locality portion of an excluded-locality key, i.e. the key with
// excludedLocalityPrefix removed. Asserts that the key starts with that prefix.
std::string decodeExcludedLocalityKey(KeyRef const& key) {
|
||||
ASSERT(key.startsWith(excludedLocalityPrefix));
|
||||
return key.removePrefix(excludedLocalityPrefix).toString();
|
||||
}
|
||||
// Builds the excluded-locality key for the given locality string by
// prepending excludedLocalityPrefix to it.
std::string encodeExcludedLocalityKey(std::string const& locality) {
|
||||
// FIXME: make sure what's persisted here is not affected by innocent changes elsewhere
|
||||
return excludedLocalityPrefix.toString() + locality;
|
||||
}
|
||||
|
||||
@ -678,16 +677,15 @@ std::string encodeFailedServersKey(AddressExclusion const& addr) {
|
||||
return failedServersPrefix.toString() + addr.toString();
|
||||
}
|
||||
|
||||
const KeyRangeRef failedLocalityKeys(LiteralStringRef("\xff/conf/failedlocality/"),
|
||||
LiteralStringRef("\xff/conf/failedlocality0"));
|
||||
const KeyRangeRef failedLocalityKeys(LiteralStringRef("\xff/conf/failed_locality/"),
|
||||
LiteralStringRef("\xff/conf/failed_locality0"));
|
||||
const KeyRef failedLocalityPrefix = failedLocalityKeys.begin;
|
||||
const KeyRef failedLocalityVersionKey = LiteralStringRef("\xff/conf/failedlocality");
|
||||
const KeyRef failedLocalityVersionKey = LiteralStringRef("\xff/conf/failed_locality");
|
||||
// Returns the locality portion of a failed-locality key, i.e. the key with
// failedLocalityPrefix removed. Asserts that the key starts with that prefix.
std::string decodeFailedLocalityKey(KeyRef const& key) {
|
||||
ASSERT(key.startsWith(failedLocalityPrefix));
|
||||
return key.removePrefix(failedLocalityPrefix).toString();
|
||||
}
|
||||
// Builds the failed-locality key for the given locality string by
// prepending failedLocalityPrefix to it.
std::string encodeFailedLocalityKey(std::string const& locality) {
|
||||
// FIXME: make sure what's persisted here is not affected by innocent changes elsewhere
|
||||
return failedLocalityPrefix.toString() + locality;
|
||||
}
|
||||
|
||||
|
@ -1083,7 +1083,7 @@ ACTOR Future<std::pair<Version, Tag>> addStorageServer(Database cx, StorageServe
|
||||
success(fFailedLocZoneID) && success(fExclLocMachineID) && success(fFailedLocMachineID) &&
|
||||
success(fExclLocProcessID) && success(fFailedLocProcessID));
|
||||
|
||||
// If we have been added to the excluded/failed state servers list, we have to fail
|
||||
// If we have been added to the excluded/failed state servers or localities list, we have to fail
|
||||
if (fExclProc.get().present() || fExclIP.get().present() || fFailProc.get().present() ||
|
||||
fFailIP.get().present() || fExclProc2.get().present() || fExclIP2.get().present() ||
|
||||
fFailProc2.get().present() || fFailIP2.get().present() || fExclLocDCID.get().present() ||
|
||||
|
@ -455,6 +455,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
||||
int localitiesCount = localities.size();
|
||||
int nToKill3 = deterministicRandom()->randomInt(std::min(localitiesCount, minMachinesToKill),
|
||||
std::min(localitiesCount, maxMachinesToKill) + 1);
|
||||
// get random subset of localities.
|
||||
state std::set<std::string> toKill3 = random_subset(localities, nToKill3);
|
||||
|
||||
TraceEvent("RemoveAndKillLocalities")
|
||||
@ -479,6 +480,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
||||
.detail("Kill3Size", toKill3.size())
|
||||
.detail("ToKill3", describe(toKill3))
|
||||
.detail("ClusterAvailable", g_simulator.isAvailable());
|
||||
// exclude localities
|
||||
wait(reportErrors(
|
||||
timeoutError(removeAndKillLocalities(self, cx, toKill3, bClearedFirst ? &toKill2 : nullptr, failed),
|
||||
self->kill3Timeout),
|
||||
@ -766,11 +768,13 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
||||
return subset;
|
||||
}
|
||||
|
||||
// creates a random set of size n from given unordered_set.
|
||||
// Overload for std::unordered_set: copies the elements of s into a std::vector
// and forwards that vector, together with n, to another random_subset overload.
template <class T>
|
||||
static std::set<T> random_subset(std::unordered_set<T>& s, int n) {
|
||||
return random_subset(std::vector<T>(s.begin(), s.end()), n);
|
||||
}
|
||||
|
||||
// creates a random set of size n from given set.
|
||||
template <class T>
|
||||
static std::set<T> random_subset(std::set<T>& s, int n) {
|
||||
return random_subset(std::vector<T>(s.begin(), s.end()), n);
|
||||
@ -781,6 +785,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
||||
machineProcesses[kill].count(AddressExclusion(process.ip, process.port)) > 0);
|
||||
}
|
||||
|
||||
// Returns the list of IPAddresses of the workers that match the given list of localities.
|
||||
ACTOR Future<std::set<AddressExclusion>> getLocalitiesAddresses(Database cx, std::set<std::string> localities) {
|
||||
state Transaction tr(cx);
|
||||
state std::vector<ProcessData> workers = wait(getWorkers(&tr));
|
||||
@ -793,6 +798,9 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
||||
return addressesSet;
|
||||
}
|
||||
|
||||
// Finds the safe localities list that can be excluded from the killable safeProcesses list.
|
||||
// If excluding based on a particular locality of the safe process, kills any other process, that
|
||||
// particular locality is not included in the killable safeLocalities list.
|
||||
std::set<std::string> getSafeLocalitiesToKill(std::vector<ISimulator::ProcessInfo*> const& safeProcesses) {
|
||||
std::unordered_map<std::string, int> safeLocalitiesCount;
|
||||
for (const auto& processInfo : safeProcesses) {
|
||||
@ -837,7 +845,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
||||
return safeLocalities;
|
||||
}
|
||||
|
||||
// Attempts to exclude a set of processes, and once the exclusion is successful it kills them.
|
||||
// Attempts to exclude a set of localities, and once the exclusion is successful it kills them.
|
||||
// If markExcludeAsFailed is true, then it is an error if we cannot complete the exclusion.
|
||||
ACTOR static Future<Void> removeAndKillLocalities(RemoveServersSafelyWorkload* self,
|
||||
Database cx,
|
||||
@ -862,6 +870,8 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
||||
}
|
||||
|
||||
state std::set<std::string> toKillMarkFailedSet;
|
||||
// if markExcludeAsFailed is true, get a random subset of the localities to kill, check for
|
||||
// safe exclusions and exclude them with failed option.
|
||||
if (markExcludeAsFailed) {
|
||||
state int retries = 0;
|
||||
loop {
|
||||
@ -914,9 +924,11 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
||||
.detail("FailedLocalities", describe(toKillMarkFailedUnorderedSet))
|
||||
.detail("ClusterAvailable", g_simulator.isAvailable())
|
||||
.detail("MarkExcludeAsFailed", markExcludeAsFailed);
|
||||
// exclude localities with failed option as true
|
||||
if (markExcludeAsFailed) {
|
||||
wait(excludeLocalities(cx, &toKillMarkFailedUnorderedSet, true));
|
||||
}
|
||||
// exclude localities with failed option as false.
|
||||
wait(excludeLocalities(cx, &toKillUnorderedSet));
|
||||
|
||||
TraceEvent("RemoveAndKillLocalities", functionId)
|
||||
@ -926,6 +938,8 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Update the g_simulator processes list with the process ids
|
||||
// of the workers, that are generated as part of worker creation.
|
||||
ACTOR static Future<Void> updateProcessIds(Database cx) {
|
||||
Transaction tr(cx);
|
||||
std::vector<ProcessData> workers = wait(getWorkers(&tr));
|
||||
|
Loading…
x
Reference in New Issue
Block a user