mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-15 02:18:39 +08:00
fix killMachine - make sure we have at least 1 blob worker in a dc
This commit is contained in:
parent
03f1d13be3
commit
9db48eb10c
@ -482,6 +482,7 @@ public:
|
||||
TSSMode tssMode;
|
||||
std::map<NetworkAddress, bool> corruptWorkerMap;
|
||||
ConfigDBType configDBType;
|
||||
bool blobGranulesEnabled;
|
||||
|
||||
// Used by workloads that perform reconfigurations
|
||||
int testerCount;
|
||||
|
@ -1367,6 +1367,45 @@ public:
|
||||
return primaryTLogsDead || primaryProcessesDead.validate(storagePolicy);
|
||||
}
|
||||
|
||||
// The following function will determine if a machine can be remove in case when it has a blob worker
|
||||
bool canKillMachineWithBlobWorkers(Optional<Standalone<StringRef>> machineId, KillType kt, KillType* ktFinal) {
|
||||
// Allow if no blob workers, or it's a reboot(without removing the machine)
|
||||
if (!blobGranulesEnabled && kt >= RebootAndDelete) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allow if the machine doesn't support blob worker
|
||||
MachineInfo& currentMachine = machines[machineId];
|
||||
bool hasBlobWorker = false;
|
||||
for (auto processInfo : currentMachine.processes) {
|
||||
if (processInfo->startingClass == ProcessClass::BlobWorkerClass) {
|
||||
hasBlobWorker = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!hasBlobWorker)
|
||||
return true;
|
||||
|
||||
// Count # remaining support blob workers in current dc
|
||||
auto currentDcId = currentMachine.machineProcess->locality.dcId();
|
||||
int nLeft = 0;
|
||||
for (auto processInfo : getAllProcesses()) {
|
||||
if (currentDcId != processInfo->locality.dcId() || // skip other dc
|
||||
processInfo->startingClass != ProcessClass::BlobWorkerClass || // skip non blob workers
|
||||
processInfo->locality.machineId() == machineId) { // skip current machine
|
||||
continue;
|
||||
}
|
||||
nLeft++; // alive blob workers after killing machineId
|
||||
}
|
||||
|
||||
// Ensure there is at least 1 remaining blob workers after removing current machine
|
||||
if (nLeft <= 1) {
|
||||
*ktFinal = RebootAndDelete; // reboot and delete data, but keep this machine
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// The following function will determine if the specified configuration of available and dead processes can allow
|
||||
// the cluster to survive
|
||||
bool canKillProcesses(std::vector<ProcessInfo*> const& availableProcesses,
|
||||
@ -1787,6 +1826,14 @@ public:
|
||||
// Check if machine can be removed, if requested
|
||||
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) ||
|
||||
(kt == RebootAndDelete) || (kt == RebootProcessAndDelete))) {
|
||||
|
||||
if (!canKillMachineWithBlobWorkers(machineId, kt, &kt)) {
|
||||
TraceEvent("canKillMachineWithBlobWorkers")
|
||||
.detail("MachineId", machineId)
|
||||
.detail("KillType", kt)
|
||||
.detail("OrigKillType", ktOrig);
|
||||
}
|
||||
|
||||
std::vector<ProcessInfo*> processesLeft, processesDead;
|
||||
int protectedWorker = 0, unavailable = 0, excluded = 0, cleared = 0;
|
||||
|
||||
|
@ -1421,6 +1421,7 @@ void SimulationConfig::setSpecificConfig(const TestConfig& testConfig) {
|
||||
if (testConfig.resolverCount.present()) {
|
||||
db.resolverCount = testConfig.resolverCount.get();
|
||||
}
|
||||
db.blobGranulesEnabled = testConfig.blobGranulesEnabled;
|
||||
}
|
||||
|
||||
// Sets generateFearless and number of dataCenters based on testConfig details
|
||||
@ -1939,6 +1940,8 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
|
||||
simconfig.db.tenantMode = tenantMode;
|
||||
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::DISABLED;
|
||||
|
||||
g_simulator->blobGranulesEnabled = simconfig.db.blobGranulesEnabled;
|
||||
|
||||
StatusObject startingConfigJSON = simconfig.db.toJSON(true);
|
||||
std::string startingConfigString = "new";
|
||||
if (testConfig.configureLocked) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user