mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-21 14:02:59 +08:00
fix killMachine - make sure we have at least 1 blob worker in a dc
This commit is contained in:
parent
03f1d13be3
commit
9db48eb10c
@ -482,6 +482,7 @@ public:
|
|||||||
TSSMode tssMode;
|
TSSMode tssMode;
|
||||||
std::map<NetworkAddress, bool> corruptWorkerMap;
|
std::map<NetworkAddress, bool> corruptWorkerMap;
|
||||||
ConfigDBType configDBType;
|
ConfigDBType configDBType;
|
||||||
|
bool blobGranulesEnabled;
|
||||||
|
|
||||||
// Used by workloads that perform reconfigurations
|
// Used by workloads that perform reconfigurations
|
||||||
int testerCount;
|
int testerCount;
|
||||||
|
@ -1367,6 +1367,45 @@ public:
|
|||||||
return primaryTLogsDead || primaryProcessesDead.validate(storagePolicy);
|
return primaryTLogsDead || primaryProcessesDead.validate(storagePolicy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The following function will determine if a machine can be remove in case when it has a blob worker
|
||||||
|
bool canKillMachineWithBlobWorkers(Optional<Standalone<StringRef>> machineId, KillType kt, KillType* ktFinal) {
|
||||||
|
// Allow if no blob workers, or it's a reboot(without removing the machine)
|
||||||
|
if (!blobGranulesEnabled && kt >= RebootAndDelete) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allow if the machine doesn't support blob worker
|
||||||
|
MachineInfo& currentMachine = machines[machineId];
|
||||||
|
bool hasBlobWorker = false;
|
||||||
|
for (auto processInfo : currentMachine.processes) {
|
||||||
|
if (processInfo->startingClass == ProcessClass::BlobWorkerClass) {
|
||||||
|
hasBlobWorker = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!hasBlobWorker)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// Count # remaining support blob workers in current dc
|
||||||
|
auto currentDcId = currentMachine.machineProcess->locality.dcId();
|
||||||
|
int nLeft = 0;
|
||||||
|
for (auto processInfo : getAllProcesses()) {
|
||||||
|
if (currentDcId != processInfo->locality.dcId() || // skip other dc
|
||||||
|
processInfo->startingClass != ProcessClass::BlobWorkerClass || // skip non blob workers
|
||||||
|
processInfo->locality.machineId() == machineId) { // skip current machine
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
nLeft++; // alive blob workers after killing machineId
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure there is at least 1 remaining blob workers after removing current machine
|
||||||
|
if (nLeft <= 1) {
|
||||||
|
*ktFinal = RebootAndDelete; // reboot and delete data, but keep this machine
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// The following function will determine if the specified configuration of available and dead processes can allow
|
// The following function will determine if the specified configuration of available and dead processes can allow
|
||||||
// the cluster to survive
|
// the cluster to survive
|
||||||
bool canKillProcesses(std::vector<ProcessInfo*> const& availableProcesses,
|
bool canKillProcesses(std::vector<ProcessInfo*> const& availableProcesses,
|
||||||
@ -1787,6 +1826,14 @@ public:
|
|||||||
// Check if machine can be removed, if requested
|
// Check if machine can be removed, if requested
|
||||||
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) ||
|
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) ||
|
||||||
(kt == RebootAndDelete) || (kt == RebootProcessAndDelete))) {
|
(kt == RebootAndDelete) || (kt == RebootProcessAndDelete))) {
|
||||||
|
|
||||||
|
if (!canKillMachineWithBlobWorkers(machineId, kt, &kt)) {
|
||||||
|
TraceEvent("canKillMachineWithBlobWorkers")
|
||||||
|
.detail("MachineId", machineId)
|
||||||
|
.detail("KillType", kt)
|
||||||
|
.detail("OrigKillType", ktOrig);
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<ProcessInfo*> processesLeft, processesDead;
|
std::vector<ProcessInfo*> processesLeft, processesDead;
|
||||||
int protectedWorker = 0, unavailable = 0, excluded = 0, cleared = 0;
|
int protectedWorker = 0, unavailable = 0, excluded = 0, cleared = 0;
|
||||||
|
|
||||||
|
@ -1421,6 +1421,7 @@ void SimulationConfig::setSpecificConfig(const TestConfig& testConfig) {
|
|||||||
if (testConfig.resolverCount.present()) {
|
if (testConfig.resolverCount.present()) {
|
||||||
db.resolverCount = testConfig.resolverCount.get();
|
db.resolverCount = testConfig.resolverCount.get();
|
||||||
}
|
}
|
||||||
|
db.blobGranulesEnabled = testConfig.blobGranulesEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sets generateFearless and number of dataCenters based on testConfig details
|
// Sets generateFearless and number of dataCenters based on testConfig details
|
||||||
@ -1939,6 +1940,8 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
|
|||||||
simconfig.db.tenantMode = tenantMode;
|
simconfig.db.tenantMode = tenantMode;
|
||||||
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::DISABLED;
|
simconfig.db.encryptionAtRestMode = EncryptionAtRestMode::DISABLED;
|
||||||
|
|
||||||
|
g_simulator->blobGranulesEnabled = simconfig.db.blobGranulesEnabled;
|
||||||
|
|
||||||
StatusObject startingConfigJSON = simconfig.db.toJSON(true);
|
StatusObject startingConfigJSON = simconfig.db.toJSON(true);
|
||||||
std::string startingConfigString = "new";
|
std::string startingConfigString = "new";
|
||||||
if (testConfig.configureLocked) {
|
if (testConfig.configureLocked) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user