diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index 12289ba5d1..1f928f2da6 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -737,10 +737,6 @@ ACTOR Future changeQuorum( Database cx, ReferenceisSimulated()) - g_simulator.maxCoordinatorsInDatacenter = g_simulator.killableMachines + 1; - loop { try { tr.setOption( FDBTransactionOptions::LOCK_AWARE ); diff --git a/fdbrpc/sim2.actor.cpp b/fdbrpc/sim2.actor.cpp index b80358e656..171a14f410 100644 --- a/fdbrpc/sim2.actor.cpp +++ b/fdbrpc/sim2.actor.cpp @@ -1194,10 +1194,9 @@ public: } } virtual void killProcess( ProcessInfo* machine, KillType kt ) { - TraceEvent("attemptingKillProcess").detail("killedMachines", killedMachines).detail("killableMachines", killableMachines); + TraceEvent("attemptingKillProcess"); if (kt < RebootAndDelete ) { killProcess_internal( machine, kt ); - killedMachines++; } } virtual void killInterface( NetworkAddress address, KillType kt ) { @@ -1205,7 +1204,6 @@ public: std::vector& processes = machines[ addressMap[address]->locality.zoneId() ].processes; for( int i = 0; i < processes.size(); i++ ) killProcess_internal( processes[i], kt ); - killedMachines++; } } virtual bool killMachine(Optional> zoneId, KillType kt, bool killIsSafe, bool forceKill, KillType* ktFinal) { @@ -1319,7 +1317,7 @@ public: return false; } - TraceEvent("KillMachine", zoneId).detailext("ZoneId", zoneId).detail("Kt", kt).detail("KtOrig", ktOrig).detail("KilledMachines", killedMachines).detail("KillableMachines", processesOnMachine).detail("ProcessPerMachine", processesPerMachine).detail("KillChanged", kt!=ktOrig).detail("killIsSafe", killIsSafe); + TraceEvent("KillMachine", zoneId).detailext("ZoneId", zoneId).detail("Kt", kt).detail("KtOrig", ktOrig).detail("KillableMachines", processesOnMachine).detail("ProcessPerMachine", processesPerMachine).detail("KillChanged", kt!=ktOrig).detail("killIsSafe", killIsSafe); if (kt < RebootAndDelete ) { if(kt == InjectFaults && machines[zoneId].machineProcess != nullptr) killProcess_internal( machines[zoneId].machineProcess, kt ); @@ -1425,10 +1423,6 @@ public: } TraceEvent("killDataCenter") - .detail("killedMachines", killedMachines) - .detail("killableMachines", killableMachines) - .detail("killableDatacenters", killableDatacenters) - .detail("maxCoordinatorsInDatacenter", maxCoordinatorsInDatacenter) .detail("DcZones", datacenterZones.size()) .detail("DcProcesses", dcProcesses) .detailext("DCID", dcId) diff --git a/fdbrpc/simulator.h b/fdbrpc/simulator.h index 597406848c..ec3d59001d 100644 --- a/fdbrpc/simulator.h +++ b/fdbrpc/simulator.h @@ -34,7 +34,7 @@ enum ClogMode { ClogDefault, ClogAll, ClogSend, ClogReceive }; class ISimulator : public INetwork { public: - ISimulator() : killedMachines(0), killableMachines(0), machinesNeededForProgress(3), neededDatacenters(1), killableDatacenters(0), killedDatacenters(0), maxCoordinatorsInDatacenter(0), desiredCoordinators(1), processesPerMachine(0), isStopped(false), lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false), allSwapsDisabled(false), backupAgents(WaitForType), extraDB(NULL) {} + ISimulator() : desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), isStopped(false), lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false), allSwapsDisabled(false), backupAgents(WaitForType), extraDB(NULL) {} // Order matters! enum KillType { None, KillInstantly, InjectFaults, RebootAndDelete, Reboot, RebootProcessAndDelete, RebootProcess }; @@ -270,17 +270,8 @@ public: virtual void destroyProcess( ProcessInfo *p ) = 0; virtual void destroyMachine(Optional> const& zoneId ) = 0; - // These are here for reasoning about whether it is possible to kill machines (or delete their data) - // and maintain the durability of the database. - int killedMachines; - int killableMachines; - int machinesNeededForProgress; int desiredCoordinators; - int neededDatacenters; - int killedDatacenters; - int killableDatacenters; int physicalDatacenters; - int maxCoordinatorsInDatacenter; int processesPerMachine; std::set protectedAddresses; std::map currentlyRebootingProcesses; diff --git a/fdbserver/DatabaseConfiguration.cpp b/fdbserver/DatabaseConfiguration.cpp index 496db73503..da5901e704 100644 --- a/fdbserver/DatabaseConfiguration.cpp +++ b/fdbserver/DatabaseConfiguration.cpp @@ -65,10 +65,18 @@ void parseReplicationPolicy(IRepPolicyRef* policy, ValueRef const& v) { } void DatabaseConfiguration::setDefaultReplicationPolicy() { - storagePolicy = IRepPolicyRef(new PolicyAcross(storageTeamSize, "zoneid", IRepPolicyRef(new PolicyOne()))); - tLogPolicy = IRepPolicyRef(new PolicyAcross(tLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); - remoteTLogPolicy = IRepPolicyRef(new PolicyAcross(remoteTLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); - satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(satelliteTLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); + if(!storagePolicy) { + storagePolicy = IRepPolicyRef(new PolicyAcross(storageTeamSize, "zoneid", IRepPolicyRef(new PolicyOne()))); + } + if(!tLogPolicy) { + tLogPolicy = IRepPolicyRef(new PolicyAcross(tLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); + } + if(remoteTLogReplicationFactor > 0 && !remoteTLogPolicy) { + remoteTLogPolicy = IRepPolicyRef(new PolicyAcross(remoteTLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); + } + if(satelliteTLogReplicationFactor > 0 && !satelliteTLogPolicy) { + satelliteTLogPolicy = IRepPolicyRef(new PolicyAcross(satelliteTLogReplicationFactor, "zoneid", IRepPolicyRef(new PolicyOne()))); + } } bool DatabaseConfiguration::isValid() const { @@ -170,18 +178,33 @@ std::map DatabaseConfiguration::toMap() const { result["remote_satellite_dcs"] = remoteDcStr; } - if(satelliteTLogReplicationFactor > 0) { - result["satellite_replication"] = format("%d", satelliteTLogReplicationFactor); + if(satelliteTLogReplicationFactor == 1 && satelliteTLogUsableDcs == 1 && satelliteTLogWriteAntiQuorum == 0) { + result["satellite_redundancy_mode"] = "one_satellite_single"; + } else if(satelliteTLogReplicationFactor == 2 && satelliteTLogUsableDcs == 1 && satelliteTLogWriteAntiQuorum == 0) { + result["satellite_redundancy_mode"] = "one_satellite_double"; + } else if(satelliteTLogReplicationFactor == 3 && satelliteTLogUsableDcs == 1 && satelliteTLogWriteAntiQuorum == 0) { + result["satellite_redundancy_mode"] = "one_satellite_triple"; + } else if(satelliteTLogReplicationFactor == 4 && satelliteTLogUsableDcs == 2 && satelliteTLogWriteAntiQuorum == 0) { + result["satellite_redundancy_mode"] = "two_satellite_safe"; + } else if(satelliteTLogReplicationFactor == 4 && satelliteTLogUsableDcs == 2 && satelliteTLogWriteAntiQuorum == 2) { + result["satellite_redundancy_mode"] = "two_satellite_fast"; + } else if(satelliteTLogReplicationFactor == 0) { + result["satellite_redundancy_mode"] = "none"; + } else { + result["satellite_redundancy_mode"] = "custom"; } - if( remoteTLogReplicationFactor == 1 ) + if( remoteTLogReplicationFactor == 1 ) { result["remote_redundancy_mode"] = "remote_single"; - else if( remoteTLogReplicationFactor == 2 ) + } else if( remoteTLogReplicationFactor == 2 ) { result["remote_redundancy_mode"] = "remote_double"; - else if( remoteTLogReplicationFactor == 3 ) + } else if( remoteTLogReplicationFactor == 3 ) { result["remote_redundancy_mode"] = "remote_triple"; - else if(remoteTLogReplicationFactor > 0) + } else if(remoteTLogReplicationFactor == 0) { + result["remote_redundancy_mode"] = "none"; + } else { result["remote_redundancy_mode"] = "custom"; + } if( desiredTLogCount != -1 ) result["logs"] = format("%d", desiredTLogCount); diff --git a/fdbserver/DatabaseConfiguration.h b/fdbserver/DatabaseConfiguration.h index 8fbabe1bb6..04013305c0 100644 --- a/fdbserver/DatabaseConfiguration.h +++ b/fdbserver/DatabaseConfiguration.h @@ -54,8 +54,21 @@ struct DatabaseConfiguration { } // SOMEDAY: think about changing storageTeamSize to durableStorageQuorum - int32_t minMachinesRequired() const { return std::max(tLogReplicationFactor, storageTeamSize); } - int32_t maxMachineFailuresTolerated() const { return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, durableStorageQuorum - 1); } + int32_t minDatacentersRequired() const { + if(!primaryDcId.present()) return 1; + return 2 + primarySatelliteDcIds.size() + remoteSatelliteDcIds.size(); + } + int32_t minMachinesRequiredPerDatacenter() const { return std::max( satelliteTLogReplicationFactor/std::max(1,satelliteTLogUsableDcs), std::max( remoteTLogReplicationFactor, std::max(tLogReplicationFactor, storageTeamSize) ) ); } + + //Killing an entire datacenter counts as killing one machine in modes that support it + int32_t maxMachineFailuresTolerated() const { + if(remoteTLogReplicationFactor > 0 && satelliteTLogReplicationFactor > 0) { + return 1 + std::min(std::max(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, satelliteTLogReplicationFactor - 1 - satelliteTLogWriteAntiQuorum), durableStorageQuorum - 1); + } else if(satelliteTLogReplicationFactor > 0) { + return std::min(tLogReplicationFactor + satelliteTLogReplicationFactor - 2 - tLogWriteAntiQuorum - satelliteTLogWriteAntiQuorum, durableStorageQuorum - 1); + } + return std::min(tLogReplicationFactor - 1 - tLogWriteAntiQuorum, durableStorageQuorum - 1); + } // MasterProxy Servers int32_t masterProxyCount; @@ -120,9 +133,7 @@ struct DatabaseConfiguration { if (ar.isDeserializing) { for(auto c=rawConfiguration.begin(); c!=rawConfiguration.end(); ++c) setInternal(c->key, c->value); - if(!storagePolicy || !tLogPolicy) { - setDefaultReplicationPolicy(); - } + setDefaultReplicationPolicy(); } } @@ -131,9 +142,7 @@ struct DatabaseConfiguration { this->rawConfiguration = rawConfig; for(auto c=rawConfiguration.begin(); c!=rawConfiguration.end(); ++c) setInternal(c->key, c->value); - if(!storagePolicy || !tLogPolicy) { - setDefaultReplicationPolicy(); - } + setDefaultReplicationPolicy(); } private: diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 2dc854665e..4cf39511d9 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -588,10 +588,7 @@ ACTOR Future restartSimulatedSystem(vector> *systemActors, st ini.SetMultiKey(); try { - int dataCenters = atoi(ini.GetValue("META", "dataCenters")); - int killableMachines = atoi(ini.GetValue("META", "killableMachines")); int machineCount = atoi(ini.GetValue("META", "machineCount")); - int machinesNeededForProgress = atoi(ini.GetValue("META", "machinesNeededForProgress")); int processesPerMachine = atoi(ini.GetValue("META", "processesPerMachine")); int desiredCoordinators = atoi(ini.GetValue("META", "desiredCoordinators")); int testerCount = atoi(ini.GetValue("META", "testerCount")); @@ -634,11 +631,6 @@ ACTOR Future restartSimulatedSystem(vector> *systemActors, st processClass == ProcessClass::TesterClass ? "SimulatedTesterMachine" : "SimulatedMachine") ); } - g_simulator.killableMachines = killableMachines; - g_simulator.neededDatacenters = dataCenters; - g_simulator.maxCoordinatorsInDatacenter = ((desiredCoordinators-1)/dataCenters) + 1; - g_simulator.killableDatacenters = 0; - g_simulator.machinesNeededForProgress = machinesNeededForProgress; g_simulator.desiredCoordinators = desiredCoordinators; g_simulator.processesPerMachine = processesPerMachine; } @@ -647,11 +639,6 @@ ACTOR Future restartSimulatedSystem(vector> *systemActors, st } TraceEvent("RestartSimulatorSettings") - .detail("killableMachines", g_simulator.killableMachines) - .detail("neededDatacenters", g_simulator.neededDatacenters) - .detail("killableDatacenters", g_simulator.killableDatacenters) - .detail("machinesNeededForProgress", g_simulator.machinesNeededForProgress) - .detail("maxCoordinatorsInDatacenter", g_simulator.maxCoordinatorsInDatacenter) .detail("desiredCoordinators", g_simulator.desiredCoordinators) .detail("processesPerMachine", g_simulator.processesPerMachine); @@ -757,18 +744,26 @@ void SimulationConfig::generateNormalConfig(int minimumReplication) { if(datacenters == 2 && g_random->random01() < 0.5) { db.primaryDcId = LiteralStringRef("0"); - db.remoteDcId = LiteralStringRef("1"); - machine_count = g_random->randomInt( std::max( 6, datacenters*db.minMachinesRequired() ), std::max(extraDB ? 7 : 10, datacenters*db.minMachinesRequired() + 1) ); - } else { - machine_count = g_random->randomInt( std::max( 2+datacenters, db.minMachinesRequired() ), extraDB ? 6 : 10 ); + db.remoteDcId = LiteralStringRef("1"); } + + if(db.tLogPolicy && db.tLogPolicy->info() == "data_hall^2 x zoneid^2 x 1") { + machine_count = 9; + } else { + //datacenters+2 so that the configure database workload can configure into three_data_hall + machine_count = std::max(datacenters+2, ((db.minDatacentersRequired() > 1) ? datacenters : 1) * std::max(3, db.minMachinesRequiredPerDatacenter())); + machine_count = g_random->randomInt( machine_count, std::max(machine_count+1, extraDB ? 6 : 10) ); + } + + //because we protect a majority of coordinators from being killed, it is better to run with low numbers of coordinators to prevent too many processes from being protected + coordinators = BUGGIFY ? g_random->randomInt(1, machine_count+1) : 1; if(minimumReplication > 1 && datacenters == 3) { //low latency tests in 3 data hall mode need 2 other data centers with 2 machines each to avoid waiting for logs to recover. machine_count = std::max( machine_count, 6); + coordinators = 3; } processes_per_machine = g_random->randomInt(1, (extraDB ? 14 : 28)/machine_count + 2 ); - coordinators = BUGGIFY ? g_random->randomInt(1, machine_count+1) : std::min( machine_count, db.maxMachineFailuresTolerated()*2 + 1 ); } std::string SimulationConfig::toString() { @@ -785,20 +780,53 @@ std::string SimulationConfig::toString() { config << " " << "storage_quorum:=" << db.durableStorageQuorum; } + if(dbconfig["remote_redundancy_mode"] != "none") { + if (dbconfig["remote_redundancy_mode"] != "custom") { + config << " " << dbconfig["remote_redundancy_mode"]; + } else { + config << " " << "remote_log_replicas:=" << db.remoteTLogReplicationFactor; + } + } + + if(dbconfig["satellite_redundancy_mode"] != "none") { + if (dbconfig["satellite_redundancy_mode"] != "custom") { + config << " " << dbconfig["satellite_redundancy_mode"]; + } else { + config << " " << "satellite_log_replicas:=" << db.satelliteTLogReplicationFactor; + config << " " << "satellite_anti_quorum:=" << db.satelliteTLogWriteAntiQuorum; + config << " " << "satellite_usable_dcs:=" << db.satelliteTLogUsableDcs; + } + } + config << " logs=" << db.getDesiredLogs(); config << " proxies=" << db.getDesiredProxies(); config << " resolvers=" << db.getDesiredResolvers(); - if(db.remoteDesiredTLogCount > 0) { - config << " remote_logs=" << db.remoteDesiredTLogCount; + + if(db.remoteTLogReplicationFactor > 0) { + config << " remote_logs=" << db.getDesiredRemoteLogs(); + config << " log_routers=" << db.getDesiredLogRouters(); } - if(db.satelliteDesiredTLogCount > 0) { - config << " satellite_logs=" << db.satelliteDesiredTLogCount; + + if(db.satelliteTLogReplicationFactor > 0) { + config << " satellite_logs=" << db.getDesiredSatelliteLogs(); } + if(db.primaryDcId.present()) { config << " primary_dc=" << db.primaryDcId.get().printable(); config << " remote_dc=" << db.remoteDcId.get().printable(); } + if(db.primarySatelliteDcIds.size()) { + config << " primary_satellite_dcs=" << db.primarySatelliteDcIds[0].get().printable(); + for(int i = 1; i < db.primarySatelliteDcIds.size(); i++) { + config << "," << db.primarySatelliteDcIds[i].get().printable(); + } + config << " remote_satellite_dcs=" << db.remoteSatelliteDcIds[0].get().printable(); + for(int i = 1; i < db.remoteSatelliteDcIds.size(); i++) { + config << "," << db.remoteSatelliteDcIds[i].get().printable(); + } + } + config << " " << dbconfig["storage_engine"]; return config.str(); } @@ -925,21 +953,12 @@ void setupSimulatedSystem( vector> *systemActors, std::string baseF } g_simulator.desiredCoordinators = coordinatorCount; - g_simulator.killableMachines = simconfig.db.maxMachineFailuresTolerated(); - g_simulator.neededDatacenters = 1; - g_simulator.killableDatacenters = 0; g_simulator.physicalDatacenters = dataCenters; - g_simulator.maxCoordinatorsInDatacenter = ((coordinatorCount-1)/dataCenters) + 1; - g_simulator.machinesNeededForProgress = simconfig.db.minMachinesRequired() + nonVersatileMachines; g_simulator.processesPerMachine = processesPerMachine; TraceEvent("SetupSimulatorSettings") - .detail("killableMachines", g_simulator.killableMachines) - .detail("neededDatacenters", g_simulator.neededDatacenters) - .detail("killableDatacenters", g_simulator.killableDatacenters) - .detail("machinesNeededForProgress", g_simulator.machinesNeededForProgress) - .detail("maxCoordinatorsInDatacenter", g_simulator.maxCoordinatorsInDatacenter) .detail("desiredCoordinators", g_simulator.desiredCoordinators) + .detail("physicalDatacenters", g_simulator.physicalDatacenters) .detail("processesPerMachine", g_simulator.processesPerMachine); // SOMEDAY: add locality for testers to simulate network topology @@ -980,11 +999,8 @@ void setupSimulatedSystem( vector> *systemActors, std::string baseF g_simulator.testerCount = testerCount; TraceEvent("SimulatedClusterStarted") - .detail("KillableMachines", g_simulator.killableMachines) .detail("DataCenters", dataCenters) - .detail("NeededDataCenters", g_simulator.neededDatacenters) .detail("ServerMachineCount", machineCount) - .detail("ServersNeededForProgress", g_simulator.machinesNeededForProgress) .detail("ProcessesPerServer", processesPerMachine) .detail("SSLEnabled", sslEnabled) .detail("ClassesAssigned", assignClasses) diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index e06b2dd492..8e88db6a13 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -1535,7 +1535,7 @@ static StatusObject faultToleranceStatusFetcher(DatabaseConfiguration configurat statusObj["max_machine_failures_without_losing_data"] = std::max(machineFailuresWithoutLosingData, 0); // without losing availablity - statusObj["max_machine_failures_without_losing_availability"] = std::max(std::min(numTLogEligibleMachines - configuration.minMachinesRequired(), machineFailuresWithoutLosingData), 0); + statusObj["max_machine_failures_without_losing_availability"] = std::max(std::min(numTLogEligibleMachines - configuration.minMachinesRequiredPerDatacenter(), machineFailuresWithoutLosingData), 0); return statusObj; } diff --git a/fdbserver/fdbserver.vcxproj b/fdbserver/fdbserver.vcxproj index 14a6832dbf..7533a5f9a1 100644 --- a/fdbserver/fdbserver.vcxproj +++ b/fdbserver/fdbserver.vcxproj @@ -131,7 +131,6 @@ - diff --git a/fdbserver/fdbserver.vcxproj.filters b/fdbserver/fdbserver.vcxproj.filters index cc2679d2cc..723d85e38a 100644 --- a/fdbserver/fdbserver.vcxproj.filters +++ b/fdbserver/fdbserver.vcxproj.filters @@ -195,9 +195,6 @@ - - workloads - workloads diff --git a/fdbserver/workloads/MachineAttrition.actor.cpp b/fdbserver/workloads/MachineAttrition.actor.cpp index c2112be62c..a13bb03a64 100644 --- a/fdbserver/workloads/MachineAttrition.actor.cpp +++ b/fdbserver/workloads/MachineAttrition.actor.cpp @@ -122,9 +122,6 @@ struct MachineAttritionWorkload : TestWorkload { ASSERT( g_network->isSimulated() ); - TEST(g_simulator.killableMachines > 0); // Some machines can be killed - TEST(g_simulator.killableDatacenters > 0); // Some processes can be killed - if( self->killDc ) { Void _ = wait( delay( delayBeforeKill ) ); diff --git a/fdbserver/workloads/RestartRecovery.actor.cpp b/fdbserver/workloads/RestartRecovery.actor.cpp deleted file mode 100644 index 538de0b56d..0000000000 --- a/fdbserver/workloads/RestartRecovery.actor.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * RestartRecovery.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "flow/actorcompiler.h" -#include "fdbclient/NativeAPI.h" -#include "fdbserver/TesterInterface.h" -#include "workloads.h" -#include "fdbrpc/simulator.h" -#include "fdbserver/MasterInterface.h" -#include "fdbclient/SystemData.h" -#include "fdbserver/WorkerInterface.h" -#include "fdbserver/ServerDBInfo.h" -#include "fdbserver/QuietDatabase.h" - -struct RestartRecoveryWorkload : TestWorkload { - std::string machineToKill; - bool enabled; - double killAt; - - RestartRecoveryWorkload(WorkloadContext const& wcx) - : TestWorkload(wcx) - { - enabled = !clientId; // only do this on the "first" client - killAt = getOption( options, LiteralStringRef("killAt"), 10.0 ); - } - - virtual std::string description() { return "RestartRecoveryWorkload"; } - virtual Future setup( Database const& cx ) { return Void(); } - virtual Future start( Database const& cx ) { - if (enabled) - return assassin( cx, this ); - return Void(); - } - virtual Future check( Database const& cx ) { return true; } - virtual void getMetrics( vector& m ) { - } - - ACTOR Future assassin( Database cx, RestartRecoveryWorkload* self ) { - Void _ = wait( delay( self->killAt ) ); - state std::vector logs = self->dbInfo->get().logSystemConfig.allPresentLogs(); - if(logs.size() > 2 && g_simulator.killableMachines > 0) { - TraceEvent("RestartRecoveryReboot").detail("addr", logs[2].address()); - g_simulator.rebootProcess( g_simulator.getProcessByAddress(NetworkAddress(logs[2].address().ip, logs[2].address().port, true, false)), ISimulator::RebootProcess ); - Void _ = wait( delay(8.0) ); - TraceEvent("RestartRecoveryKill"); - g_simulator.rebootProcess( g_simulator.getProcessByAddress(NetworkAddress(logs[0].address().ip, logs[0].address().port, true, false)), ISimulator::RebootProcessAndDelete ); - } - return Void(); - } -}; - -WorkloadFactory RestartRecoveryWorkloadFactory("RestartRecovery"); diff --git a/fdbserver/workloads/SaveAndKill.actor.cpp b/fdbserver/workloads/SaveAndKill.actor.cpp index 3cdf5338db..0c0d9a3880 100644 --- a/fdbserver/workloads/SaveAndKill.actor.cpp +++ b/fdbserver/workloads/SaveAndKill.actor.cpp @@ -59,9 +59,6 @@ struct SaveAndKillWorkload : TestWorkload { ini.SetUnicode(); ini.LoadFile(self->restartInfo.c_str()); - ini.SetValue("META", "killableMachines", format("%d", g_simulator.killableMachines).c_str()); - ini.SetValue("META", "dataCenters", format("%d", g_simulator.neededDatacenters).c_str()); - ini.SetValue("META", "machinesNeededForProgress", format("%d", g_simulator.machinesNeededForProgress).c_str()); ini.SetValue("META", "processesPerMachine", format("%d", g_simulator.processesPerMachine).c_str()); ini.SetValue("META", "desiredCoordinators", format("%d", g_simulator.desiredCoordinators).c_str()); ini.SetValue("META", "connectionString", g_simulator.connectionString.c_str()); diff --git a/tests/rare/RestartRecovery.txt b/tests/rare/RestartRecovery.txt deleted file mode 100644 index 3553ddf46c..0000000000 --- a/tests/rare/RestartRecovery.txt +++ /dev/null @@ -1,14 +0,0 @@ -testTitle=DDBalance_test - testName=DDBalance - testDuration=60.0 - transactionsPerSecond=250.0 - binCount=1000 - writesPerTransaction=5 - keySpaceDriftFactor=10 - moversPerClient=10 - actorsPerClient=100 - nodes=100000 - connectionFailuresDisableDuration=100000 - - testName=RestartRecovery - killAt=30.0