mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-21 22:33:17 +08:00
Extract ProcessInfo, MachineInfo, KillType out from ISimulator
This commit is contained in:
parent
50de69c897
commit
0a60142160
fdbclient
fdbrpc
AsyncFileNonDurable.actor.cpp
include/fdbrpc
AsyncFileNonDurable.actor.h
SimulatorKillType.h
SimulatorMachineInfo.h
SimulatorProcessInfo.h
simulator.h
sim2.actor.cpp
fdbserver
FDBExecHelper.actor.cpp
KeyValueStoreSQLite.actor.cpp
QuietDatabase.actor.cpp
RestoreWorker.actor.cpp
SimulatedCluster.actor.cpp
VFSAsync.cpp
coroimpl
fdbserver.actor.cpp
include/fdbserver
tester.actor.cpp
worker.actor.cpp
workloads
ClientWorkload.actor.cpp
ClogSingleConnection.actor.cpp
ConsistencyCheck.actor.cpp
DataLossRecovery.actor.cpp
KillRegion.actor.cpp
MachineAttrition.actor.cpp
ProtocolVersion.actor.cpp
RandomClogging.actor.cpp
RandomMoveKeys.actor.cpp
RemoveServersSafely.actor.cpp
Rollback.actor.cpp
SaveAndKill.actor.cpp
TargetedKill.actor.cpp
@ -24,6 +24,7 @@
|
|||||||
#include "flow/Platform.actor.h"
|
#include "flow/Platform.actor.h"
|
||||||
#include "flow/Platform.h"
|
#include "flow/Platform.h"
|
||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/simulator.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -43,6 +43,7 @@
|
|||||||
#include "fdbrpc/ReplicationPolicy.h"
|
#include "fdbrpc/ReplicationPolicy.h"
|
||||||
#include "fdbrpc/Replication.h"
|
#include "fdbrpc/Replication.h"
|
||||||
#include "fdbclient/Schemas.h"
|
#include "fdbclient/Schemas.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
|
|
||||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||||
|
|
||||||
|
@ -19,10 +19,19 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "fdbrpc/AsyncFileNonDurable.actor.h"
|
#include "fdbrpc/AsyncFileNonDurable.actor.h"
|
||||||
|
|
||||||
|
#include "fdbrpc/SimulatorMachineInfo.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
|
|
||||||
#include "flow/actorcompiler.h" // has to be last include
|
#include "flow/actorcompiler.h" // has to be last include
|
||||||
|
|
||||||
std::map<std::string, Future<Void>> AsyncFileNonDurable::filesBeingDeleted;
|
std::map<std::string, Future<Void>> AsyncFileNonDurable::filesBeingDeleted;
|
||||||
|
|
||||||
|
// Returns a future tied to the shutdown signal of the simulated process that is current at the
// time of the call. The signal's value is discarded via success(), so callers only learn that
// the signal fired, not what it carried.
Future<Void> waitShutdownSignal() {
	auto* process = g_simulator->getCurrentProcess();
	return success(process->shutdownSignal.getFuture());
}
|
||||||
|
|
||||||
|
|
||||||
ACTOR Future<Void> sendOnProcess(ISimulator::ProcessInfo* process, Promise<Void> promise, TaskPriority taskID) {
|
ACTOR Future<Void> sendOnProcess(ISimulator::ProcessInfo* process, Promise<Void> promise, TaskPriority taskID) {
|
||||||
wait(g_simulator->onProcess(process, taskID));
|
wait(g_simulator->onProcess(process, taskID));
|
||||||
promise.send(Void());
|
promise.send(Void());
|
||||||
@ -37,3 +46,180 @@ ACTOR Future<Void> sendErrorOnProcess(ISimulator::ProcessInfo* process,
|
|||||||
promise.sendError(e);
|
promise.sendError(e);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Waits for the shutdown signal of the process that is current when this actor is started, then
// drops this wrapper's reference to the underlying file. Afterwards `self->file` is empty, which
// is the state the I/O methods check for before touching the file.
ACTOR Future<Void> AsyncFileDetachable::doShutdown(AsyncFileDetachable* self) {
	wait(waitShutdownSignal());

	// Detach: replace the held reference with an empty one.
	self->file = Reference<IAsyncFile>();
	return Void();
}
|
||||||
|
|
||||||
|
// Wraps the result of `wrappedFile` in an AsyncFileDetachable.
//
// Races the pending open against the current process's shutdown signal: if the shutdown signal
// fires first the actor throws an injected io_error; otherwise it returns a new
// AsyncFileDetachable holding the opened file.
ACTOR Future<Reference<IAsyncFile>> AsyncFileDetachable::open(Future<Reference<IAsyncFile>> wrappedFile) {
	choose {
		// The shutdown branch is listed first, exactly as before.
		when(wait(waitShutdownSignal())) {
			throw io_error().asInjectedFault();
		}
		when(Reference<IAsyncFile> opened = wait(wrappedFile)) {
			return makeReference<AsyncFileDetachable>(opened);
		}
	}
}
|
||||||
|
|
||||||
|
// Reads `length` bytes at `offset` into `data` through the wrapped file.
// Returns an injected io_error without touching the file when the wrapper holds no file or the
// current process's shutdown signal is already set; otherwise the read is forwarded and guarded
// by sendErrorOnShutdown (with assertOnReadWriteCancel passed through).
Future<int> AsyncFileDetachable::read(void* data, int length, int64_t offset) {
	const bool detached = !file.getPtr();
	if (detached || g_simulator->getCurrentProcess()->shutdownSignal.getFuture().isReady()) {
		return io_error().asInjectedFault();
	}
	return sendErrorOnShutdown(file->read(data, length, offset), assertOnReadWriteCancel);
}
|
||||||
|
|
||||||
|
// Writes `length` bytes from `data` at `offset` through the wrapped file.
// Returns an injected io_error without touching the file when the wrapper holds no file or the
// current process's shutdown signal is already set; otherwise the write is forwarded and guarded
// by sendErrorOnShutdown (with assertOnReadWriteCancel passed through).
Future<Void> AsyncFileDetachable::write(void const* data, int length, int64_t offset) {
	const bool detached = !file.getPtr();
	if (detached || g_simulator->getCurrentProcess()->shutdownSignal.getFuture().isReady()) {
		return io_error().asInjectedFault();
	}
	return sendErrorOnShutdown(file->write(data, length, offset), assertOnReadWriteCancel);
}
|
||||||
|
|
||||||
|
// Truncates the wrapped file to `size`.
// Returns an injected io_error when the wrapper holds no file or the current process's shutdown
// signal is already set; otherwise forwards the call, guarded by sendErrorOnShutdown.
Future<Void> AsyncFileDetachable::truncate(int64_t size) {
	const bool detached = !file.getPtr();
	if (detached || g_simulator->getCurrentProcess()->shutdownSignal.getFuture().isReady()) {
		return io_error().asInjectedFault();
	}
	return sendErrorOnShutdown(file->truncate(size));
}
|
||||||
|
|
||||||
|
// Syncs the wrapped file.
// Returns an injected io_error when the wrapper holds no file or the current process's shutdown
// signal is already set; otherwise forwards the call, guarded by sendErrorOnShutdown.
Future<Void> AsyncFileDetachable::sync() {
	const bool detached = !file.getPtr();
	if (detached || g_simulator->getCurrentProcess()->shutdownSignal.getFuture().isReady()) {
		return io_error().asInjectedFault();
	}
	return sendErrorOnShutdown(file->sync());
}
|
||||||
|
|
||||||
|
// Queries the size of the wrapped file.
// Returns an injected io_error when the wrapper holds no file or the current process's shutdown
// signal is already set; otherwise forwards the call, guarded by sendErrorOnShutdown.
Future<int64_t> AsyncFileDetachable::size() const {
	const bool detached = !file.getPtr();
	if (detached || g_simulator->getCurrentProcess()->shutdownSignal.getFuture().isReady()) {
		return io_error().asInjectedFault();
	}
	return sendErrorOnShutdown(file->size());
}
|
||||||
|
|
||||||
|
// Creates an AsyncFileNonDurable around the file produced by `wrappedFile`.
//
// Steps, in order:
//   1. Record the calling process and task priority, then move to the machine context.
//   2. Wait for the wrapped open to finish, or for the calling process's shutdown signal.
//   3. If `filename` has an entry in filesBeingDeleted, wait for that entry's future before
//      creating the new file, then hop back to the calling process.
//   4. Construct the AsyncFileNonDurable and wait for its size() future.
//   5. Hop back to the calling process and return the new file.
//
// Whenever the shutdown signal is found ready after a wait, an injected io_error is thrown.
// On any error the entry for this file is erased from the machine's openFiles map, the actor
// returns to the calling process, and the original error is rethrown.
ACTOR Future<Reference<IAsyncFile>> AsyncFileNonDurable::open(std::string filename,
                                                              std::string actualFilename,
                                                              Future<Reference<IAsyncFile>> wrappedFile,
                                                              Reference<DiskParameters> diskParameters,
                                                              bool aio) {
	// Remembered so the actor can return to the caller's process and priority later.
	state ISimulator::ProcessInfo* currentProcess = g_simulator->getCurrentProcess();
	state TaskPriority currentTaskID = g_network->getCurrentTask();
	state Future<Void> shutdown = success(currentProcess->shutdownSignal.getFuture());

	//TraceEvent("AsyncFileNonDurableOpenBegin").detail("Filename", filename).detail("Addr", g_simulator->getCurrentProcess()->address);
	wait(g_simulator->onMachine(currentProcess));
	try {
		wait(success(wrappedFile) || shutdown);

		if (shutdown.isReady()) {
			throw io_error().asInjectedFault();
		}

		state Reference<IAsyncFile> file = wrappedFile.get();

		// A file that is still being deleted must not be modified by someone else at the same
		// time, so creation of a new file under that name waits until the deletion completes.
		state std::map<std::string, Future<Void>>::iterator pendingDelete =
		    AsyncFileNonDurable::filesBeingDeleted.find(filename);
		if (pendingDelete != AsyncFileNonDurable::filesBeingDeleted.end()) {
			//TraceEvent("AsyncFileNonDurableOpenWaitOnDelete1").detail("Filename", filename);
			wait(pendingDelete->second || shutdown);
			//TraceEvent("AsyncFileNonDurableOpenWaitOnDelete2").detail("Filename", filename);
			if (shutdown.isReady()) {
				throw io_error().asInjectedFault();
			}
			wait(g_simulator->onProcess(currentProcess, currentTaskID));
		}

		state Reference<AsyncFileNonDurable> nonDurableFile(
		    new AsyncFileNonDurable(filename, actualFilename, file, diskParameters, currentProcess->address, aio));

		// Causes the approximateSize member to be set
		state Future<int64_t> sizeFuture = nonDurableFile->size();
		wait(success(sizeFuture) || shutdown);

		if (shutdown.isReady()) {
			throw io_error().asInjectedFault();
		}

		//TraceEvent("AsyncFileNonDurableOpenComplete").detail("Filename", filename);

		wait(g_simulator->onProcess(currentProcess, currentTaskID));

		return nonDurableFile;
	} catch (Error& e) {
		state Error failure = e;

		// Use the wrapped file's own name when the open succeeded, otherwise the name we were
		// asked to open, as the key to erase from the machine's open-file table.
		std::string registeredName = actualFilename;
		if (wrappedFile.isReady() && !wrappedFile.isError()) {
			registeredName = wrappedFile.get()->getFilename();
		}
		currentProcess->machine->openFiles.erase(registeredName);

		//TraceEvent("AsyncFileNonDurableOpenError").errorUnsuppressed(e).detail("Filename", filename).detail("Address", currentProcess->address).detail("Addr", g_simulator->getCurrentProcess()->address);
		wait(g_simulator->onProcess(currentProcess, currentTaskID));
		throw failure;
	}
}
|
||||||
|
|
||||||
|
// Performs a read on behalf of `self` in the machine context.
//
// The actor records the calling process and task priority, switches to the machine, delegates
// to self->onRead(), and switches back to the calling process before either returning the
// result of onRead() or rethrowing the error it raised.
ACTOR Future<int> AsyncFileNonDurable::read(AsyncFileNonDurable* self, void* data, int length, int64_t offset) {
	state ISimulator::ProcessInfo* currentProcess = g_simulator->getCurrentProcess();
	state TaskPriority currentTaskID = g_network->getCurrentTask();

	wait(g_simulator->onMachine(currentProcess));

	try {
		state int bytesRead = wait(self->onRead(self, data, length, offset));
		wait(g_simulator->onProcess(currentProcess, currentTaskID));
		return bytesRead;
	} catch (Error& e) {
		// Keep the error across the wait below; the caught reference does not survive it.
		state Error failure = e;
		wait(g_simulator->onProcess(currentProcess, currentTaskID));
		throw failure;
	}
}
|
||||||
|
|
||||||
|
// Finishes all outstanding work on `self` and then deletes it.
//
// Steps, in order:
//   1. Add the file's name to the owning machine's deletingOrClosingFiles set.
//   2. Move to the machine context.
//   3. Replace self->startSyncPromise with a fresh promise and fulfil the old one with `true`.
//   4. Wait for every still-pending entry in self->pendingModifications to become ready;
//      their errors are ignored so that all of them can finish.
//   5. If a kill of the file is in progress, wait for it to complete.
//   6. Remove the file from the machine's closingFiles and deletingOrClosingFiles sets and from
//      filesBeingDeleted, then `delete self`.
//
// `self` must not be used by the caller after this actor has been started.
// Errors raised at any wait propagate to the returned future unchanged.
ACTOR Future<Void> AsyncFileNonDurable::closeFile(AsyncFileNonDurable* self) {
	state ISimulator::ProcessInfo* currentProcess = g_simulator->getCurrentProcess();

	g_simulator->getMachineByNetworkAddress(self->openedAddress)->deletingOrClosingFiles.insert(self->getFilename());

	wait(g_simulator->onMachine(currentProcess));

	// Make sure all writes have gone through.
	Promise<bool> startSyncPromise = self->startSyncPromise;
	self->startSyncPromise = Promise<bool>();
	startSyncPromise.send(true);

	std::vector<Future<Void>> outstandingModifications;
	for (auto itr = self->pendingModifications.ranges().begin(); itr != self->pendingModifications.ranges().end();
	     ++itr) {
		if (itr->value().isValid() && !itr->value().isReady()) {
			outstandingModifications.push_back(itr->value());
		}
	}

	// Ignore errors here so that all modifications can finish
	wait(waitForAllReady(outstandingModifications));

	// Make sure we aren't in the process of killing the file
	if (self->killed.isSet()) {
		wait(self->killComplete.getFuture());
	}

	// Drop every record of this file being closed or deleted (the machine's closingFiles and
	// deletingOrClosingFiles sets and the filesBeingDeleted map) so that new files can be
	// created with this filename. No wait separates these statements, so one lookup suffices.
	auto machine = g_simulator->getMachineByNetworkAddress(self->openedAddress);
	machine->closingFiles.erase(self->getFilename());
	machine->deletingOrClosingFiles.erase(self->getFilename());
	AsyncFileNonDurable::filesBeingDeleted.erase(self->filename);
	//TraceEvent("AsyncFileNonDurable_FinishDelete", self->id).detail("Filename", self->filename);

	delete self;
	return Void();
}
|
||||||
|
|
||||||
|
void AsyncFileNonDurable::removeOpenFile(std::string filename, AsyncFileNonDurable* file) {
|
||||||
|
auto& openFiles = g_simulator->getCurrentProcess()->machine->openFiles;
|
||||||
|
|
||||||
|
auto iter = openFiles.find(filename);
|
||||||
|
|
||||||
|
// Various actions (e.g. simulated delete) can remove a file from openFiles prematurely, so it may already
|
||||||
|
// be gone. Renamed files (from atomic write and create) will also be present under only one of the two
|
||||||
|
// names.
|
||||||
|
if (iter != openFiles.end()) {
|
||||||
|
// even if the filename exists, it doesn't mean that it references the same file. It could be that the
|
||||||
|
// file was renamed and later a file with the same name was opened.
|
||||||
|
if (iter->second.getPtrIfReady().orDefault(nullptr) == file) {
|
||||||
|
openFiles.erase(iter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -45,16 +45,14 @@ ACTOR Future<Void> sendErrorOnProcess(ISimulator::ProcessInfo* process,
|
|||||||
Error e,
|
Error e,
|
||||||
TaskPriority taskID);
|
TaskPriority taskID);
|
||||||
|
|
||||||
|
extern Future<Void> waitShutdownSignal();
|
||||||
|
|
||||||
ACTOR template <class T>
|
ACTOR template <class T>
|
||||||
Future<T> sendErrorOnShutdown(Future<T> in, bool assertOnCancel = false) {
|
Future<T> sendErrorOnShutdown(Future<T> in, bool assertOnCancel = false) {
|
||||||
try {
|
try {
|
||||||
choose {
|
choose {
|
||||||
when(wait(success(g_simulator->getCurrentProcess()->shutdownSignal.getFuture()))) {
|
when(wait(waitShutdownSignal())) { throw io_error().asInjectedFault(); }
|
||||||
throw io_error().asInjectedFault();
|
when(T rep = wait(in)) { return rep; }
|
||||||
}
|
|
||||||
when(T rep = wait(in)) {
|
|
||||||
return rep;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
ASSERT(e.code() != error_code_actor_cancelled || !assertOnCancel);
|
ASSERT(e.code() != error_code_actor_cancelled || !assertOnCancel);
|
||||||
@ -73,55 +71,17 @@ public:
|
|||||||
shutdown = doShutdown(this);
|
shutdown = doShutdown(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> doShutdown(AsyncFileDetachable* self) {
|
ACTOR Future<Void> doShutdown(AsyncFileDetachable* self);
|
||||||
wait(success(g_simulator->getCurrentProcess()->shutdownSignal.getFuture()));
|
ACTOR static Future<Reference<IAsyncFile>> open(Future<Reference<IAsyncFile>> wrappedFile);
|
||||||
self->file = Reference<IAsyncFile>();
|
|
||||||
return Void();
|
|
||||||
}
|
|
||||||
|
|
||||||
ACTOR static Future<Reference<IAsyncFile>> open(Future<Reference<IAsyncFile>> wrappedFile) {
|
|
||||||
choose {
|
|
||||||
when(wait(success(g_simulator->getCurrentProcess()->shutdownSignal.getFuture()))) {
|
|
||||||
throw io_error().asInjectedFault();
|
|
||||||
}
|
|
||||||
when(Reference<IAsyncFile> f = wait(wrappedFile)) {
|
|
||||||
return makeReference<AsyncFileDetachable>(f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void addref() override { ReferenceCounted<AsyncFileDetachable>::addref(); }
|
void addref() override { ReferenceCounted<AsyncFileDetachable>::addref(); }
|
||||||
void delref() override { ReferenceCounted<AsyncFileDetachable>::delref(); }
|
void delref() override { ReferenceCounted<AsyncFileDetachable>::delref(); }
|
||||||
|
|
||||||
Future<int> read(void* data, int length, int64_t offset) override {
|
Future<int> read(void* data, int length, int64_t offset) override;
|
||||||
if (!file.getPtr() || g_simulator->getCurrentProcess()->shutdownSignal.getFuture().isReady())
|
Future<Void> write(void const* data, int length, int64_t offset) override;
|
||||||
return io_error().asInjectedFault();
|
Future<Void> truncate(int64_t size) override;
|
||||||
return sendErrorOnShutdown(file->read(data, length, offset), assertOnReadWriteCancel);
|
Future<Void> sync() override;
|
||||||
}
|
Future<int64_t> size() const override;
|
||||||
|
|
||||||
Future<Void> write(void const* data, int length, int64_t offset) override {
|
|
||||||
if (!file.getPtr() || g_simulator->getCurrentProcess()->shutdownSignal.getFuture().isReady())
|
|
||||||
return io_error().asInjectedFault();
|
|
||||||
return sendErrorOnShutdown(file->write(data, length, offset), assertOnReadWriteCancel);
|
|
||||||
}
|
|
||||||
|
|
||||||
Future<Void> truncate(int64_t size) override {
|
|
||||||
if (!file.getPtr() || g_simulator->getCurrentProcess()->shutdownSignal.getFuture().isReady())
|
|
||||||
return io_error().asInjectedFault();
|
|
||||||
return sendErrorOnShutdown(file->truncate(size));
|
|
||||||
}
|
|
||||||
|
|
||||||
Future<Void> sync() override {
|
|
||||||
if (!file.getPtr() || g_simulator->getCurrentProcess()->shutdownSignal.getFuture().isReady())
|
|
||||||
return io_error().asInjectedFault();
|
|
||||||
return sendErrorOnShutdown(file->sync());
|
|
||||||
}
|
|
||||||
|
|
||||||
Future<int64_t> size() const override {
|
|
||||||
if (!file.getPtr() || g_simulator->getCurrentProcess()->shutdownSignal.getFuture().isReady())
|
|
||||||
return io_error().asInjectedFault();
|
|
||||||
return sendErrorOnShutdown(file->size());
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t debugFD() const override {
|
int64_t debugFD() const override {
|
||||||
if (!file.getPtr())
|
if (!file.getPtr())
|
||||||
@ -225,58 +185,7 @@ public:
|
|||||||
std::string actualFilename,
|
std::string actualFilename,
|
||||||
Future<Reference<IAsyncFile>> wrappedFile,
|
Future<Reference<IAsyncFile>> wrappedFile,
|
||||||
Reference<DiskParameters> diskParameters,
|
Reference<DiskParameters> diskParameters,
|
||||||
bool aio) {
|
bool aio);
|
||||||
state ISimulator::ProcessInfo* currentProcess = g_simulator->getCurrentProcess();
|
|
||||||
state TaskPriority currentTaskID = g_network->getCurrentTask();
|
|
||||||
state Future<Void> shutdown = success(currentProcess->shutdownSignal.getFuture());
|
|
||||||
|
|
||||||
//TraceEvent("AsyncFileNonDurableOpenBegin").detail("Filename", filename).detail("Addr", g_simulator->getCurrentProcess()->address);
|
|
||||||
wait(g_simulator->onMachine(currentProcess));
|
|
||||||
try {
|
|
||||||
wait(success(wrappedFile) || shutdown);
|
|
||||||
|
|
||||||
if (shutdown.isReady())
|
|
||||||
throw io_error().asInjectedFault();
|
|
||||||
|
|
||||||
state Reference<IAsyncFile> file = wrappedFile.get();
|
|
||||||
|
|
||||||
// If we are in the process of deleting a file, we can't let someone else modify it at the same time. We
|
|
||||||
// therefore block the creation of new files until deletion is complete
|
|
||||||
state std::map<std::string, Future<Void>>::iterator deletedFile = filesBeingDeleted.find(filename);
|
|
||||||
if (deletedFile != filesBeingDeleted.end()) {
|
|
||||||
//TraceEvent("AsyncFileNonDurableOpenWaitOnDelete1").detail("Filename", filename);
|
|
||||||
wait(deletedFile->second || shutdown);
|
|
||||||
//TraceEvent("AsyncFileNonDurableOpenWaitOnDelete2").detail("Filename", filename);
|
|
||||||
if (shutdown.isReady())
|
|
||||||
throw io_error().asInjectedFault();
|
|
||||||
wait(g_simulator->onProcess(currentProcess, currentTaskID));
|
|
||||||
}
|
|
||||||
|
|
||||||
state Reference<AsyncFileNonDurable> nonDurableFile(
|
|
||||||
new AsyncFileNonDurable(filename, actualFilename, file, diskParameters, currentProcess->address, aio));
|
|
||||||
|
|
||||||
// Causes the approximateSize member to be set
|
|
||||||
state Future<int64_t> sizeFuture = nonDurableFile->size();
|
|
||||||
wait(success(sizeFuture) || shutdown);
|
|
||||||
|
|
||||||
if (shutdown.isReady())
|
|
||||||
throw io_error().asInjectedFault();
|
|
||||||
|
|
||||||
//TraceEvent("AsyncFileNonDurableOpenComplete").detail("Filename", filename);
|
|
||||||
|
|
||||||
wait(g_simulator->onProcess(currentProcess, currentTaskID));
|
|
||||||
|
|
||||||
return nonDurableFile;
|
|
||||||
} catch (Error& e) {
|
|
||||||
state Error err = e;
|
|
||||||
std::string currentFilename =
|
|
||||||
(wrappedFile.isReady() && !wrappedFile.isError()) ? wrappedFile.get()->getFilename() : actualFilename;
|
|
||||||
currentProcess->machine->openFiles.erase(currentFilename);
|
|
||||||
//TraceEvent("AsyncFileNonDurableOpenError").errorUnsuppressed(e).detail("Filename", filename).detail("Address", currentProcess->address).detail("Addr", g_simulator->getCurrentProcess()->address);
|
|
||||||
wait(g_simulator->onProcess(currentProcess, currentTaskID));
|
|
||||||
throw err;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
~AsyncFileNonDurable() override {
|
~AsyncFileNonDurable() override {
|
||||||
//TraceEvent("AsyncFileNonDurable_Destroy", id).detail("Filename", filename);
|
//TraceEvent("AsyncFileNonDurable_Destroy", id).detail("Filename", filename);
|
||||||
@ -301,22 +210,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Removes a file from the openFiles map
|
// Removes a file from the openFiles map
|
||||||
static void removeOpenFile(std::string filename, AsyncFileNonDurable* file) {
|
static void removeOpenFile(std::string filename, AsyncFileNonDurable* file);
|
||||||
auto& openFiles = g_simulator->getCurrentProcess()->machine->openFiles;
|
|
||||||
|
|
||||||
auto iter = openFiles.find(filename);
|
|
||||||
|
|
||||||
// Various actions (e.g. simulated delete) can remove a file from openFiles prematurely, so it may already
|
|
||||||
// be gone. Renamed files (from atomic write and create) will also be present under only one of the two
|
|
||||||
// names.
|
|
||||||
if (iter != openFiles.end()) {
|
|
||||||
// even if the filename exists, it doesn't mean that it references the same file. It could be that the
|
|
||||||
// file was renamed and later a file with the same name was opened.
|
|
||||||
if (iter->second.getPtrIfReady().orDefault(nullptr) == file) {
|
|
||||||
openFiles.erase(iter);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Passes along reads straight to the underlying file, waiting for any outstanding changes that could affect the
|
// Passes along reads straight to the underlying file, waiting for any outstanding changes that could affect the
|
||||||
// results
|
// results
|
||||||
@ -443,21 +337,7 @@ private:
|
|||||||
return readFuture.get();
|
return readFuture.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<int> read(AsyncFileNonDurable* self, void* data, int length, int64_t offset) {
|
ACTOR Future<int> read(AsyncFileNonDurable* self, void* data, int length, int64_t offset);
|
||||||
state ISimulator::ProcessInfo* currentProcess = g_simulator->getCurrentProcess();
|
|
||||||
state TaskPriority currentTaskID = g_network->getCurrentTask();
|
|
||||||
wait(g_simulator->onMachine(currentProcess));
|
|
||||||
|
|
||||||
try {
|
|
||||||
state int rep = wait(self->onRead(self, data, length, offset));
|
|
||||||
wait(g_simulator->onProcess(currentProcess, currentTaskID));
|
|
||||||
return rep;
|
|
||||||
} catch (Error& e) {
|
|
||||||
state Error err = e;
|
|
||||||
wait(g_simulator->onProcess(currentProcess, currentTaskID));
|
|
||||||
throw err;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delays writes a random amount of time before passing them through to the underlying file.
|
// Delays writes a random amount of time before passing them through to the underlying file.
|
||||||
// If a kill interrupts the delay, then the output could be the correct write, part of the write,
|
// If a kill interrupts the delay, then the output could be the correct write, part of the write,
|
||||||
@ -511,9 +391,7 @@ private:
|
|||||||
state bool saveDurable = true;
|
state bool saveDurable = true;
|
||||||
choose {
|
choose {
|
||||||
when(wait(delay(delayDuration))) {}
|
when(wait(delay(delayDuration))) {}
|
||||||
when(bool durable = wait(startSyncFuture)) {
|
when(bool durable = wait(startSyncFuture)) { saveDurable = durable; }
|
||||||
saveDurable = durable;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
debugFileCheck("AsyncFileNonDurableWriteAfterWait", self->filename, dataCopy.begin(), offset, length);
|
debugFileCheck("AsyncFileNonDurableWriteAfterWait", self->filename, dataCopy.begin(), offset, length);
|
||||||
@ -690,9 +568,7 @@ private:
|
|||||||
state bool saveDurable = true;
|
state bool saveDurable = true;
|
||||||
choose {
|
choose {
|
||||||
when(wait(delay(delayDuration))) {}
|
when(wait(delay(delayDuration))) {}
|
||||||
when(bool durable = wait(startSyncFuture)) {
|
when(bool durable = wait(startSyncFuture)) { saveDurable = durable; }
|
||||||
saveDurable = durable;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_network->check_yield(TaskPriority::DefaultYield)) {
|
if (g_network->check_yield(TaskPriority::DefaultYield)) {
|
||||||
@ -840,50 +716,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Finishes all outstanding actors on an AsyncFileNonDurable and then deletes it
|
// Finishes all outstanding actors on an AsyncFileNonDurable and then deletes it
|
||||||
ACTOR Future<Void> closeFile(AsyncFileNonDurable* self) {
|
ACTOR Future<Void> closeFile(AsyncFileNonDurable* self);
|
||||||
state ISimulator::ProcessInfo* currentProcess = g_simulator->getCurrentProcess();
|
|
||||||
state TaskPriority currentTaskID = g_network->getCurrentTask();
|
|
||||||
state std::string filename = self->filename;
|
|
||||||
|
|
||||||
g_simulator->getMachineByNetworkAddress(self->openedAddress)
|
|
||||||
->deletingOrClosingFiles.insert(self->getFilename());
|
|
||||||
|
|
||||||
wait(g_simulator->onMachine(currentProcess));
|
|
||||||
try {
|
|
||||||
// Make sure all writes have gone through.
|
|
||||||
Promise<bool> startSyncPromise = self->startSyncPromise;
|
|
||||||
self->startSyncPromise = Promise<bool>();
|
|
||||||
startSyncPromise.send(true);
|
|
||||||
|
|
||||||
std::vector<Future<Void>> outstandingModifications;
|
|
||||||
|
|
||||||
for (auto itr = self->pendingModifications.ranges().begin();
|
|
||||||
itr != self->pendingModifications.ranges().end();
|
|
||||||
++itr)
|
|
||||||
if (itr->value().isValid() && !itr->value().isReady())
|
|
||||||
outstandingModifications.push_back(itr->value());
|
|
||||||
|
|
||||||
// Ignore errors here so that all modifications can finish
|
|
||||||
wait(waitForAllReady(outstandingModifications));
|
|
||||||
|
|
||||||
// Make sure we aren't in the process of killing the file
|
|
||||||
if (self->killed.isSet())
|
|
||||||
wait(self->killComplete.getFuture());
|
|
||||||
|
|
||||||
// Remove this file from the filesBeingDeleted map so that new files can be created with this filename
|
|
||||||
g_simulator->getMachineByNetworkAddress(self->openedAddress)->closingFiles.erase(self->getFilename());
|
|
||||||
g_simulator->getMachineByNetworkAddress(self->openedAddress)
|
|
||||||
->deletingOrClosingFiles.erase(self->getFilename());
|
|
||||||
AsyncFileNonDurable::filesBeingDeleted.erase(self->filename);
|
|
||||||
//TraceEvent("AsyncFileNonDurable_FinishDelete", self->id).detail("Filename", self->filename);
|
|
||||||
|
|
||||||
delete self;
|
|
||||||
return Void();
|
|
||||||
} catch (Error& e) {
|
|
||||||
state Error err = e;
|
|
||||||
throw err;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "flow/unactorcompiler.h"
|
#include "flow/unactorcompiler.h"
|
||||||
|
21
fdbrpc/include/fdbrpc/SimulatorKillType.h
Normal file
21
fdbrpc/include/fdbrpc/SimulatorKillType.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
#ifndef FDBRPC_SIMULATOR_KILLTYPE_H
|
||||||
|
#define FDBRPC_SIMULATOR_KILLTYPE_H
|
||||||
|
|
||||||
|
namespace simulator {
|
||||||
|
|
||||||
|
// Order matters! The numeric value of each enumerator is spelled out so that the relative
// ordering cannot change by accident when an entry is added or moved.
// NOTE(review): presumably callers compare kill types by ordinal; confirm against the simulator
// code before reordering or renumbering.
enum KillType {
	KillInstantly = 0,
	InjectFaults = 1,
	FailDisk = 2,
	RebootAndDelete = 3,
	RebootProcessAndDelete = 4,
	RebootProcessAndSwitch = 5, // Reboot and switch cluster file
	Reboot = 6,
	RebootProcess = 7,
	None = 8
};
|
||||||
|
|
||||||
|
} // namespace simulator
|
||||||
|
|
||||||
|
#endif // FDBRPC_SIMULATOR_KILLTYPE_H
|
55
fdbrpc/include/fdbrpc/SimulatorMachineInfo.h
Normal file
55
fdbrpc/include/fdbrpc/SimulatorMachineInfo.h
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
#ifndef FDBRPC_SIMULATORMACHINEINFO_H
|
||||||
|
#define FDBRPC_SIMULATORMACHINEINFO_H
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "flow/Optional.h"
|
||||||
|
|
||||||
|
namespace simulator {
|
||||||
|
|
||||||
|
struct ProcessInfo;
|
||||||
|
|
||||||
|
// A set of data associated with a simulated machine
|
||||||
|
struct MachineInfo {
|
||||||
|
ProcessInfo* machineProcess;
|
||||||
|
std::vector<ProcessInfo*> processes;
|
||||||
|
|
||||||
|
// A map from filename to file handle for all open files on a machine
|
||||||
|
std::map<std::string, UnsafeWeakFutureReference<IAsyncFile>> openFiles;
|
||||||
|
|
||||||
|
std::set<std::string> deletingOrClosingFiles;
|
||||||
|
std::set<std::string> closingFiles;
|
||||||
|
Optional<Standalone<StringRef>> machineId;
|
||||||
|
|
||||||
|
const uint16_t remotePortStart;
|
||||||
|
std::vector<uint16_t> usedRemotePorts;
|
||||||
|
|
||||||
|
MachineInfo() : machineProcess(nullptr), remotePortStart(1000) {}
|
||||||
|
|
||||||
|
short getRandomPort() {
|
||||||
|
for (uint16_t i = remotePortStart; i < 60000; i++) {
|
||||||
|
if (std::find(usedRemotePorts.begin(), usedRemotePorts.end(), i) == usedRemotePorts.end()) {
|
||||||
|
TraceEvent(SevDebug, "RandomPortOpened").detail("PortNum", i);
|
||||||
|
usedRemotePorts.push_back(i);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
void removeRemotePort(uint16_t port) {
|
||||||
|
if (port < remotePortStart)
|
||||||
|
return;
|
||||||
|
auto pos = std::find(usedRemotePorts.begin(), usedRemotePorts.end(), port);
|
||||||
|
if (pos != usedRemotePorts.end()) {
|
||||||
|
usedRemotePorts.erase(pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace simulator
|
||||||
|
|
||||||
|
#endif // FDBRPC_SIMULATORMACHINEINFO_H
|
164
fdbrpc/include/fdbrpc/SimulatorProcessInfo.h
Normal file
164
fdbrpc/include/fdbrpc/SimulatorProcessInfo.h
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
#ifndef FDBRPC_SIMULATOR_PROCESSINFO_H
|
||||||
|
#define FDBRPC_SIMULATOR_PROCESSINFO_H
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "flow/NetworkAddress.h"
|
||||||
|
#include "flow/IConnection.h"
|
||||||
|
#include "flow/IUDPSocket.h"
|
||||||
|
|
||||||
|
#include "fdbrpc/SimulatorMachineInfo.h"
|
||||||
|
#include "fdbrpc/SimulatorKillType.h"
|
||||||
|
|
||||||
|
// Forward declaration of the machine record that ProcessInfo points to. It is placed inside
// namespace simulator so that it names the same type as the MachineInfo definition in
// fdbrpc/SimulatorMachineInfo.h, rather than introducing an unrelated type at global scope.
namespace simulator {
struct MachineInfo;
} // namespace simulator
|
||||||
|
|
||||||
|
namespace simulator {
|
||||||
|
|
||||||
|
struct ProcessInfo : NonCopyable {
|
||||||
|
std::string name;
|
||||||
|
std::string coordinationFolder;
|
||||||
|
std::string dataFolder;
|
||||||
|
MachineInfo* machine;
|
||||||
|
NetworkAddressList addresses;
|
||||||
|
NetworkAddress address;
|
||||||
|
LocalityData locality;
|
||||||
|
ProcessClass startingClass;
|
||||||
|
TDMetricCollection tdmetrics;
|
||||||
|
MetricCollection metrics;
|
||||||
|
ChaosMetrics chaosMetrics;
|
||||||
|
HistogramRegistry histograms;
|
||||||
|
std::map<NetworkAddress, Reference<IListener>> listenerMap;
|
||||||
|
std::map<NetworkAddress, Reference<IUDPSocket>> boundUDPSockets;
|
||||||
|
bool failed;
|
||||||
|
bool excluded;
|
||||||
|
bool cleared;
|
||||||
|
bool rebooting;
|
||||||
|
bool drProcess;
|
||||||
|
std::vector<flowGlobalType> globals;
|
||||||
|
|
||||||
|
INetworkConnections* network;
|
||||||
|
|
||||||
|
uint64_t fault_injection_r;
|
||||||
|
double fault_injection_p1, fault_injection_p2;
|
||||||
|
bool failedDisk;
|
||||||
|
|
||||||
|
UID uid;
|
||||||
|
|
||||||
|
ProtocolVersion protocolVersion;
|
||||||
|
bool excludeFromRestarts = false;
|
||||||
|
|
||||||
|
std::vector<ProcessInfo*> childs;
|
||||||
|
|
||||||
|
ProcessInfo(const char* name,
|
||||||
|
LocalityData locality,
|
||||||
|
ProcessClass startingClass,
|
||||||
|
NetworkAddressList addresses,
|
||||||
|
INetworkConnections* net,
|
||||||
|
const char* dataFolder,
|
||||||
|
const char* coordinationFolder)
|
||||||
|
: name(name), coordinationFolder(coordinationFolder), dataFolder(dataFolder), machine(nullptr),
|
||||||
|
addresses(addresses), address(addresses.address), locality(locality), startingClass(startingClass),
|
||||||
|
failed(false), excluded(false), cleared(false), rebooting(false), drProcess(false), network(net),
|
||||||
|
fault_injection_r(0), fault_injection_p1(0), fault_injection_p2(0), failedDisk(false) {
|
||||||
|
uid = deterministicRandom()->randomUniqueID();
|
||||||
|
}
|
||||||
|
|
||||||
|
Future<KillType> onShutdown() { return shutdownSignal.getFuture(); }
|
||||||
|
|
||||||
|
bool isSpawnedKVProcess() const {
|
||||||
|
// SOMEDAY: use a separate bool may be better?
|
||||||
|
return name == "remote flow process";
|
||||||
|
}
|
||||||
|
bool isReliable() const {
|
||||||
|
return !failed && fault_injection_p1 == 0 && fault_injection_p2 == 0 && !failedDisk &&
|
||||||
|
(!machine ||
|
||||||
|
(machine->machineProcess->fault_injection_p1 == 0 && machine->machineProcess->fault_injection_p2 == 0));
|
||||||
|
}
|
||||||
|
bool isAvailable() const { return !isExcluded() && isReliable(); }
|
||||||
|
bool isExcluded() const { return excluded; }
|
||||||
|
bool isCleared() const { return cleared; }
|
||||||
|
std::string getReliableInfo() const {
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << "failed:" << failed << " fault_injection_p1:" << fault_injection_p1
|
||||||
|
<< " fault_injection_p2:" << fault_injection_p2;
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
std::vector<ProcessInfo*> const& getChilds() const { return childs; }
|
||||||
|
|
||||||
|
// Return true if the class type is suitable for stateful roles, such as tLog and StorageServer.
|
||||||
|
bool isAvailableClass() const {
|
||||||
|
switch (startingClass._class) {
|
||||||
|
case ProcessClass::UnsetClass:
|
||||||
|
return true;
|
||||||
|
case ProcessClass::StorageClass:
|
||||||
|
return true;
|
||||||
|
case ProcessClass::TransactionClass:
|
||||||
|
return true;
|
||||||
|
case ProcessClass::ResolutionClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::CommitProxyClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::GrvProxyClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::MasterClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::TesterClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::StatelessClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::LogClass:
|
||||||
|
return true;
|
||||||
|
case ProcessClass::LogRouterClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::ClusterControllerClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::DataDistributorClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::RatekeeperClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::ConsistencyScanClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::BlobManagerClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::StorageCacheClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::BackupClass:
|
||||||
|
return false;
|
||||||
|
case ProcessClass::EncryptKeyProxyClass:
|
||||||
|
return false;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Reference<IListener> getListener(const NetworkAddress& addr) const {
|
||||||
|
auto listener = listenerMap.find(addr);
|
||||||
|
ASSERT(listener != listenerMap.end());
|
||||||
|
return listener->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline flowGlobalType global(int id) const { return (globals.size() > id) ? globals[id] : nullptr; };
|
||||||
|
inline void setGlobal(size_t id, flowGlobalType v) {
|
||||||
|
globals.resize(std::max(globals.size(), id + 1));
|
||||||
|
globals[id] = v;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::string toString() const {
|
||||||
|
return format("name: %s address: %s zone: %s datahall: %s class: %s excluded: %d cleared: %d",
|
||||||
|
name.c_str(),
|
||||||
|
formatIpPort(addresses.address.ip, addresses.address.port).c_str(),
|
||||||
|
(locality.zoneId().present() ? locality.zoneId().get().printable().c_str() : "[unset]"),
|
||||||
|
(locality.dataHallId().present() ? locality.dataHallId().get().printable().c_str() : "[unset]"),
|
||||||
|
startingClass.toString().c_str(),
|
||||||
|
excluded,
|
||||||
|
cleared);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Members not for external use
|
||||||
|
Promise<KillType> shutdownSignal;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace simulator
|
||||||
|
|
||||||
|
#endif // FDBRPC_SIMULATOR_PROCESSINFO_H
|
@ -42,29 +42,26 @@
|
|||||||
#include "fdbrpc/TokenSign.h"
|
#include "fdbrpc/TokenSign.h"
|
||||||
#include "flow/IUDPSocket.h"
|
#include "flow/IUDPSocket.h"
|
||||||
#include "flow/IConnection.h"
|
#include "flow/IConnection.h"
|
||||||
|
#include "fdbrpc/SimulatorKillType.h"
|
||||||
|
|
||||||
enum ClogMode { ClogDefault, ClogAll, ClogSend, ClogReceive };
|
enum ClogMode { ClogDefault, ClogAll, ClogSend, ClogReceive };
|
||||||
|
|
||||||
|
|
||||||
struct ValidationData {
|
struct ValidationData {
|
||||||
// global validation that missing refreshed feeds were previously destroyed
|
// global validation that missing refreshed feeds were previously destroyed
|
||||||
std::unordered_set<std::string> allDestroyedChangeFeedIDs;
|
std::unordered_set<std::string> allDestroyedChangeFeedIDs;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace simulator {
|
||||||
|
struct ProcessInfo;
|
||||||
|
struct MachineInfo;
|
||||||
|
} // namespace simulator
|
||||||
|
|
||||||
class ISimulator : public INetwork {
|
class ISimulator : public INetwork {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// Order matters!
|
using KillType = simulator::KillType;
|
||||||
enum KillType {
|
using ProcessInfo = simulator::ProcessInfo;
|
||||||
KillInstantly,
|
using MachineInfo = simulator::MachineInfo;
|
||||||
InjectFaults,
|
|
||||||
FailDisk,
|
|
||||||
RebootAndDelete,
|
|
||||||
RebootProcessAndDelete,
|
|
||||||
RebootProcessAndSwitch, // Reboot and switch cluster file
|
|
||||||
Reboot,
|
|
||||||
RebootProcess,
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
// Order matters! all modes >= 2 are fault injection modes
|
// Order matters! all modes >= 2 are fault injection modes
|
||||||
enum TSSMode { Disabled, EnabledNormal, EnabledAddDelay, EnabledDropMutations };
|
enum TSSMode { Disabled, EnabledNormal, EnabledAddDelay, EnabledDropMutations };
|
||||||
@ -90,192 +87,6 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Subclasses may subclass ProcessInfo as well
|
|
||||||
struct MachineInfo;
|
|
||||||
|
|
||||||
struct ProcessInfo : NonCopyable {
|
|
||||||
std::string name;
|
|
||||||
std::string coordinationFolder;
|
|
||||||
std::string dataFolder;
|
|
||||||
MachineInfo* machine;
|
|
||||||
NetworkAddressList addresses;
|
|
||||||
NetworkAddress address;
|
|
||||||
LocalityData locality;
|
|
||||||
ProcessClass startingClass;
|
|
||||||
TDMetricCollection tdmetrics;
|
|
||||||
MetricCollection metrics;
|
|
||||||
ChaosMetrics chaosMetrics;
|
|
||||||
HistogramRegistry histograms;
|
|
||||||
std::map<NetworkAddress, Reference<IListener>> listenerMap;
|
|
||||||
std::map<NetworkAddress, Reference<IUDPSocket>> boundUDPSockets;
|
|
||||||
bool failed;
|
|
||||||
bool excluded;
|
|
||||||
bool cleared;
|
|
||||||
bool rebooting;
|
|
||||||
bool drProcess;
|
|
||||||
std::vector<flowGlobalType> globals;
|
|
||||||
|
|
||||||
INetworkConnections* network;
|
|
||||||
|
|
||||||
uint64_t fault_injection_r;
|
|
||||||
double fault_injection_p1, fault_injection_p2;
|
|
||||||
bool failedDisk;
|
|
||||||
|
|
||||||
UID uid;
|
|
||||||
|
|
||||||
ProtocolVersion protocolVersion;
|
|
||||||
bool excludeFromRestarts = false;
|
|
||||||
|
|
||||||
std::vector<ProcessInfo*> childs;
|
|
||||||
|
|
||||||
ProcessInfo(const char* name,
|
|
||||||
LocalityData locality,
|
|
||||||
ProcessClass startingClass,
|
|
||||||
NetworkAddressList addresses,
|
|
||||||
INetworkConnections* net,
|
|
||||||
const char* dataFolder,
|
|
||||||
const char* coordinationFolder)
|
|
||||||
: name(name), coordinationFolder(coordinationFolder), dataFolder(dataFolder), machine(nullptr),
|
|
||||||
addresses(addresses), address(addresses.address), locality(locality), startingClass(startingClass),
|
|
||||||
failed(false), excluded(false), cleared(false), rebooting(false), drProcess(false), network(net),
|
|
||||||
fault_injection_r(0), fault_injection_p1(0), fault_injection_p2(0), failedDisk(false) {
|
|
||||||
uid = deterministicRandom()->randomUniqueID();
|
|
||||||
}
|
|
||||||
|
|
||||||
Future<KillType> onShutdown() { return shutdownSignal.getFuture(); }
|
|
||||||
|
|
||||||
bool isSpawnedKVProcess() const {
|
|
||||||
// SOMEDAY: use a separate bool may be better?
|
|
||||||
return name == "remote flow process";
|
|
||||||
}
|
|
||||||
bool isReliable() const {
|
|
||||||
return !failed && fault_injection_p1 == 0 && fault_injection_p2 == 0 && !failedDisk &&
|
|
||||||
(!machine || (machine->machineProcess->fault_injection_p1 == 0 &&
|
|
||||||
machine->machineProcess->fault_injection_p2 == 0));
|
|
||||||
}
|
|
||||||
bool isAvailable() const { return !isExcluded() && isReliable(); }
|
|
||||||
bool isExcluded() const { return excluded; }
|
|
||||||
bool isCleared() const { return cleared; }
|
|
||||||
std::string getReliableInfo() const {
|
|
||||||
std::stringstream ss;
|
|
||||||
ss << "failed:" << failed << " fault_injection_p1:" << fault_injection_p1
|
|
||||||
<< " fault_injection_p2:" << fault_injection_p2;
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
std::vector<ProcessInfo*> const& getChilds() const { return childs; }
|
|
||||||
|
|
||||||
// Return true if the class type is suitable for stateful roles, such as tLog and StorageServer.
|
|
||||||
bool isAvailableClass() const {
|
|
||||||
switch (startingClass._class) {
|
|
||||||
case ProcessClass::UnsetClass:
|
|
||||||
return true;
|
|
||||||
case ProcessClass::StorageClass:
|
|
||||||
return true;
|
|
||||||
case ProcessClass::TransactionClass:
|
|
||||||
return true;
|
|
||||||
case ProcessClass::ResolutionClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::CommitProxyClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::GrvProxyClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::MasterClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::TesterClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::StatelessClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::LogClass:
|
|
||||||
return true;
|
|
||||||
case ProcessClass::LogRouterClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::ClusterControllerClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::DataDistributorClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::RatekeeperClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::ConsistencyScanClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::BlobManagerClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::StorageCacheClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::BackupClass:
|
|
||||||
return false;
|
|
||||||
case ProcessClass::EncryptKeyProxyClass:
|
|
||||||
return false;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Reference<IListener> getListener(const NetworkAddress& addr) const {
|
|
||||||
auto listener = listenerMap.find(addr);
|
|
||||||
ASSERT(listener != listenerMap.end());
|
|
||||||
return listener->second;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline flowGlobalType global(int id) const { return (globals.size() > id) ? globals[id] : nullptr; };
|
|
||||||
inline void setGlobal(size_t id, flowGlobalType v) {
|
|
||||||
globals.resize(std::max(globals.size(), id + 1));
|
|
||||||
globals[id] = v;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::string toString() const {
|
|
||||||
return format(
|
|
||||||
"name: %s address: %s zone: %s datahall: %s class: %s excluded: %d cleared: %d",
|
|
||||||
name.c_str(),
|
|
||||||
formatIpPort(addresses.address.ip, addresses.address.port).c_str(),
|
|
||||||
(locality.zoneId().present() ? locality.zoneId().get().printable().c_str() : "[unset]"),
|
|
||||||
(locality.dataHallId().present() ? locality.dataHallId().get().printable().c_str() : "[unset]"),
|
|
||||||
startingClass.toString().c_str(),
|
|
||||||
excluded,
|
|
||||||
cleared);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Members not for external use
|
|
||||||
Promise<KillType> shutdownSignal;
|
|
||||||
};
|
|
||||||
|
|
||||||
// A set of data associated with a simulated machine
|
|
||||||
struct MachineInfo {
|
|
||||||
ProcessInfo* machineProcess;
|
|
||||||
std::vector<ProcessInfo*> processes;
|
|
||||||
|
|
||||||
// A map from filename to file handle for all open files on a machine
|
|
||||||
std::map<std::string, UnsafeWeakFutureReference<IAsyncFile>> openFiles;
|
|
||||||
|
|
||||||
std::set<std::string> deletingOrClosingFiles;
|
|
||||||
std::set<std::string> closingFiles;
|
|
||||||
Optional<Standalone<StringRef>> machineId;
|
|
||||||
|
|
||||||
const uint16_t remotePortStart;
|
|
||||||
std::vector<uint16_t> usedRemotePorts;
|
|
||||||
|
|
||||||
MachineInfo() : machineProcess(nullptr), remotePortStart(1000) {}
|
|
||||||
|
|
||||||
short getRandomPort() {
|
|
||||||
for (uint16_t i = remotePortStart; i < 60000; i++) {
|
|
||||||
if (std::find(usedRemotePorts.begin(), usedRemotePorts.end(), i) == usedRemotePorts.end()) {
|
|
||||||
TraceEvent(SevDebug, "RandomPortOpened").detail("PortNum", i);
|
|
||||||
usedRemotePorts.push_back(i);
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
UNREACHABLE();
|
|
||||||
}
|
|
||||||
|
|
||||||
void removeRemotePort(uint16_t port) {
|
|
||||||
if (port < remotePortStart)
|
|
||||||
return;
|
|
||||||
auto pos = std::find(usedRemotePorts.begin(), usedRemotePorts.end(), port);
|
|
||||||
if (pos != usedRemotePorts.end()) {
|
|
||||||
usedRemotePorts.erase(pos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
ProcessInfo* getProcess(Endpoint const& endpoint) { return getProcessByAddress(endpoint.getPrimaryAddress()); }
|
ProcessInfo* getProcess(Endpoint const& endpoint) { return getProcessByAddress(endpoint.getPrimaryAddress()); }
|
||||||
ProcessInfo* getCurrentProcess() { return currentProcess; }
|
ProcessInfo* getCurrentProcess() { return currentProcess; }
|
||||||
ProcessInfo const* getCurrentProcess() const { return currentProcess; }
|
ProcessInfo const* getCurrentProcess() const { return currentProcess; }
|
||||||
@ -529,30 +340,19 @@ public:
|
|||||||
|
|
||||||
std::set<std::pair<std::string, unsigned>> corruptedBlocks;
|
std::set<std::pair<std::string, unsigned>> corruptedBlocks;
|
||||||
|
|
||||||
flowGlobalType global(int id) const final { return getCurrentProcess()->global(id); };
|
flowGlobalType global(int id) const final;
|
||||||
void setGlobal(size_t id, flowGlobalType v) final { getCurrentProcess()->setGlobal(id, v); };
|
void setGlobal(size_t id, flowGlobalType v) final;
|
||||||
|
|
||||||
void disableFor(const std::string& desc, double time) { disabledMap[desc] = time; }
|
void disableFor(const std::string& desc, double time);
|
||||||
|
|
||||||
double checkDisabled(const std::string& desc) const {
|
double checkDisabled(const std::string& desc) const;
|
||||||
auto iter = disabledMap.find(desc);
|
|
||||||
if (iter != disabledMap.end()) {
|
|
||||||
return iter->second;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// generate authz token for use in simulation environment
|
// generate authz token for use in simulation environment
|
||||||
Standalone<StringRef> makeToken(StringRef tenantName, uint64_t ttlSecondsFromNow);
|
Standalone<StringRef> makeToken(StringRef tenantName, uint64_t ttlSecondsFromNow);
|
||||||
|
|
||||||
static thread_local ProcessInfo* currentProcess;
|
static thread_local ProcessInfo* currentProcess;
|
||||||
|
|
||||||
bool checkInjectedCorruption() {
|
bool checkInjectedCorruption();
|
||||||
auto iter = corruptWorkerMap.find(currentProcess->address);
|
|
||||||
if (iter != corruptWorkerMap.end())
|
|
||||||
return iter->second;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
ISimulator();
|
ISimulator();
|
||||||
virtual ~ISimulator();
|
virtual ~ISimulator();
|
||||||
|
@ -39,6 +39,7 @@
|
|||||||
#include "flow/IAsyncFile.h"
|
#include "flow/IAsyncFile.h"
|
||||||
#include "fdbrpc/AsyncFileCached.actor.h"
|
#include "fdbrpc/AsyncFileCached.actor.h"
|
||||||
#include "fdbrpc/AsyncFileEncrypted.h"
|
#include "fdbrpc/AsyncFileEncrypted.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "fdbrpc/AsyncFileNonDurable.actor.h"
|
#include "fdbrpc/AsyncFileNonDurable.actor.h"
|
||||||
#include "fdbrpc/AsyncFileChaos.h"
|
#include "fdbrpc/AsyncFileChaos.h"
|
||||||
#include "crc32/crc32c.h"
|
#include "crc32/crc32c.h"
|
||||||
@ -108,6 +109,33 @@ bool simulator_should_inject_fault(const char* context, const char* file, int li
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stores `time` in disabledMap under the key `desc`, overwriting any value already recorded for it.
void ISimulator::disableFor(const std::string& desc, double time) {
	auto& slot = disabledMap[desc];
	slot = time;
}
|
||||||
|
|
||||||
|
double ISimulator::checkDisabled(const std::string& desc) const {
|
||||||
|
auto iter = disabledMap.find(desc);
|
||||||
|
if (iter != disabledMap.end()) {
|
||||||
|
return iter->second;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ISimulator::checkInjectedCorruption() {
|
||||||
|
auto iter = corruptWorkerMap.find(currentProcess->address);
|
||||||
|
if (iter != corruptWorkerMap.end())
|
||||||
|
return iter->second;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forwards the lookup of global `id` to the current process.
flowGlobalType ISimulator::global(int id) const {
	const auto* process = getCurrentProcess();
	return process->global(id);
}
|
||||||
|
|
||||||
|
// Forwards storing `v` under global `id` to the current process.
void ISimulator::setGlobal(size_t id, flowGlobalType v) {
	auto* process = getCurrentProcess();
	process->setGlobal(id, v);
}
|
||||||
|
|
||||||
void ISimulator::displayWorkers() const {
|
void ISimulator::displayWorkers() const {
|
||||||
std::map<std::string, std::vector<ISimulator::ProcessInfo*>> machineMap;
|
std::map<std::string, std::vector<ISimulator::ProcessInfo*>> machineMap;
|
||||||
|
|
||||||
@ -1389,7 +1417,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return canKillProcesses(processesLeft, processesDead, KillInstantly, nullptr);
|
return canKillProcesses(processesLeft, processesDead, KillType::KillInstantly, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool datacenterDead(Optional<Standalone<StringRef>> dcId) const override {
|
bool datacenterDead(Optional<Standalone<StringRef>> dcId) const override {
|
||||||
@ -1428,7 +1456,7 @@ public:
|
|||||||
// The following function will determine if a machine can be remove in case when it has a blob worker
|
// The following function will determine if a machine can be remove in case when it has a blob worker
|
||||||
bool canKillMachineWithBlobWorkers(Optional<Standalone<StringRef>> machineId, KillType kt, KillType* ktFinal) {
|
bool canKillMachineWithBlobWorkers(Optional<Standalone<StringRef>> machineId, KillType kt, KillType* ktFinal) {
|
||||||
// Allow if no blob workers, or it's a reboot(without removing the machine)
|
// Allow if no blob workers, or it's a reboot(without removing the machine)
|
||||||
if (!blobGranulesEnabled && kt >= RebootAndDelete) {
|
if (!blobGranulesEnabled && kt >= KillType::RebootAndDelete) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1459,7 +1487,7 @@ public:
|
|||||||
|
|
||||||
// Ensure there is at least 1 remaining blob workers after removing current machine
|
// Ensure there is at least 1 remaining blob workers after removing current machine
|
||||||
if (nLeft <= 1) {
|
if (nLeft <= 1) {
|
||||||
*ktFinal = RebootAndDelete; // reboot and delete data, but keep this machine
|
*ktFinal = KillType::RebootAndDelete; // reboot and delete data, but keep this machine
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -1475,8 +1503,8 @@ public:
|
|||||||
int nQuorum = ((desiredCoordinators + 1) / 2) * 2 - 1;
|
int nQuorum = ((desiredCoordinators + 1) / 2) * 2 - 1;
|
||||||
|
|
||||||
KillType newKt = kt;
|
KillType newKt = kt;
|
||||||
if ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) || (kt == RebootAndDelete) ||
|
if ((kt == KillType::KillInstantly) || (kt == KillType::InjectFaults) || (kt == KillType::FailDisk) ||
|
||||||
(kt == RebootProcessAndDelete)) {
|
(kt == KillType::RebootAndDelete) || (kt == KillType::RebootProcessAndDelete)) {
|
||||||
LocalityGroup primaryProcessesLeft, primaryProcessesDead;
|
LocalityGroup primaryProcessesLeft, primaryProcessesDead;
|
||||||
LocalityGroup primarySatelliteProcessesLeft, primarySatelliteProcessesDead;
|
LocalityGroup primarySatelliteProcessesLeft, primarySatelliteProcessesDead;
|
||||||
LocalityGroup remoteProcessesLeft, remoteProcessesDead;
|
LocalityGroup remoteProcessesLeft, remoteProcessesDead;
|
||||||
@ -1643,7 +1671,7 @@ public:
|
|||||||
|
|
||||||
// Reboot if dead machines do fulfill policies
|
// Reboot if dead machines do fulfill policies
|
||||||
if (tooManyDead) {
|
if (tooManyDead) {
|
||||||
newKt = Reboot;
|
newKt = KillType::Reboot;
|
||||||
canSurvive = false;
|
canSurvive = false;
|
||||||
TraceEvent("KillChanged")
|
TraceEvent("KillChanged")
|
||||||
.detail("KillType", kt)
|
.detail("KillType", kt)
|
||||||
@ -1652,16 +1680,16 @@ public:
|
|||||||
.detail("Reason", "Too many dead processes that cannot satisfy tLogPolicy.");
|
.detail("Reason", "Too many dead processes that cannot satisfy tLogPolicy.");
|
||||||
}
|
}
|
||||||
// Reboot and Delete if remaining machines do NOT fulfill policies
|
// Reboot and Delete if remaining machines do NOT fulfill policies
|
||||||
else if ((kt < RebootAndDelete) && notEnoughLeft) {
|
else if ((kt < KillType::RebootAndDelete) && notEnoughLeft) {
|
||||||
newKt = RebootAndDelete;
|
newKt = KillType::RebootAndDelete;
|
||||||
canSurvive = false;
|
canSurvive = false;
|
||||||
TraceEvent("KillChanged")
|
TraceEvent("KillChanged")
|
||||||
.detail("KillType", kt)
|
.detail("KillType", kt)
|
||||||
.detail("NewKillType", newKt)
|
.detail("NewKillType", newKt)
|
||||||
.detail("TLogPolicy", tLogPolicy->info())
|
.detail("TLogPolicy", tLogPolicy->info())
|
||||||
.detail("Reason", "Not enough tLog left to satisfy tLogPolicy.");
|
.detail("Reason", "Not enough tLog left to satisfy tLogPolicy.");
|
||||||
} else if ((kt < RebootAndDelete) && (nQuorum > uniqueMachines.size())) {
|
} else if ((kt < KillType::RebootAndDelete) && (nQuorum > uniqueMachines.size())) {
|
||||||
newKt = RebootAndDelete;
|
newKt = KillType::RebootAndDelete;
|
||||||
canSurvive = false;
|
canSurvive = false;
|
||||||
TraceEvent("KillChanged")
|
TraceEvent("KillChanged")
|
||||||
.detail("KillType", kt)
|
.detail("KillType", kt)
|
||||||
@ -1697,26 +1725,26 @@ public:
|
|||||||
std::swap(*it, processes.back());
|
std::swap(*it, processes.back());
|
||||||
}
|
}
|
||||||
processes.pop_back();
|
processes.pop_back();
|
||||||
killProcess_internal(p, KillInstantly);
|
killProcess_internal(p, KillType::KillInstantly);
|
||||||
}
|
}
|
||||||
void killProcess_internal(ProcessInfo* machine, KillType kt) {
|
void killProcess_internal(ProcessInfo* machine, KillType kt) {
|
||||||
CODE_PROBE(
|
CODE_PROBE(
|
||||||
true, "Simulated machine was killed with any kill type", probe::context::sim2, probe::assert::simOnly);
|
true, "Simulated machine was killed with any kill type", probe::context::sim2, probe::assert::simOnly);
|
||||||
CODE_PROBE(kt == KillInstantly,
|
CODE_PROBE(kt == KillType::KillInstantly,
|
||||||
"Simulated machine was killed instantly",
|
"Simulated machine was killed instantly",
|
||||||
probe::context::sim2,
|
probe::context::sim2,
|
||||||
probe::assert::simOnly);
|
probe::assert::simOnly);
|
||||||
CODE_PROBE(kt == InjectFaults,
|
CODE_PROBE(kt == KillType::InjectFaults,
|
||||||
"Simulated machine was killed with faults",
|
"Simulated machine was killed with faults",
|
||||||
probe::context::sim2,
|
probe::context::sim2,
|
||||||
probe::assert::simOnly);
|
probe::assert::simOnly);
|
||||||
CODE_PROBE(kt == FailDisk,
|
CODE_PROBE(kt == KillType::FailDisk,
|
||||||
"Simulated machine was killed with a failed disk",
|
"Simulated machine was killed with a failed disk",
|
||||||
probe::context::sim2,
|
probe::context::sim2,
|
||||||
probe::assert::simOnly,
|
probe::assert::simOnly,
|
||||||
probe::decoration::rare);
|
probe::decoration::rare);
|
||||||
|
|
||||||
if (kt == KillInstantly) {
|
if (kt == KillType::KillInstantly) {
|
||||||
TraceEvent(SevWarn, "FailMachine")
|
TraceEvent(SevWarn, "FailMachine")
|
||||||
.detail("Name", machine->name)
|
.detail("Name", machine->name)
|
||||||
.detail("Address", machine->address)
|
.detail("Address", machine->address)
|
||||||
@ -1729,7 +1757,7 @@ public:
|
|||||||
if (!machine->isSpawnedKVProcess())
|
if (!machine->isSpawnedKVProcess())
|
||||||
latestEventCache.clear();
|
latestEventCache.clear();
|
||||||
machine->failed = true;
|
machine->failed = true;
|
||||||
} else if (kt == InjectFaults) {
|
} else if (kt == KillType::InjectFaults) {
|
||||||
TraceEvent(SevWarn, "FaultMachine")
|
TraceEvent(SevWarn, "FaultMachine")
|
||||||
.detail("Name", machine->name)
|
.detail("Name", machine->name)
|
||||||
.detail("Address", machine->address)
|
.detail("Address", machine->address)
|
||||||
@ -1742,8 +1770,8 @@ public:
|
|||||||
machine->fault_injection_r = deterministicRandom()->randomUniqueID().first();
|
machine->fault_injection_r = deterministicRandom()->randomUniqueID().first();
|
||||||
machine->fault_injection_p1 = 0.1;
|
machine->fault_injection_p1 = 0.1;
|
||||||
machine->fault_injection_p2 = deterministicRandom()->random01();
|
machine->fault_injection_p2 = deterministicRandom()->random01();
|
||||||
} else if (kt == FailDisk) {
|
} else if (kt == KillType::FailDisk) {
|
||||||
TraceEvent(SevWarn, "FailDiskMachine")
|
TraceEvent(SevWarn, "KillType::FailDiskMachine")
|
||||||
.detail("Name", machine->name)
|
.detail("Name", machine->name)
|
||||||
.detail("Address", machine->address)
|
.detail("Address", machine->address)
|
||||||
.detail("ZoneId", machine->locality.zoneId())
|
.detail("ZoneId", machine->locality.zoneId())
|
||||||
@ -1758,13 +1786,13 @@ public:
|
|||||||
ASSERT(!protectedAddresses.count(machine->address) || machine->rebooting || machine->isSpawnedKVProcess());
|
ASSERT(!protectedAddresses.count(machine->address) || machine->rebooting || machine->isSpawnedKVProcess());
|
||||||
}
|
}
|
||||||
void rebootProcess(ProcessInfo* process, KillType kt) override {
|
void rebootProcess(ProcessInfo* process, KillType kt) override {
|
||||||
if (kt == RebootProcessAndDelete && protectedAddresses.count(process->address)) {
|
if (kt == KillType::RebootProcessAndDelete && protectedAddresses.count(process->address)) {
|
||||||
TraceEvent("RebootChanged")
|
TraceEvent("RebootChanged")
|
||||||
.detail("ZoneId", process->locality.describeZone())
|
.detail("ZoneId", process->locality.describeZone())
|
||||||
.detail("KillType", RebootProcess)
|
.detail("KillType", KillType::RebootProcess)
|
||||||
.detail("OrigKillType", kt)
|
.detail("OrigKillType", kt)
|
||||||
.detail("Reason", "Protected process");
|
.detail("Reason", "Protected process");
|
||||||
kt = RebootProcess;
|
kt = KillType::RebootProcess;
|
||||||
}
|
}
|
||||||
doReboot(process, kt);
|
doReboot(process, kt);
|
||||||
}
|
}
|
||||||
@ -1773,7 +1801,7 @@ public:
|
|||||||
auto processes = getAllProcesses();
|
auto processes = getAllProcesses();
|
||||||
for (int i = 0; i < processes.size(); i++)
|
for (int i = 0; i < processes.size(); i++)
|
||||||
if (processes[i]->locality.zoneId() == zoneId && !processes[i]->rebooting)
|
if (processes[i]->locality.zoneId() == zoneId && !processes[i]->rebooting)
|
||||||
doReboot(processes[i], RebootProcess);
|
doReboot(processes[i], KillType::RebootProcess);
|
||||||
} else {
|
} else {
|
||||||
auto processes = getAllProcesses();
|
auto processes = getAllProcesses();
|
||||||
for (int i = 0; i < processes.size(); i++) {
|
for (int i = 0; i < processes.size(); i++) {
|
||||||
@ -1782,18 +1810,18 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (processes.size())
|
if (processes.size())
|
||||||
doReboot(deterministicRandom()->randomChoice(processes), RebootProcess);
|
doReboot(deterministicRandom()->randomChoice(processes), KillType::RebootProcess);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void killProcess(ProcessInfo* machine, KillType kt) override {
|
void killProcess(ProcessInfo* machine, KillType kt) override {
|
||||||
TraceEvent("AttemptingKillProcess").detail("ProcessInfo", machine->toString());
|
TraceEvent("AttemptingKillProcess").detail("ProcessInfo", machine->toString());
|
||||||
// Refuse to kill a protected process.
|
// Refuse to kill a protected process.
|
||||||
if (kt < RebootAndDelete && protectedAddresses.count(machine->address) == 0) {
|
if (kt < KillType::RebootAndDelete && protectedAddresses.count(machine->address) == 0) {
|
||||||
killProcess_internal(machine, kt);
|
killProcess_internal(machine, kt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void killInterface(NetworkAddress address, KillType kt) override {
|
void killInterface(NetworkAddress address, KillType kt) override {
|
||||||
if (kt < RebootAndDelete) {
|
if (kt < KillType::RebootAndDelete) {
|
||||||
std::vector<ProcessInfo*>& processes = machines[addressMap[address]->locality.machineId()].processes;
|
std::vector<ProcessInfo*>& processes = machines[addressMap[address]->locality.machineId()].processes;
|
||||||
for (auto& process : processes) {
|
for (auto& process : processes) {
|
||||||
// Refuse to kill a protected process.
|
// Refuse to kill a protected process.
|
||||||
@ -1854,9 +1882,12 @@ public:
|
|||||||
auto ktOrig = kt;
|
auto ktOrig = kt;
|
||||||
|
|
||||||
CODE_PROBE(true, "Trying to killing a machine", probe::context::sim2, probe::assert::simOnly);
|
CODE_PROBE(true, "Trying to killing a machine", probe::context::sim2, probe::assert::simOnly);
|
||||||
CODE_PROBE(kt == KillInstantly, "Trying to kill instantly", probe::context::sim2, probe::assert::simOnly);
|
|
||||||
CODE_PROBE(
|
CODE_PROBE(
|
||||||
kt == InjectFaults, "Trying to kill by injecting faults", probe::context::sim2, probe::assert::simOnly);
|
kt == KillType::KillInstantly, "Trying to kill instantly", probe::context::sim2, probe::assert::simOnly);
|
||||||
|
CODE_PROBE(kt == KillType::InjectFaults,
|
||||||
|
"Trying to kill by injecting faults",
|
||||||
|
probe::context::sim2,
|
||||||
|
probe::assert::simOnly);
|
||||||
|
|
||||||
if (speedUpSimulation && !forceKill) {
|
if (speedUpSimulation && !forceKill) {
|
||||||
TraceEvent(SevWarn, "AbortedKill")
|
TraceEvent(SevWarn, "AbortedKill")
|
||||||
@ -1864,7 +1895,7 @@ public:
|
|||||||
.detail("Reason", "Unforced kill within speedy simulation.")
|
.detail("Reason", "Unforced kill within speedy simulation.")
|
||||||
.backtrace();
|
.backtrace();
|
||||||
if (ktFinal)
|
if (ktFinal)
|
||||||
*ktFinal = None;
|
*ktFinal = KillType::None;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1875,7 +1906,7 @@ public:
|
|||||||
// Reboot if any of the processes are protected and count the number of processes not rebooting
|
// Reboot if any of the processes are protected and count the number of processes not rebooting
|
||||||
for (auto& process : machines[machineId].processes) {
|
for (auto& process : machines[machineId].processes) {
|
||||||
if (protectedAddresses.count(process->address))
|
if (protectedAddresses.count(process->address))
|
||||||
kt = Reboot;
|
kt = KillType::Reboot;
|
||||||
if (!process->rebooting)
|
if (!process->rebooting)
|
||||||
processesOnMachine++;
|
processesOnMachine++;
|
||||||
if (process->drProcess) {
|
if (process->drProcess) {
|
||||||
@ -1892,13 +1923,14 @@ public:
|
|||||||
.detail("ProcessesPerMachine", processesPerMachine)
|
.detail("ProcessesPerMachine", processesPerMachine)
|
||||||
.backtrace();
|
.backtrace();
|
||||||
if (ktFinal)
|
if (ktFinal)
|
||||||
*ktFinal = None;
|
*ktFinal = KillType::None;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if machine can be removed, if requested
|
// Check if machine can be removed, if requested
|
||||||
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) ||
|
if (!forceKill &&
|
||||||
(kt == RebootAndDelete) || (kt == RebootProcessAndDelete))) {
|
((kt == KillType::KillInstantly) || (kt == KillType::InjectFaults) || (kt == KillType::FailDisk) ||
|
||||||
|
(kt == KillType::RebootAndDelete) || (kt == KillType::RebootProcessAndDelete))) {
|
||||||
|
|
||||||
if (!canKillMachineWithBlobWorkers(machineId, kt, &kt)) {
|
if (!canKillMachineWithBlobWorkers(machineId, kt, &kt)) {
|
||||||
TraceEvent("CanKillMachineWithBlobWorkers")
|
TraceEvent("CanKillMachineWithBlobWorkers")
|
||||||
@ -1947,7 +1979,8 @@ public:
|
|||||||
.detail("ProtectedTotal", protectedAddresses.size())
|
.detail("ProtectedTotal", protectedAddresses.size())
|
||||||
.detail("TLogPolicy", tLogPolicy->info())
|
.detail("TLogPolicy", tLogPolicy->info())
|
||||||
.detail("StoragePolicy", storagePolicy->info());
|
.detail("StoragePolicy", storagePolicy->info());
|
||||||
} else if ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk)) {
|
} else if ((kt == KillType::KillInstantly) || (kt == KillType::InjectFaults) ||
|
||||||
|
(kt == KillType::FailDisk)) {
|
||||||
TraceEvent("DeadMachine")
|
TraceEvent("DeadMachine")
|
||||||
.detail("MachineId", machineId)
|
.detail("MachineId", machineId)
|
||||||
.detail("KillType", kt)
|
.detail("KillType", kt)
|
||||||
@ -2007,12 +2040,12 @@ public:
|
|||||||
probe::context::sim2,
|
probe::context::sim2,
|
||||||
probe::assert::simOnly);
|
probe::assert::simOnly);
|
||||||
|
|
||||||
if (isMainCluster && originalKt == RebootProcessAndSwitch) {
|
if (isMainCluster && originalKt == KillType::RebootProcessAndSwitch) {
|
||||||
// When killing processes with the RebootProcessAndSwitch kill
|
// When killing processes with the RebootProcessAndSwitch kill
|
||||||
// type, processes in the original cluster should be rebooted in
|
// type, processes in the original cluster should be rebooted in
|
||||||
// order to kill any zombie processes.
|
// order to kill any zombie processes.
|
||||||
kt = KillType::Reboot;
|
kt = KillType::Reboot;
|
||||||
} else if (processesOnMachine != processesPerMachine && kt != RebootProcessAndSwitch) {
|
} else if (processesOnMachine != processesPerMachine && kt != KillType::RebootProcessAndSwitch) {
|
||||||
// Check if any processes on machine are rebooting
|
// Check if any processes on machine are rebooting
|
||||||
CODE_PROBE(true,
|
CODE_PROBE(true,
|
||||||
"Attempted reboot, but the target did not have all of its processes running",
|
"Attempted reboot, but the target did not have all of its processes running",
|
||||||
@ -2026,7 +2059,7 @@ public:
|
|||||||
.detail("ProcessesPerMachine", processesPerMachine)
|
.detail("ProcessesPerMachine", processesPerMachine)
|
||||||
.backtrace();
|
.backtrace();
|
||||||
if (ktFinal)
|
if (ktFinal)
|
||||||
*ktFinal = None;
|
*ktFinal = KillType::None;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2037,8 +2070,9 @@ public:
|
|||||||
.detail("KillableMachines", processesOnMachine)
|
.detail("KillableMachines", processesOnMachine)
|
||||||
.detail("ProcessPerMachine", processesPerMachine)
|
.detail("ProcessPerMachine", processesPerMachine)
|
||||||
.detail("KillChanged", kt != ktOrig);
|
.detail("KillChanged", kt != ktOrig);
|
||||||
if (kt < RebootAndDelete) {
|
if (kt < KillType::RebootAndDelete) {
|
||||||
if ((kt == InjectFaults || kt == FailDisk) && machines[machineId].machineProcess != nullptr)
|
if ((kt == KillType::InjectFaults || kt == KillType::FailDisk) &&
|
||||||
|
machines[machineId].machineProcess != nullptr)
|
||||||
killProcess_internal(machines[machineId].machineProcess, kt);
|
killProcess_internal(machines[machineId].machineProcess, kt);
|
||||||
for (auto& process : machines[machineId].processes) {
|
for (auto& process : machines[machineId].processes) {
|
||||||
TraceEvent("KillMachineProcess")
|
TraceEvent("KillMachineProcess")
|
||||||
@ -2052,7 +2086,7 @@ public:
|
|||||||
if (process->startingClass != ProcessClass::TesterClass)
|
if (process->startingClass != ProcessClass::TesterClass)
|
||||||
killProcess_internal(process, kt);
|
killProcess_internal(process, kt);
|
||||||
}
|
}
|
||||||
} else if (kt == Reboot || kt == RebootAndDelete || kt == RebootProcessAndSwitch) {
|
} else if (kt == KillType::Reboot || kt == KillType::RebootAndDelete || kt == KillType::RebootProcessAndSwitch) {
|
||||||
for (auto& process : machines[machineId].processes) {
|
for (auto& process : machines[machineId].processes) {
|
||||||
TraceEvent("KillMachineProcess")
|
TraceEvent("KillMachineProcess")
|
||||||
.detail("KillType", kt)
|
.detail("KillType", kt)
|
||||||
@ -2067,12 +2101,17 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CODE_PROBE(kt == KillType::RebootAndDelete,
|
||||||
|
"Resulted in a reboot and delete",
|
||||||
|
probe::context::sim2,
|
||||||
|
probe::assert::simOnly);
|
||||||
|
CODE_PROBE(kt == KillType::Reboot, "Resulted in a reboot", probe::context::sim2, probe::assert::simOnly);
|
||||||
CODE_PROBE(
|
CODE_PROBE(
|
||||||
kt == RebootAndDelete, "Resulted in a reboot and delete", probe::context::sim2, probe::assert::simOnly);
|
kt == KillType::KillInstantly, "Resulted in an instant kill", probe::context::sim2, probe::assert::simOnly);
|
||||||
CODE_PROBE(kt == Reboot, "Resulted in a reboot", probe::context::sim2, probe::assert::simOnly);
|
CODE_PROBE(kt == KillType::InjectFaults,
|
||||||
CODE_PROBE(kt == KillInstantly, "Resulted in an instant kill", probe::context::sim2, probe::assert::simOnly);
|
"Resulted in a kill by injecting faults",
|
||||||
CODE_PROBE(
|
probe::context::sim2,
|
||||||
kt == InjectFaults, "Resulted in a kill by injecting faults", probe::context::sim2, probe::assert::simOnly);
|
probe::assert::simOnly);
|
||||||
|
|
||||||
if (ktFinal)
|
if (ktFinal)
|
||||||
*ktFinal = kt;
|
*ktFinal = kt;
|
||||||
@ -2091,8 +2130,8 @@ public:
|
|||||||
auto processMachineId = procRecord->locality.machineId();
|
auto processMachineId = procRecord->locality.machineId();
|
||||||
ASSERT(processMachineId.present());
|
ASSERT(processMachineId.present());
|
||||||
if (processDcId.present() && (processDcId == dcId)) {
|
if (processDcId.present() && (processDcId == dcId)) {
|
||||||
if ((kt != Reboot) && (protectedAddresses.count(procRecord->address))) {
|
if ((kt != KillType::Reboot) && (protectedAddresses.count(procRecord->address))) {
|
||||||
kt = Reboot;
|
kt = KillType::Reboot;
|
||||||
TraceEvent(SevWarn, "DcKillChanged")
|
TraceEvent(SevWarn, "DcKillChanged")
|
||||||
.detail("DataCenter", dcId)
|
.detail("DataCenter", dcId)
|
||||||
.detail("KillType", kt)
|
.detail("KillType", kt)
|
||||||
@ -2111,8 +2150,9 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check if machine can be removed, if requested
|
// Check if machine can be removed, if requested
|
||||||
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) ||
|
if (!forceKill &&
|
||||||
(kt == RebootAndDelete) || (kt == RebootProcessAndDelete))) {
|
((kt == KillType::KillInstantly) || (kt == KillType::InjectFaults) || (kt == KillType::FailDisk) ||
|
||||||
|
(kt == KillType::RebootAndDelete) || (kt == KillType::RebootProcessAndDelete))) {
|
||||||
std::vector<ProcessInfo*> processesLeft, processesDead;
|
std::vector<ProcessInfo*> processesLeft, processesDead;
|
||||||
for (auto processInfo : getAllProcesses()) {
|
for (auto processInfo : getAllProcesses()) {
|
||||||
if (processInfo->isAvailableClass()) {
|
if (processInfo->isAvailableClass()) {
|
||||||
@ -2171,7 +2211,7 @@ public:
|
|||||||
.detail("KillType", kt)
|
.detail("KillType", kt)
|
||||||
.detail("KillTypeResult", ktResult)
|
.detail("KillTypeResult", ktResult)
|
||||||
.detail("KillTypeOrig", ktOrig);
|
.detail("KillTypeOrig", ktOrig);
|
||||||
ASSERT(ktResult == None);
|
ASSERT(ktResult == KillType::None);
|
||||||
}
|
}
|
||||||
ktMin = std::min<KillType>(ktResult, ktMin);
|
ktMin = std::min<KillType>(ktResult, ktMin);
|
||||||
}
|
}
|
||||||
@ -2191,19 +2231,19 @@ public:
|
|||||||
probe::context::sim2,
|
probe::context::sim2,
|
||||||
probe::assert::simOnly,
|
probe::assert::simOnly,
|
||||||
probe::decoration::rare);
|
probe::decoration::rare);
|
||||||
CODE_PROBE((kt == ktMin) && (kt == RebootAndDelete),
|
CODE_PROBE((kt == ktMin) && (kt == KillType::RebootAndDelete),
|
||||||
"Datacenter kill Resulted in a reboot and delete",
|
"Datacenter kill Resulted in a reboot and delete",
|
||||||
probe::context::sim2,
|
probe::context::sim2,
|
||||||
probe::assert::simOnly);
|
probe::assert::simOnly);
|
||||||
CODE_PROBE((kt == ktMin) && (kt == Reboot),
|
CODE_PROBE((kt == ktMin) && (kt == KillType::Reboot),
|
||||||
"Datacenter kill Resulted in a reboot",
|
"Datacenter kill Resulted in a reboot",
|
||||||
probe::context::sim2,
|
probe::context::sim2,
|
||||||
probe::assert::simOnly);
|
probe::assert::simOnly);
|
||||||
CODE_PROBE((kt == ktMin) && (kt == KillInstantly),
|
CODE_PROBE((kt == ktMin) && (kt == KillType::KillInstantly),
|
||||||
"Datacenter kill Resulted in an instant kill",
|
"Datacenter kill Resulted in an instant kill",
|
||||||
probe::context::sim2,
|
probe::context::sim2,
|
||||||
probe::assert::simOnly);
|
probe::assert::simOnly);
|
||||||
CODE_PROBE((kt == ktMin) && (kt == InjectFaults),
|
CODE_PROBE((kt == ktMin) && (kt == KillType::InjectFaults),
|
||||||
"Datacenter kill Resulted in a kill by injecting faults",
|
"Datacenter kill Resulted in a kill by injecting faults",
|
||||||
probe::context::sim2,
|
probe::context::sim2,
|
||||||
probe::assert::simOnly);
|
probe::assert::simOnly);
|
||||||
@ -2279,7 +2319,7 @@ public:
|
|||||||
ASSERT(process->failed);
|
ASSERT(process->failed);
|
||||||
}
|
}
|
||||||
if (machine.machineProcess) {
|
if (machine.machineProcess) {
|
||||||
killProcess_internal(machine.machineProcess, KillInstantly);
|
killProcess_internal(machine.machineProcess, KillType::KillInstantly);
|
||||||
}
|
}
|
||||||
machines.erase(machineId);
|
machines.erase(machineId);
|
||||||
}
|
}
|
||||||
@ -2323,7 +2363,7 @@ public:
|
|||||||
ASSERT(this->currentProcess == t.machine);
|
ASSERT(this->currentProcess == t.machine);
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
TraceEvent(SevError, "UnhandledSimulationEventError").errorUnsuppressed(e);
|
TraceEvent(SevError, "UnhandledSimulationEventError").errorUnsuppressed(e);
|
||||||
killProcess(t.machine, KillInstantly);
|
killProcess(t.machine, KillType::KillInstantly);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (randLog)
|
if (randLog)
|
||||||
@ -2607,24 +2647,25 @@ ACTOR void doReboot(ISimulator::ProcessInfo* p, ISimulator::KillType kt) {
|
|||||||
wait(g_sim2.delay(0, TaskPriority::DefaultDelay, p)); // Switch to the machine in question
|
wait(g_sim2.delay(0, TaskPriority::DefaultDelay, p)); // Switch to the machine in question
|
||||||
|
|
||||||
try {
|
try {
|
||||||
ASSERT(kt == ISimulator::RebootProcess || kt == ISimulator::Reboot || kt == ISimulator::RebootAndDelete ||
|
ASSERT(kt == ISimulator::KillType::RebootProcess || kt == ISimulator::KillType::Reboot ||
|
||||||
kt == ISimulator::RebootProcessAndDelete || kt == ISimulator::RebootProcessAndSwitch);
|
kt == ISimulator::KillType::RebootAndDelete || kt == ISimulator::KillType::RebootProcessAndDelete ||
|
||||||
|
kt == ISimulator::KillType::RebootProcessAndSwitch);
|
||||||
|
|
||||||
CODE_PROBE(kt == ISimulator::RebootProcess,
|
CODE_PROBE(kt == ISimulator::KillType::RebootProcess,
|
||||||
"Simulated process rebooted",
|
"Simulated process rebooted",
|
||||||
probe::assert::simOnly,
|
probe::assert::simOnly,
|
||||||
probe::context::sim2);
|
probe::context::sim2);
|
||||||
CODE_PROBE(
|
CODE_PROBE(
|
||||||
kt == ISimulator::Reboot, "Simulated machine rebooted", probe::assert::simOnly, probe::context::sim2);
|
kt == ISimulator::KillType::Reboot, "Simulated machine rebooted", probe::assert::simOnly, probe::context::sim2);
|
||||||
CODE_PROBE(kt == ISimulator::RebootAndDelete,
|
CODE_PROBE(kt == ISimulator::KillType::RebootAndDelete,
|
||||||
"Simulated machine rebooted with data and coordination state deletion",
|
"Simulated machine rebooted with data and coordination state deletion",
|
||||||
probe::assert::simOnly,
|
probe::assert::simOnly,
|
||||||
probe::context::sim2);
|
probe::context::sim2);
|
||||||
CODE_PROBE(kt == ISimulator::RebootProcessAndDelete,
|
CODE_PROBE(kt == ISimulator::KillType::RebootProcessAndDelete,
|
||||||
"Simulated process rebooted with data and coordination state deletion",
|
"Simulated process rebooted with data and coordination state deletion",
|
||||||
probe::assert::simOnly,
|
probe::assert::simOnly,
|
||||||
probe::context::sim2);
|
probe::context::sim2);
|
||||||
CODE_PROBE(kt == ISimulator::RebootProcessAndSwitch,
|
CODE_PROBE(kt == ISimulator::KillType::RebootProcessAndSwitch,
|
||||||
"Simulated process rebooted with different cluster file",
|
"Simulated process rebooted with different cluster file",
|
||||||
probe::assert::simOnly,
|
probe::assert::simOnly,
|
||||||
probe::context::sim2);
|
probe::context::sim2);
|
||||||
@ -2653,10 +2694,10 @@ ACTOR void doReboot(ISimulator::ProcessInfo* p, ISimulator::KillType kt) {
|
|||||||
.detail("Cleared", p->cleared)
|
.detail("Cleared", p->cleared)
|
||||||
.backtrace();
|
.backtrace();
|
||||||
p->rebooting = true;
|
p->rebooting = true;
|
||||||
if ((kt == ISimulator::RebootAndDelete) || (kt == ISimulator::RebootProcessAndDelete)) {
|
if ((kt == ISimulator::KillType::RebootAndDelete) || (kt == ISimulator::KillType::RebootProcessAndDelete)) {
|
||||||
p->cleared = true;
|
p->cleared = true;
|
||||||
g_simulator->clearAddress(p->address);
|
g_simulator->clearAddress(p->address);
|
||||||
} else if (kt == ISimulator::RebootProcessAndSwitch) {
|
} else if (kt == ISimulator::KillType::RebootProcessAndSwitch) {
|
||||||
g_simulator->switchCluster(p->address);
|
g_simulator->switchCluster(p->address);
|
||||||
}
|
}
|
||||||
p->shutdownSignal.send(kt);
|
p->shutdownSignal.send(kt);
|
||||||
|
@ -35,6 +35,7 @@
|
|||||||
#include "fdbrpc/FlowProcess.actor.h"
|
#include "fdbrpc/FlowProcess.actor.h"
|
||||||
#include "fdbrpc/Net2FileSystem.h"
|
#include "fdbrpc/Net2FileSystem.h"
|
||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/simulator.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "fdbrpc/WellKnownEndpoints.h"
|
#include "fdbrpc/WellKnownEndpoints.h"
|
||||||
#include "fdbclient/versions.h"
|
#include "fdbclient/versions.h"
|
||||||
#include "fdbserver/CoroFlow.h"
|
#include "fdbserver/CoroFlow.h"
|
||||||
|
@ -38,6 +38,7 @@ u32 sqlite3VdbeSerialGet(const unsigned char*, u32, Mem*);
|
|||||||
#include "fdbserver/VFSAsync.h"
|
#include "fdbserver/VFSAsync.h"
|
||||||
#include "fdbserver/template_fdb.h"
|
#include "fdbserver/template_fdb.h"
|
||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/simulator.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||||
|
|
||||||
#if SQLITE_THREADSAFE == 0
|
#if SQLITE_THREADSAFE == 0
|
||||||
|
@ -732,7 +732,7 @@ ACTOR Future<Void> repairDeadDatacenter(Database cx,
|
|||||||
.detail("PrimaryDead", primaryDead);
|
.detail("PrimaryDead", primaryDead);
|
||||||
g_simulator->usableRegions = 1;
|
g_simulator->usableRegions = 1;
|
||||||
g_simulator->killDataCenter(
|
g_simulator->killDataCenter(
|
||||||
primaryDead ? g_simulator->primaryDcId : g_simulator->remoteDcId, ISimulator::KillInstantly, true);
|
primaryDead ? g_simulator->primaryDcId : g_simulator->remoteDcId, ISimulator::KillType::KillInstantly, true);
|
||||||
wait(success(ManagementAPI::changeConfig(
|
wait(success(ManagementAPI::changeConfig(
|
||||||
cx.getReference(),
|
cx.getReference(),
|
||||||
(primaryDead ? g_simulator->disablePrimary : g_simulator->disableRemote) + " repopulate_anti_quorum=1",
|
(primaryDead ? g_simulator->disablePrimary : g_simulator->disableRemote) + " repopulate_anti_quorum=1",
|
||||||
|
@ -39,6 +39,7 @@
|
|||||||
#include "flow/ActorCollection.h"
|
#include "flow/ActorCollection.h"
|
||||||
#include "fdbserver/RestoreWorker.actor.h"
|
#include "fdbserver/RestoreWorker.actor.h"
|
||||||
#include "fdbserver/RestoreController.actor.h"
|
#include "fdbserver/RestoreController.actor.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
|
|
||||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include "fdbrpc/Locality.h"
|
#include "fdbrpc/Locality.h"
|
||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/simulator.h"
|
||||||
#include "fdbrpc/IPAllowList.h"
|
#include "fdbrpc/IPAllowList.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "fdbclient/ClusterConnectionFile.h"
|
#include "fdbclient/ClusterConnectionFile.h"
|
||||||
#include "fdbclient/ClusterConnectionMemoryRecord.h"
|
#include "fdbclient/ClusterConnectionMemoryRecord.h"
|
||||||
#include "fdbclient/DatabaseContext.h"
|
#include "fdbclient/DatabaseContext.h"
|
||||||
@ -62,6 +63,10 @@ extern const char* getSourceVersion();
|
|||||||
|
|
||||||
using namespace std::literals;
|
using namespace std::literals;
|
||||||
|
|
||||||
|
bool isSimulatorProcessReliable() {
|
||||||
|
return g_network->isSimulated() && !g_simulator->getCurrentProcess()->isReliable();
|
||||||
|
}
|
||||||
|
|
||||||
namespace probe {
|
namespace probe {
|
||||||
|
|
||||||
namespace assert {
|
namespace assert {
|
||||||
@ -780,7 +785,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
|
|||||||
.backtrace();
|
.backtrace();
|
||||||
|
|
||||||
if (e.code() == error_code_io_timeout && !onShutdown.isReady()) {
|
if (e.code() == error_code_io_timeout && !onShutdown.isReady()) {
|
||||||
onShutdown = ISimulator::RebootProcess;
|
onShutdown = ISimulator::KillType::RebootProcess;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (onShutdown.isReady() && onShutdown.isError())
|
if (onShutdown.isReady() && onShutdown.isError())
|
||||||
@ -804,10 +809,10 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
|
|||||||
.detail("Address", process->address)
|
.detail("Address", process->address)
|
||||||
.detail("Excluded", process->excluded)
|
.detail("Excluded", process->excluded)
|
||||||
.detail("ZoneId", localities.zoneId())
|
.detail("ZoneId", localities.zoneId())
|
||||||
.detail("KillType", onShutdown.isReady() ? onShutdown.get() : ISimulator::None);
|
.detail("KillType", onShutdown.isReady() ? onShutdown.get() : ISimulator::KillType::None);
|
||||||
|
|
||||||
if (!onShutdown.isReady())
|
if (!onShutdown.isReady())
|
||||||
onShutdown = ISimulator::InjectFaults;
|
onShutdown = ISimulator::KillType::InjectFaults;
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
TraceEvent(destructed ? SevInfo : SevError, "SimulatedFDBDRebooterError")
|
TraceEvent(destructed ? SevInfo : SevError, "SimulatedFDBDRebooterError")
|
||||||
.errorUnsuppressed(e)
|
.errorUnsuppressed(e)
|
||||||
@ -820,7 +825,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
|
|||||||
|
|
||||||
if (!process->shutdownSignal.isSet() && !destructed) {
|
if (!process->shutdownSignal.isSet() && !destructed) {
|
||||||
process->rebooting = true;
|
process->rebooting = true;
|
||||||
process->shutdownSignal.send(ISimulator::None);
|
process->shutdownSignal.send(ISimulator::KillType::None);
|
||||||
}
|
}
|
||||||
TraceEvent("SimulatedFDBDWait")
|
TraceEvent("SimulatedFDBDWait")
|
||||||
.detail("Cycles", cycles)
|
.detail("Cycles", cycles)
|
||||||
@ -845,7 +850,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
|
|||||||
.detail("ZoneId", localities.zoneId())
|
.detail("ZoneId", localities.zoneId())
|
||||||
.detail("KillType", shutdownResult);
|
.detail("KillType", shutdownResult);
|
||||||
|
|
||||||
if (shutdownResult < ISimulator::RebootProcessAndDelete) {
|
if (shutdownResult < ISimulator::KillType::RebootProcessAndDelete) {
|
||||||
TraceEvent("SimulatedFDBDLowerReboot")
|
TraceEvent("SimulatedFDBDLowerReboot")
|
||||||
.detail("Cycles", cycles)
|
.detail("Cycles", cycles)
|
||||||
.detail("RandomId", randomId)
|
.detail("RandomId", randomId)
|
||||||
@ -856,7 +861,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
|
|||||||
return onShutdown.get();
|
return onShutdown.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (onShutdown.get() == ISimulator::RebootProcessAndDelete) {
|
if (onShutdown.get() == ISimulator::KillType::RebootProcessAndDelete) {
|
||||||
TraceEvent("SimulatedFDBDRebootAndDelete")
|
TraceEvent("SimulatedFDBDRebootAndDelete")
|
||||||
.detail("Cycles", cycles)
|
.detail("Cycles", cycles)
|
||||||
.detail("RandomId", randomId)
|
.detail("RandomId", randomId)
|
||||||
@ -874,7 +879,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<IClusterConne
|
|||||||
connRecord =
|
connRecord =
|
||||||
makeReference<ClusterConnectionFile>(joinPath(*dataFolder, "fdb.cluster"), connStr.toString());
|
makeReference<ClusterConnectionFile>(joinPath(*dataFolder, "fdb.cluster"), connStr.toString());
|
||||||
}
|
}
|
||||||
} else if (onShutdown.get() == ISimulator::RebootProcessAndSwitch) {
|
} else if (onShutdown.get() == ISimulator::KillType::RebootProcessAndSwitch) {
|
||||||
TraceEvent("SimulatedFDBDRebootAndSwitch")
|
TraceEvent("SimulatedFDBDRebootAndSwitch")
|
||||||
.detail("Cycles", cycles)
|
.detail("Cycles", cycles)
|
||||||
.detail("RandomId", randomId)
|
.detail("RandomId", randomId)
|
||||||
@ -1147,7 +1152,7 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr,
|
|||||||
|
|
||||||
CODE_PROBE(true, "Simulated machine has been rebooted");
|
CODE_PROBE(true, "Simulated machine has been rebooted");
|
||||||
|
|
||||||
state bool swap = killType == ISimulator::Reboot && BUGGIFY_WITH_PROB(0.75) &&
|
state bool swap = killType == ISimulator::KillType::Reboot && BUGGIFY_WITH_PROB(0.75) &&
|
||||||
g_simulator->canSwapToMachine(localities.zoneId());
|
g_simulator->canSwapToMachine(localities.zoneId());
|
||||||
if (swap)
|
if (swap)
|
||||||
availableFolders[localities.dcId()].push_back(myFolders);
|
availableFolders[localities.dcId()].push_back(myFolders);
|
||||||
@ -1187,7 +1192,7 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (killType == ISimulator::RebootAndDelete) {
|
} else if (killType == ISimulator::KillType::RebootAndDelete) {
|
||||||
for (int i = 0; i < ips.size(); i++) {
|
for (int i = 0; i < ips.size(); i++) {
|
||||||
coordFolders[i] = joinPath(baseFolder, deterministicRandom()->randomUniqueID().toString());
|
coordFolders[i] = joinPath(baseFolder, deterministicRandom()->randomUniqueID().toString());
|
||||||
myFolders[i] = joinPath(baseFolder, deterministicRandom()->randomUniqueID().toString());
|
myFolders[i] = joinPath(baseFolder, deterministicRandom()->randomUniqueID().toString());
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
#include "flow/IAsyncFile.h"
|
#include "flow/IAsyncFile.h"
|
||||||
#include "fdbserver/CoroFlow.h"
|
#include "fdbserver/CoroFlow.h"
|
||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/simulator.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "fdbrpc/AsyncFileReadAhead.actor.h"
|
#include "fdbrpc/AsyncFileReadAhead.actor.h"
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include "Coro.h"
|
#include "Coro.h"
|
||||||
#include "flow/TDMetric.actor.h"
|
#include "flow/TDMetric.actor.h"
|
||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/simulator.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "flow/actorcompiler.h" // has to be last include
|
#include "flow/actorcompiler.h" // has to be last include
|
||||||
|
|
||||||
// Old libcoroutine based implementation. Used on Windows until CI has
|
// Old libcoroutine based implementation. Used on Windows until CI has
|
||||||
|
@ -45,6 +45,7 @@
|
|||||||
#include "fdbclient/versions.h"
|
#include "fdbclient/versions.h"
|
||||||
#include "fdbclient/BuildFlags.h"
|
#include "fdbclient/BuildFlags.h"
|
||||||
#include "fdbrpc/WellKnownEndpoints.h"
|
#include "fdbrpc/WellKnownEndpoints.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "fdbclient/SimpleIni.h"
|
#include "fdbclient/SimpleIni.h"
|
||||||
#include "fdbrpc/AsyncFileCached.actor.h"
|
#include "fdbrpc/AsyncFileCached.actor.h"
|
||||||
#include "fdbrpc/IPAllowList.h"
|
#include "fdbrpc/IPAllowList.h"
|
||||||
@ -363,7 +364,7 @@ ACTOR void failAfter(Future<Void> trigger, ISimulator::ProcessInfo* m = g_simula
|
|||||||
wait(trigger);
|
wait(trigger);
|
||||||
if (enableFailures) {
|
if (enableFailures) {
|
||||||
printf("Killing machine: %s at %f\n", m->address.toString().c_str(), now());
|
printf("Killing machine: %s at %f\n", m->address.toString().c_str(), now());
|
||||||
g_simulator->killProcess(m, ISimulator::KillInstantly);
|
g_simulator->killProcess(m, ISimulator::KillType::KillInstantly);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1301,6 +1301,8 @@ ACTOR Future<Void> tLog(IKeyValueStore* persistentData,
|
|||||||
|
|
||||||
typedef decltype(&tLog) TLogFn;
|
typedef decltype(&tLog) TLogFn;
|
||||||
|
|
||||||
|
extern bool isSimulatorProcessReliable();
|
||||||
|
|
||||||
ACTOR template <class T>
|
ACTOR template <class T>
|
||||||
Future<T> ioTimeoutError(Future<T> what, double time, const char* context = nullptr) {
|
Future<T> ioTimeoutError(Future<T> what, double time, const char* context = nullptr) {
|
||||||
// Before simulation is sped up, IO operations can take a very long time so limit timeouts
|
// Before simulation is sped up, IO operations can take a very long time so limit timeouts
|
||||||
@ -1310,12 +1312,10 @@ Future<T> ioTimeoutError(Future<T> what, double time, const char* context = null
|
|||||||
}
|
}
|
||||||
Future<Void> end = lowPriorityDelay(time);
|
Future<Void> end = lowPriorityDelay(time);
|
||||||
choose {
|
choose {
|
||||||
when(T t = wait(what)) {
|
when(T t = wait(what)) { return t; }
|
||||||
return t;
|
|
||||||
}
|
|
||||||
when(wait(end)) {
|
when(wait(end)) {
|
||||||
Error err = io_timeout();
|
Error err = io_timeout();
|
||||||
if (g_network->isSimulated() && !g_simulator->getCurrentProcess()->isReliable()) {
|
if (!isSimulatorProcessReliable()) {
|
||||||
err = err.asInjectedFault();
|
err = err.asInjectedFault();
|
||||||
}
|
}
|
||||||
TraceEvent e(SevError, "IoTimeoutError");
|
TraceEvent e(SevError, "IoTimeoutError");
|
||||||
@ -1346,9 +1346,7 @@ Future<T> ioDegradedOrTimeoutError(Future<T> what,
|
|||||||
if (degradedTime < errTime) {
|
if (degradedTime < errTime) {
|
||||||
Future<Void> degradedEnd = lowPriorityDelay(degradedTime);
|
Future<Void> degradedEnd = lowPriorityDelay(degradedTime);
|
||||||
choose {
|
choose {
|
||||||
when(T t = wait(what)) {
|
when(T t = wait(what)) { return t; }
|
||||||
return t;
|
|
||||||
}
|
|
||||||
when(wait(degradedEnd)) {
|
when(wait(degradedEnd)) {
|
||||||
CODE_PROBE(true, "TLog degraded", probe::func::deduplicate);
|
CODE_PROBE(true, "TLog degraded", probe::func::deduplicate);
|
||||||
TraceEvent(SevWarnAlways, "IoDegraded").log();
|
TraceEvent(SevWarnAlways, "IoDegraded").log();
|
||||||
@ -1359,12 +1357,10 @@ Future<T> ioDegradedOrTimeoutError(Future<T> what,
|
|||||||
|
|
||||||
Future<Void> end = lowPriorityDelay(errTime - degradedTime);
|
Future<Void> end = lowPriorityDelay(errTime - degradedTime);
|
||||||
choose {
|
choose {
|
||||||
when(T t = wait(what)) {
|
when(T t = wait(what)) { return t; }
|
||||||
return t;
|
|
||||||
}
|
|
||||||
when(wait(end)) {
|
when(wait(end)) {
|
||||||
Error err = io_timeout();
|
Error err = io_timeout();
|
||||||
if (g_network->isSimulated() && !g_simulator->getCurrentProcess()->isReliable()) {
|
if (!isSimulatorProcessReliable()) {
|
||||||
err = err.asInjectedFault();
|
err = err.asInjectedFault();
|
||||||
}
|
}
|
||||||
TraceEvent e(SevError, "IoTimeoutError");
|
TraceEvent e(SevError, "IoTimeoutError");
|
||||||
|
@ -46,6 +46,7 @@
|
|||||||
#include "fdbclient/ManagementAPI.actor.h"
|
#include "fdbclient/ManagementAPI.actor.h"
|
||||||
#include "fdbserver/Knobs.h"
|
#include "fdbserver/Knobs.h"
|
||||||
#include "fdbserver/WorkerInterface.actor.h"
|
#include "fdbserver/WorkerInterface.actor.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||||
|
|
||||||
FDB_DEFINE_BOOLEAN_PARAM(UntrustedMode);
|
FDB_DEFINE_BOOLEAN_PARAM(UntrustedMode);
|
||||||
|
@ -71,6 +71,7 @@
|
|||||||
#include "flow/network.h"
|
#include "flow/network.h"
|
||||||
#include "flow/serialize.h"
|
#include "flow/serialize.h"
|
||||||
#include "flow/ChaosMetrics.h"
|
#include "flow/ChaosMetrics.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include "fdbserver/ServerDBInfo.actor.h"
|
#include "fdbserver/ServerDBInfo.actor.h"
|
||||||
#include "fdbserver/workloads/workloads.actor.h"
|
#include "fdbserver/workloads/workloads.actor.h"
|
||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/simulator.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
#include "fdbclient/NativeAPI.actor.h"
|
#include "fdbclient/NativeAPI.actor.h"
|
||||||
#include "fdbserver/TesterInterface.actor.h"
|
#include "fdbserver/TesterInterface.actor.h"
|
||||||
#include "fdbserver/workloads/workloads.actor.h"
|
#include "fdbserver/workloads/workloads.actor.h"
|
||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||||
|
|
||||||
class ClogSingleConnectionWorkload : public TestWorkload {
|
class ClogSingleConnectionWorkload : public TestWorkload {
|
||||||
|
@ -38,6 +38,7 @@
|
|||||||
#include "fdbclient/ManagementAPI.actor.h"
|
#include "fdbclient/ManagementAPI.actor.h"
|
||||||
#include "fdbclient/StorageServerInterface.h"
|
#include "fdbclient/StorageServerInterface.h"
|
||||||
#include "flow/network.h"
|
#include "flow/network.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
|
|
||||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||||
|
|
||||||
@ -1114,9 +1115,9 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
|||||||
.detail("ReliableInfo", p->getReliableInfo())
|
.detail("ReliableInfo", p->getReliableInfo())
|
||||||
.detail("KillOrRebootProcess", p->address);
|
.detail("KillOrRebootProcess", p->address);
|
||||||
if (p->isReliable()) {
|
if (p->isReliable()) {
|
||||||
g_simulator->rebootProcess(p, ISimulator::RebootProcess);
|
g_simulator->rebootProcess(p, ISimulator::KillType::RebootProcess);
|
||||||
} else {
|
} else {
|
||||||
g_simulator->killProcess(p, ISimulator::KillInstantly);
|
g_simulator->killProcess(p, ISimulator::KillType::KillInstantly);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include "flow/Error.h"
|
#include "flow/Error.h"
|
||||||
#include "flow/IRandom.h"
|
#include "flow/IRandom.h"
|
||||||
#include "flow/flow.h"
|
#include "flow/flow.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
@ -278,7 +279,7 @@ struct DataLossRecoveryWorkload : TestWorkload {
|
|||||||
void killProcess(DataLossRecoveryWorkload* self, const NetworkAddress& addr) {
|
void killProcess(DataLossRecoveryWorkload* self, const NetworkAddress& addr) {
|
||||||
ISimulator::ProcessInfo* process = g_simulator->getProcessByAddress(addr);
|
ISimulator::ProcessInfo* process = g_simulator->getProcessByAddress(addr);
|
||||||
ASSERT(process->addresses.contains(addr));
|
ASSERT(process->addresses.contains(addr));
|
||||||
g_simulator->killProcess(process, ISimulator::KillInstantly);
|
g_simulator->killProcess(process, ISimulator::KillType::KillInstantly);
|
||||||
TraceEvent("TestTeamKilled").detail("Address", addr);
|
TraceEvent("TestTeamKilled").detail("Address", addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,16 +89,16 @@ struct KillRegionWorkload : TestWorkload {
|
|||||||
// FIXME: killDataCenter breaks simulation if forceKill=false, since some processes can survive and
|
// FIXME: killDataCenter breaks simulation if forceKill=false, since some processes can survive and
|
||||||
// partially complete a recovery
|
// partially complete a recovery
|
||||||
g_simulator->killDataCenter("0"_sr,
|
g_simulator->killDataCenter("0"_sr,
|
||||||
deterministicRandom()->random01() < 0.5 ? ISimulator::KillInstantly
|
deterministicRandom()->random01() < 0.5 ? ISimulator::KillType::KillInstantly
|
||||||
: ISimulator::RebootAndDelete,
|
: ISimulator::KillType::RebootAndDelete,
|
||||||
true);
|
true);
|
||||||
g_simulator->killDataCenter("2"_sr,
|
g_simulator->killDataCenter("2"_sr,
|
||||||
deterministicRandom()->random01() < 0.5 ? ISimulator::KillInstantly
|
deterministicRandom()->random01() < 0.5 ? ISimulator::KillType::KillInstantly
|
||||||
: ISimulator::RebootAndDelete,
|
: ISimulator::KillType::RebootAndDelete,
|
||||||
true);
|
true);
|
||||||
g_simulator->killDataCenter("4"_sr,
|
g_simulator->killDataCenter("4"_sr,
|
||||||
deterministicRandom()->random01() < 0.5 ? ISimulator::KillInstantly
|
deterministicRandom()->random01() < 0.5 ? ISimulator::KillType::KillInstantly
|
||||||
: ISimulator::RebootAndDelete,
|
: ISimulator::KillType::RebootAndDelete,
|
||||||
true);
|
true);
|
||||||
|
|
||||||
TraceEvent("ForceRecovery_Begin").log();
|
TraceEvent("ForceRecovery_Begin").log();
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include "fdbclient/ManagementAPI.actor.h"
|
#include "fdbclient/ManagementAPI.actor.h"
|
||||||
#include "flow/FaultInjection.h"
|
#include "flow/FaultInjection.h"
|
||||||
#include "flow/DeterministicRandom.h"
|
#include "flow/DeterministicRandom.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||||
|
|
||||||
static std::set<int> const& normalAttritionErrors() {
|
static std::set<int> const& normalAttritionErrors() {
|
||||||
@ -336,17 +337,17 @@ struct MachineAttritionWorkload : FailureInjectionWorkload {
|
|||||||
ASSERT(self->machines.size());
|
ASSERT(self->machines.size());
|
||||||
Optional<Standalone<StringRef>> target = self->machines.back().dcId();
|
Optional<Standalone<StringRef>> target = self->machines.back().dcId();
|
||||||
|
|
||||||
ISimulator::KillType kt = ISimulator::Reboot;
|
ISimulator::KillType kt = ISimulator::KillType::Reboot;
|
||||||
if (!self->reboot) {
|
if (!self->reboot) {
|
||||||
int killType = deterministicRandom()->randomInt(0, 3); // FIXME: enable disk stalls
|
int killType = deterministicRandom()->randomInt(0, 3); // FIXME: enable disk stalls
|
||||||
if (killType == 0)
|
if (killType == 0)
|
||||||
kt = ISimulator::KillInstantly;
|
kt = ISimulator::KillType::KillInstantly;
|
||||||
else if (killType == 1)
|
else if (killType == 1)
|
||||||
kt = ISimulator::InjectFaults;
|
kt = ISimulator::KillType::InjectFaults;
|
||||||
else if (killType == 2)
|
else if (killType == 2)
|
||||||
kt = ISimulator::RebootAndDelete;
|
kt = ISimulator::KillType::RebootAndDelete;
|
||||||
else
|
else
|
||||||
kt = ISimulator::FailDisk;
|
kt = ISimulator::KillType::FailDisk;
|
||||||
}
|
}
|
||||||
TraceEvent("Assassination")
|
TraceEvent("Assassination")
|
||||||
.detail("TargetDatacenter", target)
|
.detail("TargetDatacenter", target)
|
||||||
@ -362,12 +363,12 @@ struct MachineAttritionWorkload : FailureInjectionWorkload {
|
|||||||
ASSERT(self->targetIds.size() == 1);
|
ASSERT(self->targetIds.size() == 1);
|
||||||
auto target = self->targetIds.front();
|
auto target = self->targetIds.front();
|
||||||
|
|
||||||
auto kt = ISimulator::KillInstantly;
|
auto kt = ISimulator::KillType::KillInstantly;
|
||||||
TraceEvent("Assassination").detail("TargetDataHall", target).detail("KillType", kt);
|
TraceEvent("Assassination").detail("TargetDataHall", target).detail("KillType", kt);
|
||||||
|
|
||||||
g_simulator->killDataHall(target, kt);
|
g_simulator->killDataHall(target, kt);
|
||||||
} else if (self->killAll) {
|
} else if (self->killAll) {
|
||||||
state ISimulator::KillType kt = ISimulator::RebootProcessAndSwitch;
|
state ISimulator::KillType kt = ISimulator::KillType::RebootProcessAndSwitch;
|
||||||
TraceEvent("Assassination").detail("KillType", kt);
|
TraceEvent("Assassination").detail("KillType", kt);
|
||||||
g_simulator->killAll(kt, true);
|
g_simulator->killAll(kt, true);
|
||||||
g_simulator->toggleGlobalSwitchCluster();
|
g_simulator->toggleGlobalSwitchCluster();
|
||||||
@ -428,7 +429,7 @@ struct MachineAttritionWorkload : FailureInjectionWorkload {
|
|||||||
if (deterministicRandom()->random01() > 0.5) {
|
if (deterministicRandom()->random01() > 0.5) {
|
||||||
g_simulator->rebootProcess(targetMachine.zoneId(), deterministicRandom()->random01() > 0.5);
|
g_simulator->rebootProcess(targetMachine.zoneId(), deterministicRandom()->random01() > 0.5);
|
||||||
} else {
|
} else {
|
||||||
g_simulator->killZone(targetMachine.zoneId(), ISimulator::Reboot);
|
g_simulator->killZone(targetMachine.zoneId(), ISimulator::KillType::Reboot);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
auto randomDouble = deterministicRandom()->random01();
|
auto randomDouble = deterministicRandom()->random01();
|
||||||
@ -437,19 +438,19 @@ struct MachineAttritionWorkload : FailureInjectionWorkload {
|
|||||||
.detail("RandomValue", randomDouble);
|
.detail("RandomValue", randomDouble);
|
||||||
if (randomDouble < 0.33) {
|
if (randomDouble < 0.33) {
|
||||||
TraceEvent("RebootAndDelete").detail("TargetMachine", targetMachine.toString());
|
TraceEvent("RebootAndDelete").detail("TargetMachine", targetMachine.toString());
|
||||||
g_simulator->killZone(targetMachine.zoneId(), ISimulator::RebootAndDelete);
|
g_simulator->killZone(targetMachine.zoneId(), ISimulator::KillType::RebootAndDelete);
|
||||||
} else {
|
} else {
|
||||||
auto kt = ISimulator::KillInstantly;
|
auto kt = ISimulator::KillType::KillInstantly;
|
||||||
if (self->allowFaultInjection) {
|
if (self->allowFaultInjection) {
|
||||||
if (randomDouble < 0.50) {
|
if (randomDouble < 0.50) {
|
||||||
kt = ISimulator::InjectFaults;
|
kt = ISimulator::KillType::InjectFaults;
|
||||||
}
|
}
|
||||||
// FIXME: enable disk stalls
|
// FIXME: enable disk stalls
|
||||||
/*
|
/*
|
||||||
if( randomDouble < 0.56 ) {
|
if( randomDouble < 0.56 ) {
|
||||||
kt = ISimulator::InjectFaults;
|
kt = ISimulator::KillType::InjectFaults;
|
||||||
} else if( randomDouble < 0.66 ) {
|
} else if( randomDouble < 0.66 ) {
|
||||||
kt = ISimulator::FailDisk;
|
kt = ISimulator::KillType::FailDisk;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "fdbserver/workloads/workloads.actor.h"
|
#include "fdbserver/workloads/workloads.actor.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "flow/actorcompiler.h" // This must be the last include
|
#include "flow/actorcompiler.h" // This must be the last include
|
||||||
|
|
||||||
struct ProtocolVersionWorkload : TestWorkload {
|
struct ProtocolVersionWorkload : TestWorkload {
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
#include "flow/DeterministicRandom.h"
|
#include "flow/DeterministicRandom.h"
|
||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/simulator.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "fdbclient/NativeAPI.actor.h"
|
#include "fdbclient/NativeAPI.actor.h"
|
||||||
#include "fdbserver/TesterInterface.actor.h"
|
#include "fdbserver/TesterInterface.actor.h"
|
||||||
#include "fdbserver/workloads/workloads.actor.h"
|
#include "fdbserver/workloads/workloads.actor.h"
|
||||||
|
@ -211,7 +211,7 @@ struct MoveKeysWorkload : FailureInjectionWorkload {
|
|||||||
ACTOR Future<Void> forceMasterFailure(Database cx, MoveKeysWorkload* self) {
|
ACTOR Future<Void> forceMasterFailure(Database cx, MoveKeysWorkload* self) {
|
||||||
ASSERT(g_network->isSimulated());
|
ASSERT(g_network->isSimulated());
|
||||||
loop {
|
loop {
|
||||||
if (g_simulator->killZone(self->dbInfo->get().master.locality.zoneId(), ISimulator::Reboot, true))
|
if (g_simulator->killZone(self->dbInfo->get().master.locality.zoneId(), ISimulator::KillType::Reboot, true))
|
||||||
return Void();
|
return Void();
|
||||||
wait(delay(1.0));
|
wait(delay(1.0));
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include "fdbserver/WorkerInterface.actor.h"
|
#include "fdbserver/WorkerInterface.actor.h"
|
||||||
#include "fdbserver/workloads/workloads.actor.h"
|
#include "fdbserver/workloads/workloads.actor.h"
|
||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/simulator.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
#include "fdbclient/ManagementAPI.actor.h"
|
#include "fdbclient/ManagementAPI.actor.h"
|
||||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||||
|
|
||||||
@ -308,7 +309,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
|||||||
|
|
||||||
// Check if we can kill the added process
|
// Check if we can kill the added process
|
||||||
bCanKillProcess =
|
bCanKillProcess =
|
||||||
g_simulator->canKillProcesses(processesLeft, processesDead, ISimulator::KillInstantly, nullptr);
|
g_simulator->canKillProcesses(processesLeft, processesDead, ISimulator::KillType::KillInstantly, nullptr);
|
||||||
|
|
||||||
// Remove the added processes
|
// Remove the added processes
|
||||||
processesLeft.resize(processesLeft.size() - killProcArray.size());
|
processesLeft.resize(processesLeft.size() - killProcArray.size());
|
||||||
@ -467,7 +468,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
|||||||
.detail("ClusterAvailable", g_simulator->isAvailable())
|
.detail("ClusterAvailable", g_simulator->isAvailable())
|
||||||
.detail("Protected", g_simulator->protectedAddresses.count(killProcess->address));
|
.detail("Protected", g_simulator->protectedAddresses.count(killProcess->address));
|
||||||
else if (removeViaClear) {
|
else if (removeViaClear) {
|
||||||
g_simulator->rebootProcess(killProcess, ISimulator::RebootProcessAndDelete);
|
g_simulator->rebootProcess(killProcess, ISimulator::KillType::RebootProcessAndDelete);
|
||||||
TraceEvent("RemoveAndKill", functionId)
|
TraceEvent("RemoveAndKill", functionId)
|
||||||
.detail("Step", "Clear Process")
|
.detail("Step", "Clear Process")
|
||||||
.detail("Process", describe(*killProcess))
|
.detail("Process", describe(*killProcess))
|
||||||
@ -478,7 +479,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
|||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
else {
|
else {
|
||||||
g_simulator->killProcess( killProcess, ISimulator::KillInstantly );
|
g_simulator->killProcess( killProcess, ISimulator::KillType::KillInstantly );
|
||||||
TraceEvent("RemoveAndKill", functionId).detail("Step", "Kill Process").detail("Process", describe(*killProcess)).detail("Failed", killProcess->failed).detail("Rebooting", killProcess->rebooting).detail("ClusterAvailable", g_simulator->isAvailable()).detail("Protected", g_simulator->protectedAddresses.count(killProcess->address));
|
TraceEvent("RemoveAndKill", functionId).detail("Step", "Kill Process").detail("Process", describe(*killProcess)).detail("Failed", killProcess->failed).detail("Rebooting", killProcess->rebooting).detail("ClusterAvailable", g_simulator->isAvailable()).detail("Protected", g_simulator->protectedAddresses.count(killProcess->address));
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
@ -497,7 +498,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
|||||||
.detail("ClusterAvailable", g_simulator->isAvailable());
|
.detail("ClusterAvailable", g_simulator->isAvailable());
|
||||||
for (auto& zoneId : zoneIds) {
|
for (auto& zoneId : zoneIds) {
|
||||||
killedMachine = g_simulator->killZone(
|
killedMachine = g_simulator->killZone(
|
||||||
zoneId, removeViaClear ? ISimulator::RebootAndDelete : ISimulator::KillInstantly);
|
zoneId, removeViaClear ? ISimulator::KillType::RebootAndDelete : ISimulator::KillType::KillInstantly);
|
||||||
TraceEvent(killedMachine ? SevInfo : SevWarn, "RemoveAndKill")
|
TraceEvent(killedMachine ? SevInfo : SevWarn, "RemoveAndKill")
|
||||||
.detail("Step", removeViaClear ? "Clear Machine" : "Kill Machine")
|
.detail("Step", removeViaClear ? "Clear Machine" : "Kill Machine")
|
||||||
.detail("ZoneId", zoneId)
|
.detail("ZoneId", zoneId)
|
||||||
|
@ -105,7 +105,7 @@ struct RollbackWorkload : FailureInjectionWorkload {
|
|||||||
|
|
||||||
if (self->enableFailures) {
|
if (self->enableFailures) {
|
||||||
// Reboot the proxy and clog the unclogged tlog.
|
// Reboot the proxy and clog the unclogged tlog.
|
||||||
g_simulator->rebootProcess(g_simulator->getProcessByAddress(proxy.address()), ISimulator::Reboot);
|
g_simulator->rebootProcess(g_simulator->getProcessByAddress(proxy.address()), ISimulator::KillType::Reboot);
|
||||||
g_simulator->clogInterface(uncloggedTLog.ip, self->clogDuration, ClogAll);
|
g_simulator->clogInterface(uncloggedTLog.ip, self->clogDuration, ClogAll);
|
||||||
} else {
|
} else {
|
||||||
// Alternatively, if we're not injecting machine failures, clog the proxy and the unclogged tlog.
|
// Alternatively, if we're not injecting machine failures, clog the proxy and the unclogged tlog.
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
#include "fdbrpc/simulator.h"
|
#include "fdbrpc/simulator.h"
|
||||||
#include "boost/algorithm/string/predicate.hpp"
|
#include "boost/algorithm/string/predicate.hpp"
|
||||||
#include "flow/IConnection.h"
|
#include "flow/IConnection.h"
|
||||||
|
#include "fdbrpc/SimulatorProcessInfo.h"
|
||||||
|
|
||||||
#undef state
|
#undef state
|
||||||
#include "fdbclient/SimpleIni.h"
|
#include "fdbclient/SimpleIni.h"
|
||||||
@ -146,7 +147,7 @@ struct SaveAndKillWorkload : TestWorkload {
|
|||||||
ini.SaveFile(self->restartInfo.c_str());
|
ini.SaveFile(self->restartInfo.c_str());
|
||||||
|
|
||||||
for (auto process = allProcessesMap.begin(); process != allProcessesMap.end(); process++) {
|
for (auto process = allProcessesMap.begin(); process != allProcessesMap.end(); process++) {
|
||||||
g_simulator->killProcess(process->second, ISimulator::Reboot);
|
g_simulator->killProcess(process->second, ISimulator::KillType::Reboot);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < 100; i++) {
|
for (i = 0; i < 100; i++) {
|
||||||
|
@ -63,7 +63,7 @@ struct TargetedKillWorkload : TestWorkload {
|
|||||||
Database cx,
|
Database cx,
|
||||||
TargetedKillWorkload* self) {
|
TargetedKillWorkload* self) {
|
||||||
if (g_simulator == g_network) {
|
if (g_simulator == g_network) {
|
||||||
g_simulator->killInterface(address, ISimulator::KillInstantly);
|
g_simulator->killInterface(address, ISimulator::KillType::KillInstantly);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user