mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-14 18:02:31 +08:00
* initial structure for remote IKVS server * moved struct to .h file, added new files to CMakeList * happy path implementation, connection error when testing * saved minor local change * changed tracing to debug * fixed onClosed and getError being called before init is finished * fix spawn process bug, now use absolute path * added server knob to set ikvs process port number * added server knob for remote/local kv store * implement simulator remote process spawning * fixed bug for simulator timeout * commit all changes * removed print lines in trace * added FlowProcess implementation by Markus * initial debug of FlowProcess, stuck at parent sending OpenKVStoreRequest to child * temporary fix for process factory throwing segfault on create * specify public address in command * change remote kv store knob to false for jenkins build * made port 0 open random unused port * change remote store knob to true for benchmark * set listening port to randomly opened port * added print lines for jenkins run open kv store timeout debug * removed most tracing and print lines * removed tutorial changes * update handleIOErrors error handling to handle remote-ikvs cases * Push all debugging changes * A version where worker bug exists * A version where restarting tests fail * Use both the name and the port to determine the child process * Remove unnecessary update on local address * Disable remote-kvs for DiskFailureCycle test * A version where restarting stuck * A version where most restarting tests green * Reset connection with child process explicitly * Remove change on unnecessary files * Unify flags from _ to - * fix merging unexpected changes * fix trac.error to .errorUnsuppressed * Add license header * Remove unnecessary header in FlowProcess.actor.cpp * Fix Windows build * Fix Windows build, add missing ; * Fix a stupid bug caused by code dropped by code merging * Disable remote kvs by default * Pass the conn_file path to the flow process, though not needed, but the 
buildNetwork is difficult to tune * serialization change on readrange * Update traces * Refactor the RemoteIKVS interface * Format files * Update sim2 interface to not clog connections between parent and child processes in simulation * Update comments; remove debugging symbols; Add error handling for remote_kvs_cancelled * Add comments, format files * Change method name from isBuggifyDisabled to isStableConnection; Decrease(0.1x) latency for stable connections * Commit the IConnection interface change, forgot in previous commit * Fix the issue that onClosed request is cancelled by ActorCollection * Enable the remote kv store knob * Remove FlowProcess.actor.cpp and move functions to RemoteIKeyValueStore.actor.cpp; Add remote kv store delay to avoid race; Bind the child process to die with parent process * Fix the bug where one process starts storage server more than once * Add a please_reboot_remote_kv_store error to restart the storage server worker if remote kvs died abnormally * Remove unreachable code path and add comments * Clang format the code * Fix a simple wait error * Clang format after merging the main branch * Testing mixed mode in simulation if remote_kvs knob is enabled, setting the default to false * Disable remote kvs for PhysicalShardMove which is for RocksDB * Cleanup #include orders, remove debugging traces * Revert the reorder in fdbserver.actor.cpp, which fails the gcc build Co-authored-by: “Lincoln <“lincoln.xiao@snowflake.com”>
151 lines
6.2 KiB
C++
151 lines
6.2 KiB
C++
/*
|
|
* SaveAndKill.actor.cpp
|
|
*
|
|
* This source file is part of the FoundationDB open source project
|
|
*
|
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "fdbclient/NativeAPI.actor.h"
|
|
#include "fdbserver/TesterInterface.actor.h"
|
|
#include "fdbserver/workloads/workloads.actor.h"
|
|
#include "fdbrpc/simulator.h"
|
|
#include "boost/algorithm/string/predicate.hpp"
|
|
|
|
#undef state
|
|
#include "fdbclient/SimpleIni.h"
|
|
#define state
|
|
#undef max
|
|
#undef min
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
|
|
|
struct SaveAndKillWorkload : TestWorkload {
|
|
|
|
std::string restartInfo;
|
|
double testDuration;
|
|
int isRestoring;
|
|
|
|
SaveAndKillWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
|
|
restartInfo = getOption(options, "restartInfoLocation"_sr, "simfdb/restartInfo.ini"_sr).toString();
|
|
testDuration = getOption(options, "testDuration"_sr, 10.0);
|
|
isRestoring = getOption(options, "isRestoring"_sr, 0);
|
|
}
|
|
|
|
std::string description() const override { return "SaveAndKillWorkload"; }
|
|
Future<Void> setup(Database const& cx) override {
|
|
g_simulator.disableSwapsToAll();
|
|
return Void();
|
|
}
|
|
Future<Void> start(Database const& cx) override { return _start(this); }
|
|
|
|
ACTOR Future<Void> _start(SaveAndKillWorkload* self) {
|
|
state int i;
|
|
wait(delay(deterministicRandom()->random01() * self->testDuration));
|
|
|
|
CSimpleIni ini;
|
|
ini.SetUnicode();
|
|
ini.LoadFile(self->restartInfo.c_str());
|
|
|
|
ini.SetValue("RESTORE", "isRestoring", format("%d", self->isRestoring).c_str());
|
|
ini.SetValue("META", "processesPerMachine", format("%d", g_simulator.processesPerMachine).c_str());
|
|
ini.SetValue("META", "listenersPerProcess", format("%d", g_simulator.listenersPerProcess).c_str());
|
|
ini.SetValue("META", "desiredCoordinators", format("%d", g_simulator.desiredCoordinators).c_str());
|
|
ini.SetValue("META", "connectionString", g_simulator.connectionString.c_str());
|
|
ini.SetValue("META", "testerCount", format("%d", g_simulator.testerCount).c_str());
|
|
ini.SetValue("META", "tssMode", format("%d", g_simulator.tssMode).c_str());
|
|
ini.SetValue("META", "mockDNS", INetworkConnections::net()->convertMockDNSToString().c_str());
|
|
|
|
std::vector<ISimulator::ProcessInfo*> processes = g_simulator.getAllProcesses();
|
|
std::map<NetworkAddress, ISimulator::ProcessInfo*> rebootingProcesses = g_simulator.currentlyRebootingProcesses;
|
|
std::map<std::string, ISimulator::ProcessInfo*> allProcessesMap;
|
|
for (const auto& [_, process] : rebootingProcesses) {
|
|
if (allProcessesMap.find(process->dataFolder) == allProcessesMap.end() &&
|
|
std::string(process->name) != "remote flow process") {
|
|
allProcessesMap[process->dataFolder] = process;
|
|
}
|
|
}
|
|
for (const auto& process : processes) {
|
|
if (allProcessesMap.find(process->dataFolder) == allProcessesMap.end() &&
|
|
std::string(process->name) != "remote flow process") {
|
|
allProcessesMap[process->dataFolder] = process;
|
|
}
|
|
}
|
|
ini.SetValue("META", "processCount", format("%d", allProcessesMap.size() - 1).c_str());
|
|
std::map<std::string, int> machines;
|
|
|
|
int j = 0;
|
|
for (const auto& [_, process] : allProcessesMap) {
|
|
std::string machineId = printable(process->locality.machineId());
|
|
const char* machineIdString = machineId.c_str();
|
|
if (strcmp(process->name, "TestSystem") != 0) {
|
|
if (machines.find(machineId) == machines.end()) {
|
|
machines.insert(std::pair<std::string, int>(machineId, 1));
|
|
ini.SetValue("META", format("%d", j).c_str(), machineIdString);
|
|
ini.SetValue(
|
|
machineIdString,
|
|
"dcUID",
|
|
(process->locality.dcId().present()) ? process->locality.dcId().get().printable().c_str() : "");
|
|
ini.SetValue(machineIdString,
|
|
"zoneId",
|
|
(process->locality.zoneId().present())
|
|
? process->locality.zoneId().get().printable().c_str()
|
|
: "");
|
|
ini.SetValue(machineIdString, "mClass", format("%d", process->startingClass.classType()).c_str());
|
|
ini.SetValue(machineIdString,
|
|
format("ipAddr%d", process->address.port - 1).c_str(),
|
|
process->address.ip.toString().c_str());
|
|
ini.SetValue(machineIdString, format("%d", process->address.port - 1).c_str(), process->dataFolder);
|
|
ini.SetValue(
|
|
machineIdString, format("c%d", process->address.port - 1).c_str(), process->coordinationFolder);
|
|
j++;
|
|
} else {
|
|
ini.SetValue(machineIdString,
|
|
format("ipAddr%d", process->address.port - 1).c_str(),
|
|
process->address.ip.toString().c_str());
|
|
int oldValue = machines.find(machineId)->second;
|
|
ini.SetValue(machineIdString, format("%d", process->address.port - 1).c_str(), process->dataFolder);
|
|
ini.SetValue(
|
|
machineIdString, format("c%d", process->address.port - 1).c_str(), process->coordinationFolder);
|
|
machines.erase(machines.find(machineId));
|
|
machines.insert(std::pair<std::string, int>(machineId, oldValue + 1));
|
|
}
|
|
}
|
|
}
|
|
for (auto entry = machines.begin(); entry != machines.end(); entry++) {
|
|
ini.SetValue((*entry).first.c_str(), "processes", format("%d", (*entry).second).c_str());
|
|
}
|
|
|
|
ini.SetValue("META", "machineCount", format("%d", machines.size()).c_str());
|
|
ini.SaveFile(self->restartInfo.c_str());
|
|
|
|
for (auto process = allProcessesMap.begin(); process != allProcessesMap.end(); process++) {
|
|
g_simulator.killProcess(process->second, ISimulator::Reboot);
|
|
}
|
|
|
|
for (i = 0; i < 100; i++) {
|
|
wait(delay(0.0));
|
|
}
|
|
|
|
g_simulator.stop();
|
|
|
|
return Void();
|
|
}
|
|
|
|
Future<bool> check(Database const& cx) override { return true; }
|
|
void getMetrics(std::vector<PerfMetric>&) override {}
|
|
};
|
|
|
|
// Global WorkloadFactory instance for SaveAndKillWorkload, constructed with the name "SaveAndKill".
// NOTE(review): presumably this is what makes the workload selectable by that name in test
// specifications -- confirm against WorkloadFactory's definition.
WorkloadFactory<SaveAndKillWorkload> SaveAndKillWorkloadFactory("SaveAndKill");
|