foundationdb/fdbserver/RestoreWorker.actor.cpp
Meng Xu ca3b6135d0 FastRestore:Add debug to see why restore role is not connected
Reason: restore is a fdbserver who does not register with CC.
The new failure monitor changes how connection works for client and server.
For client, it does not connect to CC to get connected.
For server, it has to connect to CC to get connected.
Restore worker becomes the special role that behaves like a client but is a server.
2020-02-03 17:19:52 -08:00

344 lines
14 KiB
C++

/*
* RestoreWorker.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <ctime>
#include <climits>
#include <numeric>
#include <algorithm>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/MutationList.h"
#include "fdbclient/BackupContainer.h"
#include "fdbrpc/IAsyncFile.h"
#include "flow/genericactors.actor.h"
#include "flow/Hash3.h"
#include "flow/ActorCollection.h"
#include "fdbserver/RestoreWorker.actor.h"
#include "fdbserver/RestoreMaster.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
class RestoreConfigFR;
struct RestoreWorkerData; // Only declare the struct exist but we cannot use its field
ACTOR Future<Void> handlerTerminateWorkerRequest(RestoreSimpleRequest req, Reference<RestoreWorkerData> self,
RestoreWorkerInterface workerInterf, Database cx);
ACTOR Future<Void> monitorWorkerLiveness(Reference<RestoreWorkerData> self);
void handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Reference<RestoreWorkerData> self,
ActorCollection* actors, Database cx);
ACTOR Future<Void> collectRestoreWorkerInterface(Reference<RestoreWorkerData> self, Database cx,
int min_num_workers = 2);
ACTOR Future<Void> monitorleader(Reference<AsyncVar<RestoreWorkerInterface>> leader, Database cx,
RestoreWorkerInterface myWorkerInterf);
ACTOR Future<Void> startRestoreWorkerLeader(Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf,
Database cx);
// Remove the worker interface from restoreWorkerKey and remove its roles interfaces from their keys.
ACTOR Future<Void> handlerTerminateWorkerRequest(RestoreSimpleRequest req, Reference<RestoreWorkerData> self,
RestoreWorkerInterface workerInterf, Database cx) {
wait(runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void> {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
tr->clear(restoreWorkerKeyFor(workerInterf.id()));
return Void();
}));
TraceEvent("FastRestore").detail("HandleTerminateWorkerReq", self->id());
return Void();
}
// Assume only 1 role on a restore worker.
// Future: Multiple roles in a restore worker
void handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Reference<RestoreWorkerData> self,
ActorCollection* actors, Database cx) {
// Already recruited a role
// Future: Allow multiple restore roles on a restore worker. The design should easily allow this.
if (self->loaderInterf.present()) {
ASSERT(req.role == RestoreRole::Loader);
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Loader, self->loaderInterf.get()));
return;
} else if (self->applierInterf.present()) {
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Applier, self->applierInterf.get()));
return;
}
if (req.role == RestoreRole::Loader) {
ASSERT(!self->loaderInterf.present());
self->loaderInterf = RestoreLoaderInterface();
self->loaderInterf.get().initEndpoints();
RestoreLoaderInterface& recruited = self->loaderInterf.get();
DUMPTOKEN(recruited.initVersionBatch);
DUMPTOKEN(recruited.loadFile);
DUMPTOKEN(recruited.sendMutations);
DUMPTOKEN(recruited.collectRestoreRoleInterfaces);
DUMPTOKEN(recruited.finishRestore);
actors->add(restoreLoaderCore(self->loaderInterf.get(), req.nodeIndex, cx));
TraceEvent("FastRestore").detail("RecruitedLoaderNodeIndex", req.nodeIndex);
req.reply.send(
RestoreRecruitRoleReply(self->loaderInterf.get().id(), RestoreRole::Loader, self->loaderInterf.get()));
} else if (req.role == RestoreRole::Applier) {
ASSERT(!self->applierInterf.present());
self->applierInterf = RestoreApplierInterface();
self->applierInterf.get().initEndpoints();
RestoreApplierInterface& recruited = self->applierInterf.get();
DUMPTOKEN(recruited.sendMutationVector);
DUMPTOKEN(recruited.applyToDB);
DUMPTOKEN(recruited.initVersionBatch);
DUMPTOKEN(recruited.collectRestoreRoleInterfaces);
DUMPTOKEN(recruited.finishRestore);
actors->add(restoreApplierCore(self->applierInterf.get(), req.nodeIndex, cx));
TraceEvent("FastRestore").detail("RecruitedApplierNodeIndex", req.nodeIndex);
req.reply.send(
RestoreRecruitRoleReply(self->applierInterf.get().id(), RestoreRole::Applier, self->applierInterf.get()));
} else {
TraceEvent(SevError, "FastRestore")
.detail("HandleRecruitRoleRequest", "UnknownRole"); //.detail("Request", req.printable());
}
return;
}
// Read restoreWorkersKeys from DB to get each restore worker's workerInterface and set it to self->workerInterfaces;
// This is done before we assign restore roles for restore workers.
ACTOR Future<Void> collectRestoreWorkerInterface(Reference<RestoreWorkerData> self, Database cx, int min_num_workers) {
state Transaction tr(cx);
state vector<RestoreWorkerInterface> agents; // agents is cmdsInterf
loop {
try {
self->workerInterfaces.clear();
agents.clear();
tr.reset();
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
Standalone<RangeResultRef> agentValues = wait(tr.getRange(restoreWorkersKeys, CLIENT_KNOBS->TOO_MANY));
ASSERT(!agentValues.more);
// If agentValues.size() < min_num_workers, we should wait for coming workers to register their
// workerInterface before we read them once for all
if (agentValues.size() >= min_num_workers) {
for (auto& it : agentValues) {
agents.push_back(BinaryReader::fromStringRef<RestoreWorkerInterface>(it.value, IncludeVersion()));
// Save the RestoreWorkerInterface for the later operations
self->workerInterfaces.insert(std::make_pair(agents.back().id(), agents.back()));
}
break;
}
TraceEvent("FastRestore").suppressFor(10.0).detail("NotEnoughWorkers", agentValues.size());
wait(delay(5.0));
} catch (Error& e) {
wait(tr.onError(e));
}
}
ASSERT(agents.size() >= min_num_workers); // ASSUMPTION: We must have at least 1 loader and 1 applier
TraceEvent("FastRestore").detail("CollectWorkerInterfaceNumWorkers", self->workerInterfaces.size());
return Void();
}
// Periodically send worker heartbeat to
ACTOR Future<Void> monitorWorkerLiveness(Reference<RestoreWorkerData> self) {
ASSERT(!self->workerInterfaces.empty());
state std::map<UID, RestoreWorkerInterface>::iterator workerInterf;
loop {
std::vector<std::pair<UID, RestoreSimpleRequest>> requests;
for (auto& worker : self->workerInterfaces) {
requests.push_back(std::make_pair(worker.first, RestoreSimpleRequest()));
}
wait(sendBatchRequests(&RestoreWorkerInterface::heartbeat, self->workerInterfaces, requests));
wait(delay(60.0));
}
}
// RestoreWorkerLeader is the worker that runs RestoreMaster role
ACTOR Future<Void> startRestoreWorkerLeader(Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf,
Database cx) {
// We must wait for enough time to make sure all restore workers have registered their workerInterfaces into the DB
TraceEvent("FastRestore")
.detail("Master", workerInterf.id())
.detail("WaitForRestoreWorkerInterfaces",
SERVER_KNOBS->FASTRESTORE_NUM_LOADERS + SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS);
wait(delay(10.0));
TraceEvent("FastRestore")
.detail("Master", workerInterf.id())
.detail("CollectRestoreWorkerInterfaces",
SERVER_KNOBS->FASTRESTORE_NUM_LOADERS + SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS);
wait(collectRestoreWorkerInterface(self, cx,
SERVER_KNOBS->FASTRESTORE_NUM_LOADERS + SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS));
// TODO: Needs to keep this monitor's future. May use actorCollection
state Future<Void> workersFailureMonitor = monitorWorkerLiveness(self);
wait(startRestoreMaster(self, cx) || workersFailureMonitor);
return Void();
}
ACTOR Future<Void> startRestoreWorker(Reference<RestoreWorkerData> self, RestoreWorkerInterface interf, Database cx) {
state double lastLoopTopTime;
state ActorCollection actors(false); // Collect the main actor for each role
state Future<Void> exitRole = Never();
loop {
double loopTopTime = now();
double elapsedTime = loopTopTime - lastLoopTopTime;
if (elapsedTime > 0.050) {
if (deterministicRandom()->random01() < 0.01)
TraceEvent(SevWarn, "SlowRestoreWorkerLoopx100")
.detail("NodeDesc", self->describeNode())
.detail("Elapsed", elapsedTime);
}
lastLoopTopTime = loopTopTime;
state std::string requestTypeStr = "[Init]";
try {
choose {
when(RestoreSimpleRequest req = waitNext(interf.heartbeat.getFuture())) {
requestTypeStr = "heartbeat";
actors.add(handleHeartbeat(req, interf.id()));
}
when(RestoreRecruitRoleRequest req = waitNext(interf.recruitRole.getFuture())) {
requestTypeStr = "recruitRole";
handleRecruitRoleRequest(req, self, &actors, cx);
}
when(RestoreSimpleRequest req = waitNext(interf.terminateWorker.getFuture())) {
// Destroy the worker at the end of the restore
requestTypeStr = "terminateWorker";
exitRole = handlerTerminateWorkerRequest(req, self, interf, cx);
}
when(wait(exitRole)) {
TraceEvent("FastRestore").detail("RestoreWorkerCore", "ExitRole").detail("NodeID", self->id());
break;
}
}
} catch (Error& e) {
TraceEvent(SevWarn, "FastRestore")
.detail("RestoreWorkerError", e.what())
.detail("RequestType", requestTypeStr);
break;
}
}
return Void();
}
// RestoreMaster is the leader
ACTOR Future<Void> monitorleader(Reference<AsyncVar<RestoreWorkerInterface>> leader, Database cx,
RestoreWorkerInterface myWorkerInterf) {
wait(delay(5.0));
TraceEvent("FastRestore").detail("MonitorLeader", "StartLeaderElection");
state int count = 0;
loop {
try {
state RestoreWorkerInterface leaderInterf;
state ReadYourWritesTransaction tr(cx); // MX: Somewhere here program gets stuck
count++;
tr.reset();
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
Optional<Value> leaderValue = wait(tr.get(restoreLeaderKey));
TraceEvent(SevInfo, "FastRestoreLeaderElection")
.detail("Round", count)
.detail("LeaderExisted", leaderValue.present());
if (leaderValue.present()) {
leaderInterf = BinaryReader::fromStringRef<RestoreWorkerInterface>(leaderValue.get(), IncludeVersion());
// Register my interface as an worker if I am not the leader
if (leaderInterf != myWorkerInterf) {
tr.set(restoreWorkerKeyFor(myWorkerInterf.id()), restoreWorkerInterfaceValue(myWorkerInterf));
}
} else {
// Workers compete to be the leader
tr.set(restoreLeaderKey, BinaryWriter::toValue(myWorkerInterf, IncludeVersion()));
leaderInterf = myWorkerInterf;
}
wait(tr.commit());
leader->set(leaderInterf);
break;
} catch (Error& e) {
TraceEvent(SevInfo, "FastRestoreLeaderElection").detail("ErrorCode", e.code()).detail("Error", e.what());
wait(tr.onError(e));
}
}
TraceEvent("FastRestore").detail("MonitorLeader", "FinishLeaderElection").detail("Leader", leaderInterf.id());
return Void();
}
ACTOR Future<Void> _restoreWorker(Database cx, LocalityData locality) {
state ActorCollection actors(false);
state Future<Void> myWork = Never();
state Reference<AsyncVar<RestoreWorkerInterface>> leader =
Reference<AsyncVar<RestoreWorkerInterface>>(new AsyncVar<RestoreWorkerInterface>());
state RestoreWorkerInterface myWorkerInterf;
myWorkerInterf.initEndpoints();
state Reference<RestoreWorkerData> self = Reference<RestoreWorkerData>(new RestoreWorkerData());
self->workerID = myWorkerInterf.id();
TraceEvent("FastRestoreKnobs")
.detail("FailureTimeout", SERVER_KNOBS->FASTRESTORE_FAILURE_TIMEOUT)
.detail("HeartBeat", SERVER_KNOBS->FASTRESTORE_HEARTBEAT_INTERVAL)
.detail("SamplePercentage", SERVER_KNOBS->FASTRESTORE_SAMPLING_PERCENT)
.detail("NumLoaders", SERVER_KNOBS->FASTRESTORE_NUM_LOADERS)
.detail("NumAppliers", SERVER_KNOBS->FASTRESTORE_NUM_APPLIERS)
.detail("TxnBatchSize", SERVER_KNOBS->FASTRESTORE_TXN_BATCH_MAX_BYTES)
.detail("VersionBatchSize", SERVER_KNOBS->FASTRESTORE_VERSIONBATCH_MAX_BYTES);
wait(monitorleader(leader, cx, myWorkerInterf));
TraceEvent("FastRestore").detail("LeaderElection", "WaitForLeader");
if (leader->get() == myWorkerInterf) {
// Restore master worker: doLeaderThings();
myWork = startRestoreWorkerLeader(self, myWorkerInterf, cx);
} else {
// Restore normal worker (for RestoreLoader and RestoreApplier roles): doWorkerThings();
myWork = startRestoreWorker(self, myWorkerInterf, cx);
}
wait(myWork);
return Void();
}
ACTOR Future<Void> restoreWorker(Reference<ClusterConnectionFile> connFile, LocalityData locality,
std::string coordFolder) {
try {
state vector<Future<Void>> actors;
// Connect to coordinators in order to connect to fdb cluster
// ServerCoordinators coordinators(connFile);
// if (coordFolder.size())
// actors.push_back(fileNotFoundToNever(coordinationServer(coordFolder), "ClusterCoordinatorFailed"));
Database cx = Database::createDatabase(connFile, Database::API_VERSION_LATEST, true, locality);
wait(reportErrors(_restoreWorker(cx, locality), "RestoreWorker"));
} catch (Error& e) {
TraceEvent("FastRestoreRestoreWorker").detail("Error", e.what());
throw e;
}
return Void();
}