The current code uses one restore interface to handle the work of all restore roles, i.e., master, loader, and applier, which makes it hard to review, maintain, or scale. This commit splits restore into multiple roles by mimicking the FDB transaction system: 1) It uses a RestoreWorker as the process that hosts restore roles; this commit assumes one restore role per RestoreWorker, but it should be easy to extend to multiple roles per RestoreWorker. 2) It creates 3 restore roles: RestoreMaster, which coordinates the restore process and sends commands to the other two roles; RestoreLoader, which parses backup files into mutations and sends the mutations to appliers; and RestoreApplier, which sorts received mutations and applies them to the DB in order. Compilable version; to be tested in correctness.
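Roughly, the role split described above yields this pipeline (a sketch, not part of the source file):

RestoreMaster --commands--> RestoreLoader(s) --parsed mutations--> RestoreApplier(s) --ordered writes--> DB

Each role is hosted by a RestoreWorker process; the master is chosen by leader election on restoreLeaderKey (see _restoreWorker below).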
/*
 * Restore.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/SystemData.h"

// Backup agent header
#include "fdbclient/BackupAgent.actor.h"
//#include "FileBackupAgent.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/MutationList.h"
#include "fdbclient/BackupContainer.h"

#include <ctime>
#include <climits>
#include "fdbrpc/IAsyncFile.h"
#include "flow/genericactors.actor.h"
#include "flow/Hash3.h"
#include <numeric>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <algorithm>

#include "flow/ActorCollection.h"
#include "fdbserver/RestoreUtil.h"
#include "fdbserver/RestoreWorkerInterface.h"
#include "fdbserver/RestoreCommon.actor.h"
#include "fdbserver/RestoreRoleCommon.actor.h"
#include "fdbserver/RestoreLoader.actor.h"
#include "fdbserver/RestoreApplier.actor.h"
#include "fdbserver/RestoreMaster.actor.h"

#include "flow/actorcompiler.h" // This must be the last #include.

// These configurations for restore workers will be set in initRestoreWorkerConfig() later.
int MIN_NUM_WORKERS = 3; //10; // TODO: This can become a configuration param later
int ratio_loader_to_applier = 1; // the ratio of loaders to appliers; numLoaders = numWorkers * ratio / (ratio + 1)
int FastRestore_Failure_Timeout = 3600; // seconds
double loadBatchSizeMB = 1; // MB
double loadBatchSizeThresholdB = loadBatchSizeMB * 1024 * 1024;
double mutationVectorThreshold = 100; // bytes; correctness passed when the value is 1
double transactionBatchSizeThreshold = 512; // bytes

int restoreStatusIndex = 0;

class RestoreConfig;
struct RestoreWorkerData; // Only declare that the struct exists; we cannot use its fields here

// Forward declarations
void initRestoreWorkerConfig();

ACTOR Future<Void> handlerTerminateWorkerRequest(RestoreSimpleRequest req, Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf, Database cx);
ACTOR Future<Void> monitorWorkerLiveness(Reference<RestoreWorkerData> self);
ACTOR Future<Void> commitRestoreRoleInterfaces(Reference<RestoreWorkerData> self, Database cx);
ACTOR Future<Void> handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Reference<RestoreWorkerData> self, ActorCollection *actors, Database cx);
ACTOR Future<Void> collectRestoreWorkerInterface(Reference<RestoreWorkerData> self, Database cx, int min_num_workers);
ACTOR Future<Void> recruitRestoreRoles(Reference<RestoreWorkerData> self);

bool debug_verbose = true;
void printGlobalNodeStatus(Reference<RestoreWorkerData>);

const char *RestoreCommandEnumStr[] = {"Init",
		"Set_Role", "Set_Role_Done",
		"Sample_Range_File", "Sample_Log_File", "Sample_File_Done",
		"Loader_Send_Sample_Mutation_To_Applier", "Loader_Send_Sample_Mutation_To_Applier_Done",
		"Calculate_Applier_KeyRange", "Get_Applier_KeyRange", "Get_Applier_KeyRange_Done",
		"Assign_Applier_KeyRange", "Assign_Applier_KeyRange_Done",
		"Assign_Loader_Range_File", "Assign_Loader_Log_File", "Assign_Loader_File_Done",
		"Loader_Send_Mutations_To_Applier", "Loader_Send_Mutations_To_Applier_Done",
		"Apply_Mutation_To_DB", "Apply_Mutation_To_DB_Skip",
		"Loader_Notify_Applier_To_Apply_Mutation",
		"Notify_Loader_ApplierKeyRange", "Notify_Loader_ApplierKeyRange_Done"
};

template<> Tuple Codec<ERestoreState>::pack(ERestoreState const &val); // { return Tuple().append(val); }
template<> ERestoreState Codec<ERestoreState>::unpack(Tuple const &val); // { return (ERestoreState)val.getInt(0); }

// DEBUG_FAST_RESTORE is not used right now!
#define DEBUG_FAST_RESTORE 1

#ifdef DEBUG_FAST_RESTORE
#define dbprintf_rs(fmt, args...) printf(fmt, ## args);
#else
#define dbprintf_rs(fmt, args...)
#endif
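// Usage sketch (hypothetical call site): dbprintf_rs("[DEBUG] phase:%s\n", phase.c_str());
// When DEBUG_FAST_RESTORE is defined (as above), this expands to printf; otherwise it compiles to nothing.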

// Each restore worker (a process) is assigned a role.
// MAYBE later: We will support multiple restore roles on a worker
struct RestoreWorkerData : NonCopyable, public ReferenceCounted<RestoreWorkerData> {
	UID workerID;
	std::map<UID, RestoreWorkerInterface> workers_workerInterface; // UID is the worker's node id, RestoreWorkerInterface is the worker's communication interface

	// Restore Roles
	Optional<RestoreLoaderInterface> loaderInterf;
	Reference<RestoreLoaderData> loaderData;
	Optional<RestoreApplierInterface> applierInterf;
	Reference<RestoreApplierData> applierData;
	Reference<RestoreMasterData> masterData;

	CMDUID cmdID;

	UID id() const { return workerID; };

	RestoreWorkerData() {
		workerID = UID();
	}

	~RestoreWorkerData() {
		printf("[Exit] Worker:%s RestoreWorkerData is deleted\n", workerID.toString().c_str());
	}

	std::string describeNode() {
		std::stringstream ss;
		ss << "RestoreWorker workerID:" << workerID.toString();
		return ss.str();
	}
};

// Restore worker
ACTOR Future<Void> handlerTerminateWorkerRequest(RestoreSimpleRequest req, Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf, Database cx) {
	state Transaction tr(cx);
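	// Standard FDB retry loop: reset the transaction, set options, attempt the writes,
	// and let tr.onError() decide whether to back off and retry (e.g., on
	// commit_unknown_result the commit may or may not have succeeded).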
	loop {
		try {
			tr.reset();
			tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
			tr.setOption(FDBTransactionOptions::LOCK_AWARE);
			tr.clear(restoreWorkerKeyFor(workerInterf.id()));
			if ( self->loaderInterf.present() ) {
				tr.clear(restoreLoaderKeyFor(self->loaderInterf.get().id()));
			}
			if ( self->applierInterf.present() ) {
				tr.clear(restoreApplierKeyFor(self->applierInterf.get().id()));
			}
			wait( tr.commit() );
			printf("Node:%s finished restore; clear the interface keys for all roles on the worker (id:%s) and the worker itself, then exit\n", self->describeNode().c_str(), workerInterf.id().toString().c_str());
			req.reply.send( RestoreCommonReply(workerInterf.id(), req.cmdID) );
			break;
		} catch( Error &e ) {
			printf("[WARNING] Node:%s handlerTerminateWorkerRequest() transaction error:%s\n", self->describeNode().c_str(), e.what());
			wait( tr.onError(e) );
		}
	}

	return Void();
}

// Periodically send a heartbeat to each registered restore worker to monitor liveness
ACTOR Future<Void> monitorWorkerLiveness(Reference<RestoreWorkerData> self) {
	ASSERT( !self->workers_workerInterface.empty() );
	state int wIndex = 0;
	for (auto &workerInterf : self->workers_workerInterface) {
		printf("[Worker:%d][UID:%s][Interf.NodeInfo:%s]\n", wIndex, workerInterf.first.toString().c_str(), workerInterf.second.id().toString().c_str());
		wIndex++;
	}

	state std::vector<Future<RestoreCommonReply>> cmdReplies;
	state std::map<UID, RestoreWorkerInterface>::iterator workerInterf;
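	// Cadence in the loop below: a 1-second delay before each heartbeat request,
	// each reply bounded by FastRestore_Failure_Timeout, and a 30-second pause
	// between full rounds over all workers.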
	loop {
		wIndex = 0;
		self->cmdID.initPhase(RestoreCommandEnum::Heart_Beat);
		for ( workerInterf = self->workers_workerInterface.begin(); workerInterf != self->workers_workerInterface.end(); workerInterf++) {
			self->cmdID.nextCmd();
			try {
				wait( delay(1.0) );
				cmdReplies.push_back( workerInterf->second.heartbeat.getReply(RestoreSimpleRequest(self->cmdID)) );
				std::vector<RestoreCommonReply> reps = wait( timeoutError(getAll(cmdReplies), FastRestore_Failure_Timeout) );
				cmdReplies.clear();
				wIndex++;
			} catch (Error &e) {
				// Handle the command reply timeout error
				fprintf(stdout, "[ERROR] Node:%s, Commands before cmdID:%s error. error code:%d, error message:%s\n", self->describeNode().c_str(),
						self->cmdID.toString().c_str(), e.code(), e.what());
				printf("[Heartbeat: Node may be down][Worker:%d][UID:%s][Interf.NodeInfo:%s]\n", wIndex, workerInterf->first.toString().c_str(), workerInterf->second.id().toString().c_str());
			}
		}
		wait( delay(30.0) );
	}
}

void initRestoreWorkerConfig() {
	MIN_NUM_WORKERS = g_network->isSimulated() ? 3 : 120; //10; // TODO: This can become a configuration param later
	ratio_loader_to_applier = 1; // the ratio of loaders to appliers; numLoaders = numWorkers * ratio / (ratio + 1)
	FastRestore_Failure_Timeout = 3600; // seconds
	loadBatchSizeMB = g_network->isSimulated() ? 1 : 10 * 1000.0; // MB
	loadBatchSizeThresholdB = loadBatchSizeMB * 1024 * 1024;
	mutationVectorThreshold = g_network->isSimulated() ? 100 : 10 * 1024; // bytes; correctness passed when the value is 1
	transactionBatchSizeThreshold = g_network->isSimulated() ? 512 : 1 * 1024 * 1024; // bytes

	// Debug
	//loadBatchSizeThresholdB = 1;
	//transactionBatchSizeThreshold = 1;

	printf("Init RestoreWorkerConfig. min_num_workers:%d ratio_loader_to_applier:%d loadBatchSizeMB:%.2f loadBatchSizeThresholdB:%.2f transactionBatchSizeThreshold:%.2f\n",
			MIN_NUM_WORKERS, ratio_loader_to_applier, loadBatchSizeMB, loadBatchSizeThresholdB, transactionBatchSizeThreshold);
}

// Restore Worker
ACTOR Future<Void> commitRestoreRoleInterfaces(Reference<RestoreWorkerData> self, Database cx) {
	state ReadYourWritesTransaction tr(cx);
	// For now, we assume only one role per restore worker
	ASSERT( !(self->loaderInterf.present() && self->applierInterf.present()) );

	loop {
		try {
			tr.reset();
			tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
			tr.setOption(FDBTransactionOptions::LOCK_AWARE);
			if ( self->loaderInterf.present() ) {
				tr.set( restoreLoaderKeyFor(self->loaderInterf.get().id()), restoreLoaderInterfaceValue(self->loaderInterf.get()) );
			}
			if ( self->applierInterf.present() ) {
				tr.set( restoreApplierKeyFor(self->applierInterf.get().id()), restoreApplierInterfaceValue(self->applierInterf.get()) );
			}
			wait( tr.commit() );
			break;
		} catch( Error &e ) {
			printf("[WARNING]%s: commitRestoreRoleInterfaces transaction error:%s\n", self->describeNode().c_str(), e.what());
			wait( tr.onError(e) );
		}
	}

	return Void();
}

// Restore Worker
ACTOR Future<Void> handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Reference<RestoreWorkerData> self, ActorCollection *actors, Database cx) {
	printf("[INFO][Worker] Node:%s gets role %s\n", self->describeNode().c_str(),
			getRoleStr(req.role).c_str());

	if (req.role == RestoreRole::Loader) {
		ASSERT( !self->loaderInterf.present() );
		self->loaderData = Reference<RestoreLoaderData>(new RestoreLoaderData());
		self->loaderInterf = RestoreLoaderInterface();
		actors->add( restoreLoaderCore(self->loaderData, self->loaderInterf.get(), cx) );
	} else if (req.role == RestoreRole::Applier) {
		ASSERT( !self->applierInterf.present() );
		self->applierData = Reference<RestoreApplierData>( new RestoreApplierData() );
		self->applierInterf = RestoreApplierInterface();
		actors->add( restoreApplierCore(self->applierData, self->applierInterf.get(), cx) );
	} else {
		TraceEvent(SevError, "FastRestore").detail("HandleRecruitRoleRequest", "UnknownRole"); //.detail("Request", req.printable());
	}

	wait( commitRestoreRoleInterfaces(self, cx) ); // Commit the interface after it is ready to accept requests
	req.reply.send(RestoreCommonReply(self->id(), req.cmdID));

	return Void();
}

// Read restoreWorkersKeys from the DB to get each restore worker's RestoreWorkerInterface and store it in self->workers_workerInterface
// This is done before we assign restore roles to restore workers
ACTOR Future<Void> collectRestoreWorkerInterface(Reference<RestoreWorkerData> self, Database cx, int min_num_workers) {
	state Transaction tr(cx);

	state vector<RestoreWorkerInterface> agents; // agents is cmdsInterf

	loop {
		try {
			self->workers_workerInterface.clear();
			agents.clear();
			tr.reset();
			tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
			tr.setOption(FDBTransactionOptions::LOCK_AWARE);
			Standalone<RangeResultRef> agentValues = wait(tr.getRange(restoreWorkersKeys, CLIENT_KNOBS->TOO_MANY));
			ASSERT(!agentValues.more);
			// If agentValues.size() < min_num_workers, we wait for more workers to register their workerInterfaces before we read them once and for all
			if(agentValues.size() >= min_num_workers) {
				for(auto& it : agentValues) {
					agents.push_back(BinaryReader::fromStringRef<RestoreWorkerInterface>(it.value, IncludeVersion()));
					// Save the RestoreWorkerInterface for later operations
					self->workers_workerInterface.insert(std::make_pair(agents.back().id(), agents.back()));
					printf("collectWorkerInterface, workerInterface id:%s\n", agents.back().id().toString().c_str());
				}
				break;
			}
			printf("%s:Wait for enough workers. Current num_workers:%d target num_workers:%d\n",
					self->describeNode().c_str(), agentValues.size(), min_num_workers);
			wait( delay(5.0) );
		} catch( Error &e ) {
			printf("[WARNING]%s: collectWorkerInterface transaction error:%s\n", self->describeNode().c_str(), e.what());
			wait( tr.onError(e) );
		}
	}
	ASSERT(agents.size() >= min_num_workers); // ASSUMPTION: We must have at least 1 loader and 1 applier

	TraceEvent("FastRestore").detail("CollectWorkerInterfaceNumWorkers", self->workers_workerInterface.size());

	return Void();
}

// The RestoreWorker that has the restore master role: recruit a role for each worker
ACTOR Future<Void> recruitRestoreRoles(Reference<RestoreWorkerData> self) {
	printf("%s:Start configuring roles for workers\n", self->describeNode().c_str());
	ASSERT( self->masterData.isValid() );

	// Set up the role, and the global status for each node
	int numNodes = self->workers_workerInterface.size();
	state int numLoader = numNodes * ratio_loader_to_applier / (ratio_loader_to_applier + 1);
	int numApplier = numNodes - numLoader;
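	// Worked example (integer arithmetic): with numNodes = 3 and ratio_loader_to_applier = 1,
	// numLoader = 3 * 1 / 2 = 1 and numApplier = 3 - 1 = 2.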
	if (numLoader <= 0 || numApplier <= 0) {
		ASSERT( numLoader > 0 ); // Quick check in correctness
		ASSERT( numApplier > 0 );
		fprintf(stderr, "[ERROR] not enough nodes for loaders and appliers. numLoader:%d, numApplier:%d, ratio_loader_to_applier:%d, numAgents:%d\n", numLoader, numApplier, ratio_loader_to_applier, numNodes);
	} else {
		printf("Node%s: Configure roles numWorkers:%d numLoader:%d numApplier:%d\n", self->describeNode().c_str(), numNodes, numLoader, numApplier);
	}

	// Assign a role to each worker
	state int nodeIndex = 0;
	state RestoreRole role;
	state UID nodeID;
	printf("Node:%s Start configuring roles for workers\n", self->describeNode().c_str());
	self->cmdID.initPhase(RestoreCommandEnum::Set_Role);
	loop {
		try {
			nodeIndex = 0; // Reset so that a retry reassigns roles from the beginning
			std::vector<Future<RestoreCommonReply>> cmdReplies;
			for (auto &workerInterf : self->workers_workerInterface) {
				if ( nodeIndex < numLoader ) {
					role = RestoreRole::Loader;
				} else {
					role = RestoreRole::Applier;
				}
				nodeID = workerInterf.first;
				self->cmdID.nextCmd();
				printf("[CMD:%s] Node:%s Set role (%s) to node (index=%d uid=%s)\n", self->cmdID.toString().c_str(), self->describeNode().c_str(),
						getRoleStr(role).c_str(), nodeIndex, nodeID.toString().c_str());
				cmdReplies.push_back( workerInterf.second.recruitRole.getReply(RestoreRecruitRoleRequest(self->cmdID, role, nodeIndex)) );
				nodeIndex++;
			}
			std::vector<RestoreCommonReply> reps = wait( timeoutError(getAll(cmdReplies), FastRestore_Failure_Timeout) );
			printf("[RecruitRestoreRoles] Finished\n");
			break;
		} catch (Error &e) {
			// Handle the command reply timeout error
			fprintf(stdout, "[ERROR] Node:%s, Commands before cmdID:%s error. error code:%d, error message:%s\n", self->describeNode().c_str(),
					self->cmdID.toString().c_str(), e.code(), e.what());
			printf("Node:%s wait on replies timed out. Current phase: Set_Role, Retry all commands.\n", self->describeNode().c_str());
		}
	}

	return Void();
}

ACTOR Future<Void> startRestoreWorker(Reference<RestoreWorkerData> self, RestoreWorkerInterface interf, Database cx) {
	state double lastLoopTopTime = now(); // initialize to avoid a garbage elapsed time on the first iteration
	state ActorCollection actors(false); // Collect the main actor for each role

	loop {
		double loopTopTime = now();
		double elapsedTime = loopTopTime - lastLoopTopTime;
		if( elapsedTime > 0.050 ) {
			if (g_random->random01() < 0.01)
				TraceEvent(SevWarn, "SlowRestoreLoaderLoopx100").detail("NodeDesc", self->describeNode()).detail("Elapsed", elapsedTime);
		}
		lastLoopTopTime = loopTopTime;
		state std::string requestTypeStr = "[Init]";

		try {
			choose {
				when ( RestoreSimpleRequest req = waitNext(interf.heartbeat.getFuture()) ) {
					requestTypeStr = "heartbeat";
					actors.add( handleHeartbeat(req, interf.id()) );
				}
				when ( RestoreRecruitRoleRequest req = waitNext(interf.recruitRole.getFuture()) ) {
					requestTypeStr = "recruitRole";
					actors.add( handleRecruitRoleRequest(req, self, &actors, cx) );
				}
				when ( RestoreSimpleRequest req = waitNext(interf.terminateWorker.getFuture()) ) {
					// Destroy the worker at the end of the restore
					// TODO: Cancel its own actors
					requestTypeStr = "terminateWorker";
					actors.add( handlerTerminateWorkerRequest(req, self, interf, cx) );
					return Void();
				}
			}
		} catch (Error &e) {
			fprintf(stdout, "[ERROR] Worker handling received request:%s error. error code:%d, error message:%s\n",
					requestTypeStr.c_str(), e.code(), e.what());
			if ( requestTypeStr.find("[Init]") != std::string::npos ) {
				printf("Exit due to error at requestType:%s\n", requestTypeStr.c_str());
				break;
			}
		}
	}

	return Void();
}

ACTOR Future<Void> _restoreWorker(Database cx_input, LocalityData locality) {
	state Database cx = cx_input;
	state RestoreWorkerInterface workerInterf;
	workerInterf.initEndpoints();
	state Optional<RestoreWorkerInterface> leaderInterf;
	// Global data for the worker
	state Reference<RestoreWorkerData> self = Reference<RestoreWorkerData>(new RestoreWorkerData());

	self->workerID = workerInterf.id();

	initRestoreWorkerConfig(); // TODO: Change to a global struct to store the restore configuration

	// Compete to register this worker's restoreInterface as the leader.
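	// Leader election sketch (summary of the loop below): every worker reads
	// restoreLeaderKey; if it is unset, the worker tries to commit its own
	// interface there, and the first committed write wins and becomes the master.
	// Losers register themselves under restoreWorkerKeyFor(id) instead. A
	// commit_unknown_result on the leader's own write is detected by finding
	// our own id already stored under restoreLeaderKey.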
	state Transaction tr(cx);
	loop {
		try {
			tr.reset();
			tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
			tr.setOption(FDBTransactionOptions::LOCK_AWARE);
			Optional<Value> leader = wait(tr.get(restoreLeaderKey));
			if(leader.present()) {
				leaderInterf = BinaryReader::fromStringRef<RestoreWorkerInterface>(leader.get(), IncludeVersion());
				// NOTE: Handle the situation where the leader's commit of its key fails with commit_unknown_result.
				// In this situation, the leader will try to register its key again, which will never succeed.
				// We should let the leader escape from the infinite loop.
				if ( leaderInterf.get().id() == workerInterf.id() ) {
					printf("[Worker] NodeID:%s is the leader and has already registered its key despite the commit_unknown_result error. Let it set the key again\n",
							leaderInterf.get().id().toString().c_str());
					tr.set(restoreLeaderKey, BinaryWriter::toValue(workerInterf, IncludeVersion()));
					wait(tr.commit());
					// Reset leaderInterf to invalid for the leader process,
					// because a process will not execute the leader's logic unless leaderInterf is invalid
					leaderInterf = Optional<RestoreWorkerInterface>();
					break;
				}
				printf("[Worker] Leader key exists:%s. Worker registers its restore workerInterface id:%s\n",
						leaderInterf.get().id().toString().c_str(), workerInterf.id().toString().c_str());
				tr.set(restoreWorkerKeyFor(workerInterf.id()), restoreWorkerInterfaceValue(workerInterf));
				wait(tr.commit());
				break;
			}
			printf("[Worker] NodeID:%s competes to register its workerInterface as leader\n", workerInterf.id().toString().c_str());
			tr.set(restoreLeaderKey, BinaryWriter::toValue(workerInterf, IncludeVersion()));
			wait(tr.commit());
			break;
		} catch( Error &e ) {
			// We may get the error commit_unknown_result, in which case the commit may or may not have succeeded!
			// We must handle this error; otherwise, if the leader does not know its key has been registered, the leader will be stuck here!
			printf("[INFO] NodeID:%s restoreWorker select leader error, error code:%d error info:%s\n",
					workerInterf.id().toString().c_str(), e.code(), e.what());
			wait( tr.onError(e) );
		}
	}

	if(leaderInterf.present()) { // Logic for restore workers (restore loader and restore applier)
		wait( startRestoreWorker(self, workerInterf, cx) );
	} else { // Logic for the restore master
		self->masterData = Reference<RestoreMasterData>(new RestoreMasterData());
		// We must wait long enough to make sure all restore workers have registered their workerInterfaces in the DB
		printf("[INFO][Master] NodeID:%s Restore master waits for agents to register their workerKeys\n",
				workerInterf.id().toString().c_str());
		wait( delay(10.0) );

		printf("[INFO][Master] NodeID:%s starts configuring roles for workers\n", workerInterf.id().toString().c_str());

		wait( collectRestoreWorkerInterface(self, cx, MIN_NUM_WORKERS) );

		state Future<Void> workersFailureMonitor = monitorWorkerLiveness(self);

		// configureRoles must run after collectWorkerInterface
		// TODO: Remove the delay(). Why do we need an extra wait() to make sure the above wait is executed before the wait below?
		wait( delay(1.0) );
		wait( recruitRestoreRoles(self) );

		wait( startRestoreMaster(self->masterData, cx) );
	}

	return Void();
}

ACTOR Future<Void> restoreWorker(Reference<ClusterConnectionFile> ccf, LocalityData locality) {
	Database cx = Database::createDatabase(ccf->getFilename(), Database::API_VERSION_LATEST, locality);
	wait(_restoreWorker(cx, locality));
	return Void();
}