Add a boolean parameter for ForceRemove; rename ForceJoinNewMetacluster to ForceJoin

This commit is contained in:
A.J. Beamon 2023-02-21 16:23:25 -08:00
parent 0c55749f65
commit ec79ecce73
5 changed files with 42 additions and 36 deletions

View File

@ -171,14 +171,14 @@ ACTOR Future<bool> metaclusterRemoveCommand(Reference<IDatabase> db, std::vector
}
state ClusterNameRef clusterName = tokens[tokens.size() - 1];
state bool force = tokens.size() == 4;
state ForceRemove forceRemove = ForceRemove(tokens.size() == 4);
state ClusterType clusterType = wait(runTransaction(db, [](Reference<ITransaction> tr) {
tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
return TenantAPI::getClusterType(tr);
}));
if (clusterType == ClusterType::METACLUSTER_DATA && !force) {
if (clusterType == ClusterType::METACLUSTER_DATA && !forceRemove) {
if (tokens[2] == "FORCE"_sr) {
fmt::print("ERROR: a cluster name must be specified.\n");
} else {
@ -190,8 +190,7 @@ ACTOR Future<bool> metaclusterRemoveCommand(Reference<IDatabase> db, std::vector
return false;
}
bool updatedDataCluster =
wait(MetaclusterAPI::removeCluster(db, clusterName, clusterType, tokens.size() == 4, 15.0));
bool updatedDataCluster = wait(MetaclusterAPI::removeCluster(db, clusterName, clusterType, forceRemove, 15.0));
if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
fmt::print("The cluster `{}' has been removed\n", printable(clusterName).c_str());
@ -211,7 +210,7 @@ ACTOR Future<bool> metaclusterRemoveCommand(Reference<IDatabase> db, std::vector
void printRestoreUsage() {
fmt::print("Usage: metacluster restore <NAME> [dryrun] connection_string=<CONNECTION_STRING>\n"
"<restore_known_data_cluster|repopulate_from_data_cluster> [force_join_new_metacluster]\n\n");
"<restore_known_data_cluster|repopulate_from_data_cluster> [force_join]\n\n");
fmt::print("Add a restored data cluster back to a metacluster.\n\n");
@ -223,8 +222,9 @@ void printRestoreUsage() {
fmt::print("that the metacluster is already tracking. This mode should be used if only data\n");
fmt::print("clusters are being restored, and any discrepancies between the management and\n");
fmt::print("data clusters will be resolved using the management cluster metadata.\n");
fmt::print("If `force_join_new_metacluster' is specified, the cluster will try to restore\n");
fmt::print("to a different metacluster than it was originally registered to.\n\n");
fmt::print("If `force_join' is specified, the cluster will try to restore to a different\n");
fmt::print("metacluster than it was originally registered to or with a different ID than\n");
fmt::print("is associated with the given cluster name.\n\n");
fmt::print("Use `repopulate_from_data_cluster' to rebuild a lost management cluster from the\n");
fmt::print("data clusters in a metacluster. This mode should be used if the management\n");
@ -244,7 +244,7 @@ ACTOR Future<bool> metaclusterRestoreCommand(Reference<IDatabase> db, std::vecto
}
state bool dryRun = tokens[3] == "dryrun"_sr;
state bool forceJoin = tokens[tokens.size() - 1] == "force_join_new_metacluster"_sr;
state bool forceJoin = tokens[tokens.size() - 1] == "force_join"_sr;
if (tokens.size() < 5 + (int)dryRun + (int)forceJoin) {
printRestoreUsage();
@ -274,7 +274,7 @@ ACTOR Future<bool> metaclusterRestoreCommand(Reference<IDatabase> db, std::vecto
config.get().first.get(),
ApplyManagementClusterUpdates::True,
RestoreDryRun(dryRun),
ForceJoinNewMetacluster(forceJoin),
ForceJoin(forceJoin),
&messages));
} else if (restoreType == "repopulate_from_data_cluster"_sr) {
wait(MetaclusterAPI::restoreCluster(db,
@ -282,7 +282,7 @@ ACTOR Future<bool> metaclusterRestoreCommand(Reference<IDatabase> db, std::vecto
config.get().first.get(),
ApplyManagementClusterUpdates::False,
RestoreDryRun(dryRun),
ForceJoinNewMetacluster(forceJoin),
ForceJoin(forceJoin),
&messages));
} else {
fmt::print(stderr, "ERROR: unrecognized restore mode `{}'\n", printable(restoreType));
@ -589,7 +589,7 @@ void metaclusterGenerator(const char* text,
const char* opts[] = { "restore_known_data_cluster", "repopulate_from_data_cluster", nullptr };
arrayGenerator(text, line, opts, lc);
} else if (tokens.size() == 5 + (int)dryrun) {
const char* opts[] = { "force_join_new_metacluster", nullptr };
const char* opts[] = { "force_join", nullptr };
arrayGenerator(text, line, opts, lc);
}
}
@ -624,7 +624,7 @@ std::vector<const char*> metaclusterHintGenerator(std::vector<StringRef> const&
"[dryrun]",
"connection_string=<CONNECTION_STRING>",
"<restore_known_data_cluster|repopulate_from_data_cluster>",
"[force_join_new_metacluster]" };
"[force_join]" };
if (tokens.size() < 4 || (tokens[3].size() <= 6 && "dryrun"_sr.startsWith(tokens[3]))) {
return std::vector<const char*>(opts.begin() + tokens.size() - 2, opts.end());
} else if (tokens.size() < 6) {

View File

@ -31,7 +31,8 @@ FDB_DEFINE_BOOLEAN_PARAM(IsRestoring);
FDB_DEFINE_BOOLEAN_PARAM(RunOnDisconnectedCluster);
FDB_DEFINE_BOOLEAN_PARAM(RunOnMismatchedCluster);
FDB_DEFINE_BOOLEAN_PARAM(RestoreDryRun);
FDB_DEFINE_BOOLEAN_PARAM(ForceJoinNewMetacluster);
FDB_DEFINE_BOOLEAN_PARAM(ForceJoin);
FDB_DEFINE_BOOLEAN_PARAM(ForceRemove);
namespace MetaclusterAPI {

View File

@ -100,7 +100,8 @@ FDB_DECLARE_BOOLEAN_PARAM(IsRestoring);
FDB_DECLARE_BOOLEAN_PARAM(RunOnDisconnectedCluster);
FDB_DECLARE_BOOLEAN_PARAM(RunOnMismatchedCluster);
FDB_DECLARE_BOOLEAN_PARAM(RestoreDryRun);
FDB_DECLARE_BOOLEAN_PARAM(ForceJoinNewMetacluster);
FDB_DECLARE_BOOLEAN_PARAM(ForceJoin);
FDB_DECLARE_BOOLEAN_PARAM(ForceRemove);
namespace MetaclusterAPI {
@ -870,7 +871,7 @@ struct RemoveClusterImpl {
Reference<DB> db;
ClusterType clusterType;
ClusterName clusterName;
bool forceRemove;
ForceRemove forceRemove;
double dataClusterTimeout;
// Parameters set in markClusterRemoving
@ -882,7 +883,7 @@ struct RemoveClusterImpl {
RemoveClusterImpl(Reference<DB> db,
ClusterName clusterName,
ClusterType clusterType,
bool forceRemove,
ForceRemove forceRemove,
double dataClusterTimeout)
: ctx(db,
Optional<ClusterName>(),
@ -1191,7 +1192,7 @@ ACTOR template <class DB>
Future<bool> removeCluster(Reference<DB> db,
ClusterName name,
ClusterType clusterType,
bool forceRemove,
ForceRemove forceRemove,
double dataClusterTimeout = 0) {
state RemoveClusterImpl<DB> impl(db, name, clusterType, forceRemove, dataClusterTimeout);
wait(impl.run());
@ -1332,7 +1333,7 @@ struct RestoreClusterImpl {
ClusterConnectionString connectionString;
ApplyManagementClusterUpdates applyManagementClusterUpdates;
RestoreDryRun restoreDryRun;
ForceJoinNewMetacluster forceJoinNewMetacluster;
ForceJoin forceJoin;
std::vector<std::string>& messages;
// Unique ID generated for this restore. Used to avoid concurrent restores
@ -1352,11 +1353,11 @@ struct RestoreClusterImpl {
ClusterConnectionString connectionString,
ApplyManagementClusterUpdates applyManagementClusterUpdates,
RestoreDryRun restoreDryRun,
ForceJoinNewMetacluster forceJoinNewMetacluster,
ForceJoin forceJoin,
std::vector<std::string>& messages)
: ctx(managementDb, {}, { DataClusterState::RESTORING }), clusterName(clusterName),
connectionString(connectionString), applyManagementClusterUpdates(applyManagementClusterUpdates),
restoreDryRun(restoreDryRun), forceJoinNewMetacluster(forceJoinNewMetacluster), messages(messages) {}
restoreDryRun(restoreDryRun), forceJoin(forceJoin), messages(messages) {}
ACTOR template <class Transaction>
static Future<Void> checkRestoreId(RestoreClusterImpl* self, Transaction tr) {
@ -1422,7 +1423,7 @@ struct RestoreClusterImpl {
if (!metaclusterRegistration.present()) {
throw invalid_data_cluster();
} else if (!metaclusterRegistration.get().matches(self->ctx.metaclusterRegistration.get())) {
if (!self->forceJoinNewMetacluster) {
if (!self->forceJoin) {
TraceEvent(SevWarn, "MetaclusterRestoreClusterMismatch")
.detail("ExistingRegistration", metaclusterRegistration.get())
.detail("ManagementClusterRegistration", self->ctx.metaclusterRegistration.get());
@ -2073,7 +2074,7 @@ struct RestoreClusterImpl {
wait(self->runRestoreDataClusterTransaction(
[self = self](Reference<ITransaction> tr) { return getTenantsFromDataCluster(self, tr); },
RunOnDisconnectedCluster::False,
RunOnMismatchedCluster(self->restoreDryRun && self->forceJoinNewMetacluster)));
RunOnMismatchedCluster(self->restoreDryRun && self->forceJoin)));
// Fix any differences between the data cluster and the management cluster
wait(reconcileTenants(self));
@ -2164,10 +2165,10 @@ Future<Void> restoreCluster(Reference<DB> db,
ClusterConnectionString connectionString,
ApplyManagementClusterUpdates applyManagementClusterUpdates,
RestoreDryRun restoreDryRun,
ForceJoinNewMetacluster forceJoinNewMetacluster,
ForceJoin forceJoin,
std::vector<std::string>* messages) {
state RestoreClusterImpl<DB> impl(
db, name, connectionString, applyManagementClusterUpdates, restoreDryRun, forceJoinNewMetacluster, *messages);
db, name, connectionString, applyManagementClusterUpdates, restoreDryRun, forceJoin, *messages);
wait(impl.run());
return Void();
}

View File

@ -143,7 +143,7 @@ struct MetaclusterManagementConcurrencyWorkload : TestWorkload {
TraceEvent(SevDebug, "MetaclusterManagementConcurrencyRemovingCluster", debugId)
.detail("ClusterName", clusterName);
Future<bool> removeFuture = MetaclusterAPI::removeCluster(
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, false);
self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, ForceRemove::False);
Optional<bool> result = wait(timeout(removeFuture, deterministicRandom()->randomInt(1, 30)));
if (result.present()) {
ASSERT(result.get());

View File

@ -239,7 +239,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
Database dataDb,
std::string backupUrl,
bool addToMetacluster,
ForceJoinNewMetacluster forceJoinNewMetacluster,
ForceJoin forceJoin,
int simultaneousRestoreCount,
MetaclusterRestoreWorkload* self) {
state FileBackupAgent backupAgent;
@ -274,7 +274,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
dataDb->getConnectionRecord()->getConnectionString(),
ApplyManagementClusterUpdates::True,
RestoreDryRun::True,
forceJoinNewMetacluster,
forceJoin,
&messages));
state MetaclusterData<IDatabase> postDryRunMetaclusterData(self->managementDb);
@ -298,7 +298,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
dataDb->getConnectionRecord()->getConnectionString(),
ApplyManagementClusterUpdates::True,
RestoreDryRun::False,
forceJoinNewMetacluster,
forceJoin,
&messages));
TraceEvent("MetaclusterRestoreWorkloadRestoreComplete").detail("ClusterName", clusterName);
}
@ -516,8 +516,10 @@ struct MetaclusterRestoreWorkload : TestWorkload {
TraceEvent("MetaclusterRestoreWorkloadProcessDataCluster").detail("FromCluster", clusterItr->first);
// Remove the data cluster from its old metacluster
wait(success(MetaclusterAPI::removeCluster(
clusterItr->second.db.getReference(), clusterItr->first, ClusterType::METACLUSTER_DATA, true)));
wait(success(MetaclusterAPI::removeCluster(clusterItr->second.db.getReference(),
clusterItr->first,
ClusterType::METACLUSTER_DATA,
ForceRemove::True)));
TraceEvent("MetaclusterRestoreWorkloadForgotMetacluster").detail("ClusterName", clusterItr->first);
state std::pair<TenantCollisions, GroupCollisions> collisions =
@ -554,7 +556,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
clusterItr->second.db->getConnectionRecord()->getConnectionString(),
ApplyManagementClusterUpdates::False,
RestoreDryRun::True,
ForceJoinNewMetacluster(deterministicRandom()->coinflip()),
ForceJoin(deterministicRandom()->coinflip()),
&messages));
state MetaclusterData<IDatabase> postDryRunMetaclusterData(self->managementDb);
@ -582,7 +584,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
clusterItr->second.db->getConnectionRecord()->getConnectionString(),
ApplyManagementClusterUpdates::False,
RestoreDryRun::False,
ForceJoinNewMetacluster(deterministicRandom()->coinflip()),
ForceJoin(deterministicRandom()->coinflip()),
&messages));
ASSERT(collisions.first.empty() && collisions.second.empty());
@ -597,8 +599,10 @@ struct MetaclusterRestoreWorkload : TestWorkload {
// If the restore did not succeed, remove the partially restored cluster
try {
wait(success(MetaclusterAPI::removeCluster(
self->managementDb, clusterItr->first, ClusterType::METACLUSTER_MANAGEMENT, true)));
wait(success(MetaclusterAPI::removeCluster(self->managementDb,
clusterItr->first,
ClusterType::METACLUSTER_MANAGEMENT,
ForceRemove::True)));
TraceEvent("MetaclusterRestoreWorkloadRemoveFailedCluster")
.detail("ClusterName", clusterItr->first);
} catch (Error& e) {
@ -928,7 +932,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
self->dataDbs[cluster].db,
backupUrl.get(),
!self->recoverManagementCluster,
ForceJoinNewMetacluster(deterministicRandom()->coinflip()),
ForceJoin(deterministicRandom()->coinflip()),
backups.size(),
self));
}
@ -945,7 +949,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
self->dataDbs[cluster].db,
backupUrl.get(),
true,
ForceJoinNewMetacluster::True,
ForceJoin::True,
backups.size(),
self));
}