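Replace the raw bool forceRemove argument with a ForceRemove boolean parameter type; rename ForceJoinNewMetacluster to ForceJoin (the fdbcli option `force_join_new_metacluster` becomes `force_join`)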

2025-06-03 03:41:53 +08:00 · 2023-02-21 16:23:25 -08:00 · 2023-02-21 16:23:25 -08:00 · ec79ecce73
commit ec79ecce73
parent 0c55749f65
5 changed files with 42 additions and 36 deletions
--- a/fdbcli/MetaclusterCommands.actor.cpp
+++ b/fdbcli/MetaclusterCommands.actor.cpp
@ -171,14 +171,14 @@ ACTOR Future<bool> metaclusterRemoveCommand(Reference<IDatabase> db, std::vector
 	}

 	state ClusterNameRef clusterName = tokens[tokens.size() - 1];
-	state bool force = tokens.size() == 4;
+	state ForceRemove forceRemove = ForceRemove(tokens.size() == 4);

 	state ClusterType clusterType = wait(runTransaction(db, [](Reference<ITransaction> tr) {
 		tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
 		return TenantAPI::getClusterType(tr);
 	}));

-	if (clusterType == ClusterType::METACLUSTER_DATA && !force) {
+	if (clusterType == ClusterType::METACLUSTER_DATA && !forceRemove) {
 		if (tokens[2] == "FORCE"_sr) {
 			fmt::print("ERROR: a cluster name must be specified.\n");
 		} else {
@ -190,8 +190,7 @@ ACTOR Future<bool> metaclusterRemoveCommand(Reference<IDatabase> db, std::vector
 		return false;
 	}

-	bool updatedDataCluster =
-	    wait(MetaclusterAPI::removeCluster(db, clusterName, clusterType, tokens.size() == 4, 15.0));
+	bool updatedDataCluster = wait(MetaclusterAPI::removeCluster(db, clusterName, clusterType, forceRemove, 15.0));

 	if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
 		fmt::print("The cluster `{}' has been removed\n", printable(clusterName).c_str());
@ -211,7 +210,7 @@ ACTOR Future<bool> metaclusterRemoveCommand(Reference<IDatabase> db, std::vector

 void printRestoreUsage() {
 	fmt::print("Usage: metacluster restore <NAME> [dryrun] connection_string=<CONNECTION_STRING>\n"
-	           "<restore_known_data_cluster|repopulate_from_data_cluster> [force_join_new_metacluster]\n\n");
+	           "<restore_known_data_cluster|repopulate_from_data_cluster> [force_join]\n\n");

 	fmt::print("Add a restored data cluster back to a metacluster.\n\n");

@ -223,8 +222,9 @@ void printRestoreUsage() {
 	fmt::print("that the metacluster is already tracking. This mode should be used if only data\n");
 	fmt::print("clusters are being restored, and any discrepancies between the management and\n");
 	fmt::print("data clusters will be resolved using the management cluster metadata.\n");
-	fmt::print("If `force_join_new_metacluster' is specified, the cluster will try to restore\n");
-	fmt::print("to a different metacluster than it was originally registered to.\n\n");
+	fmt::print("If `force_join' is specified, the cluster will try to restore to a different\n");
+	fmt::print("metacluster than it was originally registered to or with a different ID than\n");
+	fmt::print("is associated with the given cluster name.\n\n");

 	fmt::print("Use `repopulate_from_data_cluster' to rebuild a lost management cluster from the\n");
 	fmt::print("data clusters in a metacluster. This mode should be used if the management\n");
@ -244,7 +244,7 @@ ACTOR Future<bool> metaclusterRestoreCommand(Reference<IDatabase> db, std::vecto
 	}

 	state bool dryRun = tokens[3] == "dryrun"_sr;
-	state bool forceJoin = tokens[tokens.size() - 1] == "force_join_new_metacluster"_sr;
+	state bool forceJoin = tokens[tokens.size() - 1] == "force_join"_sr;

 	if (tokens.size() < 5 + (int)dryRun + (int)forceJoin) {
 		printRestoreUsage();
@ -274,7 +274,7 @@ ACTOR Future<bool> metaclusterRestoreCommand(Reference<IDatabase> db, std::vecto
 			                                    config.get().first.get(),
 			                                    ApplyManagementClusterUpdates::True,
 			                                    RestoreDryRun(dryRun),
-			                                    ForceJoinNewMetacluster(forceJoin),
+			                                    ForceJoin(forceJoin),
 			                                    &messages));
 		} else if (restoreType == "repopulate_from_data_cluster"_sr) {
 			wait(MetaclusterAPI::restoreCluster(db,
@ -282,7 +282,7 @@ ACTOR Future<bool> metaclusterRestoreCommand(Reference<IDatabase> db, std::vecto
 			                                    config.get().first.get(),
 			                                    ApplyManagementClusterUpdates::False,
 			                                    RestoreDryRun(dryRun),
-			                                    ForceJoinNewMetacluster(forceJoin),
+			                                    ForceJoin(forceJoin),
 			                                    &messages));
 		} else {
 			fmt::print(stderr, "ERROR: unrecognized restore mode `{}'\n", printable(restoreType));
@ -589,7 +589,7 @@ void metaclusterGenerator(const char* text,
 				const char* opts[] = { "restore_known_data_cluster", "repopulate_from_data_cluster", nullptr };
 				arrayGenerator(text, line, opts, lc);
 			} else if (tokens.size() == 5 + (int)dryrun) {
-				const char* opts[] = { "force_join_new_metacluster", nullptr };
+				const char* opts[] = { "force_join", nullptr };
 				arrayGenerator(text, line, opts, lc);
 			}
 		}
@ -624,7 +624,7 @@ std::vector<const char*> metaclusterHintGenerator(std::vector<StringRef> const&
 			                                     "[dryrun]",
 			                                     "connection_string=<CONNECTION_STRING>",
 			                                     "<restore_known_data_cluster|repopulate_from_data_cluster>",
-			                                     "[force_join_new_metacluster]" };
+			                                     "[force_join]" };
 		if (tokens.size() < 4 || (tokens[3].size() <= 6 && "dryrun"_sr.startsWith(tokens[3]))) {
 			return std::vector<const char*>(opts.begin() + tokens.size() - 2, opts.end());
 		} else if (tokens.size() < 6) {
--- a/fdbclient/Metacluster.cpp
+++ b/fdbclient/Metacluster.cpp
@ -31,7 +31,8 @@ FDB_DEFINE_BOOLEAN_PARAM(IsRestoring);
 FDB_DEFINE_BOOLEAN_PARAM(RunOnDisconnectedCluster);
 FDB_DEFINE_BOOLEAN_PARAM(RunOnMismatchedCluster);
 FDB_DEFINE_BOOLEAN_PARAM(RestoreDryRun);
-FDB_DEFINE_BOOLEAN_PARAM(ForceJoinNewMetacluster);
+FDB_DEFINE_BOOLEAN_PARAM(ForceJoin);
+FDB_DEFINE_BOOLEAN_PARAM(ForceRemove);

 namespace MetaclusterAPI {

--- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h
+++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h
@ -100,7 +100,8 @@ FDB_DECLARE_BOOLEAN_PARAM(IsRestoring);
 FDB_DECLARE_BOOLEAN_PARAM(RunOnDisconnectedCluster);
 FDB_DECLARE_BOOLEAN_PARAM(RunOnMismatchedCluster);
 FDB_DECLARE_BOOLEAN_PARAM(RestoreDryRun);
-FDB_DECLARE_BOOLEAN_PARAM(ForceJoinNewMetacluster);
+FDB_DECLARE_BOOLEAN_PARAM(ForceJoin);
+FDB_DECLARE_BOOLEAN_PARAM(ForceRemove);

 namespace MetaclusterAPI {

@ -870,7 +871,7 @@ struct RemoveClusterImpl {
 	Reference<DB> db;
 	ClusterType clusterType;
 	ClusterName clusterName;
-	bool forceRemove;
+	ForceRemove forceRemove;
 	double dataClusterTimeout;

 	// Parameters set in markClusterRemoving
@ -882,7 +883,7 @@ struct RemoveClusterImpl {
 	RemoveClusterImpl(Reference<DB> db,
 	                  ClusterName clusterName,
 	                  ClusterType clusterType,
-	                  bool forceRemove,
+	                  ForceRemove forceRemove,
 	                  double dataClusterTimeout)
 	  : ctx(db,
 	        Optional<ClusterName>(),
@ -1191,7 +1192,7 @@ ACTOR template <class DB>
 Future<bool> removeCluster(Reference<DB> db,
                           ClusterName name,
                           ClusterType clusterType,
-                           bool forceRemove,
+                           ForceRemove forceRemove,
                           double dataClusterTimeout = 0) {
 	state RemoveClusterImpl<DB> impl(db, name, clusterType, forceRemove, dataClusterTimeout);
 	wait(impl.run());
@ -1332,7 +1333,7 @@ struct RestoreClusterImpl {
 	ClusterConnectionString connectionString;
 	ApplyManagementClusterUpdates applyManagementClusterUpdates;
 	RestoreDryRun restoreDryRun;
-	ForceJoinNewMetacluster forceJoinNewMetacluster;
+	ForceJoin forceJoin;
 	std::vector<std::string>& messages;

 	// Unique ID generated for this restore. Used to avoid concurrent restores
@ -1352,11 +1353,11 @@ struct RestoreClusterImpl {
 	                   ClusterConnectionString connectionString,
 	                   ApplyManagementClusterUpdates applyManagementClusterUpdates,
 	                   RestoreDryRun restoreDryRun,
-	                   ForceJoinNewMetacluster forceJoinNewMetacluster,
+	                   ForceJoin forceJoin,
 	                   std::vector<std::string>& messages)
 	  : ctx(managementDb, {}, { DataClusterState::RESTORING }), clusterName(clusterName),
 	    connectionString(connectionString), applyManagementClusterUpdates(applyManagementClusterUpdates),
-	    restoreDryRun(restoreDryRun), forceJoinNewMetacluster(forceJoinNewMetacluster), messages(messages) {}
+	    restoreDryRun(restoreDryRun), forceJoin(forceJoin), messages(messages) {}

 	ACTOR template <class Transaction>
 	static Future<Void> checkRestoreId(RestoreClusterImpl* self, Transaction tr) {
@ -1422,7 +1423,7 @@ struct RestoreClusterImpl {
 				if (!metaclusterRegistration.present()) {
 					throw invalid_data_cluster();
 				} else if (!metaclusterRegistration.get().matches(self->ctx.metaclusterRegistration.get())) {
-					if (!self->forceJoinNewMetacluster) {
+					if (!self->forceJoin) {
 						TraceEvent(SevWarn, "MetaclusterRestoreClusterMismatch")
 						    .detail("ExistingRegistration", metaclusterRegistration.get())
 						    .detail("ManagementClusterRegistration", self->ctx.metaclusterRegistration.get());
@ -2073,7 +2074,7 @@ struct RestoreClusterImpl {
 		wait(self->runRestoreDataClusterTransaction(
 		    [self = self](Reference<ITransaction> tr) { return getTenantsFromDataCluster(self, tr); },
 		    RunOnDisconnectedCluster::False,
-		    RunOnMismatchedCluster(self->restoreDryRun && self->forceJoinNewMetacluster)));
+		    RunOnMismatchedCluster(self->restoreDryRun && self->forceJoin)));

 		// Fix any differences between the data cluster and the management cluster
 		wait(reconcileTenants(self));
@ -2164,10 +2165,10 @@ Future<Void> restoreCluster(Reference<DB> db,
                            ClusterConnectionString connectionString,
                            ApplyManagementClusterUpdates applyManagementClusterUpdates,
                            RestoreDryRun restoreDryRun,
-                            ForceJoinNewMetacluster forceJoinNewMetacluster,
+                            ForceJoin forceJoin,
                            std::vector<std::string>* messages) {
 	state RestoreClusterImpl<DB> impl(
-	    db, name, connectionString, applyManagementClusterUpdates, restoreDryRun, forceJoinNewMetacluster, *messages);
+	    db, name, connectionString, applyManagementClusterUpdates, restoreDryRun, forceJoin, *messages);
 	wait(impl.run());
 	return Void();
 }
--- a/fdbserver/workloads/MetaclusterManagementConcurrencyWorkload.actor.cpp
+++ b/fdbserver/workloads/MetaclusterManagementConcurrencyWorkload.actor.cpp
@ -143,7 +143,7 @@ struct MetaclusterManagementConcurrencyWorkload : TestWorkload {
 				TraceEvent(SevDebug, "MetaclusterManagementConcurrencyRemovingCluster", debugId)
 				    .detail("ClusterName", clusterName);
 				Future<bool> removeFuture = MetaclusterAPI::removeCluster(
-				    self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, false);
+				    self->managementDb, clusterName, ClusterType::METACLUSTER_MANAGEMENT, ForceRemove::False);
 				Optional<bool> result = wait(timeout(removeFuture, deterministicRandom()->randomInt(1, 30)));
 				if (result.present()) {
 					ASSERT(result.get());
--- a/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp
+++ b/fdbserver/workloads/MetaclusterRestoreWorkload.actor.cpp
@ -239,7 +239,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
 	                                             Database dataDb,
 	                                             std::string backupUrl,
 	                                             bool addToMetacluster,
-	                                             ForceJoinNewMetacluster forceJoinNewMetacluster,
+	                                             ForceJoin forceJoin,
 	                                             int simultaneousRestoreCount,
 	                                             MetaclusterRestoreWorkload* self) {
 		state FileBackupAgent backupAgent;
@ -274,7 +274,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
 				                                    dataDb->getConnectionRecord()->getConnectionString(),
 				                                    ApplyManagementClusterUpdates::True,
 				                                    RestoreDryRun::True,
-				                                    forceJoinNewMetacluster,
+				                                    forceJoin,
 				                                    &messages));

 				state MetaclusterData<IDatabase> postDryRunMetaclusterData(self->managementDb);
@ -298,7 +298,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
 			                                    dataDb->getConnectionRecord()->getConnectionString(),
 			                                    ApplyManagementClusterUpdates::True,
 			                                    RestoreDryRun::False,
-			                                    forceJoinNewMetacluster,
+			                                    forceJoin,
 			                                    &messages));
 			TraceEvent("MetaclusterRestoreWorkloadRestoreComplete").detail("ClusterName", clusterName);
 		}
@ -516,8 +516,10 @@ struct MetaclusterRestoreWorkload : TestWorkload {
 			TraceEvent("MetaclusterRestoreWorkloadProcessDataCluster").detail("FromCluster", clusterItr->first);

 			// Remove the data cluster from its old metacluster
-			wait(success(MetaclusterAPI::removeCluster(
-			    clusterItr->second.db.getReference(), clusterItr->first, ClusterType::METACLUSTER_DATA, true)));
+			wait(success(MetaclusterAPI::removeCluster(clusterItr->second.db.getReference(),
+			                                           clusterItr->first,
+			                                           ClusterType::METACLUSTER_DATA,
+			                                           ForceRemove::True)));
 			TraceEvent("MetaclusterRestoreWorkloadForgotMetacluster").detail("ClusterName", clusterItr->first);

 			state std::pair<TenantCollisions, GroupCollisions> collisions =
@ -554,7 +556,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
 						    clusterItr->second.db->getConnectionRecord()->getConnectionString(),
 						    ApplyManagementClusterUpdates::False,
 						    RestoreDryRun::True,
-						    ForceJoinNewMetacluster(deterministicRandom()->coinflip()),
+						    ForceJoin(deterministicRandom()->coinflip()),
 						    &messages));

 						state MetaclusterData<IDatabase> postDryRunMetaclusterData(self->managementDb);
@ -582,7 +584,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
 					    clusterItr->second.db->getConnectionRecord()->getConnectionString(),
 					    ApplyManagementClusterUpdates::False,
 					    RestoreDryRun::False,
-					    ForceJoinNewMetacluster(deterministicRandom()->coinflip()),
+					    ForceJoin(deterministicRandom()->coinflip()),
 					    &messages));

 					ASSERT(collisions.first.empty() && collisions.second.empty());
@ -597,8 +599,10 @@ struct MetaclusterRestoreWorkload : TestWorkload {

 					// If the restore did not succeed, remove the partially restored cluster
 					try {
-						wait(success(MetaclusterAPI::removeCluster(
-						    self->managementDb, clusterItr->first, ClusterType::METACLUSTER_MANAGEMENT, true)));
+						wait(success(MetaclusterAPI::removeCluster(self->managementDb,
+						                                           clusterItr->first,
+						                                           ClusterType::METACLUSTER_MANAGEMENT,
+						                                           ForceRemove::True)));
 						TraceEvent("MetaclusterRestoreWorkloadRemoveFailedCluster")
 						    .detail("ClusterName", clusterItr->first);
 					} catch (Error& e) {
@ -928,7 +932,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
 			                                      self->dataDbs[cluster].db,
 			                                      backupUrl.get(),
 			                                      !self->recoverManagementCluster,
-			                                      ForceJoinNewMetacluster(deterministicRandom()->coinflip()),
+			                                      ForceJoin(deterministicRandom()->coinflip()),
 			                                      backups.size(),
 			                                      self));
 		}
@ -945,7 +949,7 @@ struct MetaclusterRestoreWorkload : TestWorkload {
 					                                            self->dataDbs[cluster].db,
 					                                            backupUrl.get(),
 					                                            true,
-					                                            ForceJoinNewMetacluster::True,
+					                                            ForceJoin::True,
 					                                            backups.size(),
 					                                            self));
 				}