Pass metacluster info through cluster controller (CC) data to populate it in status

This commit is contained in:
Jon Fu 2022-09-28 16:18:44 -07:00
parent 0fa462fca9
commit 6357ad1750
9 changed files with 112 additions and 38 deletions

View File

@ -24,6 +24,19 @@
FDB_DEFINE_BOOLEAN_PARAM(AddNewTenants); FDB_DEFINE_BOOLEAN_PARAM(AddNewTenants);
FDB_DEFINE_BOOLEAN_PARAM(RemoveMissingTenants); FDB_DEFINE_BOOLEAN_PARAM(RemoveMissingTenants);
// Maps a ClusterType to its lowercase textual name.
//
// Returns "standalone", "metacluster_management" or "metacluster_data" for the matching
// enumerator, and "unknown" for any value that is none of those.
std::string clusterTypeToString(const ClusterType& clusterType) {
	if (clusterType == ClusterType::STANDALONE) {
		return "standalone";
	}
	if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
		return "metacluster_management";
	}
	if (clusterType == ClusterType::METACLUSTER_DATA) {
		return "metacluster_data";
	}
	// Fallback for a value outside the recognized enumerators.
	return "unknown";
}
std::string DataClusterEntry::clusterStateToString(DataClusterState clusterState) { std::string DataClusterEntry::clusterStateToString(DataClusterState clusterState) {
switch (clusterState) { switch (clusterState) {
case DataClusterState::READY: case DataClusterState::READY:

View File

@ -27,15 +27,15 @@
namespace MetaclusterAPI { namespace MetaclusterAPI {
ACTOR std::pair<ClusterUsage, ClusterUsage> metaclusterCapacity(std::map<ClusterName, DataClusterMetadata> clusters) { std::pair<ClusterUsage, ClusterUsage> metaclusterCapacity(std::map<ClusterName, DataClusterMetadata> clusters) {
ClusterUsage totalCapacity; ClusterUsage tenantGroupCapacity;
ClusterUsage totalAllocated; ClusterUsage tenantGroupsAllocated;
for (auto cluster : clusters) { for (auto cluster : clusters) {
totalCapacity.numTenantGroups += tenantGroupCapacity.numTenantGroups +=
std::max(cluster.second.entry.capacity.numTenantGroups, cluster.second.entry.allocated.numTenantGroups); std::max(cluster.second.entry.capacity.numTenantGroups, cluster.second.entry.allocated.numTenantGroups);
totalAllocated.numTenantGroups += cluster.second.entry.allocated.numTenantGroups; tenantGroupsAllocated.numTenantGroups += cluster.second.entry.allocated.numTenantGroups;
} }
return { totalCapacity, totalAllocated }; return { tenantGroupCapacity, tenantGroupsAllocated };
} }
ACTOR Future<Reference<IDatabase>> openDatabase(ClusterConnectionString connectionString) { ACTOR Future<Reference<IDatabase>> openDatabase(ClusterConnectionString connectionString) {

View File

@ -1463,19 +1463,6 @@ typedef Standalone<ClusterNameRef> ClusterName;
enum class ClusterType { STANDALONE, METACLUSTER_MANAGEMENT, METACLUSTER_DATA }; enum class ClusterType { STANDALONE, METACLUSTER_MANAGEMENT, METACLUSTER_DATA };
// std::string getClusterType(const ClusterType& clusterType) {
// switch (clusterType) {
// case ClusterType::STANDALONE:
// return "standalone";
// case ClusterType::METACLUSTER_MANAGEMENT:
// return "metacluster_management";
// case ClusterType::METACLUSTER_DATA:
// return "metacluster_data";
// default:
// return "unknown";
// }
// }
struct GRVCacheSpace { struct GRVCacheSpace {
Version cachedReadVersion; Version cachedReadVersion;
double lastGrvTime; double lastGrvTime;

View File

@ -53,6 +53,8 @@ struct Traceable<ClusterUsage> : std::true_type {
} }
}; };
std::string clusterTypeToString(const ClusterType& clusterType);
// Represents the various states that a data cluster could be in. // Represents the various states that a data cluster could be in.
// //
// READY - the data cluster is active // READY - the data cluster is active
@ -98,6 +100,15 @@ struct DataClusterEntry {
} }
}; };
// Plain bundle of metacluster summary counters. Every counter starts at zero until a
// value is assigned to it.
struct MetaclusterMetrics {
	// Count of tenants.
	int numTenants{ 0 };
	// Count of data clusters.
	int numDataClusters{ 0 };
	// Tenant-group capacity.
	int tenantGroupCapacity{ 0 };
	// Tenant groups currently allocated.
	int tenantGroupsAllocated{ 0 };

	MetaclusterMetrics() = default;
};
struct MetaclusterRegistrationEntry { struct MetaclusterRegistrationEntry {
constexpr static FileIdentifier file_identifier = 13448589; constexpr static FileIdentifier file_identifier = 13448589;

View File

@ -44,6 +44,7 @@
#include "fdbserver/ClusterRecovery.actor.h" #include "fdbserver/ClusterRecovery.actor.h"
#include "fdbserver/DataDistributorInterface.h" #include "fdbserver/DataDistributorInterface.h"
#include "fdbserver/DBCoreState.h" #include "fdbserver/DBCoreState.h"
#include "fdbclient/Metacluster.h"
#include "fdbclient/MetaclusterManagement.actor.h" #include "fdbclient/MetaclusterManagement.actor.h"
#include "fdbserver/MoveKeys.actor.h" #include "fdbserver/MoveKeys.actor.h"
#include "fdbserver/LeaderElection.h" #include "fdbserver/LeaderElection.h"
@ -1499,7 +1500,9 @@ ACTOR Future<Void> statusServer(FutureStream<StatusRequest> requests,
coordinators, coordinators,
incompatibleConnections, incompatibleConnections,
self->datacenterVersionDifference, self->datacenterVersionDifference,
configBroadcaster))); configBroadcaster,
self->db.metaclusterRegistration,
self->db.metaclusterMetrics)));
if (result.isError() && result.getError().code() == error_code_actor_cancelled) if (result.isError() && result.getError().code() == error_code_actor_cancelled)
throw result.getError(); throw result.getError();
@ -2686,17 +2689,45 @@ ACTOR Future<Void> workerHealthMonitor(ClusterControllerData* self) {
ACTOR Future<Void> metaclusterMetricsUpdater(ClusterControllerData* self) { ACTOR Future<Void> metaclusterMetricsUpdater(ClusterControllerData* self) {
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx); state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
loop { loop {
try { if (self->db.clusterType == ClusterType::METACLUSTER_MANAGEMENT) {
std::map<ClusterName, DataClusterMetadata> clusters = try {
wait(MetaclusterAPI::listClustersTransaction(tr, ""_sr, "\xff"_sr, CLIENT_KNOBS->MAX_DATA_CLUSTERS)); tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
state std::map<ClusterName, DataClusterMetadata> clusters =
auto capacityNumbers = MetaclusterAPI::metaclusterCapacity(clusters); // wait(MetaclusterAPI::listClustersTransaction(tr, ""_sr, "\xff"_sr,
TraceEvent("MetaclusterCapacity") // CLIENT_KNOBS->MAX_DATA_CLUSTERS));
.detail("DataClusters", clusters.size()) wait(MetaclusterAPI::listClusters(
.detail("TotalCapacity", capacityNumbers.first.numTenantGroups) self->cx.getReference(), ""_sr, "\xff"_sr, CLIENT_KNOBS->MAX_DATA_CLUSTERS));
.detail("AllocatedCapacity", capacityNumbers.second.numTenantGroups); state int64_t tenantCount =
} catch (Error& e) { wait(MetaclusterAPI::ManagementClusterMetadata::tenantMetadata().tenantCount.getD(
wait(tr->onError(e)); tr, Snapshot::False, 0));
state std::pair<ClusterUsage, ClusterUsage> capacityNumbers =
MetaclusterAPI::metaclusterCapacity(clusters);
// TraceEvent("MetaclusterCapacityDebugTr")
// .detail("DataClusters", clusters.size())
// .detail("TenantGroupCapacity", capacityNumbers.first.numTenantGroups)
// .detail("TenantGroupsAllocated", capacityNumbers.second.numTenantGroups);
// state std::map<ClusterName, DataClusterMetadata> clusters2 =
// wait(MetaclusterAPI::listClusters(self->cx.getReference(), ""_sr, "\xff"_sr,
// CLIENT_KNOBS->MAX_DATA_CLUSTERS)); state std::pair<ClusterUsage, ClusterUsage> capacityNumbers2 =
// MetaclusterAPI::metaclusterCapacity(clusters2); TraceEvent("MetaclusterCapacityDebugDb")
// .detail("DataClusters", clusters2.size())
// .detail("TenantGroupCapacity", capacityNumbers2.first.numTenantGroups)
// .detail("TenantGroupsAllocated", capacityNumbers2.second.numTenantGroups);
MetaclusterMetrics metrics;
metrics.numTenants = tenantCount;
metrics.numDataClusters = clusters.size();
metrics.tenantGroupCapacity = capacityNumbers.first.numTenantGroups;
metrics.tenantGroupsAllocated = capacityNumbers.second.numTenantGroups;
self->db.metaclusterMetrics = metrics;
TraceEvent("MetaclusterCapacity")
.detail("DataClusters", self->db.metaclusterMetrics.numDataClusters)
.detail("TenantGroupCapacity", self->db.metaclusterMetrics.tenantGroupCapacity)
.detail("TenantGroupsAllocated", self->db.metaclusterMetrics.tenantGroupsAllocated);
} catch (Error& e) {
TraceEvent("MetaclusterUpdaterError").error(e);
wait(tr->onError(e));
continue;
}
} }
// Background updater updates every minute // Background updater updates every minute
wait(delay(60.0)); wait(delay(60.0));

View File

@ -1169,6 +1169,7 @@ ACTOR Future<Void> readTransactionSystemState(Reference<ClusterRecoveryData> sel
Optional<ClusterName> clusterName; Optional<ClusterName> clusterName;
Optional<UID> clusterId; Optional<UID> clusterId;
if (metaclusterRegistration.present()) { if (metaclusterRegistration.present()) {
self->controllerData->db.metaclusterRegistration = metaclusterRegistration.get();
self->controllerData->db.metaclusterName = metaclusterRegistration.get().metaclusterName; self->controllerData->db.metaclusterName = metaclusterRegistration.get().metaclusterName;
self->controllerData->db.clusterType = metaclusterRegistration.get().clusterType; self->controllerData->db.clusterType = metaclusterRegistration.get().clusterType;
metaclusterName = metaclusterRegistration.get().metaclusterName; metaclusterName = metaclusterRegistration.get().metaclusterName;
@ -1178,15 +1179,17 @@ ACTOR Future<Void> readTransactionSystemState(Reference<ClusterRecoveryData> sel
clusterId = metaclusterRegistration.get().id; clusterId = metaclusterRegistration.get().id;
} }
} else { } else {
self->controllerData->db.metaclusterRegistration = Optional<MetaclusterRegistrationEntry>();
self->controllerData->db.metaclusterName = Optional<ClusterName>();
self->controllerData->db.clusterType = ClusterType::STANDALONE; self->controllerData->db.clusterType = ClusterType::STANDALONE;
} }
TraceEvent("MetaclusterMetadata") TraceEvent("MetaclusterMetadata")
.detail("ClusterType", self->controllerData->db.clusterType) .detail("ClusterType", clusterTypeToString(self->controllerData->db.clusterType))
.detail("MetaclusterName", metaclusterName.present() ? metaclusterName.get() : ClusterName()) .detail("MetaclusterName", metaclusterName)
.detail("MetaclusterId", metaclusterId.present() ? metaclusterId.get() : UID()) .detail("MetaclusterId", metaclusterId)
.detail("ClusterName", clusterName.present() ? clusterName.get() : ClusterName()) .detail("DataClusterName", clusterName)
.detail("ClusterId", clusterId.present() ? clusterId.get() : UID()); .detail("DataClusterId", clusterId);
uniquify(self->allTags); uniquify(self->allTags);

View File

@ -19,6 +19,7 @@
*/ */
#include <cinttypes> #include <cinttypes>
#include "fdbclient/Metacluster.h"
#include "fmt/format.h" #include "fmt/format.h"
#include "fdbclient/BlobWorkerInterface.h" #include "fdbclient/BlobWorkerInterface.h"
#include "fdbclient/KeyBackedTypes.h" #include "fdbclient/KeyBackedTypes.h"
@ -2919,7 +2920,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
ServerCoordinators coordinators, ServerCoordinators coordinators,
std::vector<NetworkAddress> incompatibleConnections, std::vector<NetworkAddress> incompatibleConnections,
Version datacenterVersionDifference, Version datacenterVersionDifference,
ConfigBroadcaster const* configBroadcaster) { ConfigBroadcaster const* configBroadcaster,
Optional<MetaclusterRegistrationEntry> metaclusterRegistration,
MetaclusterMetrics metaclusterMetrics) {
state double tStart = timer(); state double tStart = timer();
state JsonBuilderArray messages; state JsonBuilderArray messages;
@ -3061,6 +3064,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
state JsonBuilderObject qos; state JsonBuilderObject qos;
state JsonBuilderObject dataOverlay; state JsonBuilderObject dataOverlay;
state JsonBuilderObject tenants; state JsonBuilderObject tenants;
state JsonBuilderObject metacluster;
state JsonBuilderObject storageWiggler; state JsonBuilderObject storageWiggler;
state std::unordered_set<UID> wiggleServers; state std::unordered_set<UID> wiggleServers;
@ -3243,6 +3247,25 @@ ACTOR Future<StatusReply> clusterGetStatus(
if (!qos.empty()) if (!qos.empty())
statusObj["qos"] = qos; statusObj["qos"] = qos;
// Metacluster metadata
if (metaclusterRegistration.present()) {
metacluster["cluster_type"] = clusterTypeToString(metaclusterRegistration.get().clusterType);
metacluster["metacluster_name"] = metaclusterRegistration.get().metaclusterName;
metacluster["metacluster_id"] = metaclusterRegistration.get().metaclusterId.toString();
if (metaclusterRegistration.get().clusterType == ClusterType::METACLUSTER_DATA) {
metacluster["data_cluster_name"] = metaclusterRegistration.get().name;
metacluster["data_cluster_id"] = metaclusterRegistration.get().id.toString();
} else { // clusterType == ClusterType::METACLUSTER_MANAGEMENT
metacluster["num_data_clusters"] = metaclusterMetrics.numDataClusters;
tenants["num_tenants"] = metaclusterMetrics.numTenants;
tenants["tenant_group_capacity"] = metaclusterMetrics.tenantGroupCapacity;
tenants["tenant_groups_allocated"] = metaclusterMetrics.tenantGroupsAllocated;
}
} else {
metacluster["cluster_type"] = clusterTypeToString(ClusterType::STANDALONE);
}
statusObj["metacluster"] = metacluster;
if (!tenants.empty()) if (!tenants.empty())
statusObj["tenants"] = tenants; statusObj["tenants"] = tenants;

View File

@ -31,6 +31,7 @@
#define FDBSERVER_CLUSTERCONTROLLER_ACTOR_H #define FDBSERVER_CLUSTERCONTROLLER_ACTOR_H
#include "fdbclient/DatabaseContext.h" #include "fdbclient/DatabaseContext.h"
#include "fdbclient/Metacluster.h"
#include "fdbrpc/Replication.h" #include "fdbrpc/Replication.h"
#include "fdbrpc/ReplicationUtils.h" #include "fdbrpc/ReplicationUtils.h"
#include "fdbserver/Knobs.h" #include "fdbserver/Knobs.h"
@ -142,6 +143,8 @@ public:
AsyncVar<bool> blobGranulesEnabled; AsyncVar<bool> blobGranulesEnabled;
ClusterType clusterType = ClusterType::STANDALONE; ClusterType clusterType = ClusterType::STANDALONE;
Optional<ClusterName> metaclusterName; Optional<ClusterName> metaclusterName;
Optional<MetaclusterRegistrationEntry> metaclusterRegistration;
MetaclusterMetrics metaclusterMetrics;
DBInfo() DBInfo()
: clientInfo(new AsyncVar<ClientDBInfo>()), serverInfo(new AsyncVar<ServerDBInfo>()), : clientInfo(new AsyncVar<ClientDBInfo>()), serverInfo(new AsyncVar<ServerDBInfo>()),

View File

@ -27,6 +27,7 @@
#include "fdbserver/WorkerInterface.actor.h" #include "fdbserver/WorkerInterface.actor.h"
#include "fdbserver/MasterInterface.h" #include "fdbserver/MasterInterface.h"
#include "fdbclient/ClusterInterface.h" #include "fdbclient/ClusterInterface.h"
#include "fdbclient/Metacluster.h"
struct ProcessIssues { struct ProcessIssues {
NetworkAddress address; NetworkAddress address;
@ -44,7 +45,9 @@ Future<StatusReply> clusterGetStatus(
ServerCoordinators const& coordinators, ServerCoordinators const& coordinators,
std::vector<NetworkAddress> const& incompatibleConnections, std::vector<NetworkAddress> const& incompatibleConnections,
Version const& datacenterVersionDifference, Version const& datacenterVersionDifference,
ConfigBroadcaster const* const& conifgBroadcaster); ConfigBroadcaster const* const& conifgBroadcaster,
Optional<MetaclusterRegistrationEntry> const& metaclusterRegistration,
MetaclusterMetrics const& metaclusterMetrics);
struct WorkerEvents : std::map<NetworkAddress, TraceEventFields> {}; struct WorkerEvents : std::map<NetworkAddress, TraceEventFields> {};
Future<Optional<std::pair<WorkerEvents, std::set<std::string>>>> latestEventOnWorkers( Future<Optional<std::pair<WorkerEvents, std::set<std::string>>>> latestEventOnWorkers(