1
0
mirror of https://github.com/apple/foundationdb.git synced 2025-05-31 10:14:52 +08:00

get_client_status: determine the health status

This commit is contained in:
Vaidas Gasiunas 2023-01-12 18:46:04 +01:00
parent 205466e04c
commit 962cd6efcd
3 changed files with 90 additions and 10 deletions

@ -164,6 +164,48 @@ class ClientConfigTest:
self.tc.assertEqual(1, len(matching_clients))
self.tc.assertEqual(expected_client, matching_clients[0])
def check_healthy_status_report(self):
    """Assert that ``self.status_json`` is a complete report of a healthy client.

    Checks, in order: the exact key set of the top-level report, its
    initialization state and client list, the exact key set of the nested
    "DatabaseStatus" section, and finally that both levels report themselves
    as healthy with no failed connections. The first failing check raises
    through ``self.tc``.
    """
    report = self.status_json
    self.tc.assertIsNotNone(report)

    # The top-level report must carry exactly these keys - no more, no less.
    top_level_keys = {
        "Healthy",
        "InitializationState",
        "DatabaseStatus",
        "ProtocolVersion",
        "AvailableClients",
        "ConnectionRecord",
        "ClusterId",
    }
    self.tc.assertEqual(top_level_keys, set(report.keys()))
    self.tc.assertEqual("created", report["InitializationState"])
    self.tc.assertGreater(len(report["AvailableClients"]), 0)

    # Same exact-match rule for the nested database section.
    database_keys = {
        "Healthy",
        "Coordinators",
        "CurrentCoordinator",
        "ClusterID",
        "GrvProxies",
        "CommitProxies",
        "StorageServers",
        "Connections",
        "NumConnectionsFailed",
    }
    db_status = report["DatabaseStatus"]
    self.tc.assertEqual(database_keys, set(db_status.keys()))
    self.tc.assertTrue(db_status["Healthy"])

    # Each of these collections has to list at least one entry.
    for list_key in (
        "Coordinators",
        "GrvProxies",
        "CommitProxies",
        "StorageServers",
        "Connections",
    ):
        self.tc.assertGreater(len(db_status[list_key]), 0)

    self.tc.assertEqual(0, db_status["NumConnectionsFailed"])
    self.tc.assertTrue(report["Healthy"])
def check_healthy_status(self, expected_is_healthy):
    """Assert that the top-level "Healthy" flag of ``self.status_json`` matches.

    Args:
        expected_is_healthy: the value the report's "Healthy" entry must equal.

    Raises:
        AssertionError (via ``self.tc``): if no status was collected, the
        "Healthy" key is absent, or its value differs from the expectation.
    """
    self.tc.assertIsNotNone(self.status_json)
    # assertIn names the missing key and shows the container on failure,
    # instead of the bare "False is not true" produced by assertTrue(x in y).
    self.tc.assertIn("Healthy", self.status_json)
    self.tc.assertEqual(expected_is_healthy, self.status_json["Healthy"])
def exec(self):
cmd_args = [self.cluster.client_config_tester_bin, "--cluster-file", self.test_cluster_file]
@ -241,6 +283,7 @@ class ClientConfigTests(unittest.TestCase):
test = ClientConfigTest(self)
test.print_status = True
test.exec()
test.check_healthy_status(True)
def test_disable_mvc_bypass(self):
# Local client only
@ -248,7 +291,7 @@ class ClientConfigTests(unittest.TestCase):
test.print_status = True
test.disable_client_bypass = True
test.exec()
test.check_initialization_state("created")
test.check_healthy_status_report()
test.check_available_clients([CURRENT_VERSION])
test.check_current_client(CURRENT_VERSION)
@ -259,7 +302,7 @@ class ClientConfigTests(unittest.TestCase):
test.create_external_lib_path(CURRENT_VERSION)
test.disable_local_client = True
test.exec()
test.check_initialization_state("created")
test.check_healthy_status_report()
test.check_available_clients([CURRENT_VERSION])
test.check_current_client(CURRENT_VERSION)
@ -269,7 +312,7 @@ class ClientConfigTests(unittest.TestCase):
test.print_status = True
test.create_external_lib_path(CURRENT_VERSION)
test.exec()
test.check_initialization_state("created")
test.check_healthy_status_report()
test.check_available_clients([CURRENT_VERSION])
test.check_current_client(CURRENT_VERSION)
@ -281,7 +324,7 @@ class ClientConfigTests(unittest.TestCase):
test.disable_local_client = True
test.api_version = api_version_from_str(PREV2_RELEASE_VERSION)
test.exec()
test.check_initialization_state("created")
test.check_healthy_status_report()
test.check_available_clients([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION])
test.check_current_client(CURRENT_VERSION)
@ -322,7 +365,7 @@ class ClientConfigTests(unittest.TestCase):
test.api_version = api_version_from_str(CURRENT_VERSION)
test.ignore_external_client_failures = True
test.exec()
test.check_initialization_state("created")
test.check_healthy_status_report()
test.check_available_clients([CURRENT_VERSION])
test.check_current_client(CURRENT_VERSION)
@ -338,6 +381,7 @@ class ClientConfigTests(unittest.TestCase):
test.expected_error = 2125 # Incompatible client
test.exec()
test.check_initialization_state("incompatible")
test.check_healthy_status(False)
test.check_available_clients([PREV_RELEASE_VERSION])
test.check_current_client(None)
@ -355,6 +399,7 @@ class ClientConfigTests(unittest.TestCase):
test.expected_error = 1031 # Timeout
test.exec()
test.check_initialization_state("incompatible")
test.check_healthy_status(False)
test.check_available_clients([PREV_RELEASE_VERSION])
test.check_current_client(None)
@ -370,6 +415,7 @@ class ClientConfigTests(unittest.TestCase):
test.expected_error = 1031 # Timeout
test.exec()
test.check_initialization_state("initializing")
test.check_healthy_status(False)
test.check_available_clients([CURRENT_VERSION])
test.check_protocol_version_not_set()
@ -385,6 +431,7 @@ class ClientConfigTests(unittest.TestCase):
test.expected_error = 2104 # Connection string invalid
test.exec()
test.check_initialization_state("initialization_failed")
test.check_healthy_status(False)
test.check_available_clients([CURRENT_VERSION])
test.check_protocol_version_not_set()
@ -408,6 +455,7 @@ class ClientConfigPrevVersionTests(unittest.TestCase):
test.api_version = api_version_from_str(PREV_RELEASE_VERSION)
test.exec()
test.check_initialization_state("created")
test.check_healthy_status(False)
test.check_available_clients([PREV_RELEASE_VERSION, CURRENT_VERSION])
test.check_current_client(PREV_RELEASE_VERSION)
@ -428,6 +476,7 @@ class ClientConfigPrevVersionTests(unittest.TestCase):
test.ignore_external_client_failures = True
test.exec()
test.check_initialization_state("incompatible")
test.check_healthy_status(False)
test.check_available_clients([CURRENT_VERSION])
test.check_current_client(None)
@ -453,7 +502,7 @@ class ClientConfigSeparateCluster(unittest.TestCase):
t = Thread(target=upgrade, args=(self.cluster,))
t.start()
test.exec()
test.check_initialization_state("created")
test.check_healthy_status_report()
test.check_available_clients([CURRENT_VERSION])
test.check_current_client(CURRENT_VERSION)
t.join()

@ -30,16 +30,18 @@ namespace {
class ClientReportGenerator {
public:
ClientReportGenerator(DatabaseContext& cx) : cx(cx) {}
// Binds the generator to the database context it reports on. The report starts out
// healthy with no failed connections. Initializers are listed in member declaration
// order (numConnectionsFailed before healthy), which is the order they actually run in
// and avoids a -Wreorder warning.
ClientReportGenerator(DatabaseContext& cx) : cx(cx), numConnectionsFailed(0), healthy(true) {}
// Builds the status report for the database context and returns it serialized as a JSON string.
//
// If the context is in an error state, only the error code is reported and the context is
// considered unhealthy. Otherwise the individual report* helpers fill in statusObj and may
// clear `healthy` along the way.
//
// "Healthy" is written on every path, so an errored context explicitly reports
// "Healthy": false rather than omitting the key.
Standalone<StringRef> generateReport() {
	if (cx.isError()) {
		statusObj["InitializationError"] = cx.deferredError.code();
		healthy = false;
	} else {
		reportCoordinators();
		reportClientInfo();
		reportStorageServers();
		reportConnections();
	}
	// Must come after all helpers have run: each of them can still flip the flag to false.
	statusObj["Healthy"] = healthy;
	return StringRef(json_spirit::write_string(json_spirit::mValue(statusObj)));
}
@ -60,23 +62,36 @@ private:
if (cx.coordinator->get().present()) {
statusObj["CurrentCoordinator"] = cx.coordinator->get().get().getAddressString();
}
// Update health status
if (cs.hostnames.size() + cs.coords.size() == 0) {
healthy = false;
}
if (!cx.coordinator->get().present()) {
healthy = false;
}
}
// Copies the cluster ID and the GRV / commit proxy addresses from cx.clientInfo into statusObj
// ("ClusterID", "GrvProxies", "CommitProxies"). Every proxy address is also inserted into
// serverAddresses. Clears `healthy` when either proxy list is empty.
// NOTE(review): each loop header below appears twice (`auto&` and `const auto&`); this looks
// like the before/after pair of a diff rather than intentional nesting - confirm that only the
// const form is meant to remain.
void reportClientInfo() {
auto& clientInfo = cx.clientInfo->get();
statusObj["ClusterID"] = clientInfo.clusterId.toString();
json_spirit::mArray grvProxyArr;
for (auto& grvProxy : clientInfo.grvProxies) {
for (const auto& grvProxy : clientInfo.grvProxies) {
serverAddresses.insert(grvProxy.address());
grvProxyArr.push_back(grvProxy.address().toString());
}
statusObj["GrvProxies"] = grvProxyArr;
json_spirit::mArray commitProxyArr;
for (auto& commitProxy : clientInfo.commitProxies) {
for (const auto& commitProxy : clientInfo.commitProxies) {
serverAddresses.insert(commitProxy.address());
commitProxyArr.push_back(commitProxy.address().toString());
}
statusObj["CommitProxies"] = commitProxyArr;
// Update health status: unhealthy unless at least one GRV proxy and one commit proxy are known
if (clientInfo.grvProxies.size() == 0 || clientInfo.commitProxies.size() == 0) {
healthy = false;
}
}
void reportStorageServers() {
@ -93,10 +108,16 @@ private:
// Writes one connection status entry per address in serverAddresses to statusObj["Connections"],
// plus the failure count to statusObj["NumConnectionsFailed"]. Clears `healthy` when that count
// is above zero.
// NOTE(review): the loop header below appears twice (`auto&` and `const auto&`); this looks like
// the before/after pair of a diff rather than intentional nesting - confirm that only the const
// form is meant to remain.
void reportConnections() {
json_spirit::mArray connectionArr;
for (auto& addr : serverAddresses) {
for (const auto& addr : serverAddresses) {
// connectionStatusReport() increments numConnectionsFailed for addresses it reports as failed,
// so the counter is only final once this loop has finished.
connectionArr.push_back(connectionStatusReport(addr));
}
statusObj["Connections"] = connectionArr;
statusObj["NumConnectionsFailed"] = numConnectionsFailed;
// Update health status: any failed connection makes the report unhealthy
if (numConnectionsFailed > 0) {
healthy = false;
}
}
json_spirit::mObject connectionStatusReport(const NetworkAddress& address) {
@ -110,6 +131,7 @@ private:
bool failed = IFailureMonitor::failureMonitor().getState(address).isFailed();
if (failed) {
connStatus["Status"] = "failed";
numConnectionsFailed++;
} else if (peerIter == peers.end()) {
connStatus["Status"] = "disconnected";
} else {
@ -139,6 +161,8 @@ private:
DatabaseContext& cx;
json_spirit::mObject statusObj;
std::set<NetworkAddress> serverAddresses;
int numConnectionsFailed;
bool healthy;
};
} // namespace

@ -2397,6 +2397,7 @@ Standalone<StringRef> MultiVersionDatabase::DatabaseState::getClientStatus(
if (dbProtocolVersion.present()) {
statusObj["ProtocolVersion"] = format("%llx", dbProtocolVersion.get().version());
}
bool dbContextHealthy = false;
if (initializationState != InitializationState::INITIALIZATION_FAILED) {
if (dbContextStatus.isError()) {
statusObj["ErrorRetrievingDatabaseStatus"] = dbContextStatus.getError().code();
@ -2404,8 +2405,14 @@ Standalone<StringRef> MultiVersionDatabase::DatabaseState::getClientStatus(
json_spirit::mValue dbContextStatusVal;
json_spirit::read_string(dbContextStatus.get().toString(), dbContextStatusVal);
statusObj["DatabaseStatus"] = dbContextStatusVal;
auto& dbContextStatusObj = dbContextStatusVal.get_obj();
auto healthyIter = dbContextStatusObj.find("Healthy");
if (healthyIter != dbContextStatusObj.end() && healthyIter->second.type() == json_spirit::bool_type) {
dbContextHealthy = healthyIter->second.get_bool();
}
}
}
statusObj["Healthy"] = initializationState == InitializationState::CREATED && dbContextHealthy;
return StringRef(json_spirit::write_string(json_spirit::mValue(statusObj)));
}