diff --git a/bindings/c/test/fdb_c_client_config_tests.py b/bindings/c/test/fdb_c_client_config_tests.py
index ee2ec4ed2b..0bcc2a2eb8 100644
--- a/bindings/c/test/fdb_c_client_config_tests.py
+++ b/bindings/c/test/fdb_c_client_config_tests.py
@@ -164,6 +164,48 @@ class ClientConfigTest:
         self.tc.assertEqual(1, len(matching_clients))
         self.tc.assertEqual(expected_client, matching_clients[0])
 
+    def check_healthy_status_report(self):
+        """Assert that the status report has exactly the expected keys and describes a healthy client."""
+        self.tc.assertIsNotNone(self.status_json)
+        expected_mvc_attributes = {
+            "Healthy",
+            "InitializationState",
+            "DatabaseStatus",
+            "ProtocolVersion",
+            "AvailableClients",
+            "ConnectionRecord",
+            "ClusterId",
+        }
+        self.tc.assertEqual(expected_mvc_attributes, set(self.status_json))
+        self.tc.assertEqual("created", self.status_json["InitializationState"])
+        self.tc.assertGreater(len(self.status_json["AvailableClients"]), 0)
+
+        expected_db_attributes = {
+            "Healthy",
+            "Coordinators",
+            "CurrentCoordinator",
+            "ClusterID",
+            "GrvProxies",
+            "CommitProxies",
+            "StorageServers",
+            "Connections",
+            "NumConnectionsFailed",
+        }
+        db_status = self.status_json["DatabaseStatus"]
+        self.tc.assertEqual(expected_db_attributes, set(db_status))
+        self.tc.assertTrue(db_status["Healthy"])
+        # Each of these lists must be non-empty; the key is passed as the failure message.
+        for key in ("Coordinators", "GrvProxies", "CommitProxies", "StorageServers", "Connections"):
+            self.tc.assertGreater(len(db_status[key]), 0, key)
+        self.tc.assertEqual(0, db_status["NumConnectionsFailed"])
+        self.tc.assertTrue(self.status_json["Healthy"])
+
+    def check_healthy_status(self, expected_is_healthy):
+        """Assert that the top-level "Healthy" flag is present and equals expected_is_healthy."""
+        self.tc.assertIsNotNone(self.status_json)
+        self.tc.assertIn("Healthy", self.status_json)
+        self.tc.assertEqual(expected_is_healthy, self.status_json["Healthy"])
+
     def exec(self):
         cmd_args = [self.cluster.client_config_tester_bin, "--cluster-file", self.test_cluster_file]
 
@@ -241,6 +283,7 @@ 
class ClientConfigTests(unittest.TestCase): test = ClientConfigTest(self) test.print_status = True test.exec() + test.check_healthy_status(True) def test_disable_mvc_bypass(self): # Local client only @@ -248,7 +291,7 @@ class ClientConfigTests(unittest.TestCase): test.print_status = True test.disable_client_bypass = True test.exec() - test.check_initialization_state("created") + test.check_healthy_status_report() test.check_available_clients([CURRENT_VERSION]) test.check_current_client(CURRENT_VERSION) @@ -259,7 +302,7 @@ class ClientConfigTests(unittest.TestCase): test.create_external_lib_path(CURRENT_VERSION) test.disable_local_client = True test.exec() - test.check_initialization_state("created") + test.check_healthy_status_report() test.check_available_clients([CURRENT_VERSION]) test.check_current_client(CURRENT_VERSION) @@ -269,7 +312,7 @@ class ClientConfigTests(unittest.TestCase): test.print_status = True test.create_external_lib_path(CURRENT_VERSION) test.exec() - test.check_initialization_state("created") + test.check_healthy_status_report() test.check_available_clients([CURRENT_VERSION]) test.check_current_client(CURRENT_VERSION) @@ -281,7 +324,7 @@ class ClientConfigTests(unittest.TestCase): test.disable_local_client = True test.api_version = api_version_from_str(PREV2_RELEASE_VERSION) test.exec() - test.check_initialization_state("created") + test.check_healthy_status_report() test.check_available_clients([CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION]) test.check_current_client(CURRENT_VERSION) @@ -322,7 +365,7 @@ class ClientConfigTests(unittest.TestCase): test.api_version = api_version_from_str(CURRENT_VERSION) test.ignore_external_client_failures = True test.exec() - test.check_initialization_state("created") + test.check_healthy_status_report() test.check_available_clients([CURRENT_VERSION]) test.check_current_client(CURRENT_VERSION) @@ -338,6 +381,7 @@ class ClientConfigTests(unittest.TestCase): test.expected_error = 2125 # 
Incompatible client test.exec() test.check_initialization_state("incompatible") + test.check_healthy_status(False) test.check_available_clients([PREV_RELEASE_VERSION]) test.check_current_client(None) @@ -355,6 +399,7 @@ class ClientConfigTests(unittest.TestCase): test.expected_error = 1031 # Timeout test.exec() test.check_initialization_state("incompatible") + test.check_healthy_status(False) test.check_available_clients([PREV_RELEASE_VERSION]) test.check_current_client(None) @@ -370,6 +415,7 @@ class ClientConfigTests(unittest.TestCase): test.expected_error = 1031 # Timeout test.exec() test.check_initialization_state("initializing") + test.check_healthy_status(False) test.check_available_clients([CURRENT_VERSION]) test.check_protocol_version_not_set() @@ -385,6 +431,7 @@ class ClientConfigTests(unittest.TestCase): test.expected_error = 2104 # Connection string invalid test.exec() test.check_initialization_state("initialization_failed") + test.check_healthy_status(False) test.check_available_clients([CURRENT_VERSION]) test.check_protocol_version_not_set() @@ -408,6 +455,7 @@ class ClientConfigPrevVersionTests(unittest.TestCase): test.api_version = api_version_from_str(PREV_RELEASE_VERSION) test.exec() test.check_initialization_state("created") + test.check_healthy_status(False) test.check_available_clients([PREV_RELEASE_VERSION, CURRENT_VERSION]) test.check_current_client(PREV_RELEASE_VERSION) @@ -428,6 +476,7 @@ class ClientConfigPrevVersionTests(unittest.TestCase): test.ignore_external_client_failures = True test.exec() test.check_initialization_state("incompatible") + test.check_healthy_status(False) test.check_available_clients([CURRENT_VERSION]) test.check_current_client(None) @@ -453,7 +502,7 @@ class ClientConfigSeparateCluster(unittest.TestCase): t = Thread(target=upgrade, args=(self.cluster,)) t.start() test.exec() - test.check_initialization_state("created") + test.check_healthy_status_report() test.check_available_clients([CURRENT_VERSION]) 
test.check_current_client(CURRENT_VERSION)
         t.join()
diff --git a/fdbclient/ClientStatusReport.cpp b/fdbclient/ClientStatusReport.cpp
index ef58b01cb4..41ea14a2dd 100644
--- a/fdbclient/ClientStatusReport.cpp
+++ b/fdbclient/ClientStatusReport.cpp
@@ -30,16 +30,18 @@ namespace {
 
 class ClientReportGenerator {
 public:
-	ClientReportGenerator(DatabaseContext& cx) : cx(cx) {}
+	ClientReportGenerator(DatabaseContext& cx) : cx(cx), numConnectionsFailed(0), healthy(true) {}
 
 	Standalone generateReport() {
 		if (cx.isError()) {
 			statusObj["InitializationError"] = cx.deferredError.code();
+			healthy = false;
 		} else {
 			reportCoordinators();
 			reportClientInfo();
 			reportStorageServers();
 			reportConnections();
 		}
+		statusObj["Healthy"] = healthy; // written on the error path too, so the key is never missing
 		return StringRef(json_spirit::write_string(json_spirit::mValue(statusObj)));
 	}
@@ -60,23 +62,36 @@ private:
 		if (cx.coordinator->get().present()) {
 			statusObj["CurrentCoordinator"] = cx.coordinator->get().get().getAddressString();
 		}
+
+		// Update health status
+		if (cs.hostnames.size() + cs.coords.size() == 0) {
+			healthy = false;
+		}
+		if (!cx.coordinator->get().present()) {
+			healthy = false;
+		}
 	}
 
 	void reportClientInfo() {
 		auto& clientInfo = cx.clientInfo->get();
 		statusObj["ClusterID"] = clientInfo.clusterId.toString();
 		json_spirit::mArray grvProxyArr;
-		for (auto& grvProxy : clientInfo.grvProxies) {
+		for (const auto& grvProxy : clientInfo.grvProxies) {
 			serverAddresses.insert(grvProxy.address());
 			grvProxyArr.push_back(grvProxy.address().toString());
 		}
 		statusObj["GrvProxies"] = grvProxyArr;
 		json_spirit::mArray commitProxyArr;
-		for (auto& commitProxy : clientInfo.commitProxies) {
+		for (const auto& commitProxy : clientInfo.commitProxies) {
 			serverAddresses.insert(commitProxy.address());
 			commitProxyArr.push_back(commitProxy.address().toString());
 		}
 		statusObj["CommitProxies"] = commitProxyArr;
+
+		// Update health status
+		if (clientInfo.grvProxies.size() == 0 || clientInfo.commitProxies.size() == 0) {
+			healthy = false;
+		}
 	}
 
 	void
reportStorageServers() {
@@ -93,10 +108,16 @@ private:
 
 	void reportConnections() {
 		json_spirit::mArray connectionArr;
-		for (auto& addr : serverAddresses) {
+		for (const auto& addr : serverAddresses) {
 			connectionArr.push_back(connectionStatusReport(addr));
 		}
 		statusObj["Connections"] = connectionArr;
+		statusObj["NumConnectionsFailed"] = numConnectionsFailed;
+
+		// Update health status
+		if (numConnectionsFailed > 0) {
+			healthy = false;
+		}
 	}
 
 	json_spirit::mObject connectionStatusReport(const NetworkAddress& address) {
@@ -110,6 +131,7 @@ private:
 		bool failed = IFailureMonitor::failureMonitor().getState(address).isFailed();
 		if (failed) {
 			connStatus["Status"] = "failed";
+			numConnectionsFailed++;
 		} else if (peerIter == peers.end()) {
 			connStatus["Status"] = "disconnected";
 		} else {
@@ -139,6 +161,8 @@ private:
 	DatabaseContext& cx;
 	json_spirit::mObject statusObj;
 	std::set serverAddresses;
+	int numConnectionsFailed;
+	bool healthy;
 };
 
 } // namespace
diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp
index dd410cf912..13be58839e 100644
--- a/fdbclient/MultiVersionTransaction.actor.cpp
+++ b/fdbclient/MultiVersionTransaction.actor.cpp
@@ -2397,6 +2397,7 @@ Standalone MultiVersionDatabase::DatabaseState::getClientStatus(
 	if (dbProtocolVersion.present()) {
 		statusObj["ProtocolVersion"] = format("%llx", dbProtocolVersion.get().version());
 	}
+	bool dbContextHealthy = false;
 	if (initializationState != InitializationState::INITIALIZATION_FAILED) {
 		if (dbContextStatus.isError()) {
 			statusObj["ErrorRetrievingDatabaseStatus"] = dbContextStatus.getError().code();
@@ -2404,8 +2405,17 @@ Standalone MultiVersionDatabase::DatabaseState::getClientStatus(
 			json_spirit::mValue dbContextStatusVal;
 			json_spirit::read_string(dbContextStatus.get().toString(), dbContextStatusVal);
 			statusObj["DatabaseStatus"] = dbContextStatusVal;
+			// Only call get_obj() on an object value; an unparsed or non-object report leaves dbContextHealthy false
+			if (dbContextStatusVal.type() == json_spirit::obj_type) {
+				auto& dbContextStatusObj = dbContextStatusVal.get_obj();
+				auto healthyIter = dbContextStatusObj.find("Healthy");
+				if (healthyIter != dbContextStatusObj.end() && healthyIter->second.type() == json_spirit::bool_type) {
+					dbContextHealthy = healthyIter->second.get_bool();
+				}
+			}
 		}
 	}
+	statusObj["Healthy"] = initializationState == InitializationState::CREATED && dbContextHealthy;
 	return StringRef(json_spirit::write_string(json_spirit::mValue(statusObj)));
 }
 