/*
 * StatusCommand.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fdbcli/fdbcli.actor.h"

#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/IClientApi.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/StatusClient.h"

#include "flow/Arena.h"
#include "flow/FastRef.h"
#include "flow/ThreadHelper.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.

namespace {

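// Builds a human-readable list of coordinator addresses and whether each is reachable, one per line.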
std::string getCoordinatorsInfoString(StatusObjectReader statusObj) {
    std::string outputString;
    try {
        StatusArray coordinatorsArr = statusObj["client.coordinators.coordinators"].get_array();
        for (StatusObjectReader coor : coordinatorsArr)
            outputString += format("\n %s (%s)",
                                   coor["address"].get_str().c_str(),
                                   coor["reachable"].get_bool() ? "reachable" : "unreachable");
    } catch (std::runtime_error&) {
        outputString = "\n Unable to retrieve list of coordination servers";
    }

    return outputString;
}

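// Wraps 'text' so that no output line exceeds 'col' characters, preferring to break at spaces or newlines.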
std::string lineWrap(const char* text, int col) {
    const char* iter = text;
    const char* start = text;
    const char* space = nullptr;
    std::string out = "";
    do {
        iter++;
        if (*iter == '\n' || *iter == ' ' || *iter == '\0')
            space = iter;
        if (*iter == '\n' || *iter == '\0' || (iter - start == col)) {
            if (!space)
                space = iter;
            out += format("%.*s\n", (int)(space - start), start);
            start = space;
            if (*start == ' ' /* || *start == '\n'*/)
                start++;
            space = nullptr;
        }
    } while (*iter);
    return out;
}

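// Returns the number of non-excluded processes and the number of distinct zones they occupy.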
std::pair<int, int> getNumOfNonExcludedProcessAndZones(StatusObjectReader statusObjCluster) {
    StatusObjectReader processesMap;
    std::set<std::string> zones;
    int numOfNonExcludedProcesses = 0;
    if (statusObjCluster.get("processes", processesMap)) {
        for (auto proc : processesMap.obj()) {
            StatusObjectReader process(proc.second);
            if (process.has("excluded") && process.last().get_bool())
                continue;
            numOfNonExcludedProcesses++;
            std::string zoneId;
            if (process.get("locality.zoneid", zoneId)) {
                zones.insert(zoneId);
            }
        }
    }
    return { numOfNonExcludedProcesses, zones.size() };
}

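// Counts machines that carry an explicit "excluded" flag set to false.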
int getNumofNonExcludedMachines(StatusObjectReader statusObjCluster) {
    StatusObjectReader machineMap;
    int numOfNonExcludedMachines = 0;
    if (statusObjCluster.get("machines", machineMap)) {
        for (auto mach : machineMap.obj()) {
            StatusObjectReader machine(mach.second);
            if (machine.has("excluded") && !machine.last().get_bool())
                numOfNonExcludedMachines++;
        }
    }
    return numOfNonExcludedMachines;
}

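// Formats the int64 timestamp stored under 'key' as a local "%m/%d/%y %H:%M:%S" string, or returns "" if the key is
// absent.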
std::string getDateInfoString(StatusObjectReader statusObj, std::string key) {
    time_t curTime;
    if (!statusObj.has(key)) {
        return "";
    }
    curTime = statusObj.last().get_int64();
    char buffer[128];
    struct tm* timeinfo;
    timeinfo = localtime(&curTime);
    strftime(buffer, 128, "%m/%d/%y %H:%M:%S", timeinfo);
    return std::string(buffer);
}

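// Scans the processes map for a role whose id starts with 'serverID' and returns the owning process's address,
// or "unknown" if it cannot be found.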
std::string getProcessAddressByServerID(StatusObjectReader processesMap, std::string serverID) {
    if (serverID == "")
        return "unknown";

    for (auto proc : processesMap.obj()) {
        try {
            StatusArray rolesArray = proc.second.get_obj()["roles"].get_array();
            for (StatusObjectReader role : rolesArray) {
                if (role["id"].get_str().find(serverID) == 0) {
                    // If this next line throws, then we found the serverID but the role has no address, so the role is
                    // skipped.
                    return proc.second.get_obj()["address"].get_str();
                }
            }
        } catch (std::exception&) {
            // If an entry in the process map is badly formed then something will throw. Since we are
            // looking for a positive match, just ignore any read exceptions and move on to the next proc.
        }
    }
    return "unknown";
}

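// Reads the "<first>.<second>" rate from the workload's transactions or operations sub-document and formats it in Hz.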
std::string getWorkloadRates(StatusObjectReader statusObj,
                             bool unknown,
                             std::string first,
                             std::string second,
                             bool transactionSection = false) {
    // Re-point statusObj at either the transactions sub-doc or the operations sub-doc depending on transactionSection
    // flag
    if (transactionSection) {
        if (!statusObj.get("transactions", statusObj))
            return "unknown";
    } else {
        if (!statusObj.get("operations", statusObj))
            return "unknown";
    }

    std::string path = first + "." + second;
    double value;
    if (!unknown && statusObj.get(path, value)) {
        return format("%d Hz", (int)round(value));
    }
    return "unknown";
}

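// Collects running backup/DR tags from the layer status under 'context', mapping each tag name to its mutation
// stream id (or "" when no id is reported).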
void getBackupDRTags(StatusObjectReader& statusObjCluster,
                     const char* context,
                     std::map<std::string, std::string>& tagMap) {
    std::string path = format("layers.%s.tags", context);
    StatusObjectReader tags;
    if (statusObjCluster.tryGet(path, tags)) {
        for (auto itr : tags.obj()) {
            JSONDoc tag(itr.second);
            bool running = false;
            tag.tryGet("running_backup", running);
            if (running) {
                std::string uid;
                if (tag.tryGet("mutation_stream_id", uid)) {
                    tagMap[itr.first] = uid;
                } else {
                    tagMap[itr.first] = "";
                }
            }
        }
    }
}

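// Renders a "<context>:" section listing each tag and, when known, its UID.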
std::string logBackupDR(const char* context, std::map<std::string, std::string> const& tagMap) {
    std::string outputString = "";
    if (tagMap.size() > 0) {
        outputString += format("\n\n%s:", context);
        for (auto itr : tagMap) {
            outputString += format("\n %-22s", itr.first.c_str());
            if (itr.second.size() > 0) {
                outputString += format(" - %s", itr.second.c_str());
            }
        }
    }

    return outputString;
}

} // namespace

namespace fdb_cli {

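// Pretty-prints the parsed status document. NORMAL and DETAILED levels produce the multi-section report,
// MINIMAL prints a one-line availability summary, and JSON dumps the raw document.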
void printStatus(StatusObjectReader statusObj,
                 StatusClient::StatusLevel level,
                 bool displayDatabaseAvailable,
                 bool hideErrorMessages) {
    if (FlowTransport::transport().incompatibleOutgoingConnectionsPresent()) {
        fprintf(
            stderr,
            "WARNING: One or more of the processes in the cluster is incompatible with this version of fdbcli.\n\n");
    }

    try {
        bool printedCoordinators = false;

        // status or status details
        if (level == StatusClient::NORMAL || level == StatusClient::DETAILED) {

            StatusObjectReader statusObjClient;
            statusObj.get("client", statusObjClient);

            // The way the output string is assembled is to add new line character before addition to the string rather
            // than after
            std::string outputString = "";
            std::string clusterFilePath;
            if (statusObjClient.get("cluster_file.path", clusterFilePath))
                outputString = format("Using cluster file `%s'.\n", clusterFilePath.c_str());
            else
                outputString = "Using unknown cluster file.\n";

            StatusObjectReader statusObjCoordinators;
            StatusArray coordinatorsArr;

            if (statusObjClient.get("coordinators", statusObjCoordinators)) {
                // Look for a second "coordinators", under the first one.
                if (statusObjCoordinators.has("coordinators"))
                    coordinatorsArr = statusObjCoordinators.last().get_array();
            }

            // Check if any coordination servers are unreachable
            bool quorum_reachable;
            if (statusObjCoordinators.get("quorum_reachable", quorum_reachable) && !quorum_reachable) {
                outputString += "\nCould not communicate with a quorum of coordination servers:";
                outputString += getCoordinatorsInfoString(statusObj);

                printf("%s\n", outputString.c_str());
                return;
            } else {
                for (StatusObjectReader coor : coordinatorsArr) {
                    bool reachable;
                    if (coor.get("reachable", reachable) && !reachable) {
                        outputString += "\nCould not communicate with all of the coordination servers."
                                        "\n The database will remain operational as long as we"
                                        "\n can connect to a quorum of servers, however the fault"
                                        "\n tolerance of the system is reduced as long as the"
                                        "\n servers remain disconnected.\n";
                        outputString += getCoordinatorsInfoString(statusObj);
                        outputString += "\n";
                        printedCoordinators = true;
                        break;
                    }
                }
            }

            // print any client messages
            if (statusObjClient.has("messages")) {
                for (StatusObjectReader message : statusObjClient.last().get_array()) {
                    std::string desc;
                    if (message.get("description", desc))
                        outputString += "\n" + lineWrap(desc.c_str(), 80);
                }
            }

            bool fatalRecoveryState = false;
            StatusObjectReader statusObjCluster;
            try {
                if (statusObj.get("cluster", statusObjCluster)) {

                    StatusObjectReader recoveryState;
                    if (statusObjCluster.get("recovery_state", recoveryState)) {
                        std::string name;
                        std::string description;
                        if (recoveryState.get("name", name) && recoveryState.get("description", description) &&
                            name != "accepting_commits" && name != "all_logs_recruited" &&
                            name != "storage_recovered" && name != "fully_recovered") {
                            fatalRecoveryState = true;

                            if (name == "recruiting_transaction_servers") {
                                description +=
                                    format("\nNeed at least %d log servers across unique zones, %d commit proxies, "
                                           "%d GRV proxies and %d resolvers.",
                                           recoveryState["required_logs"].get_int(),
                                           recoveryState["required_commit_proxies"].get_int(),
                                           recoveryState["required_grv_proxies"].get_int(),
                                           recoveryState["required_resolvers"].get_int());
                                if (statusObjCluster.has("machines") && statusObjCluster.has("processes")) {
                                    auto numOfNonExcludedProcessesAndZones =
                                        getNumOfNonExcludedProcessAndZones(statusObjCluster);
                                    description +=
                                        format("\nHave %d non-excluded processes on %d machines across %d zones.",
                                               numOfNonExcludedProcessesAndZones.first,
                                               getNumofNonExcludedMachines(statusObjCluster),
                                               numOfNonExcludedProcessesAndZones.second);
                                }
                            } else if (name == "locking_old_transaction_servers" &&
                                       recoveryState["missing_logs"].get_str().size()) {
                                description += format("\nNeed one or more of the following log servers: %s",
                                                      recoveryState["missing_logs"].get_str().c_str());
                            }
                            description = lineWrap(description.c_str(), 80);
                            if (!printedCoordinators &&
                                (name == "reading_coordinated_state" || name == "locking_coordinated_state" ||
                                 name == "configuration_never_created" || name == "writing_coordinated_state")) {
                                description += getCoordinatorsInfoString(statusObj);
                                description += "\n";
                                printedCoordinators = true;
                            }

                            outputString += "\n" + description;
                        }
                    }
                }
            } catch (std::runtime_error&) {
            }

            // Check if the cluster controller is reachable
            try {
                // print any cluster messages
                if (statusObjCluster.has("messages") && statusObjCluster.last().get_array().size()) {

                    // any messages we don't want to display
                    std::set<std::string> skipMsgs = { "unreachable_process", "" };
                    if (fatalRecoveryState) {
                        skipMsgs.insert("status_incomplete");
                        skipMsgs.insert("unreadable_configuration");
                        skipMsgs.insert("immediate_priority_transaction_start_probe_timeout");
                        skipMsgs.insert("batch_priority_transaction_start_probe_timeout");
                        skipMsgs.insert("transaction_start_probe_timeout");
                        skipMsgs.insert("read_probe_timeout");
                        skipMsgs.insert("commit_probe_timeout");
                    }

                    for (StatusObjectReader msgObj : statusObjCluster.last().get_array()) {
                        std::string messageName;
                        if (!msgObj.get("name", messageName)) {
                            continue;
                        }
                        if (skipMsgs.count(messageName)) {
                            continue;
                        } else if (messageName == "client_issues") {
                            if (msgObj.has("issues")) {
                                for (StatusObjectReader issue : msgObj["issues"].get_array()) {
                                    std::string issueName;
                                    if (!issue.get("name", issueName)) {
                                        continue;
                                    }

                                    std::string description;
                                    if (!issue.get("description", description)) {
                                        description = issueName;
                                    }

                                    std::string countStr;
                                    StatusArray addresses;
                                    if (!issue.has("addresses")) {
                                        countStr = "Some client(s)";
                                    } else {
                                        addresses = issue["addresses"].get_array();
                                        countStr = format("%d client(s)", addresses.size());
                                    }
                                    outputString +=
                                        format("\n%s reported: %s\n", countStr.c_str(), description.c_str());

                                    if (level == StatusClient::StatusLevel::DETAILED) {
                                        for (int i = 0; i < addresses.size() && i < 4; ++i) {
                                            outputString += format(" %s\n", addresses[i].get_str().c_str());
                                        }
                                        if (addresses.size() > 4) {
                                            outputString += " ...\n";
                                        }
                                    }
                                }
                            }
                        } else {
                            if (msgObj.has("description"))
                                outputString += "\n" + lineWrap(msgObj.last().get_str().c_str(), 80);
                        }
                    }
                }
            } catch (std::runtime_error&) {
            }

            if (fatalRecoveryState) {
                printf("%s", outputString.c_str());
                return;
            }

            StatusObjectReader statusObjConfig;
            StatusArray excludedServersArr;
            Optional<std::string> activePrimaryDC;

            if (statusObjCluster.has("active_primary_dc")) {
                activePrimaryDC = statusObjCluster["active_primary_dc"].get_str();
            }
            if (statusObjCluster.get("configuration", statusObjConfig)) {
                if (statusObjConfig.has("excluded_servers"))
                    excludedServersArr = statusObjConfig.last().get_array();
            }

            // If there is a configuration message then there is no configuration information to display
            outputString += "\nConfiguration:";
            std::string outputStringCache = outputString;
            bool isOldMemory = false;
            bool blobGranuleEnabled{ false };
            try {
                // Configuration section
                // FIXME: Should we suppress this if there are cluster messages implying that the database has no
                // configuration?

                outputString += "\n Redundancy mode - ";
                std::string strVal;

                if (statusObjConfig.get("redundancy_mode", strVal)) {
                    outputString += strVal;
                } else
                    outputString += "unknown";

                outputString += "\n Storage engine - ";
                if (statusObjConfig.get("storage_engine", strVal)) {
                    if (strVal == "memory-1") {
                        isOldMemory = true;
                    }
                    outputString += strVal;
                } else
                    outputString += "unknown";

                int intVal = 0;
                if (statusObjConfig.get("blob_granules_enabled", intVal) && intVal) {
                    blobGranuleEnabled = true;
                }
                if (blobGranuleEnabled) {
                    outputString += "\n Blob granules - enabled";
                }

                outputString += "\n Encryption at-rest - ";
                if (statusObjConfig.get("encryption_at_rest_mode", strVal)) {
                    outputString += strVal;
                } else {
                    outputString += "disabled";
                }

                outputString += "\n Coordinators - ";
                if (statusObjConfig.get("coordinators_count", intVal)) {
                    outputString += std::to_string(intVal);
                } else
                    outputString += "unknown";

                if (excludedServersArr.size()) {
                    outputString += format("\n Exclusions - %d (type `exclude' for details)",
                                           excludedServersArr.size());
                }

                if (statusObjConfig.get("commit_proxies", intVal))
                    outputString += format("\n Desired Commit Proxies - %d", intVal);

                if (statusObjConfig.get("grv_proxies", intVal))
                    outputString += format("\n Desired GRV Proxies - %d", intVal);

                if (statusObjConfig.get("resolvers", intVal))
                    outputString += format("\n Desired Resolvers - %d", intVal);

                if (statusObjConfig.get("logs", intVal))
                    outputString += format("\n Desired Logs - %d", intVal);

                if (statusObjConfig.get("remote_logs", intVal))
                    outputString += format("\n Desired Remote Logs - %d", intVal);

                if (statusObjConfig.get("log_routers", intVal))
                    outputString += format("\n Desired Log Routers - %d", intVal);

                if (statusObjConfig.get("tss_count", intVal) && intVal > 0) {
                    int activeTss = 0;
                    if (statusObjCluster.has("active_tss_count")) {
                        statusObjCluster.get("active_tss_count", activeTss);
                    }
                    outputString += format("\n TSS - %d/%d", activeTss, intVal);

                    if (statusObjConfig.get("tss_storage_engine", strVal))
                        outputString += format("\n TSS Storage Engine - %s", strVal.c_str());
                }

                outputString += "\n Usable Regions - ";
                if (statusObjConfig.get("usable_regions", intVal)) {
                    outputString += std::to_string(intVal);
                } else {
                    outputString += "unknown";
                }

                StatusArray regions;
                if (statusObjConfig.has("regions")) {
                    outputString += "\n Regions: ";
                    regions = statusObjConfig["regions"].get_array();
                    for (StatusObjectReader region : regions) {
                        bool isPrimary = false;
                        std::vector<std::string> regionSatelliteDCs;
                        std::string regionDC;
                        for (StatusObjectReader dc : region["datacenters"].get_array()) {
                            if (!dc.has("satellite")) {
                                regionDC = dc["id"].get_str();
                                if (activePrimaryDC.present() && dc["id"].get_str() == activePrimaryDC.get()) {
                                    isPrimary = true;
                                }
                            } else if (dc["satellite"].get_int() == 1) {
                                regionSatelliteDCs.push_back(dc["id"].get_str());
                            }
                        }
                        if (activePrimaryDC.present()) {
                            if (isPrimary) {
                                outputString += "\n Primary -";
                            } else {
                                outputString += "\n Remote -";
                            }
                        } else {
                            outputString += "\n Region -";
                        }
                        outputString += format("\n Datacenter - %s", regionDC.c_str());
                        if (regionSatelliteDCs.size() > 0) {
                            outputString += "\n Satellite datacenters - ";
                            for (int i = 0; i < regionSatelliteDCs.size(); i++) {
                                if (i != regionSatelliteDCs.size() - 1) {
                                    outputString += format("%s, ", regionSatelliteDCs[i].c_str());
                                } else {
                                    outputString += format("%s", regionSatelliteDCs[i].c_str());
                                }
                            }
                        }
                        isPrimary = false;
                        if (region.get("satellite_redundancy_mode", strVal)) {
                            outputString += format("\n Satellite Redundancy Mode - %s", strVal.c_str());
                        }
                        if (region.get("satellite_anti_quorum", intVal)) {
                            outputString += format("\n Satellite Anti Quorum - %d", intVal);
                        }
                        if (region.get("satellite_logs", intVal)) {
                            outputString += format("\n Satellite Logs - %d", intVal);
                        }
                        if (region.get("satellite_log_policy", strVal)) {
                            outputString += format("\n Satellite Log Policy - %s", strVal.c_str());
                        }
                        if (region.get("satellite_log_replicas", intVal)) {
                            outputString += format("\n Satellite Log Replicas - %d", intVal);
                        }
                        if (region.get("satellite_usable_dcs", intVal)) {
                            outputString += format("\n Satellite Usable DCs - %d", intVal);
                        }
                    }
                }
            } catch (std::runtime_error&) {
                outputString = outputStringCache;
                outputString += "\n Unable to retrieve configuration status";
            }

            // Cluster section
            outputString += "\n\nCluster:";
            StatusObjectReader processesMap;
            StatusObjectReader machinesMap;

            outputStringCache = outputString;

            bool machinesAreZones = true;
            std::map<std::string, int> zones;
            try {
                outputString += "\n FoundationDB processes - ";
                if (statusObjCluster.get("processes", processesMap)) {

                    outputString += format("%d", processesMap.obj().size());

                    int errors = 0;
                    int processExclusions = 0;
                    for (auto p : processesMap.obj()) {
                        StatusObjectReader process(p.second);
                        bool excluded = process.has("excluded") && process.last().get_bool();
                        if (excluded) {
                            processExclusions++;
                        }
                        if (process.has("messages") && process.last().get_array().size()) {
                            errors++;
                        }

                        std::string zoneId;
                        if (process.get("locality.zoneid", zoneId)) {
                            std::string machineId;
                            if (!process.get("locality.machineid", machineId) || machineId != zoneId) {
                                machinesAreZones = false;
                            }
                            int& nonExcluded = zones[zoneId];
                            if (!excluded) {
                                nonExcluded = 1;
                            }
                        }
                    }

                    if (errors > 0 || processExclusions) {
                        outputString += format(" (less %d excluded; %d with errors)", processExclusions, errors);
                    }

                } else
                    outputString += "unknown";

                if (zones.size() > 0) {
                    outputString += format("\n Zones - %d", zones.size());
                    int zoneExclusions = 0;
                    for (auto itr : zones) {
                        if (itr.second == 0) {
                            ++zoneExclusions;
                        }
                    }
                    if (zoneExclusions > 0) {
                        outputString += format(" (less %d excluded)", zoneExclusions);
                    }
                } else {
                    outputString += "\n Zones - unknown";
                }

                outputString += "\n Machines - ";
                if (statusObjCluster.get("machines", machinesMap)) {
                    outputString += format("%d", machinesMap.obj().size());

                    int machineExclusions = 0;
                    for (auto mach : machinesMap.obj()) {
                        StatusObjectReader machine(mach.second);
                        if (machine.has("excluded") && machine.last().get_bool())
                            machineExclusions++;
                    }

                    if (machineExclusions) {
                        outputString += format(" (less %d excluded)", machineExclusions);
                    }

                    int64_t minMemoryAvailable = std::numeric_limits<int64_t>::max();
                    for (auto proc : processesMap.obj()) {
                        StatusObjectReader process(proc.second);
                        int64_t availBytes;
                        if (process.get("memory.available_bytes", availBytes)) {
                            minMemoryAvailable = std::min(minMemoryAvailable, availBytes);
                        }
                    }

                    if (minMemoryAvailable < std::numeric_limits<int64_t>::max()) {
                        double worstServerGb = minMemoryAvailable / (1024.0 * 1024 * 1024);
                        outputString += "\n Memory availability - ";
                        outputString += format("%.1f GB per process on machine with least available", worstServerGb);
                        outputString += minMemoryAvailable < 4294967296
                                            ? "\n >>>>> (WARNING: 4.0 GB recommended) <<<<<"
                                            : "";
                    }

                    double retransCount = 0;
                    for (auto mach : machinesMap.obj()) {
                        StatusObjectReader machine(mach.second);
                        double hz;
                        if (machine.get("network.tcp_segments_retransmitted.hz", hz))
                            retransCount += hz;
                    }

                    if (retransCount > 0) {
                        outputString += format("\n Retransmissions rate - %d Hz", (int)round(retransCount));
                    }
                } else
                    outputString += "\n Machines - unknown";

                StatusObjectReader faultTolerance;
                if (statusObjCluster.get("fault_tolerance", faultTolerance)) {
                    int availLoss, dataLoss;

                    if (faultTolerance.get("max_zone_failures_without_losing_availability", availLoss) &&
                        faultTolerance.get("max_zone_failures_without_losing_data", dataLoss)) {

                        outputString += "\n Fault Tolerance - ";

                        int minLoss = std::min(availLoss, dataLoss);
                        const char* faultDomain = machinesAreZones ? "machine" : "zone";
                        outputString += format("%d %ss", minLoss, faultDomain);

                        if (dataLoss > availLoss) {
                            outputString += format(" (%d without data loss)", dataLoss);
                        }

                        if (dataLoss == -1) {
                            ASSERT_WE_THINK(availLoss == -1);
                            outputString += format(
                                "\n\n Warning: the database may have data loss and availability loss. Please restart "
                                "following tlog interfaces, otherwise storage servers may never be able to catch "
                                "up.\n");
                            StatusObjectReader logs;
                            if (statusObjCluster.has("logs")) {
                                for (StatusObjectReader logEpoch : statusObjCluster.last().get_array()) {
                                    bool possiblyLosingData;
                                    if (logEpoch.get("possibly_losing_data", possiblyLosingData) &&
                                        !possiblyLosingData) {
                                        continue;
                                    }
                                    // Current epoch doesn't have an end version.
                                    int64_t epoch, beginVersion, endVersion = invalidVersion;
                                    bool current;
                                    logEpoch.get("epoch", epoch);
                                    logEpoch.get("begin_version", beginVersion);
                                    logEpoch.get("end_version", endVersion);
                                    logEpoch.get("current", current);
                                    std::string missing_log_interfaces;
                                    if (logEpoch.has("log_interfaces")) {
                                        for (StatusObjectReader logInterface : logEpoch.last().get_array()) {
                                            bool healthy;
                                            std::string address, id;
                                            if (logInterface.get("healthy", healthy) && !healthy) {
                                                logInterface.get("id", id);
                                                logInterface.get("address", address);
                                                missing_log_interfaces += format("%s,%s ", id.c_str(), address.c_str());
                                            }
                                        }
                                    }
                                    outputString += format(
                                        " %s log epoch: %lld begin: %lld end: %s, missing "
                                        "log interfaces(id,address): %s\n",
                                        current ? "Current" : "Old",
                                        epoch,
                                        beginVersion,
                                        endVersion == invalidVersion ? "(unknown)" : format("%lld", endVersion).c_str(),
                                        missing_log_interfaces.c_str());
                                }
                            }
                        }
                    }
                }

                std::string serverTime = getDateInfoString(statusObjCluster, "cluster_controller_timestamp");
                if (serverTime != "") {
                    outputString += "\n Server time - " + serverTime;
                }
            } catch (std::runtime_error&) {
                outputString = outputStringCache;
                outputString += "\n Unable to retrieve cluster status";
            }

            StatusObjectReader statusObjData;
            statusObjCluster.get("data", statusObjData);

            // Data section
            outputString += "\n\nData:";
            outputStringCache = outputString;
            try {
                outputString += "\n Replication health - ";

                StatusObjectReader statusObjDataState;
                statusObjData.get("state", statusObjDataState);

                std::string dataState;
                statusObjDataState.get("name", dataState);

                std::string description = "";
                statusObjDataState.get("description", description);

                bool healthy;
                if (statusObjDataState.get("healthy", healthy) && healthy) {
                    outputString += "Healthy" + (description != "" ? " (" + description + ")" : "");
                } else if (dataState == "missing_data") {
                    outputString += "UNHEALTHY" + (description != "" ? ": " + description : "");
                } else if (dataState == "healing") {
                    outputString += "HEALING" + (description != "" ? ": " + description : "");
                } else if (description != "") {
                    outputString += description;
                } else {
                    outputString += "unknown";
                }

                if (statusObjData.has("moving_data")) {
                    StatusObjectReader movingData = statusObjData.last();
                    double dataInQueue, dataInFlight;
                    if (movingData.get("in_queue_bytes", dataInQueue) &&
                        movingData.get("in_flight_bytes", dataInFlight))
                        outputString += format("\n Moving data - %.3f GB",
                                               ((double)dataInQueue + (double)dataInFlight) / 1e9);
                } else if (dataState == "initializing") {
                    outputString += "\n Moving data - unknown (initializing)";
                } else {
                    outputString += "\n Moving data - unknown";
                }

                outputString += "\n Sum of key-value sizes - ";

                if (statusObjData.has("total_kv_size_bytes")) {
                    double totalDBBytes = statusObjData.last().get_int64();

                    if (totalDBBytes >= 1e12)
                        outputString += format("%.3f TB", (totalDBBytes / 1e12));

                    else if (totalDBBytes >= 1e9)
                        outputString += format("%.3f GB", (totalDBBytes / 1e9));

                    else
                        // no decimal points for MB
                        outputString += format("%d MB", (int)round(totalDBBytes / 1e6));
                } else {
                    outputString += "unknown";
                }

                outputString += "\n Disk space used - ";

                if (statusObjData.has("total_disk_used_bytes")) {
                    double totalDiskUsed = statusObjData.last().get_int64();

                    if (totalDiskUsed >= 1e12)
                        outputString += format("%.3f TB", (totalDiskUsed / 1e12));

                    else if (totalDiskUsed >= 1e9)
                        outputString += format("%.3f GB", (totalDiskUsed / 1e9));

                    else
                        // no decimal points for MB
                        outputString += format("%d MB", (int)round(totalDiskUsed / 1e6));
                } else
                    outputString += "unknown";

            } catch (std::runtime_error&) {
                outputString = outputStringCache;
                outputString += "\n Unable to retrieve data status";
            }
            // Storage Wiggle section
            StatusObjectReader storageWigglerObj;
            std::string storageWigglerString;
            try {
                if (statusObjCluster.get("storage_wiggler", storageWigglerObj)) {
                    int size = 0;
                    if (storageWigglerObj.has("wiggle_server_addresses")) {
                        storageWigglerString += "\n Wiggle server addresses-";
                        for (auto& v : storageWigglerObj.obj().at("wiggle_server_addresses").get_array()) {
                            storageWigglerString += " " + v.get_str();
                            size += 1;
                        }
                    }
                    storageWigglerString += "\n Wiggle server count - " + std::to_string(size);
                }
            } catch (std::runtime_error&) {
                storageWigglerString += "\n Unable to retrieve storage wiggler status";
            }
            if (storageWigglerString.size()) {
                outputString += "\n\nStorage wiggle:";
                outputString += storageWigglerString;
            }

            // Operating space section
            outputString += "\n\nOperating space:";
            std::string operatingSpaceString = "";
            try {
                int64_t val;
                if (statusObjData.get("least_operating_space_bytes_storage_server", val))
                    operatingSpaceString += format("\n Storage server - %.1f GB free on most full server",
                                                   std::max(val / 1e9, 0.0));

                if (statusObjData.get("least_operating_space_bytes_log_server", val))
                    operatingSpaceString += format("\n Log server - %.1f GB free on most full server",
                                                   std::max(val / 1e9, 0.0));

            } catch (std::runtime_error&) {
                operatingSpaceString = "";
            }

            if (operatingSpaceString.empty()) {
                operatingSpaceString += "\n Unable to retrieve operating space status";
            }
            outputString += operatingSpaceString;

            // Workload section
            outputString += "\n\nWorkload:";
            outputStringCache = outputString;
            bool foundLogAndStorage = false;
            try {
                // Determine which rates are unknown
                StatusObjectReader statusObjWorkload;
                statusObjCluster.get("workload", statusObjWorkload);

                std::string performanceLimited = "";
                bool unknownMCT = false;
                bool unknownRP = false;

                // Print performance limit details if known.
                try {
                    StatusObjectReader limit = statusObjCluster["qos.performance_limited_by"];
                    std::string name = limit["name"].get_str();
                    if (name != "workload") {
                        std::string desc = limit["description"].get_str();
                        std::string serverID;
                        limit.get("reason_server_id", serverID);
                        std::string procAddr = getProcessAddressByServerID(processesMap, serverID);
                        performanceLimited = format("\n Performance limited by %s: %s",
                                                    (procAddr == "unknown")
                                                        ? ("server" + (serverID == "" ? "" : (" " + serverID))).c_str()
                                                        : "process",
                                                    desc.c_str());
                        if (procAddr != "unknown")
                            performanceLimited += format("\n Most limiting process: %s", procAddr.c_str());
                    }
                } catch (std::exception&) {
                    // If anything here throws (such as for an incompatible type) ignore it.
                }

                // display the known rates
                outputString += "\n Read rate - ";
                outputString += getWorkloadRates(statusObjWorkload, unknownRP, "reads", "hz");

                outputString += "\n Write rate - ";
                outputString += getWorkloadRates(statusObjWorkload, unknownMCT, "writes", "hz");

                outputString += "\n Transactions started - ";
                outputString += getWorkloadRates(statusObjWorkload, unknownMCT, "started", "hz", true);

                outputString += "\n Transactions committed - ";
                outputString += getWorkloadRates(statusObjWorkload, unknownMCT, "committed", "hz", true);

                outputString += "\n Conflict rate - ";
                outputString += getWorkloadRates(statusObjWorkload, unknownMCT, "conflicted", "hz", true);

                outputString += unknownRP ? "" : performanceLimited;

                // display any process messages
                // FIXME: Above comment is not what this code block does, it actually just looks for a specific message
                // in the process map, *by description*, and adds process addresses that have it to a vector. Either
                // change the comment or the code.
                std::vector<std::string> messagesAddrs;
                for (auto proc : processesMap.obj()) {
                    StatusObjectReader process(proc.second);
                    if (process.has("roles")) {
                        StatusArray rolesArray = proc.second.get_obj()["roles"].get_array();
                        bool storageRole = false;
                        bool logRole = false;
                        for (StatusObjectReader role : rolesArray) {
                            if (role["role"].get_str() == "storage") {
                                storageRole = true;
                            } else if (role["role"].get_str() == "log") {
                                logRole = true;
                            }
                        }
                        if (storageRole && logRole) {
                            foundLogAndStorage = true;
                        }
                    }
                    if (process.has("messages")) {
                        StatusArray processMessagesArr = process.last().get_array();
                        if (processMessagesArr.size()) {
                            for (StatusObjectReader msg : processMessagesArr) {
                                std::string desc;
                                std::string addr;
                                if (msg.get("description", desc) && desc == "Unable to update cluster file." &&
                                    process.get("address", addr)) {
                                    messagesAddrs.push_back(addr);
                                }
                            }
                        }
                    }
                }
                if (messagesAddrs.size()) {
                    outputString += format("\n\n%d FoundationDB processes reported unable to update cluster file:",
                                           messagesAddrs.size());
                    for (auto msg : messagesAddrs) {
                        outputString += "\n " + msg;
                    }
                }
            } catch (std::runtime_error&) {
                outputString = outputStringCache;
                outputString += "\n Unable to retrieve workload status";
            }

            // Backup and DR section
            outputString += "\n\nBackup and DR:";

            std::map<std::string, std::string> backupTags;
            getBackupDRTags(statusObjCluster, "backup", backupTags);

            std::map<std::string, std::string> drPrimaryTags;
            getBackupDRTags(statusObjCluster, "dr_backup", drPrimaryTags);

            std::map<std::string, std::string> drSecondaryTags;
            getBackupDRTags(statusObjCluster, "dr_backup_dest", drSecondaryTags);

            outputString += format("\n Running backups - %d", backupTags.size());
            outputString += format("\n Running DRs - ");

            if (drPrimaryTags.size() == 0 && drSecondaryTags.size() == 0) {
                outputString += format("%d", 0);
            } else {
                if (drPrimaryTags.size() > 0) {
                    outputString += format("%d as primary", drPrimaryTags.size());
                    if (drSecondaryTags.size() > 0) {
                        outputString += ", ";
                    }
                }
                if (drSecondaryTags.size() > 0) {
                    outputString += format("%d as secondary", drSecondaryTags.size());
                }
            }

            // status details
            if (level == StatusClient::DETAILED) {
                outputString += logBackupDR("Running backup tags", backupTags);
                outputString += logBackupDR("Running DR tags (as primary)", drPrimaryTags);
                outputString += logBackupDR("Running DR tags (as secondary)", drSecondaryTags);

                outputString += "\n\nProcess performance details:";
                outputStringCache = outputString;
                try {
                    // constructs process performance details output
                    std::map<NetworkAddress, std::string> workerDetails;
                    for (auto proc : processesMap.obj()) {
                        StatusObjectReader procObj(proc.second);
                        std::string address;
                        procObj.get("address", address);

                        std::string line;

                        NetworkAddress parsedAddress;
                        try {
                            parsedAddress = NetworkAddress::parse(address);
                        } catch (Error&) {
                            // Groups all invalid IP address/port pair in the end of this detail group.
                            line = format(" %-22s (invalid IP address or port)", address.c_str());
                            IPAddress::IPAddressStore maxIp;
                            for (int i = 0; i < maxIp.size(); ++i) {
                                maxIp[i] = std::numeric_limits<std::remove_reference<decltype(maxIp[0])>::type>::max();
                            }
                            std::string& lastline =
                                workerDetails[NetworkAddress(IPAddress(maxIp), std::numeric_limits<uint16_t>::max())];
                            if (!lastline.empty())
                                lastline.append("\n");
                            lastline += line;
                            continue;
                        }

                        try {
                            double tx = -1, rx = -1, mCPUUtil = -1;
                            int64_t processTotalSize;

                            // Get the machine for this process
                            // StatusObjectReader mach = machinesMap[procObj["machine_id"].get_str()];
                            StatusObjectReader mach;
                            if (machinesMap.get(procObj["machine_id"].get_str(), mach, false)) {
                                StatusObjectReader machCPU;
                                if (mach.get("cpu", machCPU)) {

                                    machCPU.get("logical_core_utilization", mCPUUtil);

                                    StatusObjectReader network;
                                    if (mach.get("network", network)) {
                                        network.get("megabits_sent.hz", tx);
                                        network.get("megabits_received.hz", rx);
                                    }
                                }
                            }

                            procObj.get("memory.used_bytes", processTotalSize);

                            StatusObjectReader procCPUObj;
                            procObj.get("cpu", procCPUObj);

                            line = format(" %-22s (", address.c_str());

                            double usageCores;
                            if (procCPUObj.get("usage_cores", usageCores))
                                line += format("%3.0f%% cpu;", usageCores * 100);

                            line += mCPUUtil != -1 ? format("%3.0f%% machine;", mCPUUtil * 100) : "";
                            line += std::min(tx, rx) != -1 ? format("%6.3f Gbps;", std::max(tx, rx) / 1000.0) : "";

                            double diskBusy;
                            if (procObj.get("disk.busy", diskBusy))
                                line += format("%3.0f%% disk IO;", 100.0 * diskBusy);

                            line += processTotalSize != -1
                                        ? format("%4.1f GB", processTotalSize / (1024.0 * 1024 * 1024))
                                        : "";

                            double availableBytes;
                            if (procObj.get("memory.available_bytes", availableBytes))
                                line += format(" / %3.1f GB RAM )", availableBytes / (1024.0 * 1024 * 1024));
                            else
                                line += " )";

                            if (procObj.has("messages")) {
                                for (StatusObjectReader message : procObj.last().get_array()) {
                                    std::string desc;
                                    if (message.get("description", desc)) {
                                        if (message.has("type")) {
                                            line += "\n Last logged error: " + desc;
                                        } else {
                                            line += "\n " + desc;
                                        }
                                    }
                                }
                            }

                            workerDetails[parsedAddress] = line;
                        }

                        catch (std::runtime_error&) {
                            std::string noMetrics = format(" %-22s (no metrics available)", address.c_str());
                            workerDetails[parsedAddress] = noMetrics;
                        }
                    }
                    for (auto w : workerDetails)
                        outputString += "\n" + format("%s", w.second.c_str());
                } catch (std::runtime_error&) {
                    outputString = outputStringCache;
                    outputString += "\n Unable to retrieve process performance details";
                }

                if (!printedCoordinators) {
                    printedCoordinators = true;
                    outputString += "\n\nCoordination servers:";
                    outputString += getCoordinatorsInfoString(statusObj);
                }

                if (blobGranuleEnabled) {
                    outputString += "\n\nBlob Granules:";
                    StatusObjectReader statusObjBlobGranules = statusObjCluster["blob_granules"];
                    auto numWorkers = statusObjBlobGranules["number_of_blob_workers"].get_int();
                    outputString += "\n Number of Workers - " + format("%d", numWorkers);
                    auto numKeyRanges = statusObjBlobGranules["number_of_key_ranges"].get_int();
                    outputString += "\n Number of Key Ranges - " + format("%d", numKeyRanges);
                }
            }

            // client time
            std::string clientTime = getDateInfoString(statusObjClient, "timestamp");
            if (clientTime != "") {
                outputString += "\n\nClient time: " + clientTime;
            }

            if (processesMap.obj().size() > 1 && isOldMemory) {
                outputString += "\n\nWARNING: type `configure memory' to switch to a safer method of persisting data "
                                "on the transaction logs.";
            }
            if (processesMap.obj().size() > 9 && foundLogAndStorage) {
                outputString +=
                    "\n\nWARNING: A single process is both a transaction log and a storage server.\n For best "
                    "performance use dedicated disks for the transaction logs by setting process classes.";
            }

            if (statusObjCluster.has("data_distribution_disabled")) {
                outputString += "\n\nWARNING: Data distribution is off.";
            } else {
                if (statusObjCluster.has("data_distribution_disabled_for_ss_failures")) {
                    outputString += "\n\nWARNING: Data distribution is currently turned on but disabled for all "
                                    "storage server failures.";
                }
                if (statusObjCluster.has("data_distribution_disabled_for_rebalance")) {
                    outputString += "\n\nWARNING: Data distribution is currently turned on but one or both of shard "
                                    "size and read-load based balancing are disabled.";
                    // data_distribution_disabled_hex
                    if (statusObjCluster.has("data_distribution_disabled_hex")) {
                        outputString += " Ignore code: " + statusObjCluster["data_distribution_disabled_hex"].get_str();
                    }
                }
            }

            printf("%s\n", outputString.c_str());
        }

        // status minimal
        else if (level == StatusClient::MINIMAL) {
            // Checking for field existence is not necessary here because if a field is missing there is no additional
            // information that we would be able to display if we continued execution. Instead, any missing fields will
            // throw and the catch will display the proper message.
            try {
                // If any of these throw, can't get status because the result makes no sense.
                StatusObjectReader statusObjClient = statusObj["client"].get_obj();
                StatusObjectReader statusObjClientDatabaseStatus = statusObjClient["database_status"].get_obj();

                bool available = statusObjClientDatabaseStatus["available"].get_bool();

                // Database unavailable
                if (!available) {
                    printf("%s", "The database is unavailable; type `status' for more information.\n");
                } else {
                    try {
                        bool healthy = statusObjClientDatabaseStatus["healthy"].get_bool();

                        // Database available without issues
                        if (healthy) {
                            if (displayDatabaseAvailable) {
                                printf("The database is available.\n");
                            }
                        } else { // Database running but with issues
                            printf("The database is available, but has issues (type 'status' for more information).\n");
                        }
                    } catch (std::runtime_error&) {
                        printf("The database is available, but has issues (type 'status' for more information).\n");
                    }
                }

                bool upToDate;
                if (!statusObjClient.get("cluster_file.up_to_date", upToDate) || !upToDate) {
                    fprintf(stderr,
                            "WARNING: The cluster file is not up to date. Type 'status' for more information.\n");
                }
            } catch (std::runtime_error&) {
                printf("Unable to determine database state, type 'status' for more information.\n");
            }

        }

        // status JSON
        else if (level == StatusClient::JSON) {
            printf("%s\n",
                   json_spirit::write_string(json_spirit::mValue(statusObj.obj()),
                                             json_spirit::Output_options::pretty_print)
                       .c_str());
        }
    } catch (Error&) {
        if (hideErrorMessages)
            return;
        if (level == StatusClient::MINIMAL) {
            printf("Unable to determine database state, type 'status' for more information.\n");
        } else if (level == StatusClient::JSON) {
            printf("Could not retrieve status json.\n\n");
        } else {
            printf("Could not retrieve status, type 'status json' for more information.\n");
        }
    }
}

// "db" is the handle to the multiversion database
// localDb is the native Database object
// localDb is rarely needed, except when "db" has not yet established a connection to the cluster, in which case the
// operation would return Never; since we expect the status command to always return, we fall back to "localDb" to
// produce the default result
ACTOR Future<bool> statusCommandActor(Reference<IDatabase> db,
                                      Database localDb,
                                      std::vector<StringRef> tokens,
                                      bool isExecMode) {

    state StatusClient::StatusLevel level;
    if (tokens.size() == 1)
        level = StatusClient::NORMAL;
    else if (tokens.size() == 2 && tokencmp(tokens[1], "details"))
        level = StatusClient::DETAILED;
    else if (tokens.size() == 2 && tokencmp(tokens[1], "minimal"))
        level = StatusClient::MINIMAL;
    else if (tokens.size() == 2 && tokencmp(tokens[1], "json"))
        level = StatusClient::JSON;
    else {
        printUsage(tokens[0]);
        return false;
    }

    state StatusObject s;
    state Reference<ITransaction> tr = db->createTransaction();
    if (!tr->isValid()) {
        StatusObject _s = wait(StatusClient::statusFetcher(localDb));
        s = _s;
    } else {
        state ThreadFuture<Optional<Value>> statusValueF = tr->get(LiteralStringRef("\xff\xff/status/json"));
        Optional<Value> statusValue = wait(safeThreadFutureToFuture(statusValueF));
        if (!statusValue.present()) {
            fprintf(stderr, "ERROR: Failed to get status json from the cluster\n");
        }
        json_spirit::mValue mv;
        json_spirit::read_string(statusValue.get().toString(), mv);
        s = StatusObject(mv.get_obj());
    }

    if (!isExecMode)
        printf("\n");
    printStatus(s, level);
    if (!isExecMode)
        printf("\n");
    return true;
}

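// Tab-completion generator for the optional detail argument of the status command.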
void statusGenerator(const char* text,
                     const char* line,
                     std::vector<std::string>& lc,
                     std::vector<StringRef> const& tokens) {
    if (tokens.size() == 1) {
        const char* opts[] = { "minimal", "details", "json", nullptr };
        arrayGenerator(text, line, opts, lc);
    }
}

CommandFactory statusFactory(
    "status",
    CommandHelp("status [minimal|details|json]",
                "get the status of a FoundationDB cluster",
                "If the cluster is down, this command will print a diagnostic which may be useful in figuring out "
                "what is wrong. If the cluster is running, this command will print cluster "
                "statistics.\n\nSpecifying `minimal' will provide a minimal description of the status of your "
                "database.\n\nSpecifying `details' will provide load information for individual "
                "workers.\n\nSpecifying `json' will provide status information in a machine readable JSON format."),
    &statusGenerator);
} // namespace fdb_cli