mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-31 18:19:35 +08:00
Merge pull request #6033 from sfc-gh-ajbeamon/improved-client-db-logging
Client logging improvements
This commit is contained in:
commit
c47535245b
@ -500,6 +500,8 @@ public:
|
||||
using TransactionT = ReadYourWritesTransaction;
|
||||
Reference<TransactionT> createTransaction();
|
||||
|
||||
EventCacheHolder connectToDatabaseEventCacheHolder;
|
||||
|
||||
private:
|
||||
std::unordered_map<KeyRef, Reference<WatchMetadata>> watchMap;
|
||||
};
|
||||
|
@ -1754,6 +1754,8 @@ void MultiVersionApi::setNetworkOption(FDBNetworkOptions::Option option, Optiona
|
||||
}
|
||||
|
||||
void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option, Optional<StringRef> value) {
|
||||
bool forwardOption = false;
|
||||
|
||||
auto itr = FDBNetworkOptions::optionInfo.find(option);
|
||||
if (itr != FDBNetworkOptions::optionInfo.end()) {
|
||||
TraceEvent("SetNetworkOption").detail("Option", itr->second.name);
|
||||
@ -1785,6 +1787,7 @@ void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option,
|
||||
ASSERT(!value.present() && !networkStartSetup);
|
||||
externalClient = true;
|
||||
bypassMultiClientApi = true;
|
||||
forwardOption = true;
|
||||
} else if (option == FDBNetworkOptions::CLIENT_THREADS_PER_VERSION) {
|
||||
MutexHolder holder(lock);
|
||||
validateOption(value, true, false, false);
|
||||
@ -1798,6 +1801,10 @@ void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option,
|
||||
threadCount = extractIntOption(value, 1, 1);
|
||||
#endif
|
||||
} else {
|
||||
forwardOption = true;
|
||||
}
|
||||
|
||||
if (forwardOption) {
|
||||
MutexHolder holder(lock);
|
||||
localClient->api->setNetworkOption(option, value);
|
||||
|
||||
@ -1871,13 +1878,13 @@ void MultiVersionApi::setupNetwork() {
|
||||
localClient->api->setupNetwork();
|
||||
}
|
||||
|
||||
localClient->loadProtocolVersion();
|
||||
localClient->loadVersion();
|
||||
|
||||
if (!bypassMultiClientApi) {
|
||||
runOnExternalClientsAllThreads([this](Reference<ClientInfo> client) {
|
||||
TraceEvent("InitializingExternalClient").detail("LibraryPath", client->libPath);
|
||||
client->api->selectApiVersion(apiVersion);
|
||||
client->loadProtocolVersion();
|
||||
client->loadVersion();
|
||||
});
|
||||
|
||||
MutexHolder holder(lock);
|
||||
@ -1925,11 +1932,21 @@ void MultiVersionApi::runNetwork() {
|
||||
|
||||
std::vector<THREAD_HANDLE> handles;
|
||||
if (!bypassMultiClientApi) {
|
||||
runOnExternalClientsAllThreads([&handles](Reference<ClientInfo> client) {
|
||||
if (client->external) {
|
||||
handles.push_back(g_network->startThread(&runNetworkThread, client.getPtr()));
|
||||
}
|
||||
});
|
||||
for (int threadNum = 0; threadNum < threadCount; threadNum++) {
|
||||
runOnExternalClients(threadNum, [&handles, threadNum](Reference<ClientInfo> client) {
|
||||
if (client->external) {
|
||||
std::string threadName = format("fdb-%s-%d", client->releaseVersion.c_str(), threadNum);
|
||||
if (threadName.size() > 15) {
|
||||
threadName = format("fdb-%s", client->releaseVersion.c_str());
|
||||
if (threadName.size() > 15) {
|
||||
threadName = "fdb-external";
|
||||
}
|
||||
}
|
||||
handles.push_back(
|
||||
g_network->startThread(&runNetworkThread, client.getPtr(), 0, threadName.c_str()));
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
localClient->api->runNetwork();
|
||||
@ -2133,19 +2150,24 @@ MultiVersionApi::MultiVersionApi()
|
||||
MultiVersionApi* MultiVersionApi::api = new MultiVersionApi();
|
||||
|
||||
// ClientInfo
|
||||
// Loads this client's version information from api->getClientVersion().
//
// If the client reports "unknown", protocolVersion is set to 0 and releaseVersion to
// "unknown". Otherwise the reported string is wrapped in a ClientVersionRef; its
// protocolVersion field is parsed as hexadecimal into protocolVersion and its
// clientVersion field is stored in releaseVersion.
//
// NOTE(review): two signature lines (loadProtocolVersion / loadVersion) and two
// protocolVersionStr declarations appear together here; this looks like a rendered diff
// showing the pre- and post-rename lines side by side -- confirm which one is live.
void ClientInfo::loadProtocolVersion() {
|
||||
void ClientInfo::loadVersion() {
|
||||
std::string version = api->getClientVersion();
|
||||
// Clients that cannot report a version keep the "unknown" placeholders.
if (version == "unknown") {
|
||||
protocolVersion = ProtocolVersion(0);
|
||||
releaseVersion = "unknown";
|
||||
return;
|
||||
}
|
||||
|
||||
Standalone<ClientVersionRef> clientVersion = ClientVersionRef(StringRef(version));
|
||||
|
||||
// strtoull stores in `next` a pointer to the first character it did not consume.
char* next;
|
||||
std::string protocolVersionStr = ClientVersionRef(StringRef(version)).protocolVersion.toString();
|
||||
std::string protocolVersionStr = clientVersion.protocolVersion.toString();
|
||||
protocolVersion = ProtocolVersion(strtoull(protocolVersionStr.c_str(), &next, 16));
|
||||
|
||||
// strtoull yields 0 when nothing could be converted and ULLONG_MAX on overflow; reject both.
ASSERT(protocolVersion.version() != 0 && protocolVersion.version() != ULLONG_MAX);
|
||||
// The parse must have consumed the entire string (next points at its terminator).
ASSERT_EQ(next, &protocolVersionStr[protocolVersionStr.length()]);
|
||||
|
||||
releaseVersion = clientVersion.clientVersion.toString();
|
||||
}
|
||||
|
||||
bool ClientInfo::canReplace(Reference<ClientInfo> other) const {
|
||||
|
@ -472,6 +472,7 @@ struct ClientDesc {
|
||||
|
||||
struct ClientInfo : ClientDesc, ThreadSafeReferenceCounted<ClientInfo> {
|
||||
ProtocolVersion protocolVersion;
|
||||
std::string releaseVersion = "unknown";
|
||||
IClientApi* api;
|
||||
bool failed;
|
||||
std::atomic_bool initialized;
|
||||
@ -484,7 +485,7 @@ struct ClientInfo : ClientDesc, ThreadSafeReferenceCounted<ClientInfo> {
|
||||
ClientInfo(IClientApi* api, std::string libPath)
|
||||
: ClientDesc(libPath, true), protocolVersion(0), api(api), failed(false), initialized(false) {}
|
||||
|
||||
void loadProtocolVersion();
|
||||
void loadVersion();
|
||||
bool canReplace(Reference<ClientInfo> other) const;
|
||||
};
|
||||
|
||||
|
@ -127,8 +127,8 @@ TLSConfig tlsConfig(TLSEndpointType::CLIENT);
|
||||
// Default-constructs the client network options: default trace roll size and max logs
// size, trace log group "default", trace format "xml", trace clock source "now", an empty
// supportedVersions list, and run-loop profiling disabled.
//
// The second initializer tail below additionally sets primaryClient to true; elsewhere in
// this view, setNetworkOption() sets primaryClient to false when it handles
// FDBNetworkOptions::EXTERNAL_CLIENT.
//
// NOTE(review): both the old tail (ending in `{ }`) and the new tail (ending in
// `primaryClient(true) {}`) appear here; this looks like a rendered diff -- confirm which
// one is live.
NetworkOptions::NetworkOptions()
|
||||
: traceRollSize(TRACE_DEFAULT_ROLL_SIZE), traceMaxLogsSize(TRACE_DEFAULT_MAX_LOGS_SIZE), traceLogGroup("default"),
|
||||
traceFormat("xml"), traceClockSource("now"),
|
||||
supportedVersions(new ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>()), runLoopProfilingEnabled(false) {
|
||||
}
|
||||
supportedVersions(new ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>()), runLoopProfilingEnabled(false),
|
||||
primaryClient(true) {}
|
||||
|
||||
static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/");
|
||||
static const Key CLIENT_LATENCY_INFO_CTR_PREFIX = LiteralStringRef("client_latency_counter/");
|
||||
@ -1229,7 +1229,8 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
|
||||
clientInfo(clientInfo), clientInfoMonitor(clientInfoMonitor), coordinator(coordinator), apiVersion(apiVersion),
|
||||
mvCacheInsertLocation(0), healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0),
|
||||
smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
|
||||
specialKeySpace(std::make_unique<SpecialKeySpace>(specialKeys.begin, specialKeys.end, /* test */ false)) {
|
||||
specialKeySpace(std::make_unique<SpecialKeySpace>(specialKeys.begin, specialKeys.end, /* test */ false)),
|
||||
connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) {
|
||||
dbId = deterministicRandom()->randomUniqueID();
|
||||
connected = (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size())
|
||||
? Void()
|
||||
@ -1481,7 +1482,8 @@ DatabaseContext::DatabaseContext(const Error& err)
|
||||
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc),
|
||||
transactionGrvFullBatches("NumGrvFullBatches", cc), transactionGrvTimedOutBatches("NumGrvTimedOutBatches", cc),
|
||||
latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000),
|
||||
bytesPerCommit(1000), transactionTracingSample(false), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT) {}
|
||||
bytesPerCommit(1000), transactionTracingSample(false), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
|
||||
connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) {}
|
||||
|
||||
// Static constructor used by server processes to create a DatabaseContext
|
||||
// For internal (fdbserver) use only
|
||||
@ -1795,6 +1797,8 @@ Database Database::createDatabase(Reference<IClusterConnectionRecord> connRecord
|
||||
if (!g_network)
|
||||
throw network_not_setup();
|
||||
|
||||
platform::ImageInfo imageInfo = platform::getImageInfo();
|
||||
|
||||
if (connRecord) {
|
||||
if (networkOptions.traceDirectory.present() && !traceFileIsOpen()) {
|
||||
g_network->initMetrics();
|
||||
@ -1817,11 +1821,11 @@ Database Database::createDatabase(Reference<IClusterConnectionRecord> connRecord
|
||||
.detail("SourceVersion", getSourceVersion())
|
||||
.detail("Version", FDB_VT_VERSION)
|
||||
.detail("PackageName", FDB_VT_PACKAGE_NAME)
|
||||
.detail("ClusterFile", connRecord->toString())
|
||||
.detail("ConnectionString", connRecord->getConnectionString().toString())
|
||||
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
|
||||
.detail("ApiVersion", apiVersion)
|
||||
.detailf("ImageOffset", "%p", platform::getImageOffset())
|
||||
.detail("ClientLibrary", imageInfo.fileName)
|
||||
.detailf("ImageOffset", "%p", imageInfo.offset)
|
||||
.detail("Primary", networkOptions.primaryClient)
|
||||
.trackLatest("ClientStart");
|
||||
|
||||
initializeSystemMonitorMachineState(SystemMonitorMachineState(IPAddress(publicIP)));
|
||||
@ -1875,6 +1879,16 @@ Database Database::createDatabase(Reference<IClusterConnectionRecord> connRecord
|
||||
database, Reference<AsyncVar<ClientDBInfo> const>(clientInfo), std::addressof(clientInfo->get()));
|
||||
GlobalConfig::globalConfig().trigger(samplingFrequency, samplingProfilerUpdateFrequency);
|
||||
GlobalConfig::globalConfig().trigger(samplingWindow, samplingProfilerUpdateWindow);
|
||||
|
||||
TraceEvent("ConnectToDatabase", database->dbId)
|
||||
.detail("Version", FDB_VT_VERSION)
|
||||
.detail("ClusterFile", connRecord->toString())
|
||||
.detail("ConnectionString", connRecord->getConnectionString().toString())
|
||||
.detail("ClientLibrary", imageInfo.fileName)
|
||||
.detail("Primary", networkOptions.primaryClient)
|
||||
.detail("Internal", internal)
|
||||
.trackLatest(database->connectToDatabaseEventCacheHolder.trackingKey);
|
||||
|
||||
return database;
|
||||
}
|
||||
|
||||
@ -2097,6 +2111,9 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> valu
|
||||
}
|
||||
break;
|
||||
}
|
||||
case FDBNetworkOptions::EXTERNAL_CLIENT:
|
||||
networkOptions.primaryClient = false;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -4140,7 +4157,6 @@ ACTOR Future<Void> getRangeStream(PromiseStream<RangeResult> _results,
|
||||
Reverse reverse,
|
||||
TransactionInfo info,
|
||||
TagSet tags) {
|
||||
|
||||
state ParallelStream<RangeResult> results(_results, CLIENT_KNOBS->RANGESTREAM_BUFFERED_FRAGMENTS_LIMIT);
|
||||
|
||||
// FIXME: better handling to disable row limits
|
||||
@ -4611,7 +4627,6 @@ Future<RangeResult> Transaction::getRangeAndFlatMap(const KeySelector& begin,
|
||||
GetRangeLimits limits,
|
||||
Snapshot snapshot,
|
||||
Reverse reverse) {
|
||||
|
||||
return getRangeInternal<GetKeyValuesAndFlatMapRequest, GetKeyValuesAndFlatMapReply>(
|
||||
begin, end, mapper, limits, snapshot, reverse);
|
||||
}
|
||||
@ -6029,7 +6044,6 @@ ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordina
|
||||
ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
|
||||
NetworkAddress coordinatorAddress,
|
||||
Optional<ProtocolVersion> expectedVersion) {
|
||||
|
||||
state Reference<AsyncVar<Optional<ProtocolVersion>> const> protocolVersion =
|
||||
FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress);
|
||||
|
||||
@ -6057,7 +6071,6 @@ ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
|
||||
ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
|
||||
Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator,
|
||||
Optional<ProtocolVersion> expectedVersion) {
|
||||
|
||||
state bool needToConnect = true;
|
||||
state Future<ProtocolVersion> protocolVersion = Never();
|
||||
|
||||
|
@ -72,6 +72,7 @@ struct NetworkOptions {
|
||||
Optional<bool> logClientInfo;
|
||||
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions;
|
||||
bool runLoopProfilingEnabled;
|
||||
bool primaryClient;
|
||||
std::map<std::string, KnobValue> knobs;
|
||||
|
||||
NetworkOptions();
|
||||
|
@ -3175,16 +3175,9 @@ extern "C" void flushAndExit(int exitCode) {
|
||||
#include <link.h>
|
||||
#endif
|
||||
|
||||
// File-local description of a loaded image as returned by getImageInfo(const void*).
// NOTE(review): a platform::ImageInfo with an extra fileName member is declared in the
// header hunk of this same view; this definition looks like the pre-change version --
// confirm whether it is still present in the file.
struct ImageInfo {
|
||||
// Reported through platform::getImageOffset(); starts out null.
void* offset;
|
||||
// Name of the debug-symbol file for the image; starts out empty.
std::string symbolFileName;
|
||||
|
||||
ImageInfo() : offset(nullptr), symbolFileName("") {}
|
||||
};
|
||||
|
||||
ImageInfo getImageInfo(const void* symbol) {
|
||||
platform::ImageInfo getImageInfo(const void* symbol) {
|
||||
Dl_info info;
|
||||
ImageInfo imageInfo;
|
||||
platform::ImageInfo imageInfo;
|
||||
|
||||
#ifdef __linux__
|
||||
link_map* linkMap = nullptr;
|
||||
@ -3194,6 +3187,7 @@ ImageInfo getImageInfo(const void* symbol) {
|
||||
#endif
|
||||
|
||||
if (res != 0) {
|
||||
imageInfo.fileName = info.dli_fname;
|
||||
std::string imageFile = basename(info.dli_fname);
|
||||
// If we have a client library that doesn't end in the appropriate extension, we will get the wrong debug
|
||||
// suffix. This should only be a cosmetic problem, though.
|
||||
@ -3211,25 +3205,23 @@ ImageInfo getImageInfo(const void* symbol) {
|
||||
else {
|
||||
imageInfo.symbolFileName = imageFile + ".debug";
|
||||
}
|
||||
} else {
|
||||
imageInfo.symbolFileName = "unknown";
|
||||
}
|
||||
|
||||
return imageInfo;
|
||||
}
|
||||
|
||||
// Returns the image information for the image that contains this function itself.
// The lookup (getImageInfo on this function's own address) initializes a function-local
// static, so it is performed once; each call returns a copy of that stored value.
//
// NOTE(review): the signature and the static declaration each appear twice (with and
// without the platform:: qualifier); this looks like a rendered diff -- confirm which
// spelling is live.
ImageInfo getCachedImageInfo() {
|
||||
platform::ImageInfo getCachedImageInfo() {
|
||||
// The use of "getCachedImageInfo" is arbitrary and was a best guess at a good way to get the image of the
|
||||
// most likely candidate for the "real" flow library or binary
|
||||
static ImageInfo info = getImageInfo((const void*)&getCachedImageInfo);
|
||||
static platform::ImageInfo info = getImageInfo((const void*)&getCachedImageInfo);
|
||||
return info;
|
||||
}
|
||||
|
||||
#include <execinfo.h>
|
||||
|
||||
namespace platform {
|
||||
// Public accessor in namespace platform for the cached image information.
// The getImageOffset form returns only the `offset` member of the cached info; the
// getImageInfo form returns the whole cached ImageInfo by value.
//
// NOTE(review): both signatures and both return statements share a single closing brace
// here; this looks like a rendered diff in which getImageOffset() was replaced by
// getImageInfo() -- confirm which one is live.
void* getImageOffset() {
|
||||
return getCachedImageInfo().offset;
|
||||
ImageInfo getImageInfo() {
|
||||
return getCachedImageInfo();
|
||||
}
|
||||
|
||||
size_t raw_backtrace(void** addresses, int maxStackDepth) {
|
||||
@ -3272,8 +3264,8 @@ std::string get_backtrace() {
|
||||
std::string format_backtrace(void** addresses, int numAddresses) {
|
||||
return std::string();
|
||||
}
|
||||
void* getImageOffset() {
|
||||
return nullptr;
|
||||
ImageInfo getImageInfo() {
|
||||
return ImageInfo();
|
||||
}
|
||||
} // namespace platform
|
||||
#endif
|
||||
|
@ -392,7 +392,13 @@ std::string getDefaultConfigPath();
|
||||
// Returns the absolute platform-dependant path for the default fdb.cluster file
|
||||
std::string getDefaultClusterFilePath();
|
||||
|
||||
void* getImageOffset();
|
||||
// Describes the loaded image (library or binary) reported by getImageInfo().
// Every member has a default, so a default-constructed ImageInfo stands for "nothing known".
struct ImageInfo {
|
||||
// Logged as "ImageOffset" in the ClientStart trace event. Presumably the address at which
// the image is loaded -- the assignment is not visible in this view; TODO confirm.
void* offset = nullptr;
|
||||
// File name of the image; filled from Dl_info::dli_fname when the image lookup succeeds.
// Logged as "ClientLibrary" in the ClientStart and ConnectToDatabase trace events.
std::string fileName = "unknown";
|
||||
// Name of the debug-symbol file, derived from the image's base name (e.g. with a
// ".debug" suffix appended) when the image lookup succeeds.
std::string symbolFileName = "unknown";
|
||||
};
|
||||
|
||||
ImageInfo getImageInfo();
|
||||
|
||||
// Places the frame pointers in a string formatted as parameters for addr2line.
|
||||
size_t raw_backtrace(void** addresses, int maxStackDepth);
|
||||
|
@ -318,7 +318,7 @@ public:
|
||||
writer = Reference<IThreadPool>(new DummyThreadPool());
|
||||
else
|
||||
writer = createGenericThreadPool();
|
||||
writer->addThread(new WriterThread(barriers, logWriter, formatter));
|
||||
writer->addThread(new WriterThread(barriers, logWriter, formatter), "fdb-trace-log");
|
||||
|
||||
rollsize = rs;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user