1
0
mirror of https://github.com/apple/foundationdb.git synced 2025-05-31 18:19:35 +08:00

Merge pull request from sfc-gh-ajbeamon/improved-client-db-logging

Client logging improvements
This commit is contained in:
A.J. Beamon 2021-11-29 13:23:10 -08:00 committed by GitHub
commit c47535245b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 77 additions and 40 deletions

@ -500,6 +500,8 @@ public:
using TransactionT = ReadYourWritesTransaction;
Reference<TransactionT> createTransaction();
EventCacheHolder connectToDatabaseEventCacheHolder;
private:
std::unordered_map<KeyRef, Reference<WatchMetadata>> watchMap;
};

@ -1754,6 +1754,8 @@ void MultiVersionApi::setNetworkOption(FDBNetworkOptions::Option option, Optiona
}
void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option, Optional<StringRef> value) {
bool forwardOption = false;
auto itr = FDBNetworkOptions::optionInfo.find(option);
if (itr != FDBNetworkOptions::optionInfo.end()) {
TraceEvent("SetNetworkOption").detail("Option", itr->second.name);
@ -1785,6 +1787,7 @@ void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option,
ASSERT(!value.present() && !networkStartSetup);
externalClient = true;
bypassMultiClientApi = true;
forwardOption = true;
} else if (option == FDBNetworkOptions::CLIENT_THREADS_PER_VERSION) {
MutexHolder holder(lock);
validateOption(value, true, false, false);
@ -1798,6 +1801,10 @@ void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option,
threadCount = extractIntOption(value, 1, 1);
#endif
} else {
forwardOption = true;
}
if (forwardOption) {
MutexHolder holder(lock);
localClient->api->setNetworkOption(option, value);
@ -1871,13 +1878,13 @@ void MultiVersionApi::setupNetwork() {
localClient->api->setupNetwork();
}
localClient->loadProtocolVersion();
localClient->loadVersion();
if (!bypassMultiClientApi) {
runOnExternalClientsAllThreads([this](Reference<ClientInfo> client) {
TraceEvent("InitializingExternalClient").detail("LibraryPath", client->libPath);
client->api->selectApiVersion(apiVersion);
client->loadProtocolVersion();
client->loadVersion();
});
MutexHolder holder(lock);
@ -1925,11 +1932,21 @@ void MultiVersionApi::runNetwork() {
std::vector<THREAD_HANDLE> handles;
if (!bypassMultiClientApi) {
runOnExternalClientsAllThreads([&handles](Reference<ClientInfo> client) {
if (client->external) {
handles.push_back(g_network->startThread(&runNetworkThread, client.getPtr()));
}
});
for (int threadNum = 0; threadNum < threadCount; threadNum++) {
runOnExternalClients(threadNum, [&handles, threadNum](Reference<ClientInfo> client) {
if (client->external) {
std::string threadName = format("fdb-%s-%d", client->releaseVersion.c_str(), threadNum);
if (threadName.size() > 15) {
threadName = format("fdb-%s", client->releaseVersion.c_str());
if (threadName.size() > 15) {
threadName = "fdb-external";
}
}
handles.push_back(
g_network->startThread(&runNetworkThread, client.getPtr(), 0, threadName.c_str()));
}
});
}
}
localClient->api->runNetwork();
@ -2133,19 +2150,24 @@ MultiVersionApi::MultiVersionApi()
MultiVersionApi* MultiVersionApi::api = new MultiVersionApi();
// ClientInfo
void ClientInfo::loadProtocolVersion() {
void ClientInfo::loadVersion() {
std::string version = api->getClientVersion();
if (version == "unknown") {
protocolVersion = ProtocolVersion(0);
releaseVersion = "unknown";
return;
}
Standalone<ClientVersionRef> clientVersion = ClientVersionRef(StringRef(version));
char* next;
std::string protocolVersionStr = ClientVersionRef(StringRef(version)).protocolVersion.toString();
std::string protocolVersionStr = clientVersion.protocolVersion.toString();
protocolVersion = ProtocolVersion(strtoull(protocolVersionStr.c_str(), &next, 16));
ASSERT(protocolVersion.version() != 0 && protocolVersion.version() != ULLONG_MAX);
ASSERT_EQ(next, &protocolVersionStr[protocolVersionStr.length()]);
releaseVersion = clientVersion.clientVersion.toString();
}
bool ClientInfo::canReplace(Reference<ClientInfo> other) const {

@ -472,6 +472,7 @@ struct ClientDesc {
struct ClientInfo : ClientDesc, ThreadSafeReferenceCounted<ClientInfo> {
ProtocolVersion protocolVersion;
std::string releaseVersion = "unknown";
IClientApi* api;
bool failed;
std::atomic_bool initialized;
@ -484,7 +485,7 @@ struct ClientInfo : ClientDesc, ThreadSafeReferenceCounted<ClientInfo> {
ClientInfo(IClientApi* api, std::string libPath)
: ClientDesc(libPath, true), protocolVersion(0), api(api), failed(false), initialized(false) {}
void loadProtocolVersion();
void loadVersion();
bool canReplace(Reference<ClientInfo> other) const;
};

@ -127,8 +127,8 @@ TLSConfig tlsConfig(TLSEndpointType::CLIENT);
NetworkOptions::NetworkOptions()
: traceRollSize(TRACE_DEFAULT_ROLL_SIZE), traceMaxLogsSize(TRACE_DEFAULT_MAX_LOGS_SIZE), traceLogGroup("default"),
traceFormat("xml"), traceClockSource("now"),
supportedVersions(new ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>()), runLoopProfilingEnabled(false) {
}
supportedVersions(new ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>()), runLoopProfilingEnabled(false),
primaryClient(true) {}
static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/");
static const Key CLIENT_LATENCY_INFO_CTR_PREFIX = LiteralStringRef("client_latency_counter/");
@ -1229,7 +1229,8 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
clientInfo(clientInfo), clientInfoMonitor(clientInfoMonitor), coordinator(coordinator), apiVersion(apiVersion),
mvCacheInsertLocation(0), healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0),
smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
specialKeySpace(std::make_unique<SpecialKeySpace>(specialKeys.begin, specialKeys.end, /* test */ false)) {
specialKeySpace(std::make_unique<SpecialKeySpace>(specialKeys.begin, specialKeys.end, /* test */ false)),
connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) {
dbId = deterministicRandom()->randomUniqueID();
connected = (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size())
? Void()
@ -1481,7 +1482,8 @@ DatabaseContext::DatabaseContext(const Error& err)
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc),
transactionGrvFullBatches("NumGrvFullBatches", cc), transactionGrvTimedOutBatches("NumGrvTimedOutBatches", cc),
latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000),
bytesPerCommit(1000), transactionTracingSample(false), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT) {}
bytesPerCommit(1000), transactionTracingSample(false), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) {}
// Static constructor used by server processes to create a DatabaseContext
// For internal (fdbserver) use only
@ -1795,6 +1797,8 @@ Database Database::createDatabase(Reference<IClusterConnectionRecord> connRecord
if (!g_network)
throw network_not_setup();
platform::ImageInfo imageInfo = platform::getImageInfo();
if (connRecord) {
if (networkOptions.traceDirectory.present() && !traceFileIsOpen()) {
g_network->initMetrics();
@ -1817,11 +1821,11 @@ Database Database::createDatabase(Reference<IClusterConnectionRecord> connRecord
.detail("SourceVersion", getSourceVersion())
.detail("Version", FDB_VT_VERSION)
.detail("PackageName", FDB_VT_PACKAGE_NAME)
.detail("ClusterFile", connRecord->toString())
.detail("ConnectionString", connRecord->getConnectionString().toString())
.detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
.detail("ApiVersion", apiVersion)
.detailf("ImageOffset", "%p", platform::getImageOffset())
.detail("ClientLibrary", imageInfo.fileName)
.detailf("ImageOffset", "%p", imageInfo.offset)
.detail("Primary", networkOptions.primaryClient)
.trackLatest("ClientStart");
initializeSystemMonitorMachineState(SystemMonitorMachineState(IPAddress(publicIP)));
@ -1875,6 +1879,16 @@ Database Database::createDatabase(Reference<IClusterConnectionRecord> connRecord
database, Reference<AsyncVar<ClientDBInfo> const>(clientInfo), std::addressof(clientInfo->get()));
GlobalConfig::globalConfig().trigger(samplingFrequency, samplingProfilerUpdateFrequency);
GlobalConfig::globalConfig().trigger(samplingWindow, samplingProfilerUpdateWindow);
TraceEvent("ConnectToDatabase", database->dbId)
.detail("Version", FDB_VT_VERSION)
.detail("ClusterFile", connRecord->toString())
.detail("ConnectionString", connRecord->getConnectionString().toString())
.detail("ClientLibrary", imageInfo.fileName)
.detail("Primary", networkOptions.primaryClient)
.detail("Internal", internal)
.trackLatest(database->connectToDatabaseEventCacheHolder.trackingKey);
return database;
}
@ -2097,6 +2111,9 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> valu
}
break;
}
case FDBNetworkOptions::EXTERNAL_CLIENT:
networkOptions.primaryClient = false;
break;
default:
break;
}
@ -4140,7 +4157,6 @@ ACTOR Future<Void> getRangeStream(PromiseStream<RangeResult> _results,
Reverse reverse,
TransactionInfo info,
TagSet tags) {
state ParallelStream<RangeResult> results(_results, CLIENT_KNOBS->RANGESTREAM_BUFFERED_FRAGMENTS_LIMIT);
// FIXME: better handling to disable row limits
@ -4611,7 +4627,6 @@ Future<RangeResult> Transaction::getRangeAndFlatMap(const KeySelector& begin,
GetRangeLimits limits,
Snapshot snapshot,
Reverse reverse) {
return getRangeInternal<GetKeyValuesAndFlatMapRequest, GetKeyValuesAndFlatMapReply>(
begin, end, mapper, limits, snapshot, reverse);
}
@ -6029,7 +6044,6 @@ ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordina
ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
NetworkAddress coordinatorAddress,
Optional<ProtocolVersion> expectedVersion) {
state Reference<AsyncVar<Optional<ProtocolVersion>> const> protocolVersion =
FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress);
@ -6057,7 +6071,6 @@ ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator,
Optional<ProtocolVersion> expectedVersion) {
state bool needToConnect = true;
state Future<ProtocolVersion> protocolVersion = Never();

@ -72,6 +72,7 @@ struct NetworkOptions {
Optional<bool> logClientInfo;
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions;
bool runLoopProfilingEnabled;
bool primaryClient;
std::map<std::string, KnobValue> knobs;
NetworkOptions();

@ -3175,16 +3175,9 @@ extern "C" void flushAndExit(int exitCode) {
#include <link.h>
#endif
// Describes a loaded binary image: where it sits in memory and which file is
// expected to hold its debug symbols.
struct ImageInfo {
    // Offset of the image; null until it has been looked up.
    void* offset;
    // Name of the debug-symbol file for the image; empty until it has been looked up.
    std::string symbolFileName;

    // Starts out with a null offset and an empty symbol file name.
    ImageInfo() : offset{ nullptr }, symbolFileName{} {}
};
ImageInfo getImageInfo(const void* symbol) {
platform::ImageInfo getImageInfo(const void* symbol) {
Dl_info info;
ImageInfo imageInfo;
platform::ImageInfo imageInfo;
#ifdef __linux__
link_map* linkMap = nullptr;
@ -3194,6 +3187,7 @@ ImageInfo getImageInfo(const void* symbol) {
#endif
if (res != 0) {
imageInfo.fileName = info.dli_fname;
std::string imageFile = basename(info.dli_fname);
// If we have a client library that doesn't end in the appropriate extension, we will get the wrong debug
// suffix. This should only be a cosmetic problem, though.
@ -3211,25 +3205,23 @@ ImageInfo getImageInfo(const void* symbol) {
else {
imageInfo.symbolFileName = imageFile + ".debug";
}
} else {
imageInfo.symbolFileName = "unknown";
}
return imageInfo;
}
ImageInfo getCachedImageInfo() {
platform::ImageInfo getCachedImageInfo() {
// The use of "getCachedImageInfo" is arbitrary and was a best guess at a good way to get the image of the
// most likely candidate for the "real" flow library or binary
static ImageInfo info = getImageInfo((const void*)&getCachedImageInfo);
static platform::ImageInfo info = getImageInfo((const void*)&getCachedImageInfo);
return info;
}
#include <execinfo.h>
namespace platform {
void* getImageOffset() {
return getCachedImageInfo().offset;
ImageInfo getImageInfo() {
return getCachedImageInfo();
}
size_t raw_backtrace(void** addresses, int maxStackDepth) {
@ -3272,8 +3264,8 @@ std::string get_backtrace() {
std::string format_backtrace(void** addresses, int numAddresses) {
return std::string();
}
void* getImageOffset() {
return nullptr;
ImageInfo getImageInfo() {
return ImageInfo();
}
} // namespace platform
#endif

@ -392,7 +392,13 @@ std::string getDefaultConfigPath();
// Returns the absolute platform-dependent path for the default fdb.cluster file
std::string getDefaultClusterFilePath();
void* getImageOffset();
// Describes the binary image (executable or shared library) that this code was
// loaded from. A default-constructed instance carries no image data: a null
// offset and "unknown" for both names.
struct ImageInfo {
    // Offset of the image (presumably its load address in memory; confirm against getImageInfo).
    void* offset{ nullptr };
    // Path of the image file, as reported by the dynamic loader where available.
    std::string fileName{ "unknown" };
    // Name of the file expected to contain the image's debug symbols.
    std::string symbolFileName{ "unknown" };
};
ImageInfo getImageInfo();
// Places the frame pointers in a string formatted as parameters for addr2line.
size_t raw_backtrace(void** addresses, int maxStackDepth);

@ -318,7 +318,7 @@ public:
writer = Reference<IThreadPool>(new DummyThreadPool());
else
writer = createGenericThreadPool();
writer->addThread(new WriterThread(barriers, logWriter, formatter));
writer->addThread(new WriterThread(barriers, logWriter, formatter), "fdb-trace-log");
rollsize = rs;