mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-15 02:18:39 +08:00
* Use localhost cluster for trace_partial_file_suffix_test. This way we get a predictable 127.0.0.1 in the trace file name. * Skip suspend test if pidof is not available. * Avoid writing to closed trace log: calling fdb_network_stop sends a "close" message to the trace thread, but the network thread might still be running and sending "flush" messages to the trace thread. This change basically ignores any flushes that come after a close. * Ensure unique ports for multi-process tests.
273 lines
8.2 KiB
C++
273 lines
8.2 KiB
C++
/*
|
|
* FileTraceLogWriter.cpp
|
|
*
|
|
* This source file is part of the FoundationDB open source project
|
|
*
|
|
* Copyright 2018 Apple Inc. and the FoundationDB project authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "flow/FileTraceLogWriter.h"
|
|
#include "flow/Platform.h"
|
|
#include "flow/flow.h"
|
|
#include "flow/ThreadHelper.actor.h"
|
|
|
|
#if defined(__unixish__)
|
|
#define __open ::open
|
|
#define __write ::write
|
|
#define __close ::close
|
|
#define __fsync ::fsync
|
|
#define TRACEFILE_FLAGS O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC
|
|
#define TRACEFILE_MODE 0664
|
|
#elif defined(_WIN32)
|
|
//#include <windows.h>
|
|
//#undef max
|
|
//#undef min
|
|
#include <io.h>
|
|
#include <stdio.h>
|
|
#include <sys/stat.h>
|
|
#define __open _open
|
|
#define __write _write
|
|
#define __close _close
|
|
#define __fsync _commit
|
|
#define TRACEFILE_FLAGS _O_WRONLY | _O_CREAT | _O_EXCL
|
|
#define TRACEFILE_MODE _S_IWRITE
|
|
#endif
|
|
|
|
#include <fcntl.h>
|
|
#include <cmath>
|
|
|
|
struct IssuesListImpl {
|
|
IssuesListImpl() {}
|
|
void addIssue(std::string const& issue) {
|
|
MutexHolder h(mutex);
|
|
issues.insert(issue);
|
|
}
|
|
|
|
void retrieveIssues(std::set<std::string>& out) const {
|
|
MutexHolder h(mutex);
|
|
for (auto const& i : issues) {
|
|
out.insert(i);
|
|
}
|
|
}
|
|
|
|
void resolveIssue(std::string const& issue) {
|
|
MutexHolder h(mutex);
|
|
issues.erase(issue);
|
|
}
|
|
|
|
private:
|
|
mutable Mutex mutex;
|
|
std::set<std::string> issues;
|
|
};
|
|
|
|
IssuesList::IssuesList() : impl(std::make_unique<IssuesListImpl>()) {}
|
|
IssuesList::~IssuesList() = default;
|
|
void IssuesList::addIssue(std::string const& issue) {
|
|
impl->addIssue(issue);
|
|
}
|
|
void IssuesList::retrieveIssues(std::set<std::string>& out) const {
|
|
impl->retrieveIssues(out);
|
|
}
|
|
void IssuesList::resolveIssue(std::string const& issue) {
|
|
impl->resolveIssue(issue);
|
|
}
|
|
|
|
// Stores the configuration only; no file is opened here. The descriptor starts out
// as -1 and the file index as 0.
FileTraceLogWriter::FileTraceLogWriter(std::string const& directory,
                                       std::string const& processName,
                                       std::string const& basename,
                                       std::string const& extension,
                                       std::string const& tracePartialFileSuffix,
                                       uint64_t maxLogsSize,
                                       std::function<void()> const& onError,
                                       Reference<ITraceLogIssuesReporter> const& issues)
  : directory(directory),
    processName(processName),
    basename(basename),
    extension(extension),
    tracePartialFileSuffix(tracePartialFileSuffix),
    maxLogsSize(maxLogsSize),
    traceFileFD(-1),
    index(0),
    issues(issues),
    onError(onError) {}
|
|
|
|
void FileTraceLogWriter::addref() {
|
|
ReferenceCounted<FileTraceLogWriter>::addref();
|
|
}
|
|
|
|
void FileTraceLogWriter::delref() {
|
|
ReferenceCounted<FileTraceLogWriter>::delref();
|
|
}
|
|
|
|
void FileTraceLogWriter::lastError(int err) {
|
|
// Whenever we get a serious error writing a trace log, all flush barriers posted between the operation encountering
|
|
// the error and the occurrence of the error are unblocked, even though we haven't actually succeeded in flushing.
|
|
// Otherwise a permanent write error would make the program block forever.
|
|
if (err != 0 && err != EINTR) {
|
|
onError();
|
|
}
|
|
}
|
|
|
|
// Writes the contents of `str` through the (pointer, length) overload.
void FileTraceLogWriter::write(const std::string& str) {
	const char* bytes = str.data();
	const size_t count = str.size();
	write(bytes, count);
}
|
|
|
|
// Writes the bytes referenced by `str` through the (pointer, length) overload.
void FileTraceLogWriter::write(const StringRef& str) {
	const auto* bytes = reinterpret_cast<const char*>(str.begin());
	write(bytes, str.size());
}
|
|
|
|
void FileTraceLogWriter::write(const char* str, size_t len) {
|
|
if (traceFileFD < 0) {
|
|
return;
|
|
}
|
|
auto ptr = str;
|
|
int remaining = len;
|
|
bool needsResolve = false;
|
|
|
|
while (remaining) {
|
|
int ret = __write(traceFileFD, ptr, remaining);
|
|
if (ret > 0) {
|
|
lastError(0);
|
|
remaining -= ret;
|
|
ptr += ret;
|
|
if (needsResolve) {
|
|
issues->resolveIssue("trace_log_file_write_error");
|
|
needsResolve = false;
|
|
}
|
|
} else {
|
|
issues->addIssue("trace_log_file_write_error");
|
|
needsResolve = true;
|
|
fprintf(stderr, "Unexpected error [%d] when flushing trace log.\n", errno);
|
|
lastError(errno);
|
|
threadSleep(0.1);
|
|
}
|
|
}
|
|
}
|
|
|
|
void FileTraceLogWriter::open() {
|
|
cleanupTraceFiles();
|
|
bool needsResolve = false;
|
|
|
|
++index;
|
|
|
|
// this allows one process to write 10 billion log files
|
|
// this should be enough - if not we could make the base larger...
|
|
ASSERT(index > 0);
|
|
auto indexWidth = unsigned(::floor(log10f(float(index))));
|
|
|
|
// index is at most 2^32 - 1. 2^32 - 1 < 10^10 - therefore
|
|
// log10(index) < 10
|
|
UNSTOPPABLE_ASSERT(indexWidth < 10);
|
|
|
|
finalname =
|
|
format("%s.%d.%d.%s%s", basename.c_str(), indexWidth, index, extension.c_str(), tracePartialFileSuffix.c_str());
|
|
while ((traceFileFD = __open(finalname.c_str(), TRACEFILE_FLAGS, TRACEFILE_MODE)) == -1) {
|
|
lastError(errno);
|
|
if (errno == EEXIST) {
|
|
++index;
|
|
indexWidth = unsigned(::floor(log10f(float(index))));
|
|
|
|
UNSTOPPABLE_ASSERT(indexWidth < 10);
|
|
finalname = format("%s.%d.%d.%s%s",
|
|
basename.c_str(),
|
|
indexWidth,
|
|
index,
|
|
extension.c_str(),
|
|
tracePartialFileSuffix.c_str());
|
|
} else {
|
|
fprintf(stderr,
|
|
"ERROR: could not create trace log file `%s' (%d: %s)\n",
|
|
finalname.c_str(),
|
|
errno,
|
|
strerror(errno));
|
|
issues->addIssue("trace_log_could_not_create_file");
|
|
needsResolve = true;
|
|
|
|
int errorNum = errno;
|
|
onMainThreadVoid(
|
|
[finalname = finalname, errorNum] {
|
|
TraceEvent(SevWarnAlways, "TraceFileOpenError")
|
|
.detail("Filename", finalname)
|
|
.detail("ErrorCode", errorNum)
|
|
.detail("Error", strerror(errorNum))
|
|
.trackLatest("TraceFileOpenError");
|
|
},
|
|
nullptr);
|
|
threadSleep(FLOW_KNOBS->TRACE_RETRY_OPEN_INTERVAL);
|
|
}
|
|
}
|
|
onMainThreadVoid([] { latestEventCache.clear("TraceFileOpenError"); }, nullptr);
|
|
if (needsResolve) {
|
|
issues->resolveIssue("trace_log_could_not_create_file");
|
|
}
|
|
lastError(0);
|
|
}
|
|
|
|
void FileTraceLogWriter::close() {
|
|
if (traceFileFD >= 0) {
|
|
while (__close(traceFileFD))
|
|
threadSleep(0.1);
|
|
}
|
|
traceFileFD = -1;
|
|
if (!tracePartialFileSuffix.empty()) {
|
|
renameFile(finalname, finalname.substr(0, finalname.size() - tracePartialFileSuffix.size()));
|
|
}
|
|
finalname = "";
|
|
}
|
|
|
|
void FileTraceLogWriter::roll() {
|
|
close();
|
|
open();
|
|
}
|
|
|
|
// Flushes the open trace file to stable storage via __fsync. Does nothing when no
// file is open, matching write(): a sync that arrives after close() is ignored
// instead of being issued on an invalid descriptor (which _commit on Windows treats
// as an invalid-parameter error).
void FileTraceLogWriter::sync() {
	if (traceFileFD < 0) {
		return;
	}
	__fsync(traceFileFD);
}
|
|
|
|
void FileTraceLogWriter::cleanupTraceFiles() {
|
|
// Setting maxLogsSize=0 disables trace file cleanup based on dir size
|
|
if (!g_network->isSimulated() && maxLogsSize > 0) {
|
|
try {
|
|
// Rename/finalize any stray files ending in tracePartialFileSuffix for this process.
|
|
if (!tracePartialFileSuffix.empty()) {
|
|
for (const auto& f : platform::listFiles(directory, tracePartialFileSuffix)) {
|
|
if (f.substr(0, processName.length()) == processName) {
|
|
renameFile(f, f.substr(0, f.size() - tracePartialFileSuffix.size()));
|
|
}
|
|
}
|
|
}
|
|
|
|
std::vector<std::string> existingFiles = platform::listFiles(directory, extension);
|
|
std::vector<std::string> existingTraceFiles;
|
|
|
|
for (auto f = existingFiles.begin(); f != existingFiles.end(); ++f) {
|
|
if (f->substr(0, processName.length()) == processName) {
|
|
existingTraceFiles.push_back(*f);
|
|
}
|
|
}
|
|
|
|
// reverse sort, so we preserve the most recent files and delete the oldest
|
|
std::sort(existingTraceFiles.begin(), existingTraceFiles.end(), std::greater<std::string>());
|
|
|
|
uint64_t runningTotal = 0;
|
|
std::vector<std::string>::iterator fileListIterator = existingTraceFiles.begin();
|
|
|
|
while (runningTotal < maxLogsSize && fileListIterator != existingTraceFiles.end()) {
|
|
runningTotal += (fileSize(joinPath(directory, *fileListIterator)) + FLOW_KNOBS->ZERO_LENGTH_FILE_PAD);
|
|
++fileListIterator;
|
|
}
|
|
|
|
while (fileListIterator != existingTraceFiles.end()) {
|
|
deleteFile(joinPath(directory, *fileListIterator));
|
|
++fileListIterator;
|
|
}
|
|
} catch (Error&) {
|
|
}
|
|
}
|
|
}
|