Add metric for tlog commit time minus time spent waiting in the queue.

This commit is contained in:
Dan Adkins 2023-02-24 15:03:19 -08:00
parent 37b6804f88
commit b8c9c8b0f4

View File

@ -363,8 +363,18 @@ struct TLogData : NonCopyable {
// Distribution of queue wait times, per request.
// This is the time spent waiting for previous versions.
//
// Note: we only wait for previous versions to enter the
// in-memory DiskQueue commit queue, not until the records are
// flushed and durable.
Reference<Histogram> queueWaitLatencyDist;
// Distribution of just the disk commit times, per request.
//
// Time starts as soon as this request is done waiting for previous versions,
// and ends when the data is flushed and durable.
Reference<Histogram> timeUntilDurableDist;
TLogData(UID dbgid,
UID workerID,
IKeyValueStore* persistentData,
@ -381,7 +391,8 @@ struct TLogData : NonCopyable {
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopDeadline(0), dataFolder(folder),
degraded(degraded),
commitLatencyDist(Histogram::getHistogram("tLog"_sr, "commit"_sr, Histogram::Unit::milliseconds)),
queueWaitLatencyDist(Histogram::getHistogram("tlog"_sr, "QueueWait"_sr, Histogram::Unit::milliseconds)) {
queueWaitLatencyDist(Histogram::getHistogram("tLog"_sr, "QueueWait"_sr, Histogram::Unit::milliseconds)),
timeUntilDurableDist(Histogram::getHistogram("tLog"_sr, "TimeUntilDurable"_sr, Histogram::Unit::milliseconds)) {
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, LockAware::True);
}
};
@ -2314,7 +2325,7 @@ ACTOR Future<Void> tLogCommit(TLogData* self,
wait(logData->version.whenAtLeast(req.prevVersion));
// Time until now has been spent waiting in the queue to do actual work.
double queueWaitEndTime = g_network->timer();
state double queueWaitEndTime = g_network->timer();
self->queueWaitLatencyDist->sampleSeconds(queueWaitEndTime - req.requestTime());
// Calling check_yield instead of yield to avoid a destruction ordering problem in simulation
@ -2380,24 +2391,29 @@ ACTOR Future<Void> tLogCommit(TLogData* self,
wait(
timeoutWarning(logData->queueCommittedVersion.whenAtLeast(req.version) || stopped, 0.1, warningCollectorInput));
// This is the point at which the transaction is durable (unless it timed out, or the tlog stopped).
const double durableTime = g_network->timer();
if (stopped.isReady()) {
ASSERT(logData->stopped());
req.reply.sendError(tlog_stopped());
return Void();
}
// Measure server-side RPC latency from the time a request was
// received to time the response was sent.
const double endTime = g_network->timer();
if (isNotDuplicate) {
self->commitLatencyDist->sampleSeconds(endTime - req.requestTime());
}
if (req.debugID.present())
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After");
req.reply.send(logData->durableKnownCommittedVersion);
// Measure server-side RPC latency from the time a request was
// received until time the response was sent.
const double endTime = g_network->timer();
if (isNotDuplicate) {
self->timeUntilDurableDist->sampleSeconds(durableTime - queueWaitEndTime);
self->commitLatencyDist->sampleSeconds(endTime - req.requestTime());
}
return Void();
}