diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 5b90e0b930..119e3e735e 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -450,6 +450,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1; init( MAX_PROXY_COMPUTE, 2.0 ); init( MAX_COMPUTE_PER_OPERATION, 0.1 ); + init( MAX_COMPUTE_DURATION_LOG_CUTOFF, 0.15 ); init( PROXY_COMPUTE_BUCKETS, 20000 ); init( PROXY_COMPUTE_GROWTH_RATE, 0.01 ); init( TXN_STATE_SEND_AMOUNT, 4 ); diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index 92674e55d0..dc770502e2 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -374,6 +374,7 @@ public: int MAX_COMMIT_UPDATES; double MAX_PROXY_COMPUTE; double MAX_COMPUTE_PER_OPERATION; + double MAX_COMPUTE_DURATION_LOG_CUTOFF; int PROXY_COMPUTE_BUCKETS; double PROXY_COMPUTE_GROWTH_RATE; int TXN_STATE_SEND_AMOUNT; diff --git a/fdbserver/CommitProxyServer.actor.cpp b/fdbserver/CommitProxyServer.actor.cpp index 097c6f632f..18e255f03c 100644 --- a/fdbserver/CommitProxyServer.actor.cpp +++ b/fdbserver/CommitProxyServer.actor.cpp @@ -708,6 +708,7 @@ ACTOR Future preresolutionProcessing(CommitBatchContext* self) { // Pre-resolution the commits TEST(pProxyCommitData->latestLocalCommitBatchResolving.get() < localBatchNumber - 1); // Wait for local batch wait(pProxyCommitData->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber - 1)); + pProxyCommitData->stats.computeLatency.addMeasurement(now() - timeStart); double queuingDelay = g_network->now() - timeStart; pProxyCommitData->stats.commitBatchQueuingDist->sampleSeconds(queuingDelay); if ((queuingDelay > (double)SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS / SERVER_KNOBS->VERSIONS_PER_SECOND || @@ -1401,6 +1402,18 @@ ACTOR Future postResolution(CommitBatchContext* self) { pProxyCommitData->stats.minComputeNS = std::min(pProxyCommitData->stats.minComputeNS, 1e9 * pProxyCommitData->commitComputePerOperation[self->latencyBucket]); + + if (self->computeDuration >= SERVER_KNOBS->MAX_COMPUTE_DURATION_LOG_CUTOFF) { + TraceEvent(SevInfo, "LongComputeDuration") + .suppressFor(5.0) + .detail("ComputeDuration", self->computeDuration) + .detail("ComputePerOperation", computePerOperation) + .detail("LatencyBucket", self->latencyBucket) + .detail("UpdatedComputePerOperationEstimate", + pProxyCommitData->commitComputePerOperation[self->latencyBucket]) + .detail("BatchBytes", self->batchBytes) + .detail("BatchOperations", self->batchOperations); + } } pProxyCommitData->stats.processingMutationDist->sampleSeconds(now() - postResolutionQueuing); diff --git a/fdbserver/ProxyCommitData.actor.h b/fdbserver/ProxyCommitData.actor.h index 620e58622e..5b9315da59 100644 --- a/fdbserver/ProxyCommitData.actor.h +++ b/fdbserver/ProxyCommitData.actor.h @@ -73,6 +73,8 @@ struct ProxyStats { LatencySample commitBatchingWindowSize; + LatencySample computeLatency; + Future logger; int64_t maxComputeNS; @@ -126,6 +128,10 @@ struct ProxyStats { id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + computeLatency("ComputeLatency", + id, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SAMPLE_SIZE), maxComputeNS(0), minComputeNS(1e12), commitBatchQueuingDist(Histogram::getHistogram(LiteralStringRef("CommitProxy"), LiteralStringRef("CommitBatchQueuing"),