diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index af6a294aa4..5e5865b701 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -102,6 +102,8 @@ void ServerKnobs::initialize(Randomize _randomize, ClientKnobs* clientKnobs, IsS init( PUSH_STATS_SLOW_AMOUNT, 2 ); init( PUSH_STATS_SLOW_RATIO, 0.5 ); init( TLOG_POP_BATCH_SIZE, 1000 ); if ( randomize && BUGGIFY ) TLOG_POP_BATCH_SIZE = 10; + init( TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE, 100e6 ); + init( ENABLE_DETAILED_TLOG_POP_TRACE, 1 ); // disk snapshot max timeout, to be put in TLog, storage and coordinator nodes init( MAX_FORKED_PROCESS_OUTPUT, 1024 ); diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index 83ac569589..dc0e273d0d 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -64,6 +64,8 @@ public: // message (measured in 1/1024ths, e.g. a value of 2048 yields a // factor of 2). int64_t VERSION_MESSAGES_ENTRY_BYTES_WITH_OVERHEAD; + int64_t TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE; + bool ENABLE_DETAILED_TLOG_POP_TRACE; double TLOG_MESSAGE_BLOCK_OVERHEAD_FACTOR; int64_t TLOG_MESSAGE_BLOCK_BYTES; int64_t MAX_MESSAGE_SIZE; diff --git a/fdbserver/OldTLogServer_6_2.actor.cpp b/fdbserver/OldTLogServer_6_2.actor.cpp index f7f25868f9..01311635e1 100644 --- a/fdbserver/OldTLogServer_6_2.actor.cpp +++ b/fdbserver/OldTLogServer_6_2.actor.cpp @@ -1440,6 +1440,18 @@ ACTOR Future<Void> tLogPopCore(TLogData* self, Tag inputTag, Version to, Referen } } + uint64_t PoppedVersionLag = logData->persistentDataDurableVersion - logData->queuePoppedVersion; + if ( SERVER_KNOBS->ENABLE_DETAILED_TLOG_POP_TRACE && //trace is open + (tagData->unpoppedRecovered || PoppedVersionLag >= SERVER_KNOBS->TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE)) { //when recovery or long lag + TraceEvent("TLogPopDetails", logData->logId) + .detail("Tag", tagData->tag.toString()) + .detail("UpTo", upTo) + .detail("PoppedVersionLag", PoppedVersionLag) + .detail("MinPoppedTag", logData->minPoppedTag.toString()) + .detail("QueuePoppedVersion", logData->queuePoppedVersion) + .detail("UnpoppedRecovered", tagData->unpoppedRecovered ? "True" : "False") + .detail("NothingPersistent", tagData->nothingPersistent ? "True" : "False"); + } if (upTo > logData->persistentDataDurableVersion) wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskPriority::TLogPop)); //TraceEvent("TLogPop", self->dbgid).detail("Tag", tag.toString()).detail("To", upTo); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 3d2d90c3f7..1ca1d45b72 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1166,6 +1166,18 @@ ACTOR Future<Void> tLogPopCore(TLogData* self, Tag inputTag, Version to, Referen } } + uint64_t PoppedVersionLag = logData->persistentDataDurableVersion - logData->queuePoppedVersion; + if ( SERVER_KNOBS->ENABLE_DETAILED_TLOG_POP_TRACE && //trace is open + (tagData->unpoppedRecovered || PoppedVersionLag >= SERVER_KNOBS->TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE)) { //when recovery or long lag + TraceEvent("TLogPopDetails", logData->logId) + .detail("Tag", tagData->tag.toString()) + .detail("UpTo", upTo) + .detail("PoppedVersionLag", PoppedVersionLag) + .detail("MinPoppedTag", logData->minPoppedTag.toString()) + .detail("QueuePoppedVersion", logData->queuePoppedVersion) + .detail("UnpoppedRecovered", tagData->unpoppedRecovered ? "True" : "False") + .detail("NothingPersistent", tagData->nothingPersistent ? "True" : "False"); + } if (upTo > logData->persistentDataDurableVersion) wait(tagData->eraseMessagesBefore(upTo, self, logData, TaskPriority::TLogPop)); //TraceEvent("TLogPop", logData->logId).detail("Tag", tag.toString()).detail("To", upTo);