Deprioritize spilled peeks in spill-by-value, and improve its logic.

This moves the deprioritization to before the call to peekMessagesFromMemory,
which should reduce the TLog's memory usage, and it keeps txsTag peeks at
high priority so that recoveries stay fast.
This commit is contained in:
Alex Miller 2019-05-03 12:55:41 -07:00
parent 4052f3826a
commit c918b21137
2 changed files with 18 additions and 5 deletions

View File

@ -992,6 +992,15 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
wait( delay(0.0, TaskLowPriority) );
}
if( req.begin <= logData->persistentDataDurableVersion && req.tag != txsTag) {
// Reading spilled data will almost always imply that the storage server is >5s behind the rest
// of the cluster. We shouldn't prioritize spending CPU on helping this server catch up
// slightly faster over keeping the rest of the cluster operating normally.
// txsTag is only ever peeked on recovery, and we would still wish to prioritize requests
// that impact recovery duration.
wait(delay(0, TaskTLogSpilledPeekReply));
}
Version poppedVer = poppedVersion(logData, req.tag);
if(poppedVer > req.begin) {
TLogPeekReply rep;

View File

@ -1282,6 +1282,15 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
wait( delay(0.0, TaskLowPriority) );
}
if( req.begin <= logData->persistentDataDurableVersion && req.tag != txsTag) {
// Reading spilled data will almost always imply that the storage server is >5s behind the rest
// of the cluster. We shouldn't prioritize spending CPU on helping this server catch up
// slightly faster over keeping the rest of the cluster operating normally.
// txsTag is only ever peeked on recovery, and we would still wish to prioritize requests
// that impact recovery duration.
wait(delay(0, TaskTLogSpilledPeekReply));
}
Version poppedVer = poppedVersion(logData, req.tag);
if(poppedVer > req.begin) {
TLogPeekReply rep;
@ -1343,11 +1352,6 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
else
messages.serializeBytes( messages2.toValue() );
} else {
// Calculating checksums of read pages is potentially expensive, and storage servers with
// spilled data are likely behind and not contributing usefully to the cluster anyway.
// Thus, we penalize their priority slightly to make sure that commits have a higher priority
// than catching up old storage servers.
wait(delay(0, TaskTLogSpilledPeekReply));
// FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow.
Standalone<VectorRef<KeyValueRef>> kvrefs = wait(
self->persistentData->readRange(KeyRangeRef(