Deprioritize spilled peeks in spill-by-value, and improve its logic.

This moves the deprioritization to before the call to peekMessagesFromMemory,
which should reduce the TLog's memory usage, and it keeps txsTag peeks at
high priority so that recoveries stay fast.
This commit is contained in:
Alex Miller 2019-05-03 12:55:41 -07:00
parent 4052f3826a
commit c918b21137
2 changed files with 18 additions and 5 deletions

View File

@ -992,6 +992,15 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
wait( delay(0.0, TaskLowPriority) );
}
if( req.begin <= logData->persistentDataDurableVersion && req.tag != txsTag) {
// Reading spilled data will almost always imply that the storage server is >5s behind the rest
// of the cluster. We shouldn't prioritize spending CPU on helping this server catch up
// slightly faster over keeping the rest of the cluster operating normally.
// txsTag is only ever peeked on recovery, and we would still wish to prioritize requests
// that impact recovery duration.
wait(delay(0, TaskTLogSpilledPeekReply));
}
Version poppedVer = poppedVersion(logData, req.tag);
if(poppedVer > req.begin) {
TLogPeekReply rep;

View File

@ -1282,6 +1282,15 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
wait( delay(0.0, TaskLowPriority) );
}
if( req.begin <= logData->persistentDataDurableVersion && req.tag != txsTag) {
// Reading spilled data will almost always imply that the storage server is >5s behind the rest
// of the cluster. We shouldn't prioritize spending CPU on helping this server catch up
// slightly faster over keeping the rest of the cluster operating normally.
// txsTag is only ever peeked on recovery, and we would still wish to prioritize requests
// that impact recovery duration.
wait(delay(0, TaskTLogSpilledPeekReply));
}
Version poppedVer = poppedVersion(logData, req.tag);
if(poppedVer > req.begin) {
TLogPeekReply rep;
@ -1343,11 +1352,6 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
else
messages.serializeBytes( messages2.toValue() );
} else {
// Calculating checksums of read pages is potentially expensive, and storage servers with
// spilled data are likely behind and not contributing usefully to the cluster anyway.
// Thus, we penalize their priority slightly to make sure that commits have a higher priority
// than catching up old storage servers.
wait(delay(0, TaskTLogSpilledPeekReply));
// FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow.
Standalone<VectorRef<KeyValueRef>> kvrefs = wait(
self->persistentData->readRange(KeyRangeRef(