mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-14 18:02:31 +08:00
Add RatekeeperLimitReasonDetails traceevent for RK
This commit is contained in:
parent
017f9b2308
commit
d684508540
@ -546,6 +546,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
||||
init( METRIC_UPDATE_RATE, .1 ); if( slowRatekeeper ) METRIC_UPDATE_RATE = 0.5;
|
||||
init( DETAILED_METRIC_UPDATE_RATE, 5.0 );
|
||||
init (RATEKEEPER_DEFAULT_LIMIT, 1e6 ); if( randomize && BUGGIFY ) RATEKEEPER_DEFAULT_LIMIT = 0;
|
||||
init( RATEKEEPER_LIMIT_REASON_SAMPLE_RATE, 0.1 );
|
||||
init( RATEKEEPER_PRINT_LIMIT_REASON, false ); if( randomize && BUGGIFY ) RATEKEEPER_PRINT_LIMIT_REASON = true;
|
||||
|
||||
bool smallStorageTarget = randomize && BUGGIFY;
|
||||
init( TARGET_BYTES_PER_STORAGE_SERVER, 1000e6 ); if( smallStorageTarget ) TARGET_BYTES_PER_STORAGE_SERVER = 3000e3;
|
||||
|
@ -482,6 +482,8 @@ public:
|
||||
double DETAILED_METRIC_UPDATE_RATE;
|
||||
double LAST_LIMITED_RATIO;
|
||||
double RATEKEEPER_DEFAULT_LIMIT;
|
||||
double RATEKEEPER_LIMIT_REASON_SAMPLE_RATE;
|
||||
bool RATEKEEPER_PRINT_LIMIT_REASON;
|
||||
|
||||
int64_t TARGET_BYTES_PER_STORAGE_SERVER;
|
||||
int64_t SPRING_BYTES_STORAGE_SERVER;
|
||||
|
@ -1024,6 +1024,10 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
||||
|
||||
std::map<UID, limitReason_t> ssReasons;
|
||||
|
||||
bool printRateKeepLimitReasonDetails =
|
||||
SERVER_KNOBS->RATEKEEPER_PRINT_LIMIT_REASON &&
|
||||
(deterministicRandom()->random01() < SERVER_KNOBS->RATEKEEPER_LIMIT_REASON_SAMPLE_RATE);
|
||||
|
||||
// Look at each storage server's write queue and local rate, compute and store the desired rate ratio
|
||||
for (auto i = self->storageQueueInfo.begin(); i != self->storageQueueInfo.end(); ++i) {
|
||||
auto& ss = i->value;
|
||||
@ -1050,6 +1054,16 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
||||
} else {
|
||||
ssLimitReason = limitReason_t::storage_server_min_free_space_ratio;
|
||||
}
|
||||
if (printRateKeepLimitReasonDetails) {
|
||||
TraceEvent("RatekeeperLimitReasonDetails")
|
||||
.detail("Reason", ssLimitReason)
|
||||
.detail("SSID", ss.id)
|
||||
.detail("SSSmoothTotalSpace", ss.smoothTotalSpace.smoothTotal())
|
||||
.detail("SSSmoothFreeSpace", ss.smoothFreeSpace.smoothTotal())
|
||||
.detail("TargetBytes", targetBytes)
|
||||
.detail("LimitsStorageTargetBytes", limits->storageTargetBytes)
|
||||
.detail("MinFreeSpace", minFreeSpace);
|
||||
}
|
||||
}
|
||||
|
||||
int64_t storageQueue = ss.lastReply.bytesInput - ss.smoothDurableBytes.smoothTotal();
|
||||
@ -1109,9 +1123,34 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
||||
double x = smoothedRate / (inputRate * targetRateRatio);
|
||||
double lim = actualTps * x;
|
||||
if (lim < limitTps) {
|
||||
double oldLimitTps = limitTps;
|
||||
limitTps = lim;
|
||||
if (ssLimitReason == limitReason_t::unlimited ||
|
||||
ssLimitReason == limitReason_t::storage_server_write_bandwidth_mvcc) {
|
||||
if (printRateKeepLimitReasonDetails) {
|
||||
TraceEvent("RatekeeperLimitReasonDetails")
|
||||
.detail("Reason", limitReason_t::storage_server_write_queue_size)
|
||||
.detail("FromReason", ssLimitReason)
|
||||
.detail("SSID", ss.id)
|
||||
.detail("SSSmoothTotalSpace", ss.smoothTotalSpace.smoothTotal())
|
||||
.detail("LimitsStorageTargetBytes", limits->storageTargetBytes)
|
||||
.detail("LimitsStorageSpringBytes", limits->storageSpringBytes)
|
||||
.detail("SSSmoothFreeSpace", ss.smoothFreeSpace.smoothTotal())
|
||||
.detail("MinFreeSpace", minFreeSpace)
|
||||
.detail("SSLastReplyBytesInput", ss.lastReply.bytesInput)
|
||||
.detail("SSSmoothDurableBytes", ss.smoothDurableBytes.smoothTotal())
|
||||
.detail("StorageQueue", storageQueue)
|
||||
.detail("TargetBytes", targetBytes)
|
||||
.detail("SpringBytes", springBytes)
|
||||
.detail("SSVerySmoothDurableBytesSmoothRate", ss.verySmoothDurableBytes.smoothRate())
|
||||
.detail("SmoothedRate", smoothedRate)
|
||||
.detail("X", x)
|
||||
.detail("ActualTps", actualTps)
|
||||
.detail("Lim", lim)
|
||||
.detail("LimitTps", oldLimitTps)
|
||||
.detail("InputRate", inputRate)
|
||||
.detail("TargetRateRatio", targetRateRatio);
|
||||
}
|
||||
ssLimitReason = limitReason_t::storage_server_write_queue_size;
|
||||
}
|
||||
}
|
||||
@ -1179,6 +1218,15 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
||||
limits->durabilityLagLimit = SERVER_KNOBS->DURABILITY_LAG_REDUCTION_RATE * limits->durabilityLagLimit;
|
||||
}
|
||||
if (limits->durabilityLagLimit < limits->tpsLimit) {
|
||||
if (printRateKeepLimitReasonDetails) {
|
||||
TraceEvent("RatekeeperLimitReasonDetails")
|
||||
.detail("SSID", ss->second->id)
|
||||
.detail("Reason", limitReason_t::storage_server_durability_lag)
|
||||
.detail("LimitsDurabilityLagLimit", limits->durabilityLagLimit)
|
||||
.detail("LimitsTpsLimit", limits->tpsLimit)
|
||||
.detail("LimitingDurabilityLag", limitingDurabilityLag)
|
||||
.detail("LimitsLastDurabilityLag", limits->lastDurabilityLag);
|
||||
}
|
||||
limits->tpsLimit = limits->durabilityLagLimit;
|
||||
limitReason = limitReason_t::storage_server_durability_lag;
|
||||
}
|
||||
@ -1262,6 +1310,16 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
||||
} else {
|
||||
tlogLimitReason = limitReason_t::log_server_min_free_space_ratio;
|
||||
}
|
||||
if (printRateKeepLimitReasonDetails) {
|
||||
TraceEvent("RatekeeperLimitReasonDetails")
|
||||
.detail("TLogID", tl.id)
|
||||
.detail("Reason", tlogLimitReason)
|
||||
.detail("TLSmoothFreeSpace", tl.smoothFreeSpace.smoothTotal())
|
||||
.detail("TLSmoothTotalSpace", tl.smoothTotalSpace.smoothTotal())
|
||||
.detail("LimitsLogTargetBytes", limits->logTargetBytes)
|
||||
.detail("TargetBytes", targetBytes)
|
||||
.detail("MinFreeSpace", minFreeSpace);
|
||||
}
|
||||
}
|
||||
|
||||
int64_t queue = tl.lastReply.bytesInput - tl.smoothDurableBytes.smoothTotal();
|
||||
@ -1282,6 +1340,25 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
||||
double targetRateRatio = std::min((b + springBytes) / (double)springBytes, 2.0);
|
||||
|
||||
if (writeToReadLatencyLimit > targetRateRatio) {
|
||||
if (printRateKeepLimitReasonDetails) {
|
||||
TraceEvent("RatekeeperLimitReasonDetails")
|
||||
.detail("TLogID", tl.id)
|
||||
.detail("Reason", limitReason_t::storage_server_readable_behind)
|
||||
.detail("TLSmoothFreeSpace", tl.smoothFreeSpace.smoothTotal())
|
||||
.detail("TLSmoothTotalSpace", tl.smoothTotalSpace.smoothTotal())
|
||||
.detail("LimitsLogSpringBytes", limits->logSpringBytes)
|
||||
.detail("LimitsLogTargetBytes", limits->logTargetBytes)
|
||||
.detail("SpringBytes", springBytes)
|
||||
.detail("TargetBytes", targetBytes)
|
||||
.detail("TLLastReplyBytesInput", tl.lastReply.bytesInput)
|
||||
.detail("TLSmoothDurableBytes", tl.smoothDurableBytes.smoothTotal())
|
||||
.detail("Queue", queue)
|
||||
.detail("B", b)
|
||||
.detail("TargetRateRatio", targetRateRatio)
|
||||
.detail("WriteToReadLatencyLimit", writeToReadLatencyLimit)
|
||||
.detail("MinFreeSpace", minFreeSpace)
|
||||
.detail("LimitsMaxVersionDifference", limits->maxVersionDifference);
|
||||
}
|
||||
targetRateRatio = writeToReadLatencyLimit;
|
||||
tlogLimitReason = limitReason_t::storage_server_readable_behind;
|
||||
}
|
||||
@ -1310,6 +1387,23 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
|
||||
inputRate;
|
||||
double lim = actualTps * x;
|
||||
if (lim < limits->tpsLimit) {
|
||||
if (printRateKeepLimitReasonDetails) {
|
||||
TraceEvent("RatekeeperLimitReasonDetails")
|
||||
.detail("Reason", limitReason_t::log_server_mvcc_write_bandwidth)
|
||||
.detail("TLogID", tl.id)
|
||||
.detail("MinFreeSpace", minFreeSpace)
|
||||
.detail("TLSmoothFreeSpace", tl.smoothFreeSpace.smoothTotal())
|
||||
.detail("TLSmoothTotalSpace", tl.smoothTotalSpace.smoothTotal())
|
||||
.detail("LimitsLogSpringBytes", limits->logSpringBytes)
|
||||
.detail("LimitsLogTargetBytes", limits->logTargetBytes)
|
||||
.detail("SpringBytes", springBytes)
|
||||
.detail("TargetBytes", targetBytes)
|
||||
.detail("InputRate", inputRate)
|
||||
.detail("X", x)
|
||||
.detail("ActualTps", actualTps)
|
||||
.detail("Lim", lim)
|
||||
.detail("LimitsTpsLimit", limits->tpsLimit);
|
||||
}
|
||||
limits->tpsLimit = lim;
|
||||
reasonID = tl.id;
|
||||
limitReason = limitReason_t::log_server_mvcc_write_bandwidth;
|
||||
|
Loading…
x
Reference in New Issue
Block a user