Support sending multiple busy tags from storage server to ratekeeper

This commit is contained in:
sfc-gh-tclinkenbeard 2022-03-02 18:35:13 -08:00
parent 96983fdd7a
commit cad106f9eb
7 changed files with 40 additions and 49 deletions

View File

@ -921,6 +921,22 @@ struct GetStorageMetricsRequest {
}; };
struct StorageQueuingMetricsReply { struct StorageQueuingMetricsReply {
// One busy transaction tag entry carried in a StorageQueuingMetricsReply
// (the reply holds a vector of these as `busiestTags`).
struct TagInfo {
	constexpr static FileIdentifier file_identifier = 4528694;

	// Tag this entry describes.
	TransactionTag tag;
	// Rate reported for the tag; zero when default-constructed.
	double rate = 0.0;
	// Busyness of the tag expressed as a fraction; zero when default-constructed.
	// NOTE(review): presumably the tag's share of total sampled cost -- confirm against the producer.
	double fractionalBusyness = 0.0;

	// Default construction is needed so the type can be deserialized into.
	TagInfo() = default;

	// Builds a fully populated entry; argument order is (tag, rate, fractionalBusyness).
	TagInfo(TransactionTag const& tag, double rate, double fractionalBusyness)
	  : tag(tag), rate(rate), fractionalBusyness(fractionalBusyness) {}

	// Field order here defines the serialized layout: tag, rate, fractionalBusyness.
	template <class Archive>
	void serialize(Archive& archive) {
		serializer(archive, tag, rate, fractionalBusyness);
	}
};
constexpr static FileIdentifier file_identifier = 7633366; constexpr static FileIdentifier file_identifier = 7633366;
double localTime; double localTime;
int64_t instanceID; // changes if bytesDurable and bytesInput reset int64_t instanceID; // changes if bytesDurable and bytesInput reset
@ -931,9 +947,7 @@ struct StorageQueuingMetricsReply {
double cpuUsage; double cpuUsage;
double diskUsage; double diskUsage;
double localRateLimit; double localRateLimit;
Optional<TransactionTag> busiestTag; std::vector<TagInfo> busiestTags;
double busiestTagFractionalBusyness;
double busiestTagRate;
template <class Ar> template <class Ar>
void serialize(Ar& ar) { void serialize(Ar& ar) {
@ -948,9 +962,7 @@ struct StorageQueuingMetricsReply {
cpuUsage, cpuUsage,
diskUsage, diskUsage,
localRateLimit, localRateLimit,
busiestTag, busiestTags);
busiestTagFractionalBusyness,
busiestTagRate);
} }
}; };

View File

@ -180,9 +180,7 @@ public:
myQueueInfo->value.smoothLatestVersion.setTotal(reply.get().version); myQueueInfo->value.smoothLatestVersion.setTotal(reply.get().version);
} }
myQueueInfo->value.busiestReadTag = reply.get().busiestTag; myQueueInfo->value.busiestReadTags = reply.get().busiestTags;
myQueueInfo->value.busiestReadTagFractionalBusyness = reply.get().busiestTagFractionalBusyness;
myQueueInfo->value.busiestReadTagRate = reply.get().busiestTagRate;
} else { } else {
if (myQueueInfo->value.valid) { if (myQueueInfo->value.valid) {
TraceEvent("RkStorageServerDidNotRespond", self->id).detail("StorageServer", ssi.id()); TraceEvent("RkStorageServerDidNotRespond", self->id).detail("StorageServer", ssi.id());
@ -974,7 +972,7 @@ Future<Void> Ratekeeper::refreshStorageServerCommitCost() {
double elapsed = now() - lastBusiestCommitTagPick; double elapsed = now() - lastBusiestCommitTagPick;
// for each SS, select the busiest commit tag from ssTrTagCommitCost // for each SS, select the busiest commit tag from ssTrTagCommitCost
for (auto it = storageQueueInfo.begin(); it != storageQueueInfo.end(); ++it) { for (auto it = storageQueueInfo.begin(); it != storageQueueInfo.end(); ++it) {
it->value.busiestWriteTag.reset(); it->value.busiestWriteTags.clear();
TransactionTag busiestTag; TransactionTag busiestTag;
TransactionCommitCostEstimation maxCost; TransactionCommitCostEstimation maxCost;
double maxRate = 0, maxBusyness = 0; double maxRate = 0, maxBusyness = 0;
@ -987,12 +985,10 @@ Future<Void> Ratekeeper::refreshStorageServerCommitCost() {
} }
} }
if (maxRate > SERVER_KNOBS->MIN_TAG_WRITE_PAGES_RATE) { if (maxRate > SERVER_KNOBS->MIN_TAG_WRITE_PAGES_RATE) {
it->value.busiestWriteTag = busiestTag;
// TraceEvent("RefreshSSCommitCost").detail("TotalWriteCost", it->value.totalWriteCost).detail("TotalWriteOps",it->value.totalWriteOps); // TraceEvent("RefreshSSCommitCost").detail("TotalWriteCost", it->value.totalWriteCost).detail("TotalWriteOps",it->value.totalWriteOps);
ASSERT(it->value.totalWriteCosts > 0); ASSERT_GT(it->value.totalWriteCosts, 0);
maxBusyness = double(maxCost.getCostSum()) / it->value.totalWriteCosts; maxBusyness = double(maxCost.getCostSum()) / it->value.totalWriteCosts;
it->value.busiestWriteTagFractionalBusyness = maxBusyness; it->value.busiestWriteTags.emplace_back(busiestTag, maxRate, maxBusyness);
it->value.busiestWriteTagRate = maxRate;
} }
TraceEvent("BusiestWriteTag", it->key) TraceEvent("BusiestWriteTag", it->key)
@ -1001,7 +997,7 @@ Future<Void> Ratekeeper::refreshStorageServerCommitCost() {
.detail("TagOps", maxCost.getOpsSum()) .detail("TagOps", maxCost.getOpsSum())
.detail("TagCost", maxCost.getCostSum()) .detail("TagCost", maxCost.getCostSum())
.detail("TotalCost", it->value.totalWriteCosts) .detail("TotalCost", it->value.totalWriteCosts)
.detail("Reported", it->value.busiestWriteTag.present()) .detail("Reported", !it->value.busiestWriteTags.empty())
.trackLatest(it->value.busiestWriteTagEventHolder->trackingKey); .trackLatest(it->value.busiestWriteTagEventHolder->trackingKey);
// reset statistics // reset statistics

View File

@ -58,10 +58,7 @@ struct StorageQueueInfo {
Smoother smoothTotalSpace; Smoother smoothTotalSpace;
limitReason_t limitReason; limitReason_t limitReason;
Optional<TransactionTag> busiestReadTag, busiestWriteTag; std::vector<StorageQueuingMetricsReply::TagInfo> busiestReadTags, busiestWriteTags;
double busiestReadTagFractionalBusyness = 0, busiestWriteTagFractionalBusyness = 0;
double busiestReadTagRate = 0, busiestWriteTagRate = 0;
Reference<EventCacheHolder> busiestWriteTagEventHolder; Reference<EventCacheHolder> busiestWriteTagEventHolder;
// refresh periodically // refresh periodically

View File

@ -544,16 +544,16 @@ public:
// the future // the future
if (storageQueue > SERVER_KNOBS->AUTO_TAG_THROTTLE_STORAGE_QUEUE_BYTES || if (storageQueue > SERVER_KNOBS->AUTO_TAG_THROTTLE_STORAGE_QUEUE_BYTES ||
storageDurabilityLag > SERVER_KNOBS->AUTO_TAG_THROTTLE_DURABILITY_LAG_VERSIONS) { storageDurabilityLag > SERVER_KNOBS->AUTO_TAG_THROTTLE_DURABILITY_LAG_VERSIONS) {
if (ss.busiestWriteTag.present()) { for (const auto& busiestWriteTag : ss.busiestWriteTags) {
return tryAutoThrottleTag(ss.busiestWriteTag.get(), return tryAutoThrottleTag(busiestWriteTag.tag,
ss.busiestWriteTagRate, busiestWriteTag.rate,
ss.busiestWriteTagFractionalBusyness, busiestWriteTag.fractionalBusyness,
TagThrottledReason::BUSY_WRITE); TagThrottledReason::BUSY_WRITE);
} }
if (ss.busiestReadTag.present()) { for (const auto& busiestReadTag : ss.busiestReadTags) {
return tryAutoThrottleTag(ss.busiestReadTag.get(), return tryAutoThrottleTag(busiestReadTag.tag,
ss.busiestReadTagRate, busiestReadTag.rate,
ss.busiestReadTagFractionalBusyness, busiestReadTag.fractionalBusyness,
TagThrottledReason::BUSY_READ); TagThrottledReason::BUSY_READ);
} }
} }

View File

@ -44,11 +44,11 @@ void TransactionTagCounter::addRequest(Optional<TagSet> const& tags, int64_t byt
void TransactionTagCounter::startNewInterval() { void TransactionTagCounter::startNewInterval() {
double elapsed = now() - intervalStart; double elapsed = now() - intervalStart;
previousBusiestTag.reset(); previousBusiestTags.clear();
if (intervalStart > 0 && CLIENT_KNOBS->READ_TAG_SAMPLE_RATE > 0 && elapsed > 0) { if (intervalStart > 0 && CLIENT_KNOBS->READ_TAG_SAMPLE_RATE > 0 && elapsed > 0) {
double rate = busiestTagCount / CLIENT_KNOBS->READ_TAG_SAMPLE_RATE / elapsed; double rate = busiestTagCount / CLIENT_KNOBS->READ_TAG_SAMPLE_RATE / elapsed;
if (rate > SERVER_KNOBS->MIN_TAG_READ_PAGES_RATE) { if (rate > SERVER_KNOBS->MIN_TAG_READ_PAGES_RATE) {
previousBusiestTag = TagInfo(busiestTag, rate, (double)busiestTagCount / intervalTotalSampledCount); previousBusiestTags.emplace_back(busiestTag, rate, (double)busiestTagCount / intervalTotalSampledCount);
} }
TraceEvent("BusiestReadTag", thisServerID) TraceEvent("BusiestReadTag", thisServerID)
@ -56,7 +56,7 @@ void TransactionTagCounter::startNewInterval() {
.detail("Tag", printable(busiestTag)) .detail("Tag", printable(busiestTag))
.detail("TagCost", busiestTagCount) .detail("TagCost", busiestTagCount)
.detail("TotalSampledCost", intervalTotalSampledCount) .detail("TotalSampledCost", intervalTotalSampledCount)
.detail("Reported", previousBusiestTag.present()) .detail("Reported", !previousBusiestTags.empty())
.trackLatest(busiestReadTagEventHolder->trackingKey); .trackLatest(busiestReadTagEventHolder->trackingKey);
} }

View File

@ -20,28 +20,18 @@
#pragma once #pragma once
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/TagThrottle.actor.h" #include "fdbclient/TagThrottle.actor.h"
#include "fdbserver/Knobs.h" #include "fdbserver/Knobs.h"
class TransactionTagCounter { class TransactionTagCounter {
public:
struct TagInfo {
TransactionTag tag;
double rate;
double fractionalBusyness;
TagInfo(TransactionTag const& tag, double rate, double fractionalBusyness)
: tag(tag), rate(rate), fractionalBusyness(fractionalBusyness) {}
};
private:
TransactionTagMap<int64_t> intervalCounts; TransactionTagMap<int64_t> intervalCounts;
int64_t intervalTotalSampledCount = 0; int64_t intervalTotalSampledCount = 0;
TransactionTag busiestTag; TransactionTag busiestTag;
int64_t busiestTagCount = 0; int64_t busiestTagCount = 0;
double intervalStart = 0; double intervalStart = 0;
Optional<TagInfo> previousBusiestTag; std::vector<StorageQueuingMetricsReply::TagInfo> previousBusiestTags;
UID thisServerID; UID thisServerID;
Reference<EventCacheHolder> busiestReadTagEventHolder; Reference<EventCacheHolder> busiestReadTagEventHolder;
@ -50,5 +40,5 @@ public:
static int64_t costFunction(int64_t bytes) { return bytes / SERVER_KNOBS->READ_COST_BYTE_FACTOR + 1; } static int64_t costFunction(int64_t bytes) { return bytes / SERVER_KNOBS->READ_COST_BYTE_FACTOR + 1; }
void addRequest(Optional<TagSet> const& tags, int64_t bytes); void addRequest(Optional<TagSet> const& tags, int64_t bytes);
void startNewInterval(); void startNewInterval();
Optional<TagInfo> getBusiestTag() const { return previousBusiestTag; } std::vector<StorageQueuingMetricsReply::TagInfo> const& getBusiestTags() const { return previousBusiestTags; }
}; };

View File

@ -3574,11 +3574,7 @@ void getQueuingMetrics(StorageServer* self, StorageQueuingMetricsRequest const&
reply.diskUsage = self->diskUsage; reply.diskUsage = self->diskUsage;
reply.durableVersion = self->durableVersion.get(); reply.durableVersion = self->durableVersion.get();
Optional<TransactionTagCounter::TagInfo> busiestTag = self->transactionTagCounter.getBusiestTag(); reply.busiestTags = self->transactionTagCounter.getBusiestTags();
reply.busiestTag =
busiestTag.map<TransactionTag>([](TransactionTagCounter::TagInfo tagInfo) { return tagInfo.tag; });
reply.busiestTagFractionalBusyness = busiestTag.present() ? busiestTag.get().fractionalBusyness : 0.0;
reply.busiestTagRate = busiestTag.present() ? busiestTag.get().rate : 0.0;
req.reply.send(reply); req.reply.send(reply);
} }