mirror of
https://github.com/apple/foundationdb.git
synced 2025-06-01 10:45:56 +08:00
We are currently emitting Role transition traces when a role starts and when it ends. While this is useful for debugging, it doesn't work well with tools that inject data and might potentially miss some trace lines. We do decorate each trace lines with the roles assigned to that particular process, however, this is not sufficient for tools that can make use of the UID -> Role mapping
371 lines
16 KiB
C++
371 lines
16 KiB
C++
/*
|
|
* Resolver.actor.cpp
|
|
*
|
|
* This source file is part of the FoundationDB open source project
|
|
*
|
|
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "flow/ActorCollection.h"
|
|
#include "fdbclient/NativeAPI.actor.h"
|
|
#include "fdbserver/ResolverInterface.h"
|
|
#include "fdbserver/MasterInterface.h"
|
|
#include "fdbserver/WorkerInterface.actor.h"
|
|
#include "fdbserver/WaitFailure.h"
|
|
#include "fdbserver/Knobs.h"
|
|
#include "fdbserver/ServerDBInfo.h"
|
|
#include "fdbserver/Orderer.actor.h"
|
|
#include "fdbserver/ConflictSet.h"
|
|
#include "fdbserver/StorageMetrics.h"
|
|
#include "fdbclient/SystemData.h"
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
|
|
|
namespace {
|
|
struct ProxyRequestsInfo {
|
|
std::map<Version, ResolveTransactionBatchReply> outstandingBatches;
|
|
Version lastVersion;
|
|
|
|
ProxyRequestsInfo() : lastVersion(-1) {}
|
|
};
|
|
}
|
|
|
|
namespace{
|
|
struct Resolver : ReferenceCounted<Resolver> {
|
|
UID dbgid;
|
|
int proxyCount, resolverCount;
|
|
NotifiedVersion version;
|
|
AsyncVar<Version> neededVersion;
|
|
|
|
Map<Version, Standalone<VectorRef<StateTransactionRef>>> recentStateTransactions;
|
|
Deque<std::pair<Version, int64_t>> recentStateTransactionSizes;
|
|
AsyncVar<int64_t> totalStateBytes;
|
|
AsyncTrigger checkNeededVersion;
|
|
std::map<NetworkAddress, ProxyRequestsInfo> proxyInfoMap;
|
|
ConflictSet *conflictSet;
|
|
TransientStorageMetricSample iopsSample;
|
|
|
|
Version debugMinRecentStateVersion;
|
|
|
|
CounterCollection cc;
|
|
Counter resolveBatchIn;
|
|
Counter resolveBatchStart;
|
|
Counter resolvedTransactions;
|
|
Counter resolvedBytes;
|
|
Counter resolvedReadConflictRanges;
|
|
Counter resolvedWriteConflictRanges;
|
|
Counter transactionsAccepted;
|
|
Counter transactionsTooOld;
|
|
Counter transactionsConflicted;
|
|
Counter resolvedStateTransactions;
|
|
Counter resolvedStateMutations;
|
|
Counter resolvedStateBytes;
|
|
Counter resolveBatchOut;
|
|
Counter metricsRequests;
|
|
Counter splitRequests;
|
|
|
|
Future<Void> logger;
|
|
|
|
Resolver( UID dbgid, int proxyCount, int resolverCount )
|
|
: dbgid(dbgid), proxyCount(proxyCount), resolverCount(resolverCount), version(-1), conflictSet( newConflictSet() ), iopsSample( SERVER_KNOBS->KEY_BYTES_PER_SAMPLE ), debugMinRecentStateVersion(0),
|
|
cc("Resolver", dbgid.toString()),
|
|
resolveBatchIn("ResolveBatchIn", cc), resolveBatchStart("ResolveBatchStart", cc), resolvedTransactions("ResolvedTransactions", cc), resolvedBytes("ResolvedBytes", cc),
|
|
resolvedReadConflictRanges("ResolvedReadConflictRanges", cc), resolvedWriteConflictRanges("ResolvedWriteConflictRanges", cc), transactionsAccepted("TransactionsAccepted", cc),
|
|
transactionsTooOld("TransactionsTooOld", cc), transactionsConflicted("TransactionsConflicted", cc), resolvedStateTransactions("ResolvedStateTransactions", cc),
|
|
resolvedStateMutations("ResolvedStateMutations", cc), resolvedStateBytes("ResolvedStateBytes", cc), resolveBatchOut("ResolveBatchOut", cc), metricsRequests("MetricsRequests", cc),
|
|
splitRequests("SplitRequests", cc)
|
|
{
|
|
specialCounter(cc, "Version", [this](){ return this->version.get(); });
|
|
specialCounter(cc, "NeededVersion", [this](){ return this->neededVersion.get(); });
|
|
specialCounter(cc, "TotalStateBytes", [this](){ return this->totalStateBytes.get(); });
|
|
|
|
logger = traceCounters("ResolverMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ResolverMetrics");
|
|
}
|
|
~Resolver() {
|
|
destroyConflictSet( conflictSet );
|
|
}
|
|
|
|
};
|
|
} // namespace
|
|
|
|
ACTOR Future<Void> resolveBatch(
|
|
Reference<Resolver> self,
|
|
ResolveTransactionBatchRequest req)
|
|
{
|
|
state Optional<UID> debugID;
|
|
|
|
// The first request (prevVersion < 0) comes from the master
|
|
state NetworkAddress proxyAddress = req.prevVersion >= 0 ? req.reply.getEndpoint().getPrimaryAddress() : NetworkAddress();
|
|
state ProxyRequestsInfo &proxyInfo = self->proxyInfoMap[proxyAddress];
|
|
|
|
++self->resolveBatchIn;
|
|
|
|
if(req.debugID.present()) {
|
|
debugID = nondeterministicRandom()->randomUniqueID();
|
|
g_traceBatch.addAttach("CommitAttachID", req.debugID.get().first(), debugID.get().first());
|
|
g_traceBatch.addEvent("CommitDebug",debugID.get().first(),"Resolver.resolveBatch.Before");
|
|
}
|
|
|
|
/*TraceEvent("ResolveBatchStart", self->dbgid).detail("From", proxyAddress).detail("Version", req.version).detail("PrevVersion", req.prevVersion).detail("StateTransactions", req.txnStateTransactions.size())
|
|
.detail("RecentStateTransactions", self->recentStateTransactionSizes.size()).detail("LastVersion", proxyInfo.lastVersion).detail("FirstVersion", self->recentStateTransactionSizes.empty() ? -1 : self->recentStateTransactionSizes.front().first)
|
|
.detail("ResolverVersion", self->version.get());*/
|
|
|
|
while( self->totalStateBytes.get() > SERVER_KNOBS->RESOLVER_STATE_MEMORY_LIMIT && self->recentStateTransactionSizes.size() &&
|
|
proxyInfo.lastVersion > self->recentStateTransactionSizes.front().first && req.version > self->neededVersion.get() ) {
|
|
/*TraceEvent("ResolveBatchDelay").detail("From", proxyAddress).detail("StateBytes", self->totalStateBytes.get()).detail("RecentStateTransactionSize", self->recentStateTransactionSizes.size())
|
|
.detail("LastVersion", proxyInfo.lastVersion).detail("RequestVersion", req.version).detail("NeededVersion", self->neededVersion.get())
|
|
.detail("RecentStateVer", self->recentStateTransactions.begin()->key);*/
|
|
|
|
wait( self->totalStateBytes.onChange() || self->neededVersion.onChange() );
|
|
}
|
|
|
|
if(debugID.present()) {
|
|
g_traceBatch.addEvent("CommitDebug",debugID.get().first(),"Resolver.resolveBatch.AfterQueueSizeCheck");
|
|
}
|
|
|
|
loop {
|
|
if( self->recentStateTransactionSizes.size() && proxyInfo.lastVersion <= self->recentStateTransactionSizes.front().first ) {
|
|
self->neededVersion.set( std::max(self->neededVersion.get(), req.prevVersion) );
|
|
}
|
|
|
|
choose {
|
|
when(wait(self->version.whenAtLeast(req.prevVersion))) {
|
|
break;
|
|
}
|
|
when(wait(self->checkNeededVersion.onTrigger())) { }
|
|
}
|
|
}
|
|
|
|
if (check_yield(TaskPriority::DefaultEndpoint)) {
|
|
wait( delay( 0, TaskPriority::Low ) || delay( SERVER_KNOBS->COMMIT_SLEEP_TIME ) ); // FIXME: Is this still right?
|
|
g_network->setCurrentTask(TaskPriority::DefaultEndpoint);
|
|
}
|
|
|
|
if (self->version.get() == req.prevVersion) { // Not a duplicate (check relies on no waiting between here and self->version.set() below!)
|
|
++self->resolveBatchStart;
|
|
self->resolvedTransactions += req.transactions.size();
|
|
self->resolvedBytes += req.transactions.expectedSize();
|
|
|
|
if(proxyInfo.lastVersion > 0) {
|
|
proxyInfo.outstandingBatches.erase(proxyInfo.outstandingBatches.begin(), proxyInfo.outstandingBatches.upper_bound(req.lastReceivedVersion));
|
|
}
|
|
|
|
Version firstUnseenVersion = proxyInfo.lastVersion + 1;
|
|
proxyInfo.lastVersion = req.version;
|
|
|
|
if(req.debugID.present())
|
|
g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "Resolver.resolveBatch.AfterOrderer");
|
|
|
|
ResolveTransactionBatchReply& reply = proxyInfo.outstandingBatches[req.version];
|
|
|
|
vector<int> commitList;
|
|
vector<int> tooOldList;
|
|
|
|
// Detect conflicts
|
|
double expire = now() + SERVER_KNOBS->SAMPLE_EXPIRATION_TIME;
|
|
ConflictBatch conflictBatch(self->conflictSet, &reply.conflictingKeyRangeMap, &reply.arena);
|
|
int keys = 0;
|
|
for(int t=0; t<req.transactions.size(); t++) {
|
|
conflictBatch.addTransaction( req.transactions[t] );
|
|
self->resolvedReadConflictRanges += req.transactions[t].read_conflict_ranges.size();
|
|
self->resolvedWriteConflictRanges += req.transactions[t].write_conflict_ranges.size();
|
|
keys += req.transactions[t].write_conflict_ranges.size()*2 + req.transactions[t].read_conflict_ranges.size()*2;
|
|
|
|
if(self->resolverCount > 1) {
|
|
for(auto it : req.transactions[t].write_conflict_ranges)
|
|
self->iopsSample.addAndExpire( it.begin, SERVER_KNOBS->SAMPLE_OFFSET_PER_KEY + it.begin.size(), expire );
|
|
for(auto it : req.transactions[t].read_conflict_ranges)
|
|
self->iopsSample.addAndExpire( it.begin, SERVER_KNOBS->SAMPLE_OFFSET_PER_KEY + it.begin.size(), expire );
|
|
}
|
|
}
|
|
conflictBatch.detectConflicts( req.version, req.version - SERVER_KNOBS->MAX_WRITE_TRANSACTION_LIFE_VERSIONS, commitList, &tooOldList);
|
|
|
|
reply.debugID = req.debugID;
|
|
reply.committed.resize( reply.arena, req.transactions.size() );
|
|
for(int c=0; c<commitList.size(); c++)
|
|
reply.committed[commitList[c]] = ConflictBatch::TransactionCommitted;
|
|
|
|
for (int c = 0; c<tooOldList.size(); c++) {
|
|
ASSERT(reply.committed[tooOldList[c]] == ConflictBatch::TransactionConflict);
|
|
reply.committed[tooOldList[c]] = ConflictBatch::TransactionTooOld;
|
|
}
|
|
|
|
self->transactionsAccepted += commitList.size();
|
|
self->transactionsTooOld += tooOldList.size();
|
|
self->transactionsConflicted += req.transactions.size() - commitList.size() - tooOldList.size();
|
|
|
|
ASSERT(req.prevVersion >= 0 || req.txnStateTransactions.size() == 0); // The master's request should not have any state transactions
|
|
|
|
auto& stateTransactions = self->recentStateTransactions[ req.version ];
|
|
int64_t stateMutations = 0;
|
|
int64_t stateBytes = 0;
|
|
for(int t : req.txnStateTransactions) {
|
|
stateMutations += req.transactions[t].mutations.size();
|
|
stateBytes += req.transactions[t].mutations.expectedSize();
|
|
stateTransactions.push_back_deep(stateTransactions.arena(), StateTransactionRef(reply.committed[t] == ConflictBatch::TransactionCommitted, req.transactions[t].mutations));
|
|
}
|
|
|
|
self->resolvedStateTransactions += req.txnStateTransactions.size();
|
|
self->resolvedStateMutations += stateMutations;
|
|
self->resolvedStateBytes += stateBytes;
|
|
|
|
if(stateBytes > 0)
|
|
self->recentStateTransactionSizes.push_back(std::make_pair(req.version, stateBytes));
|
|
|
|
ASSERT(req.version >= firstUnseenVersion);
|
|
ASSERT(firstUnseenVersion >= self->debugMinRecentStateVersion);
|
|
|
|
TEST(firstUnseenVersion == req.version); // Resolver first unseen version is current version
|
|
|
|
auto stateTransactionItr = self->recentStateTransactions.lower_bound(firstUnseenVersion);
|
|
auto endItr = self->recentStateTransactions.lower_bound(req.version);
|
|
for(; stateTransactionItr != endItr; ++stateTransactionItr) {
|
|
reply.stateMutations.push_back( reply.arena, stateTransactionItr->value);
|
|
reply.arena.dependsOn( stateTransactionItr->value.arena() );
|
|
}
|
|
|
|
//TraceEvent("ResolveBatch", self->dbgid).detail("PrevVersion", req.prevVersion).detail("Version", req.version).detail("StateTransactionVersions", self->recentStateTransactionSizes.size()).detail("StateBytes", stateBytes).detail("FirstVersion", self->recentStateTransactionSizes.empty() ? -1 : self->recentStateTransactionSizes.front().first).detail("StateMutationsIn", req.txnStateTransactions.size()).detail("StateMutationsOut", reply.stateMutations.size()).detail("From", proxyAddress);
|
|
|
|
ASSERT(!proxyInfo.outstandingBatches.empty());
|
|
ASSERT(self->proxyInfoMap.size() <= self->proxyCount+1);
|
|
|
|
// SOMEDAY: This is O(n) in number of proxies. O(log n) solution using appropriate data structure?
|
|
Version oldestProxyVersion = req.version;
|
|
for(auto itr = self->proxyInfoMap.begin(); itr != self->proxyInfoMap.end(); ++itr) {
|
|
//TraceEvent("ResolveBatchProxyVersion", self->dbgid).detail("Proxy", itr->first).detail("Version", itr->second.lastVersion);
|
|
if(itr->first.isValid()) { // Don't consider the first master request
|
|
oldestProxyVersion = std::min(itr->second.lastVersion, oldestProxyVersion);
|
|
}
|
|
else {
|
|
// The master's request version should never prevent us from clearing recentStateTransactions
|
|
ASSERT(self->debugMinRecentStateVersion == 0 || self->debugMinRecentStateVersion > itr->second.lastVersion);
|
|
}
|
|
}
|
|
|
|
TEST(oldestProxyVersion == req.version); // The proxy that sent this request has the oldest current version
|
|
TEST(oldestProxyVersion != req.version); // The proxy that sent this request does not have the oldest current version
|
|
|
|
bool anyPopped = false;
|
|
if(firstUnseenVersion <= oldestProxyVersion && self->proxyInfoMap.size() == self->proxyCount+1) {
|
|
TEST(true); // Deleting old state transactions
|
|
self->recentStateTransactions.erase( self->recentStateTransactions.begin(), self->recentStateTransactions.upper_bound( oldestProxyVersion ) );
|
|
self->debugMinRecentStateVersion = oldestProxyVersion + 1;
|
|
|
|
while(self->recentStateTransactionSizes.size() && self->recentStateTransactionSizes.front().first <= oldestProxyVersion) {
|
|
anyPopped = true;
|
|
stateBytes -= self->recentStateTransactionSizes.front().second;
|
|
self->recentStateTransactionSizes.pop_front();
|
|
}
|
|
}
|
|
|
|
self->version.set( req.version );
|
|
bool breachedLimit = self->totalStateBytes.get() <= SERVER_KNOBS->RESOLVER_STATE_MEMORY_LIMIT && self->totalStateBytes.get() + stateBytes > SERVER_KNOBS->RESOLVER_STATE_MEMORY_LIMIT;
|
|
self->totalStateBytes.setUnconditional(self->totalStateBytes.get() + stateBytes);
|
|
if(anyPopped || breachedLimit) {
|
|
self->checkNeededVersion.trigger();
|
|
}
|
|
|
|
if(req.debugID.present())
|
|
g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "Resolver.resolveBatch.After");
|
|
}
|
|
else {
|
|
TEST(true); // Duplicate resolve batch request
|
|
//TraceEvent("DupResolveBatchReq", self->dbgid).detail("From", proxyAddress);
|
|
}
|
|
|
|
auto proxyInfoItr = self->proxyInfoMap.find(proxyAddress);
|
|
|
|
if(proxyInfoItr != self->proxyInfoMap.end()) {
|
|
auto batchItr = proxyInfoItr->second.outstandingBatches.find(req.version);
|
|
if(batchItr != proxyInfoItr->second.outstandingBatches.end()) {
|
|
req.reply.send(batchItr->second);
|
|
}
|
|
else {
|
|
TEST(true); // No outstanding batches for version on proxy
|
|
req.reply.send(Never());
|
|
}
|
|
}
|
|
else {
|
|
ASSERT_WE_THINK(false); // The first non-duplicate request with this proxyAddress, including this one, should have inserted this item in the map!
|
|
//TEST(true); // No prior proxy requests
|
|
req.reply.send(Never());
|
|
}
|
|
|
|
++self->resolveBatchOut;
|
|
|
|
return Void();
|
|
}
|
|
|
|
ACTOR Future<Void> resolverCore(
|
|
ResolverInterface resolver,
|
|
InitializeResolverRequest initReq)
|
|
{
|
|
state Reference<Resolver> self( new Resolver(resolver.id(), initReq.proxyCount, initReq.resolverCount) );
|
|
state ActorCollection actors(false);
|
|
state Future<Void> doPollMetrics = self->resolverCount > 1 ? Void() : Future<Void>(Never());
|
|
actors.add( waitFailureServer(resolver.waitFailure.getFuture()) );
|
|
actors.add( traceRole(Role::RESOLVER, resolver.id()) );
|
|
|
|
TraceEvent("ResolverInit", resolver.id()).detail("RecoveryCount", initReq.recoveryCount);
|
|
loop choose {
|
|
when ( ResolveTransactionBatchRequest batch = waitNext( resolver.resolve.getFuture() ) ) {
|
|
actors.add( resolveBatch(self, batch) );
|
|
}
|
|
when ( ResolutionMetricsRequest req = waitNext( resolver.metrics.getFuture() ) ) {
|
|
++self->metricsRequests;
|
|
req.reply.send(self->iopsSample.getEstimate(allKeys));
|
|
}
|
|
when ( ResolutionSplitRequest req = waitNext( resolver.split.getFuture() ) ) {
|
|
++self->splitRequests;
|
|
ResolutionSplitReply rep;
|
|
rep.key = self->iopsSample.splitEstimate(req.range, req.offset, req.front);
|
|
rep.used = self->iopsSample.getEstimate(req.front ? KeyRangeRef(req.range.begin, rep.key) : KeyRangeRef(rep.key, req.range.end));
|
|
req.reply.send(rep);
|
|
}
|
|
when ( wait( actors.getResult() ) ) {}
|
|
when (wait(doPollMetrics) ) {
|
|
self->iopsSample.poll();
|
|
doPollMetrics = delay(SERVER_KNOBS->SAMPLE_POLL_TIME);
|
|
}
|
|
}
|
|
}
|
|
|
|
ACTOR Future<Void> checkRemoved( Reference<AsyncVar<ServerDBInfo>> db, uint64_t recoveryCount, ResolverInterface myInterface ) {
|
|
loop {
|
|
if ( db->get().recoveryCount >= recoveryCount && !std::count(db->get().resolvers.begin(), db->get().resolvers.end(), myInterface) )
|
|
throw worker_removed();
|
|
wait( db->onChange() );
|
|
}
|
|
}
|
|
|
|
ACTOR Future<Void> resolver(
|
|
ResolverInterface resolver,
|
|
InitializeResolverRequest initReq,
|
|
Reference<AsyncVar<ServerDBInfo>> db )
|
|
{
|
|
try {
|
|
state Future<Void> core = resolverCore( resolver, initReq );
|
|
loop choose {
|
|
when( wait( core ) ) { return Void(); }
|
|
when( wait( checkRemoved( db, initReq.recoveryCount, resolver ) ) ) {}
|
|
}
|
|
} catch (Error& e) {
|
|
if (e.code() == error_code_actor_cancelled || e.code() == error_code_worker_removed) {
|
|
TraceEvent("ResolverTerminated", resolver.id()).error(e,true);
|
|
return Void();
|
|
}
|
|
throw;
|
|
}
|
|
}
|