From ee92f0574f360bd9365b8102b73eb8b955d059c3 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Fri, 26 Jul 2019 13:23:56 -0700 Subject: [PATCH] fix: lastRequestTime was not updated fix: COORDINATOR_REGISTER_INTERVAL was not set fixed review comments --- fdbclient/ClusterInterface.h | 2 +- fdbclient/CoordinationInterface.h | 4 ++-- fdbclient/MonitorLeader.actor.cpp | 6 ++++-- fdbserver/Coordination.actor.cpp | 9 +++++---- fdbserver/Knobs.cpp | 2 ++ fdbserver/Knobs.h | 1 + fdbserver/Status.actor.cpp | 3 ++- 7 files changed, 17 insertions(+), 10 deletions(-) diff --git a/fdbclient/ClusterInterface.h b/fdbclient/ClusterInterface.h index b8a2a870bc..b0724e2b57 100644 --- a/fdbclient/ClusterInterface.h +++ b/fdbclient/ClusterInterface.h @@ -145,7 +145,7 @@ struct ItemWithExamples { int count; std::vector> examples; - ItemWithExamples() : count(0) {} + ItemWithExamples() : item{}, count(0) {} ItemWithExamples(T const& item, int count, std::vector> const& examples) : item(item), count(count), examples(examples) {} template diff --git a/fdbclient/CoordinationInterface.h b/fdbclient/CoordinationInterface.h index 0972980658..f090c3abdf 100644 --- a/fdbclient/CoordinationInterface.h +++ b/fdbclient/CoordinationInterface.h @@ -153,13 +153,13 @@ struct OpenDatabaseCoordRequest { Standalone> issues; Standalone> supportedVersions; UID knownClientInfoID; - Key key; + Key clusterKey; vector coordinators; ReplyPromise< struct ClientDBInfo > reply; template void serialize(Ar& ar) { - serializer(ar, issues, supportedVersions, traceLogGroup, knownClientInfoID, key, coordinators, reply); + serializer(ar, issues, supportedVersions, traceLogGroup, knownClientInfoID, clusterKey, coordinators, reply); } }; diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index ef54683758..afebb505b2 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -385,7 +385,7 @@ ACTOR Future monitorNominee( Key key, ClientLeaderRegInterface coord, Asyn state Optional li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskPriority::CoordinationReply ) ); wait( Future(Void()) ); // Make sure we weren't cancelled - TraceEvent("GetLeaderReply").detail("Coordinator", coord.getLeader.getEndpoint().getPrimaryAddress()).detail("Nominee", li.present() ? li.get().changeID : UID()).detail("Key", key.printable()); + TraceEvent("GetLeaderReply").suppressFor(1.0).detail("Coordinator", coord.getLeader.getEndpoint().getPrimaryAddress()).detail("Nominee", li.present() ? li.get().changeID : UID()).detail("ClusterKey", key.printable()); if (li != *info) { *info = li; @@ -542,6 +542,7 @@ OpenDatabaseRequest ClientData::getRequest() { std::map versionMap; std::map maxProtocolMap; + //SOMEDAY: add a yield in this loop for(auto& ci : clientStatusInfoMap) { for(auto& it : ci.second.issues) { auto& entry = issueMap[it]; @@ -593,6 +594,7 @@ ACTOR Future getClientInfoFromLeader( Reference CLIENT_KNOBS->MAX_CLIENT_STATUS_AGE) { + lastRequestTime = now(); req = clientData->getRequest(); } else { resetReply(req); @@ -669,7 +671,7 @@ ACTOR Future monitorProxiesOneGeneration( Referenceget().id; req.supportedVersions = supportedVersions; diff --git a/fdbserver/Coordination.actor.cpp b/fdbserver/Coordination.actor.cpp index 47e910c0f4..5a46283a5e 100644 --- a/fdbserver/Coordination.actor.cpp +++ b/fdbserver/Coordination.actor.cpp @@ -221,7 +221,7 @@ ACTOR Future openDatabase(ClientData* db, int* clientCount, ReferenceclientInfo->get().id == req.knownClientInfoID && !db->clientInfo->get().forward.present()) { choose { when (wait( db->clientInfo->onChange() )) {} - when (wait( delayJittered( 300 ) )) { break; } // The client might be long gone! + when (wait( delayJittered( SERVER_KNOBS->CLIENT_REGISTER_INTERVAL ) )) { break; } // The client might be long gone! } } @@ -257,7 +257,7 @@ ACTOR Future leaderRegister(LeaderElectionRegInterface interf, Key key) { loop choose { when ( OpenDatabaseCoordRequest req = waitNext( interf.openDatabase.getFuture() ) ) { if(!leaderMon.isValid()) { - leaderMon = monitorLeaderForProxies(req.key, req.coordinators, &clientData); + leaderMon = monitorLeaderForProxies(req.clusterKey, req.coordinators, &clientData); } actors.add(openDatabase(&clientData, &clientCount, hasConnectedClients, req)); } @@ -472,13 +472,14 @@ ACTOR Future leaderServer(LeaderElectionRegInterface interf, OnDemandStore loop choose { when ( OpenDatabaseCoordRequest req = waitNext( interf.openDatabase.getFuture() ) ) { - Optional forward = regs.getForward(req.key); + Optional forward = regs.getForward(req.clusterKey); if( forward.present() ) { ClientDBInfo info; + info.id = deterministicRandom()->randomUniqueID(); info.forward = forward.get().serializedInfo; req.reply.send( info ); } else { - regs.getInterface(req.key, id).openDatabase.send( req ); + regs.getInterface(req.clusterKey, id).openDatabase.send( req ); } } when ( GetLeaderRequest req = waitNext( interf.getLeader.getFuture() ) ) { diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index 7a570281ca..5d6a1afead 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -332,6 +332,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( RATEKEEPER_FAILURE_TIME, 1.0 ); init( REPLACE_INTERFACE_DELAY, 60.0 ); init( REPLACE_INTERFACE_CHECK_DELAY, 5.0 ); + init( COORDINATOR_REGISTER_INTERVAL, 30.0 ); + init( CLIENT_REGISTER_INTERVAL, 300.0 ); init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0; init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index 2ce5110742..f59ee2dd74 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -275,6 +275,7 @@ public: double REPLACE_INTERFACE_DELAY; double REPLACE_INTERFACE_CHECK_DELAY; double COORDINATOR_REGISTER_INTERVAL; + double CLIENT_REGISTER_INTERVAL; // Knobs used to select the best policy (via monte carlo) int POLICY_RATING_TESTS; // number of tests per policy (in order to compare) diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index f907a323e0..01c3fec762 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -932,9 +932,9 @@ static JsonBuilderObject clientStatusFetcher(std::mapsecond.count; ver["max_protocol_clients"] = maxClients; + maxSupportedProtocol.erase(cv.first.protocolVersion); } ver["connected_clients"] = clients; @@ -1883,6 +1883,7 @@ static JsonBuilderArray getClientIssuesAsMessages( std::map