diff --git a/fdbclient/StatusClient.actor.cpp b/fdbclient/StatusClient.actor.cpp
index 19b214a4a0..59724826ab 100644
--- a/fdbclient/StatusClient.actor.cpp
+++ b/fdbclient/StatusClient.actor.cpp
@@ -373,6 +373,9 @@ ACTOR Future> clusterStatusFetcher(ClusterInterface cI, S
 			if (result.getError().code() == error_code_request_maybe_delivered)
 				messages->push_back(makeMessage("unreachable_cluster_controller",
 					("Unable to communicate with the cluster controller at " + cI.address().toString() + " to get status.").c_str()));
+			else if (result.getError().code() == error_code_server_overloaded)
+				messages->push_back(makeMessage("server_overloaded",
+					"The cluster controller is currently processing too many status requests, and is unable to respond"));
 			else
 				messages->push_back(makeMessage("status_incomplete_error", "Cluster encountered an error fetching status."));
 		}
diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp
index 4626919c64..e99473becf 100644
--- a/fdbserver/ClusterController.actor.cpp
+++ b/fdbserver/ClusterController.actor.cpp
@@ -1829,8 +1829,25 @@ ACTOR Future statusServer(FutureStream< StatusRequest> requests,
 
 			// Get all requests that are ready right *now*, before GetStatus() begins.
 			// All of these requests will be responded to with the next GetStatus() result.
-			while (requests.isReady())
-				requests_batch.push_back(requests.pop());
+			// If requests are batched (STATUS_MIN_TIME_BETWEEN_REQUESTS > 0), cap each batch at
+			// STATUS_MIN_TIME_BETWEEN_REQUESTS * MAX_STATUS_REQUESTS_PER_SECOND requests so that no more
+			// than MAX_STATUS_REQUESTS_PER_SECOND requests are answered per second. Requests beyond the
+			// cap are rejected immediately with server_overloaded instead of joining the batch.
+			while (requests.isReady()) {
+				auto req = requests.pop();
+				if (SERVER_KNOBS->STATUS_MIN_TIME_BETWEEN_REQUESTS > 0.0 &&
+				    requests_batch.size() + 1 >
+				        SERVER_KNOBS->STATUS_MIN_TIME_BETWEEN_REQUESTS * SERVER_KNOBS->MAX_STATUS_REQUESTS_PER_SECOND) {
+					// This branch runs once per rejected request, so rate-limit the trace event
+					// rather than logging every single rejection.
+					TraceEvent(SevWarnAlways, "TooManyStatusRequests")
+					    .suppressFor(1.0)
+					    .detail("BatchSize", requests_batch.size());
+					req.reply.sendError(server_overloaded());
+				} else {
+					requests_batch.push_back(req);
+				}
+			}
 
 			// Get status but trap errors to send back to client.
 			vector> workers;
diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp
index 9f594c876a..320198ce1c 100644
--- a/fdbserver/Knobs.cpp
+++ b/fdbserver/Knobs.cpp
@@ -402,6 +402,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
 
 	// Status
 	init( STATUS_MIN_TIME_BETWEEN_REQUESTS, 0.0 );
+	init( MAX_STATUS_REQUESTS_PER_SECOND, 256.0 );
 	init( CONFIGURATION_ROWS_TO_FETCH, 20000 );
 
 	// IPager
diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h
index 11eab24db9..dcb4f74766 100644
--- a/fdbserver/Knobs.h
+++ b/fdbserver/Knobs.h
@@ -339,6 +339,7 @@ public:
 
 	// Status
 	double STATUS_MIN_TIME_BETWEEN_REQUESTS;
+	double MAX_STATUS_REQUESTS_PER_SECOND; // only enforced while STATUS_MIN_TIME_BETWEEN_REQUESTS > 0
 	int CONFIGURATION_ROWS_TO_FETCH;
 
 	// IPager