diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 67106318cb..61bf411f94 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -2825,24 +2825,15 @@ ACTOR Future<Void> storageServerFailureTracker( if( status->isFailed ) self->restartRecruiting.trigger(); - state double startTime = now(); Future<Void> healthChanged = Never(); if(status->isFailed) { ASSERT(!inHealthyZone); healthChanged = IFailureMonitor::failureMonitor().onStateEqual( interf.waitFailure.getEndpoint(), FailureStatus(false)); } else if(!inHealthyZone) { - healthChanged = waitFailureClient(interf.waitFailure, SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME, 0, TaskDataDistribution); + healthChanged = waitFailureClientStrict(interf.waitFailure, SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME, TaskDataDistribution); } choose { when ( wait(healthChanged) ) { - double elapsed = now() - startTime; - if(!status->isFailed && elapsed < SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME) { - wait(delay(SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME - elapsed)); - if(!IFailureMonitor::failureMonitor().getState( interf.waitFailure.getEndpoint() ).isFailed()) { - continue; - } - } - status->isFailed = !status->isFailed; if(!status->isFailed && !server->teams.size()) { self->doBuildTeams = true; diff --git a/fdbserver/WaitFailure.actor.cpp b/fdbserver/WaitFailure.actor.cpp index 9fa2c8025c..c4ca435551 100644 --- a/fdbserver/WaitFailure.actor.cpp +++ b/fdbserver/WaitFailure.actor.cpp @@ -56,6 +56,16 @@ ACTOR Future<Void> waitFailureClient(RequestStream<ReplyPromise<Void>> waitFailu } } +ACTOR Future<Void> waitFailureClientStrict(RequestStream<ReplyPromise<Void>> waitFailure, double failureReactionTime, int taskID){ + loop { + wait(waitFailureClient(waitFailure, 0, 0, taskID)); + wait(delay(failureReactionTime, taskID) || IFailureMonitor::failureMonitor().onStateEqual( waitFailure.getEndpoint(), FailureStatus(false))); + if(IFailureMonitor::failureMonitor().getState( waitFailure.getEndpoint() ).isFailed()) { + return Void(); + } + } +} + ACTOR Future<Void> waitFailureTracker(RequestStream<ReplyPromise<Void>> waitFailure, Reference<AsyncVar<bool>> failed, double reactionTime, double reactionSlope, int taskID){ loop { try { diff --git a/fdbserver/WaitFailure.h b/fdbserver/WaitFailure.h index f30c8d35f5..9ef3b4c3a0 100644 --- a/fdbserver/WaitFailure.h +++ b/fdbserver/WaitFailure.h @@ -28,6 +28,9 @@ Future<Void> waitFailureServer(const FutureStream<ReplyPromise<Void>>& waitFailu Future<Void> waitFailureClient(const RequestStream<ReplyPromise<Void>>& waitFailure, double const& failureReactionTime=0, double const& failureReactionSlope=0, int const& taskID=TaskDefaultEndpoint); +// talks to a wait failure server, returns Void on failure, reaction time is always waited +Future<Void> waitFailureClientStrict(const RequestStream<ReplyPromise<Void>>& waitFailure, double const& failureReactionTime=0, int const& taskID=TaskDefaultEndpoint); + // talks to a wait failure server, updates failed to be true or false based on failure status. Future<Void> waitFailureTracker(const RequestStream<ReplyPromise<Void>>& waitFailure, Reference<AsyncVar<bool>> const& failed, double const& failureReactionTime=0, double const& failureReactionSlope=0, int const& taskID=TaskDefaultEndpoint);