add a large random delay on failure detection so that not all storage servers need to attempt to become the cluster controller
commit 07111f0e41
parent 2ecea80539
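In effect, the patch below replaces the single fixed knob DELAY_STORAGE_CANDIDACY_SECONDS with a MIN/MAX pair and draws the candidacy timeout uniformly at random from that range, so storage servers that stop hearing from the cluster controller stagger their attempts to take over instead of all contesting leadership at once. A minimal standalone sketch of that delay calculation, using the C++ standard library in place of FoundationDB's deterministicRandom() (the function and parameter names here are illustrative, not part of the patch):

#include <random>

// Sketch only: draw a delay uniformly from [minDelaySeconds, maxDelaySeconds],
// mirroring the knob-based expression MIN + random01() * (MAX - MIN) used in
// the diff below.
double pickCandidacyDelay(double minDelaySeconds, double maxDelaySeconds, std::mt19937_64& rng) {
	std::uniform_real_distribution<double> uniform01(0.0, 1.0);
	return minDelaySeconds + uniform01(rng) * (maxDelaySeconds - minDelaySeconds);
}

The actual change uses deterministicRandom()->random01() for the random fraction, which keeps the chosen delay reproducible under simulation; the arithmetic is the same.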
@@ -563,7 +563,8 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
 	init( DEGRADED_WARNING_RESET_DELAY, 7*24*60*60 );
 	init( TRACE_LOG_FLUSH_FAILURE_CHECK_INTERVAL_SECONDS, 10 );
 	init( TRACE_LOG_PING_TIMEOUT_SECONDS, 5.0 );
-	init( DELAY_STORAGE_CANDIDACY_SECONDS, 10 ); if ( randomize && BUGGIFY ) DELAY_STORAGE_CANDIDACY_SECONDS = 10;
+	init( MIN_DELAY_STORAGE_CANDIDACY_SECONDS, 10.0 );
+	init( MAX_DELAY_STORAGE_CANDIDACY_SECONDS, 30.0 );
 	init( DBINFO_FAILED_DELAY, 1.0 );

 	// Test harness
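With the defaults above, each storage server waits somewhere between 10 and 30 seconds before putting itself forward. Presumably both values can still be tuned per deployment through the usual server-knob overrides (for example fdbserver arguments along the lines of --knob_min_delay_storage_candidacy_seconds=... and --knob_max_delay_storage_candidacy_seconds=...); that flag spelling follows the standard knob-override convention and is not something introduced by this patch.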
@@ -492,7 +492,8 @@ public:
 	double DEGRADED_WARNING_RESET_DELAY;
 	int64_t TRACE_LOG_FLUSH_FAILURE_CHECK_INTERVAL_SECONDS;
 	double TRACE_LOG_PING_TIMEOUT_SECONDS;
-	int DELAY_STORAGE_CANDIDACY_SECONDS; // Listen for a leader for N seconds, and if not heard, then try to become the leader.
+	double MIN_DELAY_STORAGE_CANDIDACY_SECONDS; // Listen for a leader for N seconds, and if not heard, then try to become the leader.
+	double MAX_DELAY_STORAGE_CANDIDACY_SECONDS;
 	double DBINFO_FAILED_DELAY;

 	// Test harness
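The declarations also switch from int to double, matching the neighbouring delay knobs (TRACE_LOG_PING_TIMEOUT_SECONDS, DBINFO_FAILED_DELAY) and allowing fractional-second values.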
@@ -1654,7 +1654,7 @@ ACTOR Future<Void> monitorLeaderRemotelyWithDelayedCandidacy( Reference<ClusterC
 		if(currentCC->get().present() && dbInfo->get().clusterInterface == currentCC->get().get() && IFailureMonitor::failureMonitor().getState( currentCC->get().get().registerWorker.getEndpoint() ).isAvailable()) {
 			timeout = Future<Void>();
 		} else if(!timeout.isValid()) {
-			timeout = delay( SERVER_KNOBS->DELAY_STORAGE_CANDIDACY_SECONDS );
+			timeout = delay( SERVER_KNOBS->MIN_DELAY_STORAGE_CANDIDACY_SECONDS + (deterministicRandom()->random01()*(SERVER_KNOBS->MAX_DELAY_STORAGE_CANDIDACY_SECONDS-SERVER_KNOBS->MIN_DELAY_STORAGE_CANDIDACY_SECONDS)) );
 		}
 		choose {
 			when( wait(currentCC->onChange()) ) {}
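The new expression draws the timeout uniformly from [MIN, MAX]: random01() yields a fraction in [0, 1), so the delay is MIN plus a random share of the (MAX - MIN) window. With the defaults of 10.0 and 30.0 seconds, candidacy attempts are spread across a 20-second window with an expected delay of 20 seconds, rather than every storage server firing at exactly the same 10-second mark.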
@@ -1713,7 +1713,7 @@ ACTOR Future<Void> fdbd(
 		actors.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo"));
 		if (processClass == ProcessClass::TesterClass) {
 			actors.push_back( reportErrors( monitorLeader( connFile, cc ), "ClusterController" ) );
-		} else if (processClass == ProcessClass::StorageClass && SERVER_KNOBS->DELAY_STORAGE_CANDIDACY_SECONDS) {
+		} else if (processClass == ProcessClass::StorageClass && SERVER_KNOBS->MAX_DELAY_STORAGE_CANDIDACY_SECONDS > 0) {
 			actors.push_back( reportErrors( monitorLeaderRemotelyWithDelayedCandidacy( connFile, cc, asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities, dbInfo ), "ClusterController" ) );
 		} else {
 			actors.push_back( reportErrors( clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") );
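The gate on the delayed-candidacy path changes accordingly: storage-class processes use it whenever MAX_DELAY_STORAGE_CANDIDACY_SECONDS is greater than zero, while setting the max knob to zero drops them back into the ordinary clusterController() path (immediate candidacy), preserving the opt-out that the old truthiness check on DELAY_STORAGE_CANDIDACY_SECONDS provided.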