mirror of
https://github.com/apple/foundationdb.git
synced 2025-06-02 03:12:12 +08:00
The consistency check should retry if it couldn't find all the commit proxies when getting key server locations
This commit is contained in:
parent
c492f83bf4
commit
dc2bd78aa7
@ -166,6 +166,7 @@ ACTOR Future<bool> getKeyServers(
|
||||
Promise<std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>>> keyServersPromise,
|
||||
KeyRangeRef kr,
|
||||
bool performQuiescentChecks,
|
||||
bool failureIsError,
|
||||
bool* success);
|
||||
ACTOR Future<bool> getKeyLocations(Database cx,
|
||||
std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>> shards,
|
||||
|
@ -97,6 +97,7 @@ ACTOR Future<bool> getKeyServers(
|
||||
Promise<std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>>> keyServersPromise,
|
||||
KeyRangeRef kr,
|
||||
bool performQuiescentChecks,
|
||||
bool failureIsError,
|
||||
bool* success) {
|
||||
state std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>> keyServers;
|
||||
|
||||
@ -134,7 +135,7 @@ ACTOR Future<bool> getKeyServers(
|
||||
TraceEvent("ConsistencyCheck_CommitProxyUnavailable")
|
||||
.error(shards.getError())
|
||||
.detail("CommitProxyID", commitProxyInfo->getId(i));
|
||||
testFailure("Commit proxy unavailable", performQuiescentChecks, success, true);
|
||||
testFailure("Commit proxy unavailable", performQuiescentChecks, success, failureIsError);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -979,7 +980,8 @@ ACTOR Future<Void> runDataValidationCheck(ConsistencyScanData* self) {
|
||||
// Get a list of key servers; verify that the TLogs and master all agree about who the key servers are
|
||||
state Promise<std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>>> keyServerPromise;
|
||||
state std::map<UID, StorageServerInterface> tssMapping;
|
||||
bool keyServerResult = wait(getKeyServers(self->db, keyServerPromise, keyServersKeys, false, &self->success));
|
||||
bool keyServerResult =
|
||||
wait(getKeyServers(self->db, keyServerPromise, keyServersKeys, false, false, &self->success));
|
||||
if (keyServerResult) {
|
||||
state std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>> keyServers =
|
||||
keyServerPromise.getFuture().get();
|
||||
|
@ -345,8 +345,12 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
||||
|
||||
// Get a list of key servers; verify that the TLogs and master all agree about who the key servers are
|
||||
state Promise<std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>>> keyServerPromise;
|
||||
bool keyServerResult = wait(
|
||||
getKeyServers(cx, keyServerPromise, keyServersKeys, self->performQuiescentChecks, &self->success));
|
||||
bool keyServerResult = wait(getKeyServers(cx,
|
||||
keyServerPromise,
|
||||
keyServersKeys,
|
||||
self->performQuiescentChecks,
|
||||
self->failureIsError,
|
||||
&self->success));
|
||||
if (keyServerResult) {
|
||||
state std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>> keyServers =
|
||||
keyServerPromise.getFuture().get();
|
||||
@ -797,8 +801,8 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
||||
bool removePrefix) {
|
||||
// get shards paired with corresponding storage servers
|
||||
state Promise<std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>>> keyServerPromise;
|
||||
bool keyServerResult =
|
||||
wait(getKeyServers(cx, keyServerPromise, range, self->performQuiescentChecks, &self->success));
|
||||
bool keyServerResult = wait(getKeyServers(
|
||||
cx, keyServerPromise, range, self->performQuiescentChecks, self->failureIsError, &self->success));
|
||||
if (!keyServerResult)
|
||||
return false;
|
||||
state std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>> shards =
|
||||
@ -1160,14 +1164,6 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
||||
}
|
||||
|
||||
if (foundExtraDataStore) {
|
||||
// Let the cluster fully recover after rebooting/killing storage servers with extra stores.
|
||||
//
|
||||
// This requires an end-to-end committing transaction to ensure recovery has started in case
|
||||
// any stateless processes, like the commit proxy, were killed.
|
||||
wait(::success(doEmptyCommit(cx)));
|
||||
while (self->dbInfo->get().recoveryState != RecoveryState::FULLY_RECOVERED) {
|
||||
wait(self->dbInfo->onChange());
|
||||
}
|
||||
self->testFailure("Extra data stores present on workers");
|
||||
return false;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user