Mirror of https://github.com/apple/foundationdb.git, synced 2025-05-14 18:02:31 +08:00
Recruit new singleton for consistency checker. (#5804)
* Recruit new singleton for consistency checker.
* Recruit the consistency checker only if enabled.
* Add a yield in monitorConsistencyChecker().
* Minor fixes.
* Consistency check workload enhancements.
* Minor fixes and clarifications.
* clang format
* Clang format.
* Minor fixes, cleanup, debug tracing.
* Misc.
* Move the consistency scan information from dbconfig to a key backed object.
* Move consistency scan config out of db config to a state object and feature rename.
* ConsistencyCheck workload refactor.
* devFormat
* Update fdbcli/ConsistencyScanCommand.actor.cpp
* Review Comments.

Co-authored-by: negoyal <neelam.goyal@gmail.com>
Co-authored-by: Ata E Husain Bohra <ata.husain@snowflake.com>
Parent: 17c855be7e
Commit: 1bd97fe628
122 fdbcli/ConsistencyScanCommand.actor.cpp (new file)
@ -0,0 +1,122 @@
/*
 * ConsistencyScanCommand.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fdbcli/fdbcli.actor.h"

#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/IClientApi.h"

#include "flow/Arena.h"
#include "flow/FastRef.h"
#include "flow/ThreadHelper.actor.h"
#include "fdbclient/ConsistencyScanInterface.h"
#include "flow/actorcompiler.h" // This must be the last #include.

namespace fdb_cli {

ACTOR Future<bool> consistencyScanCommandActor(Database db, std::vector<StringRef> tokens) {
    state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
    // Here we do not proceed in a try-catch loop since the transaction is always supposed to succeed.
    // If not, the outer loop catch block (fdbcli.actor.cpp) will handle the error and print out the error message
    state int usageError = 0;
    state ConsistencyScanInfo csInfo = ConsistencyScanInfo();
    tr->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES);
    tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);

    // Get the existing consistencyScanInfo object if present
    state Optional<Value> consistencyScanInfo = wait(ConsistencyScanInfo::getInfo(tr));
    wait(tr->commit());
    if (consistencyScanInfo.present())
        csInfo = ObjectReader::fromStringRef<ConsistencyScanInfo>(consistencyScanInfo.get(), IncludeVersion());
    tr->reset();

    if (tokens.size() == 1) {
        printf("Consistency Scan Info: %s\n", csInfo.toString().c_str());
    } else if ((tokens.size() == 2) && tokencmp(tokens[1], "off")) {
        csInfo.consistency_scan_enabled = false;
        wait(ConsistencyScanInfo::setInfo(tr, csInfo));
        wait(tr->commit());
    } else if ((tokencmp(tokens[1], "on") && tokens.size() > 2)) {
        csInfo.consistency_scan_enabled = true;
        state std::vector<StringRef>::iterator t;
        for (t = tokens.begin() + 2; t != tokens.end(); ++t) {
            if (tokencmp(t->toString(), "restart")) {
                if (++t != tokens.end()) {
                    if (tokencmp(t->toString(), "0")) {
                        csInfo.restart = false;
                    } else if (tokencmp(t->toString(), "1")) {
                        csInfo.restart = true;
                    } else {
                        usageError = 1;
                    }
                } else {
                    usageError = 1;
                }
            } else if (tokencmp(t->toString(), "maxRate")) {
                if (++t != tokens.end()) {
                    char* end;
                    csInfo.max_rate = std::strtod(t->toString().data(), &end);
                    if (!std::isspace(*end) && (*end != '\0')) {
                        fprintf(stderr, "ERROR: %s failed to parse.\n", t->toString().c_str());
                        return false;
                    }
                } else {
                    usageError = 1;
                }
            } else if (tokencmp(t->toString(), "targetInterval")) {
                if (++t != tokens.end()) {
                    char* end;
                    csInfo.target_interval = std::strtod(t->toString().data(), &end);
                    if (!std::isspace(*end) && (*end != '\0')) {
                        fprintf(stderr, "ERROR: %s failed to parse.\n", t->toString().c_str());
                        return false;
                    }
                } else {
                    usageError = 1;
                }
            } else {
                usageError = 1;
            }
        }

        if (!usageError) {
            wait(ConsistencyScanInfo::setInfo(tr, csInfo));
            wait(tr->commit());
        }
    } else {
        usageError = 1;
    }

    if (usageError) {
        printUsage(tokens[0]);
        return false;
    }
    return true;
}

CommandFactory consistencyScanFactory(
    "consistencyscan",
    CommandHelp("consistencyscan <on|off> <restart 0|1> <maxRate val> <targetInterval val>",
                "enables or disables consistency scan",
                "Calling this command with `on' enables the consistency scan process to run the scan with given "
                "arguments and `off' will halt the scan. "
                "Calling this command with no arguments will display if consistency scan is currently enabled.\n"));

} // namespace fdb_cli
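Illustrative usage (not part of the diff): a plausible fdbcli session driving the new command could look like the following; the summary line mirrors ConsistencyScanInfo::toString() above, and the numeric values are made-up examples.

fdb> consistencyscan on maxRate 50000000 targetInterval 604800
fdb> consistencyscan
Consistency Scan Info: consistency_scan_enabled = 1, restart = 0, max_rate = 50000000, target_interval = 604800
fdb> consistencyscan off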
@ -1582,6 +1582,13 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise, Reference<ClusterCo
        continue;
    }

    if (tokencmp(tokens[0], "consistencyscan")) {
        bool _result = wait(makeInterruptable(consistencyScanCommandActor(localDb, tokens)));
        if (!_result)
            is_error = true;
        continue;
    }

    if (tokencmp(tokens[0], "profile")) {
        getTransaction(db, managementTenant, tr, options, intrans);
        bool _result = wait(makeInterruptable(profileCommandActor(localDb, tr, tokens, intrans)));
@ -166,6 +166,8 @@ ACTOR Future<bool> configureTenantCommandActor(Reference<IDatabase> db, std::vec
ACTOR Future<bool> consistencyCheckCommandActor(Reference<ITransaction> tr,
                                                std::vector<StringRef> tokens,
                                                bool intrans);
// consistency scan command
ACTOR Future<bool> consistencyScanCommandActor(Database localDb, std::vector<StringRef> tokens);
// coordinators command
ACTOR Future<bool> coordinatorsCommandActor(Reference<IDatabase> db, std::vector<StringRef> tokens);
// createtenant command
@ -66,6 +66,16 @@ void parse(int* i, ValueRef const& v) {
    *i = atoi(v.toString().c_str());
}

void parse(int64_t* i, ValueRef const& v) {
    // FIXME: Sanity checking
    *i = atoll(v.toString().c_str());
}

void parse(double* i, ValueRef const& v) {
    // FIXME: Sanity checking
    *i = atof(v.toString().c_str());
}

void parseReplicationPolicy(Reference<IReplicationPolicy>* policy, ValueRef const& v) {
    BinaryReader reader(v, IncludeVersion());
    serializeReplicationPolicy(reader, *policy);
@ -137,6 +137,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"blob_manager",
"blob_worker",
"encrypt_key_proxy",
"consistency_scan",
"storage_cache",
"router",
"coordinator"

@ -561,6 +562,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(

"unreachable_ratekeeper_worker",
"unreachable_blobManager_worker",
"unreachable_encryptKeyProxy_worker",
"unreachable_consistencyScan_worker",
"unreadable_configuration",
"full_replication_timeout",
"client_issues",

@ -855,6 +857,19 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(

        "aes_256_ctr"
    ]}
},
"consistency_scan_info":{
    "consistency_scan_enabled":false,
    "restart":false,
    "max_rate":0,
    "target_interval":0,
    "bytes_read_prev_round":0,
    "last_round_start_datetime":"2022-04-20 00:05:05.123 +0000",
    "last_round_finish_datetime":"1970-01-01 00:00:00.000 +0000",
    "last_round_start_timestamp":1648857905.123,
    "last_round_finish_timestamp":0,
    "smoothed_round_seconds":1,
    "finished_rounds":1
},
"data":{
    "least_operating_space_bytes_log_server":0,
    "average_partition_size_bytes":0,
@ -546,6 +546,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
    init( ATTEMPT_RECRUITMENT_DELAY, 0.035 );
    init( WAIT_FOR_DISTRIBUTOR_JOIN_DELAY, 1.0 );
    init( WAIT_FOR_RATEKEEPER_JOIN_DELAY, 1.0 );
    init( WAIT_FOR_CONSISTENCYSCAN_JOIN_DELAY, 1.0 );
    init( WAIT_FOR_BLOB_MANAGER_JOIN_DELAY, 1.0 );
    init( WAIT_FOR_ENCRYPT_KEY_PROXY_JOIN_DELAY, 1.0 );
    init( WORKER_FAILURE_TIME, 1.0 ); if( randomize && BUGGIFY ) WORKER_FAILURE_TIME = 10.0;

@ -556,6 +557,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi

    init( CHECK_REMOTE_HEALTH_INTERVAL, 60 );
    init( FORCE_RECOVERY_CHECK_DELAY, 5.0 );
    init( RATEKEEPER_FAILURE_TIME, 1.0 );
    init( CONSISTENCYSCAN_FAILURE_TIME, 1.0 );
    init( BLOB_MANAGER_FAILURE_TIME, 1.0 );
    init( REPLACE_INTERFACE_DELAY, 60.0 );
    init( REPLACE_INTERFACE_CHECK_DELAY, 5.0 );
@ -852,6 +852,8 @@ const KeyRef perpetualStorageWiggleStatsPrefix(
const KeyRef triggerDDTeamInfoPrintKey(LiteralStringRef("\xff/triggerDDTeamInfoPrint"));

const KeyRef consistencyScanInfoKey = "\xff/consistencyScanInfo"_sr;

const KeyRef encryptionAtRestModeConfKey(LiteralStringRef("\xff/conf/encryption_at_rest_mode"));

const KeyRangeRef excludedServersKeys(LiteralStringRef("\xff/conf/excluded/"), LiteralStringRef("\xff/conf/excluded0"));
189 fdbclient/include/fdbclient/ConsistencyScanInterface.h (new file)
@ -0,0 +1,189 @@
/*
 * ConsistencyScanInterface.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef FDBCLIENT_CONSISTENCYSCANINTERFACE_H
#define FDBCLIENT_CONSISTENCYSCANINTERFACE_H

#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/DatabaseConfiguration.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/RunTransaction.actor.h"
#include "fdbrpc/fdbrpc.h"
#include "fdbrpc/Locality.h"

struct ConsistencyScanInterface {
    constexpr static FileIdentifier file_identifier = 4983265;
    RequestStream<ReplyPromise<Void>> waitFailure;
    RequestStream<struct HaltConsistencyScanRequest> haltConsistencyScan;
    struct LocalityData locality;
    UID myId;

    ConsistencyScanInterface() {}
    explicit ConsistencyScanInterface(const struct LocalityData& l, UID id) : locality(l), myId(id) {}

    void initEndpoints() {}
    UID id() const { return myId; }
    NetworkAddress address() const { return waitFailure.getEndpoint().getPrimaryAddress(); }
    bool operator==(const ConsistencyScanInterface& r) const { return id() == r.id(); }
    bool operator!=(const ConsistencyScanInterface& r) const { return !(*this == r); }

    template <class Archive>
    void serialize(Archive& ar) {
        serializer(ar, waitFailure, haltConsistencyScan, locality, myId);
    }
};

struct HaltConsistencyScanRequest {
    constexpr static FileIdentifier file_identifier = 2323417;
    UID requesterID;
    ReplyPromise<Void> reply;

    HaltConsistencyScanRequest() {}
    explicit HaltConsistencyScanRequest(UID uid) : requesterID(uid) {}

    template <class Ar>
    void serialize(Ar& ar) {
        serializer(ar, requesterID, reply);
    }
};

// consistency scan configuration and metrics
struct ConsistencyScanInfo {
    constexpr static FileIdentifier file_identifier = 732125;
    bool consistency_scan_enabled = false;
    bool restart = false;
    int64_t max_rate = 0;
    int64_t target_interval = CLIENT_KNOBS->CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME;
    int64_t bytes_read_prev_round = 0;
    KeyRef progress_key = KeyRef();

    // Round Metrics - one round of complete validation across all SSs
    // Start and finish are in epoch seconds
    double last_round_start = 0;
    double last_round_finish = 0;
    TimerSmoother smoothed_round_duration;
    int finished_rounds = 0;

    ConsistencyScanInfo() : smoothed_round_duration(20.0 * 60) {}
    ConsistencyScanInfo(bool enabled, bool r, uint64_t rate, uint64_t interval)
      : consistency_scan_enabled(enabled), restart(r), max_rate(rate), target_interval(interval),
        smoothed_round_duration(20.0 * 60) {}

    template <class Ar>
    void serialize(Ar& ar) {
        double round_total;
        if (!ar.isDeserializing) {
            round_total = smoothed_round_duration.getTotal();
        }
        serializer(ar,
                   consistency_scan_enabled,
                   restart,
                   max_rate,
                   target_interval,
                   bytes_read_prev_round,
                   last_round_start,
                   last_round_finish,
                   round_total,
                   finished_rounds);
        if (ar.isDeserializing) {
            smoothed_round_duration.reset(round_total);
        }
    }

    static Future<Void> setInfo(Reference<ReadYourWritesTransaction> tr, ConsistencyScanInfo info) {
        tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
        tr->setOption(FDBTransactionOptions::LOCK_AWARE);
        tr->set(consistencyScanInfoKey, ObjectWriter::toValue(info, IncludeVersion()));
        return Void();
    }

    static Future<Void> setInfo(Database cx, ConsistencyScanInfo info) {
        return runRYWTransaction(
            cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void> { return setInfo(tr, info); });
    }

    static Future<Optional<Value>> getInfo(Reference<ReadYourWritesTransaction> tr) {
        tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
        tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE);
        return tr->get(consistencyScanInfoKey);
    }

    static Future<Optional<Value>> getInfo(Database cx) {
        return runRYWTransaction(
            cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Optional<Value>> { return getInfo(tr); });
    }

    StatusObject toJSON() const {
        StatusObject result;
        result["consistency_scan_enabled"] = consistency_scan_enabled;
        result["restart"] = restart;
        result["max_rate"] = max_rate;
        result["target_interval"] = target_interval;
        result["bytes_read_prev_round"] = bytes_read_prev_round;
        result["last_round_start_datetime"] = epochsToGMTString(last_round_start);
        result["last_round_finish_datetime"] = epochsToGMTString(last_round_finish);
        result["last_round_start_timestamp"] = last_round_start;
        result["last_round_finish_timestamp"] = last_round_finish;
        result["smoothed_round_seconds"] = smoothed_round_duration.smoothTotal();
        result["finished_rounds"] = finished_rounds;
        return result;
    }

    std::string toString() const {
        return format("consistency_scan_enabled = %d, restart = %d, max_rate = %ld, target_interval = %ld",
                      consistency_scan_enabled,
                      restart,
                      max_rate,
                      target_interval);
    }
};

Future<Version> getVersion(Database const& cx);
Future<bool> getKeyServers(
    Database const& cx,
    Promise<std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>>> const& keyServersPromise,
    KeyRangeRef const& kr,
    bool const& performQuiescentChecks);
Future<bool> getKeyLocations(Database const& cx,
    std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>> const& shards,
    Promise<Standalone<VectorRef<KeyValueRef>>> const& keyLocationPromise,
    bool const& performQuiescentChecks);
Future<bool> checkDataConsistency(Database const& cx,
    VectorRef<KeyValueRef> const& keyLocations,
    DatabaseConfiguration const& configuration,
    std::map<UID, StorageServerInterface> const& tssMapping,
    bool const& performQuiescentChecks,
    bool const& performTSSCheck,
    bool const& firstClient,
    bool const& failureIsError,
    int const& clientId,
    int const& clientCount,
    bool const& distributed,
    bool const& shuffleShards,
    int const& shardSampleFactor,
    int64_t const& sharedRandomNumber,
    int64_t const& repetitions,
    int64_t* const& bytesReadInPreviousRound,
    int const& restart,
    int64_t const& maxRate,
    int64_t const& targetInterval,
    KeyRef const& progressKey);

#endif // FDBCLIENT_CONSISTENCYSCANINTERFACE_H
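As a minimal sketch (not part of this commit) of how the Database-level helpers above compose, the following actor enables the scan with an illustrative 1 MB/s cap and a one-week target interval, then reads the persisted state back. It assumes the usual flow actor-compiler context and an already-opened Database handle named db.

// Illustrative only: writes ConsistencyScanInfo via setInfo() and reads it back via getInfo().
// Retry handling is provided by runRYWTransaction() inside the helpers.
ACTOR Future<Void> enableConsistencyScanExample(Database db) {
    state ConsistencyScanInfo info(/*enabled*/ true, /*restart*/ false, /*rate*/ 1000000, /*interval*/ 7 * 24 * 60 * 60);
    wait(ConsistencyScanInfo::setInfo(db, info));
    Optional<Value> stored = wait(ConsistencyScanInfo::getInfo(db));
    if (stored.present()) {
        ConsistencyScanInfo readBack = ObjectReader::fromStringRef<ConsistencyScanInfo>(stored.get(), IncludeVersion());
        printf("Consistency Scan Info: %s\n", readBack.toString().c_str());
    }
    return Void();
}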
@ -457,6 +457,7 @@ public:
    double ATTEMPT_RECRUITMENT_DELAY;
    double WAIT_FOR_DISTRIBUTOR_JOIN_DELAY;
    double WAIT_FOR_RATEKEEPER_JOIN_DELAY;
    double WAIT_FOR_CONSISTENCYSCAN_JOIN_DELAY;
    double WAIT_FOR_BLOB_MANAGER_JOIN_DELAY;
    double WAIT_FOR_ENCRYPT_KEY_PROXY_JOIN_DELAY;
    double WORKER_FAILURE_TIME;

@ -470,6 +471,7 @@ public:

    double CHECK_REMOTE_HEALTH_INTERVAL; // Remote DC health refresh interval.
    double FORCE_RECOVERY_CHECK_DELAY;
    double RATEKEEPER_FAILURE_TIME;
    double CONSISTENCYSCAN_FAILURE_TIME;
    double BLOB_MANAGER_FAILURE_TIME;
    double REPLACE_INTERFACE_DELAY;
    double REPLACE_INTERFACE_CHECK_DELAY;
@ -163,6 +163,9 @@ extern const KeyRef cacheChangePrefix;
const Key cacheChangeKeyFor(uint16_t idx);
uint16_t cacheChangeKeyDecodeIndex(const KeyRef& key);

// For persisting the consistency scan configuration and metrics
extern const KeyRef consistencyScanInfoKey;

// "\xff/tss/[[serverId]]" := "[[tssId]]"
extern const KeyRangeRef tssMappingKeys;
@ -240,6 +240,24 @@ ProcessClass::Fitness ProcessClass::machineClassFitness(ClusterRole role) const
    default:
        return ProcessClass::WorstFit;
    }
case ProcessClass::ConsistencyScan:
    switch (_class) {
    case ProcessClass::ConsistencyScanClass:
        return ProcessClass::BestFit;
    case ProcessClass::StatelessClass:
        return ProcessClass::GoodFit;
    case ProcessClass::UnsetClass:
        return ProcessClass::UnsetFit;
    case ProcessClass::MasterClass:
        return ProcessClass::OkayFit;
    case ProcessClass::CoordinatorClass:
    case ProcessClass::TesterClass:
    case ProcessClass::StorageCacheClass:
    case ProcessClass::BlobWorkerClass:
        return ProcessClass::NeverAssign;
    default:
        return ProcessClass::WorstFit;
    }
case ProcessClass::BlobManager:
    switch (_class) {
    case ProcessClass::BlobManagerClass:
@ -43,6 +43,7 @@ struct ProcessClass {
    DataDistributorClass,
    CoordinatorClass,
    RatekeeperClass,
    ConsistencyScanClass,
    StorageCacheClass,
    BackupClass,
    GrvProxyClass,

@ -72,6 +73,7 @@ struct ProcessClass {

    ClusterController,
    DataDistributor,
    Ratekeeper,
    ConsistencyScan,
    BlobManager,
    BlobWorker,
    StorageCache,

@ -110,6 +112,7 @@ public:

    else if (s=="data_distributor") _class = DataDistributorClass;
    else if (s=="coordinator") _class = CoordinatorClass;
    else if (s=="ratekeeper") _class = RatekeeperClass;
    else if (s=="consistency_scan") _class = ConsistencyScanClass;
    else if (s=="blob_manager") _class = BlobManagerClass;
    else if (s=="blob_worker") _class = BlobWorkerClass;
    else if (s=="storage_cache") _class = StorageCacheClass;

@ -140,6 +143,7 @@ public:

    else if (classStr=="data_distributor") _class = DataDistributorClass;
    else if (classStr=="coordinator") _class = CoordinatorClass;
    else if (classStr=="ratekeeper") _class = RatekeeperClass;
    else if (classStr=="consistency_scan") _class = ConsistencyScanClass;
    else if (classStr=="blob_manager") _class = BlobManagerClass;
    else if (classStr=="blob_worker") _class = BlobWorkerClass;
    else if (classStr=="storage_cache") _class = StorageCacheClass;

@ -180,6 +184,7 @@ public:

    case DataDistributorClass: return "data_distributor";
    case CoordinatorClass: return "coordinator";
    case RatekeeperClass: return "ratekeeper";
    case ConsistencyScanClass: return "consistency_scan";
    case BlobManagerClass: return "blob_manager";
    case BlobWorkerClass: return "blob_worker";
    case StorageCacheClass: return "storage_cache";
@ -186,6 +186,8 @@ public:
        return false;
    case ProcessClass::RatekeeperClass:
        return false;
    case ProcessClass::ConsistencyScanClass:
        return false;
    case ProcessClass::BlobManagerClass:
        return false;
    case ProcessClass::StorageCacheClass:
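For completeness (an assumed operational detail, not shown in this diff): the new "consistency_scan" class string is what an operator would use to pin the role to a process via the standard process-class setting, e.g.

fdbserver --class consistency_scan ...    (command line)
class = consistency_scan                  (process section of foundationdb.conf)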
@ -26,6 +26,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "fdbclient/SystemData.h"
|
||||
#include "fdbclient/DatabaseContext.h"
|
||||
#include "fdbrpc/FailureMonitor.h"
|
||||
#include "fdbclient/EncryptKeyProxyInterface.h"
|
||||
#include "fdbserver/Knobs.h"
|
||||
@ -138,6 +139,31 @@ struct DataDistributorSingleton : Singleton<DataDistributorInterface> {
|
||||
}
|
||||
};
|
||||
|
||||
struct ConsistencyScanSingleton : Singleton<ConsistencyScanInterface> {
|
||||
|
||||
ConsistencyScanSingleton(const Optional<ConsistencyScanInterface>& interface) : Singleton(interface) {}
|
||||
|
||||
Role getRole() const { return Role::CONSISTENCYSCAN; }
|
||||
ProcessClass::ClusterRole getClusterRole() const { return ProcessClass::ConsistencyScan; }
|
||||
|
||||
void setInterfaceToDbInfo(ClusterControllerData* cc) const {
|
||||
if (interface.present()) {
|
||||
TraceEvent("CCCK_SetInf", cc->id).detail("Id", interface.get().id());
|
||||
cc->db.setConsistencyScan(interface.get());
|
||||
}
|
||||
}
|
||||
void halt(ClusterControllerData* cc, Optional<Standalone<StringRef>> pid) const {
|
||||
if (interface.present()) {
|
||||
cc->id_worker[pid].haltConsistencyScan =
|
||||
brokenPromiseToNever(interface.get().haltConsistencyScan.getReply(HaltConsistencyScanRequest(cc->id)));
|
||||
}
|
||||
}
|
||||
void recruit(ClusterControllerData* cc) const {
|
||||
cc->lastRecruitTime = now();
|
||||
cc->recruitConsistencyScan.set(true);
|
||||
}
|
||||
};
|
||||
|
||||
struct BlobManagerSingleton : Singleton<BlobManagerInterface> {
|
||||
|
||||
BlobManagerSingleton(const Optional<BlobManagerInterface>& interface) : Singleton(interface) {}
|
||||
@ -248,6 +274,7 @@ ACTOR Future<Void> clusterWatchDatabase(ClusterControllerData* cluster,
|
||||
dbInfo.ratekeeper = db->serverInfo->get().ratekeeper;
|
||||
dbInfo.blobManager = db->serverInfo->get().blobManager;
|
||||
dbInfo.encryptKeyProxy = db->serverInfo->get().encryptKeyProxy;
|
||||
dbInfo.consistencyScan = db->serverInfo->get().consistencyScan;
|
||||
dbInfo.latencyBandConfig = db->serverInfo->get().latencyBandConfig;
|
||||
dbInfo.myLocality = db->serverInfo->get().myLocality;
|
||||
dbInfo.client = ClientDBInfo();
|
||||
@ -622,6 +649,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
||||
// Try to find a new process for each singleton.
|
||||
WorkerDetails newRKWorker = findNewProcessForSingleton(self, ProcessClass::Ratekeeper, id_used);
|
||||
WorkerDetails newDDWorker = findNewProcessForSingleton(self, ProcessClass::DataDistributor, id_used);
|
||||
WorkerDetails newCSWorker = findNewProcessForSingleton(self, ProcessClass::ConsistencyScan, id_used);
|
||||
|
||||
WorkerDetails newBMWorker;
|
||||
if (self->db.blobGranulesEnabled.get()) {
|
||||
@ -636,6 +664,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
||||
// Find best possible fitnesses for each singleton.
|
||||
auto bestFitnessForRK = findBestFitnessForSingleton(self, newRKWorker, ProcessClass::Ratekeeper);
|
||||
auto bestFitnessForDD = findBestFitnessForSingleton(self, newDDWorker, ProcessClass::DataDistributor);
|
||||
auto bestFitnessForCS = findBestFitnessForSingleton(self, newCSWorker, ProcessClass::ConsistencyScan);
|
||||
|
||||
ProcessClass::Fitness bestFitnessForBM;
|
||||
if (self->db.blobGranulesEnabled.get()) {
|
||||
@ -650,6 +679,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
||||
auto& db = self->db.serverInfo->get();
|
||||
auto rkSingleton = RatekeeperSingleton(db.ratekeeper);
|
||||
auto ddSingleton = DataDistributorSingleton(db.distributor);
|
||||
ConsistencyScanSingleton csSingleton(db.consistencyScan);
|
||||
BlobManagerSingleton bmSingleton(db.blobManager);
|
||||
EncryptKeyProxySingleton ekpSingleton(db.encryptKeyProxy);
|
||||
|
||||
@ -661,6 +691,9 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
||||
bool ddHealthy = isHealthySingleton<DataDistributorInterface>(
|
||||
self, newDDWorker, ddSingleton, bestFitnessForDD, self->recruitingDistributorID);
|
||||
|
||||
bool csHealthy = isHealthySingleton<ConsistencyScanInterface>(
|
||||
self, newCSWorker, csSingleton, bestFitnessForCS, self->recruitingConsistencyScanID);
|
||||
|
||||
bool bmHealthy = true;
|
||||
if (self->db.blobGranulesEnabled.get()) {
|
||||
bmHealthy = isHealthySingleton<BlobManagerInterface>(
|
||||
@ -674,7 +707,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
||||
}
|
||||
// if any of the singletons are unhealthy (rerecruited or not stable), then do not
|
||||
// consider any further re-recruitments
|
||||
if (!(rkHealthy && ddHealthy && bmHealthy && ekpHealthy)) {
|
||||
if (!(rkHealthy && ddHealthy && bmHealthy && ekpHealthy && csHealthy)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -682,8 +715,10 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
||||
// check if we can colocate the singletons in a more optimal way
|
||||
Optional<Standalone<StringRef>> currRKProcessId = rkSingleton.interface.get().locality.processId();
|
||||
Optional<Standalone<StringRef>> currDDProcessId = ddSingleton.interface.get().locality.processId();
|
||||
Optional<Standalone<StringRef>> currCSProcessId = csSingleton.interface.get().locality.processId();
|
||||
Optional<Standalone<StringRef>> newRKProcessId = newRKWorker.interf.locality.processId();
|
||||
Optional<Standalone<StringRef>> newDDProcessId = newDDWorker.interf.locality.processId();
|
||||
Optional<Standalone<StringRef>> newCSProcessId = newCSWorker.interf.locality.processId();
|
||||
|
||||
Optional<Standalone<StringRef>> currBMProcessId, newBMProcessId;
|
||||
if (self->db.blobGranulesEnabled.get()) {
|
||||
@ -697,8 +732,8 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
||||
newEKPProcessId = newEKPWorker.interf.locality.processId();
|
||||
}
|
||||
|
||||
std::vector<Optional<Standalone<StringRef>>> currPids = { currRKProcessId, currDDProcessId };
|
||||
std::vector<Optional<Standalone<StringRef>>> newPids = { newRKProcessId, newDDProcessId };
|
||||
std::vector<Optional<Standalone<StringRef>>> currPids = { currRKProcessId, currDDProcessId, currCSProcessId };
|
||||
std::vector<Optional<Standalone<StringRef>>> newPids = { newRKProcessId, newDDProcessId, newCSProcessId };
|
||||
if (self->db.blobGranulesEnabled.get()) {
|
||||
currPids.emplace_back(currBMProcessId);
|
||||
newPids.emplace_back(newBMProcessId);
|
||||
@ -728,7 +763,8 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
||||
if (newColocMap[newRKProcessId] <= currColocMap[currRKProcessId] &&
|
||||
newColocMap[newDDProcessId] <= currColocMap[currDDProcessId] &&
|
||||
newColocMap[newBMProcessId] <= currColocMap[currBMProcessId] &&
|
||||
newColocMap[newEKPProcessId] <= currColocMap[currEKPProcessId]) {
|
||||
newColocMap[newEKPProcessId] <= currColocMap[currEKPProcessId] &&
|
||||
newColocMap[newCSProcessId] <= currColocMap[currCSProcessId]) {
|
||||
// rerecruit the singleton for which we have found a better process, if any
|
||||
if (newColocMap[newRKProcessId] < currColocMap[currRKProcessId]) {
|
||||
rkSingleton.recruit(self);
|
||||
@ -738,6 +774,8 @@ void checkBetterSingletons(ClusterControllerData* self) {
|
||||
bmSingleton.recruit(self);
|
||||
} else if (SERVER_KNOBS->ENABLE_ENCRYPTION && newColocMap[newEKPProcessId] < currColocMap[currEKPProcessId]) {
|
||||
ekpSingleton.recruit(self);
|
||||
} else if (newColocMap[newCSProcessId] < currColocMap[currCSProcessId]) {
|
||||
csSingleton.recruit(self);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1302,6 +1340,13 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
|
||||
self, w, currSingleton, registeringSingleton, self->recruitingEncryptKeyProxyID);
|
||||
}
|
||||
|
||||
if (req.consistencyScanInterf.present()) {
|
||||
auto currSingleton = ConsistencyScanSingleton(self->db.serverInfo->get().consistencyScan);
|
||||
auto registeringSingleton = ConsistencyScanSingleton(req.consistencyScanInterf);
|
||||
haltRegisteringOrCurrentSingleton<ConsistencyScanInterface>(
|
||||
self, w, currSingleton, registeringSingleton, self->recruitingConsistencyScanID);
|
||||
}
|
||||
|
||||
// Notify the worker to register again with new process class/exclusive property
|
||||
if (!req.reply.isSet() && newPriorityInfo != req.priorityInfo) {
|
||||
req.reply.send(RegisterWorkerReply(newProcessClass, newPriorityInfo));
|
||||
@ -2163,6 +2208,101 @@ ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> startConsistencyScan(ClusterControllerData* self) {
|
||||
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
|
||||
TraceEvent("CCStartConsistencyScan", self->id).log();
|
||||
loop {
|
||||
try {
|
||||
state bool no_consistencyScan = !self->db.serverInfo->get().consistencyScan.present();
|
||||
while (!self->masterProcessId.present() ||
|
||||
self->masterProcessId != self->db.serverInfo->get().master.locality.processId() ||
|
||||
self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
||||
wait(self->db.serverInfo->onChange() || delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
|
||||
}
|
||||
if (no_consistencyScan && self->db.serverInfo->get().consistencyScan.present()) {
|
||||
// Existing consistencyScan registers while waiting, so skip.
|
||||
return Void();
|
||||
}
|
||||
|
||||
std::map<Optional<Standalone<StringRef>>, int> id_used = self->getUsedIds();
|
||||
WorkerFitnessInfo csWorker = self->getWorkerForRoleInDatacenter(self->clusterControllerDcId,
|
||||
ProcessClass::ConsistencyScan,
|
||||
ProcessClass::NeverAssign,
|
||||
self->db.config,
|
||||
id_used);
|
||||
|
||||
InitializeConsistencyScanRequest req(deterministicRandom()->randomUniqueID());
|
||||
state WorkerDetails worker = csWorker.worker;
|
||||
if (self->onMasterIsBetter(worker, ProcessClass::ConsistencyScan)) {
|
||||
worker = self->id_worker[self->masterProcessId.get()].details;
|
||||
}
|
||||
|
||||
self->recruitingConsistencyScanID = req.reqId;
|
||||
TraceEvent("CCRecruitConsistencyScan", self->id)
|
||||
.detail("Addr", worker.interf.address())
|
||||
.detail("CSID", req.reqId);
|
||||
|
||||
ErrorOr<ConsistencyScanInterface> interf = wait(worker.interf.consistencyScan.getReplyUnlessFailedFor(
|
||||
req, SERVER_KNOBS->WAIT_FOR_CONSISTENCYSCAN_JOIN_DELAY, 0));
|
||||
if (interf.present()) {
|
||||
self->recruitConsistencyScan.set(false);
|
||||
self->recruitingConsistencyScanID = interf.get().id();
|
||||
const auto& consistencyScan = self->db.serverInfo->get().consistencyScan;
|
||||
TraceEvent("CCConsistencyScanRecruited", self->id)
|
||||
.detail("Addr", worker.interf.address())
|
||||
.detail("CKID", interf.get().id());
|
||||
if (consistencyScan.present() && consistencyScan.get().id() != interf.get().id() &&
|
||||
self->id_worker.count(consistencyScan.get().locality.processId())) {
|
||||
TraceEvent("CCHaltConsistencyScanAfterRecruit", self->id)
|
||||
.detail("CKID", consistencyScan.get().id())
|
||||
.detail("DcID", printable(self->clusterControllerDcId));
|
||||
ConsistencyScanSingleton(consistencyScan).halt(self, consistencyScan.get().locality.processId());
|
||||
}
|
||||
if (!consistencyScan.present() || consistencyScan.get().id() != interf.get().id()) {
|
||||
self->db.setConsistencyScan(interf.get());
|
||||
}
|
||||
checkOutstandingRequests(self);
|
||||
return Void();
|
||||
} else {
|
||||
TraceEvent("CCConsistencyScanRecruitEmpty", self->id).log();
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent("CCConsistencyScanRecruitError", self->id).error(e);
|
||||
if (e.code() != error_code_no_more_servers) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
wait(lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY));
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> monitorConsistencyScan(ClusterControllerData* self) {
|
||||
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
||||
TraceEvent("CCMonitorConsistencyScanWaitingForRecovery", self->id).log();
|
||||
wait(self->db.serverInfo->onChange());
|
||||
}
|
||||
|
||||
TraceEvent("CCMonitorConsistencyScan", self->id).log();
|
||||
loop {
|
||||
if (self->db.serverInfo->get().consistencyScan.present() && !self->recruitConsistencyScan.get()) {
|
||||
state Future<Void> wfClient =
|
||||
waitFailureClient(self->db.serverInfo->get().consistencyScan.get().waitFailure,
|
||||
SERVER_KNOBS->CONSISTENCYSCAN_FAILURE_TIME);
|
||||
choose {
|
||||
when(wait(wfClient)) {
|
||||
TraceEvent("CCMonitorConsistencyScanDied", self->id)
|
||||
.detail("CKID", self->db.serverInfo->get().consistencyScan.get().id());
|
||||
self->db.clearInterf(ProcessClass::ConsistencyScanClass);
|
||||
}
|
||||
when(wait(self->recruitConsistencyScan.onChange())) {}
|
||||
}
|
||||
} else {
|
||||
TraceEvent("CCMonitorConsistencyScanStarting", self->id).log();
|
||||
wait(startConsistencyScan(self));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self, double waitTime) {
|
||||
// If master fails at the same time, give it a chance to clear master PID.
|
||||
// Also wait to avoid too many consecutive recruits in a small time window.
|
||||
@ -2580,6 +2720,7 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
|
||||
self.addActor.send(monitorRatekeeper(&self));
|
||||
self.addActor.send(monitorBlobManager(&self));
|
||||
self.addActor.send(watchBlobGranulesConfigKey(&self));
|
||||
self.addActor.send(monitorConsistencyScan(&self));
|
||||
self.addActor.send(dbInfoUpdater(&self));
|
||||
self.addActor.send(traceCounters("ClusterControllerMetrics",
|
||||
self.id,
|
||||
|
1132 fdbserver/ConsistencyScan.actor.cpp (new file)
File diff suppressed because it is too large.
@ -34,6 +34,7 @@
|
||||
#include "fdbserver/ClusterRecovery.actor.h"
|
||||
#include "fdbserver/CoordinationInterface.h"
|
||||
#include "fdbserver/DataDistribution.actor.h"
|
||||
#include "fdbclient/ConsistencyScanInterface.h"
|
||||
#include "flow/UnitTest.h"
|
||||
#include "fdbserver/QuietDatabase.h"
|
||||
#include "fdbserver/RecoveryState.h"
|
||||
@ -809,6 +810,10 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
|
||||
roles.addRole("blob_manager", db->get().blobManager.get());
|
||||
}
|
||||
|
||||
if (db->get().consistencyScan.present()) {
|
||||
roles.addRole("consistency_scan", db->get().consistencyScan.get());
|
||||
}
|
||||
|
||||
if (SERVER_KNOBS->ENABLE_ENCRYPTION && db->get().encryptKeyProxy.present()) {
|
||||
roles.addRole("encrypt_key_proxy", db->get().encryptKeyProxy.get());
|
||||
}
|
||||
@ -2871,6 +2876,26 @@ ACTOR Future<JsonBuilderObject> storageWigglerStatsFetcher(Optional<DataDistribu
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
// read consistencyScanInfo through Read-only tx
|
||||
ACTOR Future<Optional<Value>> consistencyScanInfoFetcher(Database cx) {
|
||||
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
|
||||
state Optional<Value> val;
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
wait(store(val, ConsistencyScanInfo::getInfo(tr)));
|
||||
wait(tr->commit());
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("ConsistencyScanInfoFetcher").log();
|
||||
return val.get();
|
||||
}
|
||||
|
||||
// constructs the cluster section of the json status output
|
||||
ACTOR Future<StatusReply> clusterGetStatus(
|
||||
Reference<AsyncVar<ServerDBInfo>> db,
|
||||
@ -2890,6 +2915,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
||||
state WorkerDetails ccWorker; // Cluster-Controller worker
|
||||
state WorkerDetails ddWorker; // DataDistributor worker
|
||||
state WorkerDetails rkWorker; // Ratekeeper worker
|
||||
state WorkerDetails csWorker; // ConsistencyScan worker
|
||||
|
||||
try {
|
||||
// Get the master Worker interface
|
||||
@ -2936,6 +2962,19 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
||||
rkWorker = _rkWorker.get();
|
||||
}
|
||||
|
||||
// Get the ConsistencyScan worker interface
|
||||
Optional<WorkerDetails> _csWorker;
|
||||
if (db->get().consistencyScan.present()) {
|
||||
_csWorker = getWorker(workers, db->get().consistencyScan.get().address());
|
||||
}
|
||||
|
||||
if (!db->get().consistencyScan.present() || !_csWorker.present()) {
|
||||
messages.push_back(JsonString::makeMessage("unreachable_consistencyScan_worker",
|
||||
"Unable to locate the consistencyScan worker."));
|
||||
} else {
|
||||
csWorker = _csWorker.get();
|
||||
}
|
||||
|
||||
// Get latest events for various event types from ALL workers
|
||||
// WorkerEvents is a map of worker's NetworkAddress to its event string
|
||||
// The pair represents worker responses and a set of worker NetworkAddress strings which did not respond.
|
||||
@ -3350,6 +3389,26 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
||||
messages.push_back(clientIssueMessage);
|
||||
}
|
||||
|
||||
// Fetch Consistency Scan Information
|
||||
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
|
||||
state Optional<Value> val;
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
wait(store(val, ConsistencyScanInfo::getInfo(tr)));
|
||||
wait(tr->commit());
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
if (val.present()) {
|
||||
ConsistencyScanInfo consistencyScanInfo =
|
||||
ObjectReader::fromStringRef<ConsistencyScanInfo>(val.get(), IncludeVersion());
|
||||
TraceEvent("StatusConsistencyScanGotVal").log();
|
||||
statusObj["consistency_scan_info"] = consistencyScanInfo.toJSON();
|
||||
}
|
||||
|
||||
// Create the status_incomplete message if there were any reasons that the status is incomplete.
|
||||
if (!status_incomplete_reasons.empty()) {
|
||||
JsonBuilderObject incomplete_message =
|
||||
|
@ -51,6 +51,7 @@ struct WorkerInfo : NonCopyable {
|
||||
Future<Void> haltDistributor;
|
||||
Future<Void> haltBlobManager;
|
||||
Future<Void> haltEncryptKeyProxy;
|
||||
Future<Void> haltConsistencyScan;
|
||||
Standalone<VectorRef<StringRef>> issues;
|
||||
|
||||
WorkerInfo()
|
||||
@ -73,7 +74,7 @@ struct WorkerInfo : NonCopyable {
|
||||
: watcher(std::move(r.watcher)), reply(std::move(r.reply)), gen(r.gen), reboots(r.reboots),
|
||||
initialClass(r.initialClass), priorityInfo(r.priorityInfo), details(std::move(r.details)),
|
||||
haltRatekeeper(r.haltRatekeeper), haltDistributor(r.haltDistributor), haltBlobManager(r.haltBlobManager),
|
||||
haltEncryptKeyProxy(r.haltEncryptKeyProxy), issues(r.issues) {}
|
||||
haltEncryptKeyProxy(r.haltEncryptKeyProxy), haltConsistencyScan(r.haltConsistencyScan), issues(r.issues) {}
|
||||
void operator=(WorkerInfo&& r) noexcept {
|
||||
watcher = std::move(r.watcher);
|
||||
reply = std::move(r.reply);
|
||||
@ -188,6 +189,14 @@ public:
|
||||
serverInfo->set(newInfo);
|
||||
}
|
||||
|
||||
void setConsistencyScan(const ConsistencyScanInterface& interf) {
|
||||
auto newInfo = serverInfo->get();
|
||||
newInfo.id = deterministicRandom()->randomUniqueID();
|
||||
newInfo.infoGeneration = ++dbInfoCount;
|
||||
newInfo.consistencyScan = interf;
|
||||
serverInfo->set(newInfo);
|
||||
}
|
||||
|
||||
void clearInterf(ProcessClass::ClassType t) {
|
||||
auto newInfo = serverInfo->get();
|
||||
newInfo.id = deterministicRandom()->randomUniqueID();
|
||||
@ -200,6 +209,8 @@ public:
|
||||
newInfo.blobManager = Optional<BlobManagerInterface>();
|
||||
} else if (t == ProcessClass::EncryptKeyProxyClass) {
|
||||
newInfo.encryptKeyProxy = Optional<EncryptKeyProxyInterface>();
|
||||
} else if (t == ProcessClass::ConsistencyScanClass) {
|
||||
newInfo.consistencyScan = Optional<ConsistencyScanInterface>();
|
||||
}
|
||||
serverInfo->set(newInfo);
|
||||
}
|
||||
@ -294,7 +305,9 @@ public:
|
||||
(db.serverInfo->get().blobManager.present() &&
|
||||
db.serverInfo->get().blobManager.get().locality.processId() == processId) ||
|
||||
(db.serverInfo->get().encryptKeyProxy.present() &&
|
||||
db.serverInfo->get().encryptKeyProxy.get().locality.processId() == processId);
|
||||
db.serverInfo->get().encryptKeyProxy.get().locality.processId() == processId) ||
|
||||
(db.serverInfo->get().consistencyScan.present() &&
|
||||
db.serverInfo->get().consistencyScan.get().locality.processId() == processId);
|
||||
}
|
||||
|
||||
WorkerDetails getStorageWorker(RecruitStorageRequest const& req) {
|
||||
@ -2880,7 +2893,8 @@ public:
|
||||
ASSERT(masterProcessId.present());
|
||||
const auto& pid = worker.interf.locality.processId();
|
||||
if ((role != ProcessClass::DataDistributor && role != ProcessClass::Ratekeeper &&
|
||||
role != ProcessClass::BlobManager && role != ProcessClass::EncryptKeyProxy) ||
|
||||
role != ProcessClass::BlobManager && role != ProcessClass::EncryptKeyProxy &&
|
||||
role != ProcessClass::ConsistencyScan) ||
|
||||
pid == masterProcessId.get()) {
|
||||
return false;
|
||||
}
|
||||
@ -3263,6 +3277,8 @@ public:
|
||||
Optional<UID> recruitingBlobManagerID;
|
||||
AsyncVar<bool> recruitEncryptKeyProxy;
|
||||
Optional<UID> recruitingEncryptKeyProxyID;
|
||||
AsyncVar<bool> recruitConsistencyScan;
|
||||
Optional<UID> recruitingConsistencyScanID;
|
||||
|
||||
// Stores the health information from a particular worker's perspective.
|
||||
struct WorkerHealth {
|
||||
@ -3300,7 +3316,7 @@ public:
|
||||
goodRecruitmentTime(Never()), goodRemoteRecruitmentTime(Never()), datacenterVersionDifference(0),
|
||||
versionDifferenceUpdated(false), remoteDCMonitorStarted(false), remoteTransactionSystemDegraded(false),
|
||||
recruitDistributor(false), recruitRatekeeper(false), recruitBlobManager(false), recruitEncryptKeyProxy(false),
|
||||
clusterControllerMetrics("ClusterController", id.toString()),
|
||||
recruitConsistencyScan(false), clusterControllerMetrics("ClusterController", id.toString()),
|
||||
openDatabaseRequests("OpenDatabaseRequests", clusterControllerMetrics),
|
||||
registerWorkerRequests("RegisterWorkerRequests", clusterControllerMetrics),
|
||||
getWorkersRequests("GetWorkersRequests", clusterControllerMetrics),
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "fdbserver/LogSystemConfig.h"
|
||||
#include "fdbserver/RatekeeperInterface.h"
|
||||
#include "fdbserver/BlobManagerInterface.h"
|
||||
#include "fdbclient/ConsistencyScanInterface.h"
|
||||
#include "fdbserver/RecoveryState.h"
|
||||
#include "fdbserver/LatencyBandConfig.h"
|
||||
#include "fdbserver/WorkerInterface.actor.h"
|
||||
@ -50,6 +51,7 @@ struct ServerDBInfo {
|
||||
Optional<RatekeeperInterface> ratekeeper;
|
||||
Optional<BlobManagerInterface> blobManager;
|
||||
Optional<EncryptKeyProxyInterface> encryptKeyProxy;
|
||||
Optional<ConsistencyScanInterface> consistencyScan;
|
||||
std::vector<ResolverInterface> resolvers;
|
||||
DBRecoveryCount
|
||||
recoveryCount; // A recovery count from DBCoreState. A successful cluster recovery increments it twice;
|
||||
@ -83,6 +85,7 @@ struct ServerDBInfo {
|
||||
ratekeeper,
|
||||
blobManager,
|
||||
encryptKeyProxy,
|
||||
consistencyScan,
|
||||
resolvers,
|
||||
recoveryCount,
|
||||
recoveryState,
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "fdbserver/MasterInterface.h"
|
||||
#include "fdbserver/TLogInterface.h"
|
||||
#include "fdbserver/RatekeeperInterface.h"
|
||||
#include "fdbclient/ConsistencyScanInterface.h"
|
||||
#include "fdbserver/BlobManagerInterface.h"
|
||||
#include "fdbserver/ResolverInterface.h"
|
||||
#include "fdbclient/BlobWorkerInterface.h"
|
||||
@ -57,6 +58,7 @@ struct WorkerInterface {
|
||||
RequestStream<struct InitializeRatekeeperRequest> ratekeeper;
|
||||
RequestStream<struct InitializeBlobManagerRequest> blobManager;
|
||||
RequestStream<struct InitializeBlobWorkerRequest> blobWorker;
|
||||
RequestStream<struct InitializeConsistencyScanRequest> consistencyScan;
|
||||
RequestStream<struct InitializeResolverRequest> resolver;
|
||||
RequestStream<struct InitializeStorageRequest> storage;
|
||||
RequestStream<struct InitializeLogRouterRequest> logRouter;
|
||||
@ -112,6 +114,7 @@ struct WorkerInterface {
|
||||
ratekeeper,
|
||||
blobManager,
|
||||
blobWorker,
|
||||
consistencyScan,
|
||||
resolver,
|
||||
storage,
|
||||
logRouter,
|
||||
@ -428,6 +431,7 @@ struct RegisterWorkerRequest {
|
||||
Optional<RatekeeperInterface> ratekeeperInterf;
|
||||
Optional<BlobManagerInterface> blobManagerInterf;
|
||||
Optional<EncryptKeyProxyInterface> encryptKeyProxyInterf;
|
||||
Optional<ConsistencyScanInterface> consistencyScanInterf;
|
||||
Standalone<VectorRef<StringRef>> issues;
|
||||
std::vector<NetworkAddress> incompatiblePeers;
|
||||
ReplyPromise<RegisterWorkerReply> reply;
|
||||
@ -449,6 +453,7 @@ struct RegisterWorkerRequest {
|
||||
Optional<RatekeeperInterface> rkInterf,
|
||||
Optional<BlobManagerInterface> bmInterf,
|
||||
Optional<EncryptKeyProxyInterface> ekpInterf,
|
||||
Optional<ConsistencyScanInterface> csInterf,
|
||||
bool degraded,
|
||||
Optional<Version> lastSeenKnobVersion,
|
||||
Optional<ConfigClassSet> knobConfigClassSet,
|
||||
@ -456,9 +461,9 @@ struct RegisterWorkerRequest {
|
||||
ConfigBroadcastInterface configBroadcastInterface)
|
||||
: wi(wi), initialClass(initialClass), processClass(processClass), priorityInfo(priorityInfo),
|
||||
generation(generation), distributorInterf(ddInterf), ratekeeperInterf(rkInterf), blobManagerInterf(bmInterf),
|
||||
encryptKeyProxyInterf(ekpInterf), degraded(degraded), lastSeenKnobVersion(lastSeenKnobVersion),
|
||||
knobConfigClassSet(knobConfigClassSet), requestDbInfo(false), recoveredDiskFiles(recoveredDiskFiles),
|
||||
configBroadcastInterface(configBroadcastInterface) {}
|
||||
encryptKeyProxyInterf(ekpInterf), consistencyScanInterf(csInterf), degraded(degraded),
|
||||
lastSeenKnobVersion(lastSeenKnobVersion), knobConfigClassSet(knobConfigClassSet), requestDbInfo(false),
|
||||
recoveredDiskFiles(recoveredDiskFiles), configBroadcastInterface(configBroadcastInterface) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
@ -472,6 +477,7 @@ struct RegisterWorkerRequest {
|
||||
ratekeeperInterf,
|
||||
blobManagerInterf,
|
||||
encryptKeyProxyInterf,
|
||||
consistencyScanInterf,
|
||||
issues,
|
||||
incompatiblePeers,
|
||||
reply,
|
||||
@ -728,6 +734,19 @@ struct InitializeRatekeeperRequest {
|
||||
}
|
||||
};
|
||||
|
||||
struct InitializeConsistencyScanRequest {
|
||||
constexpr static FileIdentifier file_identifier = 3104275;
|
||||
UID reqId;
|
||||
ReplyPromise<ConsistencyScanInterface> reply;
|
||||
|
||||
InitializeConsistencyScanRequest() {}
|
||||
explicit InitializeConsistencyScanRequest(UID uid) : reqId(uid) {}
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, reqId, reply);
|
||||
}
|
||||
};
|
||||
|
||||
struct InitializeBlobManagerRequest {
|
||||
constexpr static FileIdentifier file_identifier = 2567474;
|
||||
UID reqId;
|
||||
@ -990,6 +1009,7 @@ struct Role {
|
||||
static const Role COORDINATOR;
|
||||
static const Role BACKUP;
|
||||
static const Role ENCRYPT_KEY_PROXY;
|
||||
static const Role CONSISTENCYSCAN;
|
||||
|
||||
std::string roleName;
|
||||
std::string abbreviation;
|
||||
@ -1027,6 +1047,8 @@ struct Role {
|
||||
return BACKUP;
|
||||
case ProcessClass::EncryptKeyProxy:
|
||||
return ENCRYPT_KEY_PROXY;
|
||||
case ProcessClass::ConsistencyScan:
|
||||
return CONSISTENCYSCAN;
|
||||
case ProcessClass::Worker:
|
||||
return WORKER;
|
||||
case ProcessClass::NoRole:
|
||||
@ -1148,6 +1170,7 @@ ACTOR Future<Void> logRouter(TLogInterface interf,
|
||||
Reference<AsyncVar<ServerDBInfo> const> db);
|
||||
ACTOR Future<Void> dataDistributor(DataDistributorInterface ddi, Reference<AsyncVar<ServerDBInfo> const> db);
|
||||
ACTOR Future<Void> ratekeeper(RatekeeperInterface rki, Reference<AsyncVar<ServerDBInfo> const> db);
|
||||
ACTOR Future<Void> consistencyScan(ConsistencyScanInterface csInterf, Reference<AsyncVar<ServerDBInfo> const> dbInfo);
|
||||
ACTOR Future<Void> blobManager(BlobManagerInterface bmi, Reference<AsyncVar<ServerDBInfo> const> db, int64_t epoch);
|
||||
ACTOR Future<Void> storageCacheServer(StorageServerInterface interf,
|
||||
uint16_t id,
|
||||
|
@ -4843,6 +4843,11 @@ ACTOR Future<Void> getKeyValuesStreamQ(StorageServer* data, GetKeyValuesStreamRe
|
||||
!data->isTss() && !data->isSSWithTSSPair())
|
||||
? 1
|
||||
: CLIENT_KNOBS->REPLY_BYTE_LIMIT;
|
||||
TraceEvent(SevDebug, "SSGetKeyValueStreamLimits")
|
||||
.detail("ByteLimit", byteLimit)
|
||||
.detail("ReqLimit", req.limit)
|
||||
.detail("Begin", begin.printable())
|
||||
.detail("End", end.printable());
|
||||
GetKeyValuesReply _r = wait(readRange(data,
|
||||
version,
|
||||
KeyRangeRef(begin, end),
|
||||
|
@ -562,6 +562,7 @@ ACTOR Future<Void> registrationClient(
|
||||
Reference<AsyncVar<Optional<RatekeeperInterface>> const> rkInterf,
|
||||
Reference<AsyncVar<Optional<std::pair<int64_t, BlobManagerInterface>>> const> bmInterf,
|
||||
Reference<AsyncVar<Optional<EncryptKeyProxyInterface>> const> ekpInterf,
|
||||
Reference<AsyncVar<Optional<ConsistencyScanInterface>> const> csInterf,
|
||||
Reference<AsyncVar<bool> const> degraded,
|
||||
Reference<IClusterConnectionRecord> connRecord,
|
||||
Reference<AsyncVar<std::set<std::string>> const> issues,
|
||||
@ -602,6 +603,7 @@ ACTOR Future<Void> registrationClient(
|
||||
rkInterf->get(),
|
||||
bmInterf->get().present() ? bmInterf->get().get().second : Optional<BlobManagerInterface>(),
|
||||
ekpInterf->get(),
|
||||
csInterf->get(),
|
||||
degraded->get(),
|
||||
localConfig.isValid() ? localConfig->lastSeenVersion() : Optional<Version>(),
|
||||
localConfig.isValid() ? localConfig->configClassSet() : Optional<ConfigClassSet>(),
|
||||
@ -670,6 +672,7 @@ ACTOR Future<Void> registrationClient(
|
||||
when(wait(ccInterface->onChange())) { break; }
|
||||
when(wait(ddInterf->onChange())) { break; }
|
||||
when(wait(rkInterf->onChange())) { break; }
|
||||
when(wait(csInterf->onChange())) { break; }
|
||||
when(wait(bmInterf->onChange())) { break; }
|
||||
when(wait(ekpInterf->onChange())) { break; }
|
||||
when(wait(degraded->onChange())) { break; }
|
||||
@ -696,6 +699,10 @@ bool addressInDbAndPrimaryDc(const NetworkAddress& address, Reference<AsyncVar<S
|
||||
return true;
|
||||
}
|
||||
|
||||
if (dbi.consistencyScan.present() && dbi.consistencyScan.get().address() == address) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (dbi.blobManager.present() && dbi.blobManager.get().address() == address) {
|
||||
return true;
|
||||
}
|
||||
@ -1620,6 +1627,8 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
|
||||
state UID lastBMRecruitRequestId;
|
||||
state Reference<AsyncVar<Optional<EncryptKeyProxyInterface>>> ekpInterf(
|
||||
new AsyncVar<Optional<EncryptKeyProxyInterface>>());
|
||||
state Reference<AsyncVar<Optional<ConsistencyScanInterface>>> csInterf(
|
||||
new AsyncVar<Optional<ConsistencyScanInterface>>());
|
||||
state Future<Void> handleErrors = workerHandleErrors(errors.getFuture()); // Needs to be stopped last
|
||||
state ActorCollection errorForwarders(false);
|
||||
state Future<Void> loggingTrigger = Void();
|
||||
@ -1942,6 +1951,7 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
|
||||
rkInterf,
|
||||
bmEpochAndInterf,
|
||||
ekpInterf,
|
||||
csInterf,
|
||||
degraded,
|
||||
connRecord,
|
||||
issues,
|
||||
@ -2136,6 +2146,31 @@ ACTOR Future<Void> workerServer(Reference<IClusterConnectionRecord> connRecord,
|
||||
TraceEvent("Ratekeeper_InitRequest", req.reqId).detail("RatekeeperId", recruited.id());
|
||||
req.reply.send(recruited);
|
||||
}
|
||||
when(InitializeConsistencyScanRequest req = waitNext(interf.consistencyScan.getFuture())) {
|
||||
LocalLineage _;
|
||||
getCurrentLineage()->modify(&RoleLineage::role) = ProcessClass::ClusterRole::ConsistencyScan;
|
||||
ConsistencyScanInterface recruited(locality, req.reqId);
|
||||
recruited.initEndpoints();
|
||||
|
||||
if (csInterf->get().present()) {
|
||||
recruited = csInterf->get().get();
|
||||
CODE_PROBE(true, "Recovered while already a consistencyscan");
|
||||
} else {
|
||||
startRole(Role::CONSISTENCYSCAN, recruited.id(), interf.id());
|
||||
DUMPTOKEN(recruited.waitFailure);
|
||||
DUMPTOKEN(recruited.haltConsistencyScan);
|
||||
|
||||
Future<Void> consistencyScanProcess = consistencyScan(recruited, dbInfo);
|
||||
errorForwarders.add(forwardError(
|
||||
errors,
|
||||
Role::CONSISTENCYSCAN,
|
||||
recruited.id(),
|
||||
setWhenDoneOrError(consistencyScanProcess, csInterf, Optional<ConsistencyScanInterface>())));
|
||||
csInterf->set(Optional<ConsistencyScanInterface>(recruited));
|
||||
}
|
||||
TraceEvent("ConsistencyScanReceived", req.reqId).detail("ConsistencyScanId", recruited.id());
|
||||
req.reply.send(recruited);
|
||||
}
|
||||
when(InitializeBlobManagerRequest req = waitNext(interf.blobManager.getFuture())) {
|
||||
LocalLineage _;
|
||||
getCurrentLineage()->modify(&RoleLineage::role) = ProcessClass::ClusterRole::BlobManager;
|
||||
@ -3459,3 +3494,4 @@ const Role Role::STORAGE_CACHE("StorageCache", "SC");
|
||||
const Role Role::COORDINATOR("Coordinator", "CD");
|
||||
const Role Role::BACKUP("Backup", "BK");
|
||||
const Role Role::ENCRYPT_KEY_PROXY("EncryptKeyProxy", "EP");
|
||||
const Role Role::CONSISTENCYSCAN("ConsistencyScan", "CS");
|
||||
|
@@ -321,19 +321,41 @@ struct ConsistencyCheckWorkload : TestWorkload {

// Get a list of key servers; verify that the TLogs and master all agree about who the key servers are
state Promise<std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>>> keyServerPromise;
bool keyServerResult = wait(self->getKeyServers(cx, self, keyServerPromise, keyServersKeys));
bool keyServerResult =
wait(getKeyServers(cx, keyServerPromise, keyServersKeys, self->performQuiescentChecks));
if (keyServerResult) {
state std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>> keyServers =
keyServerPromise.getFuture().get();

// Get the locations of all the shards in the database
state Promise<Standalone<VectorRef<KeyValueRef>>> keyLocationPromise;
bool keyLocationResult = wait(self->getKeyLocations(cx, keyServers, self, keyLocationPromise));
bool keyLocationResult =
wait(getKeyLocations(cx, keyServers, keyLocationPromise, self->performQuiescentChecks));
if (keyLocationResult) {
state Standalone<VectorRef<KeyValueRef>> keyLocations = keyLocationPromise.getFuture().get();

// Check that each shard has the same data on all storage servers that it resides on
wait(::success(self->checkDataConsistency(cx, keyLocations, configuration, tssMapping, self)));
wait(::success(
checkDataConsistency(cx,
keyLocations,
configuration,
tssMapping,
self->performQuiescentChecks,
self->performTSSCheck,
self->firstClient,
self->failureIsError,
self->clientId,
self->clientCount,
self->distributed,
self->shuffleShards,
self->shardSampleFactor,
self->sharedRandomNumber,
self->repetitions,
&(self->bytesReadInPreviousRound),
true,
self->rateLimitMax,
CLIENT_KNOBS->CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME,
KeyRef())));

// Cache consistency check
if (self->performCacheCheck)
@@ -343,11 +365,12 @@ struct ConsistencyCheckWorkload : TestWorkload {
} catch (Error& e) {
if (e.code() == error_code_transaction_too_old || e.code() == error_code_future_version ||
e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed ||
e.code() == error_code_process_behind)
e.code() == error_code_process_behind || e.code() == error_code_actor_cancelled) {
TraceEvent("ConsistencyCheck_Retry")
.error(e); // FIXME: consistency check does not retry in this case
else
} else {
self->testFailure(format("Error %d - %s", e.code(), e.name()));
}
}
}

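// Illustrative sketch only (assumed helper name, not part of this commit): the hunk above widens the
// set of error codes the workload treats as retryable to include actor_cancelled. Factored out, the
// classification amounts to:
//
// static bool isRetryableConsistencyCheckError(const Error& e) {
//     return e.code() == error_code_transaction_too_old || e.code() == error_code_future_version ||
//            e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed ||
//            e.code() == error_code_process_behind || e.code() == error_code_actor_cancelled;
// }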
@@ -526,7 +549,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
lastSampleKey = lastStartSampleKey;

// Get the min version of the storage servers
Version version = wait(self->getVersion(cx, self));
Version version = wait(getVersion(cx));

state GetKeyValuesRequest req;
req.begin = begin;
@@ -744,7 +767,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
bool removePrefix) {
// get shards paired with corresponding storage servers
state Promise<std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>>> keyServerPromise;
bool keyServerResult = wait(self->getKeyServers(cx, self, keyServerPromise, range));
bool keyServerResult = wait(getKeyServers(cx, keyServerPromise, range, self->performQuiescentChecks));
if (!keyServerResult)
return false;
state std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>> shards =
@@ -762,7 +785,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
for (i = 0; i < shards.size(); i++) {
while (beginKey < std::min<KeyRef>(shards[i].first.end, endKey)) {
try {
Version version = wait(self->getVersion(cx, self));
Version version = wait(getVersion(cx));

GetKeyValuesRequest req;
req.begin = firstGreaterOrEqual(beginKey);
@@ -846,879 +869,12 @@ struct ConsistencyCheckWorkload : TestWorkload {
return true;
}

// Gets a version at which to read from the storage servers
ACTOR Future<Version> getVersion(Database cx, ConsistencyCheckWorkload* self) {
loop {
state Transaction tr(cx);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
try {
Version version = wait(tr.getReadVersion());
return version;
} catch (Error& e) {
wait(tr.onError(e));
}
}
}
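// Illustrative sketch only (not part of this commit): callers pin every storage-server read to the
// version returned above so that replies from different replicas are directly comparable, e.g.:
//
// Version version = wait(self->getVersion(cx, self));
// GetKeyValuesRequest req;
// req.version = version; // same snapshot for every replica of the shard
// req.begin = firstGreaterOrEqual(range.begin);
// req.end = firstGreaterOrEqual(range.end);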

// Get a list of storage servers (persisting keys within range "kr") from the master and compare them with the
// TLogs. If this is a quiescent check, then each commit proxy needs to respond, otherwise only one needs to
// respond. Returns false if there is a failure (in this case, keyServersPromise will never be set)
ACTOR Future<bool> getKeyServers(
Database cx,
ConsistencyCheckWorkload* self,
Promise<std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>>> keyServersPromise,
KeyRangeRef kr) {
state std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>> keyServers;

// Try getting key server locations from the master proxies
state std::vector<Future<ErrorOr<GetKeyServerLocationsReply>>> keyServerLocationFutures;
state Key begin = kr.begin;
state Key end = kr.end;
state int limitKeyServers = BUGGIFY ? 1 : 100;
state Span span(SpanContext(deterministicRandom()->randomUniqueID(), deterministicRandom()->randomUInt64()),
"WL:ConsistencyCheck"_loc);

while (begin < end) {
state Reference<CommitProxyInfo> commitProxyInfo =
wait(cx->getCommitProxiesFuture(UseProvisionalProxies::False));
keyServerLocationFutures.clear();
for (int i = 0; i < commitProxyInfo->size(); i++)
keyServerLocationFutures.push_back(
commitProxyInfo->get(i, &CommitProxyInterface::getKeyServersLocations)
.getReplyUnlessFailedFor(
GetKeyServerLocationsRequest(
span.context, TenantInfo(), begin, end, limitKeyServers, false, latestVersion, Arena()),
2,
0));

state bool keyServersInsertedForThisIteration = false;
choose {
when(wait(waitForAll(keyServerLocationFutures))) {
// Read the key server location results
for (int i = 0; i < keyServerLocationFutures.size(); i++) {
ErrorOr<GetKeyServerLocationsReply> shards = keyServerLocationFutures[i].get();

// If performing quiescent check, then all master proxies should be reachable. Otherwise, only
// one needs to be reachable
if (self->performQuiescentChecks && !shards.present()) {
TraceEvent("ConsistencyCheck_CommitProxyUnavailable")
.detail("CommitProxyID", commitProxyInfo->getId(i));
self->testFailure("Commit proxy unavailable");
return false;
}

// Get the list of shards if one was returned. If not doing a quiescent check, we can break if
// it is. If we are doing a quiescent check, then we only need to do this for the first shard.
if (shards.present() && !keyServersInsertedForThisIteration) {
keyServers.insert(
keyServers.end(), shards.get().results.begin(), shards.get().results.end());
keyServersInsertedForThisIteration = true;
begin = shards.get().results.back().first.end;

if (!self->performQuiescentChecks)
break;
}
} // End of For
}
when(wait(cx->onProxiesChanged())) {}
} // End of choose

if (!keyServersInsertedForThisIteration) // Retry the entire workflow
wait(delay(1.0));

} // End of while

keyServersPromise.send(keyServers);
return true;
}
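// Illustrative sketch only (not part of this commit): the promise/flag calling convention above is
// used by the workload roughly as follows; the promise is only read when the actor returns true:
//
// state Promise<std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>>> keyServerPromise;
// bool ok = wait(self->getKeyServers(cx, self, keyServerPromise, keyServersKeys));
// if (ok) {
//     auto keyServers = keyServerPromise.getFuture().get(); // safe: only set on success
// }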

// Retrieves the locations of all shards in the database
// Returns false if there is a failure (in this case, keyLocationPromise will never be set)
ACTOR Future<bool> getKeyLocations(Database cx,
std::vector<std::pair<KeyRange, std::vector<StorageServerInterface>>> shards,
ConsistencyCheckWorkload* self,
Promise<Standalone<VectorRef<KeyValueRef>>> keyLocationPromise) {
state Standalone<VectorRef<KeyValueRef>> keyLocations;
state Key beginKey = allKeys.begin.withPrefix(keyServersPrefix);
state Key endKey = allKeys.end.withPrefix(keyServersPrefix);
state int i = 0;
state Transaction onErrorTr(cx); // This transaction exists only to access onError and its backoff behavior

// If the responses are too big, we may use multiple requests to get the key locations. Each request begins
// where the last left off
for (; i < shards.size(); i++) {
while (beginKey < std::min<KeyRef>(shards[i].first.end, endKey)) {
try {
Version version = wait(self->getVersion(cx, self));

GetKeyValuesRequest req;
req.begin = firstGreaterOrEqual(beginKey);
req.end = firstGreaterOrEqual(std::min<KeyRef>(shards[i].first.end, endKey));
req.limit = SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT;
req.limitBytes = SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT_BYTES;
req.version = version;
req.tags = TagSet();

// Try getting the shard locations from the key servers
state std::vector<Future<ErrorOr<GetKeyValuesReply>>> keyValueFutures;
for (const auto& kv : shards[i].second) {
resetReply(req);
keyValueFutures.push_back(kv.getKeyValues.getReplyUnlessFailedFor(req, 2, 0));
}

wait(waitForAll(keyValueFutures));

int firstValidStorageServer = -1;

// Read the shard location results
for (int j = 0; j < keyValueFutures.size(); j++) {
ErrorOr<GetKeyValuesReply> reply = keyValueFutures[j].get();

if (!reply.present() || reply.get().error.present()) {
// If the storage server didn't reply in a quiescent database, then the check fails
if (self->performQuiescentChecks) {
TraceEvent("ConsistencyCheck_KeyServerUnavailable")
.detail("StorageServer", shards[i].second[j].id().toString().c_str());
self->testFailure("Key server unavailable");
return false;
}

// If no storage servers replied, then throw all_alternatives_failed to force a retry
else if (firstValidStorageServer < 0 && j == keyValueFutures.size() - 1)
throw all_alternatives_failed();
}

// If this is the first storage server, store the locations to send back to the caller
else if (firstValidStorageServer < 0) {
firstValidStorageServer = j;

// Otherwise, compare the data to the results from the first storage server. If they are
// different, then the check fails
} else if (reply.get().data != keyValueFutures[firstValidStorageServer].get().get().data ||
reply.get().more != keyValueFutures[firstValidStorageServer].get().get().more) {
TraceEvent("ConsistencyCheck_InconsistentKeyServers")
.detail("StorageServer1", shards[i].second[firstValidStorageServer].id())
.detail("StorageServer2", shards[i].second[j].id());
self->testFailure("Key servers inconsistent", true);
return false;
}
}

auto keyValueResponse = keyValueFutures[firstValidStorageServer].get().get();
RangeResult currentLocations = krmDecodeRanges(
keyServersPrefix,
KeyRangeRef(beginKey.removePrefix(keyServersPrefix),
std::min<KeyRef>(shards[i].first.end, endKey).removePrefix(keyServersPrefix)),
RangeResultRef(keyValueResponse.data, keyValueResponse.more));

if (keyValueResponse.data.size() && beginKey == keyValueResponse.data[0].key) {
keyLocations.push_back_deep(keyLocations.arena(), currentLocations[0]);
}

if (currentLocations.size() > 2) {
keyLocations.append_deep(
keyLocations.arena(), &currentLocations[1], currentLocations.size() - 2);
}

// Next iteration should pick up where we left off
ASSERT(currentLocations.size() > 1);
if (!keyValueResponse.more) {
beginKey = shards[i].first.end;
} else {
beginKey = keyValueResponse.data.end()[-1].key;
}

// If this is the last iteration, then push the allKeys.end KV pair
if (beginKey >= endKey)
keyLocations.push_back_deep(keyLocations.arena(), currentLocations.end()[-1]);
} catch (Error& e) {
state Error err = e;
wait(onErrorTr.onError(err));
TraceEvent("ConsistencyCheck_RetryGetKeyLocations").error(err);
}
}
}

keyLocationPromise.send(keyLocations);
return true;
}
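// Illustrative note (not part of this commit): consecutive entries of the keyLocations vector built
// above define the shard boundaries that checkDataConsistency later walks, e.g.:
//
// for (int k = 0; k < keyLocations.size() - 1; k++) {
//     KeyRangeRef range(keyLocations[k].key, keyLocations[k + 1].key); // one shard per adjacent pair
// }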

// Retrieves a vector of the storage servers' estimates for the size of a particular shard
// If a storage server can't be reached, its estimate will be -1
// If there is an error, then the returned vector will have 0 size
ACTOR Future<std::vector<int64_t>> getStorageSizeEstimate(std::vector<StorageServerInterface> storageServers,
KeyRangeRef shard) {
state std::vector<int64_t> estimatedBytes;

state WaitMetricsRequest req;
req.keys = shard;
req.max.bytes = -1;
req.min.bytes = 0;

state std::vector<Future<ErrorOr<StorageMetrics>>> metricFutures;

try {
// Check the size of the shard on each storage server
for (int i = 0; i < storageServers.size(); i++) {
resetReply(req);
metricFutures.push_back(storageServers[i].waitMetrics.getReplyUnlessFailedFor(req, 2, 0));
}

// Wait for the storage servers to respond
wait(waitForAll(metricFutures));

int firstValidStorageServer = -1;

// Retrieve the size from the storage server responses
for (int i = 0; i < storageServers.size(); i++) {
ErrorOr<StorageMetrics> reply = metricFutures[i].get();

// If the storage server doesn't reply, then return -1
if (!reply.present()) {
TraceEvent("ConsistencyCheck_FailedToFetchMetrics")
.error(reply.getError())
.detail("Begin", printable(shard.begin))
.detail("End", printable(shard.end))
.detail("StorageServer", storageServers[i].id())
.detail("IsTSS", storageServers[i].isTss() ? "True" : "False");
estimatedBytes.push_back(-1);
}

// Add the result to the list of estimates
else if (reply.present()) {
int64_t numBytes = reply.get().bytes;
estimatedBytes.push_back(numBytes);
if (firstValidStorageServer < 0)
firstValidStorageServer = i;
else if (estimatedBytes[firstValidStorageServer] != numBytes) {
TraceEvent("ConsistencyCheck_InconsistentStorageMetrics")
.detail("ByteEstimate1", estimatedBytes[firstValidStorageServer])
.detail("ByteEstimate2", numBytes)
.detail("Begin", shard.begin)
.detail("End", shard.end)
.detail("StorageServer1", storageServers[firstValidStorageServer].id())
.detail("StorageServer2", storageServers[i].id())
.detail("IsTSS",
storageServers[i].isTss() || storageServers[firstValidStorageServer].isTss()
? "True"
: "False");
}
}
}
} catch (Error& e) {
TraceEvent("ConsistencyCheck_ErrorFetchingMetrics")
.error(e)
.detail("Begin", printable(shard.begin))
.detail("End", printable(shard.end));
estimatedBytes.clear();
}

return estimatedBytes;
}
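// Illustrative note (not part of this commit): the return contract above distinguishes three cases,
// which callers such as checkDataConsistency rely on:
//
// std::vector<int64_t> est = wait(self->getStorageSizeEstimate(servers, range));
// if (est.empty()) { /* error while fetching metrics for the team */ }
// else for (int64_t b : est) {
//     if (b < 0) { /* that replica did not respond */ }
//     else { /* b is that replica's sampled size estimate for the shard */ }
// }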

// Comparison function used to compare map elements by value
template <class K, class T>
static bool compareByValue(std::pair<K, T> a, std::pair<K, T> b) {
return a.second < b.second;
}

ACTOR Future<int64_t> getDatabaseSize(Database cx) {
state Transaction tr(cx);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
loop {
try {
StorageMetrics metrics =
wait(tr.getDatabase()->getStorageMetrics(KeyRangeRef(allKeys.begin, keyServersPrefix), 100000));
return metrics.bytes;
} catch (Error& e) {
wait(tr.onError(e));
}
}
}

// Checks that the data in each shard is the same on each storage server that it resides on. Also performs some
// sanity checks on the sizes of shards and storage servers. Returns false if there is a failure
ACTOR Future<bool> checkDataConsistency(Database cx,
VectorRef<KeyValueRef> keyLocations,
DatabaseConfiguration configuration,
std::map<UID, StorageServerInterface> tssMapping,
ConsistencyCheckWorkload* self) {
// Stores the total number of bytes on each storage server
// In a distributed test, this will be an estimated size
state std::map<UID, int64_t> storageServerSizes;

// Iterate through each shard, checking its values on all of its storage servers
// If shardSampleFactor > 1, then not all shards are processed
// Also, in a distributed data consistency check, each client processes a subset of the shards
// Note: this may cause some shards to be processed more than once or not at all in a non-quiescent database
state int effectiveClientCount = (self->distributed) ? self->clientCount : 1;
state int i = self->clientId * (self->shardSampleFactor + 1);
state int increment =
(self->distributed && !self->firstClient) ? effectiveClientCount * self->shardSampleFactor : 1;
state int rateLimitForThisRound =
self->bytesReadInPreviousRound == 0
? self->rateLimitMax
: std::min(
self->rateLimitMax,
static_cast<int>(ceil(self->bytesReadInPreviousRound /
(float)CLIENT_KNOBS->CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME)));
ASSERT(rateLimitForThisRound >= 0 && rateLimitForThisRound <= self->rateLimitMax);
TraceEvent("ConsistencyCheck_RateLimitForThisRound").detail("RateLimit", rateLimitForThisRound);
state Reference<IRateControl> rateLimiter = Reference<IRateControl>(new SpeedLimit(rateLimitForThisRound, 1));
state double rateLimiterStartTime = now();
state int64_t bytesReadInthisRound = 0;
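// Illustrative arithmetic (hypothetical numbers, not part of this commit): if the previous round read
// 1e12 bytes and the target completion time knob is 1e6 seconds, this round is paced at
// ceil(1e12 / 1e6) = 1e6 bytes/sec, clamped to rateLimitMax; a first round (no read history) simply
// runs at rateLimitMax.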

state double dbSize = 100e12;
if (g_network->isSimulated()) {
// This call will get all shard ranges in the database, which is too expensive on real clusters.
int64_t _dbSize = wait(self->getDatabaseSize(cx));
dbSize = _dbSize;
}

state std::vector<KeyRangeRef> ranges;

for (int k = 0; k < keyLocations.size() - 1; k++) {
KeyRangeRef range(keyLocations[k].key, keyLocations[k + 1].key);
ranges.push_back(range);
}

state std::vector<int> shardOrder;
shardOrder.reserve(ranges.size());
for (int k = 0; k < ranges.size(); k++)
shardOrder.push_back(k);
if (self->shuffleShards) {
uint32_t seed = self->sharedRandomNumber + self->repetitions;
DeterministicRandom sharedRandom(seed == 0 ? 1 : seed);
sharedRandom.randomShuffle(shardOrder);
}

for (; i < ranges.size(); i += increment) {
state int shard = shardOrder[i];

state KeyRangeRef range = ranges[shard];
state std::vector<UID> sourceStorageServers;
state std::vector<UID> destStorageServers;
state Transaction tr(cx);
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
state int bytesReadInRange = 0;

RangeResult UIDtoTagMap = wait(tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
ASSERT(!UIDtoTagMap.more && UIDtoTagMap.size() < CLIENT_KNOBS->TOO_MANY);
decodeKeyServersValue(
UIDtoTagMap, keyLocations[shard].value, sourceStorageServers, destStorageServers, false);

// If the destStorageServers is non-empty, then this shard is being relocated
state bool isRelocating = destStorageServers.size() > 0;

// This check was disabled because we now disable data distribution during the consistency check,
// which can leave shards with dest storage servers.

// Disallow relocations in a quiescent database
/*if(self->firstClient && self->performQuiescentChecks && isRelocating)
{
TraceEvent("ConsistencyCheck_QuiescentShardRelocation").detail("ShardBegin", printable(range.start)).detail("ShardEnd", printable(range.end));
self->testFailure("Shard is being relocated in quiescent database");
return false;
}*/

// In a quiescent database, check that the team size is the same as the desired team size
if (self->firstClient && self->performQuiescentChecks &&
sourceStorageServers.size() != configuration.usableRegions * configuration.storageTeamSize) {
TraceEvent("ConsistencyCheck_InvalidTeamSize")
.detail("ShardBegin", printable(range.begin))
.detail("ShardEnd", printable(range.end))
.detail("SourceTeamSize", sourceStorageServers.size())
.detail("DestServerSize", destStorageServers.size())
.detail("ConfigStorageTeamSize", configuration.storageTeamSize)
.detail("UsableRegions", configuration.usableRegions);
// Record the server responsible for the problematic shards
int i = 0;
for (auto& id : sourceStorageServers) {
TraceEvent("IncorrectSizeTeamInfo").detail("ServerUID", id).detail("TeamIndex", i++);
}
self->testFailure("Invalid team size");
return false;
}
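// Illustrative arithmetic (hypothetical configuration, not part of this commit): with usableRegions = 2
// and storageTeamSize = 3, a quiescent database is expected to list 2 * 3 = 6 source storage servers
// for every shard; any other source-team size trips the InvalidTeamSize failure above.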

state std::vector<UID> storageServers = (isRelocating) ? destStorageServers : sourceStorageServers;
state std::vector<StorageServerInterface> storageServerInterfaces;

//TraceEvent("ConsistencyCheck_GetStorageInfo").detail("StorageServers", storageServers.size());
loop {
try {
std::vector<Future<Optional<Value>>> serverListEntries;
serverListEntries.reserve(storageServers.size());
for (int s = 0; s < storageServers.size(); s++)
serverListEntries.push_back(tr.get(serverListKeyFor(storageServers[s])));
state std::vector<Optional<Value>> serverListValues = wait(getAll(serverListEntries));
for (int s = 0; s < serverListValues.size(); s++) {
if (serverListValues[s].present())
storageServerInterfaces.push_back(decodeServerListValue(serverListValues[s].get()));
else if (self->performQuiescentChecks)
self->testFailure("/FF/serverList changing in a quiescent database");
}

break;
} catch (Error& e) {
wait(tr.onError(e));
}
}

// add TSS to end of list, if configured and if not relocating
if (!isRelocating && self->performTSSCheck) {
int initialSize = storageServers.size();
for (int i = 0; i < initialSize; i++) {
auto tssPair = tssMapping.find(storageServers[i]);
if (tssPair != tssMapping.end()) {
CODE_PROBE(true, "TSS checked in consistency check");
storageServers.push_back(tssPair->second.id());
storageServerInterfaces.push_back(tssPair->second);
}
}
}

state std::vector<int64_t> estimatedBytes =
wait(self->getStorageSizeEstimate(storageServerInterfaces, range));

// Gets permitted size range of shard
int64_t maxShardSize = getMaxShardSize(dbSize);
state ShardSizeBounds shardBounds = getShardSizeBounds(range, maxShardSize);

if (self->firstClient) {
// If there was an error retrieving shard estimated size
if (self->performQuiescentChecks && estimatedBytes.size() == 0)
self->testFailure("Error fetching storage metrics");

// If running a distributed test, storage server size is an accumulation of shard estimates
else if (self->distributed && self->firstClient)
for (int j = 0; j < storageServers.size(); j++)
storageServerSizes[storageServers[j]] += std::max(estimatedBytes[j], (int64_t)0);
}

// The first client may need to skip the rest of the loop contents if it is just processing this shard to
// get a size estimate
if (!self->firstClient || shard % (effectiveClientCount * self->shardSampleFactor) == 0) {
state int shardKeys = 0;
state int shardBytes = 0;
state int sampledBytes = 0;
state int splitBytes = 0;
state int firstKeySampledBytes = 0;
state int sampledKeys = 0;
state int sampledKeysWithProb = 0;
state double shardVariance = 0;
state bool canSplit = false;
state Key lastSampleKey;
state Key lastStartSampleKey;
state int64_t totalReadAmount = 0;

state KeySelector begin = firstGreaterOrEqual(range.begin);
state Transaction onErrorTr(
cx); // This transaction exists only to access onError and its backoff behavior

// Read a limited number of entries at a time, repeating until all keys in the shard have been read
loop {
try {
lastSampleKey = lastStartSampleKey;

// Get the min version of the storage servers
Version version = wait(self->getVersion(cx, self));

state GetKeyValuesRequest req;
req.begin = begin;
req.end = firstGreaterOrEqual(range.end);
req.limit = 1e4;
req.limitBytes = CLIENT_KNOBS->REPLY_BYTE_LIMIT;
req.version = version;
req.tags = TagSet();

// Try getting the entries in the specified range
state std::vector<Future<ErrorOr<GetKeyValuesReply>>> keyValueFutures;
state int j = 0;
for (j = 0; j < storageServerInterfaces.size(); j++) {
resetReply(req);
keyValueFutures.push_back(
storageServerInterfaces[j].getKeyValues.getReplyUnlessFailedFor(req, 2, 0));
}

wait(waitForAll(keyValueFutures));

// Read the resulting entries
state int firstValidServer = -1;
totalReadAmount = 0;
for (j = 0; j < keyValueFutures.size(); j++) {
ErrorOr<GetKeyValuesReply> rangeResult = keyValueFutures[j].get();

// Compare the results with other storage servers
if (rangeResult.present() && !rangeResult.get().error.present()) {
state GetKeyValuesReply current = rangeResult.get();
totalReadAmount += current.data.expectedSize();
// If we haven't encountered a valid storage server yet, then mark this as the baseline
// to compare against
if (firstValidServer == -1)
firstValidServer = j;

// Compare this shard against the first
else {
GetKeyValuesReply reference = keyValueFutures[firstValidServer].get().get();

if (current.data != reference.data || current.more != reference.more) {
// Be especially verbose if in simulation
if (g_network->isSimulated()) {
int invalidIndex = -1;
printf("\n%sSERVER %d (%s); shard = %s - %s:\n",
storageServerInterfaces[j].isTss() ? "TSS " : "",
j,
storageServerInterfaces[j].address().toString().c_str(),
printable(req.begin.getKey()).c_str(),
printable(req.end.getKey()).c_str());
for (int k = 0; k < current.data.size(); k++) {
printf("%d. %s => %s\n",
k,
printable(current.data[k].key).c_str(),
printable(current.data[k].value).c_str());
if (invalidIndex < 0 &&
(k >= reference.data.size() ||
current.data[k].key != reference.data[k].key ||
current.data[k].value != reference.data[k].value))
invalidIndex = k;
}

printf(
"\n%sSERVER %d (%s); shard = %s - %s:\n",
storageServerInterfaces[firstValidServer].isTss() ? "TSS " : "",
firstValidServer,
storageServerInterfaces[firstValidServer].address().toString().c_str(),
printable(req.begin.getKey()).c_str(),
printable(req.end.getKey()).c_str());
for (int k = 0; k < reference.data.size(); k++) {
printf("%d. %s => %s\n",
k,
printable(reference.data[k].key).c_str(),
printable(reference.data[k].value).c_str());
if (invalidIndex < 0 &&
(k >= current.data.size() ||
reference.data[k].key != current.data[k].key ||
reference.data[k].value != current.data[k].value))
invalidIndex = k;
}

printf("\nMISMATCH AT %d\n\n", invalidIndex);
}

// Data for trace event
// The number of keys unique to the current shard
int currentUniques = 0;
// The number of keys unique to the reference shard
int referenceUniques = 0;
// The number of keys in both shards with conflicting values
int valueMismatches = 0;
// The number of keys in both shards with matching values
int matchingKVPairs = 0;
// Last unique key on the current shard
KeyRef currentUniqueKey;
// Last unique key on the reference shard
KeyRef referenceUniqueKey;
// Last value mismatch
KeyRef valueMismatchKey;

// Loop indices
int currentI = 0;
int referenceI = 0;
while (currentI < current.data.size() || referenceI < reference.data.size()) {
if (currentI >= current.data.size()) {
referenceUniqueKey = reference.data[referenceI].key;
referenceUniques++;
referenceI++;
} else if (referenceI >= reference.data.size()) {
currentUniqueKey = current.data[currentI].key;
currentUniques++;
currentI++;
} else {
KeyValueRef currentKV = current.data[currentI];
KeyValueRef referenceKV = reference.data[referenceI];

if (currentKV.key == referenceKV.key) {
if (currentKV.value == referenceKV.value)
matchingKVPairs++;
else {
valueMismatchKey = currentKV.key;
valueMismatches++;
}

currentI++;
referenceI++;
} else if (currentKV.key < referenceKV.key) {
currentUniqueKey = currentKV.key;
currentUniques++;
currentI++;
} else {
referenceUniqueKey = referenceKV.key;
referenceUniques++;
referenceI++;
}
}
}

TraceEvent("ConsistencyCheck_DataInconsistent")
.detail(format("StorageServer%d", j).c_str(), storageServers[j].toString())
.detail(format("StorageServer%d", firstValidServer).c_str(),
storageServers[firstValidServer].toString())
.detail("ShardBegin", req.begin.getKey())
.detail("ShardEnd", req.end.getKey())
.detail("VersionNumber", req.version)
.detail(format("Server%dUniques", j).c_str(), currentUniques)
.detail(format("Server%dUniqueKey", j).c_str(), currentUniqueKey)
.detail(format("Server%dUniques", firstValidServer).c_str(),
referenceUniques)
.detail(format("Server%dUniqueKey", firstValidServer).c_str(),
referenceUniqueKey)
.detail("ValueMismatches", valueMismatches)
.detail("ValueMismatchKey", valueMismatchKey)
.detail("MatchingKVPairs", matchingKVPairs)
.detail("IsTSS",
storageServerInterfaces[j].isTss() ||
storageServerInterfaces[firstValidServer].isTss()
? "True"
: "False");

if ((g_network->isSimulated() &&
g_simulator->tssMode != ISimulator::TSSMode::EnabledDropMutations) ||
(!storageServerInterfaces[j].isTss() &&
!storageServerInterfaces[firstValidServer].isTss())) {
self->testFailure("Data inconsistent", true);
return false;
}
}
}
}

// If the data is not available and we aren't relocating this shard
else if (!isRelocating) {
Error e =
rangeResult.isError() ? rangeResult.getError() : rangeResult.get().error.get();

TraceEvent("ConsistencyCheck_StorageServerUnavailable")
.errorUnsuppressed(e)
.suppressFor(1.0)
.detail("StorageServer", storageServers[j])
.detail("ShardBegin", printable(range.begin))
.detail("ShardEnd", printable(range.end))
.detail("Address", storageServerInterfaces[j].address())
.detail("UID", storageServerInterfaces[j].id())
.detail("GetKeyValuesToken",
storageServerInterfaces[j].getKeyValues.getEndpoint().token)
.detail("IsTSS", storageServerInterfaces[j].isTss() ? "True" : "False");

// All shards should be available in quiescence
if (self->performQuiescentChecks && !storageServerInterfaces[j].isTss()) {
self->testFailure("Storage server unavailable");
return false;
}
}
}

if (firstValidServer >= 0) {
VectorRef<KeyValueRef> data = keyValueFutures[firstValidServer].get().get().data;
// Calculate the size of the shard, the variance of the shard size estimate, and the correct
// shard size estimate
for (int k = 0; k < data.size(); k++) {
ByteSampleInfo sampleInfo = isKeyValueInSample(data[k]);
shardBytes += sampleInfo.size;
double itemProbability = ((double)sampleInfo.size) / sampleInfo.sampledSize;
if (itemProbability < 1)
shardVariance += itemProbability * (1 - itemProbability) *
pow((double)sampleInfo.sampledSize, 2);

if (sampleInfo.inSample) {
sampledBytes += sampleInfo.sampledSize;
if (!canSplit && sampledBytes >= shardBounds.min.bytes &&
data[k].key.size() <= CLIENT_KNOBS->SPLIT_KEY_SIZE_LIMIT &&
sampledBytes <= shardBounds.max.bytes *
CLIENT_KNOBS->STORAGE_METRICS_UNFAIR_SPLIT_LIMIT / 2) {
canSplit = true;
splitBytes = sampledBytes;
}

/*TraceEvent("ConsistencyCheck_ByteSample").detail("ShardBegin", printable(range.begin)).detail("ShardEnd", printable(range.end))
.detail("SampledBytes", sampleInfo.sampledSize).detail("Key",
printable(data[k].key)).detail("KeySize", data[k].key.size()).detail("ValueSize",
data[k].value.size());*/

// In data distribution, the splitting process ignores the first key in a shard.
// Thus, we shouldn't consider it when validating the upper bound of estimated shard
// sizes
if (k == 0)
firstKeySampledBytes += sampleInfo.sampledSize;

sampledKeys++;
if (itemProbability < 1) {
sampledKeysWithProb++;
}
}
}

// Accumulate number of keys in this shard
shardKeys += data.size();
}
// after requesting each shard, enforce rate limit based on how much data will likely be read
if (rateLimitForThisRound > 0) {
wait(rateLimiter->getAllowance(totalReadAmount));
// Set ratelimit to max allowed if current round has been going on for a while
if (now() - rateLimiterStartTime >
1.1 * CLIENT_KNOBS->CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME &&
rateLimitForThisRound != self->rateLimitMax) {
rateLimitForThisRound = self->rateLimitMax;
rateLimiter = Reference<IRateControl>(new SpeedLimit(rateLimitForThisRound, 1));
rateLimiterStartTime = now();
TraceEvent(SevInfo, "ConsistencyCheck_RateLimitSetMaxForThisRound")
.detail("RateLimit", rateLimitForThisRound);
}
}
bytesReadInRange += totalReadAmount;
bytesReadInthisRound += totalReadAmount;

// Advance to the next set of entries
if (firstValidServer >= 0 && keyValueFutures[firstValidServer].get().get().more) {
VectorRef<KeyValueRef> result = keyValueFutures[firstValidServer].get().get().data;
ASSERT(result.size() > 0);
begin = firstGreaterThan(result[result.size() - 1].key);
ASSERT(begin.getKey() != allKeys.end);
lastStartSampleKey = lastSampleKey;
} else
break;
} catch (Error& e) {
state Error err = e;
wait(onErrorTr.onError(err));
TraceEvent("ConsistencyCheck_RetryDataConsistency").error(err);
}
}

canSplit = canSplit && sampledBytes - splitBytes >= shardBounds.min.bytes && sampledBytes > splitBytes;

// Update the size of all storage servers containing this shard
// This is only done in a non-distributed consistency check; the distributed check uses shard size
// estimates
if (!self->distributed)
for (int j = 0; j < storageServers.size(); j++)
storageServerSizes[storageServers[j]] += shardBytes;

// FIXME: Where is this intended to be used?
[[maybe_unused]] bool hasValidEstimate = estimatedBytes.size() > 0;

// If the storage servers' sampled estimate of shard size is different from ours
if (self->performQuiescentChecks) {
for (int j = 0; j < estimatedBytes.size(); j++) {
if (estimatedBytes[j] >= 0 && estimatedBytes[j] != sampledBytes) {
TraceEvent("ConsistencyCheck_IncorrectEstimate")
.detail("EstimatedBytes", estimatedBytes[j])
.detail("CorrectSampledBytes", sampledBytes)
.detail("StorageServer", storageServers[j])
.detail("IsTSS", storageServerInterfaces[j].isTss() ? "True" : "False");

if (!storageServerInterfaces[j].isTss()) {
self->testFailure("Storage servers had incorrect sampled estimate");
}

hasValidEstimate = false;

break;
} else if (estimatedBytes[j] < 0 &&
((g_network->isSimulated() &&
g_simulator->tssMode <= ISimulator::TSSMode::EnabledNormal) ||
!storageServerInterfaces[j].isTss())) {
// Ignore a non-responding TSS outside of simulation, or if tss fault injection is enabled
hasValidEstimate = false;
break;
}
}
}

// Compute the difference between the shard size estimate and its actual size. If it is sufficiently
// large, then fail
double stdDev = sqrt(shardVariance);

double failErrorNumStdDev = 7;
int estimateError = abs(shardBytes - sampledBytes);

// Only perform the check if there are sufficient keys to get a distribution that should resemble a
// normal distribution
if (sampledKeysWithProb > 30 && estimateError > failErrorNumStdDev * stdDev) {
double numStdDev = estimateError / sqrt(shardVariance);
TraceEvent("ConsistencyCheck_InaccurateShardEstimate")
.detail("Min", shardBounds.min.bytes)
.detail("Max", shardBounds.max.bytes)
.detail("Estimate", sampledBytes)
.detail("Actual", shardBytes)
.detail("NumStdDev", numStdDev)
.detail("Variance", shardVariance)
.detail("StdDev", stdDev)
.detail("ShardBegin", printable(range.begin))
.detail("ShardEnd", printable(range.end))
.detail("NumKeys", shardKeys)
.detail("NumSampledKeys", sampledKeys)
.detail("NumSampledKeysWithProb", sampledKeysWithProb);

self->testFailure(format("Shard size is more than %f std dev from estimate", failErrorNumStdDev));
}
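// Illustrative arithmetic (hypothetical numbers, not part of this commit): each sampled key with
// inclusion probability p = size / sampledSize contributes p * (1 - p) * sampledSize^2 to the
// variance. If the accumulated shardVariance were 1e10 bytes^2, then stdDev = sqrt(1e10) = 1e5 bytes,
// and with failErrorNumStdDev = 7 the check above only fires when the sampled estimate differs from
// the measured shard size by more than 7e5 bytes (and at least 31 probabilistically sampled keys
// were seen).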

// In a quiescent database, check that the (estimated) size of the shard is within permitted bounds
// Min and max shard sizes have a 3 * shardBounds.permittedError.bytes cushion for error since shard
// sizes are not precise. Shard splits ignore the first key in a shard, so its size shouldn't be
// considered when checking the upper bound. 0xff shards are not checked
if (canSplit && sampledKeys > 5 && self->performQuiescentChecks &&
!range.begin.startsWith(keyServersPrefix) &&
(sampledBytes < shardBounds.min.bytes - 3 * shardBounds.permittedError.bytes ||
sampledBytes - firstKeySampledBytes >
shardBounds.max.bytes + 3 * shardBounds.permittedError.bytes)) {
TraceEvent("ConsistencyCheck_InvalidShardSize")
.detail("Min", shardBounds.min.bytes)
.detail("Max", shardBounds.max.bytes)
.detail("Size", shardBytes)
.detail("EstimatedSize", sampledBytes)
.detail("ShardBegin", printable(range.begin))
.detail("ShardEnd", printable(range.end))
.detail("ShardCount", ranges.size())
.detail("SampledKeys", sampledKeys);
self->testFailure(format("Shard size in quiescent database is too %s",
(sampledBytes < shardBounds.min.bytes) ? "small" : "large"));
return false;
}
}

if (bytesReadInRange > 0) {
TraceEvent("ConsistencyCheck_ReadRange")
.suppressFor(1.0)
.detail("Range", range)
.detail("BytesRead", bytesReadInRange);
}
}

// SOMEDAY: when background data distribution is implemented, include this test
// In a quiescent database, check that the sizes of storage servers are roughly the same
/*if(self->performQuiescentChecks)
{
auto minStorageServer = std::min_element(storageServerSizes.begin(), storageServerSizes.end(),
ConsistencyCheckWorkload::compareByValue<UID, int64_t>); auto maxStorageServer =
std::max_element(storageServerSizes.begin(), storageServerSizes.end(),
ConsistencyCheckWorkload::compareByValue<UID, int64_t>);

int bias = SERVER_KNOBS->MIN_SHARD_BYTES;
if(1.1 * (minStorageServer->second + SERVER_KNOBS->MIN_SHARD_BYTES) < maxStorageServer->second +
SERVER_KNOBS->MIN_SHARD_BYTES)
{
TraceEvent("ConsistencyCheck_InconsistentStorageServerSizes").detail("MinSize", minStorageServer->second).detail("MaxSize", maxStorageServer->second)
.detail("MinStorageServer", minStorageServer->first).detail("MaxStorageServer",
maxStorageServer->first);

self->testFailure(format("Storage servers differ significantly in size by a factor of %f",
((double)maxStorageServer->second) / minStorageServer->second)); return false;
}
}*/

self->bytesReadInPreviousRound = bytesReadInthisRound;
return true;
}

// Returns true if any storage servers have the exact same network address or are not using the correct key value
// store type
ACTOR Future<bool> checkForUndesirableServers(Database cx,