Merge pull request #1963 from etschannen/master

Code cleanup and bug fixes
This commit is contained in:
Evan Tschannen 2019-08-06 18:43:09 -07:00 committed by GitHub
commit 46dc9db879
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 30 additions and 35 deletions

View File

@ -3348,7 +3348,7 @@ void enableClientInfoLogging() {
}
ACTOR Future<Void> snapshotDatabase(Reference<DatabaseContext> cx, StringRef snapPayload, UID snapUID, Optional<UID> debugID) {
TraceEvent("NativeAPI.SnapshotDatabaseEnter")
TraceEvent("SnapshotDatabaseEnter")
.detail("SnapPayload", snapPayload)
.detail("SnapUID", snapUID);
try {
@ -3365,10 +3365,10 @@ ACTOR Future<Void> snapshotDatabase(Reference<DatabaseContext> cx, StringRef sna
}
}
} catch (Error& e) {
TraceEvent("NativeAPI.SnapshotDatabaseError")
TraceEvent("SnapshotDatabaseError")
.error(e)
.detail("SnapPayload", snapPayload)
.detail("SnapUID", snapUID)
.error(e, true /* includeCancelled */);
.detail("SnapUID", snapUID);
throw;
}
return Void();

View File

@ -36,14 +36,6 @@ const KeyRef keyServersEnd = keyServersKeys.end;
const KeyRangeRef keyServersKeyServersKeys ( LiteralStringRef("\xff/keyServers/\xff/keyServers/"), LiteralStringRef("\xff/keyServers/\xff/keyServers0"));
const KeyRef keyServersKeyServersKey = keyServersKeyServersKeys.begin;
// list of reserved exec commands
const StringRef execSnap = LiteralStringRef("snap"); // snapshot persistent state of
// storage, TLog and coordinated state
const StringRef execDisableTLogPop = LiteralStringRef("\xff/TLogDisablePop"); // disable pop on TLog
const StringRef execEnableTLogPop = LiteralStringRef("\xff/TLogEnablePop"); // enable pop on TLog
// used to communicate snap failures between TLog and SnapTest Workload, used only in simulator
const StringRef snapTestFailStatus = LiteralStringRef("\xff/SnapTestFailStatus/");
// Builds the keyServers entry key for `k` by applying keyServersPrefix
// through KeyRef::withPrefix() and returning the resulting owned Key.
const Key keyServersKey( const KeyRef& k ) {
	Key prefixedKey = k.withPrefix( keyServersPrefix );
	return prefixedKey;
}

View File

@ -287,10 +287,6 @@ extern const KeyRef rebalanceDDIgnoreKey;
const Value healthyZoneValue( StringRef const& zoneId, Version version );
std::pair<Key,Version> decodeHealthyZoneValue( ValueRef const& );
extern const StringRef execSnap;
extern const StringRef execDisableTLogPop;
extern const StringRef execEnableTLogPop;
extern const StringRef snapTestFailStatus;
// All mutations done to this range are blindly copied into txnStateStore.
// Used to create artificially large txnStateStore instances in testing.

View File

@ -4138,7 +4138,7 @@ static std::set<int> const& normalDataDistributorErrors() {
ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<AsyncVar<struct ServerDBInfo>> db ) {
state Database cx = openDBOnServer(db, TaskPriority::DefaultDelay, true, true);
TraceEvent("SnapDataDistributor.SnapReqEnter")
TraceEvent("SnapDataDistributor_SnapReqEnter")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
try {
@ -4152,12 +4152,12 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
}
wait(waitForAll(disablePops));
TraceEvent("SnapDataDistributor.AfterDisableTLogPop")
TraceEvent("SnapDataDistributor_AfterDisableTLogPop")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
// snap local storage nodes
std::vector<WorkerInterface> storageWorkers = wait(getStorageWorkers(cx, db, true /* localOnly */));
TraceEvent("SnapDataDistributor.GotStorageWorkers")
TraceEvent("SnapDataDistributor_GotStorageWorkers")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
std::vector<Future<Void>> storageSnapReqs;
@ -4168,7 +4168,7 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
}
wait(waitForAll(storageSnapReqs));
TraceEvent("SnapDataDistributor.AfterSnapStorage")
TraceEvent("SnapDataDistributor_AfterSnapStorage")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
// snap local tlog nodes
@ -4180,7 +4180,7 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
}
wait(waitForAll(tLogSnapReqs));
TraceEvent("SnapDataDistributor.AfterTLogStorage")
TraceEvent("SnapDataDistributor_AfterTLogStorage")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
// enable tlog pop on local tlog nodes
@ -4192,12 +4192,12 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
}
wait(waitForAll(enablePops));
TraceEvent("SnapDataDistributor.AfterEnableTLogPops")
TraceEvent("SnapDataDistributor_AfterEnableTLogPops")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
// snap the coordinators
std::vector<WorkerInterface> coordWorkers = wait(getCoordWorkers(cx, db));
TraceEvent("SnapDataDistributor.GotCoordWorkers")
TraceEvent("SnapDataDistributor_GotCoordWorkers")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
std::vector<Future<Void>> coordSnapReqs;
@ -4207,11 +4207,11 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
);
}
wait(waitForAll(coordSnapReqs));
TraceEvent("SnapDataDistributor.AfterSnapCoords")
TraceEvent("SnapDataDistributor_AfterSnapCoords")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
} catch (Error& e) {
TraceEvent("SnapDataDistributor.SnapReqExit")
TraceEvent("SnapDataDistributor_SnapReqExit")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID)
.error(e, true /*includeCancelled */);

View File

@ -81,6 +81,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( TLOG_DEGRADED_DELAY_COUNT, 5 );
init( TLOG_DEGRADED_DURATION, 5.0 );
init( TLOG_IGNORE_POP_AUTO_ENABLE_DELAY, 300.0 );
init( TXS_POPPED_MAX_DELAY, 1.0 ); if ( randomize && BUGGIFY ) TXS_POPPED_MAX_DELAY = deterministicRandom()->random01();
// disk snapshot max timeout, to be put in TLog, storage and coordinator nodes
init( SNAP_CREATE_MAX_TIMEOUT, 300.0 );

View File

@ -83,6 +83,7 @@ public:
int DISK_QUEUE_MAX_TRUNCATE_BYTES; // A truncate larger than this will cause the file to be replaced instead.
int TLOG_DEGRADED_DELAY_COUNT;
double TLOG_DEGRADED_DURATION;
double TXS_POPPED_MAX_DELAY;
// Data distribution queue
double HEALTH_POLL_TIME;

View File

@ -1042,7 +1042,7 @@ ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, TaskPriori
cursor->advanceTo(self->messageVersion);
}
self->messageIndex = self->messages.size();
if (self->messages.size() > 0 && self->messages[self->messages.size()-1].version < self->messageVersion) {
if (self->messages.size() > 0 && self->messages[self->messages.size()-1].version.version < self->poppedVersion) {
self->hasNextMessage = false;
} else {
auto iter = std::lower_bound(self->messages.begin(), self->messages.end(),

View File

@ -1455,7 +1455,7 @@ ACTOR Future<Void> lastCommitUpdater(ProxyCommitData* self, PromiseStream<Future
}
ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* commitData) {
TraceEvent("SnapMasterProxy.SnapReqEnter")
TraceEvent("SnapMasterProxy_SnapReqEnter")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
try {
@ -1463,7 +1463,7 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
ExecCmdValueString execArg(snapReq.snapPayload);
StringRef binPath = execArg.getBinaryPath();
if (!isWhitelisted(commitData->whitelistedBinPathVec, binPath)) {
TraceEvent("SnapMasterProxy.WhiteListCheckFailed")
TraceEvent("SnapMasterProxy_WhiteListCheckFailed")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
throw transaction_not_permitted();
@ -1475,7 +1475,7 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
// Snapshotting an old tlog generation is not currently
// supported, so the snapshot request is failed until the
// cluster is fully_recovered.
TraceEvent("SnapMasterProxy.ClusterNotFullyRecovered")
TraceEvent("SnapMasterProxy_ClusterNotFullyRecovered")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
throw cluster_not_fully_recovered();
@ -1490,7 +1490,7 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
// FIXME: logAntiQuorum not supported, remove it later,
// In version2, we probably don't need this limitation, but this needs to be tested.
if (logAntiQuorum > 0) {
TraceEvent("SnapMasterProxy.LogAnitQuorumNotSupported")
TraceEvent("SnapMasterProxy_LogAnitQuorumNotSupported")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
throw txn_exec_log_anti_quorum();
@ -1506,7 +1506,7 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
try {
wait(throwErrorOr(ddSnapReq));
} catch (Error& e) {
TraceEvent("SnapMasterProxy.DDSnapResponseError")
TraceEvent("SnapMasterProxy_DDSnapResponseError")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID)
.error(e, true /*includeCancelled*/ );
@ -1514,7 +1514,7 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
}
snapReq.reply.send(Void());
} catch (Error& e) {
TraceEvent("SnapMasterProxy.SnapReqError")
TraceEvent("SnapMasterProxy_SnapReqError")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID)
.error(e, true /*includeCancelled*/);
@ -1524,7 +1524,7 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
throw e;
}
}
TraceEvent("SnapMasterProxy.SnapReqExit")
TraceEvent("SnapMasterProxy_SnapReqExit")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
return Void();

View File

@ -1093,7 +1093,12 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
wait( waitForAll(poppedReady) );
state Future<Void> maxGetPoppedDuration = delay(SERVER_KNOBS->TXS_POPPED_MAX_DELAY);
wait( waitForAll(poppedReady) || maxGetPoppedDuration );
if(maxGetPoppedDuration.isReady()) {
TraceEvent(SevWarnAlways, "PoppedTxsNotReady", self->dbgid);
}
Version maxPopped = 1;
for(auto &it : poppedFutures) {

View File

@ -122,7 +122,7 @@ public: // workload functions
// read the key SnapFailedTLog.$UID
loop {
try {
Standalone<StringRef> keyStr = snapTestFailStatus.withSuffix(StringRef(self->snapUID.toString()));
Standalone<StringRef> keyStr = LiteralStringRef("\xff/SnapTestFailStatus/").withSuffix(StringRef(self->snapUID.toString()));
TraceEvent("TestKeyStr").detail("Value", keyStr);
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
Optional<Value> val = wait(tr.get(keyStr));