Merge pull request #3273 from apple/release-6.3

Merge release-6.3 into master
This commit is contained in:
A.J. Beamon 2020-05-29 16:58:36 -07:00 committed by GitHub
commit 414206015d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 50 additions and 13 deletions

View File

@ -11,6 +11,7 @@ Fixes
* HTTPS requests made by backup could hang indefinitely. `(PR #3027) <https://github.com/apple/foundationdb/pull/3027>`_ * HTTPS requests made by backup could hang indefinitely. `(PR #3027) <https://github.com/apple/foundationdb/pull/3027>`_
* ``fdbrestore`` prefix options required exactly a single hyphen instead of the standard two. `(PR #3056) <https://github.com/apple/foundationdb/pull/3056>`_ * ``fdbrestore`` prefix options required exactly a single hyphen instead of the standard two. `(PR #3056) <https://github.com/apple/foundationdb/pull/3056>`_
* Commits could stall on a newly elected proxy because of inaccurate compute estimates. `(PR #3123) <https://github.com/apple/foundationdb/pull/3123>`_ * Commits could stall on a newly elected proxy because of inaccurate compute estimates. `(PR #3123) <https://github.com/apple/foundationdb/pull/3123>`_
* A transaction class process with a bad disk could be repeatedly recruited as a transaction log. `(PR #3268) <https://github.com/apple/foundationdb/pull/3268>`_
Features Features
-------- --------

View File

@ -167,7 +167,11 @@ class DLDatabase : public IDatabase, ThreadSafeReferenceCounted<DLDatabase> {
public: public:
DLDatabase(Reference<FdbCApi> api, FdbCApi::FDBDatabase *db) : api(api), db(db), ready(Void()) {} DLDatabase(Reference<FdbCApi> api, FdbCApi::FDBDatabase *db) : api(api), db(db), ready(Void()) {}
DLDatabase(Reference<FdbCApi> api, ThreadFuture<FdbCApi::FDBDatabase*> dbFuture); DLDatabase(Reference<FdbCApi> api, ThreadFuture<FdbCApi::FDBDatabase*> dbFuture);
~DLDatabase() { api->databaseDestroy(db); } ~DLDatabase() {
if (db) {
api->databaseDestroy(db);
}
}
ThreadFuture<Void> onReady(); ThreadFuture<Void> onReady();

View File

@ -99,7 +99,6 @@ int mostUsedZoneCount(Reference<LocalitySet>& logServerSet, std::vector<Locality
bool findBestPolicySetSimple(int targetUniqueValueCount, Reference<LocalitySet>& logServerSet, std::vector<LocalityEntry>& bestSet, bool findBestPolicySetSimple(int targetUniqueValueCount, Reference<LocalitySet>& logServerSet, std::vector<LocalityEntry>& bestSet,
int desired) { int desired) {
auto& mutableEntries = logServerSet->getMutableEntries(); auto& mutableEntries = logServerSet->getMutableEntries();
deterministicRandom()->randomShuffle(mutableEntries);
// First make sure the current localitySet is able to fulfill the policy // First make sure the current localitySet is able to fulfill the policy
AttribKey indexKey = logServerSet->keyIndex("zoneid"); AttribKey indexKey = logServerSet->keyIndex("zoneid");
int uniqueValueCount = logServerSet->getKeyValueArray()[indexKey._id].size(); int uniqueValueCount = logServerSet->getKeyValueArray()[indexKey._id].size();
@ -118,18 +117,24 @@ bool findBestPolicySetSimple(int targetUniqueValueCount, Reference<LocalitySet>&
} }
ASSERT_WE_THINK(uniqueValueCount == entries.size()); ASSERT_WE_THINK(uniqueValueCount == entries.size());
std::vector<std::vector<int>> randomizedEntries;
randomizedEntries.resize(entries.size());
for(auto it : entries) {
randomizedEntries.push_back(it.second);
}
deterministicRandom()->randomShuffle(randomizedEntries);
desired = std::max(desired, targetUniqueValueCount); desired = std::max(desired, targetUniqueValueCount);
auto it = entries.begin(); auto it = randomizedEntries.begin();
while (bestSet.size() < desired) { while (bestSet.size() < desired) {
if(it->second.size()) { if(it->size()) {
bestSet.push_back(mutableEntries[it->second.back()]); bestSet.push_back(mutableEntries[it->back()]);
it->second.pop_back(); it->pop_back();
} }
++it; ++it;
if(it == entries.end()) { if(it == randomizedEntries.end()) {
it = entries.begin(); it = randomizedEntries.begin();
} }
} }

View File

@ -85,6 +85,10 @@ struct LogRouterData {
bool allowPops; bool allowPops;
LogSet logSet; LogSet logSet;
bool foundEpochEnd; bool foundEpochEnd;
double waitForVersionTime = 0;
double maxWaitForVersionTime = 0;
double getMoreTime = 0;
double maxGetMoreTime = 0;
struct PeekTrackerData { struct PeekTrackerData {
std::map<int, Promise<std::pair<Version, bool>>> sequence_version; std::map<int, Promise<std::pair<Version, bool>>> sequence_version;
@ -94,6 +98,7 @@ struct LogRouterData {
std::map<UID, PeekTrackerData> peekTracker; std::map<UID, PeekTrackerData> peekTracker;
CounterCollection cc; CounterCollection cc;
Counter getMoreCount, getMoreBlockedCount;
Future<Void> logger; Future<Void> logger;
Reference<EventCacheHolder> eventCacheHolder; Reference<EventCacheHolder> eventCacheHolder;
@ -116,7 +121,7 @@ struct LogRouterData {
LogRouterData(UID dbgid, const InitializeLogRouterRequest& req) : dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar<Reference<ILogSystem>>()), LogRouterData(UID dbgid, const InitializeLogRouterRequest& req) : dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar<Reference<ILogSystem>>()),
version(req.startVersion-1), minPopped(0), startVersion(req.startVersion), allowPops(false), minKnownCommittedVersion(0), poppedVersion(0), foundEpochEnd(false), version(req.startVersion-1), minPopped(0), startVersion(req.startVersion), allowPops(false), minKnownCommittedVersion(0), poppedVersion(0), foundEpochEnd(false),
cc("LogRouter", dbgid.toString()) { cc("LogRouter", dbgid.toString()), getMoreCount("GetMoreCount", cc), getMoreBlockedCount("GetMoreBlockedCount", cc) {
//setup just enough of a logSet to be able to call getPushLocations //setup just enough of a logSet to be able to call getPushLocations
logSet.logServers.resize(req.tLogLocalities.size()); logSet.logServers.resize(req.tLogLocalities.size());
logSet.tLogPolicy = req.tLogPolicy; logSet.tLogPolicy = req.tLogPolicy;
@ -133,11 +138,16 @@ struct LogRouterData {
eventCacheHolder = Reference<EventCacheHolder>( new EventCacheHolder(dbgid.shortString() + ".PeekLocation") ); eventCacheHolder = Reference<EventCacheHolder>( new EventCacheHolder(dbgid.shortString() + ".PeekLocation") );
specialCounter(cc, "Version", [this](){return this->version.get(); }); specialCounter(cc, "Version", [this](){ return this->version.get(); });
specialCounter(cc, "MinPopped", [this](){return this->minPopped.get(); }); specialCounter(cc, "MinPopped", [this](){ return this->minPopped.get(); });
specialCounter(cc, "FetchedVersions", [this](){ return std::max<Version>(0, std::min<Version>(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS, this->version.get() - this->minPopped.get())); }); specialCounter(cc, "FetchedVersions", [this](){ return std::max<Version>(0, std::min<Version>(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS, this->version.get() - this->minPopped.get())); });
specialCounter(cc, "MinKnownCommittedVersion", [this](){ return this->minKnownCommittedVersion; }); specialCounter(cc, "MinKnownCommittedVersion", [this](){ return this->minKnownCommittedVersion; });
specialCounter(cc, "PoppedVersion", [this](){ return this->poppedVersion; }); specialCounter(cc, "PoppedVersion", [this](){ return this->poppedVersion; });
specialCounter(cc, "FoundEpochEnd", [this](){ return this->foundEpochEnd; });
specialCounter(cc, "WaitForVersionMS", [this](){ double val = this->waitForVersionTime; this->waitForVersionTime = 0; return 1000*val; });
specialCounter(cc, "WaitForVersionMaxMS", [this](){ double val = this->maxWaitForVersionTime; this->maxWaitForVersionTime = 0; return 1000*val; });
specialCounter(cc, "GetMoreMS", [this](){ double val = this->getMoreTime; this->getMoreTime = 0; return 1000*val; });
specialCounter(cc, "GetMoreMaxMS", [this](){ double val = this->maxGetMoreTime; this->maxGetMoreTime = 0; return 1000*val; });
logger = traceCounters("LogRouterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "LogRouterMetrics"); logger = traceCounters("LogRouterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "LogRouterMetrics");
} }
}; };
@ -195,11 +205,14 @@ void commitMessages( LogRouterData* self, Version version, const std::vector<Tag
ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) { ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
// The only time the log router should allow a gap in versions larger than MAX_READ_TRANSACTION_LIFE_VERSIONS is when processing epoch end. // The only time the log router should allow a gap in versions larger than MAX_READ_TRANSACTION_LIFE_VERSIONS is when processing epoch end.
// Since one set of log routers is created per generation of transaction logs, the gap caused by epoch end will be within MAX_VERSIONS_IN_FLIGHT of the log routers' start version. // Since one set of log routers is created per generation of transaction logs, the gap caused by epoch end will be within MAX_VERSIONS_IN_FLIGHT of the log routers' start version.
state double startTime = now();
if(self->version.get() < self->startVersion) { if(self->version.get() < self->startVersion) {
if(ver > self->startVersion) { if(ver > self->startVersion) {
self->version.set(self->startVersion); self->version.set(self->startVersion);
wait(self->minPopped.whenAtLeast(self->version.get())); wait(self->minPopped.whenAtLeast(self->version.get()));
} }
self->waitForVersionTime += now() - startTime;
self->maxWaitForVersionTime = std::max(self->maxWaitForVersionTime, now() - startTime);
return Void(); return Void();
} }
if(!self->foundEpochEnd) { if(!self->foundEpochEnd) {
@ -217,6 +230,8 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
if(ver >= self->startVersion + SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT) { if(ver >= self->startVersion + SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT) {
self->foundEpochEnd = true; self->foundEpochEnd = true;
} }
self->waitForVersionTime += now() - startTime;
self->maxWaitForVersionTime = std::max(self->maxWaitForVersionTime, now() - startTime);
return Void(); return Void();
} }
@ -229,8 +244,19 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
loop { loop {
loop { loop {
Future<Void> getMoreF = Never();
if(r) {
getMoreF = r->getMore(TaskPriority::TLogCommit);
++self->getMoreCount;
if(!getMoreF.isReady()) {
++self->getMoreBlockedCount;
}
}
state double startTime = now();
choose { choose {
when(wait( r ? r->getMore(TaskPriority::TLogCommit) : Never() ) ) { when(wait( getMoreF ) ) {
self->getMoreTime += now() - startTime;
self->maxGetMoreTime = std::max(self->maxGetMoreTime, now() - startTime);
break; break;
} }
when( wait( dbInfoChange ) ) { //FIXME: does this actually happen? when( wait( dbInfoChange ) ) { //FIXME: does this actually happen?

View File

@ -212,7 +212,8 @@ ACTOR Future<Void> workerHandleErrors(FutureStream<ErrorInfo> errors) {
endRole(err.role, err.id, "Error", ok, err.error); endRole(err.role, err.id, "Error", ok, err.error);
if (err.error.code() == error_code_please_reboot || err.error.code() == error_code_io_timeout) throw err.error;
if (err.error.code() == error_code_please_reboot || err.error.code() == error_code_io_timeout || (err.role == Role::SHARED_TRANSACTION_LOG && err.error.code() == error_code_io_error )) throw err.error;
} }
} }
} }