/*
 * PaxosConfigTransaction.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fdbclient/DatabaseContext.h"
#include "fdbclient/PaxosConfigTransaction.h"
#include "flow/actorcompiler.h" // must be last include

using ConfigTransactionInfo = ModelInterface<ConfigTransactionInterface>;

// Largest exponent used when computing `1 << retries` for exponential backoff.
// Shifting a 32-bit int by 31 or more overflows (undefined behaviour before
// C++20, a negative value afterwards), which would make the clamped delay
// collapse to zero. 2^30 * 0.005s is already ~62 days, so capping here does not
// change the delay for any realistic TIMEOUT_RETRY_UPPER_BOUND.
static constexpr int maxBackoffExponent = 30;

// Sends one commit request to every ConfigTransactionInterface in `ctis` and
// resolves `result` from the tally of replies:
//   - success once a majority of replicas acknowledged the commit;
//   - commit_unknown_result once a majority failed, or once neither a
//     successful nor a failed majority can be reached any more.
// Also accumulates the mutations and annotation that make up the commit.
class CommitQuorum {
	ActorCollection actors{ false };
	std::vector<ConfigTransactionInterface> ctis;
	size_t failed{ 0 };
	size_t successful{ 0 };
	size_t maybeCommitted{ 0 };
	Promise<Void> result;
	Standalone<VectorRef<ConfigMutationRef>> mutations;
	ConfigCommitAnnotation annotation;

	// Builds the request sent to each replica from the accumulated mutations
	// and annotation.
	ConfigTransactionCommitRequest getCommitRequest(ConfigGeneration generation) const {
		return ConfigTransactionCommitRequest(generation, mutations, annotation);
	}

	// Re-evaluates the tallies after a reply and resolves `result` if the
	// outcome is now decided. Does nothing once `result` has been set.
	void updateResult() {
		const size_t quorum = ctis.size() / 2 + 1;
		if (successful >= quorum && result.canBeSet()) {
			result.send(Void());
		} else if (failed >= quorum && result.canBeSet()) {
			// Rollforwards could cause a version that didn't have quorum to
			// commit, so send commit_unknown_result instead of commit_failed.

			// Calling sendError could delete this
			auto local = this->result;
			local.sendError(commit_unknown_result());
		} else {
			// Check if it is possible to ever receive quorum agreement
			auto totalRequestsOutstanding = ctis.size() - (failed + successful + maybeCommitted);
			if ((failed + totalRequestsOutstanding < quorum) && (successful + totalRequestsOutstanding < quorum) &&
			    result.canBeSet()) {
				// Calling sendError could delete this
				auto local = this->result;
				local.sendError(commit_unknown_result());
			}
		}
	}

	// Sends the commit request to a single replica (by hostname when one is
	// present, otherwise through its `commit` stream), bounded by
	// COMMIT_QUORUM_TIMEOUT, and records the outcome:
	//   - reply received               -> successful
	//   - not_committed or timed_out   -> failed
	//   - any other error              -> maybeCommitted
	// actor_cancelled is rethrown without touching `self`.
	ACTOR static Future<Void> addRequestActor(CommitQuorum* self,
	                                          ConfigGeneration generation,
	                                          ConfigTransactionInterface cti) {
		try {
			if (cti.hostname.present()) {
				wait(timeoutError(retryGetReplyFromHostname(
				                      self->getCommitRequest(generation), cti.hostname.get(), WLTOKEN_CONFIGTXN_COMMIT),
				                  CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
			} else {
				wait(timeoutError(cti.commit.getReply(self->getCommitRequest(generation)),
				                  CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
			}
			++self->successful;
		} catch (Error& e) {
			// self might be destroyed if this actor is cancelled
			if (e.code() == error_code_actor_cancelled) {
				throw;
			}
			if (e.code() == error_code_not_committed || e.code() == error_code_timed_out) {
				++self->failed;
			} else {
				++self->maybeCommitted;
			}
		}
		self->updateResult();
		return Void();
	}

public:
	CommitQuorum() = default;
	explicit CommitQuorum(std::vector<ConfigTransactionInterface> const& ctis) : ctis(ctis) {}

	// Records a set. Writing configTransactionDescriptionKey updates the commit
	// annotation's description instead of adding a mutation.
	void set(KeyRef key, ValueRef value) {
		if (key == configTransactionDescriptionKey) {
			annotation.description = ValueRef(annotation.arena(), value);
		} else {
			mutations.push_back_deep(mutations.arena(),
			                         IKnobCollection::createSetMutation(mutations.arena(), key, value));
		}
	}

	// Records a clear. Clearing configTransactionDescriptionKey empties the
	// commit annotation's description instead of adding a mutation.
	void clear(KeyRef key) {
		if (key == configTransactionDescriptionKey) {
			annotation.description = ""_sr;
		} else {
			mutations.push_back_deep(mutations.arena(), IKnobCollection::createClearMutation(mutations.arena(), key));
		}
	}

	// Stamps the annotation with the current time.
	void setTimestamp() { annotation.timestamp = now(); }

	// Combined expected size of the annotation and the accumulated mutations.
	size_t expectedSize() const { return annotation.expectedSize() + mutations.expectedSize(); }

	// Starts one request actor per replica and returns the future that
	// updateResult() resolves.
	Future<Void> commit(ConfigGeneration generation) {
		// Send commit message to all replicas, even those that did not return the used replica.
		// This way, slow replicas are kept up to date.
		for (const auto& cti : ctis) {
			actors.add(addRequestActor(this, generation, cti));
		}
		return result.getFuture();
	}

	// True only when `result` was set with a value (not an error).
	bool committed() const { return result.isSet() && !result.isError(); }
};

// Asks every ConfigTransactionInterface in `ctis` for its current generation and
// resolves once a majority of replicas report the same generation. When that
// becomes impossible the attempt fails with failed_to_reach_quorum and is
// retried after a jittered, exponentially growing delay.
class GetGenerationQuorum {
	ActorCollection actors{ false };
	std::vector<ConfigTransactionInterface> ctis;
	// Replicas grouped by the generation they reported.
	std::map<ConfigGeneration, std::vector<ConfigTransactionInterface>> seenGenerations;
	Promise<ConfigGeneration> result;
	size_t totalRepliesReceived{ 0 };
	// Size of the largest group in seenGenerations.
	size_t maxAgreement{ 0 };
	Optional<Version> lastSeenLiveVersion;
	Future<ConfigGeneration> getGenerationFuture;

	// Requests the generation from a single replica, bounded by
	// GET_GENERATION_QUORUM_TIMEOUT. broken_promise causes the request to be
	// re-sent; timed_out counts as a received reply that agrees with nobody;
	// any other error propagates.
	ACTOR static Future<Void> addRequestActor(GetGenerationQuorum* self, ConfigTransactionInterface cti) {
		loop {
			try {
				state ConfigTransactionGetGenerationReply reply;
				if (cti.hostname.present()) {
					wait(timeoutError(store(reply,
					                        retryGetReplyFromHostname(
					                            ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion },
					                            cti.hostname.get(),
					                            WLTOKEN_CONFIGTXN_GETGENERATION)),
					                  CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
				} else {
					wait(timeoutError(store(reply,
					                        cti.getGeneration.getReply(
					                            ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion })),
					                  CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
				}

				++self->totalRepliesReceived;
				auto gen = reply.generation;
				// Remember the highest live version reported by any replica so far.
				self->lastSeenLiveVersion =
				    std::max(gen.liveVersion, self->lastSeenLiveVersion.orDefault(::invalidVersion));
				auto& replicas = self->seenGenerations[gen];
				replicas.push_back(cti);
				self->maxAgreement = std::max(replicas.size(), self->maxAgreement);
				if (replicas.size() >= self->ctis.size() / 2 + 1 && !self->result.isSet()) {
					self->result.send(gen);
				} else if (self->maxAgreement + (self->ctis.size() - self->totalRepliesReceived) <
				           (self->ctis.size() / 2 + 1)) {
					// Even if every outstanding replica agreed with the largest
					// group, it could no longer reach a majority.
					if (!self->result.isError()) {
						// Calling sendError could delete self
						auto local = self->result;
						local.sendError(failed_to_reach_quorum());
					}
				}
				break;
			} catch (Error& e) {
				if (e.code() == error_code_broken_promise) {
					continue;
				} else if (e.code() == error_code_timed_out) {
					++self->totalRepliesReceived;
					if (self->totalRepliesReceived == self->ctis.size() && self->result.canBeSet() &&
					    !self->result.isError()) {
						// Calling sendError could delete self
						auto local = self->result;
						local.sendError(failed_to_reach_quorum());
					}
					break;
				} else {
					throw;
				}
			}
		}
		return Void();
	}

	// Drives the quorum: starts one request actor per replica and waits for
	// `result`. On failed_to_reach_quorum it backs off, clears all per-attempt
	// state (lastSeenLiveVersion is kept) and tries again; any other error
	// propagates.
	ACTOR static Future<ConfigGeneration> getGenerationActor(GetGenerationQuorum* self) {
		state int retries = 0;
		loop {
			for (const auto& cti : self->ctis) {
				self->actors.add(addRequestActor(self, cti));
			}
			try {
				choose {
					when(ConfigGeneration generation = wait(self->result.getFuture())) { return generation; }
					when(wait(self->actors.getResult())) { ASSERT(false); }
				}
			} catch (Error& e) {
				if (e.code() == error_code_failed_to_reach_quorum) {
					CODE_PROBE(true, "Failed to reach quorum getting generation");
					// The exponent is capped so that the shift cannot overflow
					// `int` after many consecutive failures.
					wait(delayJittered(std::clamp(0.005 * (1 << std::min(retries, maxBackoffExponent)),
					                              0.0,
					                              CLIENT_KNOBS->TIMEOUT_RETRY_UPPER_BOUND)));
					++retries;
					self->actors.clear(false);
					self->seenGenerations.clear();
					self->result.reset();
					self->totalRepliesReceived = 0;
					self->maxAgreement = 0;
				} else {
					throw e;
				}
			}
		}
	}

public:
	GetGenerationQuorum() = default;
	explicit GetGenerationQuorum(std::vector<ConfigTransactionInterface> const& ctis,
	                             Optional<Version> const& lastSeenLiveVersion = {})
	  : ctis(ctis), lastSeenLiveVersion(lastSeenLiveVersion) {}

	// Starts the quorum on first call; later calls return the same future.
	Future<ConfigGeneration> getGeneration() {
		if (!getGenerationFuture.isValid()) {
			getGenerationFuture = getGenerationActor(this);
		}
		return getGenerationFuture;
	}

	// True once getGeneration() has been called and completed without error.
	bool isReady() const {
		return getGenerationFuture.isValid() && getGenerationFuture.isReady() && !getGenerationFuture.isError();
	}

	// The agreed generation if it is already known, otherwise empty.
	Optional<ConfigGeneration> getCachedGeneration() const {
		return isReady() ? getGenerationFuture.get() : Optional<ConfigGeneration>{};
	}

	// The replicas that reported the agreed generation. Requires isReady().
	std::vector<ConfigTransactionInterface> getReadReplicas() const {
		ASSERT(isReady());
		return seenGenerations.at(getGenerationFuture.get());
	}

	Optional<Version> getLastSeenLiveVersion() const { return lastSeenLiveVersion; }
};

// Implementation behind PaxosConfigTransaction: reads go to the replicas that
// agreed on the current generation, writes are buffered in a CommitQuorum and
// sent to every replica on commit.
class PaxosConfigTransactionImpl {
	std::vector<ConfigTransactionInterface> ctis;
	GetGenerationQuorum getGenerationQuorum;
	CommitQuorum commitQuorum;
	int numRetries{ 0 };
	Optional<UID> dID;
	Database cx;

	// Reads a single key at the agreed generation from one of the read
	// replicas, bounded by GET_KNOB_TIMEOUT. On timed_out or broken_promise the
	// transaction is reset and the read is retried; other errors propagate.
	ACTOR static Future<Optional<Value>> get(PaxosConfigTransactionImpl* self, Key key) {
		state ConfigKey configKey = ConfigKey::decodeKey(key);
		loop {
			try {
				state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
				state std::vector<ConfigTransactionInterface> readReplicas =
				    self->getGenerationQuorum.getReadReplicas();
				// Replicas addressed by hostname need their request stream
				// initialized before they can be load balanced over.
				std::vector<Future<Void>> fs;
				for (ConfigTransactionInterface& readReplica : readReplicas) {
					if (readReplica.hostname.present()) {
						fs.push_back(tryInitializeRequestStream(
						    &readReplica.get, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GET));
					}
				}
				wait(waitForAll(fs));
				state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
				ConfigTransactionGetReply reply =
				    wait(timeoutError(basicLoadBalance(configNodes,
				                                       &ConfigTransactionInterface::get,
				                                       ConfigTransactionGetRequest{ generation, configKey }),
				                      CLIENT_KNOBS->GET_KNOB_TIMEOUT));
				if (reply.value.present()) {
					return reply.value.get().toValue();
				} else {
					return Optional<Value>{};
				}
			} catch (Error& e) {
				if (e.code() != error_code_timed_out && e.code() != error_code_broken_promise) {
					throw;
				}
				self->reset();
			}
		}
	}

	// Fetches the config classes at the agreed generation and returns them as
	// keys with empty values.
	ACTOR static Future<RangeResult> getConfigClasses(PaxosConfigTransactionImpl* self) {
		state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
		state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
		std::vector<Future<Void>> fs;
		for (ConfigTransactionInterface& readReplica : readReplicas) {
			if (readReplica.hostname.present()) {
				fs.push_back(tryInitializeRequestStream(
				    &readReplica.getClasses, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETCLASSES));
			}
		}
		wait(waitForAll(fs));
		state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
		ConfigTransactionGetConfigClassesReply reply =
		    wait(basicLoadBalance(configNodes,
		                          &ConfigTransactionInterface::getClasses,
		                          ConfigTransactionGetConfigClassesRequest{ generation }));
		RangeResult result;
		result.reserve(result.arena(), reply.configClasses.size());
		for (const auto& configClass : reply.configClasses) {
			result.push_back_deep(result.arena(), KeyValueRef(configClass, ""_sr));
		}
		return result;
	}

	// Fetches the knob names for `configClass` (passed through to the request
	// as-is, possibly empty) at the agreed generation and returns them as keys
	// with empty values.
	ACTOR static Future<RangeResult> getKnobs(PaxosConfigTransactionImpl* self, Optional<Key> configClass) {
		state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
		state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
		std::vector<Future<Void>> fs;
		for (ConfigTransactionInterface& readReplica : readReplicas) {
			if (readReplica.hostname.present()) {
				fs.push_back(tryInitializeRequestStream(
				    &readReplica.getKnobs, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETKNOBS));
			}
		}
		wait(waitForAll(fs));
		state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
		ConfigTransactionGetKnobsReply reply =
		    wait(basicLoadBalance(configNodes,
		                          &ConfigTransactionInterface::getKnobs,
		                          ConfigTransactionGetKnobsRequest{ generation, configClass }));
		RangeResult result;
		result.reserve(result.arena(), reply.knobNames.size());
		for (const auto& knobName : reply.knobNames) {
			result.push_back_deep(result.arena(), KeyValueRef(knobName, ""_sr));
		}
		return result;
	}

	// Waits for the agreed generation, timestamps the annotation and commits
	// the buffered mutations against that generation.
	ACTOR static Future<Void> commit(PaxosConfigTransactionImpl* self) {
		ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
		self->commitQuorum.setTimestamp();
		wait(self->commitQuorum.commit(generation));
		return Void();
	}

	// For transaction_too_old and not_committed: waits a randomized,
	// exponentially growing delay, resets the transaction and returns so the
	// caller can retry. Every other error is rethrown.
	ACTOR static Future<Void> onError(PaxosConfigTransactionImpl* self, Error e) {
		// TODO: Improve this:
		TraceEvent("ConfigIncrementOnError").error(e).detail("NumRetries", self->numRetries);
		if (e.code() == error_code_transaction_too_old || e.code() == error_code_not_committed) {
			// The exponent is capped so that the shift cannot overflow `int`
			// after many consecutive retries.
			wait(delay(std::clamp((1 << std::min(self->numRetries++, maxBackoffExponent)) * 0.01 *
			                          deterministicRandom()->random01(),
			                      0.0,
			                      CLIENT_KNOBS->TIMEOUT_RETRY_UPPER_BOUND)));
			self->reset();
			return Void();
		}
		throw e;
	}

public:
	// The committed version of the agreed generation.
	Future<Version> getReadVersion() {
		return map(getGenerationQuorum.getGeneration(), [](auto const& gen) { return gen.committedVersion; });
	}

	// Same as getReadVersion(), but only if the generation is already known.
	Optional<Version> getCachedReadVersion() const {
		auto gen = getGenerationQuorum.getCachedGeneration();
		if (gen.present()) {
			return gen.get().committedVersion;
		} else {
			return {};
		}
	}

	// The live version of the agreed generation if the commit succeeded,
	// otherwise ::invalidVersion.
	Version getCommittedVersion() const {
		return commitQuorum.committed() ? getGenerationQuorum.getCachedGeneration().get().liveVersion
		                                : ::invalidVersion;
	}

	int64_t getApproximateSize() const { return commitQuorum.expectedSize(); }

	void set(KeyRef key, ValueRef value) { commitQuorum.set(key, value); }

	void clear(KeyRef key) { commitQuorum.clear(key); }

	Future<Optional<Value>> get(Key const& key) { return get(this, key); }

	// Only three shapes of range are readable: configClassKeys,
	// globalConfigKnobKeys, and a single-key range inside configKnobKeys (the
	// key with the configKnobKeys prefix removed names the config class).
	// Anything else throws invalid_config_db_range_read.
	Future<RangeResult> getRange(KeyRangeRef keys) {
		if (keys == configClassKeys) {
			return getConfigClasses(this);
		} else if (keys == globalConfigKnobKeys) {
			return getKnobs(this, {});
		} else if (configKnobKeys.contains(keys) && keys.singleKeyRange()) {
			const auto configClass = keys.begin.removePrefix(configKnobKeys.begin);
			return getKnobs(this, configClass);
		} else {
			throw invalid_config_db_range_read();
		}
	}

	Future<Void> onError(Error const& e) { return onError(this, e); }

	void debugTransaction(UID dID) { this->dID = dID; }

	// Discards the current generation and all buffered writes; the retry count
	// and debug ID are kept.
	// NOTE(review): the replacement GetGenerationQuorum is built without a
	// lastSeenLiveVersion, so the value available from getLastSeenLiveVersion()
	// is not carried over -- confirm this is intended.
	void reset() {
		getGenerationQuorum = GetGenerationQuorum{ ctis };
		commitQuorum = CommitQuorum{ ctis };
	}

	// reset() plus clearing the retry count and debug ID.
	void fullReset() {
		numRetries = 0;
		dID = {};
		reset();
	}

	// Throws `deferredError` if it carries an error code; otherwise delegates
	// to the database's own check when a database is attached.
	void checkDeferredError(Error const& deferredError) const {
		if (deferredError.code() != invalid_error_code) {
			throw deferredError;
		}
		if (cx.getPtr()) {
			cx->checkDeferredError();
		}
	}

	Future<Void> commit() { return commit(this); }

	// Builds the replica list from the database's connection string: one
	// interface per hostname, followed by one per coordinator address.
	PaxosConfigTransactionImpl(Database const& cx) : cx(cx) {
		const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
		ctis.reserve(cs.hostnames.size() + cs.coords.size());
		for (const auto& h : cs.hostnames) {
			ctis.emplace_back(h);
		}
		for (const auto& c : cs.coords) {
			ctis.emplace_back(c);
		}
		getGenerationQuorum = GetGenerationQuorum{ ctis };
		commitQuorum = CommitQuorum{ ctis };
	}

	// Uses the given replica list directly; no database is attached.
	PaxosConfigTransactionImpl(std::vector<ConfigTransactionInterface> const& ctis)
	  : ctis(ctis), getGenerationQuorum(ctis), commitQuorum(ctis) {}
};

// PaxosConfigTransaction forwards every operation to its impl.

Future<Version> PaxosConfigTransaction::getReadVersion() {
	return impl->getReadVersion();
}

Optional<Version> PaxosConfigTransaction::getCachedReadVersion() const {
	return impl->getCachedReadVersion();
}

Future<Optional<Value>> PaxosConfigTransaction::get(Key const& key, Snapshot) {
	return impl->get(key);
}

// Reverse reads are rejected with client_invalid_operation. `limit` and
// `snapshot` are not used; only the selectors' keys are forwarded.
Future<RangeResult> PaxosConfigTransaction::getRange(KeySelector const& begin,
                                                     KeySelector const& end,
                                                     int limit,
                                                     Snapshot snapshot,
                                                     Reverse reverse) {
	if (reverse) {
		throw client_invalid_operation();
	}
	return impl->getRange(KeyRangeRef(begin.getKey(), end.getKey()));
}

// Reverse reads are rejected with client_invalid_operation. `limits` and
// `snapshot` are not used; only the selectors' keys are forwarded.
Future<RangeResult> PaxosConfigTransaction::getRange(KeySelector begin,
                                                     KeySelector end,
                                                     GetRangeLimits limits,
                                                     Snapshot snapshot,
                                                     Reverse reverse) {
	if (reverse) {
		throw client_invalid_operation();
	}
	return impl->getRange(KeyRangeRef(begin.getKey(), end.getKey()));
}

void PaxosConfigTransaction::set(KeyRef const& key, ValueRef const& value) {
	return impl->set(key, value);
}

void PaxosConfigTransaction::clear(KeyRef const& key) {
	return impl->clear(key);
}

Future<Void> PaxosConfigTransaction::commit() {
	return impl->commit();
}

Version PaxosConfigTransaction::getCommittedVersion() const {
	return impl->getCommittedVersion();
}

int64_t PaxosConfigTransaction::getApproximateSize() const {
	return impl->getApproximateSize();
}

// Currently a no-op: options are accepted and ignored.
void PaxosConfigTransaction::setOption(FDBTransactionOptions::Option option, Optional<StringRef> value) {
	// TODO: Support using this option to determine atomicity
}

Future<Void> PaxosConfigTransaction::onError(Error const& e) {
	return impl->onError(e);
}

// Not supported: always throws client_invalid_operation.
void PaxosConfigTransaction::cancel() {
	// TODO: Implement someday
	throw client_invalid_operation();
}

void PaxosConfigTransaction::reset() {
	impl->reset();
}

void PaxosConfigTransaction::fullReset() {
	impl->fullReset();
}

void PaxosConfigTransaction::debugTransaction(UID dID) {
	impl->debugTransaction(dID);
}

void PaxosConfigTransaction::checkDeferredError() const {
	impl->checkDeferredError(deferredError);
}

PaxosConfigTransaction::PaxosConfigTransaction(std::vector<ConfigTransactionInterface> const& ctis)
  : impl(PImpl<PaxosConfigTransactionImpl>::create(ctis)) {}

PaxosConfigTransaction::PaxosConfigTransaction() = default;

PaxosConfigTransaction::~PaxosConfigTransaction() = default;

void PaxosConfigTransaction::construct(Database const& cx) {
	impl = PImpl<PaxosConfigTransactionImpl>::create(cx);
}