mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-14 18:02:31 +08:00
trace log and reset changes; bytesAcknowledged overflow
This commit is contained in:
parent
1584ed5853
commit
227570357a
@ -277,9 +277,9 @@ struct AcknowledgementReceiver final : FlowReceiver, FastAllocated<Acknowledgeme
|
|||||||
using FastAllocated<AcknowledgementReceiver>::operator new;
|
using FastAllocated<AcknowledgementReceiver>::operator new;
|
||||||
using FastAllocated<AcknowledgementReceiver>::operator delete;
|
using FastAllocated<AcknowledgementReceiver>::operator delete;
|
||||||
|
|
||||||
int64_t bytesSent;
|
uint64_t bytesSent;
|
||||||
int64_t bytesAcknowledged;
|
uint64_t bytesAcknowledged;
|
||||||
int64_t bytesLimit;
|
uint64_t bytesLimit;
|
||||||
Promise<Void> ready;
|
Promise<Void> ready;
|
||||||
Future<Void> failures;
|
Future<Void> failures;
|
||||||
|
|
||||||
@ -358,6 +358,13 @@ struct NetNotifiedQueueWithAcknowledgements final : NotifiedQueue<T>,
|
|||||||
// send an ack immediately
|
// send an ack immediately
|
||||||
if (acknowledgements.getRawEndpoint().isValid()) {
|
if (acknowledgements.getRawEndpoint().isValid()) {
|
||||||
acknowledgements.bytesAcknowledged += message.get().asUnderlyingType().expectedSize();
|
acknowledgements.bytesAcknowledged += message.get().asUnderlyingType().expectedSize();
|
||||||
|
// int64_t overflow: we need to reset this stream
|
||||||
|
if (acknowledgements.bytesAcknowledged > std::numeric_limits<int64_t>::max()) {
|
||||||
|
FlowTransport::transport().sendUnreliable(
|
||||||
|
SerializeSource<ErrorOr<AcknowledgementReply>>(operation_obsolete()),
|
||||||
|
acknowledgements.getEndpoint(TaskPriority::ReadSocket),
|
||||||
|
false);
|
||||||
|
} else {
|
||||||
FlowTransport::transport().sendUnreliable(
|
FlowTransport::transport().sendUnreliable(
|
||||||
SerializeSource<ErrorOr<AcknowledgementReply>>(
|
SerializeSource<ErrorOr<AcknowledgementReply>>(
|
||||||
AcknowledgementReply(acknowledgements.bytesAcknowledged)),
|
AcknowledgementReply(acknowledgements.bytesAcknowledged)),
|
||||||
@ -365,6 +372,7 @@ struct NetNotifiedQueueWithAcknowledgements final : NotifiedQueue<T>,
|
|||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
this->send(std::move(message.get().asUnderlyingType()));
|
this->send(std::move(message.get().asUnderlyingType()));
|
||||||
}
|
}
|
||||||
@ -376,11 +384,18 @@ struct NetNotifiedQueueWithAcknowledgements final : NotifiedQueue<T>,
|
|||||||
// A reply that has been queued up is being consumed, so send an ack to the server
|
// A reply that has been queued up is being consumed, so send an ack to the server
|
||||||
if (acknowledgements.getRawEndpoint().isValid()) {
|
if (acknowledgements.getRawEndpoint().isValid()) {
|
||||||
acknowledgements.bytesAcknowledged += res.expectedSize();
|
acknowledgements.bytesAcknowledged += res.expectedSize();
|
||||||
|
if (acknowledgements.bytesAcknowledged > std::numeric_limits<int64_t>::max()) {
|
||||||
|
FlowTransport::transport().sendUnreliable(
|
||||||
|
SerializeSource<ErrorOr<AcknowledgementReply>>(operation_obsolete()),
|
||||||
|
acknowledgements.getEndpoint(TaskPriority::ReadSocket),
|
||||||
|
false);
|
||||||
|
} else {
|
||||||
FlowTransport::transport().sendUnreliable(SerializeSource<ErrorOr<AcknowledgementReply>>(
|
FlowTransport::transport().sendUnreliable(SerializeSource<ErrorOr<AcknowledgementReply>>(
|
||||||
AcknowledgementReply(acknowledgements.bytesAcknowledged)),
|
AcknowledgementReply(acknowledgements.bytesAcknowledged)),
|
||||||
acknowledgements.getEndpoint(TaskPriority::ReadSocket),
|
acknowledgements.getEndpoint(TaskPriority::ReadSocket),
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -406,7 +421,6 @@ struct NetNotifiedQueueWithAcknowledgements final : NotifiedQueue<T>,
|
|||||||
template <class T>
|
template <class T>
|
||||||
class ReplyPromiseStream {
|
class ReplyPromiseStream {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// stream.send( request )
|
// stream.send( request )
|
||||||
// Unreliable at most once delivery: Delivers request unless there is a connection failure (zero or one times)
|
// Unreliable at most once delivery: Delivers request unless there is a connection failure (zero or one times)
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ void tryEstablishPeekStream(ILogSystem::ServerPeekCursor* self) {
|
|||||||
}
|
}
|
||||||
self->peekReplyStream = self->interf->get().interf().peekStreamMessages.getReplyStream(TLogPeekStreamRequest(
|
self->peekReplyStream = self->interf->get().interf().peekStreamMessages.getReplyStream(TLogPeekStreamRequest(
|
||||||
self->messageVersion.version, self->tag, self->returnIfBlocked, std::numeric_limits<int>::max()));
|
self->messageVersion.version, self->tag, self->returnIfBlocked, std::numeric_limits<int>::max()));
|
||||||
TraceEvent(SevDebug, "StreamCreated");
|
TraceEvent(SevDebug, "SPC_StreamCreated", self->randomID);
|
||||||
}
|
}
|
||||||
|
|
||||||
ILogSystem::ServerPeekCursor::ServerPeekCursor(Reference<AsyncVar<OptionalInterface<TLogInterface>>> const& interf,
|
ILogSystem::ServerPeekCursor::ServerPeekCursor(Reference<AsyncVar<OptionalInterface<TLogInterface>>> const& interf,
|
||||||
@ -51,7 +51,11 @@ ILogSystem::ServerPeekCursor::ServerPeekCursor(Reference<AsyncVar<OptionalInterf
|
|||||||
resetCheck(Void()), usePeekStream(SERVER_KNOBS->PEEK_USEING_STREAMING) {
|
resetCheck(Void()), usePeekStream(SERVER_KNOBS->PEEK_USEING_STREAMING) {
|
||||||
this->results.maxKnownVersion = 0;
|
this->results.maxKnownVersion = 0;
|
||||||
this->results.minKnownCommittedVersion = 0;
|
this->results.minKnownCommittedVersion = 0;
|
||||||
//TraceEvent("SPC_Starting", randomID).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).backtrace();
|
TraceEvent("SPC_Starting", randomID)
|
||||||
|
.detail("Tag", tag.toString())
|
||||||
|
.detail("Begin", begin)
|
||||||
|
.detail("End", end)
|
||||||
|
.backtrace();
|
||||||
}
|
}
|
||||||
|
|
||||||
ILogSystem::ServerPeekCursor::ServerPeekCursor(TLogPeekReply const& results,
|
ILogSystem::ServerPeekCursor::ServerPeekCursor(TLogPeekReply const& results,
|
||||||
@ -316,29 +320,34 @@ ACTOR Future<Void> serverPeekParallelGetMore(ILogSystem::ServerPeekCursor* self,
|
|||||||
|
|
||||||
ACTOR Future<Void> serverPeekStreamGetMore(ILogSystem::ServerPeekCursor* self, TaskPriority taskID) {
|
ACTOR Future<Void> serverPeekStreamGetMore(ILogSystem::ServerPeekCursor* self, TaskPriority taskID) {
|
||||||
if (!self->interf || self->isExhausted()) {
|
if (!self->interf || self->isExhausted()) {
|
||||||
|
self->peekReplyStream.reset();
|
||||||
if (self->hasMessage())
|
if (self->hasMessage())
|
||||||
return Void();
|
return Void();
|
||||||
return Never();
|
return Never();
|
||||||
}
|
}
|
||||||
|
|
||||||
tryEstablishPeekStream(self);
|
|
||||||
loop {
|
loop {
|
||||||
try {
|
try {
|
||||||
|
tryEstablishPeekStream(self);
|
||||||
|
state Future<TLogPeekReply> fPeekReply = self->peekReplyStream.present()
|
||||||
|
? map(waitAndForward(self->peekReplyStream.get().getFuture()),
|
||||||
|
[](const TLogPeekStreamReply& r) { return r.rep; })
|
||||||
|
: Never();
|
||||||
choose {
|
choose {
|
||||||
when(wait(self->interf->onChange())) {
|
when(wait(self->interf->onChange())) {
|
||||||
self->onlySpilled = false;
|
self->onlySpilled = false;
|
||||||
self->peekReplyStream.reset();
|
self->peekReplyStream.reset();
|
||||||
tryEstablishPeekStream(self);
|
|
||||||
}
|
}
|
||||||
when(TLogPeekStreamReply res =
|
when(TLogPeekReply res =
|
||||||
wait(self->peekReplyStream.present()
|
wait(self->peekReplyStream.present()
|
||||||
? brokenPromiseToNever(waitAndForward(self->peekReplyStream.get().getFuture()))
|
? recordRequestMetrics(
|
||||||
|
self, self->peekReplyStream.get().getEndpoint().getPrimaryAddress(), fPeekReply)
|
||||||
: Never())) {
|
: Never())) {
|
||||||
updateCursorWithReply(self, res.rep);
|
updateCursorWithReply(self, res);
|
||||||
TraceEvent("SPC_GetMoreB", self->randomID)
|
TraceEvent("SPC_GetMoreB", self->randomID)
|
||||||
.detail("Has", self->hasMessage())
|
.detail("Has", self->hasMessage())
|
||||||
.detail("End", res.rep.end)
|
.detail("End", res.end)
|
||||||
.detail("Popped", res.rep.popped.present() ? res.rep.popped.get() : 0);
|
.detail("Popped", res.popped.present() ? res.popped.get() : 0);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -388,19 +397,24 @@ ACTOR Future<Void> serverPeekGetMore(ILogSystem::ServerPeekCursor* self, TaskPri
|
|||||||
}
|
}
|
||||||
|
|
||||||
Future<Void> ILogSystem::ServerPeekCursor::getMore(TaskPriority taskID) {
|
Future<Void> ILogSystem::ServerPeekCursor::getMore(TaskPriority taskID) {
|
||||||
//TraceEvent("SPC_GetMore", randomID).detail("HasMessage", hasMessage()).detail("More", !more.isValid() || more.isReady()).detail("MessageVersion", messageVersion.toString()).detail("End", end.toString());
|
TraceEvent("SPC_GetMore", randomID)
|
||||||
|
.detail("HasMessage", hasMessage())
|
||||||
|
.detail("More", !more.isValid() || more.isReady())
|
||||||
|
.detail("MessageVersion", messageVersion.toString())
|
||||||
|
.detail("End", end.toString());
|
||||||
if (hasMessage() && !parallelGetMore)
|
if (hasMessage() && !parallelGetMore)
|
||||||
return Void();
|
return Void();
|
||||||
if (!more.isValid() || more.isReady()) {
|
if (!more.isValid() || more.isReady()) {
|
||||||
if (usePeekStream && taskID == TaskPriority::TLogPeekReply) {
|
|
||||||
more = serverPeekStreamGetMore(this, taskID);
|
more = serverPeekStreamGetMore(this, taskID);
|
||||||
}
|
// if (usePeekStream && taskID == TaskPriority::TLogPeekReply) {
|
||||||
// if (parallelGetMore || onlySpilled || futureResults.size()) {
|
// more = serverPeekStreamGetMore(this, taskID);
|
||||||
// more = serverPeekParallelGetMore(this, taskID);
|
// }
|
||||||
// }
|
// if (parallelGetMore || onlySpilled || futureResults.size()) {
|
||||||
else {
|
// more = serverPeekParallelGetMore(this, taskID);
|
||||||
more = serverPeekGetMore(this, taskID);
|
// }
|
||||||
}
|
// else {
|
||||||
|
// more = serverPeekGetMore(this, taskID);
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
return more;
|
return more;
|
||||||
}
|
}
|
||||||
|
@ -557,6 +557,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||||||
TLogData* tLogData;
|
TLogData* tLogData;
|
||||||
Promise<Void> recoveryComplete, committingQueue;
|
Promise<Void> recoveryComplete, committingQueue;
|
||||||
Version unrecoveredBefore, recoveredAt;
|
Version unrecoveredBefore, recoveredAt;
|
||||||
|
int activePeekStreams = 0;
|
||||||
|
|
||||||
struct PeekTrackerData {
|
struct PeekTrackerData {
|
||||||
std::map<int, Promise<std::pair<Version, bool>>>
|
std::map<int, Promise<std::pair<Version, bool>>>
|
||||||
@ -668,6 +669,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||||||
specialCounter(cc, "PeekMemoryReserved", [tLogData]() { return tLogData->peekMemoryLimiter.activePermits(); });
|
specialCounter(cc, "PeekMemoryReserved", [tLogData]() { return tLogData->peekMemoryLimiter.activePermits(); });
|
||||||
specialCounter(cc, "PeekMemoryRequestsStalled", [tLogData]() { return tLogData->peekMemoryLimiter.waiters(); });
|
specialCounter(cc, "PeekMemoryRequestsStalled", [tLogData]() { return tLogData->peekMemoryLimiter.waiters(); });
|
||||||
specialCounter(cc, "Generation", [this]() { return this->recoveryCount; });
|
specialCounter(cc, "Generation", [this]() { return this->recoveryCount; });
|
||||||
|
specialCounter(cc, "ActivePeekStreams", [this]() { return this->activePeekStreams; });
|
||||||
}
|
}
|
||||||
|
|
||||||
~LogData() {
|
~LogData() {
|
||||||
@ -1167,9 +1169,11 @@ ACTOR Future<Void> tLogPopCore(TLogData* self, Tag inputTag, Version to, Referen
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint64_t PoppedVersionLag = logData->persistentDataDurableVersion - logData->queuePoppedVersion;
|
uint64_t PoppedVersionLag = logData->persistentDataDurableVersion - logData->queuePoppedVersion;
|
||||||
if ( SERVER_KNOBS->ENABLE_DETAILED_TLOG_POP_TRACE &&
|
if (SERVER_KNOBS->ENABLE_DETAILED_TLOG_POP_TRACE &&
|
||||||
(logData->queuePoppedVersion > 0) && //avoid generating massive events at beginning
|
(logData->queuePoppedVersion > 0) && // avoid generating massive events at beginning
|
||||||
(tagData->unpoppedRecovered || PoppedVersionLag >= SERVER_KNOBS->TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE)) { //when recovery or long lag
|
(tagData->unpoppedRecovered ||
|
||||||
|
PoppedVersionLag >=
|
||||||
|
SERVER_KNOBS->TLOG_POPPED_VER_LAG_THRESHOLD_FOR_TLOGPOP_TRACE)) { // when recovery or long lag
|
||||||
TraceEvent("TLogPopDetails", logData->logId)
|
TraceEvent("TLogPopDetails", logData->logId)
|
||||||
.detail("Tag", tagData->tag.toString())
|
.detail("Tag", tagData->tag.toString())
|
||||||
.detail("UpTo", upTo)
|
.detail("UpTo", upTo)
|
||||||
@ -1915,6 +1919,7 @@ ACTOR Future<TLogPeekReply> peekTLog(TLogData* self,
|
|||||||
|
|
||||||
// This actor keeps pushing TLogPeekStreamReply until it's removed from the cluster or should recover
|
// This actor keeps pushing TLogPeekStreamReply until it's removed from the cluster or should recover
|
||||||
ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Reference<LogData> logData) {
|
ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Reference<LogData> logData) {
|
||||||
|
logData->activePeekStreams ++;
|
||||||
state Version begin = req.begin;
|
state Version begin = req.begin;
|
||||||
state bool onlySpilled = false;
|
state bool onlySpilled = false;
|
||||||
if (req.tag.locality == tagLocalityTxs && req.tag.id >= logData->txsTags && logData->txsTags > 0) {
|
if (req.tag.locality == tagLocalityTxs && req.tag.id >= logData->txsTags && logData->txsTags > 0) {
|
||||||
@ -1929,14 +1934,13 @@ ACTOR Future<Void> tLogPeekStream(TLogData* self, TLogPeekStreamRequest req, Ref
|
|||||||
req.reply.send(reply);
|
req.reply.send(reply);
|
||||||
begin = reply.rep.end;
|
begin = reply.rep.end;
|
||||||
onlySpilled = reply.rep.onlySpilled;
|
onlySpilled = reply.rep.onlySpilled;
|
||||||
|
wait(delay(0, g_network->getCurrentTask()));
|
||||||
wait(delay(.05, g_network->getCurrentTask()));
|
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
|
logData->activePeekStreams --;
|
||||||
if (e.code() == error_code_end_of_stream) {
|
if (e.code() == error_code_end_of_stream) {
|
||||||
req.reply.sendError(e);
|
req.reply.sendError(e);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
} else if (e.code() == error_code_operation_obsolete) {
|
||||||
else if (e.code() == error_code_operation_obsolete) {
|
|
||||||
// reply stream is cancelled on the client
|
// reply stream is cancelled on the client
|
||||||
return Void();
|
return Void();
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user