From aef5130da2b8b0241571968a880bc7abeb26c116 Mon Sep 17 00:00:00 2001
From: Josh Slocum <josh.slocum@snowflake.com>
Date: Thu, 6 Apr 2023 15:08:40 -0500
Subject: [PATCH] adding system priority option to getDatabaseConfiguration,
 and several debugging improvements (#9864)

---
 fdbclient/ManagementAPI.actor.cpp                 |  9 ++++++---
 fdbclient/include/fdbclient/ManagementAPI.actor.h |  4 ++--
 fdbserver/BlobWorker.actor.cpp                    | 10 ++++++++--
 fdbserver/Ratekeeper.actor.cpp                    |  4 +++-
 4 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp
index aff6f553c1..ed422136cb 100644
--- a/fdbclient/ManagementAPI.actor.cpp
+++ b/fdbclient/ManagementAPI.actor.cpp
@@ -673,7 +673,10 @@ TEST_CASE("/ManagementAPI/ChangeConfig/TenantAndEncryptMode") {
 	return Void();
 }
 
-ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Transaction* tr) {
+ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Transaction* tr, bool useSystemPriority) {
+	if (useSystemPriority) {
+		tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
+	}
 	tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE);
 	tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
 	RangeResult res = wait(tr->getRange(configKeys, CLIENT_KNOBS->TOO_MANY));
@@ -683,11 +686,11 @@ ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Transaction* tr) {
 	return config;
 }
 
-ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Database cx) {
+ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Database cx, bool useSystemPriority) {
 	state Transaction tr(cx);
 	loop {
 		try {
-			DatabaseConfiguration config = wait(getDatabaseConfiguration(&tr));
+			DatabaseConfiguration config = wait(getDatabaseConfiguration(&tr, useSystemPriority));
 			return config;
 		} catch (Error& e) {
 			wait(tr.onError(e));
diff --git a/fdbclient/include/fdbclient/ManagementAPI.actor.h b/fdbclient/include/fdbclient/ManagementAPI.actor.h
index 51c117fa41..9e954e3c4c 100644
--- a/fdbclient/include/fdbclient/ManagementAPI.actor.h
+++ b/fdbclient/include/fdbclient/ManagementAPI.actor.h
@@ -41,8 +41,8 @@ standard API and some knowledge of the contents of the system key space.
 #include "fdbclient/MonitorLeader.h"
 #include "flow/actorcompiler.h" // has to be last include
 
-ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Transaction* tr);
-ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Database cx);
+ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Transaction* tr, bool useSystemPriority = false);
+ACTOR Future<DatabaseConfiguration> getDatabaseConfiguration(Database cx, bool useSystemPriority = false);
 ACTOR Future<Void> waitForFullReplication(Database cx);
 
 struct IQuorumChange : ReferenceCounted<IQuorumChange> {
diff --git a/fdbserver/BlobWorker.actor.cpp b/fdbserver/BlobWorker.actor.cpp
index b27ecc6deb..36e059d4d3 100644
--- a/fdbserver/BlobWorker.actor.cpp
+++ b/fdbserver/BlobWorker.actor.cpp
@@ -5522,16 +5522,20 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
 	state Reference<BlobWorkerData> self(new BlobWorkerData(bwInterf.id(), dbInfo, cx, persistentData));
 	self->id = bwInterf.id();
 	self->locality = bwInterf.locality;
+
+	TraceEvent("BlobWorkerInitStart", self->id).detail("Recovering", false).log();
+
 	try {
 		// Since the blob worker gets initalized through the blob manager it is more reliable to fetch the encryption
 		// state using the DB Config rather than passing it through the initalization request for the blob manager and
 		// blob worker
-		state Future<DatabaseConfiguration> configFuture = getDatabaseConfiguration(cx);
+		state Future<DatabaseConfiguration> configFuture = getDatabaseConfiguration(cx, true);
 
 		if (self->storage) {
 			wait(self->storage->init());
 			self->storage->set(KeyValueRef(persistID, BinaryWriter::toValue(self->id, Unversioned())));
 			wait(self->storage->commit());
+			TraceEvent("BlobWorkerStorageInitComplete", self->id).log();
 		}
 
 		if (BW_DEBUG) {
@@ -5604,6 +5608,8 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
 	state Reference<BlobWorkerData> self(new BlobWorkerData(bwInterf.id(), dbInfo, cx, persistentData));
 	self->id = bwInterf.id();
 	self->locality = bwInterf.locality;
+	TraceEvent("BlobWorkerInitStart", self->id).detail("Recovering", true).log();
+
 	try {
 		wait(self->storage->init());
 		wait(self->storage->commit());
@@ -5616,7 +5622,7 @@ ACTOR Future<Void> blobWorker(BlobWorkerInterface bwInterf,
 		// Since the blob worker gets initalized through the blob manager it is more reliable to fetch the encryption
 		// state using the DB Config rather than passing it through the initalization request for the blob manager and
 		// blob worker
-		state Future<DatabaseConfiguration> configFuture = getDatabaseConfiguration(cx);
+		state Future<DatabaseConfiguration> configFuture = getDatabaseConfiguration(cx, true);
 
 		if (BW_DEBUG) {
 			printf("Initializing blob worker s3 stuff\n");
diff --git a/fdbserver/Ratekeeper.actor.cpp b/fdbserver/Ratekeeper.actor.cpp
index 132ef50f94..8bf7f001a2 100644
--- a/fdbserver/Ratekeeper.actor.cpp
+++ b/fdbserver/Ratekeeper.actor.cpp
@@ -353,7 +353,9 @@ public:
 					TraceEvent("RkMinBlobWorkerVersion")
 					    .detail("BWVersion", minVer)
 					    .detail("MaxVer", self->maxVersion)
-					    .detail("MinId", blobWorkers.size() > 0 ? blobWorkers[minIdx].id() : UID());
+					    .detail("MinId", blobWorkers.size() > 0 ? blobWorkers[minIdx].id() : UID())
+					    .detail("BMBlocked",
+					            now() - self->unblockedAssignmentTime >= SERVER_KNOBS->BW_MAX_BLOCKED_INTERVAL);
 				}
 			}
 			wait(blobWorkerDelay);