Merge pull request #4094 from sfc-gh-clin/add-c-function-for-management-commands

Add c function for kill/suspend
This commit is contained in:
Andrew Noyes 2021-01-15 09:45:16 -08:00 committed by GitHub
commit 56f46d0645
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 210 additions and 3 deletions

View File

@ -389,6 +389,10 @@ fdb_error_t fdb_database_create_transaction( FDBDatabase* d,
*out_transaction = (FDBTransaction*)tr.extractPtr(); );
}
// C API entry point: asks the database to reboot the worker process identified by the
// `address_length` bytes at `address`. `check` and `duration` are forwarded unchanged to
// IDatabase::rebootWorker. The returned future carries an int64 result that callers read
// with fdb_future_get_int64().
extern "C" DLLEXPORT FDBFuture* fdb_database_reboot_worker(FDBDatabase* db, uint8_t const* address, int address_length,
                                                           fdb_bool_t check, int duration) {
	const StringRef workerAddress(address, address_length);
	auto rebootResult = DB(db)->rebootWorker(workerAddress, check, duration);
	// Hand ownership of the underlying future object over to the C caller.
	return (FDBFuture*)(rebootResult.extractPtr());
}
extern "C" DLLEXPORT
void fdb_transaction_destroy( FDBTransaction* tr ) {

View File

@ -173,6 +173,10 @@ extern "C" {
fdb_database_create_transaction( FDBDatabase* d,
FDBTransaction** out_transaction );
/* Requests a reboot of the process whose network address is given by the
 * `address_length` bytes at `address`. `check` asks for a storage engine
 * integrity check on reboot; a positive `duration` suspends the process for
 * that many seconds before it is rebooted. The returned future yields an
 * int64 (read it with fdb_future_get_int64()): 1 if the request was sent,
 * 0 on failure. */
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
fdb_database_reboot_worker( FDBDatabase* db, uint8_t const* address,
int address_length, fdb_bool_t check, int duration);
DLLEXPORT void fdb_transaction_destroy( FDBTransaction* tr);
DLLEXPORT void fdb_transaction_cancel( FDBTransaction* tr);

View File

@ -92,6 +92,12 @@ void Future::cancel() {
return fdb_future_get_keyvalue_array(future_, out_kv, out_count, out_more);
}
// Database
// Thin wrapper over fdb_database_reboot_worker(): forwards every argument as-is and
// wraps the resulting FDBFuture in an Int64Future.
Int64Future Database::reboot_worker(FDBDatabase* db, const uint8_t* address, int address_length, fdb_bool_t check,
                                    int duration) {
	FDBFuture* pending = fdb_database_reboot_worker(db, address, address_length, check, duration);
	return Int64Future(pending);
}
// Transaction
Transaction::Transaction(FDBDatabase* db) {

View File

@ -77,7 +77,6 @@ class Future {
FDBFuture* future_;
};
class Int64Future : public Future {
public:
// Call this function instead of fdb_future_get_int64 when using the
@ -86,6 +85,7 @@ class Int64Future : public Future {
private:
friend class Transaction;
friend class Database;
Int64Future(FDBFuture* f) : Future(f) {}
};
@ -147,6 +147,13 @@ class EmptyFuture : public Future {
EmptyFuture(FDBFuture* f) : Future(f) {}
};
// Wrapper around FDBDatabase, providing database-level API.
// Exposes only static helpers; it does not hold or own an FDBDatabase itself.
class Database final {
public:
// Calls fdb_database_reboot_worker() on `db` for the process whose address is the
// `address_length` bytes at `address`, and returns the result as an Int64Future.
static Int64Future reboot_worker(FDBDatabase* db, const uint8_t* address, int address_length, fdb_bool_t check,
int duration);
};
// Wrapper around FDBTransaction, providing the same set of calls as the C API.
// Handles cleanup of memory, removing the need to call
// fdb_transaction_destroy.

View File

@ -37,6 +37,7 @@
#define DOCTEST_CONFIG_IMPLEMENT
#include "doctest.h"
#include "fdbclient/rapidjson/document.h"
#include "fdb_api.hpp"
@ -1967,6 +1968,65 @@ TEST_CASE("special-key-space tracing get range") {
}
}
std::string get_valid_status_json() {
fdb::Transaction tr(db);
while (1) {
fdb::ValueFuture f1 = tr.get("\xff\xff/status/json", false);
fdb_error_t err = wait_future(f1);
if (err) {
fdb::EmptyFuture f2 = tr.on_error(err);
fdb_check(wait_future(f2));
continue;
}
int out_present;
char *val;
int vallen;
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
assert(out_present);
std::string statusJsonStr(val, vallen);
rapidjson::Document statusJson;
statusJson.Parse(statusJsonStr.c_str());
// make sure it is available
bool available = statusJson["client"]["database_status"]["available"].GetBool();
if (!available)
continue; // cannot reach to the cluster, retry
return statusJsonStr;
}
}
// Reboots the only process of a single-process cluster through the C API and verifies
// that the cluster generation reported by status JSON increases afterwards.
TEST_CASE("fdb_database_reboot_worker") {
	std::string status_text = get_valid_status_json();
	rapidjson::Document status_doc;
	status_doc.Parse(status_text.c_str());

	CHECK(status_doc.HasMember("cluster"));
	CHECK(status_doc["cluster"].HasMember("generation"));
	int generation_before = status_doc["cluster"]["generation"].GetInt();
	CHECK(status_doc["cluster"].HasMember("processes"));
	// The test relies on a single-process cluster: rebooting that one worker
	// guarantees a recovery. A different configuration may break this assumption.
	CHECK(status_doc["cluster"]["processes"].MemberCount() == 1);
	auto first_process = status_doc["cluster"]["processes"].MemberBegin();
	CHECK(first_process->value.HasMember("address"));
	std::string worker_address = first_process->value["address"].GetString();

	// Keep issuing the reboot request until the API reports that it was sent.
	bool rebooted = false;
	while (!rebooted) {
		fdb::Int64Future f =
		    fdb::Database::reboot_worker(db, (const uint8_t*)worker_address.c_str(), worker_address.size(), false, 0);
		fdb_check(wait_future(f));
		int64_t successful;
		fdb_check(f.get(&successful));
		rebooted = (successful != 0);
	}

	status_text = get_valid_status_json();
	status_doc.Parse(status_text.c_str());
	CHECK(status_doc.HasMember("cluster"));
	CHECK(status_doc["cluster"].HasMember("generation"));
	int generation_after = status_doc["cluster"]["generation"].GetInt();
	// A recovery bumps the generation number.
	CHECK(generation_after > generation_before);
}
TEST_CASE("fdb_error_predicate") {
CHECK(fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, 1007)); // transaction_too_old
CHECK(fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, 1020)); // not_committed

View File

@ -20,6 +20,7 @@
#include "fdb_flow.h"
#include <cstdint>
#include <stdio.h>
#include <cinttypes>
@ -101,6 +102,7 @@ namespace FDB {
Reference<Transaction> createTransaction() override;
void setDatabaseOption(FDBDatabaseOption option, Optional<StringRef> value = Optional<StringRef>()) override;
// Issues fdb_database_reboot_worker() for `address` and yields the int64 carried by the resulting future.
Future<int64_t> rebootWorker(const StringRef& address, bool check = false, int duration = 0) override;
private:
FDBDatabase* db;
@ -284,6 +286,16 @@ namespace FDB {
throw_on_error(fdb_database_set_option(db, option, nullptr, 0));
}
// Sends a reboot request for the worker at `address` through the C API and converts the
// returned FDBFuture into a flow Future<int64_t>. Errors reported while extracting the
// value are raised through throw_on_error().
Future<int64_t> DatabaseImpl::rebootWorker(const StringRef& address, bool check, int duration) {
	FDBFuture* pending = fdb_database_reboot_worker(db, address.begin(), address.size(), check, duration);
	return backToFuture<int64_t>(pending, [](Reference<CFuture> f) {
		int64_t rebootStatus;
		throw_on_error(fdb_future_get_int64(f->f, &rebootStatus));
		return rebootStatus;
	});
}
TransactionImpl::TransactionImpl(FDBDatabase* db) {
throw_on_error(fdb_database_create_transaction(db, &tr));
}

View File

@ -124,6 +124,7 @@ namespace FDB {
virtual ~Database(){};
virtual Reference<Transaction> createTransaction() = 0;
virtual void setDatabaseOption(FDBDatabaseOption option, Optional<StringRef> value = Optional<StringRef>()) = 0;
// Requests a reboot of the worker at `address`; `check` and `duration` are passed through to the
// implementation. DatabaseImpl implements this on top of fdb_database_reboot_worker().
virtual Future<int64_t> rebootWorker(const StringRef& address, bool check = false, int duration = 0) = 0;
};
class API {

View File

@ -426,6 +426,25 @@ An |database-blurb1| Modifications to a database are performed via transactions.
``*out_transaction``
Set to point to the newly created :type:`FDBTransaction`.
.. function:: FDBFuture* fdb_database_reboot_worker(FDBDatabase* database, uint8_t const* address, int address_length, fdb_bool_t check, int duration)
Reboot the specified process in the database.
|future-return0| an :type:`int64_t` which indicates whether the reboot request was sent. In particular, 1 means the request was sent and 0 means failure (e.g. no process with the specified address exists). |future-return1| call :func:`fdb_future_get_int64()` to extract the result, |future-return2|
``address``
A pointer to the network address of the process.
``address_length``
|length-of| ``address``.
``check``
Whether to perform a storage engine integrity check. The check-on-reboot is implemented by writing a check/validation file to disk as a breadcrumb for the process to find after reboot, at which point it will consume the breadcrumb file and pass true to the integrityCheck parameter of the openKVStore() factory method.
``duration``
If positive, the process will be first suspended for ``duration`` seconds before being rebooted.
Transaction
===========

View File

@ -35,7 +35,7 @@ Status
Bindings
--------
* Python: The function ``get_estimated_range_size_bytes`` will now throw an error if the ``begin_key`` or ``end_key`` is ``None``. `(PR #3394) <https://github.com/apple/foundationdb/pull/3394>`_
* C: Added a function, ``fdb_database_reboot_worker``, to reboot or suspend the specified process. `(PR #4094) <https://github.com/apple/foundationdb/pull/4094>`_
Other Changes
-------------

View File

@ -206,6 +206,9 @@ public:
Future<Void> connectionFileChanged();
bool switchable = false;
// Management API: attempts to kill (reboot) or suspend the process at `address`.
// The future resolves to 1 once the reboot request has been sent, and to 0 on failure
// (no connection file, or no worker registered under `address`).
Future<int64_t> rebootWorker(StringRef address, bool check = false, int duration = 0);
//private:
explicit DatabaseContext( Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile, Reference<AsyncVar<ClientDBInfo>> clientDBInfo,
Future<Void> clientInfoMonitor, TaskPriority taskID, LocalityData const& clientLocality,

View File

@ -84,6 +84,9 @@ public:
virtual void addref() = 0;
virtual void delref() = 0;
// Management API: attempts to kill (reboot) or suspend the process at `address`.
// The future resolves to 1 for success and 0 for failure.
virtual ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration) = 0;
};
class IClientApi {

View File

@ -285,6 +285,20 @@ void DLDatabase::setOption(FDBDatabaseOptions::Option option, Optional<StringRef
throwIfError(api->databaseSetOption(db, option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0));
}
// Forwards the reboot request to the dynamically loaded client library.
// fdb_database_reboot_worker is only a required symbol for header versions >= 700
// (see DLApi::init), so the function pointer may be unset; in that case the call is
// reported as unsupported instead of being attempted.
ThreadFuture<int64_t> DLDatabase::rebootWorker(const StringRef& address, bool check, int duration) {
	if (api->databaseRebootWorker == nullptr) {
		return unsupported_operation();
	}

	FdbCApi::FDBFuture* rebootFuture = api->databaseRebootWorker(db, address.begin(), address.size(), check, duration);
	// Pulls the int64 result out of the completed C future.
	auto extractResult = [](FdbCApi::FDBFuture* future, FdbCApi* capi) {
		int64_t value;
		FdbCApi::fdb_error_t err = capi->futureGetInt64(future, &value);
		ASSERT(!err);
		return value;
	};
	return toThreadFuture<int64_t>(api, rebootFuture, extractResult);
}
// DLApi
template<class T>
void loadClientFunction(T *fp, void *lib, std::string libPath, const char *functionName, bool requireFunction = true) {
@ -319,6 +333,7 @@ void DLApi::init() {
loadClientFunction(&api->databaseCreateTransaction, lib, fdbCPath, "fdb_database_create_transaction");
loadClientFunction(&api->databaseSetOption, lib, fdbCPath, "fdb_database_set_option");
loadClientFunction(&api->databaseDestroy, lib, fdbCPath, "fdb_database_destroy");
loadClientFunction(&api->databaseRebootWorker, lib, fdbCPath, "fdb_database_reboot_worker", headerVersion >= 700);
loadClientFunction(&api->transactionSetOption, lib, fdbCPath, "fdb_transaction_set_option");
loadClientFunction(&api->transactionDestroy, lib, fdbCPath, "fdb_transaction_destroy");
@ -781,6 +796,13 @@ void MultiVersionDatabase::setOption(FDBDatabaseOptions::Option option, Optional
}
}
// Delegates the reboot request to the currently selected underlying database.
// When no underlying database is set, the request is not sent and the result is the
// failure value (the `false` literal converts to an int64 of 0).
ThreadFuture<int64_t> MultiVersionDatabase::rebootWorker(const StringRef& address, bool check, int duration) {
	if (!dbState->db) {
		return false;
	}
	return dbState->db->rebootWorker(address, check, duration);
}
void MultiVersionDatabase::Connector::connect() {
addref();
onMainThreadVoid([this]() {

View File

@ -66,6 +66,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
fdb_error_t (*databaseCreateTransaction)(FDBDatabase *database, FDBTransaction **tr);
fdb_error_t (*databaseSetOption)(FDBDatabase *database, FDBDatabaseOptions::Option option, uint8_t const *value, int valueLength);
void (*databaseDestroy)(FDBDatabase *database);
// Bound to "fdb_database_reboot_worker" by DLApi::init. The symbol is only required for
// header versions >= 700, and DLDatabase::rebootWorker checks this pointer before calling it.
FDBFuture* (*databaseRebootWorker)(FDBDatabase *database, uint8_t const *address, int addressLength, fdb_bool_t check, int duration);
//Transaction
fdb_error_t (*transactionSetOption)(FDBTransaction *tr, FDBTransactionOptions::Option option, uint8_t const *value, int valueLength);
@ -109,6 +110,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
fdb_error_t (*futureGetDatabase)(FDBFuture *f, FDBDatabase **outDb);
fdb_error_t (*futureGetInt64)(FDBFuture *f, int64_t *outValue);
fdb_error_t (*futureGetUInt64)(FDBFuture *f, uint64_t *outValue);
// NOTE(review): no loadClientFunction() call for this pointer is visible in DLApi::init from
// this view — confirm it is initialized before anything dereferences it.
fdb_error_t (*futureGetBool) (FDBFuture *f, bool *outValue);
fdb_error_t (*futureGetError)(FDBFuture *f);
fdb_error_t (*futureGetKey)(FDBFuture *f, uint8_t const **outKey, int *outKeyLength);
fdb_error_t (*futureGetValue)(FDBFuture *f, fdb_bool_t *outPresent, uint8_t const **outValue, int *outValueLength);
@ -194,6 +196,8 @@ public:
void addref() override { ThreadSafeReferenceCounted<DLDatabase>::addref(); }
void delref() override { ThreadSafeReferenceCounted<DLDatabase>::delref(); }
// Forwards the reboot request to the loaded client library; returns unsupported_operation()
// when that library does not provide fdb_database_reboot_worker.
ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration) override;
private:
const Reference<FdbCApi> api;
FdbCApi::FDBDatabase* db; // Always set if API version >= 610, otherwise guaranteed to be set when onReady future is set
@ -325,6 +329,8 @@ public:
static Reference<IDatabase> debugCreateFromExistingDatabase(Reference<IDatabase> db);
// Management API: reboot or suspend the process at `address`.
// NOTE(review): DLDatabase declares its rebootWorker with `override`; if this class also
// derives from IDatabase, consider marking this declaration `override` for consistency.
ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration);
private:
struct DatabaseState;

View File

@ -4779,3 +4779,53 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exc
return (ddCheck && coordinatorCheck);
}
// Probes one worker described by `kv` and, if it answers in time, records it in
// `*address_interface`.
//   address_interface - output map, keyed by address with any ":tls" suffix removed; each entry
//                       stores the serialized worker interface (kv.value) and the
//                       ClientLeaderRegInterface built from the worker's address.
//   connectLock       - taken before probing; used to bound how many probes run at once.
//   kv                - key is the worker's address string, value is a serialized
//                       ClientWorkerInterface.
// If the worker does not reply within CLI_CONNECT_TIMEOUT nothing is recorded.
ACTOR Future<Void> addInterfaceActor( std::map<Key,std::pair<Value,ClientLeaderRegInterface>>* address_interface, Reference<FlowLock> connectLock, KeyValue kv) {
wait(connectLock->take());
// presumably gives the lock back when this actor ends — confirm against FlowLock::Releaser
state FlowLock::Releaser releaser(*connectLock);
state ClientWorkerInterface workerInterf = BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
state ClientLeaderRegInterface leaderInterf(workerInterf.address());
choose {
// A reply to GetLeaderRequest is used only as evidence that the worker is reachable;
// the reply value itself (`rep`) is not read.
when( Optional<LeaderInfo> rep = wait( brokenPromiseToNever(leaderInterf.getLeader.getReply(GetLeaderRequest())) ) ) {
// Register the worker under its address without the ":tls" suffix.
StringRef ip_port =
kv.key.endsWith(LiteralStringRef(":tls")) ? kv.key.removeSuffix(LiteralStringRef(":tls")) : kv.key;
(*address_interface)[ip_port] = std::make_pair(kv.value, leaderInterf);
// If the worker also has a secondary address, register the same entry under it too.
if(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.present()) {
Key full_ip_port2 =
StringRef(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.get().toString());
StringRef ip_port2 = full_ip_port2.endsWith(LiteralStringRef(":tls")) ? full_ip_port2.removeSuffix(LiteralStringRef(":tls")) : full_ip_port2;
(*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf);
}
}
// Timed out waiting for the worker: fall through without recording it.
when( wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT)) ) {} // NOTE : change timeout time here if necessary
}
return Void();
}
// Finds the worker registered under `addr` and sends it a RebootRequest.
//   cx       - database context whose connection file is used to discover workers.
//   addr     - address of the target worker. Taken as an owning Key (not a ValueRef) because
//              it is read after the wait() calls below; the actor must not depend on the
//              caller keeping the original bytes alive across those waits.
//   check    - forwarded to RebootRequest.
//   duration - forwarded to RebootRequest; negative values are treated as 0.
// Returns 1 once the request has been sent, 0 if there is no connection file or no worker
// is registered under `addr`.
ACTOR Future<int64_t> rebootWorkerActor(DatabaseContext* cx, Key addr, bool check, int duration) {
	// ignore negative value
	if (duration < 0) duration = 0;
	// fetch the addresses of all workers
	state std::map<Key,std::pair<Value,ClientLeaderRegInterface>> address_interface;
	if (!cx->getConnectionFile())
		return 0;
	Standalone<RangeResultRef> kvs = wait(getWorkerInterfaces(cx->getConnectionFile()));
	ASSERT(!kvs.more);
	// Note: reuse this knob from fdbcli, change it if necessary
	Reference<FlowLock> connectLock(new FlowLock(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM));
	std::vector<Future<Void>> addInterfs;
	for( const auto& it : kvs ) {
		addInterfs.push_back(addInterfaceActor(&address_interface, connectLock, it));
	}
	wait(waitForAll(addInterfs));
	// Single lookup: only workers that answered the probe are present in the map.
	auto target = address_interface.find(addr);
	if (target == address_interface.end()) return 0;
	BinaryReader::fromStringRef<ClientWorkerInterface>(target->second.first, IncludeVersion())
	    .reboot.send(RebootRequest(false, check, duration));
	return 1;
}
// Starts rebootWorkerActor for this database context and returns its future unchanged.
Future<int64_t> DatabaseContext::rebootWorker(StringRef addr, bool check, int duration) {
	Future<int64_t> rebootResult = rebootWorkerActor(this, addr, check, duration);
	return rebootResult;
}

View File

@ -68,6 +68,14 @@ void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional<
}, &db->deferredError );
}
// Schedules DatabaseContext::rebootWorker on the main thread.
// The address is copied into an owning Key and captured by value, so the callback does not
// refer back to the caller's `address` buffer.
ThreadFuture<int64_t> ThreadSafeDatabase::rebootWorker(const StringRef& address, bool check, int duration) {
	DatabaseContext* context = this->db;
	Key ownedAddress = address;
	auto task = [context, ownedAddress, check, duration]() -> Future<int64_t> {
		return context->rebootWorker(ownedAddress, check, duration);
	};
	return onMainThread(task);
}
ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) {
ClusterConnectionFile *connFile = new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFilename).first);

View File

@ -41,6 +41,8 @@ public:
void addref() { ThreadSafeReferenceCounted<ThreadSafeDatabase>::addref(); }
void delref() { ThreadSafeReferenceCounted<ThreadSafeDatabase>::delref(); }
// Management API: runs DatabaseContext::rebootWorker for `address` on the main thread and
// returns its result as a ThreadFuture.
ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration);
private:
friend class ThreadSafeTransaction;
DatabaseContext* db;