mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-14 18:02:31 +08:00
Merge pull request #4094 from sfc-gh-clin/add-c-function-for-management-commands
Add c function for kill/suspend
This commit is contained in:
commit
56f46d0645
@ -389,6 +389,10 @@ fdb_error_t fdb_database_create_transaction( FDBDatabase* d,
|
||||
*out_transaction = (FDBTransaction*)tr.extractPtr(); );
|
||||
}
|
||||
|
||||
// C API entry point: forwards a reboot request for the worker identified by
// `address` (a byte string of `address_length` bytes) to the database's
// rebootWorker() implementation. `check` and `duration` are passed through
// unchanged. Ownership of the resulting future is handed to the caller as an
// opaque FDBFuture*.
extern "C" DLLEXPORT FDBFuture* fdb_database_reboot_worker(FDBDatabase* db, uint8_t const* address, int address_length,
                                                           fdb_bool_t check, int duration) {
	const StringRef workerAddress(address, address_length);
	auto rebootFuture = DB(db)->rebootWorker(workerAddress, check, duration);
	// extractPtr() releases the reference so it survives rebootFuture going out of scope
	return (FDBFuture*)(rebootFuture.extractPtr());
}
|
||||
|
||||
extern "C" DLLEXPORT
|
||||
void fdb_transaction_destroy( FDBTransaction* tr ) {
|
||||
|
@ -173,6 +173,10 @@ extern "C" {
|
||||
fdb_database_create_transaction( FDBDatabase* d,
|
||||
FDBTransaction** out_transaction );
|
||||
|
||||
DLLEXPORT WARN_UNUSED_RESULT FDBFuture*
|
||||
fdb_database_reboot_worker( FDBDatabase* db, uint8_t const* address,
|
||||
int address_length, fdb_bool_t check, int duration);
|
||||
|
||||
DLLEXPORT void fdb_transaction_destroy( FDBTransaction* tr);
|
||||
|
||||
DLLEXPORT void fdb_transaction_cancel( FDBTransaction* tr);
|
||||
|
@ -92,6 +92,12 @@ void Future::cancel() {
|
||||
return fdb_future_get_keyvalue_array(future_, out_kv, out_count, out_more);
|
||||
}
|
||||
|
||||
// Database
|
||||
// Issues fdb_database_reboot_worker() against `db` and wraps the raw future in
// an Int64Future so the caller does not manage the FDBFuture* directly.
Int64Future Database::reboot_worker(FDBDatabase* db, const uint8_t* address, int address_length, fdb_bool_t check,
                                    int duration) {
	FDBFuture* raw_future = fdb_database_reboot_worker(db, address, address_length, check, duration);
	return Int64Future(raw_future);
}
|
||||
|
||||
// Transaction
|
||||
|
||||
Transaction::Transaction(FDBDatabase* db) {
|
||||
|
@ -77,7 +77,6 @@ class Future {
|
||||
FDBFuture* future_;
|
||||
};
|
||||
|
||||
|
||||
class Int64Future : public Future {
|
||||
public:
|
||||
// Call this function instead of fdb_future_get_int64 when using the
|
||||
@ -86,6 +85,7 @@ class Int64Future : public Future {
|
||||
|
||||
private:
|
||||
friend class Transaction;
|
||||
friend class Database;
|
||||
Int64Future(FDBFuture* f) : Future(f) {}
|
||||
};
|
||||
|
||||
@ -147,6 +147,13 @@ class EmptyFuture : public Future {
|
||||
EmptyFuture(FDBFuture* f) : Future(f) {}
|
||||
};
|
||||
|
||||
// Wrapper around FDBDatabase, providing database-level API
|
||||
class Database final {
|
||||
public:
|
||||
static Int64Future reboot_worker(FDBDatabase* db, const uint8_t* address, int address_length, fdb_bool_t check,
|
||||
int duration);
|
||||
};
|
||||
|
||||
// Wrapper around FDBTransaction, providing the same set of calls as the C API.
|
||||
// Handles cleanup of memory, removing the need to call
|
||||
// fdb_transaction_destroy.
|
||||
|
@ -37,6 +37,7 @@
|
||||
|
||||
#define DOCTEST_CONFIG_IMPLEMENT
|
||||
#include "doctest.h"
|
||||
#include "fdbclient/rapidjson/document.h"
|
||||
|
||||
#include "fdb_api.hpp"
|
||||
|
||||
@ -1967,6 +1968,65 @@ TEST_CASE("special-key-space tracing get range") {
|
||||
}
|
||||
}
|
||||
|
||||
std::string get_valid_status_json() {
|
||||
fdb::Transaction tr(db);
|
||||
while (1) {
|
||||
fdb::ValueFuture f1 = tr.get("\xff\xff/status/json", false);
|
||||
fdb_error_t err = wait_future(f1);
|
||||
if (err) {
|
||||
fdb::EmptyFuture f2 = tr.on_error(err);
|
||||
fdb_check(wait_future(f2));
|
||||
continue;
|
||||
}
|
||||
|
||||
int out_present;
|
||||
char *val;
|
||||
int vallen;
|
||||
fdb_check(f1.get(&out_present, (const uint8_t **)&val, &vallen));
|
||||
assert(out_present);
|
||||
std::string statusJsonStr(val, vallen);
|
||||
rapidjson::Document statusJson;
|
||||
statusJson.Parse(statusJsonStr.c_str());
|
||||
// make sure it is available
|
||||
bool available = statusJson["client"]["database_status"]["available"].GetBool();
|
||||
if (!available)
|
||||
continue; // cannot reach to the cluster, retry
|
||||
return statusJsonStr;
|
||||
}
|
||||
}
|
||||
|
||||
// Reboots the only process of the cluster through fdb_database_reboot_worker
// and verifies that the cluster generation reported by status json increases.
TEST_CASE("fdb_database_reboot_worker") {
	std::string status_json = get_valid_status_json();
	rapidjson::Document statusJson;
	statusJson.Parse(status_json.c_str());

	// Structural preconditions use REQUIRE: CHECK would keep running after a
	// failure and the following member accesses would hit a missing value.
	REQUIRE(!statusJson.HasParseError());
	REQUIRE(statusJson.IsObject());
	REQUIRE(statusJson.HasMember("cluster"));
	REQUIRE(statusJson["cluster"].HasMember("generation"));
	// GetInt64() accepts every value GetInt() does and also wider ones
	const int64_t old_generation = statusJson["cluster"]["generation"].GetInt64();
	REQUIRE(statusJson["cluster"].HasMember("processes"));
	// Make sure we only have one process in the cluster
	// Thus, rebooting the worker ensures a recovery
	// Configuration changes may break the contract here
	REQUIRE(statusJson["cluster"]["processes"].MemberCount() == 1);
	auto processPtr = statusJson["cluster"]["processes"].MemberBegin();
	REQUIRE(processPtr->value.HasMember("address"));
	const std::string network_address = processPtr->value["address"].GetString();

	while (true) {
		fdb::Int64Future f = fdb::Database::reboot_worker(db,
		                                                  reinterpret_cast<const uint8_t*>(network_address.data()),
		                                                  static_cast<int>(network_address.size()),
		                                                  false,
		                                                  0);
		fdb_check(wait_future(f));
		int64_t successful;
		fdb_check(f.get(&successful));
		if (successful) break; // retry rebooting until success
	}

	status_json = get_valid_status_json();
	statusJson.Parse(status_json.c_str());
	REQUIRE(!statusJson.HasParseError());
	REQUIRE(statusJson.IsObject());
	REQUIRE(statusJson.HasMember("cluster"));
	REQUIRE(statusJson["cluster"].HasMember("generation"));
	const int64_t new_generation = statusJson["cluster"]["generation"].GetInt64();
	// The generation number should increase after the recovery
	CHECK(new_generation > old_generation);
}
|
||||
|
||||
TEST_CASE("fdb_error_predicate") {
|
||||
CHECK(fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, 1007)); // transaction_too_old
|
||||
CHECK(fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, 1020)); // not_committed
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include "fdb_flow.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdio.h>
|
||||
#include <cinttypes>
|
||||
|
||||
@ -101,6 +102,7 @@ namespace FDB {
|
||||
|
||||
Reference<Transaction> createTransaction() override;
|
||||
void setDatabaseOption(FDBDatabaseOption option, Optional<StringRef> value = Optional<StringRef>()) override;
|
||||
Future<int64_t> rebootWorker(const StringRef& address, bool check = false, int duration = 0) override;
|
||||
|
||||
private:
|
||||
FDBDatabase* db;
|
||||
@ -284,6 +286,16 @@ namespace FDB {
|
||||
throw_on_error(fdb_database_set_option(db, option, nullptr, 0));
|
||||
}
|
||||
|
||||
Future<int64_t> DatabaseImpl::rebootWorker(const StringRef &address, bool check, int duration) {
|
||||
return backToFuture<int64_t>( fdb_database_reboot_worker(db, address.begin(), address.size(), check, duration), [](Reference<CFuture> f) {
|
||||
int64_t res;
|
||||
|
||||
throw_on_error(fdb_future_get_int64( f->f, &res ) );
|
||||
|
||||
return res;
|
||||
} );
|
||||
}
|
||||
|
||||
TransactionImpl::TransactionImpl(FDBDatabase* db) {
|
||||
throw_on_error(fdb_database_create_transaction(db, &tr));
|
||||
}
|
||||
|
@ -124,6 +124,7 @@ namespace FDB {
|
||||
virtual ~Database(){};
|
||||
virtual Reference<Transaction> createTransaction() = 0;
|
||||
virtual void setDatabaseOption(FDBDatabaseOption option, Optional<StringRef> value = Optional<StringRef>()) = 0;
|
||||
virtual Future<int64_t> rebootWorker(const StringRef& address, bool check = false, int duration = 0) = 0;
|
||||
};
|
||||
|
||||
class API {
|
||||
|
@ -426,6 +426,25 @@ An |database-blurb1| Modifications to a database are performed via transactions.
|
||||
``*out_transaction``
|
||||
Set to point to the newly created :type:`FDBTransaction`.
|
||||
|
||||
.. function:: FDBFuture* fdb_database_reboot_worker(FDBDatabase* database, uint8_t const* address, int address_length, fdb_bool_t check, int duration)
|
||||
|
||||
Reboot the specified process in the database.
|
||||
|
||||
|future-return0| an :type:`int64_t` that indicates whether the reboot request was sent: 1 means the request was sent and 0 means failure (e.g. the process with the specified address does not exist). |future-return1| call :func:`fdb_future_get_int64()` to extract the result, |future-return2|
|
||||
|
||||
``address``
|
||||
A pointer to the network address of the process.
|
||||
|
||||
``address_length``
|
||||
|length-of| ``address``.
|
||||
|
||||
``check``
|
||||
Whether to perform a storage engine integrity check. The check-on-reboot is implemented by writing a check/validation file to disk as a breadcrumb for the process to find after reboot; the process then consumes the breadcrumb file and passes true to the ``integrityCheck`` parameter of the ``openKVStore()`` factory method.
|
||||
|
||||
``duration``
|
||||
If positive, the process will be first suspended for ``duration`` seconds before being rebooted.
|
||||
|
||||
|
||||
Transaction
|
||||
===========
|
||||
|
||||
|
@ -35,7 +35,7 @@ Status
|
||||
Bindings
|
||||
--------
|
||||
* Python: The function ``get_estimated_range_size_bytes`` will now throw an error if the ``begin_key`` or ``end_key`` is ``None``. `(PR #3394) <https://github.com/apple/foundationdb/pull/3394>`_
|
||||
|
||||
* C: Added a function, ``fdb_database_reboot_worker``, to reboot or suspend the specified process. `(PR #4094) <https://github.com/apple/foundationdb/pull/4094>`_
|
||||
|
||||
Other Changes
|
||||
-------------
|
||||
|
@ -206,6 +206,9 @@ public:
|
||||
Future<Void> connectionFileChanged();
|
||||
bool switchable = false;
|
||||
|
||||
// Management API: attempt to kill or suspend a process; returns 1 on success, 0 on failure
|
||||
Future<int64_t> rebootWorker(StringRef address, bool check = false, int duration = 0);
|
||||
|
||||
//private:
|
||||
explicit DatabaseContext( Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile, Reference<AsyncVar<ClientDBInfo>> clientDBInfo,
|
||||
Future<Void> clientInfoMonitor, TaskPriority taskID, LocalityData const& clientLocality,
|
||||
|
@ -84,6 +84,9 @@ public:
|
||||
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
|
||||
// Management API: attempt to kill or suspend a process; returns 1 on success, 0 on failure
|
||||
virtual ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration) = 0;
|
||||
};
|
||||
|
||||
class IClientApi {
|
||||
|
@ -285,6 +285,20 @@ void DLDatabase::setOption(FDBDatabaseOptions::Option option, Optional<StringRef
|
||||
throwIfError(api->databaseSetOption(db, option, value.present() ? value.get().begin() : nullptr, value.present() ? value.get().size() : 0));
|
||||
}
|
||||
|
||||
// Forwards a reboot request to the dynamically loaded client library.
// Returns unsupported_operation() when the library did not provide
// fdb_database_reboot_worker (the symbol is loaded as optional for some
// header versions, so the function pointer may be null).
ThreadFuture<int64_t> DLDatabase::rebootWorker(const StringRef& address, bool check, int duration) {
	if (api->databaseRebootWorker == nullptr) {
		return unsupported_operation();
	}

	// Converts the completed C future into its int64 payload
	auto readInt64 = [](FdbCApi::FDBFuture* f, FdbCApi* api) {
		int64_t res;
		FdbCApi::fdb_error_t error = api->futureGetInt64(f, &res);
		ASSERT(!error);
		return res;
	};

	FdbCApi::FDBFuture* rebootFuture = api->databaseRebootWorker(db, address.begin(), address.size(), check, duration);
	return toThreadFuture<int64_t>(api, rebootFuture, readInt64);
}
|
||||
|
||||
// DLApi
|
||||
template<class T>
|
||||
void loadClientFunction(T *fp, void *lib, std::string libPath, const char *functionName, bool requireFunction = true) {
|
||||
@ -319,6 +333,7 @@ void DLApi::init() {
|
||||
loadClientFunction(&api->databaseCreateTransaction, lib, fdbCPath, "fdb_database_create_transaction");
|
||||
loadClientFunction(&api->databaseSetOption, lib, fdbCPath, "fdb_database_set_option");
|
||||
loadClientFunction(&api->databaseDestroy, lib, fdbCPath, "fdb_database_destroy");
|
||||
loadClientFunction(&api->databaseRebootWorker, lib, fdbCPath, "fdb_database_reboot_worker", headerVersion >= 700);
|
||||
|
||||
loadClientFunction(&api->transactionSetOption, lib, fdbCPath, "fdb_transaction_set_option");
|
||||
loadClientFunction(&api->transactionDestroy, lib, fdbCPath, "fdb_transaction_destroy");
|
||||
@ -781,6 +796,13 @@ void MultiVersionDatabase::setOption(FDBDatabaseOptions::Option option, Optional
|
||||
}
|
||||
}
|
||||
|
||||
// Delegates the reboot request to the currently selected client database.
// When no underlying database is set yet, resolves immediately to 0, the
// failure value of the rebootWorker contract (1 = success, 0 = failure).
ThreadFuture<int64_t> MultiVersionDatabase::rebootWorker(const StringRef& address, bool check, int duration) {
	if (!dbState->db) {
		// Explicit int64_t rather than the bool literal `false`: same runtime
		// value, but it matches the declared result type of the future.
		return static_cast<int64_t>(0);
	}
	return dbState->db->rebootWorker(address, check, duration);
}
|
||||
|
||||
void MultiVersionDatabase::Connector::connect() {
|
||||
addref();
|
||||
onMainThreadVoid([this]() {
|
||||
|
@ -66,6 +66,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
|
||||
fdb_error_t (*databaseCreateTransaction)(FDBDatabase *database, FDBTransaction **tr);
|
||||
fdb_error_t (*databaseSetOption)(FDBDatabase *database, FDBDatabaseOptions::Option option, uint8_t const *value, int valueLength);
|
||||
void (*databaseDestroy)(FDBDatabase *database);
|
||||
FDBFuture* (*databaseRebootWorker)(FDBDatabase *database, uint8_t const *address, int addressLength, fdb_bool_t check, int duration);
|
||||
|
||||
//Transaction
|
||||
fdb_error_t (*transactionSetOption)(FDBTransaction *tr, FDBTransactionOptions::Option option, uint8_t const *value, int valueLength);
|
||||
@ -109,6 +110,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
|
||||
fdb_error_t (*futureGetDatabase)(FDBFuture *f, FDBDatabase **outDb);
|
||||
fdb_error_t (*futureGetInt64)(FDBFuture *f, int64_t *outValue);
|
||||
fdb_error_t (*futureGetUInt64)(FDBFuture *f, uint64_t *outValue);
|
||||
fdb_error_t (*futureGetBool) (FDBFuture *f, bool *outValue);
|
||||
fdb_error_t (*futureGetError)(FDBFuture *f);
|
||||
fdb_error_t (*futureGetKey)(FDBFuture *f, uint8_t const **outKey, int *outKeyLength);
|
||||
fdb_error_t (*futureGetValue)(FDBFuture *f, fdb_bool_t *outPresent, uint8_t const **outValue, int *outValueLength);
|
||||
@ -194,6 +196,8 @@ public:
|
||||
void addref() override { ThreadSafeReferenceCounted<DLDatabase>::addref(); }
|
||||
void delref() override { ThreadSafeReferenceCounted<DLDatabase>::delref(); }
|
||||
|
||||
ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration) override;
|
||||
|
||||
private:
|
||||
const Reference<FdbCApi> api;
|
||||
FdbCApi::FDBDatabase* db; // Always set if API version >= 610, otherwise guaranteed to be set when onReady future is set
|
||||
@ -325,6 +329,8 @@ public:
|
||||
|
||||
static Reference<IDatabase> debugCreateFromExistingDatabase(Reference<IDatabase> db);
|
||||
|
||||
// Management API: attempt to kill or suspend a process (see IDatabase::rebootWorker)
ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration) override;
|
||||
|
||||
private:
|
||||
struct DatabaseState;
|
||||
|
||||
|
@ -4779,3 +4779,53 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exc
|
||||
|
||||
return (ddCheck && coordinatorCheck);
|
||||
}
|
||||
|
||||
// Registers one worker (decoded from kv.value) in *address_interface, keyed by
// its address with any ":tls" suffix removed, and also under its secondary
// address when one is present. The entry is only added if the worker answers a
// GetLeaderRequest before CLI_CONNECT_TIMEOUT elapses; otherwise nothing is
// recorded. connectLock bounds how many of these probes run concurrently.
ACTOR Future<Void> addInterfaceActor( std::map<Key,std::pair<Value,ClientLeaderRegInterface>>* address_interface, Reference<FlowLock> connectLock, KeyValue kv) {
	wait(connectLock->take());
	state FlowLock::Releaser releaser(*connectLock);
	state ClientWorkerInterface workerInterf = BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
	state ClientLeaderRegInterface leaderInterf(workerInterf.address());
	choose {
		when( Optional<LeaderInfo> rep = wait( brokenPromiseToNever(leaderInterf.getLeader.getReply(GetLeaderRequest())) ) ) {
			// Primary address: the key itself, minus a trailing ":tls"
			StringRef primaryAddr = kv.key;
			if (primaryAddr.endsWith(LiteralStringRef(":tls"))) {
				primaryAddr = primaryAddr.removeSuffix(LiteralStringRef(":tls"));
			}
			(*address_interface)[primaryAddr] = std::make_pair(kv.value, leaderInterf);

			// Secondary address (if any) maps to the same interface
			auto secondaryAddr = workerInterf.reboot.getEndpoint().addresses.secondaryAddress;
			if (secondaryAddr.present()) {
				Key secondaryFull = StringRef(secondaryAddr.get().toString());
				StringRef secondaryStripped = secondaryFull;
				if (secondaryStripped.endsWith(LiteralStringRef(":tls"))) {
					secondaryStripped = secondaryStripped.removeSuffix(LiteralStringRef(":tls"));
				}
				(*address_interface)[secondaryStripped] = std::make_pair(kv.value, leaderInterf);
			}
		}
		when( wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT)) ) {} // NOTE : change timeout time here if necessary
	}
	return Void();
}
|
||||
|
||||
// Attempts to reboot the worker whose address equals `addr`.
//
// Collects the worker interfaces of the cluster, probes them through
// addInterfaceActor, and sends a RebootRequest(false, check, duration) to the
// matching worker. A negative `duration` is treated as 0.
//
// Returns 1 once the request has been sent, or 0 when there is no connection
// file or no probed worker matches `addr`.
//
// NOTE(review): `addr` is a non-owning reference that is read after several
// waits; callers presumably keep the backing memory alive until the actor
// finishes - confirm at the call sites.
ACTOR Future<int64_t> rebootWorkerActor(DatabaseContext* cx, ValueRef addr, bool check, int duration) {
	// ignore negative value
	if (duration < 0) duration = 0;
	// fetch the addresses of all workers
	state std::map<Key,std::pair<Value,ClientLeaderRegInterface>> address_interface;
	if (!cx->getConnectionFile())
		return 0;
	Standalone<RangeResultRef> kvs = wait(getWorkerInterfaces(cx->getConnectionFile()));
	ASSERT(!kvs.more);
	// Note: reuse this knob from fdbcli, change it if necessary
	Reference<FlowLock> connectLock(new FlowLock(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM));
	std::vector<Future<Void>> addInterfs;
	for( const auto& it : kvs ) {
		addInterfs.push_back(addInterfaceActor(&address_interface, connectLock, it));
	}
	wait(waitForAll(addInterfs));

	// One lookup instead of count() followed by operator[]: avoids a second
	// search and a second temporary Key built from addr.
	auto workerIt = address_interface.find(addr);
	if (workerIt == address_interface.end()) return 0;

	BinaryReader::fromStringRef<ClientWorkerInterface>(workerIt->second.first, IncludeVersion())
	    .reboot.send(RebootRequest(false, check, duration));
	return 1;
}
|
||||
|
||||
// Management API entry point on the native client: starts rebootWorkerActor
// for this database context and returns its future (1 = request sent,
// 0 = failure).
Future<int64_t> DatabaseContext::rebootWorker(StringRef addr, bool check, int duration) {
	Future<int64_t> rebootResult = rebootWorkerActor(this, addr, check, duration);
	return rebootResult;
}
|
||||
|
@ -68,6 +68,14 @@ void ThreadSafeDatabase::setOption( FDBDatabaseOptions::Option option, Optional<
|
||||
}, &db->deferredError );
|
||||
}
|
||||
|
||||
// Thread-safe wrapper: schedules DatabaseContext::rebootWorker through
// onMainThread() and returns a ThreadFuture for its result.
ThreadFuture<int64_t> ThreadSafeDatabase::rebootWorker(const StringRef& address, bool check, int duration) {
	// Owning copy of the address, captured by value so the deferred call does
	// not depend on the caller's buffer.
	Key ownedAddress = address;
	DatabaseContext* dbContext = this->db;
	return onMainThread(
	    [dbContext, ownedAddress, check, duration]() -> Future<int64_t> {
		    return dbContext->rebootWorker(ownedAddress, check, duration);
	    });
}
|
||||
|
||||
ThreadSafeDatabase::ThreadSafeDatabase(std::string connFilename, int apiVersion) {
|
||||
ClusterConnectionFile *connFile = new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFilename).first);
|
||||
|
||||
|
@ -41,6 +41,8 @@ public:
|
||||
void addref() { ThreadSafeReferenceCounted<ThreadSafeDatabase>::addref(); }
|
||||
void delref() { ThreadSafeReferenceCounted<ThreadSafeDatabase>::delref(); }
|
||||
|
||||
ThreadFuture<int64_t> rebootWorker(const StringRef& address, bool check, int duration);
|
||||
|
||||
private:
|
||||
friend class ThreadSafeTransaction;
|
||||
DatabaseContext* db;
|
||||
|
Loading…
x
Reference in New Issue
Block a user