Make health check consider underlying replication state.

This commit is contained in:
kishorenc 2020-04-02 19:38:30 +05:30
parent b9bc0bd31a
commit deeec05f69
6 changed files with 40 additions and 10 deletions

View File

@ -123,6 +123,8 @@ struct http_req {
const std::map<std::string, std::string> & params, std::string body):
_req(_req), http_method(http_method), route_hash(route_hash), params(params), body(body) {}
// NOTE: we don't ser/de all fields, only ones needed for write forwarding
void deserialize(const std::string& serialized_content) {
nlohmann::json content = nlohmann::json::parse(serialized_content);
route_hash = content["route_hash"];

View File

@ -74,6 +74,8 @@ public:
ReplicationState* get_replication_state() const;
bool is_alive() const;
void set_auth_handler(bool (*handler)(const route_path & rpath, const std::string & auth_key));
void get(const std::string & path, bool (*handler)(http_req & req, http_res & res), bool async = false);

View File

@ -124,6 +124,8 @@ public:
return init_readiness_count >= 2;
}
bool is_alive() const;
// Shut this node down.
void shutdown() {
if (node) {

View File

@ -180,9 +180,15 @@ bool get_debug(http_req & req, http_res & res) {
bool get_health(http_req & req, http_res & res) {
nlohmann::json result;
result["ok"] = true;
res.send_200(result.dump());
return true;
bool alive = server->is_alive();
result["ok"] = alive;
if(alive) {
res.send_body(200, result.dump());
} else {
res.send_body(503, result.dump());
}
return alive;
}
bool get_search(http_req & req, http_res & res) {

View File

@ -243,13 +243,6 @@ uint64_t HttpServer::find_route(const std::vector<std::string> & path_parts, con
int HttpServer::catch_all_handler(h2o_handler_t *_self, h2o_req_t *req) {
h2o_custom_req_handler_t *self = (h2o_custom_req_handler_t *)_self;
// Wait for replicating state to be ready before starting http
// Follower or leader must have started AND data must also have been loaded
if(!self->http_server->get_replication_state()->is_ready()) {
std::string message = "{ \"message\": \"Not Ready\"}";
return send_response(req, 503, message);
}
const std::string & http_method = std::string(req->method.base, req->method.len);
const std::string & path = std::string(req->path.base, req->path.len);
@ -257,6 +250,13 @@ int HttpServer::catch_all_handler(h2o_handler_t *_self, h2o_req_t *req) {
StringUtils::split(path, path_with_query_parts, "?");
const std::string & path_without_query = path_with_query_parts[0];
// Except for health check, wait for replicating state to be ready before allowing requests
// Follower or leader must have started AND data must also have been loaded
if(path_without_query != "/health" && !self->http_server->get_replication_state()->is_ready()) {
std::string message = "{ \"message\": \"Not Ready\"}";
return send_response(req, 503, message);
}
std::vector<std::string> path_parts;
StringUtils::split(path_without_query, path_parts, "/");
@ -460,6 +460,10 @@ ReplicationState* HttpServer::get_replication_state() const {
return replication_state;
}
bool HttpServer::is_alive() const {
return replication_state->is_alive();
}
bool HttpServer::get_route(uint64_t hash, route_path** found_rpath) {
for (auto& hash_route : routes) {
if(hash_route.first == hash) {

View File

@ -354,6 +354,20 @@ size_t ReplicationState::get_init_readiness_count() const {
return init_readiness_count.load();
}
bool ReplicationState::is_alive() const {
if(node == nullptr) {
return false;
}
if(!is_ready()) {
return false;
}
braft::NodeStatus node_status;
node->get_status(&node_status);
return node_status.state == braft::State::STATE_CANDIDATE || node_status.state == braft::State::STATE_LEADER;
}
void InitSnapshotClosure::Run() {
// Auto delete this after Run()
std::unique_ptr<InitSnapshotClosure> self_guard(this);