mirror of
https://github.com/typesense/typesense.git
synced 2025-05-28 09:46:05 +08:00
Address lag in updating the resource error reported by health checks.
Health checks now run the resource check themselves instead of reading the last stored resource error.
This commit is contained in:
parent
51525108c2
commit
9c90290ea7
@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
#include <sys/statvfs.h>
|
||||
@ -14,18 +16,8 @@ public:
|
||||
|
||||
private:
|
||||
const static size_t REFRESH_INTERVAL_SECS = 5;
|
||||
uint64_t disk_total_bytes = 0;
|
||||
uint64_t disk_used_bytes = 0;
|
||||
|
||||
uint64_t memory_total_bytes = 0;
|
||||
uint64_t memory_available_bytes = 0;
|
||||
|
||||
uint64_t swap_total_bytes = 0;
|
||||
uint64_t swap_free_bytes = 0;
|
||||
|
||||
uint64_t last_checked_ts = 0;
|
||||
|
||||
resource_check_t resource_error;
|
||||
std::atomic<uint64_t> last_checked_ts = 0;
|
||||
std::mutex m;
|
||||
|
||||
cached_resource_stat_t() = default;
|
||||
|
||||
@ -41,6 +33,4 @@ public:
|
||||
resource_check_t has_enough_resources(const std::string& data_dir_path,
|
||||
const int disk_used_max_percentage,
|
||||
const int memory_used_max_percentage);
|
||||
|
||||
const resource_check_t get_out_of_resource_error() const;
|
||||
};
|
||||
|
@ -11,6 +11,17 @@ cached_resource_stat_t::has_enough_resources(const std::string& data_dir_path,
|
||||
return cached_resource_stat_t::OK;
|
||||
}
|
||||
|
||||
std::unique_lock lk(m);
|
||||
|
||||
uint64_t disk_total_bytes = 0;
|
||||
uint64_t disk_used_bytes = 0;
|
||||
|
||||
uint64_t memory_total_bytes = 0;
|
||||
uint64_t memory_available_bytes = 0;
|
||||
|
||||
uint64_t swap_total_bytes = 0;
|
||||
uint64_t swap_free_bytes = 0;
|
||||
|
||||
uint64_t now = std::chrono::duration_cast<std::chrono::seconds>(
|
||||
std::chrono::system_clock::now().time_since_epoch()).count();
|
||||
|
||||
@ -61,12 +72,12 @@ cached_resource_stat_t::has_enough_resources(const std::string& data_dir_path,
|
||||
last_checked_ts = now;
|
||||
}
|
||||
|
||||
lk.unlock();
|
||||
|
||||
double disk_used_percentage = (double(disk_used_bytes)/double(disk_total_bytes)) * 100;
|
||||
if(disk_used_percentage > disk_used_max_percentage) {
|
||||
LOG(INFO) << "disk_total_bytes: " << disk_total_bytes << ", disk_used_bytes: " << disk_used_bytes
|
||||
<< ", disk_used_percentage: " << disk_used_percentage;
|
||||
|
||||
resource_error = cached_resource_stat_t::OUT_OF_DISK;
|
||||
return cached_resource_stat_t::OUT_OF_DISK;
|
||||
}
|
||||
|
||||
@ -79,7 +90,6 @@ cached_resource_stat_t::has_enough_resources(const std::string& data_dir_path,
|
||||
uint64_t all_memory_used = (memory_total_bytes - memory_available_bytes) + (swap_total_bytes - swap_free_bytes);
|
||||
|
||||
if(all_memory_used >= memory_total_bytes) {
|
||||
resource_error = cached_resource_stat_t::OUT_OF_MEMORY;
|
||||
return cached_resource_stat_t::OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
@ -92,14 +102,8 @@ cached_resource_stat_t::has_enough_resources(const std::string& data_dir_path,
|
||||
LOG(INFO) << "memory_total: " << memory_total_bytes << ", memory_available: " << memory_available_bytes
|
||||
<< ", all_memory_used: " << all_memory_used << ", free_mem: " << free_mem
|
||||
<< ", memory_free_min: " << memory_free_min_bytes;
|
||||
resource_error = cached_resource_stat_t::OUT_OF_MEMORY;
|
||||
return cached_resource_stat_t::OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
resource_error = cached_resource_stat_t::OK;
|
||||
return cached_resource_stat_t::OK;
|
||||
}
|
||||
|
||||
const cached_resource_stat_t::resource_check_t cached_resource_stat_t::get_out_of_resource_error() const {
|
||||
return resource_error;
|
||||
}
|
@ -315,9 +315,14 @@ bool get_health_with_resource_usage(const std::shared_ptr<http_req>& req, const
|
||||
nlohmann::json result;
|
||||
bool alive = server->is_alive();
|
||||
|
||||
auto resource_error = cached_resource_stat_t::get_instance().get_out_of_resource_error();
|
||||
if (resource_error != cached_resource_stat_t::resource_check_t::OK) {
|
||||
result["resource_error"] = std::string(magic_enum::enum_name(resource_error));
|
||||
auto resource_check = cached_resource_stat_t::get_instance().has_enough_resources(
|
||||
Config::get_instance().get_data_dir(),
|
||||
Config::get_instance().get_disk_used_max_percentage(),
|
||||
Config::get_instance().get_memory_used_max_percentage()
|
||||
);
|
||||
|
||||
if (resource_check != cached_resource_stat_t::resource_check_t::OK) {
|
||||
result["resource_error"] = std::string(magic_enum::enum_name(resource_check));
|
||||
}
|
||||
|
||||
if(req->params.count("cpu_threshold") != 0 && StringUtils::is_float(req->params["cpu_threshold"])) {
|
||||
@ -345,9 +350,14 @@ bool get_health(const std::shared_ptr<http_req>& req, const std::shared_ptr<http
|
||||
bool alive = server->is_alive();
|
||||
result["ok"] = alive;
|
||||
|
||||
auto resource_error = cached_resource_stat_t::get_instance().get_out_of_resource_error();
|
||||
if (resource_error != cached_resource_stat_t::resource_check_t::OK) {
|
||||
result["resource_error"] = std::string(magic_enum::enum_name(resource_error));
|
||||
auto resource_check = cached_resource_stat_t::get_instance().has_enough_resources(
|
||||
Config::get_instance().get_data_dir(),
|
||||
Config::get_instance().get_disk_used_max_percentage(),
|
||||
Config::get_instance().get_memory_used_max_percentage()
|
||||
);
|
||||
|
||||
if (resource_check != cached_resource_stat_t::resource_check_t::OK) {
|
||||
result["resource_error"] = std::string(magic_enum::enum_name(resource_check));
|
||||
}
|
||||
|
||||
if(alive) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user