#include "batched_indexer.h"
|
|
#include "core_api.h"
|
|
#include "thread_local_vars.h"
|
|
#include "cached_resource_stat.h"
|
|
|
|
BatchedIndexer::BatchedIndexer(HttpServer* server, Store* store, Store* meta_store, const size_t num_threads,
|
|
const Config& config, const std::atomic<bool>& skip_writes):
|
|
server(server), store(store), meta_store(meta_store), num_threads(num_threads),
|
|
last_gc_run(std::chrono::high_resolution_clock::now()), quit(false),
|
|
config(config), skip_writes(skip_writes) {
|
|
queues.resize(num_threads);
|
|
qmutuxes = new await_t[num_threads];
|
|
skip_index_iter_upper_bound = new rocksdb::Slice(skip_index_upper_bound_key);
|
|
}
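
// Writes arrive from the raft write thread as one or more chunks per request. Each chunk is
// persisted to the store under a per-request key prefix, and once the last chunk arrives the
// request id is dispatched to one of `num_threads` queues, chosen by hashing the collection
// name, so that all writes to a given collection are applied serially by the same worker thread.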
void BatchedIndexer::enqueue(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
    // Called by the raft write thread: goal is to quickly send the request to a queue and move on
    // NOTE: it's ok to access `req` and `res` in this function without synchronization
    // because the read thread for *this* request is paused now and resumes only after it is messaged at the end

    //LOG(INFO) << "BatchedIndexer::enqueue";
    uint32_t chunk_sequence = 0;

    {
        uint64_t now = std::chrono::duration_cast<std::chrono::seconds>(
                std::chrono::system_clock::now().time_since_epoch()).count();

        std::unique_lock lk(mutex);
        auto req_res_map_it = req_res_map.find(req->start_ts);

        if(req_res_map_it == req_res_map.end()) {
            // first chunk
            req_res_t req_res(req->start_ts, "", req, res, now, 1, 0, false);
            req_res_map.emplace(req->start_ts, req_res);
        } else {
            chunk_sequence = req_res_map_it->second.num_chunks;
            req_res_map_it->second.num_chunks += 1;
            req_res_map_it->second.last_updated = now;
        }
    }

    const std::string& req_key_prefix = get_req_prefix_key(req->start_ts);
    const std::string& request_chunk_key = req_key_prefix + StringUtils::serialize_uint32_t(chunk_sequence);

    //LOG(INFO) << "request_chunk_key: " << req->start_ts << "_" << chunk_sequence << ", req body: " << req->body;

    store->insert(request_chunk_key, req->to_json());

    bool is_old_serialized_request = (req->start_ts == 0);
    bool read_more_input = (req->_req != nullptr && req->_req->proceed_req);

    if(req->last_chunk_aggregate) {
        //LOG(INFO) << "Last chunk for req_id: " << req->start_ts;
        queued_writes += (chunk_sequence + 1);

        {
            const std::string& coll_name = get_collection_name(req);
            uint64_t queue_id = StringUtils::hash_wy(coll_name.c_str(), coll_name.size()) % num_threads;
            req->body = "";

            {
                std::unique_lock lk2(mutex);
                req_res_map[req->start_ts].is_complete = true;
            }

            {
                std::lock_guard lk1(qmutuxes[queue_id].mcv);
                queues[queue_id].emplace_back(req->start_ts);
            }

            qmutuxes[queue_id].cv.notify_one();
        }

        // IMPORTANT: must not read `req` variables (except _req) henceforth to prevent data races with indexing thread

        if(is_old_serialized_request) {
            // Indicates a serialized request from a version that did not support batching (v0.21 and below).
            // We can only do serial writes as we cannot reliably distinguish one streaming request from another.
            // So, wait for `req_res_map` to be empty before proceeding.
            while(true) {
                {
                    std::unique_lock lk(mutex);
                    if(req_res_map.empty()) {
                        break;
                    }
                }

                std::this_thread::sleep_for(std::chrono::milliseconds(10));
            }
        }
    } else {
        req->body = "";
    }

    if(read_more_input) {
        // Tell the http library to read more input data
        deferred_req_res_t* req_res = new deferred_req_res_t(req, res, server, true);
        server->get_message_dispatcher()->send_message(HttpServer::REQUEST_PROCEED_MESSAGE, req_res);
    }
}
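
// Resolves the collection a write request belongs to. For collection creation requests the name
// is parsed out of the request body, so that a collection's creation and its subsequent writes
// hash to the same queue and are applied in order.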
std::string BatchedIndexer::get_collection_name(const std::shared_ptr<http_req>& req) {
    std::string& coll_name = req->params["collection"];

    if(coll_name.empty()) {
        route_path* rpath;
        server->get_route(req->route_hash, &rpath);

        // ensure that collection creation is sent to the same queue as writes to that collection
        if(rpath->handler == post_create_collection) {
            nlohmann::json obj = nlohmann::json::parse(req->body, nullptr, false);

            if(!obj.is_discarded() && obj.is_object() &&
               obj.count("name") != 0 && obj["name"].is_string()) {
                coll_name = obj["name"];
            }
        }
    }

    return coll_name;
}
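
// Main loop: spawns one worker per queue to replay buffered request chunks from the store and
// dispatch them to their route handlers, while this thread periodically garbage-collects
// abandoned (incomplete) requests.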
void BatchedIndexer::run() {
    LOG(INFO) << "Starting batch indexer with " << num_threads << " threads.";
    ThreadPool* thread_pool = new ThreadPool(num_threads);
    skip_index_iter = meta_store->scan(SKIP_INDICES_PREFIX, skip_index_iter_upper_bound);
    populate_skip_index();

    LOG(INFO) << "BatchedIndexer skip_index: " << skip_index;

    for(size_t i = 0; i < num_threads; i++) {
        std::deque<uint64_t>& queue = queues[i];
        await_t& queue_mutex = qmutuxes[i];

        thread_pool->enqueue([&queue, &queue_mutex, this, i]() {
            while(!quit) {
                std::unique_lock<std::mutex> qlk(queue_mutex.mcv);
                queue_mutex.cv.wait(qlk, [&] { return quit || !queue.empty(); });

                if(quit) {
                    break;
                }

                uint64_t req_id = queue.front();
                queue.pop_front();
                qlk.unlock();

                std::unique_lock mlk(mutex);
                auto req_res_map_it = req_res_map.find(req_id);
                if(req_res_map_it == req_res_map.end()) {
                    LOG(ERROR) << "Req ID " << req_id << " not found in req_res_map.";
                    continue;
                }

                req_res_t& orig_req_res = req_res_map_it->second;
                mlk.unlock();

                // scan db for all logs associated with request
                const std::string& req_key_prefix = get_req_prefix_key(req_id);

                /* Format of the key: $RL_reqId_chunkId
                   NOTE: we use an explicit `next_chunk_index` so that reads can resume from a partially processed request.
                */
                const std::string& req_key_start_prefix = req_key_prefix + StringUtils::serialize_uint32_t(
                        orig_req_res.next_chunk_index);

                const std::string& req_key_upper_bound = get_req_suffix_key(req_id);  // cannot inline this
                rocksdb::Slice upper_bound(req_key_upper_bound);
                rocksdb::Iterator* iter = store->scan(req_key_start_prefix, &upper_bound);

                // used to handle partial JSON documents caused by chunking
                std::string& prev_body = orig_req_res.prev_req_body;

                const std::shared_ptr<http_req>& orig_req = orig_req_res.req;
                const std::shared_ptr<http_res>& orig_res = orig_req_res.res;
                bool is_live_req = orig_res->is_alive;

                route_path* found_rpath = nullptr;
                bool route_found = server->get_route(orig_req->route_hash, &found_rpath);
                bool async_res = false;

                while(iter->Valid() && iter->key().starts_with(req_key_prefix)) {
                    std::shared_lock slk(pause_mutex);  // used for snapshot
                    orig_req->body = prev_body;
                    orig_req->load_from_json(iter->value().ToString());

                    // update thread local for reference during a crash
                    write_log_index = orig_req->log_index;

                    if(write_log_index == skip_index) {
                        LOG(ERROR) << "Skipping write log index " << write_log_index
                                   << " which seems to have triggered a crash previously.";
                        populate_skip_index();
                    } else {
                        //LOG(INFO) << "index req " << req_id << ", chunk index: " << orig_req_res.next_chunk_index;
                        auto resource_check = cached_resource_stat_t::get_instance()
                                              .has_enough_resources(config.get_data_dir(),
                                                                    config.get_disk_used_max_percentage(),
                                                                    config.get_memory_used_max_percentage());

                        if(resource_check != cached_resource_stat_t::OK && orig_req->http_method != "DELETE") {
                            const std::string& err_msg = "Rejecting write: running out of resource type: " +
                                                         std::string(magic_enum::enum_name(resource_check));
                            LOG(ERROR) << err_msg;
                            orig_res->set_422(err_msg);
                            async_req_res_t* async_req_res = new async_req_res_t(orig_req, orig_res, true);
                            server->get_message_dispatcher()->send_message(HttpServer::STREAM_RESPONSE_MESSAGE, async_req_res);
                            break;
                        } else if(route_found) {
                            if(skip_writes && found_rpath->handler != post_config) {
                                orig_res->set(422, "Skipping write.");
                                async_req_res_t* async_req_res = new async_req_res_t(orig_req, orig_res, true);
                                server->get_message_dispatcher()->send_message(HttpServer::STREAM_RESPONSE_MESSAGE, async_req_res);
                                break;
                            }

                            async_res = found_rpath->async_res;
                            try {
                                found_rpath->handler(orig_req, orig_res);
                            } catch(const std::exception& e) {
                                LOG(ERROR) << "Exception while calling handler " << found_rpath->_get_action();
                                LOG(ERROR) << "Raw error: " << e.what();
                                // bad request gets a response immediately
                                orig_res->set_400("Bad request.");
                                async_res = false;
                            }
                            prev_body = orig_req->body;
                        } else {
                            orig_res->set_404();
                        }

                        if(is_live_req && (!route_found || !async_res)) {
                            // sync request gets a response immediately
                            async_req_res_t* async_req_res = new async_req_res_t(orig_req, orig_res, true);
                            server->get_message_dispatcher()->send_message(HttpServer::STREAM_RESPONSE_MESSAGE, async_req_res);
                        }

                        if(!route_found) {
                            break;
                        }
                    }

                    queued_writes--;
                    orig_req_res.next_chunk_index++;
                    iter->Next();

                    if(quit) {
                        break;
                    }
                }

                delete iter;

                //LOG(INFO) << "Erasing request data from disk and memory for request " << req_id;

                // we can delete the buffered request content
                store->delete_range(req_key_prefix, req_key_prefix + StringUtils::serialize_uint32_t(UINT32_MAX));

                std::unique_lock lk(mutex);
                req_res_map.erase(req_id);
            }
        });
    }
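
    // Main-thread loop: periodically garbage-collect requests that never received their final
    // chunk (aborted or stalled uploads), removing their buffered chunks from the store and
    // their entries from `req_res_map`.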
    while(!quit) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1000));

        // do gc, if we are due for one
        uint64_t seconds_elapsed = std::chrono::duration_cast<std::chrono::seconds>(
                std::chrono::high_resolution_clock::now() - last_gc_run).count();

        if(seconds_elapsed > GC_INTERVAL_SECONDS) {
            std::unique_lock lk(mutex);
            LOG(INFO) << "Running GC for aborted requests, req map size: " << req_res_map.size();

            // iterate through all map entries and delete ones which are not complete and are older than GC_PRUNE_MAX_SECONDS
            for(auto it = req_res_map.cbegin(); it != req_res_map.cend();) {
                uint64_t seconds_since_batch_update = std::chrono::duration_cast<std::chrono::seconds>(
                        std::chrono::system_clock::now().time_since_epoch()).count() - it->second.last_updated;

                //LOG(INFO) << "GC checking on req id: " << it->first;
                //LOG(INFO) << "Seconds since last batch update: " << seconds_since_batch_update;

                if(!it->second.is_complete && seconds_since_batch_update > GC_PRUNE_MAX_SECONDS) {
                    LOG(INFO) << "Deleting partial upload for req id " << it->second.start_ts;

                    const std::string& req_key_prefix = get_req_prefix_key(it->second.start_ts);
                    store->delete_range(req_key_prefix, req_key_prefix + StringUtils::serialize_uint32_t(UINT32_MAX));

                    if(it->second.res->is_alive) {
                        it->second.res->final = true;
                        async_req_res_t* async_req_res = new async_req_res_t(it->second.req, it->second.res, true);
                        server->get_message_dispatcher()->send_message(HttpServer::STREAM_RESPONSE_MESSAGE, async_req_res);
                    }

                    it = req_res_map.erase(it);
                } else {
                    it++;
                }
            }

            last_gc_run = std::chrono::high_resolution_clock::now();
        }
    }

    LOG(INFO) << "Notifying batch indexer threads about shutdown...";
    for(size_t i = 0; i < num_threads; i++) {
        await_t& queue_mutex = qmutuxes[i];
        std::lock_guard<std::mutex> lk(queue_mutex.mcv);
        queue_mutex.cv.notify_one();
    }

    LOG(INFO) << "Batched indexer threadpool shutdown...";
    thread_pool->shutdown();
    delete thread_pool;
}
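
// Chunk keys follow the $RL_reqId_chunkId layout described in run(). The suffix key replaces the
// trailing '_' with '`' (the next ASCII character after '_'), which gives an exclusive upper
// bound for iterating over all chunks of a single request.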
std::string BatchedIndexer::get_req_prefix_key(uint64_t req_id) {
    const std::string& req_key_prefix = RAFT_REQ_LOG_PREFIX + StringUtils::serialize_uint64_t(req_id) + "_";
    return req_key_prefix;
}

std::string BatchedIndexer::get_req_suffix_key(uint64_t req_id) {
    const std::string& req_key_suffix = RAFT_REQ_LOG_PREFIX + StringUtils::serialize_uint64_t(req_id) + "`";
    return req_key_suffix;
}

BatchedIndexer::~BatchedIndexer() {
    delete [] qmutuxes;
    delete skip_index_iter_upper_bound;
    delete skip_index_iter;
}

void BatchedIndexer::stop() {
    quit = true;
}

int64_t BatchedIndexer::get_queued_writes() {
    return queued_writes;
}
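
// Reads the next log index persisted via persist_applying_index() into `skip_index`. Such an
// index corresponds to a write that was being applied when the process previously went down, so
// the worker threads skip it instead of replaying it and crashing again.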
void BatchedIndexer::populate_skip_index() {
    if(skip_index_iter->Valid() && skip_index_iter->key().starts_with(SKIP_INDICES_PREFIX)) {
        const std::string& index_value = skip_index_iter->value().ToString();
        if(StringUtils::is_int64_t(index_value)) {
            skip_index = std::stoll(index_value);
        }

        skip_index_iter->Next();
    } else {
        skip_index = UNSET_SKIP_INDEX;
    }
}
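
// Persists the log index that is currently being applied, so that it can be picked up by
// populate_skip_index() after a restart. This is presumably invoked from the crash handling
// path, before the process goes down.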
void BatchedIndexer::persist_applying_index() {
    LOG(INFO) << "Saving currently applying index: " << write_log_index;
    std::string key = SKIP_INDICES_PREFIX + std::to_string(write_log_index);
    meta_store->insert(key, std::to_string(write_log_index));
}
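
// Snapshots the in-flight request state (`req_res_map`) as JSON so that partially applied
// requests survive a snapshot and restart cycle; load_state() performs the reverse.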
void BatchedIndexer::serialize_state(nlohmann::json& state) {
    // requires external synchronization!
    state["queued_writes"] = queued_writes.load();
    state["req_res_map"] = nlohmann::json();

    size_t num_reqs_stored = 0;
    std::unique_lock lk(mutex);

    for(auto& kv: req_res_map) {
        std::string req_key = std::to_string(kv.first);
        state["req_res_map"].emplace(req_key, nlohmann::json());
        nlohmann::json& req_res = state["req_res_map"][req_key];
        req_res["start_ts"] = kv.second.start_ts;
        req_res["last_updated"] = kv.second.last_updated;
        req_res["num_chunks"] = kv.second.num_chunks;
        req_res["next_chunk_index"] = kv.second.next_chunk_index;
        req_res["is_complete"] = kv.second.is_complete;
        req_res["req"] = kv.second.req->to_json();
        req_res["prev_req_body"] = kv.second.prev_req_body;
        num_reqs_stored++;

        //LOG(INFO) << "req_key: " << req_key << ", next_chunk_index: " << kv.second.next_chunk_index;
    }

    LOG(INFO) << "Serialized " << num_reqs_stored << " in-flight requests for snapshot.";
}
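
// Restores in-flight requests from a snapshot. Only requests whose last chunk had already
// arrived (is_complete) are re-queued here; the rest are re-enqueued as the remaining raft log
// entries are replayed through enqueue().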
void BatchedIndexer::load_state(const nlohmann::json& state) {
    queued_writes = state["queued_writes"].get<int64_t>();

    size_t num_reqs_restored = 0;
    std::set<uint64_t> queue_ids;

    for(auto& kv: state["req_res_map"].items()) {
        std::shared_ptr<http_req> req = std::make_shared<http_req>();
        req->load_from_json(kv.value()["req"].get<std::string>());

        std::shared_ptr<http_res> res = std::make_shared<http_res>(nullptr);
        req_res_t req_res(kv.value()["start_ts"].get<uint64_t>(),
                          kv.value()["prev_req_body"].get<std::string>(), req, res,
                          kv.value()["last_updated"].get<uint64_t>(),
                          kv.value()["num_chunks"].get<uint32_t>(),
                          kv.value()["next_chunk_index"].get<uint32_t>(),
                          kv.value()["is_complete"].get<bool>());

        {
            std::unique_lock mlk(mutex);
            req_res_map.emplace(std::stoull(kv.key()), req_res);
        }

        // add only completed requests to their respective collection-based queues;
        // the rest will be added by enqueue() when the raft log is completely read
        if(req_res.is_complete) {
            LOG(INFO) << "req_res.start_ts: " << req_res.start_ts
                      << ", req_res.next_chunk_index: " << req_res.next_chunk_index;

            const std::string& coll_name = get_collection_name(req);
            uint64_t queue_id = StringUtils::hash_wy(coll_name.c_str(), coll_name.size()) % num_threads;
            queue_ids.insert(queue_id);
            std::unique_lock qlk(qmutuxes[queue_id].mcv);
            queues[queue_id].emplace_back(req->start_ts);
        }

        num_reqs_restored++;
    }

    // need to sort on `start_ts` to preserve original order before notifying queues
    for(auto queue_id: queue_ids) {
        std::unique_lock lk(qmutuxes[queue_id].mcv);
        std::sort(queues[queue_id].begin(), queues[queue_id].end());
        qmutuxes[queue_id].cv.notify_one();
    }

    LOG(INFO) << "Restored " << num_reqs_restored << " in-flight requests from snapshot.";
}
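
// Exposes the mutex that worker threads hold in shared mode while applying a chunk (see run());
// the snapshot path presumably acquires it exclusively to pause indexing while state is captured.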
std::shared_mutex& BatchedIndexer::get_pause_mutex() {
    return pause_mutex;
}

void BatchedIndexer::clear_skip_indices() {
    delete skip_index_iter;
    skip_index_iter = meta_store->scan(SKIP_INDICES_PREFIX, skip_index_iter_upper_bound);

    while(skip_index_iter->Valid() && skip_index_iter->key().starts_with(SKIP_INDICES_PREFIX)) {
        meta_store->remove(skip_index_iter->key().ToString());
        skip_index_iter->Next();
    }

    meta_store->flush();
}