#include "batched_indexer.h"
|
|
#include "core_api.h"
|
|
#include "thread_local_vars.h"
|
|
|
|
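
// The indexer runs a fixed pool of worker threads; queues[i] is guarded by
// qmutuxes[i] and drained only by worker i.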
BatchedIndexer::BatchedIndexer(HttpServer* server, Store* store, const size_t num_threads):
        server(server), store(store), num_threads(num_threads),
        last_gc_run(std::chrono::high_resolution_clock::now()), exit(false) {
    thread_pool = new ThreadPool(num_threads);
    queues.resize(num_threads);
    qmutuxes = new std::mutex[num_threads];
}
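
// Persists an incoming write chunk to the store; once the last chunk of a
// request has arrived, the request id is queued for the worker thread that
// owns its collection.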
void BatchedIndexer::enqueue(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
    std::string& coll_name = req->params["collection"];

    if(coll_name.empty()) {
        route_path* rpath;
        server->get_route(req->route_hash, &rpath);

        // ensure that collection creation is sent to the same queue as writes to that collection
        if(rpath->handler == post_create_collection) {
            nlohmann::json obj = nlohmann::json::parse(req->body, nullptr, false);

            if(!obj.is_discarded() && obj.is_object() &&
               obj.count("name") != 0 && obj["name"].is_string()) {
                coll_name = obj["name"];
            }
        }
    }
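
    // writes are sharded by collection name, so every write to a given
    // collection lands on the same queue and is applied by a single thread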
    uint64_t queue_id = StringUtils::hash_wy(coll_name.c_str(), coll_name.size()) % num_threads;

    uint32_t chunk_sequence = 0;
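
    // a large request body arrives in chunks; count the chunks seen so far so
    // that each chunk can be stored under its own sequence-numbered key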
    {
        std::unique_lock lk(mutex);
        chunk_sequence = request_to_chunk[req->start_ts];
        request_to_chunk[req->start_ts] += 1;
    }

    const std::string& req_key_prefix = get_req_prefix_key(req->start_ts);
    const std::string& request_chunk_key = req_key_prefix + StringUtils::serialize_uint32_t(chunk_sequence);

    //LOG(INFO) << "req_id: " << req->start_ts << ", chunk_sequence: " << chunk_sequence;

    store->insert(request_chunk_key, req->serialize());
    req->body = "";
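
    // register the req/res pair only once per request, stamped with the batch
    // start time so that the GC pass can prune requests abandoned mid-stream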
    {
        std::unique_lock lk(mutex);
        auto req_res_map_it = req_res_map.find(req->start_ts);
        if(req_res_map_it == req_res_map.end()) {
            uint64_t batch_begin_ts = std::chrono::duration_cast<std::chrono::seconds>(
                                      std::chrono::system_clock::now().time_since_epoch()).count();

            req_res_t req_res{req, res, batch_begin_ts};
            req_res_map[req->start_ts] = req_res;
        }
    }
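
    // the request id is queued for indexing only when its final chunk has
    // arrived; until then, chunks simply accumulate in the store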
    if(req->last_chunk_aggregate) {
        //LOG(INFO) << "Last chunk for req_id: " << req->start_ts << ", queue_id: " << queue_id;

        queued_writes += (chunk_sequence + 1);

        {
            std::unique_lock lk(qmutuxes[queue_id]);
            queues[queue_id].emplace_back(req->start_ts);
        }

        std::unique_lock lk(mutex);
        request_to_chunk.erase(req->start_ts);
    }
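
    // signal the HTTP layer to continue reading the request stream, allowing
    // the next chunk of a streaming upload to flow in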
    if(req->_req != nullptr && req->_req->proceed_req) {
        deferred_req_res_t* req_res = new deferred_req_res_t(req, res, server, true);
        server->get_message_dispatcher()->send_message(HttpServer::REQUEST_PROCEED_MESSAGE, req_res);
    }
}
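
// Spawns one long-lived worker per queue to replay persisted request chunks
// through their route handlers, then loops on the calling thread running
// periodic GC until stop() is called.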
void BatchedIndexer::run() {
    LOG(INFO) << "Starting batch indexer with " << num_threads << " threads.";

    for(size_t i = 0; i < num_threads; i++) {
        std::deque<uint64_t>& queue = queues[i];
        std::mutex& queue_mutex = qmutuxes[i];
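
        // each worker drains only its own queue; a queue entry is the id of a
        // request whose final chunk has already been persisted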
        thread_pool->enqueue([&queue, &queue_mutex, this, i]() {
            while(!exit) {
                std::unique_lock<std::mutex> qlk(queue_mutex);

                if(queue.empty()) {
                    qlk.unlock();
                } else {
                    uint64_t req_id = queue.front();
                    queue.pop_front();
                    qlk.unlock();

                    std::unique_lock mlk(mutex);
                    req_res_t orig_req_res = req_res_map[req_id];
                    mlk.unlock();

                    // scan db for all logs associated with request
                    const std::string& req_key_prefix = get_req_prefix_key(req_id);

                    rocksdb::Iterator* iter = store->scan(req_key_prefix);
                    std::string prev_body = ""; // used to handle partial JSON documents caused by chunking
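
                    // replay each chunk in order through its route handler; a
                    // handler can leave a trailing partial document in the body,
                    // which is carried over to the next chunk via prev_body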
                    while(iter->Valid() && iter->key().starts_with(req_key_prefix)) {
                        std::shared_ptr<http_req>& orig_req = orig_req_res.req;
                        auto _req = orig_req->_req;
                        orig_req->body = prev_body;
                        orig_req->deserialize(iter->value().ToString());
                        orig_req->_req = _req;

                        // update thread local for reference during a crash
                        write_log_index = orig_req->log_index;

                        //LOG(INFO) << "original request: " << orig_req_res.req << ", _req: " << orig_req_res.req->_req;

                        route_path* found_rpath = nullptr;
                        bool route_found = server->get_route(orig_req->route_hash, &found_rpath);
                        bool async_res = false;

                        if(route_found) {
                            async_res = found_rpath->async_res;
                            found_rpath->handler(orig_req, orig_req_res.res);
                            prev_body = orig_req->body;
                        } else {
                            orig_req_res.res->set_404();
                            prev_body = "";
                        }
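
                        // for synchronous handlers, stream the response back to
                        // the client from here; async handlers respond on their own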
                        if(!async_res && orig_req_res.req->_req != nullptr) {
                            deferred_req_res_t* deferred_req_res = new deferred_req_res_t(orig_req_res.req,
                                                                                          orig_req_res.res,
                                                                                          server, true);
                            server->get_message_dispatcher()->send_message(HttpServer::STREAM_RESPONSE_MESSAGE,
                                                                           deferred_req_res);
                        }

                        queued_writes--;
                        iter->Next();
                    }

                    delete iter;

                    //LOG(INFO) << "Erasing request data from disk and memory for request " << orig_req_res.req->start_ts;

                    // we can delete the buffered request content
                    store->delete_range(req_key_prefix, req_key_prefix + StringUtils::serialize_uint32_t(UINT32_MAX));

                    std::unique_lock lk(mutex);
                    req_res_map.erase(req_id);
                }

                std::this_thread::sleep_for(std::chrono::milliseconds(10));
            }
        });
    }
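
    // the calling thread doubles as the GC thread, pruning requests that were
    // started but never completed (e.g. the client disconnected mid-upload)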
    while(!exit) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1000));

        //LOG(INFO) << "Batch indexer main thread";

        // do gc, if we are due for one
        uint64_t seconds_elapsed = std::chrono::duration_cast<std::chrono::seconds>(
                                   std::chrono::high_resolution_clock::now() - last_gc_run).count();

        if(seconds_elapsed > GC_INTERVAL_SECONDS) {
            std::unique_lock lk(mutex);
            LOG(INFO) << "Running GC for aborted requests, req map size: " << req_res_map.size();

            // iterate through all map entries and delete those older than GC_PRUNE_MAX_SECONDS
            for(auto it = req_res_map.cbegin(); it != req_res_map.cend();) {
                uint64_t seconds_since_batch_start = std::chrono::duration_cast<std::chrono::seconds>(
                        std::chrono::system_clock::now().time_since_epoch()).count() - it->second.batch_begin_ts;

                //LOG(INFO) << "Seconds since batch start: " << seconds_since_batch_start;

                if(seconds_since_batch_start > GC_PRUNE_MAX_SECONDS) {
                    LOG(INFO) << "Deleting partial upload for req id " << it->second.req->start_ts;
                    const std::string& req_key_prefix = get_req_prefix_key(it->second.req->start_ts);
                    store->delete_range(req_key_prefix, req_key_prefix + StringUtils::serialize_uint32_t(UINT32_MAX));
                    request_to_chunk.erase(it->second.req->start_ts);
                    it = req_res_map.erase(it);
                } else {
                    it++;
                }
            }

            last_gc_run = std::chrono::high_resolution_clock::now();
        }
    }

    LOG(INFO) << "Batched indexer threadpool shutdown...";
    thread_pool->shutdown();
}
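
// Key prefix under which all chunks of a request are stored:
// RAFT_REQ_LOG_PREFIX + serialized request id + "_"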
std::string BatchedIndexer::get_req_prefix_key(uint64_t req_id) {
    const std::string& req_key_prefix = RAFT_REQ_LOG_PREFIX +
                                        StringUtils::serialize_uint64_t(req_id) + "_";

    return req_key_prefix;
}

BatchedIndexer::~BatchedIndexer() {
    delete [] qmutuxes;
    delete thread_pool;
}
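
// Signals the worker threads and the GC loop in run() to wind down after
// their current iteration; run() then shuts the thread pool down.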
void BatchedIndexer::stop() {
    exit = true;
}

int64_t BatchedIndexer::get_queued_writes() {
    return queued_writes;
}