mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 21:22:25 +08:00
Optimize facet removal.
This commit is contained in:
parent
3071389648
commit
9872eebdf5
@ -114,6 +114,12 @@ private:
|
||||
// auto incrementing ID that is assigned to each unique facet value string
|
||||
std::atomic_uint32_t next_facet_id = 0;
|
||||
|
||||
void get_stringified_value(const nlohmann::json& value, const field& afield,
|
||||
std::vector<std::string>& values);
|
||||
|
||||
void get_stringified_values(const nlohmann::json& document, const field& afield,
|
||||
std::vector<std::string>& values);
|
||||
|
||||
public:
|
||||
|
||||
facet_index_t() = default;
|
||||
@ -127,7 +133,7 @@ public:
|
||||
|
||||
void erase(const std::string& field_name);
|
||||
|
||||
void remove(const std::string& field_name, const uint32_t seq_id);
|
||||
void remove(const nlohmann::json& doc, const field& afield, const uint32_t seq_id);
|
||||
|
||||
bool contains(const std::string& field_name);
|
||||
|
||||
|
@ -1872,8 +1872,6 @@ Option<bool> CollectionManager::load_collection(const nlohmann::json &collection
|
||||
LOG(INFO) << "Indexed " << num_indexed_docs << "/" << num_found_docs
|
||||
<< " documents into collection " << collection->get_name();
|
||||
|
||||
collection->_get_index()->log_insert_time = true;
|
||||
collection->_get_index()->_get_facet_index()->log_insert_time = true;
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
|
@ -213,48 +213,97 @@ void facet_index_t::erase(const std::string& field_name) {
|
||||
facet_field_map.erase(field_name);
|
||||
}
|
||||
|
||||
void facet_index_t::remove(const std::string& field_name, const uint32_t seq_id) {
|
||||
const auto facet_field_it = facet_field_map.find(field_name);
|
||||
if(facet_field_it != facet_field_map.end()) {
|
||||
auto& facet_index_map = facet_field_it->second.fvalue_seq_ids;
|
||||
std::vector<std::string> dead_fvalues;
|
||||
void facet_index_t::get_stringified_value(const nlohmann::json& value, const field& afield,
|
||||
std::vector<std::string>& values) {
|
||||
if(afield.is_int32()) {
|
||||
int32_t raw_val = value.get<int32_t>();
|
||||
values.push_back(std::to_string(raw_val));
|
||||
}
|
||||
else if(afield.is_int64()) {
|
||||
int64_t raw_val = value.get<int64_t>();
|
||||
values.push_back(std::to_string(raw_val));
|
||||
}
|
||||
else if(afield.is_string()) {
|
||||
const std::string& raw_val = value.get<std::string>().substr(0, 100);
|
||||
values.push_back(raw_val);
|
||||
}
|
||||
else if(afield.is_float()) {
|
||||
float raw_val = value.get<float>();
|
||||
values.push_back(StringUtils::float_to_str(raw_val));
|
||||
}
|
||||
else if(afield.is_bool()) {
|
||||
bool raw_val = value.get<bool>();
|
||||
auto fhash = (uint32_t)raw_val;
|
||||
auto str_val = (raw_val == 1) ? "true" : "false";
|
||||
values.emplace_back(str_val);
|
||||
}
|
||||
}
|
||||
|
||||
for(auto facet_ids_seq_ids = facet_index_map.begin(); facet_ids_seq_ids != facet_index_map.end(); facet_ids_seq_ids++) {
|
||||
void*& ids = facet_ids_seq_ids->second.seq_ids;
|
||||
if(ids && ids_t::contains(ids, seq_id)) {
|
||||
ids_t::erase(ids, seq_id);
|
||||
auto& count_list = facet_field_it->second.counts;
|
||||
auto curr = facet_ids_seq_ids->second.facet_count_it;
|
||||
auto old_count = curr->count;
|
||||
curr->count = ids_t::num_ids(ids);
|
||||
auto new_count = curr->count;
|
||||
void facet_index_t::get_stringified_values(const nlohmann::json& document, const field& afield,
|
||||
std::vector<std::string>& values) {
|
||||
bool is_array = afield.is_array();
|
||||
|
||||
// move the node lower in the count list
|
||||
auto& count_map = facet_field_it->second.count_map;
|
||||
update_count_nodes(count_list, count_map, old_count, new_count, curr);
|
||||
if(!is_array) {
|
||||
return get_stringified_value(document[afield.name], afield, values);
|
||||
} else {
|
||||
const auto& field_values = document[afield.name];
|
||||
for(size_t i = 0; i < field_values.size(); i++) {
|
||||
get_stringified_value(field_values[i], afield, values);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(ids_t::num_ids(ids) == 0) {
|
||||
ids_t::destroy_list(ids);
|
||||
dead_fvalues.push_back(facet_ids_seq_ids->first);
|
||||
void facet_index_t::remove(const nlohmann::json& doc, const field& afield, const uint32_t seq_id) {
|
||||
const auto facet_field_it = facet_field_map.find(afield.name);
|
||||
if(facet_field_it == facet_field_map.end()) {
|
||||
return ;
|
||||
}
|
||||
|
||||
//remove from int64 lookup map first
|
||||
auto& fhash_int64_map = facet_field_it->second.fhash_to_int64_map;
|
||||
uint32_t fhash = facet_ids_seq_ids->second.facet_id;
|
||||
fhash_int64_map.erase(fhash);
|
||||
auto& facet_index_map = facet_field_it->second.fvalue_seq_ids;
|
||||
std::vector<std::string> dead_fvalues;
|
||||
std::vector<std::string> values;
|
||||
get_stringified_values(doc, afield, values);
|
||||
|
||||
count_map.erase(new_count);
|
||||
count_list.erase(curr);
|
||||
}
|
||||
for(const auto& value: values) {
|
||||
auto fvalue_it = facet_index_map.find(value);
|
||||
if(fvalue_it == facet_index_map.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
void*& ids = fvalue_it->second.seq_ids;
|
||||
if(ids && ids_t::contains(ids, seq_id)) {
|
||||
ids_t::erase(ids, seq_id);
|
||||
auto& count_list = facet_field_it->second.counts;
|
||||
auto curr = fvalue_it->second.facet_count_it;
|
||||
auto old_count = curr->count;
|
||||
curr->count = ids_t::num_ids(ids);
|
||||
auto new_count = curr->count;
|
||||
|
||||
// move the node lower in the count list
|
||||
auto& count_map = facet_field_it->second.count_map;
|
||||
update_count_nodes(count_list, count_map, old_count, new_count, curr);
|
||||
|
||||
if(ids_t::num_ids(ids) == 0) {
|
||||
ids_t::destroy_list(ids);
|
||||
dead_fvalues.push_back(fvalue_it->first);
|
||||
|
||||
//remove from int64 lookup map first
|
||||
auto& fhash_int64_map = facet_field_it->second.fhash_to_int64_map;
|
||||
uint32_t fhash = fvalue_it->second.facet_id;
|
||||
fhash_int64_map.erase(fhash);
|
||||
|
||||
count_map.erase(new_count);
|
||||
count_list.erase(curr);
|
||||
}
|
||||
}
|
||||
|
||||
for(auto& dead_fvalue: dead_fvalues) {
|
||||
facet_index_map.erase(dead_fvalue);
|
||||
}
|
||||
|
||||
auto& seq_id_hashes = facet_field_it->second.seq_id_hashes;
|
||||
seq_id_hashes->erase(seq_id);
|
||||
}
|
||||
|
||||
for(auto& dead_fvalue: dead_fvalues) {
|
||||
facet_index_map.erase(dead_fvalue);
|
||||
}
|
||||
|
||||
auto& seq_id_hashes = facet_field_it->second.seq_id_hashes;
|
||||
seq_id_hashes->erase(seq_id);
|
||||
}
|
||||
|
||||
size_t facet_index_t::get_facet_count(const std::string& field_name) {
|
||||
|
@ -6460,7 +6460,7 @@ void Index::remove_field(uint32_t seq_id, const nlohmann::json& document, const
|
||||
}
|
||||
|
||||
// remove facets
|
||||
facet_index_v4->remove(field_name, seq_id);
|
||||
facet_index_v4->remove(document, search_field, seq_id);
|
||||
|
||||
// remove sort field
|
||||
if(sort_index.count(field_name) != 0) {
|
||||
|
@ -2855,7 +2855,11 @@ TEST_F(CollectionFacetingTest, FhashInt64MapTest) {
|
||||
facet_index_v4.insert("visitors", fvalue_to_seq_ids, seq_id_to_fvalues);
|
||||
ASSERT_EQ(3, facet_index_v4.get_fhash_int64_map("visitors").size());
|
||||
|
||||
facet_index_v4.remove("visitors", 0);
|
||||
field visitorsf("visitors", field_types::INT64, true);
|
||||
nlohmann::json doc;
|
||||
doc["visitors"] = 227489798;
|
||||
|
||||
facet_index_v4.remove(doc, visitorsf, 0);
|
||||
ASSERT_EQ(2, facet_index_v4.get_fhash_int64_map("visitors").size());
|
||||
|
||||
fvalue_to_seq_ids.clear();
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "facet_index.h"
|
||||
|
||||
TEST(FacetIndexTest, FacetValueDeletion) {
|
||||
TEST(FacetIndexTest, FacetValueDeletionString) {
|
||||
facet_index_t findex;
|
||||
std::unordered_map<facet_value_id_t, std::vector<uint32_t>, facet_value_id_t::Hash> fvalue_to_seq_ids;
|
||||
std::unordered_map<uint32_t, std::vector<facet_value_id_t>> seq_id_to_fvalues;
|
||||
@ -13,12 +13,40 @@ TEST(FacetIndexTest, FacetValueDeletion) {
|
||||
seq_id_to_fvalues[1] = {nike};
|
||||
seq_id_to_fvalues[2] = {nike};
|
||||
|
||||
findex.insert("brands", fvalue_to_seq_ids, seq_id_to_fvalues, true);
|
||||
findex.remove("nike", 0);
|
||||
findex.remove("nike", 1);
|
||||
findex.remove("nike", 2);
|
||||
field brandf("brand", field_types::STRING, true);
|
||||
nlohmann::json doc;
|
||||
doc["brand"] = "nike";
|
||||
|
||||
ASSERT_FALSE(findex.facet_value_exists("brands", "nike"));
|
||||
findex.insert("brand", fvalue_to_seq_ids, seq_id_to_fvalues, true);
|
||||
findex.remove(doc, brandf, 0);
|
||||
findex.remove(doc, brandf, 1);
|
||||
findex.remove(doc, brandf, 2);
|
||||
|
||||
ASSERT_FALSE(findex.facet_value_exists("brand", "nike"));
|
||||
}
|
||||
|
||||
TEST(FacetIndexTest, FacetValueDeletionFloat) {
|
||||
facet_index_t findex;
|
||||
std::unordered_map<facet_value_id_t, std::vector<uint32_t>, facet_value_id_t::Hash> fvalue_to_seq_ids;
|
||||
std::unordered_map<uint32_t, std::vector<facet_value_id_t>> seq_id_to_fvalues;
|
||||
|
||||
facet_value_id_t price1("99.95", 1);
|
||||
|
||||
fvalue_to_seq_ids[price1] = {0, 1, 2};
|
||||
seq_id_to_fvalues[0] = {price1};
|
||||
seq_id_to_fvalues[1] = {price1};
|
||||
seq_id_to_fvalues[2] = {price1};
|
||||
|
||||
field pricef("price", field_types::FLOAT, true);
|
||||
nlohmann::json doc;
|
||||
doc["price"] = "99.95";
|
||||
|
||||
findex.insert("brand", fvalue_to_seq_ids, seq_id_to_fvalues, true);
|
||||
findex.remove(doc, pricef, 0);
|
||||
findex.remove(doc, pricef, 1);
|
||||
findex.remove(doc, pricef, 2);
|
||||
|
||||
ASSERT_FALSE(findex.facet_value_exists("price", "99.95"));
|
||||
}
|
||||
|
||||
TEST(FacetIndexTest, UpdateWhenAllCountsLessThanNewCount) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user