Handle repeating facet values in value facet index.

This commit is contained in:
Kishore Nallan 2023-11-11 20:00:49 +05:30
parent 41faf876c0
commit 60d5951b80
5 changed files with 76 additions and 3 deletions

View File

@ -163,4 +163,8 @@ public:
std::list<facet_count_t>::iterator& curr);
bool facet_value_exists(const std::string& field_name, const std::string& fvalue);
// Number of ids stored (per ids_t::num_ids) for facet value `fvalue` of field `field_name`;
// 0 when the field or the value is not present in the facet index.
size_t facet_val_num_ids(const std::string& field_name, const std::string& fvalue);
// `count` recorded on the count-list node that the facet value `fvalue` of field
// `field_name` points at; 0 when the field or the value is not present in the facet index.
size_t facet_node_count(const std::string& field_name, const std::string& fvalue);
};

View File

@ -652,6 +652,8 @@ public:
const spp::sparse_hash_map<std::string, hnsw_index_t*>& _get_vector_index() const;
// Returns the index's facet_index_v4 pointer (tests use it to inspect facet state).
facet_index_t* _get_facet_index() const;
static int get_bounded_typo_cost(const size_t max_cost, const size_t token_len,
size_t min_len_1typo, size_t min_len_2typo);

View File

@ -59,9 +59,8 @@ void facet_index_t::insert(const std::string& field_name,std::unordered_map<face
fis.facet_id = facet_id;
if(facet_index.has_value_index) {
auto new_count = seq_ids.size();
fis.seq_ids = ids_t::create(seq_ids);
auto new_count = ids_t::num_ids(fis.seq_ids);
auto& count_map = facet_index.count_map;
auto count_map_it = count_map.lower_bound(new_count);
@ -227,7 +226,7 @@ void facet_index_t::remove(const std::string& field_name, const uint32_t seq_id)
auto& count_list = facet_field_it->second.counts;
auto curr = facet_ids_seq_ids->second.facet_count_it;
auto old_count = curr->count;
curr->count--;
curr->count = ids_t::num_ids(ids);
auto new_count = curr->count;
// move the node lower in the count list
@ -496,3 +495,30 @@ bool facet_index_t::facet_value_exists(const std::string& field_name, const std:
const auto& facet_index = facet_field_map_it->second;
return facet_index.fvalue_seq_ids.find(fvalue) != facet_index.fvalue_seq_ids.end();
}
// Returns the number of ids held in the `seq_ids` container of facet value `fvalue`
// under field `field_name`, as reported by ids_t::num_ids().
//
// Returns 0 when `field_name` is not present in `facet_field_map`, or when the field
// has no entry for `fvalue`.
size_t facet_index_t::facet_val_num_ids(const std::string& field_name, const std::string& fvalue) {
    const auto facet_field_map_it = facet_field_map.find(field_name);
    if(facet_field_map_it == facet_field_map.end()) {
        return 0;
    }

    // A single find() replaces count() followed by operator[]: one lookup instead of
    // two, and no mutating accessor on what is a read-only query.
    auto& fvalue_seq_ids = facet_field_map_it->second.fvalue_seq_ids;
    const auto fvalue_it = fvalue_seq_ids.find(fvalue);
    if(fvalue_it == fvalue_seq_ids.end()) {
        return 0;
    }

    return ids_t::num_ids(fvalue_it->second.seq_ids);
}
// Returns the `count` stored on the count-list node referenced by the `facet_count_it`
// of facet value `fvalue` under field `field_name`.
//
// Returns 0 when `field_name` is not present in `facet_field_map`, or when the field
// has no entry for `fvalue`.
//
// NOTE(review): `facet_count_it` looks like it is only populated for fields that keep
// a value index (see the has_value_index branch in insert()) -- confirm callers only
// query value-indexed fields.
size_t facet_index_t::facet_node_count(const std::string& field_name, const std::string& fvalue) {
    const auto facet_field_map_it = facet_field_map.find(field_name);
    if(facet_field_map_it == facet_field_map.end()) {
        return 0;
    }

    // A single find() replaces count() followed by operator[]: one lookup instead of
    // two, and no mutating accessor on what is a read-only query.
    auto& fvalue_seq_ids = facet_field_map_it->second.fvalue_seq_ids;
    const auto fvalue_it = fvalue_seq_ids.find(fvalue);
    if(fvalue_it == fvalue_seq_ids.end()) {
        return 0;
    }

    return fvalue_it->second.facet_count_it->count;
}

View File

@ -6525,6 +6525,10 @@ const spp::sparse_hash_map<std::string, hnsw_index_t*>& Index::_get_vector_index
return vector_index;
}
// Accessor that hands back the `facet_index_v4` member unchanged.
facet_index_t* Index::_get_facet_index() const {
    return this->facet_index_v4;
}
void Index::refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields) {
std::unique_lock lock(mutex);

View File

@ -608,6 +608,43 @@ TEST_F(CollectionOptimizedFacetingTest, FacetCountsFloatPrecision) {
collectionManager.drop_collection("coll1");
}
// Regression test for repeated facet values inside one array field: a document that
// lists "alpha" twice must contribute a single id / count of 1 for "alpha", and
// removing that document must leave the counts contributed by other documents intact.
TEST_F(CollectionOptimizedFacetingTest, FacetDeleteRepeatingValuesInArray) {
    std::vector<field> fields = {field("tags", field_types::STRING_ARRAY, true)};

    Collection* coll1 = collectionManager.get_collection("coll1").get();
    if (coll1 == nullptr) {
        coll1 = collectionManager.create_collection("coll1", 4, fields).get();
    }

    // Document 0 repeats "alpha" within the same array.
    nlohmann::json doc;
    doc["id"] = "0";
    doc["tags"] = {"alpha", "beta", "alpha"};
    coll1->add(doc.dump());

    auto findex = coll1->_get_index()->_get_facet_index();

    // The repeated value is recorded once, both in the id list and on the count node.
    ASSERT_EQ(1, findex->facet_val_num_ids("tags", "alpha"));
    ASSERT_EQ(1, findex->facet_node_count("tags", "alpha"));

    // Document 1 also carries "alpha", so the value survives the removal of document 0.
    doc["id"] = "1";
    doc["tags"] = {"alpha"};
    coll1->add(doc.dump());

    coll1->remove("0");

    // Only document 1 is left for "alpha".
    ASSERT_EQ(1, findex->facet_val_num_ids("tags", "alpha"));
    ASSERT_EQ(1, findex->facet_node_count("tags", "alpha"));

    // "beta" was present only in the removed document.
    ASSERT_EQ(0, findex->facet_val_num_ids("tags", "beta"));
    ASSERT_EQ(0, findex->facet_node_count("tags", "beta"));

    collectionManager.drop_collection("coll1");
}
TEST_F(CollectionOptimizedFacetingTest, FacetStatOnFloatFields) {
Collection *coll_float_fields;