Fix facet query max count on value based index.

This commit is contained in:
Kishore Nallan 2023-08-26 13:11:28 +05:30
parent f8f9eb89dc
commit a05e1e2f81
4 changed files with 70 additions and 22 deletions

View File

@ -125,7 +125,9 @@ public:
size_t get_facet_count(const std::string& field_name);
size_t intersect(const std::string& val, const uint32_t* result_ids, size_t result_id_len,
size_t intersect(const std::string& field_name,
bool has_facet_query, const std::vector<std::string>& fvalue_searched_tokens,
const uint32_t* result_ids, size_t result_id_len,
size_t max_facet_count, std::map<std::string, uint32_t>& found,
bool is_wildcard_no_filter_query);

View File

@ -145,12 +145,14 @@ size_t facet_index_t::get_facet_count(const std::string& field_name) {
}
//returns the count of matching seq_ids from result array
size_t facet_index_t::intersect(const std::string& field, const uint32_t* result_ids, size_t result_ids_len,
size_t facet_index_t::intersect(const std::string& field_name,
bool has_facet_query, const std::vector<std::string>& fvalue_searched_tokens,
const uint32_t* result_ids, size_t result_ids_len,
size_t max_facet_count, std::map<std::string, uint32_t>& found,
bool is_wildcard_no_filter_query) {
//LOG (INFO) << "intersecting field " << field;
const auto& facet_field_it = facet_field_map.find(field);
const auto& facet_field_it = facet_field_map.find(field_name);
if(facet_field_it == facet_field_map.end()) {
return 0;
}
@ -169,6 +171,22 @@ size_t facet_index_t::intersect(const std::string& field, const uint32_t* result
//LOG(INFO) << "checking ids in facet_value " << facet_count.facet_value << " having total count "
// << facet_count.count << ", is_wildcard_no_filter_query: " << is_wildcard_no_filter_query;
uint32_t count = 0;
if(has_facet_query) {
bool found_search_token = false;
auto facet_str = facet_count.facet_value;
transform(facet_str.begin(), facet_str.end(), facet_str.begin(), ::tolower);
for(const auto& searched_token: fvalue_searched_tokens) {
if(facet_str.find(searched_token) != std::string::npos) {
found_search_token = true;
break;
}
}
if(!found_search_token) {
continue;
}
}
if(is_wildcard_no_filter_query) {
count = facet_count.count;

View File

@ -1292,7 +1292,8 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
std::map<std::string, uint32_t> facet_results;
facet_index_v4->intersect(facet_field.name, result_ids,
facet_index_v4->intersect(facet_field.name, use_facet_query,
facet_infos[findex].fvalue_searched_tokens, result_ids,
results_size, max_facet_count, facet_results, is_wildcard_no_filter_query);
for(const auto& kv : facet_results) {
@ -1306,24 +1307,8 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
facet_count.count = kv.second;
}
} else {
if(use_facet_query) {
const auto& searched_tokens = facet_infos[findex].fvalue_searched_tokens;
auto facet_str = kv.first;
transform(facet_str.begin(), facet_str.end(), facet_str.begin(), ::tolower);
for(const auto& val : searched_tokens) {
if(facet_str.find(val) != std::string::npos) {
facet_count_t& facet_count = a_facet.value_result_map[kv.first];
facet_count.count = kv.second;
a_facet.fvalue_tokens[kv.first] = searched_tokens;
}
}
} else {
facet_count_t& facet_count = a_facet.value_result_map[kv.first];
facet_count.count = kv.second;
}
facet_count_t& facet_count = a_facet.value_result_map[kv.first];
facet_count.count = kv.second;
}
if(should_compute_stats) {

View File

@ -1349,6 +1349,49 @@ TEST_F(CollectionOptimizedFacetingTest, FacetTestWithDeletedDoc) {
ASSERT_EQ(3, results["facet_counts"][0]["counts"].size());
}
// Exercises a facet query ("color:<text>") on the "color" field, passing VALUE as the final
// search argument: a query for "b" must yield 4 facet counts, and a query for text that appears
// in no color value must yield none.
TEST_F(CollectionOptimizedFacetingTest, FacetQueryTest) {
    std::vector<field> schema_fields = {
        field("color", field_types::STRING, true),
    };

    Collection* color_coll = collectionManager.create_collection("coll1", 1, schema_fields).get();

    std::vector<std::string> color_values = {"apple red", "azure", "amazon green", "apricot orange",
                                             "blue", "barrel blue", "banana yellow", "ball green", "baikal"};

    // Index 100 documents, cycling through the color values in order.
    for(size_t doc_idx = 0; doc_idx < 100; ++doc_idx) {
        nlohmann::json color_doc;
        color_doc["color"] = color_values[doc_idx % color_values.size()];
        ASSERT_TRUE(color_coll->add(color_doc.dump()).ok());
    }

    // add colors that DON'T start with "b" to push these up the count list
    // (the first four entries of the list all begin with "a")
    for(size_t extra_idx = 0; extra_idx < 4; ++extra_idx) {
        nlohmann::json color_doc;
        color_doc["color"] = color_values[extra_idx];
        ASSERT_TRUE(color_coll->add(color_doc.dump()).ok());
    }

    // Wildcard search faceting on "color" with the facet query "color:b".
    auto facet_response = color_coll->search(
            "*", {},
            "", {"color"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
            spp::sparse_hash_set<std::string>(), 5, "color:b", 30, 4, "", 20, {}, {}, {}, 0,
            "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
            4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();

    ASSERT_EQ(1, facet_response["facet_counts"].size());
    ASSERT_EQ(4, facet_response["facet_counts"][0]["counts"].size()); // 4 is default candidate size

    // junk string should produce no facets
    facet_response = color_coll->search(
            "*", {},
            "", {"color"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
            spp::sparse_hash_set<std::string>(), 5, "color:xsda", 30, 4, "", 20, {}, {}, {}, 0,
            "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
            4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();

    ASSERT_EQ(1, facet_response["facet_counts"].size());
    ASSERT_EQ(0, facet_response["facet_counts"][0]["counts"].size());
}
TEST_F(CollectionOptimizedFacetingTest, StringLengthTest) {
std::vector<field> fields = {
field("tags", field_types::STRING_ARRAY, true),