Handle repeated facet values in arrays during searching.
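
Repeated values within a document's array field used to be de-duplicated at
indexing time via a facet_hash_set in tokenize_string_array_with_facets. That
set is removed, so one facet hash is now recorded per array element, and
duplicates are instead skipped at query time in do_facets, which counts each
distinct facet hash at most once per document.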

Kishore Nallan 2023-08-03 15:23:30 +05:30
parent 876e998cfe
commit 956d596e43
3 changed files with 25 additions and 13 deletions

View File

@@ -1727,13 +1727,6 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
     if(!facet_query.query.empty()) {
-        // identify facet hash tokens
-        for(const auto& the_facet: facets) {
-            if(the_facet.field_name == facet_query.field_name) {
-                //the_facet.hash_tokens
-                break;
-            }
-        }
-
         auto fq_field = search_schema.at(facet_query.field_name);
         bool is_cyrillic = Tokenizer::is_cyrillic(fq_field.locale);
         bool normalise = is_cyrillic ? false : true;
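
The block removed above was dead code: the loop only located the matching
facet and broke out, with its one meaningful statement commented out, so
deleting it does not change the behaviour of Collection::search.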

View File

@@ -1056,8 +1056,6 @@ void Index::tokenize_string_array_with_facets(const std::vector<std::string>& st
                                               std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets,
                                               std::vector<uint64_t>& facet_hashes) {
-    std::set<uint64_t> facet_hash_set; // required to deal with repeating phrases
-
     for(size_t array_index = 0; array_index < strings.size(); array_index++) {
         const std::string& str = strings[array_index];
         std::set<std::string> token_set; // required to deal with repeating tokens
@@ -1091,9 +1089,8 @@ void Index::tokenize_string_array_with_facets(const std::vector<std::string>& st
             }
         }

-        if(is_facet && facet_hash_set.count(facet_hash) == 0) {
+        if(is_facet) {
             facet_hashes.push_back(facet_hash);
-            facet_hash_set.insert(facet_hash);
         }

         if(token_set.empty()) {
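
With the per-document facet_hash_set gone, indexing now records one hash per
array element, duplicates included. A minimal standalone sketch of that
behaviour (std::hash stands in for Typesense's internal facet hashing, which
is an assumption here, as is the hash_facet_values name):

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

// One hash per array element: ["Foo", "Foo", "Bazinga"] now yields three
// entries in facet_hashes (two of them equal) instead of two.
std::vector<uint64_t> hash_facet_values(const std::vector<std::string>& strings) {
    std::vector<uint64_t> facet_hashes;
    for(const std::string& str : strings) {
        uint64_t facet_hash = std::hash<std::string>{}(str); // stand-in hash
        facet_hashes.push_back(facet_hash); // no facet_hash_set guard any more
    }
    return facet_hashes;
}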
@@ -1226,11 +1223,19 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
                 RETURN_CIRCUIT_BREAKER
             }

+            std::set<uint64_t> unique_facet_hashes;
+
             for(size_t j = 0; j < facet_hash_count; j++) {
                 if(facet_field.is_array()) {
                     fhash = facet_map_it->second.hashes[j];
                 }

+                if(unique_facet_hashes.count(fhash) == 0) {
+                    unique_facet_hashes.insert(fhash);
+                } else {
+                    continue;
+                }
+
                 if(should_compute_stats) {
                     compute_facet_stats(a_facet, fhash, facet_field.type);
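
The counting side of the fix, as a self-contained sketch: each document's
hashes are walked with a fresh unique_facet_hashes set, so a value repeated
within one document is counted once, while the same value across documents
still accumulates. Everything here except the unique_facet_hashes guard is
illustrative scaffolding, not code from the repository:

#include <cstdint>
#include <cstdio>
#include <map>
#include <set>
#include <vector>

// Count each distinct facet hash at most once per document.
std::map<uint64_t, size_t> count_facets(const std::vector<std::vector<uint64_t>>& docs) {
    std::map<uint64_t, size_t> counts;
    for(const auto& doc_hashes : docs) {
        std::set<uint64_t> unique_facet_hashes; // reset for every document
        for(uint64_t fhash : doc_hashes) {
            if(!unique_facet_hashes.insert(fhash).second) {
                continue; // repeated value within the same document
            }
            counts[fhash]++;
        }
    }
    return counts;
}

int main() {
    // doc1 = {1}, doc2 = {1, 1, 2}  ->  hash 1 counted twice, hash 2 once,
    // mirroring Foo:2 and Bazinga:1 in the test below.
    for(const auto& [fhash, count] : count_facets({{1}, {1, 1, 2}})) {
        std::printf("%llu -> %zu\n", (unsigned long long) fhash, count);
    }
}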

View File

@@ -1111,7 +1111,7 @@ TEST_F(CollectionFacetingTest, FacetByArrayField) {
     })"_json;

     auto doc2 = R"({
-        "data": ["Foo", "Foo"]
+        "data": ["Foo", "Foo", "Bazinga"]
     })"_json;

     ASSERT_TRUE(coll1->add(doc1.dump(), CREATE).ok());
@@ -1124,9 +1124,23 @@ TEST_F(CollectionFacetingTest, FacetByArrayField) {
     ASSERT_EQ(2, results["found"].get<size_t>());
     ASSERT_EQ(1, results["facet_counts"].size());
     ASSERT_EQ("data", results["facet_counts"][0]["field_name"]);
-    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());

     ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"].get<size_t>());
     ASSERT_EQ("Foo", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());

+    ASSERT_EQ(1, results["facet_counts"][0]["counts"][1]["count"].get<size_t>());
+    ASSERT_EQ("Bazinga", results["facet_counts"][0]["counts"][1]["value"].get<std::string>());
+
+    results = coll1->search("*", {}, "", {"data"}, {}, {0}, 10, 1,
+                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "data:baz", 30, 4).get();
+
+    ASSERT_EQ(2, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ("data", results["facet_counts"][0]["field_name"]);
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
+
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"][0]["count"].get<size_t>());
+    ASSERT_EQ("Bazinga", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
 }

 TEST_F(CollectionFacetingTest, FacetParseTest){
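
Taken together, the FacetByArrayField assertions above check both sides of
the fix: the wildcard search expects two buckets (Foo counted twice, i.e.
once per document despite the repeated value in doc2, and Bazinga counted
once), while the facet query "data:baz" drills down to the Bazinga bucket
alone. The count of 2 for Foo implies that doc1, whose body is not shown in
this hunk, also contains "Foo".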