Mirror of https://github.com/typesense/typesense.git
Handle repeated facet values in arrays during searching.

commit 956d596e43 (parent 876e998cfe)

Previously, facet hashes for an array field were de-duplicated while indexing, so the stored hashes no longer lined up one-to-one with the array's elements. This commit records one hash per array element at index time, and instead skips repeated hashes per document at search time, so a value that appears twice in one document's array is counted once.
```diff
@@ -1727,13 +1727,6 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
     if(!facet_query.query.empty()) {
-        // identify facet hash tokens
-
-        for(const auto& the_facet: facets) {
-            if(the_facet.field_name == facet_query.field_name) {
-                //the_facet.hash_tokens
-                break;
-            }
-        }
-
         auto fq_field = search_schema.at(facet_query.field_name);
         bool is_cyrillic = Tokenizer::is_cyrillic(fq_field.locale);
         bool normalise = is_cyrillic ? false : true;
```
```diff
@@ -1056,8 +1056,6 @@ void Index::tokenize_string_array_with_facets(const std::vector<std::string>& strings,
                                               std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets,
                                               std::vector<uint64_t>& facet_hashes) {
 
-    std::set<uint64_t> facet_hash_set; // required to deal with repeating phrases
-
     for(size_t array_index = 0; array_index < strings.size(); array_index++) {
         const std::string& str = strings[array_index];
         std::set<std::string> token_set; // required to deal with repeating tokens
```
```diff
@@ -1091,9 +1089,8 @@ void Index::tokenize_string_array_with_facets(const std::vector<std::string>& strings,
             }
         }
 
-        if(is_facet && facet_hash_set.count(facet_hash) == 0) {
+        if(is_facet) {
             facet_hashes.push_back(facet_hash);
-            facet_hash_set.insert(facet_hash);
         }
 
         if(token_set.empty()) {
```
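The two hunks above move de-duplication out of indexing: previously `facet_hash_set` dropped a repeated value, so the stored `facet_hashes` had fewer entries than the array had elements. Below is a minimal standalone sketch of the before/after behaviour, using `std::hash<std::string>` as a stand-in for the real facet hash function (an assumption, for illustration only):

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <set>
#include <string>
#include <vector>

// Stand-in for the real facet hash function (assumption: std::hash is used
// here purely for illustration).
static uint64_t facet_hash(const std::string& value) {
    return std::hash<std::string>{}(value);
}

int main() {
    const std::vector<std::string> data = {"Foo", "Foo", "Bazinga"};

    // Old behaviour: a per-document set drops the repeated "Foo", leaving
    // two stored hashes for three array elements.
    std::set<uint64_t> facet_hash_set;
    std::vector<uint64_t> old_hashes;
    for(const auto& v : data) {
        const uint64_t h = facet_hash(v);
        if(facet_hash_set.insert(h).second) {
            old_hashes.push_back(h);
        }
    }

    // New behaviour: one hash per array element, repeats included, so the
    // hash list stays aligned with the array entries.
    std::vector<uint64_t> new_hashes;
    for(const auto& v : data) {
        new_hashes.push_back(facet_hash(v));
    }

    std::cout << old_hashes.size() << " vs " << new_hashes.size() << "\n"; // 2 vs 3
}
```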
```diff
@@ -1226,11 +1223,19 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
             RETURN_CIRCUIT_BREAKER
         }
 
+        std::set<uint32_t> unique_facet_hashes;
+
         for(size_t j = 0; j < facet_hash_count; j++) {
 
             if(facet_field.is_array()) {
                 fhash = facet_map_it->second.hashes[j];
             }
 
+            if(unique_facet_hashes.count(fhash) == 0) {
+                unique_facet_hashes.insert(fhash);
+            } else {
+                continue;
+            }
+
             if(should_compute_stats) {
                 compute_facet_stats(a_facet, fhash, facet_field.type);
```
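At search time, the new `unique_facet_hashes` set then skips repeats within a single document, so a value occurring twice in one array still contributes one count for that document. A self-contained sketch of that counting pattern follows; the document contents and hash values are invented, and doc1 containing "Foo" is an assumption consistent with the test assertions further below:

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <vector>

int main() {
    // Invented per-document facet hashes, stored one-per-element as after
    // this commit: doc1 = ["Foo"] (assumed), doc2 = ["Foo", "Foo", "Bazinga"].
    std::vector<std::vector<uint64_t>> docs = {
        {101},            // doc1
        {101, 101, 202},  // doc2
    };

    // uint64_t matches the element type of the stored facet_hashes vector.
    std::map<uint64_t, size_t> counts;
    for(const auto& hashes : docs) {
        std::set<uint64_t> unique_facet_hashes; // reset per document
        for(uint64_t fhash : hashes) {
            if(unique_facet_hashes.count(fhash) == 0) {
                unique_facet_hashes.insert(fhash);
                counts[fhash]++; // first occurrence within this document
            }
            // repeats within the same document are skipped
        }
    }

    std::cout << "Foo: " << counts[101]
              << ", Bazinga: " << counts[202] << "\n"; // Foo: 2, Bazinga: 1
}
```

As a design note, the check-then-insert pair in the diff could be collapsed into `if(unique_facet_hashes.insert(fhash).second) { ... }`, since `std::set::insert` reports first-time insertion via the `bool` in its return value.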
```diff
@@ -1111,7 +1111,7 @@ TEST_F(CollectionFacetingTest, FacetByArrayField) {
     })"_json;
 
     auto doc2 = R"({
-        "data": ["Foo", "Foo"]
+        "data": ["Foo", "Foo", "Bazinga"]
     })"_json;
 
     ASSERT_TRUE(coll1->add(doc1.dump(), CREATE).ok());
```
||||
@ -1124,9 +1124,23 @@ TEST_F(CollectionFacetingTest, FacetByArrayField) {
|
||||
ASSERT_EQ(2, results["found"].get<size_t>());
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ("data", results["facet_counts"][0]["field_name"]);
|
||||
ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
|
||||
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
|
||||
ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"].get<size_t>());
|
||||
ASSERT_EQ("Foo", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
|
||||
|
||||
ASSERT_EQ(1, results["facet_counts"][0]["counts"][1]["count"].get<size_t>());
|
||||
ASSERT_EQ("Bazinga", results["facet_counts"][0]["counts"][1]["value"].get<std::string>());
|
||||
|
||||
results = coll1->search("*", {}, "", {"data"}, {}, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "data:baz", 30, 4).get();
|
||||
|
||||
ASSERT_EQ(2, results["found"].get<size_t>());
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ("data", results["facet_counts"][0]["field_name"]);
|
||||
ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
|
||||
ASSERT_EQ(1, results["facet_counts"][0]["counts"][0]["count"].get<size_t>());
|
||||
ASSERT_EQ("Bazinga", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
|
||||
}
|
||||
|
||||
TEST_F(CollectionFacetingTest, FacetParseTest){
|
||||
|