// Patch summary (leading text before the first "diff --git" header; the patch payload below is unchanged).
//
// Files touched: include/facet_index.h, src/facet_index.cpp, src/index.cpp,
// test/collection_faceting_test.cpp, test/collection_optimized_faceting_test.cpp.
//
// Interface change:
//   facet_index_t::intersect gains `const field& facet_field` as its second parameter, in both the
//   declaration and the definition. The one call site shown, in Index::do_facets, is updated to pass
//   `facet_field`.
//
// Behaviour change inside intersect, in the `if(has_facet_query)` branch:
//   - Removed: the unconditional `transform(..., ::tolower)` on the facet value and the
//     `StringUtils::split(facet_str, facet_tokens, " ")` call.
//   - Added: when `facet_field.is_string()` is true the value is tokenized with
//     `Tokenizer(facet_str, true, false, facet_field.locale).tokenize(facet_tokens)`; otherwise the
//     whole value is pushed as a single token.
//   NOTE(review): the meaning of the two boolean Tokenizer arguments is not visible in this patch -
//   presumably normalization and no-op flags; confirm against the Tokenizer declaration.
//   NOTE(review): the non-string branch no longer lower-cases the value, whereas the removed
//   ::tolower call ran for every field type - confirm this is intended.
//
// Tests added (same body under both CollectionFacetingTest and CollectionOptimizedFacetingTest):
//   FacetQueryWithDifferentLocale indexes two documents whose `phone` values are "çapeta" and
//   "teléfono justo", then asserts that the facet queries "phone: ç" and "phone: telé" each return
//   exactly one facet count carrying the corresponding value.
//   NOTE(review): the schema in these tests has no `locale` key on any field, so despite the test
//   name they appear to exercise only the default-locale path - confirm whether a field with an
//   explicit locale should be covered too.
//
// NOTE(review): in this copy of the patch the original newlines appear to have been collapsed and
// template argument lists look stripped (e.g. `std::vector>&`, `std::map& found`), so it is unlikely
// to apply as-is - regenerate it from the commit before use.
diff --git a/include/facet_index.h b/include/facet_index.h index af643fe8..36fc1929 100644 --- a/include/facet_index.h +++ b/include/facet_index.h @@ -133,7 +133,7 @@ public: size_t get_facet_count(const std::string& field_name); - size_t intersect(facet& a_facet, + size_t intersect(facet& a_facet, const field& facet_field, bool has_facet_query, const std::vector>& fvalue_searched_tokens, const uint32_t* result_ids, size_t result_id_len, size_t max_facet_count, std::map& found, diff --git a/src/facet_index.cpp b/src/facet_index.cpp index 530af159..f1ff5fc1 100644 --- a/src/facet_index.cpp +++ b/src/facet_index.cpp @@ -258,7 +258,7 @@ size_t facet_index_t::get_facet_count(const std::string& field_name) { } //returns the count of matching seq_ids from result array -size_t facet_index_t::intersect(facet& a_facet, +size_t facet_index_t::intersect(facet& a_facet, const field& facet_field, bool has_facet_query, const std::vector>& fvalue_searched_tokens, const uint32_t* result_ids, size_t result_ids_len, size_t max_facet_count, std::map& found, @@ -286,9 +286,12 @@ size_t facet_index_t::intersect(facet& a_facet, if(has_facet_query) { bool found_search_token = false; auto facet_str = facet_count_it->facet_value; - transform(facet_str.begin(), facet_str.end(), facet_str.begin(), ::tolower); std::vector facet_tokens; - StringUtils::split(facet_str, facet_tokens, " "); + if(facet_field.is_string()) { + Tokenizer(facet_str, true, false, facet_field.locale).tokenize(facet_tokens); + } else { + facet_tokens.push_back(facet_str); + } for(const auto& searched_tokens : fvalue_searched_tokens) { bool found_all_search_tokens = true; diff --git a/src/index.cpp b/src/index.cpp index c171ede0..1ba401c3 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1340,7 +1340,7 @@ void Index::do_facets(std::vector & facets, facet_query_t & facet_query, std::map facet_results; std::string sort_order = a_facet.is_sort_by_alpha ? 
a_facet.sort_order : ""; - facet_index_v4->intersect(a_facet, use_facet_query, + facet_index_v4->intersect(a_facet, facet_field,use_facet_query, facet_infos[findex].fvalue_searched_tokens, result_ids, results_size, max_facet_count, facet_results, is_wildcard_no_filter_query, sort_order); diff --git a/test/collection_faceting_test.cpp b/test/collection_faceting_test.cpp index 8f3e03cd..50066856 100644 --- a/test/collection_faceting_test.cpp +++ b/test/collection_faceting_test.cpp @@ -2502,6 +2502,70 @@ TEST_F(CollectionFacetingTest, FacetSortValidation) { ASSERT_EQ("Fusion Plus", results["facet_counts"][0]["counts"][0]["value"]); } +TEST_F(CollectionFacetingTest, FacetQueryWithDifferentLocale) { + nlohmann::json schema = R"({ + "name": "coll1", + "fields": [ + {"name": "phone", "type": "string", "optional": false, "facet": true }, + {"name": "brand", "type": "string", "optional": false, "facet": true }, + {"name": "rating", "type": "float", "optional": false, "facet": true } + ] + })"_json; + + auto op = collectionManager.create_collection(schema); + ASSERT_TRUE(op.ok()); + Collection *coll1 = op.get(); + + nlohmann::json doc; + doc["phone"] = "çapeta"; + doc["brand"] = "Samsung"; + doc["rating"] = 4.1; + auto add_op = coll1->add(doc.dump(), CREATE); + ASSERT_TRUE(add_op.ok()); + + doc["phone"] = "teléfono justo"; + doc["brand"] = "Oneplus"; + doc["rating"] = 4.6; + add_op = coll1->add(doc.dump(), CREATE); + ASSERT_TRUE(add_op.ok()); + + auto search_op = coll1->search("*", query_fields, "", {"phone(sort_by:_alpha:desc)"}, + sort_fields, {0}, 10, 1, FREQUENCY,{false}, + Index::DROP_TOKENS_THRESHOLD,spp::sparse_hash_set(), + spp::sparse_hash_set(),10, "phone: ç", + 30UL, 4UL,"", 1UL, + "", "", {}, 3UL, "", + "", {},4294967295UL, true, + false, true, "", false, 6000000UL, + 4UL,7UL, fallback, 4UL, {off}, 32767UL, + 32767UL, 2UL, 2UL, false, + "", true, 0UL, max_score, 100UL, + 0UL, 4294967295UL, VALUE); + + auto results = search_op.get(); + ASSERT_EQ(1, 
results["facet_counts"].size()); + ASSERT_EQ(1, results["facet_counts"][0]["counts"].size()); + ASSERT_EQ("çapeta", results["facet_counts"][0]["counts"][0]["value"]); + + search_op = coll1->search("*", query_fields, "", {"phone(sort_by:_alpha:desc)"}, + sort_fields, {0}, 10, 1, FREQUENCY,{false}, + Index::DROP_TOKENS_THRESHOLD,spp::sparse_hash_set(), + spp::sparse_hash_set(),10, "phone: telé", + 30UL, 4UL,"", 1UL, + "", "", {}, 3UL, "", + "", {},4294967295UL, true, + false, true, "", false, 6000000UL, + 4UL,7UL, fallback, 4UL, {off}, 32767UL, + 32767UL, 2UL, 2UL, false, + "", true, 0UL, max_score, 100UL, + 0UL, 4294967295UL, VALUE); + + results = search_op.get(); + ASSERT_EQ(1, results["facet_counts"].size()); + ASSERT_EQ(1, results["facet_counts"][0]["counts"].size()); + ASSERT_EQ("teléfono justo", results["facet_counts"][0]["counts"][0]["value"]); +} + TEST_F(CollectionFacetingTest, FhashInt64MapTest) { std::vector visitors = {227489798, 124098972, 180247624}; facet_index_t facet_index_v4; diff --git a/test/collection_optimized_faceting_test.cpp b/test/collection_optimized_faceting_test.cpp index 7d2a73df..bc40b063 100644 --- a/test/collection_optimized_faceting_test.cpp +++ b/test/collection_optimized_faceting_test.cpp @@ -2177,6 +2177,70 @@ TEST_F(CollectionOptimizedFacetingTest, FacetSortValidation) { ASSERT_EQ("Fusion Plus", results["facet_counts"][0]["counts"][0]["value"]); } +TEST_F(CollectionOptimizedFacetingTest, FacetQueryWithDifferentLocale) { + nlohmann::json schema = R"({ + "name": "coll1", + "fields": [ + {"name": "phone", "type": "string", "optional": false, "facet": true }, + {"name": "brand", "type": "string", "optional": false, "facet": true }, + {"name": "rating", "type": "float", "optional": false, "facet": true } + ] + })"_json; + + auto op = collectionManager.create_collection(schema); + ASSERT_TRUE(op.ok()); + Collection *coll1 = op.get(); + + nlohmann::json doc; + doc["phone"] = "çapeta"; + doc["brand"] = "Samsung"; + doc["rating"] = 4.1; + 
auto add_op = coll1->add(doc.dump(), CREATE); + ASSERT_TRUE(add_op.ok()); + + doc["phone"] = "teléfono justo"; + doc["brand"] = "Oneplus"; + doc["rating"] = 4.6; + add_op = coll1->add(doc.dump(), CREATE); + ASSERT_TRUE(add_op.ok()); + + auto search_op = coll1->search("*", query_fields, "", {"phone(sort_by:_alpha:desc)"}, + sort_fields, {0}, 10, 1, FREQUENCY,{false}, + Index::DROP_TOKENS_THRESHOLD,spp::sparse_hash_set(), + spp::sparse_hash_set(),10, "phone: ç", + 30UL, 4UL,"", 1UL, + "", "", {}, 3UL, "", + "", {},4294967295UL, true, + false, true, "", false, 6000000UL, + 4UL,7UL, fallback, 4UL, {off}, 32767UL, + 32767UL, 2UL, 2UL, false, + "", true, 0UL, max_score, 100UL, + 0UL, 4294967295UL, VALUE); + + auto results = search_op.get(); + ASSERT_EQ(1, results["facet_counts"].size()); + ASSERT_EQ(1, results["facet_counts"][0]["counts"].size()); + ASSERT_EQ("çapeta", results["facet_counts"][0]["counts"][0]["value"]); + + search_op = coll1->search("*", query_fields, "", {"phone(sort_by:_alpha:desc)"}, + sort_fields, {0}, 10, 1, FREQUENCY,{false}, + Index::DROP_TOKENS_THRESHOLD,spp::sparse_hash_set(), + spp::sparse_hash_set(),10, "phone: telé", + 30UL, 4UL,"", 1UL, + "", "", {}, 3UL, "", + "", {},4294967295UL, true, + false, true, "", false, 6000000UL, + 4UL,7UL, fallback, 4UL, {off}, 32767UL, + 32767UL, 2UL, 2UL, false, + "", true, 0UL, max_score, 100UL, + 0UL, 4294967295UL, VALUE); + + results = search_op.get(); + ASSERT_EQ(1, results["facet_counts"].size()); + ASSERT_EQ(1, results["facet_counts"][0]["counts"].size()); + ASSERT_EQ("teléfono justo", results["facet_counts"][0]["counts"][0]["value"]); +} + TEST_F(CollectionOptimizedFacetingTest, ValueIndexStatsMinMax) { Collection *coll1;