Merge pull request #1348 from krunal1313/facet_query_fix

fixing locale with facet_query
This commit is contained in:
Kishore Nallan 2023-10-30 20:49:44 +05:30 committed by GitHub
commit c8267807d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 136 additions and 5 deletions

View File

@ -133,7 +133,7 @@ public:
size_t get_facet_count(const std::string& field_name);
size_t intersect(facet& a_facet,
size_t intersect(facet& a_facet, const field& facet_field,
bool has_facet_query, const std::vector<std::vector<std::string>>& fvalue_searched_tokens,
const uint32_t* result_ids, size_t result_id_len,
size_t max_facet_count, std::map<std::string, docid_count_t>& found,

View File

@ -258,7 +258,7 @@ size_t facet_index_t::get_facet_count(const std::string& field_name) {
}
//returns the count of matching seq_ids from result array
size_t facet_index_t::intersect(facet& a_facet,
size_t facet_index_t::intersect(facet& a_facet, const field& facet_field,
bool has_facet_query, const std::vector<std::vector<std::string>>& fvalue_searched_tokens,
const uint32_t* result_ids, size_t result_ids_len,
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
@ -286,9 +286,12 @@ size_t facet_index_t::intersect(facet& a_facet,
if(has_facet_query) {
bool found_search_token = false;
auto facet_str = facet_count_it->facet_value;
transform(facet_str.begin(), facet_str.end(), facet_str.begin(), ::tolower);
std::vector<std::string> facet_tokens;
StringUtils::split(facet_str, facet_tokens, " ");
if(facet_field.is_string()) {
Tokenizer(facet_str, true, false, facet_field.locale).tokenize(facet_tokens);
} else {
facet_tokens.push_back(facet_str);
}
for(const auto& searched_tokens : fvalue_searched_tokens) {
bool found_all_search_tokens = true;

View File

@ -1340,7 +1340,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
std::map<std::string, docid_count_t> facet_results;
std::string sort_order = a_facet.is_sort_by_alpha ? a_facet.sort_order : "";
facet_index_v4->intersect(a_facet, use_facet_query,
facet_index_v4->intersect(a_facet, facet_field,use_facet_query,
facet_infos[findex].fvalue_searched_tokens, result_ids,
results_size, max_facet_count, facet_results,
is_wildcard_no_filter_query, sort_order);

View File

@ -2502,6 +2502,70 @@ TEST_F(CollectionFacetingTest, FacetSortValidation) {
ASSERT_EQ("Fusion Plus", results["facet_counts"][0]["counts"][0]["value"]);
}
// Checks that a facet query matches facet values containing non-ASCII
// characters ("ç", "é"). Two documents are indexed and each facet query is
// expected to return exactly the one matching `phone` value.
// NOTE(review): despite the test name, no field in this schema sets an explicit
// `locale` -- confirm whether a locale-specific case should be added as well.
TEST_F(CollectionFacetingTest, FacetQueryWithDifferentLocale) {
    nlohmann::json schema = R"({
        "name": "coll1",
        "fields": [
            {"name": "phone", "type": "string", "optional": false, "facet": true },
            {"name": "brand", "type": "string", "optional": false, "facet": true },
            {"name": "rating", "type": "float", "optional": false, "facet": true }
        ]
    })"_json;

    auto op = collectionManager.create_collection(schema);
    ASSERT_TRUE(op.ok());
    Collection *coll1 = op.get();

    nlohmann::json doc;

    doc["phone"] = "çapeta";
    doc["brand"] = "Samsung";
    doc["rating"] = 4.1;
    auto add_op = coll1->add(doc.dump(), CREATE);
    ASSERT_TRUE(add_op.ok());

    doc["phone"] = "teléfono justo";
    doc["brand"] = "Oneplus";
    doc["rating"] = 4.6;
    add_op = coll1->add(doc.dump(), CREATE);
    ASSERT_TRUE(add_op.ok());

    // Wildcard search faceting on `phone` (sorted alphabetically, descending);
    // only the facet query string differs between the two searches below.
    auto search_with_facet_query = [&](const std::string& facet_query) {
        return coll1->search("*", query_fields, "", {"phone(sort_by:_alpha:desc)"},
                             sort_fields, {0}, 10, 1, FREQUENCY, {false},
                             Index::DROP_TOKENS_THRESHOLD, spp::sparse_hash_set<std::string>(),
                             spp::sparse_hash_set<std::string>(), 10, facet_query,
                             30UL, 4UL, "", 1UL,
                             "", "", {}, 3UL, "<mark>",
                             "</mark>", {}, 4294967295UL, true,
                             false, true, "", false, 6000000UL,
                             4UL, 7UL, fallback, 4UL, {off}, 32767UL,
                             32767UL, 2UL, 2UL, false,
                             "", true, 0UL, max_score, 100UL,
                             0UL, 4294967295UL, VALUE);
    };

    // Facet query consisting of a single non-ASCII character.
    auto search_op = search_with_facet_query("phone: ç");
    // Fail here with a clear signal if the search itself errored, instead of
    // letting the JSON assertions below fail on an empty result.
    ASSERT_TRUE(search_op.ok());
    auto results = search_op.get();

    ASSERT_EQ(1, results["facet_counts"].size());
    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
    ASSERT_EQ("çapeta", results["facet_counts"][0]["counts"][0]["value"]);

    // Facet query that is a prefix containing an accented character; the
    // matching facet value has more than one word.
    search_op = search_with_facet_query("phone: telé");
    ASSERT_TRUE(search_op.ok());
    results = search_op.get();

    ASSERT_EQ(1, results["facet_counts"].size());
    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
    ASSERT_EQ("teléfono justo", results["facet_counts"][0]["counts"][0]["value"]);
}
TEST_F(CollectionFacetingTest, FhashInt64MapTest) {
std::vector<int64_t> visitors = {227489798, 124098972, 180247624};
facet_index_t facet_index_v4;

View File

@ -2177,6 +2177,70 @@ TEST_F(CollectionOptimizedFacetingTest, FacetSortValidation) {
ASSERT_EQ("Fusion Plus", results["facet_counts"][0]["counts"][0]["value"]);
}
// Checks that a facet query matches facet values containing non-ASCII
// characters ("ç", "é"). Two documents are indexed and each facet query is
// expected to return exactly the one matching `phone` value.
// NOTE(review): despite the test name, no field in this schema sets an explicit
// `locale` -- confirm whether a locale-specific case should be added as well.
TEST_F(CollectionOptimizedFacetingTest, FacetQueryWithDifferentLocale) {
    nlohmann::json schema = R"({
        "name": "coll1",
        "fields": [
            {"name": "phone", "type": "string", "optional": false, "facet": true },
            {"name": "brand", "type": "string", "optional": false, "facet": true },
            {"name": "rating", "type": "float", "optional": false, "facet": true }
        ]
    })"_json;

    auto op = collectionManager.create_collection(schema);
    ASSERT_TRUE(op.ok());
    Collection *coll1 = op.get();

    nlohmann::json doc;

    doc["phone"] = "çapeta";
    doc["brand"] = "Samsung";
    doc["rating"] = 4.1;
    auto add_op = coll1->add(doc.dump(), CREATE);
    ASSERT_TRUE(add_op.ok());

    doc["phone"] = "teléfono justo";
    doc["brand"] = "Oneplus";
    doc["rating"] = 4.6;
    add_op = coll1->add(doc.dump(), CREATE);
    ASSERT_TRUE(add_op.ok());

    // Wildcard search faceting on `phone` (sorted alphabetically, descending);
    // only the facet query string differs between the two searches below.
    auto search_with_facet_query = [&](const std::string& facet_query) {
        return coll1->search("*", query_fields, "", {"phone(sort_by:_alpha:desc)"},
                             sort_fields, {0}, 10, 1, FREQUENCY, {false},
                             Index::DROP_TOKENS_THRESHOLD, spp::sparse_hash_set<std::string>(),
                             spp::sparse_hash_set<std::string>(), 10, facet_query,
                             30UL, 4UL, "", 1UL,
                             "", "", {}, 3UL, "<mark>",
                             "</mark>", {}, 4294967295UL, true,
                             false, true, "", false, 6000000UL,
                             4UL, 7UL, fallback, 4UL, {off}, 32767UL,
                             32767UL, 2UL, 2UL, false,
                             "", true, 0UL, max_score, 100UL,
                             0UL, 4294967295UL, VALUE);
    };

    // Facet query consisting of a single non-ASCII character.
    auto search_op = search_with_facet_query("phone: ç");
    // Fail here with a clear signal if the search itself errored, instead of
    // letting the JSON assertions below fail on an empty result.
    ASSERT_TRUE(search_op.ok());
    auto results = search_op.get();

    ASSERT_EQ(1, results["facet_counts"].size());
    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
    ASSERT_EQ("çapeta", results["facet_counts"][0]["counts"][0]["value"]);

    // Facet query that is a prefix containing an accented character; the
    // matching facet value has more than one word.
    search_op = search_with_facet_query("phone: telé");
    ASSERT_TRUE(search_op.ok());
    results = search_op.get();

    ASSERT_EQ(1, results["facet_counts"].size());
    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
    ASSERT_EQ("teléfono justo", results["facet_counts"][0]["counts"][0]["value"]);
}
TEST_F(CollectionOptimizedFacetingTest, ValueIndexStatsMinMax) {
Collection *coll1;