From ec7c54d31cd26935d3d0aad8dbd9480666c70b65 Mon Sep 17 00:00:00 2001
From: Kishore Nallan
Date: Tue, 7 Nov 2023 21:00:35 +0530
Subject: [PATCH] Use symbols to index + separators for facet query parsing.

---
  Reviewer notes (free text between the three-dash line above and the first
  "diff --git" line; not part of the commit message or of any hunk):

  * What changes: facet_index_t::intersect() gains two parameters,
    symbols_to_index and token_separators, placed right after
    fvalue_searched_tokens. For string facet fields it now passes both to the
    Tokenizer used to split each facet value; the removed line shows the
    Tokenizer was previously constructed without them.
  * Call site: the facet_index_v4->intersect(...) call inside
    Index::do_facets() (src/index.cpp) now passes symbols_to_index and
    token_separators in the matching position.
  * Cleanup: one blank line is dropped in src/facet_index.cpp.
  * Test: FacetQueryWithSymbols creates a collection that lists "[" and "]"
    under both symbols_to_index and token_separators, adds four titles, runs
    the facet query "title:article 4[" and expects exactly the three
    bracketed titles ("Article 4[7]", "Article 4[11]", "Article 4[22][a]").
  * NOTE(review): in this copy the template argument lists look stripped
    (for example "std::vector>&", "std::map&", "spp::sparse_hash_set()") and
    the From: header has no address. Presumably an extraction artifact --
    restore from the upstream commit before trying to apply this patch.
  * NOTE(review): line breaks, blank context lines and indentation inside the
    hunks were reconstructed so that they agree with the hunk headers and the
    diffstat; they may not match upstream byte for byte.

 include/facet_index.h                       |  1 +
 src/facet_index.cpp                         |  5 ++--
 src/index.cpp                               |  5 ++--
 test/collection_optimized_faceting_test.cpp | 33 +++++++++++++++++++++
 4 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/include/facet_index.h b/include/facet_index.h
index 36fc1929..fe7cc322 100644
--- a/include/facet_index.h
+++ b/include/facet_index.h
@@ -135,6 +135,7 @@ public:
 
     size_t intersect(facet& a_facet, const field& facet_field,
                      bool has_facet_query, const std::vector>& fvalue_searched_tokens,
+                     const std::vector& symbols_to_index, const std::vector& token_separators,
                      const uint32_t* result_ids, size_t result_id_len,
                      size_t max_facet_count, std::map& found,
                      bool is_wildcard_no_filter_query, const std::string& sort_order = "");
diff --git a/src/facet_index.cpp b/src/facet_index.cpp
index f1ff5fc1..67447948 100644
--- a/src/facet_index.cpp
+++ b/src/facet_index.cpp
@@ -260,6 +260,7 @@ size_t facet_index_t::get_facet_count(const std::string& field_name) {
 //returns the count of matching seq_ids from result array
 size_t facet_index_t::intersect(facet& a_facet, const field& facet_field,
                                 bool has_facet_query, const std::vector>& fvalue_searched_tokens,
+                                const std::vector& symbols_to_index, const std::vector& token_separators,
                                 const uint32_t* result_ids, size_t result_ids_len,
                                 size_t max_facet_count, std::map& found,
                                 bool is_wildcard_no_filter_query, const std::string& sort_order) {
@@ -288,7 +289,8 @@ size_t facet_index_t::intersect(facet& a_facet, const field& facet_field,
                 auto facet_str = facet_count_it->facet_value;
                 std::vector facet_tokens;
                 if(facet_field.is_string()) {
-                    Tokenizer(facet_str, true, false, facet_field.locale).tokenize(facet_tokens);
+                    Tokenizer(facet_str, true, false, facet_field.locale,
+                              symbols_to_index, token_separators).tokenize(facet_tokens);
                 } else {
                     facet_tokens.push_back(facet_str);
                 }
@@ -306,7 +308,6 @@ size_t facet_index_t::intersect(facet& a_facet, const field& facet_field,
                     if(!facet_tokens_found) {
                         found_all_search_tokens = false;
                     }
-
                 }
 
                 if (found_all_search_tokens) {
diff --git a/src/index.cpp b/src/index.cpp
index e7b04d1d..c508955d 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1344,8 +1344,9 @@ void Index::do_facets(std::vector & facets, facet_query_t & facet_query,
             std::string sort_order = a_facet.is_sort_by_alpha ? a_facet.sort_order : "";
 
             facet_index_v4->intersect(a_facet, facet_field,use_facet_query,
-                                      facet_infos[findex].fvalue_searched_tokens, result_ids,
-                                      results_size, max_facet_count, facet_results,
+                                      facet_infos[findex].fvalue_searched_tokens,
+                                      symbols_to_index, token_separators,
+                                      result_ids, results_size, max_facet_count, facet_results,
                                       is_wildcard_no_filter_query, sort_order);
 
             for(const auto& kv : facet_results) {
diff --git a/test/collection_optimized_faceting_test.cpp b/test/collection_optimized_faceting_test.cpp
index 71ad6527..472d0277 100644
--- a/test/collection_optimized_faceting_test.cpp
+++ b/test/collection_optimized_faceting_test.cpp
@@ -1423,6 +1423,39 @@ TEST_F(CollectionOptimizedFacetingTest, FacetQueryTest) {
     ASSERT_EQ("amazon green", results["facet_counts"][0]["counts"][0]["highlighted"]);
 }
 
+TEST_F(CollectionOptimizedFacetingTest, FacetQueryWithSymbols) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "fields": [
+            {"name": "title", "type": "string", "facet": true}
+        ],
+        "symbols_to_index": ["[", "]"],
+        "token_separators": ["[", "]"]
+    })"_json;
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    std::vector titles = {"Article 4", "Article 4[7]", "Article 4[11]", "Article 4[22][a]"};
+
+    for(size_t i = 0; i < titles.size(); i++) {
+        nlohmann::json doc;
+        doc["title"] = titles[i];
+        ASSERT_TRUE(coll1->add(doc.dump()).ok());
+    }
+
+    auto results = coll1->search("*", {},
+                                 "", {"title"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set(),
+                                 spp::sparse_hash_set(), 5, "title:article 4[", 30, 4, "", 20, {}, {}, {}, 0,
+                                 "", "", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                                 4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();
+
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ(3, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ("Article 4[7]", results["facet_counts"][0]["counts"][0]["highlighted"]);
+    ASSERT_EQ("Article 4[11]", results["facet_counts"][0]["counts"][1]["highlighted"]);
+    ASSERT_EQ("Article 4[22][a]", results["facet_counts"][0]["counts"][2]["highlighted"]);
+}
+
 TEST_F(CollectionOptimizedFacetingTest, StringLengthTest) {
     std::vector fields = {
             field("tags", field_types::STRING_ARRAY, true),