Use symbols to index + separators for facet query parsing.

This commit is contained in:
Kishore Nallan 2023-11-07 21:00:35 +05:30
parent 8be74519a7
commit ec7c54d31c
4 changed files with 40 additions and 4 deletions

View File

@ -135,6 +135,7 @@ public:
size_t intersect(facet& a_facet, const field& facet_field,
bool has_facet_query, const std::vector<std::vector<std::string>>& fvalue_searched_tokens,
const std::vector<char>& symbols_to_index, const std::vector<char>& token_separators,
const uint32_t* result_ids, size_t result_id_len,
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
bool is_wildcard_no_filter_query, const std::string& sort_order = "");

View File

@ -260,6 +260,7 @@ size_t facet_index_t::get_facet_count(const std::string& field_name) {
//returns the count of matching seq_ids from result array
size_t facet_index_t::intersect(facet& a_facet, const field& facet_field,
bool has_facet_query, const std::vector<std::vector<std::string>>& fvalue_searched_tokens,
const std::vector<char>& symbols_to_index, const std::vector<char>& token_separators,
const uint32_t* result_ids, size_t result_ids_len,
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
bool is_wildcard_no_filter_query, const std::string& sort_order) {
@ -288,7 +289,8 @@ size_t facet_index_t::intersect(facet& a_facet, const field& facet_field,
auto facet_str = facet_count_it->facet_value;
std::vector<std::string> facet_tokens;
if(facet_field.is_string()) {
Tokenizer(facet_str, true, false, facet_field.locale).tokenize(facet_tokens);
Tokenizer(facet_str, true, false, facet_field.locale,
symbols_to_index, token_separators).tokenize(facet_tokens);
} else {
facet_tokens.push_back(facet_str);
}
@ -306,7 +308,6 @@ size_t facet_index_t::intersect(facet& a_facet, const field& facet_field,
if(!facet_tokens_found) {
found_all_search_tokens = false;
}
}
if (found_all_search_tokens) {

View File

@ -1344,8 +1344,9 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
std::string sort_order = a_facet.is_sort_by_alpha ? a_facet.sort_order : "";
facet_index_v4->intersect(a_facet, facet_field,use_facet_query,
facet_infos[findex].fvalue_searched_tokens, result_ids,
results_size, max_facet_count, facet_results,
facet_infos[findex].fvalue_searched_tokens,
symbols_to_index, token_separators,
result_ids, results_size, max_facet_count, facet_results,
is_wildcard_no_filter_query, sort_order);
for(const auto& kv : facet_results) {

View File

@ -1423,6 +1423,39 @@ TEST_F(CollectionOptimizedFacetingTest, FacetQueryTest) {
ASSERT_EQ("<mark>a</mark>mazon <mark>green</mark>", results["facet_counts"][0]["counts"][0]["highlighted"]);
}
// Verifies that a facet query containing a configured symbol ("[") matches and
// highlights facet values that contain that symbol.
TEST_F(CollectionOptimizedFacetingTest, FacetQueryWithSymbols) {
    // The collection lists "[" and "]" under both `symbols_to_index` and `token_separators`.
    nlohmann::json schema = R"({
"name": "coll1",
"fields": [
{"name": "title", "type": "string", "facet": true}
],
"symbols_to_index": ["[", "]"],
"token_separators": ["[", "]"]
})"_json;

    Collection* coll1 = collectionManager.create_collection(schema).get();

    // One plain title plus three titles that carry bracketed suffixes.
    const std::vector<std::string> article_titles = {
        "Article 4", "Article 4[7]", "Article 4[11]", "Article 4[22][a]"
    };

    for(const auto& article_title : article_titles) {
        nlohmann::json record;
        record["title"] = article_title;
        ASSERT_TRUE(coll1->add(record.dump()).ok());
    }

    // Wildcard search with the facet query "title:article 4[".
    auto results = coll1->search("*", {},
                                 "", {"title"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
                                 spp::sparse_hash_set<std::string>(), 5, "title:article 4[", 30, 4, "", 20, {}, {}, {}, 0,
                                 "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
                                 4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();

    ASSERT_EQ(1, results["facet_counts"].size());

    // Only the three bracketed titles are expected among the facet counts.
    auto& facet_values = results["facet_counts"][0]["counts"];
    ASSERT_EQ(3, facet_values.size());

    ASSERT_EQ("<mark>Article</mark> <mark>4[</mark>7]", facet_values[0]["highlighted"]);
    ASSERT_EQ("<mark>Article</mark> <mark>4[</mark>11]", facet_values[1]["highlighted"]);
    ASSERT_EQ("<mark>Article</mark> <mark>4[</mark>22][a]", facet_values[2]["highlighted"]);
}
TEST_F(CollectionOptimizedFacetingTest, StringLengthTest) {
std::vector<field> fields = {
field("tags", field_types::STRING_ARRAY, true),