mirror of
https://github.com/typesense/typesense.git
synced 2025-05-18 12:42:50 +08:00
Use symbols_to_index and token_separators when tokenizing facet values for facet query parsing.
This commit is contained in:
parent
8be74519a7
commit
ec7c54d31c
@ -135,6 +135,7 @@ public:
|
||||
|
||||
size_t intersect(facet& a_facet, const field& facet_field,
|
||||
bool has_facet_query, const std::vector<std::vector<std::string>>& fvalue_searched_tokens,
|
||||
const std::vector<char>& symbols_to_index, const std::vector<char>& token_separators,
|
||||
const uint32_t* result_ids, size_t result_id_len,
|
||||
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
|
||||
bool is_wildcard_no_filter_query, const std::string& sort_order = "");
|
||||
|
@ -260,6 +260,7 @@ size_t facet_index_t::get_facet_count(const std::string& field_name) {
|
||||
//returns the count of matching seq_ids from result array
|
||||
size_t facet_index_t::intersect(facet& a_facet, const field& facet_field,
|
||||
bool has_facet_query, const std::vector<std::vector<std::string>>& fvalue_searched_tokens,
|
||||
const std::vector<char>& symbols_to_index, const std::vector<char>& token_separators,
|
||||
const uint32_t* result_ids, size_t result_ids_len,
|
||||
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
|
||||
bool is_wildcard_no_filter_query, const std::string& sort_order) {
|
||||
@ -288,7 +289,8 @@ size_t facet_index_t::intersect(facet& a_facet, const field& facet_field,
|
||||
auto facet_str = facet_count_it->facet_value;
|
||||
std::vector<std::string> facet_tokens;
|
||||
if(facet_field.is_string()) {
|
||||
Tokenizer(facet_str, true, false, facet_field.locale).tokenize(facet_tokens);
|
||||
Tokenizer(facet_str, true, false, facet_field.locale,
|
||||
symbols_to_index, token_separators).tokenize(facet_tokens);
|
||||
} else {
|
||||
facet_tokens.push_back(facet_str);
|
||||
}
|
||||
@ -306,7 +308,6 @@ size_t facet_index_t::intersect(facet& a_facet, const field& facet_field,
|
||||
if(!facet_tokens_found) {
|
||||
found_all_search_tokens = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (found_all_search_tokens) {
|
||||
|
@ -1344,8 +1344,9 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
|
||||
std::string sort_order = a_facet.is_sort_by_alpha ? a_facet.sort_order : "";
|
||||
|
||||
facet_index_v4->intersect(a_facet, facet_field,use_facet_query,
|
||||
facet_infos[findex].fvalue_searched_tokens, result_ids,
|
||||
results_size, max_facet_count, facet_results,
|
||||
facet_infos[findex].fvalue_searched_tokens,
|
||||
symbols_to_index, token_separators,
|
||||
result_ids, results_size, max_facet_count, facet_results,
|
||||
is_wildcard_no_filter_query, sort_order);
|
||||
|
||||
for(const auto& kv : facet_results) {
|
||||
|
@ -1423,6 +1423,39 @@ TEST_F(CollectionOptimizedFacetingTest, FacetQueryTest) {
|
||||
ASSERT_EQ("<mark>a</mark>mazon <mark>green</mark>", results["facet_counts"][0]["counts"][0]["highlighted"]);
|
||||
}
|
||||
|
||||
// Facet queries must be tokenized with the collection's `symbols_to_index` and
// `token_separators`: the query "article 4[" should match (and highlight) only the
// titles that actually contain the "[" symbol after "Article 4".
TEST_F(CollectionOptimizedFacetingTest, FacetQueryWithSymbols) {
    nlohmann::json coll_schema = R"({
        "name": "coll1",
        "fields": [
            {"name": "title", "type": "string", "facet": true}
        ],
        "symbols_to_index": ["[", "]"],
        "token_separators": ["[", "]"]
    })"_json;

    Collection* coll1 = collectionManager.create_collection(coll_schema).get();

    // One plain title plus three that carry bracketed suffixes.
    std::vector<std::string> article_titles = {"Article 4", "Article 4[7]", "Article 4[11]", "Article 4[22][a]"};

    for(const auto& article_title : article_titles) {
        nlohmann::json record;
        record["title"] = article_title;
        ASSERT_TRUE(coll1->add(record.dump()).ok());
    }

    auto results = coll1->search("*", {},
                                 "", {"title"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
                                 spp::sparse_hash_set<std::string>(), 5, "title:article 4[", 30, 4, "", 20, {}, {}, {}, 0,
                                 "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
                                 4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();

    ASSERT_EQ(1, results["facet_counts"].size());

    // Only the three bracketed titles are expected among the facet counts.
    auto& title_counts = results["facet_counts"][0]["counts"];
    ASSERT_EQ(3, title_counts.size());
    ASSERT_EQ("<mark>Article</mark> <mark>4[</mark>7]", title_counts[0]["highlighted"]);
    ASSERT_EQ("<mark>Article</mark> <mark>4[</mark>11]", title_counts[1]["highlighted"]);
    ASSERT_EQ("<mark>Article</mark> <mark>4[</mark>22][a]", title_counts[2]["highlighted"]);
}
|
||||
|
||||
TEST_F(CollectionOptimizedFacetingTest, StringLengthTest) {
|
||||
std::vector<field> fields = {
|
||||
field("tags", field_types::STRING_ARRAY, true),
|
||||
|
Loading…
x
Reference in New Issue
Block a user