mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 21:52:23 +08:00
Parameterize facet query num typos.
This commit is contained in:
parent
7cc533ff8d
commit
d0a0597fcb
@ -406,7 +406,8 @@ public:
|
||||
size_t max_candidates = 4,
|
||||
const std::vector<infix_t>& infixes = {off},
|
||||
const size_t max_extra_prefix = INT16_MAX,
|
||||
const size_t max_extra_suffix = INT16_MAX) const;
|
||||
const size_t max_extra_suffix = INT16_MAX,
|
||||
const size_t facet_query_num_typos = 2) const;
|
||||
|
||||
Option<bool> get_filter_ids(const std::string & simple_filter_query,
|
||||
std::vector<std::pair<size_t, uint32_t*>>& index_ids);
|
||||
|
@ -301,6 +301,7 @@ struct search_args {
|
||||
std::vector<infix_t> infixes;
|
||||
const size_t max_extra_prefix;
|
||||
const size_t max_extra_suffix;
|
||||
const size_t facet_query_num_typos;
|
||||
|
||||
spp::sparse_hash_set<uint64_t> groups_processed;
|
||||
std::vector<std::vector<art_leaf*>> searched_queries;
|
||||
@ -309,27 +310,17 @@ struct search_args {
|
||||
std::vector<std::vector<KV*>> raw_result_kvs;
|
||||
std::vector<std::vector<KV*>> override_result_kvs;
|
||||
|
||||
search_args(std::vector<query_tokens_t> field_query_tokens,
|
||||
std::vector<search_field_t> search_fields, std::vector<filter> filters,
|
||||
std::vector<facet>& facets,
|
||||
search_args(std::vector<query_tokens_t> field_query_tokens, std::vector<search_field_t> search_fields,
|
||||
std::vector<filter> filters, std::vector<facet>& facets,
|
||||
std::map<size_t, std::map<size_t, uint32_t>> included_ids, std::vector<uint32_t> excluded_ids,
|
||||
std::vector<sort_by> sort_fields_std, facet_query_t facet_query, const std::vector<uint32_t>& num_typos,
|
||||
size_t max_facet_values, size_t max_hits, size_t per_page, size_t page, token_ordering token_order,
|
||||
const std::vector<bool>& prefixes,
|
||||
size_t drop_tokens_threshold, size_t typo_tokens_threshold,
|
||||
const std::vector<bool>& prefixes, size_t drop_tokens_threshold, size_t typo_tokens_threshold,
|
||||
const std::vector<std::string>& group_by_fields, size_t group_limit,
|
||||
const std::string& default_sorting_field,
|
||||
bool prioritize_exact_match,
|
||||
bool exhaustive_search,
|
||||
size_t concurrency,
|
||||
const std::vector<const override_t*>& dynamic_overrides,
|
||||
size_t search_cutoff_ms,
|
||||
size_t min_len_1typo,
|
||||
size_t min_len_2typo,
|
||||
size_t max_candidates,
|
||||
const std::vector<infix_t>& infixes,
|
||||
const size_t max_extra_prefix,
|
||||
const size_t max_extra_suffix):
|
||||
const string& default_sorting_field, bool prioritize_exact_match, bool exhaustive_search,
|
||||
size_t concurrency, const std::vector<const override_t*>& dynamic_overrides, size_t search_cutoff_ms,
|
||||
size_t min_len_1typo, size_t min_len_2typo, size_t max_candidates, const std::vector<infix_t>& infixes,
|
||||
const size_t max_extra_prefix, const size_t max_extra_suffix, const size_t facet_query_num_typos) :
|
||||
field_query_tokens(field_query_tokens),
|
||||
search_fields(search_fields), filters(filters), facets(facets),
|
||||
included_ids(included_ids), excluded_ids(excluded_ids), sort_fields_std(sort_fields_std),
|
||||
@ -341,7 +332,8 @@ struct search_args {
|
||||
exhaustive_search(exhaustive_search), concurrency(concurrency),
|
||||
filter_overrides(dynamic_overrides), search_cutoff_ms(search_cutoff_ms),
|
||||
min_len_1typo(min_len_1typo), min_len_2typo(min_len_2typo), max_candidates(max_candidates),
|
||||
infixes(infixes), max_extra_prefix(max_extra_prefix), max_extra_suffix(max_extra_suffix) {
|
||||
infixes(infixes), max_extra_prefix(max_extra_prefix), max_extra_suffix(max_extra_suffix),
|
||||
facet_query_num_typos(facet_query_num_typos) {
|
||||
|
||||
const size_t topster_size = std::max((size_t)1, max_hits); // needs to be at least 1 since scoring is mandatory
|
||||
topster = new Topster(topster_size, group_limit);
|
||||
@ -687,36 +679,22 @@ public:
|
||||
|
||||
void run_search(search_args* search_params);
|
||||
|
||||
void search(std::vector<query_tokens_t>& field_query_tokens,
|
||||
const std::vector<search_field_t> & search_fields,
|
||||
std::vector<filter> & filters, std::vector<facet> & facets,
|
||||
facet_query_t & facet_query,
|
||||
const std::map<size_t, std::map<size_t, uint32_t>> & included_ids_map,
|
||||
const std::vector<uint32_t> & excluded_ids,
|
||||
const std::vector<sort_by> & sort_fields_std, const std::vector<uint32_t>& num_typos,
|
||||
Topster* topster, Topster* curated_topster,
|
||||
const size_t per_page, const size_t page, const token_ordering token_order,
|
||||
const std::vector<bool>& prefixes, const size_t drop_tokens_threshold,
|
||||
size_t & all_result_ids_len,
|
||||
spp::sparse_hash_set<uint64_t>& groups_processed,
|
||||
std::vector<std::vector<art_leaf*>> & searched_queries,
|
||||
std::vector<std::vector<KV*>> & raw_result_kvs,
|
||||
std::vector<std::vector<KV*>> & override_result_kvs,
|
||||
const size_t typo_tokens_threshold,
|
||||
const size_t group_limit,
|
||||
const std::vector<std::string>& group_by_fields,
|
||||
const std::vector<const override_t*>& filter_overrides,
|
||||
const std::string& default_sorting_field,
|
||||
bool prioritize_exact_match,
|
||||
bool exhaustive_search,
|
||||
size_t concurrency,
|
||||
size_t search_cutoff_ms,
|
||||
size_t min_len_1typo,
|
||||
size_t min_len_2typo,
|
||||
size_t max_candidates,
|
||||
const std::vector<infix_t>& infixes,
|
||||
const size_t max_extra_prefix,
|
||||
const size_t max_extra_suffix) const;
|
||||
void search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
|
||||
std::vector<filter>& filters, std::vector<facet>& facets, facet_query_t& facet_query,
|
||||
const std::map<size_t, std::map<size_t, uint32_t>>& included_ids_map,
|
||||
const std::vector<uint32_t>& excluded_ids, const std::vector<sort_by>& sort_fields_std,
|
||||
const std::vector<uint32_t>& num_typos, Topster* topster, Topster* curated_topster,
|
||||
const size_t per_page,
|
||||
const size_t page, const token_ordering token_order, const std::vector<bool>& prefixes,
|
||||
const size_t drop_tokens_threshold, size_t& all_result_ids_len,
|
||||
spp::sparse_hash_set<uint64_t>& groups_processed, std::vector<std::vector<art_leaf*>>& searched_queries,
|
||||
std::vector<std::vector<KV*>>& raw_result_kvs, std::vector<std::vector<KV*>>& override_result_kvs,
|
||||
const size_t typo_tokens_threshold, const size_t group_limit,
|
||||
const std::vector<std::string>& group_by_fields, const std::vector<const override_t*>& filter_overrides,
|
||||
const string& default_sorting_field, bool prioritize_exact_match, bool exhaustive_search,
|
||||
size_t concurrency, size_t search_cutoff_ms, size_t min_len_1typo, size_t min_len_2typo,
|
||||
size_t max_candidates, const std::vector<infix_t>& infixes, const size_t max_extra_prefix,
|
||||
const size_t max_extra_suffix, const size_t facet_query_num_typos) const;
|
||||
|
||||
Option<uint32_t> remove(const uint32_t seq_id, const nlohmann::json & document, const bool is_update);
|
||||
|
||||
@ -788,6 +766,7 @@ public:
|
||||
static void remove_matched_tokens(std::vector<std::string>& tokens, const std::set<std::string>& rule_token_set) ;
|
||||
|
||||
void compute_facet_infos(const std::vector<facet>& facets, facet_query_t& facet_query,
|
||||
const size_t facet_query_num_typos,
|
||||
const uint32_t* all_result_ids, const size_t& all_result_ids_len,
|
||||
const std::vector<std::string>& group_by_fields,
|
||||
std::vector<facet_info_t>& facet_infos) const;
|
||||
|
@ -697,7 +697,8 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query, const s
|
||||
const size_t max_candidates,
|
||||
const std::vector<infix_t>& infixes,
|
||||
const size_t max_extra_prefix,
|
||||
const size_t max_extra_suffix) const {
|
||||
const size_t max_extra_suffix,
|
||||
const size_t facet_query_num_typos) const {
|
||||
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
@ -1027,11 +1028,12 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query, const s
|
||||
sort_fields_std, facet_query, num_typos, max_facet_values, max_hits,
|
||||
per_page, page, token_order, prefixes,
|
||||
drop_tokens_threshold, typo_tokens_threshold,
|
||||
group_by_fields, group_limit, default_sorting_field, prioritize_exact_match,
|
||||
group_by_fields, group_limit, default_sorting_field,
|
||||
prioritize_exact_match,
|
||||
exhaustive_search, 4, filter_overrides,
|
||||
search_stop_millis,
|
||||
min_len_1typo, min_len_2typo, max_candidates, infixes,
|
||||
max_extra_prefix, max_extra_suffix);
|
||||
max_extra_prefix, max_extra_suffix, facet_query_num_typos);
|
||||
|
||||
index->run_search(search_params);
|
||||
|
||||
|
@ -597,6 +597,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
|
||||
const char *FACET_BY = "facet_by";
|
||||
const char *FACET_QUERY = "facet_query";
|
||||
const char *FACET_QUERY_NUM_TYPOS = "facet_query_num_typos";
|
||||
const char *MAX_FACET_VALUES = "max_facet_values";
|
||||
|
||||
const char *GROUP_BY = "group_by";
|
||||
@ -687,6 +688,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
|
||||
size_t max_facet_values = 10;
|
||||
std::string simple_facet_query;
|
||||
size_t facet_query_num_typos = 2;
|
||||
size_t snippet_threshold = 30;
|
||||
size_t highlight_affix_num_tokens = 4;
|
||||
std::string highlight_full_fields;
|
||||
@ -727,6 +729,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
{MAX_EXTRA_PREFIX, &max_extra_prefix},
|
||||
{MAX_EXTRA_SUFFIX, &max_extra_suffix},
|
||||
{MAX_CANDIDATES, &max_candidates},
|
||||
{FACET_QUERY_NUM_TYPOS, &facet_query_num_typos},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, std::string*> str_values = {
|
||||
|
@ -1652,7 +1652,8 @@ void Index::run_search(search_args* search_params) {
|
||||
search_params->max_candidates,
|
||||
search_params->infixes,
|
||||
search_params->max_extra_prefix,
|
||||
search_params->max_extra_suffix);
|
||||
search_params->max_extra_suffix,
|
||||
search_params->facet_query_num_typos);
|
||||
}
|
||||
|
||||
void Index::collate_included_ids(const std::vector<std::string>& q_included_tokens,
|
||||
@ -2081,37 +2082,24 @@ void Index::search_infix(const std::string& query, const std::string& field_name
|
||||
}
|
||||
}
|
||||
|
||||
void Index::search(std::vector<query_tokens_t>& field_query_tokens,
|
||||
const std::vector<search_field_t>& the_fields,
|
||||
std::vector<filter>& filters,
|
||||
std::vector<facet>& facets, facet_query_t& facet_query,
|
||||
const std::map<size_t, std::map<size_t, uint32_t>> & included_ids_map,
|
||||
const std::vector<uint32_t> & excluded_ids,
|
||||
const std::vector<sort_by> & sort_fields_std, const std::vector<uint32_t>& num_typos,
|
||||
Topster* topster,
|
||||
Topster* curated_topster,
|
||||
const size_t per_page, const size_t page, const token_ordering token_order,
|
||||
const std::vector<bool>& prefixes, const size_t drop_tokens_threshold,
|
||||
size_t & all_result_ids_len,
|
||||
void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
|
||||
std::vector<filter>& filters, std::vector<facet>& facets, facet_query_t& facet_query,
|
||||
const std::map<size_t, std::map<size_t, uint32_t>>& included_ids_map,
|
||||
const std::vector<uint32_t>& excluded_ids, const std::vector<sort_by>& sort_fields_std,
|
||||
const std::vector<uint32_t>& num_typos, Topster* topster, Topster* curated_topster,
|
||||
const size_t per_page,
|
||||
const size_t page, const token_ordering token_order, const std::vector<bool>& prefixes,
|
||||
const size_t drop_tokens_threshold, size_t& all_result_ids_len,
|
||||
spp::sparse_hash_set<uint64_t>& groups_processed,
|
||||
std::vector<std::vector<art_leaf*>>& searched_queries,
|
||||
std::vector<std::vector<KV*>> & raw_result_kvs,
|
||||
std::vector<std::vector<KV*>> & override_result_kvs,
|
||||
const size_t typo_tokens_threshold,
|
||||
const size_t group_limit,
|
||||
std::vector<std::vector<KV*>>& raw_result_kvs, std::vector<std::vector<KV*>>& override_result_kvs,
|
||||
const size_t typo_tokens_threshold, const size_t group_limit,
|
||||
const std::vector<std::string>& group_by_fields,
|
||||
const std::vector<const override_t*>& filter_overrides,
|
||||
const std::string& default_sorting_field,
|
||||
bool prioritize_exact_match,
|
||||
const bool exhaustive_search,
|
||||
const size_t concurrency,
|
||||
const size_t search_cutoff_ms,
|
||||
size_t min_len_1typo,
|
||||
size_t min_len_2typo,
|
||||
const size_t max_candidates,
|
||||
const std::vector<infix_t>& infixes,
|
||||
const size_t max_extra_prefix,
|
||||
const size_t max_extra_suffix) const {
|
||||
const string& default_sorting_field, bool prioritize_exact_match, bool exhaustive_search,
|
||||
size_t concurrency, size_t search_cutoff_ms, size_t min_len_1typo, size_t min_len_2typo,
|
||||
size_t max_candidates, const std::vector<infix_t>& infixes, const size_t max_extra_prefix,
|
||||
const size_t max_extra_suffix, const size_t facet_query_num_typos) const {
|
||||
|
||||
search_begin = std::chrono::high_resolution_clock::now();
|
||||
search_stop_ms = search_cutoff_ms;
|
||||
@ -2231,7 +2219,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens,
|
||||
std::condition_variable cv_process;
|
||||
|
||||
std::vector<facet_info_t> facet_infos(facets.size());
|
||||
compute_facet_infos(facets, facet_query, all_result_ids, all_result_ids_len,
|
||||
compute_facet_infos(facets, facet_query, facet_query_num_typos, all_result_ids, all_result_ids_len,
|
||||
group_by_fields, facet_infos);
|
||||
|
||||
std::vector<std::vector<facet>> facet_batches(num_threads);
|
||||
@ -2316,7 +2304,8 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens,
|
||||
}
|
||||
|
||||
std::vector<facet_info_t> facet_infos(facets.size());
|
||||
compute_facet_infos(facets, facet_query, &included_ids[0], included_ids.size(), group_by_fields, facet_infos);
|
||||
compute_facet_infos(facets, facet_query, facet_query_num_typos,
|
||||
&included_ids[0], included_ids.size(), group_by_fields, facet_infos);
|
||||
do_facets(facets, facet_query, facet_infos, group_limit, group_by_fields, &included_ids[0], included_ids.size());
|
||||
|
||||
all_result_ids_len += curated_topster->size;
|
||||
@ -2845,6 +2834,7 @@ void Index::handle_exclusion(const size_t num_search_fields, std::vector<query_t
|
||||
}
|
||||
|
||||
void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t& facet_query,
|
||||
const size_t facet_query_num_typos,
|
||||
const uint32_t* all_result_ids, const size_t& all_result_ids_len,
|
||||
const std::vector<std::string>& group_by_fields,
|
||||
std::vector<facet_info_t>& facet_infos) const {
|
||||
@ -2907,7 +2897,7 @@ void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t&
|
||||
|
||||
search_field(0, qtokens, search_tokens, nullptr, 0, num_toks_dropped,
|
||||
facet_field, facet_field.faceted_name(),
|
||||
all_result_ids, all_result_ids_len, {}, {}, 2, searched_queries, topster, groups_processed,
|
||||
all_result_ids, all_result_ids_len, {}, {}, facet_query_num_typos, searched_queries, topster, groups_processed,
|
||||
&field_result_ids, field_result_ids_len, field_num_results, 0, group_by_fields,
|
||||
false, 4, query_hashes, MAX_SCORE, true, 0, 1, false, 3, 1000, 4);
|
||||
|
||||
|
@ -811,5 +811,30 @@ TEST_F(CollectionFacetingTest, FacetQueryOnStringArray) {
|
||||
ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
|
||||
ASSERT_EQ("<mark>Country</mark> Punk <mark>Roc</mark>k", results["facet_counts"][0]["counts"][0]["highlighted"].get<std::string>());
|
||||
|
||||
// with the facet_query_num_typos parameter
|
||||
|
||||
results = coll1->search("*", {}, "", {"genres"}, sort_fields, {0}, 0, 1, FREQUENCY,
|
||||
{false}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "genres: eletronic",
|
||||
30, 4, "title", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true,
|
||||
4, {off}, 32767, 32767, 1).get();
|
||||
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
|
||||
ASSERT_EQ("<mark>Electroni</mark>c", results["facet_counts"][0]["counts"][0]["highlighted"].get<std::string>());
|
||||
|
||||
results = coll1->search("*", {}, "", {"genres"}, sort_fields, {0}, 0, 1, FREQUENCY,
|
||||
{false}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "genres: eletronic",
|
||||
30, 4, "title", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true,
|
||||
4, {off}, 32767, 32767, 0).get();
|
||||
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(0, results["facet_counts"][0]["counts"].size());
|
||||
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user