mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 06:02:26 +08:00
Add flag for lazy filtering.
This commit is contained in:
parent
9c0ca02812
commit
c1b49ef0b9
@ -584,7 +584,8 @@ public:
|
||||
std::string conversation_id = "",
|
||||
const std::string& override_tags_str = "",
|
||||
const std::string& voice_query = "",
|
||||
bool enable_typos_for_numerical_tokens = true) const;
|
||||
bool enable_typos_for_numerical_tokens = true,
|
||||
bool enable_lazy_filter = true) const;
|
||||
|
||||
Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;
|
||||
|
||||
|
@ -174,6 +174,8 @@ struct search_args {
|
||||
size_t facet_sample_threshold;
|
||||
drop_tokens_param_t drop_tokens_mode;
|
||||
|
||||
bool enable_lazy_filter;
|
||||
|
||||
search_args(std::vector<query_tokens_t> field_query_tokens, std::vector<search_field_t> search_fields,
|
||||
const text_match_type_t match_type,
|
||||
filter_node_t* filter_tree_root, std::vector<facet>& facets,
|
||||
@ -189,7 +191,8 @@ struct search_args {
|
||||
size_t min_len_1typo, size_t min_len_2typo, size_t max_candidates, const std::vector<enable_t>& infixes,
|
||||
const size_t max_extra_prefix, const size_t max_extra_suffix, const size_t facet_query_num_typos,
|
||||
const bool filter_curated_hits, const enable_t split_join_tokens, vector_query_t& vector_query,
|
||||
size_t facet_sample_percent, size_t facet_sample_threshold, drop_tokens_param_t drop_tokens_mode) :
|
||||
size_t facet_sample_percent, size_t facet_sample_threshold, drop_tokens_param_t drop_tokens_mode,
|
||||
bool enable_lazy_filter) :
|
||||
field_query_tokens(field_query_tokens),
|
||||
search_fields(search_fields), match_type(match_type), filter_tree_root(filter_tree_root), facets(facets),
|
||||
included_ids(included_ids), excluded_ids(excluded_ids), sort_fields_std(sort_fields_std),
|
||||
@ -208,7 +211,7 @@ struct search_args {
|
||||
facet_query_num_typos(facet_query_num_typos), filter_curated_hits(filter_curated_hits),
|
||||
split_join_tokens(split_join_tokens), vector_query(vector_query),
|
||||
facet_sample_percent(facet_sample_percent), facet_sample_threshold(facet_sample_threshold),
|
||||
drop_tokens_mode(drop_tokens_mode) {
|
||||
drop_tokens_mode(drop_tokens_mode), enable_lazy_filter(enable_lazy_filter) {
|
||||
|
||||
const size_t topster_size = std::max((size_t)1, max_hits); // needs to be atleast 1 since scoring is mandatory
|
||||
topster = new Topster(topster_size, group_limit);
|
||||
@ -689,7 +692,8 @@ public:
|
||||
const std::string& collection_name,
|
||||
const drop_tokens_param_t drop_tokens_mode,
|
||||
facet_index_type_t facet_index_type = DETECT,
|
||||
bool enable_typos_for_numerical_tokens = true
|
||||
bool enable_typos_for_numerical_tokens = true,
|
||||
bool enable_lazy_filter = true
|
||||
) const;
|
||||
|
||||
void remove_field(uint32_t seq_id, const nlohmann::json& document, const std::string& field_name,
|
||||
|
@ -1757,7 +1757,8 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
std::string conversation_id,
|
||||
const std::string& override_tags_str,
|
||||
const std::string& voice_query,
|
||||
bool enable_typos_for_numerical_tokens) const {
|
||||
bool enable_typos_for_numerical_tokens,
|
||||
bool enable_lazy_filter) const {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
// setup thread local vars
|
||||
@ -2343,7 +2344,8 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
min_len_1typo, min_len_2typo, max_candidates, infixes,
|
||||
max_extra_prefix, max_extra_suffix, facet_query_num_typos,
|
||||
filter_curated_hits, split_join_tokens, vector_query,
|
||||
facet_sample_percent, facet_sample_threshold, drop_tokens_param);
|
||||
facet_sample_percent, facet_sample_threshold, drop_tokens_param,
|
||||
enable_lazy_filter);
|
||||
|
||||
std::unique_ptr<search_args> search_params_guard(search_params);
|
||||
|
||||
|
@ -1475,6 +1475,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
const char *VOICE_QUERY = "voice_query";
|
||||
|
||||
const char *ENABLE_TYPOS_FOR_NUMERICAL_TOKENS = "enable_typos_for_numerical_tokens";
|
||||
const char *ENABLE_LAZY_FILTER = "enable_lazy_filter";
|
||||
|
||||
// enrich params with values from embedded params
|
||||
for(auto& item: embedded_params.items()) {
|
||||
@ -1595,6 +1596,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
bool enable_highlight_v1 = true;
|
||||
text_match_type_t match_type = max_score;
|
||||
bool enable_typos_for_numerical_tokens = true;
|
||||
bool enable_lazy_filter = true;
|
||||
|
||||
size_t remote_embedding_timeout_ms = 5000;
|
||||
size_t remote_embedding_num_tries = 2;
|
||||
@ -1667,6 +1669,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
{PRIORITIZE_NUM_MATCHING_FIELDS, &prioritize_num_matching_fields},
|
||||
{GROUP_MISSING_VALUES, &group_missing_values},
|
||||
{ENABLE_TYPOS_FOR_NUMERICAL_TOKENS, &enable_typos_for_numerical_tokens},
|
||||
{ENABLE_LAZY_FILTER, &enable_lazy_filter},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, std::vector<std::string>*> str_list_values = {
|
||||
@ -1881,7 +1884,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
conversation_id,
|
||||
override_tags,
|
||||
voice_query,
|
||||
enable_typos_for_numerical_tokens);
|
||||
enable_typos_for_numerical_tokens,
|
||||
enable_lazy_filter);
|
||||
|
||||
uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
std::chrono::high_resolution_clock::now() - begin).count();
|
||||
|
@ -2252,7 +2252,8 @@ Option<bool> Index::run_search(search_args* search_params, const std::string& co
|
||||
collection_name,
|
||||
search_params->drop_tokens_mode,
|
||||
facet_index_type,
|
||||
enable_typos_for_numerical_tokens
|
||||
enable_typos_for_numerical_tokens,
|
||||
search_params->enable_lazy_filter
|
||||
);
|
||||
}
|
||||
|
||||
@ -2740,7 +2741,8 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
const std::string& collection_name,
|
||||
const drop_tokens_param_t drop_tokens_mode,
|
||||
facet_index_type_t facet_index_type,
|
||||
bool enable_typos_for_numerical_tokens) const {
|
||||
bool enable_typos_for_numerical_tokens,
|
||||
bool enable_lazy_filter) const {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
auto filter_result_iterator = new filter_result_iterator_t(collection_name, this, filter_tree_root,
|
||||
@ -2763,7 +2765,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
}
|
||||
#else
|
||||
|
||||
if (filter_result_iterator->approx_filter_ids_length < 25'000) {
|
||||
if (!enable_lazy_filter || filter_result_iterator->approx_filter_ids_length < 25'000) {
|
||||
filter_result_iterator->compute_iterators();
|
||||
}
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user