This commit is contained in:
krunal1313 2023-05-01 17:37:26 +05:30
parent aea35edb84
commit 712296bfa1
4 changed files with 28 additions and 23 deletions

View File

@ -461,7 +461,7 @@ public:
const size_t facet_sample_percent = 100,
const size_t facet_sample_threshold = 0,
const size_t page_offset = UINT32_MAX,
bool force_intersection = false
bool use_facet_intersection = false
) const;
Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;

View File

@ -314,6 +314,8 @@ private:
// sort_field => (seq_id => value)
spp::sparse_hash_map<std::string, spp::sparse_hash_map<uint32_t, int64_t>*> sort_index;
// Iterator type over sort_index entries (sort field name => per-document value map).
// Allows the result of a sort_index lookup to be held and handed to helper functions.
typedef spp::sparse_hash_map<std::string,
spp::sparse_hash_map<uint32_t, int64_t>*>::iterator sort_index_iterator;
// str_sort_field => adi_tree_t
spp::sparse_hash_map<std::string, adi_tree_t*> str_sort_index;
@ -364,7 +366,7 @@ private:
size_t group_limit, const std::vector<std::string>& group_by_fields,
const uint32_t* result_ids, size_t results_size,
int max_facet_count, bool is_wildcard_query, bool no_filters_provided,
bool force_intersection = false
bool use_facet_intersection = false
) const;
bool static_filter_query_eval(const override_t* override, std::vector<std::string>& tokens,
@ -626,7 +628,7 @@ public:
// Public operations
Option<bool> run_search(search_args* search_params, const std::string& collection_name,
bool force_intersection);
bool use_facet_intersection);
Option<bool> search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
const text_match_type_t match_type,
@ -651,7 +653,7 @@ public:
const size_t max_extra_suffix, const size_t facet_query_num_typos,
const bool filter_curated_hits, enable_t split_join_tokens,
const vector_query_t& vector_query, size_t facet_sample_percent, size_t facet_sample_threshold,
const std::string& collection_name, bool force_intersection = false) const;
const std::string& collection_name, bool use_facet_intersection = false) const;
void remove_field(uint32_t seq_id, const nlohmann::json& document, const std::string& field_name);

View File

@ -1074,7 +1074,7 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
const size_t facet_sample_percent,
const size_t facet_sample_threshold,
const size_t page_offset,
bool force_intersection) const {
bool use_facet_intersection) const {
std::shared_lock lock(mutex);
@ -1524,7 +1524,7 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
std::unique_ptr<search_args> search_params_guard(search_params);
auto search_op = index->run_search(search_params, name, force_intersection);
auto search_op = index->run_search(search_params, name, use_facet_intersection);
// filter_tree_root might be updated in Index::static_filter_query_eval.
filter_tree_root_guard.release();

View File

@ -1228,10 +1228,8 @@ void Index::compute_facet_stats(facet &a_facet, const int64_t raw_value, const s
}
}
int64_t Index::get_doc_val_from_sort_index(const std::string& field_name, uint32_t doc_seq_id) const {
int64_t Index::get_doc_val_from_sort_index(sort_index_iterator sort_index_it, uint32_t doc_seq_id) const {
auto sort_index_it = sort_index.find(field_name);
if(sort_index_it != sort_index.end()){
auto doc_id_val_map = sort_index_it->second;
auto doc_seq_id_it = doc_id_val_map->find(doc_seq_id);
@ -1241,7 +1239,7 @@ int64_t Index::get_doc_val_from_sort_index(const std::string& field_name, uint32
}
}
return 0;
return INT64_MAX;
}
void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
@ -1250,7 +1248,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
const size_t group_limit, const std::vector<std::string>& group_by_fields,
const uint32_t* result_ids, size_t results_size,
int max_facet_count, bool is_wildcard_query, bool no_filters_provided,
bool force_intersection) const {
bool use_facet_intersection) const {
// assumed that facet fields have already been validated upstream
for(size_t findex=0; findex < facets.size(); findex++) {
auto& a_facet = facets[findex];
@ -1280,19 +1278,19 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
bool use_hashes = false;
if(!force_intersection) {
if(!use_facet_intersection) {
use_hashes = true;
}
#ifndef TEST_BUILD
// non-test build should not accidentally set this flag
force_intersection = false;
use_facet_intersection = false;
use_hashes = false;
#endif
if(results_size && facet_records && ((facet_records <= 10 || is_wildcard_query) &&
!use_facet_query && group_limit == 0 && no_filters_provided)
&& !use_hashes || force_intersection) {
&& !use_hashes || use_facet_intersection) {
//LOG(INFO) << "Using intersection to find facets";
a_facet.is_intersected = true;
@ -1387,12 +1385,17 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
fhash = facet_map_it->second.hashes[j];
}
if(should_compute_stats) {
doc_val = get_doc_val_from_sort_index(a_facet.field_name, doc_seq_id);
compute_facet_stats(a_facet, doc_val, facet_field.type);
doc_val = get_doc_val_from_sort_index(sort_index_it, doc_seq_id);
if(doc_val != INT64_MAX) {
compute_facet_stats(a_facet, doc_val, facet_field.type);
}
}
if(a_facet.is_range_query) {
doc_val = get_doc_val_from_sort_index(a_facet.field_name, doc_seq_id);
if(doc_val == INT64_MAX) {
doc_val = get_doc_val_from_sort_index(sort_index_it, doc_seq_id);
}
std::pair<std::string, std::string> range_pair {};
if(a_facet.get_range(std::to_string(doc_val), range_pair)) {
@ -1932,7 +1935,7 @@ Option<bool> Index::get_approximate_reference_filter_ids_with_lock(filter_node_t
}
Option<bool> Index::run_search(search_args* search_params, const std::string& collection_name,
bool force_intersection) {
bool use_facet_intersection) {
return search(search_params->field_query_tokens,
search_params->search_fields,
search_params->match_type,
@ -1968,7 +1971,7 @@ Option<bool> Index::run_search(search_args* search_params, const std::string& co
search_params->facet_sample_percent,
search_params->facet_sample_threshold,
collection_name,
force_intersection);
use_facet_intersection);
}
void Index::collate_included_ids(const std::vector<token_t>& q_included_tokens,
@ -2420,7 +2423,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
const bool filter_curated_hits, const enable_t split_join_tokens,
const vector_query_t& vector_query,
size_t facet_sample_percent, size_t facet_sample_threshold,
const std::string& collection_name, bool force_intersection) const {
const std::string& collection_name, bool use_facet_intersection) const {
std::shared_lock lock(mutex);
uint32_t approx_filter_ids_length = 0;
@ -2983,7 +2986,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
batch_result_ids, batch_res_len, &facet_infos, max_facet_values,
is_wildcard_query, no_filters_provided, estimate_facets, facet_sample_percent,
&parent_search_begin, &parent_search_stop_ms, &parent_search_cutoff,
&num_processed, &m_process, &cv_process, force_intersection]() {
&num_processed, &m_process, &cv_process, use_facet_intersection]() {
search_begin_us = parent_search_begin;
search_stop_us = parent_search_stop_ms;
search_cutoff = parent_search_cutoff;
@ -2994,7 +2997,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
facet_infos, group_limit, group_by_fields,
batch_result_ids, batch_res_len, max_facet_values,
is_wildcard_query, no_filters_provided,
force_intersection);
use_facet_intersection);
std::unique_lock<std::mutex> lock(m_process);
num_processed++;
parent_search_cutoff = parent_search_cutoff || search_cutoff;
@ -3080,7 +3083,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
do_facets(facets, facet_query, estimate_facets, facet_sample_percent,
facet_infos, group_limit, group_by_fields, &included_ids_vec[0],
included_ids_vec.size(), max_facet_values, is_wildcard_query, no_filters_provided,
force_intersection);
use_facet_intersection);
all_result_ids_len += curated_topster->size;